From 02a2f383b21da427b901171add813d5d90d9623e Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Mon, 29 Jul 2013 17:34:52 +0000 Subject: [PATCH] LUCENE-5127: FixedGapTermsIndex should use monotonic compression git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1508147 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 7 + .../codecs/blockterms/BlockTermsReader.java | 12 +- .../blockterms/FixedGapTermsIndexReader.java | 294 +++++------------- .../blockterms/FixedGapTermsIndexWriter.java | 87 +++--- .../blockterms/TermsIndexReaderBase.java | 2 - .../VariableGapTermsIndexReader.java | 94 ++---- .../codecs/pulsing/PulsingPostingsFormat.java | 3 +- .../TestFixedGapPostingsFormat.java | 4 +- ...stVarGapDocFreqIntervalPostingsFormat.java | 35 +++ ...TestVarGapFixedIntervalPostingsFormat.java | 35 +++ .../lucene/codecs/BlockTreeTermsReader.java | 28 +- .../lucene40/Lucene40PostingsFormat.java | 3 +- .../lucene41/Lucene41PostingsFormat.java | 3 +- .../org/apache/lucene/index/CheckIndex.java | 2 +- .../apache/lucene/index/DirectoryReader.java | 51 +-- .../index/DocumentsWriterPerThread.java | 1 - .../org/apache/lucene/index/IndexWriter.java | 11 +- .../lucene/index/IndexWriterConfig.java | 26 -- .../lucene/index/LiveIndexWriterConfig.java | 103 ------ .../lucene/index/ReadersAndLiveDocs.java | 66 +--- .../lucene/index/SegmentCoreReaders.java | 11 +- .../apache/lucene/index/SegmentMerger.java | 6 +- .../apache/lucene/index/SegmentReadState.java | 18 +- .../apache/lucene/index/SegmentReader.java | 10 +- .../lucene/index/SegmentWriteState.java | 11 +- .../lucene/index/StandardDirectoryReader.java | 22 +- .../lucene40/TestLucene40PostingsFormat.java | 2 +- .../perfield/TestPerFieldPostingsFormat2.java | 2 +- .../org/apache/lucene/index/TestCodecs.java | 7 +- .../lucene/index/TestDirectoryReader.java | 48 --- .../test/org/apache/lucene/index/TestDoc.java | 8 +- .../lucene/index/TestDocumentWriter.java | 8 +- .../apache/lucene/index/TestIndexWriter.java | 44 --- .../lucene/index/TestIndexWriterConfig.java | 23 -- .../index/TestIndexWriterForceMerge.java | 6 +- .../lucene/index/TestIndexWriterReader.java | 31 +- .../lucene/index/TestSegmentMerger.java | 8 +- .../lucene/index/TestSegmentReader.java | 2 +- .../lucene/index/TestSegmentTermDocs.java | 33 +- .../lucene/facet/util/TaxonomyMergeUtils.java | 2 +- .../codecs/lucene41ords/Lucene41WithOrds.java | 11 +- .../Lucene41VarGapDocFreqInterval.java | 144 +++++++++ .../Lucene41VarGapFixedInterval.java | 141 +++++++++ .../lucene/codecs/lucene41vargap/package.html | 25 ++ .../MockFixedIntBlockPostingsFormat.java | 1 - .../MockVariableIntBlockPostingsFormat.java | 1 - .../mockrandom/MockRandomPostingsFormat.java | 20 +- .../codecs/mocksep/MockSepPostingsFormat.java | 1 - .../NestedPulsingPostingsFormat.java | 3 +- .../index/BaseDocValuesFormatTestCase.java | 2 +- .../index/BasePostingsFormatTestCase.java | 87 +++++- .../org/apache/lucene/index/RandomCodec.java | 6 +- .../lucene/index/RandomIndexWriter.java | 2 +- .../apache/lucene/util/LuceneTestCase.java | 10 - .../org.apache.lucene.codecs.PostingsFormat | 2 + .../apache/solr/core/IndexReaderFactory.java | 22 +- .../solr/core/StandardIndexReaderFactory.java | 2 +- .../apache/solr/update/SolrIndexConfig.java | 10 +- .../collection1/conf/solrconfig-termindex.xml | 5 +- .../solr/core/IndexReaderFactoryTest.java | 3 - .../test/org/apache/solr/core/TestConfig.java | 21 -- .../solr/collection1/conf/solrconfig.xml | 14 - .../solr/collection1/conf/solrconfig.xml | 14 - 63 files changed, 717 
insertions(+), 999 deletions(-) create mode 100644 lucene/codecs/src/test/org/apache/lucene/codecs/blockterms/TestVarGapDocFreqIntervalPostingsFormat.java create mode 100644 lucene/codecs/src/test/org/apache/lucene/codecs/blockterms/TestVarGapFixedIntervalPostingsFormat.java create mode 100644 lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41vargap/Lucene41VarGapDocFreqInterval.java create mode 100644 lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41vargap/Lucene41VarGapFixedInterval.java create mode 100644 lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41vargap/package.html diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index ea707fc9264..60118c53e29 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -27,6 +27,13 @@ Changes in backwards compatibility policy no longer support multiple "dictionaries" as there is only one dictionary available. (Dawid Weiss) +* LUCENE-5127: Reduce RAM usage of FixedGapTermsIndex. Remove + IndexWriterConfig.setTermIndexInterval, IndexWriterConfig.setReaderTermsIndexDivisor, + and termsIndexDivisor from StandardDirectoryReader. These options have been no-ops + with the default codec since Lucene 4.0. If you want to configure the interval for + this term index, pass it directly in your codec, where it can also be configured + per-field. (Robert Muir) + New Features * LUCENE-4747: Move to Java 7 as minimum Java version. diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java index eb13fe907ac..7fa0e14d55c 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java @@ -313,11 +313,6 @@ public class BlockTermsReader extends FieldsProducer { calls next() (which is not "typical"), then we'll do the real seek */ private boolean seekPending; - /* How many blocks we've read since last seek. Once this - is >= indexEnum.getDivisor() we set indexIsCurrent to false (since - the index can no long bracket seek-within-block). 
*/ - private int blocksSinceSeek; - private byte[] termSuffixes; private ByteArrayDataInput termSuffixesReader = new ByteArrayDataInput(); @@ -420,8 +415,7 @@ public class BlockTermsReader extends FieldsProducer { assert result; indexIsCurrent = true; - didIndexNext = false; - blocksSinceSeek = 0; + didIndexNext = false; if (doOrd) { state.ord = indexEnum.ord()-1; @@ -729,7 +723,6 @@ public class BlockTermsReader extends FieldsProducer { indexIsCurrent = true; didIndexNext = false; - blocksSinceSeek = 0; seekPending = false; state.ord = indexEnum.ord()-1; @@ -802,8 +795,7 @@ public class BlockTermsReader extends FieldsProducer { postingsReader.readTermsBlock(in, fieldInfo, state); - blocksSinceSeek++; - indexIsCurrent = indexIsCurrent && (blocksSinceSeek < indexReader.getDivisor()); + indexIsCurrent = false; //System.out.println(" indexIsCurrent=" + indexIsCurrent); return true; diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexReader.java index 2066fcf3b1e..0d69e9406e4 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexReader.java @@ -27,7 +27,7 @@ import org.apache.lucene.index.FieldInfo; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.PagedBytes; -import org.apache.lucene.util.packed.PackedInts; +import org.apache.lucene.util.packed.MonotonicBlockPackedReader; import java.util.HashMap; import java.util.Comparator; @@ -43,21 +43,15 @@ import org.apache.lucene.index.IndexFileNames; */ public class FixedGapTermsIndexReader extends TermsIndexReaderBase { - // NOTE: long is overkill here, since this number is 128 - // by default and only indexDivisor * 128 if you change - // the indexDivisor at search time. But, we use this in a + // NOTE: long is overkill here, but we use this in a // number of places to multiply out the actual ord, and we // will overflow int during those multiplies. 
So to avoid // having to upgrade each multiple to long in multiple // places (error prone), we use long here: - private long totalIndexInterval; - - private int indexDivisor; - final private int indexInterval; - - // Closed if indexLoaded is true: - private IndexInput in; - private volatile boolean indexLoaded; + private final long indexInterval; + + private final int packedIntsVersion; + private final int blocksize; private final Comparator termComp; @@ -72,35 +66,24 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase { // start of the field info data private long dirOffset; - private final int version; - - public FixedGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, int indexDivisor, Comparator termComp, String segmentSuffix, IOContext context) + public FixedGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, Comparator termComp, String segmentSuffix, IOContext context) throws IOException { this.termComp = termComp; - - assert indexDivisor == -1 || indexDivisor > 0; - - in = dir.openInput(IndexFileNames.segmentFileName(segment, segmentSuffix, FixedGapTermsIndexWriter.TERMS_INDEX_EXTENSION), context); + + final IndexInput in = dir.openInput(IndexFileNames.segmentFileName(segment, segmentSuffix, FixedGapTermsIndexWriter.TERMS_INDEX_EXTENSION), context); boolean success = false; try { - version = readHeader(in); - indexInterval = in.readInt(); + readHeader(in); + indexInterval = in.readVInt(); if (indexInterval < 1) { throw new CorruptIndexException("invalid indexInterval: " + indexInterval + " (resource=" + in + ")"); } - this.indexDivisor = indexDivisor; - - if (indexDivisor < 0) { - totalIndexInterval = indexInterval; - } else { - // In case terms index gets loaded, later, on demand - totalIndexInterval = indexInterval * indexDivisor; - } - assert totalIndexInterval > 0; + packedIntsVersion = in.readVInt(); + blocksize = in.readVInt(); seekDir(in, dirOffset); @@ -112,7 +95,7 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase { //System.out.println("FGR: init seg=" + segment + " div=" + indexDivisor + " nF=" + numFields); for(int i=0;i 2B index terms if (numIndexTerms < 0) { throw new CorruptIndexException("invalid numIndexTerms: " + numIndexTerms + " (resource=" + in + ")"); } @@ -124,47 +107,33 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase { throw new CorruptIndexException("invalid packedIndexStart: " + packedIndexStart + " indexStart: " + indexStart + "numIndexTerms: " + numIndexTerms + " (resource=" + in + ")"); } final FieldInfo fieldInfo = fieldInfos.fieldInfo(field); - FieldIndexData previous = fields.put(fieldInfo, new FieldIndexData(fieldInfo, numIndexTerms, indexStart, termsStart, packedIndexStart, packedOffsetsStart)); + FieldIndexData previous = fields.put(fieldInfo, new FieldIndexData(in, indexStart, termsStart, packedIndexStart, packedOffsetsStart, numIndexTerms)); if (previous != null) { throw new CorruptIndexException("duplicate field: " + fieldInfo.name + " (resource=" + in + ")"); } } success = true; } finally { - if (!success) { + if (success) { + IOUtils.close(in); + } else { IOUtils.closeWhileHandlingException(in); } - if (indexDivisor > 0) { - in.close(); - in = null; - if (success) { - indexLoaded = true; - } - termBytesReader = termBytes.freeze(true); - } + termBytesReader = termBytes.freeze(true); } } - - @Override - public int getDivisor() { - return indexDivisor; - } - private int readHeader(IndexInput input) throws IOException { - int version = 
CodecUtil.checkHeader(input, FixedGapTermsIndexWriter.CODEC_NAME, - FixedGapTermsIndexWriter.VERSION_START, FixedGapTermsIndexWriter.VERSION_CURRENT); - if (version < FixedGapTermsIndexWriter.VERSION_APPEND_ONLY) { - dirOffset = input.readLong(); - } - return version; + private void readHeader(IndexInput input) throws IOException { + CodecUtil.checkHeader(input, FixedGapTermsIndexWriter.CODEC_NAME, + FixedGapTermsIndexWriter.VERSION_CURRENT, FixedGapTermsIndexWriter.VERSION_CURRENT); } private class IndexEnum extends FieldIndexEnum { - private final FieldIndexData.CoreFieldIndex fieldIndex; + private final FieldIndexData fieldIndex; private final BytesRef term = new BytesRef(); private long ord; - public IndexEnum(FieldIndexData.CoreFieldIndex fieldIndex) { + public IndexEnum(FieldIndexData fieldIndex) { this.fieldIndex = fieldIndex; } @@ -175,12 +144,11 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase { @Override public long seek(BytesRef target) { - int lo = 0; // binary search - int hi = fieldIndex.numIndexTerms - 1; - assert totalIndexInterval > 0 : "totalIndexInterval=" + totalIndexInterval; + long lo = 0; // binary search + long hi = fieldIndex.numIndexTerms - 1; while (hi >= lo) { - int mid = (lo + hi) >>> 1; + long mid = (lo + hi) >>> 1; final long offset = fieldIndex.termOffsets.get(mid); final int length = (int) (fieldIndex.termOffsets.get(1+mid) - offset); @@ -193,7 +161,7 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase { lo = mid + 1; } else { assert mid >= 0; - ord = mid*totalIndexInterval; + ord = mid*indexInterval; return fieldIndex.termsStart + fieldIndex.termsDictOffsets.get(mid); } } @@ -207,17 +175,17 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase { final int length = (int) (fieldIndex.termOffsets.get(1+hi) - offset); termBytesReader.fillSlice(term, fieldIndex.termBytesStart + offset, length); - ord = hi*totalIndexInterval; + ord = hi*indexInterval; return fieldIndex.termsStart + fieldIndex.termsDictOffsets.get(hi); } @Override public long next() { - final int idx = 1 + (int) (ord / totalIndexInterval); + final long idx = 1 + (ord / indexInterval); if (idx >= fieldIndex.numIndexTerms) { return -1; } - ord += totalIndexInterval; + ord += indexInterval; final long offset = fieldIndex.termOffsets.get(idx); final int length = (int) (fieldIndex.termOffsets.get(1+idx) - offset); @@ -232,13 +200,13 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase { @Override public long seek(long ord) { - int idx = (int) (ord / totalIndexInterval); + long idx = ord / indexInterval; // caller must ensure ord is in bounds assert idx < fieldIndex.numIndexTerms; final long offset = fieldIndex.termOffsets.get(idx); final int length = (int) (fieldIndex.termOffsets.get(1+idx) - offset); termBytesReader.fillSlice(term, fieldIndex.termBytesStart + offset, length); - this.ord = idx * totalIndexInterval; + this.ord = idx * indexInterval; return fieldIndex.termsStart + fieldIndex.termsDictOffsets.get(idx); } } @@ -249,176 +217,58 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase { } private final class FieldIndexData { - - volatile CoreFieldIndex coreIndex; - - private final long indexStart; - private final long termsStart; - private final long packedIndexStart; - private final long packedOffsetsStart; - - private final int numIndexTerms; - - public FieldIndexData(FieldInfo fieldInfo, int numIndexTerms, long indexStart, long termsStart, long packedIndexStart, - long packedOffsetsStart) throws 
IOException { - + // where this field's terms begin in the packed byte[] + // data + final long termBytesStart; + + // offset into index termBytes + final MonotonicBlockPackedReader termOffsets; + + // index pointers into main terms dict + final MonotonicBlockPackedReader termsDictOffsets; + + final long numIndexTerms; + final long termsStart; + + public FieldIndexData(IndexInput in, long indexStart, long termsStart, long packedIndexStart, long packedOffsetsStart, long numIndexTerms) throws IOException { + this.termsStart = termsStart; - this.indexStart = indexStart; - this.packedIndexStart = packedIndexStart; - this.packedOffsetsStart = packedOffsetsStart; + termBytesStart = termBytes.getPointer(); + + IndexInput clone = in.clone(); + clone.seek(indexStart); + this.numIndexTerms = numIndexTerms; - - if (indexDivisor > 0) { - loadTermsIndex(); - } - } - - private void loadTermsIndex() throws IOException { - if (coreIndex == null) { - coreIndex = new CoreFieldIndex(indexStart, termsStart, packedIndexStart, packedOffsetsStart, numIndexTerms); - } - } - - private final class CoreFieldIndex { - - // where this field's terms begin in the packed byte[] - // data - final long termBytesStart; - - // offset into index termBytes - final PackedInts.Reader termOffsets; - - // index pointers into main terms dict - final PackedInts.Reader termsDictOffsets; - - final int numIndexTerms; - final long termsStart; - - public CoreFieldIndex(long indexStart, long termsStart, long packedIndexStart, long packedOffsetsStart, int numIndexTerms) throws IOException { - - this.termsStart = termsStart; - termBytesStart = termBytes.getPointer(); - - IndexInput clone = in.clone(); - clone.seek(indexStart); - - // -1 is passed to mean "don't load term index", but - // if we are then later loaded it's overwritten with - // a real value - assert indexDivisor > 0; - - this.numIndexTerms = 1+(numIndexTerms-1) / indexDivisor; - - assert this.numIndexTerms > 0: "numIndexTerms=" + numIndexTerms + " indexDivisor=" + indexDivisor; - - if (indexDivisor == 1) { - // Default (load all index terms) is fast -- slurp in the images from disk: - - try { - final long numTermBytes = packedIndexStart - indexStart; - termBytes.copy(clone, numTermBytes); - - // records offsets into main terms dict file - termsDictOffsets = PackedInts.getReader(clone); - assert termsDictOffsets.size() == numIndexTerms; - - // records offsets into byte[] term data - termOffsets = PackedInts.getReader(clone); - assert termOffsets.size() == 1+numIndexTerms; - } finally { - clone.close(); - } - } else { - // Get packed iterators - final IndexInput clone1 = in.clone(); - final IndexInput clone2 = in.clone(); - - try { - // Subsample the index terms - clone1.seek(packedIndexStart); - final PackedInts.ReaderIterator termsDictOffsetsIter = PackedInts.getReaderIterator(clone1, PackedInts.DEFAULT_BUFFER_SIZE); - - clone2.seek(packedOffsetsStart); - final PackedInts.ReaderIterator termOffsetsIter = PackedInts.getReaderIterator(clone2, PackedInts.DEFAULT_BUFFER_SIZE); - - // TODO: often we can get by w/ fewer bits per - // value, below.. .but this'd be more complex: - // we'd have to try @ fewer bits and then grow - // if we overflowed it. 
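For illustration (a sketch, not part of the patch itself): the divisor-based subsampling code removed here is superseded by the MonotonicBlockPackedWriter/Reader pair. Below is a minimal round trip, assuming only the Lucene 4.4 packed-ints APIs this patch itself calls; the block size, packed-ints version, and value count must agree between writer and reader, and RAMDirectory plus the file name "offsets" are demo choices only:

import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.packed.MonotonicBlockPackedReader;
import org.apache.lucene.util.packed.MonotonicBlockPackedWriter;
import org.apache.lucene.util.packed.PackedInts;

public class MonotonicRoundTrip {
  public static void main(String[] args) throws Exception {
    final int blockSize = 4096;                     // same value as FixedGapTermsIndexWriter.BLOCKSIZE
    final long[] offsets = {0, 17, 35, 50, 72, 90}; // non-decreasing, like term/dict file offsets

    RAMDirectory dir = new RAMDirectory();
    IndexOutput out = dir.createOutput("offsets", IOContext.DEFAULT);
    MonotonicBlockPackedWriter writer = new MonotonicBlockPackedWriter(out, blockSize);
    for (long v : offsets) {
      writer.add(v);                                // each block stores bit-packed deltas from a linear fit
    }
    writer.finish();
    out.close();

    IndexInput in = dir.openInput("offsets", IOContext.DEFAULT);
    MonotonicBlockPackedReader reader =
        new MonotonicBlockPackedReader(in, PackedInts.VERSION_CURRENT, blockSize, offsets.length, false);
    for (int i = 0; i < offsets.length; i++) {
      if (reader.get(i) != offsets[i]) {            // random access by ordinal, no divisor needed
        throw new AssertionError("mismatch at " + i);
      }
    }
    in.close();
    dir.close();
  }
}

Because the offsets grow monotonically, each block stores only small bit-packed deviations from a per-block linear interpolation, so the whole offsets table stays compact in RAM without the old divisor-based subsampling.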
- - PackedInts.Mutable termsDictOffsetsM = PackedInts.getMutable(this.numIndexTerms, termsDictOffsetsIter.getBitsPerValue(), PackedInts.DEFAULT); - PackedInts.Mutable termOffsetsM = PackedInts.getMutable(this.numIndexTerms+1, termOffsetsIter.getBitsPerValue(), PackedInts.DEFAULT); - - termsDictOffsets = termsDictOffsetsM; - termOffsets = termOffsetsM; - - int upto = 0; - - long termOffsetUpto = 0; - - while(upto < this.numIndexTerms) { - // main file offset copies straight over - termsDictOffsetsM.set(upto, termsDictOffsetsIter.next()); - - termOffsetsM.set(upto, termOffsetUpto); - - long termOffset = termOffsetsIter.next(); - long nextTermOffset = termOffsetsIter.next(); - final int numTermBytes = (int) (nextTermOffset - termOffset); - - clone.seek(indexStart + termOffset); - assert indexStart + termOffset < clone.length() : "indexStart=" + indexStart + " termOffset=" + termOffset + " len=" + clone.length(); - assert indexStart + termOffset + numTermBytes < clone.length(); - - termBytes.copy(clone, numTermBytes); - termOffsetUpto += numTermBytes; - - upto++; - if (upto == this.numIndexTerms) { - break; - } - - // skip terms: - termsDictOffsetsIter.next(); - for(int i=0;i 0: "numIndexTerms=" + numIndexTerms; + + // slurp in the images from disk: + + try { + final long numTermBytes = packedIndexStart - indexStart; + termBytes.copy(clone, numTermBytes); + + // records offsets into main terms dict file + termsDictOffsets = new MonotonicBlockPackedReader(clone, packedIntsVersion, blocksize, numIndexTerms, false); + + // records offsets into byte[] term data + termOffsets = new MonotonicBlockPackedReader(clone, packedIntsVersion, blocksize, 1+numIndexTerms, false); + } finally { + clone.close(); } } } @Override public FieldIndexEnum getFieldEnum(FieldInfo fieldInfo) { - final FieldIndexData fieldData = fields.get(fieldInfo); - if (fieldData.coreIndex == null) { - return null; - } else { - return new IndexEnum(fieldData.coreIndex); - } + return new IndexEnum(fields.get(fieldInfo)); } @Override - public void close() throws IOException { - if (in != null && !indexLoaded) { - in.close(); - } - } + public void close() throws IOException {} private void seekDir(IndexInput input, long dirOffset) throws IOException { - if (version >= FixedGapTermsIndexWriter.VERSION_APPEND_ONLY) { - input.seek(input.length() - 8); - dirOffset = input.readLong(); - } + input.seek(input.length() - 8); + dirOffset = input.readLong(); input.seek(dirOffset); } } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexWriter.java index b270fcc9714..789300ef785 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexWriter.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexWriter.java @@ -18,15 +18,16 @@ package org.apache.lucene.codecs.blockterms; */ import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.RAMOutputStream; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.TermStats; -import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer; +import 
org.apache.lucene.util.packed.MonotonicBlockPackedWriter; import org.apache.lucene.util.packed.PackedInts; import java.util.List; @@ -50,23 +51,32 @@ public class FixedGapTermsIndexWriter extends TermsIndexWriterBase { final static String CODEC_NAME = "SIMPLE_STANDARD_TERMS_INDEX"; final static int VERSION_START = 0; final static int VERSION_APPEND_ONLY = 1; - final static int VERSION_CURRENT = VERSION_APPEND_ONLY; + final static int VERSION_MONOTONIC_ADDRESSING = 2; + final static int VERSION_CURRENT = VERSION_MONOTONIC_ADDRESSING; + final static int BLOCKSIZE = 4096; final private int termIndexInterval; + public static final int DEFAULT_TERM_INDEX_INTERVAL = 32; private final List fields = new ArrayList(); - @SuppressWarnings("unused") private final FieldInfos fieldInfos; // unread - public FixedGapTermsIndexWriter(SegmentWriteState state) throws IOException { + this(state, DEFAULT_TERM_INDEX_INTERVAL); + } + + public FixedGapTermsIndexWriter(SegmentWriteState state, int termIndexInterval) throws IOException { + if (termIndexInterval <= 0) { + throw new IllegalArgumentException("invalid termIndexInterval: " + termIndexInterval); + } + this.termIndexInterval = termIndexInterval; final String indexFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TERMS_INDEX_EXTENSION); - termIndexInterval = state.termIndexInterval; out = state.directory.createOutput(indexFileName, state.context); boolean success = false; try { - fieldInfos = state.fieldInfos; writeHeader(out); - out.writeInt(termIndexInterval); + out.writeVInt(termIndexInterval); + out.writeVInt(PackedInts.VERSION_CURRENT); + out.writeVInt(BLOCKSIZE); success = true; } finally { if (!success) { @@ -114,22 +124,25 @@ public class FixedGapTermsIndexWriter extends TermsIndexWriterBase { long packedOffsetsStart; private long numTerms; - // TODO: we could conceivably make a PackedInts wrapper - // that auto-grows... 
then we wouldn't force 6 bytes RAM - // per index term: - private short[] termLengths; - private int[] termsPointerDeltas; - private long lastTermsPointer; - private long totTermLength; + private RAMOutputStream offsetsBuffer = new RAMOutputStream(); + private MonotonicBlockPackedWriter termOffsets = new MonotonicBlockPackedWriter(offsetsBuffer, BLOCKSIZE); + private long currentOffset; + + private RAMOutputStream addressBuffer = new RAMOutputStream(); + private MonotonicBlockPackedWriter termAddresses = new MonotonicBlockPackedWriter(addressBuffer, BLOCKSIZE); private final BytesRef lastTerm = new BytesRef(); SimpleFieldWriter(FieldInfo fieldInfo, long termsFilePointer) { this.fieldInfo = fieldInfo; indexStart = out.getFilePointer(); - termsStart = lastTermsPointer = termsFilePointer; - termLengths = new short[0]; - termsPointerDeltas = new int[0]; + termsStart = termsFilePointer; + // we write terms+1 offsets, term n's length is n+1 - n + try { + termOffsets.add(0L); + } catch (IOException bogus) { + throw new RuntimeException(bogus); + } } @Override @@ -157,21 +170,13 @@ public class FixedGapTermsIndexWriter extends TermsIndexWriterBase { // against prior term out.writeBytes(text.bytes, text.offset, indexedTermLength); - if (termLengths.length == numIndexTerms) { - termLengths = ArrayUtil.grow(termLengths); - } - if (termsPointerDeltas.length == numIndexTerms) { - termsPointerDeltas = ArrayUtil.grow(termsPointerDeltas); - } - // save delta terms pointer - termsPointerDeltas[numIndexTerms] = (int) (termsFilePointer - lastTermsPointer); - lastTermsPointer = termsFilePointer; + termAddresses.add(termsFilePointer - termsStart); // save term length (in bytes) assert indexedTermLength <= Short.MAX_VALUE; - termLengths[numIndexTerms] = (short) indexedTermLength; - totTermLength += indexedTermLength; + currentOffset += indexedTermLength; + termOffsets.add(currentOffset); lastTerm.copyBytes(text); numIndexTerms++; @@ -183,32 +188,20 @@ public class FixedGapTermsIndexWriter extends TermsIndexWriterBase { // write primary terms dict offsets packedIndexStart = out.getFilePointer(); - PackedInts.Writer w = PackedInts.getWriter(out, numIndexTerms, PackedInts.bitsRequired(termsFilePointer), PackedInts.DEFAULT); - // relative to our indexStart - long upto = 0; - for(int i=0;i fields = new HashMap(); @@ -59,17 +53,15 @@ public class VariableGapTermsIndexReader extends TermsIndexReaderBase { private final int version; final String segment; - public VariableGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, int indexDivisor, String segmentSuffix, IOContext context) + public VariableGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, String segmentSuffix, IOContext context) throws IOException { - in = dir.openInput(IndexFileNames.segmentFileName(segment, segmentSuffix, VariableGapTermsIndexWriter.TERMS_INDEX_EXTENSION), new IOContext(context, true)); + final IndexInput in = dir.openInput(IndexFileNames.segmentFileName(segment, segmentSuffix, VariableGapTermsIndexWriter.TERMS_INDEX_EXTENSION), new IOContext(context, true)); this.segment = segment; boolean success = false; - assert indexDivisor == -1 || indexDivisor > 0; try { version = readHeader(in); - this.indexDivisor = indexDivisor; seekDir(in, dirOffset); @@ -83,27 +75,20 @@ public class VariableGapTermsIndexReader extends TermsIndexReaderBase { final int field = in.readVInt(); final long indexStart = in.readVLong(); final FieldInfo fieldInfo = fieldInfos.fieldInfo(field); - FieldIndexData previous = 
fields.put(fieldInfo, new FieldIndexData(fieldInfo, indexStart)); + FieldIndexData previous = fields.put(fieldInfo, new FieldIndexData(in, fieldInfo, indexStart)); if (previous != null) { throw new CorruptIndexException("duplicate field: " + fieldInfo.name + " (resource=" + in + ")"); } } success = true; } finally { - if (indexDivisor > 0) { - in.close(); - in = null; - if (success) { - indexLoaded = true; - } + if (success) { + IOUtils.close(in); + } else { + IOUtils.closeWhileHandlingException(in); } } } - - @Override - public int getDivisor() { - return indexDivisor; - } private int readHeader(IndexInput input) throws IOException { int version = CodecUtil.checkHeader(input, VariableGapTermsIndexWriter.CODEC_NAME, @@ -168,52 +153,21 @@ public class VariableGapTermsIndexReader extends TermsIndexReaderBase { } private final class FieldIndexData { + private final FST fst; - private final long indexStart; - // Set only if terms index is loaded: - private volatile FST fst; + public FieldIndexData(IndexInput in, FieldInfo fieldInfo, long indexStart) throws IOException { + IndexInput clone = in.clone(); + clone.seek(indexStart); + fst = new FST(clone, fstOutputs); + clone.close(); - public FieldIndexData(FieldInfo fieldInfo, long indexStart) throws IOException { - this.indexStart = indexStart; - - if (indexDivisor > 0) { - loadTermsIndex(); - } - } - - private void loadTermsIndex() throws IOException { - if (fst == null) { - IndexInput clone = in.clone(); - clone.seek(indexStart); - fst = new FST(clone, fstOutputs); - clone.close(); - - /* - final String dotFileName = segment + "_" + fieldInfo.name + ".dot"; - Writer w = new OutputStreamWriter(new FileOutputStream(dotFileName)); - Util.toDot(fst, w, false, false); - System.out.println("FST INDEX: SAVED to " + dotFileName); - w.close(); - */ - - if (indexDivisor > 1) { - // subsample - final IntsRef scratchIntsRef = new IntsRef(); - final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(); - final Builder builder = new Builder(FST.INPUT_TYPE.BYTE1, outputs); - final BytesRefFSTEnum fstEnum = new BytesRefFSTEnum(fst); - BytesRefFSTEnum.InputOutput result; - int count = indexDivisor; - while((result = fstEnum.next()) != null) { - if (count == indexDivisor) { - builder.add(Util.toIntsRef(result.input, scratchIntsRef), result.output); - count = 0; - } - count++; - } - fst = builder.finish(); - } - } + /* + final String dotFileName = segment + "_" + fieldInfo.name + ".dot"; + Writer w = new OutputStreamWriter(new FileOutputStream(dotFileName)); + Util.toDot(fst, w, false, false); + System.out.println("FST INDEX: SAVED to " + dotFileName); + w.close(); + */ } } @@ -228,11 +182,7 @@ public class VariableGapTermsIndexReader extends TermsIndexReaderBase { } @Override - public void close() throws IOException { - if (in != null && !indexLoaded) { - in.close(); - } - } + public void close() throws IOException {} private void seekDir(IndexInput input, long dirOffset) throws IOException { if (version >= VariableGapTermsIndexWriter.VERSION_APPEND_ONLY) { diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsFormat.java index 2aeb5a3906d..735efdcb65a 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsFormat.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsFormat.java @@ -103,8 +103,7 @@ public abstract class PulsingPostingsFormat extends PostingsFormat { state.directory, 
state.fieldInfos, state.segmentInfo, pulsingReader, state.context, - state.segmentSuffix, - state.termsIndexDivisor); + state.segmentSuffix); success = true; return ret; } finally { diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/blockterms/TestFixedGapPostingsFormat.java b/lucene/codecs/src/test/org/apache/lucene/codecs/blockterms/TestFixedGapPostingsFormat.java index b0586a0f7e6..1aaeeff9cba 100644 --- a/lucene/codecs/src/test/org/apache/lucene/codecs/blockterms/TestFixedGapPostingsFormat.java +++ b/lucene/codecs/src/test/org/apache/lucene/codecs/blockterms/TestFixedGapPostingsFormat.java @@ -25,10 +25,8 @@ import org.apache.lucene.util._TestUtil; /** * Basic tests of a PF using FixedGap terms dictionary */ -// TODO: we should add an instantiation for VarGap too to TestFramework, and a test in this package -// TODO: ensure both of these are also in rotation in RandomCodec public class TestFixedGapPostingsFormat extends BasePostingsFormatTestCase { - private final Codec codec = _TestUtil.alwaysPostingsFormat(new Lucene41WithOrds()); + private final Codec codec = _TestUtil.alwaysPostingsFormat(new Lucene41WithOrds(_TestUtil.nextInt(random(), 1, 1000))); @Override protected Codec getCodec() { diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/blockterms/TestVarGapDocFreqIntervalPostingsFormat.java b/lucene/codecs/src/test/org/apache/lucene/codecs/blockterms/TestVarGapDocFreqIntervalPostingsFormat.java new file mode 100644 index 00000000000..16ae249695c --- /dev/null +++ b/lucene/codecs/src/test/org/apache/lucene/codecs/blockterms/TestVarGapDocFreqIntervalPostingsFormat.java @@ -0,0 +1,35 @@ +package org.apache.lucene.codecs.blockterms; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.lucene41vargap.Lucene41VarGapFixedInterval; +import org.apache.lucene.index.BasePostingsFormatTestCase; +import org.apache.lucene.util._TestUtil; + +/** + * Basic tests of a PF using VariableGap terms dictionary (fixed interval) + */ +public class TestVarGapDocFreqIntervalPostingsFormat extends BasePostingsFormatTestCase { + private final Codec codec = _TestUtil.alwaysPostingsFormat(new Lucene41VarGapFixedInterval(_TestUtil.nextInt(random(), 1, 1000))); + + @Override + protected Codec getCodec() { + return codec; + } +} diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/blockterms/TestVarGapFixedIntervalPostingsFormat.java b/lucene/codecs/src/test/org/apache/lucene/codecs/blockterms/TestVarGapFixedIntervalPostingsFormat.java new file mode 100644 index 00000000000..4ae298d5a8b --- /dev/null +++ b/lucene/codecs/src/test/org/apache/lucene/codecs/blockterms/TestVarGapFixedIntervalPostingsFormat.java @@ -0,0 +1,35 @@ +package org.apache.lucene.codecs.blockterms; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.lucene41vargap.Lucene41VarGapDocFreqInterval; +import org.apache.lucene.index.BasePostingsFormatTestCase; +import org.apache.lucene.util._TestUtil; + +/** + * Basic tests of a PF using VariableGap terms dictionary (fixed interval, docFreq threshold) + */ +public class TestVarGapFixedIntervalPostingsFormat extends BasePostingsFormatTestCase { + private final Codec codec = _TestUtil.alwaysPostingsFormat(new Lucene41VarGapDocFreqInterval(_TestUtil.nextInt(random(), 1, 100), _TestUtil.nextInt(random(), 1, 1000))); + + @Override + protected Codec getCodec() { + return codec; + } +} diff --git a/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java index 9545d57c108..9ed141488aa 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java @@ -67,9 +67,9 @@ import org.apache.lucene.util.fst.Util; * does not support a pluggable terms index * implementation). * - *

NOTE: this terms dictionary does not support
- * index divisor when opening an IndexReader. Instead, you
- * can change the min/maxItemsPerBlock during indexing.
+ * NOTE: this terms dictionary supports
+ * min/maxItemsPerBlock during indexing to control how
+ * much memory the terms index uses.
 *
The data structure used by this implementation is very * similar to a burst trie @@ -112,7 +112,7 @@ public class BlockTreeTermsReader extends FieldsProducer { /** Sole constructor. */ public BlockTreeTermsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo info, PostingsReaderBase postingsReader, IOContext ioContext, - String segmentSuffix, int indexDivisor) + String segmentSuffix) throws IOException { this.postingsReader = postingsReader; @@ -126,13 +126,11 @@ public class BlockTreeTermsReader extends FieldsProducer { try { version = readHeader(in); - if (indexDivisor != -1) { - indexIn = dir.openInput(IndexFileNames.segmentFileName(segment, segmentSuffix, BlockTreeTermsWriter.TERMS_INDEX_EXTENSION), + indexIn = dir.openInput(IndexFileNames.segmentFileName(segment, segmentSuffix, BlockTreeTermsWriter.TERMS_INDEX_EXTENSION), ioContext); - int indexVersion = readIndexHeader(indexIn); - if (indexVersion != version) { - throw new CorruptIndexException("mixmatched version files: " + in + "=" + version + "," + indexIn + "=" + indexVersion); - } + int indexVersion = readIndexHeader(indexIn); + if (indexVersion != version) { + throw new CorruptIndexException("mixmatched version files: " + in + "=" + version + "," + indexIn + "=" + indexVersion); } // Have PostingsReader init itself @@ -140,9 +138,7 @@ public class BlockTreeTermsReader extends FieldsProducer { // Read per-field details seekDir(in, dirOffset); - if (indexDivisor != -1) { - seekDir(indexIn, indexDirOffset); - } + seekDir(indexIn, indexDirOffset); final int numFields = in.readVInt(); if (numFields < 0) { @@ -171,15 +167,13 @@ public class BlockTreeTermsReader extends FieldsProducer { if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq + " (resource=" + in + ")"); } - final long indexStartFP = indexDivisor != -1 ? 
indexIn.readVLong() : 0; + final long indexStartFP = indexIn.readVLong(); FieldReader previous = fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, rootCode, sumTotalTermFreq, sumDocFreq, docCount, indexStartFP, indexIn)); if (previous != null) { throw new CorruptIndexException("duplicate field: " + fieldInfo.name + " (resource=" + in + ")"); } } - if (indexDivisor != -1) { - indexIn.close(); - } + indexIn.close(); success = true; } finally { diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsFormat.java index 061f512b7a8..7d342e61a3b 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsFormat.java @@ -258,8 +258,7 @@ public class Lucene40PostingsFormat extends PostingsFormat { state.segmentInfo, postings, state.context, - state.segmentSuffix, - state.termsIndexDivisor); + state.segmentSuffix); success = true; return ret; } finally { diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java index df7fe6f65be..d7be3e03727 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java @@ -439,8 +439,7 @@ public final class Lucene41PostingsFormat extends PostingsFormat { state.segmentInfo, postingsReader, state.context, - state.segmentSuffix, - state.termsIndexDivisor); + state.segmentSuffix); success = true; return ret; } finally { diff --git a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java index 727e094c383..612fbc5d093 100644 --- a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java +++ b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java @@ -533,7 +533,7 @@ public class CheckIndex { } if (infoStream != null) infoStream.print(" test: open reader........."); - reader = new SegmentReader(info, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, IOContext.DEFAULT); + reader = new SegmentReader(info, IOContext.DEFAULT); segInfoStat.openReaderPassed = true; diff --git a/lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java b/lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java index e2372a19298..e2a61213594 100644 --- a/lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java +++ b/lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java @@ -52,9 +52,6 @@ import org.apache.lucene.store.NoSuchDirectoryException; */ public abstract class DirectoryReader extends BaseCompositeReader { - /** Default termInfosIndexDivisor. */ - public static final int DEFAULT_TERMS_INDEX_DIVISOR = 1; - /** The index directory. */ protected final Directory directory; @@ -64,29 +61,7 @@ public abstract class DirectoryReader extends BaseCompositeReader * @throws IOException if there is a low-level IO error */ public static DirectoryReader open(final Directory directory) throws IOException { - return StandardDirectoryReader.open(directory, null, DEFAULT_TERMS_INDEX_DIVISOR); - } - - /** Expert: Returns a IndexReader reading the index in the given - * Directory with the given termInfosIndexDivisor. 
- * @param directory the index directory - * @param termInfosIndexDivisor Subsamples which indexed - * terms are loaded into RAM. This has the same effect as {@link - * IndexWriterConfig#setTermIndexInterval} except that setting - * must be done at indexing time while this setting can be - * set per reader. When set to N, then one in every - * N*termIndexInterval terms in the index is loaded into - * memory. By setting this to a value > 1 you can reduce - * memory usage, at the expense of higher latency when - * loading a TermInfo. The default value is 1. Set this - * to -1 to skip loading the terms index entirely. - * NOTE: divisor settings > 1 do not apply to all PostingsFormat - * implementations, including the default one in this release. It only makes - * sense for terms indexes that can efficiently re-sample terms at load time. - * @throws IOException if there is a low-level IO error - */ - public static DirectoryReader open(final Directory directory, int termInfosIndexDivisor) throws IOException { - return StandardDirectoryReader.open(directory, null, termInfosIndexDivisor); + return StandardDirectoryReader.open(directory, null); } /** @@ -118,29 +93,7 @@ public abstract class DirectoryReader extends BaseCompositeReader * @throws IOException if there is a low-level IO error */ public static DirectoryReader open(final IndexCommit commit) throws IOException { - return StandardDirectoryReader.open(commit.getDirectory(), commit, DEFAULT_TERMS_INDEX_DIVISOR); - } - - /** Expert: returns an IndexReader reading the index in the given - * {@link IndexCommit} and termInfosIndexDivisor. - * @param commit the commit point to open - * @param termInfosIndexDivisor Subsamples which indexed - * terms are loaded into RAM. This has the same effect as {@link - * IndexWriterConfig#setTermIndexInterval} except that setting - * must be done at indexing time while this setting can be - * set per reader. When set to N, then one in every - * N*termIndexInterval terms in the index is loaded into - * memory. By setting this to a value > 1 you can reduce - * memory usage, at the expense of higher latency when - * loading a TermInfo. The default value is 1. Set this - * to -1 to skip loading the terms index entirely. - * NOTE: divisor settings > 1 do not apply to all PostingsFormat - * implementations, including the default one in this release. It only makes - * sense for terms indexes that can efficiently re-sample terms at load time. 
- * @throws IOException if there is a low-level IO error - */ - public static DirectoryReader open(final IndexCommit commit, int termInfosIndexDivisor) throws IOException { - return StandardDirectoryReader.open(commit.getDirectory(), commit, termInfosIndexDivisor); + return StandardDirectoryReader.open(commit.getDirectory(), commit); } /** diff --git a/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java b/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java index cdc5088f304..84f50e8b489 100644 --- a/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java +++ b/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java @@ -468,7 +468,6 @@ class DocumentsWriterPerThread { assert deleteSlice == null : "all deletes must be applied in prepareFlush"; segmentInfo.setDocCount(numDocsInRAM); flushState = new SegmentWriteState(infoStream, directory, segmentInfo, fieldInfos.finish(), - writer.getConfig().getTermIndexInterval(), pendingDeletes, new IOContext(new FlushInfo(numDocsInRAM, bytesUsed()))); final double startMBUsed = parent.flushControl.netBytes() / 1024. / 1024.; diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java index 8e6f8e24f84..93fd9ed45b5 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java @@ -2437,10 +2437,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { * to call this method multiple times, each time with a small set of readers. * In principle, if you use a merge policy with a {@code mergeFactor} or * {@code maxMergeAtOnce} parameter, you should pass that many readers in one - * call. Also, if the given readers are {@link DirectoryReader}s, they can be - * opened with {@code termIndexInterval=-1} to save RAM, since during merge - * the in-memory structure is not used. See - * {@link DirectoryReader#open(Directory, int)}. + * call. * *

* NOTE: if you call {@link #close(boolean)} with false, which @@ -2488,7 +2485,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { SegmentInfo info = new SegmentInfo(directory, Constants.LUCENE_MAIN_VERSION, mergedName, -1, false, codec, null, null); - SegmentMerger merger = new SegmentMerger(mergeReaders, info, infoStream, trackingDir, config.getTermIndexInterval(), + SegmentMerger merger = new SegmentMerger(mergeReaders, info, infoStream, trackingDir, MergeState.CheckAbort.NONE, globalFieldNumberMap, context); MergeState mergeState; @@ -3670,7 +3667,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { // Hold onto the "live" reader; we will use this to // commit merged deletes final ReadersAndLiveDocs rld = readerPool.get(info, true); - SegmentReader reader = rld.getMergeReader(context); + SegmentReader reader = rld.getReader(context); assert reader != null; // Carefully pull the most recent live docs: @@ -3727,7 +3724,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { // we pass merge.getMergeReaders() instead of merge.readers to allow the // OneMerge to return a view over the actual segments to merge final SegmentMerger merger = new SegmentMerger(merge.getMergeReaders(), - merge.info.info, infoStream, dirWrapper, config.getTermIndexInterval(), + merge.info.info, infoStream, dirWrapper, checkAbort, globalFieldNumberMap, context); merge.checkAborted(directory); diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java index 920adf45d08..8993d1bfe45 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java @@ -72,9 +72,6 @@ public final class IndexWriterConfig extends LiveIndexWriterConfig implements Cl CREATE_OR_APPEND } - /** Default value is 32. Change using {@link #setTermIndexInterval(int)}. */ - public static final int DEFAULT_TERM_INDEX_INTERVAL = 32; // TODO: this should be private to the codec, not settable here - /** Denotes a flush trigger is disabled. */ public final static int DISABLE_AUTO_FLUSH = -1; @@ -100,9 +97,6 @@ public final class IndexWriterConfig extends LiveIndexWriterConfig implements Cl /** Default setting for {@link #setReaderPooling}. */ public final static boolean DEFAULT_READER_POOLING = false; - /** Default value is 1. Change using {@link #setReaderTermsIndexDivisor(int)}. */ - public static final int DEFAULT_READER_TERMS_INDEX_DIVISOR = DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR; - /** Default value is 1945. Change using {@link #setRAMPerThreadHardLimitMB(int)} */ public static final int DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB = 1945; @@ -503,16 +497,6 @@ public final class IndexWriterConfig extends LiveIndexWriterConfig implements Cl return super.getRAMBufferSizeMB(); } - @Override - public int getReaderTermsIndexDivisor() { - return super.getReaderTermsIndexDivisor(); - } - - @Override - public int getTermIndexInterval() { - return super.getTermIndexInterval(); - } - /** If non-null, information about merges, deletes and a * message when maxFieldLength is reached will be printed * to this. 
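For illustration (a migration sketch, not part of the patch): with setTermIndexInterval and setReaderTermsIndexDivisor removed, term-index granularity is now configured on the codec, per field if desired, which is the same pattern the removed javadoc below describes. This assumes the Lucene 4.4 classes it names (Lucene42Codec, and Lucene41PostingsFormat(minBlockSize, maxBlockSize)); "fieldWithTonsOfTerms" is a hypothetical field name:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat;
import org.apache.lucene.codecs.lucene42.Lucene42Codec;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.util.Version;

public class PerFieldTermsIndexConfig {
  public static IndexWriterConfig newConfig() {
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_44, new StandardAnalyzer(Version.LUCENE_44));
    // customize the default format's terms index: minBlockSize=50, maxBlockSize=100
    final PostingsFormat tweakedPostings = new Lucene41PostingsFormat(50, 100);
    iwc.setCodec(new Lucene42Codec() {
      @Override
      public PostingsFormat getPostingsFormatForField(String field) {
        // route one heavy field to the tweaked format, everything else to the default
        return field.equals("fieldWithTonsOfTerms") ? tweakedPostings : super.getPostingsFormatForField(field);
      }
    });
    return iwc;
  }
}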
@@ -554,16 +538,6 @@ public final class IndexWriterConfig extends LiveIndexWriterConfig implements Cl return (IndexWriterConfig) super.setRAMBufferSizeMB(ramBufferSizeMB); } - @Override - public IndexWriterConfig setReaderTermsIndexDivisor(int divisor) { - return (IndexWriterConfig) super.setReaderTermsIndexDivisor(divisor); - } - - @Override - public IndexWriterConfig setTermIndexInterval(int interval) { - return (IndexWriterConfig) super.setTermIndexInterval(interval); - } - @Override public IndexWriterConfig setUseCompoundFile(boolean useCompoundFile) { return (IndexWriterConfig) super.setUseCompoundFile(useCompoundFile); diff --git a/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java b/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java index 8bb344ef73c..64eca6e3fc9 100644 --- a/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java +++ b/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java @@ -19,7 +19,6 @@ package org.apache.lucene.index; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.codecs.Codec; -import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat; // javadocs import org.apache.lucene.index.DocumentsWriterPerThread.IndexingChain; import org.apache.lucene.index.IndexWriter.IndexReaderWarmer; import org.apache.lucene.index.IndexWriterConfig.OpenMode; @@ -41,9 +40,7 @@ public class LiveIndexWriterConfig { private volatile int maxBufferedDocs; private volatile double ramBufferSizeMB; private volatile int maxBufferedDeleteTerms; - private volatile int readerTermsIndexDivisor; private volatile IndexReaderWarmer mergedSegmentWarmer; - private volatile int termIndexInterval; // TODO: this should be private to the codec, not settable here // modified by IndexWriterConfig /** {@link IndexDeletionPolicy} controlling when commit @@ -108,9 +105,7 @@ public class LiveIndexWriterConfig { ramBufferSizeMB = IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB; maxBufferedDocs = IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS; maxBufferedDeleteTerms = IndexWriterConfig.DEFAULT_MAX_BUFFERED_DELETE_TERMS; - readerTermsIndexDivisor = IndexWriterConfig.DEFAULT_READER_TERMS_INDEX_DIVISOR; mergedSegmentWarmer = null; - termIndexInterval = IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL; // TODO: this should be private to the codec, not settable here delPolicy = new KeepOnlyLastCommitDeletionPolicy(); commit = null; useCompoundFile = IndexWriterConfig.DEFAULT_USE_COMPOUND_FILE_SYSTEM; @@ -140,8 +135,6 @@ public class LiveIndexWriterConfig { maxBufferedDocs = config.getMaxBufferedDocs(); mergedSegmentWarmer = config.getMergedSegmentWarmer(); ramBufferSizeMB = config.getRAMBufferSizeMB(); - readerTermsIndexDivisor = config.getReaderTermsIndexDivisor(); - termIndexInterval = config.getTermIndexInterval(); matchVersion = config.matchVersion; analyzer = config.getAnalyzer(); delPolicy = config.getIndexDeletionPolicy(); @@ -165,69 +158,6 @@ public class LiveIndexWriterConfig { public Analyzer getAnalyzer() { return analyzer; } - - /** - * Expert: set the interval between indexed terms. Large values cause less - * memory to be used by IndexReader, but slow random-access to terms. Small - * values cause more memory to be used by an IndexReader, and speed - * random-access to terms. - *

- * This parameter determines the amount of computation required per query
- * term, regardless of the number of documents that contain that term. In
- * particular, it is the maximum number of other terms that must be scanned
- * before a term is located and its frequency and position information may be
- * processed. In a large index with user-entered query terms, query processing
- * time is likely to be dominated not by term lookup but rather by the
- * processing of frequency and positional data. In a small index or when many
- * uncommon query terms are generated (e.g., by wildcard queries) term lookup
- * may become a dominant cost.
- *
- * In particular, numUniqueTerms/interval terms are read into
- * memory by an IndexReader, and, on average, interval/2 terms
- * must be scanned for each random term access.
- *
- * Takes effect immediately, but only applies to newly flushed/merged
- * segments.
- *
- * NOTE: This parameter does not apply to all PostingsFormat implementations,
- * including the default one in this release. It only makes sense for term indexes
- * that are implemented as a fixed gap between terms. For example,
- * {@link Lucene41PostingsFormat} implements the term index instead based upon how
- * terms share prefixes. To configure its parameters (the minimum and maximum size
- * for a block), you would instead use {@link Lucene41PostingsFormat#Lucene41PostingsFormat(int, int)},
- * which can also be configured on a per-field basis:
- *
-   * //customize Lucene41PostingsFormat, passing minBlockSize=50, maxBlockSize=100
-   * final PostingsFormat tweakedPostings = new Lucene41PostingsFormat(50, 100);
-   * iwc.setCodec(new Lucene42Codec() {
-   *   @Override
-   *   public PostingsFormat getPostingsFormatForField(String field) {
-   *     if (field.equals("fieldWithTonsOfTerms"))
-   *       return tweakedPostings;
-   *     else
-   *       return super.getPostingsFormatForField(field);
-   *   }
-   * });
-   *
- * Note that other implementations may have their own parameters, or no parameters at all. - * - * @see IndexWriterConfig#DEFAULT_TERM_INDEX_INTERVAL - */ - public LiveIndexWriterConfig setTermIndexInterval(int interval) { // TODO: this should be private to the codec, not settable here - this.termIndexInterval = interval; - return this; - } - - /** - * Returns the interval between indexed terms. - * - * @see #setTermIndexInterval(int) - */ - public int getTermIndexInterval() { // TODO: this should be private to the codec, not settable here - return termIndexInterval; - } /** * Determines the minimal number of delete terms required before the buffered @@ -390,37 +320,6 @@ public class LiveIndexWriterConfig { public IndexReaderWarmer getMergedSegmentWarmer() { return mergedSegmentWarmer; } - - /** - * Sets the termsIndexDivisor passed to any readers that IndexWriter opens, - * for example when applying deletes or creating a near-real-time reader in - * {@link DirectoryReader#open(IndexWriter, boolean)}. If you pass -1, the - * terms index won't be loaded by the readers. This is only useful in advanced - * situations when you will only .next() through all terms; attempts to seek - * will hit an exception. - * - *

- * Takes effect immediately, but only applies to readers opened after this
- * call
- *
- * NOTE: divisor settings > 1 do not apply to all PostingsFormat - * implementations, including the default one in this release. It only makes - * sense for terms indexes that can efficiently re-sample terms at load time. - */ - public LiveIndexWriterConfig setReaderTermsIndexDivisor(int divisor) { - if (divisor <= 0 && divisor != -1) { - throw new IllegalArgumentException("divisor must be >= 1, or -1 (got " + divisor + ")"); - } - readerTermsIndexDivisor = divisor; - return this; - } - - /** Returns the {@code termInfosIndexDivisor}. - * - * @see #setReaderTermsIndexDivisor(int) */ - public int getReaderTermsIndexDivisor() { - return readerTermsIndexDivisor; - } /** Returns the {@link OpenMode} set by {@link IndexWriterConfig#setOpenMode(OpenMode)}. */ public OpenMode getOpenMode() { @@ -583,8 +482,6 @@ public class LiveIndexWriterConfig { sb.append("maxBufferedDocs=").append(getMaxBufferedDocs()).append("\n"); sb.append("maxBufferedDeleteTerms=").append(getMaxBufferedDeleteTerms()).append("\n"); sb.append("mergedSegmentWarmer=").append(getMergedSegmentWarmer()).append("\n"); - sb.append("readerTermsIndexDivisor=").append(getReaderTermsIndexDivisor()).append("\n"); - sb.append("termIndexInterval=").append(getTermIndexInterval()).append("\n"); // TODO: this should be private to the codec, not settable here sb.append("delPolicy=").append(getIndexDeletionPolicy().getClass().getName()).append("\n"); IndexCommit commit = getIndexCommit(); sb.append("commit=").append(commit == null ? "null" : commit).append("\n"); diff --git a/lucene/core/src/java/org/apache/lucene/index/ReadersAndLiveDocs.java b/lucene/core/src/java/org/apache/lucene/index/ReadersAndLiveDocs.java index a80a7ce3ef2..a3615833c55 100644 --- a/lucene/core/src/java/org/apache/lucene/index/ReadersAndLiveDocs.java +++ b/lucene/core/src/java/org/apache/lucene/index/ReadersAndLiveDocs.java @@ -43,16 +43,6 @@ class ReadersAndLiveDocs { // Set once (null, and then maybe set, and never set again): private SegmentReader reader; - // TODO: it's sometimes wasteful that we hold open two - // separate SRs (one for merging one for - // reading)... maybe just use a single SR? The gains of - // not loading the terms index (for merging in the - // non-NRT case) are far less now... and if the app has - // any deletes it'll open real readers anyway. - - // Set once (null, and then maybe set, and never set again): - private SegmentReader mergeReader; - // Holds the current shared (readable and writable // liveDocs). This is null when there are no deleted // docs, and it's copy-on-write (cloned whenever we need @@ -118,7 +108,7 @@ class ReadersAndLiveDocs { if (reader == null) { // We steal returned ref: - reader = new SegmentReader(info, writer.getConfig().getReaderTermsIndexDivisor(), context); + reader = new SegmentReader(info, context); if (liveDocs == null) { liveDocs = reader.getLiveDocs(); } @@ -131,37 +121,6 @@ class ReadersAndLiveDocs { return reader; } - // Get reader for merging (does not load the terms - // index): - public synchronized SegmentReader getMergeReader(IOContext context) throws IOException { - //System.out.println(" livedocs=" + rld.liveDocs); - - if (mergeReader == null) { - - if (reader != null) { - // Just use the already opened non-merge reader - // for merging. 
In the NRT case this saves us - // pointless double-open: - //System.out.println("PROMOTE non-merge reader seg=" + rld.info); - // Ref for us: - reader.incRef(); - mergeReader = reader; - //System.out.println(Thread.currentThread().getName() + ": getMergeReader share seg=" + info.name); - } else { - //System.out.println(Thread.currentThread().getName() + ": getMergeReader seg=" + info.name); - // We steal returned ref: - mergeReader = new SegmentReader(info, -1, context); - if (liveDocs == null) { - liveDocs = mergeReader.getLiveDocs(); - } - } - } - - // Ref for caller - mergeReader.incRef(); - return mergeReader; - } - public synchronized void release(SegmentReader sr) throws IOException { assert info == sr.getSegmentInfo(); sr.decRef(); @@ -185,23 +144,12 @@ class ReadersAndLiveDocs { public synchronized void dropReaders() throws IOException { // TODO: can we somehow use IOUtils here...? problem is // we are calling .decRef not .close)... - try { - if (reader != null) { - //System.out.println(" pool.drop info=" + info + " rc=" + reader.getRefCount()); - try { - reader.decRef(); - } finally { - reader = null; - } - } - } finally { - if (mergeReader != null) { - //System.out.println(" pool.drop info=" + info + " merge rc=" + mergeReader.getRefCount()); - try { - mergeReader.decRef(); - } finally { - mergeReader = null; - } + if (reader != null) { + //System.out.println(" pool.drop info=" + info + " rc=" + reader.getRefCount()); + try { + reader.decRef(); + } finally { + reader = null; } } diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java b/lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java index e5498a24687..ab0348293a9 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java @@ -57,8 +57,6 @@ final class SegmentCoreReaders { final DocValuesProducer dvProducer; final DocValuesProducer normsProducer; - final int termsIndexDivisor; - private final SegmentReader owner; final StoredFieldsReader fieldsReaderOrig; @@ -100,11 +98,7 @@ final class SegmentCoreReaders { private final Set coreClosedListeners = Collections.synchronizedSet(new LinkedHashSet()); - SegmentCoreReaders(SegmentReader owner, Directory dir, SegmentInfoPerCommit si, IOContext context, int termsIndexDivisor) throws IOException { - - if (termsIndexDivisor == 0) { - throw new IllegalArgumentException("indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)"); - } + SegmentCoreReaders(SegmentReader owner, Directory dir, SegmentInfoPerCommit si, IOContext context) throws IOException { final Codec codec = si.info.getCodec(); final Directory cfsDir; // confusing name: if (cfs) its the cfsdir, otherwise its the segment's directory. 
@@ -120,9 +114,8 @@ final class SegmentCoreReaders { } fieldInfos = codec.fieldInfosFormat().getFieldInfosReader().read(cfsDir, si.info.name, IOContext.READONCE); - this.termsIndexDivisor = termsIndexDivisor; final PostingsFormat format = codec.postingsFormat(); - final SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si.info, fieldInfos, context, termsIndexDivisor); + final SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si.info, fieldInfos, context); // Ask codec for its Fields fields = format.fieldsProducer(segmentReadState); assert fields != null; diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java b/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java index 6631020f759..4c1fdb9caff 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java @@ -42,7 +42,6 @@ import org.apache.lucene.util.InfoStream; */ final class SegmentMerger { private final Directory directory; - private final int termIndexInterval; private final Codec codec; @@ -52,11 +51,10 @@ final class SegmentMerger { private final FieldInfos.Builder fieldInfosBuilder; // note, just like in codec apis Directory 'dir' is NOT the same as segmentInfo.dir!! - SegmentMerger(List readers, SegmentInfo segmentInfo, InfoStream infoStream, Directory dir, int termIndexInterval, + SegmentMerger(List readers, SegmentInfo segmentInfo, InfoStream infoStream, Directory dir, MergeState.CheckAbort checkAbort, FieldInfos.FieldNumbers fieldNumbers, IOContext context) { mergeState = new MergeState(readers, segmentInfo, infoStream, checkAbort); directory = dir; - this.termIndexInterval = termIndexInterval; this.codec = segmentInfo.getCodec(); this.context = context; this.fieldInfosBuilder = new FieldInfos.Builder(fieldNumbers); @@ -91,7 +89,7 @@ final class SegmentMerger { assert numMerged == mergeState.segmentInfo.getDocCount(); final SegmentWriteState segmentWriteState = new SegmentWriteState(mergeState.infoStream, directory, mergeState.segmentInfo, - mergeState.fieldInfos, termIndexInterval, null, context); + mergeState.fieldInfos, null, context); if (mergeState.infoStream.isEnabled("SM")) { t0 = System.nanoTime(); } diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentReadState.java b/lucene/core/src/java/org/apache/lucene/index/SegmentReadState.java index fc1ea8f43f1..b7eef06a747 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentReadState.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentReadState.java @@ -41,17 +41,6 @@ public class SegmentReadState { * Directory#openInput(String,IOContext)}. */ public final IOContext context; - /** The {@code termInfosIndexDivisor} to use, if - * appropriate (not all {@link PostingsFormat}s support - * it; in particular the current default does not). - * - *
<p>
NOTE: if this is < 0, that means "defer terms index - * load until needed". But if the codec must load the - * terms index on init (preflex is the only once currently - * that must do so), then it should negate this value to - * get the app's terms divisor */ - public int termsIndexDivisor; - /** Unique suffix for any postings files read for this * segment. {@link PerFieldPostingsFormat} sets this for * each of the postings formats it wraps. If you create @@ -62,8 +51,8 @@ public class SegmentReadState { /** Create a {@code SegmentReadState}. */ public SegmentReadState(Directory dir, SegmentInfo info, - FieldInfos fieldInfos, IOContext context, int termsIndexDivisor) { - this(dir, info, fieldInfos, context, termsIndexDivisor, ""); + FieldInfos fieldInfos, IOContext context) { + this(dir, info, fieldInfos, context, ""); } /** Create a {@code SegmentReadState}. */ @@ -71,13 +60,11 @@ public class SegmentReadState { SegmentInfo info, FieldInfos fieldInfos, IOContext context, - int termsIndexDivisor, String segmentSuffix) { this.directory = dir; this.segmentInfo = info; this.fieldInfos = fieldInfos; this.context = context; - this.termsIndexDivisor = termsIndexDivisor; this.segmentSuffix = segmentSuffix; } @@ -88,7 +75,6 @@ public class SegmentReadState { this.segmentInfo = other.segmentInfo; this.fieldInfos = other.fieldInfos; this.context = other.context; - this.termsIndexDivisor = other.termsIndexDivisor; this.segmentSuffix = newSegmentSuffix; } } diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java b/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java index a539de1ae8c..8214a980cd9 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java @@ -51,9 +51,9 @@ public final class SegmentReader extends AtomicReader { * @throws IOException if there is a low-level IO error */ // TODO: why is this public? - public SegmentReader(SegmentInfoPerCommit si, int termInfosIndexDivisor, IOContext context) throws IOException { + public SegmentReader(SegmentInfoPerCommit si, IOContext context) throws IOException { this.si = si; - core = new SegmentCoreReaders(this, si.info.dir, si, context, termInfosIndexDivisor); + core = new SegmentCoreReaders(this, si.info.dir, si, context); boolean success = false; try { if (si.hasDeletions()) { @@ -217,12 +217,6 @@ public final class SegmentReader extends AtomicReader { return this; } - /** Returns term infos index divisor originally passed to - * {@link #SegmentReader(SegmentInfoPerCommit, int, IOContext)}. */ - public int getTermInfosIndexDivisor() { - return core.termsIndexDivisor; - } - @Override public NumericDocValues getNumericDocValues(String field) throws IOException { ensureOpen(); diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentWriteState.java b/lucene/core/src/java/org/apache/lucene/index/SegmentWriteState.java index ed390c0c4e7..a058e93938c 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentWriteState.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentWriteState.java @@ -66,13 +66,6 @@ public class SegmentWriteState { * write/read must be derived using this suffix (use * {@link IndexFileNames#segmentFileName(String,String,String)}). */ public final String segmentSuffix; - - /** Expert: The fraction of terms in the "dictionary" which should be stored - * in RAM. 
Smaller values use more memory, but make searching slightly - * faster, while larger values use less memory and make searching slightly - * slower. Searching is typically not dominated by dictionary lookup, so - * tweaking this is rarely useful.*/ - public int termIndexInterval; // TODO: this should be private to the codec, not settable here or in IWC /** {@link IOContext} for all writes; you should pass this * to {@link Directory#createOutput(String,IOContext)}. */ @@ -80,13 +73,12 @@ public class SegmentWriteState { /** Sole constructor. */ public SegmentWriteState(InfoStream infoStream, Directory directory, SegmentInfo segmentInfo, FieldInfos fieldInfos, - int termIndexInterval, BufferedDeletes segDeletes, IOContext context) { + BufferedDeletes segDeletes, IOContext context) { this.infoStream = infoStream; this.segDeletes = segDeletes; this.directory = directory; this.segmentInfo = segmentInfo; this.fieldInfos = fieldInfos; - this.termIndexInterval = termIndexInterval; segmentSuffix = ""; this.context = context; } @@ -99,7 +91,6 @@ public class SegmentWriteState { directory = state.directory; segmentInfo = state.segmentInfo; fieldInfos = state.fieldInfos; - termIndexInterval = state.termIndexInterval; context = state.context; this.segmentSuffix = segmentSuffix; segDeletes = state.segDeletes; diff --git a/lucene/core/src/java/org/apache/lucene/index/StandardDirectoryReader.java b/lucene/core/src/java/org/apache/lucene/index/StandardDirectoryReader.java index 18d84634bbc..7e4316a67ac 100644 --- a/lucene/core/src/java/org/apache/lucene/index/StandardDirectoryReader.java +++ b/lucene/core/src/java/org/apache/lucene/index/StandardDirectoryReader.java @@ -33,22 +33,19 @@ final class StandardDirectoryReader extends DirectoryReader { private final IndexWriter writer; private final SegmentInfos segmentInfos; - private final int termInfosIndexDivisor; private final boolean applyAllDeletes; /** called only from static open() methods */ StandardDirectoryReader(Directory directory, AtomicReader[] readers, IndexWriter writer, - SegmentInfos sis, int termInfosIndexDivisor, boolean applyAllDeletes) { + SegmentInfos sis, boolean applyAllDeletes) { super(directory, readers); this.writer = writer; this.segmentInfos = sis; - this.termInfosIndexDivisor = termInfosIndexDivisor; this.applyAllDeletes = applyAllDeletes; } /** called from DirectoryReader.open(...) 
methods */ - static DirectoryReader open(final Directory directory, final IndexCommit commit, - final int termInfosIndexDivisor) throws IOException { + static DirectoryReader open(final Directory directory, final IndexCommit commit) throws IOException { return (DirectoryReader) new SegmentInfos.FindSegmentsFile(directory) { @Override protected Object doBody(String segmentFileName) throws IOException { @@ -59,7 +56,7 @@ final class StandardDirectoryReader extends DirectoryReader { IOException prior = null; boolean success = false; try { - readers[i] = new SegmentReader(sis.info(i), termInfosIndexDivisor, IOContext.READ); + readers[i] = new SegmentReader(sis.info(i), IOContext.READ); success = true; } catch(IOException ex) { prior = ex; @@ -68,7 +65,7 @@ final class StandardDirectoryReader extends DirectoryReader { IOUtils.closeWhileHandlingException(prior, readers); } } - return new StandardDirectoryReader(directory, readers, null, sis, termInfosIndexDivisor, false); + return new StandardDirectoryReader(directory, readers, null, sis, false); } }.run(commit); } @@ -119,12 +116,11 @@ final class StandardDirectoryReader extends DirectoryReader { } } return new StandardDirectoryReader(dir, readers.toArray(new SegmentReader[readers.size()]), - writer, segmentInfos, writer.getConfig().getReaderTermsIndexDivisor(), applyAllDeletes); + writer, segmentInfos, applyAllDeletes); } /** This constructor is only used for {@link #doOpenIfChanged(SegmentInfos)} */ - private static DirectoryReader open(Directory directory, SegmentInfos infos, List oldReaders, - int termInfosIndexDivisor) throws IOException { + private static DirectoryReader open(Directory directory, SegmentInfos infos, List oldReaders) throws IOException { // we put the old SegmentReaders in a map, that allows us // to lookup a reader using its segment name @@ -162,7 +158,7 @@ final class StandardDirectoryReader extends DirectoryReader { if (newReaders[i] == null || infos.info(i).info.getUseCompoundFile() != newReaders[i].getSegmentInfo().info.getUseCompoundFile()) { // this is a new reader; in case we hit an exception we can close it safely - newReader = new SegmentReader(infos.info(i), termInfosIndexDivisor, IOContext.READ); + newReader = new SegmentReader(infos.info(i), IOContext.READ); readerShared[i] = false; newReaders[i] = newReader; } else { @@ -212,7 +208,7 @@ final class StandardDirectoryReader extends DirectoryReader { } } } - return new StandardDirectoryReader(directory, newReaders, null, infos, termInfosIndexDivisor, false); + return new StandardDirectoryReader(directory, newReaders, null, infos, false); } @Override @@ -313,7 +309,7 @@ final class StandardDirectoryReader extends DirectoryReader { } DirectoryReader doOpenIfChanged(SegmentInfos infos) throws IOException { - return StandardDirectoryReader.open(directory, infos, getSequentialSubReaders(), termInfosIndexDivisor); + return StandardDirectoryReader.open(directory, infos, getSequentialSubReaders()); } @Override diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestLucene40PostingsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestLucene40PostingsFormat.java index 39950491a6e..199690f8ef7 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestLucene40PostingsFormat.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestLucene40PostingsFormat.java @@ -24,7 +24,7 @@ import org.apache.lucene.index.BasePostingsFormatTestCase; * Tests Lucene40PostingsFormat */ public class TestLucene40PostingsFormat 
extends BasePostingsFormatTestCase { - private final Codec codec = new Lucene40Codec(); + private final Codec codec = new Lucene40RWCodec(); @Override protected Codec getCodec() { diff --git a/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java b/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java index d1eed427061..8d9f8838fba 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java @@ -192,7 +192,7 @@ public class TestPerFieldPostingsFormat2 extends LuceneTestCase { if (VERBOSE) { System.out.println("\nTEST: assertQuery " + t); } - IndexReader reader = DirectoryReader.open(dir, 1); + IndexReader reader = DirectoryReader.open(dir); IndexSearcher searcher = newSearcher(reader); TopDocs search = searcher.search(new TermQuery(t), num + 10); assertEquals(num, search.totalHits); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java b/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java index 16107613806..8e56129c790 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java @@ -296,7 +296,7 @@ public class TestCodecs extends LuceneTestCase { Codec codec = Codec.getDefault(); final SegmentInfo si = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, SEGMENT, 10000, false, codec, null, null); - final FieldsProducer reader = codec.postingsFormat().fieldsProducer(new SegmentReadState(dir, si, fieldInfos, newIOContext(random()), DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR)); + final FieldsProducer reader = codec.postingsFormat().fieldsProducer(new SegmentReadState(dir, si, fieldInfos, newIOContext(random()))); final Iterator fieldsEnum = reader.iterator(); String fieldName = fieldsEnum.next(); @@ -357,7 +357,7 @@ public class TestCodecs extends LuceneTestCase { if (VERBOSE) { System.out.println("TEST: now read postings"); } - final FieldsProducer terms = codec.postingsFormat().fieldsProducer(new SegmentReadState(dir, si, fieldInfos, newIOContext(random()), DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR)); + final FieldsProducer terms = codec.postingsFormat().fieldsProducer(new SegmentReadState(dir, si, fieldInfos, newIOContext(random()))); final Verify[] threads = new Verify[NUM_TEST_THREADS-1]; for(int i=0;i leaves = r2.leaves(); - assertEquals(2, leaves.size()); - for(AtomicReaderContext ctx : leaves) { - try { - ctx.reader().docFreq(new Term("field", "f")); - fail("did not hit expected exception"); - } catch (IllegalStateException ise) { - // expected - } - } - r2.close(); - dir.close(); - } - // LUCENE-2046 public void testPrepareCommitIsCurrent() throws Throwable { Directory dir = newDirectory(); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDoc.java b/lucene/core/src/test/org/apache/lucene/index/TestDoc.java index 04b397bfe58..7557b297c21 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestDoc.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestDoc.java @@ -212,15 +212,15 @@ public class TestDoc extends LuceneTestCase { private SegmentInfoPerCommit merge(Directory dir, SegmentInfoPerCommit si1, SegmentInfoPerCommit si2, String merged, boolean useCompoundFile) throws Exception { IOContext context = newIOContext(random()); - SegmentReader r1 = new SegmentReader(si1, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, context); - SegmentReader r2 = new 
SegmentReader(si2, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, context); + SegmentReader r1 = new SegmentReader(si1, context); + SegmentReader r2 = new SegmentReader(si2, context); final Codec codec = Codec.getDefault(); TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(si1.info.dir); final SegmentInfo si = new SegmentInfo(si1.info.dir, Constants.LUCENE_MAIN_VERSION, merged, -1, false, codec, null, null); SegmentMerger merger = new SegmentMerger(Arrays.asList(r1, r2), - si, InfoStream.getDefault(), trackingDir, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL, + si, InfoStream.getDefault(), trackingDir, MergeState.CheckAbort.NONE, new FieldInfos.FieldNumbers(), context); MergeState mergeState = merger.merge(); @@ -245,7 +245,7 @@ public class TestDoc extends LuceneTestCase { private void printSegment(PrintWriter out, SegmentInfoPerCommit si) throws Exception { - SegmentReader reader = new SegmentReader(si, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, newIOContext(random())); + SegmentReader reader = new SegmentReader(si, newIOContext(random())); for (int i = 0; i < reader.numDocs(); i++) out.println(reader.document(i)); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDocumentWriter.java b/lucene/core/src/test/org/apache/lucene/index/TestDocumentWriter.java index e36479e6768..1984258f93f 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestDocumentWriter.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestDocumentWriter.java @@ -65,7 +65,7 @@ public class TestDocumentWriter extends LuceneTestCase { SegmentInfoPerCommit info = writer.newestSegment(); writer.close(); //After adding the document, we should be able to read it back in - SegmentReader reader = new SegmentReader(info, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, newIOContext(random())); + SegmentReader reader = new SegmentReader(info, newIOContext(random())); assertTrue(reader != null); StoredDocument doc = reader.document(0); assertTrue(doc != null); @@ -126,7 +126,7 @@ public class TestDocumentWriter extends LuceneTestCase { writer.commit(); SegmentInfoPerCommit info = writer.newestSegment(); writer.close(); - SegmentReader reader = new SegmentReader(info, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, newIOContext(random())); + SegmentReader reader = new SegmentReader(info, newIOContext(random())); DocsAndPositionsEnum termPositions = MultiFields.getTermPositionsEnum(reader, MultiFields.getLiveDocs(reader), "repeated", new BytesRef("repeated")); @@ -198,7 +198,7 @@ public class TestDocumentWriter extends LuceneTestCase { writer.commit(); SegmentInfoPerCommit info = writer.newestSegment(); writer.close(); - SegmentReader reader = new SegmentReader(info, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, newIOContext(random())); + SegmentReader reader = new SegmentReader(info, newIOContext(random())); DocsAndPositionsEnum termPositions = MultiFields.getTermPositionsEnum(reader, reader.getLiveDocs(), "f1", new BytesRef("a")); assertTrue(termPositions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); @@ -241,7 +241,7 @@ public class TestDocumentWriter extends LuceneTestCase { writer.commit(); SegmentInfoPerCommit info = writer.newestSegment(); writer.close(); - SegmentReader reader = new SegmentReader(info, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, newIOContext(random())); + SegmentReader reader = new SegmentReader(info, newIOContext(random())); DocsAndPositionsEnum termPositions = reader.termPositionsEnum(new Term("preanalyzed", "term1")); assertTrue(termPositions.nextDoc() != 
DocIdSetIterator.NO_MORE_DOCS); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java index 8caafd6dcba..1db3d81b830 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java @@ -1305,36 +1305,6 @@ public class TestIndexWriter extends LuceneTestCase { dir.close(); } - public void testIndexDivisor() throws Exception { - Directory dir = newDirectory(); - IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); - config.setTermIndexInterval(2); - IndexWriter w = new IndexWriter(dir, config); - StringBuilder s = new StringBuilder(); - // must be > 256 - for(int i=0;i<300;i++) { - s.append(' ').append(i); - } - Document d = new Document(); - Field f = newTextField("field", s.toString(), Field.Store.NO); - d.add(f); - w.addDocument(d); - - AtomicReader r = getOnlySegmentReader(w.getReader()); - TermsEnum t = r.fields().terms("field").iterator(null); - int count = 0; - while(t.next() != null) { - final DocsEnum docs = _TestUtil.docs(random(), t, null, null, DocsEnum.FLAG_NONE); - assertEquals(0, docs.nextDoc()); - assertEquals(DocIdSetIterator.NO_MORE_DOCS, docs.nextDoc()); - count++; - } - assertEquals(300, count); - r.close(); - w.close(); - dir.close(); - } - public void testDeleteUnusedFiles() throws Exception { for(int iter=0;iter<2;iter++) { Directory dir = newMockDirectory(); // relies on windows semantics @@ -1716,20 +1686,6 @@ public class TestIndexWriter extends LuceneTestCase { dir.close(); } - // LUCENE-3183 - public void testEmptyFieldNameTIIOne() throws IOException { - Directory dir = newDirectory(); - IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); - iwc.setTermIndexInterval(1); - iwc.setReaderTermsIndexDivisor(1); - IndexWriter writer = new IndexWriter(dir, iwc); - Document doc = new Document(); - doc.add(newTextField("", "a b c", Field.Store.NO)); - writer.addDocument(doc); - writer.close(); - dir.close(); - } - public void testDeleteAllNRTLeftoverFiles() throws Exception { Directory d = new MockDirectoryWrapper(random(), new RAMDirectory()); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterConfig.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterConfig.java index c01f287c8f5..bdd83a7b17f 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterConfig.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterConfig.java @@ -66,7 +66,6 @@ public class TestIndexWriterConfig extends LuceneTestCase { assertEquals(OpenMode.CREATE_OR_APPEND, conf.getOpenMode()); // we don't need to assert this, it should be unspecified assertTrue(IndexSearcher.getDefaultSimilarity() == conf.getSimilarity()); - assertEquals(IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL, conf.getTermIndexInterval()); assertEquals(IndexWriterConfig.getDefaultWriteLockTimeout(), conf.getWriteLockTimeout()); assertEquals(IndexWriterConfig.WRITE_LOCK_TIMEOUT, IndexWriterConfig.getDefaultWriteLockTimeout()); assertEquals(IndexWriterConfig.DEFAULT_MAX_BUFFERED_DELETE_TERMS, conf.getMaxBufferedDeleteTerms()); @@ -75,7 +74,6 @@ public class TestIndexWriterConfig extends LuceneTestCase { assertEquals(IndexWriterConfig.DEFAULT_READER_POOLING, conf.getReaderPooling()); assertTrue(DocumentsWriterPerThread.defaultIndexingChain == conf.getIndexingChain()); 
assertNull(conf.getMergedSegmentWarmer()); - assertEquals(IndexWriterConfig.DEFAULT_READER_TERMS_INDEX_DIVISOR, conf.getReaderTermsIndexDivisor()); assertEquals(TieredMergePolicy.class, conf.getMergePolicy().getClass()); assertEquals(ThreadAffinityDocumentsWriterThreadPool.class, conf.getIndexerThreadPool().getClass()); assertEquals(FlushByRamOrCountsPolicy.class, conf.getFlushPolicy().getClass()); @@ -92,7 +90,6 @@ public class TestIndexWriterConfig extends LuceneTestCase { getters.add("getMergeScheduler"); getters.add("getOpenMode"); getters.add("getSimilarity"); - getters.add("getTermIndexInterval"); getters.add("getWriteLockTimeout"); getters.add("getDefaultWriteLockTimeout"); getters.add("getMaxBufferedDeleteTerms"); @@ -104,7 +101,6 @@ public class TestIndexWriterConfig extends LuceneTestCase { getters.add("getMaxThreadStates"); getters.add("getReaderPooling"); getters.add("getIndexerThreadPool"); - getters.add("getReaderTermsIndexDivisor"); getters.add("getFlushPolicy"); getters.add("getRAMPerThreadHardLimitMB"); getters.add("getCodec"); @@ -200,14 +196,12 @@ public class TestIndexWriterConfig extends LuceneTestCase { public void testConstants() throws Exception { // Tests that the values of the constants does not change assertEquals(1000, IndexWriterConfig.WRITE_LOCK_TIMEOUT); - assertEquals(32, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL); assertEquals(-1, IndexWriterConfig.DISABLE_AUTO_FLUSH); assertEquals(IndexWriterConfig.DISABLE_AUTO_FLUSH, IndexWriterConfig.DEFAULT_MAX_BUFFERED_DELETE_TERMS); assertEquals(IndexWriterConfig.DISABLE_AUTO_FLUSH, IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS); assertEquals(16.0, IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB, 0.0); assertEquals(false, IndexWriterConfig.DEFAULT_READER_POOLING); assertEquals(true, IndexWriterConfig.DEFAULT_USE_COMPOUND_FILE_SYSTEM); - assertEquals(DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, IndexWriterConfig.DEFAULT_READER_TERMS_INDEX_DIVISOR); } @Test @@ -341,23 +335,6 @@ public class TestIndexWriterConfig extends LuceneTestCase { } catch (IllegalArgumentException e) { // this is expected } - - // Test setReaderTermsIndexDivisor - try { - conf.setReaderTermsIndexDivisor(0); - fail("should not have succeeded to set termsIndexDivisor to 0"); - } catch (IllegalArgumentException e) { - // this is expected - } - - // Setting to -1 is ok - conf.setReaderTermsIndexDivisor(-1); - try { - conf.setReaderTermsIndexDivisor(-2); - fail("should not have succeeded to set termsIndexDivisor to < -1"); - } catch (IllegalArgumentException e) { - // this is expected - } try { conf.setRAMPerThreadHardLimitMB(2048); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterForceMerge.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterForceMerge.java index e3042cb9cf8..433e49c2f65 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterForceMerge.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterForceMerge.java @@ -130,7 +130,6 @@ public class TestIndexWriterForceMerge extends LuceneTestCase { for(int j=0;j<500;j++) { TestIndexWriter.addDocWithIndex(writer, j); } - final int termIndexInterval = writer.getConfig().getTermIndexInterval(); // force one extra segment w/ different doc store so // we see the doc stores get merged writer.commit(); @@ -152,10 +151,7 @@ public class TestIndexWriterForceMerge extends LuceneTestCase { dir.resetMaxUsedSizeInBytes(); dir.setTrackDiskUsage(true); - // Import to use same term index interval else a - // smaller one here could increase 
the disk usage and - // cause a false failure: - writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random())).setOpenMode(OpenMode.APPEND).setTermIndexInterval(termIndexInterval).setMergePolicy(newLogMergePolicy())); + writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random())).setOpenMode(OpenMode.APPEND).setMergePolicy(newLogMergePolicy())); writer.forceMerge(1); writer.close(); long maxDiskUsage = dir.getMaxUsedSizeInBytes(); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterReader.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterReader.java index 94cb8d61b4c..64841cde215 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterReader.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterReader.java @@ -314,7 +314,7 @@ public class TestIndexWriterReader extends LuceneTestCase { boolean doFullMerge = true; Directory dir1 = newDirectory(); - IndexWriter writer = new IndexWriter(dir1, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random())).setReaderTermsIndexDivisor(2)); + IndexWriter writer = new IndexWriter(dir1, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random()))); // create the index createIndexNoClose(!doFullMerge, "index1", writer); writer.flush(false, true); @@ -1006,35 +1006,6 @@ public class TestIndexWriterReader extends LuceneTestCase { assertTrue(didWarm.get()); } - public void testNoTermsIndex() throws Exception { - // Some Codecs don't honor the ReaderTermsIndexDivisor, so skip the test if - // they're picked. - IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, - new MockAnalyzer(random())).setReaderTermsIndexDivisor(-1); - - // Don't proceed if picked Codec is in the list of illegal ones. 
- final String format = _TestUtil.getPostingsFormat("f"); - assumeFalse("Format: " + format + " does not support ReaderTermsIndexDivisor!", - (format.equals("SimpleText") || format.equals("Memory") || format.equals("Direct"))); - - Directory dir = newDirectory(); - IndexWriter w = new IndexWriter(dir, conf); - Document doc = new Document(); - doc.add(new TextField("f", "val", Field.Store.NO)); - w.addDocument(doc); - SegmentReader r = getOnlySegmentReader(DirectoryReader.open(w, true)); - try { - _TestUtil.docs(random(), r, "f", new BytesRef("val"), null, null, DocsEnum.FLAG_NONE); - fail("should have failed to seek since terms index was not loaded."); - } catch (IllegalStateException e) { - // expected - we didn't load the term index - } finally { - r.close(); - w.close(); - dir.close(); - } - } - public void testReopenAfterNoRealChange() throws Exception { Directory d = newDirectory(); IndexWriter w = new IndexWriter( diff --git a/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java b/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java index a95049796e4..9ac95eb56ad 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java @@ -54,8 +54,8 @@ public class TestSegmentMerger extends LuceneTestCase { SegmentInfoPerCommit info1 = DocHelper.writeDoc(random(), merge1Dir, doc1); DocHelper.setupDoc(doc2); SegmentInfoPerCommit info2 = DocHelper.writeDoc(random(), merge2Dir, doc2); - reader1 = new SegmentReader(info1, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, newIOContext(random())); - reader2 = new SegmentReader(info2, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, newIOContext(random())); + reader1 = new SegmentReader(info1, newIOContext(random())); + reader2 = new SegmentReader(info2, newIOContext(random())); } @Override @@ -81,7 +81,7 @@ public class TestSegmentMerger extends LuceneTestCase { final SegmentInfo si = new SegmentInfo(mergedDir, Constants.LUCENE_MAIN_VERSION, mergedSegment, -1, false, codec, null, null); SegmentMerger merger = new SegmentMerger(Arrays.asList(reader1, reader2), - si, InfoStream.getDefault(), mergedDir, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL, + si, InfoStream.getDefault(), mergedDir, MergeState.CheckAbort.NONE, new FieldInfos.FieldNumbers(), newIOContext(random())); MergeState mergeState = merger.merge(); int docsMerged = mergeState.segmentInfo.getDocCount(); @@ -91,7 +91,7 @@ public class TestSegmentMerger extends LuceneTestCase { new SegmentInfo(mergedDir, Constants.LUCENE_MAIN_VERSION, mergedSegment, docsMerged, false, codec, null, null), 0, -1L), - DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, newIOContext(random())); + newIOContext(random())); assertTrue(mergedReader != null); assertTrue(mergedReader.numDocs() == 2); StoredDocument newDoc1 = mergedReader.document(0); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestSegmentReader.java b/lucene/core/src/test/org/apache/lucene/index/TestSegmentReader.java index c2161263db7..239af2883b1 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestSegmentReader.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestSegmentReader.java @@ -43,7 +43,7 @@ public class TestSegmentReader extends LuceneTestCase { dir = newDirectory(); DocHelper.setupDoc(testDoc); SegmentInfoPerCommit info = DocHelper.writeDoc(random(), dir, testDoc); - reader = new SegmentReader(info, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, IOContext.READ); + reader = new SegmentReader(info, 
IOContext.READ); } @Override diff --git a/lucene/core/src/test/org/apache/lucene/index/TestSegmentTermDocs.java b/lucene/core/src/test/org/apache/lucene/index/TestSegmentTermDocs.java index 7366a92caab..b3507da6c39 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestSegmentTermDocs.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestSegmentTermDocs.java @@ -50,16 +50,11 @@ public class TestSegmentTermDocs extends LuceneTestCase { public void test() { assertTrue(dir != null); } - - public void testTermDocs() throws IOException { - testTermDocs(1); - } - public void testTermDocs(int indexDivisor) throws IOException { + public void testTermDocs() throws IOException { //After adding the document, we should be able to read it back in - SegmentReader reader = new SegmentReader(info, indexDivisor, newIOContext(random())); + SegmentReader reader = new SegmentReader(info, newIOContext(random())); assertTrue(reader != null); - assertEquals(indexDivisor, reader.getTermInfosIndexDivisor()); TermsEnum terms = reader.fields().terms(DocHelper.TEXT_FIELD_2_KEY).iterator(null); terms.seekCeil(new BytesRef("field")); @@ -74,13 +69,9 @@ public class TestSegmentTermDocs extends LuceneTestCase { } public void testBadSeek() throws IOException { - testBadSeek(1); - } - - public void testBadSeek(int indexDivisor) throws IOException { { //After adding the document, we should be able to read it back in - SegmentReader reader = new SegmentReader(info, indexDivisor, newIOContext(random())); + SegmentReader reader = new SegmentReader(info, newIOContext(random())); assertTrue(reader != null); DocsEnum termDocs = _TestUtil.docs(random(), reader, "textField2", @@ -94,7 +85,7 @@ public class TestSegmentTermDocs extends LuceneTestCase { } { //After adding the document, we should be able to read it back in - SegmentReader reader = new SegmentReader(info, indexDivisor, newIOContext(random())); + SegmentReader reader = new SegmentReader(info, newIOContext(random())); assertTrue(reader != null); DocsEnum termDocs = _TestUtil.docs(random(), reader, "junk", @@ -108,10 +99,6 @@ public class TestSegmentTermDocs extends LuceneTestCase { } public void testSkipTo() throws IOException { - testSkipTo(1); - } - - public void testSkipTo(int indexDivisor) throws IOException { Directory dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy())); @@ -131,7 +118,7 @@ public class TestSegmentTermDocs extends LuceneTestCase { writer.forceMerge(1); writer.close(); - IndexReader reader = DirectoryReader.open(dir, indexDivisor); + IndexReader reader = DirectoryReader.open(dir); DocsEnum tdocs = _TestUtil.docs(random(), reader, ta.field(), @@ -267,15 +254,7 @@ public class TestSegmentTermDocs extends LuceneTestCase { reader.close(); dir.close(); } - - public void testIndexDivisor() throws IOException { - testDoc = new Document(); - DocHelper.setupDoc(testDoc); - DocHelper.writeDoc(random(), dir, testDoc); - testTermDocs(2); - testBadSeek(2); - testSkipTo(2); - } + private void addDoc(IndexWriter writer, String value) throws IOException { diff --git a/lucene/facet/src/java/org/apache/lucene/facet/util/TaxonomyMergeUtils.java b/lucene/facet/src/java/org/apache/lucene/facet/util/TaxonomyMergeUtils.java index 3a6c75b7fe7..276317196dc 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/util/TaxonomyMergeUtils.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/util/TaxonomyMergeUtils.java @@ -45,7 +45,7 @@ 
public class TaxonomyMergeUtils { // merge the taxonomies destTaxWriter.addTaxonomy(srcTaxDir, map); int ordinalMap[] = map.getMap(); - DirectoryReader reader = DirectoryReader.open(srcIndexDir, -1); + DirectoryReader reader = DirectoryReader.open(srcIndexDir); List leaves = reader.leaves(); int numReaders = leaves.size(); AtomicReader wrappedLeaves[] = new AtomicReader[numReaders]; diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41ords/Lucene41WithOrds.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41ords/Lucene41WithOrds.java index 7058d671bf8..a6e42d5d001 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41ords/Lucene41WithOrds.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41ords/Lucene41WithOrds.java @@ -45,9 +45,15 @@ import org.apache.lucene.util.BytesRef; * {@link FixedGapTermsIndexWriter}. */ public final class Lucene41WithOrds extends PostingsFormat { - + final int termIndexInterval; + public Lucene41WithOrds() { + this(FixedGapTermsIndexWriter.DEFAULT_TERM_INDEX_INTERVAL); + } + + public Lucene41WithOrds(int termIndexInterval) { super("Lucene41WithOrds"); + this.termIndexInterval = termIndexInterval; } @Override @@ -61,7 +67,7 @@ public final class Lucene41WithOrds extends PostingsFormat { TermsIndexWriterBase indexWriter; boolean success = false; try { - indexWriter = new FixedGapTermsIndexWriter(state); + indexWriter = new FixedGapTermsIndexWriter(state, termIndexInterval); success = true; } finally { if (!success) { @@ -97,7 +103,6 @@ public final class Lucene41WithOrds extends PostingsFormat { indexReader = new FixedGapTermsIndexReader(state.directory, state.fieldInfos, state.segmentInfo.name, - state.termsIndexDivisor, BytesRef.getUTF8SortedAsUnicodeComparator(), state.segmentSuffix, state.context); success = true; diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41vargap/Lucene41VarGapDocFreqInterval.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41vargap/Lucene41VarGapDocFreqInterval.java new file mode 100644 index 00000000000..9e937e189f3 --- /dev/null +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41vargap/Lucene41VarGapDocFreqInterval.java @@ -0,0 +1,144 @@ +package org.apache.lucene.codecs.lucene41vargap; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.codecs.FieldsConsumer;
+import org.apache.lucene.codecs.FieldsProducer;
+import org.apache.lucene.codecs.PostingsFormat;
+import org.apache.lucene.codecs.PostingsReaderBase;
+import org.apache.lucene.codecs.PostingsWriterBase;
+import org.apache.lucene.codecs.blockterms.BlockTermsReader;
+import org.apache.lucene.codecs.blockterms.BlockTermsWriter;
+import org.apache.lucene.codecs.blockterms.FixedGapTermsIndexWriter;
+import org.apache.lucene.codecs.blockterms.TermsIndexReaderBase;
+import org.apache.lucene.codecs.blockterms.TermsIndexWriterBase;
+import org.apache.lucene.codecs.blockterms.VariableGapTermsIndexReader;
+import org.apache.lucene.codecs.blockterms.VariableGapTermsIndexWriter;
+import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat; // javadocs
+import org.apache.lucene.codecs.lucene41.Lucene41PostingsReader;
+import org.apache.lucene.codecs.lucene41.Lucene41PostingsWriter;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SegmentWriteState;
+
+// TODO: we could make a separate base class that can wrap
+// any PostingsBaseFormat and make it ord-able...
+
+/**
+ * Customized version of {@link Lucene41PostingsFormat} that uses
+ * {@link VariableGapTermsIndexWriter} with a fixed interval, but
+ * also forces high-docFreq terms to be indexed terms.
+ */
+public final class Lucene41VarGapDocFreqInterval extends PostingsFormat {
+  final int termIndexInterval;
+  final int docFreqThreshold;
+
+  public Lucene41VarGapDocFreqInterval() {
+    this(1000000, FixedGapTermsIndexWriter.DEFAULT_TERM_INDEX_INTERVAL);
+  }
+
+  public Lucene41VarGapDocFreqInterval(int docFreqThreshold, int termIndexInterval) {
+    super("Lucene41VarGapDocFreqInterval");
+    this.termIndexInterval = termIndexInterval;
+    this.docFreqThreshold = docFreqThreshold;
+  }
+
+  @Override
+  public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
+    PostingsWriterBase docs = new Lucene41PostingsWriter(state);
+
+    // TODO: should we make the terms index more easily
+    // pluggable?  Ie so that this codec would record which
+    // index impl was used, and switch on loading?
+    // Or... you must make a new Codec for this?
+    TermsIndexWriterBase indexWriter;
+    boolean success = false;
+    try {
+      // EveryNOrDocFreqTermSelector indexes every termIndexInterval'th term,
+      // plus any term whose docFreq is at least docFreqThreshold.
+      indexWriter = new VariableGapTermsIndexWriter(state, new VariableGapTermsIndexWriter.EveryNOrDocFreqTermSelector(docFreqThreshold, termIndexInterval));
+      success = true;
+    } finally {
+      if (!success) {
+        docs.close();
+      }
+    }
+
+    success = false;
+    try {
+      // Must use BlockTermsWriter (not BlockTree) because
+      // BlockTree doesn't support ords (yet)...
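+      // BlockTermsWriter takes ownership of indexWriter and docs; until it
+      // is constructed successfully, the finally block below closes them.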
+ FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, docs); + success = true; + return ret; + } finally { + if (!success) { + try { + docs.close(); + } finally { + indexWriter.close(); + } + } + } + } + + @Override + public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException { + PostingsReaderBase postings = new Lucene41PostingsReader(state.directory, state.fieldInfos, state.segmentInfo, state.context, state.segmentSuffix); + TermsIndexReaderBase indexReader; + + boolean success = false; + try { + indexReader = new VariableGapTermsIndexReader(state.directory, + state.fieldInfos, + state.segmentInfo.name, + state.segmentSuffix, state.context); + success = true; + } finally { + if (!success) { + postings.close(); + } + } + + success = false; + try { + FieldsProducer ret = new BlockTermsReader(indexReader, + state.directory, + state.fieldInfos, + state.segmentInfo, + postings, + state.context, + state.segmentSuffix); + success = true; + return ret; + } finally { + if (!success) { + try { + postings.close(); + } finally { + indexReader.close(); + } + } + } + } + + /** Extension of freq postings file */ + static final String FREQ_EXTENSION = "frq"; + + /** Extension of prox postings file */ + static final String PROX_EXTENSION = "prx"; +} diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41vargap/Lucene41VarGapFixedInterval.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41vargap/Lucene41VarGapFixedInterval.java new file mode 100644 index 00000000000..5d77f037e78 --- /dev/null +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41vargap/Lucene41VarGapFixedInterval.java @@ -0,0 +1,141 @@ +package org.apache.lucene.codecs.lucene41vargap; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.codecs.FieldsConsumer;
+import org.apache.lucene.codecs.FieldsProducer;
+import org.apache.lucene.codecs.PostingsFormat;
+import org.apache.lucene.codecs.PostingsReaderBase;
+import org.apache.lucene.codecs.PostingsWriterBase;
+import org.apache.lucene.codecs.blockterms.BlockTermsReader;
+import org.apache.lucene.codecs.blockterms.BlockTermsWriter;
+import org.apache.lucene.codecs.blockterms.FixedGapTermsIndexWriter;
+import org.apache.lucene.codecs.blockterms.TermsIndexReaderBase;
+import org.apache.lucene.codecs.blockterms.TermsIndexWriterBase;
+import org.apache.lucene.codecs.blockterms.VariableGapTermsIndexReader;
+import org.apache.lucene.codecs.blockterms.VariableGapTermsIndexWriter;
+import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat; // javadocs
+import org.apache.lucene.codecs.lucene41.Lucene41PostingsReader;
+import org.apache.lucene.codecs.lucene41.Lucene41PostingsWriter;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SegmentWriteState;
+
+// TODO: we could make a separate base class that can wrap
+// any PostingsBaseFormat and make it ord-able...
+
+/**
+ * Customized version of {@link Lucene41PostingsFormat} that uses
+ * {@link VariableGapTermsIndexWriter} with a fixed interval.
+ */
+public final class Lucene41VarGapFixedInterval extends PostingsFormat {
+  final int termIndexInterval;
+
+  public Lucene41VarGapFixedInterval() {
+    this(FixedGapTermsIndexWriter.DEFAULT_TERM_INDEX_INTERVAL);
+  }
+
+  public Lucene41VarGapFixedInterval(int termIndexInterval) {
+    super("Lucene41VarGapFixedInterval");
+    this.termIndexInterval = termIndexInterval;
+  }
+
+  @Override
+  public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
+    PostingsWriterBase docs = new Lucene41PostingsWriter(state);
+
+    // TODO: should we make the terms index more easily
+    // pluggable?  Ie so that this codec would record which
+    // index impl was used, and switch on loading?
+    // Or... you must make a new Codec for this?
+    TermsIndexWriterBase indexWriter;
+    boolean success = false;
+    try {
+      // EveryNTermSelector marks every termIndexInterval'th term as an
+      // index term.
+      indexWriter = new VariableGapTermsIndexWriter(state, new VariableGapTermsIndexWriter.EveryNTermSelector(termIndexInterval));
+      success = true;
+    } finally {
+      if (!success) {
+        docs.close();
+      }
+    }
+
+    success = false;
+    try {
+      // Must use BlockTermsWriter (not BlockTree) because
+      // BlockTree doesn't support ords (yet)...
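+      // The variable-gap index is an FST from each indexed term to its file
+      // pointer in the terms dictionary, so a larger termIndexInterval
+      // shrinks the index at the cost of scanning more terms per seek.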
+ FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, docs); + success = true; + return ret; + } finally { + if (!success) { + try { + docs.close(); + } finally { + indexWriter.close(); + } + } + } + } + + @Override + public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException { + PostingsReaderBase postings = new Lucene41PostingsReader(state.directory, state.fieldInfos, state.segmentInfo, state.context, state.segmentSuffix); + TermsIndexReaderBase indexReader; + + boolean success = false; + try { + indexReader = new VariableGapTermsIndexReader(state.directory, + state.fieldInfos, + state.segmentInfo.name, + state.segmentSuffix, state.context); + success = true; + } finally { + if (!success) { + postings.close(); + } + } + + success = false; + try { + FieldsProducer ret = new BlockTermsReader(indexReader, + state.directory, + state.fieldInfos, + state.segmentInfo, + postings, + state.context, + state.segmentSuffix); + success = true; + return ret; + } finally { + if (!success) { + try { + postings.close(); + } finally { + indexReader.close(); + } + } + } + } + + /** Extension of freq postings file */ + static final String FREQ_EXTENSION = "frq"; + + /** Extension of prox postings file */ + static final String PROX_EXTENSION = "prx"; +} diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41vargap/package.html b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41vargap/package.html new file mode 100644 index 00000000000..606bb064d70 --- /dev/null +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41vargap/package.html @@ -0,0 +1,25 @@ + + + + + + + +Codecs for testing that support {@link org.apache.lucene.codecs.blockterms.VariableGapTermsIndexReader} + + diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/mockintblock/MockFixedIntBlockPostingsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/mockintblock/MockFixedIntBlockPostingsFormat.java index 8e0fadc75b0..fff571c20a3 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/mockintblock/MockFixedIntBlockPostingsFormat.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/mockintblock/MockFixedIntBlockPostingsFormat.java @@ -169,7 +169,6 @@ public final class MockFixedIntBlockPostingsFormat extends PostingsFormat { indexReader = new FixedGapTermsIndexReader(state.directory, state.fieldInfos, state.segmentInfo.name, - state.termsIndexDivisor, BytesRef.getUTF8SortedAsUnicodeComparator(), state.segmentSuffix, IOContext.DEFAULT); success = true; diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/mockintblock/MockVariableIntBlockPostingsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/mockintblock/MockVariableIntBlockPostingsFormat.java index e4c70c809f8..48face99cbc 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/mockintblock/MockVariableIntBlockPostingsFormat.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/mockintblock/MockVariableIntBlockPostingsFormat.java @@ -194,7 +194,6 @@ public final class MockVariableIntBlockPostingsFormat extends PostingsFormat { indexReader = new FixedGapTermsIndexReader(state.directory, state.fieldInfos, state.segmentInfo.name, - state.termsIndexDivisor, BytesRef.getUTF8SortedAsUnicodeComparator(), state.segmentSuffix, state.context); success = true; diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java 
b/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java index 3e6266d7d5d..502457fd81c 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java @@ -220,11 +220,11 @@ public final class MockRandomPostingsFormat extends PostingsFormat { final TermsIndexWriterBase indexWriter; try { if (random.nextBoolean()) { - state.termIndexInterval = _TestUtil.nextInt(random, 1, 100); + int termIndexInterval = _TestUtil.nextInt(random, 1, 100); if (LuceneTestCase.VERBOSE) { - System.out.println("MockRandomCodec: fixed-gap terms index (tii=" + state.termIndexInterval + ")"); + System.out.println("MockRandomCodec: fixed-gap terms index (tii=" + termIndexInterval + ")"); } - indexWriter = new FixedGapTermsIndexWriter(state); + indexWriter = new FixedGapTermsIndexWriter(state, termIndexInterval); } else { final VariableGapTermsIndexWriter.IndexTermSelector selector; final int n2 = random.nextInt(3); @@ -340,8 +340,7 @@ public final class MockRandomPostingsFormat extends PostingsFormat { state.segmentInfo, postingsReader, state.context, - state.segmentSuffix, - state.termsIndexDivisor); + state.segmentSuffix); success = true; } finally { if (!success) { @@ -359,20 +358,14 @@ public final class MockRandomPostingsFormat extends PostingsFormat { final boolean doFixedGap = random.nextBoolean(); // randomness diverges from writer, here: - if (state.termsIndexDivisor != -1) { - state.termsIndexDivisor = _TestUtil.nextInt(random, 1, 10); - } if (doFixedGap) { - // if termsIndexDivisor is set to -1, we should not touch it. It means a - // test explicitly instructed not to load the terms index. 
if (LuceneTestCase.VERBOSE) { - System.out.println("MockRandomCodec: fixed-gap terms index (divisor=" + state.termsIndexDivisor + ")"); + System.out.println("MockRandomCodec: fixed-gap terms index"); } indexReader = new FixedGapTermsIndexReader(state.directory, state.fieldInfos, state.segmentInfo.name, - state.termsIndexDivisor, BytesRef.getUTF8SortedAsUnicodeComparator(), state.segmentSuffix, state.context); } else { @@ -383,12 +376,11 @@ public final class MockRandomPostingsFormat extends PostingsFormat { random.nextLong(); } if (LuceneTestCase.VERBOSE) { - System.out.println("MockRandomCodec: variable-gap terms index (divisor=" + state.termsIndexDivisor + ")"); + System.out.println("MockRandomCodec: variable-gap terms index"); } indexReader = new VariableGapTermsIndexReader(state.directory, state.fieldInfos, state.segmentInfo.name, - state.termsIndexDivisor, state.segmentSuffix, state.context); } diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/MockSepPostingsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/MockSepPostingsFormat.java index 02f2a220485..f31700470d6 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/MockSepPostingsFormat.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/MockSepPostingsFormat.java @@ -92,7 +92,6 @@ public final class MockSepPostingsFormat extends PostingsFormat { indexReader = new FixedGapTermsIndexReader(state.directory, state.fieldInfos, state.segmentInfo.name, - state.termsIndexDivisor, BytesRef.getUTF8SortedAsUnicodeComparator(), state.segmentSuffix, state.context); success = true; diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/nestedpulsing/NestedPulsingPostingsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/nestedpulsing/NestedPulsingPostingsFormat.java index c509189706c..54cabfb355a 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/nestedpulsing/NestedPulsingPostingsFormat.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/nestedpulsing/NestedPulsingPostingsFormat.java @@ -84,8 +84,7 @@ public final class NestedPulsingPostingsFormat extends PostingsFormat { state.directory, state.fieldInfos, state.segmentInfo, pulsingReader, state.context, - state.segmentSuffix, - state.termsIndexDivisor); + state.segmentSuffix); success = true; return ret; } finally { diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java index af66e03a622..b016252a9ce 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java @@ -1022,7 +1022,7 @@ public abstract class BaseDocValuesFormatTestCase extends LuceneTestCase { writer.close(true); - DirectoryReader reader = DirectoryReader.open(dir, 1); + DirectoryReader reader = DirectoryReader.open(dir); assertEquals(1, reader.leaves().size()); IndexSearcher searcher = new IndexSearcher(reader); diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java index a864cc30f23..8010e24b7ac 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java +++ 
diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java
index a864cc30f23..8010e24b7ac 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java
@@ -38,6 +38,8 @@ import org.apache.lucene.codecs.FieldsProducer;
 import org.apache.lucene.codecs.PostingsConsumer;
 import org.apache.lucene.codecs.TermStats;
 import org.apache.lucene.codecs.TermsConsumer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
 import org.apache.lucene.index.FieldInfo.DocValuesType;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.store.Directory;
@@ -483,7 +485,7 @@ public abstract class BasePostingsFormatTestCase extends LuceneTestCase {

     SegmentWriteState writeState = new SegmentWriteState(null, dir,
                                                          segmentInfo, newFieldInfos,
-                                                         32, null, new IOContext(new FlushInfo(maxDoc, bytes)));
+                                                         null, new IOContext(new FlushInfo(maxDoc, bytes)));

     FieldsConsumer fieldsConsumer = codec.postingsFormat().fieldsConsumer(writeState);

     for(Map.Entry> fieldEnt : fields.entrySet()) {
@@ -567,7 +569,7 @@ public abstract class BasePostingsFormatTestCase extends LuceneTestCase {

     currentFieldInfos = newFieldInfos;

-    SegmentReadState readState = new SegmentReadState(dir, segmentInfo, newFieldInfos, IOContext.DEFAULT, 1);
+    SegmentReadState readState = new SegmentReadState(dir, segmentInfo, newFieldInfos, IOContext.DEFAULT);

     return codec.postingsFormat().fieldsProducer(readState);
   }
@@ -1130,4 +1132,85 @@ public abstract class BasePostingsFormatTestCase extends LuceneTestCase {
       _TestUtil.rmDir(path);
     }
   }
+
+  public void testEmptyField() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
+    iwc.setCodec(getCodec());
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+    Document doc = new Document();
+    doc.add(newStringField("", "something", Field.Store.NO));
+    iw.addDocument(doc);
+    DirectoryReader ir = iw.getReader();
+    AtomicReader ar = getOnlySegmentReader(ir);
+    Fields fields = ar.fields();
+    assertTrue(fields.size() == 1);
+    Terms terms = ar.terms("");
+    assertNotNull(terms);
+    TermsEnum termsEnum = terms.iterator(null);
+    assertNotNull(termsEnum.next());
+    assertEquals(termsEnum.term(), new BytesRef("something"));
+    assertNull(termsEnum.next());
+    ir.close();
+    iw.close();
+    dir.close();
+  }
+
+  public void testEmptyFieldAndEmptyTerm() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
+    iwc.setCodec(getCodec());
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+    Document doc = new Document();
+    doc.add(newStringField("", "", Field.Store.NO));
+    iw.addDocument(doc);
+    DirectoryReader ir = iw.getReader();
+    AtomicReader ar = getOnlySegmentReader(ir);
+    Fields fields = ar.fields();
+    assertTrue(fields.size() == 1);
+    Terms terms = ar.terms("");
+    assertNotNull(terms);
+    TermsEnum termsEnum = terms.iterator(null);
+    assertNotNull(termsEnum.next());
+    assertEquals(termsEnum.term(), new BytesRef(""));
+    assertNull(termsEnum.next());
+    ir.close();
+    iw.close();
+    dir.close();
+  }
+
+  // tests that ghost fields still work
+  // TODO: can this be improved?
+  public void testGhosts() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
+    iwc.setCodec(getCodec());
+    iwc.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+    Document doc = new Document();
+    iw.addDocument(doc);
+    doc.add(newStringField("ghostField", "something", Field.Store.NO));
+    iw.addDocument(doc);
+    iw.forceMerge(1);
+    iw.deleteDocuments(new Term("ghostField", "something")); // delete the only term for the field
+    iw.forceMerge(1);
+    DirectoryReader ir = iw.getReader();
+    AtomicReader ar = getOnlySegmentReader(ir);
+    Fields fields = ar.fields();
+    // Ghost busting terms dict impls will have
+    // fields.size() == 0; all others must be == 1:
+    assertTrue(fields.size() <= 1);
+    Terms terms = fields.terms("ghostField");
+    if (terms != null) {
+      TermsEnum termsEnum = terms.iterator(null);
+      BytesRef term = termsEnum.next();
+      if (term != null) {
+        DocsEnum docsEnum = termsEnum.docs(null, null);
+        assertTrue(docsEnum.nextDoc() == DocsEnum.NO_MORE_DOCS);
+      }
+    }
+    ir.close();
+    iw.close();
+    dir.close();
+  }
 }
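Two constructor changes run through this file: SegmentWriteState loses its termIndexInterval argument (the literal 32) and SegmentReadState loses its trailing termsIndexDivisor (the literal 1). A sketch of the new shapes, with the arguments assumed to be built the way the test builds them (the StateShapes wrapper is illustrative):

    import org.apache.lucene.index.FieldInfos;
    import org.apache.lucene.index.SegmentInfo;
    import org.apache.lucene.index.SegmentReadState;
    import org.apache.lucene.index.SegmentWriteState;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FlushInfo;
    import org.apache.lucene.store.IOContext;

    class StateShapes {
      // The nulls are the InfoStream and buffered-deletes slots, exactly as
      // the test passes them; there is no termIndexInterval slot anymore.
      static SegmentWriteState writeState(Directory dir, SegmentInfo si,
                                          FieldInfos fis, int maxDoc, long bytes) {
        return new SegmentWriteState(null, dir, si, fis, null,
                                     new IOContext(new FlushInfo(maxDoc, bytes)));
      }

      // No trailing termsIndexDivisor argument anymore:
      static SegmentReadState readState(Directory dir, SegmentInfo si, FieldInfos fis) {
        return new SegmentReadState(dir, si, fis, IOContext.DEFAULT);
      }
    }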
diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java b/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java
index 86258c0a750..9ecc5122f33 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java
@@ -33,6 +33,8 @@ import org.apache.lucene.codecs.asserting.AssertingDocValuesFormat;
 import org.apache.lucene.codecs.asserting.AssertingPostingsFormat;
 import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat;
 import org.apache.lucene.codecs.lucene41ords.Lucene41WithOrds;
+import org.apache.lucene.codecs.lucene41vargap.Lucene41VarGapDocFreqInterval;
+import org.apache.lucene.codecs.lucene41vargap.Lucene41VarGapFixedInterval;
 import org.apache.lucene.codecs.lucene42.Lucene42Codec;
 import org.apache.lucene.codecs.lucene42.Lucene42DocValuesFormat;
 import org.apache.lucene.codecs.bloom.TestBloomFilteredLucene41Postings;
@@ -137,7 +139,9 @@ public class RandomCodec extends Lucene42Codec {
        new MockVariableIntBlockPostingsFormat(
            _TestUtil.nextInt(random, 1, 127)),
        new MockRandomPostingsFormat(random),
        new NestedPulsingPostingsFormat(),
-       new Lucene41WithOrds(),
+       new Lucene41WithOrds(_TestUtil.nextInt(random, 1, 1000)),
+       new Lucene41VarGapFixedInterval(_TestUtil.nextInt(random, 1, 1000)),
+       new Lucene41VarGapDocFreqInterval(_TestUtil.nextInt(random, 1, 100), _TestUtil.nextInt(random, 1, 1000)),
        new SimpleTextPostingsFormat(),
        new AssertingPostingsFormat(),
        new MemoryPostingsFormat(true, random.nextFloat()),
diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java b/lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java
index 3fea86c8d33..fb67b4428c3 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java
@@ -308,7 +308,7 @@ public class RandomIndexWriter implements Closeable {
     }
     w.commit();
     if (r.nextBoolean()) {
-      return DirectoryReader.open(w.getDirectory(), _TestUtil.nextInt(r, 1, 10));
+      return DirectoryReader.open(w.getDirectory());
     } else {
       return w.getReader(applyDeletions);
     }
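RandomCodec now draws explicit per-format intervals, which is also the intended application-level replacement for the removed IndexWriterConfig knob: configure the interval on a postings format and hand it out per field from the codec. A sketch using one of the new test-framework formats (field name and interval are illustrative; production code would use its own format rather than a test one, and the null analyzer mirrors what the tests in this patch pass):

    import org.apache.lucene.codecs.PostingsFormat;
    import org.apache.lucene.codecs.lucene41vargap.Lucene41VarGapFixedInterval;
    import org.apache.lucene.codecs.lucene42.Lucene42Codec;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.util.Version;

    class PerFieldIntervalExample {
      static IndexWriterConfig configWithPerFieldInterval() {
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_44, null);
        iwc.setCodec(new Lucene42Codec() {
          // Illustrative: index every 64th term of the "id" field.
          private final PostingsFormat idFormat = new Lucene41VarGapFixedInterval(64);

          @Override
          public PostingsFormat getPostingsFormatForField(String field) {
            return "id".equals(field) ? idFormat : super.getPostingsFormatForField(field);
          }
        });
        return iwc;
      }
    }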
b/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java
index 5e38d92ad13..b173fef07b8 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java
@@ -753,15 +753,6 @@ public abstract class LuceneTestCase extends Assert {
         c.setMaxBufferedDocs(_TestUtil.nextInt(r, 16, 1000));
       }
     }
-    if (r.nextBoolean()) {
-      if (rarely(r)) {
-        // crazy value
-        c.setTermIndexInterval(r.nextBoolean() ? _TestUtil.nextInt(r, 1, 31) : _TestUtil.nextInt(r, 129, 1000));
-      } else {
-        // reasonable value
-        c.setTermIndexInterval(_TestUtil.nextInt(r, 32, 128));
-      }
-    }
     if (r.nextBoolean()) {
       int maxNumThreadStates = rarely(r) ? _TestUtil.nextInt(r, 5, 20) // crazy value
                                          : _TestUtil.nextInt(r, 1, 4); // reasonable value
@@ -816,7 +807,6 @@ public abstract class LuceneTestCase extends Assert {
     }
     c.setUseCompoundFile(r.nextBoolean());
     c.setReaderPooling(r.nextBoolean());
-    c.setReaderTermsIndexDivisor(_TestUtil.nextInt(r, 1, 4));
     return c;
   }
diff --git a/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat b/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat
index 3b7b3836da4..59d0dd3f33f 100644
--- a/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat
+++ b/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat
@@ -20,6 +20,8 @@ org.apache.lucene.codecs.mocksep.MockSepPostingsFormat
 org.apache.lucene.codecs.nestedpulsing.NestedPulsingPostingsFormat
 org.apache.lucene.codecs.ramonly.RAMOnlyPostingsFormat
 org.apache.lucene.codecs.lucene41ords.Lucene41WithOrds
+org.apache.lucene.codecs.lucene41vargap.Lucene41VarGapFixedInterval
+org.apache.lucene.codecs.lucene41vargap.Lucene41VarGapDocFreqInterval
 org.apache.lucene.codecs.bloom.TestBloomFilteredLucene41Postings
 org.apache.lucene.codecs.asserting.AssertingPostingsFormat
 org.apache.lucene.codecs.lucene40.Lucene40RWPostingsFormat
diff --git a/solr/core/src/java/org/apache/solr/core/IndexReaderFactory.java b/solr/core/src/java/org/apache/solr/core/IndexReaderFactory.java
index 0407ea7aad0..9418778415e 100644
--- a/solr/core/src/java/org/apache/solr/core/IndexReaderFactory.java
+++ b/solr/core/src/java/org/apache/solr/core/IndexReaderFactory.java
@@ -27,12 +27,8 @@ import org.apache.solr.util.plugin.NamedListInitializedPlugin;
  * Factory used to build a new IndexReader instance.
  */
 public abstract class IndexReaderFactory implements NamedListInitializedPlugin {
-  protected int termInfosIndexDivisor = 1;//IndexReader.DEFAULT_TERMS_INDEX_DIVISOR; Set this once Lucene makes this public.
   /**
-   * Potentially initializes {@link #termInfosIndexDivisor}. Overriding classes should call super.init() in order
-   * to make sure termInfosIndexDivisor is set.
-   * <p>
-   * init will be called just once, immediately after creation.
+   * init will be called just once, immediately after creation.
    * <p>
    * The args are user-level initialization parameters that may be specified
    * when declaring an indexReaderFactory in solrconfig.xml
@@ -40,18 +36,10 @@
    */
   @Override
   public void init(NamedList args) {
-    Integer v = (Integer)args.get("setTermIndexDivisor");
-    if (v != null) {
-      termInfosIndexDivisor = v.intValue();
-    }
-  }
-
-  /**
-   *
-   * @return The setting of {@link #termInfosIndexDivisor}
-   */
-  public int getTermInfosIndexDivisor() {
-    return termInfosIndexDivisor;
+    Object v = args.get("setTermIndexDivisor");
+    if (v != null) {
+      throw new IllegalArgumentException("Illegal parameter 'setTermIndexDivisor'");
+    }
   }

   /**
diff --git a/solr/core/src/java/org/apache/solr/core/StandardIndexReaderFactory.java b/solr/core/src/java/org/apache/solr/core/StandardIndexReaderFactory.java
index 133eba86a13..d84e4cb2a67 100644
--- a/solr/core/src/java/org/apache/solr/core/StandardIndexReaderFactory.java
+++ b/solr/core/src/java/org/apache/solr/core/StandardIndexReaderFactory.java
@@ -31,6 +31,6 @@ public class StandardIndexReaderFactory extends IndexReaderFactory {
   @Override
   public DirectoryReader newReader(Directory indexDir, SolrCore core) throws IOException {
-    return DirectoryReader.open(indexDir, termInfosIndexDivisor);
+    return DirectoryReader.open(indexDir);
   }
 }
diff --git a/solr/core/src/java/org/apache/solr/update/SolrIndexConfig.java b/solr/core/src/java/org/apache/solr/update/SolrIndexConfig.java
index 6c3155cb13f..f7d592e1c88 100644
--- a/solr/core/src/java/org/apache/solr/update/SolrIndexConfig.java
+++ b/solr/core/src/java/org/apache/solr/update/SolrIndexConfig.java
@@ -69,7 +69,6 @@ public class SolrIndexConfig {
   public final String lockType;
   public final PluginInfo mergePolicyInfo;
   public final PluginInfo mergeSchedulerInfo;
-  public final int termIndexInterval;

   public final PluginInfo mergedSegmentWarmerInfo;
@@ -95,7 +94,6 @@ public class SolrIndexConfig {
     ramBufferSizeMB = 100;
     writeLockTimeout = -1;
     lockType = LOCK_TYPE_NATIVE;
-    termIndexInterval = IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL;
     mergePolicyInfo = null;
     mergeSchedulerInfo = null;
     defaultMergePolicyClassName = TieredMergePolicy.class.getName();
@@ -148,7 +146,10 @@ public class SolrIndexConfig {
     mergeSchedulerInfo = getPluginInfo(prefix + "/mergeScheduler", solrConfig, def.mergeSchedulerInfo);
     mergePolicyInfo = getPluginInfo(prefix + "/mergePolicy", solrConfig, def.mergePolicyInfo);
-    termIndexInterval = solrConfig.getInt(prefix + "/termIndexInterval", def.termIndexInterval);
+    String val = solrConfig.get(prefix + "/termIndexInterval", null);
+    if (val != null) {
+      throw new IllegalArgumentException("Illegal parameter 'termIndexInterval'");
+    }

     boolean infoStreamEnabled = solrConfig.getBool(prefix + "/infoStream", false);
     if(infoStreamEnabled) {
@@ -198,9 +199,6 @@ public class SolrIndexConfig {
     if (ramBufferSizeMB != -1)
       iwc.setRAMBufferSizeMB(ramBufferSizeMB);

-    if (termIndexInterval != -1)
-      iwc.setTermIndexInterval(termIndexInterval);
-
     if (writeLockTimeout != -1)
       iwc.setWriteLockTimeout(writeLockTimeout);
diff --git a/solr/core/src/test-files/solr/collection1/conf/solrconfig-termindex.xml b/solr/core/src/test-files/solr/collection1/conf/solrconfig-termindex.xml
index 2afd813f29d..e13a8cc43e8 100644
--- a/solr/core/src/test-files/solr/collection1/conf/solrconfig-termindex.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/solrconfig-termindex.xml
@@ -32,7 +32,6 @@
     <useCompoundFile>${useCompoundFile:false}</useCompoundFile>
-    <termIndexInterval>256</termIndexInterval>
@@ -40,9 +39,7 @@
-  <indexReaderFactory name="IndexReaderFactory" class="org.apache.solr.core.StandardIndexReaderFactory">
-    <int name="setTermIndexDivisor">12</int>
-  </indexReaderFactory>
+  <indexReaderFactory name="IndexReaderFactory" class="org.apache.solr.core.StandardIndexReaderFactory">
+  </indexReaderFactory>
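On the Solr side the old knobs now fail fast instead of being silently ignored. A minimal sketch of the new IndexReaderFactory.init behavior (the NamedList below stands in for what a setTermIndexDivisor entry in solrconfig.xml would produce):

    import org.apache.solr.common.util.NamedList;
    import org.apache.solr.core.IndexReaderFactory;
    import org.apache.solr.core.StandardIndexReaderFactory;

    public class RejectedKnobExample {
      public static void main(String[] args) {
        NamedList<Object> init = new NamedList<Object>();
        init.add("setTermIndexDivisor", 12);
        IndexReaderFactory factory = new StandardIndexReaderFactory();
        // Throws IllegalArgumentException: Illegal parameter 'setTermIndexDivisor'
        factory.init(init);
      }
    }

The termIndexInterval element in solrconfig.xml gets the same treatment in SolrIndexConfig above.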
diff --git a/solr/core/src/test/org/apache/solr/core/IndexReaderFactoryTest.java b/solr/core/src/test/org/apache/solr/core/IndexReaderFactoryTest.java
index 16fd7c5df22..05645637efc 100644
--- a/solr/core/src/test/org/apache/solr/core/IndexReaderFactoryTest.java
+++ b/solr/core/src/test/org/apache/solr/core/IndexReaderFactoryTest.java
@@ -33,8 +33,5 @@ public class IndexReaderFactoryTest extends AbstractSolrTestCase {
     IndexReaderFactory readerFactory = h.getCore().getIndexReaderFactory();
     assertNotNull("Factory is null", readerFactory);
     assertTrue("readerFactory is not an instanceof " + AlternateDirectoryTest.TestIndexReaderFactory.class,
         readerFactory instanceof StandardIndexReaderFactory);
-    assertTrue("termInfoIndexDivisor not set to 12", readerFactory.getTermInfosIndexDivisor() == 12);
-
-
   }
 }
\ No newline at end of file
diff --git a/solr/core/src/test/org/apache/solr/core/TestConfig.java b/solr/core/src/test/org/apache/solr/core/TestConfig.java
index 1e2398ea8dc..ef4a5a98102 100644
--- a/solr/core/src/test/org/apache/solr/core/TestConfig.java
+++ b/solr/core/src/test/org/apache/solr/core/TestConfig.java
@@ -114,27 +114,6 @@ public class TestConfig extends SolrTestCaseJ4 {
     assertTrue(handler.getHiddenFiles().contains("PROTWORDS.TXT"));
   }

-  @Test
-  public void testTermIndexInterval() throws Exception {
-    RefCounted<IndexWriter> iw = ((DirectUpdateHandler2) h.getCore()
-        .getUpdateHandler()).getSolrCoreState().getIndexWriter(h.getCore());
-    int interval = 0;
-    try {
-      IndexWriter writer = iw.get();
-      interval = writer.getConfig().getTermIndexInterval();
-    } finally {
-      iw.decref();
-    }
-    assertEquals(256, interval);
-  }
-
-  @Test
-  public void testTermIndexDivisor() throws Exception {
-    IndexReaderFactory irf = h.getCore().getIndexReaderFactory();
-    StandardIndexReaderFactory sirf = (StandardIndexReaderFactory) irf;
-    assertEquals(12, sirf.termInfosIndexDivisor);
-  }
-
   // If defaults change, add test methods to cover each version
   @Test
   public void testDefaults() throws Exception {
diff --git a/solr/example/example-schemaless/solr/collection1/conf/solrconfig.xml b/solr/example/example-schemaless/solr/collection1/conf/solrconfig.xml
index 405532db324..c08f74317fd 100755
--- a/solr/example/example-schemaless/solr/collection1/conf/solrconfig.xml
+++ b/solr/example/example-schemaless/solr/collection1/conf/solrconfig.xml
@@ -257,11 +257,6 @@
        false
     -->
-
-
-
@@ -440,15 +435,6 @@
        Some Value
     -->
-
-
-
-
-
-
-