mirror of https://github.com/apache/lucene.git
LUCENE-5127: FixedGapTermsIndex should use monotonic compression
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1508147 13f79535-47bb-0310-9956-ffa450edef68
parent a16d24160f
commit 02a2f383b2
@@ -27,6 +27,13 @@ Changes in backwards compatibility policy
   no longer support multiple "dictionaries" as there is only one dictionary available.
   (Dawid Weiss)
 
+* LUCENE-5127: Reduce RAM usage of FixedGapTermsIndex. Remove
+  IndexWriterConfig.setTermIndexInterval, IndexWriterConfig.setReaderTermsIndexDivisor,
+  and termsIndexDivisor from StandardDirectoryReader. These options have been no-ops
+  with the default codec since Lucene 4.0. If you want to configure the interval for
+  this term index, pass it directly in your codec, where it can also be configured
+  per-field. (Robert Muir)
+
 New Features
 
 * LUCENE-4747: Move to Java 7 as minimum Java version.
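Note on the CHANGES entry above: the interval is now a codec-level decision rather than an IndexWriterConfig setting. A minimal sketch of what that looks like, using only the constructor added later in this commit (FixedGapTermsIndexWriter(SegmentWriteState, int) with DEFAULT_TERM_INDEX_INTERVAL = 32); the factory class name and the value 64 are illustrative, and per-field control follows from choosing the postings format per field (e.g. via PerFieldPostingsFormat):

import java.io.IOException;
import org.apache.lucene.codecs.blockterms.FixedGapTermsIndexWriter;
import org.apache.lucene.codecs.blockterms.TermsIndexWriterBase;
import org.apache.lucene.index.SegmentWriteState;

class ExampleTermsIndexFactory {
  // Hedged sketch: 64 is an arbitrary example interval; the commit's default is
  // FixedGapTermsIndexWriter.DEFAULT_TERM_INDEX_INTERVAL (32).
  TermsIndexWriterBase newTermsIndex(SegmentWriteState state) throws IOException {
    return new FixedGapTermsIndexWriter(state, 64);
  }
}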
@@ -313,11 +313,6 @@ public class BlockTermsReader extends FieldsProducer {
        calls next() (which is not "typical"), then we'll do the real seek */
     private boolean seekPending;
 
-    /* How many blocks we've read since last seek.  Once this
-       is >= indexEnum.getDivisor() we set indexIsCurrent to false (since
-       the index can no long bracket seek-within-block). */
-    private int blocksSinceSeek;
-
     private byte[] termSuffixes;
     private ByteArrayDataInput termSuffixesReader = new ByteArrayDataInput();
 
@@ -420,8 +415,7 @@ public class BlockTermsReader extends FieldsProducer {
       assert result;
 
       indexIsCurrent = true;
       didIndexNext = false;
-      blocksSinceSeek = 0;
 
       if (doOrd) {
         state.ord = indexEnum.ord()-1;
@@ -729,7 +723,6 @@ public class BlockTermsReader extends FieldsProducer {
 
       indexIsCurrent = true;
       didIndexNext = false;
-      blocksSinceSeek = 0;
       seekPending = false;
 
       state.ord = indexEnum.ord()-1;
@@ -802,8 +795,7 @@ public class BlockTermsReader extends FieldsProducer {
 
       postingsReader.readTermsBlock(in, fieldInfo, state);
 
-      blocksSinceSeek++;
-      indexIsCurrent = indexIsCurrent && (blocksSinceSeek < indexReader.getDivisor());
+      indexIsCurrent = false;
       //System.out.println("  indexIsCurrent=" + indexIsCurrent);
 
       return true;
@@ -27,7 +27,7 @@ import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.PagedBytes;
-import org.apache.lucene.util.packed.PackedInts;
+import org.apache.lucene.util.packed.MonotonicBlockPackedReader;
 
 import java.util.HashMap;
 import java.util.Comparator;
@@ -43,21 +43,15 @@ import org.apache.lucene.index.IndexFileNames;
  */
 public class FixedGapTermsIndexReader extends TermsIndexReaderBase {
 
-  // NOTE: long is overkill here, since this number is 128
-  // by default and only indexDivisor * 128 if you change
-  // the indexDivisor at search time.  But, we use this in a
+  // NOTE: long is overkill here, but we use this in a
   // number of places to multiply out the actual ord, and we
   // will overflow int during those multiplies.  So to avoid
   // having to upgrade each multiple to long in multiple
   // places (error prone), we use long here:
-  private long totalIndexInterval;
-
-  private int indexDivisor;
-  final private int indexInterval;
-
-  // Closed if indexLoaded is true:
-  private IndexInput in;
-  private volatile boolean indexLoaded;
+  private final long indexInterval;
+
+  private final int packedIntsVersion;
+  private final int blocksize;
 
   private final Comparator<BytesRef> termComp;
 
@@ -72,35 +66,24 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase {
   // start of the field info data
   private long dirOffset;
 
-  private final int version;
-
-  public FixedGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, int indexDivisor, Comparator<BytesRef> termComp, String segmentSuffix, IOContext context)
+  public FixedGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, Comparator<BytesRef> termComp, String segmentSuffix, IOContext context)
     throws IOException {
 
     this.termComp = termComp;
 
-    assert indexDivisor == -1 || indexDivisor > 0;
-
-    in = dir.openInput(IndexFileNames.segmentFileName(segment, segmentSuffix, FixedGapTermsIndexWriter.TERMS_INDEX_EXTENSION), context);
+    final IndexInput in = dir.openInput(IndexFileNames.segmentFileName(segment, segmentSuffix, FixedGapTermsIndexWriter.TERMS_INDEX_EXTENSION), context);
 
     boolean success = false;
 
     try {
 
-      version = readHeader(in);
-      indexInterval = in.readInt();
+      readHeader(in);
+      indexInterval = in.readVInt();
       if (indexInterval < 1) {
         throw new CorruptIndexException("invalid indexInterval: " + indexInterval + " (resource=" + in + ")");
       }
-      this.indexDivisor = indexDivisor;
-
-      if (indexDivisor < 0) {
-        totalIndexInterval = indexInterval;
-      } else {
-        // In case terms index gets loaded, later, on demand
-        totalIndexInterval = indexInterval * indexDivisor;
-      }
-      assert totalIndexInterval > 0;
+      packedIntsVersion = in.readVInt();
+      blocksize = in.readVInt();
 
       seekDir(in, dirOffset);
 
@@ -112,7 +95,7 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase {
       //System.out.println("FGR: init seg=" + segment + " div=" + indexDivisor + " nF=" + numFields);
       for(int i=0;i<numFields;i++) {
         final int field = in.readVInt();
-        final int numIndexTerms = in.readVInt();
+        final long numIndexTerms = in.readVInt(); // TODO: change this to a vLong if we fix writer to support > 2B index terms
         if (numIndexTerms < 0) {
           throw new CorruptIndexException("invalid numIndexTerms: " + numIndexTerms + " (resource=" + in + ")");
         }
@@ -124,47 +107,33 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase {
           throw new CorruptIndexException("invalid packedIndexStart: " + packedIndexStart + " indexStart: " + indexStart + "numIndexTerms: " + numIndexTerms + " (resource=" + in + ")");
         }
         final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
-        FieldIndexData previous = fields.put(fieldInfo, new FieldIndexData(fieldInfo, numIndexTerms, indexStart, termsStart, packedIndexStart, packedOffsetsStart));
+        FieldIndexData previous = fields.put(fieldInfo, new FieldIndexData(in, indexStart, termsStart, packedIndexStart, packedOffsetsStart, numIndexTerms));
         if (previous != null) {
           throw new CorruptIndexException("duplicate field: " + fieldInfo.name + " (resource=" + in + ")");
         }
       }
       success = true;
     } finally {
-      if (!success) {
+      if (success) {
+        IOUtils.close(in);
+      } else {
         IOUtils.closeWhileHandlingException(in);
       }
-      if (indexDivisor > 0) {
-        in.close();
-        in = null;
-        if (success) {
-          indexLoaded = true;
-        }
-        termBytesReader = termBytes.freeze(true);
-      }
+      termBytesReader = termBytes.freeze(true);
     }
   }
 
-  @Override
-  public int getDivisor() {
-    return indexDivisor;
-  }
-
-  private int readHeader(IndexInput input) throws IOException {
-    int version = CodecUtil.checkHeader(input, FixedGapTermsIndexWriter.CODEC_NAME,
-      FixedGapTermsIndexWriter.VERSION_START, FixedGapTermsIndexWriter.VERSION_CURRENT);
-    if (version < FixedGapTermsIndexWriter.VERSION_APPEND_ONLY) {
-      dirOffset = input.readLong();
-    }
-    return version;
+  private void readHeader(IndexInput input) throws IOException {
+    CodecUtil.checkHeader(input, FixedGapTermsIndexWriter.CODEC_NAME,
+      FixedGapTermsIndexWriter.VERSION_CURRENT, FixedGapTermsIndexWriter.VERSION_CURRENT);
   }
 
   private class IndexEnum extends FieldIndexEnum {
-    private final FieldIndexData.CoreFieldIndex fieldIndex;
+    private final FieldIndexData fieldIndex;
     private final BytesRef term = new BytesRef();
     private long ord;
 
-    public IndexEnum(FieldIndexData.CoreFieldIndex fieldIndex) {
+    public IndexEnum(FieldIndexData fieldIndex) {
      this.fieldIndex = fieldIndex;
    }
 
@@ -175,12 +144,11 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase {
 
     @Override
     public long seek(BytesRef target) {
-      int lo = 0;          // binary search
-      int hi = fieldIndex.numIndexTerms - 1;
-      assert totalIndexInterval > 0 : "totalIndexInterval=" + totalIndexInterval;
+      long lo = 0;          // binary search
+      long hi = fieldIndex.numIndexTerms - 1;
 
       while (hi >= lo) {
-        int mid = (lo + hi) >>> 1;
+        long mid = (lo + hi) >>> 1;
 
         final long offset = fieldIndex.termOffsets.get(mid);
         final int length = (int) (fieldIndex.termOffsets.get(1+mid) - offset);
@@ -193,7 +161,7 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase {
           lo = mid + 1;
         } else {
           assert mid >= 0;
-          ord = mid*totalIndexInterval;
+          ord = mid*indexInterval;
           return fieldIndex.termsStart + fieldIndex.termsDictOffsets.get(mid);
         }
       }
@@ -207,17 +175,17 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase {
       final int length = (int) (fieldIndex.termOffsets.get(1+hi) - offset);
       termBytesReader.fillSlice(term, fieldIndex.termBytesStart + offset, length);
 
-      ord = hi*totalIndexInterval;
+      ord = hi*indexInterval;
       return fieldIndex.termsStart + fieldIndex.termsDictOffsets.get(hi);
     }
 
     @Override
     public long next() {
-      final int idx = 1 + (int) (ord / totalIndexInterval);
+      final long idx = 1 + (ord / indexInterval);
       if (idx >= fieldIndex.numIndexTerms) {
         return -1;
       }
-      ord += totalIndexInterval;
+      ord += indexInterval;
 
       final long offset = fieldIndex.termOffsets.get(idx);
       final int length = (int) (fieldIndex.termOffsets.get(1+idx) - offset);
@@ -232,13 +200,13 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase {
 
     @Override
     public long seek(long ord) {
-      int idx = (int) (ord / totalIndexInterval);
+      long idx = ord / indexInterval;
       // caller must ensure ord is in bounds
       assert idx < fieldIndex.numIndexTerms;
       final long offset = fieldIndex.termOffsets.get(idx);
       final int length = (int) (fieldIndex.termOffsets.get(1+idx) - offset);
       termBytesReader.fillSlice(term, fieldIndex.termBytesStart + offset, length);
-      this.ord = idx * totalIndexInterval;
+      this.ord = idx * indexInterval;
       return fieldIndex.termsStart + fieldIndex.termsDictOffsets.get(idx);
     }
   }
@@ -249,176 +217,58 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase {
   }
 
   private final class FieldIndexData {
-    volatile CoreFieldIndex coreIndex;
-
-    private final long indexStart;
-    private final long termsStart;
-    private final long packedIndexStart;
-    private final long packedOffsetsStart;
-    private final int numIndexTerms;
-
-    public FieldIndexData(FieldInfo fieldInfo, int numIndexTerms, long indexStart, long termsStart, long packedIndexStart,
-                          long packedOffsetsStart) throws IOException {
-
-      this.termsStart = termsStart;
-      this.indexStart = indexStart;
-      this.packedIndexStart = packedIndexStart;
-      this.packedOffsetsStart = packedOffsetsStart;
-      this.numIndexTerms = numIndexTerms;
-
-      if (indexDivisor > 0) {
-        loadTermsIndex();
-      }
-    }
-
-    private void loadTermsIndex() throws IOException {
-      if (coreIndex == null) {
-        coreIndex = new CoreFieldIndex(indexStart, termsStart, packedIndexStart, packedOffsetsStart, numIndexTerms);
-      }
-    }
-
-    private final class CoreFieldIndex {
-      // where this field's terms begin in the packed byte[]
-      // data
-      final long termBytesStart;
-
-      // offset into index termBytes
-      final PackedInts.Reader termOffsets;
-
-      // index pointers into main terms dict
-      final PackedInts.Reader termsDictOffsets;
-
-      final int numIndexTerms;
-      final long termsStart;
-
-      public CoreFieldIndex(long indexStart, long termsStart, long packedIndexStart, long packedOffsetsStart, int numIndexTerms) throws IOException {
-
-        this.termsStart = termsStart;
-        termBytesStart = termBytes.getPointer();
-
-        IndexInput clone = in.clone();
-        clone.seek(indexStart);
-
-        // -1 is passed to mean "don't load term index", but
-        // if we are then later loaded it's overwritten with
-        // a real value
-        assert indexDivisor > 0;
-
-        this.numIndexTerms = 1+(numIndexTerms-1) / indexDivisor;
-
-        assert this.numIndexTerms > 0: "numIndexTerms=" + numIndexTerms + " indexDivisor=" + indexDivisor;
-
-        if (indexDivisor == 1) {
-          // Default (load all index terms) is fast -- slurp in the images from disk:
-
-          try {
-            final long numTermBytes = packedIndexStart - indexStart;
-            termBytes.copy(clone, numTermBytes);
-
-            // records offsets into main terms dict file
-            termsDictOffsets = PackedInts.getReader(clone);
-            assert termsDictOffsets.size() == numIndexTerms;
-
-            // records offsets into byte[] term data
-            termOffsets = PackedInts.getReader(clone);
-            assert termOffsets.size() == 1+numIndexTerms;
-          } finally {
-            clone.close();
-          }
-        } else {
-          // Get packed iterators
-          final IndexInput clone1 = in.clone();
-          final IndexInput clone2 = in.clone();
-
-          try {
-            // Subsample the index terms
-            clone1.seek(packedIndexStart);
-            final PackedInts.ReaderIterator termsDictOffsetsIter = PackedInts.getReaderIterator(clone1, PackedInts.DEFAULT_BUFFER_SIZE);
-
-            clone2.seek(packedOffsetsStart);
-            final PackedInts.ReaderIterator termOffsetsIter = PackedInts.getReaderIterator(clone2, PackedInts.DEFAULT_BUFFER_SIZE);
-
-            // TODO: often we can get by w/ fewer bits per
-            // value, below.. .but this'd be more complex:
-            // we'd have to try @ fewer bits and then grow
-            // if we overflowed it.
-
-            PackedInts.Mutable termsDictOffsetsM = PackedInts.getMutable(this.numIndexTerms, termsDictOffsetsIter.getBitsPerValue(), PackedInts.DEFAULT);
-            PackedInts.Mutable termOffsetsM = PackedInts.getMutable(this.numIndexTerms+1, termOffsetsIter.getBitsPerValue(), PackedInts.DEFAULT);
-
-            termsDictOffsets = termsDictOffsetsM;
-            termOffsets = termOffsetsM;
-
-            int upto = 0;
-            long termOffsetUpto = 0;
-
-            while(upto < this.numIndexTerms) {
-              // main file offset copies straight over
-              termsDictOffsetsM.set(upto, termsDictOffsetsIter.next());
-
-              termOffsetsM.set(upto, termOffsetUpto);
-
-              long termOffset = termOffsetsIter.next();
-              long nextTermOffset = termOffsetsIter.next();
-              final int numTermBytes = (int) (nextTermOffset - termOffset);
-
-              clone.seek(indexStart + termOffset);
-              assert indexStart + termOffset < clone.length() : "indexStart=" + indexStart + " termOffset=" + termOffset + " len=" + clone.length();
-              assert indexStart + termOffset + numTermBytes < clone.length();
-
-              termBytes.copy(clone, numTermBytes);
-              termOffsetUpto += numTermBytes;
-
-              upto++;
-              if (upto == this.numIndexTerms) {
-                break;
-              }
-
-              // skip terms:
-              termsDictOffsetsIter.next();
-              for(int i=0;i<indexDivisor-2;i++) {
-                termOffsetsIter.next();
-                termsDictOffsetsIter.next();
-              }
-            }
-            termOffsetsM.set(upto, termOffsetUpto);
-          } finally {
-            clone1.close();
-            clone2.close();
-            clone.close();
-          }
-        }
-      }
-    }
+    // where this field's terms begin in the packed byte[]
+    // data
+    final long termBytesStart;
+
+    // offset into index termBytes
+    final MonotonicBlockPackedReader termOffsets;
+
+    // index pointers into main terms dict
+    final MonotonicBlockPackedReader termsDictOffsets;
+
+    final long numIndexTerms;
+    final long termsStart;
+
+    public FieldIndexData(IndexInput in, long indexStart, long termsStart, long packedIndexStart, long packedOffsetsStart, long numIndexTerms) throws IOException {
+
+      this.termsStart = termsStart;
+      termBytesStart = termBytes.getPointer();
+
+      IndexInput clone = in.clone();
+      clone.seek(indexStart);
+
+      this.numIndexTerms = numIndexTerms;
+      assert this.numIndexTerms > 0: "numIndexTerms=" + numIndexTerms;
+
+      // slurp in the images from disk:
+      try {
+        final long numTermBytes = packedIndexStart - indexStart;
+        termBytes.copy(clone, numTermBytes);
+
+        // records offsets into main terms dict file
+        termsDictOffsets = new MonotonicBlockPackedReader(clone, packedIntsVersion, blocksize, numIndexTerms, false);
+
+        // records offsets into byte[] term data
+        termOffsets = new MonotonicBlockPackedReader(clone, packedIntsVersion, blocksize, 1+numIndexTerms, false);
+      } finally {
+        clone.close();
+      }
+    }
   }
 
   @Override
   public FieldIndexEnum getFieldEnum(FieldInfo fieldInfo) {
-    final FieldIndexData fieldData = fields.get(fieldInfo);
-    if (fieldData.coreIndex == null) {
-      return null;
-    } else {
-      return new IndexEnum(fieldData.coreIndex);
-    }
+    return new IndexEnum(fields.get(fieldInfo));
   }
 
   @Override
-  public void close() throws IOException {
-    if (in != null && !indexLoaded) {
-      in.close();
-    }
-  }
+  public void close() throws IOException {}
 
   private void seekDir(IndexInput input, long dirOffset) throws IOException {
-    if (version >= FixedGapTermsIndexWriter.VERSION_APPEND_ONLY) {
-      input.seek(input.length() - 8);
-      dirOffset = input.readLong();
-    }
+    input.seek(input.length() - 8);
+    dirOffset = input.readLong();
     input.seek(dirOffset);
   }
 }
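A short worked illustration of the ord arithmetic the enum above relies on (all values illustrative):

// Hedged sketch: with indexInterval = 32, index slot `mid` holds the term whose ord is mid * 32.
long indexInterval = 32;                // value written by FixedGapTermsIndexWriter
long mid = 3;                           // slot located by the binary search in seek(BytesRef)
long ordOfSlot = mid * indexInterval;   // = 96: ord of the indexed term at that slot
long slotForOrd = 100 / indexInterval;  // = 3: slot used by seek(long ord) before scanning forward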
@@ -18,15 +18,16 @@ package org.apache.lucene.codecs.blockterms;
  */
 
 import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.RAMOutputStream;
 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.TermStats;
-import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
+import org.apache.lucene.util.packed.MonotonicBlockPackedWriter;
 import org.apache.lucene.util.packed.PackedInts;
 
 import java.util.List;
@@ -50,23 +51,32 @@ public class FixedGapTermsIndexWriter extends TermsIndexWriterBase {
   final static String CODEC_NAME = "SIMPLE_STANDARD_TERMS_INDEX";
   final static int VERSION_START = 0;
   final static int VERSION_APPEND_ONLY = 1;
-  final static int VERSION_CURRENT = VERSION_APPEND_ONLY;
+  final static int VERSION_MONOTONIC_ADDRESSING = 2;
+  final static int VERSION_CURRENT = VERSION_MONOTONIC_ADDRESSING;
 
+  final static int BLOCKSIZE = 4096;
   final private int termIndexInterval;
+  public static final int DEFAULT_TERM_INDEX_INTERVAL = 32;
 
   private final List<SimpleFieldWriter> fields = new ArrayList<SimpleFieldWriter>();
 
-  @SuppressWarnings("unused") private final FieldInfos fieldInfos; // unread
-
   public FixedGapTermsIndexWriter(SegmentWriteState state) throws IOException {
+    this(state, DEFAULT_TERM_INDEX_INTERVAL);
+  }
+
+  public FixedGapTermsIndexWriter(SegmentWriteState state, int termIndexInterval) throws IOException {
+    if (termIndexInterval <= 0) {
+      throw new IllegalArgumentException("invalid termIndexInterval: " + termIndexInterval);
+    }
+    this.termIndexInterval = termIndexInterval;
     final String indexFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TERMS_INDEX_EXTENSION);
-    termIndexInterval = state.termIndexInterval;
     out = state.directory.createOutput(indexFileName, state.context);
     boolean success = false;
     try {
-      fieldInfos = state.fieldInfos;
       writeHeader(out);
-      out.writeInt(termIndexInterval);
+      out.writeVInt(termIndexInterval);
+      out.writeVInt(PackedInts.VERSION_CURRENT);
+      out.writeVInt(BLOCKSIZE);
       success = true;
     } finally {
       if (!success) {
@@ -114,22 +124,25 @@ public class FixedGapTermsIndexWriter extends TermsIndexWriterBase {
     long packedOffsetsStart;
     private long numTerms;
 
-    // TODO: we could conceivably make a PackedInts wrapper
-    // that auto-grows... then we wouldn't force 6 bytes RAM
-    // per index term:
-    private short[] termLengths;
-    private int[] termsPointerDeltas;
-    private long lastTermsPointer;
-    private long totTermLength;
+    private RAMOutputStream offsetsBuffer = new RAMOutputStream();
+    private MonotonicBlockPackedWriter termOffsets = new MonotonicBlockPackedWriter(offsetsBuffer, BLOCKSIZE);
+    private long currentOffset;
+
+    private RAMOutputStream addressBuffer = new RAMOutputStream();
+    private MonotonicBlockPackedWriter termAddresses = new MonotonicBlockPackedWriter(addressBuffer, BLOCKSIZE);
 
     private final BytesRef lastTerm = new BytesRef();
 
     SimpleFieldWriter(FieldInfo fieldInfo, long termsFilePointer) {
       this.fieldInfo = fieldInfo;
       indexStart = out.getFilePointer();
-      termsStart = lastTermsPointer = termsFilePointer;
-      termLengths = new short[0];
-      termsPointerDeltas = new int[0];
+      termsStart = termsFilePointer;
+      // we write terms+1 offsets, term n's length is n+1 - n
+      try {
+        termOffsets.add(0L);
+      } catch (IOException bogus) {
+        throw new RuntimeException(bogus);
+      }
     }
 
     @Override
@@ -157,21 +170,13 @@ public class FixedGapTermsIndexWriter extends TermsIndexWriterBase {
       // against prior term
       out.writeBytes(text.bytes, text.offset, indexedTermLength);
 
-      if (termLengths.length == numIndexTerms) {
-        termLengths = ArrayUtil.grow(termLengths);
-      }
-      if (termsPointerDeltas.length == numIndexTerms) {
-        termsPointerDeltas = ArrayUtil.grow(termsPointerDeltas);
-      }
-
       // save delta terms pointer
-      termsPointerDeltas[numIndexTerms] = (int) (termsFilePointer - lastTermsPointer);
-      lastTermsPointer = termsFilePointer;
+      termAddresses.add(termsFilePointer - termsStart);
 
       // save term length (in bytes)
       assert indexedTermLength <= Short.MAX_VALUE;
-      termLengths[numIndexTerms] = (short) indexedTermLength;
-      totTermLength += indexedTermLength;
+      currentOffset += indexedTermLength;
+      termOffsets.add(currentOffset);
 
       lastTerm.copyBytes(text);
       numIndexTerms++;
@@ -183,32 +188,20 @@ public class FixedGapTermsIndexWriter extends TermsIndexWriterBase {
       // write primary terms dict offsets
       packedIndexStart = out.getFilePointer();
 
-      PackedInts.Writer w = PackedInts.getWriter(out, numIndexTerms, PackedInts.bitsRequired(termsFilePointer), PackedInts.DEFAULT);
-
       // relative to our indexStart
-      long upto = 0;
-      for(int i=0;i<numIndexTerms;i++) {
-        upto += termsPointerDeltas[i];
-        w.add(upto);
-      }
-      w.finish();
+      termAddresses.finish();
+      addressBuffer.writeTo(out);
 
       packedOffsetsStart = out.getFilePointer();
 
       // write offsets into the byte[] terms
-      w = PackedInts.getWriter(out, 1+numIndexTerms, PackedInts.bitsRequired(totTermLength), PackedInts.DEFAULT);
-      upto = 0;
-      for(int i=0;i<numIndexTerms;i++) {
-        w.add(upto);
-        upto += termLengths[i];
-      }
-      w.add(upto);
-      w.finish();
+      termOffsets.finish();
+      offsetsBuffer.writeTo(out);
 
       // our referrer holds onto us, while other fields are
       // being written, so don't tie up this RAM:
-      termLengths = null;
-      termsPointerDeltas = null;
+      termOffsets = termAddresses = null;
+      addressBuffer = offsetsBuffer = null;
     }
   }
 
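A minimal round-trip sketch of the monotonic compression the writer above now uses. The constructor and method shapes are taken from the calls visible in this diff (MonotonicBlockPackedWriter/Reader, RAMOutputStream.writeTo); the helper method name, the value count of 100, and the 17-byte stride are illustrative only, and imports match those in FixedGapTermsIndexWriter above:

static void writeMonotonicOffsets(IndexOutput out) throws IOException {
  RAMOutputStream buffer = new RAMOutputStream();
  MonotonicBlockPackedWriter offsets = new MonotonicBlockPackedWriter(buffer, 4096); // BLOCKSIZE
  long filePointer = 0;
  for (int i = 0; i < 100; i++) {
    offsets.add(filePointer);  // values must never decrease
    filePointer += 17;         // e.g. a terms-dict file pointer that only grows
  }
  offsets.finish();
  buffer.writeTo(out);         // stream the packed blocks into the terms-index file
}
// Read side, mirroring FieldIndexData in the reader:
//   new MonotonicBlockPackedReader(in, PackedInts.VERSION_CURRENT, 4096, 100, false).get(42);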
@@ -47,8 +47,6 @@ public abstract class TermsIndexReaderBase implements Closeable {
 
   public abstract boolean supportsOrd();
 
-  public abstract int getDivisor();
-
   /**
    * Similar to TermsEnum, except, the only "metadata" it
    * reports for a given indexed term is the long fileOffset
@@ -32,8 +32,7 @@ import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.IntsRef;
-import org.apache.lucene.util.fst.Builder;
+import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.fst.BytesRefFSTEnum;
 import org.apache.lucene.util.fst.FST;
 import org.apache.lucene.util.fst.PositiveIntOutputs;
@@ -45,11 +44,6 @@ import org.apache.lucene.util.fst.Util; // for toDot
 public class VariableGapTermsIndexReader extends TermsIndexReaderBase {
 
   private final PositiveIntOutputs fstOutputs = PositiveIntOutputs.getSingleton();
-  private int indexDivisor;
-
-  // Closed if indexLoaded is true:
-  private IndexInput in;
-  private volatile boolean indexLoaded;
 
   final HashMap<FieldInfo,FieldIndexData> fields = new HashMap<FieldInfo,FieldIndexData>();
 
@@ -59,17 +53,15 @@ public class VariableGapTermsIndexReader extends TermsIndexReaderBase {
   private final int version;
 
   final String segment;
-  public VariableGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, int indexDivisor, String segmentSuffix, IOContext context)
+  public VariableGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, String segmentSuffix, IOContext context)
     throws IOException {
-    in = dir.openInput(IndexFileNames.segmentFileName(segment, segmentSuffix, VariableGapTermsIndexWriter.TERMS_INDEX_EXTENSION), new IOContext(context, true));
+    final IndexInput in = dir.openInput(IndexFileNames.segmentFileName(segment, segmentSuffix, VariableGapTermsIndexWriter.TERMS_INDEX_EXTENSION), new IOContext(context, true));
     this.segment = segment;
     boolean success = false;
-    assert indexDivisor == -1 || indexDivisor > 0;
 
     try {
 
       version = readHeader(in);
-      this.indexDivisor = indexDivisor;
 
       seekDir(in, dirOffset);
 
@@ -83,27 +75,20 @@ public class VariableGapTermsIndexReader extends TermsIndexReaderBase {
         final int field = in.readVInt();
         final long indexStart = in.readVLong();
         final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
-        FieldIndexData previous = fields.put(fieldInfo, new FieldIndexData(fieldInfo, indexStart));
+        FieldIndexData previous = fields.put(fieldInfo, new FieldIndexData(in, fieldInfo, indexStart));
         if (previous != null) {
           throw new CorruptIndexException("duplicate field: " + fieldInfo.name + " (resource=" + in + ")");
         }
       }
       success = true;
     } finally {
-      if (indexDivisor > 0) {
-        in.close();
-        in = null;
-        if (success) {
-          indexLoaded = true;
-        }
+      if (success) {
+        IOUtils.close(in);
+      } else {
+        IOUtils.closeWhileHandlingException(in);
       }
     }
   }
 
-  @Override
-  public int getDivisor() {
-    return indexDivisor;
-  }
-
   private int readHeader(IndexInput input) throws IOException {
     int version = CodecUtil.checkHeader(input, VariableGapTermsIndexWriter.CODEC_NAME,
@@ -168,52 +153,21 @@ public class VariableGapTermsIndexReader extends TermsIndexReaderBase {
   }
 
   private final class FieldIndexData {
-
-    private final long indexStart;
-    // Set only if terms index is loaded:
-    private volatile FST<Long> fst;
-
-    public FieldIndexData(FieldInfo fieldInfo, long indexStart) throws IOException {
-      this.indexStart = indexStart;
-
-      if (indexDivisor > 0) {
-        loadTermsIndex();
-      }
-    }
-
-    private void loadTermsIndex() throws IOException {
-      if (fst == null) {
-        IndexInput clone = in.clone();
-        clone.seek(indexStart);
-        fst = new FST<Long>(clone, fstOutputs);
-        clone.close();
-
-        /*
-        final String dotFileName = segment + "_" + fieldInfo.name + ".dot";
-        Writer w = new OutputStreamWriter(new FileOutputStream(dotFileName));
-        Util.toDot(fst, w, false, false);
-        System.out.println("FST INDEX: SAVED to " + dotFileName);
-        w.close();
-        */
-
-        if (indexDivisor > 1) {
-          // subsample
-          final IntsRef scratchIntsRef = new IntsRef();
-          final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
-          final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
-          final BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<Long>(fst);
-          BytesRefFSTEnum.InputOutput<Long> result;
-          int count = indexDivisor;
-          while((result = fstEnum.next()) != null) {
-            if (count == indexDivisor) {
-              builder.add(Util.toIntsRef(result.input, scratchIntsRef), result.output);
-              count = 0;
-            }
-            count++;
-          }
-          fst = builder.finish();
-        }
-      }
+    private final FST<Long> fst;
+
+    public FieldIndexData(IndexInput in, FieldInfo fieldInfo, long indexStart) throws IOException {
+      IndexInput clone = in.clone();
+      clone.seek(indexStart);
+      fst = new FST<Long>(clone, fstOutputs);
+      clone.close();
+
+      /*
+      final String dotFileName = segment + "_" + fieldInfo.name + ".dot";
+      Writer w = new OutputStreamWriter(new FileOutputStream(dotFileName));
+      Util.toDot(fst, w, false, false);
+      System.out.println("FST INDEX: SAVED to " + dotFileName);
+      w.close();
+      */
     }
   }
 
@@ -228,11 +182,7 @@ public class VariableGapTermsIndexReader extends TermsIndexReaderBase {
   }
 
   @Override
-  public void close() throws IOException {
-    if (in != null && !indexLoaded) {
-      in.close();
-    }
-  }
+  public void close() throws IOException {}
 
   private void seekDir(IndexInput input, long dirOffset) throws IOException {
     if (version >= VariableGapTermsIndexWriter.VERSION_APPEND_ONLY) {
@@ -103,8 +103,7 @@ public abstract class PulsingPostingsFormat extends PostingsFormat {
                                                     state.directory, state.fieldInfos, state.segmentInfo,
                                                     pulsingReader,
                                                     state.context,
-                                                    state.segmentSuffix,
-                                                    state.termsIndexDivisor);
+                                                    state.segmentSuffix);
       success = true;
       return ret;
     } finally {
@@ -25,10 +25,8 @@ import org.apache.lucene.util._TestUtil;
 /**
  * Basic tests of a PF using FixedGap terms dictionary
  */
-// TODO: we should add an instantiation for VarGap too to TestFramework, and a test in this package
-// TODO: ensure both of these are also in rotation in RandomCodec
 public class TestFixedGapPostingsFormat extends BasePostingsFormatTestCase {
-  private final Codec codec = _TestUtil.alwaysPostingsFormat(new Lucene41WithOrds());
+  private final Codec codec = _TestUtil.alwaysPostingsFormat(new Lucene41WithOrds(_TestUtil.nextInt(random(), 1, 1000)));
 
   @Override
   protected Codec getCodec() {
@@ -0,0 +1,35 @@
+package org.apache.lucene.codecs.blockterms;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.lucene41vargap.Lucene41VarGapFixedInterval;
+import org.apache.lucene.index.BasePostingsFormatTestCase;
+import org.apache.lucene.util._TestUtil;
+
+/**
+ * Basic tests of a PF using VariableGap terms dictionary (fixed interval)
+ */
+public class TestVarGapDocFreqIntervalPostingsFormat extends BasePostingsFormatTestCase {
+  private final Codec codec = _TestUtil.alwaysPostingsFormat(new Lucene41VarGapFixedInterval(_TestUtil.nextInt(random(), 1, 1000)));
+
+  @Override
+  protected Codec getCodec() {
+    return codec;
+  }
+}
@@ -0,0 +1,35 @@
+package org.apache.lucene.codecs.blockterms;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.lucene41vargap.Lucene41VarGapDocFreqInterval;
+import org.apache.lucene.index.BasePostingsFormatTestCase;
+import org.apache.lucene.util._TestUtil;
+
+/**
+ * Basic tests of a PF using VariableGap terms dictionary (fixed interval, docFreq threshold)
+ */
+public class TestVarGapFixedIntervalPostingsFormat extends BasePostingsFormatTestCase {
+  private final Codec codec = _TestUtil.alwaysPostingsFormat(new Lucene41VarGapDocFreqInterval(_TestUtil.nextInt(random(), 1, 100), _TestUtil.nextInt(random(), 1, 1000)));
+
+  @Override
+  protected Codec getCodec() {
+    return codec;
+  }
+}
@@ -67,9 +67,9 @@ import org.apache.lucene.util.fst.Util;
  * does not support a pluggable terms index
  * implementation).
  *
- * <p><b>NOTE</b>: this terms dictionary does not support
- * index divisor when opening an IndexReader.  Instead, you
- * can change the min/maxItemsPerBlock during indexing.</p>
+ * <p><b>NOTE</b>: this terms dictionary supports
+ * min/maxItemsPerBlock during indexing to control how
+ * much memory the terms index uses.</p>
  *
  * <p>The data structure used by this implementation is very
  * similar to a burst trie
@@ -112,7 +112,7 @@ public class BlockTreeTermsReader extends FieldsProducer {
   /** Sole constructor. */
   public BlockTreeTermsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo info,
                               PostingsReaderBase postingsReader, IOContext ioContext,
-                              String segmentSuffix, int indexDivisor)
+                              String segmentSuffix)
     throws IOException {
 
     this.postingsReader = postingsReader;
@@ -126,13 +126,11 @@ public class BlockTreeTermsReader extends FieldsProducer {
 
     try {
       version = readHeader(in);
-      if (indexDivisor != -1) {
-        indexIn = dir.openInput(IndexFileNames.segmentFileName(segment, segmentSuffix, BlockTreeTermsWriter.TERMS_INDEX_EXTENSION),
+      indexIn = dir.openInput(IndexFileNames.segmentFileName(segment, segmentSuffix, BlockTreeTermsWriter.TERMS_INDEX_EXTENSION),
                               ioContext);
       int indexVersion = readIndexHeader(indexIn);
       if (indexVersion != version) {
         throw new CorruptIndexException("mixmatched version files: " + in + "=" + version + "," + indexIn + "=" + indexVersion);
-        }
       }
 
       // Have PostingsReader init itself
@@ -140,9 +138,7 @@ public class BlockTreeTermsReader extends FieldsProducer {
 
       // Read per-field details
       seekDir(in, dirOffset);
-      if (indexDivisor != -1) {
-        seekDir(indexIn, indexDirOffset);
-      }
+      seekDir(indexIn, indexDirOffset);
 
       final int numFields = in.readVInt();
       if (numFields < 0) {
@@ -171,15 +167,13 @@ public class BlockTreeTermsReader extends FieldsProducer {
         if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
           throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq + " (resource=" + in + ")");
         }
-        final long indexStartFP = indexDivisor != -1 ? indexIn.readVLong() : 0;
+        final long indexStartFP = indexIn.readVLong();
         FieldReader previous = fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, rootCode, sumTotalTermFreq, sumDocFreq, docCount, indexStartFP, indexIn));
         if (previous != null) {
           throw new CorruptIndexException("duplicate field: " + fieldInfo.name + " (resource=" + in + ")");
         }
       }
-      if (indexDivisor != -1) {
-        indexIn.close();
-      }
+      indexIn.close();
 
       success = true;
     } finally {
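The reworked javadoc above points users at min/maxItemsPerBlock instead of an index divisor. A hedged sketch of what that means at indexing time; the BlockTreeTermsWriter constructor shape and the example values 25/48 are assumed from the 4.x sources, not shown in this diff:

// Larger blocks mean fewer index entries, so a smaller terms index held in RAM.
FieldsConsumer fieldsConsumer = new BlockTreeTermsWriter(state, postingsWriter, 25, 48); // signature assumed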
@@ -258,8 +258,7 @@ public class Lucene40PostingsFormat extends PostingsFormat {
                                                     state.segmentInfo,
                                                     postings,
                                                     state.context,
-                                                    state.segmentSuffix,
-                                                    state.termsIndexDivisor);
+                                                    state.segmentSuffix);
       success = true;
       return ret;
     } finally {
@@ -439,8 +439,7 @@ public final class Lucene41PostingsFormat extends PostingsFormat {
                                                     state.segmentInfo,
                                                     postingsReader,
                                                     state.context,
-                                                    state.segmentSuffix,
-                                                    state.termsIndexDivisor);
+                                                    state.segmentSuffix);
       success = true;
       return ret;
     } finally {
@@ -533,7 +533,7 @@ public class CheckIndex {
       }
       if (infoStream != null)
         infoStream.print("    test: open reader.........");
-      reader = new SegmentReader(info, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, IOContext.DEFAULT);
+      reader = new SegmentReader(info, IOContext.DEFAULT);
 
       segInfoStat.openReaderPassed = true;
 
@@ -52,9 +52,6 @@ import org.apache.lucene.store.NoSuchDirectoryException;
  */
 public abstract class DirectoryReader extends BaseCompositeReader<AtomicReader> {
 
-  /** Default termInfosIndexDivisor. */
-  public static final int DEFAULT_TERMS_INDEX_DIVISOR = 1;
-
   /** The index directory. */
   protected final Directory directory;
 
@@ -64,29 +61,7 @@ public abstract class DirectoryReader extends BaseCompositeReader<AtomicReader>
    * @throws IOException if there is a low-level IO error
    */
   public static DirectoryReader open(final Directory directory) throws IOException {
-    return StandardDirectoryReader.open(directory, null, DEFAULT_TERMS_INDEX_DIVISOR);
-  }
-
-  /** Expert: Returns a IndexReader reading the index in the given
-   *  Directory with the given termInfosIndexDivisor.
-   * @param directory the index directory
-   * @param termInfosIndexDivisor Subsamples which indexed
-   *  terms are loaded into RAM. This has the same effect as {@link
-   *  IndexWriterConfig#setTermIndexInterval} except that setting
-   *  must be done at indexing time while this setting can be
-   *  set per reader.  When set to N, then one in every
-   *  N*termIndexInterval terms in the index is loaded into
-   *  memory.  By setting this to a value > 1 you can reduce
-   *  memory usage, at the expense of higher latency when
-   *  loading a TermInfo.  The default value is 1.  Set this
-   *  to -1 to skip loading the terms index entirely.
-   *  <b>NOTE:</b> divisor settings > 1 do not apply to all PostingsFormat
-   *  implementations, including the default one in this release. It only makes
-   *  sense for terms indexes that can efficiently re-sample terms at load time.
-   * @throws IOException if there is a low-level IO error
-   */
-  public static DirectoryReader open(final Directory directory, int termInfosIndexDivisor) throws IOException {
-    return StandardDirectoryReader.open(directory, null, termInfosIndexDivisor);
+    return StandardDirectoryReader.open(directory, null);
   }
 
   /**
@@ -118,29 +93,7 @@ public abstract class DirectoryReader extends BaseCompositeReader<AtomicReader>
   * @throws IOException if there is a low-level IO error
   */
  public static DirectoryReader open(final IndexCommit commit) throws IOException {
-    return StandardDirectoryReader.open(commit.getDirectory(), commit, DEFAULT_TERMS_INDEX_DIVISOR);
-  }
-
-  /** Expert: returns an IndexReader reading the index in the given
-   *  {@link IndexCommit} and termInfosIndexDivisor.
-   * @param commit the commit point to open
-   * @param termInfosIndexDivisor Subsamples which indexed
-   *  terms are loaded into RAM. This has the same effect as {@link
-   *  IndexWriterConfig#setTermIndexInterval} except that setting
-   *  must be done at indexing time while this setting can be
-   *  set per reader.  When set to N, then one in every
-   *  N*termIndexInterval terms in the index is loaded into
-   *  memory.  By setting this to a value > 1 you can reduce
-   *  memory usage, at the expense of higher latency when
-   *  loading a TermInfo.  The default value is 1.  Set this
-   *  to -1 to skip loading the terms index entirely.
-   *  <b>NOTE:</b> divisor settings > 1 do not apply to all PostingsFormat
-   *  implementations, including the default one in this release. It only makes
-   *  sense for terms indexes that can efficiently re-sample terms at load time.
-   * @throws IOException if there is a low-level IO error
-   */
-  public static DirectoryReader open(final IndexCommit commit, int termInfosIndexDivisor) throws IOException {
-    return StandardDirectoryReader.open(commit.getDirectory(), commit, termInfosIndexDivisor);
+    return StandardDirectoryReader.open(commit.getDirectory(), commit);
   }
 
   /**
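Caller-side effect of the two hunks above, as a minimal sketch (the index path and the IndexCommit are illustrative, not from this commit):

Directory dir = FSDirectory.open(new File("/path/to/index"));  // path is illustrative
DirectoryReader reader = DirectoryReader.open(dir);            // was: open(dir, termInfosIndexDivisor)
DirectoryReader fromCommit = DirectoryReader.open(commit);     // 'commit' is an IndexCommit obtained elsewhere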
@@ -468,7 +468,6 @@ class DocumentsWriterPerThread {
     assert deleteSlice == null : "all deletes must be applied in prepareFlush";
     segmentInfo.setDocCount(numDocsInRAM);
     flushState = new SegmentWriteState(infoStream, directory, segmentInfo, fieldInfos.finish(),
-        writer.getConfig().getTermIndexInterval(),
         pendingDeletes, new IOContext(new FlushInfo(numDocsInRAM, bytesUsed())));
     final double startMBUsed = parent.flushControl.netBytes() / 1024. / 1024.;
 
@ -2437,10 +2437,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
|
||||||
* to call this method multiple times, each time with a small set of readers.
|
* to call this method multiple times, each time with a small set of readers.
|
||||||
* In principle, if you use a merge policy with a {@code mergeFactor} or
|
* In principle, if you use a merge policy with a {@code mergeFactor} or
|
||||||
* {@code maxMergeAtOnce} parameter, you should pass that many readers in one
|
* {@code maxMergeAtOnce} parameter, you should pass that many readers in one
|
||||||
* call. Also, if the given readers are {@link DirectoryReader}s, they can be
|
* call.
|
||||||
* opened with {@code termIndexInterval=-1} to save RAM, since during merge
|
|
||||||
* the in-memory structure is not used. See
|
|
||||||
* {@link DirectoryReader#open(Directory, int)}.
|
|
||||||
*
|
*
|
||||||
* <p>
|
* <p>
|
||||||
* <b>NOTE</b>: if you call {@link #close(boolean)} with <tt>false</tt>, which
|
* <b>NOTE</b>: if you call {@link #close(boolean)} with <tt>false</tt>, which
|
||||||
|
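The updated javadoc above still recommends passing roughly one merge's worth of readers per addIndexes call. A minimal sketch of that batching, assuming addIndexes(IndexReader...) is used and the caller closes its own readers; the helper class, the batch size, and the source-directory list are illustrative only:

// Sketch only: batching addIndexes(IndexReader...) calls roughly by merge factor.
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;

class AddIndexesSketch {
  void addAll(IndexWriter writer, List<Directory> sources, int batchSize) throws IOException {
    List<IndexReader> batch = new ArrayList<IndexReader>();
    for (Directory src : sources) {
      batch.add(DirectoryReader.open(src));
      if (batch.size() == batchSize) {           // e.g. the merge policy's mergeFactor
        writer.addIndexes(batch.toArray(new IndexReader[batch.size()]));
        for (IndexReader r : batch) r.close();   // addIndexes does not close the readers
        batch.clear();
      }
    }
    if (!batch.isEmpty()) {
      writer.addIndexes(batch.toArray(new IndexReader[batch.size()]));
      for (IndexReader r : batch) r.close();
    }
  }
}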
@@ -2488,7 +2485,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
     SegmentInfo info = new SegmentInfo(directory, Constants.LUCENE_MAIN_VERSION, mergedName, -1,
                                        false, codec, null, null);
 
-    SegmentMerger merger = new SegmentMerger(mergeReaders, info, infoStream, trackingDir, config.getTermIndexInterval(),
+    SegmentMerger merger = new SegmentMerger(mergeReaders, info, infoStream, trackingDir,
                                              MergeState.CheckAbort.NONE, globalFieldNumberMap, context);
 
     MergeState mergeState;
@@ -3670,7 +3667,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
       // Hold onto the "live" reader; we will use this to
       // commit merged deletes
       final ReadersAndLiveDocs rld = readerPool.get(info, true);
-      SegmentReader reader = rld.getMergeReader(context);
+      SegmentReader reader = rld.getReader(context);
       assert reader != null;
 
       // Carefully pull the most recent live docs:
@@ -3727,7 +3724,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
     // we pass merge.getMergeReaders() instead of merge.readers to allow the
     // OneMerge to return a view over the actual segments to merge
     final SegmentMerger merger = new SegmentMerger(merge.getMergeReaders(),
-        merge.info.info, infoStream, dirWrapper, config.getTermIndexInterval(),
+        merge.info.info, infoStream, dirWrapper,
         checkAbort, globalFieldNumberMap, context);
 
     merge.checkAborted(directory);
@@ -72,9 +72,6 @@ public final class IndexWriterConfig extends LiveIndexWriterConfig implements Cl
     CREATE_OR_APPEND
   }
 
-  /** Default value is 32. Change using {@link #setTermIndexInterval(int)}. */
-  public static final int DEFAULT_TERM_INDEX_INTERVAL = 32; // TODO: this should be private to the codec, not settable here
-
   /** Denotes a flush trigger is disabled. */
   public final static int DISABLE_AUTO_FLUSH = -1;
 
@@ -100,9 +97,6 @@ public final class IndexWriterConfig extends LiveIndexWriterConfig implements Cl
   /** Default setting for {@link #setReaderPooling}. */
   public final static boolean DEFAULT_READER_POOLING = false;
 
-  /** Default value is 1. Change using {@link #setReaderTermsIndexDivisor(int)}. */
-  public static final int DEFAULT_READER_TERMS_INDEX_DIVISOR = DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR;
-
   /** Default value is 1945. Change using {@link #setRAMPerThreadHardLimitMB(int)} */
   public static final int DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB = 1945;
 
@@ -503,16 +497,6 @@ public final class IndexWriterConfig extends LiveIndexWriterConfig implements Cl
     return super.getRAMBufferSizeMB();
   }
 
-  @Override
-  public int getReaderTermsIndexDivisor() {
-    return super.getReaderTermsIndexDivisor();
-  }
-
-  @Override
-  public int getTermIndexInterval() {
-    return super.getTermIndexInterval();
-  }
-
   /** If non-null, information about merges, deletes and a
    * message when maxFieldLength is reached will be printed
    * to this.
@@ -554,16 +538,6 @@ public final class IndexWriterConfig extends LiveIndexWriterConfig implements Cl
     return (IndexWriterConfig) super.setRAMBufferSizeMB(ramBufferSizeMB);
   }
 
-  @Override
-  public IndexWriterConfig setReaderTermsIndexDivisor(int divisor) {
-    return (IndexWriterConfig) super.setReaderTermsIndexDivisor(divisor);
-  }
-
-  @Override
-  public IndexWriterConfig setTermIndexInterval(int interval) {
-    return (IndexWriterConfig) super.setTermIndexInterval(interval);
-  }
-
   @Override
   public IndexWriterConfig setUseCompoundFile(boolean useCompoundFile) {
     return (IndexWriterConfig) super.setUseCompoundFile(useCompoundFile);
@@ -19,7 +19,6 @@ package org.apache.lucene.index;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.codecs.Codec;
-import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat; // javadocs
 import org.apache.lucene.index.DocumentsWriterPerThread.IndexingChain;
 import org.apache.lucene.index.IndexWriter.IndexReaderWarmer;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
@@ -41,9 +40,7 @@ public class LiveIndexWriterConfig {
   private volatile int maxBufferedDocs;
   private volatile double ramBufferSizeMB;
   private volatile int maxBufferedDeleteTerms;
-  private volatile int readerTermsIndexDivisor;
   private volatile IndexReaderWarmer mergedSegmentWarmer;
-  private volatile int termIndexInterval; // TODO: this should be private to the codec, not settable here
 
   // modified by IndexWriterConfig
   /** {@link IndexDeletionPolicy} controlling when commit
@@ -108,9 +105,7 @@ public class LiveIndexWriterConfig {
     ramBufferSizeMB = IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB;
     maxBufferedDocs = IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS;
     maxBufferedDeleteTerms = IndexWriterConfig.DEFAULT_MAX_BUFFERED_DELETE_TERMS;
-    readerTermsIndexDivisor = IndexWriterConfig.DEFAULT_READER_TERMS_INDEX_DIVISOR;
     mergedSegmentWarmer = null;
-    termIndexInterval = IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL; // TODO: this should be private to the codec, not settable here
     delPolicy = new KeepOnlyLastCommitDeletionPolicy();
     commit = null;
     useCompoundFile = IndexWriterConfig.DEFAULT_USE_COMPOUND_FILE_SYSTEM;
@@ -140,8 +135,6 @@ public class LiveIndexWriterConfig {
     maxBufferedDocs = config.getMaxBufferedDocs();
     mergedSegmentWarmer = config.getMergedSegmentWarmer();
     ramBufferSizeMB = config.getRAMBufferSizeMB();
-    readerTermsIndexDivisor = config.getReaderTermsIndexDivisor();
-    termIndexInterval = config.getTermIndexInterval();
     matchVersion = config.matchVersion;
     analyzer = config.getAnalyzer();
     delPolicy = config.getIndexDeletionPolicy();
@@ -165,69 +158,6 @@ public class LiveIndexWriterConfig {
   public Analyzer getAnalyzer() {
     return analyzer;
   }
-
-  /**
-   * Expert: set the interval between indexed terms. Large values cause less
-   * memory to be used by IndexReader, but slow random-access to terms. Small
-   * values cause more memory to be used by an IndexReader, and speed
-   * random-access to terms.
-   * <p>
-   * This parameter determines the amount of computation required per query
-   * term, regardless of the number of documents that contain that term. In
-   * particular, it is the maximum number of other terms that must be scanned
-   * before a term is located and its frequency and position information may be
-   * processed. In a large index with user-entered query terms, query processing
-   * time is likely to be dominated not by term lookup but rather by the
-   * processing of frequency and positional data. In a small index or when many
-   * uncommon query terms are generated (e.g., by wildcard queries) term lookup
-   * may become a dominant cost.
-   * <p>
-   * In particular, <code>numUniqueTerms/interval</code> terms are read into
-   * memory by an IndexReader, and, on average, <code>interval/2</code> terms
-   * must be scanned for each random term access.
-   *
-   * <p>
-   * Takes effect immediately, but only applies to newly flushed/merged
-   * segments.
-   *
-   * <p>
-   * <b>NOTE:</b> This parameter does not apply to all PostingsFormat implementations,
-   * including the default one in this release. It only makes sense for term indexes
-   * that are implemented as a fixed gap between terms. For example,
-   * {@link Lucene41PostingsFormat} implements the term index instead based upon how
-   * terms share prefixes. To configure its parameters (the minimum and maximum size
-   * for a block), you would instead use {@link Lucene41PostingsFormat#Lucene41PostingsFormat(int, int)}.
-   * which can also be configured on a per-field basis:
-   * <pre class="prettyprint">
-   * //customize Lucene41PostingsFormat, passing minBlockSize=50, maxBlockSize=100
-   * final PostingsFormat tweakedPostings = new Lucene41PostingsFormat(50, 100);
-   * iwc.setCodec(new Lucene42Codec() {
-   *   @Override
-   *   public PostingsFormat getPostingsFormatForField(String field) {
-   *     if (field.equals("fieldWithTonsOfTerms"))
-   *       return tweakedPostings;
-   *     else
-   *       return super.getPostingsFormatForField(field);
-   *   }
-   * });
-   * </pre>
-   * Note that other implementations may have their own parameters, or no parameters at all.
-   *
-   * @see IndexWriterConfig#DEFAULT_TERM_INDEX_INTERVAL
-   */
-  public LiveIndexWriterConfig setTermIndexInterval(int interval) { // TODO: this should be private to the codec, not settable here
-    this.termIndexInterval = interval;
-    return this;
-  }
-
-  /**
-   * Returns the interval between indexed terms.
-   *
-   * @see #setTermIndexInterval(int)
-   */
-  public int getTermIndexInterval() { // TODO: this should be private to the codec, not settable here
-    return termIndexInterval;
-  }
-
   /**
    * Determines the minimal number of delete terms required before the buffered
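With setTermIndexInterval gone, the migration path named in the removed javadoc above is to configure the terms dictionary through the codec, optionally per field. Below is a runnable completion of that javadoc snippet, as a sketch only: the field name and the 50/100 block sizes are the javadoc's own illustrative values, and the surrounding class and method are hypothetical.

// Sketch only: per-field terms-dictionary tuning via the codec, replacing setTermIndexInterval.
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat;
import org.apache.lucene.codecs.lucene42.Lucene42Codec;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.util.Version;

class PerFieldTermsIndexSketch {
  IndexWriterConfig build(Version matchVersion, Analyzer analyzer) {
    // customize Lucene41PostingsFormat, passing minBlockSize=50, maxBlockSize=100
    final PostingsFormat tweakedPostings = new Lucene41PostingsFormat(50, 100);
    IndexWriterConfig iwc = new IndexWriterConfig(matchVersion, analyzer);
    iwc.setCodec(new Lucene42Codec() {
      @Override
      public PostingsFormat getPostingsFormatForField(String field) {
        if (field.equals("fieldWithTonsOfTerms")) {   // hypothetical field name
          return tweakedPostings;
        }
        return super.getPostingsFormatForField(field);
      }
    });
    return iwc;
  }
}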
@@ -390,37 +320,6 @@ public class LiveIndexWriterConfig {
   public IndexReaderWarmer getMergedSegmentWarmer() {
     return mergedSegmentWarmer;
   }
-
-  /**
-   * Sets the termsIndexDivisor passed to any readers that IndexWriter opens,
-   * for example when applying deletes or creating a near-real-time reader in
-   * {@link DirectoryReader#open(IndexWriter, boolean)}. If you pass -1, the
-   * terms index won't be loaded by the readers. This is only useful in advanced
-   * situations when you will only .next() through all terms; attempts to seek
-   * will hit an exception.
-   *
-   * <p>
-   * Takes effect immediately, but only applies to readers opened after this
-   * call
-   * <p>
-   * <b>NOTE:</b> divisor settings > 1 do not apply to all PostingsFormat
-   * implementations, including the default one in this release. It only makes
-   * sense for terms indexes that can efficiently re-sample terms at load time.
-   */
-  public LiveIndexWriterConfig setReaderTermsIndexDivisor(int divisor) {
-    if (divisor <= 0 && divisor != -1) {
-      throw new IllegalArgumentException("divisor must be >= 1, or -1 (got " + divisor + ")");
-    }
-    readerTermsIndexDivisor = divisor;
-    return this;
-  }
-
-  /** Returns the {@code termInfosIndexDivisor}.
-   *
-   * @see #setReaderTermsIndexDivisor(int) */
-  public int getReaderTermsIndexDivisor() {
-    return readerTermsIndexDivisor;
-  }
-
   /** Returns the {@link OpenMode} set by {@link IndexWriterConfig#setOpenMode(OpenMode)}. */
   public OpenMode getOpenMode() {
@@ -583,8 +482,6 @@ public class LiveIndexWriterConfig {
     sb.append("maxBufferedDocs=").append(getMaxBufferedDocs()).append("\n");
     sb.append("maxBufferedDeleteTerms=").append(getMaxBufferedDeleteTerms()).append("\n");
     sb.append("mergedSegmentWarmer=").append(getMergedSegmentWarmer()).append("\n");
-    sb.append("readerTermsIndexDivisor=").append(getReaderTermsIndexDivisor()).append("\n");
-    sb.append("termIndexInterval=").append(getTermIndexInterval()).append("\n"); // TODO: this should be private to the codec, not settable here
     sb.append("delPolicy=").append(getIndexDeletionPolicy().getClass().getName()).append("\n");
     IndexCommit commit = getIndexCommit();
     sb.append("commit=").append(commit == null ? "null" : commit).append("\n");
@@ -43,16 +43,6 @@ class ReadersAndLiveDocs {
   // Set once (null, and then maybe set, and never set again):
   private SegmentReader reader;
 
-  // TODO: it's sometimes wasteful that we hold open two
-  // separate SRs (one for merging one for
-  // reading)... maybe just use a single SR?  The gains of
-  // not loading the terms index (for merging in the
-  // non-NRT case) are far less now... and if the app has
-  // any deletes it'll open real readers anyway.
-
-  // Set once (null, and then maybe set, and never set again):
-  private SegmentReader mergeReader;
-
   // Holds the current shared (readable and writable
   // liveDocs).  This is null when there are no deleted
   // docs, and it's copy-on-write (cloned whenever we need
@@ -118,7 +108,7 @@ class ReadersAndLiveDocs {
 
     if (reader == null) {
       // We steal returned ref:
-      reader = new SegmentReader(info, writer.getConfig().getReaderTermsIndexDivisor(), context);
+      reader = new SegmentReader(info, context);
       if (liveDocs == null) {
        liveDocs = reader.getLiveDocs();
       }
@@ -131,37 +121,6 @@ class ReadersAndLiveDocs {
     return reader;
   }
 
-  // Get reader for merging (does not load the terms
-  // index):
-  public synchronized SegmentReader getMergeReader(IOContext context) throws IOException {
-    //System.out.println("  livedocs=" + rld.liveDocs);
-
-    if (mergeReader == null) {
-
-      if (reader != null) {
-        // Just use the already opened non-merge reader
-        // for merging.  In the NRT case this saves us
-        // pointless double-open:
-        //System.out.println("PROMOTE non-merge reader seg=" + rld.info);
-        // Ref for us:
-        reader.incRef();
-        mergeReader = reader;
-        //System.out.println(Thread.currentThread().getName() + ": getMergeReader share seg=" + info.name);
-      } else {
-        //System.out.println(Thread.currentThread().getName() + ": getMergeReader seg=" + info.name);
-        // We steal returned ref:
-        mergeReader = new SegmentReader(info, -1, context);
-        if (liveDocs == null) {
-          liveDocs = mergeReader.getLiveDocs();
-        }
-      }
-    }
-
-    // Ref for caller
-    mergeReader.incRef();
-    return mergeReader;
-  }
-
   public synchronized void release(SegmentReader sr) throws IOException {
     assert info == sr.getSegmentInfo();
     sr.decRef();
@@ -185,23 +144,12 @@ class ReadersAndLiveDocs {
   public synchronized void dropReaders() throws IOException {
     // TODO: can we somehow use IOUtils here...? problem is
     // we are calling .decRef not .close)...
-    try {
-      if (reader != null) {
-        //System.out.println("  pool.drop info=" + info + " rc=" + reader.getRefCount());
-        try {
-          reader.decRef();
-        } finally {
-          reader = null;
-        }
-      }
-    } finally {
-      if (mergeReader != null) {
-        //System.out.println("  pool.drop info=" + info + " merge rc=" + mergeReader.getRefCount());
-        try {
-          mergeReader.decRef();
-        } finally {
-          mergeReader = null;
-        }
+    if (reader != null) {
+      //System.out.println("  pool.drop info=" + info + " rc=" + reader.getRefCount());
+      try {
+        reader.decRef();
+      } finally {
+        reader = null;
       }
     }
 
@@ -57,8 +57,6 @@ final class SegmentCoreReaders {
   final DocValuesProducer dvProducer;
   final DocValuesProducer normsProducer;
 
-  final int termsIndexDivisor;
-
   private final SegmentReader owner;
 
   final StoredFieldsReader fieldsReaderOrig;
@@ -100,11 +98,7 @@ final class SegmentCoreReaders {
   private final Set<CoreClosedListener> coreClosedListeners =
       Collections.synchronizedSet(new LinkedHashSet<CoreClosedListener>());
 
-  SegmentCoreReaders(SegmentReader owner, Directory dir, SegmentInfoPerCommit si, IOContext context, int termsIndexDivisor) throws IOException {
-
-    if (termsIndexDivisor == 0) {
-      throw new IllegalArgumentException("indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)");
-    }
+  SegmentCoreReaders(SegmentReader owner, Directory dir, SegmentInfoPerCommit si, IOContext context) throws IOException {
 
     final Codec codec = si.info.getCodec();
     final Directory cfsDir;  // confusing name: if (cfs) its the cfsdir, otherwise its the segment's directory.
@@ -120,9 +114,8 @@ final class SegmentCoreReaders {
       }
       fieldInfos = codec.fieldInfosFormat().getFieldInfosReader().read(cfsDir, si.info.name, IOContext.READONCE);
 
-      this.termsIndexDivisor = termsIndexDivisor;
       final PostingsFormat format = codec.postingsFormat();
-      final SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si.info, fieldInfos, context, termsIndexDivisor);
+      final SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si.info, fieldInfos, context);
       // Ask codec for its Fields
       fields = format.fieldsProducer(segmentReadState);
       assert fields != null;
@@ -42,7 +42,6 @@ import org.apache.lucene.util.InfoStream;
  */
 final class SegmentMerger {
   private final Directory directory;
-  private final int termIndexInterval;
 
   private final Codec codec;
 
@@ -52,11 +51,10 @@ final class SegmentMerger {
   private final FieldInfos.Builder fieldInfosBuilder;
 
   // note, just like in codec apis Directory 'dir' is NOT the same as segmentInfo.dir!!
-  SegmentMerger(List<AtomicReader> readers, SegmentInfo segmentInfo, InfoStream infoStream, Directory dir, int termIndexInterval,
+  SegmentMerger(List<AtomicReader> readers, SegmentInfo segmentInfo, InfoStream infoStream, Directory dir,
                 MergeState.CheckAbort checkAbort, FieldInfos.FieldNumbers fieldNumbers, IOContext context) {
     mergeState = new MergeState(readers, segmentInfo, infoStream, checkAbort);
     directory = dir;
-    this.termIndexInterval = termIndexInterval;
     this.codec = segmentInfo.getCodec();
     this.context = context;
     this.fieldInfosBuilder = new FieldInfos.Builder(fieldNumbers);
@@ -91,7 +89,7 @@ final class SegmentMerger {
     assert numMerged == mergeState.segmentInfo.getDocCount();
 
     final SegmentWriteState segmentWriteState = new SegmentWriteState(mergeState.infoStream, directory, mergeState.segmentInfo,
-        mergeState.fieldInfos, termIndexInterval, null, context);
+        mergeState.fieldInfos, null, context);
     if (mergeState.infoStream.isEnabled("SM")) {
       t0 = System.nanoTime();
     }
@@ -41,17 +41,6 @@ public class SegmentReadState {
    *  Directory#openInput(String,IOContext)}. */
   public final IOContext context;
 
-  /** The {@code termInfosIndexDivisor} to use, if
-   *  appropriate (not all {@link PostingsFormat}s support
-   *  it; in particular the current default does not).
-   *
-   *  <p> NOTE: if this is < 0, that means "defer terms index
-   *  load until needed". But if the codec must load the
-   *  terms index on init (preflex is the only once currently
-   *  that must do so), then it should negate this value to
-   *  get the app's terms divisor */
-  public int termsIndexDivisor;
-
   /** Unique suffix for any postings files read for this
    *  segment. {@link PerFieldPostingsFormat} sets this for
    *  each of the postings formats it wraps. If you create
@@ -62,8 +51,8 @@ public class SegmentReadState {
 
   /** Create a {@code SegmentReadState}. */
   public SegmentReadState(Directory dir, SegmentInfo info,
-      FieldInfos fieldInfos, IOContext context, int termsIndexDivisor) {
-    this(dir, info, fieldInfos, context, termsIndexDivisor, "");
+      FieldInfos fieldInfos, IOContext context) {
+    this(dir, info, fieldInfos, context, "");
   }
 
   /** Create a {@code SegmentReadState}. */
@@ -71,13 +60,11 @@ public class SegmentReadState {
                           SegmentInfo info,
                           FieldInfos fieldInfos,
                           IOContext context,
-                          int termsIndexDivisor,
                           String segmentSuffix) {
     this.directory = dir;
     this.segmentInfo = info;
     this.fieldInfos = fieldInfos;
     this.context = context;
-    this.termsIndexDivisor = termsIndexDivisor;
     this.segmentSuffix = segmentSuffix;
   }
 
@@ -88,7 +75,6 @@ public class SegmentReadState {
     this.segmentInfo = other.segmentInfo;
     this.fieldInfos = other.fieldInfos;
     this.context = other.context;
-    this.termsIndexDivisor = other.termsIndexDivisor;
     this.segmentSuffix = newSegmentSuffix;
   }
 }
@@ -51,9 +51,9 @@ public final class SegmentReader extends AtomicReader {
    * @throws IOException if there is a low-level IO error
    */
   // TODO: why is this public?
-  public SegmentReader(SegmentInfoPerCommit si, int termInfosIndexDivisor, IOContext context) throws IOException {
+  public SegmentReader(SegmentInfoPerCommit si, IOContext context) throws IOException {
     this.si = si;
-    core = new SegmentCoreReaders(this, si.info.dir, si, context, termInfosIndexDivisor);
+    core = new SegmentCoreReaders(this, si.info.dir, si, context);
     boolean success = false;
     try {
       if (si.hasDeletions()) {
@@ -217,12 +217,6 @@ public final class SegmentReader extends AtomicReader {
     return this;
   }
 
-  /** Returns term infos index divisor originally passed to
-   *  {@link #SegmentReader(SegmentInfoPerCommit, int, IOContext)}. */
-  public int getTermInfosIndexDivisor() {
-    return core.termsIndexDivisor;
-  }
-
   @Override
   public NumericDocValues getNumericDocValues(String field) throws IOException {
     ensureOpen();
@@ -66,13 +66,6 @@ public class SegmentWriteState {
    *  write/read must be derived using this suffix (use
    *  {@link IndexFileNames#segmentFileName(String,String,String)}). */
   public final String segmentSuffix;
 
-  /** Expert: The fraction of terms in the "dictionary" which should be stored
-   *  in RAM.  Smaller values use more memory, but make searching slightly
-   *  faster, while larger values use less memory and make searching slightly
-   *  slower.  Searching is typically not dominated by dictionary lookup, so
-   *  tweaking this is rarely useful.*/
-  public int termIndexInterval;    // TODO: this should be private to the codec, not settable here or in IWC
-
   /** {@link IOContext} for all writes; you should pass this
    *  to {@link Directory#createOutput(String,IOContext)}. */
@@ -80,13 +73,12 @@ public class SegmentWriteState {
 
   /** Sole constructor. */
   public SegmentWriteState(InfoStream infoStream, Directory directory, SegmentInfo segmentInfo, FieldInfos fieldInfos,
-      int termIndexInterval, BufferedDeletes segDeletes, IOContext context) {
+      BufferedDeletes segDeletes, IOContext context) {
     this.infoStream = infoStream;
     this.segDeletes = segDeletes;
     this.directory = directory;
     this.segmentInfo = segmentInfo;
     this.fieldInfos = fieldInfos;
-    this.termIndexInterval = termIndexInterval;
     segmentSuffix = "";
     this.context = context;
   }
@@ -99,7 +91,6 @@ public class SegmentWriteState {
     directory = state.directory;
     segmentInfo = state.segmentInfo;
     fieldInfos = state.fieldInfos;
-    termIndexInterval = state.termIndexInterval;
     context = state.context;
     this.segmentSuffix = segmentSuffix;
     segDeletes = state.segDeletes;
@@ -33,22 +33,19 @@ final class StandardDirectoryReader extends DirectoryReader {
 
   private final IndexWriter writer;
   private final SegmentInfos segmentInfos;
-  private final int termInfosIndexDivisor;
   private final boolean applyAllDeletes;
 
   /** called only from static open() methods */
   StandardDirectoryReader(Directory directory, AtomicReader[] readers, IndexWriter writer,
-      SegmentInfos sis, int termInfosIndexDivisor, boolean applyAllDeletes) {
+      SegmentInfos sis, boolean applyAllDeletes) {
     super(directory, readers);
     this.writer = writer;
     this.segmentInfos = sis;
-    this.termInfosIndexDivisor = termInfosIndexDivisor;
     this.applyAllDeletes = applyAllDeletes;
   }
 
   /** called from DirectoryReader.open(...) methods */
-  static DirectoryReader open(final Directory directory, final IndexCommit commit,
-      final int termInfosIndexDivisor) throws IOException {
+  static DirectoryReader open(final Directory directory, final IndexCommit commit) throws IOException {
     return (DirectoryReader) new SegmentInfos.FindSegmentsFile(directory) {
       @Override
       protected Object doBody(String segmentFileName) throws IOException {
@@ -59,7 +56,7 @@ final class StandardDirectoryReader extends DirectoryReader {
           IOException prior = null;
           boolean success = false;
           try {
-            readers[i] = new SegmentReader(sis.info(i), termInfosIndexDivisor, IOContext.READ);
+            readers[i] = new SegmentReader(sis.info(i), IOContext.READ);
             success = true;
           } catch(IOException ex) {
             prior = ex;
@@ -68,7 +65,7 @@ final class StandardDirectoryReader extends DirectoryReader {
               IOUtils.closeWhileHandlingException(prior, readers);
           }
         }
-        return new StandardDirectoryReader(directory, readers, null, sis, termInfosIndexDivisor, false);
+        return new StandardDirectoryReader(directory, readers, null, sis, false);
       }
     }.run(commit);
   }
@@ -119,12 +116,11 @@ final class StandardDirectoryReader extends DirectoryReader {
       }
     }
     return new StandardDirectoryReader(dir, readers.toArray(new SegmentReader[readers.size()]),
-      writer, segmentInfos, writer.getConfig().getReaderTermsIndexDivisor(), applyAllDeletes);
+      writer, segmentInfos, applyAllDeletes);
   }
 
   /** This constructor is only used for {@link #doOpenIfChanged(SegmentInfos)} */
-  private static DirectoryReader open(Directory directory, SegmentInfos infos, List<? extends AtomicReader> oldReaders,
-    int termInfosIndexDivisor) throws IOException {
+  private static DirectoryReader open(Directory directory, SegmentInfos infos, List<? extends AtomicReader> oldReaders) throws IOException {
 
     // we put the old SegmentReaders in a map, that allows us
     // to lookup a reader using its segment name
@@ -162,7 +158,7 @@ final class StandardDirectoryReader extends DirectoryReader {
       if (newReaders[i] == null || infos.info(i).info.getUseCompoundFile() != newReaders[i].getSegmentInfo().info.getUseCompoundFile()) {
 
         // this is a new reader; in case we hit an exception we can close it safely
-        newReader = new SegmentReader(infos.info(i), termInfosIndexDivisor, IOContext.READ);
+        newReader = new SegmentReader(infos.info(i), IOContext.READ);
         readerShared[i] = false;
         newReaders[i] = newReader;
       } else {
@@ -212,7 +208,7 @@ final class StandardDirectoryReader extends DirectoryReader {
         }
       }
     }
-    return new StandardDirectoryReader(directory, newReaders, null, infos, termInfosIndexDivisor, false);
+    return new StandardDirectoryReader(directory, newReaders, null, infos, false);
   }
 
   @Override
@@ -313,7 +309,7 @@ final class StandardDirectoryReader extends DirectoryReader {
   }
 
   DirectoryReader doOpenIfChanged(SegmentInfos infos) throws IOException {
-    return StandardDirectoryReader.open(directory, infos, getSequentialSubReaders(), termInfosIndexDivisor);
+    return StandardDirectoryReader.open(directory, infos, getSequentialSubReaders());
   }
 
   @Override
@ -24,7 +24,7 @@ import org.apache.lucene.index.BasePostingsFormatTestCase;
|
||||||
* Tests Lucene40PostingsFormat
|
* Tests Lucene40PostingsFormat
|
||||||
*/
|
*/
|
||||||
public class TestLucene40PostingsFormat extends BasePostingsFormatTestCase {
|
public class TestLucene40PostingsFormat extends BasePostingsFormatTestCase {
|
||||||
private final Codec codec = new Lucene40Codec();
|
private final Codec codec = new Lucene40RWCodec();
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Codec getCodec() {
|
protected Codec getCodec() {
|
||||||
|
|
|
@ -192,7 +192,7 @@ public class TestPerFieldPostingsFormat2 extends LuceneTestCase {
|
||||||
if (VERBOSE) {
|
if (VERBOSE) {
|
||||||
System.out.println("\nTEST: assertQuery " + t);
|
System.out.println("\nTEST: assertQuery " + t);
|
||||||
}
|
}
|
||||||
IndexReader reader = DirectoryReader.open(dir, 1);
|
IndexReader reader = DirectoryReader.open(dir);
|
||||||
IndexSearcher searcher = newSearcher(reader);
|
IndexSearcher searcher = newSearcher(reader);
|
||||||
TopDocs search = searcher.search(new TermQuery(t), num + 10);
|
TopDocs search = searcher.search(new TermQuery(t), num + 10);
|
||||||
assertEquals(num, search.totalHits);
|
assertEquals(num, search.totalHits);
|
||||||
|
|
|
@ -296,7 +296,7 @@ public class TestCodecs extends LuceneTestCase {
|
||||||
Codec codec = Codec.getDefault();
|
Codec codec = Codec.getDefault();
|
||||||
final SegmentInfo si = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, SEGMENT, 10000, false, codec, null, null);
|
final SegmentInfo si = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, SEGMENT, 10000, false, codec, null, null);
|
||||||
|
|
||||||
final FieldsProducer reader = codec.postingsFormat().fieldsProducer(new SegmentReadState(dir, si, fieldInfos, newIOContext(random()), DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR));
|
final FieldsProducer reader = codec.postingsFormat().fieldsProducer(new SegmentReadState(dir, si, fieldInfos, newIOContext(random())));
|
||||||
|
|
||||||
final Iterator<String> fieldsEnum = reader.iterator();
|
final Iterator<String> fieldsEnum = reader.iterator();
|
||||||
String fieldName = fieldsEnum.next();
|
String fieldName = fieldsEnum.next();
|
||||||
|
@ -357,7 +357,7 @@ public class TestCodecs extends LuceneTestCase {
|
||||||
if (VERBOSE) {
|
if (VERBOSE) {
|
||||||
System.out.println("TEST: now read postings");
|
System.out.println("TEST: now read postings");
|
||||||
}
|
}
|
||||||
final FieldsProducer terms = codec.postingsFormat().fieldsProducer(new SegmentReadState(dir, si, fieldInfos, newIOContext(random()), DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR));
|
final FieldsProducer terms = codec.postingsFormat().fieldsProducer(new SegmentReadState(dir, si, fieldInfos, newIOContext(random())));
|
||||||
|
|
||||||
final Verify[] threads = new Verify[NUM_TEST_THREADS-1];
|
final Verify[] threads = new Verify[NUM_TEST_THREADS-1];
|
||||||
for(int i=0;i<NUM_TEST_THREADS-1;i++) {
|
for(int i=0;i<NUM_TEST_THREADS-1;i++) {
|
||||||
|
@ -655,10 +655,9 @@ public class TestCodecs extends LuceneTestCase {
|
||||||
|
|
||||||
private void write(final FieldInfos fieldInfos, final Directory dir, final FieldData[] fields) throws Throwable {
|
private void write(final FieldInfos fieldInfos, final Directory dir, final FieldData[] fields) throws Throwable {
|
||||||
|
|
||||||
final int termIndexInterval = _TestUtil.nextInt(random(), 13, 27);
|
|
||||||
final Codec codec = Codec.getDefault();
|
final Codec codec = Codec.getDefault();
|
||||||
final SegmentInfo si = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, SEGMENT, 10000, false, codec, null, null);
|
final SegmentInfo si = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, SEGMENT, 10000, false, codec, null, null);
|
||||||
final SegmentWriteState state = new SegmentWriteState(InfoStream.getDefault(), dir, si, fieldInfos, termIndexInterval, null, newIOContext(random()));
|
final SegmentWriteState state = new SegmentWriteState(InfoStream.getDefault(), dir, si, fieldInfos, null, newIOContext(random()));
|
||||||
|
|
||||||
final FieldsConsumer consumer = codec.postingsFormat().fieldsConsumer(state);
|
final FieldsConsumer consumer = codec.postingsFormat().fieldsConsumer(state);
|
||||||
Arrays.sort(fields);
|
Arrays.sort(fields);
|
||||||
|
|
|
@ -822,54 +822,6 @@ public void testFilesOpenClose() throws IOException {
|
||||||
dir.close();
|
dir.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
// LUCENE-1609: don't load terms index
|
|
||||||
public void testNoTermsIndex() throws Throwable {
|
|
||||||
Directory dir = newDirectory();
|
|
||||||
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setCodec(_TestUtil.alwaysPostingsFormat(new Lucene41PostingsFormat())));
|
|
||||||
Document doc = new Document();
|
|
||||||
doc.add(newTextField("field", "a b c d e f g h i j k l m n o p q r s t u v w x y z", Field.Store.NO));
|
|
||||||
doc.add(newTextField("number", "0 1 2 3 4 5 6 7 8 9", Field.Store.NO));
|
|
||||||
writer.addDocument(doc);
|
|
||||||
writer.addDocument(doc);
|
|
||||||
writer.close();
|
|
||||||
|
|
||||||
DirectoryReader r = DirectoryReader.open(dir, -1);
|
|
||||||
try {
|
|
||||||
r.docFreq(new Term("field", "f"));
|
|
||||||
fail("did not hit expected exception");
|
|
||||||
} catch (IllegalStateException ise) {
|
|
||||||
// expected
|
|
||||||
}
|
|
||||||
|
|
||||||
assertEquals(-1, ((SegmentReader) r.leaves().get(0).reader()).getTermInfosIndexDivisor());
|
|
||||||
writer = new IndexWriter(
|
|
||||||
dir,
|
|
||||||
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).
|
|
||||||
setCodec(_TestUtil.alwaysPostingsFormat(new Lucene41PostingsFormat())).
|
|
||||||
setMergePolicy(newLogMergePolicy(10))
|
|
||||||
);
|
|
||||||
writer.addDocument(doc);
|
|
||||||
writer.close();
|
|
||||||
|
|
||||||
// LUCENE-1718: ensure re-open carries over no terms index:
|
|
||||||
DirectoryReader r2 = DirectoryReader.openIfChanged(r);
|
|
||||||
assertNotNull(r2);
|
|
||||||
assertNull(DirectoryReader.openIfChanged(r2));
|
|
||||||
r.close();
|
|
||||||
List<AtomicReaderContext> leaves = r2.leaves();
|
|
||||||
assertEquals(2, leaves.size());
|
|
||||||
for(AtomicReaderContext ctx : leaves) {
|
|
||||||
try {
|
|
||||||
ctx.reader().docFreq(new Term("field", "f"));
|
|
||||||
fail("did not hit expected exception");
|
|
||||||
} catch (IllegalStateException ise) {
|
|
||||||
// expected
|
|
||||||
}
|
|
||||||
}
|
|
||||||
r2.close();
|
|
||||||
dir.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
// LUCENE-2046
|
// LUCENE-2046
|
||||||
public void testPrepareCommitIsCurrent() throws Throwable {
|
public void testPrepareCommitIsCurrent() throws Throwable {
|
||||||
Directory dir = newDirectory();
|
Directory dir = newDirectory();
|
||||||
|
|
|
@ -212,15 +212,15 @@ public class TestDoc extends LuceneTestCase {
|
||||||
private SegmentInfoPerCommit merge(Directory dir, SegmentInfoPerCommit si1, SegmentInfoPerCommit si2, String merged, boolean useCompoundFile)
|
private SegmentInfoPerCommit merge(Directory dir, SegmentInfoPerCommit si1, SegmentInfoPerCommit si2, String merged, boolean useCompoundFile)
|
||||||
throws Exception {
|
throws Exception {
|
||||||
IOContext context = newIOContext(random());
|
IOContext context = newIOContext(random());
|
||||||
SegmentReader r1 = new SegmentReader(si1, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, context);
|
SegmentReader r1 = new SegmentReader(si1, context);
|
||||||
SegmentReader r2 = new SegmentReader(si2, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, context);
|
SegmentReader r2 = new SegmentReader(si2, context);
|
||||||
|
|
||||||
final Codec codec = Codec.getDefault();
|
final Codec codec = Codec.getDefault();
|
||||||
TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(si1.info.dir);
|
TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(si1.info.dir);
|
||||||
final SegmentInfo si = new SegmentInfo(si1.info.dir, Constants.LUCENE_MAIN_VERSION, merged, -1, false, codec, null, null);
|
final SegmentInfo si = new SegmentInfo(si1.info.dir, Constants.LUCENE_MAIN_VERSION, merged, -1, false, codec, null, null);
|
||||||
|
|
||||||
SegmentMerger merger = new SegmentMerger(Arrays.<AtomicReader>asList(r1, r2),
|
SegmentMerger merger = new SegmentMerger(Arrays.<AtomicReader>asList(r1, r2),
|
||||||
si, InfoStream.getDefault(), trackingDir, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL,
|
si, InfoStream.getDefault(), trackingDir,
|
||||||
MergeState.CheckAbort.NONE, new FieldInfos.FieldNumbers(), context);
|
MergeState.CheckAbort.NONE, new FieldInfos.FieldNumbers(), context);
|
||||||
|
|
||||||
MergeState mergeState = merger.merge();
|
MergeState mergeState = merger.merge();
|
||||||
|
@ -245,7 +245,7 @@ public class TestDoc extends LuceneTestCase {
|
||||||
|
|
||||||
private void printSegment(PrintWriter out, SegmentInfoPerCommit si)
|
private void printSegment(PrintWriter out, SegmentInfoPerCommit si)
|
||||||
throws Exception {
|
throws Exception {
|
||||||
SegmentReader reader = new SegmentReader(si, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, newIOContext(random()));
|
SegmentReader reader = new SegmentReader(si, newIOContext(random()));
|
||||||
|
|
||||||
for (int i = 0; i < reader.numDocs(); i++)
|
for (int i = 0; i < reader.numDocs(); i++)
|
||||||
out.println(reader.document(i));
|
out.println(reader.document(i));
|
||||||
|
|
|
@ -65,7 +65,7 @@ public class TestDocumentWriter extends LuceneTestCase {
|
||||||
SegmentInfoPerCommit info = writer.newestSegment();
|
SegmentInfoPerCommit info = writer.newestSegment();
|
||||||
writer.close();
|
writer.close();
|
||||||
//After adding the document, we should be able to read it back in
|
//After adding the document, we should be able to read it back in
|
||||||
SegmentReader reader = new SegmentReader(info, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, newIOContext(random()));
|
SegmentReader reader = new SegmentReader(info, newIOContext(random()));
|
||||||
assertTrue(reader != null);
|
assertTrue(reader != null);
|
||||||
StoredDocument doc = reader.document(0);
|
StoredDocument doc = reader.document(0);
|
||||||
assertTrue(doc != null);
|
assertTrue(doc != null);
|
||||||
|
@ -126,7 +126,7 @@ public class TestDocumentWriter extends LuceneTestCase {
|
||||||
writer.commit();
|
writer.commit();
|
||||||
SegmentInfoPerCommit info = writer.newestSegment();
|
SegmentInfoPerCommit info = writer.newestSegment();
|
||||||
writer.close();
|
writer.close();
|
||||||
SegmentReader reader = new SegmentReader(info, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, newIOContext(random()));
|
SegmentReader reader = new SegmentReader(info, newIOContext(random()));
|
||||||
|
|
||||||
DocsAndPositionsEnum termPositions = MultiFields.getTermPositionsEnum(reader, MultiFields.getLiveDocs(reader),
|
DocsAndPositionsEnum termPositions = MultiFields.getTermPositionsEnum(reader, MultiFields.getLiveDocs(reader),
|
||||||
"repeated", new BytesRef("repeated"));
|
"repeated", new BytesRef("repeated"));
|
||||||
|
@ -198,7 +198,7 @@ public class TestDocumentWriter extends LuceneTestCase {
|
||||||
writer.commit();
|
writer.commit();
|
||||||
SegmentInfoPerCommit info = writer.newestSegment();
|
SegmentInfoPerCommit info = writer.newestSegment();
|
||||||
writer.close();
|
writer.close();
|
||||||
SegmentReader reader = new SegmentReader(info, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, newIOContext(random()));
|
SegmentReader reader = new SegmentReader(info, newIOContext(random()));
|
||||||
|
|
||||||
DocsAndPositionsEnum termPositions = MultiFields.getTermPositionsEnum(reader, reader.getLiveDocs(), "f1", new BytesRef("a"));
|
DocsAndPositionsEnum termPositions = MultiFields.getTermPositionsEnum(reader, reader.getLiveDocs(), "f1", new BytesRef("a"));
|
||||||
assertTrue(termPositions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
|
assertTrue(termPositions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
|
||||||
|
@ -241,7 +241,7 @@ public class TestDocumentWriter extends LuceneTestCase {
|
||||||
writer.commit();
|
writer.commit();
|
||||||
SegmentInfoPerCommit info = writer.newestSegment();
|
SegmentInfoPerCommit info = writer.newestSegment();
|
||||||
writer.close();
|
writer.close();
|
||||||
SegmentReader reader = new SegmentReader(info, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, newIOContext(random()));
|
SegmentReader reader = new SegmentReader(info, newIOContext(random()));
|
||||||
|
|
||||||
DocsAndPositionsEnum termPositions = reader.termPositionsEnum(new Term("preanalyzed", "term1"));
|
DocsAndPositionsEnum termPositions = reader.termPositionsEnum(new Term("preanalyzed", "term1"));
|
||||||
assertTrue(termPositions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
|
assertTrue(termPositions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
|
||||||
|
|
|
@ -1305,36 +1305,6 @@ public class TestIndexWriter extends LuceneTestCase {
|
||||||
dir.close();
|
dir.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testIndexDivisor() throws Exception {
|
|
||||||
Directory dir = newDirectory();
|
|
||||||
IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
|
|
||||||
config.setTermIndexInterval(2);
|
|
||||||
IndexWriter w = new IndexWriter(dir, config);
|
|
||||||
StringBuilder s = new StringBuilder();
|
|
||||||
// must be > 256
|
|
||||||
for(int i=0;i<300;i++) {
|
|
||||||
s.append(' ').append(i);
|
|
||||||
}
|
|
||||||
Document d = new Document();
|
|
||||||
Field f = newTextField("field", s.toString(), Field.Store.NO);
|
|
||||||
d.add(f);
|
|
||||||
w.addDocument(d);
|
|
||||||
|
|
||||||
AtomicReader r = getOnlySegmentReader(w.getReader());
|
|
||||||
TermsEnum t = r.fields().terms("field").iterator(null);
|
|
||||||
int count = 0;
|
|
||||||
while(t.next() != null) {
|
|
||||||
final DocsEnum docs = _TestUtil.docs(random(), t, null, null, DocsEnum.FLAG_NONE);
|
|
||||||
assertEquals(0, docs.nextDoc());
|
|
||||||
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docs.nextDoc());
|
|
||||||
count++;
|
|
||||||
}
|
|
||||||
assertEquals(300, count);
|
|
||||||
r.close();
|
|
||||||
w.close();
|
|
||||||
dir.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testDeleteUnusedFiles() throws Exception {
|
public void testDeleteUnusedFiles() throws Exception {
|
||||||
for(int iter=0;iter<2;iter++) {
|
for(int iter=0;iter<2;iter++) {
|
||||||
Directory dir = newMockDirectory(); // relies on windows semantics
|
Directory dir = newMockDirectory(); // relies on windows semantics
|
||||||
|
@ -1716,20 +1686,6 @@ public class TestIndexWriter extends LuceneTestCase {
|
||||||
dir.close();
|
dir.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
// LUCENE-3183
|
|
||||||
public void testEmptyFieldNameTIIOne() throws IOException {
|
|
||||||
Directory dir = newDirectory();
|
|
||||||
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
|
|
||||||
iwc.setTermIndexInterval(1);
|
|
||||||
iwc.setReaderTermsIndexDivisor(1);
|
|
||||||
IndexWriter writer = new IndexWriter(dir, iwc);
|
|
||||||
Document doc = new Document();
|
|
||||||
doc.add(newTextField("", "a b c", Field.Store.NO));
|
|
||||||
writer.addDocument(doc);
|
|
||||||
writer.close();
|
|
||||||
dir.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testDeleteAllNRTLeftoverFiles() throws Exception {
|
public void testDeleteAllNRTLeftoverFiles() throws Exception {
|
||||||
|
|
||||||
Directory d = new MockDirectoryWrapper(random(), new RAMDirectory());
|
Directory d = new MockDirectoryWrapper(random(), new RAMDirectory());
|
||||||
|
|
|
@@ -66,7 +66,6 @@ public class TestIndexWriterConfig extends LuceneTestCase {
     assertEquals(OpenMode.CREATE_OR_APPEND, conf.getOpenMode());
     // we don't need to assert this, it should be unspecified
     assertTrue(IndexSearcher.getDefaultSimilarity() == conf.getSimilarity());
-    assertEquals(IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL, conf.getTermIndexInterval());
     assertEquals(IndexWriterConfig.getDefaultWriteLockTimeout(), conf.getWriteLockTimeout());
     assertEquals(IndexWriterConfig.WRITE_LOCK_TIMEOUT, IndexWriterConfig.getDefaultWriteLockTimeout());
     assertEquals(IndexWriterConfig.DEFAULT_MAX_BUFFERED_DELETE_TERMS, conf.getMaxBufferedDeleteTerms());
@@ -75,7 +74,6 @@ public class TestIndexWriterConfig extends LuceneTestCase {
     assertEquals(IndexWriterConfig.DEFAULT_READER_POOLING, conf.getReaderPooling());
     assertTrue(DocumentsWriterPerThread.defaultIndexingChain == conf.getIndexingChain());
     assertNull(conf.getMergedSegmentWarmer());
-    assertEquals(IndexWriterConfig.DEFAULT_READER_TERMS_INDEX_DIVISOR, conf.getReaderTermsIndexDivisor());
     assertEquals(TieredMergePolicy.class, conf.getMergePolicy().getClass());
     assertEquals(ThreadAffinityDocumentsWriterThreadPool.class, conf.getIndexerThreadPool().getClass());
     assertEquals(FlushByRamOrCountsPolicy.class, conf.getFlushPolicy().getClass());
@@ -92,7 +90,6 @@ public class TestIndexWriterConfig extends LuceneTestCase {
     getters.add("getMergeScheduler");
     getters.add("getOpenMode");
     getters.add("getSimilarity");
-    getters.add("getTermIndexInterval");
     getters.add("getWriteLockTimeout");
     getters.add("getDefaultWriteLockTimeout");
     getters.add("getMaxBufferedDeleteTerms");
@@ -104,7 +101,6 @@ public class TestIndexWriterConfig extends LuceneTestCase {
     getters.add("getMaxThreadStates");
     getters.add("getReaderPooling");
     getters.add("getIndexerThreadPool");
-    getters.add("getReaderTermsIndexDivisor");
     getters.add("getFlushPolicy");
     getters.add("getRAMPerThreadHardLimitMB");
     getters.add("getCodec");
@@ -200,14 +196,12 @@ public class TestIndexWriterConfig extends LuceneTestCase {
   public void testConstants() throws Exception {
     // Tests that the values of the constants does not change
     assertEquals(1000, IndexWriterConfig.WRITE_LOCK_TIMEOUT);
-    assertEquals(32, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL);
     assertEquals(-1, IndexWriterConfig.DISABLE_AUTO_FLUSH);
     assertEquals(IndexWriterConfig.DISABLE_AUTO_FLUSH, IndexWriterConfig.DEFAULT_MAX_BUFFERED_DELETE_TERMS);
     assertEquals(IndexWriterConfig.DISABLE_AUTO_FLUSH, IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS);
     assertEquals(16.0, IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB, 0.0);
     assertEquals(false, IndexWriterConfig.DEFAULT_READER_POOLING);
     assertEquals(true, IndexWriterConfig.DEFAULT_USE_COMPOUND_FILE_SYSTEM);
-    assertEquals(DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, IndexWriterConfig.DEFAULT_READER_TERMS_INDEX_DIVISOR);
   }
 
   @Test
@@ -341,23 +335,6 @@ public class TestIndexWriterConfig extends LuceneTestCase {
     } catch (IllegalArgumentException e) {
       // this is expected
     }
 
-    // Test setReaderTermsIndexDivisor
-    try {
-      conf.setReaderTermsIndexDivisor(0);
-      fail("should not have succeeded to set termsIndexDivisor to 0");
-    } catch (IllegalArgumentException e) {
-      // this is expected
-    }
-
-    // Setting to -1 is ok
-    conf.setReaderTermsIndexDivisor(-1);
-    try {
-      conf.setReaderTermsIndexDivisor(-2);
-      fail("should not have succeeded to set termsIndexDivisor to < -1");
-    } catch (IllegalArgumentException e) {
-      // this is expected
-    }
-
     try {
       conf.setRAMPerThreadHardLimitMB(2048);
@@ -130,7 +130,6 @@ public class TestIndexWriterForceMerge extends LuceneTestCase {
       for(int j=0;j<500;j++) {
         TestIndexWriter.addDocWithIndex(writer, j);
       }
-      final int termIndexInterval = writer.getConfig().getTermIndexInterval();
       // force one extra segment w/ different doc store so
       // we see the doc stores get merged
       writer.commit();
@@ -152,10 +151,7 @@
       dir.resetMaxUsedSizeInBytes();
       dir.setTrackDiskUsage(true);
 
-      // Import to use same term index interval else a
-      // smaller one here could increase the disk usage and
-      // cause a false failure:
-      writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random())).setOpenMode(OpenMode.APPEND).setTermIndexInterval(termIndexInterval).setMergePolicy(newLogMergePolicy()));
+      writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random())).setOpenMode(OpenMode.APPEND).setMergePolicy(newLogMergePolicy()));
       writer.forceMerge(1);
       writer.close();
       long maxDiskUsage = dir.getMaxUsedSizeInBytes();
@@ -314,7 +314,7 @@ public class TestIndexWriterReader extends LuceneTestCase {
     boolean doFullMerge = true;
 
     Directory dir1 = newDirectory();
-    IndexWriter writer = new IndexWriter(dir1, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random())).setReaderTermsIndexDivisor(2));
+    IndexWriter writer = new IndexWriter(dir1, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random())));
     // create the index
     createIndexNoClose(!doFullMerge, "index1", writer);
     writer.flush(false, true);
@@ -1006,35 +1006,6 @@ public class TestIndexWriterReader extends LuceneTestCase {
     assertTrue(didWarm.get());
   }
 
-  public void testNoTermsIndex() throws Exception {
-    // Some Codecs don't honor the ReaderTermsIndexDivisor, so skip the test if
-    // they're picked.
-    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT,
-        new MockAnalyzer(random())).setReaderTermsIndexDivisor(-1);
-
-    // Don't proceed if picked Codec is in the list of illegal ones.
-    final String format = _TestUtil.getPostingsFormat("f");
-    assumeFalse("Format: " + format + " does not support ReaderTermsIndexDivisor!",
-        (format.equals("SimpleText") || format.equals("Memory") || format.equals("Direct")));
-
-    Directory dir = newDirectory();
-    IndexWriter w = new IndexWriter(dir, conf);
-    Document doc = new Document();
-    doc.add(new TextField("f", "val", Field.Store.NO));
-    w.addDocument(doc);
-    SegmentReader r = getOnlySegmentReader(DirectoryReader.open(w, true));
-    try {
-      _TestUtil.docs(random(), r, "f", new BytesRef("val"), null, null, DocsEnum.FLAG_NONE);
-      fail("should have failed to seek since terms index was not loaded.");
-    } catch (IllegalStateException e) {
-      // expected - we didn't load the term index
-    } finally {
-      r.close();
-      w.close();
-      dir.close();
-    }
-  }
-
   public void testReopenAfterNoRealChange() throws Exception {
     Directory d = newDirectory();
     IndexWriter w = new IndexWriter(
@@ -54,8 +54,8 @@ public class TestSegmentMerger extends LuceneTestCase {
     SegmentInfoPerCommit info1 = DocHelper.writeDoc(random(), merge1Dir, doc1);
     DocHelper.setupDoc(doc2);
     SegmentInfoPerCommit info2 = DocHelper.writeDoc(random(), merge2Dir, doc2);
-    reader1 = new SegmentReader(info1, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, newIOContext(random()));
-    reader2 = new SegmentReader(info2, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, newIOContext(random()));
+    reader1 = new SegmentReader(info1, newIOContext(random()));
+    reader2 = new SegmentReader(info2, newIOContext(random()));
   }
 
   @Override
@@ -81,7 +81,7 @@ public class TestSegmentMerger extends LuceneTestCase {
     final SegmentInfo si = new SegmentInfo(mergedDir, Constants.LUCENE_MAIN_VERSION, mergedSegment, -1, false, codec, null, null);
 
     SegmentMerger merger = new SegmentMerger(Arrays.<AtomicReader>asList(reader1, reader2),
-        si, InfoStream.getDefault(), mergedDir, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL,
+        si, InfoStream.getDefault(), mergedDir,
         MergeState.CheckAbort.NONE, new FieldInfos.FieldNumbers(), newIOContext(random()));
     MergeState mergeState = merger.merge();
     int docsMerged = mergeState.segmentInfo.getDocCount();
@@ -91,7 +91,7 @@ public class TestSegmentMerger extends LuceneTestCase {
         new SegmentInfo(mergedDir, Constants.LUCENE_MAIN_VERSION, mergedSegment, docsMerged,
                         false, codec, null, null),
         0, -1L),
-        DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, newIOContext(random()));
+        newIOContext(random()));
     assertTrue(mergedReader != null);
     assertTrue(mergedReader.numDocs() == 2);
     StoredDocument newDoc1 = mergedReader.document(0);
@@ -43,7 +43,7 @@ public class TestSegmentReader extends LuceneTestCase {
     dir = newDirectory();
     DocHelper.setupDoc(testDoc);
     SegmentInfoPerCommit info = DocHelper.writeDoc(random(), dir, testDoc);
-    reader = new SegmentReader(info, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, IOContext.READ);
+    reader = new SegmentReader(info, IOContext.READ);
   }
 
   @Override
@@ -50,16 +50,11 @@ public class TestSegmentTermDocs extends LuceneTestCase {
   public void test() {
     assertTrue(dir != null);
   }
 
-  public void testTermDocs() throws IOException {
-    testTermDocs(1);
-  }
-
-  public void testTermDocs(int indexDivisor) throws IOException {
+  public void testTermDocs() throws IOException {
     //After adding the document, we should be able to read it back in
-    SegmentReader reader = new SegmentReader(info, indexDivisor, newIOContext(random()));
+    SegmentReader reader = new SegmentReader(info, newIOContext(random()));
     assertTrue(reader != null);
-    assertEquals(indexDivisor, reader.getTermInfosIndexDivisor());
 
     TermsEnum terms = reader.fields().terms(DocHelper.TEXT_FIELD_2_KEY).iterator(null);
     terms.seekCeil(new BytesRef("field"));
@@ -74,13 +69,9 @@ public class TestSegmentTermDocs extends LuceneTestCase {
   }
 
   public void testBadSeek() throws IOException {
-    testBadSeek(1);
-  }
-
-  public void testBadSeek(int indexDivisor) throws IOException {
     {
       //After adding the document, we should be able to read it back in
-      SegmentReader reader = new SegmentReader(info, indexDivisor, newIOContext(random()));
+      SegmentReader reader = new SegmentReader(info, newIOContext(random()));
       assertTrue(reader != null);
       DocsEnum termDocs = _TestUtil.docs(random(), reader,
                                          "textField2",
@@ -94,7 +85,7 @@ public class TestSegmentTermDocs extends LuceneTestCase {
     }
     {
       //After adding the document, we should be able to read it back in
-      SegmentReader reader = new SegmentReader(info, indexDivisor, newIOContext(random()));
+      SegmentReader reader = new SegmentReader(info, newIOContext(random()));
       assertTrue(reader != null);
      DocsEnum termDocs = _TestUtil.docs(random(), reader,
                                          "junk",
@@ -108,10 +99,6 @@ public class TestSegmentTermDocs extends LuceneTestCase {
   }
 
   public void testSkipTo() throws IOException {
-    testSkipTo(1);
-  }
-
-  public void testSkipTo(int indexDivisor) throws IOException {
     Directory dir = newDirectory();
     IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
 
@@ -131,7 +118,7 @@ public class TestSegmentTermDocs extends LuceneTestCase {
     writer.forceMerge(1);
     writer.close();
 
-    IndexReader reader = DirectoryReader.open(dir, indexDivisor);
+    IndexReader reader = DirectoryReader.open(dir);
 
     DocsEnum tdocs = _TestUtil.docs(random(), reader,
                                     ta.field(),
@@ -267,15 +254,7 @@ public class TestSegmentTermDocs extends LuceneTestCase {
     reader.close();
     dir.close();
   }
 
-  public void testIndexDivisor() throws IOException {
-    testDoc = new Document();
-    DocHelper.setupDoc(testDoc);
-    DocHelper.writeDoc(random(), dir, testDoc);
-    testTermDocs(2);
-    testBadSeek(2);
-    testSkipTo(2);
-  }
-
   private void addDoc(IndexWriter writer, String value) throws IOException
   {
@@ -45,7 +45,7 @@ public class TaxonomyMergeUtils {
     // merge the taxonomies
     destTaxWriter.addTaxonomy(srcTaxDir, map);
     int ordinalMap[] = map.getMap();
-    DirectoryReader reader = DirectoryReader.open(srcIndexDir, -1);
+    DirectoryReader reader = DirectoryReader.open(srcIndexDir);
     List<AtomicReaderContext> leaves = reader.leaves();
     int numReaders = leaves.size();
     AtomicReader wrappedLeaves[] = new AtomicReader[numReaders];
@@ -45,9 +45,15 @@ import org.apache.lucene.util.BytesRef;
  * {@link FixedGapTermsIndexWriter}.
  */
 public final class Lucene41WithOrds extends PostingsFormat {
+  final int termIndexInterval;
 
   public Lucene41WithOrds() {
+    this(FixedGapTermsIndexWriter.DEFAULT_TERM_INDEX_INTERVAL);
+  }
+
+  public Lucene41WithOrds(int termIndexInterval) {
     super("Lucene41WithOrds");
+    this.termIndexInterval = termIndexInterval;
   }
 
   @Override
@@ -61,7 +67,7 @@ public final class Lucene41WithOrds extends PostingsFormat {
     TermsIndexWriterBase indexWriter;
     boolean success = false;
     try {
-      indexWriter = new FixedGapTermsIndexWriter(state);
+      indexWriter = new FixedGapTermsIndexWriter(state, termIndexInterval);
       success = true;
     } finally {
       if (!success) {
@@ -97,7 +103,6 @@ public final class Lucene41WithOrds extends PostingsFormat {
       indexReader = new FixedGapTermsIndexReader(state.directory,
                                                  state.fieldInfos,
                                                  state.segmentInfo.name,
-                                                 state.termsIndexDivisor,
                                                  BytesRef.getUTF8SortedAsUnicodeComparator(),
                                                  state.segmentSuffix, state.context);
       success = true;
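With the interval now owned by the postings format itself rather than by IndexWriterConfig, a codec can choose a different term index interval per field. The following is only a rough sketch of that wiring, not part of this change; the field name "id" and the interval values 32/128 are illustrative assumptions.

  // Hypothetical per-field configuration in a test; any format that accepts an interval works the same way.
  Codec codec = new Lucene42Codec() {
    private final PostingsFormat dense = new Lucene41WithOrds(32);    // index every 32nd term
    private final PostingsFormat sparse = new Lucene41WithOrds(128);  // index every 128th term
    @Override
    public PostingsFormat getPostingsFormatForField(String field) {
      return "id".equals(field) ? sparse : dense;
    }
  };
  IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
  iwc.setCodec(codec);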
@@ -0,0 +1,144 @@
+package org.apache.lucene.codecs.lucene41vargap;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.codecs.FieldsConsumer;
+import org.apache.lucene.codecs.FieldsProducer;
+import org.apache.lucene.codecs.PostingsFormat;
+import org.apache.lucene.codecs.PostingsReaderBase;
+import org.apache.lucene.codecs.PostingsWriterBase;
+import org.apache.lucene.codecs.blockterms.BlockTermsReader;
+import org.apache.lucene.codecs.blockterms.BlockTermsWriter;
+import org.apache.lucene.codecs.blockterms.FixedGapTermsIndexWriter;
+import org.apache.lucene.codecs.blockterms.TermsIndexReaderBase;
+import org.apache.lucene.codecs.blockterms.TermsIndexWriterBase;
+import org.apache.lucene.codecs.blockterms.VariableGapTermsIndexReader;
+import org.apache.lucene.codecs.blockterms.VariableGapTermsIndexWriter;
+import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat; // javadocs
+import org.apache.lucene.codecs.lucene41.Lucene41PostingsReader;
+import org.apache.lucene.codecs.lucene41.Lucene41PostingsWriter;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SegmentWriteState;
+
+// TODO: we could make separate base class that can wrapp
+// any PostingsBaseFormat and make it ord-able...
+
+/**
+ * Customized version of {@link Lucene41PostingsFormat} that uses
+ * {@link VariableGapTermsIndexWriter} with a fixed interval, but
+ * forcing high docfreq terms to be indexed terms.
+ */
+public final class Lucene41VarGapDocFreqInterval extends PostingsFormat {
+  final int termIndexInterval;
+  final int docFreqThreshold;
+
+  public Lucene41VarGapDocFreqInterval() {
+    this(1000000, FixedGapTermsIndexWriter.DEFAULT_TERM_INDEX_INTERVAL);
+  }
+
+  public Lucene41VarGapDocFreqInterval(int docFreqThreshold, int termIndexInterval) {
+    super("Lucene41VarGapFixedInterval");
+    this.termIndexInterval = termIndexInterval;
+    this.docFreqThreshold = docFreqThreshold;
+  }
+
+  @Override
+  public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
+    PostingsWriterBase docs = new Lucene41PostingsWriter(state);
+
+    // TODO: should we make the terms index more easily
+    // pluggable?  Ie so that this codec would record which
+    // index impl was used, and switch on loading?
+    // Or... you must make a new Codec for this?
+    TermsIndexWriterBase indexWriter;
+    boolean success = false;
+    try {
+      indexWriter = new VariableGapTermsIndexWriter(state, new VariableGapTermsIndexWriter.EveryNOrDocFreqTermSelector(docFreqThreshold, termIndexInterval));
+      success = true;
+    } finally {
+      if (!success) {
+        docs.close();
+      }
+    }
+
+    success = false;
+    try {
+      // Must use BlockTermsWriter (not BlockTree) because
+      // BlockTree doens't support ords (yet)...
+      FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, docs);
+      success = true;
+      return ret;
+    } finally {
+      if (!success) {
+        try {
+          docs.close();
+        } finally {
+          indexWriter.close();
+        }
+      }
+    }
+  }
+
+  @Override
+  public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
+    PostingsReaderBase postings = new Lucene41PostingsReader(state.directory, state.fieldInfos, state.segmentInfo, state.context, state.segmentSuffix);
+    TermsIndexReaderBase indexReader;
+
+    boolean success = false;
+    try {
+      indexReader = new VariableGapTermsIndexReader(state.directory,
+                                                    state.fieldInfos,
+                                                    state.segmentInfo.name,
+                                                    state.segmentSuffix, state.context);
+      success = true;
+    } finally {
+      if (!success) {
+        postings.close();
+      }
+    }
+
+    success = false;
+    try {
+      FieldsProducer ret = new BlockTermsReader(indexReader,
+                                                state.directory,
+                                                state.fieldInfos,
+                                                state.segmentInfo,
+                                                postings,
+                                                state.context,
+                                                state.segmentSuffix);
+      success = true;
+      return ret;
+    } finally {
+      if (!success) {
+        try {
+          postings.close();
+        } finally {
+          indexReader.close();
+        }
+      }
+    }
+  }
+
+  /** Extension of freq postings file */
+  static final String FREQ_EXTENSION = "frq";
+
+  /** Extension of prox postings file */
+  static final String PROX_EXTENSION = "prx";
+}
@@ -0,0 +1,141 @@
+package org.apache.lucene.codecs.lucene41vargap;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.codecs.FieldsConsumer;
+import org.apache.lucene.codecs.FieldsProducer;
+import org.apache.lucene.codecs.PostingsFormat;
+import org.apache.lucene.codecs.PostingsReaderBase;
+import org.apache.lucene.codecs.PostingsWriterBase;
+import org.apache.lucene.codecs.blockterms.BlockTermsReader;
+import org.apache.lucene.codecs.blockterms.BlockTermsWriter;
+import org.apache.lucene.codecs.blockterms.FixedGapTermsIndexWriter;
+import org.apache.lucene.codecs.blockterms.TermsIndexReaderBase;
+import org.apache.lucene.codecs.blockterms.TermsIndexWriterBase;
+import org.apache.lucene.codecs.blockterms.VariableGapTermsIndexReader;
+import org.apache.lucene.codecs.blockterms.VariableGapTermsIndexWriter;
+import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat; // javadocs
+import org.apache.lucene.codecs.lucene41.Lucene41PostingsReader;
+import org.apache.lucene.codecs.lucene41.Lucene41PostingsWriter;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SegmentWriteState;
+
+// TODO: we could make separate base class that can wrapp
+// any PostingsBaseFormat and make it ord-able...
+
+/**
+ * Customized version of {@link Lucene41PostingsFormat} that uses
+ * {@link VariableGapTermsIndexWriter} with a fixed interval.
+ */
+public final class Lucene41VarGapFixedInterval extends PostingsFormat {
+  final int termIndexInterval;
+
+  public Lucene41VarGapFixedInterval() {
+    this(FixedGapTermsIndexWriter.DEFAULT_TERM_INDEX_INTERVAL);
+  }
+
+  public Lucene41VarGapFixedInterval(int termIndexInterval) {
+    super("Lucene41VarGapFixedInterval");
+    this.termIndexInterval = termIndexInterval;
+  }
+
+  @Override
+  public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
+    PostingsWriterBase docs = new Lucene41PostingsWriter(state);
+
+    // TODO: should we make the terms index more easily
+    // pluggable?  Ie so that this codec would record which
+    // index impl was used, and switch on loading?
+    // Or... you must make a new Codec for this?
+    TermsIndexWriterBase indexWriter;
+    boolean success = false;
+    try {
+      indexWriter = new VariableGapTermsIndexWriter(state, new VariableGapTermsIndexWriter.EveryNTermSelector(termIndexInterval));
+      success = true;
+    } finally {
+      if (!success) {
+        docs.close();
+      }
+    }
+
+    success = false;
+    try {
+      // Must use BlockTermsWriter (not BlockTree) because
+      // BlockTree doens't support ords (yet)...
+      FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, docs);
+      success = true;
+      return ret;
+    } finally {
+      if (!success) {
+        try {
+          docs.close();
+        } finally {
+          indexWriter.close();
+        }
+      }
+    }
+  }
+
+  @Override
+  public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
+    PostingsReaderBase postings = new Lucene41PostingsReader(state.directory, state.fieldInfos, state.segmentInfo, state.context, state.segmentSuffix);
+    TermsIndexReaderBase indexReader;
+
+    boolean success = false;
+    try {
+      indexReader = new VariableGapTermsIndexReader(state.directory,
+                                                    state.fieldInfos,
+                                                    state.segmentInfo.name,
+                                                    state.segmentSuffix, state.context);
+      success = true;
+    } finally {
+      if (!success) {
+        postings.close();
+      }
+    }
+
+    success = false;
+    try {
+      FieldsProducer ret = new BlockTermsReader(indexReader,
+                                                state.directory,
+                                                state.fieldInfos,
+                                                state.segmentInfo,
+                                                postings,
+                                                state.context,
+                                                state.segmentSuffix);
+      success = true;
+      return ret;
+    } finally {
+      if (!success) {
+        try {
+          postings.close();
+        } finally {
+          indexReader.close();
+        }
+      }
+    }
+  }
+
+  /** Extension of freq postings file */
+  static final String FREQ_EXTENSION = "frq";
+
+  /** Extension of prox postings file */
+  static final String PROX_EXTENSION = "prx";
+}
@@ -0,0 +1,25 @@
+<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html>
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+</head>
+<body>
+Codecs for testing that support {@link org.apache.lucene.codecs.blockterms.VariableGapTermsIndexReader}
+</body>
+</html>
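For orientation, the two test-only formats added above pick index terms differently; a small usage sketch, where 64 and 1000 are arbitrary example values rather than defaults:

  PostingsFormat fixedEveryN = new Lucene41VarGapFixedInterval(64);         // index every 64th term
  PostingsFormat byDocFreq = new Lucene41VarGapDocFreqInterval(1000, 64);   // same, plus any term with docFreq >= 1000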
@@ -169,7 +169,6 @@ public final class MockFixedIntBlockPostingsFormat extends PostingsFormat {
       indexReader = new FixedGapTermsIndexReader(state.directory,
                                                  state.fieldInfos,
                                                  state.segmentInfo.name,
-                                                 state.termsIndexDivisor,
                                                  BytesRef.getUTF8SortedAsUnicodeComparator(), state.segmentSuffix,
                                                  IOContext.DEFAULT);
       success = true;
@@ -194,7 +194,6 @@ public final class MockVariableIntBlockPostingsFormat extends PostingsFormat {
       indexReader = new FixedGapTermsIndexReader(state.directory,
                                                  state.fieldInfos,
                                                  state.segmentInfo.name,
-                                                 state.termsIndexDivisor,
                                                  BytesRef.getUTF8SortedAsUnicodeComparator(),
                                                  state.segmentSuffix, state.context);
       success = true;
@@ -220,11 +220,11 @@ public final class MockRandomPostingsFormat extends PostingsFormat {
     final TermsIndexWriterBase indexWriter;
     try {
       if (random.nextBoolean()) {
-        state.termIndexInterval = _TestUtil.nextInt(random, 1, 100);
+        int termIndexInterval = _TestUtil.nextInt(random, 1, 100);
         if (LuceneTestCase.VERBOSE) {
-          System.out.println("MockRandomCodec: fixed-gap terms index (tii=" + state.termIndexInterval + ")");
+          System.out.println("MockRandomCodec: fixed-gap terms index (tii=" + termIndexInterval + ")");
         }
-        indexWriter = new FixedGapTermsIndexWriter(state);
+        indexWriter = new FixedGapTermsIndexWriter(state, termIndexInterval);
       } else {
         final VariableGapTermsIndexWriter.IndexTermSelector selector;
         final int n2 = random.nextInt(3);
@@ -340,8 +340,7 @@ public final class MockRandomPostingsFormat extends PostingsFormat {
                                            state.segmentInfo,
                                            postingsReader,
                                            state.context,
-                                           state.segmentSuffix,
-                                           state.termsIndexDivisor);
+                                           state.segmentSuffix);
       success = true;
     } finally {
       if (!success) {
@@ -359,20 +358,14 @@ public final class MockRandomPostingsFormat extends PostingsFormat {
       final boolean doFixedGap = random.nextBoolean();
 
       // randomness diverges from writer, here:
-      if (state.termsIndexDivisor != -1) {
-        state.termsIndexDivisor = _TestUtil.nextInt(random, 1, 10);
-      }
-
       if (doFixedGap) {
-        // if termsIndexDivisor is set to -1, we should not touch it. It means a
-        // test explicitly instructed not to load the terms index.
         if (LuceneTestCase.VERBOSE) {
-          System.out.println("MockRandomCodec: fixed-gap terms index (divisor=" + state.termsIndexDivisor + ")");
+          System.out.println("MockRandomCodec: fixed-gap terms index");
         }
         indexReader = new FixedGapTermsIndexReader(state.directory,
                                                    state.fieldInfos,
                                                    state.segmentInfo.name,
-                                                   state.termsIndexDivisor,
                                                    BytesRef.getUTF8SortedAsUnicodeComparator(),
                                                    state.segmentSuffix, state.context);
       } else {
@@ -383,12 +376,11 @@ public final class MockRandomPostingsFormat extends PostingsFormat {
           random.nextLong();
         }
         if (LuceneTestCase.VERBOSE) {
-          System.out.println("MockRandomCodec: variable-gap terms index (divisor=" + state.termsIndexDivisor + ")");
+          System.out.println("MockRandomCodec: variable-gap terms index");
         }
         indexReader = new VariableGapTermsIndexReader(state.directory,
                                                       state.fieldInfos,
                                                       state.segmentInfo.name,
-                                                      state.termsIndexDivisor,
                                                       state.segmentSuffix, state.context);
 
       }
@@ -92,7 +92,6 @@ public final class MockSepPostingsFormat extends PostingsFormat {
       indexReader = new FixedGapTermsIndexReader(state.directory,
                                                  state.fieldInfos,
                                                  state.segmentInfo.name,
-                                                 state.termsIndexDivisor,
                                                  BytesRef.getUTF8SortedAsUnicodeComparator(),
                                                  state.segmentSuffix, state.context);
       success = true;
@@ -84,8 +84,7 @@ public final class NestedPulsingPostingsFormat extends PostingsFormat {
                                               state.directory, state.fieldInfos, state.segmentInfo,
                                               pulsingReader,
                                               state.context,
-                                              state.segmentSuffix,
-                                              state.termsIndexDivisor);
+                                              state.segmentSuffix);
       success = true;
       return ret;
     } finally {
@@ -1022,7 +1022,7 @@ public abstract class BaseDocValuesFormatTestCase extends LuceneTestCase {
 
     writer.close(true);
 
-    DirectoryReader reader = DirectoryReader.open(dir, 1);
+    DirectoryReader reader = DirectoryReader.open(dir);
     assertEquals(1, reader.leaves().size());
 
     IndexSearcher searcher = new IndexSearcher(reader);
@@ -38,6 +38,8 @@ import org.apache.lucene.codecs.FieldsProducer;
 import org.apache.lucene.codecs.PostingsConsumer;
 import org.apache.lucene.codecs.TermStats;
 import org.apache.lucene.codecs.TermsConsumer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
 import org.apache.lucene.index.FieldInfo.DocValuesType;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.store.Directory;
@@ -483,7 +485,7 @@ public abstract class BasePostingsFormatTestCase extends LuceneTestCase {
 
     SegmentWriteState writeState = new SegmentWriteState(null, dir,
                                                          segmentInfo, newFieldInfos,
-                                                         32, null, new IOContext(new FlushInfo(maxDoc, bytes)));
+                                                         null, new IOContext(new FlushInfo(maxDoc, bytes)));
     FieldsConsumer fieldsConsumer = codec.postingsFormat().fieldsConsumer(writeState);
 
     for(Map.Entry<String,Map<BytesRef,Long>> fieldEnt : fields.entrySet()) {
@@ -567,7 +569,7 @@ public abstract class BasePostingsFormatTestCase extends LuceneTestCase {
 
     currentFieldInfos = newFieldInfos;
 
-    SegmentReadState readState = new SegmentReadState(dir, segmentInfo, newFieldInfos, IOContext.DEFAULT, 1);
+    SegmentReadState readState = new SegmentReadState(dir, segmentInfo, newFieldInfos, IOContext.DEFAULT);
 
     return codec.postingsFormat().fieldsProducer(readState);
   }
@@ -1130,4 +1132,85 @@ public abstract class BasePostingsFormatTestCase extends LuceneTestCase {
       _TestUtil.rmDir(path);
     }
   }
+
+  public void testEmptyField() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
+    iwc.setCodec(getCodec());
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+    Document doc = new Document();
+    doc.add(newStringField("", "something", Field.Store.NO));
+    iw.addDocument(doc);
+    DirectoryReader ir = iw.getReader();
+    AtomicReader ar = getOnlySegmentReader(ir);
+    Fields fields = ar.fields();
+    assertTrue(fields.size() == 1);
+    Terms terms = ar.terms("");
+    assertNotNull(terms);
+    TermsEnum termsEnum = terms.iterator(null);
+    assertNotNull(termsEnum.next());
+    assertEquals(termsEnum.term(), new BytesRef("something"));
+    assertNull(termsEnum.next());
+    ir.close();
+    iw.close();
+    dir.close();
+  }
+
+  public void testEmptyFieldAndEmptyTerm() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
+    iwc.setCodec(getCodec());
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+    Document doc = new Document();
+    doc.add(newStringField("", "", Field.Store.NO));
+    iw.addDocument(doc);
+    DirectoryReader ir = iw.getReader();
+    AtomicReader ar = getOnlySegmentReader(ir);
+    Fields fields = ar.fields();
+    assertTrue(fields.size() == 1);
+    Terms terms = ar.terms("");
+    assertNotNull(terms);
+    TermsEnum termsEnum = terms.iterator(null);
+    assertNotNull(termsEnum.next());
+    assertEquals(termsEnum.term(), new BytesRef(""));
+    assertNull(termsEnum.next());
+    ir.close();
+    iw.close();
+    dir.close();
+  }
+
+  // tests that ghost fields still work
+  // TODO: can this be improved?
+  public void testGhosts() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
+    iwc.setCodec(getCodec());
+    iwc.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+    Document doc = new Document();
+    iw.addDocument(doc);
+    doc.add(newStringField("ghostField", "something", Field.Store.NO));
+    iw.addDocument(doc);
+    iw.forceMerge(1);
+    iw.deleteDocuments(new Term("ghostField", "something")); // delete the only term for the field
+    iw.forceMerge(1);
+    DirectoryReader ir = iw.getReader();
+    AtomicReader ar = getOnlySegmentReader(ir);
+    Fields fields = ar.fields();
+    // Ghost busting terms dict impls will have
+    // fields.size() == 0; all others must be == 1:
+    assertTrue(fields.size() <= 1);
+    Terms terms = fields.terms("ghostField");
+    if (terms != null) {
+      TermsEnum termsEnum = terms.iterator(null);
+      BytesRef term = termsEnum.next();
+      if (term != null) {
+        DocsEnum docsEnum = termsEnum.docs(null, null);
+        assertTrue(docsEnum.nextDoc() == DocsEnum.NO_MORE_DOCS);
+      }
+    }
+    ir.close();
+    iw.close();
+    dir.close();
+  }
 }
@@ -33,6 +33,8 @@ import org.apache.lucene.codecs.asserting.AssertingDocValuesFormat;
 import org.apache.lucene.codecs.asserting.AssertingPostingsFormat;
 import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat;
 import org.apache.lucene.codecs.lucene41ords.Lucene41WithOrds;
+import org.apache.lucene.codecs.lucene41vargap.Lucene41VarGapDocFreqInterval;
+import org.apache.lucene.codecs.lucene41vargap.Lucene41VarGapFixedInterval;
 import org.apache.lucene.codecs.lucene42.Lucene42Codec;
 import org.apache.lucene.codecs.lucene42.Lucene42DocValuesFormat;
 import org.apache.lucene.codecs.bloom.TestBloomFilteredLucene41Postings;
@@ -137,7 +139,9 @@ public class RandomCodec extends Lucene42Codec {
         new MockVariableIntBlockPostingsFormat( _TestUtil.nextInt(random, 1, 127)),
         new MockRandomPostingsFormat(random),
         new NestedPulsingPostingsFormat(),
-        new Lucene41WithOrds(),
+        new Lucene41WithOrds(_TestUtil.nextInt(random, 1, 1000)),
+        new Lucene41VarGapFixedInterval(_TestUtil.nextInt(random, 1, 1000)),
+        new Lucene41VarGapDocFreqInterval(_TestUtil.nextInt(random, 1, 100), _TestUtil.nextInt(random, 1, 1000)),
         new SimpleTextPostingsFormat(),
         new AssertingPostingsFormat(),
         new MemoryPostingsFormat(true, random.nextFloat()),
@@ -308,7 +308,7 @@ public class RandomIndexWriter implements Closeable {
       }
       w.commit();
       if (r.nextBoolean()) {
-        return DirectoryReader.open(w.getDirectory(), _TestUtil.nextInt(r, 1, 10));
+        return DirectoryReader.open(w.getDirectory());
       } else {
         return w.getReader(applyDeletions);
       }
@@ -753,15 +753,6 @@ public abstract class LuceneTestCase extends Assert {
         c.setMaxBufferedDocs(_TestUtil.nextInt(r, 16, 1000));
       }
     }
-    if (r.nextBoolean()) {
-      if (rarely(r)) {
-        // crazy value
-        c.setTermIndexInterval(r.nextBoolean() ? _TestUtil.nextInt(r, 1, 31) : _TestUtil.nextInt(r, 129, 1000));
-      } else {
-        // reasonable value
-        c.setTermIndexInterval(_TestUtil.nextInt(r, 32, 128));
-      }
-    }
     if (r.nextBoolean()) {
       int maxNumThreadStates = rarely(r) ? _TestUtil.nextInt(r, 5, 20) // crazy value
                                          : _TestUtil.nextInt(r, 1, 4); // reasonable value
@@ -816,7 +807,6 @@ public abstract class LuceneTestCase extends Assert {
     }
     c.setUseCompoundFile(r.nextBoolean());
     c.setReaderPooling(r.nextBoolean());
-    c.setReaderTermsIndexDivisor(_TestUtil.nextInt(r, 1, 4));
     return c;
   }
 
@@ -20,6 +20,8 @@ org.apache.lucene.codecs.mocksep.MockSepPostingsFormat
 org.apache.lucene.codecs.nestedpulsing.NestedPulsingPostingsFormat
 org.apache.lucene.codecs.ramonly.RAMOnlyPostingsFormat
 org.apache.lucene.codecs.lucene41ords.Lucene41WithOrds
+org.apache.lucene.codecs.lucene41vargap.Lucene41VarGapFixedInterval
+org.apache.lucene.codecs.lucene41vargap.Lucene41VarGapDocFreqInterval
 org.apache.lucene.codecs.bloom.TestBloomFilteredLucene41Postings
 org.apache.lucene.codecs.asserting.AssertingPostingsFormat
 org.apache.lucene.codecs.lucene40.Lucene40RWPostingsFormat
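Because the new test formats are registered through SPI just above, they can be resolved by name when a segment is read back; a minimal sketch (only the name actually passed to the PostingsFormat constructor is guaranteed to resolve):

  PostingsFormat pf = PostingsFormat.forName("Lucene41VarGapFixedInterval");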
@@ -27,12 +27,8 @@ import org.apache.solr.util.plugin.NamedListInitializedPlugin;
  * Factory used to build a new IndexReader instance.
  */
 public abstract class IndexReaderFactory implements NamedListInitializedPlugin {
-  protected int termInfosIndexDivisor = 1;//IndexReader.DEFAULT_TERMS_INDEX_DIVISOR; Set this once Lucene makes this public.
   /**
-   * Potentially initializes {@link #termInfosIndexDivisor}. Overriding classes should call super.init() in order
-   * to make sure termInfosIndexDivisor is set.
-   * <p>
-   * <code>init</code> will be called just once, immediately after creation.
+   * init will be called just once, immediately after creation.
    * <p>
    * The args are user-level initialization parameters that may be specified
    * when declaring an indexReaderFactory in solrconfig.xml
@@ -40,18 +36,10 @@ public abstract class IndexReaderFactory implements NamedListInitializedPlugin {
    */
   @Override
   public void init(NamedList args) {
-    Integer v = (Integer)args.get("setTermIndexDivisor");
+    Object v = args.get("setTermIndexDivisor");
     if (v != null) {
-      termInfosIndexDivisor = v.intValue();
+      throw new IllegalArgumentException("Illegal parameter 'setTermIndexDivisor'");
     }
   }
-
-  /**
-   *
-   * @return The setting of {@link #termInfosIndexDivisor}
-   */
-  public int getTermInfosIndexDivisor() {
-    return termInfosIndexDivisor;
   }
 
   /**
@@ -31,6 +31,6 @@ public class StandardIndexReaderFactory extends IndexReaderFactory {
 
   @Override
   public DirectoryReader newReader(Directory indexDir, SolrCore core) throws IOException {
-    return DirectoryReader.open(indexDir, termInfosIndexDivisor);
+    return DirectoryReader.open(indexDir);
   }
 }
@@ -69,7 +69,6 @@ public class SolrIndexConfig {
   public final String lockType;
   public final PluginInfo mergePolicyInfo;
   public final PluginInfo mergeSchedulerInfo;
-  public final int termIndexInterval;
 
   public final PluginInfo mergedSegmentWarmerInfo;
 
@@ -95,7 +94,6 @@
     ramBufferSizeMB = 100;
     writeLockTimeout = -1;
     lockType = LOCK_TYPE_NATIVE;
-    termIndexInterval = IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL;
     mergePolicyInfo = null;
     mergeSchedulerInfo = null;
     defaultMergePolicyClassName = TieredMergePolicy.class.getName();
@@ -148,7 +146,10 @@
     mergeSchedulerInfo = getPluginInfo(prefix + "/mergeScheduler", solrConfig, def.mergeSchedulerInfo);
     mergePolicyInfo = getPluginInfo(prefix + "/mergePolicy", solrConfig, def.mergePolicyInfo);
 
-    termIndexInterval = solrConfig.getInt(prefix + "/termIndexInterval", def.termIndexInterval);
+    String val = solrConfig.get(prefix + "/termIndexInterval", null);
+    if (val != null) {
+      throw new IllegalArgumentException("Illegal parameter 'termIndexInterval'");
+    }
 
     boolean infoStreamEnabled = solrConfig.getBool(prefix + "/infoStream", false);
     if(infoStreamEnabled) {
@@ -198,9 +199,6 @@
     if (ramBufferSizeMB != -1)
       iwc.setRAMBufferSizeMB(ramBufferSizeMB);
 
-    if (termIndexInterval != -1)
-      iwc.setTermIndexInterval(termIndexInterval);
-
     if (writeLockTimeout != -1)
       iwc.setWriteLockTimeout(writeLockTimeout);
 
@@ -32,7 +32,6 @@
 
   <indexConfig>
     <useCompoundFile>${useCompoundFile:false}</useCompoundFile>
-    <termIndexInterval>256</termIndexInterval>
     <mergePolicy class="org.apache.lucene.index.TieredMergePolicy"/>
     <mergeScheduler class="org.apache.lucene.index.ConcurrentMergeScheduler"/>
   </indexConfig>
@@ -40,9 +39,7 @@
   <updateHandler class="solr.DirectUpdateHandler2">
   </updateHandler>
 
-  <indexReaderFactory name="IndexReaderFactory" class="org.apache.solr.core.StandardIndexReaderFactory">
-    <int name="setTermIndexDivisor">12</int>
-  </indexReaderFactory >
+  <indexReaderFactory name="IndexReaderFactory" class="org.apache.solr.core.StandardIndexReaderFactory"/>
 
   <requestHandler name="standard" class="solr.StandardRequestHandler"></requestHandler>
 
@@ -33,8 +33,5 @@ public class IndexReaderFactoryTest extends AbstractSolrTestCase {
     IndexReaderFactory readerFactory = h.getCore().getIndexReaderFactory();
     assertNotNull("Factory is null", readerFactory);
     assertTrue("readerFactory is not an instanceof " + AlternateDirectoryTest.TestIndexReaderFactory.class, readerFactory instanceof StandardIndexReaderFactory);
-    assertTrue("termInfoIndexDivisor not set to 12", readerFactory.getTermInfosIndexDivisor() == 12);
-
-
   }
 }
@@ -114,27 +114,6 @@ public class TestConfig extends SolrTestCaseJ4 {
     assertTrue(handler.getHiddenFiles().contains("PROTWORDS.TXT"));
   }
 
-  @Test
-  public void testTermIndexInterval() throws Exception {
-    RefCounted<IndexWriter> iw = ((DirectUpdateHandler2) h.getCore()
-        .getUpdateHandler()).getSolrCoreState().getIndexWriter(h.getCore());
-    int interval = 0;
-    try {
-      IndexWriter writer = iw.get();
-      interval = writer.getConfig().getTermIndexInterval();
-    } finally {
-      iw.decref();
-    }
-    assertEquals(256, interval);
-  }
-
-  @Test
-  public void testTermIndexDivisor() throws Exception {
-    IndexReaderFactory irf = h.getCore().getIndexReaderFactory();
-    StandardIndexReaderFactory sirf = (StandardIndexReaderFactory) irf;
-    assertEquals(12, sirf.termInfosIndexDivisor);
-  }
-
   // If defaults change, add test methods to cover each version
   @Test
   public void testDefaults() throws Exception {
@@ -257,11 +257,6 @@
      <unlockOnStartup>false</unlockOnStartup>
       -->
 
-    <!-- Expert: Controls how often Lucene loads terms into memory
-         Default is 128 and is likely good for most everyone.
-      -->
-    <!-- <termIndexInterval>128</termIndexInterval> -->
-
     <!-- If true, IndexReaders will be reopened (often more efficient)
          instead of closed and then opened. Default: true
       -->
@@ -440,15 +435,6 @@
        <str name="someArg">Some Value</str>
      </indexReaderFactory >
     -->
-  <!-- By explicitly declaring the Factory, the termIndexDivisor can
-       be specified.
-    -->
-  <!--
-  <indexReaderFactory name="IndexReaderFactory"
-                      class="solr.StandardIndexReaderFactory">
-    <int name="setTermIndexDivisor">12</int>
-  </indexReaderFactory >
-  -->
 
   <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        Query section - these settings control query time things like caches
@@ -257,11 +257,6 @@
     <!--
      <unlockOnStartup>false</unlockOnStartup>
       -->
 
-    <!-- Expert: Controls how often Lucene loads terms into memory
-         Default is 128 and is likely good for most everyone.
-      -->
-    <!-- <termIndexInterval>128</termIndexInterval> -->
-
     <!-- If true, IndexReaders will be reopened (often more efficient)
          instead of closed and then opened. Default: true
@@ -441,15 +436,6 @@
        <str name="someArg">Some Value</str>
      </indexReaderFactory >
     -->
-  <!-- By explicitly declaring the Factory, the termIndexDivisor can
-       be specified.
-    -->
-  <!--
-  <indexReaderFactory name="IndexReaderFactory"
-                      class="solr.StandardIndexReaderFactory">
-    <int name="setTermIndexDivisor">12</int>
-  </indexReaderFactory >
-  -->
 
   <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        Query section - these settings control query time things like caches