fix DV/FC thread safety

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1436340 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2013-01-21 13:39:15 +00:00
parent 9502b8e6cc
commit 4b71fcbf72
4 changed files with 435 additions and 396 deletions

View File

@ -21,6 +21,12 @@ import org.apache.lucene.util.BytesRef;
public abstract class BinaryDocValues { public abstract class BinaryDocValues {
/** Look up the value for the given document.
*
* <p><b>NOTE</b>: you should not share the provided
* {@link BytesRef} result with other doc values sources
* (other BinaryDocValues or SortedDocValues): a single
* "private" instance should be used for each source. */
public abstract void get(int docID, BytesRef result); public abstract void get(int docID, BytesRef result);
public static final byte[] MISSING = new byte[0]; public static final byte[] MISSING = new byte[0];

View File

@ -370,7 +370,19 @@ class FieldCacheImpl implements FieldCache {
// inherit javadocs // inherit javadocs
public Bytes getBytes(AtomicReader reader, String field, ByteParser parser, boolean setDocsWithField) public Bytes getBytes(AtomicReader reader, String field, ByteParser parser, boolean setDocsWithField)
throws IOException { throws IOException {
return (Bytes) caches.get(Byte.TYPE).get(reader, new CacheKey(field, parser), setDocsWithField); final NumericDocValues valuesIn = reader.getNumericDocValues(field);
if (valuesIn != null) {
// Not cached here by FieldCacheImpl (cached instead
// per-thread by SegmentReader):
return new Bytes() {
@Override
public byte get(int docID) {
return (byte) valuesIn.get(docID);
}
};
} else {
return (Bytes) caches.get(Byte.TYPE).get(reader, new CacheKey(field, parser), setDocsWithField);
}
} }
// nocommit move up? // nocommit move up?
@ -396,50 +408,39 @@ class FieldCacheImpl implements FieldCache {
protected Object createValue(AtomicReader reader, CacheKey key, boolean setDocsWithField) protected Object createValue(AtomicReader reader, CacheKey key, boolean setDocsWithField)
throws IOException { throws IOException {
final NumericDocValues valuesIn = reader.getNumericDocValues(key.field); int maxDoc = reader.maxDoc();
if (valuesIn != null) { final byte[] values;
return new Bytes() { final ByteParser parser = (ByteParser) key.custom;
if (parser == null) {
// Confusing: must delegate to wrapper (vs simply
// setting parser = DEFAULT_BYTE_PARSER) so cache
// key includes DEFAULT_BYTE_PARSER:
return wrapper.getBytes(reader, key.field, DEFAULT_BYTE_PARSER, setDocsWithField);
}
values = new byte[maxDoc];
Uninvert u = new Uninvert() {
private byte currentValue;
@Override @Override
public byte get(int docID) { public void visitTerm(BytesRef term) {
return (byte) valuesIn.get(docID); currentValue = parser.parseByte(term);
}
@Override
public void visitDoc(int docID) {
values[docID] = currentValue;
} }
}; };
} else {
int maxDoc = reader.maxDoc(); u.uninvert(reader, key.field, setDocsWithField);
final byte[] values;
final ByteParser parser = (ByteParser) key.custom;
if (parser == null) {
// Confusing: must delegate to wrapper (vs simply
// setting parser = DEFAULT_BYTE_PARSER) so cache
// key includes DEFAULT_BYTE_PARSER:
return wrapper.getBytes(reader, key.field, DEFAULT_BYTE_PARSER, setDocsWithField);
}
values = new byte[maxDoc]; if (setDocsWithField) {
wrapper.setDocsWithField(reader, key.field, u.docsWithField);
Uninvert u = new Uninvert() {
private byte currentValue;
@Override
public void visitTerm(BytesRef term) {
currentValue = parser.parseByte(term);
}
@Override
public void visitDoc(int docID) {
values[docID] = currentValue;
}
};
u.uninvert(reader, key.field, setDocsWithField);
if (setDocsWithField) {
wrapper.setDocsWithField(reader, key.field, u.docsWithField);
}
return new BytesFromArray(values);
} }
return new BytesFromArray(values);
} }
} }
@ -451,7 +452,19 @@ class FieldCacheImpl implements FieldCache {
// inherit javadocs // inherit javadocs
public Shorts getShorts(AtomicReader reader, String field, ShortParser parser, boolean setDocsWithField) public Shorts getShorts(AtomicReader reader, String field, ShortParser parser, boolean setDocsWithField)
throws IOException { throws IOException {
return (Shorts) caches.get(Short.TYPE).get(reader, new CacheKey(field, parser), setDocsWithField); final NumericDocValues valuesIn = reader.getNumericDocValues(field);
if (valuesIn != null) {
// Not cached here by FieldCacheImpl (cached instead
// per-thread by SegmentReader):
return new Shorts() {
@Override
public short get(int docID) {
return (short) valuesIn.get(docID);
}
};
} else {
return (Shorts) caches.get(Short.TYPE).get(reader, new CacheKey(field, parser), setDocsWithField);
}
} }
// nocommit move up? // nocommit move up?
@ -477,47 +490,37 @@ class FieldCacheImpl implements FieldCache {
protected Object createValue(AtomicReader reader, CacheKey key, boolean setDocsWithField) protected Object createValue(AtomicReader reader, CacheKey key, boolean setDocsWithField)
throws IOException { throws IOException {
final NumericDocValues valuesIn = reader.getNumericDocValues(key.field); int maxDoc = reader.maxDoc();
if (valuesIn != null) { final short[] values;
return new Shorts() { final ShortParser parser = (ShortParser) key.custom;
if (parser == null) {
// Confusing: must delegate to wrapper (vs simply
// setting parser = DEFAULT_SHORT_PARSER) so cache
// key includes DEFAULT_SHORT_PARSER:
return wrapper.getShorts(reader, key.field, DEFAULT_SHORT_PARSER, setDocsWithField);
}
values = new short[maxDoc];
Uninvert u = new Uninvert() {
private short currentValue;
@Override @Override
public short get(int docID) { public void visitTerm(BytesRef term) {
return (short) valuesIn.get(docID); currentValue = parser.parseShort(term);
}
@Override
public void visitDoc(int docID) {
values[docID] = currentValue;
} }
}; };
} else {
int maxDoc = reader.maxDoc();
final short[] values;
final ShortParser parser = (ShortParser) key.custom;
if (parser == null) {
// Confusing: must delegate to wrapper (vs simply
// setting parser = DEFAULT_SHORT_PARSER) so cache
// key includes DEFAULT_SHORT_PARSER:
return wrapper.getShorts(reader, key.field, DEFAULT_SHORT_PARSER, setDocsWithField);
}
values = new short[maxDoc]; u.uninvert(reader, key.field, setDocsWithField);
Uninvert u = new Uninvert() {
private short currentValue;
@Override if (setDocsWithField) {
public void visitTerm(BytesRef term) { wrapper.setDocsWithField(reader, key.field, u.docsWithField);
currentValue = parser.parseShort(term);
}
@Override
public void visitDoc(int docID) {
values[docID] = currentValue;
}
};
u.uninvert(reader, key.field, setDocsWithField);
if (setDocsWithField) {
wrapper.setDocsWithField(reader, key.field, u.docsWithField);
}
return new ShortsFromArray(values);
} }
return new ShortsFromArray(values);
} }
} }
@ -529,7 +532,19 @@ class FieldCacheImpl implements FieldCache {
// inherit javadocs // inherit javadocs
public Ints getInts(AtomicReader reader, String field, IntParser parser, boolean setDocsWithField) public Ints getInts(AtomicReader reader, String field, IntParser parser, boolean setDocsWithField)
throws IOException { throws IOException {
return (Ints) caches.get(Integer.TYPE).get(reader, new CacheKey(field, parser), setDocsWithField); final NumericDocValues valuesIn = reader.getNumericDocValues(field);
if (valuesIn != null) {
// Not cached here by FieldCacheImpl (cached instead
// per-thread by SegmentReader):
return new Ints() {
@Override
public int get(int docID) {
return (int) valuesIn.get(docID);
}
};
} else {
return (Ints) caches.get(Integer.TYPE).get(reader, new CacheKey(field, parser), setDocsWithField);
}
} }
// nocommit move up? // nocommit move up?
@ -555,55 +570,45 @@ class FieldCacheImpl implements FieldCache {
protected Object createValue(final AtomicReader reader, CacheKey key, boolean setDocsWithField) protected Object createValue(final AtomicReader reader, CacheKey key, boolean setDocsWithField)
throws IOException { throws IOException {
final NumericDocValues valuesIn = reader.getNumericDocValues(key.field); final int[] values;
if (valuesIn != null) { final IntParser parser = (IntParser) key.custom;
return new Ints() { if (parser == null) {
// Confusing: must delegate to wrapper (vs simply
// setting parser =
// DEFAULT_INT_PARSER/NUMERIC_UTILS_INT_PARSER) so
// cache key includes
// DEFAULT_INT_PARSER/NUMERIC_UTILS_INT_PARSER:
try {
return wrapper.getInts(reader, key.field, DEFAULT_INT_PARSER, setDocsWithField);
} catch (NumberFormatException ne) {
return wrapper.getInts(reader, key.field, NUMERIC_UTILS_INT_PARSER, setDocsWithField);
}
}
// nocommit how to avoid double alloc in numeric field
// case ...
values = new int[reader.maxDoc()];
Uninvert u = new Uninvert() {
private int currentValue;
@Override @Override
public int get(int docID) { public void visitTerm(BytesRef term) {
return (int) valuesIn.get(docID); currentValue = parser.parseInt(term);
}
@Override
public void visitDoc(int docID) {
values[docID] = currentValue;
} }
}; };
} else {
final int[] values;
final IntParser parser = (IntParser) key.custom;
if (parser == null) {
// Confusing: must delegate to wrapper (vs simply
// setting parser =
// DEFAULT_INT_PARSER/NUMERIC_UTILS_INT_PARSER) so
// cache key includes
// DEFAULT_INT_PARSER/NUMERIC_UTILS_INT_PARSER:
try {
return wrapper.getInts(reader, key.field, DEFAULT_INT_PARSER, setDocsWithField);
} catch (NumberFormatException ne) {
return wrapper.getInts(reader, key.field, NUMERIC_UTILS_INT_PARSER, setDocsWithField);
}
}
// nocommit how to avoid double alloc in numeric field u.uninvert(reader, key.field, setDocsWithField);
// case ...
values = new int[reader.maxDoc()];
Uninvert u = new Uninvert() { if (setDocsWithField) {
private int currentValue; wrapper.setDocsWithField(reader, key.field, u.docsWithField);
@Override
public void visitTerm(BytesRef term) {
currentValue = parser.parseInt(term);
}
@Override
public void visitDoc(int docID) {
values[docID] = currentValue;
}
};
u.uninvert(reader, key.field, setDocsWithField);
if (setDocsWithField) {
wrapper.setDocsWithField(reader, key.field, u.docsWithField);
}
return new IntsFromArray(values);
} }
return new IntsFromArray(values);
} }
} }
@ -679,7 +684,19 @@ class FieldCacheImpl implements FieldCache {
// inherit javadocs // inherit javadocs
public Floats getFloats(AtomicReader reader, String field, FloatParser parser, boolean setDocsWithField) public Floats getFloats(AtomicReader reader, String field, FloatParser parser, boolean setDocsWithField)
throws IOException { throws IOException {
return (Floats) caches.get(Float.TYPE).get(reader, new CacheKey(field, parser), setDocsWithField); final NumericDocValues valuesIn = reader.getNumericDocValues(field);
if (valuesIn != null) {
// Not cached here by FieldCacheImpl (cached instead
// per-thread by SegmentReader):
return new Floats() {
@Override
public float get(int docID) {
return Float.intBitsToFloat((int) valuesIn.get(docID));
}
};
} else {
return (Floats) caches.get(Float.TYPE).get(reader, new CacheKey(field, parser), setDocsWithField);
}
} }
// nocommit move up? // nocommit move up?
@ -705,56 +722,46 @@ class FieldCacheImpl implements FieldCache {
protected Object createValue(AtomicReader reader, CacheKey key, boolean setDocsWithField) protected Object createValue(AtomicReader reader, CacheKey key, boolean setDocsWithField)
throws IOException { throws IOException {
final NumericDocValues valuesIn = reader.getNumericDocValues(key.field); final float[] values;
if (valuesIn != null) { final FloatParser parser = (FloatParser) key.custom;
return new Floats() { if (parser == null) {
// Confusing: must delegate to wrapper (vs simply
// setting parser =
// DEFAULT_FLOAT_PARSER/NUMERIC_UTILS_FLOAT_PARSER) so
// cache key includes
// DEFAULT_FLOAT_PARSER/NUMERIC_UTILS_FLOAT_PARSER:
try {
return wrapper.getFloats(reader, key.field, DEFAULT_FLOAT_PARSER, setDocsWithField);
} catch (NumberFormatException ne) {
return wrapper.getFloats(reader, key.field, NUMERIC_UTILS_FLOAT_PARSER, setDocsWithField);
}
}
// nocommit how to avoid double alloc in numeric field
// case ...
values = new float[reader.maxDoc()];
Uninvert u = new Uninvert() {
private float currentValue;
@Override @Override
public float get(int docID) { public void visitTerm(BytesRef term) {
return Float.intBitsToFloat((int) valuesIn.get(docID)); currentValue = parser.parseFloat(term);
}
@Override
public void visitDoc(int docID) {
values[docID] = currentValue;
} }
}; };
} else {
final float[] values;
final FloatParser parser = (FloatParser) key.custom;
if (parser == null) {
// Confusing: must delegate to wrapper (vs simply
// setting parser =
// DEFAULT_FLOAT_PARSER/NUMERIC_UTILS_FLOAT_PARSER) so
// cache key includes
// DEFAULT_FLOAT_PARSER/NUMERIC_UTILS_FLOAT_PARSER:
try {
return wrapper.getFloats(reader, key.field, DEFAULT_FLOAT_PARSER, setDocsWithField);
} catch (NumberFormatException ne) {
return wrapper.getFloats(reader, key.field, NUMERIC_UTILS_FLOAT_PARSER, setDocsWithField);
}
}
// nocommit how to avoid double alloc in numeric field u.uninvert(reader, key.field, setDocsWithField);
// case ...
values = new float[reader.maxDoc()];
Uninvert u = new Uninvert() { if (setDocsWithField) {
private float currentValue; wrapper.setDocsWithField(reader, key.field, u.docsWithField);
@Override
public void visitTerm(BytesRef term) {
currentValue = parser.parseFloat(term);
}
@Override
public void visitDoc(int docID) {
values[docID] = currentValue;
}
};
u.uninvert(reader, key.field, setDocsWithField);
if (setDocsWithField) {
wrapper.setDocsWithField(reader, key.field, u.docsWithField);
}
return new FloatsFromArray(values);
} }
return new FloatsFromArray(values);
} }
} }
@ -766,7 +773,19 @@ class FieldCacheImpl implements FieldCache {
// inherit javadocs // inherit javadocs
public Longs getLongs(AtomicReader reader, String field, FieldCache.LongParser parser, boolean setDocsWithField) public Longs getLongs(AtomicReader reader, String field, FieldCache.LongParser parser, boolean setDocsWithField)
throws IOException { throws IOException {
return (Longs) caches.get(Long.TYPE).get(reader, new CacheKey(field, parser), setDocsWithField); final NumericDocValues valuesIn = reader.getNumericDocValues(field);
if (valuesIn != null) {
// Not cached here by FieldCacheImpl (cached instead
// per-thread by SegmentReader):
return new Longs() {
@Override
public long get(int docID) {
return valuesIn.get(docID);
}
};
} else {
return (Longs) caches.get(Long.TYPE).get(reader, new CacheKey(field, parser), setDocsWithField);
}
} }
// nocommit move up? // nocommit move up?
@ -792,55 +811,45 @@ class FieldCacheImpl implements FieldCache {
protected Object createValue(AtomicReader reader, CacheKey key, boolean setDocsWithField) protected Object createValue(AtomicReader reader, CacheKey key, boolean setDocsWithField)
throws IOException { throws IOException {
final NumericDocValues valuesIn = reader.getNumericDocValues(key.field); final long[] values;
if (valuesIn != null) { final LongParser parser = (LongParser) key.custom;
return new Longs() { if (parser == null) {
// Confusing: must delegate to wrapper (vs simply
// setting parser =
// DEFAULT_LONG_PARSER/NUMERIC_UTILS_LONG_PARSER) so
// cache key includes
// DEFAULT_LONG_PARSER/NUMERIC_UTILS_LONG_PARSER:
try {
return wrapper.getLongs(reader, key.field, DEFAULT_LONG_PARSER, setDocsWithField);
} catch (NumberFormatException ne) {
return wrapper.getLongs(reader, key.field, NUMERIC_UTILS_LONG_PARSER, setDocsWithField);
}
}
// nocommit how to avoid double alloc in numeric field
// case ...
values = new long[reader.maxDoc()];
Uninvert u = new Uninvert() {
private long currentValue;
@Override @Override
public long get(int docID) { public void visitTerm(BytesRef term) {
return valuesIn.get(docID); currentValue = parser.parseLong(term);
}
@Override
public void visitDoc(int docID) {
values[docID] = currentValue;
} }
}; };
} else {
final long[] values;
final LongParser parser = (LongParser) key.custom;
if (parser == null) {
// Confusing: must delegate to wrapper (vs simply
// setting parser =
// DEFAULT_LONG_PARSER/NUMERIC_UTILS_LONG_PARSER) so
// cache key includes
// DEFAULT_LONG_PARSER/NUMERIC_UTILS_LONG_PARSER:
try {
return wrapper.getLongs(reader, key.field, DEFAULT_LONG_PARSER, setDocsWithField);
} catch (NumberFormatException ne) {
return wrapper.getLongs(reader, key.field, NUMERIC_UTILS_LONG_PARSER, setDocsWithField);
}
}
// nocommit how to avoid double alloc in numeric field u.uninvert(reader, key.field, setDocsWithField);
// case ...
values = new long[reader.maxDoc()];
Uninvert u = new Uninvert() { if (setDocsWithField) {
private long currentValue; wrapper.setDocsWithField(reader, key.field, u.docsWithField);
@Override
public void visitTerm(BytesRef term) {
currentValue = parser.parseLong(term);
}
@Override
public void visitDoc(int docID) {
values[docID] = currentValue;
}
};
u.uninvert(reader, key.field, setDocsWithField);
if (setDocsWithField) {
wrapper.setDocsWithField(reader, key.field, u.docsWithField);
}
return new LongsFromArray(values);
} }
return new LongsFromArray(values);
} }
} }
@ -853,7 +862,19 @@ class FieldCacheImpl implements FieldCache {
// inherit javadocs // inherit javadocs
public Doubles getDoubles(AtomicReader reader, String field, FieldCache.DoubleParser parser, boolean setDocsWithField) public Doubles getDoubles(AtomicReader reader, String field, FieldCache.DoubleParser parser, boolean setDocsWithField)
throws IOException { throws IOException {
return (Doubles) caches.get(Double.TYPE).get(reader, new CacheKey(field, parser), setDocsWithField); final NumericDocValues valuesIn = reader.getNumericDocValues(field);
if (valuesIn != null) {
// Not cached here by FieldCacheImpl (cached instead
// per-thread by SegmentReader):
return new Doubles() {
@Override
public double get(int docID) {
return Double.longBitsToDouble(valuesIn.get(docID));
}
};
} else {
return (Doubles) caches.get(Double.TYPE).get(reader, new CacheKey(field, parser), setDocsWithField);
}
} }
// nocommit move up? // nocommit move up?
@ -879,55 +900,45 @@ class FieldCacheImpl implements FieldCache {
protected Object createValue(AtomicReader reader, CacheKey key, boolean setDocsWithField) protected Object createValue(AtomicReader reader, CacheKey key, boolean setDocsWithField)
throws IOException { throws IOException {
final NumericDocValues valuesIn = reader.getNumericDocValues(key.field); final double[] values;
if (valuesIn != null) { final DoubleParser parser = (DoubleParser) key.custom;
return new Doubles() { if (parser == null) {
// Confusing: must delegate to wrapper (vs simply
// setting parser =
// DEFAULT_DOUBLE_PARSER/NUMERIC_UTILS_DOUBLE_PARSER) so
// cache key includes
// DEFAULT_DOUBLE_PARSER/NUMERIC_UTILS_DOUBLE_PARSER:
try {
return wrapper.getDoubles(reader, key.field, DEFAULT_DOUBLE_PARSER, setDocsWithField);
} catch (NumberFormatException ne) {
return wrapper.getDoubles(reader, key.field, NUMERIC_UTILS_DOUBLE_PARSER, setDocsWithField);
}
}
// nocommit how to avoid double alloc in numeric field
// case ...
values = new double[reader.maxDoc()];
Uninvert u = new Uninvert() {
private double currentValue;
@Override @Override
public double get(int docID) { public void visitTerm(BytesRef term) {
return Double.longBitsToDouble(valuesIn.get(docID)); currentValue = parser.parseDouble(term);
}
@Override
public void visitDoc(int docID) {
values[docID] = currentValue;
} }
}; };
} else {
final double[] values;
final DoubleParser parser = (DoubleParser) key.custom;
if (parser == null) {
// Confusing: must delegate to wrapper (vs simply
// setting parser =
// DEFAULT_DOUBLE_PARSER/NUMERIC_UTILS_DOUBLE_PARSER) so
// cache key includes
// DEFAULT_DOUBLE_PARSER/NUMERIC_UTILS_DOUBLE_PARSER:
try {
return wrapper.getDoubles(reader, key.field, DEFAULT_DOUBLE_PARSER, setDocsWithField);
} catch (NumberFormatException ne) {
return wrapper.getDoubles(reader, key.field, NUMERIC_UTILS_DOUBLE_PARSER, setDocsWithField);
}
}
// nocommit how to avoid double alloc in numeric field u.uninvert(reader, key.field, setDocsWithField);
// case ...
values = new double[reader.maxDoc()];
Uninvert u = new Uninvert() { if (setDocsWithField) {
private double currentValue; wrapper.setDocsWithField(reader, key.field, u.docsWithField);
@Override
public void visitTerm(BytesRef term) {
currentValue = parser.parseDouble(term);
}
@Override
public void visitDoc(int docID) {
values[docID] = currentValue;
}
};
u.uninvert(reader, key.field, setDocsWithField);
if (setDocsWithField) {
wrapper.setDocsWithField(reader, key.field, u.docsWithField);
}
return new DoublesFromArray(values);
} }
return new DoublesFromArray(values);
} }
} }
@ -1118,7 +1129,14 @@ class FieldCacheImpl implements FieldCache {
} }
public SortedDocValues getTermsIndex(AtomicReader reader, String field, float acceptableOverheadRatio) throws IOException { public SortedDocValues getTermsIndex(AtomicReader reader, String field, float acceptableOverheadRatio) throws IOException {
return (SortedDocValues) caches.get(SortedDocValues.class).get(reader, new CacheKey(field, acceptableOverheadRatio), false); SortedDocValues valuesIn = reader.getSortedDocValues(field);
if (valuesIn != null) {
// Not cached here by FieldCacheImpl (cached instead
// per-thread by SegmentReader):
return valuesIn;
} else {
return (SortedDocValues) caches.get(SortedDocValues.class).get(reader, new CacheKey(field, acceptableOverheadRatio), false);
}
} }
static class SortedDocValuesCache extends Cache { static class SortedDocValuesCache extends Cache {
@ -1131,107 +1149,99 @@ class FieldCacheImpl implements FieldCache {
throws IOException { throws IOException {
final int maxDoc = reader.maxDoc(); final int maxDoc = reader.maxDoc();
SortedDocValues valuesIn = reader.getSortedDocValues(key.field);
if (valuesIn != null) { Terms terms = reader.terms(key.field);
// nocommit we need thread DV test that would
// uncover this bug!! final float acceptableOverheadRatio = ((Float) key.custom).floatValue();
// nocommit we should not cache in this case?
return valuesIn; final PagedBytes bytes = new PagedBytes(15);
int startBytesBPV;
int startTermsBPV;
int startNumUniqueTerms;
final int termCountHardLimit;
if (maxDoc == Integer.MAX_VALUE) {
termCountHardLimit = Integer.MAX_VALUE;
} else { } else {
termCountHardLimit = maxDoc+1;
}
Terms terms = reader.terms(key.field); // TODO: use Uninvert?
if (terms != null) {
final float acceptableOverheadRatio = ((Float) key.custom).floatValue(); // Try for coarse estimate for number of bits; this
// should be an underestimate most of the time, which
final PagedBytes bytes = new PagedBytes(15); // is fine -- GrowableWriter will reallocate as needed
long numUniqueTerms = terms.size();
int startBytesBPV; if (numUniqueTerms != -1L) {
int startTermsBPV; if (numUniqueTerms > termCountHardLimit) {
int startNumUniqueTerms; // app is misusing the API (there is more than
// one term per doc); in this case we make best
final int termCountHardLimit; // effort to load what we can (see LUCENE-2142)
if (maxDoc == Integer.MAX_VALUE) { numUniqueTerms = termCountHardLimit;
termCountHardLimit = Integer.MAX_VALUE;
} else {
termCountHardLimit = maxDoc+1;
}
// TODO: use Uninvert?
if (terms != null) {
// Try for coarse estimate for number of bits; this
// should be an underestimate most of the time, which
// is fine -- GrowableWriter will reallocate as needed
long numUniqueTerms = terms.size();
if (numUniqueTerms != -1L) {
if (numUniqueTerms > termCountHardLimit) {
// app is misusing the API (there is more than
// one term per doc); in this case we make best
// effort to load what we can (see LUCENE-2142)
numUniqueTerms = termCountHardLimit;
}
startBytesBPV = PackedInts.bitsRequired(numUniqueTerms*4);
startTermsBPV = PackedInts.bitsRequired(numUniqueTerms);
startNumUniqueTerms = (int) numUniqueTerms;
} else {
startBytesBPV = 1;
startTermsBPV = 1;
startNumUniqueTerms = 1;
} }
startBytesBPV = PackedInts.bitsRequired(numUniqueTerms*4);
startTermsBPV = PackedInts.bitsRequired(numUniqueTerms);
startNumUniqueTerms = (int) numUniqueTerms;
} else { } else {
startBytesBPV = 1; startBytesBPV = 1;
startTermsBPV = 1; startTermsBPV = 1;
startNumUniqueTerms = 1; startNumUniqueTerms = 1;
} }
} else {
startBytesBPV = 1;
startTermsBPV = 1;
startNumUniqueTerms = 1;
}
GrowableWriter termOrdToBytesOffset = new GrowableWriter(startBytesBPV, 1+startNumUniqueTerms, acceptableOverheadRatio); GrowableWriter termOrdToBytesOffset = new GrowableWriter(startBytesBPV, 1+startNumUniqueTerms, acceptableOverheadRatio);
final GrowableWriter docToTermOrd = new GrowableWriter(startTermsBPV, maxDoc, acceptableOverheadRatio); final GrowableWriter docToTermOrd = new GrowableWriter(startTermsBPV, maxDoc, acceptableOverheadRatio);
int termOrd = 0; int termOrd = 0;
// TODO: use Uninvert? // TODO: use Uninvert?
if (terms != null) { if (terms != null) {
final TermsEnum termsEnum = terms.iterator(null); final TermsEnum termsEnum = terms.iterator(null);
DocsEnum docs = null; DocsEnum docs = null;
while(true) { while(true) {
final BytesRef term = termsEnum.next(); final BytesRef term = termsEnum.next();
if (term == null) { if (term == null) {
break; break;
} }
if (termOrd >= termCountHardLimit) { if (termOrd >= termCountHardLimit) {
break; break;
}
if (termOrd == termOrdToBytesOffset.size()) {
// NOTE: this code only runs if the incoming
// reader impl doesn't implement
// size (which should be uncommon)
termOrdToBytesOffset = termOrdToBytesOffset.resize(ArrayUtil.oversize(1+termOrd, 1));
}
termOrdToBytesOffset.set(termOrd, bytes.copyUsingLengthPrefix(term));
docs = termsEnum.docs(null, docs, DocsEnum.FLAG_NONE);
while (true) {
final int docID = docs.nextDoc();
if (docID == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
// Store 1+ ord into packed bits
docToTermOrd.set(docID, 1+termOrd);
}
termOrd++;
} }
if (termOrdToBytesOffset.size() > termOrd) { if (termOrd == termOrdToBytesOffset.size()) {
termOrdToBytesOffset = termOrdToBytesOffset.resize(termOrd); // NOTE: this code only runs if the incoming
// reader impl doesn't implement
// size (which should be uncommon)
termOrdToBytesOffset = termOrdToBytesOffset.resize(ArrayUtil.oversize(1+termOrd, 1));
} }
termOrdToBytesOffset.set(termOrd, bytes.copyUsingLengthPrefix(term));
docs = termsEnum.docs(null, docs, DocsEnum.FLAG_NONE);
while (true) {
final int docID = docs.nextDoc();
if (docID == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
// Store 1+ ord into packed bits
docToTermOrd.set(docID, 1+termOrd);
}
termOrd++;
} }
// maybe an int-only impl? if (termOrdToBytesOffset.size() > termOrd) {
return new SortedDocValuesImpl(bytes.freeze(true), termOrdToBytesOffset.getMutable(), docToTermOrd.getMutable(), termOrd); termOrdToBytesOffset = termOrdToBytesOffset.resize(termOrd);
}
} }
// maybe an int-only impl?
return new SortedDocValuesImpl(bytes.freeze(true), termOrdToBytesOffset.getMutable(), docToTermOrd.getMutable(), termOrd);
} }
} }
@ -1264,6 +1274,18 @@ class FieldCacheImpl implements FieldCache {
} }
public BinaryDocValues getTerms(AtomicReader reader, String field, float acceptableOverheadRatio) throws IOException { public BinaryDocValues getTerms(AtomicReader reader, String field, float acceptableOverheadRatio) throws IOException {
BinaryDocValues valuesIn = reader.getBinaryDocValues(field);
if (valuesIn == null) {
// nocommit is this auto-fallback ... OK?
valuesIn = reader.getSortedDocValues(field);
}
if (valuesIn != null) {
// Not cached here by FieldCacheImpl (cached instead
// per-thread by SegmentReader):
return valuesIn;
}
return (BinaryDocValues) caches.get(BinaryDocValues.class).get(reader, new CacheKey(field, acceptableOverheadRatio), false); return (BinaryDocValues) caches.get(BinaryDocValues.class).get(reader, new CacheKey(field, acceptableOverheadRatio), false);
} }
@ -1276,80 +1298,70 @@ class FieldCacheImpl implements FieldCache {
protected Object createValue(AtomicReader reader, CacheKey key, boolean setDocsWithField /* ignored */) protected Object createValue(AtomicReader reader, CacheKey key, boolean setDocsWithField /* ignored */)
throws IOException { throws IOException {
BinaryDocValues valuesIn = reader.getBinaryDocValues(key.field); final int maxDoc = reader.maxDoc();
if (valuesIn == null) { Terms terms = reader.terms(key.field);
// nocommit is this auto-fallback ... OK?
valuesIn = reader.getSortedDocValues(key.field);
}
if (valuesIn != null) { final float acceptableOverheadRatio = ((Float) key.custom).floatValue();
return valuesIn;
} else {
final int maxDoc = reader.maxDoc();
Terms terms = reader.terms(key.field);
final float acceptableOverheadRatio = ((Float) key.custom).floatValue(); final int termCountHardLimit = maxDoc;
final int termCountHardLimit = maxDoc; // Holds the actual term data, expanded.
final PagedBytes bytes = new PagedBytes(15);
// Holds the actual term data, expanded. int startBPV;
final PagedBytes bytes = new PagedBytes(15);
int startBPV; if (terms != null) {
// Try for coarse estimate for number of bits; this
if (terms != null) { // should be an underestimate most of the time, which
// Try for coarse estimate for number of bits; this // is fine -- GrowableWriter will reallocate as needed
// should be an underestimate most of the time, which long numUniqueTerms = terms.size();
// is fine -- GrowableWriter will reallocate as needed if (numUniqueTerms != -1L) {
long numUniqueTerms = terms.size(); if (numUniqueTerms > termCountHardLimit) {
if (numUniqueTerms != -1L) { numUniqueTerms = termCountHardLimit;
if (numUniqueTerms > termCountHardLimit) {
numUniqueTerms = termCountHardLimit;
}
startBPV = PackedInts.bitsRequired(numUniqueTerms*4);
} else {
startBPV = 1;
} }
startBPV = PackedInts.bitsRequired(numUniqueTerms*4);
} else { } else {
startBPV = 1; startBPV = 1;
} }
} else {
startBPV = 1;
}
final GrowableWriter docToOffset = new GrowableWriter(startBPV, maxDoc, acceptableOverheadRatio); final GrowableWriter docToOffset = new GrowableWriter(startBPV, maxDoc, acceptableOverheadRatio);
// pointer==0 means not set // pointer==0 means not set
bytes.copyUsingLengthPrefix(new BytesRef()); bytes.copyUsingLengthPrefix(new BytesRef());
if (terms != null) { if (terms != null) {
int termCount = 0; int termCount = 0;
final TermsEnum termsEnum = terms.iterator(null); final TermsEnum termsEnum = terms.iterator(null);
DocsEnum docs = null; DocsEnum docs = null;
while(true) { while(true) {
if (termCount++ == termCountHardLimit) { if (termCount++ == termCountHardLimit) {
// app is misusing the API (there is more than // app is misusing the API (there is more than
// one term per doc); in this case we make best // one term per doc); in this case we make best
// effort to load what we can (see LUCENE-2142) // effort to load what we can (see LUCENE-2142)
break;
}
final BytesRef term = termsEnum.next();
if (term == null) {
break;
}
final long pointer = bytes.copyUsingLengthPrefix(term);
docs = termsEnum.docs(null, docs, DocsEnum.FLAG_NONE);
while (true) {
final int docID = docs.nextDoc();
if (docID == DocIdSetIterator.NO_MORE_DOCS) {
break; break;
} }
docToOffset.set(docID, pointer);
final BytesRef term = termsEnum.next();
if (term == null) {
break;
}
final long pointer = bytes.copyUsingLengthPrefix(term);
docs = termsEnum.docs(null, docs, DocsEnum.FLAG_NONE);
while (true) {
final int docID = docs.nextDoc();
if (docID == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
docToOffset.set(docID, pointer);
}
} }
} }
// maybe an int-only impl?
return new BinaryDocValuesImpl(bytes.freeze(true), docToOffset.getMutable());
} }
// maybe an int-only impl?
return new BinaryDocValuesImpl(bytes.freeze(true), docToOffset.getMutable());
} }
} }

View File

@ -82,13 +82,34 @@ public class TestDocValuesWithThreads extends LuceneTestCase {
startingGun.await(); startingGun.await();
int iters = atLeast(1000); int iters = atLeast(1000);
BytesRef scratch = new BytesRef(); BytesRef scratch = new BytesRef();
BytesRef scratch2 = new BytesRef();
for(int iter=0;iter<iters;iter++) { for(int iter=0;iter<iters;iter++) {
int docID = threadRandom.nextInt(numDocs); int docID = threadRandom.nextInt(numDocs);
assertEquals(numbers.get(docID).longValue(), ndv.get(docID)); switch(threadRandom.nextInt(6)) {
case 0:
assertEquals((byte) numbers.get(docID).longValue(), FieldCache.DEFAULT.getBytes(ar, "number", false).get(docID));
break;
case 1:
assertEquals((short) numbers.get(docID).longValue(), FieldCache.DEFAULT.getShorts(ar, "number", false).get(docID));
break;
case 2:
assertEquals((int) numbers.get(docID).longValue(), FieldCache.DEFAULT.getInts(ar, "number", false).get(docID));
break;
case 3:
assertEquals(numbers.get(docID).longValue(), FieldCache.DEFAULT.getLongs(ar, "number", false).get(docID));
break;
case 4:
assertEquals(Float.intBitsToFloat((int) numbers.get(docID).longValue()), FieldCache.DEFAULT.getFloats(ar, "number", false).get(docID), 0.0f);
break;
case 5:
assertEquals(Double.longBitsToDouble(numbers.get(docID).longValue()), FieldCache.DEFAULT.getDoubles(ar, "number", false).get(docID), 0.0);
break;
}
bdv.get(docID, scratch); bdv.get(docID, scratch);
assertEquals(binary.get(docID), scratch); assertEquals(binary.get(docID), scratch);
sdv.get(docID, scratch); // Cannot share a single scratch against two "sources":
assertEquals(sorted.get(docID), scratch); sdv.get(docID, scratch2);
assertEquals(sorted.get(docID), scratch2);
} }
} catch (Exception e) { } catch (Exception e) {
throw new RuntimeException(e); throw new RuntimeException(e);

View File

@ -41,7 +41,7 @@ import org.apache.lucene.util._TestUtil;
/** /**
* Tests IndexSearcher's searchAfter() method * Tests IndexSearcher's searchAfter() method
*/ */
// nocommit fail w/ OOME?: ant test -Dtestcase=TestSearchAfter -Dtests.method=testQueries -Dtests.seed=98B4DA915983D1AE -Dtests.slow=true -Dtests.locale=sr -Dtests.timezone=Etc/GMT+2 -Dtests.file.encoding=UTF-8
public class TestSearchAfter extends LuceneTestCase { public class TestSearchAfter extends LuceneTestCase {
private Directory dir; private Directory dir;
private IndexReader reader; private IndexReader reader;