mirror of https://github.com/apache/lucene.git
ord=-1 now means 'missing' from DocTermsIndex
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1411469 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
7c7898fc12
commit
7e4bb062e4
|
@ -25,6 +25,11 @@ import org.apache.lucene.index.FieldInfo;
|
||||||
import org.apache.lucene.index.NumericDocValues;
|
import org.apache.lucene.index.NumericDocValues;
|
||||||
import org.apache.lucene.index.SortedDocValues;
|
import org.apache.lucene.index.SortedDocValues;
|
||||||
|
|
||||||
|
// nocommit add javadocs stating that this must open all
|
||||||
|
// necessary files "on init", not later eg in .getXXX, else
|
||||||
|
// an IW that deletes a commit will cause an SR to hit
|
||||||
|
// exceptions....
|
||||||
|
|
||||||
public abstract class SimpleDVProducer implements Closeable {
|
public abstract class SimpleDVProducer implements Closeable {
|
||||||
|
|
||||||
public abstract NumericDocValues getNumeric(FieldInfo field) throws IOException;
|
public abstract NumericDocValues getNumeric(FieldInfo field) throws IOException;
|
||||||
|
|
|
@ -519,12 +519,14 @@ public interface FieldCache {
|
||||||
// nocommit: can we merge this api with the SortedDocValues api?
|
// nocommit: can we merge this api with the SortedDocValues api?
|
||||||
public abstract static class DocTermsIndex {
|
public abstract static class DocTermsIndex {
|
||||||
|
|
||||||
|
// nocommit remove this?
|
||||||
public int binarySearchLookup(BytesRef key, BytesRef spare) {
|
public int binarySearchLookup(BytesRef key, BytesRef spare) {
|
||||||
// this special case is the reason that Arrays.binarySearch() isn't useful.
|
// this special case is the reason that Arrays.binarySearch() isn't useful.
|
||||||
if (key == null)
|
if (key == null) {
|
||||||
return 0;
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
int low = 1;
|
int low = 0;
|
||||||
int high = numOrd()-1;
|
int high = numOrd()-1;
|
||||||
|
|
||||||
while (low <= high) {
|
while (low <= high) {
|
||||||
|
@ -543,24 +545,26 @@ public interface FieldCache {
|
||||||
|
|
||||||
/** The BytesRef argument must not be null; the method
|
/** The BytesRef argument must not be null; the method
|
||||||
* returns the same BytesRef, or an empty (length=0)
|
* returns the same BytesRef, or an empty (length=0)
|
||||||
* BytesRef if this ord is the null ord (0). */
|
* BytesRef if this ord is the null ord (-1). */
|
||||||
public abstract BytesRef lookup(int ord, BytesRef reuse);
|
public abstract BytesRef lookup(int ord, BytesRef reuse);
|
||||||
|
|
||||||
/** Convenience method, to lookup the Term for a doc.
|
/** Convenience method, to lookup the Term for a doc.
|
||||||
* If this doc is deleted or did not have this field,
|
* If this doc is deleted or did not have this field,
|
||||||
* this will return an empty (length=0) BytesRef. */
|
* this will return null. */
|
||||||
public BytesRef getTerm(int docID, BytesRef reuse) {
|
public BytesRef getTerm(int docID, BytesRef reuse) {
|
||||||
return lookup(getOrd(docID), reuse);
|
int ord = getOrd(docID);
|
||||||
|
if (ord == -1) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
return lookup(ord, reuse);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns sort ord for this document. Ord 0 is
|
/** Returns sort ord for this document. Ord -1 is
|
||||||
* reserved for docs that are deleted or did not have
|
* returned for docs that are deleted or did not have
|
||||||
* this field. */
|
* this field. */
|
||||||
public abstract int getOrd(int docID);
|
public abstract int getOrd(int docID);
|
||||||
|
|
||||||
/** Returns total unique ord count; this includes +1 for
|
/** Returns total unique ord count. */
|
||||||
* the null ord (always 0) unless the field was
|
|
||||||
* indexed with doc values. */
|
|
||||||
public abstract int numOrd();
|
public abstract int numOrd();
|
||||||
|
|
||||||
/** Number of documents */
|
/** Number of documents */
|
||||||
|
@ -568,9 +572,6 @@ public interface FieldCache {
|
||||||
|
|
||||||
/** Returns a TermsEnum that can iterate over the values in this index entry */
|
/** Returns a TermsEnum that can iterate over the values in this index entry */
|
||||||
public abstract TermsEnum getTermsEnum();
|
public abstract TermsEnum getTermsEnum();
|
||||||
|
|
||||||
/** @lucene.internal */
|
|
||||||
public abstract PackedInts.Reader getDocToOrd();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Checks the internal cache for an appropriate entry, and if none
|
/** Checks the internal cache for an appropriate entry, and if none
|
||||||
|
|
|
@ -335,9 +335,6 @@ class FieldCacheImpl implements FieldCache {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
// nocommit is this right ...
|
|
||||||
docsWithField = new Bits.MatchNoBits(maxDoc);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -970,11 +967,6 @@ class FieldCacheImpl implements FieldCache {
|
||||||
this.numOrd = numOrd;
|
this.numOrd = numOrd;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public PackedInts.Reader getDocToOrd() {
|
|
||||||
return docToTermOrd;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int numOrd() {
|
public int numOrd() {
|
||||||
return numOrd;
|
return numOrd;
|
||||||
|
@ -982,7 +974,7 @@ class FieldCacheImpl implements FieldCache {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int getOrd(int docID) {
|
public int getOrd(int docID) {
|
||||||
return (int) docToTermOrd.get(docID);
|
return (int) docToTermOrd.get(docID)-1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -1010,17 +1002,17 @@ class FieldCacheImpl implements FieldCache {
|
||||||
final BytesRef term = new BytesRef();
|
final BytesRef term = new BytesRef();
|
||||||
|
|
||||||
public DocTermsIndexEnum() {
|
public DocTermsIndexEnum() {
|
||||||
currentOrd = 0;
|
currentOrd = -1;
|
||||||
currentBlockNumber = 0;
|
currentBlockNumber = 0;
|
||||||
blocks = bytes.getBlocks();
|
blocks = bytes.getBlocks();
|
||||||
blockEnds = bytes.getBlockEnds();
|
blockEnds = bytes.getBlockEnds();
|
||||||
currentBlockNumber = bytes.fillAndGetIndex(term, termOrdToBytesOffset.get(0));
|
term.bytes = blocks[0];
|
||||||
end = blockEnds[currentBlockNumber];
|
end = blockEnds[currentBlockNumber];
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public SeekStatus seekCeil(BytesRef text, boolean useCache /* ignored */) throws IOException {
|
public SeekStatus seekCeil(BytesRef text, boolean useCache /* ignored */) throws IOException {
|
||||||
int low = 1;
|
int low = 0;
|
||||||
int high = numOrd-1;
|
int high = numOrd-1;
|
||||||
|
|
||||||
while (low <= high) {
|
while (low <= high) {
|
||||||
|
@ -1032,9 +1024,10 @@ class FieldCacheImpl implements FieldCache {
|
||||||
low = mid + 1;
|
low = mid + 1;
|
||||||
else if (cmp > 0)
|
else if (cmp > 0)
|
||||||
high = mid - 1;
|
high = mid - 1;
|
||||||
else
|
else {
|
||||||
return SeekStatus.FOUND; // key found
|
return SeekStatus.FOUND; // key found
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (low == numOrd) {
|
if (low == numOrd) {
|
||||||
return SeekStatus.END;
|
return SeekStatus.END;
|
||||||
|
@ -1045,7 +1038,7 @@ class FieldCacheImpl implements FieldCache {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void seekExact(long ord) throws IOException {
|
public void seekExact(long ord) throws IOException {
|
||||||
assert(ord >= 0 && ord <= numOrd);
|
assert ord >= 0 && ord <= numOrd;
|
||||||
// TODO: if gap is small, could iterate from current position? Or let user decide that?
|
// TODO: if gap is small, could iterate from current position? Or let user decide that?
|
||||||
currentBlockNumber = bytes.fillAndGetIndex(term, termOrdToBytesOffset.get((int)ord));
|
currentBlockNumber = bytes.fillAndGetIndex(term, termOrdToBytesOffset.get((int)ord));
|
||||||
end = blockEnds[currentBlockNumber];
|
end = blockEnds[currentBlockNumber];
|
||||||
|
@ -1057,14 +1050,18 @@ class FieldCacheImpl implements FieldCache {
|
||||||
int start = term.offset + term.length;
|
int start = term.offset + term.length;
|
||||||
if (start >= end) {
|
if (start >= end) {
|
||||||
// switch byte blocks
|
// switch byte blocks
|
||||||
if (currentBlockNumber +1 >= blocks.length) {
|
if (currentBlockNumber+1 >= blocks.length) {
|
||||||
|
assert currentOrd+1 == numOrd: "currentOrd=" + currentOrd + " numOrd=" + numOrd;
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
currentBlockNumber++;
|
currentBlockNumber++;
|
||||||
term.bytes = blocks[currentBlockNumber];
|
term.bytes = blocks[currentBlockNumber];
|
||||||
end = blockEnds[currentBlockNumber];
|
end = blockEnds[currentBlockNumber];
|
||||||
start = 0;
|
start = 0;
|
||||||
if (end<=0) return null; // special case of empty last array
|
if (end<=0) {
|
||||||
|
assert currentOrd+1 == numOrd;
|
||||||
|
return null; // special case of empty last array
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
currentOrd++;
|
currentOrd++;
|
||||||
|
@ -1131,6 +1128,12 @@ class FieldCacheImpl implements FieldCache {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// nocommit for DV if you ask for sorted or binary we
|
||||||
|
// should check sorted first?
|
||||||
|
|
||||||
|
// nocommit would be nice if .getTerms would return a
|
||||||
|
// DocTermsIndex if one already existed
|
||||||
|
|
||||||
public DocTermsIndex getTermsIndex(AtomicReader reader, String field) throws IOException {
|
public DocTermsIndex getTermsIndex(AtomicReader reader, String field) throws IOException {
|
||||||
return getTermsIndex(reader, field, PackedInts.FAST);
|
return getTermsIndex(reader, field, PackedInts.FAST);
|
||||||
}
|
}
|
||||||
|
@ -1180,12 +1183,6 @@ class FieldCacheImpl implements FieldCache {
|
||||||
// nocommit: to the codec api? or can that termsenum just use this thing?
|
// nocommit: to the codec api? or can that termsenum just use this thing?
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public Reader getDocToOrd() {
|
|
||||||
// nocommit: add this to the codec api!
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
|
@ -1206,6 +1203,7 @@ class FieldCacheImpl implements FieldCache {
|
||||||
termCountHardLimit = maxDoc+1;
|
termCountHardLimit = maxDoc+1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// nocommit use Uninvert?
|
||||||
if (terms != null) {
|
if (terms != null) {
|
||||||
// Try for coarse estimate for number of bits; this
|
// Try for coarse estimate for number of bits; this
|
||||||
// should be an underestimate most of the time, which
|
// should be an underestimate most of the time, which
|
||||||
|
@ -1238,8 +1236,9 @@ class FieldCacheImpl implements FieldCache {
|
||||||
final GrowableWriter docToTermOrd = new GrowableWriter(startTermsBPV, maxDoc, acceptableOverheadRatio);
|
final GrowableWriter docToTermOrd = new GrowableWriter(startTermsBPV, maxDoc, acceptableOverheadRatio);
|
||||||
|
|
||||||
// 0 is reserved for "unset"
|
// 0 is reserved for "unset"
|
||||||
bytes.copyUsingLengthPrefix(new BytesRef());
|
int termOrd = 0;
|
||||||
int termOrd = 1;
|
|
||||||
|
// nocommit use Uninvert?
|
||||||
|
|
||||||
if (terms != null) {
|
if (terms != null) {
|
||||||
final TermsEnum termsEnum = terms.iterator(null);
|
final TermsEnum termsEnum = terms.iterator(null);
|
||||||
|
@ -1267,7 +1266,7 @@ class FieldCacheImpl implements FieldCache {
|
||||||
if (docID == DocIdSetIterator.NO_MORE_DOCS) {
|
if (docID == DocIdSetIterator.NO_MORE_DOCS) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
docToTermOrd.set(docID, termOrd);
|
docToTermOrd.set(docID, 1+termOrd);
|
||||||
}
|
}
|
||||||
termOrd++;
|
termOrd++;
|
||||||
}
|
}
|
||||||
|
|
|
@ -97,35 +97,34 @@ public abstract class FieldCacheRangeFilter<T> extends Filter {
|
||||||
final int inclusiveLowerPoint, inclusiveUpperPoint;
|
final int inclusiveLowerPoint, inclusiveUpperPoint;
|
||||||
|
|
||||||
// Hints:
|
// Hints:
|
||||||
// * binarySearchLookup returns 0, if value was null.
|
// * binarySearchLookup returns -1, if value was null.
|
||||||
// * the value is <0 if no exact hit was found, the returned value
|
// * the value is <0 if no exact hit was found, the returned value
|
||||||
// is (-(insertion point) - 1)
|
// is (-(insertion point) - 1)
|
||||||
if (lowerPoint == 0) {
|
if (lowerPoint == -1 && lowerVal == null) {
|
||||||
assert lowerVal == null;
|
inclusiveLowerPoint = 0;
|
||||||
inclusiveLowerPoint = 1;
|
} else if (includeLower && lowerPoint >= 0) {
|
||||||
} else if (includeLower && lowerPoint > 0) {
|
|
||||||
inclusiveLowerPoint = lowerPoint;
|
inclusiveLowerPoint = lowerPoint;
|
||||||
} else if (lowerPoint > 0) {
|
} else if (lowerPoint >= 0) {
|
||||||
inclusiveLowerPoint = lowerPoint + 1;
|
inclusiveLowerPoint = lowerPoint + 1;
|
||||||
} else {
|
} else {
|
||||||
inclusiveLowerPoint = Math.max(1, -lowerPoint - 1);
|
inclusiveLowerPoint = Math.max(0, -lowerPoint - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (upperPoint == 0) {
|
if (upperPoint == -1 && upperVal == null) {
|
||||||
assert upperVal == null;
|
|
||||||
inclusiveUpperPoint = Integer.MAX_VALUE;
|
inclusiveUpperPoint = Integer.MAX_VALUE;
|
||||||
} else if (includeUpper && upperPoint > 0) {
|
} else if (includeUpper && upperPoint >= 0) {
|
||||||
inclusiveUpperPoint = upperPoint;
|
inclusiveUpperPoint = upperPoint;
|
||||||
} else if (upperPoint > 0) {
|
} else if (upperPoint >= 0) {
|
||||||
inclusiveUpperPoint = upperPoint - 1;
|
inclusiveUpperPoint = upperPoint - 1;
|
||||||
} else {
|
} else {
|
||||||
inclusiveUpperPoint = -upperPoint - 2;
|
inclusiveUpperPoint = -upperPoint - 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (inclusiveUpperPoint <= 0 || inclusiveLowerPoint > inclusiveUpperPoint)
|
if (inclusiveUpperPoint < 0 || inclusiveLowerPoint > inclusiveUpperPoint) {
|
||||||
return DocIdSet.EMPTY_DOCIDSET;
|
return DocIdSet.EMPTY_DOCIDSET;
|
||||||
|
}
|
||||||
|
|
||||||
assert inclusiveLowerPoint > 0 && inclusiveUpperPoint > 0;
|
assert inclusiveLowerPoint >= 0 && inclusiveUpperPoint >= 0;
|
||||||
|
|
||||||
return new FieldCacheDocIdSet(context.reader().maxDoc(), acceptDocs) {
|
return new FieldCacheDocIdSet(context.reader().maxDoc(), acceptDocs) {
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -122,15 +122,21 @@ public class FieldCacheTermsFilter extends Filter {
|
||||||
final FixedBitSet bits = new FixedBitSet(fcsi.numOrd());
|
final FixedBitSet bits = new FixedBitSet(fcsi.numOrd());
|
||||||
final BytesRef spare = new BytesRef();
|
final BytesRef spare = new BytesRef();
|
||||||
for (int i=0;i<terms.length;i++) {
|
for (int i=0;i<terms.length;i++) {
|
||||||
int termNumber = fcsi.binarySearchLookup(terms[i], spare);
|
int ord = fcsi.binarySearchLookup(terms[i], spare);
|
||||||
if (termNumber > 0) {
|
if (ord >= 0) {
|
||||||
bits.set(termNumber);
|
bits.set(ord);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return new FieldCacheDocIdSet(context.reader().maxDoc(), acceptDocs) {
|
return new FieldCacheDocIdSet(context.reader().maxDoc(), acceptDocs) {
|
||||||
@Override
|
@Override
|
||||||
protected final boolean matchDoc(int doc) {
|
protected final boolean matchDoc(int doc) {
|
||||||
return bits.get(fcsi.getOrd(doc));
|
int ord = fcsi.getOrd(doc);
|
||||||
|
if (ord == -1) {
|
||||||
|
// missing
|
||||||
|
return false;
|
||||||
|
} else {
|
||||||
|
return bits.get(ord);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
|
@ -1171,6 +1171,9 @@ public abstract class FieldComparator<T> {
|
||||||
return docValue.compareTo(value);
|
return docValue.compareTo(value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// nocommit remove null from FC DocTerms/Index as an
|
||||||
|
// allowed value
|
||||||
|
|
||||||
/** Base class for specialized (per bit width of the
|
/** Base class for specialized (per bit width of the
|
||||||
* ords) per-segment comparator. NOTE: this is messy;
|
* ords) per-segment comparator. NOTE: this is messy;
|
||||||
* we do this only because hotspot can't reliably inline
|
* we do this only because hotspot can't reliably inline
|
||||||
|
@ -1218,191 +1221,6 @@ public abstract class FieldComparator<T> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Used per-segment when bit width of doc->ord is 8:
|
|
||||||
private final class ByteOrdComparator extends PerSegmentComparator {
|
|
||||||
private final byte[] readerOrds;
|
|
||||||
private final DocTermsIndex termsIndex;
|
|
||||||
private final int docBase;
|
|
||||||
|
|
||||||
public ByteOrdComparator(byte[] readerOrds, DocTermsIndex termsIndex, int docBase) {
|
|
||||||
this.readerOrds = readerOrds;
|
|
||||||
this.termsIndex = termsIndex;
|
|
||||||
this.docBase = docBase;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int compareBottom(int doc) {
|
|
||||||
assert bottomSlot != -1;
|
|
||||||
final int docOrd = (readerOrds[doc]&0xFF);
|
|
||||||
if (bottomSameReader) {
|
|
||||||
// ord is precisely comparable, even in the equal case
|
|
||||||
return bottomOrd - docOrd;
|
|
||||||
} else if (bottomOrd >= docOrd) {
|
|
||||||
// the equals case always means bottom is > doc
|
|
||||||
// (because we set bottomOrd to the lower bound in
|
|
||||||
// setBottom):
|
|
||||||
return 1;
|
|
||||||
} else {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void copy(int slot, int doc) {
|
|
||||||
final int ord = readerOrds[doc]&0xFF;
|
|
||||||
ords[slot] = ord;
|
|
||||||
if (ord == 0) {
|
|
||||||
values[slot] = null;
|
|
||||||
} else {
|
|
||||||
assert ord > 0;
|
|
||||||
if (values[slot] == null) {
|
|
||||||
values[slot] = new BytesRef();
|
|
||||||
}
|
|
||||||
termsIndex.lookup(ord, values[slot]);
|
|
||||||
}
|
|
||||||
readerGen[slot] = currentReaderGen;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Used per-segment when bit width of doc->ord is 16:
|
|
||||||
private final class ShortOrdComparator extends PerSegmentComparator {
|
|
||||||
private final short[] readerOrds;
|
|
||||||
private final DocTermsIndex termsIndex;
|
|
||||||
private final int docBase;
|
|
||||||
|
|
||||||
public ShortOrdComparator(short[] readerOrds, DocTermsIndex termsIndex, int docBase) {
|
|
||||||
this.readerOrds = readerOrds;
|
|
||||||
this.termsIndex = termsIndex;
|
|
||||||
this.docBase = docBase;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int compareBottom(int doc) {
|
|
||||||
assert bottomSlot != -1;
|
|
||||||
final int docOrd = (readerOrds[doc]&0xFFFF);
|
|
||||||
if (bottomSameReader) {
|
|
||||||
// ord is precisely comparable, even in the equal case
|
|
||||||
return bottomOrd - docOrd;
|
|
||||||
} else if (bottomOrd >= docOrd) {
|
|
||||||
// the equals case always means bottom is > doc
|
|
||||||
// (because we set bottomOrd to the lower bound in
|
|
||||||
// setBottom):
|
|
||||||
return 1;
|
|
||||||
} else {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void copy(int slot, int doc) {
|
|
||||||
final int ord = readerOrds[doc]&0xFFFF;
|
|
||||||
ords[slot] = ord;
|
|
||||||
if (ord == 0) {
|
|
||||||
values[slot] = null;
|
|
||||||
} else {
|
|
||||||
assert ord > 0;
|
|
||||||
if (values[slot] == null) {
|
|
||||||
values[slot] = new BytesRef();
|
|
||||||
}
|
|
||||||
termsIndex.lookup(ord, values[slot]);
|
|
||||||
}
|
|
||||||
readerGen[slot] = currentReaderGen;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Used per-segment when bit width of doc->ord is 32:
|
|
||||||
private final class IntOrdComparator extends PerSegmentComparator {
|
|
||||||
private final int[] readerOrds;
|
|
||||||
private final DocTermsIndex termsIndex;
|
|
||||||
private final int docBase;
|
|
||||||
|
|
||||||
public IntOrdComparator(int[] readerOrds, DocTermsIndex termsIndex, int docBase) {
|
|
||||||
this.readerOrds = readerOrds;
|
|
||||||
this.termsIndex = termsIndex;
|
|
||||||
this.docBase = docBase;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int compareBottom(int doc) {
|
|
||||||
assert bottomSlot != -1;
|
|
||||||
final int docOrd = readerOrds[doc];
|
|
||||||
if (bottomSameReader) {
|
|
||||||
// ord is precisely comparable, even in the equal case
|
|
||||||
return bottomOrd - docOrd;
|
|
||||||
} else if (bottomOrd >= docOrd) {
|
|
||||||
// the equals case always means bottom is > doc
|
|
||||||
// (because we set bottomOrd to the lower bound in
|
|
||||||
// setBottom):
|
|
||||||
return 1;
|
|
||||||
} else {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void copy(int slot, int doc) {
|
|
||||||
final int ord = readerOrds[doc];
|
|
||||||
ords[slot] = ord;
|
|
||||||
if (ord == 0) {
|
|
||||||
values[slot] = null;
|
|
||||||
} else {
|
|
||||||
assert ord > 0;
|
|
||||||
if (values[slot] == null) {
|
|
||||||
values[slot] = new BytesRef();
|
|
||||||
}
|
|
||||||
termsIndex.lookup(ord, values[slot]);
|
|
||||||
}
|
|
||||||
readerGen[slot] = currentReaderGen;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Used per-segment when bit width is not a native array
|
|
||||||
// size (8, 16, 32):
|
|
||||||
private final class AnyDocToOrdComparator extends PerSegmentComparator {
|
|
||||||
private final PackedInts.Reader readerOrds;
|
|
||||||
private final DocTermsIndex termsIndex;
|
|
||||||
private final int docBase;
|
|
||||||
|
|
||||||
public AnyDocToOrdComparator(PackedInts.Reader readerOrds, DocTermsIndex termsIndex, int docBase) {
|
|
||||||
this.readerOrds = readerOrds;
|
|
||||||
this.termsIndex = termsIndex;
|
|
||||||
this.docBase = docBase;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int compareBottom(int doc) {
|
|
||||||
assert bottomSlot != -1;
|
|
||||||
final int docOrd = (int) readerOrds.get(doc);
|
|
||||||
if (bottomSameReader) {
|
|
||||||
// ord is precisely comparable, even in the equal case
|
|
||||||
return bottomOrd - docOrd;
|
|
||||||
} else if (bottomOrd >= docOrd) {
|
|
||||||
// the equals case always means bottom is > doc
|
|
||||||
// (because we set bottomOrd to the lower bound in
|
|
||||||
// setBottom):
|
|
||||||
return 1;
|
|
||||||
} else {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void copy(int slot, int doc) {
|
|
||||||
final int ord = (int) readerOrds.get(doc);
|
|
||||||
ords[slot] = ord;
|
|
||||||
if (ord == 0) {
|
|
||||||
values[slot] = null;
|
|
||||||
} else {
|
|
||||||
assert ord > 0;
|
|
||||||
if (values[slot] == null) {
|
|
||||||
values[slot] = new BytesRef();
|
|
||||||
}
|
|
||||||
termsIndex.lookup(ord, values[slot]);
|
|
||||||
}
|
|
||||||
readerGen[slot] = currentReaderGen;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Used per-segment when docToOrd is null:
|
// Used per-segment when docToOrd is null:
|
||||||
private final class AnyOrdComparator extends PerSegmentComparator {
|
private final class AnyOrdComparator extends PerSegmentComparator {
|
||||||
private final DocTermsIndex termsIndex;
|
private final DocTermsIndex termsIndex;
|
||||||
|
@ -1416,7 +1234,7 @@ public abstract class FieldComparator<T> {
|
||||||
@Override
|
@Override
|
||||||
public int compareBottom(int doc) {
|
public int compareBottom(int doc) {
|
||||||
assert bottomSlot != -1;
|
assert bottomSlot != -1;
|
||||||
final int docOrd = (int) termsIndex.getOrd(doc);
|
final int docOrd = termsIndex.getOrd(doc);
|
||||||
if (bottomSameReader) {
|
if (bottomSameReader) {
|
||||||
// ord is precisely comparable, even in the equal case
|
// ord is precisely comparable, even in the equal case
|
||||||
return bottomOrd - docOrd;
|
return bottomOrd - docOrd;
|
||||||
|
@ -1432,12 +1250,12 @@ public abstract class FieldComparator<T> {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void copy(int slot, int doc) {
|
public void copy(int slot, int doc) {
|
||||||
final int ord = (int) termsIndex.getOrd(doc);
|
final int ord = termsIndex.getOrd(doc);
|
||||||
ords[slot] = ord;
|
ords[slot] = ord;
|
||||||
if (ord == 0) {
|
if (ord == -1) {
|
||||||
values[slot] = null;
|
values[slot] = null;
|
||||||
} else {
|
} else {
|
||||||
assert ord > 0;
|
assert ord >= 0;
|
||||||
if (values[slot] == null) {
|
if (values[slot] == null) {
|
||||||
values[slot] = new BytesRef();
|
values[slot] = new BytesRef();
|
||||||
}
|
}
|
||||||
|
@ -1451,29 +1269,7 @@ public abstract class FieldComparator<T> {
|
||||||
public FieldComparator<BytesRef> setNextReader(AtomicReaderContext context) throws IOException {
|
public FieldComparator<BytesRef> setNextReader(AtomicReaderContext context) throws IOException {
|
||||||
final int docBase = context.docBase;
|
final int docBase = context.docBase;
|
||||||
termsIndex = FieldCache.DEFAULT.getTermsIndex(context.reader(), field);
|
termsIndex = FieldCache.DEFAULT.getTermsIndex(context.reader(), field);
|
||||||
final PackedInts.Reader docToOrd = termsIndex.getDocToOrd();
|
FieldComparator<BytesRef> perSegComp = new AnyOrdComparator(termsIndex, docBase);
|
||||||
FieldComparator<BytesRef> perSegComp = null;
|
|
||||||
if (docToOrd != null && docToOrd.hasArray()) {
|
|
||||||
final Object arr = docToOrd.getArray();
|
|
||||||
if (arr instanceof byte[]) {
|
|
||||||
perSegComp = new ByteOrdComparator((byte[]) arr, termsIndex, docBase);
|
|
||||||
} else if (arr instanceof short[]) {
|
|
||||||
perSegComp = new ShortOrdComparator((short[]) arr, termsIndex, docBase);
|
|
||||||
} else if (arr instanceof int[]) {
|
|
||||||
perSegComp = new IntOrdComparator((int[]) arr, termsIndex, docBase);
|
|
||||||
}
|
|
||||||
// Don't specialize the long[] case since it's not
|
|
||||||
// possible, ie, worse case is MAX_INT-1 docs with
|
|
||||||
// every one having a unique value.
|
|
||||||
}
|
|
||||||
if (perSegComp == null) {
|
|
||||||
if (docToOrd != null) {
|
|
||||||
perSegComp = new AnyDocToOrdComparator(docToOrd, termsIndex, docBase);
|
|
||||||
} else {
|
|
||||||
perSegComp = new AnyOrdComparator(termsIndex, docBase);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
currentReaderGen++;
|
currentReaderGen++;
|
||||||
if (bottomSlot != -1) {
|
if (bottomSlot != -1) {
|
||||||
perSegComp.setBottom(bottomSlot);
|
perSegComp.setBottom(bottomSlot);
|
||||||
|
@ -1492,9 +1288,9 @@ public abstract class FieldComparator<T> {
|
||||||
bottomSameReader = true;
|
bottomSameReader = true;
|
||||||
} else {
|
} else {
|
||||||
if (bottomValue == null) {
|
if (bottomValue == null) {
|
||||||
// 0 ord is null for all segments
|
// -1 ord is null for all segments
|
||||||
assert ords[bottomSlot] == 0;
|
assert ords[bottomSlot] == -1;
|
||||||
bottomOrd = 0;
|
bottomOrd = -1;
|
||||||
bottomSameReader = true;
|
bottomSameReader = true;
|
||||||
readerGen[bottomSlot] = currentReaderGen;
|
readerGen[bottomSlot] = currentReaderGen;
|
||||||
} else {
|
} else {
|
||||||
|
@ -2118,7 +1914,7 @@ public abstract class FieldComparator<T> {
|
||||||
}
|
}
|
||||||
|
|
||||||
final protected static int binarySearch(BytesRef br, DocTermsIndex a, BytesRef key) {
|
final protected static int binarySearch(BytesRef br, DocTermsIndex a, BytesRef key) {
|
||||||
return binarySearch(br, a, key, 1, a.numOrd()-1);
|
return binarySearch(br, a, key, 0, a.numOrd()-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
final protected static int binarySearch(BytesRef br, DocTermsIndex a, BytesRef key, int low, int high) {
|
final protected static int binarySearch(BytesRef br, DocTermsIndex a, BytesRef key, int low, int high) {
|
||||||
|
|
|
@ -494,6 +494,11 @@ public class TestDemoDocValue extends LuceneTestCase {
|
||||||
directory.close();
|
directory.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// nocommit tests should fail if a codec makes the [easy]
|
||||||
|
// mistake of NOT opening all files when SimpleDVProducer
|
||||||
|
// is created ... first cut of Lucene41 had this bug but
|
||||||
|
// no tests failed!?
|
||||||
|
|
||||||
public void testDemoSortedBytes() throws IOException {
|
public void testDemoSortedBytes() throws IOException {
|
||||||
Analyzer analyzer = new MockAnalyzer(random());
|
Analyzer analyzer = new MockAnalyzer(random());
|
||||||
|
|
||||||
|
|
|
@ -1628,9 +1628,9 @@ public class TestIndexWriter extends LuceneTestCase {
|
||||||
assertEquals(1, reader.docFreq(new Term("content", bigTerm)));
|
assertEquals(1, reader.docFreq(new Term("content", bigTerm)));
|
||||||
|
|
||||||
FieldCache.DocTermsIndex dti = FieldCache.DEFAULT.getTermsIndex(SlowCompositeReaderWrapper.wrap(reader), "content", random().nextFloat() * PackedInts.FAST);
|
FieldCache.DocTermsIndex dti = FieldCache.DEFAULT.getTermsIndex(SlowCompositeReaderWrapper.wrap(reader), "content", random().nextFloat() * PackedInts.FAST);
|
||||||
assertEquals(5, dti.numOrd()); // +1 for null ord
|
assertEquals(4, dti.numOrd());
|
||||||
assertEquals(4, dti.size());
|
assertEquals(4, dti.size());
|
||||||
assertEquals(bigTermBytesRef, dti.lookup(3, new BytesRef()));
|
assertEquals(bigTermBytesRef, dti.lookup(2, new BytesRef()));
|
||||||
reader.close();
|
reader.close();
|
||||||
dir.close();
|
dir.close();
|
||||||
}
|
}
|
||||||
|
|
|
@ -144,7 +144,7 @@ public final class FieldCacheRewriteMethod extends MultiTermQuery.RewriteMethod
|
||||||
// fill into a OpenBitSet
|
// fill into a OpenBitSet
|
||||||
do {
|
do {
|
||||||
long ord = termsEnum.ord();
|
long ord = termsEnum.ord();
|
||||||
if (ord > 0) {
|
if (ord >= 0) {
|
||||||
termSet.set(ord);
|
termSet.set(ord);
|
||||||
}
|
}
|
||||||
} while (termsEnum.next() != null);
|
} while (termsEnum.next() != null);
|
||||||
|
@ -155,7 +155,11 @@ public final class FieldCacheRewriteMethod extends MultiTermQuery.RewriteMethod
|
||||||
return new FieldCacheDocIdSet(context.reader().maxDoc(), acceptDocs) {
|
return new FieldCacheDocIdSet(context.reader().maxDoc(), acceptDocs) {
|
||||||
@Override
|
@Override
|
||||||
protected final boolean matchDoc(int doc) throws ArrayIndexOutOfBoundsException {
|
protected final boolean matchDoc(int doc) throws ArrayIndexOutOfBoundsException {
|
||||||
return termSet.get(fcsi.getOrd(doc));
|
int ord = fcsi.getOrd(doc);
|
||||||
|
if (ord == -1) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return termSet.get(ord);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
|
@ -159,7 +159,7 @@ class ElevationComparatorSource extends FieldComparatorSource {
|
||||||
|
|
||||||
private int docVal(int doc) {
|
private int docVal(int doc) {
|
||||||
int ord = idIndex.getOrd(doc);
|
int ord = idIndex.getOrd(doc);
|
||||||
if (ord == 0) {
|
if (ord == -1) {
|
||||||
return 0;
|
return 0;
|
||||||
} else {
|
} else {
|
||||||
BytesRef id = idIndex.lookup(ord, tempBR);
|
BytesRef id = idIndex.lookup(ord, tempBR);
|
||||||
|
|
|
@ -203,7 +203,7 @@ public class TestFieldCache extends LuceneTestCase {
|
||||||
|
|
||||||
TermsEnum tenum = termsIndex.getTermsEnum();
|
TermsEnum tenum = termsIndex.getTermsEnum();
|
||||||
BytesRef val = new BytesRef();
|
BytesRef val = new BytesRef();
|
||||||
for (int i=1; i<nTerms; i++) {
|
for (int i=0; i<nTerms; i++) {
|
||||||
BytesRef val1 = tenum.next();
|
BytesRef val1 = tenum.next();
|
||||||
BytesRef val2 = termsIndex.lookup(i,val);
|
BytesRef val2 = termsIndex.lookup(i,val);
|
||||||
// System.out.println("i="+i);
|
// System.out.println("i="+i);
|
||||||
|
|
|
@ -211,6 +211,7 @@ public class TestSort extends LuceneTestCase {
|
||||||
writer.addDocument (doc);
|
writer.addDocument (doc);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
IndexReader reader = writer.getReader();
|
IndexReader reader = writer.getReader();
|
||||||
writer.close ();
|
writer.close ();
|
||||||
IndexSearcher s = newSearcher(reader);
|
IndexSearcher s = newSearcher(reader);
|
||||||
|
@ -252,6 +253,8 @@ public class TestSort extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
String numFixed = getRandomCharString(fixedLen, 48, 52);
|
String numFixed = getRandomCharString(fixedLen, 48, 52);
|
||||||
|
// nocommit shouldn't this be tracer_fixed? how is
|
||||||
|
// this passing?
|
||||||
doc.add (new Field ("fixed_tracer", numFixed, onlyStored));
|
doc.add (new Field ("fixed_tracer", numFixed, onlyStored));
|
||||||
//doc.add (new Field ("contents", Integer.toString(i), Field.Store.NO, Field.Index.ANALYZED));
|
//doc.add (new Field ("contents", Integer.toString(i), Field.Store.NO, Field.Index.ANALYZED));
|
||||||
doc.add(new StringField("string_fixed", numFixed, Field.Store.NO));
|
doc.add(new StringField("string_fixed", numFixed, Field.Store.NO));
|
||||||
|
@ -269,11 +272,42 @@ public class TestSort extends LuceneTestCase {
|
||||||
|
|
||||||
writer.addDocument (doc);
|
writer.addDocument (doc);
|
||||||
}
|
}
|
||||||
|
// nocommit
|
||||||
//writer.forceMerge(1);
|
//writer.forceMerge(1);
|
||||||
//System.out.println(writer.getSegmentCount());
|
//System.out.println(writer.getSegmentCount());
|
||||||
writer.close();
|
writer.close();
|
||||||
IndexReader reader = DirectoryReader.open(indexStore);
|
IndexReader reader = DirectoryReader.open(indexStore);
|
||||||
return newSearcher(reader);
|
IndexSearcher searcher = newSearcher(reader);
|
||||||
|
|
||||||
|
/*
|
||||||
|
for(int docID=0;docID<reader.maxDoc();docID++) {
|
||||||
|
StoredDocument doc = reader.document(docID);
|
||||||
|
String s = doc.get("tracer");
|
||||||
|
TopDocs hits = searcher.search(new TermQuery(new Term("string", s)), NUM_STRINGS);
|
||||||
|
System.out.println("string=" + s + " has " + hits.totalHits + " docs");
|
||||||
|
boolean found = false;
|
||||||
|
for(int hit=0;!found && hit<hits.totalHits;hit++) {
|
||||||
|
if (hits.scoreDocs[hit].doc == docID) {
|
||||||
|
found = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assertTrue(found);
|
||||||
|
s = doc.get("tracer2");
|
||||||
|
hits = searcher.search(new TermQuery(new Term("string2", s)), NUM_STRINGS);
|
||||||
|
System.out.println("string2=" + s + " has " + hits.totalHits + " docs");
|
||||||
|
found = false;
|
||||||
|
for(int hit=0;!found && hit<hits.totalHits;hit++) {
|
||||||
|
if (hits.scoreDocs[hit].doc == docID) {
|
||||||
|
found = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assertTrue(found);
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
return searcher;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getRandomNumberString(int num, int low, int high) {
|
public String getRandomNumberString(int num, int low, int high) {
|
||||||
|
@ -533,7 +567,7 @@ public class TestSort extends LuceneTestCase {
|
||||||
StorableField[] v = doc2.getFields("tracer" + fieldSuffix);
|
StorableField[] v = doc2.getFields("tracer" + fieldSuffix);
|
||||||
StorableField[] v2 = doc2.getFields("tracer2" + fieldSuffix);
|
StorableField[] v2 = doc2.getFields("tracer2" + fieldSuffix);
|
||||||
for (int j = 0; j < v.length; ++j) {
|
for (int j = 0; j < v.length; ++j) {
|
||||||
buff.append(v[j] + "(" + v2[j] + ")(" + result[x].doc+")\n");
|
buff.append(v[j].stringValue() + "(" + v2[j].stringValue() + ")(" + result[x].doc+")\n");
|
||||||
if (last != null) {
|
if (last != null) {
|
||||||
int cmp = v[j].stringValue().compareTo(last);
|
int cmp = v[j].stringValue().compareTo(last);
|
||||||
if (!(cmp >= 0)) { // ensure first field is in order
|
if (!(cmp >= 0)) { // ensure first field is in order
|
||||||
|
|
Loading…
Reference in New Issue