mirror of https://github.com/apache/lucene.git
SOLR-1900: use bytes instead of strings for bigTerm to find end prefixes
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@997108 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
0e9e44f10d
commit
2d9eb62343
|
@ -232,6 +232,18 @@ public final class BytesRef implements Comparable<BytesRef>, Externalizable {
|
|||
offset = 0;
|
||||
}
|
||||
|
||||
public void append(BytesRef other) {
|
||||
int newLen = length + other.length;
|
||||
if (bytes.length < newLen) {
|
||||
byte[] newBytes = new byte[newLen];
|
||||
System.arraycopy(bytes, offset, newBytes, 0, length);
|
||||
offset = 0;
|
||||
bytes = newBytes;
|
||||
}
|
||||
System.arraycopy(other.bytes, other.offset, bytes, length+offset, other.length);
|
||||
length = newLen;
|
||||
}
|
||||
|
||||
public void grow(int newLength) {
|
||||
bytes = ArrayUtil.grow(bytes, newLength);
|
||||
}
|
||||
|
|
|
@ -19,6 +19,7 @@ import org.apache.solr.search.DocSet;
|
|||
import org.apache.solr.search.SolrIndexReader;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
import org.apache.solr.util.BoundedTreeSet;
|
||||
import org.apache.solr.util.ByteUtils;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
|
@ -231,12 +232,12 @@ class PerSegmentSingleValuedFaceting {
|
|||
// SolrCore.log.info("reader= " + reader + " FC=" + System.identityHashCode(si));
|
||||
|
||||
if (prefix!=null) {
|
||||
startTermIndex = si.binarySearchLookup(new BytesRef(prefix), tempBR);
|
||||
BytesRef prefixRef = new BytesRef(prefix);
|
||||
startTermIndex = si.binarySearchLookup(prefixRef, tempBR);
|
||||
if (startTermIndex<0) startTermIndex=-startTermIndex-1;
|
||||
// find the end term. \uffff isn't a legal unicode char, but only compareTo
|
||||
// is used, so it should be fine, and is guaranteed to be bigger than legal chars.
|
||||
// TODO: switch to binarySearch version that takes start/end in Java6
|
||||
endTermIndex = si.binarySearchLookup(new BytesRef(prefix+"\uffff\uffff\uffff\uffff"), tempBR);
|
||||
prefixRef.append(ByteUtils.bigTerm);
|
||||
// TODO: we could constrain the lower endpoint if we had a binarySearch method that allowed passing start/end
|
||||
endTermIndex = si.binarySearchLookup(prefixRef, tempBR);
|
||||
assert endTermIndex < 0;
|
||||
endTermIndex = -endTermIndex-1;
|
||||
} else {
|
||||
|
@ -408,4 +409,4 @@ class IndexSortedFacetCollector extends FacetCollector {
|
|||
public NamedList getFacetCounts() {
|
||||
return res;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -432,9 +432,8 @@ public class SimpleFacets {
|
|||
if (prefix!=null) {
|
||||
startTermIndex = si.binarySearchLookup(prefixRef, br);
|
||||
if (startTermIndex<0) startTermIndex=-startTermIndex-1;
|
||||
// find the end term. \uffff isn't a legal unicode char, but only compareTo
|
||||
// is used, so it should be fine, and is guaranteed to be bigger than legal chars.
|
||||
endTermIndex = si.binarySearchLookup(new BytesRef(prefix+"\uffff\uffff\uffff\uffff"), br);
|
||||
prefixRef.append(ByteUtils.bigTerm);
|
||||
endTermIndex = si.binarySearchLookup(prefixRef, br);
|
||||
assert endTermIndex < 0;
|
||||
endTermIndex = -endTermIndex-1;
|
||||
} else {
|
||||
|
|
|
@ -37,6 +37,7 @@ import org.apache.solr.core.SolrCore;
|
|||
import org.apache.solr.schema.FieldType;
|
||||
import org.apache.solr.schema.TrieField;
|
||||
import org.apache.solr.search.*;
|
||||
import org.apache.solr.util.ByteUtils;
|
||||
import org.apache.solr.util.LongPriorityQueue;
|
||||
import org.apache.solr.util.PrimUtils;
|
||||
import org.apache.solr.util.BoundedTreeSet;
|
||||
|
@ -483,9 +484,11 @@ public class UnInvertedField {
|
|||
|
||||
NumberedTermsEnum te = ti.getEnumerator(searcher.getReader());
|
||||
if (prefix != null && prefix.length() > 0) {
|
||||
te.skipTo(new BytesRef(prefix));
|
||||
BytesRef prefixBr = new BytesRef(prefix);
|
||||
te.skipTo(prefixBr);
|
||||
startTerm = te.getTermNumber();
|
||||
te.skipTo(new BytesRef(prefix + "\uffff\uffff\uffff\uffff"));
|
||||
prefixBr.append(ByteUtils.bigTerm);
|
||||
te.skipTo(prefixBr);
|
||||
endTerm = te.getTermNumber();
|
||||
}
|
||||
|
||||
|
|
|
@ -24,21 +24,18 @@ import org.apache.lucene.util.packed.Direct16;
|
|||
import org.apache.lucene.util.packed.Direct32;
|
||||
import org.apache.lucene.util.packed.Direct8;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
import org.apache.solr.util.ByteUtils;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
|
||||
public class MissingStringLastComparatorSource extends FieldComparatorSource {
|
||||
/** A binary term consisting of a number of 0xff bytes, likely to be bigger than other terms
|
||||
* one would normally encounter, and definitely bigger than any UTF-8 terms */
|
||||
public static final BytesRef bigTerm = new BytesRef(
|
||||
new byte[] {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}
|
||||
);
|
||||
|
||||
|
||||
private final BytesRef missingValueProxy;
|
||||
|
||||
public MissingStringLastComparatorSource() {
|
||||
this(bigTerm);
|
||||
this(ByteUtils.bigTerm);
|
||||
}
|
||||
|
||||
/** Creates a {@link FieldComparatorSource} that sorts null last in a normal ascending sort.
|
||||
|
@ -428,4 +425,4 @@ public class MissingStringLastComparatorSource extends FieldComparatorSource {
|
|||
public Comparable<?> value(int slot) {
|
||||
return values==null ? NULL_VAL : values[slot];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -22,7 +22,12 @@ import org.apache.noggit.CharArr;
|
|||
|
||||
|
||||
public class ByteUtils {
|
||||
|
||||
/** A binary term consisting of a number of 0xff bytes, likely to be bigger than other terms
|
||||
* one would normally encounter, and definitely bigger than any UTF-8 terms */
|
||||
public static final BytesRef bigTerm = new BytesRef(
|
||||
new byte[] {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}
|
||||
);
|
||||
|
||||
/** Converts utf8 to utf16 and returns the number of 16 bit Java chars written.
|
||||
* Full characters are read, even if this reads past the length passed (and can result in
|
||||
* an ArrayOutOfBoundsException if invalid UTF8 is passed). Explicit checks for valid UTF8 are not performed.
|
||||
|
|
Loading…
Reference in New Issue