mirror of https://github.com/apache/lucene.git
LUCENE-1461: added FieldCacheRangeFilter, which speeds up creation of RangeFilter by using the FieldCache
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@721663 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
755e7aec14
commit
f562594180
|
@ -96,6 +96,15 @@ New features
|
||||||
Deprecated ConstantScoreRangeQuery (Mark Miller via Mike
|
Deprecated ConstantScoreRangeQuery (Mark Miller via Mike
|
||||||
McCandless)
|
McCandless)
|
||||||
|
|
||||||
|
7. LUCENE-1461: Added FieldCacheRangeFilter, a RangeFilter for
|
||||||
|
single-term fields that uses FieldCache to compute the filter. If
|
||||||
|
your field has a single term per document, and you need to create
|
||||||
|
many RangeFilters with varying lower/upper bounds, then this is
|
||||||
|
likely a much faster way to create the filters than RangeFilter.
|
||||||
|
However, it comes at the expense of added RAM consumption and
|
||||||
|
slower first-time usage due to populating the FieldCache. (Tim
|
||||||
|
Sturge via Mike McCandless)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
|
|
||||||
1. LUCENE-1427: Fixed QueryWrapperFilter to not waste time computing
|
1. LUCENE-1427: Fixed QueryWrapperFilter to not waste time computing
|
||||||
|
|
|
@ -39,6 +39,28 @@ public interface FieldCache {
|
||||||
/** Expert: Stores term text values and document ordering data. */
|
/** Expert: Stores term text values and document ordering data. */
|
||||||
public static class StringIndex {
|
public static class StringIndex {
|
||||||
|
|
||||||
|
public int binarySearchLookup(String key) {
|
||||||
|
// this special case is the reason that Arrays.binarySearch() isn't useful.
|
||||||
|
if (key == null)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
int low = 1;
|
||||||
|
int high = lookup.length-1;
|
||||||
|
|
||||||
|
while (low <= high) {
|
||||||
|
int mid = (low + high) >> 1;
|
||||||
|
int cmp = lookup[mid].compareTo(key);
|
||||||
|
|
||||||
|
if (cmp < 0)
|
||||||
|
low = mid + 1;
|
||||||
|
else if (cmp > 0)
|
||||||
|
high = mid - 1;
|
||||||
|
else
|
||||||
|
return mid; // key found
|
||||||
|
}
|
||||||
|
return -(low + 1); // key not found.
|
||||||
|
}
|
||||||
|
|
||||||
/** All the term values, in natural order. */
|
/** All the term values, in natural order. */
|
||||||
public final String[] lookup;
|
public final String[] lookup;
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,182 @@
|
||||||
|
package org.apache.lucene.search;
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A range filter built on top of a cached single term field (in FieldCache).
|
||||||
|
*
|
||||||
|
* FieldCacheRangeFilter builds a single cache for the field the first time it is used.
|
||||||
|
*
|
||||||
|
* Each subsequent FieldCacheRangeFilter on the same field then reuses this cache,
|
||||||
|
* even if the range itself changes.
|
||||||
|
*
|
||||||
|
* This means that FieldCacheRangeFilter is much faster (sometimes more than 100x as fast)
|
||||||
|
* as building a RangeFilter (or ConstantScoreRangeQuery on a RangeFilter) for each query.
|
||||||
|
* However, if the range never changes it is slower (around 2x as slow) than building a
|
||||||
|
* CachingWrapperFilter on top of a single RangeFilter.
|
||||||
|
*
|
||||||
|
* As with all FieldCache based functionality, FieldCacheRangeFilter is only valid for
|
||||||
|
* fields which contain zero or one terms for each document. Thus it works on dates,
|
||||||
|
* prices and other single value fields but will not work on regular text fields. It is
|
||||||
|
* preferable to use an UN_TOKENIZED field to ensure that there is only a single term.
|
||||||
|
*
|
||||||
|
* Also, collation is done at the time the FieldCache is built; to change
|
||||||
|
* collation you need to override the getFieldCache() method to change the underlying cache.
|
||||||
|
*/
|
||||||
|
|
||||||
|
public class FieldCacheRangeFilter extends Filter {
|
||||||
|
private String field;
|
||||||
|
private String lowerVal;
|
||||||
|
private String upperVal;
|
||||||
|
private boolean includeLower;
|
||||||
|
private boolean includeUpper;
|
||||||
|
|
||||||
|
public FieldCacheRangeFilter(
|
||||||
|
String field,
|
||||||
|
String lowerVal,
|
||||||
|
String upperVal,
|
||||||
|
boolean includeLower,
|
||||||
|
boolean includeUpper) {
|
||||||
|
this.field = field;
|
||||||
|
this.lowerVal = lowerVal;
|
||||||
|
this.upperVal = upperVal;
|
||||||
|
this.includeLower = includeLower;
|
||||||
|
this.includeUpper = includeUpper;
|
||||||
|
}
|
||||||
|
|
||||||
|
public FieldCache getFieldCache() {
|
||||||
|
return FieldCache.DEFAULT;
|
||||||
|
}
|
||||||
|
|
||||||
|
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
|
||||||
|
return new RangeMultiFilterDocIdSet(getFieldCache().getStringIndex(reader, field));
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
StringBuffer buffer = new StringBuffer();
|
||||||
|
buffer.append(field);
|
||||||
|
buffer.append(":");
|
||||||
|
buffer.append(includeLower ? "[" : "{");
|
||||||
|
if (null != lowerVal) {
|
||||||
|
buffer.append(lowerVal);
|
||||||
|
}
|
||||||
|
buffer.append("-");
|
||||||
|
if (null != upperVal) {
|
||||||
|
buffer.append(upperVal);
|
||||||
|
}
|
||||||
|
buffer.append(includeUpper ? "]" : "}");
|
||||||
|
return buffer.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean equals(Object o) {
|
||||||
|
if (this == o) return true;
|
||||||
|
if (!(o instanceof FieldCacheRangeFilter)) return false;
|
||||||
|
FieldCacheRangeFilter other = (FieldCacheRangeFilter) o;
|
||||||
|
|
||||||
|
if (!this.field.equals(other.field)
|
||||||
|
|| this.includeLower != other.includeLower
|
||||||
|
|| this.includeUpper != other.includeUpper
|
||||||
|
) { return false; }
|
||||||
|
if (this.lowerVal != null ? !this.lowerVal.equals(other.lowerVal) : other.lowerVal != null) return false;
|
||||||
|
if (this.upperVal != null ? !this.upperVal.equals(other.upperVal) : other.upperVal != null) return false;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int hashCode() {
|
||||||
|
int h = field.hashCode();
|
||||||
|
h ^= lowerVal != null ? lowerVal.hashCode() : 550356204;
|
||||||
|
h = (h << 1) | (h >>> 31); // rotate to distinguish lower from upper
|
||||||
|
h ^= (upperVal != null ? (upperVal.hashCode()) : -1674416163);
|
||||||
|
h ^= (includeLower ? 1549299360 : -365038026)
|
||||||
|
^ (includeUpper ? 1721088258 : 1948649653);
|
||||||
|
|
||||||
|
return h;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected class RangeMultiFilterDocIdSet extends DocIdSet {
|
||||||
|
private int inclusiveLowerPoint;
|
||||||
|
private int inclusiveUpperPoint;
|
||||||
|
private FieldCache.StringIndex fcsi;
|
||||||
|
|
||||||
|
public RangeMultiFilterDocIdSet(FieldCache.StringIndex fcsi) {
|
||||||
|
this.fcsi = fcsi;
|
||||||
|
initialize();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void initialize() {
|
||||||
|
int lowerPoint = fcsi.binarySearchLookup(lowerVal);
|
||||||
|
if (includeLower && lowerPoint >= 0) {
|
||||||
|
inclusiveLowerPoint = lowerPoint;
|
||||||
|
} else if (lowerPoint >= 0) {
|
||||||
|
inclusiveLowerPoint = lowerPoint+1;
|
||||||
|
} else {
|
||||||
|
inclusiveLowerPoint = -lowerPoint-1;
|
||||||
|
}
|
||||||
|
int upperPoint = fcsi.binarySearchLookup(upperVal);
|
||||||
|
if (includeUpper && upperPoint >= 0) {
|
||||||
|
inclusiveUpperPoint = upperPoint;
|
||||||
|
} else if (upperPoint >= 0) {
|
||||||
|
inclusiveUpperPoint = upperPoint - 1;
|
||||||
|
} else {
|
||||||
|
inclusiveUpperPoint = -upperPoint - 2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public DocIdSetIterator iterator() {
|
||||||
|
return new RangeMultiFilterIterator();
|
||||||
|
}
|
||||||
|
|
||||||
|
protected class RangeMultiFilterIterator extends DocIdSetIterator {
|
||||||
|
private int doc = -1;
|
||||||
|
|
||||||
|
public int doc() {
|
||||||
|
return doc;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean next() {
|
||||||
|
try {
|
||||||
|
do {
|
||||||
|
doc++;
|
||||||
|
} while (fcsi.order[doc] > inclusiveUpperPoint
|
||||||
|
|| fcsi.order[doc] < inclusiveLowerPoint);
|
||||||
|
return true;
|
||||||
|
} catch (ArrayIndexOutOfBoundsException e) {
|
||||||
|
doc = Integer.MAX_VALUE;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean skipTo(int target) {
|
||||||
|
try {
|
||||||
|
doc = target;
|
||||||
|
while (fcsi.order[doc] > inclusiveUpperPoint
|
||||||
|
|| fcsi.order[doc] < inclusiveLowerPoint) {
|
||||||
|
doc++;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
} catch (ArrayIndexOutOfBoundsException e) {
|
||||||
|
doc = Integer.MAX_VALUE;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -31,6 +31,9 @@ import java.text.Collator;
|
||||||
* This code borrows heavily from {@link RangeQuery}, but is implemented as a Filter
|
* This code borrows heavily from {@link RangeQuery}, but is implemented as a Filter
|
||||||
*
|
*
|
||||||
* </p>
|
* </p>
|
||||||
|
*
|
||||||
|
* If you construct a large number of range filters with different ranges but on the
|
||||||
|
* same field, {@link FieldCacheRangeFilter} may have significantly better performance.
|
||||||
*/
|
*/
|
||||||
public class RangeFilter extends Filter {
|
public class RangeFilter extends Filter {
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,379 @@
|
||||||
|
package org.apache.lucene.search;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.text.Collator;
|
||||||
|
import java.util.Locale;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.index.IndexWriter;
|
||||||
|
import org.apache.lucene.analysis.SimpleAnalyzer;
|
||||||
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.document.Field;
|
||||||
|
import org.apache.lucene.store.RAMDirectory;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A basic 'positive' Unit test class for the RangeFilter class.
|
||||||
|
*
|
||||||
|
* <p>
|
||||||
|
* NOTE: at the moment, this class only tests for 'positive' results,
|
||||||
|
* it does not verify the results to ensure there are no 'false positives',
|
||||||
|
* nor does it adequately test 'negative' results. It also does not test
|
||||||
|
* that garbage in results in an Exception.
|
||||||
|
*/
|
||||||
|
public class TestFieldCacheRangeFilter extends BaseTestRangeFilter {
|
||||||
|
|
||||||
|
public TestFieldCacheRangeFilter(String name) {
|
||||||
|
super(name);
|
||||||
|
}
|
||||||
|
public TestFieldCacheRangeFilter() {
|
||||||
|
super();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testRangeFilterId() throws IOException {
|
||||||
|
|
||||||
|
IndexReader reader = IndexReader.open(signedIndex.index);
|
||||||
|
IndexSearcher search = new IndexSearcher(reader);
|
||||||
|
|
||||||
|
int medId = ((maxId - minId) / 2);
|
||||||
|
|
||||||
|
String minIP = pad(minId);
|
||||||
|
String maxIP = pad(maxId);
|
||||||
|
String medIP = pad(medId);
|
||||||
|
|
||||||
|
int numDocs = reader.numDocs();
|
||||||
|
|
||||||
|
assertEquals("num of docs", numDocs, 1+ maxId - minId);
|
||||||
|
|
||||||
|
ScoreDoc[] result;
|
||||||
|
Query q = new TermQuery(new Term("body","body"));
|
||||||
|
|
||||||
|
// test id, bounded on both ends
|
||||||
|
|
||||||
|
result = search.search(q,new RangeFilter("id",minIP,maxIP,T,T), numDocs).scoreDocs;
|
||||||
|
assertEquals("find all", numDocs, result.length);
|
||||||
|
|
||||||
|
result = search.search(q,new RangeFilter("id",minIP,maxIP,T,F), numDocs).scoreDocs;
|
||||||
|
assertEquals("all but last", numDocs-1, result.length);
|
||||||
|
|
||||||
|
result = search.search(q,new RangeFilter("id",minIP,maxIP,F,T), numDocs).scoreDocs;
|
||||||
|
assertEquals("all but first", numDocs-1, result.length);
|
||||||
|
|
||||||
|
result = search.search(q,new RangeFilter("id",minIP,maxIP,F,F), numDocs).scoreDocs;
|
||||||
|
assertEquals("all but ends", numDocs-2, result.length);
|
||||||
|
|
||||||
|
result = search.search(q,new RangeFilter("id",medIP,maxIP,T,T), numDocs).scoreDocs;
|
||||||
|
assertEquals("med and up", 1+ maxId-medId, result.length);
|
||||||
|
|
||||||
|
result = search.search(q,new RangeFilter("id",minIP,medIP,T,T), numDocs).scoreDocs;
|
||||||
|
assertEquals("up to med", 1+ medId-minId, result.length);
|
||||||
|
|
||||||
|
// unbounded id
|
||||||
|
|
||||||
|
result = search.search(q,new RangeFilter("id",minIP,null,T,F), numDocs).scoreDocs;
|
||||||
|
assertEquals("min and up", numDocs, result.length);
|
||||||
|
|
||||||
|
result = search.search(q,new RangeFilter("id",null,maxIP,F,T), numDocs).scoreDocs;
|
||||||
|
assertEquals("max and down", numDocs, result.length);
|
||||||
|
|
||||||
|
result = search.search(q,new RangeFilter("id",minIP,null,F,F), numDocs).scoreDocs;
|
||||||
|
assertEquals("not min, but up", numDocs-1, result.length);
|
||||||
|
|
||||||
|
result = search.search(q,new RangeFilter("id",null,maxIP,F,F), numDocs).scoreDocs;
|
||||||
|
assertEquals("not max, but down", numDocs-1, result.length);
|
||||||
|
|
||||||
|
result = search.search(q,new RangeFilter("id",medIP,maxIP,T,F), numDocs).scoreDocs;
|
||||||
|
assertEquals("med and up, not max", maxId-medId, result.length);
|
||||||
|
|
||||||
|
result = search.search(q,new RangeFilter("id",minIP,medIP,F,T), numDocs).scoreDocs;
|
||||||
|
assertEquals("not min, up to med", medId-minId, result.length);
|
||||||
|
|
||||||
|
// very small sets
|
||||||
|
|
||||||
|
result = search.search(q,new RangeFilter("id",minIP,minIP,F,F), numDocs).scoreDocs;
|
||||||
|
assertEquals("min,min,F,F", 0, result.length);
|
||||||
|
result = search.search(q,new RangeFilter("id",medIP,medIP,F,F), numDocs).scoreDocs;
|
||||||
|
assertEquals("med,med,F,F", 0, result.length);
|
||||||
|
result = search.search(q,new RangeFilter("id",maxIP,maxIP,F,F), numDocs).scoreDocs;
|
||||||
|
assertEquals("max,max,F,F", 0, result.length);
|
||||||
|
|
||||||
|
result = search.search(q,new RangeFilter("id",minIP,minIP,T,T), numDocs).scoreDocs;
|
||||||
|
assertEquals("min,min,T,T", 1, result.length);
|
||||||
|
result = search.search(q,new RangeFilter("id",null,minIP,F,T), numDocs).scoreDocs;
|
||||||
|
assertEquals("nul,min,F,T", 1, result.length);
|
||||||
|
|
||||||
|
result = search.search(q,new RangeFilter("id",maxIP,maxIP,T,T), numDocs).scoreDocs;
|
||||||
|
assertEquals("max,max,T,T", 1, result.length);
|
||||||
|
result = search.search(q,new RangeFilter("id",maxIP,null,T,F), numDocs).scoreDocs;
|
||||||
|
assertEquals("max,nul,T,T", 1, result.length);
|
||||||
|
|
||||||
|
result = search.search(q,new RangeFilter("id",medIP,medIP,T,T), numDocs).scoreDocs;
|
||||||
|
assertEquals("med,med,T,T", 1, result.length);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testRangeFilterIdCollating() throws IOException {
|
||||||
|
|
||||||
|
IndexReader reader = IndexReader.open(signedIndex.index);
|
||||||
|
IndexSearcher search = new IndexSearcher(reader);
|
||||||
|
|
||||||
|
Collator c = Collator.getInstance(Locale.ENGLISH);
|
||||||
|
|
||||||
|
int medId = ((maxId - minId) / 2);
|
||||||
|
|
||||||
|
String minIP = pad(minId);
|
||||||
|
String maxIP = pad(maxId);
|
||||||
|
String medIP = pad(medId);
|
||||||
|
|
||||||
|
int numDocs = reader.numDocs();
|
||||||
|
|
||||||
|
assertEquals("num of docs", numDocs, 1+ maxId - minId);
|
||||||
|
|
||||||
|
Hits result;
|
||||||
|
Query q = new TermQuery(new Term("body","body"));
|
||||||
|
|
||||||
|
// test id, bounded on both ends
|
||||||
|
|
||||||
|
result = search.search(q,new RangeFilter("id",minIP,maxIP,T,T,c));
|
||||||
|
assertEquals("find all", numDocs, result.length());
|
||||||
|
|
||||||
|
result = search.search(q,new RangeFilter("id",minIP,maxIP,T,F,c));
|
||||||
|
assertEquals("all but last", numDocs-1, result.length());
|
||||||
|
|
||||||
|
result = search.search(q,new RangeFilter("id",minIP,maxIP,F,T,c));
|
||||||
|
assertEquals("all but first", numDocs-1, result.length());
|
||||||
|
|
||||||
|
result = search.search(q,new RangeFilter("id",minIP,maxIP,F,F,c));
|
||||||
|
assertEquals("all but ends", numDocs-2, result.length());
|
||||||
|
|
||||||
|
result = search.search(q,new RangeFilter("id",medIP,maxIP,T,T,c));
|
||||||
|
assertEquals("med and up", 1+ maxId-medId, result.length());
|
||||||
|
|
||||||
|
result = search.search(q,new RangeFilter("id",minIP,medIP,T,T,c));
|
||||||
|
assertEquals("up to med", 1+ medId-minId, result.length());
|
||||||
|
|
||||||
|
// unbounded id
|
||||||
|
|
||||||
|
result = search.search(q,new RangeFilter("id",minIP,null,T,F,c));
|
||||||
|
assertEquals("min and up", numDocs, result.length());
|
||||||
|
|
||||||
|
result = search.search(q,new RangeFilter("id",null,maxIP,F,T,c));
|
||||||
|
assertEquals("max and down", numDocs, result.length());
|
||||||
|
|
||||||
|
result = search.search(q,new RangeFilter("id",minIP,null,F,F,c));
|
||||||
|
assertEquals("not min, but up", numDocs-1, result.length());
|
||||||
|
|
||||||
|
result = search.search(q,new RangeFilter("id",null,maxIP,F,F,c));
|
||||||
|
assertEquals("not max, but down", numDocs-1, result.length());
|
||||||
|
|
||||||
|
result = search.search(q,new RangeFilter("id",medIP,maxIP,T,F,c));
|
||||||
|
assertEquals("med and up, not max", maxId-medId, result.length());
|
||||||
|
|
||||||
|
result = search.search(q,new RangeFilter("id",minIP,medIP,F,T,c));
|
||||||
|
assertEquals("not min, up to med", medId-minId, result.length());
|
||||||
|
|
||||||
|
// very small sets
|
||||||
|
|
||||||
|
result = search.search(q,new RangeFilter("id",minIP,minIP,F,F,c));
|
||||||
|
assertEquals("min,min,F,F", 0, result.length());
|
||||||
|
result = search.search(q,new RangeFilter("id",medIP,medIP,F,F,c));
|
||||||
|
assertEquals("med,med,F,F", 0, result.length());
|
||||||
|
result = search.search(q,new RangeFilter("id",maxIP,maxIP,F,F,c));
|
||||||
|
assertEquals("max,max,F,F", 0, result.length());
|
||||||
|
|
||||||
|
result = search.search(q,new RangeFilter("id",minIP,minIP,T,T,c));
|
||||||
|
assertEquals("min,min,T,T", 1, result.length());
|
||||||
|
result = search.search(q,new RangeFilter("id",null,minIP,F,T,c));
|
||||||
|
assertEquals("nul,min,F,T", 1, result.length());
|
||||||
|
|
||||||
|
result = search.search(q,new RangeFilter("id",maxIP,maxIP,T,T,c));
|
||||||
|
assertEquals("max,max,T,T", 1, result.length());
|
||||||
|
result = search.search(q,new RangeFilter("id",maxIP,null,T,F,c));
|
||||||
|
assertEquals("max,nul,T,T", 1, result.length());
|
||||||
|
|
||||||
|
result = search.search(q,new RangeFilter("id",medIP,medIP,T,T,c));
|
||||||
|
assertEquals("med,med,T,T", 1, result.length());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testRangeFilterRand() throws IOException {
|
||||||
|
|
||||||
|
IndexReader reader = IndexReader.open(signedIndex.index);
|
||||||
|
IndexSearcher search = new IndexSearcher(reader);
|
||||||
|
|
||||||
|
String minRP = pad(signedIndex.minR);
|
||||||
|
String maxRP = pad(signedIndex.maxR);
|
||||||
|
|
||||||
|
int numDocs = reader.numDocs();
|
||||||
|
|
||||||
|
assertEquals("num of docs", numDocs, 1+ maxId - minId);
|
||||||
|
|
||||||
|
ScoreDoc[] result;
|
||||||
|
Query q = new TermQuery(new Term("body","body"));
|
||||||
|
|
||||||
|
// test extremes, bounded on both ends
|
||||||
|
|
||||||
|
result = search.search(q,new RangeFilter("rand",minRP,maxRP,T,T), numDocs).scoreDocs;
|
||||||
|
assertEquals("find all", numDocs, result.length);
|
||||||
|
|
||||||
|
result = search.search(q,new RangeFilter("rand",minRP,maxRP,T,F), numDocs).scoreDocs;
|
||||||
|
assertEquals("all but biggest", numDocs-1, result.length);
|
||||||
|
|
||||||
|
result = search.search(q,new RangeFilter("rand",minRP,maxRP,F,T), numDocs).scoreDocs;
|
||||||
|
assertEquals("all but smallest", numDocs-1, result.length);
|
||||||
|
|
||||||
|
result = search.search(q,new RangeFilter("rand",minRP,maxRP,F,F), numDocs).scoreDocs;
|
||||||
|
assertEquals("all but extremes", numDocs-2, result.length);
|
||||||
|
|
||||||
|
// unbounded
|
||||||
|
|
||||||
|
result = search.search(q,new RangeFilter("rand",minRP,null,T,F), numDocs).scoreDocs;
|
||||||
|
assertEquals("smallest and up", numDocs, result.length);
|
||||||
|
|
||||||
|
result = search.search(q,new RangeFilter("rand",null,maxRP,F,T), numDocs).scoreDocs;
|
||||||
|
assertEquals("biggest and down", numDocs, result.length);
|
||||||
|
|
||||||
|
result = search.search(q,new RangeFilter("rand",minRP,null,F,F), numDocs).scoreDocs;
|
||||||
|
assertEquals("not smallest, but up", numDocs-1, result.length);
|
||||||
|
|
||||||
|
result = search.search(q,new RangeFilter("rand",null,maxRP,F,F), numDocs).scoreDocs;
|
||||||
|
assertEquals("not biggest, but down", numDocs-1, result.length);
|
||||||
|
|
||||||
|
// very small sets
|
||||||
|
|
||||||
|
result = search.search(q,new RangeFilter("rand",minRP,minRP,F,F), numDocs).scoreDocs;
|
||||||
|
assertEquals("min,min,F,F", 0, result.length);
|
||||||
|
result = search.search(q,new RangeFilter("rand",maxRP,maxRP,F,F), numDocs).scoreDocs;
|
||||||
|
assertEquals("max,max,F,F", 0, result.length);
|
||||||
|
|
||||||
|
result = search.search(q,new RangeFilter("rand",minRP,minRP,T,T), numDocs).scoreDocs;
|
||||||
|
assertEquals("min,min,T,T", 1, result.length);
|
||||||
|
result = search.search(q,new RangeFilter("rand",null,minRP,F,T), numDocs).scoreDocs;
|
||||||
|
assertEquals("nul,min,F,T", 1, result.length);
|
||||||
|
|
||||||
|
result = search.search(q,new RangeFilter("rand",maxRP,maxRP,T,T), numDocs).scoreDocs;
|
||||||
|
assertEquals("max,max,T,T", 1, result.length);
|
||||||
|
result = search.search(q,new RangeFilter("rand",maxRP,null,T,F), numDocs).scoreDocs;
|
||||||
|
assertEquals("max,nul,T,T", 1, result.length);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testRangeFilterRandCollating() throws IOException {
|
||||||
|
|
||||||
|
// using the unsigned index because collation seems to ignore hyphens
|
||||||
|
IndexReader reader = IndexReader.open(unsignedIndex.index);
|
||||||
|
IndexSearcher search = new IndexSearcher(reader);
|
||||||
|
|
||||||
|
Collator c = Collator.getInstance(Locale.ENGLISH);
|
||||||
|
|
||||||
|
String minRP = pad(unsignedIndex.minR);
|
||||||
|
String maxRP = pad(unsignedIndex.maxR);
|
||||||
|
|
||||||
|
int numDocs = reader.numDocs();
|
||||||
|
|
||||||
|
assertEquals("num of docs", numDocs, 1+ maxId - minId);
|
||||||
|
|
||||||
|
Hits result;
|
||||||
|
Query q = new TermQuery(new Term("body","body"));
|
||||||
|
|
||||||
|
// test extremes, bounded on both ends
|
||||||
|
|
||||||
|
result = search.search(q,new RangeFilter("rand",minRP,maxRP,T,T,c));
|
||||||
|
assertEquals("find all", numDocs, result.length());
|
||||||
|
|
||||||
|
result = search.search(q,new RangeFilter("rand",minRP,maxRP,T,F,c));
|
||||||
|
assertEquals("all but biggest", numDocs-1, result.length());
|
||||||
|
|
||||||
|
result = search.search(q,new RangeFilter("rand",minRP,maxRP,F,T,c));
|
||||||
|
assertEquals("all but smallest", numDocs-1, result.length());
|
||||||
|
|
||||||
|
result = search.search(q,new RangeFilter("rand",minRP,maxRP,F,F,c));
|
||||||
|
assertEquals("all but extremes", numDocs-2, result.length());
|
||||||
|
|
||||||
|
// unbounded
|
||||||
|
|
||||||
|
result = search.search(q,new RangeFilter("rand",minRP,null,T,F,c));
|
||||||
|
assertEquals("smallest and up", numDocs, result.length());
|
||||||
|
|
||||||
|
result = search.search(q,new RangeFilter("rand",null,maxRP,F,T,c));
|
||||||
|
assertEquals("biggest and down", numDocs, result.length());
|
||||||
|
|
||||||
|
result = search.search(q,new RangeFilter("rand",minRP,null,F,F,c));
|
||||||
|
assertEquals("not smallest, but up", numDocs-1, result.length());
|
||||||
|
|
||||||
|
result = search.search(q,new RangeFilter("rand",null,maxRP,F,F,c));
|
||||||
|
assertEquals("not biggest, but down", numDocs-1, result.length());
|
||||||
|
|
||||||
|
// very small sets
|
||||||
|
|
||||||
|
result = search.search(q,new RangeFilter("rand",minRP,minRP,F,F,c));
|
||||||
|
assertEquals("min,min,F,F", 0, result.length());
|
||||||
|
result = search.search(q,new RangeFilter("rand",maxRP,maxRP,F,F,c));
|
||||||
|
assertEquals("max,max,F,F", 0, result.length());
|
||||||
|
|
||||||
|
result = search.search(q,new RangeFilter("rand",minRP,minRP,T,T,c));
|
||||||
|
assertEquals("min,min,T,T", 1, result.length());
|
||||||
|
result = search.search(q,new RangeFilter("rand",null,minRP,F,T,c));
|
||||||
|
assertEquals("nul,min,F,T", 1, result.length());
|
||||||
|
|
||||||
|
result = search.search(q,new RangeFilter("rand",maxRP,maxRP,T,T,c));
|
||||||
|
assertEquals("max,max,T,T", 1, result.length());
|
||||||
|
result = search.search(q,new RangeFilter("rand",maxRP,null,T,F,c));
|
||||||
|
assertEquals("max,nul,T,T", 1, result.length());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testFarsi() throws Exception {
|
||||||
|
|
||||||
|
/* build an index */
|
||||||
|
RAMDirectory farsiIndex = new RAMDirectory();
|
||||||
|
IndexWriter writer = new IndexWriter(farsiIndex, new SimpleAnalyzer(), T,
|
||||||
|
IndexWriter.MaxFieldLength.LIMITED);
|
||||||
|
Document doc = new Document();
|
||||||
|
doc.add(new Field("content","\u0633\u0627\u0628",
|
||||||
|
Field.Store.YES, Field.Index.UN_TOKENIZED));
|
||||||
|
doc.add(new Field("body", "body",
|
||||||
|
Field.Store.YES, Field.Index.UN_TOKENIZED));
|
||||||
|
writer.addDocument(doc);
|
||||||
|
|
||||||
|
writer.optimize();
|
||||||
|
writer.close();
|
||||||
|
|
||||||
|
IndexReader reader = IndexReader.open(farsiIndex);
|
||||||
|
IndexSearcher search = new IndexSearcher(reader);
|
||||||
|
Query q = new TermQuery(new Term("body","body"));
|
||||||
|
|
||||||
|
// Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
|
||||||
|
// RuleBasedCollator. However, the Arabic Locale seems to order the Farsi
|
||||||
|
// characters properly.
|
||||||
|
Collator collator = Collator.getInstance(new Locale("ar"));
|
||||||
|
|
||||||
|
// Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
|
||||||
|
// orders the U+0698 character before the U+0633 character, so the single
|
||||||
|
// index Term below should NOT be returned by a RangeFilter with a Farsi
|
||||||
|
// Collator (or an Arabic one for the case when Farsi is not supported).
|
||||||
|
Hits result = search.search
|
||||||
|
(q, new RangeFilter("content", "\u062F", "\u0698", T, T, collator));
|
||||||
|
assertEquals("The index Term should not be included.", 0, result.length());
|
||||||
|
|
||||||
|
result = search.search
|
||||||
|
(q, new RangeFilter("content", "\u0633", "\u0638", T, T, collator));
|
||||||
|
assertEquals("The index Term should be included.", 1, result.length());
|
||||||
|
search.close();
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue