mirror of https://github.com/apache/lucene.git
LUCENE-1602: Rewrite TrieRange to use MultiTermQuery
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@765618 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
45986b246c
commit
92c6484161
|
@ -1,157 +0,0 @@
|
||||||
package org.apache.lucene.search.trie;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
import org.apache.lucene.search.Filter;
|
|
||||||
import org.apache.lucene.search.Query;
|
|
||||||
import org.apache.lucene.search.ConstantScoreQuery;
|
|
||||||
import org.apache.lucene.index.IndexReader;
|
|
||||||
import org.apache.lucene.index.TermDocs;
|
|
||||||
import org.apache.lucene.index.TermEnum;
|
|
||||||
import org.apache.lucene.index.Term;
|
|
||||||
import org.apache.lucene.util.OpenBitSet;
|
|
||||||
import org.apache.lucene.util.ToStringUtils;
|
|
||||||
|
|
||||||
|
|
||||||
abstract class AbstractTrieRangeFilter extends Filter {
|
|
||||||
|
|
||||||
AbstractTrieRangeFilter(final String field, final int precisionStep,
|
|
||||||
Number min, Number max, final boolean minInclusive, final boolean maxInclusive
|
|
||||||
) {
|
|
||||||
this.field=field.intern();
|
|
||||||
this.precisionStep=precisionStep;
|
|
||||||
this.min=min;
|
|
||||||
this.max=max;
|
|
||||||
this.minInclusive=minInclusive;
|
|
||||||
this.maxInclusive=maxInclusive;
|
|
||||||
}
|
|
||||||
|
|
||||||
//@Override
|
|
||||||
public String toString() {
|
|
||||||
return toString(null);
|
|
||||||
}
|
|
||||||
|
|
||||||
public String toString(final String field) {
|
|
||||||
final StringBuffer sb=new StringBuffer();
|
|
||||||
if (!this.field.equals(field)) sb.append(this.field).append(':');
|
|
||||||
return sb.append(minInclusive ? '[' : '{')
|
|
||||||
.append((min==null) ? "*" : min.toString())
|
|
||||||
.append(" TO ")
|
|
||||||
.append((max==null) ? "*" : max.toString())
|
|
||||||
.append(maxInclusive ? ']' : '}').toString();
|
|
||||||
}
|
|
||||||
|
|
||||||
//@Override
|
|
||||||
public final boolean equals(final Object o) {
|
|
||||||
if (o==this) return true;
|
|
||||||
if (o==null) return false;
|
|
||||||
if (this.getClass().equals(o.getClass())) {
|
|
||||||
AbstractTrieRangeFilter q=(AbstractTrieRangeFilter)o;
|
|
||||||
return (
|
|
||||||
field==q.field &&
|
|
||||||
(q.min == null ? min == null : q.min.equals(min)) &&
|
|
||||||
(q.max == null ? max == null : q.max.equals(max)) &&
|
|
||||||
minInclusive==q.minInclusive &&
|
|
||||||
maxInclusive==q.maxInclusive &&
|
|
||||||
precisionStep==q.precisionStep
|
|
||||||
);
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
//@Override
|
|
||||||
public final int hashCode() {
|
|
||||||
int hash = field.hashCode() + (precisionStep^0x64365465);
|
|
||||||
if (min!=null) hash += min.hashCode()^0x14fa55fb;
|
|
||||||
if (max!=null) hash += max.hashCode()^0x733fa5fe;
|
|
||||||
return hash+
|
|
||||||
(Boolean.valueOf(minInclusive).hashCode()^0x14fa55fb)+
|
|
||||||
(Boolean.valueOf(maxInclusive).hashCode()^0x733fa5fe);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Expert: Return the number of terms visited during the last execution of {@link #getDocIdSet}.
|
|
||||||
* This may be used for performance comparisons of different trie variants and their effectiveness.
|
|
||||||
* This method is not thread safe, be sure to only call it when no query is running!
|
|
||||||
* @throws IllegalStateException if {@link #getDocIdSet} was not yet executed.
|
|
||||||
*/
|
|
||||||
public int getLastNumberOfTerms() {
|
|
||||||
if (lastNumberOfTerms < 0) throw new IllegalStateException();
|
|
||||||
return lastNumberOfTerms;
|
|
||||||
}
|
|
||||||
|
|
||||||
void resetLastNumberOfTerms() {
|
|
||||||
lastNumberOfTerms=0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Returns this range filter as a query.
|
|
||||||
* Using this method, it is possible to create a Query using <code>new {Long|Int}TrieRangeFilter(....).asQuery()</code>.
|
|
||||||
* This is a synonym for wrapping with a {@link ConstantScoreQuery},
|
|
||||||
* but this query returns a better <code>toString()</code> variant.
|
|
||||||
*/
|
|
||||||
public Query asQuery() {
|
|
||||||
return new ConstantScoreQuery(this) {
|
|
||||||
|
|
||||||
/** this instance return a nicer String variant than the original {@link ConstantScoreQuery} */
|
|
||||||
//@Override
|
|
||||||
public String toString(final String field) {
|
|
||||||
// return a more convenient representation of this query than ConstantScoreQuery does:
|
|
||||||
return ((AbstractTrieRangeFilter) filter).toString(field)+ToStringUtils.boost(getBoost());
|
|
||||||
}
|
|
||||||
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
void fillBits(
|
|
||||||
final IndexReader reader,
|
|
||||||
final OpenBitSet bits, final TermDocs termDocs,
|
|
||||||
final String lowerTerm, final String upperTerm
|
|
||||||
) throws IOException {
|
|
||||||
final int len=lowerTerm.length();
|
|
||||||
assert upperTerm.length()==len;
|
|
||||||
|
|
||||||
// find the docs
|
|
||||||
final TermEnum enumerator = reader.terms(new Term(field, lowerTerm));
|
|
||||||
try {
|
|
||||||
do {
|
|
||||||
final Term term = enumerator.term();
|
|
||||||
if (term!=null && term.field()==field) {
|
|
||||||
// break out when upperTerm reached or length of term is different
|
|
||||||
final String t=term.text();
|
|
||||||
if (len!=t.length() || t.compareTo(upperTerm)>0) break;
|
|
||||||
// we have a good term, find the docs
|
|
||||||
lastNumberOfTerms++;
|
|
||||||
termDocs.seek(enumerator);
|
|
||||||
while (termDocs.next()) bits.set(termDocs.doc());
|
|
||||||
} else break;
|
|
||||||
} while (enumerator.next());
|
|
||||||
} finally {
|
|
||||||
enumerator.close();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// members
|
|
||||||
final String field;
|
|
||||||
final int precisionStep;
|
|
||||||
final Number min,max;
|
|
||||||
final boolean minInclusive,maxInclusive;
|
|
||||||
|
|
||||||
private int lastNumberOfTerms=-1;
|
|
||||||
}
|
|
|
@ -0,0 +1,112 @@
|
||||||
|
package org.apache.lucene.search.trie;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.search.Filter;
|
||||||
|
import org.apache.lucene.search.MultiTermQuery;
|
||||||
|
import org.apache.lucene.search.FilteredTermEnum;
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.util.ToStringUtils;
|
||||||
|
|
||||||
|
abstract class AbstractTrieRangeQuery extends MultiTermQuery {
|
||||||
|
|
||||||
|
AbstractTrieRangeQuery(final String field, final int precisionStep,
|
||||||
|
Number min, Number max, final boolean minInclusive, final boolean maxInclusive
|
||||||
|
) {
|
||||||
|
this.field = field.intern();
|
||||||
|
this.precisionStep = precisionStep;
|
||||||
|
this.min = min;
|
||||||
|
this.max = max;
|
||||||
|
this.minInclusive = minInclusive;
|
||||||
|
this.maxInclusive = maxInclusive;
|
||||||
|
setConstantScoreRewrite(true);
|
||||||
|
}
|
||||||
|
|
||||||
|
abstract void passRanges(TrieRangeTermEnum enumerator);
|
||||||
|
|
||||||
|
//@Override
|
||||||
|
protected FilteredTermEnum getEnum(final IndexReader reader) throws IOException {
|
||||||
|
TrieRangeTermEnum enumerator = new TrieRangeTermEnum(this, reader);
|
||||||
|
passRanges(enumerator);
|
||||||
|
enumerator.init();
|
||||||
|
return enumerator;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns the field name for this query */
|
||||||
|
public String getField() { return field; }
|
||||||
|
|
||||||
|
/** Returns <code>true</code> if the lower endpoint is inclusive */
|
||||||
|
public boolean includesMin() { return minInclusive; }
|
||||||
|
|
||||||
|
/** Returns <code>true</code> if the upper endpoint is inclusive */
|
||||||
|
public boolean includesMax() { return maxInclusive; }
|
||||||
|
|
||||||
|
//@Override
|
||||||
|
public String toString(final String field) {
|
||||||
|
final StringBuffer sb=new StringBuffer();
|
||||||
|
if (!this.field.equals(field)) sb.append(this.field).append(':');
|
||||||
|
return sb.append(minInclusive ? '[' : '{')
|
||||||
|
.append((min==null) ? "*" : min.toString())
|
||||||
|
.append(" TO ")
|
||||||
|
.append((max==null) ? "*" : max.toString())
|
||||||
|
.append(maxInclusive ? ']' : '}').toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
//@Override
|
||||||
|
public final boolean equals(final Object o) {
|
||||||
|
if (o==this) return true;
|
||||||
|
if (o==null) return false;
|
||||||
|
if (this.getClass().equals(o.getClass())) {
|
||||||
|
AbstractTrieRangeQuery q=(AbstractTrieRangeQuery)o;
|
||||||
|
return (
|
||||||
|
field==q.field &&
|
||||||
|
(q.min == null ? min == null : q.min.equals(min)) &&
|
||||||
|
(q.max == null ? max == null : q.max.equals(max)) &&
|
||||||
|
minInclusive==q.minInclusive &&
|
||||||
|
maxInclusive==q.maxInclusive &&
|
||||||
|
precisionStep==q.precisionStep
|
||||||
|
);
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
//@Override
|
||||||
|
public final int hashCode() {
|
||||||
|
int hash = field.hashCode() + (precisionStep^0x64365465);
|
||||||
|
if (min!=null) hash += min.hashCode()^0x14fa55fb;
|
||||||
|
if (max!=null) hash += max.hashCode()^0x733fa5fe;
|
||||||
|
return hash+
|
||||||
|
(Boolean.valueOf(minInclusive).hashCode()^0x14fa55fb)+
|
||||||
|
(Boolean.valueOf(maxInclusive).hashCode()^0x733fa5fe);
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: Make this method accessible by *TrieRangeFilter,
|
||||||
|
// can be removed, when moved to core.
|
||||||
|
//@Override
|
||||||
|
protected Filter getFilter() {
|
||||||
|
return super.getFilter();
|
||||||
|
}
|
||||||
|
|
||||||
|
// members
|
||||||
|
final String field;
|
||||||
|
final int precisionStep;
|
||||||
|
final Number min,max;
|
||||||
|
final boolean minInclusive,maxInclusive;
|
||||||
|
}
|
|
@ -1,101 +1,61 @@
|
||||||
package org.apache.lucene.search.trie;
|
package org.apache.lucene.search.trie;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
* this work for additional information regarding copyright ownership.
|
* this work for additional information regarding copyright ownership.
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
* (the "License"); you may not use this file except in compliance with
|
* (the "License"); you may not use this file except in compliance with
|
||||||
* the License. You may obtain a copy of the License at
|
* the License. You may obtain a copy of the License at
|
||||||
*
|
*
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
*
|
*
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* See the License for the specific language governing permissions and
|
* See the License for the specific language governing permissions and
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
import org.apache.lucene.search.Filter; // for javadocs
|
||||||
|
import org.apache.lucene.search.MultiTermQueryWrapperFilter;
|
||||||
import org.apache.lucene.document.Document;
|
|
||||||
import org.apache.lucene.search.Filter;
|
/**
|
||||||
import org.apache.lucene.search.DocIdSet;
|
* Implementation of a Lucene {@link Filter} that implements trie-based range filtering for ints/floats.
|
||||||
import org.apache.lucene.index.IndexReader;
|
* This filter depends on a specific structure of terms in the index that can only be created
|
||||||
import org.apache.lucene.index.TermDocs;
|
* by indexing via {@link IntTrieTokenStream} methods.
|
||||||
import org.apache.lucene.util.OpenBitSet;
|
* For more information, how the algorithm works, see the {@linkplain org.apache.lucene.search.trie package description}.
|
||||||
|
*/
|
||||||
|
public class IntTrieRangeFilter extends MultiTermQueryWrapperFilter {
|
||||||
/**
|
|
||||||
* Implementation of a Lucene {@link Filter} that implements trie-based range filtering for ints/floats.
|
/**
|
||||||
* This filter depends on a specific structure of terms in the index that can only be created
|
* A trie filter for matching trie coded values using the given field name and
|
||||||
* by indexing via {@link IntTrieTokenStream} methods.
|
* the default helper field.
|
||||||
* For more information, how the algorithm works, see the {@linkplain org.apache.lucene.search.trie package description}.
|
* <code>precisionStep</code> must be equal or a multiple of the <code>precisionStep</code>
|
||||||
*/
|
* used for indexing the values.
|
||||||
public class IntTrieRangeFilter extends AbstractTrieRangeFilter {
|
* You can leave the bounds open, by supplying <code>null</code> for <code>min</code> and/or
|
||||||
|
* <code>max</code>. Inclusive/exclusive bounds can also be supplied.
|
||||||
/**
|
* To filter float values use the converter {@link TrieUtils#floatToSortableInt}.
|
||||||
* A trie filter for matching trie coded values using the given field name and
|
*/
|
||||||
* the default helper field.
|
public IntTrieRangeFilter(final String field, final int precisionStep,
|
||||||
* <code>precisionStep</code> must be equal or a multiple of the <code>precisionStep</code>
|
final Integer min, final Integer max, final boolean minInclusive, final boolean maxInclusive
|
||||||
* used for indexing the values.
|
) {
|
||||||
* You can leave the bounds open, by supplying <code>null</code> for <code>min</code> and/or
|
super(new IntTrieRangeQuery(field,precisionStep,min,max,minInclusive,maxInclusive));
|
||||||
* <code>max</code>. Inclusive/exclusive bounds can also be supplied.
|
}
|
||||||
* To query float values use the converter {@link TrieUtils#floatToSortableInt}.
|
|
||||||
*/
|
/** Returns the field name for this filter */
|
||||||
public IntTrieRangeFilter(final String field, final int precisionStep,
|
public String getField() { return ((IntTrieRangeQuery)query).getField(); }
|
||||||
final Integer min, final Integer max, final boolean minInclusive, final boolean maxInclusive
|
|
||||||
) {
|
/** Returns <code>true</code> if the lower endpoint is inclusive */
|
||||||
super(field,precisionStep,min,max,minInclusive,maxInclusive);
|
public boolean includesMin() { return ((IntTrieRangeQuery)query).includesMin(); }
|
||||||
}
|
|
||||||
|
/** Returns <code>true</code> if the upper endpoint is inclusive */
|
||||||
/**
|
public boolean includesMax() { return ((IntTrieRangeQuery)query).includesMax(); }
|
||||||
* Returns a DocIdSet that provides the documents which should be permitted or prohibited in search results.
|
|
||||||
*/
|
/** Returns the lower value of this range filter */
|
||||||
//@Override
|
public Integer getMin() { return ((IntTrieRangeQuery)query).getMin(); }
|
||||||
public DocIdSet getDocIdSet(final IndexReader reader) throws IOException {
|
|
||||||
// calculate the upper and lower bounds respecting the inclusive and null values.
|
/** Returns the upper value of this range filter */
|
||||||
int minBound=(this.min==null) ? Integer.MIN_VALUE : (
|
public Integer getMax() { return ((IntTrieRangeQuery)query).getMax(); }
|
||||||
minInclusive ? this.min.intValue() : (this.min.intValue()+1)
|
|
||||||
);
|
}
|
||||||
int maxBound=(this.max==null) ? Integer.MAX_VALUE : (
|
|
||||||
maxInclusive ? this.max.intValue() : (this.max.intValue()-1)
|
|
||||||
);
|
|
||||||
|
|
||||||
resetLastNumberOfTerms();
|
|
||||||
if (minBound > maxBound) {
|
|
||||||
// shortcut, no docs will match this
|
|
||||||
return DocIdSet.EMPTY_DOCIDSET;
|
|
||||||
} else {
|
|
||||||
final OpenBitSet bits = new OpenBitSet(reader.maxDoc());
|
|
||||||
final TermDocs termDocs = reader.termDocs();
|
|
||||||
try {
|
|
||||||
TrieUtils.splitIntRange(new TrieUtils.IntRangeBuilder() {
|
|
||||||
|
|
||||||
//@Override
|
|
||||||
public final void addRange(String minPrefixCoded, String maxPrefixCoded) {
|
|
||||||
try {
|
|
||||||
fillBits(
|
|
||||||
reader, bits, termDocs,
|
|
||||||
minPrefixCoded, maxPrefixCoded
|
|
||||||
);
|
|
||||||
} catch (IOException ioe) {
|
|
||||||
// IntRangeBuilder is not allowed to throw checked exceptions:
|
|
||||||
// wrap as RuntimeException
|
|
||||||
throw new RuntimeException(ioe);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}, precisionStep, minBound, maxBound);
|
|
||||||
} catch (RuntimeException e) {
|
|
||||||
if (e.getCause() instanceof IOException) throw (IOException)e.getCause();
|
|
||||||
throw e;
|
|
||||||
} finally {
|
|
||||||
termDocs.close();
|
|
||||||
}
|
|
||||||
return bits;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
|
@ -0,0 +1,66 @@
|
||||||
|
package org.apache.lucene.search.trie;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.search.Query;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Implementation of a Lucene {@link Query} that implements trie-based range querying for ints/floats.
|
||||||
|
* This query depends on a specific structure of terms in the index that can only be created
|
||||||
|
* by indexing via {@link IntTrieTokenStream} methods.
|
||||||
|
* <p>The query is in constant score mode per default. With precision steps of ≤4, this
|
||||||
|
* query can be run in conventional boolean rewrite mode without changing the max clause count.
|
||||||
|
* For more information, how the algorithm works, see the {@linkplain org.apache.lucene.search.trie package description}.
|
||||||
|
*/
|
||||||
|
public class IntTrieRangeQuery extends AbstractTrieRangeQuery {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A trie query for matching trie coded values using the given field name and
|
||||||
|
* the default helper field.
|
||||||
|
* <code>precisionStep</code> must me equal or a multiple of the <code>precisionStep</code>
|
||||||
|
* used for indexing the values.
|
||||||
|
* You can leave the bounds open, by supplying <code>null</code> for <code>min</code> and/or
|
||||||
|
* <code>max</code>. Inclusive/exclusive bounds can also be supplied.
|
||||||
|
* To query float values use the converter {@link TrieUtils#floatToSortableInt}.
|
||||||
|
*/
|
||||||
|
public IntTrieRangeQuery(final String field, final int precisionStep,
|
||||||
|
final Integer min, final Integer max, final boolean minInclusive, final boolean maxInclusive
|
||||||
|
) {
|
||||||
|
super(field,precisionStep,min,max,minInclusive,maxInclusive);
|
||||||
|
}
|
||||||
|
|
||||||
|
//@Override
|
||||||
|
void passRanges(TrieRangeTermEnum enumerator) {
|
||||||
|
// calculate the upper and lower bounds respecting the inclusive and null values.
|
||||||
|
int minBound=(this.min==null) ? Integer.MIN_VALUE : (
|
||||||
|
minInclusive ? this.min.intValue() : (this.min.intValue()+1)
|
||||||
|
);
|
||||||
|
int maxBound=(this.max==null) ? Integer.MAX_VALUE : (
|
||||||
|
maxInclusive ? this.max.intValue() : (this.max.intValue()-1)
|
||||||
|
);
|
||||||
|
|
||||||
|
TrieUtils.splitIntRange(enumerator.getIntRangeBuilder(), precisionStep, minBound, maxBound);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns the lower value of this range query */
|
||||||
|
public Integer getMin() { return (Integer)min; }
|
||||||
|
|
||||||
|
/** Returns the upper value of this range query */
|
||||||
|
public Integer getMax() { return (Integer)max; }
|
||||||
|
|
||||||
|
}
|
|
@ -1,172 +1,172 @@
|
||||||
package org.apache.lucene.search.trie;
|
package org.apache.lucene.search.trie;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
* this work for additional information regarding copyright ownership.
|
* this work for additional information regarding copyright ownership.
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
* (the "License"); you may not use this file except in compliance with
|
* (the "License"); you may not use this file except in compliance with
|
||||||
* the License. You may obtain a copy of the License at
|
* the License. You may obtain a copy of the License at
|
||||||
*
|
*
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
*
|
*
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* See the License for the specific language governing permissions and
|
* See the License for the specific language governing permissions and
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.Token;
|
import org.apache.lucene.analysis.Token;
|
||||||
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This class provides a {@link TokenStream} for indexing <code>int</code> values
|
* This class provides a {@link TokenStream} for indexing <code>int</code> values
|
||||||
* that can be queried by {@link IntTrieRangeFilter}. This stream is not intended
|
* that can be queried by {@link IntTrieRangeFilter}. This stream is not intended
|
||||||
* to be used in analyzers, it's more for iterating the different precisions during
|
* to be used in analyzers, it's more for iterating the different precisions during
|
||||||
* indexing a specific numeric value.
|
* indexing a specific numeric value.
|
||||||
* <p>A <code>int</code> value is indexed as multiple string encoded terms, each reduced
|
* <p>A <code>int</code> value is indexed as multiple string encoded terms, each reduced
|
||||||
* by zeroing bits from the right. Each value is also prefixed (in the first char) by the
|
* by zeroing bits from the right. Each value is also prefixed (in the first char) by the
|
||||||
* <code>shift</code> value (number of bits removed) used during encoding.
|
* <code>shift</code> value (number of bits removed) used during encoding.
|
||||||
* <p>The number of bits removed from the right for each trie entry is called
|
* <p>The number of bits removed from the right for each trie entry is called
|
||||||
* <code>precisionStep</code> in this API. For comparing the different step values, see the
|
* <code>precisionStep</code> in this API. For comparing the different step values, see the
|
||||||
* {@linkplain org.apache.lucene.search.trie package description}.
|
* {@linkplain org.apache.lucene.search.trie package description}.
|
||||||
* <p>The usage pattern is (it is recommended to switch off norms and term frequencies
|
* <p>The usage pattern is (it is recommended to switch off norms and term frequencies
|
||||||
* for numeric fields; it does not make sense to have them):
|
* for numeric fields; it does not make sense to have them):
|
||||||
* <pre>
|
* <pre>
|
||||||
* Field field = new Field(name, new IntTrieTokenStream(value, precisionStep));
|
* Field field = new Field(name, new IntTrieTokenStream(value, precisionStep));
|
||||||
* field.setOmitNorms(true);
|
* field.setOmitNorms(true);
|
||||||
* field.setOmitTermFreqAndPositions(true);
|
* field.setOmitTermFreqAndPositions(true);
|
||||||
* document.add(field);
|
* document.add(field);
|
||||||
* </pre>
|
* </pre>
|
||||||
* <p>For optimal performance, re-use the TokenStream and Field instance
|
* <p>For optimal performance, re-use the TokenStream and Field instance
|
||||||
* for more than one document:
|
* for more than one document:
|
||||||
* <pre>
|
* <pre>
|
||||||
* <em>// init</em>
|
* <em>// init</em>
|
||||||
* TokenStream stream = new IntTrieTokenStream(precisionStep);
|
* TokenStream stream = new IntTrieTokenStream(precisionStep);
|
||||||
* Field field = new Field(name, stream);
|
* Field field = new Field(name, stream);
|
||||||
* field.setOmitNorms(true);
|
* field.setOmitNorms(true);
|
||||||
* field.setOmitTermFreqAndPositions(true);
|
* field.setOmitTermFreqAndPositions(true);
|
||||||
* Document document = new Document();
|
* Document document = new Document();
|
||||||
* document.add(field);
|
* document.add(field);
|
||||||
* <em>// use this code to index many documents:</em>
|
* <em>// use this code to index many documents:</em>
|
||||||
* stream.setValue(value1)
|
* stream.setValue(value1)
|
||||||
* writer.addDocument(document);
|
* writer.addDocument(document);
|
||||||
* stream.setValue(value2)
|
* stream.setValue(value2)
|
||||||
* writer.addDocument(document);
|
* writer.addDocument(document);
|
||||||
* ...
|
* ...
|
||||||
* </pre>
|
* </pre>
|
||||||
* <p><em>Please note:</em> Token streams are read, when the document is added to index.
|
* <p><em>Please note:</em> Token streams are read, when the document is added to index.
|
||||||
* If you index more than one numeric field, use a separate instance for each.
|
* If you index more than one numeric field, use a separate instance for each.
|
||||||
* <p>For more information, how trie fields work, see the
|
* <p>For more information, how trie fields work, see the
|
||||||
* {@linkplain org.apache.lucene.search.trie package description}.
|
* {@linkplain org.apache.lucene.search.trie package description}.
|
||||||
*/
|
*/
|
||||||
public class IntTrieTokenStream extends TokenStream {
|
public class IntTrieTokenStream extends TokenStream {
|
||||||
|
|
||||||
/** The full precision token gets this token type assigned. */
|
/** The full precision token gets this token type assigned. */
|
||||||
public static final String TOKEN_TYPE_FULL_PREC = "fullPrecTrieInt";
|
public static final String TOKEN_TYPE_FULL_PREC = "fullPrecTrieInt";
|
||||||
|
|
||||||
/** The lower precision tokens gets this token type assigned. */
|
/** The lower precision tokens gets this token type assigned. */
|
||||||
public static final String TOKEN_TYPE_LOWER_PREC = "lowerPrecTrieInt";
|
public static final String TOKEN_TYPE_LOWER_PREC = "lowerPrecTrieInt";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a token stream for indexing <code>value</code> with the given
|
* Creates a token stream for indexing <code>value</code> with the given
|
||||||
* <code>precisionStep</code>. As instance creating is a major cost,
|
* <code>precisionStep</code>. As instance creating is a major cost,
|
||||||
* consider using a {@link #IntTrieTokenStream(int)} instance once for
|
* consider using a {@link #IntTrieTokenStream(int)} instance once for
|
||||||
* indexing a large number of documents and assign a value with
|
* indexing a large number of documents and assign a value with
|
||||||
* {@link #setValue} for each document.
|
* {@link #setValue} for each document.
|
||||||
* To index float values use the converter {@link TrieUtils#floatToSortableInt}.
|
* To index float values use the converter {@link TrieUtils#floatToSortableInt}.
|
||||||
*/
|
*/
|
||||||
public IntTrieTokenStream(final int value, final int precisionStep) {
|
public IntTrieTokenStream(final int value, final int precisionStep) {
|
||||||
if (precisionStep<1 || precisionStep>32)
|
if (precisionStep<1 || precisionStep>32)
|
||||||
throw new IllegalArgumentException("precisionStep may only be 1..32");
|
throw new IllegalArgumentException("precisionStep may only be 1..32");
|
||||||
this.value = value;
|
this.value = value;
|
||||||
this.precisionStep = precisionStep;
|
this.precisionStep = precisionStep;
|
||||||
termAtt = (TermAttribute) addAttribute(TermAttribute.class);
|
termAtt = (TermAttribute) addAttribute(TermAttribute.class);
|
||||||
typeAtt = (TypeAttribute) addAttribute(TypeAttribute.class);
|
typeAtt = (TypeAttribute) addAttribute(TypeAttribute.class);
|
||||||
posIncrAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class);
|
posIncrAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class);
|
||||||
shiftAtt = (ShiftAttribute) addAttribute(ShiftAttribute.class);
|
shiftAtt = (ShiftAttribute) addAttribute(ShiftAttribute.class);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a token stream for indexing values with the given
|
* Creates a token stream for indexing values with the given
|
||||||
* <code>precisionStep</code>. This stream is initially "empty"
|
* <code>precisionStep</code>. This stream is initially "empty"
|
||||||
* (using a numeric value of 0), assign a value before indexing
|
* (using a numeric value of 0), assign a value before indexing
|
||||||
* each document using {@link #setValue}.
|
* each document using {@link #setValue}.
|
||||||
*/
|
*/
|
||||||
public IntTrieTokenStream(final int precisionStep) {
|
public IntTrieTokenStream(final int precisionStep) {
|
||||||
this(0, precisionStep);
|
this(0, precisionStep);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Resets the token stream to deliver prefix encoded values
|
* Resets the token stream to deliver prefix encoded values
|
||||||
* for <code>value</code>. Use this method to index the same
|
* for <code>value</code>. Use this method to index the same
|
||||||
* numeric field for a large number of documents and reuse the
|
* numeric field for a large number of documents and reuse the
|
||||||
* current stream instance.
|
* current stream instance.
|
||||||
* To index float values use the converter {@link TrieUtils#floatToSortableInt}.
|
* To index float values use the converter {@link TrieUtils#floatToSortableInt}.
|
||||||
*/
|
*/
|
||||||
public void setValue(final int value) {
|
public void setValue(final int value) {
|
||||||
this.value = value;
|
this.value = value;
|
||||||
reset();
|
reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
// @Override
|
// @Override
|
||||||
public void reset() {
|
public void reset() {
|
||||||
shift = 0;
|
shift = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// @Override
|
// @Override
|
||||||
public boolean incrementToken() {
|
public boolean incrementToken() {
|
||||||
if (shift>=32) return false;
|
if (shift>=32) return false;
|
||||||
final char[] buffer = termAtt.resizeTermBuffer(TrieUtils.INT_BUF_SIZE);
|
final char[] buffer = termAtt.resizeTermBuffer(TrieUtils.INT_BUF_SIZE);
|
||||||
termAtt.setTermLength(TrieUtils.intToPrefixCoded(value, shift, buffer));
|
termAtt.setTermLength(TrieUtils.intToPrefixCoded(value, shift, buffer));
|
||||||
shiftAtt.setShift(shift);
|
shiftAtt.setShift(shift);
|
||||||
if (shift==0) {
|
if (shift==0) {
|
||||||
typeAtt.setType(TOKEN_TYPE_FULL_PREC);
|
typeAtt.setType(TOKEN_TYPE_FULL_PREC);
|
||||||
posIncrAtt.setPositionIncrement(1);
|
posIncrAtt.setPositionIncrement(1);
|
||||||
} else {
|
} else {
|
||||||
typeAtt.setType(TOKEN_TYPE_LOWER_PREC);
|
typeAtt.setType(TOKEN_TYPE_LOWER_PREC);
|
||||||
posIncrAtt.setPositionIncrement(0);
|
posIncrAtt.setPositionIncrement(0);
|
||||||
}
|
}
|
||||||
shift += precisionStep;
|
shift += precisionStep;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// @Override
|
// @Override
|
||||||
/** @deprecated */
|
/** @deprecated */
|
||||||
public Token next(final Token reusableToken) {
|
public Token next(final Token reusableToken) {
|
||||||
if (shift>=32) return null;
|
if (shift>=32) return null;
|
||||||
reusableToken.clear();
|
reusableToken.clear();
|
||||||
final char[] buffer = reusableToken.resizeTermBuffer(TrieUtils.INT_BUF_SIZE);
|
final char[] buffer = reusableToken.resizeTermBuffer(TrieUtils.INT_BUF_SIZE);
|
||||||
reusableToken.setTermLength(TrieUtils.intToPrefixCoded(value, shift, buffer));
|
reusableToken.setTermLength(TrieUtils.intToPrefixCoded(value, shift, buffer));
|
||||||
if (shift==0) {
|
if (shift==0) {
|
||||||
reusableToken.setType(TOKEN_TYPE_FULL_PREC);
|
reusableToken.setType(TOKEN_TYPE_FULL_PREC);
|
||||||
reusableToken.setPositionIncrement(1);
|
reusableToken.setPositionIncrement(1);
|
||||||
} else {
|
} else {
|
||||||
reusableToken.setType(TOKEN_TYPE_LOWER_PREC);
|
reusableToken.setType(TOKEN_TYPE_LOWER_PREC);
|
||||||
reusableToken.setPositionIncrement(0);
|
reusableToken.setPositionIncrement(0);
|
||||||
}
|
}
|
||||||
shift += precisionStep;
|
shift += precisionStep;
|
||||||
return reusableToken;
|
return reusableToken;
|
||||||
}
|
}
|
||||||
|
|
||||||
// @Override
|
// @Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
final StringBuffer sb = new StringBuffer("(trie-int,value=").append(value);
|
final StringBuffer sb = new StringBuffer("(trie-int,value=").append(value);
|
||||||
sb.append(",precisionStep=").append(precisionStep).append(')');
|
sb.append(",precisionStep=").append(precisionStep).append(')');
|
||||||
return sb.toString();
|
return sb.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
// members
|
// members
|
||||||
private final TermAttribute termAtt;
|
private final TermAttribute termAtt;
|
||||||
private final TypeAttribute typeAtt;
|
private final TypeAttribute typeAtt;
|
||||||
private final PositionIncrementAttribute posIncrAtt;
|
private final PositionIncrementAttribute posIncrAtt;
|
||||||
private final ShiftAttribute shiftAtt;
|
private final ShiftAttribute shiftAtt;
|
||||||
|
|
||||||
private int shift = 0;
|
private int shift = 0;
|
||||||
private int value;
|
private int value;
|
||||||
private final int precisionStep;
|
private final int precisionStep;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,101 +1,61 @@
|
||||||
package org.apache.lucene.search.trie;
|
package org.apache.lucene.search.trie;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
* this work for additional information regarding copyright ownership.
|
* this work for additional information regarding copyright ownership.
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
* (the "License"); you may not use this file except in compliance with
|
* (the "License"); you may not use this file except in compliance with
|
||||||
* the License. You may obtain a copy of the License at
|
* the License. You may obtain a copy of the License at
|
||||||
*
|
*
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
*
|
*
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* See the License for the specific language governing permissions and
|
* See the License for the specific language governing permissions and
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
import org.apache.lucene.search.Filter; // for javadocs
|
||||||
|
import org.apache.lucene.search.MultiTermQueryWrapperFilter;
|
||||||
import org.apache.lucene.document.Document;
|
|
||||||
import org.apache.lucene.search.Filter;
|
/**
|
||||||
import org.apache.lucene.search.DocIdSet;
|
* Implementation of a Lucene {@link Filter} that implements trie-based range filtering for longs/doubles.
|
||||||
import org.apache.lucene.index.IndexReader;
|
* This filter depends on a specific structure of terms in the index that can only be created
|
||||||
import org.apache.lucene.index.TermDocs;
|
* by indexing via {@link LongTrieTokenStream} methods.
|
||||||
import org.apache.lucene.util.OpenBitSet;
|
* For more information, how the algorithm works, see the {@linkplain org.apache.lucene.search.trie package description}.
|
||||||
|
*/
|
||||||
|
public class LongTrieRangeFilter extends MultiTermQueryWrapperFilter {
|
||||||
/**
|
|
||||||
* Implementation of a Lucene {@link Filter} that implements trie-based range filtering for longs/doubles.
|
/**
|
||||||
* This filter depends on a specific structure of terms in the index that can only be created
|
* A trie filter for matching trie coded values using the given field name and
|
||||||
* by indexing via {@link LongTrieTokenStream} methods.
|
* the default helper field.
|
||||||
* For more information, how the algorithm works, see the {@linkplain org.apache.lucene.search.trie package description}.
|
* <code>precisionStep</code> must be equal or a multiple of the <code>precisionStep</code>
|
||||||
*/
|
* used for indexing the values.
|
||||||
public class LongTrieRangeFilter extends AbstractTrieRangeFilter {
|
* You can leave the bounds open, by supplying <code>null</code> for <code>min</code> and/or
|
||||||
|
* <code>max</code>. Inclusive/exclusive bounds can also be supplied.
|
||||||
/**
|
* To filter double values use the converter {@link TrieUtils#doubleToSortableLong}.
|
||||||
* A trie filter for matching trie coded values using the given field name and
|
*/
|
||||||
* the default helper field.
|
public LongTrieRangeFilter(final String field, final int precisionStep,
|
||||||
* <code>precisionStep</code> must be equal or a multiple of the <code>precisionStep</code>
|
final Long min, final Long max, final boolean minInclusive, final boolean maxInclusive
|
||||||
* used for indexing the values.
|
) {
|
||||||
* You can leave the bounds open, by supplying <code>null</code> for <code>min</code> and/or
|
super(new LongTrieRangeQuery(field,precisionStep,min,max,minInclusive,maxInclusive));
|
||||||
* <code>max</code>. Inclusive/exclusive bounds can also be supplied.
|
}
|
||||||
* To query double values use the converter {@link TrieUtils#doubleToSortableLong}.
|
|
||||||
*/
|
/** Returns the field name for this filter */
|
||||||
public LongTrieRangeFilter(final String field, final int precisionStep,
|
public String getField() { return ((LongTrieRangeQuery)query).getField(); }
|
||||||
final Long min, final Long max, final boolean minInclusive, final boolean maxInclusive
|
|
||||||
) {
|
/** Returns <code>true</code> if the lower endpoint is inclusive */
|
||||||
super(field,precisionStep,min,max,minInclusive,maxInclusive);
|
public boolean includesMin() { return ((LongTrieRangeQuery)query).includesMin(); }
|
||||||
}
|
|
||||||
|
/** Returns <code>true</code> if the upper endpoint is inclusive */
|
||||||
/**
|
public boolean includesMax() { return ((LongTrieRangeQuery)query).includesMax(); }
|
||||||
* Returns a DocIdSet that provides the documents which should be permitted or prohibited in search results.
|
|
||||||
*/
|
/** Returns the lower value of this range filter */
|
||||||
//@Override
|
public Long getMin() { return ((LongTrieRangeQuery)query).getMin(); }
|
||||||
public DocIdSet getDocIdSet(final IndexReader reader) throws IOException {
|
|
||||||
// calculate the upper and lower bounds respecting the inclusive and null values.
|
/** Returns the upper value of this range filter */
|
||||||
long minBound=(this.min==null) ? Long.MIN_VALUE : (
|
public Long getMax() { return ((LongTrieRangeQuery)query).getMax(); }
|
||||||
minInclusive ? this.min.longValue() : (this.min.longValue()+1L)
|
|
||||||
);
|
}
|
||||||
long maxBound=(this.max==null) ? Long.MAX_VALUE : (
|
|
||||||
maxInclusive ? this.max.longValue() : (this.max.longValue()-1L)
|
|
||||||
);
|
|
||||||
|
|
||||||
resetLastNumberOfTerms();
|
|
||||||
if (minBound > maxBound) {
|
|
||||||
// shortcut, no docs will match this
|
|
||||||
return DocIdSet.EMPTY_DOCIDSET;
|
|
||||||
} else {
|
|
||||||
final OpenBitSet bits = new OpenBitSet(reader.maxDoc());
|
|
||||||
final TermDocs termDocs = reader.termDocs();
|
|
||||||
try {
|
|
||||||
TrieUtils.splitLongRange(new TrieUtils.LongRangeBuilder() {
|
|
||||||
|
|
||||||
//@Override
|
|
||||||
public final void addRange(String minPrefixCoded, String maxPrefixCoded) {
|
|
||||||
try {
|
|
||||||
fillBits(
|
|
||||||
reader, bits, termDocs,
|
|
||||||
minPrefixCoded, maxPrefixCoded
|
|
||||||
);
|
|
||||||
} catch (IOException ioe) {
|
|
||||||
// LongRangeBuilder is not allowed to throw checked exceptions:
|
|
||||||
// wrap as RuntimeException
|
|
||||||
throw new RuntimeException(ioe);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}, precisionStep, minBound, maxBound);
|
|
||||||
} catch (RuntimeException e) {
|
|
||||||
if (e.getCause() instanceof IOException) throw (IOException)e.getCause();
|
|
||||||
throw e;
|
|
||||||
} finally {
|
|
||||||
termDocs.close();
|
|
||||||
}
|
|
||||||
return bits;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
|
@ -0,0 +1,66 @@
|
||||||
|
package org.apache.lucene.search.trie;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.search.Query;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Implementation of a Lucene {@link Query} that implements trie-based range querying for longs/doubles.
|
||||||
|
* This query depends on a specific structure of terms in the index that can only be created
|
||||||
|
* by indexing via {@link LongTrieTokenStream} methods.
|
||||||
|
* <p>The query is in constant score mode per default. With precision steps of ≤4, this
|
||||||
|
* query can be run in conventional boolean rewrite mode without changing the max clause count.
|
||||||
|
* For more information, how the algorithm works, see the {@linkplain org.apache.lucene.search.trie package description}.
|
||||||
|
*/
|
||||||
|
public class LongTrieRangeQuery extends AbstractTrieRangeQuery {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A trie query for matching trie coded values using the given field name and
|
||||||
|
* the default helper field.
|
||||||
|
* <code>precisionStep</code> must be equal or a multiple of the <code>precisionStep</code>
|
||||||
|
* used for indexing the values.
|
||||||
|
* You can leave the bounds open, by supplying <code>null</code> for <code>min</code> and/or
|
||||||
|
* <code>max</code>. Inclusive/exclusive bounds can also be supplied.
|
||||||
|
* To query double values use the converter {@link TrieUtils#doubleToSortableLong}.
|
||||||
|
*/
|
||||||
|
public LongTrieRangeQuery(final String field, final int precisionStep,
|
||||||
|
final Long min, final Long max, final boolean minInclusive, final boolean maxInclusive
|
||||||
|
) {
|
||||||
|
super(field,precisionStep,min,max,minInclusive,maxInclusive);
|
||||||
|
}
|
||||||
|
|
||||||
|
//@Override
|
||||||
|
void passRanges(TrieRangeTermEnum enumerator) {
|
||||||
|
// calculate the upper and lower bounds respecting the inclusive and null values.
|
||||||
|
long minBound=(this.min==null) ? Long.MIN_VALUE : (
|
||||||
|
minInclusive ? this.min.longValue() : (this.min.longValue()+1L)
|
||||||
|
);
|
||||||
|
long maxBound=(this.max==null) ? Long.MAX_VALUE : (
|
||||||
|
maxInclusive ? this.max.longValue() : (this.max.longValue()-1L)
|
||||||
|
);
|
||||||
|
|
||||||
|
TrieUtils.splitLongRange(enumerator.getLongRangeBuilder(), precisionStep, minBound, maxBound);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns the lower value of this range query */
|
||||||
|
public Long getMin() { return (Long)min; }
|
||||||
|
|
||||||
|
/** Returns the upper value of this range query */
|
||||||
|
public Long getMax() { return (Long)max; }
|
||||||
|
|
||||||
|
}
|
|
@ -1,172 +1,172 @@
|
||||||
package org.apache.lucene.search.trie;
|
package org.apache.lucene.search.trie;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
* this work for additional information regarding copyright ownership.
|
* this work for additional information regarding copyright ownership.
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
* (the "License"); you may not use this file except in compliance with
|
* (the "License"); you may not use this file except in compliance with
|
||||||
* the License. You may obtain a copy of the License at
|
* the License. You may obtain a copy of the License at
|
||||||
*
|
*
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
*
|
*
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* See the License for the specific language governing permissions and
|
* See the License for the specific language governing permissions and
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.Token;
|
import org.apache.lucene.analysis.Token;
|
||||||
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This class provides a {@link TokenStream} for indexing <code>long</code> values
|
* This class provides a {@link TokenStream} for indexing <code>long</code> values
|
||||||
* that can be queried by {@link LongTrieRangeFilter}. This stream is not intended
|
* that can be queried by {@link LongTrieRangeFilter}. This stream is not intended
|
||||||
* to be used in analyzers, it is more for iterating the different precisions during
|
* to be used in analyzers, it is more for iterating the different precisions during
|
||||||
* indexing a specific numeric value.
|
* indexing a specific numeric value.
|
||||||
* <p>A <code>long</code> value is indexed as multiple string encoded terms, each reduced
|
* <p>A <code>long</code> value is indexed as multiple string encoded terms, each reduced
|
||||||
* by zeroing bits from the right. Each value is also prefixed (in the first char) by the
|
* by zeroing bits from the right. Each value is also prefixed (in the first char) by the
|
||||||
* <code>shift</code> value (number of bits removed) used during encoding.
|
* <code>shift</code> value (number of bits removed) used during encoding.
|
||||||
* <p>The number of bits removed from the right for each trie entry is called
|
* <p>The number of bits removed from the right for each trie entry is called
|
||||||
* <code>precisionStep</code> in this API. For comparing the different step values, see the
|
* <code>precisionStep</code> in this API. For comparing the different step values, see the
|
||||||
* {@linkplain org.apache.lucene.search.trie package description}.
|
* {@linkplain org.apache.lucene.search.trie package description}.
|
||||||
* <p>The usage pattern is (it is recommended to switch off norms and term frequencies
|
* <p>The usage pattern is (it is recommended to switch off norms and term frequencies
|
||||||
* for numeric fields; it does not make sense to have them):
|
* for numeric fields; it does not make sense to have them):
|
||||||
* <pre>
|
* <pre>
|
||||||
* Field field = new Field(name, new LongTrieTokenStream(value, precisionStep));
|
* Field field = new Field(name, new LongTrieTokenStream(value, precisionStep));
|
||||||
* field.setOmitNorms(true);
|
* field.setOmitNorms(true);
|
||||||
* field.setOmitTermFreqAndPositions(true);
|
* field.setOmitTermFreqAndPositions(true);
|
||||||
* document.add(field);
|
* document.add(field);
|
||||||
* </pre>
|
* </pre>
|
||||||
* <p>For optimal performance, re-use the TokenStream and Field instance
|
* <p>For optimal performance, re-use the TokenStream and Field instance
|
||||||
* for more than one document:
|
* for more than one document:
|
||||||
* <pre>
|
* <pre>
|
||||||
* <em>// init</em>
|
* <em>// init</em>
|
||||||
* TokenStream stream = new LongTrieTokenStream(precisionStep);
|
* TokenStream stream = new LongTrieTokenStream(precisionStep);
|
||||||
* Field field = new Field(name, stream);
|
* Field field = new Field(name, stream);
|
||||||
* field.setOmitNorms(true);
|
* field.setOmitNorms(true);
|
||||||
* field.setOmitTermFreqAndPositions(true);
|
* field.setOmitTermFreqAndPositions(true);
|
||||||
* Document document = new Document();
|
* Document document = new Document();
|
||||||
* document.add(field);
|
* document.add(field);
|
||||||
* <em>// use this code to index many documents:</em>
|
* <em>// use this code to index many documents:</em>
|
||||||
* stream.setValue(value1)
|
* stream.setValue(value1)
|
||||||
* writer.addDocument(document);
|
* writer.addDocument(document);
|
||||||
* stream.setValue(value2)
|
* stream.setValue(value2)
|
||||||
* writer.addDocument(document);
|
* writer.addDocument(document);
|
||||||
* ...
|
* ...
|
||||||
* </pre>
|
* </pre>
|
||||||
* <p><em>Please note:</em> Token streams are read, when the document is added to index.
|
* <p><em>Please note:</em> Token streams are read, when the document is added to index.
|
||||||
* If you index more than one numeric field, use a separate instance for each.
|
* If you index more than one numeric field, use a separate instance for each.
|
||||||
* <p>For more information, how trie fields work, see the
|
* <p>For more information, how trie fields work, see the
|
||||||
* {@linkplain org.apache.lucene.search.trie package description}.
|
* {@linkplain org.apache.lucene.search.trie package description}.
|
||||||
*/
|
*/
|
||||||
public class LongTrieTokenStream extends TokenStream {
|
public class LongTrieTokenStream extends TokenStream {
|
||||||
|
|
||||||
/** The full precision token gets this token type assigned. */
|
/** The full precision token gets this token type assigned. */
|
||||||
public static final String TOKEN_TYPE_FULL_PREC = "fullPrecTrieLong";
|
public static final String TOKEN_TYPE_FULL_PREC = "fullPrecTrieLong";
|
||||||
|
|
||||||
/** The lower precision tokens get this token type assigned. */
|
/** The lower precision tokens get this token type assigned. */
|
||||||
public static final String TOKEN_TYPE_LOWER_PREC = "lowerPrecTrieLong";
|
public static final String TOKEN_TYPE_LOWER_PREC = "lowerPrecTrieLong";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a token stream for indexing <code>value</code> with the given
|
* Creates a token stream for indexing <code>value</code> with the given
|
||||||
* <code>precisionStep</code>. As instance creating is a major cost,
|
* <code>precisionStep</code>. As instance creating is a major cost,
|
||||||
* consider using a {@link #LongTrieTokenStream(int)} instance once for
|
* consider using a {@link #LongTrieTokenStream(int)} instance once for
|
||||||
* indexing a large number of documents and assign a value with
|
* indexing a large number of documents and assign a value with
|
||||||
* {@link #setValue} for each document.
|
* {@link #setValue} for each document.
|
||||||
* To index double values use the converter {@link TrieUtils#doubleToSortableLong}.
|
* To index double values use the converter {@link TrieUtils#doubleToSortableLong}.
|
||||||
*/
|
*/
|
||||||
public LongTrieTokenStream(final long value, final int precisionStep) {
|
public LongTrieTokenStream(final long value, final int precisionStep) {
|
||||||
if (precisionStep<1 || precisionStep>64)
|
if (precisionStep<1 || precisionStep>64)
|
||||||
throw new IllegalArgumentException("precisionStep may only be 1..64");
|
throw new IllegalArgumentException("precisionStep may only be 1..64");
|
||||||
this.value = value;
|
this.value = value;
|
||||||
this.precisionStep = precisionStep;
|
this.precisionStep = precisionStep;
|
||||||
termAtt = (TermAttribute) addAttribute(TermAttribute.class);
|
termAtt = (TermAttribute) addAttribute(TermAttribute.class);
|
||||||
typeAtt = (TypeAttribute) addAttribute(TypeAttribute.class);
|
typeAtt = (TypeAttribute) addAttribute(TypeAttribute.class);
|
||||||
posIncrAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class);
|
posIncrAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class);
|
||||||
shiftAtt = (ShiftAttribute) addAttribute(ShiftAttribute.class);
|
shiftAtt = (ShiftAttribute) addAttribute(ShiftAttribute.class);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a token stream for indexing values with the given
|
* Creates a token stream for indexing values with the given
|
||||||
* <code>precisionStep</code>. This stream is initially "empty"
|
* <code>precisionStep</code>. This stream is initially "empty"
|
||||||
* (using a numeric value of 0), assign a value before indexing
|
* (using a numeric value of 0), assign a value before indexing
|
||||||
* each document using {@link #setValue}.
|
* each document using {@link #setValue}.
|
||||||
*/
|
*/
|
||||||
public LongTrieTokenStream(final int precisionStep) {
|
public LongTrieTokenStream(final int precisionStep) {
|
||||||
this(0L, precisionStep);
|
this(0L, precisionStep);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Resets the token stream to deliver prefix encoded values
|
* Resets the token stream to deliver prefix encoded values
|
||||||
* for <code>value</code>. Use this method to index the same
|
* for <code>value</code>. Use this method to index the same
|
||||||
* numeric field for a large number of documents and reuse the
|
* numeric field for a large number of documents and reuse the
|
||||||
* current stream instance.
|
* current stream instance.
|
||||||
* To index double values use the converter {@link TrieUtils#doubleToSortableLong}.
|
* To index double values use the converter {@link TrieUtils#doubleToSortableLong}.
|
||||||
*/
|
*/
|
||||||
public void setValue(final long value) {
|
public void setValue(final long value) {
|
||||||
this.value = value;
|
this.value = value;
|
||||||
reset();
|
reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
// @Override
|
// @Override
|
||||||
public void reset() {
|
public void reset() {
|
||||||
shift = 0;
|
shift = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// @Override
|
// @Override
|
||||||
public boolean incrementToken() {
|
public boolean incrementToken() {
|
||||||
if (shift>=64) return false;
|
if (shift>=64) return false;
|
||||||
final char[] buffer = termAtt.resizeTermBuffer(TrieUtils.LONG_BUF_SIZE);
|
final char[] buffer = termAtt.resizeTermBuffer(TrieUtils.LONG_BUF_SIZE);
|
||||||
termAtt.setTermLength(TrieUtils.longToPrefixCoded(value, shift, buffer));
|
termAtt.setTermLength(TrieUtils.longToPrefixCoded(value, shift, buffer));
|
||||||
shiftAtt.setShift(shift);
|
shiftAtt.setShift(shift);
|
||||||
if (shift==0) {
|
if (shift==0) {
|
||||||
typeAtt.setType(TOKEN_TYPE_FULL_PREC);
|
typeAtt.setType(TOKEN_TYPE_FULL_PREC);
|
||||||
posIncrAtt.setPositionIncrement(1);
|
posIncrAtt.setPositionIncrement(1);
|
||||||
} else {
|
} else {
|
||||||
typeAtt.setType(TOKEN_TYPE_LOWER_PREC);
|
typeAtt.setType(TOKEN_TYPE_LOWER_PREC);
|
||||||
posIncrAtt.setPositionIncrement(0);
|
posIncrAtt.setPositionIncrement(0);
|
||||||
}
|
}
|
||||||
shift += precisionStep;
|
shift += precisionStep;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// @Override
|
// @Override
|
||||||
/** @deprecated */
|
/** @deprecated */
|
||||||
public Token next(final Token reusableToken) {
|
public Token next(final Token reusableToken) {
|
||||||
if (shift>=64) return null;
|
if (shift>=64) return null;
|
||||||
reusableToken.clear();
|
reusableToken.clear();
|
||||||
final char[] buffer = reusableToken.resizeTermBuffer(TrieUtils.LONG_BUF_SIZE);
|
final char[] buffer = reusableToken.resizeTermBuffer(TrieUtils.LONG_BUF_SIZE);
|
||||||
reusableToken.setTermLength(TrieUtils.longToPrefixCoded(value, shift, buffer));
|
reusableToken.setTermLength(TrieUtils.longToPrefixCoded(value, shift, buffer));
|
||||||
if (shift==0) {
|
if (shift==0) {
|
||||||
reusableToken.setType(TOKEN_TYPE_FULL_PREC);
|
reusableToken.setType(TOKEN_TYPE_FULL_PREC);
|
||||||
reusableToken.setPositionIncrement(1);
|
reusableToken.setPositionIncrement(1);
|
||||||
} else {
|
} else {
|
||||||
reusableToken.setType(TOKEN_TYPE_LOWER_PREC);
|
reusableToken.setType(TOKEN_TYPE_LOWER_PREC);
|
||||||
reusableToken.setPositionIncrement(0);
|
reusableToken.setPositionIncrement(0);
|
||||||
}
|
}
|
||||||
shift += precisionStep;
|
shift += precisionStep;
|
||||||
return reusableToken;
|
return reusableToken;
|
||||||
}
|
}
|
||||||
|
|
||||||
// @Override
|
// @Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
final StringBuffer sb = new StringBuffer("(trie-long,value=").append(value);
|
final StringBuffer sb = new StringBuffer("(trie-long,value=").append(value);
|
||||||
sb.append(",precisionStep=").append(precisionStep).append(')');
|
sb.append(",precisionStep=").append(precisionStep).append(')');
|
||||||
return sb.toString();
|
return sb.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
// members
|
// members
|
||||||
private final TermAttribute termAtt;
|
private final TermAttribute termAtt;
|
||||||
private final TypeAttribute typeAtt;
|
private final TypeAttribute typeAtt;
|
||||||
private final PositionIncrementAttribute posIncrAtt;
|
private final PositionIncrementAttribute posIncrAtt;
|
||||||
private final ShiftAttribute shiftAtt;
|
private final ShiftAttribute shiftAtt;
|
||||||
|
|
||||||
private int shift = 0;
|
private int shift = 0;
|
||||||
private long value;
|
private long value;
|
||||||
private final int precisionStep;
|
private final int precisionStep;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,70 +1,70 @@
|
||||||
package org.apache.lucene.search.trie;
|
package org.apache.lucene.search.trie;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
* this work for additional information regarding copyright ownership.
|
* this work for additional information regarding copyright ownership.
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
* (the "License"); you may not use this file except in compliance with
|
* (the "License"); you may not use this file except in compliance with
|
||||||
* the License. You may obtain a copy of the License at
|
* the License. You may obtain a copy of the License at
|
||||||
*
|
*
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
*
|
*
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* See the License for the specific language governing permissions and
|
* See the License for the specific language governing permissions and
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import org.apache.lucene.util.Attribute;
|
import org.apache.lucene.util.Attribute;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This attribute is updated by {@link IntTrieTokenStream} and {@link LongTrieTokenStream}
|
* This attribute is updated by {@link IntTrieTokenStream} and {@link LongTrieTokenStream}
|
||||||
* to the shift value of the current prefix-encoded token.
|
* to the shift value of the current prefix-encoded token.
|
||||||
* It may be used by filters or consumers to e.g. distribute the values to various fields.
|
* It may be used by filters or consumers to e.g. distribute the values to various fields.
|
||||||
*/
|
*/
|
||||||
public final class ShiftAttribute extends Attribute implements Cloneable, Serializable {
|
public final class ShiftAttribute extends Attribute implements Cloneable, Serializable {
|
||||||
private int shift = 0;
|
private int shift = 0;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the shift value of the current prefix encoded token.
|
* Returns the shift value of the current prefix encoded token.
|
||||||
*/
|
*/
|
||||||
public int getShift() {
|
public int getShift() {
|
||||||
return shift;
|
return shift;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sets the shift value.
|
* Sets the shift value.
|
||||||
*/
|
*/
|
||||||
public void setShift(final int shift) {
|
public void setShift(final int shift) {
|
||||||
this.shift = shift;
|
this.shift = shift;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void clear() {
|
public void clear() {
|
||||||
shift = 0;
|
shift = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return "shift=" + shift;
|
return "shift=" + shift;
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean equals(Object other) {
|
public boolean equals(Object other) {
|
||||||
if (this == other) return true;
|
if (this == other) return true;
|
||||||
if (other instanceof ShiftAttribute) {
|
if (other instanceof ShiftAttribute) {
|
||||||
return ((ShiftAttribute) other).shift == shift;
|
return ((ShiftAttribute) other).shift == shift;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
public int hashCode() {
|
public int hashCode() {
|
||||||
return shift;
|
return shift;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void copyTo(Attribute target) {
|
public void copyTo(Attribute target) {
|
||||||
final ShiftAttribute t = (ShiftAttribute) target;
|
final ShiftAttribute t = (ShiftAttribute) target;
|
||||||
t.setShift(shift);
|
t.setShift(shift);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,140 @@
|
||||||
|
package org.apache.lucene.search.trie;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.LinkedList;
|
||||||
|
|
||||||
|
import org.apache.lucene.search.FilteredTermEnum;
|
||||||
|
import org.apache.lucene.search.MultiTermQuery; // for javadocs
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Subclass of FilteredTermEnum for enumerating all terms that match the
|
||||||
|
* sub-ranges for trie range queries.
|
||||||
|
* <p>
|
||||||
|
* WARNING: Term enumerations is not guaranteed to be always ordered by
|
||||||
|
* {@link Term#compareTo}.
|
||||||
|
* The ordering depends on how {@link TrieUtils#splitLongRange} and
|
||||||
|
* {@link TrieUtils#splitIntRange} generates the sub-ranges. For
|
||||||
|
* the {@link MultiTermQuery} ordering is not relevant.
|
||||||
|
*/
|
||||||
|
final class TrieRangeTermEnum extends FilteredTermEnum {
|
||||||
|
|
||||||
|
private final AbstractTrieRangeQuery query;
|
||||||
|
private final IndexReader reader;
|
||||||
|
private final LinkedList/*<String>*/ rangeBounds = new LinkedList/*<String>*/();
|
||||||
|
private String currentUpperBound = null;
|
||||||
|
|
||||||
|
TrieRangeTermEnum(AbstractTrieRangeQuery query, IndexReader reader) {
|
||||||
|
this.query = query;
|
||||||
|
this.reader = reader;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns a range builder that must be used to feed in the sub-ranges. */
|
||||||
|
TrieUtils.IntRangeBuilder getIntRangeBuilder() {
|
||||||
|
return new TrieUtils.IntRangeBuilder() {
|
||||||
|
//@Override
|
||||||
|
public final void addRange(String minPrefixCoded, String maxPrefixCoded) {
|
||||||
|
rangeBounds.add(minPrefixCoded);
|
||||||
|
rangeBounds.add(maxPrefixCoded);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns a range builder that must be used to feed in the sub-ranges. */
|
||||||
|
TrieUtils.LongRangeBuilder getLongRangeBuilder() {
|
||||||
|
return new TrieUtils.LongRangeBuilder() {
|
||||||
|
//@Override
|
||||||
|
public final void addRange(String minPrefixCoded, String maxPrefixCoded) {
|
||||||
|
rangeBounds.add(minPrefixCoded);
|
||||||
|
rangeBounds.add(maxPrefixCoded);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/** After feeding the range builder call this method to initialize the enum. */
|
||||||
|
void init() throws IOException {
|
||||||
|
next();
|
||||||
|
}
|
||||||
|
|
||||||
|
//@Override
|
||||||
|
public float difference() {
|
||||||
|
return 1.0f;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** this is a dummy, it is not used by this class. */
|
||||||
|
//@Override
|
||||||
|
protected boolean endEnum() {
|
||||||
|
assert false; // should never be called
|
||||||
|
return (currentTerm != null);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compares if current upper bound is reached,
|
||||||
|
* this also updates the term count for statistics.
|
||||||
|
* In contrast to {@link FilteredTermEnum}, a return value
|
||||||
|
* of <code>false</code> ends iterating the current enum
|
||||||
|
* and forwards to the next sub-range.
|
||||||
|
*/
|
||||||
|
//@Override
|
||||||
|
protected boolean termCompare(Term term) {
|
||||||
|
return (term.field() == query.field && term.text().compareTo(currentUpperBound) <= 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Increments the enumeration to the next element. True if one exists. */
|
||||||
|
//@Override
|
||||||
|
public boolean next() throws IOException {
|
||||||
|
// if a current term exists, the actual enum is initialized:
|
||||||
|
// try change to next term, if no such term exists, fall-through
|
||||||
|
if (currentTerm != null) {
|
||||||
|
assert actualEnum!=null;
|
||||||
|
if (actualEnum.next()) {
|
||||||
|
currentTerm = actualEnum.term();
|
||||||
|
if (termCompare(currentTerm)) return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// if all above fails, we go forward to the next enum,
|
||||||
|
// if one is available
|
||||||
|
currentTerm = null;
|
||||||
|
if (rangeBounds.size() < 2) return false;
|
||||||
|
// close the current enum and read next bounds
|
||||||
|
if (actualEnum != null) {
|
||||||
|
actualEnum.close();
|
||||||
|
actualEnum = null;
|
||||||
|
}
|
||||||
|
final String lowerBound = (String)rangeBounds.removeFirst();
|
||||||
|
this.currentUpperBound = (String)rangeBounds.removeFirst();
|
||||||
|
// this call recursively uses next(), if no valid term in
|
||||||
|
// next enum found.
|
||||||
|
// if this behavior is changed/modified in the superclass,
|
||||||
|
// this enum will not work anymore!
|
||||||
|
setEnum(reader.terms(new Term(query.field, lowerBound)));
|
||||||
|
return (currentTerm != null);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Closes the enumeration to further activity, freeing resources. */
|
||||||
|
//@Override
|
||||||
|
public void close() throws IOException {
|
||||||
|
rangeBounds.clear();
|
||||||
|
currentUpperBound = null;
|
||||||
|
super.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -106,14 +106,14 @@ more info about this in the stream documentation)</em></p>
|
||||||
<h3>Searching</h3>
|
<h3>Searching</h3>
|
||||||
|
|
||||||
<p>The numeric index fields you prepared in this way can be searched by
|
<p>The numeric index fields you prepared in this way can be searched by
|
||||||
{@link org.apache.lucene.search.trie.LongTrieRangeFilter} or {@link org.apache.lucene.search.trie.IntTrieRangeFilter}:</p>
|
{@link org.apache.lucene.search.trie.LongTrieRangeQuery} or {@link org.apache.lucene.search.trie.IntTrieRangeQuery}:</p>
|
||||||
|
|
||||||
<pre>
|
<pre>
|
||||||
<em>// Java 1.4, because Long.valueOf(long) is not available:</em>
|
<em>// Java 1.4, because Long.valueOf(long) is not available:</em>
|
||||||
Query q = new LongTrieRangeFilter("exampleLong", precisionStep, new Long(123L), new Long(999999L), true, true).asQuery();
|
Query q = new LongTrieRangeQuery("exampleLong", precisionStep, new Long(123L), new Long(999999L), true, true);
|
||||||
|
|
||||||
<em>// OR, Java 1.5, using autoboxing:</em>
|
<em>// OR, Java 1.5, using autoboxing:</em>
|
||||||
Query q = new LongTrieRangeFilter("exampleLong", precisionStep, 123L, 999999L, true, true).asQuery();
|
Query q = new LongTrieRangeQuery("exampleLong", precisionStep, 123L, 999999L, true, true);
|
||||||
|
|
||||||
<em>// execute the search, as usual:</em>
|
<em>// execute the search, as usual:</em>
|
||||||
TopDocs docs = searcher.search(q, 10);
|
TopDocs docs = searcher.search(q, 10);
|
||||||
|
@ -132,7 +132,7 @@ more info about this in the stream documentation)</em></p>
|
||||||
that the old {@link org.apache.lucene.search.RangeQuery} (with raised
|
that the old {@link org.apache.lucene.search.RangeQuery} (with raised
|
||||||
{@link org.apache.lucene.search.BooleanQuery} clause count) took about 30-40 secs to complete,
|
{@link org.apache.lucene.search.BooleanQuery} clause count) took about 30-40 secs to complete,
|
||||||
{@link org.apache.lucene.search.ConstantScoreRangeQuery} took 5 secs and executing
|
{@link org.apache.lucene.search.ConstantScoreRangeQuery} took 5 secs and executing
|
||||||
{@link org.apache.lucene.search.trie.LongTrieRangeFilter}<code>.asQuery()</code> took <100ms to
|
{@link org.apache.lucene.search.trie.LongTrieRangeQuery} took &lt;100ms to
|
||||||
complete (on an Opteron64 machine, Java 1.5, 8 bit precision step).
|
complete (on an Opteron64 machine, Java 1.5, 8 bit precision step).
|
||||||
This query type was developed for a geographic portal, where the performance for
|
This query type was developed for a geographic portal, where the performance for
|
||||||
e.g. bounding boxes or exact date/time stamps is important.</p>
|
e.g. bounding boxes or exact date/time stamps is important.</p>
|
||||||
|
|
|
@ -1,320 +1,371 @@
|
||||||
package org.apache.lucene.search.trie;
|
package org.apache.lucene.search.trie;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
* this work for additional information regarding copyright ownership.
|
* this work for additional information regarding copyright ownership.
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
* (the "License"); you may not use this file except in compliance with
|
* (the "License"); you may not use this file except in compliance with
|
||||||
* the License. You may obtain a copy of the License at
|
* the License. You may obtain a copy of the License at
|
||||||
*
|
*
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
*
|
*
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* See the License for the specific language governing permissions and
|
* See the License for the specific language governing permissions and
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.WhitespaceAnalyzer;
|
import org.apache.lucene.analysis.WhitespaceAnalyzer;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.index.IndexWriter;
|
import org.apache.lucene.index.IndexWriter;
|
||||||
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
|
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
|
||||||
import org.apache.lucene.store.RAMDirectory;
|
import org.apache.lucene.store.RAMDirectory;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.search.IndexSearcher;
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
import org.apache.lucene.search.ScoreDoc;
|
import org.apache.lucene.search.ScoreDoc;
|
||||||
import org.apache.lucene.search.TopDocs;
|
import org.apache.lucene.search.TopDocs;
|
||||||
import org.apache.lucene.search.Sort;
|
import org.apache.lucene.search.Sort;
|
||||||
import org.apache.lucene.search.RangeQuery;
|
import org.apache.lucene.search.DocIdSet;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.search.RangeQuery;
|
||||||
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
public class TestIntTrieRangeFilter extends LuceneTestCase {
|
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||||
// distance of entries
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
private static final int distance = 6666;
|
|
||||||
// shift the starting of the values to the left, to also have negative values:
|
public class TestIntTrieRangeQuery extends LuceneTestCase {
|
||||||
private static final int startOffset = - 1 << 15;
|
// distance of entries
|
||||||
// number of docs to generate for testing
|
private static final int distance = 6666;
|
||||||
private static final int noDocs = 10000;
|
// shift the starting of the values to the left, to also have negative values:
|
||||||
|
private static final int startOffset = - 1 << 15;
|
||||||
private static Field newField(String name, int precisionStep) {
|
// number of docs to generate for testing
|
||||||
IntTrieTokenStream stream = new IntTrieTokenStream(precisionStep);
|
private static final int noDocs = 10000;
|
||||||
stream.setUseNewAPI(true);
|
|
||||||
Field f=new Field(name, stream);
|
private static Field newField(String name, int precisionStep) {
|
||||||
f.setOmitTermFreqAndPositions(true);
|
IntTrieTokenStream stream = new IntTrieTokenStream(precisionStep);
|
||||||
f.setOmitNorms(true);
|
stream.setUseNewAPI(true);
|
||||||
return f;
|
Field f=new Field(name, stream);
|
||||||
}
|
f.setOmitTermFreqAndPositions(true);
|
||||||
|
f.setOmitNorms(true);
|
||||||
private static final RAMDirectory directory;
|
return f;
|
||||||
private static final IndexSearcher searcher;
|
}
|
||||||
static {
|
|
||||||
try {
|
private static final RAMDirectory directory;
|
||||||
directory = new RAMDirectory();
|
private static final IndexSearcher searcher;
|
||||||
IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(),
|
static {
|
||||||
true, MaxFieldLength.UNLIMITED);
|
try {
|
||||||
|
// set the theoretical maximum term count for 8bit (see docs for the number)
|
||||||
Field
|
BooleanQuery.setMaxClauseCount(3*255*2 + 255);
|
||||||
field8 = newField("field8", 8),
|
|
||||||
field4 = newField("field4", 4),
|
directory = new RAMDirectory();
|
||||||
field2 = newField("field2", 2),
|
IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(),
|
||||||
ascfield8 = newField("ascfield8", 8),
|
true, MaxFieldLength.UNLIMITED);
|
||||||
ascfield4 = newField("ascfield4", 4),
|
|
||||||
ascfield2 = newField("ascfield2", 2);
|
Field
|
||||||
|
field8 = newField("field8", 8),
|
||||||
// Add a series of noDocs docs with increasing int values
|
field4 = newField("field4", 4),
|
||||||
for (int l=0; l<noDocs; l++) {
|
field2 = newField("field2", 2),
|
||||||
Document doc=new Document();
|
ascfield8 = newField("ascfield8", 8),
|
||||||
// add fields, that have a distance to test general functionality
|
ascfield4 = newField("ascfield4", 4),
|
||||||
int val=distance*l+startOffset;
|
ascfield2 = newField("ascfield2", 2);
|
||||||
doc.add(new Field("value", TrieUtils.intToPrefixCoded(val), Field.Store.YES, Field.Index.NO));
|
|
||||||
((IntTrieTokenStream)field8.tokenStreamValue()).setValue(val);
|
// Add a series of noDocs docs with increasing int values
|
||||||
doc.add(field8);
|
for (int l=0; l<noDocs; l++) {
|
||||||
((IntTrieTokenStream)field4.tokenStreamValue()).setValue(val);
|
Document doc=new Document();
|
||||||
doc.add(field4);
|
// add fields, that have a distance to test general functionality
|
||||||
((IntTrieTokenStream)field2.tokenStreamValue()).setValue(val);
|
int val=distance*l+startOffset;
|
||||||
doc.add(field2);
|
doc.add(new Field("value", TrieUtils.intToPrefixCoded(val), Field.Store.YES, Field.Index.NO));
|
||||||
// add ascending fields with a distance of 1, beginning at -noDocs/2 to test the correct splitting of range and inclusive/exclusive
|
((IntTrieTokenStream)field8.tokenStreamValue()).setValue(val);
|
||||||
val=l-(noDocs/2);
|
doc.add(field8);
|
||||||
((IntTrieTokenStream)ascfield8.tokenStreamValue()).setValue(val);
|
((IntTrieTokenStream)field4.tokenStreamValue()).setValue(val);
|
||||||
doc.add(ascfield8);
|
doc.add(field4);
|
||||||
((IntTrieTokenStream)ascfield4.tokenStreamValue()).setValue(val);
|
((IntTrieTokenStream)field2.tokenStreamValue()).setValue(val);
|
||||||
doc.add(ascfield4);
|
doc.add(field2);
|
||||||
((IntTrieTokenStream)ascfield2.tokenStreamValue()).setValue(val);
|
// add ascending fields with a distance of 1, beginning at -noDocs/2 to test the correct splitting of range and inclusive/exclusive
|
||||||
doc.add(ascfield2);
|
val=l-(noDocs/2);
|
||||||
writer.addDocument(doc);
|
((IntTrieTokenStream)ascfield8.tokenStreamValue()).setValue(val);
|
||||||
}
|
doc.add(ascfield8);
|
||||||
|
((IntTrieTokenStream)ascfield4.tokenStreamValue()).setValue(val);
|
||||||
writer.optimize();
|
doc.add(ascfield4);
|
||||||
writer.close();
|
((IntTrieTokenStream)ascfield2.tokenStreamValue()).setValue(val);
|
||||||
searcher=new IndexSearcher(directory);
|
doc.add(ascfield2);
|
||||||
} catch (Exception e) {
|
writer.addDocument(doc);
|
||||||
throw new Error(e);
|
}
|
||||||
}
|
|
||||||
}
|
writer.optimize();
|
||||||
|
writer.close();
|
||||||
private void testRange(int precisionStep) throws Exception {
|
searcher=new IndexSearcher(directory);
|
||||||
String field="field"+precisionStep;
|
} catch (Exception e) {
|
||||||
int count=3000;
|
throw new Error(e);
|
||||||
int lower=(distance*3/2)+startOffset, upper=lower + count*distance + (distance/3);
|
}
|
||||||
IntTrieRangeFilter f=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new Integer(upper), true, true);
|
}
|
||||||
TopDocs topDocs = searcher.search(f.asQuery(), null, noDocs, Sort.INDEXORDER);
|
|
||||||
System.out.println("Found "+f.getLastNumberOfTerms()+" distinct terms in range for field '"+field+"'.");
|
/** test for both constant score and boolean query, the other tests only use the constant score mode */
|
||||||
ScoreDoc[] sd = topDocs.scoreDocs;
|
private void testRange(int precisionStep) throws Exception {
|
||||||
assertNotNull(sd);
|
String field="field"+precisionStep;
|
||||||
assertEquals("Score doc count", count, sd.length );
|
int count=3000;
|
||||||
Document doc=searcher.doc(sd[0].doc);
|
int lower=(distance*3/2)+startOffset, upper=lower + count*distance + (distance/3);
|
||||||
assertEquals("First doc", 2*distance+startOffset, TrieUtils.prefixCodedToInt(doc.get("value")) );
|
IntTrieRangeQuery q = new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), true, true);
|
||||||
doc=searcher.doc(sd[sd.length-1].doc);
|
IntTrieRangeFilter f = new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new Integer(upper), true, true);
|
||||||
assertEquals("Last doc", (1+count)*distance+startOffset, TrieUtils.prefixCodedToInt(doc.get("value")) );
|
int lastTerms = 0;
|
||||||
}
|
for (byte i=0; i<3; i++) {
|
||||||
|
TopDocs topDocs;
|
||||||
public void testRange_8bit() throws Exception {
|
int terms;
|
||||||
testRange(8);
|
String type;
|
||||||
}
|
q.clearTotalNumberOfTerms();
|
||||||
|
f.clearTotalNumberOfTerms();
|
||||||
public void testRange_4bit() throws Exception {
|
switch (i) {
|
||||||
testRange(4);
|
case 0:
|
||||||
}
|
type = " (constant score)";
|
||||||
|
q.setConstantScoreRewrite(true);
|
||||||
public void testRange_2bit() throws Exception {
|
topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER);
|
||||||
testRange(2);
|
terms = q.getTotalNumberOfTerms();
|
||||||
}
|
break;
|
||||||
|
case 1:
|
||||||
private void testLeftOpenRange(int precisionStep) throws Exception {
|
type = " (boolean query)";
|
||||||
String field="field"+precisionStep;
|
q.setConstantScoreRewrite(false);
|
||||||
int count=3000;
|
topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER);
|
||||||
int upper=(count-1)*distance + (distance/3) + startOffset;
|
terms = q.getTotalNumberOfTerms();
|
||||||
IntTrieRangeFilter f=new IntTrieRangeFilter(field, precisionStep, null, new Integer(upper), true, true);
|
break;
|
||||||
TopDocs topDocs = searcher.search(f.asQuery(), null, noDocs, Sort.INDEXORDER);
|
case 2:
|
||||||
System.out.println("Found "+f.getLastNumberOfTerms()+" distinct terms in left open range for field '"+field+"'.");
|
type = " (filter)";
|
||||||
ScoreDoc[] sd = topDocs.scoreDocs;
|
topDocs = searcher.search(new MatchAllDocsQuery(), f, noDocs, Sort.INDEXORDER);
|
||||||
assertNotNull(sd);
|
terms = f.getTotalNumberOfTerms();
|
||||||
assertEquals("Score doc count", count, sd.length );
|
break;
|
||||||
Document doc=searcher.doc(sd[0].doc);
|
default:
|
||||||
assertEquals("First doc", startOffset, TrieUtils.prefixCodedToInt(doc.get("value")) );
|
return;
|
||||||
doc=searcher.doc(sd[sd.length-1].doc);
|
}
|
||||||
assertEquals("Last doc", (count-1)*distance+startOffset, TrieUtils.prefixCodedToInt(doc.get("value")) );
|
System.out.println("Found "+terms+" distinct terms in range for field '"+field+"'"+type+".");
|
||||||
}
|
ScoreDoc[] sd = topDocs.scoreDocs;
|
||||||
|
assertNotNull(sd);
|
||||||
public void testLeftOpenRange_8bit() throws Exception {
|
assertEquals("Score doc count"+type, count, sd.length );
|
||||||
testLeftOpenRange(8);
|
Document doc=searcher.doc(sd[0].doc);
|
||||||
}
|
assertEquals("First doc"+type, 2*distance+startOffset, TrieUtils.prefixCodedToInt(doc.get("value")) );
|
||||||
|
doc=searcher.doc(sd[sd.length-1].doc);
|
||||||
public void testLeftOpenRange_4bit() throws Exception {
|
assertEquals("Last doc"+type, (1+count)*distance+startOffset, TrieUtils.prefixCodedToInt(doc.get("value")) );
|
||||||
testLeftOpenRange(4);
|
if (i>0) {
|
||||||
}
|
assertEquals("Distinct term number is equal for all query types", lastTerms, terms);
|
||||||
|
}
|
||||||
public void testLeftOpenRange_2bit() throws Exception {
|
lastTerms = terms;
|
||||||
testLeftOpenRange(2);
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void testRightOpenRange(int precisionStep) throws Exception {
|
public void testRange_8bit() throws Exception {
|
||||||
String field="field"+precisionStep;
|
testRange(8);
|
||||||
int count=3000;
|
}
|
||||||
int lower=(count-1)*distance + (distance/3) +startOffset;
|
|
||||||
IntTrieRangeFilter f=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), null, true, true);
|
public void testRange_4bit() throws Exception {
|
||||||
TopDocs topDocs = searcher.search(f.asQuery(), null, noDocs, Sort.INDEXORDER);
|
testRange(4);
|
||||||
System.out.println("Found "+f.getLastNumberOfTerms()+" distinct terms in right open range for field '"+field+"'.");
|
}
|
||||||
ScoreDoc[] sd = topDocs.scoreDocs;
|
|
||||||
assertNotNull(sd);
|
public void testRange_2bit() throws Exception {
|
||||||
assertEquals("Score doc count", noDocs-count, sd.length );
|
testRange(2);
|
||||||
Document doc=searcher.doc(sd[0].doc);
|
}
|
||||||
assertEquals("First doc", count*distance+startOffset, TrieUtils.prefixCodedToInt(doc.get("value")) );
|
|
||||||
doc=searcher.doc(sd[sd.length-1].doc);
|
public void testInverseRange() throws Exception {
|
||||||
assertEquals("Last doc", (noDocs-1)*distance+startOffset, TrieUtils.prefixCodedToInt(doc.get("value")) );
|
IntTrieRangeFilter f = new IntTrieRangeFilter("field8", 8, new Integer(1000), new Integer(-1000), true, true);
|
||||||
}
|
assertSame("A inverse range should return the EMPTY_DOCIDSET instance", DocIdSet.EMPTY_DOCIDSET, f.getDocIdSet(searcher.getIndexReader()));
|
||||||
|
}
|
||||||
public void testRightOpenRange_8bit() throws Exception {
|
|
||||||
testRightOpenRange(8);
|
private void testLeftOpenRange(int precisionStep) throws Exception {
|
||||||
}
|
String field="field"+precisionStep;
|
||||||
|
int count=3000;
|
||||||
public void testRightOpenRange_4bit() throws Exception {
|
int upper=(count-1)*distance + (distance/3) + startOffset;
|
||||||
testRightOpenRange(4);
|
IntTrieRangeQuery q=new IntTrieRangeQuery(field, precisionStep, null, new Integer(upper), true, true);
|
||||||
}
|
TopDocs topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER);
|
||||||
|
System.out.println("Found "+q.getTotalNumberOfTerms()+" distinct terms in left open range for field '"+field+"'.");
|
||||||
public void testRightOpenRange_2bit() throws Exception {
|
ScoreDoc[] sd = topDocs.scoreDocs;
|
||||||
testRightOpenRange(2);
|
assertNotNull(sd);
|
||||||
}
|
assertEquals("Score doc count", count, sd.length );
|
||||||
|
Document doc=searcher.doc(sd[0].doc);
|
||||||
private void testRandomTrieAndClassicRangeQuery(int precisionStep) throws Exception {
|
assertEquals("First doc", startOffset, TrieUtils.prefixCodedToInt(doc.get("value")) );
|
||||||
final Random rnd=newRandom();
|
doc=searcher.doc(sd[sd.length-1].doc);
|
||||||
String field="field"+precisionStep;
|
assertEquals("Last doc", (count-1)*distance+startOffset, TrieUtils.prefixCodedToInt(doc.get("value")) );
|
||||||
int termCount=0;
|
}
|
||||||
for (int i=0; i<50; i++) {
|
|
||||||
int lower=(int)(rnd.nextDouble()*noDocs*distance)+startOffset;
|
public void testLeftOpenRange_8bit() throws Exception {
|
||||||
int upper=(int)(rnd.nextDouble()*noDocs*distance)+startOffset;
|
testLeftOpenRange(8);
|
||||||
if (lower>upper) {
|
}
|
||||||
int a=lower; lower=upper; upper=a;
|
|
||||||
}
|
public void testLeftOpenRange_4bit() throws Exception {
|
||||||
// test inclusive range
|
testLeftOpenRange(4);
|
||||||
IntTrieRangeFilter tf=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new Integer(upper), true, true);
|
}
|
||||||
RangeQuery cq=new RangeQuery(field, TrieUtils.intToPrefixCoded(lower), TrieUtils.intToPrefixCoded(upper), true, true);
|
|
||||||
cq.setConstantScoreRewrite(true);
|
public void testLeftOpenRange_2bit() throws Exception {
|
||||||
TopDocs tTopDocs = searcher.search(tf.asQuery(), 1);
|
testLeftOpenRange(2);
|
||||||
TopDocs cTopDocs = searcher.search(cq, 1);
|
}
|
||||||
assertEquals("Returned count for IntTrieRangeFilter and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
|
|
||||||
termCount += tf.getLastNumberOfTerms();
|
private void testRightOpenRange(int precisionStep) throws Exception {
|
||||||
// test exclusive range
|
String field="field"+precisionStep;
|
||||||
tf=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new Integer(upper), false, false);
|
int count=3000;
|
||||||
cq=new RangeQuery(field, TrieUtils.intToPrefixCoded(lower), TrieUtils.intToPrefixCoded(upper), false, false);
|
int lower=(count-1)*distance + (distance/3) +startOffset;
|
||||||
cq.setConstantScoreRewrite(true);
|
IntTrieRangeQuery q=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), null, true, true);
|
||||||
tTopDocs = searcher.search(tf.asQuery(), 1);
|
TopDocs topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER);
|
||||||
cTopDocs = searcher.search(cq, 1);
|
System.out.println("Found "+q.getTotalNumberOfTerms()+" distinct terms in right open range for field '"+field+"'.");
|
||||||
assertEquals("Returned count for IntTrieRangeFilter and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
|
ScoreDoc[] sd = topDocs.scoreDocs;
|
||||||
termCount += tf.getLastNumberOfTerms();
|
assertNotNull(sd);
|
||||||
// test left exclusive range
|
assertEquals("Score doc count", noDocs-count, sd.length );
|
||||||
tf=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new Integer(upper), false, true);
|
Document doc=searcher.doc(sd[0].doc);
|
||||||
cq=new RangeQuery(field, TrieUtils.intToPrefixCoded(lower), TrieUtils.intToPrefixCoded(upper), false, true);
|
assertEquals("First doc", count*distance+startOffset, TrieUtils.prefixCodedToInt(doc.get("value")) );
|
||||||
cq.setConstantScoreRewrite(true);
|
doc=searcher.doc(sd[sd.length-1].doc);
|
||||||
tTopDocs = searcher.search(tf.asQuery(), 1);
|
assertEquals("Last doc", (noDocs-1)*distance+startOffset, TrieUtils.prefixCodedToInt(doc.get("value")) );
|
||||||
cTopDocs = searcher.search(cq, 1);
|
}
|
||||||
assertEquals("Returned count for IntTrieRangeFilter and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
|
|
||||||
termCount += tf.getLastNumberOfTerms();
|
public void testRightOpenRange_8bit() throws Exception {
|
||||||
// test right exclusive range
|
testRightOpenRange(8);
|
||||||
tf=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new Integer(upper), true, false);
|
}
|
||||||
cq=new RangeQuery(field, TrieUtils.intToPrefixCoded(lower), TrieUtils.intToPrefixCoded(upper), true, false);
|
|
||||||
cq.setConstantScoreRewrite(true);
|
public void testRightOpenRange_4bit() throws Exception {
|
||||||
tTopDocs = searcher.search(tf.asQuery(), 1);
|
testRightOpenRange(4);
|
||||||
cTopDocs = searcher.search(cq, 1);
|
}
|
||||||
assertEquals("Returned count for IntTrieRangeFilter and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
|
|
||||||
termCount += tf.getLastNumberOfTerms();
|
public void testRightOpenRange_2bit() throws Exception {
|
||||||
}
|
testRightOpenRange(2);
|
||||||
System.out.println("Average number of terms during random search on '" + field + "': " + (((double)termCount)/(50*4)));
|
}
|
||||||
}
|
|
||||||
|
private void testRandomTrieAndClassicRangeQuery(int precisionStep) throws Exception {
|
||||||
public void testRandomTrieAndClassicRangeQuery_8bit() throws Exception {
|
final Random rnd=newRandom();
|
||||||
testRandomTrieAndClassicRangeQuery(8);
|
String field="field"+precisionStep;
|
||||||
}
|
int termCountT=0,termCountC=0;
|
||||||
|
for (int i=0; i<50; i++) {
|
||||||
public void testRandomTrieAndClassicRangeQuery_4bit() throws Exception {
|
int lower=(int)(rnd.nextDouble()*noDocs*distance)+startOffset;
|
||||||
testRandomTrieAndClassicRangeQuery(4);
|
int upper=(int)(rnd.nextDouble()*noDocs*distance)+startOffset;
|
||||||
}
|
if (lower>upper) {
|
||||||
|
int a=lower; lower=upper; upper=a;
|
||||||
public void testRandomTrieAndClassicRangeQuery_2bit() throws Exception {
|
}
|
||||||
testRandomTrieAndClassicRangeQuery(2);
|
// test inclusive range
|
||||||
}
|
IntTrieRangeQuery tq=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), true, true);
|
||||||
|
RangeQuery cq=new RangeQuery(field, TrieUtils.intToPrefixCoded(lower), TrieUtils.intToPrefixCoded(upper), true, true);
|
||||||
private void testRangeSplit(int precisionStep) throws Exception {
|
cq.setConstantScoreRewrite(true);
|
||||||
final Random rnd=newRandom();
|
TopDocs tTopDocs = searcher.search(tq, 1);
|
||||||
String field="ascfield"+precisionStep;
|
TopDocs cTopDocs = searcher.search(cq, 1);
|
||||||
// 50 random tests
|
assertEquals("Returned count for IntTrieRangeQuery and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
|
||||||
for (int i=0; i<50; i++) {
|
termCountT += tq.getTotalNumberOfTerms();
|
||||||
int lower=(int)(rnd.nextDouble()*noDocs - noDocs/2);
|
termCountC += cq.getTotalNumberOfTerms();
|
||||||
int upper=(int)(rnd.nextDouble()*noDocs - noDocs/2);
|
// test exclusive range
|
||||||
if (lower>upper) {
|
tq=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), false, false);
|
||||||
int a=lower; lower=upper; upper=a;
|
cq=new RangeQuery(field, TrieUtils.intToPrefixCoded(lower), TrieUtils.intToPrefixCoded(upper), false, false);
|
||||||
}
|
cq.setConstantScoreRewrite(true);
|
||||||
// test inclusive range
|
tTopDocs = searcher.search(tq, 1);
|
||||||
Query tq=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new Integer(upper), true, true).asQuery();
|
cTopDocs = searcher.search(cq, 1);
|
||||||
TopDocs tTopDocs = searcher.search(tq, 1);
|
assertEquals("Returned count for IntTrieRangeQuery and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
|
||||||
assertEquals("Returned count of range query must be equal to inclusive range length", upper-lower+1, tTopDocs.totalHits );
|
termCountT += tq.getTotalNumberOfTerms();
|
||||||
// test exclusive range
|
termCountC += cq.getTotalNumberOfTerms();
|
||||||
tq=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new Integer(upper), false, false).asQuery();
|
// test left exclusive range
|
||||||
tTopDocs = searcher.search(tq, 1);
|
tq=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), false, true);
|
||||||
assertEquals("Returned count of range query must be equal to exclusive range length", Math.max(upper-lower-1, 0), tTopDocs.totalHits );
|
cq=new RangeQuery(field, TrieUtils.intToPrefixCoded(lower), TrieUtils.intToPrefixCoded(upper), false, true);
|
||||||
// test left exclusive range
|
cq.setConstantScoreRewrite(true);
|
||||||
tq=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new Integer(upper), false, true).asQuery();
|
tTopDocs = searcher.search(tq, 1);
|
||||||
tTopDocs = searcher.search(tq, 1);
|
cTopDocs = searcher.search(cq, 1);
|
||||||
assertEquals("Returned count of range query must be equal to half exclusive range length", upper-lower, tTopDocs.totalHits );
|
assertEquals("Returned count for IntTrieRangeQuery and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
|
||||||
// test right exclusive range
|
termCountT += tq.getTotalNumberOfTerms();
|
||||||
tq=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new Integer(upper), true, false).asQuery();
|
termCountC += cq.getTotalNumberOfTerms();
|
||||||
tTopDocs = searcher.search(tq, 1);
|
// test right exclusive range
|
||||||
assertEquals("Returned count of range query must be equal to half exclusive range length", upper-lower, tTopDocs.totalHits );
|
tq=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), true, false);
|
||||||
}
|
cq=new RangeQuery(field, TrieUtils.intToPrefixCoded(lower), TrieUtils.intToPrefixCoded(upper), true, false);
|
||||||
}
|
cq.setConstantScoreRewrite(true);
|
||||||
|
tTopDocs = searcher.search(tq, 1);
|
||||||
public void testRangeSplit_8bit() throws Exception {
|
cTopDocs = searcher.search(cq, 1);
|
||||||
testRangeSplit(8);
|
assertEquals("Returned count for IntTrieRangeQuery and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
|
||||||
}
|
termCountT += tq.getTotalNumberOfTerms();
|
||||||
|
termCountC += cq.getTotalNumberOfTerms();
|
||||||
public void testRangeSplit_4bit() throws Exception {
|
}
|
||||||
testRangeSplit(4);
|
System.out.println("Average number of terms during random search on '" + field + "':");
|
||||||
}
|
System.out.println(" Trie query: " + (((double)termCountT)/(50*4)));
|
||||||
|
System.out.println(" Classical query: " + (((double)termCountC)/(50*4)));
|
||||||
public void testRangeSplit_2bit() throws Exception {
|
}
|
||||||
testRangeSplit(2);
|
|
||||||
}
|
public void testRandomTrieAndClassicRangeQuery_8bit() throws Exception {
|
||||||
|
testRandomTrieAndClassicRangeQuery(8);
|
||||||
private void testSorting(int precisionStep) throws Exception {
|
}
|
||||||
final Random rnd=newRandom();
|
|
||||||
String field="field"+precisionStep;
|
public void testRandomTrieAndClassicRangeQuery_4bit() throws Exception {
|
||||||
// 10 random tests, the index order is ascending,
|
testRandomTrieAndClassicRangeQuery(4);
|
||||||
// so using a reverse sort field should return descending documents
|
}
|
||||||
for (int i=0; i<10; i++) {
|
|
||||||
int lower=(int)(rnd.nextDouble()*noDocs*distance)+startOffset;
|
public void testRandomTrieAndClassicRangeQuery_2bit() throws Exception {
|
||||||
int upper=(int)(rnd.nextDouble()*noDocs*distance)+startOffset;
|
testRandomTrieAndClassicRangeQuery(2);
|
||||||
if (lower>upper) {
|
}
|
||||||
int a=lower; lower=upper; upper=a;
|
|
||||||
}
|
private void testRangeSplit(int precisionStep) throws Exception {
|
||||||
Query tq=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new Integer(upper), true, true).asQuery();
|
final Random rnd=newRandom();
|
||||||
TopDocs topDocs = searcher.search(tq, null, noDocs, new Sort(TrieUtils.getIntSortField(field, true)));
|
String field="ascfield"+precisionStep;
|
||||||
if (topDocs.totalHits==0) continue;
|
// 50 random tests
|
||||||
ScoreDoc[] sd = topDocs.scoreDocs;
|
for (int i=0; i<50; i++) {
|
||||||
assertNotNull(sd);
|
int lower=(int)(rnd.nextDouble()*noDocs - noDocs/2);
|
||||||
int last=TrieUtils.prefixCodedToInt(searcher.doc(sd[0].doc).get("value"));
|
int upper=(int)(rnd.nextDouble()*noDocs - noDocs/2);
|
||||||
for (int j=1; j<sd.length; j++) {
|
if (lower>upper) {
|
||||||
int act=TrieUtils.prefixCodedToInt(searcher.doc(sd[j].doc).get("value"));
|
int a=lower; lower=upper; upper=a;
|
||||||
assertTrue("Docs should be sorted backwards", last>act );
|
}
|
||||||
last=act;
|
// test inclusive range
|
||||||
}
|
Query tq=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), true, true);
|
||||||
}
|
TopDocs tTopDocs = searcher.search(tq, 1);
|
||||||
}
|
assertEquals("Returned count of range query must be equal to inclusive range length", upper-lower+1, tTopDocs.totalHits );
|
||||||
|
// test exclusive range
|
||||||
public void testSorting_8bit() throws Exception {
|
tq=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), false, false);
|
||||||
testSorting(8);
|
tTopDocs = searcher.search(tq, 1);
|
||||||
}
|
assertEquals("Returned count of range query must be equal to exclusive range length", Math.max(upper-lower-1, 0), tTopDocs.totalHits );
|
||||||
|
// test left exclusive range
|
||||||
public void testSorting_4bit() throws Exception {
|
tq=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), false, true);
|
||||||
testSorting(4);
|
tTopDocs = searcher.search(tq, 1);
|
||||||
}
|
assertEquals("Returned count of range query must be equal to half exclusive range length", upper-lower, tTopDocs.totalHits );
|
||||||
|
// test right exclusive range
|
||||||
public void testSorting_2bit() throws Exception {
|
tq=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), true, false);
|
||||||
testSorting(2);
|
tTopDocs = searcher.search(tq, 1);
|
||||||
}
|
assertEquals("Returned count of range query must be equal to half exclusive range length", upper-lower, tTopDocs.totalHits );
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testRangeSplit_8bit() throws Exception {
|
||||||
|
testRangeSplit(8);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testRangeSplit_4bit() throws Exception {
|
||||||
|
testRangeSplit(4);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testRangeSplit_2bit() throws Exception {
|
||||||
|
testRangeSplit(2);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void testSorting(int precisionStep) throws Exception {
|
||||||
|
final Random rnd=newRandom();
|
||||||
|
String field="field"+precisionStep;
|
||||||
|
// 10 random tests, the index order is ascending,
|
||||||
|
// so using a reverse sort field should return descending documents
|
||||||
|
for (int i=0; i<10; i++) {
|
||||||
|
int lower=(int)(rnd.nextDouble()*noDocs*distance)+startOffset;
|
||||||
|
int upper=(int)(rnd.nextDouble()*noDocs*distance)+startOffset;
|
||||||
|
if (lower>upper) {
|
||||||
|
int a=lower; lower=upper; upper=a;
|
||||||
|
}
|
||||||
|
Query tq=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), true, true);
|
||||||
|
TopDocs topDocs = searcher.search(tq, null, noDocs, new Sort(TrieUtils.getIntSortField(field, true)));
|
||||||
|
if (topDocs.totalHits==0) continue;
|
||||||
|
ScoreDoc[] sd = topDocs.scoreDocs;
|
||||||
|
assertNotNull(sd);
|
||||||
|
int last=TrieUtils.prefixCodedToInt(searcher.doc(sd[0].doc).get("value"));
|
||||||
|
for (int j=1; j<sd.length; j++) {
|
||||||
|
int act=TrieUtils.prefixCodedToInt(searcher.doc(sd[j].doc).get("value"));
|
||||||
|
assertTrue("Docs should be sorted backwards", last>act );
|
||||||
|
last=act;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testSorting_8bit() throws Exception {
|
||||||
|
testSorting(8);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testSorting_4bit() throws Exception {
|
||||||
|
testSorting(4);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testSorting_2bit() throws Exception {
|
||||||
|
testSorting(2);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -1,54 +1,54 @@
|
||||||
package org.apache.lucene.search.trie;
|
package org.apache.lucene.search.trie;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
* this work for additional information regarding copyright ownership.
|
* this work for additional information regarding copyright ownership.
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
* (the "License"); you may not use this file except in compliance with
|
* (the "License"); you may not use this file except in compliance with
|
||||||
* the License. You may obtain a copy of the License at
|
* the License. You may obtain a copy of the License at
|
||||||
*
|
*
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
*
|
*
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* See the License for the specific language governing permissions and
|
* See the License for the specific language governing permissions and
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
import org.apache.lucene.analysis.Token;
|
import org.apache.lucene.analysis.Token;
|
||||||
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||||
|
|
||||||
public class TestIntTrieTokenStream extends LuceneTestCase {
|
public class TestIntTrieTokenStream extends LuceneTestCase {
|
||||||
|
|
||||||
static final int precisionStep = 8;
|
static final int precisionStep = 8;
|
||||||
static final int value = 123456;
|
static final int value = 123456;
|
||||||
|
|
||||||
public void testStreamNewAPI() throws Exception {
|
public void testStreamNewAPI() throws Exception {
|
||||||
final IntTrieTokenStream stream=new IntTrieTokenStream(value, precisionStep);
|
final IntTrieTokenStream stream=new IntTrieTokenStream(value, precisionStep);
|
||||||
stream.setUseNewAPI(true);
|
stream.setUseNewAPI(true);
|
||||||
// use getAttribute to test if attributes really exist, if not an IAE will be thrown
|
// use getAttribute to test if attributes really exist, if not an IAE will be thrown
|
||||||
final ShiftAttribute shiftAtt = (ShiftAttribute) stream.getAttribute(ShiftAttribute.class);
|
final ShiftAttribute shiftAtt = (ShiftAttribute) stream.getAttribute(ShiftAttribute.class);
|
||||||
final TermAttribute termAtt = (TermAttribute) stream.getAttribute(TermAttribute.class);
|
final TermAttribute termAtt = (TermAttribute) stream.getAttribute(TermAttribute.class);
|
||||||
for (int shift=0; shift<32; shift+=precisionStep) {
|
for (int shift=0; shift<32; shift+=precisionStep) {
|
||||||
assertTrue("New token is available", stream.incrementToken());
|
assertTrue("New token is available", stream.incrementToken());
|
||||||
assertEquals("Shift value", shift, shiftAtt.getShift());
|
assertEquals("Shift value", shift, shiftAtt.getShift());
|
||||||
assertEquals("Term is correctly encoded", TrieUtils.intToPrefixCoded(value, shift), termAtt.term());
|
assertEquals("Term is correctly encoded", TrieUtils.intToPrefixCoded(value, shift), termAtt.term());
|
||||||
}
|
}
|
||||||
assertFalse("No more tokens available", stream.incrementToken());
|
assertFalse("No more tokens available", stream.incrementToken());
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testStreamOldAPI() throws Exception {
|
public void testStreamOldAPI() throws Exception {
|
||||||
final IntTrieTokenStream stream=new IntTrieTokenStream(value, precisionStep);
|
final IntTrieTokenStream stream=new IntTrieTokenStream(value, precisionStep);
|
||||||
stream.setUseNewAPI(false);
|
stream.setUseNewAPI(false);
|
||||||
Token tok=new Token();
|
Token tok=new Token();
|
||||||
for (int shift=0; shift<32; shift+=precisionStep) {
|
for (int shift=0; shift<32; shift+=precisionStep) {
|
||||||
assertNotNull("New token is available", tok=stream.next(tok));
|
assertNotNull("New token is available", tok=stream.next(tok));
|
||||||
assertEquals("Term is correctly encoded", TrieUtils.intToPrefixCoded(value, shift), tok.term());
|
assertEquals("Term is correctly encoded", TrieUtils.intToPrefixCoded(value, shift), tok.term());
|
||||||
}
|
}
|
||||||
assertNull("No more tokens available", stream.next(tok));
|
assertNull("No more tokens available", stream.next(tok));
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -30,10 +30,13 @@ import org.apache.lucene.search.IndexSearcher;
|
||||||
import org.apache.lucene.search.ScoreDoc;
|
import org.apache.lucene.search.ScoreDoc;
|
||||||
import org.apache.lucene.search.TopDocs;
|
import org.apache.lucene.search.TopDocs;
|
||||||
import org.apache.lucene.search.Sort;
|
import org.apache.lucene.search.Sort;
|
||||||
|
import org.apache.lucene.search.DocIdSet;
|
||||||
import org.apache.lucene.search.RangeQuery;
|
import org.apache.lucene.search.RangeQuery;
|
||||||
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
|
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
|
||||||
public class TestLongTrieRangeFilter extends LuceneTestCase {
|
public class TestLongTrieRangeQuery extends LuceneTestCase {
|
||||||
// distance of entries
|
// distance of entries
|
||||||
private static final long distance = 66666L;
|
private static final long distance = 66666L;
|
||||||
// shift the starting of the values to the left, to also have negative values:
|
// shift the starting of the values to the left, to also have negative values:
|
||||||
|
@ -53,7 +56,10 @@ public class TestLongTrieRangeFilter extends LuceneTestCase {
|
||||||
private static final RAMDirectory directory;
|
private static final RAMDirectory directory;
|
||||||
private static final IndexSearcher searcher;
|
private static final IndexSearcher searcher;
|
||||||
static {
|
static {
|
||||||
try {
|
try {
|
||||||
|
// set the theoretical maximum term count for 8bit (see docs for the number)
|
||||||
|
BooleanQuery.setMaxClauseCount(7*255*2 + 255);
|
||||||
|
|
||||||
directory = new RAMDirectory();
|
directory = new RAMDirectory();
|
||||||
IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(),
|
IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(),
|
||||||
true, MaxFieldLength.UNLIMITED);
|
true, MaxFieldLength.UNLIMITED);
|
||||||
|
@ -97,20 +103,54 @@ public class TestLongTrieRangeFilter extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** test for constant score + boolean query + filter, the other tests only use the constant score mode */
|
||||||
private void testRange(int precisionStep) throws Exception {
|
private void testRange(int precisionStep) throws Exception {
|
||||||
String field="field"+precisionStep;
|
String field="field"+precisionStep;
|
||||||
int count=3000;
|
int count=3000;
|
||||||
long lower=(distance*3/2)+startOffset, upper=lower + count*distance + (distance/3);
|
long lower=(distance*3/2)+startOffset, upper=lower + count*distance + (distance/3);
|
||||||
LongTrieRangeFilter f=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), true, true);
|
LongTrieRangeQuery q = new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), true, true);
|
||||||
TopDocs topDocs = searcher.search(f.asQuery(), null, noDocs, Sort.INDEXORDER);
|
LongTrieRangeFilter f = new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), true, true);
|
||||||
System.out.println("Found "+f.getLastNumberOfTerms()+" distinct terms in range for field '"+field+"'.");
|
int lastTerms = 0;
|
||||||
ScoreDoc[] sd = topDocs.scoreDocs;
|
for (byte i=0; i<3; i++) {
|
||||||
assertNotNull(sd);
|
TopDocs topDocs;
|
||||||
assertEquals("Score doc count", count, sd.length );
|
int terms;
|
||||||
Document doc=searcher.doc(sd[0].doc);
|
String type;
|
||||||
assertEquals("First doc", 2*distance+startOffset, TrieUtils.prefixCodedToLong(doc.get("value")) );
|
q.clearTotalNumberOfTerms();
|
||||||
doc=searcher.doc(sd[sd.length-1].doc);
|
f.clearTotalNumberOfTerms();
|
||||||
assertEquals("Last doc", (1+count)*distance+startOffset, TrieUtils.prefixCodedToLong(doc.get("value")) );
|
switch (i) {
|
||||||
|
case 0:
|
||||||
|
type = " (constant score)";
|
||||||
|
q.setConstantScoreRewrite(true);
|
||||||
|
topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER);
|
||||||
|
terms = q.getTotalNumberOfTerms();
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
type = " (boolean query)";
|
||||||
|
q.setConstantScoreRewrite(false);
|
||||||
|
topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER);
|
||||||
|
terms = q.getTotalNumberOfTerms();
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
type = " (filter)";
|
||||||
|
topDocs = searcher.search(new MatchAllDocsQuery(), f, noDocs, Sort.INDEXORDER);
|
||||||
|
terms = f.getTotalNumberOfTerms();
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
System.out.println("Found "+terms+" distinct terms in range for field '"+field+"'"+type+".");
|
||||||
|
ScoreDoc[] sd = topDocs.scoreDocs;
|
||||||
|
assertNotNull(sd);
|
||||||
|
assertEquals("Score doc count"+type, count, sd.length );
|
||||||
|
Document doc=searcher.doc(sd[0].doc);
|
||||||
|
assertEquals("First doc"+type, 2*distance+startOffset, TrieUtils.prefixCodedToLong(doc.get("value")) );
|
||||||
|
doc=searcher.doc(sd[sd.length-1].doc);
|
||||||
|
assertEquals("Last doc"+type, (1+count)*distance+startOffset, TrieUtils.prefixCodedToLong(doc.get("value")) );
|
||||||
|
if (i>0) {
|
||||||
|
assertEquals("Distinct term number is equal for all query types", lastTerms, terms);
|
||||||
|
}
|
||||||
|
lastTerms = terms;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testRange_8bit() throws Exception {
|
public void testRange_8bit() throws Exception {
|
||||||
|
@ -125,13 +165,18 @@ public class TestLongTrieRangeFilter extends LuceneTestCase {
|
||||||
testRange(2);
|
testRange(2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testInverseRange() throws Exception {
|
||||||
|
LongTrieRangeFilter f = new LongTrieRangeFilter("field8", 8, new Long(1000L), new Long(-1000L), true, true);
|
||||||
|
assertSame("A inverse range should return the EMPTY_DOCIDSET instance", DocIdSet.EMPTY_DOCIDSET, f.getDocIdSet(searcher.getIndexReader()));
|
||||||
|
}
|
||||||
|
|
||||||
private void testLeftOpenRange(int precisionStep) throws Exception {
|
private void testLeftOpenRange(int precisionStep) throws Exception {
|
||||||
String field="field"+precisionStep;
|
String field="field"+precisionStep;
|
||||||
int count=3000;
|
int count=3000;
|
||||||
long upper=(count-1)*distance + (distance/3) + startOffset;
|
long upper=(count-1)*distance + (distance/3) + startOffset;
|
||||||
LongTrieRangeFilter f=new LongTrieRangeFilter(field, precisionStep, null, new Long(upper), true, true);
|
LongTrieRangeQuery q=new LongTrieRangeQuery(field, precisionStep, null, new Long(upper), true, true);
|
||||||
TopDocs topDocs = searcher.search(f.asQuery(), null, noDocs, Sort.INDEXORDER);
|
TopDocs topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER);
|
||||||
System.out.println("Found "+f.getLastNumberOfTerms()+" distinct terms in left open range for field '"+field+"'.");
|
System.out.println("Found "+q.getTotalNumberOfTerms()+" distinct terms in left open range for field '"+field+"'.");
|
||||||
ScoreDoc[] sd = topDocs.scoreDocs;
|
ScoreDoc[] sd = topDocs.scoreDocs;
|
||||||
assertNotNull(sd);
|
assertNotNull(sd);
|
||||||
assertEquals("Score doc count", count, sd.length );
|
assertEquals("Score doc count", count, sd.length );
|
||||||
|
@ -157,9 +202,9 @@ public class TestLongTrieRangeFilter extends LuceneTestCase {
|
||||||
String field="field"+precisionStep;
|
String field="field"+precisionStep;
|
||||||
int count=3000;
|
int count=3000;
|
||||||
long lower=(count-1)*distance + (distance/3) +startOffset;
|
long lower=(count-1)*distance + (distance/3) +startOffset;
|
||||||
LongTrieRangeFilter f=new LongTrieRangeFilter(field, precisionStep, new Long(lower), null, true, true);
|
LongTrieRangeQuery q=new LongTrieRangeQuery(field, precisionStep, new Long(lower), null, true, true);
|
||||||
TopDocs topDocs = searcher.search(f.asQuery(), null, noDocs, Sort.INDEXORDER);
|
TopDocs topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER);
|
||||||
System.out.println("Found "+f.getLastNumberOfTerms()+" distinct terms in right open range for field '"+field+"'.");
|
System.out.println("Found "+q.getTotalNumberOfTerms()+" distinct terms in right open range for field '"+field+"'.");
|
||||||
ScoreDoc[] sd = topDocs.scoreDocs;
|
ScoreDoc[] sd = topDocs.scoreDocs;
|
||||||
assertNotNull(sd);
|
assertNotNull(sd);
|
||||||
assertEquals("Score doc count", noDocs-count, sd.length );
|
assertEquals("Score doc count", noDocs-count, sd.length );
|
||||||
|
@ -184,7 +229,7 @@ public class TestLongTrieRangeFilter extends LuceneTestCase {
|
||||||
private void testRandomTrieAndClassicRangeQuery(int precisionStep) throws Exception {
|
private void testRandomTrieAndClassicRangeQuery(int precisionStep) throws Exception {
|
||||||
final Random rnd=newRandom();
|
final Random rnd=newRandom();
|
||||||
String field="field"+precisionStep;
|
String field="field"+precisionStep;
|
||||||
int termCount=0;
|
int termCountT=0,termCountC=0;
|
||||||
for (int i=0; i<50; i++) {
|
for (int i=0; i<50; i++) {
|
||||||
long lower=(long)(rnd.nextDouble()*noDocs*distance)+startOffset;
|
long lower=(long)(rnd.nextDouble()*noDocs*distance)+startOffset;
|
||||||
long upper=(long)(rnd.nextDouble()*noDocs*distance)+startOffset;
|
long upper=(long)(rnd.nextDouble()*noDocs*distance)+startOffset;
|
||||||
|
@ -192,39 +237,45 @@ public class TestLongTrieRangeFilter extends LuceneTestCase {
|
||||||
long a=lower; lower=upper; upper=a;
|
long a=lower; lower=upper; upper=a;
|
||||||
}
|
}
|
||||||
// test inclusive range
|
// test inclusive range
|
||||||
LongTrieRangeFilter tf=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), true, true);
|
LongTrieRangeQuery tq=new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), true, true);
|
||||||
RangeQuery cq=new RangeQuery(field, TrieUtils.longToPrefixCoded(lower), TrieUtils.longToPrefixCoded(upper), true, true);
|
RangeQuery cq=new RangeQuery(field, TrieUtils.longToPrefixCoded(lower), TrieUtils.longToPrefixCoded(upper), true, true);
|
||||||
cq.setConstantScoreRewrite(true);
|
cq.setConstantScoreRewrite(true);
|
||||||
TopDocs tTopDocs = searcher.search(tf.asQuery(), 1);
|
TopDocs tTopDocs = searcher.search(tq, 1);
|
||||||
TopDocs cTopDocs = searcher.search(cq, 1);
|
TopDocs cTopDocs = searcher.search(cq, 1);
|
||||||
assertEquals("Returned count for LongTrieRangeFilter and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
|
assertEquals("Returned count for LongTrieRangeQuery and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
|
||||||
termCount += tf.getLastNumberOfTerms();
|
termCountT += tq.getTotalNumberOfTerms();
|
||||||
|
termCountC += cq.getTotalNumberOfTerms();
|
||||||
// test exclusive range
|
// test exclusive range
|
||||||
tf=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), false, false);
|
tq=new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), false, false);
|
||||||
cq=new RangeQuery(field, TrieUtils.longToPrefixCoded(lower), TrieUtils.longToPrefixCoded(upper), false, false);
|
cq=new RangeQuery(field, TrieUtils.longToPrefixCoded(lower), TrieUtils.longToPrefixCoded(upper), false, false);
|
||||||
cq.setConstantScoreRewrite(true);
|
cq.setConstantScoreRewrite(true);
|
||||||
tTopDocs = searcher.search(tf.asQuery(), 1);
|
tTopDocs = searcher.search(tq, 1);
|
||||||
cTopDocs = searcher.search(cq, 1);
|
cTopDocs = searcher.search(cq, 1);
|
||||||
assertEquals("Returned count for LongTrieRangeFilter and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
|
assertEquals("Returned count for LongTrieRangeQuery and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
|
||||||
termCount += tf.getLastNumberOfTerms();
|
termCountT += tq.getTotalNumberOfTerms();
|
||||||
|
termCountC += cq.getTotalNumberOfTerms();
|
||||||
// test left exclusive range
|
// test left exclusive range
|
||||||
tf=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), false, true);
|
tq=new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), false, true);
|
||||||
cq=new RangeQuery(field, TrieUtils.longToPrefixCoded(lower), TrieUtils.longToPrefixCoded(upper), false, true);
|
cq=new RangeQuery(field, TrieUtils.longToPrefixCoded(lower), TrieUtils.longToPrefixCoded(upper), false, true);
|
||||||
cq.setConstantScoreRewrite(true);
|
cq.setConstantScoreRewrite(true);
|
||||||
tTopDocs = searcher.search(tf.asQuery(), 1);
|
tTopDocs = searcher.search(tq, 1);
|
||||||
cTopDocs = searcher.search(cq, 1);
|
cTopDocs = searcher.search(cq, 1);
|
||||||
assertEquals("Returned count for LongTrieRangeFilter and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
|
assertEquals("Returned count for LongTrieRangeQuery and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
|
||||||
termCount += tf.getLastNumberOfTerms();
|
termCountT += tq.getTotalNumberOfTerms();
|
||||||
|
termCountC += cq.getTotalNumberOfTerms();
|
||||||
// test right exclusive range
|
// test right exclusive range
|
||||||
tf=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), true, false);
|
tq=new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), true, false);
|
||||||
cq=new RangeQuery(field, TrieUtils.longToPrefixCoded(lower), TrieUtils.longToPrefixCoded(upper), true, false);
|
cq=new RangeQuery(field, TrieUtils.longToPrefixCoded(lower), TrieUtils.longToPrefixCoded(upper), true, false);
|
||||||
cq.setConstantScoreRewrite(true);
|
cq.setConstantScoreRewrite(true);
|
||||||
tTopDocs = searcher.search(tf.asQuery(), 1);
|
tTopDocs = searcher.search(tq, 1);
|
||||||
cTopDocs = searcher.search(cq, 1);
|
cTopDocs = searcher.search(cq, 1);
|
||||||
assertEquals("Returned count for LongTrieRangeFilter and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
|
assertEquals("Returned count for LongTrieRangeQuery and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
|
||||||
termCount += tf.getLastNumberOfTerms();
|
termCountT += tq.getTotalNumberOfTerms();
|
||||||
|
termCountC += cq.getTotalNumberOfTerms();
|
||||||
}
|
}
|
||||||
System.out.println("Average number of terms during random search on '" + field + "': " + (((double)termCount)/(50*4)));
|
System.out.println("Average number of terms during random search on '" + field + "':");
|
||||||
|
System.out.println(" Trie query: " + (((double)termCountT)/(50*4)));
|
||||||
|
System.out.println(" Classical query: " + (((double)termCountC)/(50*4)));
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testRandomTrieAndClassicRangeQuery_8bit() throws Exception {
|
public void testRandomTrieAndClassicRangeQuery_8bit() throws Exception {
|
||||||
|
@ -250,19 +301,19 @@ public class TestLongTrieRangeFilter extends LuceneTestCase {
|
||||||
long a=lower; lower=upper; upper=a;
|
long a=lower; lower=upper; upper=a;
|
||||||
}
|
}
|
||||||
// test inclusive range
|
// test inclusive range
|
||||||
Query tq=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), true, true).asQuery();
|
Query tq=new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), true, true);
|
||||||
TopDocs tTopDocs = searcher.search(tq, 1);
|
TopDocs tTopDocs = searcher.search(tq, 1);
|
||||||
assertEquals("Returned count of range query must be equal to inclusive range length", upper-lower+1, tTopDocs.totalHits );
|
assertEquals("Returned count of range query must be equal to inclusive range length", upper-lower+1, tTopDocs.totalHits );
|
||||||
// test exclusive range
|
// test exclusive range
|
||||||
tq=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), false, false).asQuery();
|
tq=new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), false, false);
|
||||||
tTopDocs = searcher.search(tq, 1);
|
tTopDocs = searcher.search(tq, 1);
|
||||||
assertEquals("Returned count of range query must be equal to exclusive range length", Math.max(upper-lower-1, 0), tTopDocs.totalHits );
|
assertEquals("Returned count of range query must be equal to exclusive range length", Math.max(upper-lower-1, 0), tTopDocs.totalHits );
|
||||||
// test left exclusive range
|
// test left exclusive range
|
||||||
tq=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), false, true).asQuery();
|
tq=new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), false, true);
|
||||||
tTopDocs = searcher.search(tq, 1);
|
tTopDocs = searcher.search(tq, 1);
|
||||||
assertEquals("Returned count of range query must be equal to half exclusive range length", upper-lower, tTopDocs.totalHits );
|
assertEquals("Returned count of range query must be equal to half exclusive range length", upper-lower, tTopDocs.totalHits );
|
||||||
// test right exclusive range
|
// test right exclusive range
|
||||||
tq=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), true, false).asQuery();
|
tq=new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), true, false);
|
||||||
tTopDocs = searcher.search(tq, 1);
|
tTopDocs = searcher.search(tq, 1);
|
||||||
assertEquals("Returned count of range query must be equal to half exclusive range length", upper-lower, tTopDocs.totalHits );
|
assertEquals("Returned count of range query must be equal to half exclusive range length", upper-lower, tTopDocs.totalHits );
|
||||||
}
|
}
|
||||||
|
@ -291,7 +342,7 @@ public class TestLongTrieRangeFilter extends LuceneTestCase {
|
||||||
if (lower>upper) {
|
if (lower>upper) {
|
||||||
long a=lower; lower=upper; upper=a;
|
long a=lower; lower=upper; upper=a;
|
||||||
}
|
}
|
||||||
Query tq=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), true, true).asQuery();
|
Query tq=new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), true, true);
|
||||||
TopDocs topDocs = searcher.search(tq, null, noDocs, new Sort(TrieUtils.getLongSortField(field, true)));
|
TopDocs topDocs = searcher.search(tq, null, noDocs, new Sort(TrieUtils.getLongSortField(field, true)));
|
||||||
if (topDocs.totalHits==0) continue;
|
if (topDocs.totalHits==0) continue;
|
||||||
ScoreDoc[] sd = topDocs.scoreDocs;
|
ScoreDoc[] sd = topDocs.scoreDocs;
|
|
@ -1,54 +1,54 @@
|
||||||
package org.apache.lucene.search.trie;
|
package org.apache.lucene.search.trie;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
* this work for additional information regarding copyright ownership.
|
* this work for additional information regarding copyright ownership.
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
* (the "License"); you may not use this file except in compliance with
|
* (the "License"); you may not use this file except in compliance with
|
||||||
* the License. You may obtain a copy of the License at
|
* the License. You may obtain a copy of the License at
|
||||||
*
|
*
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
*
|
*
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* See the License for the specific language governing permissions and
|
* See the License for the specific language governing permissions and
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
import org.apache.lucene.analysis.Token;
|
import org.apache.lucene.analysis.Token;
|
||||||
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||||
|
|
||||||
public class TestLongTrieTokenStream extends LuceneTestCase {
|
public class TestLongTrieTokenStream extends LuceneTestCase {
|
||||||
|
|
||||||
static final int precisionStep = 8;
|
static final int precisionStep = 8;
|
||||||
static final long value = 4573245871874382L;
|
static final long value = 4573245871874382L;
|
||||||
|
|
||||||
public void testStreamNewAPI() throws Exception {
|
public void testStreamNewAPI() throws Exception {
|
||||||
final LongTrieTokenStream stream=new LongTrieTokenStream(value, precisionStep);
|
final LongTrieTokenStream stream=new LongTrieTokenStream(value, precisionStep);
|
||||||
stream.setUseNewAPI(true);
|
stream.setUseNewAPI(true);
|
||||||
// use getAttribute to test if attributes really exist; if not, an IAE will be thrown
|
// use getAttribute to test if attributes really exist; if not, an IAE will be thrown
|
||||||
final ShiftAttribute shiftAtt = (ShiftAttribute) stream.getAttribute(ShiftAttribute.class);
|
final ShiftAttribute shiftAtt = (ShiftAttribute) stream.getAttribute(ShiftAttribute.class);
|
||||||
final TermAttribute termAtt = (TermAttribute) stream.getAttribute(TermAttribute.class);
|
final TermAttribute termAtt = (TermAttribute) stream.getAttribute(TermAttribute.class);
|
||||||
for (int shift=0; shift<64; shift+=precisionStep) {
|
for (int shift=0; shift<64; shift+=precisionStep) {
|
||||||
assertTrue("New token is available", stream.incrementToken());
|
assertTrue("New token is available", stream.incrementToken());
|
||||||
assertEquals("Shift value", shift, shiftAtt.getShift());
|
assertEquals("Shift value", shift, shiftAtt.getShift());
|
||||||
assertEquals("Term is correctly encoded", TrieUtils.longToPrefixCoded(value, shift), termAtt.term());
|
assertEquals("Term is correctly encoded", TrieUtils.longToPrefixCoded(value, shift), termAtt.term());
|
||||||
}
|
}
|
||||||
assertFalse("No more tokens available", stream.incrementToken());
|
assertFalse("No more tokens available", stream.incrementToken());
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testStreamOldAPI() throws Exception {
|
public void testStreamOldAPI() throws Exception {
|
||||||
final LongTrieTokenStream stream=new LongTrieTokenStream(value, precisionStep);
|
final LongTrieTokenStream stream=new LongTrieTokenStream(value, precisionStep);
|
||||||
stream.setUseNewAPI(false);
|
stream.setUseNewAPI(false);
|
||||||
Token tok=new Token();
|
Token tok=new Token();
|
||||||
for (int shift=0; shift<64; shift+=precisionStep) {
|
for (int shift=0; shift<64; shift+=precisionStep) {
|
||||||
assertNotNull("New token is available", tok=stream.next(tok));
|
assertNotNull("New token is available", tok=stream.next(tok));
|
||||||
assertEquals("Term is correctly encoded", TrieUtils.longToPrefixCoded(value, shift), tok.term());
|
assertEquals("Term is correctly encoded", TrieUtils.longToPrefixCoded(value, shift), tok.term());
|
||||||
}
|
}
|
||||||
assertNull("No more tokens available", stream.next(tok));
|
assertNull("No more tokens available", stream.next(tok));
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue