LUCENE-1602: Rewrite TrieRange to use MultiTermQuery

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@765618 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Uwe Schindler 2009-04-16 12:58:28 +00:00
parent 45986b246c
commit 92c6484161
15 changed files with 1495 additions and 1246 deletions

View File

@@ -1,157 +0,0 @@
package org.apache.lucene.search.trie;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.util.OpenBitSet;
import org.apache.lucene.util.ToStringUtils;
/**
 * Common superclass for the trie-based range filters. Stores the field name,
 * precision step and the (possibly open-ended) numeric bounds; subclasses
 * compute the precision-split term ranges and funnel each range through
 * {@link #fillBits} to collect matching documents into an {@link OpenBitSet}.
 * <p>NOTE(review): {@code lastNumberOfTerms} is mutated without synchronization;
 * per the javadoc on {@link #getLastNumberOfTerms} this class must not be used
 * from multiple threads while a filter execution is running.
 */
abstract class AbstractTrieRangeFilter extends Filter {
/**
 * Stores the range description. {@code min}/{@code max} may be {@code null}
 * for an open bound. The field is interned so that the term loop in
 * {@link #fillBits} and {@link #equals} can compare field names with {@code ==}.
 */
AbstractTrieRangeFilter(final String field, final int precisionStep,
Number min, Number max, final boolean minInclusive, final boolean maxInclusive
) {
this.field=field.intern();
this.precisionStep=precisionStep;
this.min=min;
this.max=max;
this.minInclusive=minInclusive;
this.maxInclusive=maxInclusive;
}
/** Returns the range in standard Lucene syntax, always prefixed by the field name. */
//@Override
public String toString() {
return toString(null);
}
/**
 * Returns the range in standard Lucene range syntax, e.g. {@code field:[1 TO 5]}
 * or {@code {* TO 10}}; the field prefix is omitted when {@code field} equals
 * this filter's field (the caller already knows the field).
 */
public String toString(final String field) {
final StringBuffer sb=new StringBuffer();
if (!this.field.equals(field)) sb.append(this.field).append(':');
return sb.append(minInclusive ? '[' : '{')
.append((min==null) ? "*" : min.toString())
.append(" TO ")
.append((max==null) ? "*" : max.toString())
.append(maxInclusive ? ']' : '}').toString();
}
/**
 * Two filters are equal when they are of the exact same class and describe
 * the same range on the same field. The {@code field==q.field} identity
 * comparison is valid because both strings were interned in the constructor.
 */
//@Override
public final boolean equals(final Object o) {
if (o==this) return true;
if (o==null) return false;
if (this.getClass().equals(o.getClass())) {
AbstractTrieRangeFilter q=(AbstractTrieRangeFilter)o;
return (
field==q.field &&
(q.min == null ? min == null : q.min.equals(min)) &&
(q.max == null ? max == null : q.max.equals(max)) &&
minInclusive==q.minInclusive &&
maxInclusive==q.maxInclusive &&
precisionStep==q.precisionStep
);
}
return false;
}
/**
 * Hash consistent with {@link #equals}: mixes field, precision step, both
 * bounds (when present) and the inclusive flags with fixed XOR constants.
 */
//@Override
public final int hashCode() {
int hash = field.hashCode() + (precisionStep^0x64365465);
if (min!=null) hash += min.hashCode()^0x14fa55fb;
if (max!=null) hash += max.hashCode()^0x733fa5fe;
return hash+
(Boolean.valueOf(minInclusive).hashCode()^0x14fa55fb)+
(Boolean.valueOf(maxInclusive).hashCode()^0x733fa5fe);
}
/**
 * Expert: Return the number of terms visited during the last execution of {@link #getDocIdSet}.
 * This may be used for performance comparisons of different trie variants and their effectiveness.
 * This method is not thread safe, be sure to only call it when no query is running!
 * @throws IllegalStateException if {@link #getDocIdSet} was not yet executed.
 */
public int getLastNumberOfTerms() {
if (lastNumberOfTerms < 0) throw new IllegalStateException();
return lastNumberOfTerms;
}
// Called by subclasses at the start of getDocIdSet(); moves the counter out of
// its initial "never executed" state (-1) so getLastNumberOfTerms() can be used.
void resetLastNumberOfTerms() {
lastNumberOfTerms=0;
}
/** Returns this range filter as a query.
 * Using this method, it is possible to create a Query using <code>new {Long|Int}TrieRangeFilter(....).asQuery()</code>.
 * This is a synonym for wrapping with a {@link ConstantScoreQuery},
 * but this query returns a better <code>toString()</code> variant.
 */
public Query asQuery() {
return new ConstantScoreQuery(this) {
/** this instance return a nicer String variant than the original {@link ConstantScoreQuery} */
//@Override
public String toString(final String field) {
// return a more convenient representation of this query than ConstantScoreQuery does:
return ((AbstractTrieRangeFilter) filter).toString(field)+ToStringUtils.boost(getBoost());
}
};
}
/**
 * Sets the bit for every document containing a term in the inclusive term range
 * {@code [lowerTerm..upperTerm]} of this filter's field. Both terms must be
 * prefix-coded to the same length (same shift value) — see the assert below.
 * Also increments {@code lastNumberOfTerms} once per visited term.
 */
void fillBits(
final IndexReader reader,
final OpenBitSet bits, final TermDocs termDocs,
final String lowerTerm, final String upperTerm
) throws IOException {
final int len=lowerTerm.length();
assert upperTerm.length()==len;
// find the docs
final TermEnum enumerator = reader.terms(new Term(field, lowerTerm));
try {
do {
final Term term = enumerator.term();
// term.field()==field works because Lucene interns term fields and our
// field was interned in the constructor
if (term!=null && term.field()==field) {
// break out when upperTerm reached or length of term is different
final String t=term.text();
if (len!=t.length() || t.compareTo(upperTerm)>0) break;
// we have a good term, find the docs
lastNumberOfTerms++;
termDocs.seek(enumerator);
while (termDocs.next()) bits.set(termDocs.doc());
} else break;
} while (enumerator.next());
} finally {
enumerator.close();
}
}
// members
final String field;                       // interned field name
final int precisionStep;                  // bits stripped per trie level
final Number min,max;                     // null means open bound
final boolean minInclusive,maxInclusive;
private int lastNumberOfTerms=-1;         // -1 = getDocIdSet not yet executed
}

View File

@@ -0,0 +1,112 @@
package org.apache.lucene.search.trie;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.FilteredTermEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.ToStringUtils;
/**
 * Common superclass of the trie range queries, built on top of
 * {@link MultiTermQuery} (LUCENE-1602 rewrite). It stores the field name,
 * precision step and the (possibly open) bounds; a subclass supplies the
 * precision-split ranges to the term enumerator via {@link #passRanges}.
 * Constant score rewrite is switched on by default in the constructor.
 */
abstract class AbstractTrieRangeQuery extends MultiTermQuery {

  /**
   * Stores the range description; {@code min}/{@code max} may be {@code null}
   * for an open bound. The field name is interned so {@link #equals} may
   * compare it with {@code ==}.
   */
  AbstractTrieRangeQuery(final String field, final int precisionStep,
    Number min, Number max, final boolean minInclusive, final boolean maxInclusive
  ) {
    this.field = field.intern();
    this.precisionStep = precisionStep;
    this.min = min;
    this.max = max;
    this.minInclusive = minInclusive;
    this.maxInclusive = maxInclusive;
    setConstantScoreRewrite(true);
  }

  /** Subclasses feed their precision-split sub-ranges into the enumerator here. */
  abstract void passRanges(TrieRangeTermEnum enumerator);

  //@Override
  protected FilteredTermEnum getEnum(final IndexReader reader) throws IOException {
    // create the enumerator, let the subclass register its ranges, then
    // position it on the first matching term before handing it out
    final TrieRangeTermEnum termEnum = new TrieRangeTermEnum(this, reader);
    passRanges(termEnum);
    termEnum.init();
    return termEnum;
  }

  /** Returns the field name for this query */
  public String getField() {
    return field;
  }

  /** Returns <code>true</code> if the lower endpoint is inclusive */
  public boolean includesMin() {
    return minInclusive;
  }

  /** Returns <code>true</code> if the upper endpoint is inclusive */
  public boolean includesMax() {
    return maxInclusive;
  }

  /**
   * Renders the range in standard Lucene syntax, omitting the field prefix
   * when {@code field} matches this query's field.
   */
  //@Override
  public String toString(final String field) {
    final StringBuffer buf = new StringBuffer();
    if (!this.field.equals(field)) {
      buf.append(this.field).append(':');
    }
    buf.append(minInclusive ? '[' : '{');
    buf.append((min == null) ? "*" : min.toString());
    buf.append(" TO ");
    buf.append((max == null) ? "*" : max.toString());
    buf.append(maxInclusive ? ']' : '}');
    return buf.toString();
  }

  /**
   * Equal iff the other object has the exact same class and describes the same
   * range on the same (interned, hence {@code ==}-comparable) field.
   */
  //@Override
  public final boolean equals(final Object o) {
    if (o == this) return true;
    if (o == null || !getClass().equals(o.getClass())) return false;
    final AbstractTrieRangeQuery other = (AbstractTrieRangeQuery) o;
    if (field != other.field) return false;
    if (minInclusive != other.minInclusive) return false;
    if (maxInclusive != other.maxInclusive) return false;
    if (precisionStep != other.precisionStep) return false;
    if (other.min == null ? min != null : !other.min.equals(min)) return false;
    return other.max == null ? max == null : other.max.equals(max);
  }

  /** Hash consistent with {@link #equals}; mixes all range components. */
  //@Override
  public final int hashCode() {
    int h = field.hashCode() + (precisionStep ^ 0x64365465);
    if (min != null) h += min.hashCode() ^ 0x14fa55fb;
    if (max != null) h += max.hashCode() ^ 0x733fa5fe;
    h += Boolean.valueOf(minInclusive).hashCode() ^ 0x14fa55fb;
    h += Boolean.valueOf(maxInclusive).hashCode() ^ 0x733fa5fe;
    return h;
  }

  // TODO: Make this method accessible by *TrieRangeFilter,
  // can be removed, when moved to core.
  //@Override
  protected Filter getFilter() {
    return super.getFilter();
  }

  // members
  final String field;                       // interned field name
  final int precisionStep;                  // bits stripped per trie level
  final Number min,max;                     // null means open bound
  final boolean minInclusive,maxInclusive;
}

View File

@@ -1,101 +1,61 @@
package org.apache.lucene.search.trie; package org.apache.lucene.search.trie;
/** /**
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with * contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership. * this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0 * The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with * (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at * the License. You may obtain a copy of the License at
* *
* http://www.apache.org/licenses/LICENSE-2.0 * http://www.apache.org/licenses/LICENSE-2.0
* *
* Unless required by applicable law or agreed to in writing, software * Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, * distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
import java.io.IOException; import org.apache.lucene.search.Filter; // for javadocs
import org.apache.lucene.search.MultiTermQueryWrapperFilter;
import org.apache.lucene.document.Document;
import org.apache.lucene.search.Filter; /**
import org.apache.lucene.search.DocIdSet; * Implementation of a Lucene {@link Filter} that implements trie-based range filtering for ints/floats.
import org.apache.lucene.index.IndexReader; * This filter depends on a specific structure of terms in the index that can only be created
import org.apache.lucene.index.TermDocs; * by indexing via {@link IntTrieTokenStream} methods.
import org.apache.lucene.util.OpenBitSet; * For more information, how the algorithm works, see the {@linkplain org.apache.lucene.search.trie package description}.
*/
public class IntTrieRangeFilter extends MultiTermQueryWrapperFilter {
/**
* Implementation of a Lucene {@link Filter} that implements trie-based range filtering for ints/floats. /**
* This filter depends on a specific structure of terms in the index that can only be created * A trie filter for matching trie coded values using the given field name and
* by indexing via {@link IntTrieTokenStream} methods. * the default helper field.
* For more information, how the algorithm works, see the {@linkplain org.apache.lucene.search.trie package description}. * <code>precisionStep</code> must me equal or a multiple of the <code>precisionStep</code>
*/ * used for indexing the values.
public class IntTrieRangeFilter extends AbstractTrieRangeFilter { * You can leave the bounds open, by supplying <code>null</code> for <code>min</code> and/or
* <code>max</code>. Inclusive/exclusive bounds can also be supplied.
/** * To filter float values use the converter {@link TrieUtils#floatToSortableInt}.
* A trie filter for matching trie coded values using the given field name and */
* the default helper field. public IntTrieRangeFilter(final String field, final int precisionStep,
* <code>precisionStep</code> must me equal or a multiple of the <code>precisionStep</code> final Integer min, final Integer max, final boolean minInclusive, final boolean maxInclusive
* used for indexing the values. ) {
* You can leave the bounds open, by supplying <code>null</code> for <code>min</code> and/or super(new IntTrieRangeQuery(field,precisionStep,min,max,minInclusive,maxInclusive));
* <code>max</code>. Inclusive/exclusive bounds can also be supplied. }
* To query float values use the converter {@link TrieUtils#floatToSortableInt}.
*/ /** Returns the field name for this filter */
public IntTrieRangeFilter(final String field, final int precisionStep, public String getField() { return ((IntTrieRangeQuery)query).getField(); }
final Integer min, final Integer max, final boolean minInclusive, final boolean maxInclusive
) { /** Returns <code>true</code> if the lower endpoint is inclusive */
super(field,precisionStep,min,max,minInclusive,maxInclusive); public boolean includesMin() { return ((IntTrieRangeQuery)query).includesMin(); }
}
/** Returns <code>true</code> if the upper endpoint is inclusive */
/** public boolean includesMax() { return ((IntTrieRangeQuery)query).includesMax(); }
* Returns a DocIdSet that provides the documents which should be permitted or prohibited in search results.
*/ /** Returns the lower value of this range filter */
//@Override public Integer getMin() { return ((IntTrieRangeQuery)query).getMin(); }
public DocIdSet getDocIdSet(final IndexReader reader) throws IOException {
// calculate the upper and lower bounds respecting the inclusive and null values. /** Returns the upper value of this range filter */
int minBound=(this.min==null) ? Integer.MIN_VALUE : ( public Integer getMax() { return ((IntTrieRangeQuery)query).getMax(); }
minInclusive ? this.min.intValue() : (this.min.intValue()+1)
); }
int maxBound=(this.max==null) ? Integer.MAX_VALUE : (
maxInclusive ? this.max.intValue() : (this.max.intValue()-1)
);
resetLastNumberOfTerms();
if (minBound > maxBound) {
// shortcut, no docs will match this
return DocIdSet.EMPTY_DOCIDSET;
} else {
final OpenBitSet bits = new OpenBitSet(reader.maxDoc());
final TermDocs termDocs = reader.termDocs();
try {
TrieUtils.splitIntRange(new TrieUtils.IntRangeBuilder() {
//@Override
public final void addRange(String minPrefixCoded, String maxPrefixCoded) {
try {
fillBits(
reader, bits, termDocs,
minPrefixCoded, maxPrefixCoded
);
} catch (IOException ioe) {
// IntRangeBuilder is not allowed to throw checked exceptions:
// wrap as RuntimeException
throw new RuntimeException(ioe);
}
}
}, precisionStep, minBound, maxBound);
} catch (RuntimeException e) {
if (e.getCause() instanceof IOException) throw (IOException)e.getCause();
throw e;
} finally {
termDocs.close();
}
return bits;
}
}
}

View File

@@ -0,0 +1,66 @@
package org.apache.lucene.search.trie;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.search.Query;
/**
* Implementation of a Lucene {@link Query} that implements trie-based range querying for ints/floats.
* This query depends on a specific structure of terms in the index that can only be created
* by indexing via {@link IntTrieTokenStream} methods.
* <p>The query is in constant score mode per default. With precision steps of &le;4, this
* query can be run in conventional boolean rewrite mode without changing the max clause count.
* For more information, how the algorithm works, see the {@linkplain org.apache.lucene.search.trie package description}.
*/
public class IntTrieRangeQuery extends AbstractTrieRangeQuery {

  /**
   * A trie query for matching trie coded values using the given field name and
   * the default helper field.
   * <code>precisionStep</code> must be equal to, or a multiple of, the
   * <code>precisionStep</code> used for indexing the values.
   * Bounds may be left open by supplying <code>null</code> for <code>min</code>
   * and/or <code>max</code>; inclusive/exclusive endpoints can also be chosen.
   * To query float values use the converter {@link TrieUtils#floatToSortableInt}.
   */
  public IntTrieRangeQuery(final String field, final int precisionStep,
    final Integer min, final Integer max, final boolean minInclusive, final boolean maxInclusive
  ) {
    super(field, precisionStep, min, max, minInclusive, maxInclusive);
  }

  //@Override
  void passRanges(TrieRangeTermEnum enumerator) {
    // resolve the open/exclusive endpoints into an inclusive [lower..upper]
    // int range before splitting it into trie sub-ranges
    int lower = (min == null) ? Integer.MIN_VALUE : min.intValue();
    if (min != null && !minInclusive) lower++;
    int upper = (max == null) ? Integer.MAX_VALUE : max.intValue();
    if (max != null && !maxInclusive) upper--;
    TrieUtils.splitIntRange(enumerator.getIntRangeBuilder(), precisionStep, lower, upper);
  }

  /** Returns the lower value of this range query */
  public Integer getMin() {
    return (Integer) min;
  }

  /** Returns the upper value of this range query */
  public Integer getMax() {
    return (Integer) max;
  }
}

View File

@@ -1,172 +1,172 @@
package org.apache.lucene.search.trie; package org.apache.lucene.search.trie;
/** /**
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with * contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership. * this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0 * The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with * (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at * the License. You may obtain a copy of the License at
* *
* http://www.apache.org/licenses/LICENSE-2.0 * http://www.apache.org/licenses/LICENSE-2.0
* *
* Unless required by applicable law or agreed to in writing, software * Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, * distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
/** /**
* This class provides a {@link TokenStream} for indexing <code>int</code> values * This class provides a {@link TokenStream} for indexing <code>int</code> values
* that can be queried by {@link IntTrieRangeFilter}. This stream is not intended * that can be queried by {@link IntTrieRangeFilter}. This stream is not intended
* to be used in analyzers, its more for iterating the different precisions during * to be used in analyzers, its more for iterating the different precisions during
* indexing a specific numeric value. * indexing a specific numeric value.
* <p>A <code>int</code> value is indexed as multiple string encoded terms, each reduced * <p>A <code>int</code> value is indexed as multiple string encoded terms, each reduced
* by zeroing bits from the right. Each value is also prefixed (in the first char) by the * by zeroing bits from the right. Each value is also prefixed (in the first char) by the
* <code>shift</code> value (number of bits removed) used during encoding. * <code>shift</code> value (number of bits removed) used during encoding.
* <p>The number of bits removed from the right for each trie entry is called * <p>The number of bits removed from the right for each trie entry is called
* <code>precisionStep</code> in this API. For comparing the different step values, see the * <code>precisionStep</code> in this API. For comparing the different step values, see the
* {@linkplain org.apache.lucene.search.trie package description}. * {@linkplain org.apache.lucene.search.trie package description}.
* <p>The usage pattern is (it is recommened to switch off norms and term frequencies * <p>The usage pattern is (it is recommened to switch off norms and term frequencies
* for numeric fields; it does not make sense to have them): * for numeric fields; it does not make sense to have them):
* <pre> * <pre>
* Field field = new Field(name, new IntTrieTokenStream(value, precisionStep)); * Field field = new Field(name, new IntTrieTokenStream(value, precisionStep));
* field.setOmitNorms(true); * field.setOmitNorms(true);
* field.setOmitTermFreqAndPositions(true); * field.setOmitTermFreqAndPositions(true);
* document.add(field); * document.add(field);
* </pre> * </pre>
* <p>For optimal performance, re-use the TokenStream and Field instance * <p>For optimal performance, re-use the TokenStream and Field instance
* for more than one document: * for more than one document:
* <pre> * <pre>
* <em>// init</em> * <em>// init</em>
* TokenStream stream = new IntTrieTokenStream(precisionStep); * TokenStream stream = new IntTrieTokenStream(precisionStep);
* Field field = new Field(name, stream); * Field field = new Field(name, stream);
* field.setOmitNorms(true); * field.setOmitNorms(true);
* field.setOmitTermFreqAndPositions(true); * field.setOmitTermFreqAndPositions(true);
* Document doc = new Document(); * Document doc = new Document();
* document.add(field); * document.add(field);
* <em>// use this code to index many documents:</em> * <em>// use this code to index many documents:</em>
* stream.setValue(value1) * stream.setValue(value1)
* writer.addDocument(document); * writer.addDocument(document);
* stream.setValue(value2) * stream.setValue(value2)
* writer.addDocument(document); * writer.addDocument(document);
* ... * ...
* </pre> * </pre>
* <p><em>Please note:</em> Token streams are read, when the document is added to index. * <p><em>Please note:</em> Token streams are read, when the document is added to index.
* If you index more than one numeric field, use a separate instance for each. * If you index more than one numeric field, use a separate instance for each.
* <p>For more information, how trie fields work, see the * <p>For more information, how trie fields work, see the
* {@linkplain org.apache.lucene.search.trie package description}. * {@linkplain org.apache.lucene.search.trie package description}.
*/ */
public class IntTrieTokenStream extends TokenStream { public class IntTrieTokenStream extends TokenStream {
/** The full precision token gets this token type assigned. */ /** The full precision token gets this token type assigned. */
public static final String TOKEN_TYPE_FULL_PREC = "fullPrecTrieInt"; public static final String TOKEN_TYPE_FULL_PREC = "fullPrecTrieInt";
/** The lower precision tokens gets this token type assigned. */ /** The lower precision tokens gets this token type assigned. */
public static final String TOKEN_TYPE_LOWER_PREC = "lowerPrecTrieInt"; public static final String TOKEN_TYPE_LOWER_PREC = "lowerPrecTrieInt";
/** /**
* Creates a token stream for indexing <code>value</code> with the given * Creates a token stream for indexing <code>value</code> with the given
* <code>precisionStep</code>. As instance creating is a major cost, * <code>precisionStep</code>. As instance creating is a major cost,
* consider using a {@link #IntTrieTokenStream(int)} instance once for * consider using a {@link #IntTrieTokenStream(int)} instance once for
* indexing a large number of documents and assign a value with * indexing a large number of documents and assign a value with
* {@link #setValue} for each document. * {@link #setValue} for each document.
* To index float values use the converter {@link TrieUtils#doubleToSortableLong}. * To index float values use the converter {@link TrieUtils#doubleToSortableLong}.
*/ */
public IntTrieTokenStream(final int value, final int precisionStep) { public IntTrieTokenStream(final int value, final int precisionStep) {
if (precisionStep<1 || precisionStep>32) if (precisionStep<1 || precisionStep>32)
throw new IllegalArgumentException("precisionStep may only be 1..32"); throw new IllegalArgumentException("precisionStep may only be 1..32");
this.value = value; this.value = value;
this.precisionStep = precisionStep; this.precisionStep = precisionStep;
termAtt = (TermAttribute) addAttribute(TermAttribute.class); termAtt = (TermAttribute) addAttribute(TermAttribute.class);
typeAtt = (TypeAttribute) addAttribute(TypeAttribute.class); typeAtt = (TypeAttribute) addAttribute(TypeAttribute.class);
posIncrAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class); posIncrAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class);
shiftAtt = (ShiftAttribute) addAttribute(ShiftAttribute.class); shiftAtt = (ShiftAttribute) addAttribute(ShiftAttribute.class);
} }
/** /**
* Creates a token stream for indexing values with the given * Creates a token stream for indexing values with the given
* <code>precisionStep</code>. This stream is initially &quot;empty&quot; * <code>precisionStep</code>. This stream is initially &quot;empty&quot;
* (using a numeric value of 0), assign a value before indexing * (using a numeric value of 0), assign a value before indexing
* each document using {@link #setValue}. * each document using {@link #setValue}.
*/ */
public IntTrieTokenStream(final int precisionStep) { public IntTrieTokenStream(final int precisionStep) {
this(0, precisionStep); this(0, precisionStep);
} }
/** /**
* Resets the token stream to deliver prefix encoded values * Resets the token stream to deliver prefix encoded values
* for <code>value</code>. Use this method to index the same * for <code>value</code>. Use this method to index the same
* numeric field for a large number of documents and reuse the * numeric field for a large number of documents and reuse the
* current stream instance. * current stream instance.
* To index float values use the converter {@link TrieUtils#doubleToSortableLong}. * To index float values use the converter {@link TrieUtils#doubleToSortableLong}.
*/ */
public void setValue(final int value) { public void setValue(final int value) {
this.value = value; this.value = value;
reset(); reset();
} }
// @Override // @Override
public void reset() { public void reset() {
shift = 0; shift = 0;
} }
// @Override // @Override
public boolean incrementToken() { public boolean incrementToken() {
if (shift>=32) return false; if (shift>=32) return false;
final char[] buffer = termAtt.resizeTermBuffer(TrieUtils.INT_BUF_SIZE); final char[] buffer = termAtt.resizeTermBuffer(TrieUtils.INT_BUF_SIZE);
termAtt.setTermLength(TrieUtils.intToPrefixCoded(value, shift, buffer)); termAtt.setTermLength(TrieUtils.intToPrefixCoded(value, shift, buffer));
shiftAtt.setShift(shift); shiftAtt.setShift(shift);
if (shift==0) { if (shift==0) {
typeAtt.setType(TOKEN_TYPE_FULL_PREC); typeAtt.setType(TOKEN_TYPE_FULL_PREC);
posIncrAtt.setPositionIncrement(1); posIncrAtt.setPositionIncrement(1);
} else { } else {
typeAtt.setType(TOKEN_TYPE_LOWER_PREC); typeAtt.setType(TOKEN_TYPE_LOWER_PREC);
posIncrAtt.setPositionIncrement(0); posIncrAtt.setPositionIncrement(0);
} }
shift += precisionStep; shift += precisionStep;
return true; return true;
} }
// @Override // @Override
/** @deprecated */ /** @deprecated */
public Token next(final Token reusableToken) { public Token next(final Token reusableToken) {
if (shift>=32) return null; if (shift>=32) return null;
reusableToken.clear(); reusableToken.clear();
final char[] buffer = reusableToken.resizeTermBuffer(TrieUtils.INT_BUF_SIZE); final char[] buffer = reusableToken.resizeTermBuffer(TrieUtils.INT_BUF_SIZE);
reusableToken.setTermLength(TrieUtils.intToPrefixCoded(value, shift, buffer)); reusableToken.setTermLength(TrieUtils.intToPrefixCoded(value, shift, buffer));
if (shift==0) { if (shift==0) {
reusableToken.setType(TOKEN_TYPE_FULL_PREC); reusableToken.setType(TOKEN_TYPE_FULL_PREC);
reusableToken.setPositionIncrement(1); reusableToken.setPositionIncrement(1);
} else { } else {
reusableToken.setType(TOKEN_TYPE_LOWER_PREC); reusableToken.setType(TOKEN_TYPE_LOWER_PREC);
reusableToken.setPositionIncrement(0); reusableToken.setPositionIncrement(0);
} }
shift += precisionStep; shift += precisionStep;
return reusableToken; return reusableToken;
} }
// @Override // @Override
public String toString() { public String toString() {
final StringBuffer sb = new StringBuffer("(trie-int,value=").append(value); final StringBuffer sb = new StringBuffer("(trie-int,value=").append(value);
sb.append(",precisionStep=").append(precisionStep).append(')'); sb.append(",precisionStep=").append(precisionStep).append(')');
return sb.toString(); return sb.toString();
} }
// members // members
private final TermAttribute termAtt; private final TermAttribute termAtt;
private final TypeAttribute typeAtt; private final TypeAttribute typeAtt;
private final PositionIncrementAttribute posIncrAtt; private final PositionIncrementAttribute posIncrAtt;
private final ShiftAttribute shiftAtt; private final ShiftAttribute shiftAtt;
private int shift = 0; private int shift = 0;
private int value; private int value;
private final int precisionStep; private final int precisionStep;
} }

View File

@@ -1,101 +1,61 @@
package org.apache.lucene.search.trie; package org.apache.lucene.search.trie;
/** /**
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with * contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership. * this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0 * The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with * (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at * the License. You may obtain a copy of the License at
* *
* http://www.apache.org/licenses/LICENSE-2.0 * http://www.apache.org/licenses/LICENSE-2.0
* *
* Unless required by applicable law or agreed to in writing, software * Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, * distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
import java.io.IOException; import org.apache.lucene.search.Filter; // for javadocs
import org.apache.lucene.search.MultiTermQueryWrapperFilter;
import org.apache.lucene.document.Document;
import org.apache.lucene.search.Filter; /**
import org.apache.lucene.search.DocIdSet; * Implementation of a Lucene {@link Filter} that implements trie-based range filtering for longs/doubles.
import org.apache.lucene.index.IndexReader; * This filter depends on a specific structure of terms in the index that can only be created
import org.apache.lucene.index.TermDocs; * by indexing via {@link LongTrieTokenStream} methods.
import org.apache.lucene.util.OpenBitSet; * For more information, how the algorithm works, see the {@linkplain org.apache.lucene.search.trie package description}.
*/
public class LongTrieRangeFilter extends MultiTermQueryWrapperFilter {
/**
* Implementation of a Lucene {@link Filter} that implements trie-based range filtering for longs/doubles. /**
* This filter depends on a specific structure of terms in the index that can only be created * A trie filter for matching trie coded values using the given field name and
* by indexing via {@link LongTrieTokenStream} methods. * the default helper field.
* For more information, how the algorithm works, see the {@linkplain org.apache.lucene.search.trie package description}. * <code>precisionStep</code> must me equal or a multiple of the <code>precisionStep</code>
*/ * used for indexing the values.
public class LongTrieRangeFilter extends AbstractTrieRangeFilter { * You can leave the bounds open, by supplying <code>null</code> for <code>min</code> and/or
* <code>max</code>. Inclusive/exclusive bounds can also be supplied.
/** * To filter double values use the converter {@link TrieUtils#doubleToSortableLong}.
* A trie filter for matching trie coded values using the given field name and */
* the default helper field. public LongTrieRangeFilter(final String field, final int precisionStep,
* <code>precisionStep</code> must me equal or a multiple of the <code>precisionStep</code> final Long min, final Long max, final boolean minInclusive, final boolean maxInclusive
* used for indexing the values. ) {
* You can leave the bounds open, by supplying <code>null</code> for <code>min</code> and/or super(new LongTrieRangeQuery(field,precisionStep,min,max,minInclusive,maxInclusive));
* <code>max</code>. Inclusive/exclusive bounds can also be supplied. }
* To query double values use the converter {@link TrieUtils#doubleToSortableLong}.
*/ /** Returns the field name for this filter */
public LongTrieRangeFilter(final String field, final int precisionStep, public String getField() { return ((LongTrieRangeQuery)query).getField(); }
final Long min, final Long max, final boolean minInclusive, final boolean maxInclusive
) { /** Returns <code>true</code> if the lower endpoint is inclusive */
super(field,precisionStep,min,max,minInclusive,maxInclusive); public boolean includesMin() { return ((LongTrieRangeQuery)query).includesMin(); }
}
/** Returns <code>true</code> if the upper endpoint is inclusive */
/** public boolean includesMax() { return ((LongTrieRangeQuery)query).includesMax(); }
* Returns a DocIdSet that provides the documents which should be permitted or prohibited in search results.
*/ /** Returns the lower value of this range filter */
//@Override public Long getMin() { return ((LongTrieRangeQuery)query).getMin(); }
public DocIdSet getDocIdSet(final IndexReader reader) throws IOException {
// calculate the upper and lower bounds respecting the inclusive and null values. /** Returns the upper value of this range filter */
long minBound=(this.min==null) ? Long.MIN_VALUE : ( public Long getMax() { return ((LongTrieRangeQuery)query).getMax(); }
minInclusive ? this.min.longValue() : (this.min.longValue()+1L)
); }
long maxBound=(this.max==null) ? Long.MAX_VALUE : (
maxInclusive ? this.max.longValue() : (this.max.longValue()-1L)
);
resetLastNumberOfTerms();
if (minBound > maxBound) {
// shortcut, no docs will match this
return DocIdSet.EMPTY_DOCIDSET;
} else {
final OpenBitSet bits = new OpenBitSet(reader.maxDoc());
final TermDocs termDocs = reader.termDocs();
try {
TrieUtils.splitLongRange(new TrieUtils.LongRangeBuilder() {
//@Override
public final void addRange(String minPrefixCoded, String maxPrefixCoded) {
try {
fillBits(
reader, bits, termDocs,
minPrefixCoded, maxPrefixCoded
);
} catch (IOException ioe) {
// LongRangeBuilder is not allowed to throw checked exceptions:
// wrap as RuntimeException
throw new RuntimeException(ioe);
}
}
}, precisionStep, minBound, maxBound);
} catch (RuntimeException e) {
if (e.getCause() instanceof IOException) throw (IOException)e.getCause();
throw e;
} finally {
termDocs.close();
}
return bits;
}
}
}

View File

@ -0,0 +1,66 @@
package org.apache.lucene.search.trie;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.search.Query;
/**
* Implementation of a Lucene {@link Query} that implements trie-based range querying for longs/doubles.
* This query depends on a specific structure of terms in the index that can only be created
* by indexing via {@link LongTrieTokenStream} methods.
* <p>The query is in constant score mode per default. With precision steps of &le;4, this
* query can be run in conventional boolean rewrite mode without changing the max clause count.
* For more information, how the algorithm works, see the {@linkplain org.apache.lucene.search.trie package description}.
*/
public class LongTrieRangeQuery extends AbstractTrieRangeQuery {
/**
* A trie query for matching trie coded values using the given field name and
* the default helper field.
* <code>precisionStep</code> must me equal or a multiple of the <code>precisionStep</code>
* used for indexing the values.
* You can leave the bounds open, by supplying <code>null</code> for <code>min</code> and/or
* <code>max</code>. Inclusive/exclusive bounds can also be supplied.
* To query double values use the converter {@link TrieUtils#doubleToSortableLong}.
*/
public LongTrieRangeQuery(final String field, final int precisionStep,
final Long min, final Long max, final boolean minInclusive, final boolean maxInclusive
) {
super(field,precisionStep,min,max,minInclusive,maxInclusive);
}
//@Override
void passRanges(TrieRangeTermEnum enumerator) {
// calculate the upper and lower bounds respecting the inclusive and null values.
long minBound=(this.min==null) ? Long.MIN_VALUE : (
minInclusive ? this.min.longValue() : (this.min.longValue()+1L)
);
long maxBound=(this.max==null) ? Long.MAX_VALUE : (
maxInclusive ? this.max.longValue() : (this.max.longValue()-1L)
);
TrieUtils.splitLongRange(enumerator.getLongRangeBuilder(), precisionStep, minBound, maxBound);
}
/** Returns the lower value of this range query */
public Long getMin() { return (Long)min; }
/** Returns the upper value of this range query */
public Long getMax() { return (Long)max; }
}

View File

@ -1,172 +1,172 @@
package org.apache.lucene.search.trie; package org.apache.lucene.search.trie;
/** /**
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with * contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership. * this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0 * The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with * (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at * the License. You may obtain a copy of the License at
* *
* http://www.apache.org/licenses/LICENSE-2.0 * http://www.apache.org/licenses/LICENSE-2.0
* *
* Unless required by applicable law or agreed to in writing, software * Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, * distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
/** /**
* This class provides a {@link TokenStream} for indexing <code>long</code> values * This class provides a {@link TokenStream} for indexing <code>long</code> values
* that can be queried by {@link LongTrieRangeFilter}. This stream is not intended * that can be queried by {@link LongTrieRangeFilter}. This stream is not intended
* to be used in analyzers, its more for iterating the different precisions during * to be used in analyzers, its more for iterating the different precisions during
* indexing a specific numeric value. * indexing a specific numeric value.
* <p>A <code>long</code> value is indexed as multiple string encoded terms, each reduced * <p>A <code>long</code> value is indexed as multiple string encoded terms, each reduced
* by zeroing bits from the right. Each value is also prefixed (in the first char) by the * by zeroing bits from the right. Each value is also prefixed (in the first char) by the
* <code>shift</code> value (number of bits removed) used during encoding. * <code>shift</code> value (number of bits removed) used during encoding.
* <p>The number of bits removed from the right for each trie entry is called * <p>The number of bits removed from the right for each trie entry is called
* <code>precisionStep</code> in this API. For comparing the different step values, see the * <code>precisionStep</code> in this API. For comparing the different step values, see the
* {@linkplain org.apache.lucene.search.trie package description}. * {@linkplain org.apache.lucene.search.trie package description}.
* <p>The usage pattern is (it is recommened to switch off norms and term frequencies * <p>The usage pattern is (it is recommened to switch off norms and term frequencies
* for numeric fields; it does not make sense to have them): * for numeric fields; it does not make sense to have them):
* <pre> * <pre>
* Field field = new Field(name, new LongTrieTokenStream(value, precisionStep)); * Field field = new Field(name, new LongTrieTokenStream(value, precisionStep));
* field.setOmitNorms(true); * field.setOmitNorms(true);
* field.setOmitTermFreqAndPositions(true); * field.setOmitTermFreqAndPositions(true);
* document.add(field); * document.add(field);
* </pre> * </pre>
* <p>For optimal performance, re-use the TokenStream and Field instance * <p>For optimal performance, re-use the TokenStream and Field instance
* for more than one document: * for more than one document:
* <pre> * <pre>
* <em>// init</em> * <em>// init</em>
* TokenStream stream = new LongTrieTokenStream(precisionStep); * TokenStream stream = new LongTrieTokenStream(precisionStep);
* Field field = new Field(name, stream); * Field field = new Field(name, stream);
* field.setOmitNorms(true); * field.setOmitNorms(true);
* field.setOmitTermFreqAndPositions(true); * field.setOmitTermFreqAndPositions(true);
* Document doc = new Document(); * Document doc = new Document();
* document.add(field); * document.add(field);
* <em>// use this code to index many documents:</em> * <em>// use this code to index many documents:</em>
* stream.setValue(value1) * stream.setValue(value1)
* writer.addDocument(document); * writer.addDocument(document);
* stream.setValue(value2) * stream.setValue(value2)
* writer.addDocument(document); * writer.addDocument(document);
* ... * ...
* </pre> * </pre>
* <p><em>Please note:</em> Token streams are read, when the document is added to index. * <p><em>Please note:</em> Token streams are read, when the document is added to index.
* If you index more than one numeric field, use a separate instance for each. * If you index more than one numeric field, use a separate instance for each.
* <p>For more information, how trie fields work, see the * <p>For more information, how trie fields work, see the
* {@linkplain org.apache.lucene.search.trie package description}. * {@linkplain org.apache.lucene.search.trie package description}.
*/ */
public class LongTrieTokenStream extends TokenStream { public class LongTrieTokenStream extends TokenStream {
/** The full precision token gets this token type assigned. */ /** The full precision token gets this token type assigned. */
public static final String TOKEN_TYPE_FULL_PREC = "fullPrecTrieLong"; public static final String TOKEN_TYPE_FULL_PREC = "fullPrecTrieLong";
/** The lower precision tokens gets this token type assigned. */ /** The lower precision tokens gets this token type assigned. */
public static final String TOKEN_TYPE_LOWER_PREC = "lowerPrecTrieLong"; public static final String TOKEN_TYPE_LOWER_PREC = "lowerPrecTrieLong";
/** /**
* Creates a token stream for indexing <code>value</code> with the given * Creates a token stream for indexing <code>value</code> with the given
* <code>precisionStep</code>. As instance creating is a major cost, * <code>precisionStep</code>. As instance creating is a major cost,
* consider using a {@link #LongTrieTokenStream(int)} instance once for * consider using a {@link #LongTrieTokenStream(int)} instance once for
* indexing a large number of documents and assign a value with * indexing a large number of documents and assign a value with
* {@link #setValue} for each document. * {@link #setValue} for each document.
* To index double values use the converter {@link TrieUtils#doubleToSortableLong}. * To index double values use the converter {@link TrieUtils#doubleToSortableLong}.
*/ */
public LongTrieTokenStream(final long value, final int precisionStep) { public LongTrieTokenStream(final long value, final int precisionStep) {
if (precisionStep<1 || precisionStep>64) if (precisionStep<1 || precisionStep>64)
throw new IllegalArgumentException("precisionStep may only be 1..64"); throw new IllegalArgumentException("precisionStep may only be 1..64");
this.value = value; this.value = value;
this.precisionStep = precisionStep; this.precisionStep = precisionStep;
termAtt = (TermAttribute) addAttribute(TermAttribute.class); termAtt = (TermAttribute) addAttribute(TermAttribute.class);
typeAtt = (TypeAttribute) addAttribute(TypeAttribute.class); typeAtt = (TypeAttribute) addAttribute(TypeAttribute.class);
posIncrAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class); posIncrAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class);
shiftAtt = (ShiftAttribute) addAttribute(ShiftAttribute.class); shiftAtt = (ShiftAttribute) addAttribute(ShiftAttribute.class);
} }
/** /**
* Creates a token stream for indexing values with the given * Creates a token stream for indexing values with the given
* <code>precisionStep</code>. This stream is initially &quot;empty&quot; * <code>precisionStep</code>. This stream is initially &quot;empty&quot;
* (using a numeric value of 0), assign a value before indexing * (using a numeric value of 0), assign a value before indexing
* each document using {@link #setValue}. * each document using {@link #setValue}.
*/ */
public LongTrieTokenStream(final int precisionStep) { public LongTrieTokenStream(final int precisionStep) {
this(0L, precisionStep); this(0L, precisionStep);
} }
/** /**
* Resets the token stream to deliver prefix encoded values * Resets the token stream to deliver prefix encoded values
* for <code>value</code>. Use this method to index the same * for <code>value</code>. Use this method to index the same
* numeric field for a large number of documents and reuse the * numeric field for a large number of documents and reuse the
* current stream instance. * current stream instance.
* To index double values use the converter {@link TrieUtils#doubleToSortableLong}. * To index double values use the converter {@link TrieUtils#doubleToSortableLong}.
*/ */
public void setValue(final long value) { public void setValue(final long value) {
this.value = value; this.value = value;
reset(); reset();
} }
// @Override // @Override
public void reset() { public void reset() {
shift = 0; shift = 0;
} }
// @Override // @Override
public boolean incrementToken() { public boolean incrementToken() {
if (shift>=64) return false; if (shift>=64) return false;
final char[] buffer = termAtt.resizeTermBuffer(TrieUtils.LONG_BUF_SIZE); final char[] buffer = termAtt.resizeTermBuffer(TrieUtils.LONG_BUF_SIZE);
termAtt.setTermLength(TrieUtils.longToPrefixCoded(value, shift, buffer)); termAtt.setTermLength(TrieUtils.longToPrefixCoded(value, shift, buffer));
shiftAtt.setShift(shift); shiftAtt.setShift(shift);
if (shift==0) { if (shift==0) {
typeAtt.setType(TOKEN_TYPE_FULL_PREC); typeAtt.setType(TOKEN_TYPE_FULL_PREC);
posIncrAtt.setPositionIncrement(1); posIncrAtt.setPositionIncrement(1);
} else { } else {
typeAtt.setType(TOKEN_TYPE_LOWER_PREC); typeAtt.setType(TOKEN_TYPE_LOWER_PREC);
posIncrAtt.setPositionIncrement(0); posIncrAtt.setPositionIncrement(0);
} }
shift += precisionStep; shift += precisionStep;
return true; return true;
} }
// @Override // @Override
/** @deprecated */ /** @deprecated */
public Token next(final Token reusableToken) { public Token next(final Token reusableToken) {
if (shift>=64) return null; if (shift>=64) return null;
reusableToken.clear(); reusableToken.clear();
final char[] buffer = reusableToken.resizeTermBuffer(TrieUtils.LONG_BUF_SIZE); final char[] buffer = reusableToken.resizeTermBuffer(TrieUtils.LONG_BUF_SIZE);
reusableToken.setTermLength(TrieUtils.longToPrefixCoded(value, shift, buffer)); reusableToken.setTermLength(TrieUtils.longToPrefixCoded(value, shift, buffer));
if (shift==0) { if (shift==0) {
reusableToken.setType(TOKEN_TYPE_FULL_PREC); reusableToken.setType(TOKEN_TYPE_FULL_PREC);
reusableToken.setPositionIncrement(1); reusableToken.setPositionIncrement(1);
} else { } else {
reusableToken.setType(TOKEN_TYPE_LOWER_PREC); reusableToken.setType(TOKEN_TYPE_LOWER_PREC);
reusableToken.setPositionIncrement(0); reusableToken.setPositionIncrement(0);
} }
shift += precisionStep; shift += precisionStep;
return reusableToken; return reusableToken;
} }
// @Override // @Override
public String toString() { public String toString() {
final StringBuffer sb = new StringBuffer("(trie-long,value=").append(value); final StringBuffer sb = new StringBuffer("(trie-long,value=").append(value);
sb.append(",precisionStep=").append(precisionStep).append(')'); sb.append(",precisionStep=").append(precisionStep).append(')');
return sb.toString(); return sb.toString();
} }
// members // members
private final TermAttribute termAtt; private final TermAttribute termAtt;
private final TypeAttribute typeAtt; private final TypeAttribute typeAtt;
private final PositionIncrementAttribute posIncrAtt; private final PositionIncrementAttribute posIncrAtt;
private final ShiftAttribute shiftAtt; private final ShiftAttribute shiftAtt;
private int shift = 0; private int shift = 0;
private long value; private long value;
private final int precisionStep; private final int precisionStep;
} }

View File

@ -1,70 +1,70 @@
package org.apache.lucene.search.trie; package org.apache.lucene.search.trie;
/** /**
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with * contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership. * this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0 * The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with * (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at * the License. You may obtain a copy of the License at
* *
* http://www.apache.org/licenses/LICENSE-2.0 * http://www.apache.org/licenses/LICENSE-2.0
* *
* Unless required by applicable law or agreed to in writing, software * Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, * distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
import org.apache.lucene.util.Attribute; import org.apache.lucene.util.Attribute;
import java.io.Serializable; import java.io.Serializable;
/** /**
* This attribute is updated by {@link IntTrieTokenStream} and {@link LongTrieTokenStream} * This attribute is updated by {@link IntTrieTokenStream} and {@link LongTrieTokenStream}
* to the shift value of the current prefix-encoded token. * to the shift value of the current prefix-encoded token.
* It may be used by filters or consumers to e.g. distribute the values to various fields. * It may be used by filters or consumers to e.g. distribute the values to various fields.
*/ */
public final class ShiftAttribute extends Attribute implements Cloneable, Serializable { public final class ShiftAttribute extends Attribute implements Cloneable, Serializable {
private int shift = 0; private int shift = 0;
/** /**
* Returns the shift value of the current prefix encoded token. * Returns the shift value of the current prefix encoded token.
*/ */
public int getShift() { public int getShift() {
return shift; return shift;
} }
/** /**
* Sets the shift value. * Sets the shift value.
*/ */
public void setShift(final int shift) { public void setShift(final int shift) {
this.shift = shift; this.shift = shift;
} }
public void clear() { public void clear() {
shift = 0; shift = 0;
} }
public String toString() { public String toString() {
return "shift=" + shift; return "shift=" + shift;
} }
public boolean equals(Object other) { public boolean equals(Object other) {
if (this == other) return true; if (this == other) return true;
if (other instanceof ShiftAttribute) { if (other instanceof ShiftAttribute) {
return ((ShiftAttribute) other).shift == shift; return ((ShiftAttribute) other).shift == shift;
} }
return false; return false;
} }
public int hashCode() { public int hashCode() {
return shift; return shift;
} }
public void copyTo(Attribute target) { public void copyTo(Attribute target) {
final ShiftAttribute t = (ShiftAttribute) target; final ShiftAttribute t = (ShiftAttribute) target;
t.setShift(shift); t.setShift(shift);
} }
} }

View File

@ -0,0 +1,140 @@
package org.apache.lucene.search.trie;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.LinkedList;
import org.apache.lucene.search.FilteredTermEnum;
import org.apache.lucene.search.MultiTermQuery; // for javadocs
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
/**
* Subclass of FilteredTermEnum for enumerating all terms that match the
* sub-ranges for trie range queries.
* <p>
* WARNING: Term enumerations is not guaranteed to be always ordered by
* {@link Term#compareTo}.
* The ordering depends on how {@link TrieUtils#splitLongRange} and
* {@link TrieUtils#splitIntRange} generates the sub-ranges. For
* the {@link MultiTermQuery} ordering is not relevant.
*/
final class TrieRangeTermEnum extends FilteredTermEnum {
private final AbstractTrieRangeQuery query;
private final IndexReader reader;
private final LinkedList/*<String>*/ rangeBounds = new LinkedList/*<String>*/();
private String currentUpperBound = null;
TrieRangeTermEnum(AbstractTrieRangeQuery query, IndexReader reader) {
this.query = query;
this.reader = reader;
}
/** Returns a range builder that must be used to feed in the sub-ranges. */
TrieUtils.IntRangeBuilder getIntRangeBuilder() {
return new TrieUtils.IntRangeBuilder() {
//@Override
public final void addRange(String minPrefixCoded, String maxPrefixCoded) {
rangeBounds.add(minPrefixCoded);
rangeBounds.add(maxPrefixCoded);
}
};
}
/** Returns a range builder that must be used to feed in the sub-ranges. */
TrieUtils.LongRangeBuilder getLongRangeBuilder() {
return new TrieUtils.LongRangeBuilder() {
//@Override
public final void addRange(String minPrefixCoded, String maxPrefixCoded) {
rangeBounds.add(minPrefixCoded);
rangeBounds.add(maxPrefixCoded);
}
};
}
/** After feeding the range builder call this method to initialize the enum. */
void init() throws IOException {
next();
}
//@Override
public float difference() {
return 1.0f;
}
/** this is a dummy, it is not used by this class. */
//@Override
protected boolean endEnum() {
assert false; // should never be called
return (currentTerm != null);
}
/**
* Compares if current upper bound is reached,
* this also updates the term count for statistics.
* In contrast to {@link FilteredTermEnum}, a return value
* of <code>false</code> ends iterating the current enum
* and forwards to the next sub-range.
*/
//@Override
protected boolean termCompare(Term term) {
return (term.field() == query.field && term.text().compareTo(currentUpperBound) <= 0);
}
/** Increments the enumeration to the next element. True if one exists. */
//@Override
public boolean next() throws IOException {
// if a current term exists, the actual enum is initialized:
// try change to next term, if no such term exists, fall-through
if (currentTerm != null) {
assert actualEnum!=null;
if (actualEnum.next()) {
currentTerm = actualEnum.term();
if (termCompare(currentTerm)) return true;
}
}
// if all above fails, we go forward to the next enum,
// if one is available
currentTerm = null;
if (rangeBounds.size() < 2) return false;
// close the current enum and read next bounds
if (actualEnum != null) {
actualEnum.close();
actualEnum = null;
}
final String lowerBound = (String)rangeBounds.removeFirst();
this.currentUpperBound = (String)rangeBounds.removeFirst();
// this call recursively uses next(), if no valid term in
// next enum found.
// if this behavior is changed/modified in the superclass,
// this enum will not work anymore!
setEnum(reader.terms(new Term(query.field, lowerBound)));
return (currentTerm != null);
}
/** Closes the enumeration to further activity, freeing resources. */
//@Override
public void close() throws IOException {
rangeBounds.clear();
currentUpperBound = null;
super.close();
}
}

View File

@ -106,14 +106,14 @@ more info about this in the stream documentation)</em></p>
<h3>Searching</h3> <h3>Searching</h3>
<p>The numeric index fields you prepared in this way can be searched by <p>The numeric index fields you prepared in this way can be searched by
{@link org.apache.lucene.search.trie.LongTrieRangeFilter} or {@link org.apache.lucene.search.trie.IntTrieRangeFilter}:</p> {@link org.apache.lucene.search.trie.LongTrieRangeQuery} or {@link org.apache.lucene.search.trie.IntTrieRangeQuery}:</p>
<pre> <pre>
<em>// Java 1.4, because Long.valueOf(long) is not available:</em> <em>// Java 1.4, because Long.valueOf(long) is not available:</em>
Query q = new LongTrieRangeFilter("exampleLong", precisionStep, new Long(123L), new Long(999999L), true, true).asQuery(); Query q = new LongTrieRangeQuery("exampleLong", precisionStep, new Long(123L), new Long(999999L), true, true);
<em>// OR, Java 1.5, using autoboxing:</em> <em>// OR, Java 1.5, using autoboxing:</em>
Query q = new LongTrieRangeFilter("exampleLong", precisionStep, 123L, 999999L, true, true).asQuery(); Query q = new LongTrieRangeQuery("exampleLong", precisionStep, 123L, 999999L, true, true);
<em>// execute the search, as usual:</em> <em>// execute the search, as usual:</em>
TopDocs docs = searcher.search(q, 10); TopDocs docs = searcher.search(q, 10);
@ -132,7 +132,7 @@ more info about this in the stream documentation)</em></p>
that the old {@link org.apache.lucene.search.RangeQuery} (with raised that the old {@link org.apache.lucene.search.RangeQuery} (with raised
{@link org.apache.lucene.search.BooleanQuery} clause count) took about 30-40 secs to complete, {@link org.apache.lucene.search.BooleanQuery} clause count) took about 30-40 secs to complete,
{@link org.apache.lucene.search.ConstantScoreRangeQuery} took 5 secs and executing {@link org.apache.lucene.search.ConstantScoreRangeQuery} took 5 secs and executing
{@link org.apache.lucene.search.trie.LongTrieRangeFilter}<code>.asQuery()</code> took &lt;100ms to {@link org.apache.lucene.search.trie.LongTrieRangeQuery} took &lt;100ms to
complete (on an Opteron64 machine, Java 1.5, 8 bit precision step). complete (on an Opteron64 machine, Java 1.5, 8 bit precision step).
This query type was developed for a geographic portal, where the performance for This query type was developed for a geographic portal, where the performance for
e.g. bounding boxes or exact date/time stamps is important.</p> e.g. bounding boxes or exact date/time stamps is important.</p>

View File

@ -1,320 +1,371 @@
package org.apache.lucene.search.trie; package org.apache.lucene.search.trie;
/** /**
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with * contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership. * this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0 * The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with * (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at * the License. You may obtain a copy of the License at
* *
* http://www.apache.org/licenses/LICENSE-2.0 * http://www.apache.org/licenses/LICENSE-2.0
* *
* Unless required by applicable law or agreed to in writing, software * Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, * distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
import java.util.Random; import java.util.Random;
import org.apache.lucene.analysis.WhitespaceAnalyzer; import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength; import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.Sort; import org.apache.lucene.search.Sort;
import org.apache.lucene.search.RangeQuery; import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.search.RangeQuery;
import org.apache.lucene.search.BooleanQuery;
public class TestIntTrieRangeFilter extends LuceneTestCase { import org.apache.lucene.search.MatchAllDocsQuery;
// distance of entries import org.apache.lucene.util.LuceneTestCase;
private static final int distance = 6666;
// shift the starting of the values to the left, to also have negative values: public class TestIntTrieRangeQuery extends LuceneTestCase {
private static final int startOffset = - 1 << 15; // distance of entries
// number of docs to generate for testing private static final int distance = 6666;
private static final int noDocs = 10000; // shift the starting of the values to the left, to also have negative values:
private static final int startOffset = - 1 << 15;
private static Field newField(String name, int precisionStep) { // number of docs to generate for testing
IntTrieTokenStream stream = new IntTrieTokenStream(precisionStep); private static final int noDocs = 10000;
stream.setUseNewAPI(true);
Field f=new Field(name, stream); private static Field newField(String name, int precisionStep) {
f.setOmitTermFreqAndPositions(true); IntTrieTokenStream stream = new IntTrieTokenStream(precisionStep);
f.setOmitNorms(true); stream.setUseNewAPI(true);
return f; Field f=new Field(name, stream);
} f.setOmitTermFreqAndPositions(true);
f.setOmitNorms(true);
private static final RAMDirectory directory; return f;
private static final IndexSearcher searcher; }
static {
try { private static final RAMDirectory directory;
directory = new RAMDirectory(); private static final IndexSearcher searcher;
IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), static {
true, MaxFieldLength.UNLIMITED); try {
// set the theoretical maximum term count for 8bit (see docs for the number)
Field BooleanQuery.setMaxClauseCount(3*255*2 + 255);
field8 = newField("field8", 8),
field4 = newField("field4", 4), directory = new RAMDirectory();
field2 = newField("field2", 2), IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(),
ascfield8 = newField("ascfield8", 8), true, MaxFieldLength.UNLIMITED);
ascfield4 = newField("ascfield4", 4),
ascfield2 = newField("ascfield2", 2); Field
field8 = newField("field8", 8),
// Add a series of noDocs docs with increasing int values field4 = newField("field4", 4),
for (int l=0; l<noDocs; l++) { field2 = newField("field2", 2),
Document doc=new Document(); ascfield8 = newField("ascfield8", 8),
// add fields, that have a distance to test general functionality ascfield4 = newField("ascfield4", 4),
int val=distance*l+startOffset; ascfield2 = newField("ascfield2", 2);
doc.add(new Field("value", TrieUtils.intToPrefixCoded(val), Field.Store.YES, Field.Index.NO));
((IntTrieTokenStream)field8.tokenStreamValue()).setValue(val); // Add a series of noDocs docs with increasing int values
doc.add(field8); for (int l=0; l<noDocs; l++) {
((IntTrieTokenStream)field4.tokenStreamValue()).setValue(val); Document doc=new Document();
doc.add(field4); // add fields, that have a distance to test general functionality
((IntTrieTokenStream)field2.tokenStreamValue()).setValue(val); int val=distance*l+startOffset;
doc.add(field2); doc.add(new Field("value", TrieUtils.intToPrefixCoded(val), Field.Store.YES, Field.Index.NO));
// add ascending fields with a distance of 1, beginning at -noDocs/2 to test the correct splitting of range and inclusive/exclusive ((IntTrieTokenStream)field8.tokenStreamValue()).setValue(val);
val=l-(noDocs/2); doc.add(field8);
((IntTrieTokenStream)ascfield8.tokenStreamValue()).setValue(val); ((IntTrieTokenStream)field4.tokenStreamValue()).setValue(val);
doc.add(ascfield8); doc.add(field4);
((IntTrieTokenStream)ascfield4.tokenStreamValue()).setValue(val); ((IntTrieTokenStream)field2.tokenStreamValue()).setValue(val);
doc.add(ascfield4); doc.add(field2);
((IntTrieTokenStream)ascfield2.tokenStreamValue()).setValue(val); // add ascending fields with a distance of 1, beginning at -noDocs/2 to test the correct splitting of range and inclusive/exclusive
doc.add(ascfield2); val=l-(noDocs/2);
writer.addDocument(doc); ((IntTrieTokenStream)ascfield8.tokenStreamValue()).setValue(val);
} doc.add(ascfield8);
((IntTrieTokenStream)ascfield4.tokenStreamValue()).setValue(val);
writer.optimize(); doc.add(ascfield4);
writer.close(); ((IntTrieTokenStream)ascfield2.tokenStreamValue()).setValue(val);
searcher=new IndexSearcher(directory); doc.add(ascfield2);
} catch (Exception e) { writer.addDocument(doc);
throw new Error(e); }
}
} writer.optimize();
writer.close();
private void testRange(int precisionStep) throws Exception { searcher=new IndexSearcher(directory);
String field="field"+precisionStep; } catch (Exception e) {
int count=3000; throw new Error(e);
int lower=(distance*3/2)+startOffset, upper=lower + count*distance + (distance/3); }
IntTrieRangeFilter f=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new Integer(upper), true, true); }
TopDocs topDocs = searcher.search(f.asQuery(), null, noDocs, Sort.INDEXORDER);
System.out.println("Found "+f.getLastNumberOfTerms()+" distinct terms in range for field '"+field+"'."); /** test for both constant score and boolean query, the other tests only use the constant score mode */
ScoreDoc[] sd = topDocs.scoreDocs; private void testRange(int precisionStep) throws Exception {
assertNotNull(sd); String field="field"+precisionStep;
assertEquals("Score doc count", count, sd.length ); int count=3000;
Document doc=searcher.doc(sd[0].doc); int lower=(distance*3/2)+startOffset, upper=lower + count*distance + (distance/3);
assertEquals("First doc", 2*distance+startOffset, TrieUtils.prefixCodedToInt(doc.get("value")) ); IntTrieRangeQuery q = new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), true, true);
doc=searcher.doc(sd[sd.length-1].doc); IntTrieRangeFilter f = new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new Integer(upper), true, true);
assertEquals("Last doc", (1+count)*distance+startOffset, TrieUtils.prefixCodedToInt(doc.get("value")) ); int lastTerms = 0;
} for (byte i=0; i<3; i++) {
TopDocs topDocs;
public void testRange_8bit() throws Exception { int terms;
testRange(8); String type;
} q.clearTotalNumberOfTerms();
f.clearTotalNumberOfTerms();
public void testRange_4bit() throws Exception { switch (i) {
testRange(4); case 0:
} type = " (constant score)";
q.setConstantScoreRewrite(true);
public void testRange_2bit() throws Exception { topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER);
testRange(2); terms = q.getTotalNumberOfTerms();
} break;
case 1:
private void testLeftOpenRange(int precisionStep) throws Exception { type = " (boolean query)";
String field="field"+precisionStep; q.setConstantScoreRewrite(false);
int count=3000; topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER);
int upper=(count-1)*distance + (distance/3) + startOffset; terms = q.getTotalNumberOfTerms();
IntTrieRangeFilter f=new IntTrieRangeFilter(field, precisionStep, null, new Integer(upper), true, true); break;
TopDocs topDocs = searcher.search(f.asQuery(), null, noDocs, Sort.INDEXORDER); case 2:
System.out.println("Found "+f.getLastNumberOfTerms()+" distinct terms in left open range for field '"+field+"'."); type = " (filter)";
ScoreDoc[] sd = topDocs.scoreDocs; topDocs = searcher.search(new MatchAllDocsQuery(), f, noDocs, Sort.INDEXORDER);
assertNotNull(sd); terms = f.getTotalNumberOfTerms();
assertEquals("Score doc count", count, sd.length ); break;
Document doc=searcher.doc(sd[0].doc); default:
assertEquals("First doc", startOffset, TrieUtils.prefixCodedToInt(doc.get("value")) ); return;
doc=searcher.doc(sd[sd.length-1].doc); }
assertEquals("Last doc", (count-1)*distance+startOffset, TrieUtils.prefixCodedToInt(doc.get("value")) ); System.out.println("Found "+terms+" distinct terms in range for field '"+field+"'"+type+".");
} ScoreDoc[] sd = topDocs.scoreDocs;
assertNotNull(sd);
public void testLeftOpenRange_8bit() throws Exception { assertEquals("Score doc count"+type, count, sd.length );
testLeftOpenRange(8); Document doc=searcher.doc(sd[0].doc);
} assertEquals("First doc"+type, 2*distance+startOffset, TrieUtils.prefixCodedToInt(doc.get("value")) );
doc=searcher.doc(sd[sd.length-1].doc);
public void testLeftOpenRange_4bit() throws Exception { assertEquals("Last doc"+type, (1+count)*distance+startOffset, TrieUtils.prefixCodedToInt(doc.get("value")) );
testLeftOpenRange(4); if (i>0) {
} assertEquals("Distinct term number is equal for all query types", lastTerms, terms);
}
public void testLeftOpenRange_2bit() throws Exception { lastTerms = terms;
testLeftOpenRange(2); }
} }
private void testRightOpenRange(int precisionStep) throws Exception { public void testRange_8bit() throws Exception {
String field="field"+precisionStep; testRange(8);
int count=3000; }
int lower=(count-1)*distance + (distance/3) +startOffset;
IntTrieRangeFilter f=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), null, true, true); public void testRange_4bit() throws Exception {
TopDocs topDocs = searcher.search(f.asQuery(), null, noDocs, Sort.INDEXORDER); testRange(4);
System.out.println("Found "+f.getLastNumberOfTerms()+" distinct terms in right open range for field '"+field+"'."); }
ScoreDoc[] sd = topDocs.scoreDocs;
assertNotNull(sd); public void testRange_2bit() throws Exception {
assertEquals("Score doc count", noDocs-count, sd.length ); testRange(2);
Document doc=searcher.doc(sd[0].doc); }
assertEquals("First doc", count*distance+startOffset, TrieUtils.prefixCodedToInt(doc.get("value")) );
doc=searcher.doc(sd[sd.length-1].doc); public void testInverseRange() throws Exception {
assertEquals("Last doc", (noDocs-1)*distance+startOffset, TrieUtils.prefixCodedToInt(doc.get("value")) ); IntTrieRangeFilter f = new IntTrieRangeFilter("field8", 8, new Integer(1000), new Integer(-1000), true, true);
} assertSame("A inverse range should return the EMPTY_DOCIDSET instance", DocIdSet.EMPTY_DOCIDSET, f.getDocIdSet(searcher.getIndexReader()));
}
public void testRightOpenRange_8bit() throws Exception {
testRightOpenRange(8); private void testLeftOpenRange(int precisionStep) throws Exception {
} String field="field"+precisionStep;
int count=3000;
public void testRightOpenRange_4bit() throws Exception { int upper=(count-1)*distance + (distance/3) + startOffset;
testRightOpenRange(4); IntTrieRangeQuery q=new IntTrieRangeQuery(field, precisionStep, null, new Integer(upper), true, true);
} TopDocs topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER);
System.out.println("Found "+q.getTotalNumberOfTerms()+" distinct terms in left open range for field '"+field+"'.");
public void testRightOpenRange_2bit() throws Exception { ScoreDoc[] sd = topDocs.scoreDocs;
testRightOpenRange(2); assertNotNull(sd);
} assertEquals("Score doc count", count, sd.length );
Document doc=searcher.doc(sd[0].doc);
private void testRandomTrieAndClassicRangeQuery(int precisionStep) throws Exception { assertEquals("First doc", startOffset, TrieUtils.prefixCodedToInt(doc.get("value")) );
final Random rnd=newRandom(); doc=searcher.doc(sd[sd.length-1].doc);
String field="field"+precisionStep; assertEquals("Last doc", (count-1)*distance+startOffset, TrieUtils.prefixCodedToInt(doc.get("value")) );
int termCount=0; }
for (int i=0; i<50; i++) {
int lower=(int)(rnd.nextDouble()*noDocs*distance)+startOffset; public void testLeftOpenRange_8bit() throws Exception {
int upper=(int)(rnd.nextDouble()*noDocs*distance)+startOffset; testLeftOpenRange(8);
if (lower>upper) { }
int a=lower; lower=upper; upper=a;
} public void testLeftOpenRange_4bit() throws Exception {
// test inclusive range testLeftOpenRange(4);
IntTrieRangeFilter tf=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new Integer(upper), true, true); }
RangeQuery cq=new RangeQuery(field, TrieUtils.intToPrefixCoded(lower), TrieUtils.intToPrefixCoded(upper), true, true);
cq.setConstantScoreRewrite(true); public void testLeftOpenRange_2bit() throws Exception {
TopDocs tTopDocs = searcher.search(tf.asQuery(), 1); testLeftOpenRange(2);
TopDocs cTopDocs = searcher.search(cq, 1); }
assertEquals("Returned count for IntTrieRangeFilter and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
termCount += tf.getLastNumberOfTerms(); private void testRightOpenRange(int precisionStep) throws Exception {
// test exclusive range String field="field"+precisionStep;
tf=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new Integer(upper), false, false); int count=3000;
cq=new RangeQuery(field, TrieUtils.intToPrefixCoded(lower), TrieUtils.intToPrefixCoded(upper), false, false); int lower=(count-1)*distance + (distance/3) +startOffset;
cq.setConstantScoreRewrite(true); IntTrieRangeQuery q=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), null, true, true);
tTopDocs = searcher.search(tf.asQuery(), 1); TopDocs topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER);
cTopDocs = searcher.search(cq, 1); System.out.println("Found "+q.getTotalNumberOfTerms()+" distinct terms in right open range for field '"+field+"'.");
assertEquals("Returned count for IntTrieRangeFilter and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); ScoreDoc[] sd = topDocs.scoreDocs;
termCount += tf.getLastNumberOfTerms(); assertNotNull(sd);
// test left exclusive range assertEquals("Score doc count", noDocs-count, sd.length );
tf=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new Integer(upper), false, true); Document doc=searcher.doc(sd[0].doc);
cq=new RangeQuery(field, TrieUtils.intToPrefixCoded(lower), TrieUtils.intToPrefixCoded(upper), false, true); assertEquals("First doc", count*distance+startOffset, TrieUtils.prefixCodedToInt(doc.get("value")) );
cq.setConstantScoreRewrite(true); doc=searcher.doc(sd[sd.length-1].doc);
tTopDocs = searcher.search(tf.asQuery(), 1); assertEquals("Last doc", (noDocs-1)*distance+startOffset, TrieUtils.prefixCodedToInt(doc.get("value")) );
cTopDocs = searcher.search(cq, 1); }
assertEquals("Returned count for IntTrieRangeFilter and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
termCount += tf.getLastNumberOfTerms(); public void testRightOpenRange_8bit() throws Exception {
// test right exclusive range testRightOpenRange(8);
tf=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new Integer(upper), true, false); }
cq=new RangeQuery(field, TrieUtils.intToPrefixCoded(lower), TrieUtils.intToPrefixCoded(upper), true, false);
cq.setConstantScoreRewrite(true); public void testRightOpenRange_4bit() throws Exception {
tTopDocs = searcher.search(tf.asQuery(), 1); testRightOpenRange(4);
cTopDocs = searcher.search(cq, 1); }
assertEquals("Returned count for IntTrieRangeFilter and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
termCount += tf.getLastNumberOfTerms(); public void testRightOpenRange_2bit() throws Exception {
} testRightOpenRange(2);
System.out.println("Average number of terms during random search on '" + field + "': " + (((double)termCount)/(50*4))); }
}
private void testRandomTrieAndClassicRangeQuery(int precisionStep) throws Exception {
public void testRandomTrieAndClassicRangeQuery_8bit() throws Exception { final Random rnd=newRandom();
testRandomTrieAndClassicRangeQuery(8); String field="field"+precisionStep;
} int termCountT=0,termCountC=0;
for (int i=0; i<50; i++) {
public void testRandomTrieAndClassicRangeQuery_4bit() throws Exception { int lower=(int)(rnd.nextDouble()*noDocs*distance)+startOffset;
testRandomTrieAndClassicRangeQuery(4); int upper=(int)(rnd.nextDouble()*noDocs*distance)+startOffset;
} if (lower>upper) {
int a=lower; lower=upper; upper=a;
public void testRandomTrieAndClassicRangeQuery_2bit() throws Exception { }
testRandomTrieAndClassicRangeQuery(2); // test inclusive range
} IntTrieRangeQuery tq=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), true, true);
RangeQuery cq=new RangeQuery(field, TrieUtils.intToPrefixCoded(lower), TrieUtils.intToPrefixCoded(upper), true, true);
private void testRangeSplit(int precisionStep) throws Exception { cq.setConstantScoreRewrite(true);
final Random rnd=newRandom(); TopDocs tTopDocs = searcher.search(tq, 1);
String field="ascfield"+precisionStep; TopDocs cTopDocs = searcher.search(cq, 1);
// 50 random tests assertEquals("Returned count for IntTrieRangeQuery and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
for (int i=0; i<50; i++) { termCountT += tq.getTotalNumberOfTerms();
int lower=(int)(rnd.nextDouble()*noDocs - noDocs/2); termCountC += cq.getTotalNumberOfTerms();
int upper=(int)(rnd.nextDouble()*noDocs - noDocs/2); // test exclusive range
if (lower>upper) { tq=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), false, false);
int a=lower; lower=upper; upper=a; cq=new RangeQuery(field, TrieUtils.intToPrefixCoded(lower), TrieUtils.intToPrefixCoded(upper), false, false);
} cq.setConstantScoreRewrite(true);
// test inclusive range tTopDocs = searcher.search(tq, 1);
Query tq=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new Integer(upper), true, true).asQuery(); cTopDocs = searcher.search(cq, 1);
TopDocs tTopDocs = searcher.search(tq, 1); assertEquals("Returned count for IntTrieRangeQuery and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
assertEquals("Returned count of range query must be equal to inclusive range length", upper-lower+1, tTopDocs.totalHits ); termCountT += tq.getTotalNumberOfTerms();
// test exclusive range termCountC += cq.getTotalNumberOfTerms();
tq=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new Integer(upper), false, false).asQuery(); // test left exclusive range
tTopDocs = searcher.search(tq, 1); tq=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), false, true);
assertEquals("Returned count of range query must be equal to exclusive range length", Math.max(upper-lower-1, 0), tTopDocs.totalHits ); cq=new RangeQuery(field, TrieUtils.intToPrefixCoded(lower), TrieUtils.intToPrefixCoded(upper), false, true);
// test left exclusive range cq.setConstantScoreRewrite(true);
tq=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new Integer(upper), false, true).asQuery(); tTopDocs = searcher.search(tq, 1);
tTopDocs = searcher.search(tq, 1); cTopDocs = searcher.search(cq, 1);
assertEquals("Returned count of range query must be equal to half exclusive range length", upper-lower, tTopDocs.totalHits ); assertEquals("Returned count for IntTrieRangeQuery and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
// test right exclusive range termCountT += tq.getTotalNumberOfTerms();
tq=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new Integer(upper), true, false).asQuery(); termCountC += cq.getTotalNumberOfTerms();
tTopDocs = searcher.search(tq, 1); // test right exclusive range
assertEquals("Returned count of range query must be equal to half exclusive range length", upper-lower, tTopDocs.totalHits ); tq=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), true, false);
} cq=new RangeQuery(field, TrieUtils.intToPrefixCoded(lower), TrieUtils.intToPrefixCoded(upper), true, false);
} cq.setConstantScoreRewrite(true);
tTopDocs = searcher.search(tq, 1);
public void testRangeSplit_8bit() throws Exception { cTopDocs = searcher.search(cq, 1);
testRangeSplit(8); assertEquals("Returned count for IntTrieRangeQuery and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
} termCountT += tq.getTotalNumberOfTerms();
termCountC += cq.getTotalNumberOfTerms();
public void testRangeSplit_4bit() throws Exception { }
testRangeSplit(4); System.out.println("Average number of terms during random search on '" + field + "':");
} System.out.println(" Trie query: " + (((double)termCountT)/(50*4)));
System.out.println(" Classical query: " + (((double)termCountC)/(50*4)));
public void testRangeSplit_2bit() throws Exception { }
testRangeSplit(2);
} public void testRandomTrieAndClassicRangeQuery_8bit() throws Exception {
testRandomTrieAndClassicRangeQuery(8);
private void testSorting(int precisionStep) throws Exception { }
final Random rnd=newRandom();
String field="field"+precisionStep; public void testRandomTrieAndClassicRangeQuery_4bit() throws Exception {
// 10 random tests, the index order is ascending, testRandomTrieAndClassicRangeQuery(4);
// so using a reverse sort field should retun descending documents }
for (int i=0; i<10; i++) {
int lower=(int)(rnd.nextDouble()*noDocs*distance)+startOffset; public void testRandomTrieAndClassicRangeQuery_2bit() throws Exception {
int upper=(int)(rnd.nextDouble()*noDocs*distance)+startOffset; testRandomTrieAndClassicRangeQuery(2);
if (lower>upper) { }
int a=lower; lower=upper; upper=a;
} private void testRangeSplit(int precisionStep) throws Exception {
Query tq=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new Integer(upper), true, true).asQuery(); final Random rnd=newRandom();
TopDocs topDocs = searcher.search(tq, null, noDocs, new Sort(TrieUtils.getIntSortField(field, true))); String field="ascfield"+precisionStep;
if (topDocs.totalHits==0) continue; // 50 random tests
ScoreDoc[] sd = topDocs.scoreDocs; for (int i=0; i<50; i++) {
assertNotNull(sd); int lower=(int)(rnd.nextDouble()*noDocs - noDocs/2);
int last=TrieUtils.prefixCodedToInt(searcher.doc(sd[0].doc).get("value")); int upper=(int)(rnd.nextDouble()*noDocs - noDocs/2);
for (int j=1; j<sd.length; j++) { if (lower>upper) {
int act=TrieUtils.prefixCodedToInt(searcher.doc(sd[j].doc).get("value")); int a=lower; lower=upper; upper=a;
assertTrue("Docs should be sorted backwards", last>act ); }
last=act; // test inclusive range
} Query tq=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), true, true);
} TopDocs tTopDocs = searcher.search(tq, 1);
} assertEquals("Returned count of range query must be equal to inclusive range length", upper-lower+1, tTopDocs.totalHits );
// test exclusive range
public void testSorting_8bit() throws Exception { tq=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), false, false);
testSorting(8); tTopDocs = searcher.search(tq, 1);
} assertEquals("Returned count of range query must be equal to exclusive range length", Math.max(upper-lower-1, 0), tTopDocs.totalHits );
// test left exclusive range
public void testSorting_4bit() throws Exception { tq=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), false, true);
testSorting(4); tTopDocs = searcher.search(tq, 1);
} assertEquals("Returned count of range query must be equal to half exclusive range length", upper-lower, tTopDocs.totalHits );
// test right exclusive range
public void testSorting_2bit() throws Exception { tq=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), true, false);
testSorting(2); tTopDocs = searcher.search(tq, 1);
} assertEquals("Returned count of range query must be equal to half exclusive range length", upper-lower, tTopDocs.totalHits );
}
} }
public void testRangeSplit_8bit() throws Exception {
testRangeSplit(8);
}
public void testRangeSplit_4bit() throws Exception {
testRangeSplit(4);
}
public void testRangeSplit_2bit() throws Exception {
testRangeSplit(2);
}
private void testSorting(int precisionStep) throws Exception {
final Random rnd=newRandom();
String field="field"+precisionStep;
// 10 random tests, the index order is ascending,
// so using a reverse sort field should retun descending documents
for (int i=0; i<10; i++) {
int lower=(int)(rnd.nextDouble()*noDocs*distance)+startOffset;
int upper=(int)(rnd.nextDouble()*noDocs*distance)+startOffset;
if (lower>upper) {
int a=lower; lower=upper; upper=a;
}
Query tq=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), true, true);
TopDocs topDocs = searcher.search(tq, null, noDocs, new Sort(TrieUtils.getIntSortField(field, true)));
if (topDocs.totalHits==0) continue;
ScoreDoc[] sd = topDocs.scoreDocs;
assertNotNull(sd);
int last=TrieUtils.prefixCodedToInt(searcher.doc(sd[0].doc).get("value"));
for (int j=1; j<sd.length; j++) {
int act=TrieUtils.prefixCodedToInt(searcher.doc(sd[j].doc).get("value"));
assertTrue("Docs should be sorted backwards", last>act );
last=act;
}
}
}
public void testSorting_8bit() throws Exception {
testSorting(8);
}
public void testSorting_4bit() throws Exception {
testSorting(4);
}
public void testSorting_2bit() throws Exception {
testSorting(2);
}
}

View File

@ -1,54 +1,54 @@
package org.apache.lucene.search.trie; package org.apache.lucene.search.trie;
/** /**
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with * contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership. * this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0 * The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with * (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at * the License. You may obtain a copy of the License at
* *
* http://www.apache.org/licenses/LICENSE-2.0 * http://www.apache.org/licenses/LICENSE-2.0
* *
* Unless required by applicable law or agreed to in writing, software * Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, * distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.analysis.tokenattributes.TermAttribute;
public class TestIntTrieTokenStream extends LuceneTestCase { public class TestIntTrieTokenStream extends LuceneTestCase {
static final int precisionStep = 8; static final int precisionStep = 8;
static final int value = 123456; static final int value = 123456;
public void testStreamNewAPI() throws Exception { public void testStreamNewAPI() throws Exception {
final IntTrieTokenStream stream=new IntTrieTokenStream(value, precisionStep); final IntTrieTokenStream stream=new IntTrieTokenStream(value, precisionStep);
stream.setUseNewAPI(true); stream.setUseNewAPI(true);
// use getAttribute to test if attributes really exist, if not an IAE will be throwed // use getAttribute to test if attributes really exist, if not an IAE will be throwed
final ShiftAttribute shiftAtt = (ShiftAttribute) stream.getAttribute(ShiftAttribute.class); final ShiftAttribute shiftAtt = (ShiftAttribute) stream.getAttribute(ShiftAttribute.class);
final TermAttribute termAtt = (TermAttribute) stream.getAttribute(TermAttribute.class); final TermAttribute termAtt = (TermAttribute) stream.getAttribute(TermAttribute.class);
for (int shift=0; shift<32; shift+=precisionStep) { for (int shift=0; shift<32; shift+=precisionStep) {
assertTrue("New token is available", stream.incrementToken()); assertTrue("New token is available", stream.incrementToken());
assertEquals("Shift value", shift, shiftAtt.getShift()); assertEquals("Shift value", shift, shiftAtt.getShift());
assertEquals("Term is correctly encoded", TrieUtils.intToPrefixCoded(value, shift), termAtt.term()); assertEquals("Term is correctly encoded", TrieUtils.intToPrefixCoded(value, shift), termAtt.term());
} }
assertFalse("No more tokens available", stream.incrementToken()); assertFalse("No more tokens available", stream.incrementToken());
} }
public void testStreamOldAPI() throws Exception { public void testStreamOldAPI() throws Exception {
final IntTrieTokenStream stream=new IntTrieTokenStream(value, precisionStep); final IntTrieTokenStream stream=new IntTrieTokenStream(value, precisionStep);
stream.setUseNewAPI(false); stream.setUseNewAPI(false);
Token tok=new Token(); Token tok=new Token();
for (int shift=0; shift<32; shift+=precisionStep) { for (int shift=0; shift<32; shift+=precisionStep) {
assertNotNull("New token is available", tok=stream.next(tok)); assertNotNull("New token is available", tok=stream.next(tok));
assertEquals("Term is correctly encoded", TrieUtils.intToPrefixCoded(value, shift), tok.term()); assertEquals("Term is correctly encoded", TrieUtils.intToPrefixCoded(value, shift), tok.term());
} }
assertNull("No more tokens available", stream.next(tok)); assertNull("No more tokens available", stream.next(tok));
} }
} }

View File

@ -30,10 +30,13 @@ import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.Sort; import org.apache.lucene.search.Sort;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.RangeQuery; import org.apache.lucene.search.RangeQuery;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase;
public class TestLongTrieRangeFilter extends LuceneTestCase { public class TestLongTrieRangeQuery extends LuceneTestCase {
// distance of entries // distance of entries
private static final long distance = 66666L; private static final long distance = 66666L;
// shift the starting of the values to the left, to also have negative values: // shift the starting of the values to the left, to also have negative values:
@ -53,7 +56,10 @@ public class TestLongTrieRangeFilter extends LuceneTestCase {
private static final RAMDirectory directory; private static final RAMDirectory directory;
private static final IndexSearcher searcher; private static final IndexSearcher searcher;
static { static {
try { try {
// set the theoretical maximum term count for 8bit (see docs for the number)
BooleanQuery.setMaxClauseCount(7*255*2 + 255);
directory = new RAMDirectory(); directory = new RAMDirectory();
IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(),
true, MaxFieldLength.UNLIMITED); true, MaxFieldLength.UNLIMITED);
@ -97,20 +103,54 @@ public class TestLongTrieRangeFilter extends LuceneTestCase {
} }
} }
/** test for constant score + boolean query + filter, the other tests only use the constant score mode */
private void testRange(int precisionStep) throws Exception { private void testRange(int precisionStep) throws Exception {
String field="field"+precisionStep; String field="field"+precisionStep;
int count=3000; int count=3000;
long lower=(distance*3/2)+startOffset, upper=lower + count*distance + (distance/3); long lower=(distance*3/2)+startOffset, upper=lower + count*distance + (distance/3);
LongTrieRangeFilter f=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), true, true); LongTrieRangeQuery q = new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), true, true);
TopDocs topDocs = searcher.search(f.asQuery(), null, noDocs, Sort.INDEXORDER); LongTrieRangeFilter f = new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), true, true);
System.out.println("Found "+f.getLastNumberOfTerms()+" distinct terms in range for field '"+field+"'."); int lastTerms = 0;
ScoreDoc[] sd = topDocs.scoreDocs; for (byte i=0; i<3; i++) {
assertNotNull(sd); TopDocs topDocs;
assertEquals("Score doc count", count, sd.length ); int terms;
Document doc=searcher.doc(sd[0].doc); String type;
assertEquals("First doc", 2*distance+startOffset, TrieUtils.prefixCodedToLong(doc.get("value")) ); q.clearTotalNumberOfTerms();
doc=searcher.doc(sd[sd.length-1].doc); f.clearTotalNumberOfTerms();
assertEquals("Last doc", (1+count)*distance+startOffset, TrieUtils.prefixCodedToLong(doc.get("value")) ); switch (i) {
case 0:
type = " (constant score)";
q.setConstantScoreRewrite(true);
topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER);
terms = q.getTotalNumberOfTerms();
break;
case 1:
type = " (boolean query)";
q.setConstantScoreRewrite(false);
topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER);
terms = q.getTotalNumberOfTerms();
break;
case 2:
type = " (filter)";
topDocs = searcher.search(new MatchAllDocsQuery(), f, noDocs, Sort.INDEXORDER);
terms = f.getTotalNumberOfTerms();
break;
default:
return;
}
System.out.println("Found "+terms+" distinct terms in range for field '"+field+"'"+type+".");
ScoreDoc[] sd = topDocs.scoreDocs;
assertNotNull(sd);
assertEquals("Score doc count"+type, count, sd.length );
Document doc=searcher.doc(sd[0].doc);
assertEquals("First doc"+type, 2*distance+startOffset, TrieUtils.prefixCodedToLong(doc.get("value")) );
doc=searcher.doc(sd[sd.length-1].doc);
assertEquals("Last doc"+type, (1+count)*distance+startOffset, TrieUtils.prefixCodedToLong(doc.get("value")) );
if (i>0) {
assertEquals("Distinct term number is equal for all query types", lastTerms, terms);
}
lastTerms = terms;
}
} }
public void testRange_8bit() throws Exception { public void testRange_8bit() throws Exception {
@ -125,13 +165,18 @@ public class TestLongTrieRangeFilter extends LuceneTestCase {
testRange(2); testRange(2);
} }
public void testInverseRange() throws Exception {
LongTrieRangeFilter f = new LongTrieRangeFilter("field8", 8, new Long(1000L), new Long(-1000L), true, true);
assertSame("A inverse range should return the EMPTY_DOCIDSET instance", DocIdSet.EMPTY_DOCIDSET, f.getDocIdSet(searcher.getIndexReader()));
}
private void testLeftOpenRange(int precisionStep) throws Exception { private void testLeftOpenRange(int precisionStep) throws Exception {
String field="field"+precisionStep; String field="field"+precisionStep;
int count=3000; int count=3000;
long upper=(count-1)*distance + (distance/3) + startOffset; long upper=(count-1)*distance + (distance/3) + startOffset;
LongTrieRangeFilter f=new LongTrieRangeFilter(field, precisionStep, null, new Long(upper), true, true); LongTrieRangeQuery q=new LongTrieRangeQuery(field, precisionStep, null, new Long(upper), true, true);
TopDocs topDocs = searcher.search(f.asQuery(), null, noDocs, Sort.INDEXORDER); TopDocs topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER);
System.out.println("Found "+f.getLastNumberOfTerms()+" distinct terms in left open range for field '"+field+"'."); System.out.println("Found "+q.getTotalNumberOfTerms()+" distinct terms in left open range for field '"+field+"'.");
ScoreDoc[] sd = topDocs.scoreDocs; ScoreDoc[] sd = topDocs.scoreDocs;
assertNotNull(sd); assertNotNull(sd);
assertEquals("Score doc count", count, sd.length ); assertEquals("Score doc count", count, sd.length );
@ -157,9 +202,9 @@ public class TestLongTrieRangeFilter extends LuceneTestCase {
String field="field"+precisionStep; String field="field"+precisionStep;
int count=3000; int count=3000;
long lower=(count-1)*distance + (distance/3) +startOffset; long lower=(count-1)*distance + (distance/3) +startOffset;
LongTrieRangeFilter f=new LongTrieRangeFilter(field, precisionStep, new Long(lower), null, true, true); LongTrieRangeQuery q=new LongTrieRangeQuery(field, precisionStep, new Long(lower), null, true, true);
TopDocs topDocs = searcher.search(f.asQuery(), null, noDocs, Sort.INDEXORDER); TopDocs topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER);
System.out.println("Found "+f.getLastNumberOfTerms()+" distinct terms in right open range for field '"+field+"'."); System.out.println("Found "+q.getTotalNumberOfTerms()+" distinct terms in right open range for field '"+field+"'.");
ScoreDoc[] sd = topDocs.scoreDocs; ScoreDoc[] sd = topDocs.scoreDocs;
assertNotNull(sd); assertNotNull(sd);
assertEquals("Score doc count", noDocs-count, sd.length ); assertEquals("Score doc count", noDocs-count, sd.length );
@ -184,7 +229,7 @@ public class TestLongTrieRangeFilter extends LuceneTestCase {
private void testRandomTrieAndClassicRangeQuery(int precisionStep) throws Exception { private void testRandomTrieAndClassicRangeQuery(int precisionStep) throws Exception {
final Random rnd=newRandom(); final Random rnd=newRandom();
String field="field"+precisionStep; String field="field"+precisionStep;
int termCount=0; int termCountT=0,termCountC=0;
for (int i=0; i<50; i++) { for (int i=0; i<50; i++) {
long lower=(long)(rnd.nextDouble()*noDocs*distance)+startOffset; long lower=(long)(rnd.nextDouble()*noDocs*distance)+startOffset;
long upper=(long)(rnd.nextDouble()*noDocs*distance)+startOffset; long upper=(long)(rnd.nextDouble()*noDocs*distance)+startOffset;
@ -192,39 +237,45 @@ public class TestLongTrieRangeFilter extends LuceneTestCase {
long a=lower; lower=upper; upper=a; long a=lower; lower=upper; upper=a;
} }
// test inclusive range // test inclusive range
LongTrieRangeFilter tf=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), true, true); LongTrieRangeQuery tq=new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), true, true);
RangeQuery cq=new RangeQuery(field, TrieUtils.longToPrefixCoded(lower), TrieUtils.longToPrefixCoded(upper), true, true); RangeQuery cq=new RangeQuery(field, TrieUtils.longToPrefixCoded(lower), TrieUtils.longToPrefixCoded(upper), true, true);
cq.setConstantScoreRewrite(true); cq.setConstantScoreRewrite(true);
TopDocs tTopDocs = searcher.search(tf.asQuery(), 1); TopDocs tTopDocs = searcher.search(tq, 1);
TopDocs cTopDocs = searcher.search(cq, 1); TopDocs cTopDocs = searcher.search(cq, 1);
assertEquals("Returned count for LongTrieRangeFilter and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); assertEquals("Returned count for LongTrieRangeQuery and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
termCount += tf.getLastNumberOfTerms(); termCountT += tq.getTotalNumberOfTerms();
termCountC += cq.getTotalNumberOfTerms();
// test exclusive range // test exclusive range
tf=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), false, false); tq=new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), false, false);
cq=new RangeQuery(field, TrieUtils.longToPrefixCoded(lower), TrieUtils.longToPrefixCoded(upper), false, false); cq=new RangeQuery(field, TrieUtils.longToPrefixCoded(lower), TrieUtils.longToPrefixCoded(upper), false, false);
cq.setConstantScoreRewrite(true); cq.setConstantScoreRewrite(true);
tTopDocs = searcher.search(tf.asQuery(), 1); tTopDocs = searcher.search(tq, 1);
cTopDocs = searcher.search(cq, 1); cTopDocs = searcher.search(cq, 1);
assertEquals("Returned count for LongTrieRangeFilter and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); assertEquals("Returned count for LongTrieRangeQuery and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
termCount += tf.getLastNumberOfTerms(); termCountT += tq.getTotalNumberOfTerms();
termCountC += cq.getTotalNumberOfTerms();
// test left exclusive range // test left exclusive range
tf=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), false, true); tq=new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), false, true);
cq=new RangeQuery(field, TrieUtils.longToPrefixCoded(lower), TrieUtils.longToPrefixCoded(upper), false, true); cq=new RangeQuery(field, TrieUtils.longToPrefixCoded(lower), TrieUtils.longToPrefixCoded(upper), false, true);
cq.setConstantScoreRewrite(true); cq.setConstantScoreRewrite(true);
tTopDocs = searcher.search(tf.asQuery(), 1); tTopDocs = searcher.search(tq, 1);
cTopDocs = searcher.search(cq, 1); cTopDocs = searcher.search(cq, 1);
assertEquals("Returned count for LongTrieRangeFilter and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); assertEquals("Returned count for LongTrieRangeQuery and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
termCount += tf.getLastNumberOfTerms(); termCountT += tq.getTotalNumberOfTerms();
termCountC += cq.getTotalNumberOfTerms();
// test right exclusive range // test right exclusive range
tf=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), true, false); tq=new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), true, false);
cq=new RangeQuery(field, TrieUtils.longToPrefixCoded(lower), TrieUtils.longToPrefixCoded(upper), true, false); cq=new RangeQuery(field, TrieUtils.longToPrefixCoded(lower), TrieUtils.longToPrefixCoded(upper), true, false);
cq.setConstantScoreRewrite(true); cq.setConstantScoreRewrite(true);
tTopDocs = searcher.search(tf.asQuery(), 1); tTopDocs = searcher.search(tq, 1);
cTopDocs = searcher.search(cq, 1); cTopDocs = searcher.search(cq, 1);
assertEquals("Returned count for LongTrieRangeFilter and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); assertEquals("Returned count for LongTrieRangeQuery and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
termCount += tf.getLastNumberOfTerms(); termCountT += tq.getTotalNumberOfTerms();
termCountC += cq.getTotalNumberOfTerms();
} }
System.out.println("Average number of terms during random search on '" + field + "': " + (((double)termCount)/(50*4))); System.out.println("Average number of terms during random search on '" + field + "':");
System.out.println(" Trie query: " + (((double)termCountT)/(50*4)));
System.out.println(" Classical query: " + (((double)termCountC)/(50*4)));
} }
public void testRandomTrieAndClassicRangeQuery_8bit() throws Exception { public void testRandomTrieAndClassicRangeQuery_8bit() throws Exception {
@ -250,19 +301,19 @@ public class TestLongTrieRangeFilter extends LuceneTestCase {
long a=lower; lower=upper; upper=a; long a=lower; lower=upper; upper=a;
} }
// test inclusive range // test inclusive range
Query tq=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), true, true).asQuery(); Query tq=new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), true, true);
TopDocs tTopDocs = searcher.search(tq, 1); TopDocs tTopDocs = searcher.search(tq, 1);
assertEquals("Returned count of range query must be equal to inclusive range length", upper-lower+1, tTopDocs.totalHits ); assertEquals("Returned count of range query must be equal to inclusive range length", upper-lower+1, tTopDocs.totalHits );
// test exclusive range // test exclusive range
tq=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), false, false).asQuery(); tq=new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), false, false);
tTopDocs = searcher.search(tq, 1); tTopDocs = searcher.search(tq, 1);
assertEquals("Returned count of range query must be equal to exclusive range length", Math.max(upper-lower-1, 0), tTopDocs.totalHits ); assertEquals("Returned count of range query must be equal to exclusive range length", Math.max(upper-lower-1, 0), tTopDocs.totalHits );
// test left exclusive range // test left exclusive range
tq=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), false, true).asQuery(); tq=new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), false, true);
tTopDocs = searcher.search(tq, 1); tTopDocs = searcher.search(tq, 1);
assertEquals("Returned count of range query must be equal to half exclusive range length", upper-lower, tTopDocs.totalHits ); assertEquals("Returned count of range query must be equal to half exclusive range length", upper-lower, tTopDocs.totalHits );
// test right exclusive range // test right exclusive range
tq=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), true, false).asQuery(); tq=new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), true, false);
tTopDocs = searcher.search(tq, 1); tTopDocs = searcher.search(tq, 1);
assertEquals("Returned count of range query must be equal to half exclusive range length", upper-lower, tTopDocs.totalHits ); assertEquals("Returned count of range query must be equal to half exclusive range length", upper-lower, tTopDocs.totalHits );
} }
@ -291,7 +342,7 @@ public class TestLongTrieRangeFilter extends LuceneTestCase {
if (lower>upper) { if (lower>upper) {
long a=lower; lower=upper; upper=a; long a=lower; lower=upper; upper=a;
} }
Query tq=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), true, true).asQuery(); Query tq=new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), true, true);
TopDocs topDocs = searcher.search(tq, null, noDocs, new Sort(TrieUtils.getLongSortField(field, true))); TopDocs topDocs = searcher.search(tq, null, noDocs, new Sort(TrieUtils.getLongSortField(field, true)));
if (topDocs.totalHits==0) continue; if (topDocs.totalHits==0) continue;
ScoreDoc[] sd = topDocs.scoreDocs; ScoreDoc[] sd = topDocs.scoreDocs;

View File

@ -1,54 +1,54 @@
package org.apache.lucene.search.trie; package org.apache.lucene.search.trie;
/** /**
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with * contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership. * this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0 * The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with * (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at * the License. You may obtain a copy of the License at
* *
* http://www.apache.org/licenses/LICENSE-2.0 * http://www.apache.org/licenses/LICENSE-2.0
* *
* Unless required by applicable law or agreed to in writing, software * Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, * distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.analysis.tokenattributes.TermAttribute;
public class TestLongTrieTokenStream extends LuceneTestCase { public class TestLongTrieTokenStream extends LuceneTestCase {
static final int precisionStep = 8; static final int precisionStep = 8;
static final long value = 4573245871874382L; static final long value = 4573245871874382L;
public void testStreamNewAPI() throws Exception { public void testStreamNewAPI() throws Exception {
final LongTrieTokenStream stream=new LongTrieTokenStream(value, precisionStep); final LongTrieTokenStream stream=new LongTrieTokenStream(value, precisionStep);
stream.setUseNewAPI(true); stream.setUseNewAPI(true);
// use getAttribute to test if attributes really exist, if not an IAE will be throwed // use getAttribute to test if attributes really exist, if not an IAE will be throwed
final ShiftAttribute shiftAtt = (ShiftAttribute) stream.getAttribute(ShiftAttribute.class); final ShiftAttribute shiftAtt = (ShiftAttribute) stream.getAttribute(ShiftAttribute.class);
final TermAttribute termAtt = (TermAttribute) stream.getAttribute(TermAttribute.class); final TermAttribute termAtt = (TermAttribute) stream.getAttribute(TermAttribute.class);
for (int shift=0; shift<64; shift+=precisionStep) { for (int shift=0; shift<64; shift+=precisionStep) {
assertTrue("New token is available", stream.incrementToken()); assertTrue("New token is available", stream.incrementToken());
assertEquals("Shift value", shift, shiftAtt.getShift()); assertEquals("Shift value", shift, shiftAtt.getShift());
assertEquals("Term is correctly encoded", TrieUtils.longToPrefixCoded(value, shift), termAtt.term()); assertEquals("Term is correctly encoded", TrieUtils.longToPrefixCoded(value, shift), termAtt.term());
} }
assertFalse("No more tokens available", stream.incrementToken()); assertFalse("No more tokens available", stream.incrementToken());
} }
public void testStreamOldAPI() throws Exception { public void testStreamOldAPI() throws Exception {
final LongTrieTokenStream stream=new LongTrieTokenStream(value, precisionStep); final LongTrieTokenStream stream=new LongTrieTokenStream(value, precisionStep);
stream.setUseNewAPI(false); stream.setUseNewAPI(false);
Token tok=new Token(); Token tok=new Token();
for (int shift=0; shift<64; shift+=precisionStep) { for (int shift=0; shift<64; shift+=precisionStep) {
assertNotNull("New token is available", tok=stream.next(tok)); assertNotNull("New token is available", tok=stream.next(tok));
assertEquals("Term is correctly encoded", TrieUtils.longToPrefixCoded(value, shift), tok.term()); assertEquals("Term is correctly encoded", TrieUtils.longToPrefixCoded(value, shift), tok.term());
} }
assertNull("No more tokens available", stream.next(tok)); assertNull("No more tokens available", stream.next(tok));
} }
} }