From 92c64841618b55b09bb67d216f85177c1d47fed9 Mon Sep 17 00:00:00 2001 From: Uwe Schindler Date: Thu, 16 Apr 2009 12:58:28 +0000 Subject: [PATCH] LUCENE-1602: Rewrite TrieRange to use MultiTermQuery git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@765618 13f79535-47bb-0310-9956-ffa450edef68 --- .../search/trie/AbstractTrieRangeFilter.java | 157 ---- .../search/trie/AbstractTrieRangeQuery.java | 112 +++ .../search/trie/IntTrieRangeFilter.java | 162 ++-- .../lucene/search/trie/IntTrieRangeQuery.java | 66 ++ .../search/trie/IntTrieTokenStream.java | 344 ++++----- .../search/trie/LongTrieRangeFilter.java | 162 ++-- .../search/trie/LongTrieRangeQuery.java | 66 ++ .../search/trie/LongTrieTokenStream.java | 344 ++++----- .../lucene/search/trie/ShiftAttribute.java | 140 ++-- .../lucene/search/trie/TrieRangeTermEnum.java | 140 ++++ .../apache/lucene/search/trie/package.html | 8 +- ...Filter.java => TestIntTrieRangeQuery.java} | 691 ++++++++++-------- .../search/trie/TestIntTrieTokenStream.java | 108 +-- ...ilter.java => TestLongTrieRangeQuery.java} | 133 ++-- .../search/trie/TestLongTrieTokenStream.java | 108 +-- 15 files changed, 1495 insertions(+), 1246 deletions(-) delete mode 100644 contrib/queries/src/java/org/apache/lucene/search/trie/AbstractTrieRangeFilter.java create mode 100644 contrib/queries/src/java/org/apache/lucene/search/trie/AbstractTrieRangeQuery.java create mode 100644 contrib/queries/src/java/org/apache/lucene/search/trie/IntTrieRangeQuery.java create mode 100644 contrib/queries/src/java/org/apache/lucene/search/trie/LongTrieRangeQuery.java create mode 100644 contrib/queries/src/java/org/apache/lucene/search/trie/TrieRangeTermEnum.java rename contrib/queries/src/test/org/apache/lucene/search/trie/{TestIntTrieRangeFilter.java => TestIntTrieRangeQuery.java} (66%) rename contrib/queries/src/test/org/apache/lucene/search/trie/{TestLongTrieRangeFilter.java => TestLongTrieRangeQuery.java} (66%) diff --git 
a/contrib/queries/src/java/org/apache/lucene/search/trie/AbstractTrieRangeFilter.java b/contrib/queries/src/java/org/apache/lucene/search/trie/AbstractTrieRangeFilter.java deleted file mode 100644 index e885a4b0cff..00000000000 --- a/contrib/queries/src/java/org/apache/lucene/search/trie/AbstractTrieRangeFilter.java +++ /dev/null @@ -1,157 +0,0 @@ -package org.apache.lucene.search.trie; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import java.io.IOException; - -import org.apache.lucene.search.Filter; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.ConstantScoreQuery; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.TermDocs; -import org.apache.lucene.index.TermEnum; -import org.apache.lucene.index.Term; -import org.apache.lucene.util.OpenBitSet; -import org.apache.lucene.util.ToStringUtils; - - -abstract class AbstractTrieRangeFilter extends Filter { - - AbstractTrieRangeFilter(final String field, final int precisionStep, - Number min, Number max, final boolean minInclusive, final boolean maxInclusive - ) { - this.field=field.intern(); - this.precisionStep=precisionStep; - this.min=min; - this.max=max; - this.minInclusive=minInclusive; - this.maxInclusive=maxInclusive; - } - - //@Override - public String toString() { - return toString(null); - } - - public String toString(final String field) { - final StringBuffer sb=new StringBuffer(); - if (!this.field.equals(field)) sb.append(this.field).append(':'); - return sb.append(minInclusive ? '[' : '{') - .append((min==null) ? "*" : min.toString()) - .append(" TO ") - .append((max==null) ? "*" : max.toString()) - .append(maxInclusive ? ']' : '}').toString(); - } - - //@Override - public final boolean equals(final Object o) { - if (o==this) return true; - if (o==null) return false; - if (this.getClass().equals(o.getClass())) { - AbstractTrieRangeFilter q=(AbstractTrieRangeFilter)o; - return ( - field==q.field && - (q.min == null ? min == null : q.min.equals(min)) && - (q.max == null ? 
max == null : q.max.equals(max)) && - minInclusive==q.minInclusive && - maxInclusive==q.maxInclusive && - precisionStep==q.precisionStep - ); - } - return false; - } - - //@Override - public final int hashCode() { - int hash = field.hashCode() + (precisionStep^0x64365465); - if (min!=null) hash += min.hashCode()^0x14fa55fb; - if (max!=null) hash += max.hashCode()^0x733fa5fe; - return hash+ - (Boolean.valueOf(minInclusive).hashCode()^0x14fa55fb)+ - (Boolean.valueOf(maxInclusive).hashCode()^0x733fa5fe); - } - - /** - * Expert: Return the number of terms visited during the last execution of {@link #getDocIdSet}. - * This may be used for performance comparisons of different trie variants and their effectiveness. - * This method is not thread safe, be sure to only call it when no query is running! - * @throws IllegalStateException if {@link #getDocIdSet} was not yet executed. - */ - public int getLastNumberOfTerms() { - if (lastNumberOfTerms < 0) throw new IllegalStateException(); - return lastNumberOfTerms; - } - - void resetLastNumberOfTerms() { - lastNumberOfTerms=0; - } - - /** Returns this range filter as a query. - * Using this method, it is possible to create a Query using new {Long|Int}TrieRangeFilter(....).asQuery(). - * This is a synonym for wrapping with a {@link ConstantScoreQuery}, - * but this query returns a better toString() variant. 
- */ - public Query asQuery() { - return new ConstantScoreQuery(this) { - - /** this instance return a nicer String variant than the original {@link ConstantScoreQuery} */ - //@Override - public String toString(final String field) { - // return a more convenient representation of this query than ConstantScoreQuery does: - return ((AbstractTrieRangeFilter) filter).toString(field)+ToStringUtils.boost(getBoost()); - } - - }; - } - - void fillBits( - final IndexReader reader, - final OpenBitSet bits, final TermDocs termDocs, - final String lowerTerm, final String upperTerm - ) throws IOException { - final int len=lowerTerm.length(); - assert upperTerm.length()==len; - - // find the docs - final TermEnum enumerator = reader.terms(new Term(field, lowerTerm)); - try { - do { - final Term term = enumerator.term(); - if (term!=null && term.field()==field) { - // break out when upperTerm reached or length of term is different - final String t=term.text(); - if (len!=t.length() || t.compareTo(upperTerm)>0) break; - // we have a good term, find the docs - lastNumberOfTerms++; - termDocs.seek(enumerator); - while (termDocs.next()) bits.set(termDocs.doc()); - } else break; - } while (enumerator.next()); - } finally { - enumerator.close(); - } - } - - // members - final String field; - final int precisionStep; - final Number min,max; - final boolean minInclusive,maxInclusive; - - private int lastNumberOfTerms=-1; -} diff --git a/contrib/queries/src/java/org/apache/lucene/search/trie/AbstractTrieRangeQuery.java b/contrib/queries/src/java/org/apache/lucene/search/trie/AbstractTrieRangeQuery.java new file mode 100644 index 00000000000..d9a9d2a6224 --- /dev/null +++ b/contrib/queries/src/java/org/apache/lucene/search/trie/AbstractTrieRangeQuery.java @@ -0,0 +1,112 @@ +package org.apache.lucene.search.trie; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.search.Filter; +import org.apache.lucene.search.MultiTermQuery; +import org.apache.lucene.search.FilteredTermEnum; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.util.ToStringUtils; + +abstract class AbstractTrieRangeQuery extends MultiTermQuery { + + AbstractTrieRangeQuery(final String field, final int precisionStep, + Number min, Number max, final boolean minInclusive, final boolean maxInclusive + ) { + this.field = field.intern(); + this.precisionStep = precisionStep; + this.min = min; + this.max = max; + this.minInclusive = minInclusive; + this.maxInclusive = maxInclusive; + setConstantScoreRewrite(true); + } + + abstract void passRanges(TrieRangeTermEnum enumerator); + + //@Override + protected FilteredTermEnum getEnum(final IndexReader reader) throws IOException { + TrieRangeTermEnum enumerator = new TrieRangeTermEnum(this, reader); + passRanges(enumerator); + enumerator.init(); + return enumerator; + } + + /** Returns the field name for this query */ + public String getField() { return field; } + + /** Returns true if the lower endpoint is inclusive */ + public boolean includesMin() { return minInclusive; } + + /** Returns true if the upper endpoint is inclusive */ + public boolean 
includesMax() { return maxInclusive; } + + //@Override + public String toString(final String field) { + final StringBuffer sb=new StringBuffer(); + if (!this.field.equals(field)) sb.append(this.field).append(':'); + return sb.append(minInclusive ? '[' : '{') + .append((min==null) ? "*" : min.toString()) + .append(" TO ") + .append((max==null) ? "*" : max.toString()) + .append(maxInclusive ? ']' : '}').toString(); + } + + //@Override + public final boolean equals(final Object o) { + if (o==this) return true; + if (o==null) return false; + if (this.getClass().equals(o.getClass())) { + AbstractTrieRangeQuery q=(AbstractTrieRangeQuery)o; + return ( + field==q.field && + (q.min == null ? min == null : q.min.equals(min)) && + (q.max == null ? max == null : q.max.equals(max)) && + minInclusive==q.minInclusive && + maxInclusive==q.maxInclusive && + precisionStep==q.precisionStep + ); + } + return false; + } + + //@Override + public final int hashCode() { + int hash = field.hashCode() + (precisionStep^0x64365465); + if (min!=null) hash += min.hashCode()^0x14fa55fb; + if (max!=null) hash += max.hashCode()^0x733fa5fe; + return hash+ + (Boolean.valueOf(minInclusive).hashCode()^0x14fa55fb)+ + (Boolean.valueOf(maxInclusive).hashCode()^0x733fa5fe); + } + + // TODO: Make this method accessible by *TrieRangeFilter, + // can be removed, when moved to core. 
+ //@Override + protected Filter getFilter() { + return super.getFilter(); + } + + // members + final String field; + final int precisionStep; + final Number min,max; + final boolean minInclusive,maxInclusive; +} diff --git a/contrib/queries/src/java/org/apache/lucene/search/trie/IntTrieRangeFilter.java b/contrib/queries/src/java/org/apache/lucene/search/trie/IntTrieRangeFilter.java index 889a67e7ca0..8e0cd3f61e1 100644 --- a/contrib/queries/src/java/org/apache/lucene/search/trie/IntTrieRangeFilter.java +++ b/contrib/queries/src/java/org/apache/lucene/search/trie/IntTrieRangeFilter.java @@ -1,101 +1,61 @@ -package org.apache.lucene.search.trie; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; - -import org.apache.lucene.document.Document; -import org.apache.lucene.search.Filter; -import org.apache.lucene.search.DocIdSet; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.TermDocs; -import org.apache.lucene.util.OpenBitSet; - - -/** - * Implementation of a Lucene {@link Filter} that implements trie-based range filtering for ints/floats. 
- * This filter depends on a specific structure of terms in the index that can only be created - * by indexing via {@link IntTrieTokenStream} methods. - * For more information, how the algorithm works, see the {@linkplain org.apache.lucene.search.trie package description}. - */ -public class IntTrieRangeFilter extends AbstractTrieRangeFilter { - - /** - * A trie filter for matching trie coded values using the given field name and - * the default helper field. - * precisionStep must me equal or a multiple of the precisionStep - * used for indexing the values. - * You can leave the bounds open, by supplying null for min and/or - * max. Inclusive/exclusive bounds can also be supplied. - * To query float values use the converter {@link TrieUtils#floatToSortableInt}. - */ - public IntTrieRangeFilter(final String field, final int precisionStep, - final Integer min, final Integer max, final boolean minInclusive, final boolean maxInclusive - ) { - super(field,precisionStep,min,max,minInclusive,maxInclusive); - } - - /** - * Returns a DocIdSet that provides the documents which should be permitted or prohibited in search results. - */ - //@Override - public DocIdSet getDocIdSet(final IndexReader reader) throws IOException { - // calculate the upper and lower bounds respecting the inclusive and null values. - int minBound=(this.min==null) ? Integer.MIN_VALUE : ( - minInclusive ? this.min.intValue() : (this.min.intValue()+1) - ); - int maxBound=(this.max==null) ? Integer.MAX_VALUE : ( - maxInclusive ? 
this.max.intValue() : (this.max.intValue()-1) - ); - - resetLastNumberOfTerms(); - if (minBound > maxBound) { - // shortcut, no docs will match this - return DocIdSet.EMPTY_DOCIDSET; - } else { - final OpenBitSet bits = new OpenBitSet(reader.maxDoc()); - final TermDocs termDocs = reader.termDocs(); - try { - TrieUtils.splitIntRange(new TrieUtils.IntRangeBuilder() { - - //@Override - public final void addRange(String minPrefixCoded, String maxPrefixCoded) { - try { - fillBits( - reader, bits, termDocs, - minPrefixCoded, maxPrefixCoded - ); - } catch (IOException ioe) { - // IntRangeBuilder is not allowed to throw checked exceptions: - // wrap as RuntimeException - throw new RuntimeException(ioe); - } - } - - }, precisionStep, minBound, maxBound); - } catch (RuntimeException e) { - if (e.getCause() instanceof IOException) throw (IOException)e.getCause(); - throw e; - } finally { - termDocs.close(); - } - return bits; - } - } - -} +package org.apache.lucene.search.trie; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.search.Filter; // for javadocs +import org.apache.lucene.search.MultiTermQueryWrapperFilter; + +/** + * Implementation of a Lucene {@link Filter} that implements trie-based range filtering for ints/floats. + * This filter depends on a specific structure of terms in the index that can only be created + * by indexing via {@link IntTrieTokenStream} methods. + * For more information, how the algorithm works, see the {@linkplain org.apache.lucene.search.trie package description}. + */ +public class IntTrieRangeFilter extends MultiTermQueryWrapperFilter { + + /** + * A trie filter for matching trie coded values using the given field name and + * the default helper field. + * precisionStep must me equal or a multiple of the precisionStep + * used for indexing the values. + * You can leave the bounds open, by supplying null for min and/or + * max. Inclusive/exclusive bounds can also be supplied. + * To filter float values use the converter {@link TrieUtils#floatToSortableInt}. 
+ */ + public IntTrieRangeFilter(final String field, final int precisionStep, + final Integer min, final Integer max, final boolean minInclusive, final boolean maxInclusive + ) { + super(new IntTrieRangeQuery(field,precisionStep,min,max,minInclusive,maxInclusive)); + } + + /** Returns the field name for this filter */ + public String getField() { return ((IntTrieRangeQuery)query).getField(); } + + /** Returns true if the lower endpoint is inclusive */ + public boolean includesMin() { return ((IntTrieRangeQuery)query).includesMin(); } + + /** Returns true if the upper endpoint is inclusive */ + public boolean includesMax() { return ((IntTrieRangeQuery)query).includesMax(); } + + /** Returns the lower value of this range filter */ + public Integer getMin() { return ((IntTrieRangeQuery)query).getMin(); } + + /** Returns the upper value of this range filter */ + public Integer getMax() { return ((IntTrieRangeQuery)query).getMax(); } + +} diff --git a/contrib/queries/src/java/org/apache/lucene/search/trie/IntTrieRangeQuery.java b/contrib/queries/src/java/org/apache/lucene/search/trie/IntTrieRangeQuery.java new file mode 100644 index 00000000000..d0fdc9be485 --- /dev/null +++ b/contrib/queries/src/java/org/apache/lucene/search/trie/IntTrieRangeQuery.java @@ -0,0 +1,66 @@ +package org.apache.lucene.search.trie; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.search.Query; + +/** + * Implementation of a Lucene {@link Query} that implements trie-based range querying for ints/floats. + * This query depends on a specific structure of terms in the index that can only be created + * by indexing via {@link IntTrieTokenStream} methods. + *

The query is in constant score mode per default. With precision steps of ≤4, this + * query can be run in conventional boolean rewrite mode without changing the max clause count. + * For more information, how the algorithm works, see the {@linkplain org.apache.lucene.search.trie package description}. + */ +public class IntTrieRangeQuery extends AbstractTrieRangeQuery { + + /** + * A trie query for matching trie coded values using the given field name and + * the default helper field. + * precisionStep must me equal or a multiple of the precisionStep + * used for indexing the values. + * You can leave the bounds open, by supplying null for min and/or + * max. Inclusive/exclusive bounds can also be supplied. + * To query float values use the converter {@link TrieUtils#floatToSortableInt}. + */ + public IntTrieRangeQuery(final String field, final int precisionStep, + final Integer min, final Integer max, final boolean minInclusive, final boolean maxInclusive + ) { + super(field,precisionStep,min,max,minInclusive,maxInclusive); + } + + //@Override + void passRanges(TrieRangeTermEnum enumerator) { + // calculate the upper and lower bounds respecting the inclusive and null values. + int minBound=(this.min==null) ? Integer.MIN_VALUE : ( + minInclusive ? this.min.intValue() : (this.min.intValue()+1) + ); + int maxBound=(this.max==null) ? Integer.MAX_VALUE : ( + maxInclusive ? 
this.max.intValue() : (this.max.intValue()-1) + ); + + TrieUtils.splitIntRange(enumerator.getIntRangeBuilder(), precisionStep, minBound, maxBound); + } + + /** Returns the lower value of this range query */ + public Integer getMin() { return (Integer)min; } + + /** Returns the upper value of this range query */ + public Integer getMax() { return (Integer)max; } + +} diff --git a/contrib/queries/src/java/org/apache/lucene/search/trie/IntTrieTokenStream.java b/contrib/queries/src/java/org/apache/lucene/search/trie/IntTrieTokenStream.java index 597664a056c..46c02f4c353 100644 --- a/contrib/queries/src/java/org/apache/lucene/search/trie/IntTrieTokenStream.java +++ b/contrib/queries/src/java/org/apache/lucene/search/trie/IntTrieTokenStream.java @@ -1,172 +1,172 @@ -package org.apache.lucene.search.trie; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.Token; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; -import org.apache.lucene.analysis.tokenattributes.TypeAttribute; -import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; - -/** - * This class provides a {@link TokenStream} for indexing int values - * that can be queried by {@link IntTrieRangeFilter}. This stream is not intended - * to be used in analyzers, its more for iterating the different precisions during - * indexing a specific numeric value. - *

A int value is indexed as multiple string encoded terms, each reduced - * by zeroing bits from the right. Each value is also prefixed (in the first char) by the - * shift value (number of bits removed) used during encoding. - *

The number of bits removed from the right for each trie entry is called - * precisionStep in this API. For comparing the different step values, see the - * {@linkplain org.apache.lucene.search.trie package description}. - *

The usage pattern is (it is recommened to switch off norms and term frequencies - * for numeric fields; it does not make sense to have them): - *

- *  Field field = new Field(name, new IntTrieTokenStream(value, precisionStep));
- *  field.setOmitNorms(true);
- *  field.setOmitTermFreqAndPositions(true);
- *  document.add(field);
- * 
- *

For optimal performance, re-use the TokenStream and Field instance - * for more than one document: - *

- *  // init
- *  TokenStream stream = new IntTrieTokenStream(precisionStep);
- *  Field field = new Field(name, stream);
- *  field.setOmitNorms(true);
- *  field.setOmitTermFreqAndPositions(true);
- *  Document doc = new Document();
- *  document.add(field);
- *  // use this code to index many documents:
- *  stream.setValue(value1)
- *  writer.addDocument(document);
- *  stream.setValue(value2)
- *  writer.addDocument(document);
- *  ...
- * 
- *

Please note: Token streams are read, when the document is added to index. - * If you index more than one numeric field, use a separate instance for each. - *

For more information, how trie fields work, see the - * {@linkplain org.apache.lucene.search.trie package description}. - */ -public class IntTrieTokenStream extends TokenStream { - - /** The full precision token gets this token type assigned. */ - public static final String TOKEN_TYPE_FULL_PREC = "fullPrecTrieInt"; - - /** The lower precision tokens gets this token type assigned. */ - public static final String TOKEN_TYPE_LOWER_PREC = "lowerPrecTrieInt"; - - /** - * Creates a token stream for indexing value with the given - * precisionStep. As instance creating is a major cost, - * consider using a {@link #IntTrieTokenStream(int)} instance once for - * indexing a large number of documents and assign a value with - * {@link #setValue} for each document. - * To index float values use the converter {@link TrieUtils#doubleToSortableLong}. - */ - public IntTrieTokenStream(final int value, final int precisionStep) { - if (precisionStep<1 || precisionStep>32) - throw new IllegalArgumentException("precisionStep may only be 1..32"); - this.value = value; - this.precisionStep = precisionStep; - termAtt = (TermAttribute) addAttribute(TermAttribute.class); - typeAtt = (TypeAttribute) addAttribute(TypeAttribute.class); - posIncrAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class); - shiftAtt = (ShiftAttribute) addAttribute(ShiftAttribute.class); - } - - /** - * Creates a token stream for indexing values with the given - * precisionStep. This stream is initially "empty" - * (using a numeric value of 0), assign a value before indexing - * each document using {@link #setValue}. - */ - public IntTrieTokenStream(final int precisionStep) { - this(0, precisionStep); - } - - /** - * Resets the token stream to deliver prefix encoded values - * for value. Use this method to index the same - * numeric field for a large number of documents and reuse the - * current stream instance. 
- * To index float values use the converter {@link TrieUtils#doubleToSortableLong}. - */ - public void setValue(final int value) { - this.value = value; - reset(); - } - - // @Override - public void reset() { - shift = 0; - } - - // @Override - public boolean incrementToken() { - if (shift>=32) return false; - final char[] buffer = termAtt.resizeTermBuffer(TrieUtils.INT_BUF_SIZE); - termAtt.setTermLength(TrieUtils.intToPrefixCoded(value, shift, buffer)); - shiftAtt.setShift(shift); - if (shift==0) { - typeAtt.setType(TOKEN_TYPE_FULL_PREC); - posIncrAtt.setPositionIncrement(1); - } else { - typeAtt.setType(TOKEN_TYPE_LOWER_PREC); - posIncrAtt.setPositionIncrement(0); - } - shift += precisionStep; - return true; - } - - // @Override - /** @deprecated */ - public Token next(final Token reusableToken) { - if (shift>=32) return null; - reusableToken.clear(); - final char[] buffer = reusableToken.resizeTermBuffer(TrieUtils.INT_BUF_SIZE); - reusableToken.setTermLength(TrieUtils.intToPrefixCoded(value, shift, buffer)); - if (shift==0) { - reusableToken.setType(TOKEN_TYPE_FULL_PREC); - reusableToken.setPositionIncrement(1); - } else { - reusableToken.setType(TOKEN_TYPE_LOWER_PREC); - reusableToken.setPositionIncrement(0); - } - shift += precisionStep; - return reusableToken; - } - - // @Override - public String toString() { - final StringBuffer sb = new StringBuffer("(trie-int,value=").append(value); - sb.append(",precisionStep=").append(precisionStep).append(')'); - return sb.toString(); - } - - // members - private final TermAttribute termAtt; - private final TypeAttribute typeAtt; - private final PositionIncrementAttribute posIncrAtt; - private final ShiftAttribute shiftAtt; - - private int shift = 0; - private int value; - private final int precisionStep; -} +package org.apache.lucene.search.trie; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.Token; +import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.TypeAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; + +/** + * This class provides a {@link TokenStream} for indexing int values + * that can be queried by {@link IntTrieRangeFilter}. This stream is not intended + * to be used in analyzers, its more for iterating the different precisions during + * indexing a specific numeric value. + *

A int value is indexed as multiple string encoded terms, each reduced + * by zeroing bits from the right. Each value is also prefixed (in the first char) by the + * shift value (number of bits removed) used during encoding. + *

The number of bits removed from the right for each trie entry is called + * precisionStep in this API. For comparing the different step values, see the + * {@linkplain org.apache.lucene.search.trie package description}. + *

The usage pattern is (it is recommened to switch off norms and term frequencies + * for numeric fields; it does not make sense to have them): + *

+ *  Field field = new Field(name, new IntTrieTokenStream(value, precisionStep));
+ *  field.setOmitNorms(true);
+ *  field.setOmitTermFreqAndPositions(true);
+ *  document.add(field);
+ * 
+ *

For optimal performance, re-use the TokenStream and Field instance + * for more than one document: + *

+ *  // init
+ *  TokenStream stream = new IntTrieTokenStream(precisionStep);
+ *  Field field = new Field(name, stream);
+ *  field.setOmitNorms(true);
+ *  field.setOmitTermFreqAndPositions(true);
+ *  Document doc = new Document();
+ *  document.add(field);
+ *  // use this code to index many documents:
+ *  stream.setValue(value1)
+ *  writer.addDocument(document);
+ *  stream.setValue(value2)
+ *  writer.addDocument(document);
+ *  ...
+ * 
+ *

Please note: Token streams are read, when the document is added to index. + * If you index more than one numeric field, use a separate instance for each. + *

For more information, how trie fields work, see the + * {@linkplain org.apache.lucene.search.trie package description}. + */ +public class IntTrieTokenStream extends TokenStream { + + /** The full precision token gets this token type assigned. */ + public static final String TOKEN_TYPE_FULL_PREC = "fullPrecTrieInt"; + + /** The lower precision tokens gets this token type assigned. */ + public static final String TOKEN_TYPE_LOWER_PREC = "lowerPrecTrieInt"; + + /** + * Creates a token stream for indexing value with the given + * precisionStep. As instance creating is a major cost, + * consider using a {@link #IntTrieTokenStream(int)} instance once for + * indexing a large number of documents and assign a value with + * {@link #setValue} for each document. + * To index float values use the converter {@link TrieUtils#doubleToSortableLong}. + */ + public IntTrieTokenStream(final int value, final int precisionStep) { + if (precisionStep<1 || precisionStep>32) + throw new IllegalArgumentException("precisionStep may only be 1..32"); + this.value = value; + this.precisionStep = precisionStep; + termAtt = (TermAttribute) addAttribute(TermAttribute.class); + typeAtt = (TypeAttribute) addAttribute(TypeAttribute.class); + posIncrAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class); + shiftAtt = (ShiftAttribute) addAttribute(ShiftAttribute.class); + } + + /** + * Creates a token stream for indexing values with the given + * precisionStep. This stream is initially "empty" + * (using a numeric value of 0), assign a value before indexing + * each document using {@link #setValue}. + */ + public IntTrieTokenStream(final int precisionStep) { + this(0, precisionStep); + } + + /** + * Resets the token stream to deliver prefix encoded values + * for value. Use this method to index the same + * numeric field for a large number of documents and reuse the + * current stream instance. 
+ * To index float values use the converter {@link TrieUtils#doubleToSortableLong}. + */ + public void setValue(final int value) { + this.value = value; + reset(); + } + + // @Override + public void reset() { + shift = 0; + } + + // @Override + public boolean incrementToken() { + if (shift>=32) return false; + final char[] buffer = termAtt.resizeTermBuffer(TrieUtils.INT_BUF_SIZE); + termAtt.setTermLength(TrieUtils.intToPrefixCoded(value, shift, buffer)); + shiftAtt.setShift(shift); + if (shift==0) { + typeAtt.setType(TOKEN_TYPE_FULL_PREC); + posIncrAtt.setPositionIncrement(1); + } else { + typeAtt.setType(TOKEN_TYPE_LOWER_PREC); + posIncrAtt.setPositionIncrement(0); + } + shift += precisionStep; + return true; + } + + // @Override + /** @deprecated */ + public Token next(final Token reusableToken) { + if (shift>=32) return null; + reusableToken.clear(); + final char[] buffer = reusableToken.resizeTermBuffer(TrieUtils.INT_BUF_SIZE); + reusableToken.setTermLength(TrieUtils.intToPrefixCoded(value, shift, buffer)); + if (shift==0) { + reusableToken.setType(TOKEN_TYPE_FULL_PREC); + reusableToken.setPositionIncrement(1); + } else { + reusableToken.setType(TOKEN_TYPE_LOWER_PREC); + reusableToken.setPositionIncrement(0); + } + shift += precisionStep; + return reusableToken; + } + + // @Override + public String toString() { + final StringBuffer sb = new StringBuffer("(trie-int,value=").append(value); + sb.append(",precisionStep=").append(precisionStep).append(')'); + return sb.toString(); + } + + // members + private final TermAttribute termAtt; + private final TypeAttribute typeAtt; + private final PositionIncrementAttribute posIncrAtt; + private final ShiftAttribute shiftAtt; + + private int shift = 0; + private int value; + private final int precisionStep; +} diff --git a/contrib/queries/src/java/org/apache/lucene/search/trie/LongTrieRangeFilter.java b/contrib/queries/src/java/org/apache/lucene/search/trie/LongTrieRangeFilter.java index dc8afc462f1..496f3718a11 100644 
--- a/contrib/queries/src/java/org/apache/lucene/search/trie/LongTrieRangeFilter.java +++ b/contrib/queries/src/java/org/apache/lucene/search/trie/LongTrieRangeFilter.java @@ -1,101 +1,61 @@ -package org.apache.lucene.search.trie; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; - -import org.apache.lucene.document.Document; -import org.apache.lucene.search.Filter; -import org.apache.lucene.search.DocIdSet; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.TermDocs; -import org.apache.lucene.util.OpenBitSet; - - -/** - * Implementation of a Lucene {@link Filter} that implements trie-based range filtering for longs/doubles. - * This filter depends on a specific structure of terms in the index that can only be created - * by indexing via {@link LongTrieTokenStream} methods. - * For more information, how the algorithm works, see the {@linkplain org.apache.lucene.search.trie package description}. - */ -public class LongTrieRangeFilter extends AbstractTrieRangeFilter { - - /** - * A trie filter for matching trie coded values using the given field name and - * the default helper field. 
- * precisionStep must me equal or a multiple of the precisionStep - * used for indexing the values. - * You can leave the bounds open, by supplying null for min and/or - * max. Inclusive/exclusive bounds can also be supplied. - * To query double values use the converter {@link TrieUtils#doubleToSortableLong}. - */ - public LongTrieRangeFilter(final String field, final int precisionStep, - final Long min, final Long max, final boolean minInclusive, final boolean maxInclusive - ) { - super(field,precisionStep,min,max,minInclusive,maxInclusive); - } - - /** - * Returns a DocIdSet that provides the documents which should be permitted or prohibited in search results. - */ - //@Override - public DocIdSet getDocIdSet(final IndexReader reader) throws IOException { - // calculate the upper and lower bounds respecting the inclusive and null values. - long minBound=(this.min==null) ? Long.MIN_VALUE : ( - minInclusive ? this.min.longValue() : (this.min.longValue()+1L) - ); - long maxBound=(this.max==null) ? Long.MAX_VALUE : ( - maxInclusive ? 
this.max.longValue() : (this.max.longValue()-1L) - ); - - resetLastNumberOfTerms(); - if (minBound > maxBound) { - // shortcut, no docs will match this - return DocIdSet.EMPTY_DOCIDSET; - } else { - final OpenBitSet bits = new OpenBitSet(reader.maxDoc()); - final TermDocs termDocs = reader.termDocs(); - try { - TrieUtils.splitLongRange(new TrieUtils.LongRangeBuilder() { - - //@Override - public final void addRange(String minPrefixCoded, String maxPrefixCoded) { - try { - fillBits( - reader, bits, termDocs, - minPrefixCoded, maxPrefixCoded - ); - } catch (IOException ioe) { - // LongRangeBuilder is not allowed to throw checked exceptions: - // wrap as RuntimeException - throw new RuntimeException(ioe); - } - } - - }, precisionStep, minBound, maxBound); - } catch (RuntimeException e) { - if (e.getCause() instanceof IOException) throw (IOException)e.getCause(); - throw e; - } finally { - termDocs.close(); - } - return bits; - } - } - -} +package org.apache.lucene.search.trie; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.search.Filter; // for javadocs +import org.apache.lucene.search.MultiTermQueryWrapperFilter; + +/** + * Implementation of a Lucene {@link Filter} that implements trie-based range filtering for longs/doubles. + * This filter depends on a specific structure of terms in the index that can only be created + * by indexing via {@link LongTrieTokenStream} methods. + * For more information, how the algorithm works, see the {@linkplain org.apache.lucene.search.trie package description}. + */ +public class LongTrieRangeFilter extends MultiTermQueryWrapperFilter { + + /** + * A trie filter for matching trie coded values using the given field name and + * the default helper field. + * precisionStep must be equal or a multiple of the precisionStep + * used for indexing the values. + * You can leave the bounds open, by supplying null for min and/or + * max. Inclusive/exclusive bounds can also be supplied. + * To filter double values use the converter {@link TrieUtils#doubleToSortableLong}. 
+ */ + public LongTrieRangeFilter(final String field, final int precisionStep, + final Long min, final Long max, final boolean minInclusive, final boolean maxInclusive + ) { + super(new LongTrieRangeQuery(field,precisionStep,min,max,minInclusive,maxInclusive)); + } + + /** Returns the field name for this filter */ + public String getField() { return ((LongTrieRangeQuery)query).getField(); } + + /** Returns true if the lower endpoint is inclusive */ + public boolean includesMin() { return ((LongTrieRangeQuery)query).includesMin(); } + + /** Returns true if the upper endpoint is inclusive */ + public boolean includesMax() { return ((LongTrieRangeQuery)query).includesMax(); } + + /** Returns the lower value of this range filter */ + public Long getMin() { return ((LongTrieRangeQuery)query).getMin(); } + + /** Returns the upper value of this range filter */ + public Long getMax() { return ((LongTrieRangeQuery)query).getMax(); } + +} diff --git a/contrib/queries/src/java/org/apache/lucene/search/trie/LongTrieRangeQuery.java b/contrib/queries/src/java/org/apache/lucene/search/trie/LongTrieRangeQuery.java new file mode 100644 index 00000000000..f32d55edc86 --- /dev/null +++ b/contrib/queries/src/java/org/apache/lucene/search/trie/LongTrieRangeQuery.java @@ -0,0 +1,66 @@ +package org.apache.lucene.search.trie; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.search.Query; + +/** + * Implementation of a Lucene {@link Query} that implements trie-based range querying for longs/doubles. + * This query depends on a specific structure of terms in the index that can only be created + * by indexing via {@link LongTrieTokenStream} methods. + *

The query is in constant score mode per default. With precision steps of ≤4, this + * query can be run in conventional boolean rewrite mode without changing the max clause count. + * For more information, how the algorithm works, see the {@linkplain org.apache.lucene.search.trie package description}. + */ +public class LongTrieRangeQuery extends AbstractTrieRangeQuery { + + /** + * A trie query for matching trie coded values using the given field name and + * the default helper field. + * precisionStep must be equal or a multiple of the precisionStep + * used for indexing the values. + * You can leave the bounds open, by supplying null for min and/or + * max. Inclusive/exclusive bounds can also be supplied. + * To query double values use the converter {@link TrieUtils#doubleToSortableLong}. + */ + public LongTrieRangeQuery(final String field, final int precisionStep, + final Long min, final Long max, final boolean minInclusive, final boolean maxInclusive + ) { + super(field,precisionStep,min,max,minInclusive,maxInclusive); + } + + //@Override + void passRanges(TrieRangeTermEnum enumerator) { + // calculate the upper and lower bounds respecting the inclusive and null values. + long minBound=(this.min==null) ? Long.MIN_VALUE : ( + minInclusive ? this.min.longValue() : (this.min.longValue()+1L) + ); + long maxBound=(this.max==null) ? Long.MAX_VALUE : ( + maxInclusive ? 
this.max.longValue() : (this.max.longValue()-1L) + ); + + TrieUtils.splitLongRange(enumerator.getLongRangeBuilder(), precisionStep, minBound, maxBound); + } + + /** Returns the lower value of this range query */ + public Long getMin() { return (Long)min; } + + /** Returns the upper value of this range query */ + public Long getMax() { return (Long)max; } + +} diff --git a/contrib/queries/src/java/org/apache/lucene/search/trie/LongTrieTokenStream.java b/contrib/queries/src/java/org/apache/lucene/search/trie/LongTrieTokenStream.java index 0ca6ef704c3..c4f215a1288 100644 --- a/contrib/queries/src/java/org/apache/lucene/search/trie/LongTrieTokenStream.java +++ b/contrib/queries/src/java/org/apache/lucene/search/trie/LongTrieTokenStream.java @@ -1,172 +1,172 @@ -package org.apache.lucene.search.trie; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.Token; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; -import org.apache.lucene.analysis.tokenattributes.TypeAttribute; -import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; - -/** - * This class provides a {@link TokenStream} for indexing long values - * that can be queried by {@link LongTrieRangeFilter}. This stream is not intended - * to be used in analyzers, its more for iterating the different precisions during - * indexing a specific numeric value. - *

A long value is indexed as multiple string encoded terms, each reduced - * by zeroing bits from the right. Each value is also prefixed (in the first char) by the - * shift value (number of bits removed) used during encoding. - *

The number of bits removed from the right for each trie entry is called - * precisionStep in this API. For comparing the different step values, see the - * {@linkplain org.apache.lucene.search.trie package description}. - *

The usage pattern is (it is recommened to switch off norms and term frequencies - * for numeric fields; it does not make sense to have them): - *

- *  Field field = new Field(name, new LongTrieTokenStream(value, precisionStep));
- *  field.setOmitNorms(true);
- *  field.setOmitTermFreqAndPositions(true);
- *  document.add(field);
- * 
- *

For optimal performance, re-use the TokenStream and Field instance - * for more than one document: - *

- *  // init
- *  TokenStream stream = new LongTrieTokenStream(precisionStep);
- *  Field field = new Field(name, stream);
- *  field.setOmitNorms(true);
- *  field.setOmitTermFreqAndPositions(true);
- *  Document doc = new Document();
- *  document.add(field);
- *  // use this code to index many documents:
- *  stream.setValue(value1)
- *  writer.addDocument(document);
- *  stream.setValue(value2)
- *  writer.addDocument(document);
- *  ...
- * 
- *

Please note: Token streams are read, when the document is added to index. - * If you index more than one numeric field, use a separate instance for each. - *

For more information, how trie fields work, see the - * {@linkplain org.apache.lucene.search.trie package description}. - */ -public class LongTrieTokenStream extends TokenStream { - - /** The full precision token gets this token type assigned. */ - public static final String TOKEN_TYPE_FULL_PREC = "fullPrecTrieLong"; - - /** The lower precision tokens gets this token type assigned. */ - public static final String TOKEN_TYPE_LOWER_PREC = "lowerPrecTrieLong"; - - /** - * Creates a token stream for indexing value with the given - * precisionStep. As instance creating is a major cost, - * consider using a {@link #LongTrieTokenStream(int)} instance once for - * indexing a large number of documents and assign a value with - * {@link #setValue} for each document. - * To index double values use the converter {@link TrieUtils#doubleToSortableLong}. - */ - public LongTrieTokenStream(final long value, final int precisionStep) { - if (precisionStep<1 || precisionStep>64) - throw new IllegalArgumentException("precisionStep may only be 1..64"); - this.value = value; - this.precisionStep = precisionStep; - termAtt = (TermAttribute) addAttribute(TermAttribute.class); - typeAtt = (TypeAttribute) addAttribute(TypeAttribute.class); - posIncrAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class); - shiftAtt = (ShiftAttribute) addAttribute(ShiftAttribute.class); - } - - /** - * Creates a token stream for indexing values with the given - * precisionStep. This stream is initially "empty" - * (using a numeric value of 0), assign a value before indexing - * each document using {@link #setValue}. - */ - public LongTrieTokenStream(final int precisionStep) { - this(0L, precisionStep); - } - - /** - * Resets the token stream to deliver prefix encoded values - * for value. Use this method to index the same - * numeric field for a large number of documents and reuse the - * current stream instance. 
- * To index double values use the converter {@link TrieUtils#doubleToSortableLong}. - */ - public void setValue(final long value) { - this.value = value; - reset(); - } - - // @Override - public void reset() { - shift = 0; - } - - // @Override - public boolean incrementToken() { - if (shift>=64) return false; - final char[] buffer = termAtt.resizeTermBuffer(TrieUtils.LONG_BUF_SIZE); - termAtt.setTermLength(TrieUtils.longToPrefixCoded(value, shift, buffer)); - shiftAtt.setShift(shift); - if (shift==0) { - typeAtt.setType(TOKEN_TYPE_FULL_PREC); - posIncrAtt.setPositionIncrement(1); - } else { - typeAtt.setType(TOKEN_TYPE_LOWER_PREC); - posIncrAtt.setPositionIncrement(0); - } - shift += precisionStep; - return true; - } - - // @Override - /** @deprecated */ - public Token next(final Token reusableToken) { - if (shift>=64) return null; - reusableToken.clear(); - final char[] buffer = reusableToken.resizeTermBuffer(TrieUtils.LONG_BUF_SIZE); - reusableToken.setTermLength(TrieUtils.longToPrefixCoded(value, shift, buffer)); - if (shift==0) { - reusableToken.setType(TOKEN_TYPE_FULL_PREC); - reusableToken.setPositionIncrement(1); - } else { - reusableToken.setType(TOKEN_TYPE_LOWER_PREC); - reusableToken.setPositionIncrement(0); - } - shift += precisionStep; - return reusableToken; - } - - // @Override - public String toString() { - final StringBuffer sb = new StringBuffer("(trie-long,value=").append(value); - sb.append(",precisionStep=").append(precisionStep).append(')'); - return sb.toString(); - } - - // members - private final TermAttribute termAtt; - private final TypeAttribute typeAtt; - private final PositionIncrementAttribute posIncrAtt; - private final ShiftAttribute shiftAtt; - - private int shift = 0; - private long value; - private final int precisionStep; -} +package org.apache.lucene.search.trie; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.Token; +import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.TypeAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; + +/** + * This class provides a {@link TokenStream} for indexing long values + * that can be queried by {@link LongTrieRangeFilter}. This stream is not intended + * to be used in analyzers, its more for iterating the different precisions during + * indexing a specific numeric value. + *

A long value is indexed as multiple string encoded terms, each reduced + * by zeroing bits from the right. Each value is also prefixed (in the first char) by the + * shift value (number of bits removed) used during encoding. + *

The number of bits removed from the right for each trie entry is called + * precisionStep in this API. For comparing the different step values, see the + * {@linkplain org.apache.lucene.search.trie package description}. + *

The usage pattern is (it is recommended to switch off norms and term frequencies + * for numeric fields; it does not make sense to have them): + *

+ *  Field field = new Field(name, new LongTrieTokenStream(value, precisionStep));
+ *  field.setOmitNorms(true);
+ *  field.setOmitTermFreqAndPositions(true);
+ *  document.add(field);
+ * 
+ *

For optimal performance, re-use the TokenStream and Field instance + * for more than one document: + *

+ *  // init
+ *  TokenStream stream = new LongTrieTokenStream(precisionStep);
+ *  Field field = new Field(name, stream);
+ *  field.setOmitNorms(true);
+ *  field.setOmitTermFreqAndPositions(true);
+ *  Document doc = new Document();
+ *  document.add(field);
+ *  // use this code to index many documents:
+ *  stream.setValue(value1)
+ *  writer.addDocument(document);
+ *  stream.setValue(value2)
+ *  writer.addDocument(document);
+ *  ...
+ * 
+ *

Please note: Token streams are read, when the document is added to index. + * If you index more than one numeric field, use a separate instance for each. + *

For more information, how trie fields work, see the + * {@linkplain org.apache.lucene.search.trie package description}. + */ +public class LongTrieTokenStream extends TokenStream { + + /** The full precision token gets this token type assigned. */ + public static final String TOKEN_TYPE_FULL_PREC = "fullPrecTrieLong"; + + /** The lower precision tokens gets this token type assigned. */ + public static final String TOKEN_TYPE_LOWER_PREC = "lowerPrecTrieLong"; + + /** + * Creates a token stream for indexing value with the given + * precisionStep. As instance creating is a major cost, + * consider using a {@link #LongTrieTokenStream(int)} instance once for + * indexing a large number of documents and assign a value with + * {@link #setValue} for each document. + * To index double values use the converter {@link TrieUtils#doubleToSortableLong}. + */ + public LongTrieTokenStream(final long value, final int precisionStep) { + if (precisionStep<1 || precisionStep>64) + throw new IllegalArgumentException("precisionStep may only be 1..64"); + this.value = value; + this.precisionStep = precisionStep; + termAtt = (TermAttribute) addAttribute(TermAttribute.class); + typeAtt = (TypeAttribute) addAttribute(TypeAttribute.class); + posIncrAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class); + shiftAtt = (ShiftAttribute) addAttribute(ShiftAttribute.class); + } + + /** + * Creates a token stream for indexing values with the given + * precisionStep. This stream is initially "empty" + * (using a numeric value of 0), assign a value before indexing + * each document using {@link #setValue}. + */ + public LongTrieTokenStream(final int precisionStep) { + this(0L, precisionStep); + } + + /** + * Resets the token stream to deliver prefix encoded values + * for value. Use this method to index the same + * numeric field for a large number of documents and reuse the + * current stream instance. 
+ * To index double values use the converter {@link TrieUtils#doubleToSortableLong}. + */ + public void setValue(final long value) { + this.value = value; + reset(); + } + + // @Override + public void reset() { + shift = 0; + } + + // @Override + public boolean incrementToken() { + if (shift>=64) return false; + final char[] buffer = termAtt.resizeTermBuffer(TrieUtils.LONG_BUF_SIZE); + termAtt.setTermLength(TrieUtils.longToPrefixCoded(value, shift, buffer)); + shiftAtt.setShift(shift); + if (shift==0) { + typeAtt.setType(TOKEN_TYPE_FULL_PREC); + posIncrAtt.setPositionIncrement(1); + } else { + typeAtt.setType(TOKEN_TYPE_LOWER_PREC); + posIncrAtt.setPositionIncrement(0); + } + shift += precisionStep; + return true; + } + + // @Override + /** @deprecated */ + public Token next(final Token reusableToken) { + if (shift>=64) return null; + reusableToken.clear(); + final char[] buffer = reusableToken.resizeTermBuffer(TrieUtils.LONG_BUF_SIZE); + reusableToken.setTermLength(TrieUtils.longToPrefixCoded(value, shift, buffer)); + if (shift==0) { + reusableToken.setType(TOKEN_TYPE_FULL_PREC); + reusableToken.setPositionIncrement(1); + } else { + reusableToken.setType(TOKEN_TYPE_LOWER_PREC); + reusableToken.setPositionIncrement(0); + } + shift += precisionStep; + return reusableToken; + } + + // @Override + public String toString() { + final StringBuffer sb = new StringBuffer("(trie-long,value=").append(value); + sb.append(",precisionStep=").append(precisionStep).append(')'); + return sb.toString(); + } + + // members + private final TermAttribute termAtt; + private final TypeAttribute typeAtt; + private final PositionIncrementAttribute posIncrAtt; + private final ShiftAttribute shiftAtt; + + private int shift = 0; + private long value; + private final int precisionStep; +} diff --git a/contrib/queries/src/java/org/apache/lucene/search/trie/ShiftAttribute.java b/contrib/queries/src/java/org/apache/lucene/search/trie/ShiftAttribute.java index 308c23c0b59..a5a6b211422 100644 --- 
a/contrib/queries/src/java/org/apache/lucene/search/trie/ShiftAttribute.java +++ b/contrib/queries/src/java/org/apache/lucene/search/trie/ShiftAttribute.java @@ -1,70 +1,70 @@ -package org.apache.lucene.search.trie; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.lucene.util.Attribute; - -import java.io.Serializable; - -/** - * This attribute is updated by {@link IntTrieTokenStream} and {@link LongTrieTokenStream} - * to the shift value of the current prefix-encoded token. - * It may be used by filters or consumers to e.g. distribute the values to various fields. - */ -public final class ShiftAttribute extends Attribute implements Cloneable, Serializable { - private int shift = 0; - - /** - * Returns the shift value of the current prefix encoded token. - */ - public int getShift() { - return shift; - } - - /** - * Sets the shift value. 
- */ - public void setShift(final int shift) { - this.shift = shift; - } - - public void clear() { - shift = 0; - } - - public String toString() { - return "shift=" + shift; - } - - public boolean equals(Object other) { - if (this == other) return true; - if (other instanceof ShiftAttribute) { - return ((ShiftAttribute) other).shift == shift; - } - return false; - } - - public int hashCode() { - return shift; - } - - public void copyTo(Attribute target) { - final ShiftAttribute t = (ShiftAttribute) target; - t.setShift(shift); - } -} +package org.apache.lucene.search.trie; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.util.Attribute; + +import java.io.Serializable; + +/** + * This attribute is updated by {@link IntTrieTokenStream} and {@link LongTrieTokenStream} + * to the shift value of the current prefix-encoded token. + * It may be used by filters or consumers to e.g. distribute the values to various fields. + */ +public final class ShiftAttribute extends Attribute implements Cloneable, Serializable { + private int shift = 0; + + /** + * Returns the shift value of the current prefix encoded token. + */ + public int getShift() { + return shift; + } + + /** + * Sets the shift value. 
+ */ + public void setShift(final int shift) { + this.shift = shift; + } + + public void clear() { + shift = 0; + } + + public String toString() { + return "shift=" + shift; + } + + public boolean equals(Object other) { + if (this == other) return true; + if (other instanceof ShiftAttribute) { + return ((ShiftAttribute) other).shift == shift; + } + return false; + } + + public int hashCode() { + return shift; + } + + public void copyTo(Attribute target) { + final ShiftAttribute t = (ShiftAttribute) target; + t.setShift(shift); + } +} diff --git a/contrib/queries/src/java/org/apache/lucene/search/trie/TrieRangeTermEnum.java b/contrib/queries/src/java/org/apache/lucene/search/trie/TrieRangeTermEnum.java new file mode 100644 index 00000000000..33973577b2e --- /dev/null +++ b/contrib/queries/src/java/org/apache/lucene/search/trie/TrieRangeTermEnum.java @@ -0,0 +1,140 @@ +package org.apache.lucene.search.trie; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.util.LinkedList; + +import org.apache.lucene.search.FilteredTermEnum; +import org.apache.lucene.search.MultiTermQuery; // for javadocs +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Term; + +/** + * Subclass of FilteredTermEnum for enumerating all terms that match the + * sub-ranges for trie range queries. + *

+ * WARNING: Term enumerations is not guaranteed to be always ordered by + * {@link Term#compareTo}. + * The ordering depends on how {@link TrieUtils#splitLongRange} and + * {@link TrieUtils#splitIntRange} generates the sub-ranges. For + * the {@link MultiTermQuery} ordering is not relevant. + */ +final class TrieRangeTermEnum extends FilteredTermEnum { + + private final AbstractTrieRangeQuery query; + private final IndexReader reader; + private final LinkedList/**/ rangeBounds = new LinkedList/**/(); + private String currentUpperBound = null; + + TrieRangeTermEnum(AbstractTrieRangeQuery query, IndexReader reader) { + this.query = query; + this.reader = reader; + } + + /** Returns a range builder that must be used to feed in the sub-ranges. */ + TrieUtils.IntRangeBuilder getIntRangeBuilder() { + return new TrieUtils.IntRangeBuilder() { + //@Override + public final void addRange(String minPrefixCoded, String maxPrefixCoded) { + rangeBounds.add(minPrefixCoded); + rangeBounds.add(maxPrefixCoded); + } + }; + } + + /** Returns a range builder that must be used to feed in the sub-ranges. */ + TrieUtils.LongRangeBuilder getLongRangeBuilder() { + return new TrieUtils.LongRangeBuilder() { + //@Override + public final void addRange(String minPrefixCoded, String maxPrefixCoded) { + rangeBounds.add(minPrefixCoded); + rangeBounds.add(maxPrefixCoded); + } + }; + } + + /** After feeding the range builder call this method to initialize the enum. */ + void init() throws IOException { + next(); + } + + //@Override + public float difference() { + return 1.0f; + } + + /** this is a dummy, it is not used by this class. */ + //@Override + protected boolean endEnum() { + assert false; // should never be called + return (currentTerm != null); + } + + /** + * Compares if current upper bound is reached, + * this also updates the term count for statistics. 
+ * In contrast to {@link FilteredTermEnum}, a return value + * of false ends iterating the current enum + * and forwards to the next sub-range. + */ + //@Override + protected boolean termCompare(Term term) { + return (term.field() == query.field && term.text().compareTo(currentUpperBound) <= 0); + } + + /** Increments the enumeration to the next element. True if one exists. */ + //@Override + public boolean next() throws IOException { + // if a current term exists, the actual enum is initialized: + // try change to next term, if no such term exists, fall-through + if (currentTerm != null) { + assert actualEnum!=null; + if (actualEnum.next()) { + currentTerm = actualEnum.term(); + if (termCompare(currentTerm)) return true; + } + } + // if all above fails, we go forward to the next enum, + // if one is available + currentTerm = null; + if (rangeBounds.size() < 2) return false; + // close the current enum and read next bounds + if (actualEnum != null) { + actualEnum.close(); + actualEnum = null; + } + final String lowerBound = (String)rangeBounds.removeFirst(); + this.currentUpperBound = (String)rangeBounds.removeFirst(); + // this call recursively uses next(), if no valid term in + // next enum found. + // if this behavior is changed/modified in the superclass, + // this enum will not work anymore! + setEnum(reader.terms(new Term(query.field, lowerBound))); + return (currentTerm != null); + } + + /** Closes the enumeration to further activity, freeing resources. 
*/ + //@Override + public void close() throws IOException { + rangeBounds.clear(); + currentUpperBound = null; + super.close(); + } + +} diff --git a/contrib/queries/src/java/org/apache/lucene/search/trie/package.html b/contrib/queries/src/java/org/apache/lucene/search/trie/package.html index 75fbe81cef6..00bfe78c3a6 100644 --- a/contrib/queries/src/java/org/apache/lucene/search/trie/package.html +++ b/contrib/queries/src/java/org/apache/lucene/search/trie/package.html @@ -106,14 +106,14 @@ more info about this in the stream documentation)

Searching

The numeric index fields you prepared in this way can be searched by -{@link org.apache.lucene.search.trie.LongTrieRangeFilter} or {@link org.apache.lucene.search.trie.IntTrieRangeFilter}:

+{@link org.apache.lucene.search.trie.LongTrieRangeQuery} or {@link org.apache.lucene.search.trie.IntTrieRangeQuery}:

   // Java 1.4, because Long.valueOf(long) is not available:
-  Query q = new LongTrieRangeFilter("exampleLong", precisionStep, new Long(123L), new Long(999999L), true, true).asQuery();
+  Query q = new LongTrieRangeQuery("exampleLong", precisionStep, new Long(123L), new Long(999999L), true, true);
   
   // OR, Java 1.5, using autoboxing:
-  Query q = new LongTrieRangeFilter("exampleLong", precisionStep, 123L, 999999L, true, true).asQuery();
+  Query q = new LongTrieRangeQuery("exampleLong", precisionStep, 123L, 999999L, true, true);
   
   // execute the search, as usual:
   TopDocs docs = searcher.search(q, 10);
@@ -132,7 +132,7 @@ more info about this in the stream documentation)

that the old {@link org.apache.lucene.search.RangeQuery} (with raised {@link org.apache.lucene.search.BooleanQuery} clause count) took about 30-40 secs to complete, {@link org.apache.lucene.search.ConstantScoreRangeQuery} took 5 secs and executing -{@link org.apache.lucene.search.trie.LongTrieRangeFilter}.asQuery() took <100ms to +{@link org.apache.lucene.search.trie.LongTrieRangeQuery} took <100ms to complete (on an Opteron64 machine, Java 1.5, 8 bit precision step). This query type was developed for a geographic portal, where the performance for e.g. bounding boxes or exact date/time stamps is important.

diff --git a/contrib/queries/src/test/org/apache/lucene/search/trie/TestIntTrieRangeFilter.java b/contrib/queries/src/test/org/apache/lucene/search/trie/TestIntTrieRangeQuery.java similarity index 66% rename from contrib/queries/src/test/org/apache/lucene/search/trie/TestIntTrieRangeFilter.java rename to contrib/queries/src/test/org/apache/lucene/search/trie/TestIntTrieRangeQuery.java index b2599954fc6..cc845dd9a66 100644 --- a/contrib/queries/src/test/org/apache/lucene/search/trie/TestIntTrieRangeFilter.java +++ b/contrib/queries/src/test/org/apache/lucene/search/trie/TestIntTrieRangeQuery.java @@ -1,320 +1,371 @@ -package org.apache.lucene.search.trie; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import java.util.Random; - -import org.apache.lucene.analysis.WhitespaceAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexWriter.MaxFieldLength; -import org.apache.lucene.store.RAMDirectory; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.TopDocs; -import org.apache.lucene.search.Sort; -import org.apache.lucene.search.RangeQuery; -import org.apache.lucene.util.LuceneTestCase; - -public class TestIntTrieRangeFilter extends LuceneTestCase { - // distance of entries - private static final int distance = 6666; - // shift the starting of the values to the left, to also have negative values: - private static final int startOffset = - 1 << 15; - // number of docs to generate for testing - private static final int noDocs = 10000; - - private static Field newField(String name, int precisionStep) { - IntTrieTokenStream stream = new IntTrieTokenStream(precisionStep); - stream.setUseNewAPI(true); - Field f=new Field(name, stream); - f.setOmitTermFreqAndPositions(true); - f.setOmitNorms(true); - return f; - } - - private static final RAMDirectory directory; - private static final IndexSearcher searcher; - static { - try { - directory = new RAMDirectory(); - IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), - true, MaxFieldLength.UNLIMITED); - - Field - field8 = newField("field8", 8), - field4 = newField("field4", 4), - field2 = newField("field2", 2), - ascfield8 = newField("ascfield8", 8), - ascfield4 = newField("ascfield4", 4), - ascfield2 = newField("ascfield2", 2); - - // Add a series of noDocs docs with increasing int values - for (int l=0; lupper) { - int a=lower; lower=upper; upper=a; - } - // test inclusive range - IntTrieRangeFilter tf=new IntTrieRangeFilter(field, precisionStep, new 
Integer(lower), new Integer(upper), true, true); - RangeQuery cq=new RangeQuery(field, TrieUtils.intToPrefixCoded(lower), TrieUtils.intToPrefixCoded(upper), true, true); - cq.setConstantScoreRewrite(true); - TopDocs tTopDocs = searcher.search(tf.asQuery(), 1); - TopDocs cTopDocs = searcher.search(cq, 1); - assertEquals("Returned count for IntTrieRangeFilter and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); - termCount += tf.getLastNumberOfTerms(); - // test exclusive range - tf=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new Integer(upper), false, false); - cq=new RangeQuery(field, TrieUtils.intToPrefixCoded(lower), TrieUtils.intToPrefixCoded(upper), false, false); - cq.setConstantScoreRewrite(true); - tTopDocs = searcher.search(tf.asQuery(), 1); - cTopDocs = searcher.search(cq, 1); - assertEquals("Returned count for IntTrieRangeFilter and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); - termCount += tf.getLastNumberOfTerms(); - // test left exclusive range - tf=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new Integer(upper), false, true); - cq=new RangeQuery(field, TrieUtils.intToPrefixCoded(lower), TrieUtils.intToPrefixCoded(upper), false, true); - cq.setConstantScoreRewrite(true); - tTopDocs = searcher.search(tf.asQuery(), 1); - cTopDocs = searcher.search(cq, 1); - assertEquals("Returned count for IntTrieRangeFilter and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); - termCount += tf.getLastNumberOfTerms(); - // test right exclusive range - tf=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new Integer(upper), true, false); - cq=new RangeQuery(field, TrieUtils.intToPrefixCoded(lower), TrieUtils.intToPrefixCoded(upper), true, false); - cq.setConstantScoreRewrite(true); - tTopDocs = searcher.search(tf.asQuery(), 1); - cTopDocs = searcher.search(cq, 1); - assertEquals("Returned count for IntTrieRangeFilter and RangeQuery must be equal", 
cTopDocs.totalHits, tTopDocs.totalHits ); - termCount += tf.getLastNumberOfTerms(); - } - System.out.println("Average number of terms during random search on '" + field + "': " + (((double)termCount)/(50*4))); - } - - public void testRandomTrieAndClassicRangeQuery_8bit() throws Exception { - testRandomTrieAndClassicRangeQuery(8); - } - - public void testRandomTrieAndClassicRangeQuery_4bit() throws Exception { - testRandomTrieAndClassicRangeQuery(4); - } - - public void testRandomTrieAndClassicRangeQuery_2bit() throws Exception { - testRandomTrieAndClassicRangeQuery(2); - } - - private void testRangeSplit(int precisionStep) throws Exception { - final Random rnd=newRandom(); - String field="ascfield"+precisionStep; - // 50 random tests - for (int i=0; i<50; i++) { - int lower=(int)(rnd.nextDouble()*noDocs - noDocs/2); - int upper=(int)(rnd.nextDouble()*noDocs - noDocs/2); - if (lower>upper) { - int a=lower; lower=upper; upper=a; - } - // test inclusive range - Query tq=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new Integer(upper), true, true).asQuery(); - TopDocs tTopDocs = searcher.search(tq, 1); - assertEquals("Returned count of range query must be equal to inclusive range length", upper-lower+1, tTopDocs.totalHits ); - // test exclusive range - tq=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new Integer(upper), false, false).asQuery(); - tTopDocs = searcher.search(tq, 1); - assertEquals("Returned count of range query must be equal to exclusive range length", Math.max(upper-lower-1, 0), tTopDocs.totalHits ); - // test left exclusive range - tq=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new Integer(upper), false, true).asQuery(); - tTopDocs = searcher.search(tq, 1); - assertEquals("Returned count of range query must be equal to half exclusive range length", upper-lower, tTopDocs.totalHits ); - // test right exclusive range - tq=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new 
Integer(upper), true, false).asQuery(); - tTopDocs = searcher.search(tq, 1); - assertEquals("Returned count of range query must be equal to half exclusive range length", upper-lower, tTopDocs.totalHits ); - } - } - - public void testRangeSplit_8bit() throws Exception { - testRangeSplit(8); - } - - public void testRangeSplit_4bit() throws Exception { - testRangeSplit(4); - } - - public void testRangeSplit_2bit() throws Exception { - testRangeSplit(2); - } - - private void testSorting(int precisionStep) throws Exception { - final Random rnd=newRandom(); - String field="field"+precisionStep; - // 10 random tests, the index order is ascending, - // so using a reverse sort field should retun descending documents - for (int i=0; i<10; i++) { - int lower=(int)(rnd.nextDouble()*noDocs*distance)+startOffset; - int upper=(int)(rnd.nextDouble()*noDocs*distance)+startOffset; - if (lower>upper) { - int a=lower; lower=upper; upper=a; - } - Query tq=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new Integer(upper), true, true).asQuery(); - TopDocs topDocs = searcher.search(tq, null, noDocs, new Sort(TrieUtils.getIntSortField(field, true))); - if (topDocs.totalHits==0) continue; - ScoreDoc[] sd = topDocs.scoreDocs; - assertNotNull(sd); - int last=TrieUtils.prefixCodedToInt(searcher.doc(sd[0].doc).get("value")); - for (int j=1; jact ); - last=act; - } - } - } - - public void testSorting_8bit() throws Exception { - testSorting(8); - } - - public void testSorting_4bit() throws Exception { - testSorting(4); - } - - public void testSorting_2bit() throws Exception { - testSorting(2); - } - -} +package org.apache.lucene.search.trie; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.Random; + +import org.apache.lucene.analysis.WhitespaceAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriter.MaxFieldLength; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.search.RangeQuery; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.util.LuceneTestCase; + +public class TestIntTrieRangeQuery extends LuceneTestCase { + // distance of entries + private static final int distance = 6666; + // shift the starting of the values to the left, to also have negative values: + private static final int startOffset = - 1 << 15; + // number of docs to generate for testing + private static final int noDocs = 10000; + + private static Field newField(String name, int precisionStep) { + IntTrieTokenStream stream = new IntTrieTokenStream(precisionStep); + stream.setUseNewAPI(true); + Field f=new Field(name, stream); + f.setOmitTermFreqAndPositions(true); + f.setOmitNorms(true); + return f; + } + + 
private static final RAMDirectory directory; + private static final IndexSearcher searcher; + static { + try { + // set the theoretical maximum term count for 8bit (see docs for the number) + BooleanQuery.setMaxClauseCount(3*255*2 + 255); + + directory = new RAMDirectory(); + IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), + true, MaxFieldLength.UNLIMITED); + + Field + field8 = newField("field8", 8), + field4 = newField("field4", 4), + field2 = newField("field2", 2), + ascfield8 = newField("ascfield8", 8), + ascfield4 = newField("ascfield4", 4), + ascfield2 = newField("ascfield2", 2); + + // Add a series of noDocs docs with increasing int values + for (int l=0; l0) { + assertEquals("Distinct term number is equal for all query types", lastTerms, terms); + } + lastTerms = terms; + } + } + + public void testRange_8bit() throws Exception { + testRange(8); + } + + public void testRange_4bit() throws Exception { + testRange(4); + } + + public void testRange_2bit() throws Exception { + testRange(2); + } + + public void testInverseRange() throws Exception { + IntTrieRangeFilter f = new IntTrieRangeFilter("field8", 8, new Integer(1000), new Integer(-1000), true, true); + assertSame("A inverse range should return the EMPTY_DOCIDSET instance", DocIdSet.EMPTY_DOCIDSET, f.getDocIdSet(searcher.getIndexReader())); + } + + private void testLeftOpenRange(int precisionStep) throws Exception { + String field="field"+precisionStep; + int count=3000; + int upper=(count-1)*distance + (distance/3) + startOffset; + IntTrieRangeQuery q=new IntTrieRangeQuery(field, precisionStep, null, new Integer(upper), true, true); + TopDocs topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER); + System.out.println("Found "+q.getTotalNumberOfTerms()+" distinct terms in left open range for field '"+field+"'."); + ScoreDoc[] sd = topDocs.scoreDocs; + assertNotNull(sd); + assertEquals("Score doc count", count, sd.length ); + Document doc=searcher.doc(sd[0].doc); + 
assertEquals("First doc", startOffset, TrieUtils.prefixCodedToInt(doc.get("value")) ); + doc=searcher.doc(sd[sd.length-1].doc); + assertEquals("Last doc", (count-1)*distance+startOffset, TrieUtils.prefixCodedToInt(doc.get("value")) ); + } + + public void testLeftOpenRange_8bit() throws Exception { + testLeftOpenRange(8); + } + + public void testLeftOpenRange_4bit() throws Exception { + testLeftOpenRange(4); + } + + public void testLeftOpenRange_2bit() throws Exception { + testLeftOpenRange(2); + } + + private void testRightOpenRange(int precisionStep) throws Exception { + String field="field"+precisionStep; + int count=3000; + int lower=(count-1)*distance + (distance/3) +startOffset; + IntTrieRangeQuery q=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), null, true, true); + TopDocs topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER); + System.out.println("Found "+q.getTotalNumberOfTerms()+" distinct terms in right open range for field '"+field+"'."); + ScoreDoc[] sd = topDocs.scoreDocs; + assertNotNull(sd); + assertEquals("Score doc count", noDocs-count, sd.length ); + Document doc=searcher.doc(sd[0].doc); + assertEquals("First doc", count*distance+startOffset, TrieUtils.prefixCodedToInt(doc.get("value")) ); + doc=searcher.doc(sd[sd.length-1].doc); + assertEquals("Last doc", (noDocs-1)*distance+startOffset, TrieUtils.prefixCodedToInt(doc.get("value")) ); + } + + public void testRightOpenRange_8bit() throws Exception { + testRightOpenRange(8); + } + + public void testRightOpenRange_4bit() throws Exception { + testRightOpenRange(4); + } + + public void testRightOpenRange_2bit() throws Exception { + testRightOpenRange(2); + } + + private void testRandomTrieAndClassicRangeQuery(int precisionStep) throws Exception { + final Random rnd=newRandom(); + String field="field"+precisionStep; + int termCountT=0,termCountC=0; + for (int i=0; i<50; i++) { + int lower=(int)(rnd.nextDouble()*noDocs*distance)+startOffset; + int 
upper=(int)(rnd.nextDouble()*noDocs*distance)+startOffset; + if (lower>upper) { + int a=lower; lower=upper; upper=a; + } + // test inclusive range + IntTrieRangeQuery tq=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), true, true); + RangeQuery cq=new RangeQuery(field, TrieUtils.intToPrefixCoded(lower), TrieUtils.intToPrefixCoded(upper), true, true); + cq.setConstantScoreRewrite(true); + TopDocs tTopDocs = searcher.search(tq, 1); + TopDocs cTopDocs = searcher.search(cq, 1); + assertEquals("Returned count for IntTrieRangeQuery and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); + termCountT += tq.getTotalNumberOfTerms(); + termCountC += cq.getTotalNumberOfTerms(); + // test exclusive range + tq=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), false, false); + cq=new RangeQuery(field, TrieUtils.intToPrefixCoded(lower), TrieUtils.intToPrefixCoded(upper), false, false); + cq.setConstantScoreRewrite(true); + tTopDocs = searcher.search(tq, 1); + cTopDocs = searcher.search(cq, 1); + assertEquals("Returned count for IntTrieRangeQuery and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); + termCountT += tq.getTotalNumberOfTerms(); + termCountC += cq.getTotalNumberOfTerms(); + // test left exclusive range + tq=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), false, true); + cq=new RangeQuery(field, TrieUtils.intToPrefixCoded(lower), TrieUtils.intToPrefixCoded(upper), false, true); + cq.setConstantScoreRewrite(true); + tTopDocs = searcher.search(tq, 1); + cTopDocs = searcher.search(cq, 1); + assertEquals("Returned count for IntTrieRangeQuery and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); + termCountT += tq.getTotalNumberOfTerms(); + termCountC += cq.getTotalNumberOfTerms(); + // test right exclusive range + tq=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), true, false); + 
cq=new RangeQuery(field, TrieUtils.intToPrefixCoded(lower), TrieUtils.intToPrefixCoded(upper), true, false); + cq.setConstantScoreRewrite(true); + tTopDocs = searcher.search(tq, 1); + cTopDocs = searcher.search(cq, 1); + assertEquals("Returned count for IntTrieRangeQuery and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); + termCountT += tq.getTotalNumberOfTerms(); + termCountC += cq.getTotalNumberOfTerms(); + } + System.out.println("Average number of terms during random search on '" + field + "':"); + System.out.println(" Trie query: " + (((double)termCountT)/(50*4))); + System.out.println(" Classical query: " + (((double)termCountC)/(50*4))); + } + + public void testRandomTrieAndClassicRangeQuery_8bit() throws Exception { + testRandomTrieAndClassicRangeQuery(8); + } + + public void testRandomTrieAndClassicRangeQuery_4bit() throws Exception { + testRandomTrieAndClassicRangeQuery(4); + } + + public void testRandomTrieAndClassicRangeQuery_2bit() throws Exception { + testRandomTrieAndClassicRangeQuery(2); + } + + private void testRangeSplit(int precisionStep) throws Exception { + final Random rnd=newRandom(); + String field="ascfield"+precisionStep; + // 50 random tests + for (int i=0; i<50; i++) { + int lower=(int)(rnd.nextDouble()*noDocs - noDocs/2); + int upper=(int)(rnd.nextDouble()*noDocs - noDocs/2); + if (lower>upper) { + int a=lower; lower=upper; upper=a; + } + // test inclusive range + Query tq=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), true, true); + TopDocs tTopDocs = searcher.search(tq, 1); + assertEquals("Returned count of range query must be equal to inclusive range length", upper-lower+1, tTopDocs.totalHits ); + // test exclusive range + tq=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), false, false); + tTopDocs = searcher.search(tq, 1); + assertEquals("Returned count of range query must be equal to exclusive range length", Math.max(upper-lower-1, 0), 
tTopDocs.totalHits ); + // test left exclusive range + tq=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), false, true); + tTopDocs = searcher.search(tq, 1); + assertEquals("Returned count of range query must be equal to half exclusive range length", upper-lower, tTopDocs.totalHits ); + // test right exclusive range + tq=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), true, false); + tTopDocs = searcher.search(tq, 1); + assertEquals("Returned count of range query must be equal to half exclusive range length", upper-lower, tTopDocs.totalHits ); + } + } + + public void testRangeSplit_8bit() throws Exception { + testRangeSplit(8); + } + + public void testRangeSplit_4bit() throws Exception { + testRangeSplit(4); + } + + public void testRangeSplit_2bit() throws Exception { + testRangeSplit(2); + } + + private void testSorting(int precisionStep) throws Exception { + final Random rnd=newRandom(); + String field="field"+precisionStep; + // 10 random tests, the index order is ascending, + // so using a reverse sort field should retun descending documents + for (int i=0; i<10; i++) { + int lower=(int)(rnd.nextDouble()*noDocs*distance)+startOffset; + int upper=(int)(rnd.nextDouble()*noDocs*distance)+startOffset; + if (lower>upper) { + int a=lower; lower=upper; upper=a; + } + Query tq=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), true, true); + TopDocs topDocs = searcher.search(tq, null, noDocs, new Sort(TrieUtils.getIntSortField(field, true))); + if (topDocs.totalHits==0) continue; + ScoreDoc[] sd = topDocs.scoreDocs; + assertNotNull(sd); + int last=TrieUtils.prefixCodedToInt(searcher.doc(sd[0].doc).get("value")); + for (int j=1; jact ); + last=act; + } + } + } + + public void testSorting_8bit() throws Exception { + testSorting(8); + } + + public void testSorting_4bit() throws Exception { + testSorting(4); + } + + public void testSorting_2bit() throws Exception { + 
testSorting(2); + } + +} diff --git a/contrib/queries/src/test/org/apache/lucene/search/trie/TestIntTrieTokenStream.java b/contrib/queries/src/test/org/apache/lucene/search/trie/TestIntTrieTokenStream.java index 415798a6876..c853f671b8f 100644 --- a/contrib/queries/src/test/org/apache/lucene/search/trie/TestIntTrieTokenStream.java +++ b/contrib/queries/src/test/org/apache/lucene/search/trie/TestIntTrieTokenStream.java @@ -1,54 +1,54 @@ -package org.apache.lucene.search.trie; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.analysis.Token; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; - -public class TestIntTrieTokenStream extends LuceneTestCase { - - static final int precisionStep = 8; - static final int value = 123456; - - public void testStreamNewAPI() throws Exception { - final IntTrieTokenStream stream=new IntTrieTokenStream(value, precisionStep); - stream.setUseNewAPI(true); - // use getAttribute to test if attributes really exist, if not an IAE will be throwed - final ShiftAttribute shiftAtt = (ShiftAttribute) stream.getAttribute(ShiftAttribute.class); - final TermAttribute termAtt = (TermAttribute) stream.getAttribute(TermAttribute.class); - for (int shift=0; shift<32; shift+=precisionStep) { - assertTrue("New token is available", stream.incrementToken()); - assertEquals("Shift value", shift, shiftAtt.getShift()); - assertEquals("Term is correctly encoded", TrieUtils.intToPrefixCoded(value, shift), termAtt.term()); - } - assertFalse("No more tokens available", stream.incrementToken()); - } - - public void testStreamOldAPI() throws Exception { - final IntTrieTokenStream stream=new IntTrieTokenStream(value, precisionStep); - stream.setUseNewAPI(false); - Token tok=new Token(); - for (int shift=0; shift<32; shift+=precisionStep) { - assertNotNull("New token is available", tok=stream.next(tok)); - assertEquals("Term is correctly encoded", TrieUtils.intToPrefixCoded(value, shift), tok.term()); - } - assertNull("No more tokens available", stream.next(tok)); - } - -} +package org.apache.lucene.search.trie; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.analysis.Token; +import org.apache.lucene.analysis.tokenattributes.TermAttribute; + +public class TestIntTrieTokenStream extends LuceneTestCase { + + static final int precisionStep = 8; + static final int value = 123456; + + public void testStreamNewAPI() throws Exception { + final IntTrieTokenStream stream=new IntTrieTokenStream(value, precisionStep); + stream.setUseNewAPI(true); + // use getAttribute to test if attributes really exist, if not an IAE will be throwed + final ShiftAttribute shiftAtt = (ShiftAttribute) stream.getAttribute(ShiftAttribute.class); + final TermAttribute termAtt = (TermAttribute) stream.getAttribute(TermAttribute.class); + for (int shift=0; shift<32; shift+=precisionStep) { + assertTrue("New token is available", stream.incrementToken()); + assertEquals("Shift value", shift, shiftAtt.getShift()); + assertEquals("Term is correctly encoded", TrieUtils.intToPrefixCoded(value, shift), termAtt.term()); + } + assertFalse("No more tokens available", stream.incrementToken()); + } + + public void testStreamOldAPI() throws Exception { + final IntTrieTokenStream stream=new IntTrieTokenStream(value, precisionStep); + stream.setUseNewAPI(false); + Token tok=new Token(); + for (int shift=0; shift<32; shift+=precisionStep) { + assertNotNull("New token is available", tok=stream.next(tok)); + 
assertEquals("Term is correctly encoded", TrieUtils.intToPrefixCoded(value, shift), tok.term()); + } + assertNull("No more tokens available", stream.next(tok)); + } + +} diff --git a/contrib/queries/src/test/org/apache/lucene/search/trie/TestLongTrieRangeFilter.java b/contrib/queries/src/test/org/apache/lucene/search/trie/TestLongTrieRangeQuery.java similarity index 66% rename from contrib/queries/src/test/org/apache/lucene/search/trie/TestLongTrieRangeFilter.java rename to contrib/queries/src/test/org/apache/lucene/search/trie/TestLongTrieRangeQuery.java index 8ea19edd0be..0d33cd2fe4e 100644 --- a/contrib/queries/src/test/org/apache/lucene/search/trie/TestLongTrieRangeFilter.java +++ b/contrib/queries/src/test/org/apache/lucene/search/trie/TestLongTrieRangeQuery.java @@ -30,10 +30,13 @@ import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.Sort; +import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.RangeQuery; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.util.LuceneTestCase; -public class TestLongTrieRangeFilter extends LuceneTestCase { +public class TestLongTrieRangeQuery extends LuceneTestCase { // distance of entries private static final long distance = 66666L; // shift the starting of the values to the left, to also have negative values: @@ -53,7 +56,10 @@ public class TestLongTrieRangeFilter extends LuceneTestCase { private static final RAMDirectory directory; private static final IndexSearcher searcher; static { - try { + try { + // set the theoretical maximum term count for 8bit (see docs for the number) + BooleanQuery.setMaxClauseCount(7*255*2 + 255); + directory = new RAMDirectory(); IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, MaxFieldLength.UNLIMITED); @@ -97,20 +103,54 @@ public class TestLongTrieRangeFilter 
extends LuceneTestCase { } } + /** test for constant score + boolean query + filter, the other tests only use the constant score mode */ private void testRange(int precisionStep) throws Exception { String field="field"+precisionStep; int count=3000; long lower=(distance*3/2)+startOffset, upper=lower + count*distance + (distance/3); - LongTrieRangeFilter f=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), true, true); - TopDocs topDocs = searcher.search(f.asQuery(), null, noDocs, Sort.INDEXORDER); - System.out.println("Found "+f.getLastNumberOfTerms()+" distinct terms in range for field '"+field+"'."); - ScoreDoc[] sd = topDocs.scoreDocs; - assertNotNull(sd); - assertEquals("Score doc count", count, sd.length ); - Document doc=searcher.doc(sd[0].doc); - assertEquals("First doc", 2*distance+startOffset, TrieUtils.prefixCodedToLong(doc.get("value")) ); - doc=searcher.doc(sd[sd.length-1].doc); - assertEquals("Last doc", (1+count)*distance+startOffset, TrieUtils.prefixCodedToLong(doc.get("value")) ); + LongTrieRangeQuery q = new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), true, true); + LongTrieRangeFilter f = new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), true, true); + int lastTerms = 0; + for (byte i=0; i<3; i++) { + TopDocs topDocs; + int terms; + String type; + q.clearTotalNumberOfTerms(); + f.clearTotalNumberOfTerms(); + switch (i) { + case 0: + type = " (constant score)"; + q.setConstantScoreRewrite(true); + topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER); + terms = q.getTotalNumberOfTerms(); + break; + case 1: + type = " (boolean query)"; + q.setConstantScoreRewrite(false); + topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER); + terms = q.getTotalNumberOfTerms(); + break; + case 2: + type = " (filter)"; + topDocs = searcher.search(new MatchAllDocsQuery(), f, noDocs, Sort.INDEXORDER); + terms = f.getTotalNumberOfTerms(); + break; + default: + return; 
+ } + System.out.println("Found "+terms+" distinct terms in range for field '"+field+"'"+type+"."); + ScoreDoc[] sd = topDocs.scoreDocs; + assertNotNull(sd); + assertEquals("Score doc count"+type, count, sd.length ); + Document doc=searcher.doc(sd[0].doc); + assertEquals("First doc"+type, 2*distance+startOffset, TrieUtils.prefixCodedToLong(doc.get("value")) ); + doc=searcher.doc(sd[sd.length-1].doc); + assertEquals("Last doc"+type, (1+count)*distance+startOffset, TrieUtils.prefixCodedToLong(doc.get("value")) ); + if (i>0) { + assertEquals("Distinct term number is equal for all query types", lastTerms, terms); + } + lastTerms = terms; + } } public void testRange_8bit() throws Exception { @@ -125,13 +165,18 @@ public class TestLongTrieRangeFilter extends LuceneTestCase { testRange(2); } + public void testInverseRange() throws Exception { + LongTrieRangeFilter f = new LongTrieRangeFilter("field8", 8, new Long(1000L), new Long(-1000L), true, true); + assertSame("A inverse range should return the EMPTY_DOCIDSET instance", DocIdSet.EMPTY_DOCIDSET, f.getDocIdSet(searcher.getIndexReader())); + } + private void testLeftOpenRange(int precisionStep) throws Exception { String field="field"+precisionStep; int count=3000; long upper=(count-1)*distance + (distance/3) + startOffset; - LongTrieRangeFilter f=new LongTrieRangeFilter(field, precisionStep, null, new Long(upper), true, true); - TopDocs topDocs = searcher.search(f.asQuery(), null, noDocs, Sort.INDEXORDER); - System.out.println("Found "+f.getLastNumberOfTerms()+" distinct terms in left open range for field '"+field+"'."); + LongTrieRangeQuery q=new LongTrieRangeQuery(field, precisionStep, null, new Long(upper), true, true); + TopDocs topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER); + System.out.println("Found "+q.getTotalNumberOfTerms()+" distinct terms in left open range for field '"+field+"'."); ScoreDoc[] sd = topDocs.scoreDocs; assertNotNull(sd); assertEquals("Score doc count", count, sd.length ); @@ 
-157,9 +202,9 @@ public class TestLongTrieRangeFilter extends LuceneTestCase { String field="field"+precisionStep; int count=3000; long lower=(count-1)*distance + (distance/3) +startOffset; - LongTrieRangeFilter f=new LongTrieRangeFilter(field, precisionStep, new Long(lower), null, true, true); - TopDocs topDocs = searcher.search(f.asQuery(), null, noDocs, Sort.INDEXORDER); - System.out.println("Found "+f.getLastNumberOfTerms()+" distinct terms in right open range for field '"+field+"'."); + LongTrieRangeQuery q=new LongTrieRangeQuery(field, precisionStep, new Long(lower), null, true, true); + TopDocs topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER); + System.out.println("Found "+q.getTotalNumberOfTerms()+" distinct terms in right open range for field '"+field+"'."); ScoreDoc[] sd = topDocs.scoreDocs; assertNotNull(sd); assertEquals("Score doc count", noDocs-count, sd.length ); @@ -184,7 +229,7 @@ public class TestLongTrieRangeFilter extends LuceneTestCase { private void testRandomTrieAndClassicRangeQuery(int precisionStep) throws Exception { final Random rnd=newRandom(); String field="field"+precisionStep; - int termCount=0; + int termCountT=0,termCountC=0; for (int i=0; i<50; i++) { long lower=(long)(rnd.nextDouble()*noDocs*distance)+startOffset; long upper=(long)(rnd.nextDouble()*noDocs*distance)+startOffset; @@ -192,39 +237,45 @@ public class TestLongTrieRangeFilter extends LuceneTestCase { long a=lower; lower=upper; upper=a; } // test inclusive range - LongTrieRangeFilter tf=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), true, true); + LongTrieRangeQuery tq=new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), true, true); RangeQuery cq=new RangeQuery(field, TrieUtils.longToPrefixCoded(lower), TrieUtils.longToPrefixCoded(upper), true, true); cq.setConstantScoreRewrite(true); - TopDocs tTopDocs = searcher.search(tf.asQuery(), 1); + TopDocs tTopDocs = searcher.search(tq, 1); TopDocs cTopDocs = 
searcher.search(cq, 1); - assertEquals("Returned count for LongTrieRangeFilter and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); - termCount += tf.getLastNumberOfTerms(); + assertEquals("Returned count for LongTrieRangeQuery and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); + termCountT += tq.getTotalNumberOfTerms(); + termCountC += cq.getTotalNumberOfTerms(); // test exclusive range - tf=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), false, false); + tq=new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), false, false); cq=new RangeQuery(field, TrieUtils.longToPrefixCoded(lower), TrieUtils.longToPrefixCoded(upper), false, false); cq.setConstantScoreRewrite(true); - tTopDocs = searcher.search(tf.asQuery(), 1); + tTopDocs = searcher.search(tq, 1); cTopDocs = searcher.search(cq, 1); - assertEquals("Returned count for LongTrieRangeFilter and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); - termCount += tf.getLastNumberOfTerms(); + assertEquals("Returned count for LongTrieRangeQuery and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); + termCountT += tq.getTotalNumberOfTerms(); + termCountC += cq.getTotalNumberOfTerms(); // test left exclusive range - tf=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), false, true); + tq=new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), false, true); cq=new RangeQuery(field, TrieUtils.longToPrefixCoded(lower), TrieUtils.longToPrefixCoded(upper), false, true); cq.setConstantScoreRewrite(true); - tTopDocs = searcher.search(tf.asQuery(), 1); + tTopDocs = searcher.search(tq, 1); cTopDocs = searcher.search(cq, 1); - assertEquals("Returned count for LongTrieRangeFilter and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); - termCount += tf.getLastNumberOfTerms(); + assertEquals("Returned count for LongTrieRangeQuery and 
RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); + termCountT += tq.getTotalNumberOfTerms(); + termCountC += cq.getTotalNumberOfTerms(); // test right exclusive range - tf=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), true, false); + tq=new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), true, false); cq=new RangeQuery(field, TrieUtils.longToPrefixCoded(lower), TrieUtils.longToPrefixCoded(upper), true, false); cq.setConstantScoreRewrite(true); - tTopDocs = searcher.search(tf.asQuery(), 1); + tTopDocs = searcher.search(tq, 1); cTopDocs = searcher.search(cq, 1); - assertEquals("Returned count for LongTrieRangeFilter and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); - termCount += tf.getLastNumberOfTerms(); + assertEquals("Returned count for LongTrieRangeQuery and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); + termCountT += tq.getTotalNumberOfTerms(); + termCountC += cq.getTotalNumberOfTerms(); } - System.out.println("Average number of terms during random search on '" + field + "': " + (((double)termCount)/(50*4))); + System.out.println("Average number of terms during random search on '" + field + "':"); + System.out.println(" Trie query: " + (((double)termCountT)/(50*4))); + System.out.println(" Classical query: " + (((double)termCountC)/(50*4))); } public void testRandomTrieAndClassicRangeQuery_8bit() throws Exception { @@ -250,19 +301,19 @@ public class TestLongTrieRangeFilter extends LuceneTestCase { long a=lower; lower=upper; upper=a; } // test inclusive range - Query tq=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), true, true).asQuery(); + Query tq=new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), true, true); TopDocs tTopDocs = searcher.search(tq, 1); assertEquals("Returned count of range query must be equal to inclusive range length", upper-lower+1, tTopDocs.totalHits ); 
// test exclusive range - tq=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), false, false).asQuery(); + tq=new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), false, false); tTopDocs = searcher.search(tq, 1); assertEquals("Returned count of range query must be equal to exclusive range length", Math.max(upper-lower-1, 0), tTopDocs.totalHits ); // test left exclusive range - tq=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), false, true).asQuery(); + tq=new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), false, true); tTopDocs = searcher.search(tq, 1); assertEquals("Returned count of range query must be equal to half exclusive range length", upper-lower, tTopDocs.totalHits ); // test right exclusive range - tq=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), true, false).asQuery(); + tq=new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), true, false); tTopDocs = searcher.search(tq, 1); assertEquals("Returned count of range query must be equal to half exclusive range length", upper-lower, tTopDocs.totalHits ); } @@ -291,7 +342,7 @@ public class TestLongTrieRangeFilter extends LuceneTestCase { if (lower>upper) { long a=lower; lower=upper; upper=a; } - Query tq=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), true, true).asQuery(); + Query tq=new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), true, true); TopDocs topDocs = searcher.search(tq, null, noDocs, new Sort(TrieUtils.getLongSortField(field, true))); if (topDocs.totalHits==0) continue; ScoreDoc[] sd = topDocs.scoreDocs; diff --git a/contrib/queries/src/test/org/apache/lucene/search/trie/TestLongTrieTokenStream.java b/contrib/queries/src/test/org/apache/lucene/search/trie/TestLongTrieTokenStream.java index be8c47b4719..a36d2c24698 100644 --- 
a/contrib/queries/src/test/org/apache/lucene/search/trie/TestLongTrieTokenStream.java +++ b/contrib/queries/src/test/org/apache/lucene/search/trie/TestLongTrieTokenStream.java @@ -1,54 +1,54 @@ -package org.apache.lucene.search.trie; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.analysis.Token; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; - -public class TestLongTrieTokenStream extends LuceneTestCase { - - static final int precisionStep = 8; - static final long value = 4573245871874382L; - - public void testStreamNewAPI() throws Exception { - final LongTrieTokenStream stream=new LongTrieTokenStream(value, precisionStep); - stream.setUseNewAPI(true); - // use getAttribute to test if attributes really exist, if not an IAE will be throwed - final ShiftAttribute shiftAtt = (ShiftAttribute) stream.getAttribute(ShiftAttribute.class); - final TermAttribute termAtt = (TermAttribute) stream.getAttribute(TermAttribute.class); - for (int shift=0; shift<64; shift+=precisionStep) { - assertTrue("New token is available", stream.incrementToken()); - assertEquals("Shift value", shift, shiftAtt.getShift()); - assertEquals("Term is correctly encoded", TrieUtils.longToPrefixCoded(value, shift), termAtt.term()); - } - assertFalse("No more tokens available", stream.incrementToken()); - } - - public void testStreamOldAPI() throws Exception { - final LongTrieTokenStream stream=new LongTrieTokenStream(value, precisionStep); - stream.setUseNewAPI(false); - Token tok=new Token(); - for (int shift=0; shift<64; shift+=precisionStep) { - assertNotNull("New token is available", tok=stream.next(tok)); - assertEquals("Term is correctly encoded", TrieUtils.longToPrefixCoded(value, shift), tok.term()); - } - assertNull("No more tokens available", stream.next(tok)); - } - -} +package org.apache.lucene.search.trie; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.analysis.Token; +import org.apache.lucene.analysis.tokenattributes.TermAttribute; + +public class TestLongTrieTokenStream extends LuceneTestCase { + + static final int precisionStep = 8; + static final long value = 4573245871874382L; + + public void testStreamNewAPI() throws Exception { + final LongTrieTokenStream stream=new LongTrieTokenStream(value, precisionStep); + stream.setUseNewAPI(true); + // use getAttribute to test if attributes really exist; if not, an IAE will be thrown + final ShiftAttribute shiftAtt = (ShiftAttribute) stream.getAttribute(ShiftAttribute.class); + final TermAttribute termAtt = (TermAttribute) stream.getAttribute(TermAttribute.class); + for (int shift=0; shift<64; shift+=precisionStep) { + assertTrue("New token is available", stream.incrementToken()); + assertEquals("Shift value", shift, shiftAtt.getShift()); + assertEquals("Term is correctly encoded", TrieUtils.longToPrefixCoded(value, shift), termAtt.term()); + } + assertFalse("No more tokens available", stream.incrementToken()); + } + + public void testStreamOldAPI() throws Exception { + final LongTrieTokenStream stream=new LongTrieTokenStream(value, precisionStep); + stream.setUseNewAPI(false); + Token tok=new Token(); + for (int shift=0; shift<64; shift+=precisionStep) { + assertNotNull("New token is available", 
tok=stream.next(tok)); + assertEquals("Term is correctly encoded", TrieUtils.longToPrefixCoded(value, shift), tok.term()); + } + assertNull("No more tokens available", stream.next(tok)); + } + +}