From 92c64841618b55b09bb67d216f85177c1d47fed9 Mon Sep 17 00:00:00 2001
From: Uwe Schindler The query is in constant score mode per default. With precision steps of ≤4, this
+ * query can be run in conventional boolean rewrite mode without changing the max clause count.
+ * For more information, how the algorithm works, see the {@linkplain org.apache.lucene.search.trie package description}.
+ */
+public class IntTrieRangeQuery extends AbstractTrieRangeQuery {
+
+ /**
+ * A trie query for matching trie coded values using the given field name and
+ * the default helper field.
+ * A The number of bits removed from the right for each trie entry is called
- * The usage pattern is (it is recommened to switch off norms and term frequencies
- * for numeric fields; it does not make sense to have them):
- * For optimal performance, re-use the TokenStream and Field instance
- * for more than one document:
- * Please note: Token streams are read, when the document is added to index.
- * If you index more than one numeric field, use a separate instance for each.
- * For more information, how trie fields work, see the
- * {@linkplain org.apache.lucene.search.trie package description}.
- */
-public class IntTrieTokenStream extends TokenStream {
-
- /** The full precision token gets this token type assigned. */
- public static final String TOKEN_TYPE_FULL_PREC = "fullPrecTrieInt";
-
- /** The lower precision tokens gets this token type assigned. */
- public static final String TOKEN_TYPE_LOWER_PREC = "lowerPrecTrieInt";
-
- /**
- * Creates a token stream for indexing A The number of bits removed from the right for each trie entry is called
+ * The usage pattern is (it is recommended to switch off norms and term frequencies
+ * for numeric fields; it does not make sense to have them):
+ * For optimal performance, re-use the TokenStream and Field instance
+ * for more than one document:
+ * Please note: Token streams are read, when the document is added to index.
+ * If you index more than one numeric field, use a separate instance for each.
+ * For more information, how trie fields work, see the
+ * {@linkplain org.apache.lucene.search.trie package description}.
+ */
+public class IntTrieTokenStream extends TokenStream {
+
+ /** The full precision token gets this token type assigned. */
+ public static final String TOKEN_TYPE_FULL_PREC = "fullPrecTrieInt";
+
+ /** The lower precision tokens gets this token type assigned. */
+ public static final String TOKEN_TYPE_LOWER_PREC = "lowerPrecTrieInt";
+
+ /**
+ * Creates a token stream for indexing The query is in constant score mode per default. With precision steps of ≤4, this
+ * query can be run in conventional boolean rewrite mode without changing the max clause count.
+ * For more information, how the algorithm works, see the {@linkplain org.apache.lucene.search.trie package description}.
+ */
+public class LongTrieRangeQuery extends AbstractTrieRangeQuery {
+
+ /**
+ * A trie query for matching trie coded values using the given field name and
+ * the default helper field.
+ * A The number of bits removed from the right for each trie entry is called
- * The usage pattern is (it is recommened to switch off norms and term frequencies
- * for numeric fields; it does not make sense to have them):
- * For optimal performance, re-use the TokenStream and Field instance
- * for more than one document:
- * Please note: Token streams are read, when the document is added to index.
- * If you index more than one numeric field, use a separate instance for each.
- * For more information, how trie fields work, see the
- * {@linkplain org.apache.lucene.search.trie package description}.
- */
-public class LongTrieTokenStream extends TokenStream {
-
- /** The full precision token gets this token type assigned. */
- public static final String TOKEN_TYPE_FULL_PREC = "fullPrecTrieLong";
-
- /** The lower precision tokens gets this token type assigned. */
- public static final String TOKEN_TYPE_LOWER_PREC = "lowerPrecTrieLong";
-
- /**
- * Creates a token stream for indexing A The number of bits removed from the right for each trie entry is called
+ * The usage pattern is (it is recommended to switch off norms and term frequencies
+ * for numeric fields; it does not make sense to have them):
+ * For optimal performance, re-use the TokenStream and Field instance
+ * for more than one document:
+ * Please note: Token streams are read, when the document is added to index.
+ * If you index more than one numeric field, use a separate instance for each.
+ * For more information, how trie fields work, see the
+ * {@linkplain org.apache.lucene.search.trie package description}.
+ */
+public class LongTrieTokenStream extends TokenStream {
+
+ /** The full precision token gets this token type assigned. */
+ public static final String TOKEN_TYPE_FULL_PREC = "fullPrecTrieLong";
+
+ /** The lower precision tokens gets this token type assigned. */
+ public static final String TOKEN_TYPE_LOWER_PREC = "lowerPrecTrieLong";
+
+ /**
+ * Creates a token stream for indexing
+ * WARNING: Term enumerations is not guaranteed to be always ordered by
+ * {@link Term#compareTo}.
+ * The ordering depends on how {@link TrieUtils#splitLongRange} and
+ * {@link TrieUtils#splitIntRange} generates the sub-ranges. For
+ * the {@link MultiTermQuery} ordering is not relevant.
+ */
+final class TrieRangeTermEnum extends FilteredTermEnum {
+
+ private final AbstractTrieRangeQuery query;
+ private final IndexReader reader;
+ private final LinkedList/* The numeric index fields you prepared in this way can be searched by
-{@link org.apache.lucene.search.trie.LongTrieRangeFilter} or {@link org.apache.lucene.search.trie.IntTrieRangeFilter}:new {Long|Int}TrieRangeFilter(....).asQuery()
.
- * This is a synonym for wrapping with a {@link ConstantScoreQuery},
- * but this query returns a better toString()
variant.
- */
- public Query asQuery() {
- return new ConstantScoreQuery(this) {
-
- /** this instance return a nicer String variant than the original {@link ConstantScoreQuery} */
- //@Override
- public String toString(final String field) {
- // return a more convenient representation of this query than ConstantScoreQuery does:
- return ((AbstractTrieRangeFilter) filter).toString(field)+ToStringUtils.boost(getBoost());
- }
-
- };
- }
-
- void fillBits(
- final IndexReader reader,
- final OpenBitSet bits, final TermDocs termDocs,
- final String lowerTerm, final String upperTerm
- ) throws IOException {
- final int len=lowerTerm.length();
- assert upperTerm.length()==len;
-
- // find the docs
- final TermEnum enumerator = reader.terms(new Term(field, lowerTerm));
- try {
- do {
- final Term term = enumerator.term();
- if (term!=null && term.field()==field) {
- // break out when upperTerm reached or length of term is different
- final String t=term.text();
- if (len!=t.length() || t.compareTo(upperTerm)>0) break;
- // we have a good term, find the docs
- lastNumberOfTerms++;
- termDocs.seek(enumerator);
- while (termDocs.next()) bits.set(termDocs.doc());
- } else break;
- } while (enumerator.next());
- } finally {
- enumerator.close();
- }
- }
-
- // members
- final String field;
- final int precisionStep;
- final Number min,max;
- final boolean minInclusive,maxInclusive;
-
- private int lastNumberOfTerms=-1;
-}
diff --git a/contrib/queries/src/java/org/apache/lucene/search/trie/AbstractTrieRangeQuery.java b/contrib/queries/src/java/org/apache/lucene/search/trie/AbstractTrieRangeQuery.java
new file mode 100644
index 00000000000..d9a9d2a6224
--- /dev/null
+++ b/contrib/queries/src/java/org/apache/lucene/search/trie/AbstractTrieRangeQuery.java
@@ -0,0 +1,112 @@
+package org.apache.lucene.search.trie;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.search.Filter;
+import org.apache.lucene.search.MultiTermQuery;
+import org.apache.lucene.search.FilteredTermEnum;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.util.ToStringUtils;
+
+abstract class AbstractTrieRangeQuery extends MultiTermQuery {
+
+ AbstractTrieRangeQuery(final String field, final int precisionStep,
+ Number min, Number max, final boolean minInclusive, final boolean maxInclusive
+ ) {
+ this.field = field.intern();
+ this.precisionStep = precisionStep;
+ this.min = min;
+ this.max = max;
+ this.minInclusive = minInclusive;
+ this.maxInclusive = maxInclusive;
+ setConstantScoreRewrite(true);
+ }
+
+ abstract void passRanges(TrieRangeTermEnum enumerator);
+
+ //@Override
+ protected FilteredTermEnum getEnum(final IndexReader reader) throws IOException {
+ TrieRangeTermEnum enumerator = new TrieRangeTermEnum(this, reader);
+ passRanges(enumerator);
+ enumerator.init();
+ return enumerator;
+ }
+
+ /** Returns the field name for this query */
+ public String getField() { return field; }
+
+ /** Returns true
if the lower endpoint is inclusive */
+ public boolean includesMin() { return minInclusive; }
+
+ /** Returns true
if the upper endpoint is inclusive */
+ public boolean includesMax() { return maxInclusive; }
+
+ //@Override
+ public String toString(final String field) {
+ final StringBuffer sb=new StringBuffer();
+ if (!this.field.equals(field)) sb.append(this.field).append(':');
+ return sb.append(minInclusive ? '[' : '{')
+ .append((min==null) ? "*" : min.toString())
+ .append(" TO ")
+ .append((max==null) ? "*" : max.toString())
+ .append(maxInclusive ? ']' : '}').toString();
+ }
+
+ //@Override
+ public final boolean equals(final Object o) {
+ if (o==this) return true;
+ if (o==null) return false;
+ if (this.getClass().equals(o.getClass())) {
+ AbstractTrieRangeQuery q=(AbstractTrieRangeQuery)o;
+ return (
+ field==q.field &&
+ (q.min == null ? min == null : q.min.equals(min)) &&
+ (q.max == null ? max == null : q.max.equals(max)) &&
+ minInclusive==q.minInclusive &&
+ maxInclusive==q.maxInclusive &&
+ precisionStep==q.precisionStep
+ );
+ }
+ return false;
+ }
+
+ //@Override
+ public final int hashCode() {
+ int hash = field.hashCode() + (precisionStep^0x64365465);
+ if (min!=null) hash += min.hashCode()^0x14fa55fb;
+ if (max!=null) hash += max.hashCode()^0x733fa5fe;
+ return hash+
+ (Boolean.valueOf(minInclusive).hashCode()^0x14fa55fb)+
+ (Boolean.valueOf(maxInclusive).hashCode()^0x733fa5fe);
+ }
+
+ // TODO: Make this method accessible by *TrieRangeFilter,
+ // can be removed, when moved to core.
+ //@Override
+ protected Filter getFilter() {
+ return super.getFilter();
+ }
+
+ // members
+ final String field;
+ final int precisionStep;
+ final Number min,max;
+ final boolean minInclusive,maxInclusive;
+}
diff --git a/contrib/queries/src/java/org/apache/lucene/search/trie/IntTrieRangeFilter.java b/contrib/queries/src/java/org/apache/lucene/search/trie/IntTrieRangeFilter.java
index 889a67e7ca0..8e0cd3f61e1 100644
--- a/contrib/queries/src/java/org/apache/lucene/search/trie/IntTrieRangeFilter.java
+++ b/contrib/queries/src/java/org/apache/lucene/search/trie/IntTrieRangeFilter.java
@@ -1,101 +1,61 @@
-package org.apache.lucene.search.trie;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-
-import org.apache.lucene.document.Document;
-import org.apache.lucene.search.Filter;
-import org.apache.lucene.search.DocIdSet;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.TermDocs;
-import org.apache.lucene.util.OpenBitSet;
-
-
-/**
- * Implementation of a Lucene {@link Filter} that implements trie-based range filtering for ints/floats.
- * This filter depends on a specific structure of terms in the index that can only be created
- * by indexing via {@link IntTrieTokenStream} methods.
- * For more information, how the algorithm works, see the {@linkplain org.apache.lucene.search.trie package description}.
- */
-public class IntTrieRangeFilter extends AbstractTrieRangeFilter {
-
- /**
- * A trie filter for matching trie coded values using the given field name and
- * the default helper field.
- * precisionStep
must me equal or a multiple of the precisionStep
- * used for indexing the values.
- * You can leave the bounds open, by supplying null
for min
and/or
- * max
. Inclusive/exclusive bounds can also be supplied.
- * To query float values use the converter {@link TrieUtils#floatToSortableInt}.
- */
- public IntTrieRangeFilter(final String field, final int precisionStep,
- final Integer min, final Integer max, final boolean minInclusive, final boolean maxInclusive
- ) {
- super(field,precisionStep,min,max,minInclusive,maxInclusive);
- }
-
- /**
- * Returns a DocIdSet that provides the documents which should be permitted or prohibited in search results.
- */
- //@Override
- public DocIdSet getDocIdSet(final IndexReader reader) throws IOException {
- // calculate the upper and lower bounds respecting the inclusive and null values.
- int minBound=(this.min==null) ? Integer.MIN_VALUE : (
- minInclusive ? this.min.intValue() : (this.min.intValue()+1)
- );
- int maxBound=(this.max==null) ? Integer.MAX_VALUE : (
- maxInclusive ? this.max.intValue() : (this.max.intValue()-1)
- );
-
- resetLastNumberOfTerms();
- if (minBound > maxBound) {
- // shortcut, no docs will match this
- return DocIdSet.EMPTY_DOCIDSET;
- } else {
- final OpenBitSet bits = new OpenBitSet(reader.maxDoc());
- final TermDocs termDocs = reader.termDocs();
- try {
- TrieUtils.splitIntRange(new TrieUtils.IntRangeBuilder() {
-
- //@Override
- public final void addRange(String minPrefixCoded, String maxPrefixCoded) {
- try {
- fillBits(
- reader, bits, termDocs,
- minPrefixCoded, maxPrefixCoded
- );
- } catch (IOException ioe) {
- // IntRangeBuilder is not allowed to throw checked exceptions:
- // wrap as RuntimeException
- throw new RuntimeException(ioe);
- }
- }
-
- }, precisionStep, minBound, maxBound);
- } catch (RuntimeException e) {
- if (e.getCause() instanceof IOException) throw (IOException)e.getCause();
- throw e;
- } finally {
- termDocs.close();
- }
- return bits;
- }
- }
-
-}
+package org.apache.lucene.search.trie;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.search.Filter; // for javadocs
+import org.apache.lucene.search.MultiTermQueryWrapperFilter;
+
+/**
+ * Implementation of a Lucene {@link Filter} that implements trie-based range filtering for ints/floats.
+ * This filter depends on a specific structure of terms in the index that can only be created
+ * by indexing via {@link IntTrieTokenStream} methods.
+ * For more information, how the algorithm works, see the {@linkplain org.apache.lucene.search.trie package description}.
+ */
+public class IntTrieRangeFilter extends MultiTermQueryWrapperFilter {
+
+ /**
+ * A trie filter for matching trie coded values using the given field name and
+ * the default helper field.
+ * precisionStep
must be equal or a multiple of the precisionStep
+ * used for indexing the values.
+ * You can leave the bounds open, by supplying null
for min
and/or
+ * max
. Inclusive/exclusive bounds can also be supplied.
+ * To filter float values use the converter {@link TrieUtils#floatToSortableInt}.
+ */
+ public IntTrieRangeFilter(final String field, final int precisionStep,
+ final Integer min, final Integer max, final boolean minInclusive, final boolean maxInclusive
+ ) {
+ super(new IntTrieRangeQuery(field,precisionStep,min,max,minInclusive,maxInclusive));
+ }
+
+ /** Returns the field name for this filter */
+ public String getField() { return ((IntTrieRangeQuery)query).getField(); }
+
+ /** Returns true
if the lower endpoint is inclusive */
+ public boolean includesMin() { return ((IntTrieRangeQuery)query).includesMin(); }
+
+ /** Returns true
if the upper endpoint is inclusive */
+ public boolean includesMax() { return ((IntTrieRangeQuery)query).includesMax(); }
+
+ /** Returns the lower value of this range filter */
+ public Integer getMin() { return ((IntTrieRangeQuery)query).getMin(); }
+
+ /** Returns the upper value of this range filter */
+ public Integer getMax() { return ((IntTrieRangeQuery)query).getMax(); }
+
+}
diff --git a/contrib/queries/src/java/org/apache/lucene/search/trie/IntTrieRangeQuery.java b/contrib/queries/src/java/org/apache/lucene/search/trie/IntTrieRangeQuery.java
new file mode 100644
index 00000000000..d0fdc9be485
--- /dev/null
+++ b/contrib/queries/src/java/org/apache/lucene/search/trie/IntTrieRangeQuery.java
@@ -0,0 +1,66 @@
+package org.apache.lucene.search.trie;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.search.Query;
+
+/**
+ * Implementation of a Lucene {@link Query} that implements trie-based range querying for ints/floats.
+ * This query depends on a specific structure of terms in the index that can only be created
+ * by indexing via {@link IntTrieTokenStream} methods.
+ * precisionStep
must be equal or a multiple of the precisionStep
+ * used for indexing the values.
+ * You can leave the bounds open, by supplying null
for min
and/or
+ * max
. Inclusive/exclusive bounds can also be supplied.
+ * To query float values use the converter {@link TrieUtils#floatToSortableInt}.
+ */
+ public IntTrieRangeQuery(final String field, final int precisionStep,
+ final Integer min, final Integer max, final boolean minInclusive, final boolean maxInclusive
+ ) {
+ super(field,precisionStep,min,max,minInclusive,maxInclusive);
+ }
+
+ //@Override
+ void passRanges(TrieRangeTermEnum enumerator) {
+ // calculate the upper and lower bounds respecting the inclusive and null values.
+ int minBound=(this.min==null) ? Integer.MIN_VALUE : (
+ minInclusive ? this.min.intValue() : (this.min.intValue()+1)
+ );
+ int maxBound=(this.max==null) ? Integer.MAX_VALUE : (
+ maxInclusive ? this.max.intValue() : (this.max.intValue()-1)
+ );
+
+ TrieUtils.splitIntRange(enumerator.getIntRangeBuilder(), precisionStep, minBound, maxBound);
+ }
+
+ /** Returns the lower value of this range query */
+ public Integer getMin() { return (Integer)min; }
+
+ /** Returns the upper value of this range query */
+ public Integer getMax() { return (Integer)max; }
+
+}
diff --git a/contrib/queries/src/java/org/apache/lucene/search/trie/IntTrieTokenStream.java b/contrib/queries/src/java/org/apache/lucene/search/trie/IntTrieTokenStream.java
index 597664a056c..46c02f4c353 100644
--- a/contrib/queries/src/java/org/apache/lucene/search/trie/IntTrieTokenStream.java
+++ b/contrib/queries/src/java/org/apache/lucene/search/trie/IntTrieTokenStream.java
@@ -1,172 +1,172 @@
-package org.apache.lucene.search.trie;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Token;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
-import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-
-/**
- * This class provides a {@link TokenStream} for indexing int
values
- * that can be queried by {@link IntTrieRangeFilter}. This stream is not intended
- * to be used in analyzers, its more for iterating the different precisions during
- * indexing a specific numeric value.
- * int
value is indexed as multiple string encoded terms, each reduced
- * by zeroing bits from the right. Each value is also prefixed (in the first char) by the
- * shift
value (number of bits removed) used during encoding.
- * precisionStep
in this API. For comparing the different step values, see the
- * {@linkplain org.apache.lucene.search.trie package description}.
- *
- * Field field = new Field(name, new IntTrieTokenStream(value, precisionStep));
- * field.setOmitNorms(true);
- * field.setOmitTermFreqAndPositions(true);
- * document.add(field);
- *
- *
- * // init
- * TokenStream stream = new IntTrieTokenStream(precisionStep);
- * Field field = new Field(name, stream);
- * field.setOmitNorms(true);
- * field.setOmitTermFreqAndPositions(true);
- * Document doc = new Document();
- * document.add(field);
- * // use this code to index many documents:
- * stream.setValue(value1)
- * writer.addDocument(document);
- * stream.setValue(value2)
- * writer.addDocument(document);
- * ...
- *
- * value
with the given
- * precisionStep
. As instance creating is a major cost,
- * consider using a {@link #IntTrieTokenStream(int)} instance once for
- * indexing a large number of documents and assign a value with
- * {@link #setValue} for each document.
- * To index float values use the converter {@link TrieUtils#doubleToSortableLong}.
- */
- public IntTrieTokenStream(final int value, final int precisionStep) {
- if (precisionStep<1 || precisionStep>32)
- throw new IllegalArgumentException("precisionStep may only be 1..32");
- this.value = value;
- this.precisionStep = precisionStep;
- termAtt = (TermAttribute) addAttribute(TermAttribute.class);
- typeAtt = (TypeAttribute) addAttribute(TypeAttribute.class);
- posIncrAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class);
- shiftAtt = (ShiftAttribute) addAttribute(ShiftAttribute.class);
- }
-
- /**
- * Creates a token stream for indexing values with the given
- * precisionStep
. This stream is initially "empty"
- * (using a numeric value of 0), assign a value before indexing
- * each document using {@link #setValue}.
- */
- public IntTrieTokenStream(final int precisionStep) {
- this(0, precisionStep);
- }
-
- /**
- * Resets the token stream to deliver prefix encoded values
- * for value
. Use this method to index the same
- * numeric field for a large number of documents and reuse the
- * current stream instance.
- * To index float values use the converter {@link TrieUtils#doubleToSortableLong}.
- */
- public void setValue(final int value) {
- this.value = value;
- reset();
- }
-
- // @Override
- public void reset() {
- shift = 0;
- }
-
- // @Override
- public boolean incrementToken() {
- if (shift>=32) return false;
- final char[] buffer = termAtt.resizeTermBuffer(TrieUtils.INT_BUF_SIZE);
- termAtt.setTermLength(TrieUtils.intToPrefixCoded(value, shift, buffer));
- shiftAtt.setShift(shift);
- if (shift==0) {
- typeAtt.setType(TOKEN_TYPE_FULL_PREC);
- posIncrAtt.setPositionIncrement(1);
- } else {
- typeAtt.setType(TOKEN_TYPE_LOWER_PREC);
- posIncrAtt.setPositionIncrement(0);
- }
- shift += precisionStep;
- return true;
- }
-
- // @Override
- /** @deprecated */
- public Token next(final Token reusableToken) {
- if (shift>=32) return null;
- reusableToken.clear();
- final char[] buffer = reusableToken.resizeTermBuffer(TrieUtils.INT_BUF_SIZE);
- reusableToken.setTermLength(TrieUtils.intToPrefixCoded(value, shift, buffer));
- if (shift==0) {
- reusableToken.setType(TOKEN_TYPE_FULL_PREC);
- reusableToken.setPositionIncrement(1);
- } else {
- reusableToken.setType(TOKEN_TYPE_LOWER_PREC);
- reusableToken.setPositionIncrement(0);
- }
- shift += precisionStep;
- return reusableToken;
- }
-
- // @Override
- public String toString() {
- final StringBuffer sb = new StringBuffer("(trie-int,value=").append(value);
- sb.append(",precisionStep=").append(precisionStep).append(')');
- return sb.toString();
- }
-
- // members
- private final TermAttribute termAtt;
- private final TypeAttribute typeAtt;
- private final PositionIncrementAttribute posIncrAtt;
- private final ShiftAttribute shiftAtt;
-
- private int shift = 0;
- private int value;
- private final int precisionStep;
-}
+package org.apache.lucene.search.trie;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+
+/**
+ * This class provides a {@link TokenStream} for indexing int
values
+ * that can be queried by {@link IntTrieRangeFilter}. This stream is not intended
+ * to be used in analyzers, it's more for iterating the different precisions during
+ * indexing a specific numeric value.
+ * int
value is indexed as multiple string encoded terms, each reduced
+ * by zeroing bits from the right. Each value is also prefixed (in the first char) by the
+ * shift
value (number of bits removed) used during encoding.
+ * precisionStep
in this API. For comparing the different step values, see the
+ * {@linkplain org.apache.lucene.search.trie package description}.
+ *
+ * Field field = new Field(name, new IntTrieTokenStream(value, precisionStep));
+ * field.setOmitNorms(true);
+ * field.setOmitTermFreqAndPositions(true);
+ * document.add(field);
+ *
+ *
+ * // init
+ * TokenStream stream = new IntTrieTokenStream(precisionStep);
+ * Field field = new Field(name, stream);
+ * field.setOmitNorms(true);
+ * field.setOmitTermFreqAndPositions(true);
+ * Document doc = new Document();
+ * document.add(field);
+ * // use this code to index many documents:
+ * stream.setValue(value1)
+ * writer.addDocument(document);
+ * stream.setValue(value2)
+ * writer.addDocument(document);
+ * ...
+ *
+ * value
with the given
+ * precisionStep
. As instance creating is a major cost,
+ * consider using a {@link #IntTrieTokenStream(int)} instance once for
+ * indexing a large number of documents and assign a value with
+ * {@link #setValue} for each document.
+ * To index float values use the converter {@link TrieUtils#floatToSortableInt}.
+ */
+ public IntTrieTokenStream(final int value, final int precisionStep) {
+ if (precisionStep<1 || precisionStep>32)
+ throw new IllegalArgumentException("precisionStep may only be 1..32");
+ this.value = value;
+ this.precisionStep = precisionStep;
+ termAtt = (TermAttribute) addAttribute(TermAttribute.class);
+ typeAtt = (TypeAttribute) addAttribute(TypeAttribute.class);
+ posIncrAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class);
+ shiftAtt = (ShiftAttribute) addAttribute(ShiftAttribute.class);
+ }
+
+ /**
+ * Creates a token stream for indexing values with the given
+ * precisionStep
. This stream is initially "empty"
+ * (using a numeric value of 0), assign a value before indexing
+ * each document using {@link #setValue}.
+ */
+ public IntTrieTokenStream(final int precisionStep) {
+ this(0, precisionStep);
+ }
+
+ /**
+ * Resets the token stream to deliver prefix encoded values
+ * for value
. Use this method to index the same
+ * numeric field for a large number of documents and reuse the
+ * current stream instance.
+ * To index float values use the converter {@link TrieUtils#doubleToSortableLong}.
+ */
+ public void setValue(final int value) {
+ this.value = value;
+ reset();
+ }
+
+ // @Override
+ public void reset() {
+ shift = 0;
+ }
+
+ // @Override
+ public boolean incrementToken() {
+ if (shift>=32) return false;
+ final char[] buffer = termAtt.resizeTermBuffer(TrieUtils.INT_BUF_SIZE);
+ termAtt.setTermLength(TrieUtils.intToPrefixCoded(value, shift, buffer));
+ shiftAtt.setShift(shift);
+ if (shift==0) {
+ typeAtt.setType(TOKEN_TYPE_FULL_PREC);
+ posIncrAtt.setPositionIncrement(1);
+ } else {
+ typeAtt.setType(TOKEN_TYPE_LOWER_PREC);
+ posIncrAtt.setPositionIncrement(0);
+ }
+ shift += precisionStep;
+ return true;
+ }
+
+ // @Override
+ /** @deprecated */
+ public Token next(final Token reusableToken) {
+ if (shift>=32) return null;
+ reusableToken.clear();
+ final char[] buffer = reusableToken.resizeTermBuffer(TrieUtils.INT_BUF_SIZE);
+ reusableToken.setTermLength(TrieUtils.intToPrefixCoded(value, shift, buffer));
+ if (shift==0) {
+ reusableToken.setType(TOKEN_TYPE_FULL_PREC);
+ reusableToken.setPositionIncrement(1);
+ } else {
+ reusableToken.setType(TOKEN_TYPE_LOWER_PREC);
+ reusableToken.setPositionIncrement(0);
+ }
+ shift += precisionStep;
+ return reusableToken;
+ }
+
+ // @Override
+ public String toString() {
+ final StringBuffer sb = new StringBuffer("(trie-int,value=").append(value);
+ sb.append(",precisionStep=").append(precisionStep).append(')');
+ return sb.toString();
+ }
+
+ // members
+ private final TermAttribute termAtt;
+ private final TypeAttribute typeAtt;
+ private final PositionIncrementAttribute posIncrAtt;
+ private final ShiftAttribute shiftAtt;
+
+ private int shift = 0;
+ private int value;
+ private final int precisionStep;
+}
diff --git a/contrib/queries/src/java/org/apache/lucene/search/trie/LongTrieRangeFilter.java b/contrib/queries/src/java/org/apache/lucene/search/trie/LongTrieRangeFilter.java
index dc8afc462f1..496f3718a11 100644
--- a/contrib/queries/src/java/org/apache/lucene/search/trie/LongTrieRangeFilter.java
+++ b/contrib/queries/src/java/org/apache/lucene/search/trie/LongTrieRangeFilter.java
@@ -1,101 +1,61 @@
-package org.apache.lucene.search.trie;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-
-import org.apache.lucene.document.Document;
-import org.apache.lucene.search.Filter;
-import org.apache.lucene.search.DocIdSet;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.TermDocs;
-import org.apache.lucene.util.OpenBitSet;
-
-
-/**
- * Implementation of a Lucene {@link Filter} that implements trie-based range filtering for longs/doubles.
- * This filter depends on a specific structure of terms in the index that can only be created
- * by indexing via {@link LongTrieTokenStream} methods.
- * For more information, how the algorithm works, see the {@linkplain org.apache.lucene.search.trie package description}.
- */
-public class LongTrieRangeFilter extends AbstractTrieRangeFilter {
-
- /**
- * A trie filter for matching trie coded values using the given field name and
- * the default helper field.
- * precisionStep
must me equal or a multiple of the precisionStep
- * used for indexing the values.
- * You can leave the bounds open, by supplying null
for min
and/or
- * max
. Inclusive/exclusive bounds can also be supplied.
- * To query double values use the converter {@link TrieUtils#doubleToSortableLong}.
- */
- public LongTrieRangeFilter(final String field, final int precisionStep,
- final Long min, final Long max, final boolean minInclusive, final boolean maxInclusive
- ) {
- super(field,precisionStep,min,max,minInclusive,maxInclusive);
- }
-
- /**
- * Returns a DocIdSet that provides the documents which should be permitted or prohibited in search results.
- */
- //@Override
- public DocIdSet getDocIdSet(final IndexReader reader) throws IOException {
- // calculate the upper and lower bounds respecting the inclusive and null values.
- long minBound=(this.min==null) ? Long.MIN_VALUE : (
- minInclusive ? this.min.longValue() : (this.min.longValue()+1L)
- );
- long maxBound=(this.max==null) ? Long.MAX_VALUE : (
- maxInclusive ? this.max.longValue() : (this.max.longValue()-1L)
- );
-
- resetLastNumberOfTerms();
- if (minBound > maxBound) {
- // shortcut, no docs will match this
- return DocIdSet.EMPTY_DOCIDSET;
- } else {
- final OpenBitSet bits = new OpenBitSet(reader.maxDoc());
- final TermDocs termDocs = reader.termDocs();
- try {
- TrieUtils.splitLongRange(new TrieUtils.LongRangeBuilder() {
-
- //@Override
- public final void addRange(String minPrefixCoded, String maxPrefixCoded) {
- try {
- fillBits(
- reader, bits, termDocs,
- minPrefixCoded, maxPrefixCoded
- );
- } catch (IOException ioe) {
- // LongRangeBuilder is not allowed to throw checked exceptions:
- // wrap as RuntimeException
- throw new RuntimeException(ioe);
- }
- }
-
- }, precisionStep, minBound, maxBound);
- } catch (RuntimeException e) {
- if (e.getCause() instanceof IOException) throw (IOException)e.getCause();
- throw e;
- } finally {
- termDocs.close();
- }
- return bits;
- }
- }
-
-}
+package org.apache.lucene.search.trie;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.search.Filter; // for javadocs
+import org.apache.lucene.search.MultiTermQueryWrapperFilter;
+
+/**
+ * Implementation of a Lucene {@link Filter} that implements trie-based range filtering for longs/doubles.
+ * This filter depends on a specific structure of terms in the index that can only be created
+ * by indexing via {@link LongTrieTokenStream} methods.
+ * For more information about how the algorithm works, see the {@linkplain org.apache.lucene.search.trie package description}.
+ */
+public class LongTrieRangeFilter extends MultiTermQueryWrapperFilter {
+
+ /**
+ * A trie filter for matching trie coded values using the given field name and
+ * the default helper field.
+ * precisionStep
 must be equal to or a multiple of the precisionStep
+ * used for indexing the values.
+ * You can leave the bounds open, by supplying null
for min
and/or
+ * max
. Inclusive/exclusive bounds can also be supplied.
+ * To filter double values use the converter {@link TrieUtils#doubleToSortableLong}.
+ */
+ public LongTrieRangeFilter(final String field, final int precisionStep,
+ final Long min, final Long max, final boolean minInclusive, final boolean maxInclusive
+ ) {
+ super(new LongTrieRangeQuery(field,precisionStep,min,max,minInclusive,maxInclusive));
+ }
+
+ /** Returns the field name for this filter */
+ public String getField() { return ((LongTrieRangeQuery)query).getField(); }
+
+ /** Returns true
if the lower endpoint is inclusive */
+ public boolean includesMin() { return ((LongTrieRangeQuery)query).includesMin(); }
+
+ /** Returns true
if the upper endpoint is inclusive */
+ public boolean includesMax() { return ((LongTrieRangeQuery)query).includesMax(); }
+
+ /** Returns the lower value of this range filter */
+ public Long getMin() { return ((LongTrieRangeQuery)query).getMin(); }
+
+ /** Returns the upper value of this range filter */
+ public Long getMax() { return ((LongTrieRangeQuery)query).getMax(); }
+
+}
diff --git a/contrib/queries/src/java/org/apache/lucene/search/trie/LongTrieRangeQuery.java b/contrib/queries/src/java/org/apache/lucene/search/trie/LongTrieRangeQuery.java
new file mode 100644
index 00000000000..f32d55edc86
--- /dev/null
+++ b/contrib/queries/src/java/org/apache/lucene/search/trie/LongTrieRangeQuery.java
@@ -0,0 +1,66 @@
+package org.apache.lucene.search.trie;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.search.Query;
+
+/**
+ * Implementation of a Lucene {@link Query} that implements trie-based range querying for longs/doubles.
+ * This query depends on a specific structure of terms in the index that can only be created
+ * by indexing via {@link LongTrieTokenStream} methods.
+ * precisionStep
 must be equal to or a multiple of the precisionStep
+ * used for indexing the values.
+ * You can leave the bounds open, by supplying null
for min
and/or
+ * max
. Inclusive/exclusive bounds can also be supplied.
+ * To query double values use the converter {@link TrieUtils#doubleToSortableLong}.
+ */
+public class LongTrieRangeQuery extends AbstractTrieRangeQuery {
+
+ /**
+ * A trie query for matching trie coded values using the given field name and
+ * the default helper field.
+ */
+ public LongTrieRangeQuery(final String field, final int precisionStep,
+ final Long min, final Long max, final boolean minInclusive, final boolean maxInclusive
+ ) {
+ super(field,precisionStep,min,max,minInclusive,maxInclusive);
+ }
+
+ //@Override
+ void passRanges(TrieRangeTermEnum enumerator) {
+ // calculate the upper and lower bounds respecting the inclusive and null values.
+ long minBound=(this.min==null) ? Long.MIN_VALUE : (
+ minInclusive ? this.min.longValue() : (this.min.longValue()+1L)
+ );
+ long maxBound=(this.max==null) ? Long.MAX_VALUE : (
+ maxInclusive ? this.max.longValue() : (this.max.longValue()-1L)
+ );
+
+ TrieUtils.splitLongRange(enumerator.getLongRangeBuilder(), precisionStep, minBound, maxBound);
+ }
+
+ /** Returns the lower value of this range query */
+ public Long getMin() { return (Long)min; }
+
+ /** Returns the upper value of this range query */
+ public Long getMax() { return (Long)max; }
+
+}
diff --git a/contrib/queries/src/java/org/apache/lucene/search/trie/LongTrieTokenStream.java b/contrib/queries/src/java/org/apache/lucene/search/trie/LongTrieTokenStream.java
index 0ca6ef704c3..c4f215a1288 100644
--- a/contrib/queries/src/java/org/apache/lucene/search/trie/LongTrieTokenStream.java
+++ b/contrib/queries/src/java/org/apache/lucene/search/trie/LongTrieTokenStream.java
@@ -1,172 +1,172 @@
-package org.apache.lucene.search.trie;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Token;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
-import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-
-/**
- * This class provides a {@link TokenStream} for indexing long
values
- * that can be queried by {@link LongTrieRangeFilter}. This stream is not intended
- * to be used in analyzers, its more for iterating the different precisions during
- * indexing a specific numeric value.
- * long
value is indexed as multiple string encoded terms, each reduced
- * by zeroing bits from the right. Each value is also prefixed (in the first char) by the
- * shift
value (number of bits removed) used during encoding.
- * precisionStep
in this API. For comparing the different step values, see the
- * {@linkplain org.apache.lucene.search.trie package description}.
- *
- * Field field = new Field(name, new LongTrieTokenStream(value, precisionStep));
- * field.setOmitNorms(true);
- * field.setOmitTermFreqAndPositions(true);
- * document.add(field);
- *
- *
- * // init
- * TokenStream stream = new LongTrieTokenStream(precisionStep);
- * Field field = new Field(name, stream);
- * field.setOmitNorms(true);
- * field.setOmitTermFreqAndPositions(true);
- * Document doc = new Document();
- * document.add(field);
- * // use this code to index many documents:
- * stream.setValue(value1)
- * writer.addDocument(document);
- * stream.setValue(value2)
- * writer.addDocument(document);
- * ...
- *
- * value
with the given
- * precisionStep
. As instance creating is a major cost,
- * consider using a {@link #LongTrieTokenStream(int)} instance once for
- * indexing a large number of documents and assign a value with
- * {@link #setValue} for each document.
- * To index double values use the converter {@link TrieUtils#doubleToSortableLong}.
- */
- public LongTrieTokenStream(final long value, final int precisionStep) {
- if (precisionStep<1 || precisionStep>64)
- throw new IllegalArgumentException("precisionStep may only be 1..64");
- this.value = value;
- this.precisionStep = precisionStep;
- termAtt = (TermAttribute) addAttribute(TermAttribute.class);
- typeAtt = (TypeAttribute) addAttribute(TypeAttribute.class);
- posIncrAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class);
- shiftAtt = (ShiftAttribute) addAttribute(ShiftAttribute.class);
- }
-
- /**
- * Creates a token stream for indexing values with the given
- * precisionStep
. This stream is initially "empty"
- * (using a numeric value of 0), assign a value before indexing
- * each document using {@link #setValue}.
- */
- public LongTrieTokenStream(final int precisionStep) {
- this(0L, precisionStep);
- }
-
- /**
- * Resets the token stream to deliver prefix encoded values
- * for value
. Use this method to index the same
- * numeric field for a large number of documents and reuse the
- * current stream instance.
- * To index double values use the converter {@link TrieUtils#doubleToSortableLong}.
- */
- public void setValue(final long value) {
- this.value = value;
- reset();
- }
-
- // @Override
- public void reset() {
- shift = 0;
- }
-
- // @Override
- public boolean incrementToken() {
- if (shift>=64) return false;
- final char[] buffer = termAtt.resizeTermBuffer(TrieUtils.LONG_BUF_SIZE);
- termAtt.setTermLength(TrieUtils.longToPrefixCoded(value, shift, buffer));
- shiftAtt.setShift(shift);
- if (shift==0) {
- typeAtt.setType(TOKEN_TYPE_FULL_PREC);
- posIncrAtt.setPositionIncrement(1);
- } else {
- typeAtt.setType(TOKEN_TYPE_LOWER_PREC);
- posIncrAtt.setPositionIncrement(0);
- }
- shift += precisionStep;
- return true;
- }
-
- // @Override
- /** @deprecated */
- public Token next(final Token reusableToken) {
- if (shift>=64) return null;
- reusableToken.clear();
- final char[] buffer = reusableToken.resizeTermBuffer(TrieUtils.LONG_BUF_SIZE);
- reusableToken.setTermLength(TrieUtils.longToPrefixCoded(value, shift, buffer));
- if (shift==0) {
- reusableToken.setType(TOKEN_TYPE_FULL_PREC);
- reusableToken.setPositionIncrement(1);
- } else {
- reusableToken.setType(TOKEN_TYPE_LOWER_PREC);
- reusableToken.setPositionIncrement(0);
- }
- shift += precisionStep;
- return reusableToken;
- }
-
- // @Override
- public String toString() {
- final StringBuffer sb = new StringBuffer("(trie-long,value=").append(value);
- sb.append(",precisionStep=").append(precisionStep).append(')');
- return sb.toString();
- }
-
- // members
- private final TermAttribute termAtt;
- private final TypeAttribute typeAtt;
- private final PositionIncrementAttribute posIncrAtt;
- private final ShiftAttribute shiftAtt;
-
- private int shift = 0;
- private long value;
- private final int precisionStep;
-}
+package org.apache.lucene.search.trie;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+
+/**
+ * This class provides a {@link TokenStream} for indexing long
values
+ * that can be queried by {@link LongTrieRangeFilter}. This stream is not intended
+ * to be used in analyzers, it's more for iterating the different precisions during
+ * indexing a specific numeric value.
+ * long
value is indexed as multiple string encoded terms, each reduced
+ * by zeroing bits from the right. Each value is also prefixed (in the first char) by the
+ * shift
value (number of bits removed) used during encoding.
+ * precisionStep
in this API. For comparing the different step values, see the
+ * {@linkplain org.apache.lucene.search.trie package description}.
+ *
+ * Field field = new Field(name, new LongTrieTokenStream(value, precisionStep));
+ * field.setOmitNorms(true);
+ * field.setOmitTermFreqAndPositions(true);
+ * document.add(field);
+ *
+ *
+ * // init
+ * TokenStream stream = new LongTrieTokenStream(precisionStep);
+ * Field field = new Field(name, stream);
+ * field.setOmitNorms(true);
+ * field.setOmitTermFreqAndPositions(true);
+ * Document doc = new Document();
+ * document.add(field);
+ * // use this code to index many documents:
+ * stream.setValue(value1)
+ * writer.addDocument(document);
+ * stream.setValue(value2)
+ * writer.addDocument(document);
+ * ...
+ * </pre>
+ */
+public class LongTrieTokenStream extends TokenStream {
+
+ /** The full precision token gets this token type assigned. */
+ public static final String TOKEN_TYPE_FULL_PREC = "fullPrecTrieLong";
+
+ /** The lower precision tokens gets this token type assigned. */
+ public static final String TOKEN_TYPE_LOWER_PREC = "lowerPrecTrieLong";
+
+ /**
+ * Creates a token stream for indexing value with the given
+ * precisionStep
 . As instance creation is a major cost,
+ * consider using a {@link #LongTrieTokenStream(int)} instance once for
+ * indexing a large number of documents and assign a value with
+ * {@link #setValue} for each document.
+ * To index double values use the converter {@link TrieUtils#doubleToSortableLong}.
+ */
+ public LongTrieTokenStream(final long value, final int precisionStep) {
+ if (precisionStep<1 || precisionStep>64)
+ throw new IllegalArgumentException("precisionStep may only be 1..64");
+ this.value = value;
+ this.precisionStep = precisionStep;
+ termAtt = (TermAttribute) addAttribute(TermAttribute.class);
+ typeAtt = (TypeAttribute) addAttribute(TypeAttribute.class);
+ posIncrAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class);
+ shiftAtt = (ShiftAttribute) addAttribute(ShiftAttribute.class);
+ }
+
+ /**
+ * Creates a token stream for indexing values with the given
+ * precisionStep
. This stream is initially "empty"
+ * (using a numeric value of 0), assign a value before indexing
+ * each document using {@link #setValue}.
+ */
+ public LongTrieTokenStream(final int precisionStep) {
+ this(0L, precisionStep);
+ }
+
+ /**
+ * Resets the token stream to deliver prefix encoded values
+ * for value
. Use this method to index the same
+ * numeric field for a large number of documents and reuse the
+ * current stream instance.
+ * To index double values use the converter {@link TrieUtils#doubleToSortableLong}.
+ */
+ public void setValue(final long value) {
+ this.value = value;
+ reset();
+ }
+
+ // @Override
+ public void reset() {
+ shift = 0;
+ }
+
+ // @Override
+ public boolean incrementToken() {
+ if (shift>=64) return false;
+ final char[] buffer = termAtt.resizeTermBuffer(TrieUtils.LONG_BUF_SIZE);
+ termAtt.setTermLength(TrieUtils.longToPrefixCoded(value, shift, buffer));
+ shiftAtt.setShift(shift);
+ if (shift==0) {
+ typeAtt.setType(TOKEN_TYPE_FULL_PREC);
+ posIncrAtt.setPositionIncrement(1);
+ } else {
+ typeAtt.setType(TOKEN_TYPE_LOWER_PREC);
+ posIncrAtt.setPositionIncrement(0);
+ }
+ shift += precisionStep;
+ return true;
+ }
+
+ // @Override
+ /** @deprecated */
+ public Token next(final Token reusableToken) {
+ if (shift>=64) return null;
+ reusableToken.clear();
+ final char[] buffer = reusableToken.resizeTermBuffer(TrieUtils.LONG_BUF_SIZE);
+ reusableToken.setTermLength(TrieUtils.longToPrefixCoded(value, shift, buffer));
+ if (shift==0) {
+ reusableToken.setType(TOKEN_TYPE_FULL_PREC);
+ reusableToken.setPositionIncrement(1);
+ } else {
+ reusableToken.setType(TOKEN_TYPE_LOWER_PREC);
+ reusableToken.setPositionIncrement(0);
+ }
+ shift += precisionStep;
+ return reusableToken;
+ }
+
+ // @Override
+ public String toString() {
+ final StringBuffer sb = new StringBuffer("(trie-long,value=").append(value);
+ sb.append(",precisionStep=").append(precisionStep).append(')');
+ return sb.toString();
+ }
+
+ // members
+ private final TermAttribute termAtt;
+ private final TypeAttribute typeAtt;
+ private final PositionIncrementAttribute posIncrAtt;
+ private final ShiftAttribute shiftAtt;
+
+ private int shift = 0;
+ private long value;
+ private final int precisionStep;
+}
diff --git a/contrib/queries/src/java/org/apache/lucene/search/trie/ShiftAttribute.java b/contrib/queries/src/java/org/apache/lucene/search/trie/ShiftAttribute.java
index 308c23c0b59..a5a6b211422 100644
--- a/contrib/queries/src/java/org/apache/lucene/search/trie/ShiftAttribute.java
+++ b/contrib/queries/src/java/org/apache/lucene/search/trie/ShiftAttribute.java
@@ -1,70 +1,70 @@
-package org.apache.lucene.search.trie;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.util.Attribute;
-
-import java.io.Serializable;
-
-/**
- * This attribute is updated by {@link IntTrieTokenStream} and {@link LongTrieTokenStream}
- * to the shift value of the current prefix-encoded token.
- * It may be used by filters or consumers to e.g. distribute the values to various fields.
- */
-public final class ShiftAttribute extends Attribute implements Cloneable, Serializable {
- private int shift = 0;
-
- /**
- * Returns the shift value of the current prefix encoded token.
- */
- public int getShift() {
- return shift;
- }
-
- /**
- * Sets the shift value.
- */
- public void setShift(final int shift) {
- this.shift = shift;
- }
-
- public void clear() {
- shift = 0;
- }
-
- public String toString() {
- return "shift=" + shift;
- }
-
- public boolean equals(Object other) {
- if (this == other) return true;
- if (other instanceof ShiftAttribute) {
- return ((ShiftAttribute) other).shift == shift;
- }
- return false;
- }
-
- public int hashCode() {
- return shift;
- }
-
- public void copyTo(Attribute target) {
- final ShiftAttribute t = (ShiftAttribute) target;
- t.setShift(shift);
- }
-}
+package org.apache.lucene.search.trie;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.Attribute;
+
+import java.io.Serializable;
+
+/**
+ * This attribute is updated by {@link IntTrieTokenStream} and {@link LongTrieTokenStream}
+ * to the shift value of the current prefix-encoded token.
+ * It may be used by filters or consumers to e.g. distribute the values to various fields.
+ */
+public final class ShiftAttribute extends Attribute implements Cloneable, Serializable {
+ private int shift = 0;
+
+ /**
+ * Returns the shift value of the current prefix encoded token.
+ */
+ public int getShift() {
+ return shift;
+ }
+
+ /**
+ * Sets the shift value.
+ */
+ public void setShift(final int shift) {
+ this.shift = shift;
+ }
+
+ public void clear() {
+ shift = 0;
+ }
+
+ public String toString() {
+ return "shift=" + shift;
+ }
+
+ public boolean equals(Object other) {
+ if (this == other) return true;
+ if (other instanceof ShiftAttribute) {
+ return ((ShiftAttribute) other).shift == shift;
+ }
+ return false;
+ }
+
+ public int hashCode() {
+ return shift;
+ }
+
+ public void copyTo(Attribute target) {
+ final ShiftAttribute t = (ShiftAttribute) target;
+ t.setShift(shift);
+ }
+}
diff --git a/contrib/queries/src/java/org/apache/lucene/search/trie/TrieRangeTermEnum.java b/contrib/queries/src/java/org/apache/lucene/search/trie/TrieRangeTermEnum.java
new file mode 100644
index 00000000000..33973577b2e
--- /dev/null
+++ b/contrib/queries/src/java/org/apache/lucene/search/trie/TrieRangeTermEnum.java
@@ -0,0 +1,140 @@
+package org.apache.lucene.search.trie;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.LinkedList;
+
+import org.apache.lucene.search.FilteredTermEnum;
+import org.apache.lucene.search.MultiTermQuery; // for javadocs
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+
+/**
+ * Subclass of FilteredTermEnum for enumerating all terms that match the
+ * sub-ranges for trie range queries.
+ * false
ends iterating the current enum
+ * and forwards to the next sub-range.
+ */
+ //@Override
+ protected boolean termCompare(Term term) {
+ return (term.field() == query.field && term.text().compareTo(currentUpperBound) <= 0);
+ }
+
+ /** Increments the enumeration to the next element. True if one exists. */
+ //@Override
+ public boolean next() throws IOException {
+ // if a current term exists, the actual enum is initialized:
+ // try change to next term, if no such term exists, fall-through
+ if (currentTerm != null) {
+ assert actualEnum!=null;
+ if (actualEnum.next()) {
+ currentTerm = actualEnum.term();
+ if (termCompare(currentTerm)) return true;
+ }
+ }
+ // if all above fails, we go forward to the next enum,
+ // if one is available
+ currentTerm = null;
+ if (rangeBounds.size() < 2) return false;
+ // close the current enum and read next bounds
+ if (actualEnum != null) {
+ actualEnum.close();
+ actualEnum = null;
+ }
+ final String lowerBound = (String)rangeBounds.removeFirst();
+ this.currentUpperBound = (String)rangeBounds.removeFirst();
+ // this call recursively uses next(), if no valid term in
+ // next enum found.
+ // if this behavior is changed/modified in the superclass,
+ // this enum will not work anymore!
+ setEnum(reader.terms(new Term(query.field, lowerBound)));
+ return (currentTerm != null);
+ }
+
+ /** Closes the enumeration to further activity, freeing resources. */
+ //@Override
+ public void close() throws IOException {
+ rangeBounds.clear();
+ currentUpperBound = null;
+ super.close();
+ }
+
+}
diff --git a/contrib/queries/src/java/org/apache/lucene/search/trie/package.html b/contrib/queries/src/java/org/apache/lucene/search/trie/package.html
index 75fbe81cef6..00bfe78c3a6 100644
--- a/contrib/queries/src/java/org/apache/lucene/search/trie/package.html
+++ b/contrib/queries/src/java/org/apache/lucene/search/trie/package.html
@@ -106,14 +106,14 @@ more info about this in the stream documentation)Searching
// Java 1.4, because Long.valueOf(long) is not available: - Query q = new LongTrieRangeFilter("exampleLong", precisionStep, new Long(123L), new Long(999999L), true, true).asQuery(); + Query q = new LongTrieRangeQuery("exampleLong", precisionStep, new Long(123L), new Long(999999L), true, true); // OR, Java 1.5, using autoboxing: - Query q = new LongTrieRangeFilter("exampleLong", precisionStep, 123L, 999999L, true, true).asQuery(); + Query q = new LongTrieRangeQuery("exampleLong", precisionStep, 123L, 999999L, true, true); // execute the search, as usual: TopDocs docs = searcher.search(q, 10); @@ -132,7 +132,7 @@ more info about this in the stream documentation) that the old {@link org.apache.lucene.search.RangeQuery} (with raised {@link org.apache.lucene.search.BooleanQuery} clause count) took about 30-40 secs to complete, {@link org.apache.lucene.search.ConstantScoreRangeQuery} took 5 secs and executing -{@link org.apache.lucene.search.trie.LongTrieRangeFilter}.asQuery()
took <100ms to +{@link org.apache.lucene.search.trie.LongTrieRangeQuery} took <100ms to complete (on an Opteron64 machine, Java 1.5, 8 bit precision step). This query type was developed for a geographic portal, where the performance for e.g. bounding boxes or exact date/time stamps is important. diff --git a/contrib/queries/src/test/org/apache/lucene/search/trie/TestIntTrieRangeFilter.java b/contrib/queries/src/test/org/apache/lucene/search/trie/TestIntTrieRangeQuery.java similarity index 66% rename from contrib/queries/src/test/org/apache/lucene/search/trie/TestIntTrieRangeFilter.java rename to contrib/queries/src/test/org/apache/lucene/search/trie/TestIntTrieRangeQuery.java index b2599954fc6..cc845dd9a66 100644 --- a/contrib/queries/src/test/org/apache/lucene/search/trie/TestIntTrieRangeFilter.java +++ b/contrib/queries/src/test/org/apache/lucene/search/trie/TestIntTrieRangeQuery.java @@ -1,320 +1,371 @@ -package org.apache.lucene.search.trie; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import java.util.Random; - -import org.apache.lucene.analysis.WhitespaceAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexWriter.MaxFieldLength; -import org.apache.lucene.store.RAMDirectory; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.TopDocs; -import org.apache.lucene.search.Sort; -import org.apache.lucene.search.RangeQuery; -import org.apache.lucene.util.LuceneTestCase; - -public class TestIntTrieRangeFilter extends LuceneTestCase { - // distance of entries - private static final int distance = 6666; - // shift the starting of the values to the left, to also have negative values: - private static final int startOffset = - 1 << 15; - // number of docs to generate for testing - private static final int noDocs = 10000; - - private static Field newField(String name, int precisionStep) { - IntTrieTokenStream stream = new IntTrieTokenStream(precisionStep); - stream.setUseNewAPI(true); - Field f=new Field(name, stream); - f.setOmitTermFreqAndPositions(true); - f.setOmitNorms(true); - return f; - } - - private static final RAMDirectory directory; - private static final IndexSearcher searcher; - static { - try { - directory = new RAMDirectory(); - IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), - true, MaxFieldLength.UNLIMITED); - - Field - field8 = newField("field8", 8), - field4 = newField("field4", 4), - field2 = newField("field2", 2), - ascfield8 = newField("ascfield8", 8), - ascfield4 = newField("ascfield4", 4), - ascfield2 = newField("ascfield2", 2); - - // Add a series of noDocs docs with increasing int values - for (int l=0; lupper) { - int a=lower; lower=upper; upper=a; - } - // test inclusive range - IntTrieRangeFilter tf=new IntTrieRangeFilter(field, precisionStep, new 
Integer(lower), new Integer(upper), true, true); - RangeQuery cq=new RangeQuery(field, TrieUtils.intToPrefixCoded(lower), TrieUtils.intToPrefixCoded(upper), true, true); - cq.setConstantScoreRewrite(true); - TopDocs tTopDocs = searcher.search(tf.asQuery(), 1); - TopDocs cTopDocs = searcher.search(cq, 1); - assertEquals("Returned count for IntTrieRangeFilter and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); - termCount += tf.getLastNumberOfTerms(); - // test exclusive range - tf=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new Integer(upper), false, false); - cq=new RangeQuery(field, TrieUtils.intToPrefixCoded(lower), TrieUtils.intToPrefixCoded(upper), false, false); - cq.setConstantScoreRewrite(true); - tTopDocs = searcher.search(tf.asQuery(), 1); - cTopDocs = searcher.search(cq, 1); - assertEquals("Returned count for IntTrieRangeFilter and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); - termCount += tf.getLastNumberOfTerms(); - // test left exclusive range - tf=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new Integer(upper), false, true); - cq=new RangeQuery(field, TrieUtils.intToPrefixCoded(lower), TrieUtils.intToPrefixCoded(upper), false, true); - cq.setConstantScoreRewrite(true); - tTopDocs = searcher.search(tf.asQuery(), 1); - cTopDocs = searcher.search(cq, 1); - assertEquals("Returned count for IntTrieRangeFilter and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); - termCount += tf.getLastNumberOfTerms(); - // test right exclusive range - tf=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new Integer(upper), true, false); - cq=new RangeQuery(field, TrieUtils.intToPrefixCoded(lower), TrieUtils.intToPrefixCoded(upper), true, false); - cq.setConstantScoreRewrite(true); - tTopDocs = searcher.search(tf.asQuery(), 1); - cTopDocs = searcher.search(cq, 1); - assertEquals("Returned count for IntTrieRangeFilter and RangeQuery must be equal", 
cTopDocs.totalHits, tTopDocs.totalHits ); - termCount += tf.getLastNumberOfTerms(); - } - System.out.println("Average number of terms during random search on '" + field + "': " + (((double)termCount)/(50*4))); - } - - public void testRandomTrieAndClassicRangeQuery_8bit() throws Exception { - testRandomTrieAndClassicRangeQuery(8); - } - - public void testRandomTrieAndClassicRangeQuery_4bit() throws Exception { - testRandomTrieAndClassicRangeQuery(4); - } - - public void testRandomTrieAndClassicRangeQuery_2bit() throws Exception { - testRandomTrieAndClassicRangeQuery(2); - } - - private void testRangeSplit(int precisionStep) throws Exception { - final Random rnd=newRandom(); - String field="ascfield"+precisionStep; - // 50 random tests - for (int i=0; i<50; i++) { - int lower=(int)(rnd.nextDouble()*noDocs - noDocs/2); - int upper=(int)(rnd.nextDouble()*noDocs - noDocs/2); - if (lower>upper) { - int a=lower; lower=upper; upper=a; - } - // test inclusive range - Query tq=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new Integer(upper), true, true).asQuery(); - TopDocs tTopDocs = searcher.search(tq, 1); - assertEquals("Returned count of range query must be equal to inclusive range length", upper-lower+1, tTopDocs.totalHits ); - // test exclusive range - tq=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new Integer(upper), false, false).asQuery(); - tTopDocs = searcher.search(tq, 1); - assertEquals("Returned count of range query must be equal to exclusive range length", Math.max(upper-lower-1, 0), tTopDocs.totalHits ); - // test left exclusive range - tq=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new Integer(upper), false, true).asQuery(); - tTopDocs = searcher.search(tq, 1); - assertEquals("Returned count of range query must be equal to half exclusive range length", upper-lower, tTopDocs.totalHits ); - // test right exclusive range - tq=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new 
Integer(upper), true, false).asQuery(); - tTopDocs = searcher.search(tq, 1); - assertEquals("Returned count of range query must be equal to half exclusive range length", upper-lower, tTopDocs.totalHits ); - } - } - - public void testRangeSplit_8bit() throws Exception { - testRangeSplit(8); - } - - public void testRangeSplit_4bit() throws Exception { - testRangeSplit(4); - } - - public void testRangeSplit_2bit() throws Exception { - testRangeSplit(2); - } - - private void testSorting(int precisionStep) throws Exception { - final Random rnd=newRandom(); - String field="field"+precisionStep; - // 10 random tests, the index order is ascending, - // so using a reverse sort field should retun descending documents - for (int i=0; i<10; i++) { - int lower=(int)(rnd.nextDouble()*noDocs*distance)+startOffset; - int upper=(int)(rnd.nextDouble()*noDocs*distance)+startOffset; - if (lower>upper) { - int a=lower; lower=upper; upper=a; - } - Query tq=new IntTrieRangeFilter(field, precisionStep, new Integer(lower), new Integer(upper), true, true).asQuery(); - TopDocs topDocs = searcher.search(tq, null, noDocs, new Sort(TrieUtils.getIntSortField(field, true))); - if (topDocs.totalHits==0) continue; - ScoreDoc[] sd = topDocs.scoreDocs; - assertNotNull(sd); - int last=TrieUtils.prefixCodedToInt(searcher.doc(sd[0].doc).get("value")); - for (int j=1; j act ); - last=act; - } - } - } - - public void testSorting_8bit() throws Exception { - testSorting(8); - } - - public void testSorting_4bit() throws Exception { - testSorting(4); - } - - public void testSorting_2bit() throws Exception { - testSorting(2); - } - -} +package org.apache.lucene.search.trie; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.Random; + +import org.apache.lucene.analysis.WhitespaceAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriter.MaxFieldLength; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.search.RangeQuery; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.util.LuceneTestCase; + +public class TestIntTrieRangeQuery extends LuceneTestCase { + // distance of entries + private static final int distance = 6666; + // shift the starting of the values to the left, to also have negative values: + private static final int startOffset = - 1 << 15; + // number of docs to generate for testing + private static final int noDocs = 10000; + + private static Field newField(String name, int precisionStep) { + IntTrieTokenStream stream = new IntTrieTokenStream(precisionStep); + stream.setUseNewAPI(true); + Field f=new Field(name, stream); + f.setOmitTermFreqAndPositions(true); + f.setOmitNorms(true); + return f; + } + + 
private static final RAMDirectory directory; + private static final IndexSearcher searcher; + static { + try { + // set the theoretical maximum term count for 8bit (see docs for the number) + BooleanQuery.setMaxClauseCount(3*255*2 + 255); + + directory = new RAMDirectory(); + IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), + true, MaxFieldLength.UNLIMITED); + + Field + field8 = newField("field8", 8), + field4 = newField("field4", 4), + field2 = newField("field2", 2), + ascfield8 = newField("ascfield8", 8), + ascfield4 = newField("ascfield4", 4), + ascfield2 = newField("ascfield2", 2); + + // Add a series of noDocs docs with increasing int values + for (int l=0; l 0) { + assertEquals("Distinct term number is equal for all query types", lastTerms, terms); + } + lastTerms = terms; + } + } + + public void testRange_8bit() throws Exception { + testRange(8); + } + + public void testRange_4bit() throws Exception { + testRange(4); + } + + public void testRange_2bit() throws Exception { + testRange(2); + } + + public void testInverseRange() throws Exception { + IntTrieRangeFilter f = new IntTrieRangeFilter("field8", 8, new Integer(1000), new Integer(-1000), true, true); + assertSame("A inverse range should return the EMPTY_DOCIDSET instance", DocIdSet.EMPTY_DOCIDSET, f.getDocIdSet(searcher.getIndexReader())); + } + + private void testLeftOpenRange(int precisionStep) throws Exception { + String field="field"+precisionStep; + int count=3000; + int upper=(count-1)*distance + (distance/3) + startOffset; + IntTrieRangeQuery q=new IntTrieRangeQuery(field, precisionStep, null, new Integer(upper), true, true); + TopDocs topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER); + System.out.println("Found "+q.getTotalNumberOfTerms()+" distinct terms in left open range for field '"+field+"'."); + ScoreDoc[] sd = topDocs.scoreDocs; + assertNotNull(sd); + assertEquals("Score doc count", count, sd.length ); + Document doc=searcher.doc(sd[0].doc); + 
assertEquals("First doc", startOffset, TrieUtils.prefixCodedToInt(doc.get("value")) ); + doc=searcher.doc(sd[sd.length-1].doc); + assertEquals("Last doc", (count-1)*distance+startOffset, TrieUtils.prefixCodedToInt(doc.get("value")) ); + } + + public void testLeftOpenRange_8bit() throws Exception { + testLeftOpenRange(8); + } + + public void testLeftOpenRange_4bit() throws Exception { + testLeftOpenRange(4); + } + + public void testLeftOpenRange_2bit() throws Exception { + testLeftOpenRange(2); + } + + private void testRightOpenRange(int precisionStep) throws Exception { + String field="field"+precisionStep; + int count=3000; + int lower=(count-1)*distance + (distance/3) +startOffset; + IntTrieRangeQuery q=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), null, true, true); + TopDocs topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER); + System.out.println("Found "+q.getTotalNumberOfTerms()+" distinct terms in right open range for field '"+field+"'."); + ScoreDoc[] sd = topDocs.scoreDocs; + assertNotNull(sd); + assertEquals("Score doc count", noDocs-count, sd.length ); + Document doc=searcher.doc(sd[0].doc); + assertEquals("First doc", count*distance+startOffset, TrieUtils.prefixCodedToInt(doc.get("value")) ); + doc=searcher.doc(sd[sd.length-1].doc); + assertEquals("Last doc", (noDocs-1)*distance+startOffset, TrieUtils.prefixCodedToInt(doc.get("value")) ); + } + + public void testRightOpenRange_8bit() throws Exception { + testRightOpenRange(8); + } + + public void testRightOpenRange_4bit() throws Exception { + testRightOpenRange(4); + } + + public void testRightOpenRange_2bit() throws Exception { + testRightOpenRange(2); + } + + private void testRandomTrieAndClassicRangeQuery(int precisionStep) throws Exception { + final Random rnd=newRandom(); + String field="field"+precisionStep; + int termCountT=0,termCountC=0; + for (int i=0; i<50; i++) { + int lower=(int)(rnd.nextDouble()*noDocs*distance)+startOffset; + int 
upper=(int)(rnd.nextDouble()*noDocs*distance)+startOffset; + if (lower>upper) { + int a=lower; lower=upper; upper=a; + } + // test inclusive range + IntTrieRangeQuery tq=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), true, true); + RangeQuery cq=new RangeQuery(field, TrieUtils.intToPrefixCoded(lower), TrieUtils.intToPrefixCoded(upper), true, true); + cq.setConstantScoreRewrite(true); + TopDocs tTopDocs = searcher.search(tq, 1); + TopDocs cTopDocs = searcher.search(cq, 1); + assertEquals("Returned count for IntTrieRangeQuery and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); + termCountT += tq.getTotalNumberOfTerms(); + termCountC += cq.getTotalNumberOfTerms(); + // test exclusive range + tq=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), false, false); + cq=new RangeQuery(field, TrieUtils.intToPrefixCoded(lower), TrieUtils.intToPrefixCoded(upper), false, false); + cq.setConstantScoreRewrite(true); + tTopDocs = searcher.search(tq, 1); + cTopDocs = searcher.search(cq, 1); + assertEquals("Returned count for IntTrieRangeQuery and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); + termCountT += tq.getTotalNumberOfTerms(); + termCountC += cq.getTotalNumberOfTerms(); + // test left exclusive range + tq=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), false, true); + cq=new RangeQuery(field, TrieUtils.intToPrefixCoded(lower), TrieUtils.intToPrefixCoded(upper), false, true); + cq.setConstantScoreRewrite(true); + tTopDocs = searcher.search(tq, 1); + cTopDocs = searcher.search(cq, 1); + assertEquals("Returned count for IntTrieRangeQuery and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); + termCountT += tq.getTotalNumberOfTerms(); + termCountC += cq.getTotalNumberOfTerms(); + // test right exclusive range + tq=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), true, false); + 
cq=new RangeQuery(field, TrieUtils.intToPrefixCoded(lower), TrieUtils.intToPrefixCoded(upper), true, false); + cq.setConstantScoreRewrite(true); + tTopDocs = searcher.search(tq, 1); + cTopDocs = searcher.search(cq, 1); + assertEquals("Returned count for IntTrieRangeQuery and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); + termCountT += tq.getTotalNumberOfTerms(); + termCountC += cq.getTotalNumberOfTerms(); + } + System.out.println("Average number of terms during random search on '" + field + "':"); + System.out.println(" Trie query: " + (((double)termCountT)/(50*4))); + System.out.println(" Classical query: " + (((double)termCountC)/(50*4))); + } + + public void testRandomTrieAndClassicRangeQuery_8bit() throws Exception { + testRandomTrieAndClassicRangeQuery(8); + } + + public void testRandomTrieAndClassicRangeQuery_4bit() throws Exception { + testRandomTrieAndClassicRangeQuery(4); + } + + public void testRandomTrieAndClassicRangeQuery_2bit() throws Exception { + testRandomTrieAndClassicRangeQuery(2); + } + + private void testRangeSplit(int precisionStep) throws Exception { + final Random rnd=newRandom(); + String field="ascfield"+precisionStep; + // 50 random tests + for (int i=0; i<50; i++) { + int lower=(int)(rnd.nextDouble()*noDocs - noDocs/2); + int upper=(int)(rnd.nextDouble()*noDocs - noDocs/2); + if (lower>upper) { + int a=lower; lower=upper; upper=a; + } + // test inclusive range + Query tq=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), true, true); + TopDocs tTopDocs = searcher.search(tq, 1); + assertEquals("Returned count of range query must be equal to inclusive range length", upper-lower+1, tTopDocs.totalHits ); + // test exclusive range + tq=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), false, false); + tTopDocs = searcher.search(tq, 1); + assertEquals("Returned count of range query must be equal to exclusive range length", Math.max(upper-lower-1, 0), 
tTopDocs.totalHits ); + // test left exclusive range + tq=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), false, true); + tTopDocs = searcher.search(tq, 1); + assertEquals("Returned count of range query must be equal to half exclusive range length", upper-lower, tTopDocs.totalHits ); + // test right exclusive range + tq=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), true, false); + tTopDocs = searcher.search(tq, 1); + assertEquals("Returned count of range query must be equal to half exclusive range length", upper-lower, tTopDocs.totalHits ); + } + } + + public void testRangeSplit_8bit() throws Exception { + testRangeSplit(8); + } + + public void testRangeSplit_4bit() throws Exception { + testRangeSplit(4); + } + + public void testRangeSplit_2bit() throws Exception { + testRangeSplit(2); + } + + private void testSorting(int precisionStep) throws Exception { + final Random rnd=newRandom(); + String field="field"+precisionStep; + // 10 random tests, the index order is ascending, + // so using a reverse sort field should retun descending documents + for (int i=0; i<10; i++) { + int lower=(int)(rnd.nextDouble()*noDocs*distance)+startOffset; + int upper=(int)(rnd.nextDouble()*noDocs*distance)+startOffset; + if (lower>upper) { + int a=lower; lower=upper; upper=a; + } + Query tq=new IntTrieRangeQuery(field, precisionStep, new Integer(lower), new Integer(upper), true, true); + TopDocs topDocs = searcher.search(tq, null, noDocs, new Sort(TrieUtils.getIntSortField(field, true))); + if (topDocs.totalHits==0) continue; + ScoreDoc[] sd = topDocs.scoreDocs; + assertNotNull(sd); + int last=TrieUtils.prefixCodedToInt(searcher.doc(sd[0].doc).get("value")); + for (int j=1; j act ); + last=act; + } + } + } + + public void testSorting_8bit() throws Exception { + testSorting(8); + } + + public void testSorting_4bit() throws Exception { + testSorting(4); + } + + public void testSorting_2bit() throws Exception { + 
testSorting(2); + } + +} diff --git a/contrib/queries/src/test/org/apache/lucene/search/trie/TestIntTrieTokenStream.java b/contrib/queries/src/test/org/apache/lucene/search/trie/TestIntTrieTokenStream.java index 415798a6876..c853f671b8f 100644 --- a/contrib/queries/src/test/org/apache/lucene/search/trie/TestIntTrieTokenStream.java +++ b/contrib/queries/src/test/org/apache/lucene/search/trie/TestIntTrieTokenStream.java @@ -1,54 +1,54 @@ -package org.apache.lucene.search.trie; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.analysis.Token; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; - -public class TestIntTrieTokenStream extends LuceneTestCase { - - static final int precisionStep = 8; - static final int value = 123456; - - public void testStreamNewAPI() throws Exception { - final IntTrieTokenStream stream=new IntTrieTokenStream(value, precisionStep); - stream.setUseNewAPI(true); - // use getAttribute to test if attributes really exist, if not an IAE will be throwed - final ShiftAttribute shiftAtt = (ShiftAttribute) stream.getAttribute(ShiftAttribute.class); - final TermAttribute termAtt = (TermAttribute) stream.getAttribute(TermAttribute.class); - for (int shift=0; shift<32; shift+=precisionStep) { - assertTrue("New token is available", stream.incrementToken()); - assertEquals("Shift value", shift, shiftAtt.getShift()); - assertEquals("Term is correctly encoded", TrieUtils.intToPrefixCoded(value, shift), termAtt.term()); - } - assertFalse("No more tokens available", stream.incrementToken()); - } - - public void testStreamOldAPI() throws Exception { - final IntTrieTokenStream stream=new IntTrieTokenStream(value, precisionStep); - stream.setUseNewAPI(false); - Token tok=new Token(); - for (int shift=0; shift<32; shift+=precisionStep) { - assertNotNull("New token is available", tok=stream.next(tok)); - assertEquals("Term is correctly encoded", TrieUtils.intToPrefixCoded(value, shift), tok.term()); - } - assertNull("No more tokens available", stream.next(tok)); - } - -} +package org.apache.lucene.search.trie; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.analysis.Token; +import org.apache.lucene.analysis.tokenattributes.TermAttribute; + +public class TestIntTrieTokenStream extends LuceneTestCase { + + static final int precisionStep = 8; + static final int value = 123456; + + public void testStreamNewAPI() throws Exception { + final IntTrieTokenStream stream=new IntTrieTokenStream(value, precisionStep); + stream.setUseNewAPI(true); + // use getAttribute to test if attributes really exist, if not an IAE will be throwed + final ShiftAttribute shiftAtt = (ShiftAttribute) stream.getAttribute(ShiftAttribute.class); + final TermAttribute termAtt = (TermAttribute) stream.getAttribute(TermAttribute.class); + for (int shift=0; shift<32; shift+=precisionStep) { + assertTrue("New token is available", stream.incrementToken()); + assertEquals("Shift value", shift, shiftAtt.getShift()); + assertEquals("Term is correctly encoded", TrieUtils.intToPrefixCoded(value, shift), termAtt.term()); + } + assertFalse("No more tokens available", stream.incrementToken()); + } + + public void testStreamOldAPI() throws Exception { + final IntTrieTokenStream stream=new IntTrieTokenStream(value, precisionStep); + stream.setUseNewAPI(false); + Token tok=new Token(); + for (int shift=0; shift<32; shift+=precisionStep) { + assertNotNull("New token is available", tok=stream.next(tok)); + 
assertEquals("Term is correctly encoded", TrieUtils.intToPrefixCoded(value, shift), tok.term()); + } + assertNull("No more tokens available", stream.next(tok)); + } + +} diff --git a/contrib/queries/src/test/org/apache/lucene/search/trie/TestLongTrieRangeFilter.java b/contrib/queries/src/test/org/apache/lucene/search/trie/TestLongTrieRangeQuery.java similarity index 66% rename from contrib/queries/src/test/org/apache/lucene/search/trie/TestLongTrieRangeFilter.java rename to contrib/queries/src/test/org/apache/lucene/search/trie/TestLongTrieRangeQuery.java index 8ea19edd0be..0d33cd2fe4e 100644 --- a/contrib/queries/src/test/org/apache/lucene/search/trie/TestLongTrieRangeFilter.java +++ b/contrib/queries/src/test/org/apache/lucene/search/trie/TestLongTrieRangeQuery.java @@ -30,10 +30,13 @@ import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.Sort; +import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.RangeQuery; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.util.LuceneTestCase; -public class TestLongTrieRangeFilter extends LuceneTestCase { +public class TestLongTrieRangeQuery extends LuceneTestCase { // distance of entries private static final long distance = 66666L; // shift the starting of the values to the left, to also have negative values: @@ -53,7 +56,10 @@ public class TestLongTrieRangeFilter extends LuceneTestCase { private static final RAMDirectory directory; private static final IndexSearcher searcher; static { - try { + try { + // set the theoretical maximum term count for 8bit (see docs for the number) + BooleanQuery.setMaxClauseCount(7*255*2 + 255); + directory = new RAMDirectory(); IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, MaxFieldLength.UNLIMITED); @@ -97,20 +103,54 @@ public class TestLongTrieRangeFilter 
extends LuceneTestCase { } } + /** test for constant score + boolean query + filter, the other tests only use the constant score mode */ private void testRange(int precisionStep) throws Exception { String field="field"+precisionStep; int count=3000; long lower=(distance*3/2)+startOffset, upper=lower + count*distance + (distance/3); - LongTrieRangeFilter f=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), true, true); - TopDocs topDocs = searcher.search(f.asQuery(), null, noDocs, Sort.INDEXORDER); - System.out.println("Found "+f.getLastNumberOfTerms()+" distinct terms in range for field '"+field+"'."); - ScoreDoc[] sd = topDocs.scoreDocs; - assertNotNull(sd); - assertEquals("Score doc count", count, sd.length ); - Document doc=searcher.doc(sd[0].doc); - assertEquals("First doc", 2*distance+startOffset, TrieUtils.prefixCodedToLong(doc.get("value")) ); - doc=searcher.doc(sd[sd.length-1].doc); - assertEquals("Last doc", (1+count)*distance+startOffset, TrieUtils.prefixCodedToLong(doc.get("value")) ); + LongTrieRangeQuery q = new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), true, true); + LongTrieRangeFilter f = new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), true, true); + int lastTerms = 0; + for (byte i=0; i<3; i++) { + TopDocs topDocs; + int terms; + String type; + q.clearTotalNumberOfTerms(); + f.clearTotalNumberOfTerms(); + switch (i) { + case 0: + type = " (constant score)"; + q.setConstantScoreRewrite(true); + topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER); + terms = q.getTotalNumberOfTerms(); + break; + case 1: + type = " (boolean query)"; + q.setConstantScoreRewrite(false); + topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER); + terms = q.getTotalNumberOfTerms(); + break; + case 2: + type = " (filter)"; + topDocs = searcher.search(new MatchAllDocsQuery(), f, noDocs, Sort.INDEXORDER); + terms = f.getTotalNumberOfTerms(); + break; + default: + return; 
+ } + System.out.println("Found "+terms+" distinct terms in range for field '"+field+"'"+type+"."); + ScoreDoc[] sd = topDocs.scoreDocs; + assertNotNull(sd); + assertEquals("Score doc count"+type, count, sd.length ); + Document doc=searcher.doc(sd[0].doc); + assertEquals("First doc"+type, 2*distance+startOffset, TrieUtils.prefixCodedToLong(doc.get("value")) ); + doc=searcher.doc(sd[sd.length-1].doc); + assertEquals("Last doc"+type, (1+count)*distance+startOffset, TrieUtils.prefixCodedToLong(doc.get("value")) ); + if (i>0) { + assertEquals("Distinct term number is equal for all query types", lastTerms, terms); + } + lastTerms = terms; + } } public void testRange_8bit() throws Exception { @@ -125,13 +165,18 @@ public class TestLongTrieRangeFilter extends LuceneTestCase { testRange(2); } + public void testInverseRange() throws Exception { + LongTrieRangeFilter f = new LongTrieRangeFilter("field8", 8, new Long(1000L), new Long(-1000L), true, true); + assertSame("A inverse range should return the EMPTY_DOCIDSET instance", DocIdSet.EMPTY_DOCIDSET, f.getDocIdSet(searcher.getIndexReader())); + } + private void testLeftOpenRange(int precisionStep) throws Exception { String field="field"+precisionStep; int count=3000; long upper=(count-1)*distance + (distance/3) + startOffset; - LongTrieRangeFilter f=new LongTrieRangeFilter(field, precisionStep, null, new Long(upper), true, true); - TopDocs topDocs = searcher.search(f.asQuery(), null, noDocs, Sort.INDEXORDER); - System.out.println("Found "+f.getLastNumberOfTerms()+" distinct terms in left open range for field '"+field+"'."); + LongTrieRangeQuery q=new LongTrieRangeQuery(field, precisionStep, null, new Long(upper), true, true); + TopDocs topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER); + System.out.println("Found "+q.getTotalNumberOfTerms()+" distinct terms in left open range for field '"+field+"'."); ScoreDoc[] sd = topDocs.scoreDocs; assertNotNull(sd); assertEquals("Score doc count", count, sd.length ); @@ 
-157,9 +202,9 @@ public class TestLongTrieRangeFilter extends LuceneTestCase { String field="field"+precisionStep; int count=3000; long lower=(count-1)*distance + (distance/3) +startOffset; - LongTrieRangeFilter f=new LongTrieRangeFilter(field, precisionStep, new Long(lower), null, true, true); - TopDocs topDocs = searcher.search(f.asQuery(), null, noDocs, Sort.INDEXORDER); - System.out.println("Found "+f.getLastNumberOfTerms()+" distinct terms in right open range for field '"+field+"'."); + LongTrieRangeQuery q=new LongTrieRangeQuery(field, precisionStep, new Long(lower), null, true, true); + TopDocs topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER); + System.out.println("Found "+q.getTotalNumberOfTerms()+" distinct terms in right open range for field '"+field+"'."); ScoreDoc[] sd = topDocs.scoreDocs; assertNotNull(sd); assertEquals("Score doc count", noDocs-count, sd.length ); @@ -184,7 +229,7 @@ public class TestLongTrieRangeFilter extends LuceneTestCase { private void testRandomTrieAndClassicRangeQuery(int precisionStep) throws Exception { final Random rnd=newRandom(); String field="field"+precisionStep; - int termCount=0; + int termCountT=0,termCountC=0; for (int i=0; i<50; i++) { long lower=(long)(rnd.nextDouble()*noDocs*distance)+startOffset; long upper=(long)(rnd.nextDouble()*noDocs*distance)+startOffset; @@ -192,39 +237,45 @@ public class TestLongTrieRangeFilter extends LuceneTestCase { long a=lower; lower=upper; upper=a; } // test inclusive range - LongTrieRangeFilter tf=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), true, true); + LongTrieRangeQuery tq=new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), true, true); RangeQuery cq=new RangeQuery(field, TrieUtils.longToPrefixCoded(lower), TrieUtils.longToPrefixCoded(upper), true, true); cq.setConstantScoreRewrite(true); - TopDocs tTopDocs = searcher.search(tf.asQuery(), 1); + TopDocs tTopDocs = searcher.search(tq, 1); TopDocs cTopDocs = 
searcher.search(cq, 1); - assertEquals("Returned count for LongTrieRangeFilter and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); - termCount += tf.getLastNumberOfTerms(); + assertEquals("Returned count for LongTrieRangeQuery and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); + termCountT += tq.getTotalNumberOfTerms(); + termCountC += cq.getTotalNumberOfTerms(); // test exclusive range - tf=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), false, false); + tq=new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), false, false); cq=new RangeQuery(field, TrieUtils.longToPrefixCoded(lower), TrieUtils.longToPrefixCoded(upper), false, false); cq.setConstantScoreRewrite(true); - tTopDocs = searcher.search(tf.asQuery(), 1); + tTopDocs = searcher.search(tq, 1); cTopDocs = searcher.search(cq, 1); - assertEquals("Returned count for LongTrieRangeFilter and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); - termCount += tf.getLastNumberOfTerms(); + assertEquals("Returned count for LongTrieRangeQuery and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); + termCountT += tq.getTotalNumberOfTerms(); + termCountC += cq.getTotalNumberOfTerms(); // test left exclusive range - tf=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), false, true); + tq=new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), false, true); cq=new RangeQuery(field, TrieUtils.longToPrefixCoded(lower), TrieUtils.longToPrefixCoded(upper), false, true); cq.setConstantScoreRewrite(true); - tTopDocs = searcher.search(tf.asQuery(), 1); + tTopDocs = searcher.search(tq, 1); cTopDocs = searcher.search(cq, 1); - assertEquals("Returned count for LongTrieRangeFilter and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); - termCount += tf.getLastNumberOfTerms(); + assertEquals("Returned count for LongTrieRangeQuery and 
RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); + termCountT += tq.getTotalNumberOfTerms(); + termCountC += cq.getTotalNumberOfTerms(); // test right exclusive range - tf=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), true, false); + tq=new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), true, false); cq=new RangeQuery(field, TrieUtils.longToPrefixCoded(lower), TrieUtils.longToPrefixCoded(upper), true, false); cq.setConstantScoreRewrite(true); - tTopDocs = searcher.search(tf.asQuery(), 1); + tTopDocs = searcher.search(tq, 1); cTopDocs = searcher.search(cq, 1); - assertEquals("Returned count for LongTrieRangeFilter and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); - termCount += tf.getLastNumberOfTerms(); + assertEquals("Returned count for LongTrieRangeQuery and RangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); + termCountT += tq.getTotalNumberOfTerms(); + termCountC += cq.getTotalNumberOfTerms(); } - System.out.println("Average number of terms during random search on '" + field + "': " + (((double)termCount)/(50*4))); + System.out.println("Average number of terms during random search on '" + field + "':"); + System.out.println(" Trie query: " + (((double)termCountT)/(50*4))); + System.out.println(" Classical query: " + (((double)termCountC)/(50*4))); } public void testRandomTrieAndClassicRangeQuery_8bit() throws Exception { @@ -250,19 +301,19 @@ public class TestLongTrieRangeFilter extends LuceneTestCase { long a=lower; lower=upper; upper=a; } // test inclusive range - Query tq=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), true, true).asQuery(); + Query tq=new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), true, true); TopDocs tTopDocs = searcher.search(tq, 1); assertEquals("Returned count of range query must be equal to inclusive range length", upper-lower+1, tTopDocs.totalHits ); 
// test exclusive range - tq=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), false, false).asQuery(); + tq=new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), false, false); tTopDocs = searcher.search(tq, 1); assertEquals("Returned count of range query must be equal to exclusive range length", Math.max(upper-lower-1, 0), tTopDocs.totalHits ); // test left exclusive range - tq=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), false, true).asQuery(); + tq=new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), false, true); tTopDocs = searcher.search(tq, 1); assertEquals("Returned count of range query must be equal to half exclusive range length", upper-lower, tTopDocs.totalHits ); // test right exclusive range - tq=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), true, false).asQuery(); + tq=new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), true, false); tTopDocs = searcher.search(tq, 1); assertEquals("Returned count of range query must be equal to half exclusive range length", upper-lower, tTopDocs.totalHits ); } @@ -291,7 +342,7 @@ public class TestLongTrieRangeFilter extends LuceneTestCase { if (lower>upper) { long a=lower; lower=upper; upper=a; } - Query tq=new LongTrieRangeFilter(field, precisionStep, new Long(lower), new Long(upper), true, true).asQuery(); + Query tq=new LongTrieRangeQuery(field, precisionStep, new Long(lower), new Long(upper), true, true); TopDocs topDocs = searcher.search(tq, null, noDocs, new Sort(TrieUtils.getLongSortField(field, true))); if (topDocs.totalHits==0) continue; ScoreDoc[] sd = topDocs.scoreDocs; diff --git a/contrib/queries/src/test/org/apache/lucene/search/trie/TestLongTrieTokenStream.java b/contrib/queries/src/test/org/apache/lucene/search/trie/TestLongTrieTokenStream.java index be8c47b4719..a36d2c24698 100644 --- 
a/contrib/queries/src/test/org/apache/lucene/search/trie/TestLongTrieTokenStream.java +++ b/contrib/queries/src/test/org/apache/lucene/search/trie/TestLongTrieTokenStream.java @@ -1,54 +1,54 @@ -package org.apache.lucene.search.trie; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.analysis.Token; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; - -public class TestLongTrieTokenStream extends LuceneTestCase { - - static final int precisionStep = 8; - static final long value = 4573245871874382L; - - public void testStreamNewAPI() throws Exception { - final LongTrieTokenStream stream=new LongTrieTokenStream(value, precisionStep); - stream.setUseNewAPI(true); - // use getAttribute to test if attributes really exist, if not an IAE will be throwed - final ShiftAttribute shiftAtt = (ShiftAttribute) stream.getAttribute(ShiftAttribute.class); - final TermAttribute termAtt = (TermAttribute) stream.getAttribute(TermAttribute.class); - for (int shift=0; shift<64; shift+=precisionStep) { - assertTrue("New token is available", stream.incrementToken()); - assertEquals("Shift value", shift, shiftAtt.getShift()); - assertEquals("Term is correctly encoded", TrieUtils.longToPrefixCoded(value, shift), termAtt.term()); - } - assertFalse("No more tokens available", stream.incrementToken()); - } - - public void testStreamOldAPI() throws Exception { - final LongTrieTokenStream stream=new LongTrieTokenStream(value, precisionStep); - stream.setUseNewAPI(false); - Token tok=new Token(); - for (int shift=0; shift<64; shift+=precisionStep) { - assertNotNull("New token is available", tok=stream.next(tok)); - assertEquals("Term is correctly encoded", TrieUtils.longToPrefixCoded(value, shift), tok.term()); - } - assertNull("No more tokens available", stream.next(tok)); - } - -} +package org.apache.lucene.search.trie; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.analysis.Token; +import org.apache.lucene.analysis.tokenattributes.TermAttribute; + +public class TestLongTrieTokenStream extends LuceneTestCase { + + static final int precisionStep = 8; + static final long value = 4573245871874382L; + + public void testStreamNewAPI() throws Exception { + final LongTrieTokenStream stream=new LongTrieTokenStream(value, precisionStep); + stream.setUseNewAPI(true); + // use getAttribute to test if attributes really exist, if not an IAE will be throwed + final ShiftAttribute shiftAtt = (ShiftAttribute) stream.getAttribute(ShiftAttribute.class); + final TermAttribute termAtt = (TermAttribute) stream.getAttribute(TermAttribute.class); + for (int shift=0; shift<64; shift+=precisionStep) { + assertTrue("New token is available", stream.incrementToken()); + assertEquals("Shift value", shift, shiftAtt.getShift()); + assertEquals("Term is correctly encoded", TrieUtils.longToPrefixCoded(value, shift), termAtt.term()); + } + assertFalse("No more tokens available", stream.incrementToken()); + } + + public void testStreamOldAPI() throws Exception { + final LongTrieTokenStream stream=new LongTrieTokenStream(value, precisionStep); + stream.setUseNewAPI(false); + Token tok=new Token(); + for (int shift=0; shift<64; shift+=precisionStep) { + assertNotNull("New token is available", 
tok=stream.next(tok)); + assertEquals("Term is correctly encoded", TrieUtils.longToPrefixCoded(value, shift), tok.term()); + } + assertNull("No more tokens available", stream.next(tok)); + } + +}