mirror of https://github.com/apache/lucene.git
LUCENE-6539: Add DocValuesNumbersQuery
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1685540 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
0ef770fcd0
commit
3aa18151b3
|
@ -75,6 +75,13 @@ New Features
|
|||
* LUCENE-6504: Add Lucene53Codec, with norms implemented directly
|
||||
via the Directory's RandomAccessInput api. (Robert Muir)
|
||||
|
||||
* LUCENE-6539: Add new DocValuesNumbersQuery, to match any document
|
||||
containing one of the specified long values. This change also
|
||||
moves the existing DocValuesTermsQuery and DocValuesRangeQuery
|
||||
to Lucene's sandbox module, since in general these queries are
|
||||
quite slow and are only fast in specific cases. (Adrien Grand,
|
||||
Robert Muir, Mike McCandless)
|
||||
|
||||
API Changes
|
||||
|
||||
* LUCENE-6508: Simplify Lock api, there is now just
|
||||
|
|
|
@ -21,7 +21,6 @@ import java.text.Collator;
|
|||
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.SortedDocValuesField;
|
||||
import org.apache.lucene.search.DocValuesRangeQuery;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/**
|
||||
|
@ -29,7 +28,7 @@ import org.apache.lucene.util.BytesRef;
|
|||
* <p>
|
||||
* This is more efficient than {@link CollationKeyAnalyzer} if the field
|
||||
* only has one value: no uninversion is necessary to sort on the field,
|
||||
* locale-sensitive range queries can still work via {@link DocValuesRangeQuery},
|
||||
* locale-sensitive range queries can still work via {@code DocValuesRangeQuery},
|
||||
* and the underlying data structures built at index-time are likely more efficient
|
||||
* and use less memory than FieldCache.
|
||||
*/
|
||||
|
|
|
@ -17,22 +17,16 @@ package org.apache.lucene.collation;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.text.Collator;
|
||||
import java.util.Locale;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.MultiDocValues;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.search.BooleanClause.Occur;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.DocValuesRangeQuery;
|
||||
import org.apache.lucene.index.SortedDocValues;
|
||||
import org.apache.lucene.index.StoredDocument;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.QueryUtils;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
|
@ -41,6 +35,9 @@ import org.apache.lucene.util.BytesRef;
|
|||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
|
||||
import java.text.Collator;
|
||||
import java.util.Locale;
|
||||
|
||||
/**
|
||||
* trivial test of CollationDocValuesField
|
||||
*/
|
||||
|
@ -110,8 +107,7 @@ public class TestCollationDocValuesField extends LuceneTestCase {
|
|||
String end = TestUtil.randomSimpleString(random());
|
||||
BytesRef lowerVal = new BytesRef(collator.getCollationKey(start).toByteArray());
|
||||
BytesRef upperVal = new BytesRef(collator.getCollationKey(end).toByteArray());
|
||||
Query query = DocValuesRangeQuery.newBytesRefRange("collated", lowerVal, upperVal, true, true);
|
||||
doTestRanges(is, start, end, query, collator);
|
||||
doTestRanges(is, start, end, lowerVal, upperVal, collator);
|
||||
}
|
||||
} finally {
|
||||
ir.close();
|
||||
|
@ -119,25 +115,15 @@ public class TestCollationDocValuesField extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
private void doTestRanges(IndexSearcher is, String startPoint, String endPoint, Query query, Collator collator) throws Exception {
|
||||
QueryUtils.check(query);
|
||||
|
||||
// positive test
|
||||
TopDocs docs = is.search(query, is.getIndexReader().maxDoc());
|
||||
for (ScoreDoc doc : docs.scoreDocs) {
|
||||
String value = is.doc(doc.doc).get("field");
|
||||
assertTrue(collate(collator, value, startPoint) >= 0);
|
||||
assertTrue(collate(collator, value, endPoint) <= 0);
|
||||
}
|
||||
|
||||
// negative test
|
||||
BooleanQuery bq = new BooleanQuery();
|
||||
bq.add(new MatchAllDocsQuery(), Occur.SHOULD);
|
||||
bq.add(query, Occur.MUST_NOT);
|
||||
docs = is.search(bq, is.getIndexReader().maxDoc());
|
||||
for (ScoreDoc doc : docs.scoreDocs) {
|
||||
String value = is.doc(doc.doc).get("field");
|
||||
assertTrue(collate(collator, value, startPoint) < 0 || collate(collator, value, endPoint) > 0);
|
||||
private void doTestRanges(IndexSearcher is, String startPoint, String endPoint, BytesRef startBR, BytesRef endBR, Collator collator) throws Exception {
|
||||
SortedDocValues dvs = MultiDocValues.getSortedValues(is.getIndexReader(), "collated");
|
||||
for(int docID=0;docID<is.getIndexReader().maxDoc();docID++) {
|
||||
StoredDocument doc = is.doc(docID);
|
||||
String s = doc.getField("field").stringValue();
|
||||
boolean collatorAccepts = collator.compare(s, startPoint) >= 0 && collator.compare(s, endPoint) <= 0;
|
||||
BytesRef br = dvs.get(docID);
|
||||
boolean luceneAccepts = br.compareTo(startBR) >= 0 && br.compareTo(endBR) <= 0;
|
||||
assertEquals(collatorAccepts, luceneAccepts);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -19,7 +19,6 @@ package org.apache.lucene.collation;
|
|||
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.SortedDocValuesField;
|
||||
import org.apache.lucene.search.DocValuesRangeQuery;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
import com.ibm.icu.text.Collator;
|
||||
|
@ -30,7 +29,7 @@ import com.ibm.icu.text.RawCollationKey;
|
|||
* <p>
|
||||
* This is more efficient than {@link ICUCollationKeyAnalyzer} if the field
|
||||
* only has one value: no uninversion is necessary to sort on the field,
|
||||
* locale-sensitive range queries can still work via {@link DocValuesRangeQuery},
|
||||
* locale-sensitive range queries can still work via {@code DocValuesRangeQuery},
|
||||
* and the underlying data structures built at index-time are likely more efficient
|
||||
* and use less memory than FieldCache.
|
||||
*/
|
||||
|
|
|
@ -21,15 +21,12 @@ import org.apache.lucene.document.Document;
|
|||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.MultiDocValues;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.search.BooleanClause.Occur;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.DocValuesRangeQuery;
|
||||
import org.apache.lucene.index.SortedDocValues;
|
||||
import org.apache.lucene.index.StoredDocument;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.QueryUtils;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
|
@ -108,33 +105,22 @@ public class TestICUCollationDocValuesField extends LuceneTestCase {
|
|||
String end = TestUtil.randomSimpleString(random());
|
||||
BytesRef lowerVal = new BytesRef(collator.getCollationKey(start).toByteArray());
|
||||
BytesRef upperVal = new BytesRef(collator.getCollationKey(end).toByteArray());
|
||||
Query query = DocValuesRangeQuery.newBytesRefRange("collated", lowerVal, upperVal, true, true);
|
||||
doTestRanges(is, start, end, query, collator);
|
||||
doTestRanges(is, start, end, lowerVal, upperVal, collator);
|
||||
}
|
||||
|
||||
ir.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
private void doTestRanges(IndexSearcher is, String startPoint, String endPoint, Query query, Collator collator) throws Exception {
|
||||
QueryUtils.check(query);
|
||||
|
||||
// positive test
|
||||
TopDocs docs = is.search(query, is.getIndexReader().maxDoc());
|
||||
for (ScoreDoc doc : docs.scoreDocs) {
|
||||
String value = is.doc(doc.doc).get("field");
|
||||
assertTrue(collator.compare(value, startPoint) >= 0);
|
||||
assertTrue(collator.compare(value, endPoint) <= 0);
|
||||
}
|
||||
|
||||
// negative test
|
||||
BooleanQuery bq = new BooleanQuery();
|
||||
bq.add(new MatchAllDocsQuery(), Occur.SHOULD);
|
||||
bq.add(query, Occur.MUST_NOT);
|
||||
docs = is.search(bq, is.getIndexReader().maxDoc());
|
||||
for (ScoreDoc doc : docs.scoreDocs) {
|
||||
String value = is.doc(doc.doc).get("field");
|
||||
assertTrue(collator.compare(value, startPoint) < 0 || collator.compare(value, endPoint) > 0);
|
||||
private void doTestRanges(IndexSearcher is, String startPoint, String endPoint, BytesRef startBR, BytesRef endBR, Collator collator) throws Exception {
|
||||
SortedDocValues dvs = MultiDocValues.getSortedValues(is.getIndexReader(), "collated");
|
||||
for(int docID=0;docID<is.getIndexReader().maxDoc();docID++) {
|
||||
StoredDocument doc = is.doc(docID);
|
||||
String s = doc.getField("field").stringValue();
|
||||
boolean collatorAccepts = collator.compare(s, startPoint) >= 0 && collator.compare(s, endPoint) <= 0;
|
||||
BytesRef br = dvs.get(docID);
|
||||
boolean luceneAccepts = br.compareTo(startBR) >= 0 && br.compareTo(endBR) <= 0;
|
||||
assertEquals(collatorAccepts, luceneAccepts);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -73,11 +73,11 @@ public class TestComplexExplanations extends BaseExplanationTestCase {
|
|||
Occur.SHOULD);
|
||||
|
||||
Query t = new FilteredQuery(new TermQuery(new Term(FIELD, "xx")),
|
||||
new QueryWrapperFilter(new ItemizedQuery(new int[] {1,3})));
|
||||
new QueryWrapperFilter(matchTheseItems(new int[] {1,3})));
|
||||
t.setBoost(1000);
|
||||
q.add(t, Occur.SHOULD);
|
||||
|
||||
t = new ConstantScoreQuery(new ItemizedQuery(new int[] {0,2}));
|
||||
t = new ConstantScoreQuery(matchTheseItems(new int[] {0,2}));
|
||||
t.setBoost(30);
|
||||
q.add(t, Occur.SHOULD);
|
||||
|
||||
|
@ -136,11 +136,11 @@ public class TestComplexExplanations extends BaseExplanationTestCase {
|
|||
Occur.SHOULD);
|
||||
|
||||
Query t = new FilteredQuery(new TermQuery(new Term(FIELD, "xx")),
|
||||
new QueryWrapperFilter(new ItemizedQuery(new int[] {1,3})));
|
||||
new QueryWrapperFilter(matchTheseItems(new int[] {1,3})));
|
||||
t.setBoost(1000);
|
||||
q.add(t, Occur.SHOULD);
|
||||
|
||||
t = new ConstantScoreQuery(new ItemizedQuery(new int[] {0,2}));
|
||||
t = new ConstantScoreQuery(matchTheseItems(new int[] {0,2}));
|
||||
t.setBoost(-20.0f);
|
||||
q.add(t, Occur.SHOULD);
|
||||
|
||||
|
@ -207,11 +207,11 @@ public class TestComplexExplanations extends BaseExplanationTestCase {
|
|||
public void testFQ5() throws Exception {
|
||||
TermQuery query = new TermQuery(new Term(FIELD, "xx"));
|
||||
query.setBoost(0);
|
||||
bqtest(new FilteredQuery(query, new QueryWrapperFilter(new ItemizedQuery(new int[] {1,3}))), new int[] {3});
|
||||
bqtest(new FilteredQuery(query, new QueryWrapperFilter(matchTheseItems(new int[] {1,3}))), new int[] {3});
|
||||
}
|
||||
|
||||
public void testCSQ4() throws Exception {
|
||||
Query q = new ConstantScoreQuery(new ItemizedQuery(new int[] {3}));
|
||||
Query q = new ConstantScoreQuery(matchTheseItems(new int[] {3}));
|
||||
q.setBoost(0);
|
||||
bqtest(q, new int[] {3});
|
||||
}
|
||||
|
|
|
@ -105,28 +105,28 @@ public class TestSimpleExplanations extends BaseExplanationTestCase {
|
|||
|
||||
public void testFQ1() throws Exception {
|
||||
qtest(new FilteredQuery(new TermQuery(new Term(FIELD, "w1")),
|
||||
new QueryWrapperFilter(new ItemizedQuery(new int[] {0,1,2,3}))),
|
||||
new QueryWrapperFilter(matchTheseItems(new int[] {0,1,2,3}))),
|
||||
new int[] {0,1,2,3});
|
||||
}
|
||||
public void testFQ2() throws Exception {
|
||||
qtest(new FilteredQuery(new TermQuery(new Term(FIELD, "w1")),
|
||||
new QueryWrapperFilter(new ItemizedQuery(new int[] {0,2,3}))),
|
||||
new QueryWrapperFilter(matchTheseItems(new int[] {0,2,3}))),
|
||||
new int[] {0,2,3});
|
||||
}
|
||||
public void testFQ3() throws Exception {
|
||||
qtest(new FilteredQuery(new TermQuery(new Term(FIELD, "xx")),
|
||||
new QueryWrapperFilter(new ItemizedQuery(new int[] {1,3}))),
|
||||
new QueryWrapperFilter(matchTheseItems(new int[] {1,3}))),
|
||||
new int[] {3});
|
||||
}
|
||||
public void testFQ4() throws Exception {
|
||||
TermQuery termQuery = new TermQuery(new Term(FIELD, "xx"));
|
||||
termQuery.setBoost(1000);
|
||||
qtest(new FilteredQuery(termQuery, new QueryWrapperFilter(new ItemizedQuery(new int[] {1,3}))),
|
||||
qtest(new FilteredQuery(termQuery, new QueryWrapperFilter(matchTheseItems(new int[] {1,3}))),
|
||||
new int[] {3});
|
||||
}
|
||||
public void testFQ6() throws Exception {
|
||||
Query q = new FilteredQuery(new TermQuery(new Term(FIELD, "xx")),
|
||||
new QueryWrapperFilter(new ItemizedQuery(new int[] {1,3})));
|
||||
new QueryWrapperFilter(matchTheseItems(new int[] {1,3})));
|
||||
q.setBoost(1000);
|
||||
qtest(q, new int[] {3});
|
||||
}
|
||||
|
@ -134,15 +134,15 @@ public class TestSimpleExplanations extends BaseExplanationTestCase {
|
|||
/* ConstantScoreQueries */
|
||||
|
||||
public void testCSQ1() throws Exception {
|
||||
Query q = new ConstantScoreQuery(new ItemizedQuery(new int[] {0,1,2,3}));
|
||||
Query q = new ConstantScoreQuery(matchTheseItems(new int[] {0,1,2,3}));
|
||||
qtest(q, new int[] {0,1,2,3});
|
||||
}
|
||||
public void testCSQ2() throws Exception {
|
||||
Query q = new ConstantScoreQuery(new ItemizedQuery(new int[] {1,3}));
|
||||
Query q = new ConstantScoreQuery(matchTheseItems(new int[] {1,3}));
|
||||
qtest(q, new int[] {1,3});
|
||||
}
|
||||
public void testCSQ3() throws Exception {
|
||||
Query q = new ConstantScoreQuery(new ItemizedQuery(new int[] {0,2}));
|
||||
Query q = new ConstantScoreQuery(matchTheseItems(new int[] {0,2}));
|
||||
q.setBoost(1000);
|
||||
qtest(q, new int[] {0,2});
|
||||
}
|
||||
|
|
|
@ -0,0 +1,122 @@
|
|||
package org.apache.lucene.search;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.document.NumericDocValuesField;
|
||||
import org.apache.lucene.document.SortedNumericDocValuesField;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.SortedNumericDocValues;
|
||||
import org.apache.lucene.util.Bits;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* Like {@link DocValuesTermsQuery}, but this query only
|
||||
* runs on a long {@link NumericDocValuesField} or a
|
||||
* {@link SortedNumericDocValuesField}, matching
|
||||
* all documents whose value in the specified field is
|
||||
* contained in the provided set of long values.
|
||||
*
|
||||
* <p>
|
||||
* <b>NOTE</b>: be very careful using this query: it is
|
||||
* typically much slower than using {@code TermsQuery},
|
||||
* but in certain specialized cases may be faster.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class DocValuesNumbersQuery extends Query {
|
||||
|
||||
private final String field;
|
||||
private final Set<Long> numbers;
|
||||
|
||||
public DocValuesNumbersQuery(String field, Set<Long> numbers) {
|
||||
this.field = Objects.requireNonNull(field);
|
||||
this.numbers = Objects.requireNonNull(numbers, "Set of numbers must not be null");
|
||||
}
|
||||
|
||||
public DocValuesNumbersQuery(String field, Long... numbers) {
|
||||
this(field, new HashSet<Long>(Arrays.asList(numbers)));
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
if (!super.equals(obj)) {
|
||||
return false;
|
||||
}
|
||||
// super.equals ensures we are the same class:
|
||||
DocValuesNumbersQuery that = (DocValuesNumbersQuery) obj;
|
||||
if (!field.equals(that.field)) {
|
||||
return false;
|
||||
}
|
||||
return numbers.equals(that.numbers);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(field, numbers, getBoost());
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString(String defaultField) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append(field).append(": [");
|
||||
for (Long number : numbers) {
|
||||
sb.append(number).append(", ");
|
||||
}
|
||||
if (numbers.size() > 0) {
|
||||
sb.setLength(sb.length() - 2);
|
||||
}
|
||||
return sb.append(']').toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
|
||||
return new RandomAccessWeight(this) {
|
||||
|
||||
@Override
|
||||
protected Bits getMatchingDocs(LeafReaderContext context) throws IOException {
|
||||
final SortedNumericDocValues values = DocValues.getSortedNumeric(context.reader(), field);
|
||||
return new Bits() {
|
||||
|
||||
@Override
|
||||
public boolean get(int doc) {
|
||||
values.setDocument(doc);
|
||||
int count = values.count();
|
||||
for(int i=0;i<count;i++) {
|
||||
if (numbers.contains(values.valueAt(i))) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int length() {
|
||||
return context.reader().maxDoc();
|
||||
}
|
||||
};
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
|
@ -35,6 +35,12 @@ import org.apache.lucene.util.ToStringUtils;
|
|||
* usually slow since they do not use an inverted index. However, in the
|
||||
* dense case where most documents match this query, it <b>might</b> be as
|
||||
* fast or faster than a regular {@link NumericRangeQuery}.
|
||||
*
|
||||
* <p>
|
||||
* <b>NOTE</b>: be very careful using this query: it is
|
||||
* typically much slower than using {@code NumericRangeQuery},
|
||||
* but in certain specialized cases may be faster.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public final class DocValuesRangeQuery extends Query {
|
|
@ -45,6 +45,11 @@ import org.apache.lucene.util.LongBitSet;
|
|||
* characteristics, as described below.
|
||||
*
|
||||
* <p>
|
||||
* <b>NOTE</b>: be very careful using this query: it is
|
||||
* typically much slower than using {@code TermsQuery},
|
||||
* but in certain specialized cases may be faster.
|
||||
*
|
||||
* <p>
|
||||
* With each search, this query translates the specified
|
||||
* set of Terms into a private {@link LongBitSet} keyed by
|
||||
* term number per unique {@link IndexReader} (normally one
|
||||
|
@ -82,6 +87,8 @@ import org.apache.lucene.util.LongBitSet;
|
|||
*
|
||||
* <p>
|
||||
* Which query is best is very application dependent.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class DocValuesTermsQuery extends Query {
|
||||
|
|
@ -0,0 +1,194 @@
|
|||
package org.apache.lucene.search;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field.Store;
|
||||
import org.apache.lucene.document.NumericDocValuesField;
|
||||
import org.apache.lucene.document.SortedNumericDocValuesField;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.BooleanClause.Occur;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
public class TestDocValuesNumbersQuery extends LuceneTestCase {
|
||||
|
||||
public void testEquals() {
|
||||
assertEquals(new DocValuesNumbersQuery("field", 17L, 42L), new DocValuesNumbersQuery("field", 17L, 42L));
|
||||
assertEquals(new DocValuesNumbersQuery("field", 17L, 42L, 32416190071L), new DocValuesNumbersQuery("field", 17L, 32416190071L, 42L));
|
||||
assertFalse(new DocValuesNumbersQuery("field", 42L).equals(new DocValuesNumbersQuery("field2", 42L)));
|
||||
assertFalse(new DocValuesNumbersQuery("field", 17L, 42L).equals(new DocValuesNumbersQuery("field", 17L, 32416190071L)));
|
||||
}
|
||||
|
||||
public void testDuelTermsQuery() throws IOException {
|
||||
final int iters = atLeast(2);
|
||||
for (int iter = 0; iter < iters; ++iter) {
|
||||
final List<Long> allNumbers = new ArrayList<>();
|
||||
final int numNumbers = TestUtil.nextInt(random(), 1, 1 << TestUtil.nextInt(random(), 1, 10));
|
||||
for (int i = 0; i < numNumbers; ++i) {
|
||||
allNumbers.add(random().nextLong());
|
||||
}
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
|
||||
final int numDocs = atLeast(100);
|
||||
for (int i = 0; i < numDocs; ++i) {
|
||||
Document doc = new Document();
|
||||
final Long number = allNumbers.get(random().nextInt(allNumbers.size()));
|
||||
doc.add(new StringField("text", number.toString(), Store.NO));
|
||||
doc.add(new NumericDocValuesField("long", number));
|
||||
doc.add(new SortedNumericDocValuesField("twolongs", number));
|
||||
doc.add(new SortedNumericDocValuesField("twolongs", number*2));
|
||||
iw.addDocument(doc);
|
||||
}
|
||||
if (numNumbers > 1 && random().nextBoolean()) {
|
||||
iw.deleteDocuments(new TermQuery(new Term("text", allNumbers.get(0).toString())));
|
||||
}
|
||||
iw.commit();
|
||||
final IndexReader reader = iw.getReader();
|
||||
final IndexSearcher searcher = newSearcher(reader);
|
||||
iw.close();
|
||||
|
||||
if (reader.numDocs() == 0) {
|
||||
// may occasionally happen if all documents got the same term
|
||||
IOUtils.close(reader, dir);
|
||||
continue;
|
||||
}
|
||||
|
||||
for (int i = 0; i < 100; ++i) {
|
||||
final float boost = random().nextFloat() * 10;
|
||||
final int numQueryNumbers = TestUtil.nextInt(random(), 1, 1 << TestUtil.nextInt(random(), 1, 8));
|
||||
Set<Long> queryNumbers = new HashSet<>();
|
||||
Set<Long> queryNumbersX2 = new HashSet<>();
|
||||
for (int j = 0; j < numQueryNumbers; ++j) {
|
||||
Long number = allNumbers.get(random().nextInt(allNumbers.size()));
|
||||
queryNumbers.add(number);
|
||||
queryNumbersX2.add(2*number);
|
||||
}
|
||||
final BooleanQuery bq = new BooleanQuery();
|
||||
for (Long number : queryNumbers) {
|
||||
bq.add(new TermQuery(new Term("text", number.toString())), Occur.SHOULD);
|
||||
}
|
||||
Query q1 = new ConstantScoreQuery(bq);
|
||||
q1.setBoost(boost);
|
||||
|
||||
Query q2 = new DocValuesNumbersQuery("long", queryNumbers);
|
||||
q2.setBoost(boost);
|
||||
assertSameMatches(searcher, q1, q2, true);
|
||||
|
||||
Query q3 = new DocValuesNumbersQuery("twolongs", queryNumbers);
|
||||
q3.setBoost(boost);
|
||||
assertSameMatches(searcher, q1, q3, true);
|
||||
|
||||
Query q4 = new DocValuesNumbersQuery("twolongs", queryNumbersX2);
|
||||
q4.setBoost(boost);
|
||||
assertSameMatches(searcher, q1, q4, true);
|
||||
}
|
||||
|
||||
reader.close();
|
||||
dir.close();
|
||||
}
|
||||
}
|
||||
|
||||
public void testApproximation() throws IOException {
|
||||
final int iters = atLeast(2);
|
||||
for (int iter = 0; iter < iters; ++iter) {
|
||||
final List<Long> allNumbers = new ArrayList<>();
|
||||
final int numNumbers = TestUtil.nextInt(random(), 1, 1 << TestUtil.nextInt(random(), 1, 10));
|
||||
for (int i = 0; i < numNumbers; ++i) {
|
||||
allNumbers.add(random().nextLong());
|
||||
}
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
|
||||
final int numDocs = atLeast(100);
|
||||
for (int i = 0; i < numDocs; ++i) {
|
||||
Document doc = new Document();
|
||||
final Long number = allNumbers.get(random().nextInt(allNumbers.size()));
|
||||
doc.add(new StringField("text", number.toString(), Store.NO));
|
||||
doc.add(new NumericDocValuesField("long", number));
|
||||
iw.addDocument(doc);
|
||||
}
|
||||
if (numNumbers > 1 && random().nextBoolean()) {
|
||||
iw.deleteDocuments(new TermQuery(new Term("text", allNumbers.get(0).toString())));
|
||||
}
|
||||
iw.commit();
|
||||
final IndexReader reader = iw.getReader();
|
||||
final IndexSearcher searcher = newSearcher(reader);
|
||||
iw.close();
|
||||
|
||||
if (reader.numDocs() == 0) {
|
||||
// may occasionally happen if all documents got the same term
|
||||
IOUtils.close(reader, dir);
|
||||
continue;
|
||||
}
|
||||
|
||||
for (int i = 0; i < 100; ++i) {
|
||||
final float boost = random().nextFloat() * 10;
|
||||
final int numQueryNumbers = TestUtil.nextInt(random(), 1, 1 << TestUtil.nextInt(random(), 1, 8));
|
||||
Set<Long> queryNumbers = new HashSet<>();
|
||||
for (int j = 0; j < numQueryNumbers; ++j) {
|
||||
queryNumbers.add(allNumbers.get(random().nextInt(allNumbers.size())));
|
||||
}
|
||||
final BooleanQuery bq = new BooleanQuery();
|
||||
for (Long number : queryNumbers) {
|
||||
bq.add(new TermQuery(new Term("text", number.toString())), Occur.SHOULD);
|
||||
}
|
||||
Query q1 = new ConstantScoreQuery(bq);
|
||||
q1.setBoost(boost);
|
||||
final Query q2 = new DocValuesNumbersQuery("long", queryNumbers);
|
||||
q2.setBoost(boost);
|
||||
|
||||
BooleanQuery bq1 = new BooleanQuery();
|
||||
bq1.add(q1, Occur.MUST);
|
||||
bq1.add(new TermQuery(new Term("text", allNumbers.get(0).toString())), Occur.FILTER);
|
||||
|
||||
BooleanQuery bq2 = new BooleanQuery();
|
||||
bq2.add(q2, Occur.MUST);
|
||||
bq2.add(new TermQuery(new Term("text", allNumbers.get(0).toString())), Occur.FILTER);
|
||||
|
||||
assertSameMatches(searcher, bq1, bq2, true);
|
||||
}
|
||||
|
||||
reader.close();
|
||||
dir.close();
|
||||
}
|
||||
}
|
||||
|
||||
private void assertSameMatches(IndexSearcher searcher, Query q1, Query q2, boolean scores) throws IOException {
|
||||
final int maxDoc = searcher.getIndexReader().maxDoc();
|
||||
final TopDocs td1 = searcher.search(q1, maxDoc, scores ? Sort.RELEVANCE : Sort.INDEXORDER);
|
||||
final TopDocs td2 = searcher.search(q2, maxDoc, scores ? Sort.RELEVANCE : Sort.INDEXORDER);
|
||||
assertEquals(td1.totalHits, td2.totalHits);
|
||||
for (int i = 0; i < td1.scoreDocs.length; ++i) {
|
||||
assertEquals(td1.scoreDocs[i].doc, td2.scoreDocs[i].doc);
|
||||
if (scores) {
|
||||
assertEquals(td1.scoreDocs[i].score, td2.scoreDocs[i].score, 10e-7);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -112,19 +112,14 @@ public abstract class BaseExplanationTestCase extends LuceneTestCase {
|
|||
}
|
||||
|
||||
/**
|
||||
* Convenience subclass of FieldCacheTermsFilter
|
||||
* Convenience method: builds a BooleanQuery matching documents whose KEY field equals any of the given items
|
||||
*/
|
||||
public static class ItemizedQuery extends DocValuesTermsQuery {
|
||||
private static String[] int2str(int [] terms) {
|
||||
String [] out = new String[terms.length];
|
||||
for (int i = 0; i < terms.length; i++) {
|
||||
out[i] = ""+terms[i];
|
||||
}
|
||||
return out;
|
||||
}
|
||||
public ItemizedQuery(int [] keys) {
|
||||
super(KEY, int2str(keys));
|
||||
protected Query matchTheseItems(int[] terms) {
|
||||
BooleanQuery query = new BooleanQuery();
|
||||
for(int term : terms) {
|
||||
query.add(new BooleanClause(new TermQuery(new Term(KEY, ""+term)), BooleanClause.Occur.SHOULD));
|
||||
}
|
||||
return query;
|
||||
}
|
||||
|
||||
/** helper for generating MultiPhraseQueries */
|
||||
|
|
|
@ -103,6 +103,7 @@
|
|||
<pathelement location="${queries.jar}"/>
|
||||
<pathelement location="${queryparser.jar}"/>
|
||||
<pathelement location="${join.jar}"/>
|
||||
<pathelement location="${sandbox.jar}"/>
|
||||
</path>
|
||||
|
||||
<path id="solr.base.classpath">
|
||||
|
@ -168,7 +169,7 @@
|
|||
|
||||
<target name="prep-lucene-jars"
|
||||
depends="jar-lucene-core, jar-backward-codecs, jar-analyzers-phonetic, jar-analyzers-kuromoji, jar-codecs,jar-expressions, jar-suggest, jar-highlighter, jar-memory,
|
||||
jar-misc, jar-spatial, jar-grouping, jar-queries, jar-queryparser, jar-join">
|
||||
jar-misc, jar-spatial, jar-grouping, jar-queries, jar-queryparser, jar-join, jar-sandbox">
|
||||
<property name="solr.deps.compiled" value="true"/>
|
||||
</target>
|
||||
|
||||
|
|
Loading…
Reference in New Issue