diff --git a/lucene/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationDocValuesField.java b/lucene/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationDocValuesField.java index c08747ea501..693a5aa9ff6 100644 --- a/lucene/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationDocValuesField.java +++ b/lucene/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationDocValuesField.java @@ -59,6 +59,7 @@ public final class ICUCollationDocValuesField extends Field { } catch (CloneNotSupportedException e) { throw new RuntimeException(e); } + fieldsData = bytes; // so wrong setters cannot be called } @Override @@ -73,11 +74,4 @@ public final class ICUCollationDocValuesField extends Field { bytes.offset = 0; bytes.length = key.size; } - - @Override - public BytesRef binaryValue() { - return bytes; - } - - // nocommit: UOE the other field methods? or set to empty bytesref initially so this just works... } diff --git a/lucene/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationDocValuesField.java b/lucene/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationDocValuesField.java index 035ecff6048..9cc9fa9e853 100644 --- a/lucene/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationDocValuesField.java +++ b/lucene/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationDocValuesField.java @@ -17,19 +17,28 @@ package org.apache.lucene.collation; * limitations under the License. */ -import org.apache.lucene.codecs.Codec; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.StringField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.FieldCacheRangeFilter; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.QueryUtils; +import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; +import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; import com.ibm.icu.text.Collator; import com.ibm.icu.util.ULocale; @@ -37,9 +46,10 @@ import com.ibm.icu.util.ULocale; /** * trivial test of ICUCollationDocValuesField */ +@SuppressCodecs("Lucene3x") public class TestICUCollationDocValuesField extends LuceneTestCase { - public void test() throws Exception { - assumeFalse("3.x codec does not support docvalues", Codec.getDefault().getName().equals("Lucene3x")); + + public void testBasic() throws Exception { Directory dir = newDirectory(); RandomIndexWriter iw = new RandomIndexWriter(random(), dir); Document doc = new Document(); @@ -69,4 +79,65 @@ public class TestICUCollationDocValuesField extends LuceneTestCase { ir.close(); dir.close(); } + + public void testRanges() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter iw = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + Field field = newField("field", "", StringField.TYPE_STORED); + Collator collator = Collator.getInstance(); // uses -Dtests.locale + if (random().nextBoolean()) { + collator.setStrength(Collator.PRIMARY); + } + ICUCollationDocValuesField collationField = new ICUCollationDocValuesField("collated", collator); + doc.add(field); + doc.add(collationField); + + int numDocs = atLeast(500); + for (int i = 0; i < numDocs; i++) { + String value = _TestUtil.randomSimpleString(random()); + field.setStringValue(value); + collationField.setStringValue(value); + iw.addDocument(doc); + } + + IndexReader ir = iw.getReader(); + iw.close(); + IndexSearcher is = newSearcher(ir); + + int numChecks = atLeast(100); + for (int i = 0; i < numChecks; i++) { + String start = _TestUtil.randomSimpleString(random()); + String end = _TestUtil.randomSimpleString(random()); + BytesRef lowerVal = new BytesRef(collator.getCollationKey(start).toByteArray()); + BytesRef upperVal = new BytesRef(collator.getCollationKey(end).toByteArray()); + Query query = new ConstantScoreQuery(FieldCacheRangeFilter.newBytesRefRange("collated", lowerVal, upperVal, true, true)); + doTestRanges(is, start, end, query, collator); + } + + ir.close(); + dir.close(); + } + + private void doTestRanges(IndexSearcher is, String startPoint, String endPoint, Query query, Collator collator) throws Exception { + QueryUtils.check(query); + + // positive test + TopDocs docs = is.search(query, is.getIndexReader().maxDoc()); + for (ScoreDoc doc : docs.scoreDocs) { + String value = is.doc(doc.doc).get("field"); + assertTrue(collator.compare(value, startPoint) >= 0); + assertTrue(collator.compare(value, endPoint) <= 0); + } + + // negative test + BooleanQuery bq = new BooleanQuery(); + bq.add(new MatchAllDocsQuery(), Occur.SHOULD); + bq.add(query, Occur.MUST_NOT); + docs = is.search(bq, is.getIndexReader().maxDoc()); + for (ScoreDoc doc : docs.scoreDocs) { + String value = is.doc(doc.doc).get("field"); + assertTrue(collator.compare(value, startPoint) < 0 || collator.compare(value, endPoint) > 0); + } + } } diff --git a/lucene/core/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java b/lucene/core/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java index 853977c4bca..4cb2ad137eb 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java +++ b/lucene/core/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java @@ -138,6 +138,64 @@ public abstract class FieldCacheRangeFilter extends Filter { }; } + /** + * Creates a BytesRef range filter using {@link FieldCache#getTermsIndex}. This works with all + * fields containing zero or one term in the field. The range can be half-open by setting one + * of the values to null. + */ + // TODO: bogus that newStringRange doesnt share this code... generics hell + public static FieldCacheRangeFilter newBytesRefRange(String field, BytesRef lowerVal, BytesRef upperVal, boolean includeLower, boolean includeUpper) { + return new FieldCacheRangeFilter(field, null, lowerVal, upperVal, includeLower, includeUpper) { + @Override + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { + final SortedDocValues fcsi = FieldCache.DEFAULT.getTermsIndex(context.reader(), field); + final BytesRef spare = new BytesRef(); + final int lowerPoint = lowerVal == null ? -1 : fcsi.lookupTerm(lowerVal, spare); + final int upperPoint = upperVal == null ? -1 : fcsi.lookupTerm(upperVal, spare); + + final int inclusiveLowerPoint, inclusiveUpperPoint; + + // Hints: + // * binarySearchLookup returns -1, if value was null. + // * the value is <0 if no exact hit was found, the returned value + // is (-(insertion point) - 1) + if (lowerPoint == -1 && lowerVal == null) { + inclusiveLowerPoint = 0; + } else if (includeLower && lowerPoint >= 0) { + inclusiveLowerPoint = lowerPoint; + } else if (lowerPoint >= 0) { + inclusiveLowerPoint = lowerPoint + 1; + } else { + inclusiveLowerPoint = Math.max(0, -lowerPoint - 1); + } + + if (upperPoint == -1 && upperVal == null) { + inclusiveUpperPoint = Integer.MAX_VALUE; + } else if (includeUpper && upperPoint >= 0) { + inclusiveUpperPoint = upperPoint; + } else if (upperPoint >= 0) { + inclusiveUpperPoint = upperPoint - 1; + } else { + inclusiveUpperPoint = -upperPoint - 2; + } + + if (inclusiveUpperPoint < 0 || inclusiveLowerPoint > inclusiveUpperPoint) { + return DocIdSet.EMPTY_DOCIDSET; + } + + assert inclusiveLowerPoint >= 0 && inclusiveUpperPoint >= 0; + + return new FieldCacheDocIdSet(context.reader().maxDoc(), acceptDocs) { + @Override + protected final boolean matchDoc(int doc) { + final int docOrd = fcsi.getOrd(doc); + return docOrd >= inclusiveLowerPoint && docOrd <= inclusiveUpperPoint; + } + }; + } + }; + } + /** * Creates a numeric range filter using {@link FieldCache#getBytes(AtomicReader,String,boolean)}. This works with all * byte fields containing exactly one numeric term in the field. The range can be half-open by setting one diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java b/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java index da7bff07920..a3e16f363cf 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java +++ b/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java @@ -859,18 +859,17 @@ public class _TestUtil { final DocValuesType dvType = field1.fieldType().docValueType(); if (dvType != null) { switch(dvType) { - // nocommit: not quite right! - case NUMERIC: - field2 = new NumericDocValuesField(field1.name(), field1.numericValue().longValue()); + case NUMERIC: + field2 = new NumericDocValuesField(field1.name(), field1.numericValue().longValue()); + break; + case BINARY: + field2 = new BinaryDocValuesField(field1.name(), field1.binaryValue()); break; - case BINARY: - field2 = new BinaryDocValuesField(field1.name(), field1.binaryValue()); - break; - case SORTED: - field2 = new SortedDocValuesField(field1.name(), field1.binaryValue()); - break; - default: - throw new IllegalStateException("unknown Type: " + dvType); + case SORTED: + field2 = new SortedDocValuesField(field1.name(), field1.binaryValue()); + break; + default: + throw new IllegalStateException("unknown Type: " + dvType); } } else { field2 = new Field(field1.name(), field1.stringValue(), field1.fieldType());