mirror of https://github.com/apache/lucene.git
LUCENE-2514, LUCENE-2551: collation uses byte[] keys, deprecate old unscalable locale sort/range, termrangequery/filter work on bytes
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1075210 13f79535-47bb-0310-9956-ffa450edef68
parent 81ee0e72d3
commit 308e0bd4a9
@@ -313,6 +313,21 @@ LUCENE-1458, LUCENE-2111: Flexible Indexing
   - o.a.l.analysis.StopwordAnalyzerBase -> o.a.l.analysis.util.StopwordAnalyzerBase
   - o.a.l.analysis.WordListLoader -> o.a.l.analysis.util.WordListLoader

+* LUCENE-2514: The option to use a Collator's order (instead of binary order) for
+  sorting and range queries has been moved to contrib/queries.
+
+  The collated TermRangeQuery/Filter has been moved to SlowCollatedTermRangeQuery/Filter,
+  and the collated sorting has been moved to SlowCollatedStringComparator.
+
+  Note: this functionality isn't very scalable, and if you are using it, consider
+  indexing collation keys with the collation support in the analysis module instead.
+
+  To perform collated range queries, use a suitable collating analyzer
+  (CollationKeyAnalyzer or ICUCollationKeyAnalyzer) and set qp.setAnalyzeRangeTerms(true).
+
+  TermRangeQuery and TermRangeFilter now work purely on bytes. Both have helper factory methods
+  (newStringRange), similar to the NumericRange API, to easily perform range queries on Strings.
+
 * LUCENE-2691: The near-real-time API has moved from IndexWriter to
   IndexReader.  Instead of IndexWriter.getReader(), call
   IndexReader.open(IndexWriter) or IndexReader.reopen(IndexWriter).
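A hedged illustration of the LUCENE-2514 migration described in the entry above. The class names come from the entry itself; the QueryParser constructor, the Version constant, and the CollationKeyAnalyzer constructor are assumptions about the trunk-era API, not guaranteed signatures:

    // Binary-order range over String endpoints, via the new factory method:
    Query cheap = TermRangeQuery.newStringRange("field", "apple", "pear", true, true);

    // Locale-sensitive ranges: index collation keys instead of using the
    // deprecated SlowCollated* classes, then analyze the range endpoints too.
    Collator collator = Collator.getInstance(new Locale("sv"));
    Analyzer analyzer = new CollationKeyAnalyzer(collator);            // assumed ctor
    QueryParser qp = new QueryParser(Version.LUCENE_40, "field", analyzer);
    qp.setAnalyzeRangeTerms(true);   // endpoints now run through the analyzer
    Query collated = qp.parse("field:[apple TO pear]");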
@@ -828,7 +828,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
       @Override
       public void run() throws Exception {
         numHighlights = 0;
-        TermRangeFilter rf = new TermRangeFilter("contents", "john", "john", true, true);
+        TermRangeFilter rf = TermRangeFilter.newStringRange("contents", "john", "john", true, true);
         SpanQuery clauses[] = { new SpanTermQuery(new Term("contents", "john")),
             new SpanTermQuery(new Term("contents", "kennedy")), };
         SpanNearQuery snq = new SpanNearQuery(clauses, 1, true);

@@ -851,7 +851,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
       @Override
       public void run() throws Exception {
         numHighlights = 0;
-        TermRangeFilter rf = new TermRangeFilter("contents", "john", "john", true, true);
+        TermRangeFilter rf = TermRangeFilter.newStringRange("contents", "john", "john", true, true);
         PhraseQuery pq = new PhraseQuery();
         pq.add(new Term("contents", "john"));
         pq.add(new Term("contents", "kennedy"));
@@ -0,0 +1,106 @@
package org.apache.lucene.search;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.text.Collator;

import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.FieldCache.DocTerms;
import org.apache.lucene.util.BytesRef;

/** Sorts by a field's value using the given Collator.
 *
 * <p><b>WARNING</b>: this is very slow; you'll
 * get much better performance using the
 * CollationKeyAnalyzer or ICUCollationKeyAnalyzer.
 * @deprecated Index collation keys with CollationKeyAnalyzer or ICUCollationKeyAnalyzer instead.
 * This class will be removed in Lucene 5.0
 */
@Deprecated
public final class SlowCollatedStringComparator extends FieldComparator {

  private final String[] values;
  private DocTerms currentDocTerms;
  private final String field;
  final Collator collator;
  private String bottom;
  private final BytesRef tempBR = new BytesRef();

  public SlowCollatedStringComparator(int numHits, String field, Collator collator) {
    values = new String[numHits];
    this.field = field;
    this.collator = collator;
  }

  @Override
  public int compare(int slot1, int slot2) {
    final String val1 = values[slot1];
    final String val2 = values[slot2];
    if (val1 == null) {
      if (val2 == null) {
        return 0;
      }
      return -1;
    } else if (val2 == null) {
      return 1;
    }
    return collator.compare(val1, val2);
  }

  @Override
  public int compareBottom(int doc) {
    final String val2 = currentDocTerms.getTerm(doc, tempBR).utf8ToString();
    if (bottom == null) {
      if (val2 == null) {
        return 0;
      }
      return -1;
    } else if (val2 == null) {
      return 1;
    }
    return collator.compare(bottom, val2);
  }

  @Override
  public void copy(int slot, int doc) {
    final BytesRef br = currentDocTerms.getTerm(doc, tempBR);
    if (br == null) {
      values[slot] = null;
    } else {
      values[slot] = br.utf8ToString();
    }
  }

  @Override
  public FieldComparator setNextReader(AtomicReaderContext context) throws IOException {
    currentDocTerms = FieldCache.DEFAULT.getTerms(context.reader, field);
    return this;
  }

  @Override
  public void setBottom(final int bottom) {
    this.bottom = values[bottom];
  }

  @Override
  public Comparable<?> value(int slot) {
    final String s = values[slot];
    return s == null ? null : new BytesRef(values[slot]);
  }
}
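A minimal sketch of how this comparator is plugged into sorting, via a FieldComparatorSource; the commit's own TestSlowCollationMethods, further down, uses the same pattern. The `searcher` and `query` variables are assumptions:

    SortField sf = new SortField("field", new FieldComparatorSource() {
      @Override
      public FieldComparator newComparator(String fieldname, int numHits, int sortPos, boolean reversed) throws IOException {
        // one comparator instance per search, bound to the chosen Collator
        return new SlowCollatedStringComparator(numHits, fieldname, Collator.getInstance(new Locale("de")));
      }
    });
    TopFieldDocs docs = searcher.search(query, null, 10, new Sort(sf));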
@@ -0,0 +1,70 @@
package org.apache.lucene.search;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.text.Collator;

/**
 * A Filter that restricts search results to a range of term
 * values in a given field.
 *
 * <p>This filter matches the documents looking for terms that fall into the
 * supplied range according to {@link
 * String#compareTo(String)}, unless a <code>Collator</code> is provided. It is not intended
 * for numerical ranges; use {@link NumericRangeFilter} instead.
 *
 * <p>If you construct a large number of range filters with different ranges but on the
 * same field, {@link FieldCacheRangeFilter} may have significantly better performance.
 * @deprecated Index collation keys with CollationKeyAnalyzer or ICUCollationKeyAnalyzer instead.
 * This class will be removed in Lucene 5.0
 */
@Deprecated
public class SlowCollatedTermRangeFilter extends MultiTermQueryWrapperFilter<SlowCollatedTermRangeQuery> {
  /**
   * @param lowerTerm The lower bound on this range
   * @param upperTerm The upper bound on this range
   * @param includeLower Does this range include the lower bound?
   * @param includeUpper Does this range include the upper bound?
   * @param collator The collator to use when determining range inclusion; set
   *  to null to use Unicode code point ordering instead of collation.
   * @throws IllegalArgumentException if both terms are null or if
   *  lowerTerm is null and includeLower is true (similar for upperTerm
   *  and includeUpper)
   */
  public SlowCollatedTermRangeFilter(String fieldName, String lowerTerm, String upperTerm,
      boolean includeLower, boolean includeUpper,
      Collator collator) {
    super(new SlowCollatedTermRangeQuery(fieldName, lowerTerm, upperTerm, includeLower, includeUpper, collator));
  }

  /** Returns the lower value of this range filter */
  public String getLowerTerm() { return query.getLowerTerm(); }

  /** Returns the upper value of this range filter */
  public String getUpperTerm() { return query.getUpperTerm(); }

  /** Returns <code>true</code> if the lower endpoint is inclusive */
  public boolean includesLower() { return query.includesLower(); }

  /** Returns <code>true</code> if the upper endpoint is inclusive */
  public boolean includesUpper() { return query.includesUpper(); }

  /** Returns the collator used to determine range inclusion, if any. */
  public Collator getCollator() { return query.getCollator(); }
}
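A short usage sketch for the filter above; the IndexSearcher, field name, and endpoints are assumptions for illustration:

    Collator collator = Collator.getInstance(new Locale("fr"));
    Filter nameRange = new SlowCollatedTermRangeFilter("name", "a", "m", true, true, collator);
    // apply the filter to any query; here, against all documents
    TopDocs hits = searcher.search(new MatchAllDocsQuery(), nameRange, 10);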
@@ -0,0 +1,176 @@
package org.apache.lucene.search;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.text.Collator;

import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.ToStringUtils;

/**
 * A Query that matches documents within a range of terms.
 *
 * <p>This query matches the documents looking for terms that fall into the
 * supplied range according to {@link
 * String#compareTo(String)}, unless a <code>Collator</code> is provided. It is not intended
 * for numerical ranges; use {@link NumericRangeQuery} instead.
 *
 * <p>This query uses the {@link
 * MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}
 * rewrite method.
 * @deprecated Index collation keys with CollationKeyAnalyzer or ICUCollationKeyAnalyzer instead.
 * This class will be removed in Lucene 5.0
 */
@Deprecated
public class SlowCollatedTermRangeQuery extends MultiTermQuery {
  private String lowerTerm;
  private String upperTerm;
  private boolean includeLower;
  private boolean includeUpper;
  private Collator collator;

  /** Constructs a query selecting all terms greater/equal than
   * <code>lowerTerm</code> but less/equal than <code>upperTerm</code>.
   * <p>
   * If an endpoint is null, it is said
   * to be "open".  Either or both endpoints may be open.  Open endpoints may not
   * be exclusive (you can't select all but the first or last term without
   * explicitly specifying the term to exclude.)
   * <p>
   *
   * @param lowerTerm The Term text at the lower end of the range
   * @param upperTerm The Term text at the upper end of the range
   * @param includeLower
   *          If true, the <code>lowerTerm</code> is
   *          included in the range.
   * @param includeUpper
   *          If true, the <code>upperTerm</code> is
   *          included in the range.
   * @param collator The collator to use to collate index Terms, to determine
   *  their membership in the range bounded by <code>lowerTerm</code> and
   *  <code>upperTerm</code>.
   */
  public SlowCollatedTermRangeQuery(String field, String lowerTerm, String upperTerm,
      boolean includeLower, boolean includeUpper, Collator collator) {
    super(field);
    this.lowerTerm = lowerTerm;
    this.upperTerm = upperTerm;
    this.includeLower = includeLower;
    this.includeUpper = includeUpper;
    this.collator = collator;
  }

  /** Returns the lower value of this range query */
  public String getLowerTerm() { return lowerTerm; }

  /** Returns the upper value of this range query */
  public String getUpperTerm() { return upperTerm; }

  /** Returns <code>true</code> if the lower endpoint is inclusive */
  public boolean includesLower() { return includeLower; }

  /** Returns <code>true</code> if the upper endpoint is inclusive */
  public boolean includesUpper() { return includeUpper; }

  /** Returns the collator used to determine range inclusion */
  public Collator getCollator() { return collator; }

  @Override
  protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
    if (lowerTerm != null && upperTerm != null && collator.compare(lowerTerm, upperTerm) > 0) {
      return TermsEnum.EMPTY;
    }

    TermsEnum tenum = terms.iterator();

    if (lowerTerm == null && upperTerm == null) {
      return tenum;
    }
    return new SlowCollatedTermRangeTermsEnum(tenum,
        lowerTerm, upperTerm, includeLower, includeUpper, collator);
  }

  /** @deprecated */
  @Deprecated
  public String field() {
    return getField();
  }

  /** Prints a user-readable version of this query. */
  @Override
  public String toString(String field) {
    StringBuilder buffer = new StringBuilder();
    if (!getField().equals(field)) {
      buffer.append(getField());
      buffer.append(":");
    }
    buffer.append(includeLower ? '[' : '{');
    buffer.append(lowerTerm != null ? lowerTerm : "*");
    buffer.append(" TO ");
    buffer.append(upperTerm != null ? upperTerm : "*");
    buffer.append(includeUpper ? ']' : '}');
    buffer.append(ToStringUtils.boost(getBoost()));
    return buffer.toString();
  }

  @Override
  public int hashCode() {
    final int prime = 31;
    int result = super.hashCode();
    result = prime * result + ((collator == null) ? 0 : collator.hashCode());
    result = prime * result + (includeLower ? 1231 : 1237);
    result = prime * result + (includeUpper ? 1231 : 1237);
    result = prime * result + ((lowerTerm == null) ? 0 : lowerTerm.hashCode());
    result = prime * result + ((upperTerm == null) ? 0 : upperTerm.hashCode());
    return result;
  }

  @Override
  public boolean equals(Object obj) {
    if (this == obj)
      return true;
    if (!super.equals(obj))
      return false;
    if (getClass() != obj.getClass())
      return false;
    SlowCollatedTermRangeQuery other = (SlowCollatedTermRangeQuery) obj;
    if (collator == null) {
      if (other.collator != null)
        return false;
    } else if (!collator.equals(other.collator))
      return false;
    if (includeLower != other.includeLower)
      return false;
    if (includeUpper != other.includeUpper)
      return false;
    if (lowerTerm == null) {
      if (other.lowerTerm != null)
        return false;
    } else if (!lowerTerm.equals(other.lowerTerm))
      return false;
    if (upperTerm == null) {
      if (other.upperTerm != null)
        return false;
    } else if (!upperTerm.equals(other.upperTerm))
      return false;
    return true;
  }
}
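Why a collated range can disagree with the binary-order TermRangeQuery — a small sketch; the German collation behavior is the assumption here:

    // Under a German collator, "ä" sorts with "a", i.e. inside [a TO b];
    // in UTF-8 byte order (0xC3 0xA4) it sorts after "z", outside the range.
    Collator de = Collator.getInstance(Locale.GERMAN);
    Query collated = new SlowCollatedTermRangeQuery("name", "a", "b", true, true, de);
    Query binary = TermRangeQuery.newStringRange("name", "a", "b", true, true);
    // a document whose indexed term is "ä" matches `collated` but not `binary`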
@@ -0,0 +1,102 @@
package org.apache.lucene.search;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.text.Collator;

import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

/**
 * Subclass of FilteredTermsEnum for enumerating all terms that match the
 * specified range parameters.
 * <p>Term enumerations are always ordered by
 * {@link #getComparator}.  Each term in the enumeration is
 * greater than all that precede it.</p>
 * @deprecated Index collation keys with CollationKeyAnalyzer or ICUCollationKeyAnalyzer instead.
 * This class will be removed in Lucene 5.0
 */
@Deprecated
public class SlowCollatedTermRangeTermsEnum extends FilteredTermsEnum {
  private Collator collator;
  private String upperTermText;
  private String lowerTermText;
  private boolean includeLower;
  private boolean includeUpper;

  /**
   * Enumerates all terms greater/equal than <code>lowerTerm</code>
   * but less/equal than <code>upperTerm</code>.
   *
   * If an endpoint is null, it is said to be "open". Either or both
   * endpoints may be open.  Open endpoints may not be exclusive
   * (you can't select all but the first or last term without
   * explicitly specifying the term to exclude.)
   *
   * @param tenum
   * @param lowerTermText
   *          The term text at the lower end of the range
   * @param upperTermText
   *          The term text at the upper end of the range
   * @param includeLower
   *          If true, the <code>lowerTerm</code> is included in the range.
   * @param includeUpper
   *          If true, the <code>upperTerm</code> is included in the range.
   * @param collator
   *          The collator to use to collate index Terms, to determine their
   *          membership in the range bounded by <code>lowerTerm</code> and
   *          <code>upperTerm</code>.
   *
   * @throws IOException
   */
  public SlowCollatedTermRangeTermsEnum(TermsEnum tenum, String lowerTermText, String upperTermText,
      boolean includeLower, boolean includeUpper, Collator collator) throws IOException {
    super(tenum);
    this.collator = collator;
    this.upperTermText = upperTermText;
    this.lowerTermText = lowerTermText;
    this.includeLower = includeLower;
    this.includeUpper = includeUpper;

    // do a little bit of normalization...
    // open ended range queries should always be inclusive.
    if (this.lowerTermText == null) {
      this.lowerTermText = "";
      this.includeLower = true;
    }

    // TODO: optimize
    BytesRef startBytesRef = new BytesRef("");
    setInitialSeekTerm(startBytesRef);
  }

  @Override
  protected AcceptStatus accept(BytesRef term) {
    if ((includeLower
         ? collator.compare(term.utf8ToString(), lowerTermText) >= 0
         : collator.compare(term.utf8ToString(), lowerTermText) > 0)
        && (upperTermText == null
            || (includeUpper
                ? collator.compare(term.utf8ToString(), upperTermText) <= 0
                : collator.compare(term.utf8ToString(), upperTermText) < 0))) {
      return AcceptStatus.YES;
    }
    return AcceptStatus.NO;
  }
}
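What makes these classes "Slow": setInitialSeekTerm(new BytesRef("")) starts the enumeration at the first term of the field, and accept() decodes every term back to a String for Collator comparisons, so the cost is linear in the number of terms in the field. A distilled version of the inclusive-bounds check, for reference:

    // Equivalent of accept() when both bounds are inclusive (sketch):
    static boolean inRange(BytesRef term, Collator collator, String lower, String upper) {
      String s = term.utf8ToString();   // decode once per indexed term
      return collator.compare(s, lower) >= 0 && collator.compare(s, upper) <= 0;
    }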
@@ -70,7 +70,7 @@ public class BooleanFilterTest extends LuceneTestCase {

   private Filter getRangeFilter(String field, String lowerPrice, String upperPrice)
   {
-    Filter f = new TermRangeFilter(field, lowerPrice, upperPrice, true, true);
+    Filter f = TermRangeFilter.newStringRange(field, lowerPrice, upperPrice, true, true);
     return f;
   }
   private Filter getTermsFilter(String field, String text)

@@ -84,7 +84,7 @@ public class ChainedFilterTest extends LuceneTestCase {
     //Date pastTheEnd = parseDate("2099 Jan 1");
     // dateFilter = DateFilter.Before("date", pastTheEnd);
     // just treat dates as strings and select the whole range for now...
-    dateFilter = new TermRangeFilter("date", "", "ZZZZ", true, true);
+    dateFilter = TermRangeFilter.newStringRange("date", "", "ZZZZ", true, true);

     bobFilter = new QueryWrapperFilter(
         new TermQuery(new Term("owner", "bob")));
@@ -0,0 +1,137 @@
package org.apache.lucene.search;

import java.io.IOException;
import java.text.Collator;
import java.util.Locale;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
import org.junit.AfterClass;
import org.junit.BeforeClass;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Tests SlowCollatedStringComparator, SlowCollatedTermRangeQuery, and SlowCollatedTermRangeFilter
 */
public class TestSlowCollationMethods extends LuceneTestCase {
  private static Collator collator;
  private static IndexSearcher searcher;
  private static IndexReader reader;
  private static Directory dir;
  private static int numDocs;

  @BeforeClass
  public static void beforeClass() throws Exception {
    final Locale locale = LuceneTestCase.randomLocale(random);
    collator = Collator.getInstance(locale);
    collator.setStrength(Collator.IDENTICAL);
    collator.setDecomposition(Collator.NO_DECOMPOSITION);

    numDocs = 1000 * RANDOM_MULTIPLIER;
    dir = newDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(random, dir);
    for (int i = 0; i < numDocs; i++) {
      Document doc = new Document();
      String value = _TestUtil.randomUnicodeString(random);
      Field field = newField("field", value, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
      doc.add(field);
      iw.addDocument(doc);
    }
    reader = iw.getReader();
    iw.close();

    // TODO: we should be able to use newSearcher, but custom sorts are broken if IS has an executorservice
    // see LUCENE-2941
    //searcher = newSearcher(reader);
    searcher = new IndexSearcher(reader);
  }

  @AfterClass
  public static void afterClass() throws Exception {
    searcher.close();
    reader.close();
    dir.close();
    collator = null;
    searcher = null;
    reader = null;
    dir = null;
  }

  public void testSort() throws Exception {
    SortField sf = new SortField("field", new FieldComparatorSource() {
      @Override
      public FieldComparator newComparator(String fieldname, int numHits, int sortPos, boolean reversed) throws IOException {
        return new SlowCollatedStringComparator(numHits, fieldname, collator);
      }
    });
    TopFieldDocs docs = searcher.search(new MatchAllDocsQuery(), null, numDocs, new Sort(sf));
    String prev = "";
    for (ScoreDoc doc : docs.scoreDocs) {
      String value = reader.document(doc.doc).get("field");
      assertTrue(collator.compare(value, prev) >= 0);
      prev = value;
    }
  }

  private void doTestRanges(String startPoint, String endPoint, Query query) throws Exception {
    // positive test
    TopDocs docs = searcher.search(query, numDocs);
    for (ScoreDoc doc : docs.scoreDocs) {
      String value = reader.document(doc.doc).get("field");
      assertTrue(collator.compare(value, startPoint) >= 0);
      assertTrue(collator.compare(value, endPoint) <= 0);
    }

    // negative test
    BooleanQuery bq = new BooleanQuery();
    bq.add(new MatchAllDocsQuery(), Occur.SHOULD);
    bq.add(query, Occur.MUST_NOT);
    docs = searcher.search(bq, numDocs);
    for (ScoreDoc doc : docs.scoreDocs) {
      String value = reader.document(doc.doc).get("field");
      assertTrue(collator.compare(value, startPoint) < 0 || collator.compare(value, endPoint) > 0);
    }
  }

  public void testRangeQuery() throws Exception {
    int numQueries = 50 * RANDOM_MULTIPLIER;
    for (int i = 0; i < numQueries; i++) {
      String startPoint = _TestUtil.randomUnicodeString(random);
      String endPoint = _TestUtil.randomUnicodeString(random);
      Query query = new SlowCollatedTermRangeQuery("field", startPoint, endPoint, true, true, collator);
      doTestRanges(startPoint, endPoint, query);
    }
  }

  public void testRangeFilter() throws Exception {
    int numQueries = 50 * RANDOM_MULTIPLIER;
    for (int i = 0; i < numQueries; i++) {
      String startPoint = _TestUtil.randomUnicodeString(random);
      String endPoint = _TestUtil.randomUnicodeString(random);
      Query query = new ConstantScoreQuery(new SlowCollatedTermRangeFilter("field", startPoint, endPoint, true, true, collator));
      doTestRanges(startPoint, endPoint, query);
    }
  }
}
@@ -174,8 +174,7 @@ public class ComplexPhraseQueryParser extends QueryParser {
     if (isPass2ResolvingPhrases) {
       // Must use old-style RangeQuery in order to produce a BooleanQuery
       // that can be turned into SpanOr clause
-      TermRangeQuery rangeQuery = new TermRangeQuery(field, part1, part2, startInclusive, endInclusive,
-          getRangeCollator());
+      TermRangeQuery rangeQuery = TermRangeQuery.newStringRange(field, part1, part2, startInclusive, endInclusive);
       rangeQuery.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
       return rangeQuery;
     }
@@ -17,7 +17,6 @@ package org.apache.lucene.queryParser.standard;
  * limitations under the License.
  */

-import java.text.Collator;
 import java.util.Locale;
 import java.util.Map;
 import java.util.TooManyListenersException;

@@ -41,10 +40,8 @@ import org.apache.lucene.queryParser.standard.config.LowercaseExpandedTermsAttri
 import org.apache.lucene.queryParser.standard.config.MultiFieldAttribute;
 import org.apache.lucene.queryParser.standard.config.MultiTermRewriteMethodAttribute;
 import org.apache.lucene.queryParser.standard.config.PositionIncrementsAttribute;
-import org.apache.lucene.queryParser.standard.config.RangeCollatorAttribute;
 import org.apache.lucene.queryParser.standard.config.StandardQueryConfigHandler;
 import org.apache.lucene.queryParser.standard.config.DefaultOperatorAttribute.Operator;
-import org.apache.lucene.queryParser.standard.nodes.RangeQueryNode;
 import org.apache.lucene.queryParser.standard.parser.StandardSyntaxParser;
 import org.apache.lucene.queryParser.standard.processors.StandardQueryNodeProcessorPipeline;
 import org.apache.lucene.search.FuzzyQuery;

@@ -187,32 +184,6 @@ public class StandardQueryParser extends QueryParserHelper {
     return attr.getOperator();
   }

-  /**
-   * Sets the collator used to determine index term inclusion in ranges for
-   * RangeQuerys.
-   * <p/>
-   * <strong>WARNING:</strong> Setting the rangeCollator to a non-null collator
-   * using this method will cause every single index Term in the Field
-   * referenced by lowerTerm and/or upperTerm to be examined. Depending on the
-   * number of index Terms in this Field, the operation could be very slow.
-   *
-   * @param collator
-   *          the collator to use when constructing {@link RangeQueryNode}s
-   */
-  public void setRangeCollator(Collator collator) {
-    RangeCollatorAttribute attr = getQueryConfigHandler().getAttribute(RangeCollatorAttribute.class);
-    attr.setDateResolution(collator);
-  }
-
-  /**
-   * @return the collator used to determine index term inclusion in ranges for
-   *         RangeQuerys.
-   */
-  public Collator getRangeCollator() {
-    RangeCollatorAttribute attr = getQueryConfigHandler().getAttribute(RangeCollatorAttribute.class);
-    return attr.getRangeCollator();
-  }
-
   /**
    * Sets the boolean operator of the QueryParser. In default mode (
    * {@link Operator#OR}) terms without any modifiers are considered optional:
@@ -53,9 +53,7 @@ public class RangeQueryNodeBuilder implements StandardQueryBuilder {

     String field = rangeNode.getField().toString();

-    TermRangeQuery rangeQuery = new TermRangeQuery(field, lower
-        .getTextAsString(), upper.getTextAsString(), lowerInclusive,
-        upperInclusive, rangeNode.getCollator());
+    TermRangeQuery rangeQuery = TermRangeQuery.newStringRange(field, lower.getTextAsString(), upper.getTextAsString(), lowerInclusive, upperInclusive);

     MultiTermQuery.RewriteMethod method = (MultiTermQuery.RewriteMethod) queryNode.getTag(MultiTermRewriteMethodAttribute.TAG_ID);
     if (method != null) {
@@ -1,92 +0,0 @@
package org.apache.lucene.queryParser.standard.config;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.text.Collator;

import org.apache.lucene.queryParser.core.config.QueryConfigHandler;
import org.apache.lucene.queryParser.standard.processors.ParametricRangeQueryNodeProcessor;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.util.AttributeImpl;

/**
 * This attribute is used by {@link ParametricRangeQueryNodeProcessor} processor
 * and must be defined in the {@link QueryConfigHandler}. This attribute tells
 * the processor which {@link Collator} should be used for a
 * {@link TermRangeQuery} <br/>
 *
 * @see org.apache.lucene.queryParser.standard.config.RangeCollatorAttribute
 */
public class RangeCollatorAttributeImpl extends AttributeImpl
    implements RangeCollatorAttribute {

  private Collator rangeCollator;

  public RangeCollatorAttributeImpl() {
    rangeCollator = null; // default value for 2.4
  }

  public void setDateResolution(Collator rangeCollator) {
    this.rangeCollator = rangeCollator;
  }

  public Collator getRangeCollator() {
    return this.rangeCollator;
  }

  @Override
  public void clear() {
    throw new UnsupportedOperationException();
  }

  @Override
  public void copyTo(AttributeImpl target) {
    throw new UnsupportedOperationException();
  }

  @Override
  public boolean equals(Object other) {

    if (other instanceof RangeCollatorAttributeImpl) {
      RangeCollatorAttributeImpl rangeCollatorAttr = (RangeCollatorAttributeImpl) other;

      if (rangeCollatorAttr.rangeCollator == this.rangeCollator
          || rangeCollatorAttr.rangeCollator.equals(this.rangeCollator)) {

        return true;

      }

    }

    return false;

  }

  @Override
  public int hashCode() {
    return (this.rangeCollator == null) ? 0 : this.rangeCollator.hashCode();
  }

  @Override
  public String toString() {
    return "<rangeCollatorAttribute rangeCollator='" + this.rangeCollator
        + "'/>";
  }

}
@@ -38,7 +38,6 @@ public class StandardQueryConfigHandler extends QueryConfigHandler {
     addFieldConfigListener(new FieldDateResolutionFCListener(this));

     // Default Values
-    addAttribute(RangeCollatorAttribute.class);
     addAttribute(DefaultOperatorAttribute.class);
     addAttribute(AnalyzerAttribute.class);
     addAttribute(FuzzyAttribute.class);
@@ -17,34 +17,24 @@ package org.apache.lucene.queryParser.standard.nodes;
  * limitations under the License.
  */

-import java.text.Collator;
-
 import org.apache.lucene.queryParser.core.nodes.ParametricQueryNode;
 import org.apache.lucene.queryParser.core.nodes.ParametricRangeQueryNode;
-import org.apache.lucene.queryParser.standard.config.RangeCollatorAttribute;
 import org.apache.lucene.queryParser.standard.processors.ParametricRangeQueryNodeProcessor;

 /**
- * This query node represents a range query. It also holds which collator will
- * be used by the range query and if the constant score rewrite is enabled. <br/>
+ * This query node represents a range query.
  *
  * @see ParametricRangeQueryNodeProcessor
- * @see RangeCollatorAttribute
  * @see org.apache.lucene.search.TermRangeQuery
  */
 public class RangeQueryNode extends ParametricRangeQueryNode {

-  private Collator collator;
-
   /**
    * @param lower
    * @param upper
    */
-  public RangeQueryNode(ParametricQueryNode lower, ParametricQueryNode upper, Collator collator) {
+  public RangeQueryNode(ParametricQueryNode lower, ParametricQueryNode upper) {
     super(lower, upper);
-
-    this.collator = collator;
-
   }

   @Override

@@ -57,12 +47,4 @@ public class RangeQueryNode extends ParametricRangeQueryNode {
     return sb.toString();

   }

-  /**
-   * @return the collator
-   */
-  public Collator getCollator() {
-    return this.collator;
-  }
-
 }
@@ -17,7 +17,6 @@ package org.apache.lucene.queryParser.standard.processors;
  * limitations under the License.
  */

-import java.text.Collator;
 import java.text.DateFormat;
 import java.util.Calendar;
 import java.util.Date;

@@ -36,7 +35,6 @@ import org.apache.lucene.queryParser.core.nodes.ParametricQueryNode.CompareOpera
 import org.apache.lucene.queryParser.core.processors.QueryNodeProcessorImpl;
 import org.apache.lucene.queryParser.standard.config.DateResolutionAttribute;
 import org.apache.lucene.queryParser.standard.config.LocaleAttribute;
-import org.apache.lucene.queryParser.standard.config.RangeCollatorAttribute;
 import org.apache.lucene.queryParser.standard.nodes.RangeQueryNode;

 /**

@@ -54,12 +52,7 @@ import org.apache.lucene.queryParser.standard.nodes.RangeQueryNode;
  * If a {@link DateResolutionAttribute} is defined and the {@link Resolution} is
  * not <code>null</code> it will also be used to parse the date value. <br/>
  * <br/>
- * This processor will also try to retrieve a {@link RangeCollatorAttribute}
- * from the {@link QueryConfigHandler}. If a {@link RangeCollatorAttribute} is
- * found and the {@link Collator} is not <code>null</code>, it's set on the
- * {@link RangeQueryNode}. <br/>
  *
- * @see RangeCollatorAttribute
  * @see DateResolutionAttribute
  * @see LocaleAttribute
  * @see RangeQueryNode

@@ -79,17 +72,9 @@ public class ParametricRangeQueryNodeProcessor extends QueryNodeProcessorImpl {
     ParametricQueryNode upper = parametricRangeNode.getUpperBound();
     ParametricQueryNode lower = parametricRangeNode.getLowerBound();
     Locale locale = Locale.getDefault();
-    Collator collator = null;
     DateTools.Resolution dateRes = null;
     boolean inclusive = false;

-    if (getQueryConfigHandler().hasAttribute(RangeCollatorAttribute.class)) {
-
-      collator = getQueryConfigHandler().getAttribute(
-          RangeCollatorAttribute.class).getRangeCollator();
-
-    }
-
     if (getQueryConfigHandler().hasAttribute(LocaleAttribute.class)) {

       locale = getQueryConfigHandler().getAttribute(LocaleAttribute.class)

@@ -155,7 +140,7 @@ public class ParametricRangeQueryNodeProcessor extends QueryNodeProcessorImpl {
     lower.setText(part1);
     upper.setText(part2);

-    return new RangeQueryNode(lower, upper, collator);
+    return new RangeQueryNode(lower, upper);

   }
@@ -642,55 +642,6 @@ public class TestQPHelper extends LuceneTestCase {
         "gack (bar blar {a TO z})");
   }

-  public void testFarsiRangeCollating() throws Exception {
-    Directory ramDir = newDirectory();
-    IndexWriter iw = new IndexWriter(ramDir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false)));
-    Document doc = new Document();
-    doc.add(newField("content", "\u0633\u0627\u0628", Field.Store.YES,
-        Field.Index.NOT_ANALYZED));
-    iw.addDocument(doc);
-    iw.close();
-    IndexSearcher is = new IndexSearcher(ramDir, true);
-
-    StandardQueryParser qp = new StandardQueryParser();
-    qp.setAnalyzer(new MockAnalyzer(MockTokenizer.WHITESPACE, false));
-
-    // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
-    // RuleBasedCollator.  However, the Arabic Locale seems to order the
-    // Farsi characters properly.
-    Collator c = Collator.getInstance(new Locale("ar"));
-    qp.setRangeCollator(c);
-
-    // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
-    // orders the U+0698 character before the U+0633 character, so the single
-    // index Term below should NOT be returned by a ConstantScoreRangeQuery
-    // with a Farsi Collator (or an Arabic one for the case when Farsi is
-    // not supported).
-
-    // Test ConstantScoreRangeQuery
-    qp.setMultiTermRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
-    ScoreDoc[] result = is.search(qp.parse("[ \u062F TO \u0698 ]", "content"),
-        null, 1000).scoreDocs;
-    assertEquals("The index Term should not be included.", 0, result.length);
-
-    result = is.search(qp.parse("[ \u0633 TO \u0638 ]", "content"), null, 1000).scoreDocs;
-    assertEquals("The index Term should be included.", 1, result.length);
-
-    // Test RangeQuery
-    qp.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
-    result = is.search(qp.parse("[ \u062F TO \u0698 ]", "content"), null, 1000).scoreDocs;
-    assertEquals("The index Term should not be included.", 0, result.length);
-
-    result = is.search(qp.parse("[ \u0633 TO \u0638 ]", "content"), null, 1000).scoreDocs;
-    assertEquals("The index Term should be included.", 1, result.length);
-
-    is.close();
-    ramDir.close();
-  }
-
   /** for testing DateTools support */
   private String getDate(String s, DateTools.Resolution resolution)
       throws Exception {
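The scenario exercised by the removed Farsi test still has an equivalent under the new scheme; a hedged sketch using the SlowCollatedTermRangeQuery added by this commit (the field name mirrors the removed test):

    // The indexed term was \u0633\u0627\u0628. Under the Arabic collator,
    // \u0698 sorts before \u0633, so the term falls outside [\u062F TO \u0698] --
    // the ordering the removed test asserted.
    Collator c = Collator.getInstance(new Locale("ar"));
    Query q = new SlowCollatedTermRangeQuery("content", "\u062F", "\u0698", true, true, c);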
@@ -60,8 +60,6 @@ public class TestAttributes extends LuceneTestCase {
         Collections.singletonMap(MultiTermRewriteMethodAttribute.class.getName()+"#multiTermRewriteMethod", MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT));
     _TestUtil.assertAttributeReflection(new PositionIncrementsAttributeImpl(),
         Collections.singletonMap(PositionIncrementsAttribute.class.getName()+"#positionIncrementsEnabled", false));
-    _TestUtil.assertAttributeReflection(new RangeCollatorAttributeImpl(),
-        Collections.singletonMap(RangeCollatorAttribute.class.getName()+"#rangeCollator", null));
   }

 }
@@ -41,7 +41,7 @@ public class RangeFilterBuilder implements FilterBuilder {
     String upperTerm = e.getAttribute("upperTerm");
     boolean includeLower = DOMUtils.getAttribute(e, "includeLower", true);
     boolean includeUpper = DOMUtils.getAttribute(e, "includeUpper", true);
-    return new TermRangeFilter(fieldName, lowerTerm, upperTerm, includeLower, includeUpper);
+    return TermRangeFilter.newStringRange(fieldName, lowerTerm, upperTerm, includeLower, includeUpper);
   }

 }
@@ -77,7 +77,7 @@ public class CharTermAttributeImpl extends AttributeImpl implements CharTermAttr
   }

   // *** TermToBytesRefAttribute interface ***
-  public final int toBytesRef(BytesRef target) {
+  public int toBytesRef(BytesRef target) {
     return UnicodeUtil.UTF16toUTF8WithHash(termBuffer, 0, termLength, target);
   }
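Removing `final` lets an attribute subclass substitute a different byte encoding for the term. A hypothetical sketch of why that matters for the LUCENE-2551 half of this commit: a collated term attribute can override toBytesRef to emit binary collation keys instead of UTF-8. The class and constructor below are illustrative, not necessarily the shipped implementation:

    public class CollatedTermAttributeImpl extends CharTermAttributeImpl {
      private final Collator collator;
      public CollatedTermAttributeImpl(Collator collator) { this.collator = collator; }
      @Override
      public int toBytesRef(BytesRef target) {
        // encode the term as a binary collation key rather than UTF-8 bytes
        target.bytes = collator.getCollationKey(toString()).toByteArray();
        target.offset = 0;
        target.length = target.bytes.length;
        return target.hashCode();
      }
    }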
@@ -78,9 +78,9 @@ public abstract class QueryParserBase {
   // maps field names to date resolutions
   Map<String,DateTools.Resolution> fieldToDateResolution = null;

-  // The collator to use when determining range inclusion,
-  // for use when constructing RangeQuerys.
-  Collator rangeCollator = null;
+  // Whether or not to analyze range terms when constructing RangeQuerys
+  // (For example, analyzing terms into collation keys for locale-sensitive RangeQuery)
+  boolean analyzeRangeTerms = false;

   boolean autoGeneratePhraseQueries;

@@ -391,27 +391,21 @@ public abstract class QueryParserBase {
   }

   /**
-   * Sets the collator used to determine index term inclusion in ranges
-   * for RangeQuerys.
-   * <p/>
-   * <strong>WARNING:</strong> Setting the rangeCollator to a non-null
-   * collator using this method will cause every single index Term in the
-   * Field referenced by lowerTerm and/or upperTerm to be examined.
-   * Depending on the number of index Terms in this Field, the operation could
-   * be very slow.
+   * Set whether or not to analyze range terms when constructing RangeQuerys.
+   * For example, setting this to true can enable analyzing terms into
+   * collation keys for locale-sensitive RangeQuery.
    *
-   * @param rc the collator to use when constructing RangeQuerys
+   * @param analyzeRangeTerms whether or not terms should be analyzed for RangeQuerys
    */
-  public void setRangeCollator(Collator rc) {
-    rangeCollator = rc;
+  public void setAnalyzeRangeTerms(boolean analyzeRangeTerms) {
+    this.analyzeRangeTerms = analyzeRangeTerms;
   }

   /**
-   * @return the collator used to determine index term inclusion in ranges
-   *         for RangeQuerys.
+   * @return whether or not to analyze range terms when constructing RangeQuerys.
    */
-  public Collator getRangeCollator() {
-    return rangeCollator;
+  public boolean getAnalyzeRangeTerms() {
+    return analyzeRangeTerms;
   }

   protected void addClause(List<BooleanClause> clauses, int conj, int mods, Query q) {

@@ -792,6 +786,36 @@ public abstract class QueryParserBase {
     return new FuzzyQuery(term,minimumSimilarity,prefixLength);
   }

+  private BytesRef analyzeRangePart(String field, String part) {
+    TokenStream source;
+
+    try {
+      source = analyzer.reusableTokenStream(field, new StringReader(part));
+      source.reset();
+    } catch (IOException e) {
+      source = analyzer.tokenStream(field, new StringReader(part));
+    }
+
+    BytesRef result = new BytesRef();
+    TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
+
+    try {
+      if (!source.incrementToken())
+        throw new IllegalArgumentException("analyzer returned no terms for range part: " + part);
+      termAtt.toBytesRef(result);
+      if (source.incrementToken())
+        throw new IllegalArgumentException("analyzer returned too many terms for range part: " + part);
+    } catch (IOException e) {
+      throw new RuntimeException("error analyzing range part: " + part, e);
+    }
+
+    try {
+      source.close();
+    } catch (IOException ignored) {}
+
+    return result;
+  }
+
   /**
    * Builds a new TermRangeQuery instance
    * @param field Field

@@ -802,7 +826,23 @@ public abstract class QueryParserBase {
    * @return new TermRangeQuery instance
    */
   protected Query newRangeQuery(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) {
-    final TermRangeQuery query = new TermRangeQuery(field, part1, part2, startInclusive, endInclusive, rangeCollator);
+    final BytesRef start;
+    final BytesRef end;
+
+    if (part1 == null) {
+      start = null;
+    } else {
+      start = analyzeRangeTerms ? analyzeRangePart(field, part1) : new BytesRef(part1);
+    }
+
+    if (part2 == null) {
+      end = null;
+    } else {
+      end = analyzeRangeTerms ? analyzeRangePart(field, part2) : new BytesRef(part2);
+    }
+
+    final TermRangeQuery query = new TermRangeQuery(field, start, end, startInclusive, endInclusive);
+
     query.setRewriteMethod(multiTermRewriteMethod);
     return query;
   }
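With newRangeQuery building BytesRef endpoints, a collated range no longer needs a Collator at query time: the keys can be computed up front. A hedged sketch of constructing a range directly over collation-key bytes, assuming the field was indexed with matching collation keys:

    Collator collator = Collator.getInstance(Locale.FRENCH);
    BytesRef lower = new BytesRef(collator.getCollationKey("école").toByteArray());
    BytesRef upper = new BytesRef(collator.getCollationKey("étude").toByteArray());
    // binary comparison of collation keys == collator order of the originals
    Query q = new TermRangeQuery("body", lower, upper, true, true);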
@@ -18,8 +18,6 @@ package org.apache.lucene.search;
  */

 import java.io.IOException;
-import java.text.Collator;
-import java.util.Locale;

 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.search.FieldCache.DocTermsIndex;

@@ -718,85 +716,6 @@ public abstract class FieldComparator {
     }
   }

-
-  /** Sorts by a field's value using the Collator for a
-   *  given Locale.
-   *
-   * <p><b>WARNING</b>: this is likely very slow; you'll
-   * get much better performance using the
-   * CollationKeyAnalyzer or ICUCollationKeyAnalyzer. */
-  public static final class StringComparatorLocale extends FieldComparator {
-
-    private final String[] values;
-    private DocTerms currentDocTerms;
-    private final String field;
-    final Collator collator;
-    private String bottom;
-    private final BytesRef tempBR = new BytesRef();
-
-    StringComparatorLocale(int numHits, String field, Locale locale) {
-      values = new String[numHits];
-      this.field = field;
-      collator = Collator.getInstance(locale);
-    }
-
-    @Override
-    public int compare(int slot1, int slot2) {
-      final String val1 = values[slot1];
-      final String val2 = values[slot2];
-      if (val1 == null) {
-        if (val2 == null) {
-          return 0;
-        }
-        return -1;
-      } else if (val2 == null) {
-        return 1;
-      }
-      return collator.compare(val1, val2);
-    }
-
-    @Override
-    public int compareBottom(int doc) {
-      final String val2 = currentDocTerms.getTerm(doc, tempBR).utf8ToString();
-      if (bottom == null) {
-        if (val2 == null) {
-          return 0;
-        }
-        return -1;
-      } else if (val2 == null) {
-        return 1;
-      }
-      return collator.compare(bottom, val2);
-    }
-
-    @Override
-    public void copy(int slot, int doc) {
-      final BytesRef br = currentDocTerms.getTerm(doc, tempBR);
-      if (br == null) {
-        values[slot] = null;
-      } else {
-        values[slot] = br.utf8ToString();
-      }
-    }
-
-    @Override
-    public FieldComparator setNextReader(AtomicReaderContext context) throws IOException {
-      currentDocTerms = FieldCache.DEFAULT.getTerms(context.reader, field);
-      return this;
-    }
-
-    @Override
-    public void setBottom(final int bottom) {
-      this.bottom = values[bottom];
-    }
-
-    @Override
-    public Comparable<?> value(int slot) {
-      final String s = values[slot];
-      return s == null ? null : new BytesRef(values[slot]);
-    }
-  }
-
   /** Sorts by field's natural Term sort order, using
    *  ordinals.  This is functionally equivalent to {@link
    *  TermValComparator}, but it first resolves the string
@@ -20,9 +20,6 @@ package org.apache.lucene.search;
 import org.apache.lucene.util.PriorityQueue;
 import org.apache.lucene.util.BytesRef;

-import java.text.Collator;
-import java.util.Locale;
-
 /**
  * Expert: Collects sorted results from Searchable's and collates them.
  * The elements put into this queue must be of type FieldDoc.

@@ -35,11 +32,6 @@ class FieldDocSortedHitQueue extends PriorityQueue<FieldDoc> {

   volatile SortField[] fields = null;

-  // used in the case where the fields are sorted by locale
-  // based strings
-  volatile Collator[] collators = null;
-
-
   /**
    * Creates a hit queue sorted by the given list of fields.
    * @param fields Fieldable names, in priority order (highest priority first).

@@ -60,7 +52,6 @@ class FieldDocSortedHitQueue extends PriorityQueue<FieldDoc> {
    */
   void setFields (SortField[] fields) {
     this.fields = fields;
-    this.collators = hasCollators (fields);
   }

@@ -69,24 +60,6 @@ class FieldDocSortedHitQueue extends PriorityQueue<FieldDoc> {
     return fields;
   }

-
-  /** Returns an array of collators, possibly <code>null</code>.  The collators
-   * correspond to any SortFields which were given a specific locale.
-   * @param fields Array of sort fields.
-   * @return Array, possibly <code>null</code>.
-   */
-  private Collator[] hasCollators (final SortField[] fields) {
-    if (fields == null) return null;
-    Collator[] ret = new Collator[fields.length];
-    for (int i=0; i<fields.length; ++i) {
-      Locale locale = fields[i].getLocale();
-      if (locale != null)
-        ret[i] = Collator.getInstance (locale);
-    }
-    return ret;
-  }
-
   /**
    * Returns whether <code>a</code> is less relevant than <code>b</code>.
    * @param a ScoreDoc

@@ -109,10 +82,8 @@ class FieldDocSortedHitQueue extends PriorityQueue<FieldDoc> {
           c = (s2 == null) ? 0 : -1;
         } else if (s2 == null) {
           c = 1;
-        } else if (fields[i].getLocale() == null) {
-          c = s1.compareTo(s2);
         } else {
-          c = collators[i].compare(s1.utf8ToString(), s2.utf8ToString());
+          c = s1.compareTo(s2);
         }
       } else {
         c = docA.fields[i].compareTo(docB.fields[i]);
@ -18,7 +18,6 @@ package org.apache.lucene.search;
 */

import java.io.IOException;
import java.util.Locale;

import org.apache.lucene.search.cache.*;
import org.apache.lucene.util.StringHelper;

@ -90,7 +89,6 @@ public class SortField {

  private String field;
  private int type;  // defaults to determining type dynamically
  private Locale locale;    // defaults to "natural order" (no Locale)
  boolean reverse = false;  // defaults to natural order
  private CachedArrayCreator<?> creator;
  public Object missingValue = null; // used for 'sortMissingFirst/Last'

@ -214,28 +212,6 @@ public class SortField {
    return this;
  }

  /** Creates a sort by terms in the given field sorted
   * according to the given locale.
   * @param field  Name of field to sort by, cannot be <code>null</code>.
   * @param locale Locale of values in the field.
   */
  public SortField (String field, Locale locale) {
    initFieldType(field, STRING);
    this.locale = locale;
  }

  /** Creates a sort, possibly in reverse, by terms in the given field sorted
   * according to the given locale.
   * @param field  Name of field to sort by, cannot be <code>null</code>.
   * @param locale Locale of values in the field.
   */
  public SortField (String field, Locale locale, boolean reverse) {
    initFieldType(field, STRING);
    this.locale = locale;
    this.reverse = reverse;
  }

  /** Creates a sort with a custom comparison function.
   * @param field Name of field to sort by; cannot be <code>null</code>.
   * @param comparator Returns a comparator for sorting hits.

@ -295,14 +271,6 @@ public class SortField {
    return type;
  }

  /** Returns the Locale by which term values are interpreted.
   * May return <code>null</code> if no Locale was specified.
   * @return Locale, or <code>null</code>.
   */
  public Locale getLocale() {
    return locale;
  }

  /** Returns the instance of a {@link FieldCache} parser that fits to the given sort type.
   * May return <code>null</code> if no parser was specified. Sorting is using the default parser then.
   * @return An instance of a {@link FieldCache} parser, or <code>null</code>.

@ -384,7 +352,6 @@ public class SortField {
        break;
    }

    if (locale != null) buffer.append('(').append(locale).append(')');
    if (creator != null) buffer.append('(').append(creator).append(')');
    if (reverse) buffer.append('!');

@ -404,7 +371,6 @@ public class SortField {
      other.field == this.field // field is always interned
      && other.type == this.type
      && other.reverse == this.reverse
      && (other.locale == null ? this.locale == null : other.locale.equals(this.locale))
      && (other.comparatorSource == null ? this.comparatorSource == null : other.comparatorSource.equals(this.comparatorSource))
      && (other.creator == null ? this.creator == null : other.creator.equals(this.creator))
    );

@ -419,7 +385,6 @@ public class SortField {
  public int hashCode() {
    int hash=type^0x346565dd + Boolean.valueOf(reverse).hashCode()^0xaf5998bb;
    if (field != null) hash += field.hashCode()^0xff5685dd;
    if (locale != null) hash += locale.hashCode()^0x08150815;
    if (comparatorSource != null) hash += comparatorSource.hashCode();
    if (creator != null) hash += creator.hashCode()^0x3aaf56ff;
    return hash;

@ -439,13 +404,6 @@ public class SortField {
   */
  public FieldComparator getComparator(final int numHits, final int sortPos) throws IOException {

    if (locale != null) {
      // TODO: it'd be nice to allow FieldCache.getStringIndex
      // to optionally accept a Locale so sorting could then use
      // the faster StringComparator impls
      return new FieldComparator.StringComparatorLocale(numHits, field, locale);
    }

    switch (type) {
      case SortField.SCORE:
        return new FieldComparator.RelevanceComparator(numHits);
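With the Locale constructors and getLocale() gone from SortField, a locale-sensitive sort is expressed as an ordinary binary STRING sort over terms that were collated at index time. A minimal sketch (the field name "city_sorted" is invented and is assumed to contain pre-computed collation keys):

    Sort sort = new Sort(new SortField("city_sorted", SortField.STRING));
    TopDocs hits = searcher.search(query, null, 10, sort);
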
@ -1,5 +1,7 @@
package org.apache.lucene.search;

import org.apache.lucene.util.BytesRef;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with

@ -17,15 +19,13 @@ package org.apache.lucene.search;
 * limitations under the License.
 */

import java.text.Collator;

/**
 * A Filter that restricts search results to a range of term
 * values in a given field.
 *
 * <p>This filter matches the documents looking for terms that fall into the
 * supplied range according to {@link
 * String#compareTo(String)}, unless a <code>Collator</code> is provided. It is not intended
 * Byte#compareTo(Byte)}. It is not intended
 * for numerical ranges; use {@link NumericRangeFilter} instead.
 *
 * <p>If you construct a large number of range filters with different ranges but on the

@ -44,39 +44,25 @@ public class TermRangeFilter extends MultiTermQueryWrapperFilter<TermRangeQuery>
   * lowerTerm is null and includeLower is true (similar for upperTerm
   * and includeUpper)
   */
  public TermRangeFilter(String fieldName, String lowerTerm, String upperTerm,
  public TermRangeFilter(String fieldName, BytesRef lowerTerm, BytesRef upperTerm,
      boolean includeLower, boolean includeUpper) {
    super(new TermRangeQuery(fieldName, lowerTerm, upperTerm, includeLower, includeUpper));
  }

  /**
   * <strong>WARNING:</strong> Using this constructor and supplying a non-null
   * value in the <code>collator</code> parameter will cause every single
   * index Term in the Field referenced by lowerTerm and/or upperTerm to be
   * examined. Depending on the number of index Terms in this Field, the
   * operation could be very slow.
   *
   * @param lowerTerm The lower bound on this range
   * @param upperTerm The upper bound on this range
   * @param includeLower Does this range include the lower bound?
   * @param includeUpper Does this range include the upper bound?
   * @param collator The collator to use when determining range inclusion; set
   *  to null to use Unicode code point ordering instead of collation.
   * @throws IllegalArgumentException if both terms are null or if
   *  lowerTerm is null and includeLower is true (similar for upperTerm
   *  and includeUpper)
   * Factory that creates a new TermRangeFilter using Strings for term text.
   */
  public TermRangeFilter(String fieldName, String lowerTerm, String upperTerm,
      boolean includeLower, boolean includeUpper,
      Collator collator) {
    super(new TermRangeQuery(fieldName, lowerTerm, upperTerm, includeLower, includeUpper, collator));
  public static TermRangeFilter newStringRange(String field, String lowerTerm, String upperTerm, boolean includeLower, boolean includeUpper) {
    BytesRef lower = lowerTerm == null ? null : new BytesRef(lowerTerm);
    BytesRef upper = upperTerm == null ? null : new BytesRef(upperTerm);
    return new TermRangeFilter(field, lower, upper, includeLower, includeUpper);
  }

  /**
   * Constructs a filter for field <code>fieldName</code> matching
   * less than or equal to <code>upperTerm</code>.
   */
  public static TermRangeFilter Less(String fieldName, String upperTerm) {
  public static TermRangeFilter Less(String fieldName, BytesRef upperTerm) {
    return new TermRangeFilter(fieldName, null, upperTerm, false, true);
  }

@ -84,22 +70,19 @@ public class TermRangeFilter extends MultiTermQueryWrapperFilter<TermRangeQuery>
   * Constructs a filter for field <code>fieldName</code> matching
   * greater than or equal to <code>lowerTerm</code>.
   */
  public static TermRangeFilter More(String fieldName, String lowerTerm) {
  public static TermRangeFilter More(String fieldName, BytesRef lowerTerm) {
    return new TermRangeFilter(fieldName, lowerTerm, null, true, false);
  }

  /** Returns the lower value of this range filter */
  public String getLowerTerm() { return query.getLowerTerm(); }
  public BytesRef getLowerTerm() { return query.getLowerTerm(); }

  /** Returns the upper value of this range filter */
  public String getUpperTerm() { return query.getUpperTerm(); }
  public BytesRef getUpperTerm() { return query.getUpperTerm(); }

  /** Returns <code>true</code> if the lower endpoint is inclusive */
  public boolean includesLower() { return query.includesLower(); }

  /** Returns <code>true</code> if the upper endpoint is inclusive */
  public boolean includesUpper() { return query.includesUpper(); }

  /** Returns the collator used to determine range inclusion, if any. */
  public Collator getCollator() { return query.getCollator(); }
}
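A minimal sketch of the two equivalent ways to build the new byte-based filter (the field name and bounds are invented; both entry points are defined in the file above):

    // String convenience factory; each non-null bound is wrapped in a BytesRef
    TermRangeFilter f1 = TermRangeFilter.newStringRange("author", "a", "m", true, false);
    // direct BytesRef form, for bounds that are already bytes
    TermRangeFilter f2 = new TermRangeFilter("author", new BytesRef("a"), new BytesRef("m"), true, false);
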
@ -18,11 +18,11 @@ package org.apache.lucene.search;
 */

import java.io.IOException;
import java.text.Collator;

import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.ToStringUtils;

/**

@ -30,7 +30,7 @@ import org.apache.lucene.util.ToStringUtils;
 *
 * <p>This query matches the documents looking for terms that fall into the
 * supplied range according to {@link
 * String#compareTo(String)}, unless a <code>Collator</code> is provided. It is not intended
 * Byte#compareTo(Byte)}. It is not intended
 * for numerical ranges; use {@link NumericRangeQuery} instead.
 *
 * <p>This query uses the {@link

@ -40,9 +40,8 @@ import org.apache.lucene.util.ToStringUtils;
 */

public class TermRangeQuery extends MultiTermQuery {
  private String lowerTerm;
  private String upperTerm;
  private Collator collator;
  private BytesRef lowerTerm;
  private BytesRef upperTerm;
  private boolean includeLower;
  private boolean includeUpper;

@ -69,55 +68,28 @@ public class TermRangeQuery extends MultiTermQuery {
   *          If true, the <code>upperTerm</code> is
   *          included in the range.
   */
  public TermRangeQuery(String field, String lowerTerm, String upperTerm, boolean includeLower, boolean includeUpper) {
    this(field, lowerTerm, upperTerm, includeLower, includeUpper, null);
  }

  /** Constructs a query selecting all terms greater/equal than
   * <code>lowerTerm</code> but less/equal than <code>upperTerm</code>.
   * <p>
   * If an endpoint is null, it is said
   * to be "open". Either or both endpoints may be open.  Open endpoints may not
   * be exclusive (you can't select all but the first or last term without
   * explicitly specifying the term to exclude.)
   * <p>
   * If <code>collator</code> is not null, it will be used to decide whether
   * index terms are within the given range, rather than using the Unicode code
   * point order in which index terms are stored.
   * <p>
   * <strong>WARNING:</strong> Using this constructor and supplying a non-null
   * value in the <code>collator</code> parameter will cause every single
   * index Term in the Field referenced by lowerTerm and/or upperTerm to be
   * examined. Depending on the number of index Terms in this Field, the
   * operation could be very slow.
   *
   * @param lowerTerm The Term text at the lower end of the range
   * @param upperTerm The Term text at the upper end of the range
   * @param includeLower
   *          If true, the <code>lowerTerm</code> is
   *          included in the range.
   * @param includeUpper
   *          If true, the <code>upperTerm</code> is
   *          included in the range.
   * @param collator The collator to use to collate index Terms, to determine
   *          their membership in the range bounded by <code>lowerTerm</code> and
   *          <code>upperTerm</code>.
   */
  public TermRangeQuery(String field, String lowerTerm, String upperTerm, boolean includeLower, boolean includeUpper,
      Collator collator) {
  public TermRangeQuery(String field, BytesRef lowerTerm, BytesRef upperTerm, boolean includeLower, boolean includeUpper) {
    super(field);
    this.lowerTerm = lowerTerm;
    this.upperTerm = upperTerm;
    this.includeLower = includeLower;
    this.includeUpper = includeUpper;
    this.collator = collator;
  }

  /**
   * Factory that creates a new TermRangeQuery using Strings for term text.
   */
  public static TermRangeQuery newStringRange(String field, String lowerTerm, String upperTerm, boolean includeLower, boolean includeUpper) {
    BytesRef lower = lowerTerm == null ? null : new BytesRef(lowerTerm);
    BytesRef upper = upperTerm == null ? null : new BytesRef(upperTerm);
    return new TermRangeQuery(field, lower, upper, includeLower, includeUpper);
  }

  /** Returns the lower value of this range query */
  public String getLowerTerm() { return lowerTerm; }
  public BytesRef getLowerTerm() { return lowerTerm; }

  /** Returns the upper value of this range query */
  public String getUpperTerm() { return upperTerm; }
  public BytesRef getUpperTerm() { return upperTerm; }

  /** Returns <code>true</code> if the lower endpoint is inclusive */
  public boolean includesLower() { return includeLower; }

@ -125,22 +97,19 @@ public class TermRangeQuery extends MultiTermQuery {
  /** Returns <code>true</code> if the upper endpoint is inclusive */
  public boolean includesUpper() { return includeUpper; }

  /** Returns the collator used to determine range inclusion, if any. */
  public Collator getCollator() { return collator; }

  @Override
  protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
    if (collator == null && lowerTerm != null && upperTerm != null && lowerTerm.compareTo(upperTerm) > 0) {
    if (lowerTerm != null && upperTerm != null && lowerTerm.compareTo(upperTerm) > 0) {
      return TermsEnum.EMPTY;
    }

    TermsEnum tenum = terms.iterator();

    if ((lowerTerm == null || (collator == null && includeLower && "".equals(lowerTerm))) && upperTerm == null) {
    if ((lowerTerm == null || (includeLower && lowerTerm.length == 0)) && upperTerm == null) {
      return tenum;
    }
    return new TermRangeTermsEnum(tenum,
        lowerTerm, upperTerm, includeLower, includeUpper, collator);
        lowerTerm, upperTerm, includeLower, includeUpper);
  }

  /** Prints a user-readable version of this query. */

@ -152,9 +121,10 @@ public class TermRangeQuery extends MultiTermQuery {
      buffer.append(":");
    }
    buffer.append(includeLower ? '[' : '{');
    buffer.append(lowerTerm != null ? ("*".equals(lowerTerm) ? "\\*" : lowerTerm) : "*");
    // TODO: all these toStrings for queries should just output the bytes, it might not be UTF-8!
    buffer.append(lowerTerm != null ? ("*".equals(lowerTerm.utf8ToString()) ? "\\*" : lowerTerm.utf8ToString()) : "*");
    buffer.append(" TO ");
    buffer.append(upperTerm != null ? ("*".equals(upperTerm) ? "\\*" : upperTerm) : "*");
    buffer.append(upperTerm != null ? ("*".equals(upperTerm.utf8ToString()) ? "\\*" : upperTerm.utf8ToString()) : "*");
    buffer.append(includeUpper ? ']' : '}');
    buffer.append(ToStringUtils.boost(getBoost()));
    return buffer.toString();

@ -164,7 +134,6 @@ public class TermRangeQuery extends MultiTermQuery {
  public int hashCode() {
    final int prime = 31;
    int result = super.hashCode();
    result = prime * result + ((collator == null) ? 0 : collator.hashCode());
    result = prime * result + (includeLower ? 1231 : 1237);
    result = prime * result + (includeUpper ? 1231 : 1237);
    result = prime * result + ((lowerTerm == null) ? 0 : lowerTerm.hashCode());

@ -181,11 +150,6 @@ public class TermRangeQuery extends MultiTermQuery {
    if (getClass() != obj.getClass())
      return false;
    TermRangeQuery other = (TermRangeQuery) obj;
    if (collator == null) {
      if (other.collator != null)
        return false;
    } else if (!collator.equals(other.collator))
      return false;
    if (includeLower != other.includeLower)
      return false;
    if (includeUpper != other.includeUpper)
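A short sketch of the resulting API (field name and bounds invented; both calls are defined above, and newStringRange simply UTF-8 encodes each non-null bound):

    TermRangeQuery q1 = TermRangeQuery.newStringRange("title", "m", "q", true, true);
    // the same query built from raw bytes:
    TermRangeQuery q2 = new TermRangeQuery("title", new BytesRef("m"), new BytesRef("q"), true, true);
    // a null bound leaves that end of the range open:
    TermRangeQuery q3 = TermRangeQuery.newStringRange("title", "m", null, true, false);
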
@ -18,7 +18,6 @@ package org.apache.lucene.search;
 */

import java.io.IOException;
import java.text.Collator;
import java.util.Comparator;

import org.apache.lucene.index.TermsEnum;

@ -33,11 +32,8 @@ import org.apache.lucene.util.BytesRef;
 */
public class TermRangeTermsEnum extends FilteredTermsEnum {

  private Collator collator;
  private String upperTermText;
  private String lowerTermText;
  private boolean includeLower;
  private boolean includeUpper;
  final private boolean includeLower;
  final private boolean includeUpper;
  final private BytesRef lowerBytesRef;
  final private BytesRef upperBytesRef;
  private final Comparator<BytesRef> termComp;

@ -53,79 +49,61 @@ public class TermRangeTermsEnum extends FilteredTermsEnum {
   *
   * @param tenum
   *          TermsEnum to filter
   * @param lowerTermText
   * @param lowerTerm
   *          The term text at the lower end of the range
   * @param upperTermText
   * @param upperTerm
   *          The term text at the upper end of the range
   * @param includeLower
   *          If true, the <code>lowerTerm</code> is included in the range.
   * @param includeUpper
   *          If true, the <code>upperTerm</code> is included in the range.
   * @param collator
   *          The collator to use to collate index Terms, to determine their
   *          membership in the range bounded by <code>lowerTerm</code> and
   *          <code>upperTerm</code>.
   *
   * @throws IOException
   */
  public TermRangeTermsEnum(TermsEnum tenum, String lowerTermText, String upperTermText,
      boolean includeLower, boolean includeUpper, Collator collator) throws IOException {
  public TermRangeTermsEnum(TermsEnum tenum, BytesRef lowerTerm, BytesRef upperTerm,
      boolean includeLower, boolean includeUpper) throws IOException {
    super(tenum);
    this.collator = collator;
    this.upperTermText = upperTermText;
    this.lowerTermText = lowerTermText;
    this.includeLower = includeLower;
    this.includeUpper = includeUpper;

    // do a little bit of normalization...
    // open ended range queries should always be inclusive.
    if (this.lowerTermText == null) {
      this.lowerTermText = "";
    if (lowerTerm == null) {
      this.lowerBytesRef = new BytesRef();
      this.includeLower = true;
    } else {
      this.lowerBytesRef = lowerTerm;
      this.includeLower = includeLower;
    }
    lowerBytesRef = new BytesRef(this.lowerTermText);

    if (this.upperTermText == null) {
    if (upperTerm == null) {
      this.includeUpper = true;
      upperBytesRef = null;
    } else {
      upperBytesRef = new BytesRef(upperTermText);
      this.includeUpper = includeUpper;
      upperBytesRef = upperTerm;
    }

    BytesRef startBytesRef = (collator == null) ? lowerBytesRef : new BytesRef("");
    setInitialSeekTerm(startBytesRef);
    setInitialSeekTerm(lowerBytesRef);
    termComp = getComparator();
  }

  @Override
  protected AcceptStatus accept(BytesRef term) {
    if (collator == null) {
      if (!this.includeLower && term.equals(lowerBytesRef))
        return AcceptStatus.NO;
      // Use this field's default sort ordering
      if (upperBytesRef != null) {
        final int cmp = termComp.compare(upperBytesRef, term);
        /*
         * if beyond the upper term, or is exclusive and this is equal to
         * the upper term, break out
         */
        if ((cmp < 0) ||
            (!includeUpper && cmp==0)) {
          return AcceptStatus.END;
        }
      }
      return AcceptStatus.YES;
    } else {
      if ((includeLower
           ? collator.compare(term.utf8ToString(), lowerTermText) >= 0
           : collator.compare(term.utf8ToString(), lowerTermText) > 0)
          && (upperTermText == null
              || (includeUpper
                  ? collator.compare(term.utf8ToString(), upperTermText) <= 0
                  : collator.compare(term.utf8ToString(), upperTermText) < 0))) {
        return AcceptStatus.YES;
      }
    if (!this.includeLower && term.equals(lowerBytesRef))
      return AcceptStatus.NO;

    // Use this field's default sort ordering
    if (upperBytesRef != null) {
      final int cmp = termComp.compare(upperBytesRef, term);
      /*
       * if beyond the upper term, or is exclusive and this is equal to
       * the upper term, break out
       */
      if ((cmp < 0) ||
          (!includeUpper && cmp==0)) {
        return AcceptStatus.END;
      }
    }

    return AcceptStatus.YES;
  }
}
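The constructor normalization above makes an open (null) bound behave as inclusive and seeds the enum at the lower bound. A tiny sketch of driving the enum directly ("terms" is assumed to be an org.apache.lucene.index.Terms instance for some field):

    // all terms in ("b".."d"]:
    TermsEnum bounded = new TermRangeTermsEnum(terms.iterator(),
        new BytesRef("b"), new BytesRef("d"), false, true);
    // null lower bound: enumeration starts at the empty term and is forced inclusive
    TermsEnum upTo = new TermRangeTermsEnum(terms.iterator(),
        null, new BytesRef("d"), true, true);
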
@ -39,7 +39,10 @@ import java.nio.ByteBuffer;
 * <p/>
 *
 * @lucene.experimental
 * @deprecated Implement {@link TermToBytesRefAttribute} and store bytes directly
 * instead. This class will be removed in Lucene 5.0
 */
@Deprecated
public final class IndexableBinaryStringTools {

  private static final CodingCase[] CODING_CASES = {
@ -577,50 +577,6 @@ public class TestQueryParser extends LuceneTestCase {
    assertQueryEquals("[\\* TO \"*\"]",null,"[\\* TO \\*]");
  }

  public void testFarsiRangeCollating() throws Exception {
    Directory ramDir = newDirectory();
    IndexWriter iw = new IndexWriter(ramDir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false)));
    Document doc = new Document();
    doc.add(newField("content","\u0633\u0627\u0628",
                     Field.Store.YES, Field.Index.NOT_ANALYZED));
    iw.addDocument(doc);
    iw.close();
    IndexSearcher is = new IndexSearcher(ramDir, true);

    QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "content", new MockAnalyzer(MockTokenizer.WHITESPACE, false));

    // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
    // RuleBasedCollator. However, the Arabic Locale seems to order the Farsi
    // characters properly.
    Collator c = Collator.getInstance(new Locale("ar"));
    qp.setRangeCollator(c);

    // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
    // orders the U+0698 character before the U+0633 character, so the single
    // index Term below should NOT be returned by a ConstantScoreRangeQuery
    // with a Farsi Collator (or an Arabic one for the case when Farsi is not
    // supported).

    // Test ConstantScoreRangeQuery
    qp.setMultiTermRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
    ScoreDoc[] result = is.search(qp.parse("[ \u062F TO \u0698 ]"), null, 1000).scoreDocs;
    assertEquals("The index Term should not be included.", 0, result.length);

    result = is.search(qp.parse("[ \u0633 TO \u0638 ]"), null, 1000).scoreDocs;
    assertEquals("The index Term should be included.", 1, result.length);

    // Test TermRangeQuery
    qp.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
    result = is.search(qp.parse("[ \u062F TO \u0698 ]"), null, 1000).scoreDocs;
    assertEquals("The index Term should not be included.", 0, result.length);

    result = is.search(qp.parse("[ \u0633 TO \u0638 ]"), null, 1000).scoreDocs;
    assertEquals("The index Term should be included.", 1, result.length);

    is.close();
    ramDir.close();
  }

  private String escapeDateString(String s) {
    if (s.indexOf(" ") > -1) {
      return "\"" + s + "\"";

@ -1260,4 +1216,41 @@ public class TestQueryParser extends LuceneTestCase {
    Query unexpanded = new TermQuery(new Term("field", "dogs"));
    assertEquals(unexpanded, smart.parse("\"dogs\""));
  }

  /**
   * Mock collation analyzer: indexes terms as "collated" + term
   */
  private class MockCollationFilter extends TokenFilter {
    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

    protected MockCollationFilter(TokenStream input) {
      super(input);
    }

    @Override
    public boolean incrementToken() throws IOException {
      if (input.incrementToken()) {
        String term = termAtt.toString();
        termAtt.setEmpty().append("collated").append(term);
        return true;
      } else {
        return false;
      }
    }

  }

  private class MockCollationAnalyzer extends Analyzer {
    @Override
    public TokenStream tokenStream(String fieldName, Reader reader) {
      return new MockCollationFilter(new MockTokenizer(reader, MockTokenizer.WHITESPACE, true));
    }
  }

  public void testCollatedRange() throws Exception {
    QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockCollationAnalyzer());
    qp.setAnalyzeRangeTerms(true);
    Query expected = TermRangeQuery.newStringRange("field", "collatedabc", "collateddef", true, true);
    Query actual = qp.parse("[abc TO def]");
    assertEquals(expected, actual);
  }
}
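The mock analyzer above stands in for a real collating analyzer. A hedged sketch of the production equivalent (CollationKeyAnalyzer and its single-Collator constructor are assumed from the analysis module; the field name and locale are invented, and the same analyzer must also have been used when indexing that field):

    Analyzer collating = new CollationKeyAnalyzer(Collator.getInstance(new Locale("da", "dk")));
    QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "city", collating);
    qp.setAnalyzeRangeTerms(true);            // range bounds run through the analyzer too
    Query q = qp.parse("[Aarhus TO Odense]"); // matches on indexed collation keys
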
@ -35,7 +35,7 @@ public class TestConstantScoreQuery extends LuceneTestCase {
  public void testCSQ() throws Exception {
    final Query q1 = new ConstantScoreQuery(new TermQuery(new Term("a", "b")));
    final Query q2 = new ConstantScoreQuery(new TermQuery(new Term("a", "c")));
    final Query q3 = new ConstantScoreQuery(new TermRangeFilter("a", "b", "c", true, true));
    final Query q3 = new ConstantScoreQuery(TermRangeFilter.newStringRange("a", "b", "c", true, true));
    QueryUtils.check(q1);
    QueryUtils.check(q2);
    QueryUtils.checkEqual(q1,q1);
@ -61,12 +61,12 @@ public class TestDateFilter extends LuceneTestCase {

    // filter that should preserve matches
    // DateFilter df1 = DateFilter.Before("datefield", now);
    TermRangeFilter df1 = new TermRangeFilter("datefield", DateTools
    TermRangeFilter df1 = TermRangeFilter.newStringRange("datefield", DateTools
        .timeToString(now - 2000, DateTools.Resolution.MILLISECOND), DateTools
        .timeToString(now, DateTools.Resolution.MILLISECOND), false, true);
    // filter that should discard matches
    // DateFilter df2 = DateFilter.Before("datefield", now - 999999);
    TermRangeFilter df2 = new TermRangeFilter("datefield", DateTools
    TermRangeFilter df2 = TermRangeFilter.newStringRange("datefield", DateTools
        .timeToString(0, DateTools.Resolution.MILLISECOND), DateTools
        .timeToString(now - 2000, DateTools.Resolution.MILLISECOND), true,
        false);

@ -128,13 +128,13 @@ public class TestDateFilter extends LuceneTestCase {

    // filter that should preserve matches
    // DateFilter df1 = DateFilter.After("datefield", now);
    TermRangeFilter df1 = new TermRangeFilter("datefield", DateTools
    TermRangeFilter df1 = TermRangeFilter.newStringRange("datefield", DateTools
        .timeToString(now, DateTools.Resolution.MILLISECOND), DateTools
        .timeToString(now + 999999, DateTools.Resolution.MILLISECOND), true,
        false);
    // filter that should discard matches
    // DateFilter df2 = DateFilter.After("datefield", now + 999999);
    TermRangeFilter df2 = new TermRangeFilter("datefield", DateTools
    TermRangeFilter df2 = TermRangeFilter.newStringRange("datefield", DateTools
        .timeToString(now + 999999, DateTools.Resolution.MILLISECOND),
        DateTools.timeToString(now + 999999999,
            DateTools.Resolution.MILLISECOND), false, true);
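DateTools.timeToString produces fixed-width, lexicographically ordered text, which is why a plain byte/string range is correct for dates. A minimal sketch (field name and time window invented):

    long now = System.currentTimeMillis();
    TermRangeFilter lastDay = TermRangeFilter.newStringRange("datefield",
        DateTools.timeToString(now - 24L * 60 * 60 * 1000, DateTools.Resolution.MILLISECOND),
        DateTools.timeToString(now, DateTools.Resolution.MILLISECOND),
        true, true);
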
@ -187,7 +187,7 @@ public class TestFilteredQuery extends LuceneTestCase {
   * This tests FilteredQuery's rewrite correctness
   */
  public void testRangeQuery() throws Exception {
    TermRangeQuery rq = new TermRangeQuery(
    TermRangeQuery rq = TermRangeQuery.newStringRange(
        "sorter", "b", "d", true, true);

    Query filteredquery = new FilteredQuery(rq, filter);
@ -92,25 +92,17 @@ public class TestMultiTermConstantScore extends BaseTestRangeFilter {

  /** macro for readability */
  public static Query csrq(String f, String l, String h, boolean il, boolean ih) {
    TermRangeQuery query = new TermRangeQuery(f, l, h, il, ih);
    TermRangeQuery query = TermRangeQuery.newStringRange(f, l, h, il, ih);
    query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
    return query;
  }

  public static Query csrq(String f, String l, String h, boolean il, boolean ih, MultiTermQuery.RewriteMethod method) {
    TermRangeQuery query = new TermRangeQuery(f, l, h, il, ih);
    TermRangeQuery query = TermRangeQuery.newStringRange(f, l, h, il, ih);
    query.setRewriteMethod(method);
    return query;
  }

  /** macro for readability */
  public static Query csrq(String f, String l, String h, boolean il,
      boolean ih, Collator c) {
    TermRangeQuery query = new TermRangeQuery(f, l, h, il, ih, c);
    query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
    return query;
  }

  /** macro for readability */
  public static Query cspq(Term prefix) {
    PrefixQuery query = new PrefixQuery(prefix);

@ -141,15 +133,6 @@ public class TestMultiTermConstantScore extends BaseTestRangeFilter {
        "data", "pr*t?j")));
  }

  @Test
  public void testBasicsRngCollating() throws IOException {
    Collator c = Collator.getInstance(Locale.ENGLISH);
    QueryUtils.check(csrq("data", "1", "6", T, T, c));
    QueryUtils.check(csrq("data", "A", "Z", T, T, c));
    QueryUtils.checkUnequal(csrq("data", "1", "6", T, T, c), csrq("data", "A",
        "Z", T, T, c));
  }

  @Test
  public void testEqualScores() throws IOException {
    // NOTE: uses index build in *this* setUp

@ -262,7 +245,7 @@ public class TestMultiTermConstantScore extends BaseTestRangeFilter {
    // first do a regular TermRangeQuery which uses term expansion so
    // docs with more terms in range get higher scores

    Query rq = new TermRangeQuery("data", "1", "4", T, T);
    Query rq = TermRangeQuery.newStringRange("data", "1", "4", T, T);

    ScoreDoc[] expected = search.search(rq, null, 1000).scoreDocs;
    int numHits = expected.length;

@ -415,92 +398,6 @@ public class TestMultiTermConstantScore extends BaseTestRangeFilter {
    search.close();
  }

  @Test
  public void testRangeQueryIdCollating() throws IOException {
    // NOTE: uses index build in *super* setUp

    IndexReader reader = signedIndexReader;
    IndexSearcher search = newSearcher(reader);

    int medId = ((maxId - minId) / 2);

    String minIP = pad(minId);
    String maxIP = pad(maxId);
    String medIP = pad(medId);

    int numDocs = reader.numDocs();

    assertEquals("num of docs", numDocs, 1 + maxId - minId);

    ScoreDoc[] result;

    Collator c = Collator.getInstance(Locale.ENGLISH);

    // test id, bounded on both ends

    result = search.search(csrq("id", minIP, maxIP, T, T, c), null, numDocs).scoreDocs;
    assertEquals("find all", numDocs, result.length);

    result = search.search(csrq("id", minIP, maxIP, T, F, c), null, numDocs).scoreDocs;
    assertEquals("all but last", numDocs - 1, result.length);

    result = search.search(csrq("id", minIP, maxIP, F, T, c), null, numDocs).scoreDocs;
    assertEquals("all but first", numDocs - 1, result.length);

    result = search.search(csrq("id", minIP, maxIP, F, F, c), null, numDocs).scoreDocs;
    assertEquals("all but ends", numDocs - 2, result.length);

    result = search.search(csrq("id", medIP, maxIP, T, T, c), null, numDocs).scoreDocs;
    assertEquals("med and up", 1 + maxId - medId, result.length);

    result = search.search(csrq("id", minIP, medIP, T, T, c), null, numDocs).scoreDocs;
    assertEquals("up to med", 1 + medId - minId, result.length);

    // unbounded id

    result = search.search(csrq("id", minIP, null, T, F, c), null, numDocs).scoreDocs;
    assertEquals("min and up", numDocs, result.length);

    result = search.search(csrq("id", null, maxIP, F, T, c), null, numDocs).scoreDocs;
    assertEquals("max and down", numDocs, result.length);

    result = search.search(csrq("id", minIP, null, F, F, c), null, numDocs).scoreDocs;
    assertEquals("not min, but up", numDocs - 1, result.length);

    result = search.search(csrq("id", null, maxIP, F, F, c), null, numDocs).scoreDocs;
    assertEquals("not max, but down", numDocs - 1, result.length);

    result = search.search(csrq("id", medIP, maxIP, T, F, c), null, numDocs).scoreDocs;
    assertEquals("med and up, not max", maxId - medId, result.length);

    result = search.search(csrq("id", minIP, medIP, F, T, c), null, numDocs).scoreDocs;
    assertEquals("not min, up to med", medId - minId, result.length);

    // very small sets

    result = search.search(csrq("id", minIP, minIP, F, F, c), null, numDocs).scoreDocs;
    assertEquals("min,min,F,F,c", 0, result.length);
    result = search.search(csrq("id", medIP, medIP, F, F, c), null, numDocs).scoreDocs;
    assertEquals("med,med,F,F,c", 0, result.length);
    result = search.search(csrq("id", maxIP, maxIP, F, F, c), null, numDocs).scoreDocs;
    assertEquals("max,max,F,F,c", 0, result.length);

    result = search.search(csrq("id", minIP, minIP, T, T, c), null, numDocs).scoreDocs;
    assertEquals("min,min,T,T,c", 1, result.length);
    result = search.search(csrq("id", null, minIP, F, T, c), null, numDocs).scoreDocs;
    assertEquals("nul,min,F,T,c", 1, result.length);

    result = search.search(csrq("id", maxIP, maxIP, T, T, c), null, numDocs).scoreDocs;
    assertEquals("max,max,T,T,c", 1, result.length);
    result = search.search(csrq("id", maxIP, null, T, F, c), null, numDocs).scoreDocs;
    assertEquals("max,nul,T,T,c", 1, result.length);

    result = search.search(csrq("id", medIP, medIP, T, T, c), null, numDocs).scoreDocs;
    assertEquals("med,med,T,T,c", 1, result.length);

    search.close();
  }

  @Test
  public void testRangeQueryRand() throws IOException {
    // NOTE: uses index build in *super* setUp

@ -564,151 +461,4 @@ public class TestMultiTermConstantScore extends BaseTestRangeFilter {

    search.close();
  }

  @Test
  public void testRangeQueryRandCollating() throws IOException {
    // NOTE: uses index build in *super* setUp

    // using the unsigned index because collation seems to ignore hyphens
    IndexReader reader = unsignedIndexReader;
    IndexSearcher search = newSearcher(reader);

    String minRP = pad(unsignedIndexDir.minR);
    String maxRP = pad(unsignedIndexDir.maxR);

    int numDocs = reader.numDocs();

    assertEquals("num of docs", numDocs, 1 + maxId - minId);

    ScoreDoc[] result;

    Collator c = Collator.getInstance(Locale.ENGLISH);

    // test extremes, bounded on both ends

    result = search.search(csrq("rand", minRP, maxRP, T, T, c), null, numDocs).scoreDocs;
    assertEquals("find all", numDocs, result.length);

    result = search.search(csrq("rand", minRP, maxRP, T, F, c), null, numDocs).scoreDocs;
    assertEquals("all but biggest", numDocs - 1, result.length);

    result = search.search(csrq("rand", minRP, maxRP, F, T, c), null, numDocs).scoreDocs;
    assertEquals("all but smallest", numDocs - 1, result.length);

    result = search.search(csrq("rand", minRP, maxRP, F, F, c), null, numDocs).scoreDocs;
    assertEquals("all but extremes", numDocs - 2, result.length);

    // unbounded

    result = search.search(csrq("rand", minRP, null, T, F, c), null, numDocs).scoreDocs;
    assertEquals("smallest and up", numDocs, result.length);

    result = search.search(csrq("rand", null, maxRP, F, T, c), null, numDocs).scoreDocs;
    assertEquals("biggest and down", numDocs, result.length);

    result = search.search(csrq("rand", minRP, null, F, F, c), null, numDocs).scoreDocs;
    assertEquals("not smallest, but up", numDocs - 1, result.length);

    result = search.search(csrq("rand", null, maxRP, F, F, c), null, numDocs).scoreDocs;
    assertEquals("not biggest, but down", numDocs - 1, result.length);

    // very small sets

    result = search.search(csrq("rand", minRP, minRP, F, F, c), null, numDocs).scoreDocs;
    assertEquals("min,min,F,F,c", 0, result.length);
    result = search.search(csrq("rand", maxRP, maxRP, F, F, c), null, numDocs).scoreDocs;
    assertEquals("max,max,F,F,c", 0, result.length);

    result = search.search(csrq("rand", minRP, minRP, T, T, c), null, numDocs).scoreDocs;
    assertEquals("min,min,T,T,c", 1, result.length);
    result = search.search(csrq("rand", null, minRP, F, T, c), null, numDocs).scoreDocs;
    assertEquals("nul,min,F,T,c", 1, result.length);

    result = search.search(csrq("rand", maxRP, maxRP, T, T, c), null, numDocs).scoreDocs;
    assertEquals("max,max,T,T,c", 1, result.length);
    result = search.search(csrq("rand", maxRP, null, T, F, c), null, numDocs).scoreDocs;
    assertEquals("max,nul,T,T,c", 1, result.length);

    search.close();
  }

  @Test
  public void testFarsi() throws Exception {

    /* build an index */
    Directory farsiIndex = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random, farsiIndex, new MockAnalyzer(MockTokenizer.SIMPLE, true));
    Document doc = new Document();
    doc.add(newField("content", "\u0633\u0627\u0628", Field.Store.YES,
        Field.Index.NOT_ANALYZED));
    doc
        .add(newField("body", "body", Field.Store.YES,
            Field.Index.NOT_ANALYZED));
    writer.addDocument(doc);

    IndexReader reader = writer.getReader();
    writer.close();

    IndexSearcher search = newSearcher(reader);

    // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
    // RuleBasedCollator. However, the Arabic Locale seems to order the Farsi
    // characters properly.
    Collator c = Collator.getInstance(new Locale("ar"));

    // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
    // orders the U+0698 character before the U+0633 character, so the single
    // index Term below should NOT be returned by a ConstantScoreRangeQuery
    // with a Farsi Collator (or an Arabic one for the case when Farsi is
    // not supported).
    ScoreDoc[] result = search.search(csrq("content", "\u062F", "\u0698", T, T,
        c), null, 1000).scoreDocs;
    assertEquals("The index Term should not be included.", 0, result.length);

    result = search.search(csrq("content", "\u0633", "\u0638", T, T, c), null,
        1000).scoreDocs;
    assertEquals("The index Term should be included.", 1, result.length);
    search.close();
    reader.close();
    farsiIndex.close();
  }

  @Test
  public void testDanish() throws Exception {

    /* build an index */
    Directory danishIndex = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random, danishIndex, new MockAnalyzer(MockTokenizer.SIMPLE, true));

    // Danish collation orders the words below in the given order
    // (example taken from TestSort.testInternationalSort() ).
    String[] words = { "H\u00D8T", "H\u00C5T", "MAND" };
    for (int docnum = 0 ; docnum < words.length ; ++docnum) {
      Document doc = new Document();
      doc.add(newField("content", words[docnum],
          Field.Store.YES, Field.Index.NOT_ANALYZED));
      doc.add(newField("body", "body",
          Field.Store.YES, Field.Index.NOT_ANALYZED));
      writer.addDocument(doc);
    }
    IndexReader reader = writer.getReader();
    writer.close();

    IndexSearcher search = newSearcher(reader);

    Collator c = Collator.getInstance(new Locale("da", "dk"));

    // Unicode order would not include "H\u00C5T" in [ "H\u00D8T", "MAND" ],
    // but Danish collation does.
    ScoreDoc[] result = search.search
        (csrq("content", "H\u00D8T", "MAND", F, F, c), null, 1000).scoreDocs;
    assertEquals("The index Term should be included.", 1, result.length);

    result = search.search
        (csrq("content", "H\u00C5T", "MAND", F, F, c), null, 1000).scoreDocs;
    assertEquals("The index Term should not be included.", 0, result.length);
    search.close();
    reader.close();
    danishIndex.close();
  }
}
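The csrq helper above is the surviving idiom: build the byte-based range, then pick a rewrite method. A standalone sketch (field and bounds invented; the constant-score rewrite gives every match the same score):

    TermRangeQuery q = TermRangeQuery.newStringRange("id", "0100", "0200", true, true);
    q.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
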
@ -112,7 +112,7 @@ public class TestMultiTermQueryRewrites extends LuceneTestCase {
  }

  private void checkDuplicateTerms(MultiTermQuery.RewriteMethod method) throws Exception {
    final MultiTermQuery mtq = new TermRangeQuery("data", "2", "7", true, true);
    final MultiTermQuery mtq = TermRangeQuery.newStringRange("data", "2", "7", true, true);
    mtq.setRewriteMethod(method);
    final Query q1 = searcher.rewrite(mtq);
    final Query q2 = multiSearcher.rewrite(mtq);

@ -158,7 +158,7 @@ public class TestMultiTermQueryRewrites extends LuceneTestCase {
    final MultiTermQuery mtq = new MultiTermQuery("data") {
      @Override
      protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
        return new TermRangeTermsEnum(terms.iterator(), "2", "7", true, true, null) {
        return new TermRangeTermsEnum(terms.iterator(), new BytesRef("2"), new BytesRef("7"), true, true) {
          final BoostAttribute boostAtt =
            attributes().addAttribute(BoostAttribute.class);

@ -203,7 +203,7 @@ public class TestMultiTermQueryRewrites extends LuceneTestCase {
    // default gets restored automatically by LuceneTestCase:
    BooleanQuery.setMaxClauseCount(3);

    final MultiTermQuery mtq = new TermRangeQuery("data", "2", "7", true, true);
    final MultiTermQuery mtq = TermRangeQuery.newStringRange("data", "2", "7", true, true);
    mtq.setRewriteMethod(method);
    try {
      multiSearcherDupls.rewrite(mtq);

@ -219,7 +219,7 @@ public class TestMultiTermQueryRewrites extends LuceneTestCase {
    // default gets restored automatically by LuceneTestCase:
    BooleanQuery.setMaxClauseCount(3);

    final MultiTermQuery mtq = new TermRangeQuery("data", "2", "7", true, true);
    final MultiTermQuery mtq = TermRangeQuery.newStringRange("data", "2", "7", true, true);
    mtq.setRewriteMethod(method);
    multiSearcherDupls.rewrite(mtq);
  }
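The anonymous MultiTermQuery above is the extension pattern for supplying a custom filtered enum. A trimmed sketch of just that pattern (grounded in the lines above; the toString override is added only because Query requires one, and its text is invented):

    MultiTermQuery mtq = new MultiTermQuery("data") {
      @Override
      protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
        // byte-based range ["2".."7"], inclusive on both ends
        return new TermRangeTermsEnum(terms.iterator(), new BytesRef("2"), new BytesRef("7"), true, true);
      }
      @Override
      public String toString(String field) {
        return "customRange";
      }
    };
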
@ -67,7 +67,7 @@ public class TestMultiValuedNumericRangeQuery extends LuceneTestCase {
      if (lower>upper) {
        int a=lower; lower=upper; upper=a;
      }
      TermRangeQuery cq=new TermRangeQuery("asc", format.format(lower), format.format(upper), true, true);
      TermRangeQuery cq=TermRangeQuery.newStringRange("asc", format.format(lower), format.format(upper), true, true);
      NumericRangeQuery<Integer> tq=NumericRangeQuery.newIntRange("trie", lower, upper, true, true);
      TopDocs trTopDocs = searcher.search(cq, 1);
      TopDocs nrTopDocs = searcher.search(tq, 1);
@ -344,12 +344,10 @@ public class TestNumericRangeQuery32 extends LuceneTestCase {
      final BytesRef lowerBytes = new BytesRef(NumericUtils.BUF_SIZE_INT), upperBytes = new BytesRef(NumericUtils.BUF_SIZE_INT);
      NumericUtils.intToPrefixCoded(lower, 0, lowerBytes);
      NumericUtils.intToPrefixCoded(upper, 0, upperBytes);
      // TODO: when new TermRange ctors with BytesRef available, use them and do not convert to string!
      final String lowerString = lowerBytes.utf8ToString(), upperString = upperBytes.utf8ToString();

      // test inclusive range
      NumericRangeQuery<Integer> tq=NumericRangeQuery.newIntRange(field, precisionStep, lower, upper, true, true);
      TermRangeQuery cq=new TermRangeQuery(field, lowerString, upperString, true, true);
      TermRangeQuery cq=new TermRangeQuery(field, lowerBytes, upperBytes, true, true);
      TopDocs tTopDocs = searcher.search(tq, 1);
      TopDocs cTopDocs = searcher.search(cq, 1);
      assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );

@ -357,7 +355,7 @@ public class TestNumericRangeQuery32 extends LuceneTestCase {
      termCountC += cq.getTotalNumberOfTerms();
      // test exclusive range
      tq=NumericRangeQuery.newIntRange(field, precisionStep, lower, upper, false, false);
      cq=new TermRangeQuery(field, lowerString, upperString, false, false);
      cq=new TermRangeQuery(field, lowerBytes, upperBytes, false, false);
      tTopDocs = searcher.search(tq, 1);
      cTopDocs = searcher.search(cq, 1);
      assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );

@ -365,7 +363,7 @@ public class TestNumericRangeQuery32 extends LuceneTestCase {
      termCountC += cq.getTotalNumberOfTerms();
      // test left exclusive range
      tq=NumericRangeQuery.newIntRange(field, precisionStep, lower, upper, false, true);
      cq=new TermRangeQuery(field, lowerString, upperString, false, true);
      cq=new TermRangeQuery(field, lowerBytes, upperBytes, false, true);
      tTopDocs = searcher.search(tq, 1);
      cTopDocs = searcher.search(cq, 1);
      assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );

@ -373,7 +371,7 @@ public class TestNumericRangeQuery32 extends LuceneTestCase {
      termCountC += cq.getTotalNumberOfTerms();
      // test right exclusive range
      tq=NumericRangeQuery.newIntRange(field, precisionStep, lower, upper, true, false);
      cq=new TermRangeQuery(field, lowerString, upperString, true, false);
      cq=new TermRangeQuery(field, lowerBytes, upperBytes, true, false);
      tTopDocs = searcher.search(tq, 1);
      cTopDocs = searcher.search(cq, 1);
      assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
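This is the payoff of the BytesRef constructor: numeric prefix-coded terms can be range-queried directly, without the lossy detour through utf8ToString that the removed TODO complained about. A minimal sketch (the field name is invented; the calls match the test above):

    BytesRef lo = new BytesRef(NumericUtils.BUF_SIZE_INT);
    BytesRef hi = new BytesRef(NumericUtils.BUF_SIZE_INT);
    NumericUtils.intToPrefixCoded(42, 0, lo);  // shift 0 = full precision
    NumericUtils.intToPrefixCoded(99, 0, hi);
    TermRangeQuery q = new TermRangeQuery("price", lo, hi, true, true);
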
@ -361,12 +361,10 @@ public class TestNumericRangeQuery64 extends LuceneTestCase {
      final BytesRef lowerBytes = new BytesRef(NumericUtils.BUF_SIZE_LONG), upperBytes = new BytesRef(NumericUtils.BUF_SIZE_LONG);
      NumericUtils.longToPrefixCoded(lower, 0, lowerBytes);
      NumericUtils.longToPrefixCoded(upper, 0, upperBytes);
      // TODO: when new TermRange ctors with BytesRef available, use them and do not convert to string!
      final String lowerString = lowerBytes.utf8ToString(), upperString = upperBytes.utf8ToString();

      // test inclusive range
      NumericRangeQuery<Long> tq=NumericRangeQuery.newLongRange(field, precisionStep, lower, upper, true, true);
      TermRangeQuery cq=new TermRangeQuery(field, lowerString, upperString, true, true);
      TermRangeQuery cq=new TermRangeQuery(field, lowerBytes, upperBytes, true, true);
      TopDocs tTopDocs = searcher.search(tq, 1);
      TopDocs cTopDocs = searcher.search(cq, 1);
      assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );

@ -374,7 +372,7 @@ public class TestNumericRangeQuery64 extends LuceneTestCase {
      termCountC += cq.getTotalNumberOfTerms();
      // test exclusive range
      tq=NumericRangeQuery.newLongRange(field, precisionStep, lower, upper, false, false);
      cq=new TermRangeQuery(field, lowerString, upperString, false, false);
      cq=new TermRangeQuery(field, lowerBytes, upperBytes, false, false);
      tTopDocs = searcher.search(tq, 1);
      cTopDocs = searcher.search(cq, 1);
      assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );

@ -382,7 +380,7 @@ public class TestNumericRangeQuery64 extends LuceneTestCase {
      termCountC += cq.getTotalNumberOfTerms();
      // test left exclusive range
      tq=NumericRangeQuery.newLongRange(field, precisionStep, lower, upper, false, true);
      cq=new TermRangeQuery(field, lowerString, upperString, false, true);
      cq=new TermRangeQuery(field, lowerBytes, upperBytes, false, true);
      tTopDocs = searcher.search(tq, 1);
      cTopDocs = searcher.search(cq, 1);
      assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );

@ -390,7 +388,7 @@ public class TestNumericRangeQuery64 extends LuceneTestCase {
      termCountC += cq.getTotalNumberOfTerms();
      // test right exclusive range
      tq=NumericRangeQuery.newLongRange(field, precisionStep, lower, upper, true, false);
      cq=new TermRangeQuery(field, lowerString, upperString, true, false);
      cq=new TermRangeQuery(field, lowerBytes, upperBytes, true, false);
      tTopDocs = searcher.search(tq, 1);
      cTopDocs = searcher.search(cq, 1);
      assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
@ -18,12 +18,8 @@ package org.apache.lucene.search;
 */

import java.io.IOException;
import java.text.Collator;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Locale;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

@ -110,11 +106,6 @@ public class TestSort extends LuceneTestCase {
    { "d", "m", null, null, null, null, null, null, null, null, null, null}
  };

  // the sort order of Ø versus U depends on the version of the rules being used
  // for the inherited root locale: Ø's order isnt specified in Locale.US since
  // its not used in english.
  private boolean oStrokeFirst = Collator.getInstance(new Locale("")).compare("Ø", "U") < 0;

  // create an index of all the documents, or just the x, or just the y documents
  private IndexSearcher getIndex (boolean even, boolean odd)
    throws IOException {

@ -564,12 +555,6 @@ public class TestSort extends LuceneTestCase {
    sort.setSort (new SortField ("string", SortField.STRING, true) );
    assertMatches (full, queryF, sort, "IJZ");

    sort.setSort (new SortField ("i18n", Locale.ENGLISH));
    assertMatches (full, queryF, sort, "ZJI");

    sort.setSort (new SortField ("i18n", Locale.ENGLISH, true));
    assertMatches (full, queryF, sort, "IJZ");

    sort.setSort (new SortField ("int", SortField.INT) );
    assertMatches (full, queryF, sort, "IZJ");

@ -630,36 +615,6 @@ public class TestSort extends LuceneTestCase {
    assertMatches (full, queryX, sort, "GICEA");
  }

  // test using a Locale for sorting strings
  public void testLocaleSort() throws Exception {
    sort.setSort (new SortField ("string", Locale.US) );
    assertMatches (full, queryX, sort, "AIGEC");
    assertMatches (full, queryY, sort, "DJHFB");

    sort.setSort (new SortField ("string", Locale.US, true) );
    assertMatches (full, queryX, sort, "CEGIA");
    assertMatches (full, queryY, sort, "BFHJD");
  }

  // test using various international locales with accented characters
  // (which sort differently depending on locale)
  public void testInternationalSort() throws Exception {
    sort.setSort (new SortField ("i18n", Locale.US));
    assertMatches (full, queryY, sort, oStrokeFirst ? "BFJHD" : "BFJDH");

    sort.setSort (new SortField ("i18n", new Locale("sv", "se")));
    assertMatches (full, queryY, sort, "BJDFH");

    sort.setSort (new SortField ("i18n", new Locale("da", "dk")));
    assertMatches (full, queryY, sort, "BJDHF");

    sort.setSort (new SortField ("i18n", Locale.US));
    assertMatches (full, queryX, sort, "ECAGI");

    sort.setSort (new SortField ("i18n", Locale.FRANCE));
    assertMatches (full, queryX, sort, "EACGI");
  }

  // test a variety of sorts using a parallel multisearcher
  public void testParallelMultiSort() throws Exception {
    ExecutorService exec = Executors.newFixedThreadPool(_TestUtil.nextInt(random, 2, 8));

@ -976,19 +931,6 @@ public class TestSort extends LuceneTestCase {
    assertSaneFieldCaches(getName() + " various");
    // next we'll check Locale based (String[]) for 'string', so purge first
    FieldCache.DEFAULT.purgeAllCaches();

    sort.setSort(new SortField ("string", Locale.US) );
    assertMatches(multi, queryA, sort, "DJAIHGFEBC");

    sort.setSort(new SortField ("string", Locale.US, true) );
    assertMatches(multi, queryA, sort, "CBEFGHIAJD");

    sort.setSort(new SortField ("string", Locale.UK) );
    assertMatches(multi, queryA, sort, "DJAIHGFEBC");

    assertSaneFieldCaches(getName() + " Locale.US + Locale.UK");
    FieldCache.DEFAULT.purgeAllCaches();

  }

  private void assertMatches(IndexSearcher searcher, Query query, Sort sort, String expectedResult) throws IOException {

@ -1014,37 +956,6 @@ public class TestSort extends LuceneTestCase {
    assertEquals (msg, expectedResult, buff.toString());
  }

  private HashMap<String,Float> getScores (ScoreDoc[] hits, IndexSearcher searcher)
      throws IOException {
    HashMap<String,Float> scoreMap = new HashMap<String,Float>();
    int n = hits.length;
    for (int i=0; i<n; ++i) {
      Document doc = searcher.doc(hits[i].doc);
      String[] v = doc.getValues("tracer");
      assertEquals (v.length, 1);
      scoreMap.put (v[0], Float.valueOf(hits[i].score));
    }
    return scoreMap;
  }

  // make sure all the values in the maps match
  private <K, V> void assertSameValues (HashMap<K,V> m1, HashMap<K,V> m2) {
    int n = m1.size();
    int m = m2.size();
    assertEquals (n, m);
    Iterator<K> iter = m1.keySet().iterator();
    while (iter.hasNext()) {
      K key = iter.next();
      V o1 = m1.get(key);
      V o2 = m2.get(key);
      if (o1 instanceof Float) {
        assertEquals(((Float)o1).floatValue(), ((Float)o2).floatValue(), 1e-6);
      } else {
        assertEquals (m1.get(key), m2.get(key));
      }
    }
  }

  public void testEmptyStringVsNullStringSort() throws Exception {
    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(
@ -18,15 +18,9 @@ package org.apache.lucene.search;
|
|||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.text.Collator;
|
||||
import java.util.Locale;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
|
@@ -61,193 +55,89 @@ public class TestTermRangeFilter extends BaseTestRangeFilter {

    // test id, bounded on both ends

    result = search.search(q, new TermRangeFilter("id", minIP, maxIP, T, T),
    result = search.search(q, TermRangeFilter.newStringRange("id", minIP, maxIP, T, T),
        numDocs).scoreDocs;
    assertEquals("find all", numDocs, result.length);

    result = search.search(q, new TermRangeFilter("id", minIP, maxIP, T, F),
    result = search.search(q, TermRangeFilter.newStringRange("id", minIP, maxIP, T, F),
        numDocs).scoreDocs;
    assertEquals("all but last", numDocs - 1, result.length);

    result = search.search(q, new TermRangeFilter("id", minIP, maxIP, F, T),
    result = search.search(q, TermRangeFilter.newStringRange("id", minIP, maxIP, F, T),
        numDocs).scoreDocs;
    assertEquals("all but first", numDocs - 1, result.length);

    result = search.search(q, new TermRangeFilter("id", minIP, maxIP, F, F),
    result = search.search(q, TermRangeFilter.newStringRange("id", minIP, maxIP, F, F),
        numDocs).scoreDocs;
    assertEquals("all but ends", numDocs - 2, result.length);

    result = search.search(q, new TermRangeFilter("id", medIP, maxIP, T, T),
    result = search.search(q, TermRangeFilter.newStringRange("id", medIP, maxIP, T, T),
        numDocs).scoreDocs;
    assertEquals("med and up", 1 + maxId - medId, result.length);

    result = search.search(q, new TermRangeFilter("id", minIP, medIP, T, T),
    result = search.search(q, TermRangeFilter.newStringRange("id", minIP, medIP, T, T),
        numDocs).scoreDocs;
    assertEquals("up to med", 1 + medId - minId, result.length);

    // unbounded id

    result = search.search(q, new TermRangeFilter("id", minIP, null, T, F),
    result = search.search(q, TermRangeFilter.newStringRange("id", minIP, null, T, F),
        numDocs).scoreDocs;
    assertEquals("min and up", numDocs, result.length);

    result = search.search(q, new TermRangeFilter("id", null, maxIP, F, T),
    result = search.search(q, TermRangeFilter.newStringRange("id", null, maxIP, F, T),
        numDocs).scoreDocs;
    assertEquals("max and down", numDocs, result.length);

    result = search.search(q, new TermRangeFilter("id", minIP, null, F, F),
    result = search.search(q, TermRangeFilter.newStringRange("id", minIP, null, F, F),
        numDocs).scoreDocs;
    assertEquals("not min, but up", numDocs - 1, result.length);

    result = search.search(q, new TermRangeFilter("id", null, maxIP, F, F),
    result = search.search(q, TermRangeFilter.newStringRange("id", null, maxIP, F, F),
        numDocs).scoreDocs;
    assertEquals("not max, but down", numDocs - 1, result.length);

    result = search.search(q, new TermRangeFilter("id", medIP, maxIP, T, F),
    result = search.search(q, TermRangeFilter.newStringRange("id", medIP, maxIP, T, F),
        numDocs).scoreDocs;
    assertEquals("med and up, not max", maxId - medId, result.length);

    result = search.search(q, new TermRangeFilter("id", minIP, medIP, F, T),
    result = search.search(q, TermRangeFilter.newStringRange("id", minIP, medIP, F, T),
        numDocs).scoreDocs;
    assertEquals("not min, up to med", medId - minId, result.length);

    // very small sets

    result = search.search(q, new TermRangeFilter("id", minIP, minIP, F, F),
    result = search.search(q, TermRangeFilter.newStringRange("id", minIP, minIP, F, F),
        numDocs).scoreDocs;
    assertEquals("min,min,F,F", 0, result.length);
    result = search.search(q, new TermRangeFilter("id", medIP, medIP, F, F),
    result = search.search(q, TermRangeFilter.newStringRange("id", medIP, medIP, F, F),
        numDocs).scoreDocs;
    assertEquals("med,med,F,F", 0, result.length);
    result = search.search(q, new TermRangeFilter("id", maxIP, maxIP, F, F),
    result = search.search(q, TermRangeFilter.newStringRange("id", maxIP, maxIP, F, F),
        numDocs).scoreDocs;
    assertEquals("max,max,F,F", 0, result.length);

    result = search.search(q, new TermRangeFilter("id", minIP, minIP, T, T),
    result = search.search(q, TermRangeFilter.newStringRange("id", minIP, minIP, T, T),
        numDocs).scoreDocs;
    assertEquals("min,min,T,T", 1, result.length);
    result = search.search(q, new TermRangeFilter("id", null, minIP, F, T),
    result = search.search(q, TermRangeFilter.newStringRange("id", null, minIP, F, T),
        numDocs).scoreDocs;
    assertEquals("nul,min,F,T", 1, result.length);

    result = search.search(q, new TermRangeFilter("id", maxIP, maxIP, T, T),
    result = search.search(q, TermRangeFilter.newStringRange("id", maxIP, maxIP, T, T),
        numDocs).scoreDocs;
    assertEquals("max,max,T,T", 1, result.length);
    result = search.search(q, new TermRangeFilter("id", maxIP, null, T, F),
    result = search.search(q, TermRangeFilter.newStringRange("id", maxIP, null, T, F),
        numDocs).scoreDocs;
    assertEquals("max,nul,T,T", 1, result.length);

    result = search.search(q, new TermRangeFilter("id", medIP, medIP, T, T),
    result = search.search(q, TermRangeFilter.newStringRange("id", medIP, medIP, T, T),
        numDocs).scoreDocs;
    assertEquals("med,med,T,T", 1, result.length);

    search.close();
  }

  @Test
  public void testRangeFilterIdCollating() throws IOException {

    IndexReader reader = signedIndexReader;
    IndexSearcher search = newSearcher(reader);

    Collator c = Collator.getInstance(Locale.ENGLISH);

    int medId = ((maxId - minId) / 2);

    String minIP = pad(minId);
    String maxIP = pad(maxId);
    String medIP = pad(medId);

    int numDocs = reader.numDocs();

    assertEquals("num of docs", numDocs, 1 + maxId - minId);

    Query q = new TermQuery(new Term("body", "body"));

    // test id, bounded on both ends
    int numHits = search.search(q, new TermRangeFilter("id", minIP, maxIP, T,
        T, c), 1000).totalHits;
    assertEquals("find all", numDocs, numHits);

    numHits = search.search(q,
        new TermRangeFilter("id", minIP, maxIP, T, F, c), 1000).totalHits;
    assertEquals("all but last", numDocs - 1, numHits);

    numHits = search.search(q,
        new TermRangeFilter("id", minIP, maxIP, F, T, c), 1000).totalHits;
    assertEquals("all but first", numDocs - 1, numHits);

    numHits = search.search(q,
        new TermRangeFilter("id", minIP, maxIP, F, F, c), 1000).totalHits;
    assertEquals("all but ends", numDocs - 2, numHits);

    numHits = search.search(q,
        new TermRangeFilter("id", medIP, maxIP, T, T, c), 1000).totalHits;
    assertEquals("med and up", 1 + maxId - medId, numHits);

    numHits = search.search(q,
        new TermRangeFilter("id", minIP, medIP, T, T, c), 1000).totalHits;
    assertEquals("up to med", 1 + medId - minId, numHits);

    // unbounded id

    numHits = search.search(q, new TermRangeFilter("id", minIP, null, T, F, c),
        1000).totalHits;
    assertEquals("min and up", numDocs, numHits);

    numHits = search.search(q, new TermRangeFilter("id", null, maxIP, F, T, c),
        1000).totalHits;
    assertEquals("max and down", numDocs, numHits);

    numHits = search.search(q, new TermRangeFilter("id", minIP, null, F, F, c),
        1000).totalHits;
    assertEquals("not min, but up", numDocs - 1, numHits);

    numHits = search.search(q, new TermRangeFilter("id", null, maxIP, F, F, c),
        1000).totalHits;
    assertEquals("not max, but down", numDocs - 1, numHits);

    numHits = search.search(q,
        new TermRangeFilter("id", medIP, maxIP, T, F, c), 1000).totalHits;
    assertEquals("med and up, not max", maxId - medId, numHits);

    numHits = search.search(q,
        new TermRangeFilter("id", minIP, medIP, F, T, c), 1000).totalHits;
    assertEquals("not min, up to med", medId - minId, numHits);

    // very small sets

    numHits = search.search(q,
        new TermRangeFilter("id", minIP, minIP, F, F, c), 1000).totalHits;
    assertEquals("min,min,F,F", 0, numHits);
    numHits = search.search(q,
        new TermRangeFilter("id", medIP, medIP, F, F, c), 1000).totalHits;
    assertEquals("med,med,F,F", 0, numHits);
    numHits = search.search(q,
        new TermRangeFilter("id", maxIP, maxIP, F, F, c), 1000).totalHits;
    assertEquals("max,max,F,F", 0, numHits);

    numHits = search.search(q,
        new TermRangeFilter("id", minIP, minIP, T, T, c), 1000).totalHits;
    assertEquals("min,min,T,T", 1, numHits);
    numHits = search.search(q, new TermRangeFilter("id", null, minIP, F, T, c),
        1000).totalHits;
    assertEquals("nul,min,F,T", 1, numHits);

    numHits = search.search(q,
        new TermRangeFilter("id", maxIP, maxIP, T, T, c), 1000).totalHits;
    assertEquals("max,max,T,T", 1, numHits);
    numHits = search.search(q, new TermRangeFilter("id", maxIP, null, T, F, c),
        1000).totalHits;
    assertEquals("max,nul,T,T", 1, numHits);

    numHits = search.search(q,
        new TermRangeFilter("id", medIP, medIP, T, T, c), 1000).totalHits;
    assertEquals("med,med,T,T", 1, numHits);

    search.close();
  }

  @Test
  public void testRangeFilterRand() throws IOException {

@@ -266,223 +156,63 @@ public class TestTermRangeFilter extends BaseTestRangeFilter {

    // test extremes, bounded on both ends

    result = search.search(q, new TermRangeFilter("rand", minRP, maxRP, T, T),
    result = search.search(q, TermRangeFilter.newStringRange("rand", minRP, maxRP, T, T),
        numDocs).scoreDocs;
    assertEquals("find all", numDocs, result.length);

    result = search.search(q, new TermRangeFilter("rand", minRP, maxRP, T, F),
    result = search.search(q, TermRangeFilter.newStringRange("rand", minRP, maxRP, T, F),
        numDocs).scoreDocs;
    assertEquals("all but biggest", numDocs - 1, result.length);

    result = search.search(q, new TermRangeFilter("rand", minRP, maxRP, F, T),
    result = search.search(q, TermRangeFilter.newStringRange("rand", minRP, maxRP, F, T),
        numDocs).scoreDocs;
    assertEquals("all but smallest", numDocs - 1, result.length);

    result = search.search(q, new TermRangeFilter("rand", minRP, maxRP, F, F),
    result = search.search(q, TermRangeFilter.newStringRange("rand", minRP, maxRP, F, F),
        numDocs).scoreDocs;
    assertEquals("all but extremes", numDocs - 2, result.length);

    // unbounded

    result = search.search(q, new TermRangeFilter("rand", minRP, null, T, F),
    result = search.search(q, TermRangeFilter.newStringRange("rand", minRP, null, T, F),
        numDocs).scoreDocs;
    assertEquals("smallest and up", numDocs, result.length);

    result = search.search(q, new TermRangeFilter("rand", null, maxRP, F, T),
    result = search.search(q, TermRangeFilter.newStringRange("rand", null, maxRP, F, T),
        numDocs).scoreDocs;
    assertEquals("biggest and down", numDocs, result.length);

    result = search.search(q, new TermRangeFilter("rand", minRP, null, F, F),
    result = search.search(q, TermRangeFilter.newStringRange("rand", minRP, null, F, F),
        numDocs).scoreDocs;
    assertEquals("not smallest, but up", numDocs - 1, result.length);

    result = search.search(q, new TermRangeFilter("rand", null, maxRP, F, F),
    result = search.search(q, TermRangeFilter.newStringRange("rand", null, maxRP, F, F),
        numDocs).scoreDocs;
    assertEquals("not biggest, but down", numDocs - 1, result.length);

    // very small sets

    result = search.search(q, new TermRangeFilter("rand", minRP, minRP, F, F),
    result = search.search(q, TermRangeFilter.newStringRange("rand", minRP, minRP, F, F),
        numDocs).scoreDocs;
    assertEquals("min,min,F,F", 0, result.length);
    result = search.search(q, new TermRangeFilter("rand", maxRP, maxRP, F, F),
    result = search.search(q, TermRangeFilter.newStringRange("rand", maxRP, maxRP, F, F),
        numDocs).scoreDocs;
    assertEquals("max,max,F,F", 0, result.length);

    result = search.search(q, new TermRangeFilter("rand", minRP, minRP, T, T),
    result = search.search(q, TermRangeFilter.newStringRange("rand", minRP, minRP, T, T),
        numDocs).scoreDocs;
    assertEquals("min,min,T,T", 1, result.length);
    result = search.search(q, new TermRangeFilter("rand", null, minRP, F, T),
    result = search.search(q, TermRangeFilter.newStringRange("rand", null, minRP, F, T),
        numDocs).scoreDocs;
    assertEquals("nul,min,F,T", 1, result.length);

    result = search.search(q, new TermRangeFilter("rand", maxRP, maxRP, T, T),
    result = search.search(q, TermRangeFilter.newStringRange("rand", maxRP, maxRP, T, T),
        numDocs).scoreDocs;
    assertEquals("max,max,T,T", 1, result.length);
    result = search.search(q, new TermRangeFilter("rand", maxRP, null, T, F),
    result = search.search(q, TermRangeFilter.newStringRange("rand", maxRP, null, T, F),
        numDocs).scoreDocs;
    assertEquals("max,nul,T,T", 1, result.length);

    search.close();
  }

  @Test
  public void testRangeFilterRandCollating() throws IOException {

    // using the unsigned index because collation seems to ignore hyphens
    IndexReader reader = unsignedIndexReader;
    IndexSearcher search = newSearcher(reader);

    Collator c = Collator.getInstance(Locale.ENGLISH);

    String minRP = pad(unsignedIndexDir.minR);
    String maxRP = pad(unsignedIndexDir.maxR);

    int numDocs = reader.numDocs();

    assertEquals("num of docs", numDocs, 1 + maxId - minId);

    Query q = new TermQuery(new Term("body", "body"));

    // test extremes, bounded on both ends

    int numHits = search.search(q, new TermRangeFilter("rand", minRP, maxRP, T,
        T, c), 1000).totalHits;
    assertEquals("find all", numDocs, numHits);

    numHits = search.search(q, new TermRangeFilter("rand", minRP, maxRP, T, F,
        c), 1000).totalHits;
    assertEquals("all but biggest", numDocs - 1, numHits);

    numHits = search.search(q, new TermRangeFilter("rand", minRP, maxRP, F, T,
        c), 1000).totalHits;
    assertEquals("all but smallest", numDocs - 1, numHits);

    numHits = search.search(q, new TermRangeFilter("rand", minRP, maxRP, F, F,
        c), 1000).totalHits;
    assertEquals("all but extremes", numDocs - 2, numHits);

    // unbounded

    numHits = search.search(q,
        new TermRangeFilter("rand", minRP, null, T, F, c), 1000).totalHits;
    assertEquals("smallest and up", numDocs, numHits);

    numHits = search.search(q,
        new TermRangeFilter("rand", null, maxRP, F, T, c), 1000).totalHits;
    assertEquals("biggest and down", numDocs, numHits);

    numHits = search.search(q,
        new TermRangeFilter("rand", minRP, null, F, F, c), 1000).totalHits;
    assertEquals("not smallest, but up", numDocs - 1, numHits);

    numHits = search.search(q,
        new TermRangeFilter("rand", null, maxRP, F, F, c), 1000).totalHits;
    assertEquals("not biggest, but down", numDocs - 1, numHits);

    // very small sets

    numHits = search.search(q, new TermRangeFilter("rand", minRP, minRP, F, F,
        c), 1000).totalHits;
    assertEquals("min,min,F,F", 0, numHits);
    numHits = search.search(q, new TermRangeFilter("rand", maxRP, maxRP, F, F,
        c), 1000).totalHits;
    assertEquals("max,max,F,F", 0, numHits);

    numHits = search.search(q, new TermRangeFilter("rand", minRP, minRP, T, T,
        c), 1000).totalHits;
    assertEquals("min,min,T,T", 1, numHits);
    numHits = search.search(q,
        new TermRangeFilter("rand", null, minRP, F, T, c), 1000).totalHits;
    assertEquals("nul,min,F,T", 1, numHits);

    numHits = search.search(q, new TermRangeFilter("rand", maxRP, maxRP, T, T,
        c), 1000).totalHits;
    assertEquals("max,max,T,T", 1, numHits);
    numHits = search.search(q,
        new TermRangeFilter("rand", maxRP, null, T, F, c), 1000).totalHits;
    assertEquals("max,nul,T,T", 1, numHits);

    search.close();
  }

  @Test
  public void testFarsi() throws Exception {

    /* build an index */
    Directory farsiIndex = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random, farsiIndex);
    Document doc = new Document();
    doc.add(newField("content", "\u0633\u0627\u0628", Field.Store.YES,
        Field.Index.NOT_ANALYZED));
    doc.add(newField("body", "body", Field.Store.YES,
        Field.Index.NOT_ANALYZED));
    writer.addDocument(doc);

    IndexReader reader = writer.getReader();
    writer.close();

    IndexSearcher search = newSearcher(reader);
    Query q = new TermQuery(new Term("body", "body"));

    // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
    // RuleBasedCollator. However, the Arabic Locale seems to order the Farsi
    // characters properly.
    Collator collator = Collator.getInstance(new Locale("ar"));

    // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
    // orders the U+0698 character before the U+0633 character, so the single
    // index Term below should NOT be returned by a TermRangeFilter with a Farsi
    // Collator (or an Arabic one for the case when Farsi is not supported).
    int numHits = search.search(q, new TermRangeFilter("content", "\u062F",
        "\u0698", T, T, collator), 1000).totalHits;
    assertEquals("The index Term should not be included.", 0, numHits);

    numHits = search.search(q, new TermRangeFilter("content", "\u0633",
        "\u0638", T, T, collator), 1000).totalHits;
    assertEquals("The index Term should be included.", 1, numHits);
    search.close();
    reader.close();
    farsiIndex.close();
  }

  @Test
  public void testDanish() throws Exception {

    /* build an index */
    Directory danishIndex = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random, danishIndex);
    // Danish collation orders the words below in the given order
    // (example taken from TestSort.testInternationalSort() ).
    String[] words = {"H\u00D8T", "H\u00C5T", "MAND"};
    for (int docnum = 0; docnum < words.length; ++docnum) {
      Document doc = new Document();
      doc.add(newField("content", words[docnum], Field.Store.YES,
          Field.Index.NOT_ANALYZED));
      doc.add(newField("body", "body", Field.Store.YES,
          Field.Index.NOT_ANALYZED));
      writer.addDocument(doc);
    }
    IndexReader reader = writer.getReader();
    writer.close();

    IndexSearcher search = newSearcher(reader);
    Query q = new TermQuery(new Term("body", "body"));

    Collator collator = Collator.getInstance(new Locale("da", "dk"));

    // Unicode order would not include "H\u00C5T" in [ "H\u00D8T", "MAND" ],
    // but Danish collation does.
    int numHits = search.search(q, new TermRangeFilter("content", "H\u00D8T",
        "MAND", F, F, collator), 1000).totalHits;
    assertEquals("The index Term should be included.", 1, numHits);

    numHits = search.search(q, new TermRangeFilter("content", "H\u00C5T",
        "MAND", F, F, collator), 1000).totalHits;
    assertEquals("The index Term should not be included.", 0, numHits);
    search.close();
    reader.close();
    danishIndex.close();
  }
}
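The churn in this test file is mechanical: every collator-less new TermRangeFilter(...) becomes a call to the newStringRange factory method, and the collating variants leave this test entirely. A minimal hedged sketch of the before/after, assuming a field "id" indexed as plain not-analyzed string terms; the searcher, query, and bounds stand in for the test fixtures above:

    import org.apache.lucene.index.Term;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.TermQuery;
    import org.apache.lucene.search.TermRangeFilter;
    import org.apache.lucene.search.TopDocs;

    public class NewStringRangeExample {
      // Hypothetical helper illustrating the migration only.
      static TopDocs idRange(IndexSearcher searcher, String lo, String hi)
          throws java.io.IOException {
        Query q = new TermQuery(new Term("body", "body"));
        // Old (removed): new TermRangeFilter("id", lo, hi, true, true)
        // New: the factory method builds a range that compares raw term bytes.
        TermRangeFilter f = TermRangeFilter.newStringRange("id", lo, hi, true, true);
        return searcher.search(q, f, 1000);
      }
    }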
@@ -53,7 +53,7 @@ public class TestTermRangeQuery extends LuceneTestCase {
  }

  public void testExclusive() throws Exception {
    Query query = new TermRangeQuery("content", "A", "C", false, false);
    Query query = TermRangeQuery.newStringRange("content", "A", "C", false, false);
    initializeIndex(new String[] {"A", "B", "C", "D"});
    IndexSearcher searcher = new IndexSearcher(dir, true);
    ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;

@@ -74,7 +74,7 @@ public class TestTermRangeQuery extends LuceneTestCase {
  }

  public void testInclusive() throws Exception {
    Query query = new TermRangeQuery("content", "A", "C", true, true);
    Query query = TermRangeQuery.newStringRange("content", "A", "C", true, true);

    initializeIndex(new String[]{"A", "B", "C", "D"});
    IndexSearcher searcher = new IndexSearcher(dir, true);
@@ -105,11 +105,11 @@ public class TestTermRangeQuery extends LuceneTestCase {
    query = new TermRangeQuery("content", null, null, false, false);
    assertFalse(query.getTermsEnum(terms) instanceof TermRangeTermsEnum);
    assertEquals(4, searcher.search(query, null, 1000).scoreDocs.length);
    query = new TermRangeQuery("content", "", null, true, false);
    query = TermRangeQuery.newStringRange("content", "", null, true, false);
    assertFalse(query.getTermsEnum(terms) instanceof TermRangeTermsEnum);
    assertEquals(4, searcher.search(query, null, 1000).scoreDocs.length);
    // and now another one
    query = new TermRangeQuery("content", "B", null, true, false);
    query = TermRangeQuery.newStringRange("content", "B", null, true, false);
    assertTrue(query.getTermsEnum(terms) instanceof TermRangeTermsEnum);
    assertEquals(3, searcher.search(query, null, 1000).scoreDocs.length);
    searcher.close();
@@ -121,7 +121,7 @@ public class TestTermRangeQuery extends LuceneTestCase {
    initializeIndex(new String[]{"A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K"});

    IndexSearcher searcher = new IndexSearcher(dir, true);
    TermRangeQuery query = new TermRangeQuery("content", "B", "J", true, true);
    TermRangeQuery query = TermRangeQuery.newStringRange("content", "B", "J", true, true);
    checkBooleanTerms(searcher, query, "B", "C", "D", "E", "F", "G", "H", "I", "J");

    final int savedClauseCount = BooleanQuery.getMaxClauseCount();

@@ -150,10 +150,10 @@ public class TestTermRangeQuery extends LuceneTestCase {
  }

  public void testEqualsHashcode() {
    Query query = new TermRangeQuery("content", "A", "C", true, true);
    Query query = TermRangeQuery.newStringRange("content", "A", "C", true, true);

    query.setBoost(1.0f);
    Query other = new TermRangeQuery("content", "A", "C", true, true);
    Query other = TermRangeQuery.newStringRange("content", "A", "C", true, true);
    other.setBoost(1.0f);

    assertEquals("query equals itself is true", query, query);
@@ -163,120 +163,32 @@ public class TestTermRangeQuery extends LuceneTestCase {
    other.setBoost(2.0f);
    assertFalse("Different boost queries are not equal", query.equals(other));

    other = new TermRangeQuery("notcontent", "A", "C", true, true);
    other = TermRangeQuery.newStringRange("notcontent", "A", "C", true, true);
    assertFalse("Different fields are not equal", query.equals(other));

    other = new TermRangeQuery("content", "X", "C", true, true);
    other = TermRangeQuery.newStringRange("content", "X", "C", true, true);
    assertFalse("Different lower terms are not equal", query.equals(other));

    other = new TermRangeQuery("content", "A", "Z", true, true);
    other = TermRangeQuery.newStringRange("content", "A", "Z", true, true);
    assertFalse("Different upper terms are not equal", query.equals(other));

    query = new TermRangeQuery("content", null, "C", true, true);
    other = new TermRangeQuery("content", null, "C", true, true);
    query = TermRangeQuery.newStringRange("content", null, "C", true, true);
    other = TermRangeQuery.newStringRange("content", null, "C", true, true);
    assertEquals("equivalent queries with null lowerterms are equal()", query, other);
    assertEquals("hashcode must return same value when equals is true", query.hashCode(), other.hashCode());

    query = new TermRangeQuery("content", "C", null, true, true);
    other = new TermRangeQuery("content", "C", null, true, true);
    query = TermRangeQuery.newStringRange("content", "C", null, true, true);
    other = TermRangeQuery.newStringRange("content", "C", null, true, true);
    assertEquals("equivalent queries with null upperterms are equal()", query, other);
    assertEquals("hashcode returns same value", query.hashCode(), other.hashCode());

    query = new TermRangeQuery("content", null, "C", true, true);
    other = new TermRangeQuery("content", "C", null, true, true);
    query = TermRangeQuery.newStringRange("content", null, "C", true, true);
    other = TermRangeQuery.newStringRange("content", "C", null, true, true);
    assertFalse("queries with different upper and lower terms are not equal", query.equals(other));

    query = new TermRangeQuery("content", "A", "C", false, false);
    other = new TermRangeQuery("content", "A", "C", true, true);
    query = TermRangeQuery.newStringRange("content", "A", "C", false, false);
    other = TermRangeQuery.newStringRange("content", "A", "C", true, true);
    assertFalse("queries with different inclusive are not equal", query.equals(other));

    query = new TermRangeQuery("content", "A", "C", false, false);
    other = new TermRangeQuery("content", "A", "C", false, false, Collator.getInstance());
    assertFalse("a query with a collator is not equal to one without", query.equals(other));
  }

  public void testExclusiveCollating() throws Exception {
    Query query = new TermRangeQuery("content", "A", "C", false, false, Collator.getInstance(Locale.ENGLISH));
    initializeIndex(new String[] {"A", "B", "C", "D"});
    IndexSearcher searcher = new IndexSearcher(dir, true);
    ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
    assertEquals("A,B,C,D, only B in range", 1, hits.length);
    searcher.close();

    initializeIndex(new String[] {"A", "B", "D"});
    searcher = new IndexSearcher(dir, true);
    hits = searcher.search(query, null, 1000).scoreDocs;
    assertEquals("A,B,D, only B in range", 1, hits.length);
    searcher.close();

    addDoc("C");
    searcher = new IndexSearcher(dir, true);
    hits = searcher.search(query, null, 1000).scoreDocs;
    assertEquals("C added, still only B in range", 1, hits.length);
    searcher.close();
  }

  public void testInclusiveCollating() throws Exception {
    Query query = new TermRangeQuery("content", "A", "C", true, true, Collator.getInstance(Locale.ENGLISH));

    initializeIndex(new String[]{"A", "B", "C", "D"});
    IndexSearcher searcher = new IndexSearcher(dir, true);
    ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
    assertEquals("A,B,C,D - A,B,C in range", 3, hits.length);
    searcher.close();

    initializeIndex(new String[]{"A", "B", "D"});
    searcher = new IndexSearcher(dir, true);
    hits = searcher.search(query, null, 1000).scoreDocs;
    assertEquals("A,B,D - A and B in range", 2, hits.length);
    searcher.close();

    addDoc("C");
    searcher = new IndexSearcher(dir, true);
    hits = searcher.search(query, null, 1000).scoreDocs;
    assertEquals("C added - A, B, C in range", 3, hits.length);
    searcher.close();
  }

  public void testFarsi() throws Exception {
    // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
    // RuleBasedCollator. However, the Arabic Locale seems to order the Farsi
    // characters properly.
    Collator collator = Collator.getInstance(new Locale("ar"));
    Query query = new TermRangeQuery("content", "\u062F", "\u0698", true, true, collator);
    // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
    // orders the U+0698 character before the U+0633 character, so the single
    // index Term below should NOT be returned by a TermRangeQuery with a Farsi
    // Collator (or an Arabic one for the case when Farsi is not supported).
    initializeIndex(new String[]{ "\u0633\u0627\u0628"});
    IndexSearcher searcher = new IndexSearcher(dir, true);
    ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
    assertEquals("The index Term should not be included.", 0, hits.length);

    query = new TermRangeQuery("content", "\u0633", "\u0638", true, true, collator);
    hits = searcher.search(query, null, 1000).scoreDocs;
    assertEquals("The index Term should be included.", 1, hits.length);
    searcher.close();
  }

  public void testDanish() throws Exception {
    Collator collator = Collator.getInstance(new Locale("da", "dk"));
    // Danish collation orders the words below in the given order (example taken
    // from TestSort.testInternationalSort() ).
    String[] words = { "H\u00D8T", "H\u00C5T", "MAND" };
    Query query = new TermRangeQuery("content", "H\u00D8T", "MAND", false, false, collator);

    // Unicode order would not include "H\u00C5T" in [ "H\u00D8T", "MAND" ],
    // but Danish collation does.
    initializeIndex(words);
    IndexSearcher searcher = new IndexSearcher(dir, true);
    ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
    assertEquals("The index Term should be included.", 1, hits.length);

    query = new TermRangeQuery("content", "H\u00C5T", "MAND", false, false, collator);
    hits = searcher.search(query, null, 1000).scoreDocs;
    assertEquals("The index Term should not be included.", 0, hits.length);
    searcher.close();
  }

  private static class SingleCharAnalyzer extends Analyzer {
@@ -363,7 +275,7 @@ public class TestTermRangeQuery extends LuceneTestCase {
  public void testExclusiveLowerNull() throws Exception {
    Analyzer analyzer = new SingleCharAnalyzer();
    //http://issues.apache.org/jira/browse/LUCENE-38
    Query query = new TermRangeQuery("content", null, "C",
    Query query = TermRangeQuery.newStringRange("content", null, "C",
        false, false);
    initializeIndex(new String[] {"A", "B", "", "C", "D"}, analyzer);
    IndexSearcher searcher = new IndexSearcher(dir, true);

@@ -396,7 +308,7 @@ public class TestTermRangeQuery extends LuceneTestCase {
  public void testInclusiveLowerNull() throws Exception {
    //http://issues.apache.org/jira/browse/LUCENE-38
    Analyzer analyzer = new SingleCharAnalyzer();
    Query query = new TermRangeQuery("content", null, "C", true, true);
    Query query = TermRangeQuery.newStringRange("content", null, "C", true, true);
    initializeIndex(new String[]{"A", "B", "", "C", "D"}, analyzer);
    IndexSearcher searcher = new IndexSearcher(dir, true);
    int numHits = searcher.search(query, null, 1000).totalHits;
@@ -17,6 +17,10 @@ package org.apache.lucene.util;
 * limitations under the License.
 */

/**
 * @deprecated Remove when IndexableBinaryStringTools is removed.
 */
@Deprecated
public class TestIndexableBinaryStringTools extends LuceneTestCase {
  private static final int NUM_RANDOM_TESTS = 2000 * RANDOM_MULTIPLIER;
  private static final int MAX_RANDOM_BINARY_LENGTH = 300 * RANDOM_MULTIPLIER;
@@ -25,6 +25,10 @@ API Changes
 * LUCENE-1370: Added ShingleFilter option to output unigrams if no shingles
   can be generated. (Chris Harris via Steven Rowe)

 * LUCENE-2514, LUCENE-2551: JDK and ICU CollationKeyAnalyzers were changed to
   use pure byte keys when Version >= 4.0. This cuts sort key size approximately
   in half. (Robert Muir)

New Features

 * LUCENE-2413: Consolidated Solr analysis components into common.
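The size reduction in that CHANGES entry comes from skipping a binary-to-char round trip: pre-4.0 analyzers had to smuggle the collation key's bytes through a char-based term, while Version.LUCENE_40 stores the key bytes directly. A hedged sketch of how a caller opts in, using only constructors that appear later in this diff (the locale is illustrative):

    import java.text.Collator;
    import java.util.Locale;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.collation.CollationKeyAnalyzer;
    import org.apache.lucene.util.Version;

    public class ByteKeyOptIn {
      public static void main(String[] args) {
        Collator collator = Collator.getInstance(new Locale("da", "dk"));
        // Version >= 4.0: terms are the raw collation key bytes (roughly half the size).
        Analyzer byteKeys = new CollationKeyAnalyzer(Version.LUCENE_40, collator);
        // Pre-4.0 behavior: keys are re-encoded as chars via
        // IndexableBinaryStringTools, for compatibility with old indexes.
        Analyzer charKeys = new CollationKeyAnalyzer(Version.LUCENE_31, collator);
      }
    }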
@@ -29,8 +29,8 @@ import org.apache.lucene.util.AttributeSource;
 * Emits the entire input as a single token.
 */
public final class KeywordTokenizer extends Tokenizer {

  private static final int DEFAULT_BUFFER_SIZE = 256;
  /** Default read buffer size */
  public static final int DEFAULT_BUFFER_SIZE = 256;

  private boolean done = false;
  private int finalOffset;
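DEFAULT_BUFFER_SIZE becomes public so that callers constructing the tokenizer through its AttributeFactory-taking constructor (as CollationKeyAnalyzer does later in this commit) need not hard-code 256. A hedged sketch, assuming the three-argument constructor shown further down in this diff:

    import java.io.Reader;
    import java.io.StringReader;

    import org.apache.lucene.analysis.core.KeywordTokenizer;
    import org.apache.lucene.util.AttributeSource;

    public class KeywordTokenizerBufferExample {
      static KeywordTokenizer create(AttributeSource.AttributeFactory factory, Reader reader) {
        // The now-public constant replaces a magic number at call sites.
        return new KeywordTokenizer(factory, reader, KeywordTokenizer.DEFAULT_BUFFER_SIZE);
      }

      public static void main(String[] args) {
        create(AttributeSource.AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY,
            new StringReader("a single token"));
      }
    }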
@@ -0,0 +1,103 @@
package org.apache.lucene.collation;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.text.Collator;

import org.apache.lucene.collation.tokenattributes.CollatedTermAttributeImpl;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeSource;

/**
 * <p>
 *   Converts each token into its {@link java.text.CollationKey}, and then
 *   encodes the bytes as an index term.
 * </p>
 * <p>
 *   <strong>WARNING:</strong> Make sure you use exactly the same Collator at
 *   index and query time -- CollationKeys are only comparable when produced by
 *   the same Collator.  Since {@link java.text.RuleBasedCollator}s are not
 *   independently versioned, it is unsafe to search against stored
 *   CollationKeys unless the following are exactly the same (best practice is
 *   to store this information with the index and check that they remain the
 *   same at query time):
 * </p>
 * <ol>
 *   <li>JVM vendor</li>
 *   <li>JVM version, including patch version</li>
 *   <li>
 *     The language (and country and variant, if specified) of the Locale
 *     used when constructing the collator via
 *     {@link Collator#getInstance(java.util.Locale)}.
 *   </li>
 *   <li>
 *     The collation strength used - see {@link Collator#setStrength(int)}
 *   </li>
 * </ol>
 * <p>
 *   The <code>ICUCollationAttributeFactory</code> in the icu package of Lucene's
 *   contrib area uses ICU4J's Collator, which makes its
 *   version available, thus allowing collation to be versioned independently
 *   from the JVM.  ICUCollationAttributeFactory is also significantly faster and
 *   generates significantly shorter keys than CollationAttributeFactory.  See
 *   <a href="http://site.icu-project.org/charts/collation-icu4j-sun"
 *   >http://site.icu-project.org/charts/collation-icu4j-sun</a> for key
 *   generation timing and key length comparisons between ICU4J and
 *   java.text.Collator over several languages.
 * </p>
 * <p>
 *   CollationKeys generated by java.text.Collators are not compatible
 *   with those generated by ICU Collators.  Specifically, if you use
 *   CollationAttributeFactory to generate index terms, do not use
 *   ICUCollationAttributeFactory on the query side, or vice versa.
 * </p>
 */
public class CollationAttributeFactory extends AttributeSource.AttributeFactory {
  private final Collator collator;
  private final AttributeSource.AttributeFactory delegate;

  /**
   * Create a CollationAttributeFactory, using
   * {@link AttributeSource.AttributeFactory#DEFAULT_ATTRIBUTE_FACTORY} as the
   * factory for all other attributes.
   * @param collator CollationKey generator
   */
  public CollationAttributeFactory(Collator collator) {
    this(AttributeSource.AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, collator);
  }

  /**
   * Create a CollationAttributeFactory, using the supplied Attribute Factory
   * as the factory for all other attributes.
   * @param delegate Attribute Factory
   * @param collator CollationKey generator
   */
  public CollationAttributeFactory(AttributeSource.AttributeFactory delegate, Collator collator) {
    this.delegate = delegate;
    this.collator = collator;
  }

  @Override
  public AttributeImpl createAttributeInstance(
      Class<? extends Attribute> attClass) {
    return attClass.isAssignableFrom(CollatedTermAttributeImpl.class)
      ? new CollatedTermAttributeImpl(collator)
      : delegate.createAttributeInstance(attClass);
  }
}
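Because the factory plugs in at the AttributeSource level, any tokenizer can emit collated terms; pairing it with KeywordTokenizer reproduces the old analyzer's behavior. A minimal index-time sketch under that assumption (field text and locale are illustrative):

    import java.io.StringReader;
    import java.text.Collator;
    import java.util.Locale;

    import org.apache.lucene.analysis.core.KeywordTokenizer;
    import org.apache.lucene.collation.CollationAttributeFactory;

    public class CollationFactoryExample {
      public static void main(String[] args) {
        Collator collator = Collator.getInstance(new Locale("da", "dk"));
        CollationAttributeFactory factory = new CollationAttributeFactory(collator);
        // Every term attribute created through this factory is a
        // CollatedTermAttributeImpl, so the indexed bytes are collation keys.
        KeywordTokenizer tokens = new KeywordTokenizer(factory,
            new StringReader("H\u00C5T"), KeywordTokenizer.DEFAULT_BUFFER_SIZE);
      }
    }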
@@ -18,14 +18,13 @@ package org.apache.lucene.collation;
 */


import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import org.apache.lucene.util.IndexableBinaryStringTools; // javadoc @link
import org.apache.lucene.util.Version;

import java.text.Collator;
import java.io.Reader;
import java.io.IOException;

/**
 * <p>

@@ -33,8 +32,8 @@ import java.io.IOException;
 * </p>
 * <p>
 *   Converts the token into its {@link java.text.CollationKey}, and then
 *   encodes the CollationKey with
 *   {@link org.apache.lucene.util.IndexableBinaryStringTools}, to allow
 *   encodes the CollationKey either directly or with
 *   {@link IndexableBinaryStringTools} (see <a href="#version">below</a>), to allow
 *   it to be stored as an index term.
 * </p>
 * <p>

@@ -75,39 +74,49 @@ import java.io.IOException;
 *   CollationKeyAnalyzer to generate index terms, do not use
 *   ICUCollationKeyAnalyzer on the query side, or vice versa.
 * </p>
 * <a name="version"/>
 * <p>You must specify the required {@link Version}
 * compatibility when creating CollationKeyAnalyzer:
 * <ul>
 *   <li> As of 4.0, Collation Keys are directly encoded as bytes. Previous
 *   versions will encode the bytes with {@link IndexableBinaryStringTools}.
 * </ul>
 */
public final class CollationKeyAnalyzer extends Analyzer {
  private Collator collator;
public final class CollationKeyAnalyzer extends ReusableAnalyzerBase {
  private final Collator collator;
  private final CollationAttributeFactory factory;
  private final Version matchVersion;

  public CollationKeyAnalyzer(Collator collator) {
  /**
   * Create a new CollationKeyAnalyzer, using the specified collator.
   *
   * @param matchVersion See <a href="#version">above</a>
   * @param collator CollationKey generator
   */
  public CollationKeyAnalyzer(Version matchVersion, Collator collator) {
    this.matchVersion = matchVersion;
    this.collator = collator;
    this.factory = new CollationAttributeFactory(collator);
  }

  /**
   * @deprecated Use {@link CollationKeyAnalyzer#CollationKeyAnalyzer(Version, Collator)}
   *   and specify a version instead. This ctor will be removed in Lucene 5.0
   */
  @Deprecated
  public CollationKeyAnalyzer(Collator collator) {
    this(Version.LUCENE_31, collator);
  }

  @Override
  public TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream result = new KeywordTokenizer(reader);
    result = new CollationKeyFilter(result, collator);
    return result;
  }

  private class SavedStreams {
    Tokenizer source;
    TokenStream result;
  }

  @Override
  public TokenStream reusableTokenStream(String fieldName, Reader reader)
      throws IOException {

    SavedStreams streams = (SavedStreams)getPreviousTokenStream();
    if (streams == null) {
      streams = new SavedStreams();
      streams.source = new KeywordTokenizer(reader);
      streams.result = new CollationKeyFilter(streams.source, collator);
      setPreviousTokenStream(streams);
  protected TokenStreamComponents createComponents(String fieldName,
      Reader reader) {
    if (matchVersion.onOrAfter(Version.LUCENE_40)) {
      KeywordTokenizer tokenizer = new KeywordTokenizer(factory, reader, KeywordTokenizer.DEFAULT_BUFFER_SIZE);
      return new TokenStreamComponents(tokenizer, tokenizer);
    } else {
      streams.source.reset(reader);
      KeywordTokenizer tokenizer = new KeywordTokenizer(reader);
      return new TokenStreamComponents(tokenizer, new CollationKeyFilter(tokenizer, collator));
    }
    return streams.result;
  }
}
|
@ -71,7 +71,10 @@ import java.text.Collator;
|
|||
* CollationKeyFilter to generate index terms, do not use
|
||||
* ICUCollationKeyFilter on the query side, or vice versa.
|
||||
* </p>
|
||||
* @deprecated Use {@link CollationAttributeFactory} instead, which encodes
|
||||
* terms directly as bytes. This filter will be removed in Lucene 5.0
|
||||
*/
|
||||
@Deprecated
|
||||
public final class CollationKeyFilter extends TokenFilter {
|
||||
private final Collator collator;
|
||||
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
||||
|
|
|
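For code that still builds token chains by hand, the replacement moves the collation step from a TokenFilter into the attribute factory. A hedged before/after sketch (the old line is kept as a comment; the reader is illustrative):

    import java.io.Reader;
    import java.io.StringReader;
    import java.text.Collator;
    import java.util.Locale;

    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.core.KeywordTokenizer;
    import org.apache.lucene.collation.CollationAttributeFactory;

    public class FilterToFactoryMigration {
      public static void main(String[] args) {
        Collator collator = Collator.getInstance(Locale.US);
        Reader reader = new StringReader("some term");
        // Deprecated chain: collation applied as a filter over char terms.
        // TokenStream old = new CollationKeyFilter(new KeywordTokenizer(reader), collator);
        // Replacement: the factory makes the term attribute itself emit key bytes.
        TokenStream now = new KeywordTokenizer(new CollationAttributeFactory(collator),
            reader, KeywordTokenizer.DEFAULT_BUFFER_SIZE);
      }
    }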
@@ -55,10 +55,9 @@
  <code><pre>
    // "fa" Locale is not supported by Sun JDK 1.4 or 1.5
    Collator collator = Collator.getInstance(new Locale("ar"));
    CollationKeyAnalyzer analyzer = new CollationKeyAnalyzer(collator);
    CollationKeyAnalyzer analyzer = new CollationKeyAnalyzer(Version.LUCENE_40, collator);
    RAMDirectory ramDir = new RAMDirectory();
    IndexWriter writer = new IndexWriter
      (ramDir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    IndexWriter writer = new IndexWriter(ramDir, new IndexWriterConfig(Version.LUCENE_40, analyzer));
    Document doc = new Document();
    doc.add(new Field("content", "\u0633\u0627\u0628",
                      Field.Store.YES, Field.Index.ANALYZED));

@@ -66,11 +65,8 @@
    writer.close();
    IndexSearcher is = new IndexSearcher(ramDir, true);

    // The AnalyzingQueryParser in Lucene's contrib allows terms in range queries
    // to be passed through an analyzer - Lucene's standard QueryParser does not
    // allow this.
    AnalyzingQueryParser aqp = new AnalyzingQueryParser("content", analyzer);
    aqp.setLowercaseExpandedTerms(false);
    QueryParser aqp = new QueryParser(Version.LUCENE_40, "content", analyzer);
    aqp.setAnalyzeRangeTerms(true);

    // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
    // orders the U+0698 character before the U+0633 character, so the single

@@ -85,10 +81,9 @@
  <h3>Danish Sorting</h3>
  <code><pre>
    Analyzer analyzer
      = new CollationKeyAnalyzer(Collator.getInstance(new Locale("da", "dk")));
      = new CollationKeyAnalyzer(Version.LUCENE_40, Collator.getInstance(new Locale("da", "dk")));
    RAMDirectory indexStore = new RAMDirectory();
    IndexWriter writer = new IndexWriter
      (indexStore, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    IndexWriter writer = new IndexWriter(indexStore, new IndexWriterConfig(Version.LUCENE_40, analyzer));
    String[] tracer = new String[] { "A", "B", "C", "D", "E" };
    String[] data = new String[] { "HAT", "HUT", "H\u00C5T", "H\u00D8T", "HOT" };
    String[] sortedTracerOrder = new String[] { "A", "E", "B", "D", "C" };

@@ -99,7 +94,7 @@
      writer.addDocument(doc);
    }
    writer.close();
    Searcher searcher = new IndexSearcher(indexStore, true);
    IndexSearcher searcher = new IndexSearcher(indexStore, true);
    Sort sort = new Sort();
    sort.setSort(new SortField("contents", SortField.STRING));
    Query query = new MatchAllDocsQuery();

@@ -114,16 +109,15 @@
  <code><pre>
    Collator collator = Collator.getInstance(new Locale("tr", "TR"));
    collator.setStrength(Collator.PRIMARY);
    Analyzer analyzer = new CollationKeyAnalyzer(collator);
    Analyzer analyzer = new CollationKeyAnalyzer(Version.LUCENE_40, collator);
    RAMDirectory ramDir = new RAMDirectory();
    IndexWriter writer = new IndexWriter
      (ramDir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    IndexWriter writer = new IndexWriter(ramDir, new IndexWriterConfig(Version.LUCENE_40, analyzer));
    Document doc = new Document();
    doc.add(new Field("contents", "DIGY", Field.Store.NO, Field.Index.ANALYZED));
    writer.addDocument(doc);
    writer.close();
    IndexSearcher is = new IndexSearcher(ramDir, true);
    QueryParser parser = new QueryParser("contents", analyzer);
    QueryParser parser = new QueryParser(Version.LUCENE_40, "contents", analyzer);
    Query query = parser.parse("d\u0131gy"); // U+0131: dotless i
    ScoreDoc[] result = is.search(query, null, 1000).scoreDocs;
    assertEquals("The index Term should be included.", 1, result.length);
@@ -1,4 +1,4 @@
package org.apache.lucene.queryParser.standard.config;
package org.apache.lucene.collation.tokenattributes;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more

@@ -19,19 +19,30 @@ package org.apache.lucene.queryParser.standard.config;

import java.text.Collator;

import org.apache.lucene.queryParser.core.config.QueryConfigHandler;
import org.apache.lucene.queryParser.standard.processors.ParametricRangeQueryNodeProcessor;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttributeImpl;
import org.apache.lucene.util.BytesRef;

/**
 * This attribute is used by {@link ParametricRangeQueryNodeProcessor} processor
 * and must be defined in the {@link QueryConfigHandler}. This attribute tells
 * the processor which {@link Collator} should be used for a
 * {@link TermRangeQuery} <br/>
 *
 * Extension of {@link CharTermAttributeImpl} that encodes the term
 * text as a binary Unicode collation key instead of as UTF-8 bytes.
 */
public interface RangeCollatorAttribute extends Attribute {
  public void setDateResolution(Collator rangeCollator);
  public Collator getRangeCollator();
public class CollatedTermAttributeImpl extends CharTermAttributeImpl {
  private final Collator collator;

  /**
   * Create a new CollatedTermAttributeImpl
   * @param collator Collation key generator
   */
  public CollatedTermAttributeImpl(Collator collator) {
    this.collator = collator;
  }

  @Override
  public int toBytesRef(BytesRef target) {
    target.bytes = collator.getCollationKey(toString()).toByteArray();
    target.offset = 0;
    target.length = target.bytes.length;
    return target.hashCode();
  }

}
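The toBytesRef override is the whole trick: whenever the indexer asks the term attribute for its bytes, it receives the collator's sort key instead of UTF-8. A small hedged sketch of that behavior in isolation (the append call assumes the CharTermAttribute API this class inherits):

    import java.text.Collator;
    import java.util.Locale;

    import org.apache.lucene.collation.tokenattributes.CollatedTermAttributeImpl;
    import org.apache.lucene.util.BytesRef;

    public class CollatedBytesExample {
      public static void main(String[] args) {
        Collator collator = Collator.getInstance(new Locale("da", "dk"));
        CollatedTermAttributeImpl term = new CollatedTermAttributeImpl(collator);
        term.append("H\u00C5T");
        BytesRef bytes = new BytesRef();
        term.toBytesRef(bytes);
        // bytes now holds collator.getCollationKey("HÅT").toByteArray(),
        // so terms compare in Danish order rather than Unicode order.
      }
    }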
@@ -36,6 +36,7 @@ import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Document;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IndexableBinaryStringTools;
import org.apache.lucene.util.LuceneTestCase;


@@ -56,7 +57,9 @@ public abstract class CollationTestBase extends LuceneTestCase {
   * @param keyBits the result from
   *  collator.getCollationKey(original).toByteArray()
   * @return The encoded collation key for the original String
   * @deprecated only for testing deprecated filters
   */
  @Deprecated
  protected String encodeCollationKey(byte[] keyBits) {
    // Ensure that the backing char[] array is large enough to hold the encoded
    // Binary String

@@ -66,9 +69,9 @@ public abstract class CollationTestBase extends LuceneTestCase {
    return new String(encodedBegArray);
  }

  public void testFarsiRangeFilterCollating(Analyzer analyzer, String firstBeg,
                                            String firstEnd, String secondBeg,
                                            String secondEnd) throws Exception {
  public void testFarsiRangeFilterCollating(Analyzer analyzer, BytesRef firstBeg,
                                            BytesRef firstEnd, BytesRef secondBeg,
                                            BytesRef secondEnd) throws Exception {
    RAMDirectory ramDir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(ramDir, new IndexWriterConfig(
        TEST_VERSION_CURRENT, analyzer));

@@ -98,9 +101,9 @@ public abstract class CollationTestBase extends LuceneTestCase {
    searcher.close();
  }

  public void testFarsiRangeQueryCollating(Analyzer analyzer, String firstBeg,
                                           String firstEnd, String secondBeg,
                                           String secondEnd) throws Exception {
  public void testFarsiRangeQueryCollating(Analyzer analyzer, BytesRef firstBeg,
                                           BytesRef firstEnd, BytesRef secondBeg,
                                           BytesRef secondEnd) throws Exception {
    RAMDirectory ramDir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(ramDir, new IndexWriterConfig(
        TEST_VERSION_CURRENT, analyzer));

@@ -126,8 +129,8 @@ public abstract class CollationTestBase extends LuceneTestCase {
    searcher.close();
  }

  public void testFarsiTermRangeQuery(Analyzer analyzer, String firstBeg,
      String firstEnd, String secondBeg, String secondEnd) throws Exception {
  public void testFarsiTermRangeQuery(Analyzer analyzer, BytesRef firstBeg,
      BytesRef firstEnd, BytesRef secondBeg, BytesRef secondEnd) throws Exception {

    RAMDirectory farsiIndex = new RAMDirectory();
    IndexWriter writer = new IndexWriter(farsiIndex, new IndexWriterConfig(
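With the helpers now taking BytesRef bounds, callers build range endpoints straight from the collator instead of round-tripping through encoded Strings. A sketch of constructing such a bound, mirroring the field initializers further down in this diff (the endpoint text is illustrative):

    import java.text.Collator;
    import java.util.Locale;

    import org.apache.lucene.util.BytesRef;

    public class BytesRefBoundsExample {
      public static void main(String[] args) {
        Collator collator = Collator.getInstance(new Locale("ar"));
        String firstBegOriginal = "\u062F"; // illustrative endpoint text
        // The bound is simply the raw collation key wrapped in a BytesRef.
        BytesRef firstBeg =
            new BytesRef(collator.getCollationKey(firstBegOriginal).toByteArray());
      }
    }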
@@ -19,6 +19,8 @@ package org.apache.lucene.collation;


import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.util.BytesRef;

import java.text.Collator;
import java.util.Locale;

@@ -34,16 +36,18 @@ public class TestCollationKeyAnalyzer extends CollationTestBase {
  // RuleBasedCollator. However, the Arabic Locale seems to order the Farsi
  // characters properly.
  private Collator collator = Collator.getInstance(new Locale("ar"));
  private Analyzer analyzer = new CollationKeyAnalyzer(collator);
  private Analyzer analyzer = new CollationKeyAnalyzer(TEST_VERSION_CURRENT, collator);

  private String firstRangeBeginning = encodeCollationKey
    (collator.getCollationKey(firstRangeBeginningOriginal).toByteArray());
  private String firstRangeEnd = encodeCollationKey
    (collator.getCollationKey(firstRangeEndOriginal).toByteArray());
  private String secondRangeBeginning = encodeCollationKey
    (collator.getCollationKey(secondRangeBeginningOriginal).toByteArray());
  private String secondRangeEnd = encodeCollationKey
    (collator.getCollationKey(secondRangeEndOriginal).toByteArray());
  private BytesRef firstRangeBeginning = new BytesRef(collator.getCollationKey(firstRangeBeginningOriginal).toByteArray());
  private BytesRef firstRangeEnd = new BytesRef(collator.getCollationKey(firstRangeEndOriginal).toByteArray());
  private BytesRef secondRangeBeginning = new BytesRef(collator.getCollationKey(secondRangeBeginningOriginal).toByteArray());
  private BytesRef secondRangeEnd = new BytesRef(collator.getCollationKey(secondRangeEndOriginal).toByteArray());

  @Override
  public void setUp() throws Exception {
    super.setUp();
    assumeFalse("preflex format only supports UTF-8 encoded bytes", "PreFlex".equals(CodecProvider.getDefault().getDefaultFieldCodec()));
  }

  public void testFarsiRangeFilterCollating() throws Exception {
    testFarsiRangeFilterCollating

@@ -65,13 +69,13 @@ public class TestCollationKeyAnalyzer extends CollationTestBase {

  public void testCollationKeySort() throws Exception {
    Analyzer usAnalyzer
      = new CollationKeyAnalyzer(Collator.getInstance(Locale.US));
      = new CollationKeyAnalyzer(TEST_VERSION_CURRENT, Collator.getInstance(Locale.US));
    Analyzer franceAnalyzer
      = new CollationKeyAnalyzer(Collator.getInstance(Locale.FRANCE));
      = new CollationKeyAnalyzer(TEST_VERSION_CURRENT, Collator.getInstance(Locale.FRANCE));
    Analyzer swedenAnalyzer
      = new CollationKeyAnalyzer(Collator.getInstance(new Locale("sv", "se")));
      = new CollationKeyAnalyzer(TEST_VERSION_CURRENT, Collator.getInstance(new Locale("sv", "se")));
    Analyzer denmarkAnalyzer
      = new CollationKeyAnalyzer(Collator.getInstance(new Locale("da", "dk")));
      = new CollationKeyAnalyzer(TEST_VERSION_CURRENT, Collator.getInstance(new Locale("da", "dk")));

    // The ICU Collator and Sun java.text.Collator implementations differ in their
    // orderings - "BFJDH" is the ordering for java.text.Collator for Locale.US.
@@ -21,12 +21,16 @@ package org.apache.lucene.collation;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.util.BytesRef;

import java.text.Collator;
import java.util.Locale;
import java.io.Reader;


/**
 * @deprecated remove when CollationKeyFilter is removed.
 */
@Deprecated
public class TestCollationKeyFilter extends CollationTestBase {
  // the sort order of Ø versus U depends on the version of the rules being used
  // for the inherited root locale: Ø's order isn't specified in Locale.US since

@@ -39,14 +43,14 @@ public class TestCollationKeyFilter extends CollationTestBase {
  private Collator collator = Collator.getInstance(new Locale("ar"));
  private Analyzer analyzer = new TestAnalyzer(collator);

  private String firstRangeBeginning = encodeCollationKey
    (collator.getCollationKey(firstRangeBeginningOriginal).toByteArray());
  private String firstRangeEnd = encodeCollationKey
    (collator.getCollationKey(firstRangeEndOriginal).toByteArray());
  private String secondRangeBeginning = encodeCollationKey
    (collator.getCollationKey(secondRangeBeginningOriginal).toByteArray());
  private String secondRangeEnd = encodeCollationKey
    (collator.getCollationKey(secondRangeEndOriginal).toByteArray());
  private BytesRef firstRangeBeginning = new BytesRef(encodeCollationKey
    (collator.getCollationKey(firstRangeBeginningOriginal).toByteArray()));
  private BytesRef firstRangeEnd = new BytesRef(encodeCollationKey
    (collator.getCollationKey(firstRangeEndOriginal).toByteArray()));
  private BytesRef secondRangeBeginning = new BytesRef(encodeCollationKey
    (collator.getCollationKey(secondRangeBeginningOriginal).toByteArray()));
  private BytesRef secondRangeEnd = new BytesRef(encodeCollationKey
    (collator.getCollationKey(secondRangeEndOriginal).toByteArray()));


  public final class TestAnalyzer extends Analyzer {
@ -0,0 +1,96 @@
package org.apache.lucene.collation;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.collation.tokenattributes.ICUCollatedTermAttributeImpl;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeSource;

import com.ibm.icu.text.Collator;

/**
 * <p>
 * Converts each token into its {@link com.ibm.icu.text.CollationKey}, and
 * then encodes its bytes as an index term.
 * </p>
 * <p>
 * <strong>WARNING:</strong> Make sure you use exactly the same Collator at
 * index and query time -- CollationKeys are only comparable when produced by
 * the same Collator.  {@link com.ibm.icu.text.RuleBasedCollator}s are
 * independently versioned, so it is safe to search against stored
 * CollationKeys if the following are exactly the same (best practice is
 * to store this information with the index and check that they remain the
 * same at query time):
 * </p>
 * <ol>
 *   <li>Collator version - see {@link Collator#getVersion()}</li>
 *   <li>The collation strength used - see {@link Collator#setStrength(int)}</li>
 * </ol>
 * <p>
 * CollationKeys generated by ICU Collators are not compatible with those
 * generated by java.text.Collators.  Specifically, if you use
 * ICUCollationAttributeFactory to generate index terms, do not use
 * {@link CollationAttributeFactory} on the query side, or vice versa.
 * </p>
 * <p>
 * ICUCollationAttributeFactory is significantly faster and generates
 * significantly shorter keys than CollationAttributeFactory.  See
 * <a href="http://site.icu-project.org/charts/collation-icu4j-sun"
 * >http://site.icu-project.org/charts/collation-icu4j-sun</a> for key
 * generation timing and key length comparisons between ICU4J and
 * java.text.Collator over several languages.
 * </p>
 */
public class ICUCollationAttributeFactory extends AttributeSource.AttributeFactory {
  private final Collator collator;
  private final AttributeSource.AttributeFactory delegate;

  /**
   * Create an ICUCollationAttributeFactory, using
   * {@link AttributeSource.AttributeFactory#DEFAULT_ATTRIBUTE_FACTORY} as the
   * factory for all other attributes.
   * @param collator CollationKey generator
   */
  public ICUCollationAttributeFactory(Collator collator) {
    this(AttributeSource.AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, collator);
  }

  /**
   * Create an ICUCollationAttributeFactory, using the supplied Attribute
   * Factory as the factory for all other attributes.
   * @param delegate Attribute Factory
   * @param collator CollationKey generator
   */
  public ICUCollationAttributeFactory(AttributeSource.AttributeFactory delegate, Collator collator) {
    this.delegate = delegate;
    this.collator = collator;
  }

  @Override
  public AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass) {
    return attClass.isAssignableFrom(ICUCollatedTermAttributeImpl.class)
      ? new ICUCollatedTermAttributeImpl(collator)
      : delegate.createAttributeInstance(attClass);
  }
}

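For orientation, a minimal usage sketch (not part of this commit; "reader" stands in for any java.io.Reader over the input). The factory plugs into a tokenizer so that each token's byte form is its ICU collation key rather than UTF-8; the KeywordTokenizer constructor taking an AttributeFactory is the one added alongside this class.

  // Hypothetical usage sketch -- names and locale chosen for illustration.
  Collator collator = Collator.getInstance(new ULocale("da", "DK"));
  ICUCollationAttributeFactory factory = new ICUCollationAttributeFactory(collator);
  // Each emitted token's bytes are now the raw collation key for its text.
  TokenStream ts = new KeywordTokenizer(factory, reader, KeywordTokenizer.DEFAULT_BUFFER_SIZE);
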
@ -19,24 +19,20 @@ package org.apache.lucene.collation;

import com.ibm.icu.text.Collator;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.Tokenizer;

import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import org.apache.lucene.collation.CollationKeyAnalyzer; // javadocs
import org.apache.lucene.util.Version;

import java.io.Reader;
import java.io.IOException;

/**
 * <p>
 * Filters {@link KeywordTokenizer} with {@link ICUCollationKeyFilter}.
 * <p>
 * Converts the token into its {@link com.ibm.icu.text.CollationKey}, and
 * then encodes the CollationKey with
 * {@link org.apache.lucene.util.IndexableBinaryStringTools}, to allow it to
 * then encodes the CollationKey either directly or with
 * {@link IndexableBinaryStringTools} (see <a href="#version">below</a>), to allow it to
 * be stored as an index term.
 * </p>
 * <p>
@ -70,39 +66,48 @@ import java.io.IOException;
 * generation timing and key length comparisons between ICU4J and
 * java.text.Collator over several languages.
 * </p>
 * <a name="version"/>
 * <p>You must specify the required {@link Version}
 * compatibility when creating ICUCollationKeyAnalyzer:
 * <ul>
 *   <li> As of 4.0, Collation Keys are directly encoded as bytes. Previous
 *   versions will encode the bytes with {@link IndexableBinaryStringTools}.
 * </ul>
 */
public final class ICUCollationKeyAnalyzer extends Analyzer {
  private Collator collator;
public final class ICUCollationKeyAnalyzer extends ReusableAnalyzerBase {
  private final Collator collator;
  private final ICUCollationAttributeFactory factory;
  private final Version matchVersion;

  public ICUCollationKeyAnalyzer(Collator collator) {
  /**
   * Create a new ICUCollationKeyAnalyzer, using the specified collator.
   *
   * @param matchVersion See <a href="#version">above</a>
   * @param collator CollationKey generator
   */
  public ICUCollationKeyAnalyzer(Version matchVersion, Collator collator) {
    this.matchVersion = matchVersion;
    this.collator = collator;
    this.factory = new ICUCollationAttributeFactory(collator);
  }

  /**
   * @deprecated Use {@link ICUCollationKeyAnalyzer#ICUCollationKeyAnalyzer(Version, Collator)}
   *   and specify a version instead. This ctor will be removed in Lucene 5.0
   */
  @Deprecated
  public ICUCollationKeyAnalyzer(Collator collator) {
    this(Version.LUCENE_31, collator);
  }

  @Override
  public TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream result = new KeywordTokenizer(reader);
    result = new ICUCollationKeyFilter(result, collator);
    return result;
  }

  private class SavedStreams {
    Tokenizer source;
    TokenStream result;
  }

  @Override
  public TokenStream reusableTokenStream(String fieldName, Reader reader)
      throws IOException {

    SavedStreams streams = (SavedStreams)getPreviousTokenStream();
    if (streams == null) {
      streams = new SavedStreams();
      streams.source = new KeywordTokenizer(reader);
      streams.result = new ICUCollationKeyFilter(streams.source, collator);
      setPreviousTokenStream(streams);
  protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    if (matchVersion.onOrAfter(Version.LUCENE_40)) {
      KeywordTokenizer tokenizer = new KeywordTokenizer(factory, reader, KeywordTokenizer.DEFAULT_BUFFER_SIZE);
      return new TokenStreamComponents(tokenizer, tokenizer);
    } else {
      streams.source.reset(reader);
      KeywordTokenizer tokenizer = new KeywordTokenizer(reader);
      return new TokenStreamComponents(tokenizer, new ICUCollationKeyFilter(tokenizer, collator));
    }
    return streams.result;
  }
}

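A hedged usage sketch (illustrative, not from this commit): under Version.LUCENE_40 the analyzer emits collation keys directly as bytes via ICUCollationAttributeFactory; for earlier match versions it falls back to the deprecated ICUCollationKeyFilter encoding.

  // Illustrative only -- the locale is an assumption for the example.
  Collator collator = Collator.getInstance(new ULocale("de", "DE"));
  Analyzer analyzer = new ICUCollationKeyAnalyzer(Version.LUCENE_40, collator);
  // Token streams produced by this analyzer now carry byte-encoded collation keys.
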
@ -68,7 +68,10 @@ import java.io.IOException;
 * generation timing and key length comparisons between ICU4J and
 * java.text.Collator over several languages.
 * </p>
 * @deprecated Use {@link ICUCollationAttributeFactory} instead, which encodes
 *  terms directly as bytes. This filter will be removed in Lucene 5.0
 */
@Deprecated
public final class ICUCollationKeyFilter extends TokenFilter {
  private Collator collator = null;
  private RawCollationKey reusableKey = new RawCollationKey();

@ -0,0 +1,50 @@
package org.apache.lucene.collation.tokenattributes;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.analysis.tokenattributes.CharTermAttributeImpl;
import org.apache.lucene.util.BytesRef;

import com.ibm.icu.text.Collator;
import com.ibm.icu.text.RawCollationKey;

/**
 * Extension of {@link CharTermAttributeImpl} that encodes the term
 * text as a binary Unicode collation key instead of as UTF-8 bytes.
 */
public class ICUCollatedTermAttributeImpl extends CharTermAttributeImpl {
  private final Collator collator;
  private final RawCollationKey key = new RawCollationKey();

  /**
   * Create a new ICUCollatedTermAttributeImpl
   * @param collator Collation key generator
   */
  public ICUCollatedTermAttributeImpl(Collator collator) {
    this.collator = collator;
  }

  @Override
  public int toBytesRef(BytesRef target) {
    collator.getRawCollationKey(toString(), key);
    target.bytes = key.bytes;
    target.offset = 0;
    target.length = key.size;
    return target.hashCode();
  }
}

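To make the override concrete, a small sketch (not part of this commit): fill the attribute with term text, then ask for its bytes; the result is the raw ICU collation key rather than the UTF-8 encoding of the characters.

  // Hypothetical illustration; the locale is an assumption for the example.
  Collator collator = Collator.getInstance(new ULocale("fa"));
  ICUCollatedTermAttributeImpl att = new ICUCollatedTermAttributeImpl(collator);
  att.append("term");
  BytesRef ref = new BytesRef();
  att.toBytesRef(ref);  // ref.bytes/ref.length now hold the RawCollationKey bytes
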
@ -112,11 +112,10 @@ algorithm.

<h3>Farsi Range Queries</h3>
<code><pre>
Collator collator = Collator.getInstance(new Locale("ar"));
ICUCollationKeyAnalyzer analyzer = new ICUCollationKeyAnalyzer(collator);
Collator collator = Collator.getInstance(new ULocale("ar"));
ICUCollationKeyAnalyzer analyzer = new ICUCollationKeyAnalyzer(Version.LUCENE_40, collator);
RAMDirectory ramDir = new RAMDirectory();
IndexWriter writer = new IndexWriter
  (ramDir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
IndexWriter writer = new IndexWriter(ramDir, new IndexWriterConfig(Version.LUCENE_40, analyzer));
Document doc = new Document();
doc.add(new Field("content", "\u0633\u0627\u0628",
          Field.Store.YES, Field.Index.ANALYZED));

@ -124,11 +123,8 @@ algorithm.
writer.close();
IndexSearcher is = new IndexSearcher(ramDir, true);

// The AnalyzingQueryParser in Lucene's contrib allows terms in range queries
// to be passed through an analyzer - Lucene's standard QueryParser does not
// allow this.
AnalyzingQueryParser aqp = new AnalyzingQueryParser("content", analyzer);
aqp.setLowercaseExpandedTerms(false);
QueryParser aqp = new QueryParser(Version.LUCENE_40, "content", analyzer);
aqp.setAnalyzeRangeTerms(true);

// Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
// orders the U+0698 character before the U+0633 character, so the single
@ -143,10 +139,9 @@ algorithm.
<h3>Danish Sorting</h3>
<code><pre>
Analyzer analyzer
  = new ICUCollationKeyAnalyzer(Collator.getInstance(new Locale("da", "dk")));
  = new ICUCollationKeyAnalyzer(Version.LUCENE_40, Collator.getInstance(new ULocale("da", "dk")));
RAMDirectory indexStore = new RAMDirectory();
IndexWriter writer = new IndexWriter
  (indexStore, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
IndexWriter writer = new IndexWriter(indexStore, new IndexWriterConfig(Version.LUCENE_40, analyzer));
String[] tracer = new String[] { "A", "B", "C", "D", "E" };
String[] data = new String[] { "HAT", "HUT", "H\u00C5T", "H\u00D8T", "HOT" };
String[] sortedTracerOrder = new String[] { "A", "E", "B", "D", "C" };

@ -157,7 +152,7 @@ algorithm.
  writer.addDocument(doc);
}
writer.close();
Searcher searcher = new IndexSearcher(indexStore, true);
IndexSearcher searcher = new IndexSearcher(indexStore, true);
Sort sort = new Sort();
sort.setSort(new SortField("contents", SortField.STRING));
Query query = new MatchAllDocsQuery();
@ -170,18 +165,17 @@ algorithm.

<h3>Turkish Case Normalization</h3>
<code><pre>
Collator collator = Collator.getInstance(new Locale("tr", "TR"));
Collator collator = Collator.getInstance(new ULocale("tr", "TR"));
collator.setStrength(Collator.PRIMARY);
Analyzer analyzer = new ICUCollationKeyAnalyzer(collator);
Analyzer analyzer = new ICUCollationKeyAnalyzer(Version.LUCENE_40, collator);
RAMDirectory ramDir = new RAMDirectory();
IndexWriter writer = new IndexWriter
  (ramDir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
IndexWriter writer = new IndexWriter(ramDir, new IndexWriterConfig(Version.LUCENE_40, analyzer));
Document doc = new Document();
doc.add(new Field("contents", "DIGY", Field.Store.NO, Field.Index.ANALYZED));
writer.addDocument(doc);
writer.close();
IndexSearcher is = new IndexSearcher(ramDir, true);
QueryParser parser = new QueryParser("contents", analyzer);
QueryParser parser = new QueryParser(Version.LUCENE_40, "contents", analyzer);
Query query = parser.parse("d\u0131gy"); // U+0131: dotless i
ScoreDoc[] result = is.search(query, null, 1000).scoreDocs;
assertEquals("The index Term should be included.", 1, result.length);

@ -20,6 +20,8 @@ package org.apache.lucene.collation;

import com.ibm.icu.text.Collator;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.util.BytesRef;

import java.util.Locale;

@ -27,17 +29,23 @@ import java.util.Locale;
public class TestICUCollationKeyAnalyzer extends CollationTestBase {

  private Collator collator = Collator.getInstance(new Locale("fa"));
  private Analyzer analyzer = new ICUCollationKeyAnalyzer(collator);
  private Analyzer analyzer = new ICUCollationKeyAnalyzer(TEST_VERSION_CURRENT, collator);

  private String firstRangeBeginning = encodeCollationKey
  private BytesRef firstRangeBeginning = new BytesRef
    (collator.getCollationKey(firstRangeBeginningOriginal).toByteArray());
  private String firstRangeEnd = encodeCollationKey
  private BytesRef firstRangeEnd = new BytesRef
    (collator.getCollationKey(firstRangeEndOriginal).toByteArray());
  private String secondRangeBeginning = encodeCollationKey
  private BytesRef secondRangeBeginning = new BytesRef
    (collator.getCollationKey(secondRangeBeginningOriginal).toByteArray());
  private String secondRangeEnd = encodeCollationKey
  private BytesRef secondRangeEnd = new BytesRef
    (collator.getCollationKey(secondRangeEndOriginal).toByteArray());

  @Override
  public void setUp() throws Exception {
    super.setUp();
    assumeFalse("preflex format only supports UTF-8 encoded bytes", "PreFlex".equals(CodecProvider.getDefault().getDefaultFieldCodec()));
  }

  public void testFarsiRangeFilterCollating() throws Exception {
    testFarsiRangeFilterCollating(analyzer, firstRangeBeginning, firstRangeEnd,
                                  secondRangeBeginning, secondRangeEnd);

@ -62,13 +70,13 @@ public class TestICUCollationKeyAnalyzer extends CollationTestBase {
  //
  public void testCollationKeySort() throws Exception {
    Analyzer usAnalyzer = new ICUCollationKeyAnalyzer
      (Collator.getInstance(Locale.US));
      (TEST_VERSION_CURRENT, Collator.getInstance(Locale.US));
    Analyzer franceAnalyzer = new ICUCollationKeyAnalyzer
      (Collator.getInstance(Locale.FRANCE));
      (TEST_VERSION_CURRENT, Collator.getInstance(Locale.FRANCE));
    Analyzer swedenAnalyzer = new ICUCollationKeyAnalyzer
      (Collator.getInstance(new Locale("sv", "se")));
      (TEST_VERSION_CURRENT, Collator.getInstance(new Locale("sv", "se")));
    Analyzer denmarkAnalyzer = new ICUCollationKeyAnalyzer
      (Collator.getInstance(new Locale("da", "dk")));
      (TEST_VERSION_CURRENT, Collator.getInstance(new Locale("da", "dk")));

    // The ICU Collator and java.text.Collator implementations differ in their
    // orderings - "BFJHD" is the ordering for the ICU Collator for Locale.US.

@ -22,24 +22,26 @@ import com.ibm.icu.text.Collator;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.util.BytesRef;

import java.io.Reader;
import java.util.Locale;

/** @deprecated remove this when ICUCollationKeyFilter is removed */
@Deprecated
public class TestICUCollationKeyFilter extends CollationTestBase {

  private Collator collator = Collator.getInstance(new Locale("fa"));
  private Analyzer analyzer = new TestAnalyzer(collator);

  private String firstRangeBeginning = encodeCollationKey
    (collator.getCollationKey(firstRangeBeginningOriginal).toByteArray());
  private String firstRangeEnd = encodeCollationKey
    (collator.getCollationKey(firstRangeEndOriginal).toByteArray());
  private String secondRangeBeginning = encodeCollationKey
    (collator.getCollationKey(secondRangeBeginningOriginal).toByteArray());
  private String secondRangeEnd = encodeCollationKey
    (collator.getCollationKey(secondRangeEndOriginal).toByteArray());
  private BytesRef firstRangeBeginning = new BytesRef(encodeCollationKey
    (collator.getCollationKey(firstRangeBeginningOriginal).toByteArray()));
  private BytesRef firstRangeEnd = new BytesRef(encodeCollationKey
    (collator.getCollationKey(firstRangeEndOriginal).toByteArray()));
  private BytesRef secondRangeBeginning = new BytesRef(encodeCollationKey
    (collator.getCollationKey(secondRangeBeginningOriginal).toByteArray()));
  private BytesRef secondRangeEnd = new BytesRef(encodeCollationKey
    (collator.getCollationKey(secondRangeEndOriginal).toByteArray()));

  public final class TestAnalyzer extends Analyzer {

@ -95,10 +95,10 @@ class ShardFieldSortedHitQueue extends PriorityQueue {

      String fieldname = fields[i].getField();
      comparators[i] = getCachedComparator(fieldname, fields[i]
          .getType(), fields[i].getLocale(), fields[i].getComparatorSource());
          .getType(), fields[i].getComparatorSource());

      if (fields[i].getType() == SortField.STRING) {
        this.fields[i] = new SortField(fieldname, fields[i].getLocale(),
        this.fields[i] = new SortField(fieldname, SortField.STRING,
            fields[i].getReverse());
      } else {
        this.fields[i] = new SortField(fieldname, fields[i].getType(),

@ -145,17 +145,14 @@ class ShardFieldSortedHitQueue extends PriorityQueue {
    return c < 0;
  }

  Comparator getCachedComparator(String fieldname, int type, Locale locale, FieldComparatorSource factory) {
  Comparator getCachedComparator(String fieldname, int type, FieldComparatorSource factory) {
    Comparator comparator = null;
    switch (type) {
      case SortField.SCORE:
        comparator = comparatorScore(fieldname);
        break;
      case SortField.STRING:
        if (locale != null)
          comparator = comparatorStringLocale(fieldname, locale);
        else
          comparator = comparatorNatural(fieldname);
        comparator = comparatorNatural(fieldname);
        break;
      case SortField.CUSTOM:
        if (factory instanceof MissingStringLastComparatorSource){

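Illustrative call under the new signature (not in the diff; field name made up): with the Locale parameter gone, a STRING sort always takes the natural byte-order comparator, and locale-sensitive ordering is expected to come from collation keys produced at analysis time rather than from a Locale-based comparator.

  // Hypothetical call site; the factory argument is only consulted for SortField.CUSTOM.
  Comparator cmp = getCachedComparator("title", SortField.STRING, null);
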
@ -410,7 +410,7 @@ public class DateField extends FieldType {

  /** DateField specific range query */
  public Query getRangeQuery(QParser parser, SchemaField sf, Date part1, Date part2, boolean minInclusive, boolean maxInclusive) {
    return new TermRangeQuery(
    return TermRangeQuery.newStringRange(
            sf.getName(),
            part1 == null ? null : toInternal(part1),
            part2 == null ? null : toInternal(part2),

@ -521,7 +521,7 @@ public abstract class FieldType extends FieldProperties {
   */
  public Query getRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) {
    // constant score mode is now enabled by default
    return new TermRangeQuery(
    return TermRangeQuery.newStringRange(
            field.getName(),
            part1 == null ? null : toInternal(part1),
            part2 == null ? null : toInternal(part2),

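For reference, a minimal sketch of the new factory method in isolation (field name and bounds are made up for illustration):

  // Terms are compared as bytes; passing null for either bound leaves that end open.
  Query q = TermRangeQuery.newStringRange("name", "apple", "banana", true, false);
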
@ -398,8 +398,8 @@ public class QueryParsing {
      String fname = q.getField();
      FieldType ft = writeFieldName(fname, schema, out, flags);
      out.append(q.includesLower() ? '[' : '{');
      String lt = q.getLowerTerm();
      String ut = q.getUpperTerm();
      String lt = q.getLowerTerm().utf8ToString();
      String ut = q.getUpperTerm().utf8ToString();
      if (lt == null) {
        out.append('*');
      } else {

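One caution worth noting (illustrative, not part of the diff): getLowerTerm()/getUpperTerm() now return BytesRef, which is null for open-ended ranges, so a null-safe conversion would look like:

  // Defensive sketch -- avoids an NPE when the lower bound is open.
  BytesRef lower = q.getLowerTerm();
  String lt = lower == null ? null : lower.utf8ToString();
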