mirror of https://github.com/apache/lucene.git
LUCENE-6281: Removed slow collation support.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1661720 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
a43ebd6870
commit
0c078aaf4d
|
@ -152,6 +152,10 @@ API Changes
|
||||||
* LUCENE-6272: Scorer extends DocIdSetIterator rather than DocsEnum (Alan
|
* LUCENE-6272: Scorer extends DocIdSetIterator rather than DocsEnum (Alan
|
||||||
Woodward)
|
Woodward)
|
||||||
|
|
||||||
|
* LUCENE-6281: Removed support for slow collations from lucene/sandbox. Better
|
||||||
|
performance would be achieved through CollationKeyAnalyzer or
|
||||||
|
ICUCollationKeyAnalyzer. (Adrien Grand)
|
||||||
|
|
||||||
Other
|
Other
|
||||||
|
|
||||||
* LUCENE-6248: Remove unused odd constants from StandardSyntaxParser.jj
|
* LUCENE-6248: Remove unused odd constants from StandardSyntaxParser.jj
|
||||||
|
|
|
@ -1,141 +0,0 @@
|
||||||
package org.apache.lucene.sandbox.queries;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.text.Collator;
|
|
||||||
|
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
|
||||||
import org.apache.lucene.index.BinaryDocValues;
|
|
||||||
import org.apache.lucene.index.DocValues;
|
|
||||||
import org.apache.lucene.search.SimpleFieldComparator;
|
|
||||||
import org.apache.lucene.util.Bits;
|
|
||||||
import org.apache.lucene.util.BytesRef;
|
|
||||||
|
|
||||||
/** Sorts by a field's value using the given Collator
|
|
||||||
*
|
|
||||||
* <p><b>WARNING</b>: this is very slow; you'll
|
|
||||||
* get much better performance using the
|
|
||||||
* CollationKeyAnalyzer or ICUCollationKeyAnalyzer.
|
|
||||||
* @deprecated Index collation keys with CollationKeyAnalyzer or ICUCollationKeyAnalyzer instead.
|
|
||||||
* This class will be removed in Lucene 5.0
|
|
||||||
*/
|
|
||||||
@Deprecated
|
|
||||||
public final class SlowCollatedStringComparator extends SimpleFieldComparator<String> {
|
|
||||||
|
|
||||||
private final String[] values;
|
|
||||||
private BinaryDocValues currentDocTerms;
|
|
||||||
private Bits docsWithField;
|
|
||||||
private final String field;
|
|
||||||
final Collator collator;
|
|
||||||
private String bottom;
|
|
||||||
private String topValue;
|
|
||||||
|
|
||||||
public SlowCollatedStringComparator(int numHits, String field, Collator collator) {
|
|
||||||
values = new String[numHits];
|
|
||||||
this.field = field;
|
|
||||||
this.collator = collator;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int compare(int slot1, int slot2) {
|
|
||||||
final String val1 = values[slot1];
|
|
||||||
final String val2 = values[slot2];
|
|
||||||
if (val1 == null) {
|
|
||||||
if (val2 == null) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
return -1;
|
|
||||||
} else if (val2 == null) {
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
return collator.compare(val1, val2);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int compareBottom(int doc) {
|
|
||||||
final BytesRef term = currentDocTerms.get(doc);
|
|
||||||
final String val2 = term.length == 0 && docsWithField.get(doc) == false ? null : term.utf8ToString();
|
|
||||||
if (bottom == null) {
|
|
||||||
if (val2 == null) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
return -1;
|
|
||||||
} else if (val2 == null) {
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
return collator.compare(bottom, val2);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void copy(int slot, int doc) {
|
|
||||||
final BytesRef term = currentDocTerms.get(doc);
|
|
||||||
if (term.length == 0 && docsWithField.get(doc) == false) {
|
|
||||||
values[slot] = null;
|
|
||||||
} else {
|
|
||||||
values[slot] = term.utf8ToString();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected void doSetNextReader(LeafReaderContext context) throws IOException {
|
|
||||||
currentDocTerms = DocValues.getBinary(context.reader(), field);
|
|
||||||
docsWithField = DocValues.getDocsWithField(context.reader(), field);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void setBottom(final int bottom) {
|
|
||||||
this.bottom = values[bottom];
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void setTopValue(final String value) {
|
|
||||||
this.topValue = value;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String value(int slot) {
|
|
||||||
return values[slot];
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int compareValues(String first, String second) {
|
|
||||||
if (first == null) {
|
|
||||||
if (second == null) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
return -1;
|
|
||||||
} else if (second == null) {
|
|
||||||
return 1;
|
|
||||||
} else {
|
|
||||||
return collator.compare(first, second);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int compareTop(int doc) {
|
|
||||||
final BytesRef term = currentDocTerms.get(doc);
|
|
||||||
final String docValue;
|
|
||||||
if (term.length == 0 && docsWithField.get(doc) == false) {
|
|
||||||
docValue = null;
|
|
||||||
} else {
|
|
||||||
docValue = term.utf8ToString();
|
|
||||||
}
|
|
||||||
return compareValues(topValue, docValue);
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,75 +0,0 @@
|
||||||
package org.apache.lucene.sandbox.queries;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import java.text.Collator;
|
|
||||||
|
|
||||||
import org.apache.lucene.search.DocValuesRangeQuery;
|
|
||||||
import org.apache.lucene.search.MultiTermQueryWrapperFilter;
|
|
||||||
import org.apache.lucene.search.NumericRangeFilter; // javadoc
|
|
||||||
// javadoc
|
|
||||||
|
|
||||||
/**
|
|
||||||
* A Filter that restricts search results to a range of term
|
|
||||||
* values in a given field.
|
|
||||||
*
|
|
||||||
* <p>This filter matches the documents looking for terms that fall into the
|
|
||||||
* supplied range according to {@link
|
|
||||||
* String#compareTo(String)}, unless a <code>Collator</code> is provided. It is not intended
|
|
||||||
* for numerical ranges; use {@link NumericRangeFilter} instead.
|
|
||||||
*
|
|
||||||
* <p>If you construct a large number of range filters with different ranges but on the
|
|
||||||
* same field, {@link DocValuesRangeQuery} may have significantly better performance.
|
|
||||||
* @deprecated Index collation keys with CollationKeyAnalyzer or ICUCollationKeyAnalyzer instead.
|
|
||||||
* This class will be removed in Lucene 5.0
|
|
||||||
*/
|
|
||||||
@Deprecated
|
|
||||||
public class SlowCollatedTermRangeFilter extends MultiTermQueryWrapperFilter<SlowCollatedTermRangeQuery> {
|
|
||||||
/**
|
|
||||||
*
|
|
||||||
* @param lowerTerm The lower bound on this range
|
|
||||||
* @param upperTerm The upper bound on this range
|
|
||||||
* @param includeLower Does this range include the lower bound?
|
|
||||||
* @param includeUpper Does this range include the upper bound?
|
|
||||||
* @param collator The collator to use when determining range inclusion; set
|
|
||||||
* to null to use Unicode code point ordering instead of collation.
|
|
||||||
* @throws IllegalArgumentException if both terms are null or if
|
|
||||||
* lowerTerm is null and includeLower is true (similar for upperTerm
|
|
||||||
* and includeUpper)
|
|
||||||
*/
|
|
||||||
public SlowCollatedTermRangeFilter(String fieldName, String lowerTerm, String upperTerm,
|
|
||||||
boolean includeLower, boolean includeUpper,
|
|
||||||
Collator collator) {
|
|
||||||
super(new SlowCollatedTermRangeQuery(fieldName, lowerTerm, upperTerm, includeLower, includeUpper, collator));
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Returns the lower value of this range filter */
|
|
||||||
public String getLowerTerm() { return query.getLowerTerm(); }
|
|
||||||
|
|
||||||
/** Returns the upper value of this range filter */
|
|
||||||
public String getUpperTerm() { return query.getUpperTerm(); }
|
|
||||||
|
|
||||||
/** Returns <code>true</code> if the lower endpoint is inclusive */
|
|
||||||
public boolean includesLower() { return query.includesLower(); }
|
|
||||||
|
|
||||||
/** Returns <code>true</code> if the upper endpoint is inclusive */
|
|
||||||
public boolean includesUpper() { return query.includesUpper(); }
|
|
||||||
|
|
||||||
/** Returns the collator used to determine range inclusion, if any. */
|
|
||||||
public Collator getCollator() { return query.getCollator(); }
|
|
||||||
}
|
|
|
@ -1,178 +0,0 @@
|
||||||
package org.apache.lucene.sandbox.queries;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.text.Collator;
|
|
||||||
|
|
||||||
import org.apache.lucene.index.Terms;
|
|
||||||
import org.apache.lucene.index.TermsEnum;
|
|
||||||
import org.apache.lucene.search.MultiTermQuery; // javadoc
|
|
||||||
import org.apache.lucene.search.NumericRangeQuery; // javadoc
|
|
||||||
import org.apache.lucene.util.AttributeSource;
|
|
||||||
import org.apache.lucene.util.ToStringUtils;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* A Query that matches documents within an range of terms.
|
|
||||||
*
|
|
||||||
* <p>This query matches the documents looking for terms that fall into the
|
|
||||||
* supplied range according to {@link
|
|
||||||
* String#compareTo(String)}, unless a <code>Collator</code> is provided. It is not intended
|
|
||||||
* for numerical ranges; use {@link NumericRangeQuery} instead.
|
|
||||||
*
|
|
||||||
* <p>This query uses the {@link
|
|
||||||
* MultiTermQuery#CONSTANT_SCORE_FILTER_REWRITE}
|
|
||||||
* rewrite method.
|
|
||||||
* @deprecated Index collation keys with CollationKeyAnalyzer or ICUCollationKeyAnalyzer instead.
|
|
||||||
* This class will be removed in Lucene 5.0
|
|
||||||
*/
|
|
||||||
@Deprecated
|
|
||||||
public class SlowCollatedTermRangeQuery extends MultiTermQuery {
|
|
||||||
private String lowerTerm;
|
|
||||||
private String upperTerm;
|
|
||||||
private boolean includeLower;
|
|
||||||
private boolean includeUpper;
|
|
||||||
private Collator collator;
|
|
||||||
|
|
||||||
/** Constructs a query selecting all terms greater/equal than
|
|
||||||
* <code>lowerTerm</code> but less/equal than <code>upperTerm</code>.
|
|
||||||
* <p>
|
|
||||||
* If an endpoint is null, it is said
|
|
||||||
* to be "open". Either or both endpoints may be open. Open endpoints may not
|
|
||||||
* be exclusive (you can't select all but the first or last term without
|
|
||||||
* explicitly specifying the term to exclude.)
|
|
||||||
* <p>
|
|
||||||
*
|
|
||||||
* @param lowerTerm The Term text at the lower end of the range
|
|
||||||
* @param upperTerm The Term text at the upper end of the range
|
|
||||||
* @param includeLower
|
|
||||||
* If true, the <code>lowerTerm</code> is
|
|
||||||
* included in the range.
|
|
||||||
* @param includeUpper
|
|
||||||
* If true, the <code>upperTerm</code> is
|
|
||||||
* included in the range.
|
|
||||||
* @param collator The collator to use to collate index Terms, to determine
|
|
||||||
* their membership in the range bounded by <code>lowerTerm</code> and
|
|
||||||
* <code>upperTerm</code>.
|
|
||||||
*/
|
|
||||||
public SlowCollatedTermRangeQuery(String field, String lowerTerm, String upperTerm,
|
|
||||||
boolean includeLower, boolean includeUpper, Collator collator) {
|
|
||||||
super(field);
|
|
||||||
this.lowerTerm = lowerTerm;
|
|
||||||
this.upperTerm = upperTerm;
|
|
||||||
this.includeLower = includeLower;
|
|
||||||
this.includeUpper = includeUpper;
|
|
||||||
this.collator = collator;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Returns the lower value of this range query */
|
|
||||||
public String getLowerTerm() { return lowerTerm; }
|
|
||||||
|
|
||||||
/** Returns the upper value of this range query */
|
|
||||||
public String getUpperTerm() { return upperTerm; }
|
|
||||||
|
|
||||||
/** Returns <code>true</code> if the lower endpoint is inclusive */
|
|
||||||
public boolean includesLower() { return includeLower; }
|
|
||||||
|
|
||||||
/** Returns <code>true</code> if the upper endpoint is inclusive */
|
|
||||||
public boolean includesUpper() { return includeUpper; }
|
|
||||||
|
|
||||||
/** Returns the collator used to determine range inclusion */
|
|
||||||
public Collator getCollator() { return collator; }
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
|
|
||||||
if (lowerTerm != null && upperTerm != null && collator.compare(lowerTerm, upperTerm) > 0) {
|
|
||||||
return TermsEnum.EMPTY;
|
|
||||||
}
|
|
||||||
|
|
||||||
TermsEnum tenum = terms.iterator(null);
|
|
||||||
|
|
||||||
if (lowerTerm == null && upperTerm == null) {
|
|
||||||
return tenum;
|
|
||||||
}
|
|
||||||
return new SlowCollatedTermRangeTermsEnum(tenum,
|
|
||||||
lowerTerm, upperTerm, includeLower, includeUpper, collator);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** @deprecated Use {@link #getField()} instead. */
|
|
||||||
@Deprecated
|
|
||||||
public String field() {
|
|
||||||
return getField();
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Prints a user-readable version of this query. */
|
|
||||||
@Override
|
|
||||||
public String toString(String field) {
|
|
||||||
StringBuilder buffer = new StringBuilder();
|
|
||||||
if (!getField().equals(field)) {
|
|
||||||
buffer.append(getField());
|
|
||||||
buffer.append(":");
|
|
||||||
}
|
|
||||||
buffer.append(includeLower ? '[' : '{');
|
|
||||||
buffer.append(lowerTerm != null ? lowerTerm : "*");
|
|
||||||
buffer.append(" TO ");
|
|
||||||
buffer.append(upperTerm != null ? upperTerm : "*");
|
|
||||||
buffer.append(includeUpper ? ']' : '}');
|
|
||||||
buffer.append(ToStringUtils.boost(getBoost()));
|
|
||||||
return buffer.toString();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int hashCode() {
|
|
||||||
final int prime = 31;
|
|
||||||
int result = super.hashCode();
|
|
||||||
result = prime * result + ((collator == null) ? 0 : collator.hashCode());
|
|
||||||
result = prime * result + (includeLower ? 1231 : 1237);
|
|
||||||
result = prime * result + (includeUpper ? 1231 : 1237);
|
|
||||||
result = prime * result + ((lowerTerm == null) ? 0 : lowerTerm.hashCode());
|
|
||||||
result = prime * result + ((upperTerm == null) ? 0 : upperTerm.hashCode());
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean equals(Object obj) {
|
|
||||||
if (this == obj)
|
|
||||||
return true;
|
|
||||||
if (!super.equals(obj))
|
|
||||||
return false;
|
|
||||||
if (getClass() != obj.getClass())
|
|
||||||
return false;
|
|
||||||
SlowCollatedTermRangeQuery other = (SlowCollatedTermRangeQuery) obj;
|
|
||||||
if (collator == null) {
|
|
||||||
if (other.collator != null)
|
|
||||||
return false;
|
|
||||||
} else if (!collator.equals(other.collator))
|
|
||||||
return false;
|
|
||||||
if (includeLower != other.includeLower)
|
|
||||||
return false;
|
|
||||||
if (includeUpper != other.includeUpper)
|
|
||||||
return false;
|
|
||||||
if (lowerTerm == null) {
|
|
||||||
if (other.lowerTerm != null)
|
|
||||||
return false;
|
|
||||||
} else if (!lowerTerm.equals(other.lowerTerm))
|
|
||||||
return false;
|
|
||||||
if (upperTerm == null) {
|
|
||||||
if (other.upperTerm != null)
|
|
||||||
return false;
|
|
||||||
} else if (!upperTerm.equals(other.upperTerm))
|
|
||||||
return false;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,100 +0,0 @@
|
||||||
package org.apache.lucene.sandbox.queries;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import java.text.Collator;
|
|
||||||
|
|
||||||
import org.apache.lucene.index.TermsEnum;
|
|
||||||
import org.apache.lucene.index.FilteredTermsEnum;
|
|
||||||
import org.apache.lucene.util.BytesRef;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Subclass of FilteredTermEnum for enumerating all terms that match the
|
|
||||||
* specified range parameters.
|
|
||||||
* <p>Term enumerations are always ordered by
|
|
||||||
* {@link BytesRef#compareTo}. Each term in the enumeration is
|
|
||||||
* greater than all that precede it.</p>
|
|
||||||
* @deprecated Index collation keys with CollationKeyAnalyzer or ICUCollationKeyAnalyzer instead.
|
|
||||||
* This class will be removed in Lucene 5.0
|
|
||||||
*/
|
|
||||||
@Deprecated
|
|
||||||
public class SlowCollatedTermRangeTermsEnum extends FilteredTermsEnum {
|
|
||||||
private Collator collator;
|
|
||||||
private String upperTermText;
|
|
||||||
private String lowerTermText;
|
|
||||||
private boolean includeLower;
|
|
||||||
private boolean includeUpper;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Enumerates all terms greater/equal than <code>lowerTerm</code>
|
|
||||||
* but less/equal than <code>upperTerm</code>.
|
|
||||||
*
|
|
||||||
* If an endpoint is null, it is said to be "open". Either or both
|
|
||||||
* endpoints may be open. Open endpoints may not be exclusive
|
|
||||||
* (you can't select all but the first or last term without
|
|
||||||
* explicitly specifying the term to exclude.)
|
|
||||||
*
|
|
||||||
* @param tenum source of the terms to enumerate.
|
|
||||||
* @param lowerTermText
|
|
||||||
* The term text at the lower end of the range
|
|
||||||
* @param upperTermText
|
|
||||||
* The term text at the upper end of the range
|
|
||||||
* @param includeLower
|
|
||||||
* If true, the <code>lowerTerm</code> is included in the range.
|
|
||||||
* @param includeUpper
|
|
||||||
* If true, the <code>upperTerm</code> is included in the range.
|
|
||||||
* @param collator
|
|
||||||
* The collator to use to collate index Terms, to determine their
|
|
||||||
* membership in the range bounded by <code>lowerTerm</code> and
|
|
||||||
* <code>upperTerm</code>.
|
|
||||||
*/
|
|
||||||
public SlowCollatedTermRangeTermsEnum(TermsEnum tenum, String lowerTermText, String upperTermText,
|
|
||||||
boolean includeLower, boolean includeUpper, Collator collator) {
|
|
||||||
super(tenum);
|
|
||||||
this.collator = collator;
|
|
||||||
this.upperTermText = upperTermText;
|
|
||||||
this.lowerTermText = lowerTermText;
|
|
||||||
this.includeLower = includeLower;
|
|
||||||
this.includeUpper = includeUpper;
|
|
||||||
|
|
||||||
// do a little bit of normalization...
|
|
||||||
// open ended range queries should always be inclusive.
|
|
||||||
if (this.lowerTermText == null) {
|
|
||||||
this.lowerTermText = "";
|
|
||||||
this.includeLower = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: optimize
|
|
||||||
BytesRef startBytesRef = new BytesRef("");
|
|
||||||
setInitialSeekTerm(startBytesRef);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected AcceptStatus accept(BytesRef term) {
|
|
||||||
if ((includeLower
|
|
||||||
? collator.compare(term.utf8ToString(), lowerTermText) >= 0
|
|
||||||
: collator.compare(term.utf8ToString(), lowerTermText) > 0)
|
|
||||||
&& (upperTermText == null
|
|
||||||
|| (includeUpper
|
|
||||||
? collator.compare(term.utf8ToString(), upperTermText) <= 0
|
|
||||||
: collator.compare(term.utf8ToString(), upperTermText) < 0))) {
|
|
||||||
return AcceptStatus.YES;
|
|
||||||
}
|
|
||||||
return AcceptStatus.NO;
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,190 +0,0 @@
|
||||||
package org.apache.lucene.sandbox.queries;
|
|
||||||
|
|
||||||
import java.text.Collator;
|
|
||||||
import java.util.Locale;
|
|
||||||
|
|
||||||
import org.apache.lucene.document.Document;
|
|
||||||
import org.apache.lucene.document.Field;
|
|
||||||
import org.apache.lucene.document.SortedDocValuesField;
|
|
||||||
import org.apache.lucene.index.IndexReader;
|
|
||||||
import org.apache.lucene.index.RandomIndexWriter;
|
|
||||||
import org.apache.lucene.search.*;
|
|
||||||
import org.apache.lucene.search.BooleanClause.Occur;
|
|
||||||
import org.apache.lucene.store.Directory;
|
|
||||||
import org.apache.lucene.util.BytesRef;
|
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
|
||||||
import org.apache.lucene.util.TestUtil;
|
|
||||||
import org.junit.AfterClass;
|
|
||||||
import org.junit.BeforeClass;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Tests SlowCollatedStringComparator, SlowCollatedTermRangeQuery, and SlowCollatedTermRangeFilter
|
|
||||||
*/
|
|
||||||
public class TestSlowCollationMethods extends LuceneTestCase {
|
|
||||||
private static Collator collator;
|
|
||||||
private static IndexSearcher searcher;
|
|
||||||
private static IndexReader reader;
|
|
||||||
private static Directory dir;
|
|
||||||
private static int numDocs;
|
|
||||||
private static String splitDoc;
|
|
||||||
|
|
||||||
@BeforeClass
|
|
||||||
public static void beforeClass() throws Exception {
|
|
||||||
final Locale locale = LuceneTestCase.randomLocale(random());
|
|
||||||
collator = Collator.getInstance(locale);
|
|
||||||
collator.setStrength(Collator.IDENTICAL);
|
|
||||||
collator.setDecomposition(Collator.NO_DECOMPOSITION);
|
|
||||||
|
|
||||||
numDocs = 1000 * RANDOM_MULTIPLIER;
|
|
||||||
dir = newDirectory();
|
|
||||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
|
|
||||||
for (int i = 0; i < numDocs; i++) {
|
|
||||||
Document doc = new Document();
|
|
||||||
String value = TestUtil.randomUnicodeString(random());
|
|
||||||
Field field = newStringField("field", value, Field.Store.YES);
|
|
||||||
doc.add(field);
|
|
||||||
Field dvField = new SortedDocValuesField("field", new BytesRef(value));
|
|
||||||
doc.add(dvField);
|
|
||||||
iw.addDocument(doc);
|
|
||||||
}
|
|
||||||
splitDoc = TestUtil.randomUnicodeString(random());
|
|
||||||
reader = iw.getReader();
|
|
||||||
iw.close();
|
|
||||||
|
|
||||||
searcher = newSearcher(reader);
|
|
||||||
}
|
|
||||||
|
|
||||||
@AfterClass
|
|
||||||
public static void afterClass() throws Exception {
|
|
||||||
reader.close();
|
|
||||||
dir.close();
|
|
||||||
collator = null;
|
|
||||||
searcher = null;
|
|
||||||
reader = null;
|
|
||||||
dir = null;
|
|
||||||
}
|
|
||||||
|
|
||||||
private void doCheckSorting(TopDocs docs) throws Exception {
|
|
||||||
String prev = "";
|
|
||||||
for (ScoreDoc doc : docs.scoreDocs) {
|
|
||||||
String value = reader.document(doc.doc).get("field");
|
|
||||||
assertTrue(collator.compare(value, prev) >= 0);
|
|
||||||
prev = value;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testSort() throws Exception {
|
|
||||||
SortField sf = new SortField("field", new FieldComparatorSource() {
|
|
||||||
@Override
|
|
||||||
public FieldComparator<String> newComparator(String fieldname, int numHits, int sortPos, boolean reversed) {
|
|
||||||
return new SlowCollatedStringComparator(numHits, fieldname, collator);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
final Sort sort = new Sort(sf);
|
|
||||||
|
|
||||||
final TopFieldDocs docs1 = searcher.search(TermRangeQuery.newStringRange("field", null, splitDoc, true, true), null, numDocs/(1+random().nextInt(4)), sort);
|
|
||||||
doCheckSorting(docs1);
|
|
||||||
|
|
||||||
final TopFieldDocs docs2 = searcher.search(TermRangeQuery.newStringRange("field", splitDoc, null, true, true), null, numDocs/(1+random().nextInt(4)), sort);
|
|
||||||
doCheckSorting(docs2);
|
|
||||||
|
|
||||||
final TopFieldDocs docs = TopDocs.merge(sort, numDocs/(1+random().nextInt(4)), new TopFieldDocs[]{docs1, docs2});
|
|
||||||
doCheckSorting(docs);
|
|
||||||
}
|
|
||||||
|
|
||||||
private void doTestRanges(String startPoint, String endPoint, Query query) throws Exception {
|
|
||||||
QueryUtils.check(query);
|
|
||||||
|
|
||||||
// positive test
|
|
||||||
TopDocs docs = searcher.search(query, numDocs);
|
|
||||||
for (ScoreDoc doc : docs.scoreDocs) {
|
|
||||||
String value = reader.document(doc.doc).get("field");
|
|
||||||
assertTrue(collator.compare(value, startPoint) >= 0);
|
|
||||||
assertTrue(collator.compare(value, endPoint) <= 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
// negative test
|
|
||||||
BooleanQuery bq = new BooleanQuery();
|
|
||||||
bq.add(new MatchAllDocsQuery(), Occur.SHOULD);
|
|
||||||
bq.add(query, Occur.MUST_NOT);
|
|
||||||
docs = searcher.search(bq, numDocs);
|
|
||||||
for (ScoreDoc doc : docs.scoreDocs) {
|
|
||||||
String value = reader.document(doc.doc).get("field");
|
|
||||||
assertTrue(collator.compare(value, startPoint) < 0 || collator.compare(value, endPoint) > 0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testRangeQuery() throws Exception {
|
|
||||||
int numQueries = 50*RANDOM_MULTIPLIER;
|
|
||||||
for (int i = 0; i < numQueries; i++) {
|
|
||||||
String startPoint = TestUtil.randomUnicodeString(random());
|
|
||||||
String endPoint = TestUtil.randomUnicodeString(random());
|
|
||||||
Query query = new SlowCollatedTermRangeQuery("field", startPoint, endPoint, true, true, collator);
|
|
||||||
doTestRanges(startPoint, endPoint, query);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testRangeFilter() throws Exception {
|
|
||||||
int numQueries = 50*RANDOM_MULTIPLIER;
|
|
||||||
for (int i = 0; i < numQueries; i++) {
|
|
||||||
String startPoint = TestUtil.randomUnicodeString(random());
|
|
||||||
String endPoint = TestUtil.randomUnicodeString(random());
|
|
||||||
Query query = new ConstantScoreQuery(new SlowCollatedTermRangeFilter("field", startPoint, endPoint, true, true, collator));
|
|
||||||
doTestRanges(startPoint, endPoint, query);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testQuery() throws Exception {
|
|
||||||
|
|
||||||
// Copied from beforeClass, but scaled down to few docs:
|
|
||||||
// since otherwise this test can run for a very long
|
|
||||||
// time (1-2 hours or more; see Lucene-Solr-4.x-Linux Build #2204):
|
|
||||||
final Locale locale = LuceneTestCase.randomLocale(random());
|
|
||||||
Collator collator = Collator.getInstance(locale);
|
|
||||||
collator.setStrength(Collator.IDENTICAL);
|
|
||||||
collator.setDecomposition(Collator.NO_DECOMPOSITION);
|
|
||||||
|
|
||||||
int numDocs = 20 * RANDOM_MULTIPLIER;
|
|
||||||
Directory dir = newDirectory();
|
|
||||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
|
|
||||||
for (int i = 0; i < numDocs; i++) {
|
|
||||||
Document doc = new Document();
|
|
||||||
String value = TestUtil.randomUnicodeString(random());
|
|
||||||
Field field = newStringField("field", value, Field.Store.YES);
|
|
||||||
doc.add(field);
|
|
||||||
iw.addDocument(doc);
|
|
||||||
}
|
|
||||||
IndexReader reader = iw.getReader();
|
|
||||||
iw.close();
|
|
||||||
|
|
||||||
IndexSearcher searcher = newSearcher(reader);
|
|
||||||
|
|
||||||
String startPoint = TestUtil.randomUnicodeString(random());
|
|
||||||
String endPoint = TestUtil.randomUnicodeString(random());
|
|
||||||
Query query = new SlowCollatedTermRangeQuery("field", startPoint, endPoint, true, true, collator);
|
|
||||||
QueryUtils.check(random(), query, searcher);
|
|
||||||
reader.close();
|
|
||||||
dir.close();
|
|
||||||
collator = null;
|
|
||||||
searcher = null;
|
|
||||||
reader = null;
|
|
||||||
dir = null;
|
|
||||||
}
|
|
||||||
}
|
|
Loading…
Reference in New Issue