mirror of https://github.com/apache/lucene.git
LUCENE-6281: Removed slow collation support.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1661720 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
a43ebd6870
commit
0c078aaf4d
|
@ -152,6 +152,10 @@ API Changes
|
|||
* LUCENE-6272: Scorer extends DocIdSetIterator rather than DocsEnum (Alan
|
||||
Woodward)
|
||||
|
||||
* LUCENE-6281: Removed support for slow collations from lucene/sandbox. Better
|
||||
performance would be achieved through CollationKeyAnalyzer or
|
||||
ICUCollationKeyAnalyzer. (Adrien Grand)
|
||||
|
||||
Other
|
||||
|
||||
* LUCENE-6248: Remove unused odd constants from StandardSyntaxParser.jj
|
||||
|
|
|
@ -1,141 +0,0 @@
|
|||
package org.apache.lucene.sandbox.queries;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.text.Collator;
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.BinaryDocValues;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.search.SimpleFieldComparator;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/** Sorts by a field's value using the given Collator
|
||||
*
|
||||
* <p><b>WARNING</b>: this is very slow; you'll
|
||||
* get much better performance using the
|
||||
* CollationKeyAnalyzer or ICUCollationKeyAnalyzer.
|
||||
* @deprecated Index collation keys with CollationKeyAnalyzer or ICUCollationKeyAnalyzer instead.
|
||||
* This class will be removed in Lucene 5.0
|
||||
*/
|
||||
@Deprecated
|
||||
public final class SlowCollatedStringComparator extends SimpleFieldComparator<String> {
|
||||
|
||||
private final String[] values;
|
||||
private BinaryDocValues currentDocTerms;
|
||||
private Bits docsWithField;
|
||||
private final String field;
|
||||
final Collator collator;
|
||||
private String bottom;
|
||||
private String topValue;
|
||||
|
||||
public SlowCollatedStringComparator(int numHits, String field, Collator collator) {
|
||||
values = new String[numHits];
|
||||
this.field = field;
|
||||
this.collator = collator;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compare(int slot1, int slot2) {
|
||||
final String val1 = values[slot1];
|
||||
final String val2 = values[slot2];
|
||||
if (val1 == null) {
|
||||
if (val2 == null) {
|
||||
return 0;
|
||||
}
|
||||
return -1;
|
||||
} else if (val2 == null) {
|
||||
return 1;
|
||||
}
|
||||
return collator.compare(val1, val2);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareBottom(int doc) {
|
||||
final BytesRef term = currentDocTerms.get(doc);
|
||||
final String val2 = term.length == 0 && docsWithField.get(doc) == false ? null : term.utf8ToString();
|
||||
if (bottom == null) {
|
||||
if (val2 == null) {
|
||||
return 0;
|
||||
}
|
||||
return -1;
|
||||
} else if (val2 == null) {
|
||||
return 1;
|
||||
}
|
||||
return collator.compare(bottom, val2);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void copy(int slot, int doc) {
|
||||
final BytesRef term = currentDocTerms.get(doc);
|
||||
if (term.length == 0 && docsWithField.get(doc) == false) {
|
||||
values[slot] = null;
|
||||
} else {
|
||||
values[slot] = term.utf8ToString();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void doSetNextReader(LeafReaderContext context) throws IOException {
|
||||
currentDocTerms = DocValues.getBinary(context.reader(), field);
|
||||
docsWithField = DocValues.getDocsWithField(context.reader(), field);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setBottom(final int bottom) {
|
||||
this.bottom = values[bottom];
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setTopValue(final String value) {
|
||||
this.topValue = value;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String value(int slot) {
|
||||
return values[slot];
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareValues(String first, String second) {
|
||||
if (first == null) {
|
||||
if (second == null) {
|
||||
return 0;
|
||||
}
|
||||
return -1;
|
||||
} else if (second == null) {
|
||||
return 1;
|
||||
} else {
|
||||
return collator.compare(first, second);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTop(int doc) {
|
||||
final BytesRef term = currentDocTerms.get(doc);
|
||||
final String docValue;
|
||||
if (term.length == 0 && docsWithField.get(doc) == false) {
|
||||
docValue = null;
|
||||
} else {
|
||||
docValue = term.utf8ToString();
|
||||
}
|
||||
return compareValues(topValue, docValue);
|
||||
}
|
||||
}
|
|
@ -1,75 +0,0 @@
|
|||
package org.apache.lucene.sandbox.queries;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.text.Collator;
|
||||
|
||||
import org.apache.lucene.search.DocValuesRangeQuery;
|
||||
import org.apache.lucene.search.MultiTermQueryWrapperFilter;
|
||||
import org.apache.lucene.search.NumericRangeFilter; // javadoc
|
||||
// javadoc
|
||||
|
||||
/**
|
||||
* A Filter that restricts search results to a range of term
|
||||
* values in a given field.
|
||||
*
|
||||
* <p>This filter matches the documents looking for terms that fall into the
|
||||
* supplied range according to {@link
|
||||
* String#compareTo(String)}, unless a <code>Collator</code> is provided. It is not intended
|
||||
* for numerical ranges; use {@link NumericRangeFilter} instead.
|
||||
*
|
||||
* <p>If you construct a large number of range filters with different ranges but on the
|
||||
* same field, {@link DocValuesRangeQuery} may have significantly better performance.
|
||||
* @deprecated Index collation keys with CollationKeyAnalyzer or ICUCollationKeyAnalyzer instead.
|
||||
* This class will be removed in Lucene 5.0
|
||||
*/
|
||||
@Deprecated
|
||||
public class SlowCollatedTermRangeFilter extends MultiTermQueryWrapperFilter<SlowCollatedTermRangeQuery> {
|
||||
/**
|
||||
*
|
||||
* @param lowerTerm The lower bound on this range
|
||||
* @param upperTerm The upper bound on this range
|
||||
* @param includeLower Does this range include the lower bound?
|
||||
* @param includeUpper Does this range include the upper bound?
|
||||
* @param collator The collator to use when determining range inclusion; set
|
||||
* to null to use Unicode code point ordering instead of collation.
|
||||
* @throws IllegalArgumentException if both terms are null or if
|
||||
* lowerTerm is null and includeLower is true (similar for upperTerm
|
||||
* and includeUpper)
|
||||
*/
|
||||
public SlowCollatedTermRangeFilter(String fieldName, String lowerTerm, String upperTerm,
|
||||
boolean includeLower, boolean includeUpper,
|
||||
Collator collator) {
|
||||
super(new SlowCollatedTermRangeQuery(fieldName, lowerTerm, upperTerm, includeLower, includeUpper, collator));
|
||||
}
|
||||
|
||||
/** Returns the lower value of this range filter */
|
||||
public String getLowerTerm() { return query.getLowerTerm(); }
|
||||
|
||||
/** Returns the upper value of this range filter */
|
||||
public String getUpperTerm() { return query.getUpperTerm(); }
|
||||
|
||||
/** Returns <code>true</code> if the lower endpoint is inclusive */
|
||||
public boolean includesLower() { return query.includesLower(); }
|
||||
|
||||
/** Returns <code>true</code> if the upper endpoint is inclusive */
|
||||
public boolean includesUpper() { return query.includesUpper(); }
|
||||
|
||||
/** Returns the collator used to determine range inclusion, if any. */
|
||||
public Collator getCollator() { return query.getCollator(); }
|
||||
}
|
|
@ -1,178 +0,0 @@
|
|||
package org.apache.lucene.sandbox.queries;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.text.Collator;
|
||||
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.search.MultiTermQuery; // javadoc
|
||||
import org.apache.lucene.search.NumericRangeQuery; // javadoc
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.lucene.util.ToStringUtils;
|
||||
|
||||
/**
|
||||
* A Query that matches documents within an range of terms.
|
||||
*
|
||||
* <p>This query matches the documents looking for terms that fall into the
|
||||
* supplied range according to {@link
|
||||
* String#compareTo(String)}, unless a <code>Collator</code> is provided. It is not intended
|
||||
* for numerical ranges; use {@link NumericRangeQuery} instead.
|
||||
*
|
||||
* <p>This query uses the {@link
|
||||
* MultiTermQuery#CONSTANT_SCORE_FILTER_REWRITE}
|
||||
* rewrite method.
|
||||
* @deprecated Index collation keys with CollationKeyAnalyzer or ICUCollationKeyAnalyzer instead.
|
||||
* This class will be removed in Lucene 5.0
|
||||
*/
|
||||
@Deprecated
|
||||
public class SlowCollatedTermRangeQuery extends MultiTermQuery {
|
||||
private String lowerTerm;
|
||||
private String upperTerm;
|
||||
private boolean includeLower;
|
||||
private boolean includeUpper;
|
||||
private Collator collator;
|
||||
|
||||
/** Constructs a query selecting all terms greater/equal than
|
||||
* <code>lowerTerm</code> but less/equal than <code>upperTerm</code>.
|
||||
* <p>
|
||||
* If an endpoint is null, it is said
|
||||
* to be "open". Either or both endpoints may be open. Open endpoints may not
|
||||
* be exclusive (you can't select all but the first or last term without
|
||||
* explicitly specifying the term to exclude.)
|
||||
* <p>
|
||||
*
|
||||
* @param lowerTerm The Term text at the lower end of the range
|
||||
* @param upperTerm The Term text at the upper end of the range
|
||||
* @param includeLower
|
||||
* If true, the <code>lowerTerm</code> is
|
||||
* included in the range.
|
||||
* @param includeUpper
|
||||
* If true, the <code>upperTerm</code> is
|
||||
* included in the range.
|
||||
* @param collator The collator to use to collate index Terms, to determine
|
||||
* their membership in the range bounded by <code>lowerTerm</code> and
|
||||
* <code>upperTerm</code>.
|
||||
*/
|
||||
public SlowCollatedTermRangeQuery(String field, String lowerTerm, String upperTerm,
|
||||
boolean includeLower, boolean includeUpper, Collator collator) {
|
||||
super(field);
|
||||
this.lowerTerm = lowerTerm;
|
||||
this.upperTerm = upperTerm;
|
||||
this.includeLower = includeLower;
|
||||
this.includeUpper = includeUpper;
|
||||
this.collator = collator;
|
||||
}
|
||||
|
||||
/** Returns the lower value of this range query */
|
||||
public String getLowerTerm() { return lowerTerm; }
|
||||
|
||||
/** Returns the upper value of this range query */
|
||||
public String getUpperTerm() { return upperTerm; }
|
||||
|
||||
/** Returns <code>true</code> if the lower endpoint is inclusive */
|
||||
public boolean includesLower() { return includeLower; }
|
||||
|
||||
/** Returns <code>true</code> if the upper endpoint is inclusive */
|
||||
public boolean includesUpper() { return includeUpper; }
|
||||
|
||||
/** Returns the collator used to determine range inclusion */
|
||||
public Collator getCollator() { return collator; }
|
||||
|
||||
@Override
|
||||
protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
|
||||
if (lowerTerm != null && upperTerm != null && collator.compare(lowerTerm, upperTerm) > 0) {
|
||||
return TermsEnum.EMPTY;
|
||||
}
|
||||
|
||||
TermsEnum tenum = terms.iterator(null);
|
||||
|
||||
if (lowerTerm == null && upperTerm == null) {
|
||||
return tenum;
|
||||
}
|
||||
return new SlowCollatedTermRangeTermsEnum(tenum,
|
||||
lowerTerm, upperTerm, includeLower, includeUpper, collator);
|
||||
}
|
||||
|
||||
/** @deprecated Use {@link #getField()} instead. */
|
||||
@Deprecated
|
||||
public String field() {
|
||||
return getField();
|
||||
}
|
||||
|
||||
/** Prints a user-readable version of this query. */
|
||||
@Override
|
||||
public String toString(String field) {
|
||||
StringBuilder buffer = new StringBuilder();
|
||||
if (!getField().equals(field)) {
|
||||
buffer.append(getField());
|
||||
buffer.append(":");
|
||||
}
|
||||
buffer.append(includeLower ? '[' : '{');
|
||||
buffer.append(lowerTerm != null ? lowerTerm : "*");
|
||||
buffer.append(" TO ");
|
||||
buffer.append(upperTerm != null ? upperTerm : "*");
|
||||
buffer.append(includeUpper ? ']' : '}');
|
||||
buffer.append(ToStringUtils.boost(getBoost()));
|
||||
return buffer.toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
final int prime = 31;
|
||||
int result = super.hashCode();
|
||||
result = prime * result + ((collator == null) ? 0 : collator.hashCode());
|
||||
result = prime * result + (includeLower ? 1231 : 1237);
|
||||
result = prime * result + (includeUpper ? 1231 : 1237);
|
||||
result = prime * result + ((lowerTerm == null) ? 0 : lowerTerm.hashCode());
|
||||
result = prime * result + ((upperTerm == null) ? 0 : upperTerm.hashCode());
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
if (this == obj)
|
||||
return true;
|
||||
if (!super.equals(obj))
|
||||
return false;
|
||||
if (getClass() != obj.getClass())
|
||||
return false;
|
||||
SlowCollatedTermRangeQuery other = (SlowCollatedTermRangeQuery) obj;
|
||||
if (collator == null) {
|
||||
if (other.collator != null)
|
||||
return false;
|
||||
} else if (!collator.equals(other.collator))
|
||||
return false;
|
||||
if (includeLower != other.includeLower)
|
||||
return false;
|
||||
if (includeUpper != other.includeUpper)
|
||||
return false;
|
||||
if (lowerTerm == null) {
|
||||
if (other.lowerTerm != null)
|
||||
return false;
|
||||
} else if (!lowerTerm.equals(other.lowerTerm))
|
||||
return false;
|
||||
if (upperTerm == null) {
|
||||
if (other.upperTerm != null)
|
||||
return false;
|
||||
} else if (!upperTerm.equals(other.upperTerm))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
}
|
|
@ -1,100 +0,0 @@
|
|||
package org.apache.lucene.sandbox.queries;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.text.Collator;
|
||||
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.index.FilteredTermsEnum;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/**
|
||||
* Subclass of FilteredTermEnum for enumerating all terms that match the
|
||||
* specified range parameters.
|
||||
* <p>Term enumerations are always ordered by
|
||||
* {@link BytesRef#compareTo}. Each term in the enumeration is
|
||||
* greater than all that precede it.</p>
|
||||
* @deprecated Index collation keys with CollationKeyAnalyzer or ICUCollationKeyAnalyzer instead.
|
||||
* This class will be removed in Lucene 5.0
|
||||
*/
|
||||
@Deprecated
|
||||
public class SlowCollatedTermRangeTermsEnum extends FilteredTermsEnum {
|
||||
private Collator collator;
|
||||
private String upperTermText;
|
||||
private String lowerTermText;
|
||||
private boolean includeLower;
|
||||
private boolean includeUpper;
|
||||
|
||||
/**
|
||||
* Enumerates all terms greater/equal than <code>lowerTerm</code>
|
||||
* but less/equal than <code>upperTerm</code>.
|
||||
*
|
||||
* If an endpoint is null, it is said to be "open". Either or both
|
||||
* endpoints may be open. Open endpoints may not be exclusive
|
||||
* (you can't select all but the first or last term without
|
||||
* explicitly specifying the term to exclude.)
|
||||
*
|
||||
* @param tenum source of the terms to enumerate.
|
||||
* @param lowerTermText
|
||||
* The term text at the lower end of the range
|
||||
* @param upperTermText
|
||||
* The term text at the upper end of the range
|
||||
* @param includeLower
|
||||
* If true, the <code>lowerTerm</code> is included in the range.
|
||||
* @param includeUpper
|
||||
* If true, the <code>upperTerm</code> is included in the range.
|
||||
* @param collator
|
||||
* The collator to use to collate index Terms, to determine their
|
||||
* membership in the range bounded by <code>lowerTerm</code> and
|
||||
* <code>upperTerm</code>.
|
||||
*/
|
||||
public SlowCollatedTermRangeTermsEnum(TermsEnum tenum, String lowerTermText, String upperTermText,
|
||||
boolean includeLower, boolean includeUpper, Collator collator) {
|
||||
super(tenum);
|
||||
this.collator = collator;
|
||||
this.upperTermText = upperTermText;
|
||||
this.lowerTermText = lowerTermText;
|
||||
this.includeLower = includeLower;
|
||||
this.includeUpper = includeUpper;
|
||||
|
||||
// do a little bit of normalization...
|
||||
// open ended range queries should always be inclusive.
|
||||
if (this.lowerTermText == null) {
|
||||
this.lowerTermText = "";
|
||||
this.includeLower = true;
|
||||
}
|
||||
|
||||
// TODO: optimize
|
||||
BytesRef startBytesRef = new BytesRef("");
|
||||
setInitialSeekTerm(startBytesRef);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected AcceptStatus accept(BytesRef term) {
|
||||
if ((includeLower
|
||||
? collator.compare(term.utf8ToString(), lowerTermText) >= 0
|
||||
: collator.compare(term.utf8ToString(), lowerTermText) > 0)
|
||||
&& (upperTermText == null
|
||||
|| (includeUpper
|
||||
? collator.compare(term.utf8ToString(), upperTermText) <= 0
|
||||
: collator.compare(term.utf8ToString(), upperTermText) < 0))) {
|
||||
return AcceptStatus.YES;
|
||||
}
|
||||
return AcceptStatus.NO;
|
||||
}
|
||||
}
|
|
@ -1,190 +0,0 @@
|
|||
package org.apache.lucene.sandbox.queries;
|
||||
|
||||
import java.text.Collator;
|
||||
import java.util.Locale;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.SortedDocValuesField;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.search.*;
|
||||
import org.apache.lucene.search.BooleanClause.Occur;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Tests SlowCollatedStringComparator, SlowCollatedTermRangeQuery, and SlowCollatedTermRangeFilter
|
||||
*/
|
||||
public class TestSlowCollationMethods extends LuceneTestCase {
|
||||
private static Collator collator;
|
||||
private static IndexSearcher searcher;
|
||||
private static IndexReader reader;
|
||||
private static Directory dir;
|
||||
private static int numDocs;
|
||||
private static String splitDoc;
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
final Locale locale = LuceneTestCase.randomLocale(random());
|
||||
collator = Collator.getInstance(locale);
|
||||
collator.setStrength(Collator.IDENTICAL);
|
||||
collator.setDecomposition(Collator.NO_DECOMPOSITION);
|
||||
|
||||
numDocs = 1000 * RANDOM_MULTIPLIER;
|
||||
dir = newDirectory();
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
|
||||
for (int i = 0; i < numDocs; i++) {
|
||||
Document doc = new Document();
|
||||
String value = TestUtil.randomUnicodeString(random());
|
||||
Field field = newStringField("field", value, Field.Store.YES);
|
||||
doc.add(field);
|
||||
Field dvField = new SortedDocValuesField("field", new BytesRef(value));
|
||||
doc.add(dvField);
|
||||
iw.addDocument(doc);
|
||||
}
|
||||
splitDoc = TestUtil.randomUnicodeString(random());
|
||||
reader = iw.getReader();
|
||||
iw.close();
|
||||
|
||||
searcher = newSearcher(reader);
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
public static void afterClass() throws Exception {
|
||||
reader.close();
|
||||
dir.close();
|
||||
collator = null;
|
||||
searcher = null;
|
||||
reader = null;
|
||||
dir = null;
|
||||
}
|
||||
|
||||
private void doCheckSorting(TopDocs docs) throws Exception {
|
||||
String prev = "";
|
||||
for (ScoreDoc doc : docs.scoreDocs) {
|
||||
String value = reader.document(doc.doc).get("field");
|
||||
assertTrue(collator.compare(value, prev) >= 0);
|
||||
prev = value;
|
||||
}
|
||||
}
|
||||
|
||||
public void testSort() throws Exception {
|
||||
SortField sf = new SortField("field", new FieldComparatorSource() {
|
||||
@Override
|
||||
public FieldComparator<String> newComparator(String fieldname, int numHits, int sortPos, boolean reversed) {
|
||||
return new SlowCollatedStringComparator(numHits, fieldname, collator);
|
||||
}
|
||||
});
|
||||
final Sort sort = new Sort(sf);
|
||||
|
||||
final TopFieldDocs docs1 = searcher.search(TermRangeQuery.newStringRange("field", null, splitDoc, true, true), null, numDocs/(1+random().nextInt(4)), sort);
|
||||
doCheckSorting(docs1);
|
||||
|
||||
final TopFieldDocs docs2 = searcher.search(TermRangeQuery.newStringRange("field", splitDoc, null, true, true), null, numDocs/(1+random().nextInt(4)), sort);
|
||||
doCheckSorting(docs2);
|
||||
|
||||
final TopFieldDocs docs = TopDocs.merge(sort, numDocs/(1+random().nextInt(4)), new TopFieldDocs[]{docs1, docs2});
|
||||
doCheckSorting(docs);
|
||||
}
|
||||
|
||||
private void doTestRanges(String startPoint, String endPoint, Query query) throws Exception {
|
||||
QueryUtils.check(query);
|
||||
|
||||
// positive test
|
||||
TopDocs docs = searcher.search(query, numDocs);
|
||||
for (ScoreDoc doc : docs.scoreDocs) {
|
||||
String value = reader.document(doc.doc).get("field");
|
||||
assertTrue(collator.compare(value, startPoint) >= 0);
|
||||
assertTrue(collator.compare(value, endPoint) <= 0);
|
||||
}
|
||||
|
||||
// negative test
|
||||
BooleanQuery bq = new BooleanQuery();
|
||||
bq.add(new MatchAllDocsQuery(), Occur.SHOULD);
|
||||
bq.add(query, Occur.MUST_NOT);
|
||||
docs = searcher.search(bq, numDocs);
|
||||
for (ScoreDoc doc : docs.scoreDocs) {
|
||||
String value = reader.document(doc.doc).get("field");
|
||||
assertTrue(collator.compare(value, startPoint) < 0 || collator.compare(value, endPoint) > 0);
|
||||
}
|
||||
}
|
||||
|
||||
public void testRangeQuery() throws Exception {
|
||||
int numQueries = 50*RANDOM_MULTIPLIER;
|
||||
for (int i = 0; i < numQueries; i++) {
|
||||
String startPoint = TestUtil.randomUnicodeString(random());
|
||||
String endPoint = TestUtil.randomUnicodeString(random());
|
||||
Query query = new SlowCollatedTermRangeQuery("field", startPoint, endPoint, true, true, collator);
|
||||
doTestRanges(startPoint, endPoint, query);
|
||||
}
|
||||
}
|
||||
|
||||
public void testRangeFilter() throws Exception {
|
||||
int numQueries = 50*RANDOM_MULTIPLIER;
|
||||
for (int i = 0; i < numQueries; i++) {
|
||||
String startPoint = TestUtil.randomUnicodeString(random());
|
||||
String endPoint = TestUtil.randomUnicodeString(random());
|
||||
Query query = new ConstantScoreQuery(new SlowCollatedTermRangeFilter("field", startPoint, endPoint, true, true, collator));
|
||||
doTestRanges(startPoint, endPoint, query);
|
||||
}
|
||||
}
|
||||
|
||||
public void testQuery() throws Exception {
|
||||
|
||||
// Copied from beforeClass, but scaled down to few docs:
|
||||
// since otherwise this test can run for a very long
|
||||
// time (1-2 hours or more; see Lucene-Solr-4.x-Linux Build #2204):
|
||||
final Locale locale = LuceneTestCase.randomLocale(random());
|
||||
Collator collator = Collator.getInstance(locale);
|
||||
collator.setStrength(Collator.IDENTICAL);
|
||||
collator.setDecomposition(Collator.NO_DECOMPOSITION);
|
||||
|
||||
int numDocs = 20 * RANDOM_MULTIPLIER;
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
|
||||
for (int i = 0; i < numDocs; i++) {
|
||||
Document doc = new Document();
|
||||
String value = TestUtil.randomUnicodeString(random());
|
||||
Field field = newStringField("field", value, Field.Store.YES);
|
||||
doc.add(field);
|
||||
iw.addDocument(doc);
|
||||
}
|
||||
IndexReader reader = iw.getReader();
|
||||
iw.close();
|
||||
|
||||
IndexSearcher searcher = newSearcher(reader);
|
||||
|
||||
String startPoint = TestUtil.randomUnicodeString(random());
|
||||
String endPoint = TestUtil.randomUnicodeString(random());
|
||||
Query query = new SlowCollatedTermRangeQuery("field", startPoint, endPoint, true, true, collator);
|
||||
QueryUtils.check(random(), query, searcher);
|
||||
reader.close();
|
||||
dir.close();
|
||||
collator = null;
|
||||
searcher = null;
|
||||
reader = null;
|
||||
dir = null;
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue