mirror of https://github.com/apache/lucene.git
FieldSortedHitQueue - subsequent String sorts with different locales sort identically: LUCENE-526
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@391895 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
825bc9bdac
commit
6c32f48074
|
@ -34,6 +34,9 @@ Bug fixes
|
||||||
that sometimes caused the index order of documents to change.
|
that sometimes caused the index order of documents to change.
|
||||||
(Yonik Seeley)
|
(Yonik Seeley)
|
||||||
|
|
||||||
|
7. LUCENE-526: Fixed a bug in FieldSortedHitQueue that caused
|
||||||
|
subsequent String sorts with different locales to sort identically.
|
||||||
|
(Paul Cowan via Yonik Seeley)
|
||||||
|
|
||||||
1.9.1
|
1.9.1
|
||||||
|
|
||||||
|
|
|
@ -23,6 +23,7 @@ import org.apache.lucene.index.TermEnum;
|
||||||
import org.apache.lucene.search.FieldCache.StringIndex; // required by GCJ
|
import org.apache.lucene.search.FieldCache.StringIndex; // required by GCJ
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Locale;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.WeakHashMap;
|
import java.util.WeakHashMap;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
@ -45,12 +46,14 @@ implements FieldCache {
|
||||||
final String field; // which Field
|
final String field; // which Field
|
||||||
final int type; // which SortField type
|
final int type; // which SortField type
|
||||||
final Object custom; // which custom comparator
|
final Object custom; // which custom comparator
|
||||||
|
final Locale locale; // the locale we're sorting (if string)
|
||||||
|
|
||||||
/** Creates one of these objects. */
|
/** Creates one of these objects. */
|
||||||
Entry (String field, int type) {
|
Entry (String field, int type, Locale locale) {
|
||||||
this.field = field.intern();
|
this.field = field.intern();
|
||||||
this.type = type;
|
this.type = type;
|
||||||
this.custom = null;
|
this.custom = null;
|
||||||
|
this.locale = locale;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Creates one of these objects for a custom comparator. */
|
/** Creates one of these objects for a custom comparator. */
|
||||||
|
@ -58,6 +61,7 @@ implements FieldCache {
|
||||||
this.field = field.intern();
|
this.field = field.intern();
|
||||||
this.type = SortField.CUSTOM;
|
this.type = SortField.CUSTOM;
|
||||||
this.custom = custom;
|
this.custom = custom;
|
||||||
|
this.locale = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Two of these are equal iff they reference the same field and type. */
|
/** Two of these are equal iff they reference the same field and type. */
|
||||||
|
@ -65,6 +69,7 @@ implements FieldCache {
|
||||||
if (o instanceof Entry) {
|
if (o instanceof Entry) {
|
||||||
Entry other = (Entry) o;
|
Entry other = (Entry) o;
|
||||||
if (other.field == field && other.type == type) {
|
if (other.field == field && other.type == type) {
|
||||||
|
if (other.locale == null ? locale == null : other.locale.equals(locale)) {
|
||||||
if (other.custom == null) {
|
if (other.custom == null) {
|
||||||
if (custom == null) return true;
|
if (custom == null) return true;
|
||||||
} else if (other.custom.equals (custom)) {
|
} else if (other.custom.equals (custom)) {
|
||||||
|
@ -72,12 +77,13 @@ implements FieldCache {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Composes a hashcode based on the field and type. */
|
/** Composes a hashcode based on the field and type. */
|
||||||
public int hashCode() {
|
public int hashCode() {
|
||||||
return field.hashCode() ^ type ^ (custom==null ? 0 : custom.hashCode());
|
return field.hashCode() ^ type ^ (custom==null ? 0 : custom.hashCode()) ^ (locale==null ? 0 : locale.hashCode());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -97,8 +103,8 @@ implements FieldCache {
|
||||||
final Map cache = new WeakHashMap();
|
final Map cache = new WeakHashMap();
|
||||||
|
|
||||||
/** See if an object is in the cache. */
|
/** See if an object is in the cache. */
|
||||||
Object lookup (IndexReader reader, String field, int type) {
|
Object lookup (IndexReader reader, String field, int type, Locale locale) {
|
||||||
Entry entry = new Entry (field, type);
|
Entry entry = new Entry (field, type, locale);
|
||||||
synchronized (this) {
|
synchronized (this) {
|
||||||
HashMap readerCache = (HashMap)cache.get(reader);
|
HashMap readerCache = (HashMap)cache.get(reader);
|
||||||
if (readerCache == null) return null;
|
if (readerCache == null) return null;
|
||||||
|
@ -117,8 +123,8 @@ implements FieldCache {
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Put an object into the cache. */
|
/** Put an object into the cache. */
|
||||||
Object store (IndexReader reader, String field, int type, Object value) {
|
Object store (IndexReader reader, String field, int type, Locale locale, Object value) {
|
||||||
Entry entry = new Entry (field, type);
|
Entry entry = new Entry (field, type, locale);
|
||||||
synchronized (this) {
|
synchronized (this) {
|
||||||
HashMap readerCache = (HashMap)cache.get(reader);
|
HashMap readerCache = (HashMap)cache.get(reader);
|
||||||
if (readerCache == null) {
|
if (readerCache == null) {
|
||||||
|
@ -215,7 +221,7 @@ implements FieldCache {
|
||||||
public String[] getStrings (IndexReader reader, String field)
|
public String[] getStrings (IndexReader reader, String field)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
field = field.intern();
|
field = field.intern();
|
||||||
Object ret = lookup (reader, field, SortField.STRING);
|
Object ret = lookup (reader, field, SortField.STRING, null);
|
||||||
if (ret == null) {
|
if (ret == null) {
|
||||||
final String[] retArray = new String[reader.maxDoc()];
|
final String[] retArray = new String[reader.maxDoc()];
|
||||||
TermDocs termDocs = reader.termDocs();
|
TermDocs termDocs = reader.termDocs();
|
||||||
|
@ -234,7 +240,7 @@ implements FieldCache {
|
||||||
termDocs.close();
|
termDocs.close();
|
||||||
termEnum.close();
|
termEnum.close();
|
||||||
}
|
}
|
||||||
store (reader, field, SortField.STRING, retArray);
|
store (reader, field, SortField.STRING, null, retArray);
|
||||||
return retArray;
|
return retArray;
|
||||||
}
|
}
|
||||||
return (String[]) ret;
|
return (String[]) ret;
|
||||||
|
@ -244,7 +250,7 @@ implements FieldCache {
|
||||||
public StringIndex getStringIndex (IndexReader reader, String field)
|
public StringIndex getStringIndex (IndexReader reader, String field)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
field = field.intern();
|
field = field.intern();
|
||||||
Object ret = lookup (reader, field, STRING_INDEX);
|
Object ret = lookup (reader, field, STRING_INDEX, null);
|
||||||
if (ret == null) {
|
if (ret == null) {
|
||||||
final int[] retArray = new int[reader.maxDoc()];
|
final int[] retArray = new int[reader.maxDoc()];
|
||||||
String[] mterms = new String[reader.maxDoc()+1];
|
String[] mterms = new String[reader.maxDoc()+1];
|
||||||
|
@ -295,7 +301,7 @@ implements FieldCache {
|
||||||
}
|
}
|
||||||
|
|
||||||
StringIndex value = new StringIndex (retArray, mterms);
|
StringIndex value = new StringIndex (retArray, mterms);
|
||||||
store (reader, field, STRING_INDEX, value);
|
store (reader, field, STRING_INDEX, null, value);
|
||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
return (StringIndex) ret;
|
return (StringIndex) ret;
|
||||||
|
@ -316,7 +322,7 @@ implements FieldCache {
|
||||||
public Object getAuto (IndexReader reader, String field)
|
public Object getAuto (IndexReader reader, String field)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
field = field.intern();
|
field = field.intern();
|
||||||
Object ret = lookup (reader, field, SortField.AUTO);
|
Object ret = lookup (reader, field, SortField.AUTO, null);
|
||||||
if (ret == null) {
|
if (ret == null) {
|
||||||
TermEnum enumerator = reader.terms (new Term (field, ""));
|
TermEnum enumerator = reader.terms (new Term (field, ""));
|
||||||
try {
|
try {
|
||||||
|
@ -350,7 +356,7 @@ implements FieldCache {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (ret != null) {
|
if (ret != null) {
|
||||||
store (reader, field, SortField.AUTO, ret);
|
store (reader, field, SortField.AUTO, null, ret);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
throw new RuntimeException ("field \"" + field + "\" does not appear to be indexed");
|
throw new RuntimeException ("field \"" + field + "\" does not appear to be indexed");
|
||||||
|
|
|
@ -56,8 +56,13 @@ extends PriorityQueue {
|
||||||
for (int i=0; i<n; ++i) {
|
for (int i=0; i<n; ++i) {
|
||||||
String fieldname = fields[i].getField();
|
String fieldname = fields[i].getField();
|
||||||
comparators[i] = getCachedComparator (reader, fieldname, fields[i].getType(), fields[i].getLocale(), fields[i].getFactory());
|
comparators[i] = getCachedComparator (reader, fieldname, fields[i].getType(), fields[i].getLocale(), fields[i].getFactory());
|
||||||
|
|
||||||
|
if (comparators[i].sortType() == SortField.STRING) {
|
||||||
|
this.fields[i] = new SortField (fieldname, fields[i].getLocale(), fields[i].getReverse());
|
||||||
|
} else {
|
||||||
this.fields[i] = new SortField (fieldname, comparators[i].sortType(), fields[i].getReverse());
|
this.fields[i] = new SortField (fieldname, comparators[i].sortType(), fields[i].getReverse());
|
||||||
}
|
}
|
||||||
|
}
|
||||||
initialize (size);
|
initialize (size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -147,10 +152,10 @@ extends PriorityQueue {
|
||||||
static final Map Comparators = new WeakHashMap();
|
static final Map Comparators = new WeakHashMap();
|
||||||
|
|
||||||
/** Returns a comparator if it is in the cache. */
|
/** Returns a comparator if it is in the cache. */
|
||||||
static ScoreDocComparator lookup (IndexReader reader, String field, int type, Object factory) {
|
static ScoreDocComparator lookup (IndexReader reader, String field, int type, Locale locale, Object factory) {
|
||||||
FieldCacheImpl.Entry entry = (factory != null)
|
FieldCacheImpl.Entry entry = (factory != null)
|
||||||
? new FieldCacheImpl.Entry (field, factory)
|
? new FieldCacheImpl.Entry (field, factory)
|
||||||
: new FieldCacheImpl.Entry (field, type);
|
: new FieldCacheImpl.Entry (field, type, locale);
|
||||||
synchronized (Comparators) {
|
synchronized (Comparators) {
|
||||||
HashMap readerCache = (HashMap)Comparators.get(reader);
|
HashMap readerCache = (HashMap)Comparators.get(reader);
|
||||||
if (readerCache == null) return null;
|
if (readerCache == null) return null;
|
||||||
|
@ -159,10 +164,10 @@ extends PriorityQueue {
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Stores a comparator into the cache. */
|
/** Stores a comparator into the cache. */
|
||||||
static Object store (IndexReader reader, String field, int type, Object factory, Object value) {
|
static Object store (IndexReader reader, String field, int type, Locale locale, Object factory, Object value) {
|
||||||
FieldCacheImpl.Entry entry = (factory != null)
|
FieldCacheImpl.Entry entry = (factory != null)
|
||||||
? new FieldCacheImpl.Entry (field, factory)
|
? new FieldCacheImpl.Entry (field, factory)
|
||||||
: new FieldCacheImpl.Entry (field, type);
|
: new FieldCacheImpl.Entry (field, type, locale);
|
||||||
synchronized (Comparators) {
|
synchronized (Comparators) {
|
||||||
HashMap readerCache = (HashMap)Comparators.get(reader);
|
HashMap readerCache = (HashMap)Comparators.get(reader);
|
||||||
if (readerCache == null) {
|
if (readerCache == null) {
|
||||||
|
@ -177,7 +182,7 @@ extends PriorityQueue {
|
||||||
throws IOException {
|
throws IOException {
|
||||||
if (type == SortField.DOC) return ScoreDocComparator.INDEXORDER;
|
if (type == SortField.DOC) return ScoreDocComparator.INDEXORDER;
|
||||||
if (type == SortField.SCORE) return ScoreDocComparator.RELEVANCE;
|
if (type == SortField.SCORE) return ScoreDocComparator.RELEVANCE;
|
||||||
ScoreDocComparator comparator = lookup (reader, fieldname, type, factory);
|
ScoreDocComparator comparator = lookup (reader, fieldname, type, locale, factory);
|
||||||
if (comparator == null) {
|
if (comparator == null) {
|
||||||
switch (type) {
|
switch (type) {
|
||||||
case SortField.AUTO:
|
case SortField.AUTO:
|
||||||
|
@ -199,7 +204,7 @@ extends PriorityQueue {
|
||||||
default:
|
default:
|
||||||
throw new RuntimeException ("unknown field type: "+type);
|
throw new RuntimeException ("unknown field type: "+type);
|
||||||
}
|
}
|
||||||
store (reader, fieldname, type, factory, comparator);
|
store (reader, fieldname, type, locale, factory, comparator);
|
||||||
}
|
}
|
||||||
return comparator;
|
return comparator;
|
||||||
}
|
}
|
||||||
|
|
|
@ -94,22 +94,23 @@ implements Serializable {
|
||||||
// the int field to sort by int
|
// the int field to sort by int
|
||||||
// the float field to sort by float
|
// the float field to sort by float
|
||||||
// the string field to sort by string
|
// the string field to sort by string
|
||||||
|
// the i18n field includes accented characters for testing locale-specific sorting
|
||||||
private String[][] data = new String[][] {
|
private String[][] data = new String[][] {
|
||||||
// tracer contents int float string custom
|
// tracer contents int float string custom i18n
|
||||||
{ "A", "x a", "5", "4f", "c", "A-3" },
|
{ "A", "x a", "5", "4f", "c", "A-3", "p\u00EAche"},
|
||||||
{ "B", "y a", "5", "3.4028235E38", "i", "B-10" },
|
{ "B", "y a", "5", "3.4028235E38", "i", "B-10", "HAT"},
|
||||||
{ "C", "x a b c", "2147483647", "1.0", "j", "A-2" },
|
{ "C", "x a b c", "2147483647", "1.0", "j", "A-2", "p\u00E9ch\u00E9"},
|
||||||
{ "D", "y a b c", "-1", "0.0f", "a", "C-0" },
|
{ "D", "y a b c", "-1", "0.0f", "a", "C-0", "HUT"},
|
||||||
{ "E", "x a b c d", "5", "2f", "h", "B-8" },
|
{ "E", "x a b c d", "5", "2f", "h", "B-8", "peach"},
|
||||||
{ "F", "y a b c d", "2", "3.14159f", "g", "B-1" },
|
{ "F", "y a b c d", "2", "3.14159f", "g", "B-1", "H\u00C5T"},
|
||||||
{ "G", "x a b c d", "3", "-1.0", "f", "C-100" },
|
{ "G", "x a b c d", "3", "-1.0", "f", "C-100", "sin"},
|
||||||
{ "H", "y a b c d", "0", "1.4E-45", "e", "C-88" },
|
{ "H", "y a b c d", "0", "1.4E-45", "e", "C-88", "H\u00D8T"},
|
||||||
{ "I", "x a b c d e f", "-2147483648", "1.0e+0", "d", "A-10" },
|
{ "I", "x a b c d e f", "-2147483648", "1.0e+0", "d", "A-10", "s\u00EDn"},
|
||||||
{ "J", "y a b c d e f", "4", ".5", "b", "C-7" },
|
{ "J", "y a b c d e f", "4", ".5", "b", "C-7", "HOT"},
|
||||||
{ "W", "g", "1", null, null, null },
|
{ "W", "g", "1", null, null, null, null},
|
||||||
{ "X", "g", "1", "0.1", null, null },
|
{ "X", "g", "1", "0.1", null, null, null},
|
||||||
{ "Y", "g", "1", "0.2", null, null },
|
{ "Y", "g", "1", "0.2", null, null, null},
|
||||||
{ "Z", "f g", null, null, null, null }
|
{ "Z", "f g", null, null, null, null, null}
|
||||||
};
|
};
|
||||||
|
|
||||||
// create an index of all the documents, or just the x, or just the y documents
|
// create an index of all the documents, or just the x, or just the y documents
|
||||||
|
@ -126,6 +127,7 @@ implements Serializable {
|
||||||
if (data[i][3] != null) doc.add (new Field ("float", data[i][3], Field.Store.NO, Field.Index.UN_TOKENIZED));
|
if (data[i][3] != null) doc.add (new Field ("float", data[i][3], Field.Store.NO, Field.Index.UN_TOKENIZED));
|
||||||
if (data[i][4] != null) doc.add (new Field ("string", data[i][4], Field.Store.NO, Field.Index.UN_TOKENIZED));
|
if (data[i][4] != null) doc.add (new Field ("string", data[i][4], Field.Store.NO, Field.Index.UN_TOKENIZED));
|
||||||
if (data[i][5] != null) doc.add (new Field ("custom", data[i][5], Field.Store.NO, Field.Index.UN_TOKENIZED));
|
if (data[i][5] != null) doc.add (new Field ("custom", data[i][5], Field.Store.NO, Field.Index.UN_TOKENIZED));
|
||||||
|
if (data[i][6] != null) doc.add (new Field ("i18n", data[i][6], Field.Store.NO, Field.Index.UN_TOKENIZED));
|
||||||
doc.setBoost(2); // produce some scores above 1.0
|
doc.setBoost(2); // produce some scores above 1.0
|
||||||
writer.addDocument (doc);
|
writer.addDocument (doc);
|
||||||
}
|
}
|
||||||
|
@ -342,6 +344,40 @@ implements Serializable {
|
||||||
assertMatches (full, queryY, sort, "BFHJD");
|
assertMatches (full, queryY, sort, "BFHJD");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// test using various international locales with accented characters
|
||||||
|
// (which sort differently depending on locale)
|
||||||
|
public void testInternationalSort() throws Exception {
|
||||||
|
sort.setSort (new SortField ("i18n", Locale.US));
|
||||||
|
assertMatches (full, queryY, sort, "BFJDH");
|
||||||
|
|
||||||
|
sort.setSort (new SortField ("i18n", new Locale("sv", "se")));
|
||||||
|
assertMatches (full, queryY, sort, "BJDFH");
|
||||||
|
|
||||||
|
sort.setSort (new SortField ("i18n", new Locale("da", "dk")));
|
||||||
|
assertMatches (full, queryY, sort, "BJDHF");
|
||||||
|
|
||||||
|
sort.setSort (new SortField ("i18n", Locale.US));
|
||||||
|
assertMatches (full, queryX, sort, "ECAGI");
|
||||||
|
|
||||||
|
sort.setSort (new SortField ("i18n", Locale.FRANCE));
|
||||||
|
assertMatches (full, queryX, sort, "EACGI");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test the MultiSearcher's ability to preserve locale-sensitive ordering
|
||||||
|
// by wrapping it around a single searcher
|
||||||
|
public void testInternationalMultiSearcherSort() throws Exception {
|
||||||
|
Searcher multiSearcher = new MultiSearcher (new Searchable[] { full });
|
||||||
|
|
||||||
|
sort.setSort (new SortField ("i18n", new Locale("sv", "se")));
|
||||||
|
assertMatches (multiSearcher, queryY, sort, "BJDFH");
|
||||||
|
|
||||||
|
sort.setSort (new SortField ("i18n", Locale.US));
|
||||||
|
assertMatches (multiSearcher, queryY, sort, "BFJDH");
|
||||||
|
|
||||||
|
sort.setSort (new SortField ("i18n", new Locale("da", "dk")));
|
||||||
|
assertMatches (multiSearcher, queryY, sort, "BJDHF");
|
||||||
|
}
|
||||||
|
|
||||||
// test a custom sort function
|
// test a custom sort function
|
||||||
public void testCustomSorts() throws Exception {
|
public void testCustomSorts() throws Exception {
|
||||||
sort.setSort (new SortField ("custom", SampleComparable.getComparatorSource()));
|
sort.setSort (new SortField ("custom", SampleComparable.getComparatorSource()));
|
||||||
|
|
Loading…
Reference in New Issue