FieldSortedHitQueue - subsequent String sorts with different locales sort identically: LUCENE-526

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@391895 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yonik Seeley 2006-04-06 04:02:09 +00:00
parent 825bc9bdac
commit 6c32f48074
4 changed files with 88 additions and 38 deletions

View File

@ -34,6 +34,9 @@ Bug fixes
that sometimes caused the index order of documents to change.
(Yonik Seeley)
7. LUCENE-526: Fixed a bug in FieldSortedHitQueue that caused
subsequent String sorts with different locales to sort identically.
(Paul Cowan via Yonik Seeley)
1.9.1

View File

@ -23,6 +23,7 @@ import org.apache.lucene.index.TermEnum;
import org.apache.lucene.search.FieldCache.StringIndex; // required by GCJ
import java.io.IOException;
import java.util.Locale;
import java.util.Map;
import java.util.WeakHashMap;
import java.util.HashMap;
@ -45,12 +46,14 @@ implements FieldCache {
final String field; // which Field
final int type; // which SortField type
final Object custom; // which custom comparator
final Locale locale; // the locale we're sorting (if string)
/** Creates one of these objects. */
Entry (String field, int type) {
Entry (String field, int type, Locale locale) {
this.field = field.intern();
this.type = type;
this.custom = null;
this.locale = locale;
}
/** Creates one of these objects for a custom comparator. */
@ -58,6 +61,7 @@ implements FieldCache {
this.field = field.intern();
this.type = SortField.CUSTOM;
this.custom = custom;
this.locale = null;
}
/** Two of these are equal iff they reference the same field and type. */
@ -65,6 +69,7 @@ implements FieldCache {
if (o instanceof Entry) {
Entry other = (Entry) o;
if (other.field == field && other.type == type) {
if (other.locale == null ? locale == null : other.locale.equals(locale)) {
if (other.custom == null) {
if (custom == null) return true;
} else if (other.custom.equals (custom)) {
@ -72,12 +77,13 @@ implements FieldCache {
}
}
}
}
return false;
}
/** Composes a hashcode based on the field and type. */
public int hashCode() {
return field.hashCode() ^ type ^ (custom==null ? 0 : custom.hashCode());
return field.hashCode() ^ type ^ (custom==null ? 0 : custom.hashCode()) ^ (locale==null ? 0 : locale.hashCode());
}
}
@ -97,8 +103,8 @@ implements FieldCache {
final Map cache = new WeakHashMap();
/** See if an object is in the cache. */
Object lookup (IndexReader reader, String field, int type) {
Entry entry = new Entry (field, type);
Object lookup (IndexReader reader, String field, int type, Locale locale) {
Entry entry = new Entry (field, type, locale);
synchronized (this) {
HashMap readerCache = (HashMap)cache.get(reader);
if (readerCache == null) return null;
@ -117,8 +123,8 @@ implements FieldCache {
}
/** Put an object into the cache. */
Object store (IndexReader reader, String field, int type, Object value) {
Entry entry = new Entry (field, type);
Object store (IndexReader reader, String field, int type, Locale locale, Object value) {
Entry entry = new Entry (field, type, locale);
synchronized (this) {
HashMap readerCache = (HashMap)cache.get(reader);
if (readerCache == null) {
@ -215,7 +221,7 @@ implements FieldCache {
public String[] getStrings (IndexReader reader, String field)
throws IOException {
field = field.intern();
Object ret = lookup (reader, field, SortField.STRING);
Object ret = lookup (reader, field, SortField.STRING, null);
if (ret == null) {
final String[] retArray = new String[reader.maxDoc()];
TermDocs termDocs = reader.termDocs();
@ -234,7 +240,7 @@ implements FieldCache {
termDocs.close();
termEnum.close();
}
store (reader, field, SortField.STRING, retArray);
store (reader, field, SortField.STRING, null, retArray);
return retArray;
}
return (String[]) ret;
@ -244,7 +250,7 @@ implements FieldCache {
public StringIndex getStringIndex (IndexReader reader, String field)
throws IOException {
field = field.intern();
Object ret = lookup (reader, field, STRING_INDEX);
Object ret = lookup (reader, field, STRING_INDEX, null);
if (ret == null) {
final int[] retArray = new int[reader.maxDoc()];
String[] mterms = new String[reader.maxDoc()+1];
@ -295,7 +301,7 @@ implements FieldCache {
}
StringIndex value = new StringIndex (retArray, mterms);
store (reader, field, STRING_INDEX, value);
store (reader, field, STRING_INDEX, null, value);
return value;
}
return (StringIndex) ret;
@ -316,7 +322,7 @@ implements FieldCache {
public Object getAuto (IndexReader reader, String field)
throws IOException {
field = field.intern();
Object ret = lookup (reader, field, SortField.AUTO);
Object ret = lookup (reader, field, SortField.AUTO, null);
if (ret == null) {
TermEnum enumerator = reader.terms (new Term (field, ""));
try {
@ -350,7 +356,7 @@ implements FieldCache {
}
}
if (ret != null) {
store (reader, field, SortField.AUTO, ret);
store (reader, field, SortField.AUTO, null, ret);
}
} else {
throw new RuntimeException ("field \"" + field + "\" does not appear to be indexed");

View File

@ -56,8 +56,13 @@ extends PriorityQueue {
for (int i=0; i<n; ++i) {
String fieldname = fields[i].getField();
comparators[i] = getCachedComparator (reader, fieldname, fields[i].getType(), fields[i].getLocale(), fields[i].getFactory());
if (comparators[i].sortType() == SortField.STRING) {
this.fields[i] = new SortField (fieldname, fields[i].getLocale(), fields[i].getReverse());
} else {
this.fields[i] = new SortField (fieldname, comparators[i].sortType(), fields[i].getReverse());
}
}
initialize (size);
}
@ -147,10 +152,10 @@ extends PriorityQueue {
static final Map Comparators = new WeakHashMap();
/** Returns a comparator if it is in the cache. */
static ScoreDocComparator lookup (IndexReader reader, String field, int type, Object factory) {
static ScoreDocComparator lookup (IndexReader reader, String field, int type, Locale locale, Object factory) {
FieldCacheImpl.Entry entry = (factory != null)
? new FieldCacheImpl.Entry (field, factory)
: new FieldCacheImpl.Entry (field, type);
: new FieldCacheImpl.Entry (field, type, locale);
synchronized (Comparators) {
HashMap readerCache = (HashMap)Comparators.get(reader);
if (readerCache == null) return null;
@ -159,10 +164,10 @@ extends PriorityQueue {
}
/** Stores a comparator into the cache. */
static Object store (IndexReader reader, String field, int type, Object factory, Object value) {
static Object store (IndexReader reader, String field, int type, Locale locale, Object factory, Object value) {
FieldCacheImpl.Entry entry = (factory != null)
? new FieldCacheImpl.Entry (field, factory)
: new FieldCacheImpl.Entry (field, type);
: new FieldCacheImpl.Entry (field, type, locale);
synchronized (Comparators) {
HashMap readerCache = (HashMap)Comparators.get(reader);
if (readerCache == null) {
@ -177,7 +182,7 @@ extends PriorityQueue {
throws IOException {
if (type == SortField.DOC) return ScoreDocComparator.INDEXORDER;
if (type == SortField.SCORE) return ScoreDocComparator.RELEVANCE;
ScoreDocComparator comparator = lookup (reader, fieldname, type, factory);
ScoreDocComparator comparator = lookup (reader, fieldname, type, locale, factory);
if (comparator == null) {
switch (type) {
case SortField.AUTO:
@ -199,7 +204,7 @@ extends PriorityQueue {
default:
throw new RuntimeException ("unknown field type: "+type);
}
store (reader, fieldname, type, factory, comparator);
store (reader, fieldname, type, locale, factory, comparator);
}
return comparator;
}

View File

@ -94,22 +94,23 @@ implements Serializable {
// the int field to sort by int
// the float field to sort by float
// the string field to sort by string
// the i18n field includes accented characters for testing locale-specific sorting
private String[][] data = new String[][] {
// tracer contents int float string custom
{ "A", "x a", "5", "4f", "c", "A-3" },
{ "B", "y a", "5", "3.4028235E38", "i", "B-10" },
{ "C", "x a b c", "2147483647", "1.0", "j", "A-2" },
{ "D", "y a b c", "-1", "0.0f", "a", "C-0" },
{ "E", "x a b c d", "5", "2f", "h", "B-8" },
{ "F", "y a b c d", "2", "3.14159f", "g", "B-1" },
{ "G", "x a b c d", "3", "-1.0", "f", "C-100" },
{ "H", "y a b c d", "0", "1.4E-45", "e", "C-88" },
{ "I", "x a b c d e f", "-2147483648", "1.0e+0", "d", "A-10" },
{ "J", "y a b c d e f", "4", ".5", "b", "C-7" },
{ "W", "g", "1", null, null, null },
{ "X", "g", "1", "0.1", null, null },
{ "Y", "g", "1", "0.2", null, null },
{ "Z", "f g", null, null, null, null }
// tracer contents int float string custom i18n
{ "A", "x a", "5", "4f", "c", "A-3", "p\u00EAche"},
{ "B", "y a", "5", "3.4028235E38", "i", "B-10", "HAT"},
{ "C", "x a b c", "2147483647", "1.0", "j", "A-2", "p\u00E9ch\u00E9"},
{ "D", "y a b c", "-1", "0.0f", "a", "C-0", "HUT"},
{ "E", "x a b c d", "5", "2f", "h", "B-8", "peach"},
{ "F", "y a b c d", "2", "3.14159f", "g", "B-1", "H\u00C5T"},
{ "G", "x a b c d", "3", "-1.0", "f", "C-100", "sin"},
{ "H", "y a b c d", "0", "1.4E-45", "e", "C-88", "H\u00D8T"},
{ "I", "x a b c d e f", "-2147483648", "1.0e+0", "d", "A-10", "s\u00EDn"},
{ "J", "y a b c d e f", "4", ".5", "b", "C-7", "HOT"},
{ "W", "g", "1", null, null, null, null},
{ "X", "g", "1", "0.1", null, null, null},
{ "Y", "g", "1", "0.2", null, null, null},
{ "Z", "f g", null, null, null, null, null}
};
// create an index of all the documents, or just the x, or just the y documents
@ -126,6 +127,7 @@ implements Serializable {
if (data[i][3] != null) doc.add (new Field ("float", data[i][3], Field.Store.NO, Field.Index.UN_TOKENIZED));
if (data[i][4] != null) doc.add (new Field ("string", data[i][4], Field.Store.NO, Field.Index.UN_TOKENIZED));
if (data[i][5] != null) doc.add (new Field ("custom", data[i][5], Field.Store.NO, Field.Index.UN_TOKENIZED));
if (data[i][6] != null) doc.add (new Field ("i18n", data[i][6], Field.Store.NO, Field.Index.UN_TOKENIZED));
doc.setBoost(2); // produce some scores above 1.0
writer.addDocument (doc);
}
@ -342,6 +344,40 @@ implements Serializable {
assertMatches (full, queryY, sort, "BFHJD");
}
// test using various international locales with accented characters
// (which sort differently depending on locale)
public void testInternationalSort() throws Exception {
sort.setSort (new SortField ("i18n", Locale.US));
assertMatches (full, queryY, sort, "BFJDH");
sort.setSort (new SortField ("i18n", new Locale("sv", "se")));
assertMatches (full, queryY, sort, "BJDFH");
sort.setSort (new SortField ("i18n", new Locale("da", "dk")));
assertMatches (full, queryY, sort, "BJDHF");
sort.setSort (new SortField ("i18n", Locale.US));
assertMatches (full, queryX, sort, "ECAGI");
sort.setSort (new SortField ("i18n", Locale.FRANCE));
assertMatches (full, queryX, sort, "EACGI");
}
// Test the MultiSearcher's ability to preserve locale-sensitive ordering
// by wrapping it around a single searcher
public void testInternationalMultiSearcherSort() throws Exception {
Searcher multiSearcher = new MultiSearcher (new Searchable[] { full });
sort.setSort (new SortField ("i18n", new Locale("sv", "se")));
assertMatches (multiSearcher, queryY, sort, "BJDFH");
sort.setSort (new SortField ("i18n", Locale.US));
assertMatches (multiSearcher, queryY, sort, "BFJDH");
sort.setSort (new SortField ("i18n", new Locale("da", "dk")));
assertMatches (multiSearcher, queryY, sort, "BJDHF");
}
// test a custom sort function
public void testCustomSorts() throws Exception {
sort.setSort (new SortField ("custom", SampleComparable.getComparatorSource()));