FieldSortedHitQueue - subsequent String sorts with different locales sort identically: LUCENE-526

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@391895 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yonik Seeley 2006-04-06 04:02:09 +00:00
parent 825bc9bdac
commit 6c32f48074
4 changed files with 88 additions and 38 deletions

View File

@ -34,6 +34,9 @@ Bug fixes
that sometimes caused the index order of documents to change.
(Yonik Seeley)
7. LUCENE-526: Fixed a bug in FieldSortedHitQueue that caused
subsequent String sorts with different locales to sort identically.
(Paul Cowan via Yonik Seeley)
1.9.1

View File

@ -23,6 +23,7 @@ import org.apache.lucene.index.TermEnum;
import org.apache.lucene.search.FieldCache.StringIndex; // required by GCJ import org.apache.lucene.search.FieldCache.StringIndex; // required by GCJ
import java.io.IOException; import java.io.IOException;
import java.util.Locale;
import java.util.Map; import java.util.Map;
import java.util.WeakHashMap; import java.util.WeakHashMap;
import java.util.HashMap; import java.util.HashMap;
@ -45,12 +46,14 @@ implements FieldCache {
final String field; // which Field final String field; // which Field
final int type; // which SortField type final int type; // which SortField type
final Object custom; // which custom comparator final Object custom; // which custom comparator
final Locale locale; // the locale we're sorting (if string)
/** Creates one of these objects. */ /** Creates one of these objects. */
Entry (String field, int type) { Entry (String field, int type, Locale locale) {
this.field = field.intern(); this.field = field.intern();
this.type = type; this.type = type;
this.custom = null; this.custom = null;
this.locale = locale;
} }
/** Creates one of these objects for a custom comparator. */ /** Creates one of these objects for a custom comparator. */
@ -58,6 +61,7 @@ implements FieldCache {
this.field = field.intern(); this.field = field.intern();
this.type = SortField.CUSTOM; this.type = SortField.CUSTOM;
this.custom = custom; this.custom = custom;
this.locale = null;
} }
/** Two of these are equal iff they reference the same field and type. */ /** Two of these are equal iff they reference the same field and type. */
@ -65,6 +69,7 @@ implements FieldCache {
if (o instanceof Entry) { if (o instanceof Entry) {
Entry other = (Entry) o; Entry other = (Entry) o;
if (other.field == field && other.type == type) { if (other.field == field && other.type == type) {
if (other.locale == null ? locale == null : other.locale.equals(locale)) {
if (other.custom == null) { if (other.custom == null) {
if (custom == null) return true; if (custom == null) return true;
} else if (other.custom.equals (custom)) { } else if (other.custom.equals (custom)) {
@ -72,12 +77,13 @@ implements FieldCache {
} }
} }
} }
}
return false; return false;
} }
/** Composes a hashcode based on the field and type. */ /** Composes a hashcode based on the field and type. */
public int hashCode() { public int hashCode() {
return field.hashCode() ^ type ^ (custom==null ? 0 : custom.hashCode()); return field.hashCode() ^ type ^ (custom==null ? 0 : custom.hashCode()) ^ (locale==null ? 0 : locale.hashCode());
} }
} }
@ -97,8 +103,8 @@ implements FieldCache {
final Map cache = new WeakHashMap(); final Map cache = new WeakHashMap();
/** See if an object is in the cache. */ /** See if an object is in the cache. */
Object lookup (IndexReader reader, String field, int type) { Object lookup (IndexReader reader, String field, int type, Locale locale) {
Entry entry = new Entry (field, type); Entry entry = new Entry (field, type, locale);
synchronized (this) { synchronized (this) {
HashMap readerCache = (HashMap)cache.get(reader); HashMap readerCache = (HashMap)cache.get(reader);
if (readerCache == null) return null; if (readerCache == null) return null;
@ -117,8 +123,8 @@ implements FieldCache {
} }
/** Put an object into the cache. */ /** Put an object into the cache. */
Object store (IndexReader reader, String field, int type, Object value) { Object store (IndexReader reader, String field, int type, Locale locale, Object value) {
Entry entry = new Entry (field, type); Entry entry = new Entry (field, type, locale);
synchronized (this) { synchronized (this) {
HashMap readerCache = (HashMap)cache.get(reader); HashMap readerCache = (HashMap)cache.get(reader);
if (readerCache == null) { if (readerCache == null) {
@ -215,7 +221,7 @@ implements FieldCache {
public String[] getStrings (IndexReader reader, String field) public String[] getStrings (IndexReader reader, String field)
throws IOException { throws IOException {
field = field.intern(); field = field.intern();
Object ret = lookup (reader, field, SortField.STRING); Object ret = lookup (reader, field, SortField.STRING, null);
if (ret == null) { if (ret == null) {
final String[] retArray = new String[reader.maxDoc()]; final String[] retArray = new String[reader.maxDoc()];
TermDocs termDocs = reader.termDocs(); TermDocs termDocs = reader.termDocs();
@ -234,7 +240,7 @@ implements FieldCache {
termDocs.close(); termDocs.close();
termEnum.close(); termEnum.close();
} }
store (reader, field, SortField.STRING, retArray); store (reader, field, SortField.STRING, null, retArray);
return retArray; return retArray;
} }
return (String[]) ret; return (String[]) ret;
@ -244,7 +250,7 @@ implements FieldCache {
public StringIndex getStringIndex (IndexReader reader, String field) public StringIndex getStringIndex (IndexReader reader, String field)
throws IOException { throws IOException {
field = field.intern(); field = field.intern();
Object ret = lookup (reader, field, STRING_INDEX); Object ret = lookup (reader, field, STRING_INDEX, null);
if (ret == null) { if (ret == null) {
final int[] retArray = new int[reader.maxDoc()]; final int[] retArray = new int[reader.maxDoc()];
String[] mterms = new String[reader.maxDoc()+1]; String[] mterms = new String[reader.maxDoc()+1];
@ -295,7 +301,7 @@ implements FieldCache {
} }
StringIndex value = new StringIndex (retArray, mterms); StringIndex value = new StringIndex (retArray, mterms);
store (reader, field, STRING_INDEX, value); store (reader, field, STRING_INDEX, null, value);
return value; return value;
} }
return (StringIndex) ret; return (StringIndex) ret;
@ -316,7 +322,7 @@ implements FieldCache {
public Object getAuto (IndexReader reader, String field) public Object getAuto (IndexReader reader, String field)
throws IOException { throws IOException {
field = field.intern(); field = field.intern();
Object ret = lookup (reader, field, SortField.AUTO); Object ret = lookup (reader, field, SortField.AUTO, null);
if (ret == null) { if (ret == null) {
TermEnum enumerator = reader.terms (new Term (field, "")); TermEnum enumerator = reader.terms (new Term (field, ""));
try { try {
@ -350,7 +356,7 @@ implements FieldCache {
} }
} }
if (ret != null) { if (ret != null) {
store (reader, field, SortField.AUTO, ret); store (reader, field, SortField.AUTO, null, ret);
} }
} else { } else {
throw new RuntimeException ("field \"" + field + "\" does not appear to be indexed"); throw new RuntimeException ("field \"" + field + "\" does not appear to be indexed");

View File

@ -56,8 +56,13 @@ extends PriorityQueue {
for (int i=0; i<n; ++i) { for (int i=0; i<n; ++i) {
String fieldname = fields[i].getField(); String fieldname = fields[i].getField();
comparators[i] = getCachedComparator (reader, fieldname, fields[i].getType(), fields[i].getLocale(), fields[i].getFactory()); comparators[i] = getCachedComparator (reader, fieldname, fields[i].getType(), fields[i].getLocale(), fields[i].getFactory());
if (comparators[i].sortType() == SortField.STRING) {
this.fields[i] = new SortField (fieldname, fields[i].getLocale(), fields[i].getReverse());
} else {
this.fields[i] = new SortField (fieldname, comparators[i].sortType(), fields[i].getReverse()); this.fields[i] = new SortField (fieldname, comparators[i].sortType(), fields[i].getReverse());
} }
}
initialize (size); initialize (size);
} }
@ -147,10 +152,10 @@ extends PriorityQueue {
static final Map Comparators = new WeakHashMap(); static final Map Comparators = new WeakHashMap();
/** Returns a comparator if it is in the cache. */ /** Returns a comparator if it is in the cache. */
static ScoreDocComparator lookup (IndexReader reader, String field, int type, Object factory) { static ScoreDocComparator lookup (IndexReader reader, String field, int type, Locale locale, Object factory) {
FieldCacheImpl.Entry entry = (factory != null) FieldCacheImpl.Entry entry = (factory != null)
? new FieldCacheImpl.Entry (field, factory) ? new FieldCacheImpl.Entry (field, factory)
: new FieldCacheImpl.Entry (field, type); : new FieldCacheImpl.Entry (field, type, locale);
synchronized (Comparators) { synchronized (Comparators) {
HashMap readerCache = (HashMap)Comparators.get(reader); HashMap readerCache = (HashMap)Comparators.get(reader);
if (readerCache == null) return null; if (readerCache == null) return null;
@ -159,10 +164,10 @@ extends PriorityQueue {
} }
/** Stores a comparator into the cache. */ /** Stores a comparator into the cache. */
static Object store (IndexReader reader, String field, int type, Object factory, Object value) { static Object store (IndexReader reader, String field, int type, Locale locale, Object factory, Object value) {
FieldCacheImpl.Entry entry = (factory != null) FieldCacheImpl.Entry entry = (factory != null)
? new FieldCacheImpl.Entry (field, factory) ? new FieldCacheImpl.Entry (field, factory)
: new FieldCacheImpl.Entry (field, type); : new FieldCacheImpl.Entry (field, type, locale);
synchronized (Comparators) { synchronized (Comparators) {
HashMap readerCache = (HashMap)Comparators.get(reader); HashMap readerCache = (HashMap)Comparators.get(reader);
if (readerCache == null) { if (readerCache == null) {
@ -177,7 +182,7 @@ extends PriorityQueue {
throws IOException { throws IOException {
if (type == SortField.DOC) return ScoreDocComparator.INDEXORDER; if (type == SortField.DOC) return ScoreDocComparator.INDEXORDER;
if (type == SortField.SCORE) return ScoreDocComparator.RELEVANCE; if (type == SortField.SCORE) return ScoreDocComparator.RELEVANCE;
ScoreDocComparator comparator = lookup (reader, fieldname, type, factory); ScoreDocComparator comparator = lookup (reader, fieldname, type, locale, factory);
if (comparator == null) { if (comparator == null) {
switch (type) { switch (type) {
case SortField.AUTO: case SortField.AUTO:
@ -199,7 +204,7 @@ extends PriorityQueue {
default: default:
throw new RuntimeException ("unknown field type: "+type); throw new RuntimeException ("unknown field type: "+type);
} }
store (reader, fieldname, type, factory, comparator); store (reader, fieldname, type, locale, factory, comparator);
} }
return comparator; return comparator;
} }

View File

@ -94,22 +94,23 @@ implements Serializable {
// the int field to sort by int // the int field to sort by int
// the float field to sort by float // the float field to sort by float
// the string field to sort by string // the string field to sort by string
// the i18n field includes accented characters for testing locale-specific sorting
private String[][] data = new String[][] { private String[][] data = new String[][] {
// tracer contents int float string custom // tracer contents int float string custom i18n
{ "A", "x a", "5", "4f", "c", "A-3" }, { "A", "x a", "5", "4f", "c", "A-3", "p\u00EAche"},
{ "B", "y a", "5", "3.4028235E38", "i", "B-10" }, { "B", "y a", "5", "3.4028235E38", "i", "B-10", "HAT"},
{ "C", "x a b c", "2147483647", "1.0", "j", "A-2" }, { "C", "x a b c", "2147483647", "1.0", "j", "A-2", "p\u00E9ch\u00E9"},
{ "D", "y a b c", "-1", "0.0f", "a", "C-0" }, { "D", "y a b c", "-1", "0.0f", "a", "C-0", "HUT"},
{ "E", "x a b c d", "5", "2f", "h", "B-8" }, { "E", "x a b c d", "5", "2f", "h", "B-8", "peach"},
{ "F", "y a b c d", "2", "3.14159f", "g", "B-1" }, { "F", "y a b c d", "2", "3.14159f", "g", "B-1", "H\u00C5T"},
{ "G", "x a b c d", "3", "-1.0", "f", "C-100" }, { "G", "x a b c d", "3", "-1.0", "f", "C-100", "sin"},
{ "H", "y a b c d", "0", "1.4E-45", "e", "C-88" }, { "H", "y a b c d", "0", "1.4E-45", "e", "C-88", "H\u00D8T"},
{ "I", "x a b c d e f", "-2147483648", "1.0e+0", "d", "A-10" }, { "I", "x a b c d e f", "-2147483648", "1.0e+0", "d", "A-10", "s\u00EDn"},
{ "J", "y a b c d e f", "4", ".5", "b", "C-7" }, { "J", "y a b c d e f", "4", ".5", "b", "C-7", "HOT"},
{ "W", "g", "1", null, null, null }, { "W", "g", "1", null, null, null, null},
{ "X", "g", "1", "0.1", null, null }, { "X", "g", "1", "0.1", null, null, null},
{ "Y", "g", "1", "0.2", null, null }, { "Y", "g", "1", "0.2", null, null, null},
{ "Z", "f g", null, null, null, null } { "Z", "f g", null, null, null, null, null}
}; };
// create an index of all the documents, or just the x, or just the y documents // create an index of all the documents, or just the x, or just the y documents
@ -126,6 +127,7 @@ implements Serializable {
if (data[i][3] != null) doc.add (new Field ("float", data[i][3], Field.Store.NO, Field.Index.UN_TOKENIZED)); if (data[i][3] != null) doc.add (new Field ("float", data[i][3], Field.Store.NO, Field.Index.UN_TOKENIZED));
if (data[i][4] != null) doc.add (new Field ("string", data[i][4], Field.Store.NO, Field.Index.UN_TOKENIZED)); if (data[i][4] != null) doc.add (new Field ("string", data[i][4], Field.Store.NO, Field.Index.UN_TOKENIZED));
if (data[i][5] != null) doc.add (new Field ("custom", data[i][5], Field.Store.NO, Field.Index.UN_TOKENIZED)); if (data[i][5] != null) doc.add (new Field ("custom", data[i][5], Field.Store.NO, Field.Index.UN_TOKENIZED));
if (data[i][6] != null) doc.add (new Field ("i18n", data[i][6], Field.Store.NO, Field.Index.UN_TOKENIZED));
doc.setBoost(2); // produce some scores above 1.0 doc.setBoost(2); // produce some scores above 1.0
writer.addDocument (doc); writer.addDocument (doc);
} }
@ -342,6 +344,40 @@ implements Serializable {
assertMatches (full, queryY, sort, "BFHJD"); assertMatches (full, queryY, sort, "BFHJD");
} }
// test using various international locales with accented characters
// (which sort differently depending on locale)
/**
 * Sorts the accented "i18n" field under several locales and checks that each
 * locale yields its own document order, including when the same field is
 * re-sorted with a different locale right after a previous sort.
 */
public void testInternationalSort() throws Exception {
  final String field = "i18n";
  final Locale swedish = new Locale("sv", "se");
  final Locale danish = new Locale("da", "dk");

  // queryY results: three locales, three different expected orders.
  SortField byLocale = new SortField (field, Locale.US);
  sort.setSort (byLocale);
  assertMatches (full, queryY, sort, "BFJDH");

  byLocale = new SortField (field, swedish);
  sort.setSort (byLocale);
  assertMatches (full, queryY, sort, "BJDFH");

  byLocale = new SortField (field, danish);
  sort.setSort (byLocale);
  assertMatches (full, queryY, sort, "BJDHF");

  // queryX results: US versus French ordering of the same terms.
  byLocale = new SortField (field, Locale.US);
  sort.setSort (byLocale);
  assertMatches (full, queryX, sort, "ECAGI");

  byLocale = new SortField (field, Locale.FRANCE);
  sort.setSort (byLocale);
  assertMatches (full, queryX, sort, "EACGI");
}
// Test the MultiSearcher's ability to preserve locale-sensitive ordering
// by wrapping it around a single searcher
/**
 * Runs locale-specific sorts on the "i18n" field through a MultiSearcher that
 * wraps only the "full" searcher, and checks the expected order per locale.
 */
public void testInternationalMultiSearcherSort() throws Exception {
  final Searchable[] wrapped = new Searchable[] { full };
  final Searcher multiSearcher = new MultiSearcher (wrapped);
  final String field = "i18n";

  // Swedish first, then US, then Danish: each sort must get its own order
  // even though the field name is the same every time.
  SortField byLocale = new SortField (field, new Locale("sv", "se"));
  sort.setSort (byLocale);
  assertMatches (multiSearcher, queryY, sort, "BJDFH");

  byLocale = new SortField (field, Locale.US);
  sort.setSort (byLocale);
  assertMatches (multiSearcher, queryY, sort, "BFJDH");

  byLocale = new SortField (field, new Locale("da", "dk"));
  sort.setSort (byLocale);
  assertMatches (multiSearcher, queryY, sort, "BJDHF");
}
// test a custom sort function // test a custom sort function
public void testCustomSorts() throws Exception { public void testCustomSorts() throws Exception {
sort.setSort (new SortField ("custom", SampleComparable.getComparatorSource())); sort.setSort (new SortField ("custom", SampleComparable.getComparatorSource()));