SOLR-1777: fix sortMissingLast, sortMissingFirst

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@911245 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yonik Seeley 2010-02-18 01:46:51 +00:00
parent 95480576eb
commit 4cd6455a04
3 changed files with 224 additions and 46 deletions

View File

@ -185,6 +185,10 @@ Bug Fixes
* SOLR-1579: Fixes to XML escaping in stats.jsp
(David Bowen and hossman)
* SOLR-1777: fieldTypes with sortMissingLast=true or sortMissingFirst=true can
result in incorrectly sorted results. (yonik)
Other Changes
----------------------

View File

@ -43,18 +43,17 @@ public class MissingStringLastComparatorSource extends FieldComparatorSource {
}
public FieldComparator newComparator(String fieldname, int numHits, int sortPos, boolean reversed) throws IOException {
return new MissingLastOrdComparator(numHits, fieldname, sortPos, reversed, true, missingValueProxy);
return new MissingLastOrdComparator(numHits, fieldname, sortPos, reversed, missingValueProxy);
}
}
// Copied from Lucene and modified since the Lucene version couldn't
// be extended or have it's values accessed.
// NOTE: there were a number of other interesting String
// comparators explored, but this one seemed to perform
// best all around. See LUCENE-1483 for details.
class MissingLastOrdComparator extends FieldComparator {
class MissingLastOrdComparator extends FieldComparator {
private static final int NULL_ORD = Integer.MAX_VALUE;
private final String nullVal;
private final int[] ords;
private final String[] values;
@ -71,30 +70,19 @@ class MissingLastOrdComparator extends FieldComparator {
private final boolean reversed;
private final int sortPos;
private final int nullCmp;
private final Comparable nullVal;
public MissingLastOrdComparator(int numHits, String field, int sortPos, boolean reversed, boolean sortMissingLast, Comparable nullVal) {
public MissingLastOrdComparator(int numHits, String field, int sortPos, boolean reversed, String nullVal) {
ords = new int[numHits];
values = new String[numHits];
readerGen = new int[numHits];
this.sortPos = sortPos;
this.reversed = reversed;
this.field = field;
this.nullCmp = sortMissingLast ? 1 : -1;
this.nullVal = nullVal;
}
public int compare(int slot1, int slot2) {
int ord1 = ords[slot1];
int ord2 = ords[slot2];
int cmp = ord1-ord2;
if (ord1==0 || ord2==0) {
if (cmp==0) return 0;
return ord1==0 ? nullCmp : -nullCmp;
}
public int compare(int slot1, int slot2) {
if (readerGen[slot1] == readerGen[slot2]) {
int cmp = ords[slot1] - ords[slot2];
if (cmp != 0) {
return cmp;
}
@ -102,13 +90,14 @@ class MissingLastOrdComparator extends FieldComparator {
final String val1 = values[slot1];
final String val2 = values[slot2];
if (val1 == null) {
if (val2 == null) {
return 0;
}
return nullCmp;
return 1;
} else if (val2 == null) {
return -nullCmp;
return -1;
}
return val1.compareTo(val2);
}
@ -116,27 +105,17 @@ class MissingLastOrdComparator extends FieldComparator {
public int compareBottom(int doc) {
assert bottomSlot != -1;
int order = this.order[doc];
final int cmp = bottomOrd - order;
if (bottomOrd==0 || order==0) {
if (cmp==0) return 0;
return bottomOrd==0 ? nullCmp : -nullCmp;
}
int ord = (order == 0) ? NULL_ORD : order;
final int cmp = bottomOrd - ord;
if (cmp != 0) {
return cmp;
}
final String val2 = lookup[order];
if (bottomValue == null) {
if (val2 == null) {
return 0;
}
// bottom wins
return nullCmp;
} else if (val2 == null) {
// doc wins
return -nullCmp;
}
// take care of the case where both vals are null
if (bottomValue == val2) return 0;
return bottomValue.compareTo(val2);
}
@ -145,7 +124,8 @@ class MissingLastOrdComparator extends FieldComparator {
int index = 0;
String value = values[slot];
if (value == null) {
ords[slot] = 0;
// should already be done
// ords[slot] = NULL_ORD;
return;
}
@ -171,7 +151,7 @@ class MissingLastOrdComparator extends FieldComparator {
public void copy(int slot, int doc) {
final int ord = order[doc];
ords[slot] = ord;
ords[slot] = ord == 0 ? NULL_ORD : ord;
assert ord >= 0;
values[slot] = lookup[ord];
readerGen[slot] = currentReaderGen;
@ -196,14 +176,10 @@ class MissingLastOrdComparator extends FieldComparator {
}
bottomOrd = ords[bottom];
assert bottomOrd >= 0;
assert bottomOrd < lookup.length;
// assert bottomOrd < lookup.length;
bottomValue = values[bottom];
}
public int sortType() {
return SortField.STRING;
}
public Comparable value(int slot) {
Comparable v = values[slot];
return v==null ? nullVal : v;
@ -220,4 +196,4 @@ class MissingLastOrdComparator extends FieldComparator {
public String getField() {
return field;
}
}
}

View File

@ -0,0 +1,198 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.*;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.OpenBitSet;
import org.apache.solr.util.AbstractSolrTestCase;
import java.io.IOException;
import java.util.*;
public class TestSort extends AbstractSolrTestCase {
public String getSchemaFile() { return null; }
public String getSolrConfigFile() { return null; }
Random r = new Random();
int ndocs = 77;
int iter = 100;
int qiter = 1000;
int commitCount = ndocs/5 + 1;
int maxval = ndocs*2;
static class MyDoc {
int doc;
String val;
}
public void testSort() throws Exception {
RAMDirectory dir = new RAMDirectory();
Document smallDoc = new Document();
// Field id = new Field("id","0", Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS);
Field f = new Field("f","0", Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS);
smallDoc.add(f);
Document emptyDoc = new Document();
for (int iterCnt = 0; iterCnt<iter; iterCnt++) {
IndexWriter iw = new IndexWriter(dir, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
final MyDoc[] mydocs = new MyDoc[ndocs];
int commitCountdown = commitCount;
for (int i=0; i< ndocs; i++) {
Document doc;
MyDoc mydoc = new MyDoc();
mydoc.doc = i;
mydocs[i] = mydoc;
if (r.nextInt(3)==0) {
doc = emptyDoc;
mydoc.val = null;
} else {
mydoc.val = Integer.toString(r.nextInt(maxval));
f.setValue(mydoc.val);
doc = smallDoc;
}
iw.addDocument(doc);
if (--commitCountdown <= 0) {
commitCountdown = commitCount;
iw.commit();
}
}
iw.close();
/***
Arrays.sort(mydocs, new Comparator<MyDoc>() {
public int compare(MyDoc o1, MyDoc o2) {
String v1 = o1.val==null ? "zzz" : o1.val;
String v2 = o2.val==null ? "zzz" : o2.val;
int cmp = v1.compareTo(v2);
cmp = cmp==0 ? o1.doc-o2.doc : cmp;
return cmp;
}
});
***/
IndexSearcher searcher = new IndexSearcher(dir, true);
// System.out.println("segments="+searcher.getIndexReader().getSequentialSubReaders().length);
assertTrue(searcher.getIndexReader().getSequentialSubReaders().length > 1);
for (int i=0; i<qiter; i++) {
Filter filt = new Filter() {
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
return randSet(reader.maxDoc());
}
};
int top = r.nextInt((ndocs>>3)+1)+1;
final boolean sortMissingLast = r.nextBoolean();
final boolean reverse = !sortMissingLast;
List<SortField> sfields = new ArrayList<SortField>();
if (r.nextBoolean()) sfields.add( new SortField(null, SortField.SCORE));
// hit both use-cases of sort-missing-last
sfields.add( Sorting.getStringSortField("f", reverse, sortMissingLast, !sortMissingLast) );
int sortIdx = sfields.size() - 1;
if (r.nextBoolean()) sfields.add( new SortField(null, SortField.SCORE));
Sort sort = new Sort(sfields.toArray(new SortField[sfields.size()]));
// final String nullRep = sortMissingLast ? "zzz" : "";
final String nullRep = "zzz";
boolean trackScores = r.nextBoolean();
boolean trackMaxScores = r.nextBoolean();
boolean scoreInOrder = r.nextBoolean();
final TopFieldCollector topCollector = TopFieldCollector.create(sort, top, true, trackScores, trackMaxScores, scoreInOrder);
final List<MyDoc> collectedDocs = new ArrayList<MyDoc>();
// delegate and collect docs ourselves
Collector myCollector = new Collector() {
int docBase;
@Override
public void setScorer(Scorer scorer) throws IOException {
topCollector.setScorer(scorer);
}
@Override
public void collect(int doc) throws IOException {
topCollector.collect(doc);
collectedDocs.add(mydocs[doc + docBase]);
}
@Override
public void setNextReader(IndexReader reader, int docBase) throws IOException {
topCollector.setNextReader(reader,docBase);
this.docBase = docBase;
}
@Override
public boolean acceptsDocsOutOfOrder() {
return topCollector.acceptsDocsOutOfOrder();
}
};
searcher.search(new MatchAllDocsQuery(), filt, myCollector);
Collections.sort(collectedDocs, new Comparator<MyDoc>() {
public int compare(MyDoc o1, MyDoc o2) {
String v1 = o1.val==null ? nullRep : o1.val;
String v2 = o2.val==null ? nullRep : o2.val;
int cmp = v1.compareTo(v2);
if (reverse) cmp = -cmp;
cmp = cmp==0 ? o1.doc-o2.doc : cmp;
return cmp;
}
});
TopDocs topDocs = topCollector.topDocs();
ScoreDoc[] sdocs = topDocs.scoreDocs;
for (int j=0; j<sdocs.length; j++) {
int id = sdocs[j].doc;
String s = (String)((FieldDoc)sdocs[j]).fields[sortIdx];
if (id != collectedDocs.get(j).doc) {
System.out.println("Error at pos " + j);
}
assertEquals(id, collectedDocs.get(j).doc);
}
}
}
}
public DocIdSet randSet(int sz) {
OpenBitSet obs = new OpenBitSet(sz);
int n = r.nextInt(sz);
for (int i=0; i<n; i++) {
obs.fastSet(r.nextInt(sz));
}
return obs;
}
}