mirror of https://github.com/apache/lucene.git
SOLR-1777: fix sortMissingLast, sortMissingFirst
git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@911245 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
95480576eb
commit
4cd6455a04
|
@ -185,6 +185,10 @@ Bug Fixes
|
|||
* SOLR-1579: Fixes to XML escaping in stats.jsp
|
||||
(David Bowen and hossman)
|
||||
|
||||
* SOLR-1777: fieldTypes with sortMissingLast=true or sortMissingFirst=true can
|
||||
result in incorrectly sorted results. (yonik)
|
||||
|
||||
|
||||
Other Changes
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -43,18 +43,17 @@ public class MissingStringLastComparatorSource extends FieldComparatorSource {
|
|||
}
|
||||
|
||||
public FieldComparator newComparator(String fieldname, int numHits, int sortPos, boolean reversed) throws IOException {
|
||||
return new MissingLastOrdComparator(numHits, fieldname, sortPos, reversed, true, missingValueProxy);
|
||||
return new MissingLastOrdComparator(numHits, fieldname, sortPos, reversed, missingValueProxy);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
// Copied from Lucene and modified since the Lucene version couldn't
|
||||
// be extended or have it's values accessed.
|
||||
|
||||
// NOTE: there were a number of other interesting String
|
||||
// comparators explored, but this one seemed to perform
|
||||
// best all around. See LUCENE-1483 for details.
|
||||
class MissingLastOrdComparator extends FieldComparator {
|
||||
class MissingLastOrdComparator extends FieldComparator {
|
||||
private static final int NULL_ORD = Integer.MAX_VALUE;
|
||||
private final String nullVal;
|
||||
|
||||
private final int[] ords;
|
||||
private final String[] values;
|
||||
|
@ -71,30 +70,19 @@ class MissingLastOrdComparator extends FieldComparator {
|
|||
private final boolean reversed;
|
||||
private final int sortPos;
|
||||
|
||||
private final int nullCmp;
|
||||
private final Comparable nullVal;
|
||||
|
||||
public MissingLastOrdComparator(int numHits, String field, int sortPos, boolean reversed, boolean sortMissingLast, Comparable nullVal) {
|
||||
public MissingLastOrdComparator(int numHits, String field, int sortPos, boolean reversed, String nullVal) {
|
||||
ords = new int[numHits];
|
||||
values = new String[numHits];
|
||||
readerGen = new int[numHits];
|
||||
this.sortPos = sortPos;
|
||||
this.reversed = reversed;
|
||||
this.field = field;
|
||||
this.nullCmp = sortMissingLast ? 1 : -1;
|
||||
this.nullVal = nullVal;
|
||||
}
|
||||
|
||||
public int compare(int slot1, int slot2) {
|
||||
int ord1 = ords[slot1];
|
||||
int ord2 = ords[slot2];
|
||||
int cmp = ord1-ord2;
|
||||
if (ord1==0 || ord2==0) {
|
||||
if (cmp==0) return 0;
|
||||
return ord1==0 ? nullCmp : -nullCmp;
|
||||
}
|
||||
|
||||
public int compare(int slot1, int slot2) {
|
||||
if (readerGen[slot1] == readerGen[slot2]) {
|
||||
int cmp = ords[slot1] - ords[slot2];
|
||||
if (cmp != 0) {
|
||||
return cmp;
|
||||
}
|
||||
|
@ -102,13 +90,14 @@ class MissingLastOrdComparator extends FieldComparator {
|
|||
|
||||
final String val1 = values[slot1];
|
||||
final String val2 = values[slot2];
|
||||
|
||||
if (val1 == null) {
|
||||
if (val2 == null) {
|
||||
return 0;
|
||||
}
|
||||
return nullCmp;
|
||||
return 1;
|
||||
} else if (val2 == null) {
|
||||
return -nullCmp;
|
||||
return -1;
|
||||
}
|
||||
return val1.compareTo(val2);
|
||||
}
|
||||
|
@ -116,27 +105,17 @@ class MissingLastOrdComparator extends FieldComparator {
|
|||
public int compareBottom(int doc) {
|
||||
assert bottomSlot != -1;
|
||||
int order = this.order[doc];
|
||||
final int cmp = bottomOrd - order;
|
||||
if (bottomOrd==0 || order==0) {
|
||||
if (cmp==0) return 0;
|
||||
return bottomOrd==0 ? nullCmp : -nullCmp;
|
||||
}
|
||||
|
||||
int ord = (order == 0) ? NULL_ORD : order;
|
||||
final int cmp = bottomOrd - ord;
|
||||
if (cmp != 0) {
|
||||
return cmp;
|
||||
}
|
||||
|
||||
final String val2 = lookup[order];
|
||||
if (bottomValue == null) {
|
||||
if (val2 == null) {
|
||||
return 0;
|
||||
}
|
||||
// bottom wins
|
||||
return nullCmp;
|
||||
} else if (val2 == null) {
|
||||
// doc wins
|
||||
return -nullCmp;
|
||||
}
|
||||
|
||||
// take care of the case where both vals are null
|
||||
if (bottomValue == val2) return 0;
|
||||
|
||||
return bottomValue.compareTo(val2);
|
||||
}
|
||||
|
||||
|
@ -145,7 +124,8 @@ class MissingLastOrdComparator extends FieldComparator {
|
|||
int index = 0;
|
||||
String value = values[slot];
|
||||
if (value == null) {
|
||||
ords[slot] = 0;
|
||||
// should already be done
|
||||
// ords[slot] = NULL_ORD;
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -171,7 +151,7 @@ class MissingLastOrdComparator extends FieldComparator {
|
|||
|
||||
public void copy(int slot, int doc) {
|
||||
final int ord = order[doc];
|
||||
ords[slot] = ord;
|
||||
ords[slot] = ord == 0 ? NULL_ORD : ord;
|
||||
assert ord >= 0;
|
||||
values[slot] = lookup[ord];
|
||||
readerGen[slot] = currentReaderGen;
|
||||
|
@ -196,14 +176,10 @@ class MissingLastOrdComparator extends FieldComparator {
|
|||
}
|
||||
bottomOrd = ords[bottom];
|
||||
assert bottomOrd >= 0;
|
||||
assert bottomOrd < lookup.length;
|
||||
// assert bottomOrd < lookup.length;
|
||||
bottomValue = values[bottom];
|
||||
}
|
||||
|
||||
public int sortType() {
|
||||
return SortField.STRING;
|
||||
}
|
||||
|
||||
public Comparable value(int slot) {
|
||||
Comparable v = values[slot];
|
||||
return v==null ? nullVal : v;
|
||||
|
@ -220,4 +196,4 @@ class MissingLastOrdComparator extends FieldComparator {
|
|||
public String getField() {
|
||||
return field;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,198 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.search;
|
||||
|
||||
import org.apache.lucene.analysis.SimpleAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.search.*;
|
||||
import org.apache.lucene.store.RAMDirectory;
|
||||
import org.apache.lucene.util.OpenBitSet;
|
||||
import org.apache.solr.util.AbstractSolrTestCase;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
|
||||
public class TestSort extends AbstractSolrTestCase {
|
||||
public String getSchemaFile() { return null; }
|
||||
public String getSolrConfigFile() { return null; }
|
||||
|
||||
Random r = new Random();
|
||||
|
||||
int ndocs = 77;
|
||||
int iter = 100;
|
||||
int qiter = 1000;
|
||||
int commitCount = ndocs/5 + 1;
|
||||
int maxval = ndocs*2;
|
||||
|
||||
static class MyDoc {
|
||||
int doc;
|
||||
String val;
|
||||
}
|
||||
|
||||
public void testSort() throws Exception {
|
||||
RAMDirectory dir = new RAMDirectory();
|
||||
Document smallDoc = new Document();
|
||||
// Field id = new Field("id","0", Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS);
|
||||
Field f = new Field("f","0", Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS);
|
||||
smallDoc.add(f);
|
||||
|
||||
Document emptyDoc = new Document();
|
||||
|
||||
for (int iterCnt = 0; iterCnt<iter; iterCnt++) {
|
||||
IndexWriter iw = new IndexWriter(dir, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
|
||||
final MyDoc[] mydocs = new MyDoc[ndocs];
|
||||
|
||||
int commitCountdown = commitCount;
|
||||
for (int i=0; i< ndocs; i++) {
|
||||
Document doc;
|
||||
MyDoc mydoc = new MyDoc();
|
||||
mydoc.doc = i;
|
||||
mydocs[i] = mydoc;
|
||||
|
||||
if (r.nextInt(3)==0) {
|
||||
doc = emptyDoc;
|
||||
mydoc.val = null;
|
||||
} else {
|
||||
mydoc.val = Integer.toString(r.nextInt(maxval));
|
||||
f.setValue(mydoc.val);
|
||||
doc = smallDoc;
|
||||
}
|
||||
iw.addDocument(doc);
|
||||
if (--commitCountdown <= 0) {
|
||||
commitCountdown = commitCount;
|
||||
iw.commit();
|
||||
}
|
||||
}
|
||||
iw.close();
|
||||
|
||||
/***
|
||||
Arrays.sort(mydocs, new Comparator<MyDoc>() {
|
||||
public int compare(MyDoc o1, MyDoc o2) {
|
||||
String v1 = o1.val==null ? "zzz" : o1.val;
|
||||
String v2 = o2.val==null ? "zzz" : o2.val;
|
||||
int cmp = v1.compareTo(v2);
|
||||
cmp = cmp==0 ? o1.doc-o2.doc : cmp;
|
||||
return cmp;
|
||||
}
|
||||
});
|
||||
***/
|
||||
|
||||
IndexSearcher searcher = new IndexSearcher(dir, true);
|
||||
// System.out.println("segments="+searcher.getIndexReader().getSequentialSubReaders().length);
|
||||
assertTrue(searcher.getIndexReader().getSequentialSubReaders().length > 1);
|
||||
|
||||
for (int i=0; i<qiter; i++) {
|
||||
Filter filt = new Filter() {
|
||||
@Override
|
||||
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
|
||||
return randSet(reader.maxDoc());
|
||||
}
|
||||
};
|
||||
|
||||
int top = r.nextInt((ndocs>>3)+1)+1;
|
||||
final boolean sortMissingLast = r.nextBoolean();
|
||||
final boolean reverse = !sortMissingLast;
|
||||
List<SortField> sfields = new ArrayList<SortField>();
|
||||
|
||||
if (r.nextBoolean()) sfields.add( new SortField(null, SortField.SCORE));
|
||||
// hit both use-cases of sort-missing-last
|
||||
sfields.add( Sorting.getStringSortField("f", reverse, sortMissingLast, !sortMissingLast) );
|
||||
int sortIdx = sfields.size() - 1;
|
||||
if (r.nextBoolean()) sfields.add( new SortField(null, SortField.SCORE));
|
||||
|
||||
Sort sort = new Sort(sfields.toArray(new SortField[sfields.size()]));
|
||||
|
||||
// final String nullRep = sortMissingLast ? "zzz" : "";
|
||||
final String nullRep = "zzz";
|
||||
|
||||
boolean trackScores = r.nextBoolean();
|
||||
boolean trackMaxScores = r.nextBoolean();
|
||||
boolean scoreInOrder = r.nextBoolean();
|
||||
final TopFieldCollector topCollector = TopFieldCollector.create(sort, top, true, trackScores, trackMaxScores, scoreInOrder);
|
||||
|
||||
final List<MyDoc> collectedDocs = new ArrayList<MyDoc>();
|
||||
// delegate and collect docs ourselves
|
||||
Collector myCollector = new Collector() {
|
||||
int docBase;
|
||||
|
||||
@Override
|
||||
public void setScorer(Scorer scorer) throws IOException {
|
||||
topCollector.setScorer(scorer);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void collect(int doc) throws IOException {
|
||||
topCollector.collect(doc);
|
||||
collectedDocs.add(mydocs[doc + docBase]);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setNextReader(IndexReader reader, int docBase) throws IOException {
|
||||
topCollector.setNextReader(reader,docBase);
|
||||
this.docBase = docBase;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean acceptsDocsOutOfOrder() {
|
||||
return topCollector.acceptsDocsOutOfOrder();
|
||||
}
|
||||
};
|
||||
|
||||
searcher.search(new MatchAllDocsQuery(), filt, myCollector);
|
||||
|
||||
Collections.sort(collectedDocs, new Comparator<MyDoc>() {
|
||||
public int compare(MyDoc o1, MyDoc o2) {
|
||||
String v1 = o1.val==null ? nullRep : o1.val;
|
||||
String v2 = o2.val==null ? nullRep : o2.val;
|
||||
int cmp = v1.compareTo(v2);
|
||||
if (reverse) cmp = -cmp;
|
||||
cmp = cmp==0 ? o1.doc-o2.doc : cmp;
|
||||
return cmp;
|
||||
}
|
||||
});
|
||||
|
||||
|
||||
TopDocs topDocs = topCollector.topDocs();
|
||||
ScoreDoc[] sdocs = topDocs.scoreDocs;
|
||||
for (int j=0; j<sdocs.length; j++) {
|
||||
int id = sdocs[j].doc;
|
||||
String s = (String)((FieldDoc)sdocs[j]).fields[sortIdx];
|
||||
if (id != collectedDocs.get(j).doc) {
|
||||
System.out.println("Error at pos " + j);
|
||||
}
|
||||
assertEquals(id, collectedDocs.get(j).doc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public DocIdSet randSet(int sz) {
|
||||
OpenBitSet obs = new OpenBitSet(sz);
|
||||
int n = r.nextInt(sz);
|
||||
for (int i=0; i<n; i++) {
|
||||
obs.fastSet(r.nextInt(sz));
|
||||
}
|
||||
return obs;
|
||||
}
|
||||
|
||||
|
||||
}
|
Loading…
Reference in New Issue