mirror of https://github.com/apache/lucene.git
break sorting ties by index order: LUCENE-456
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@332431 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
5bba415099
commit
48b55ab1e0
|
@ -245,6 +245,10 @@ Bug fixes
|
||||||
change the sort order when sorting by string for documents without
|
change the sort order when sorting by string for documents without
|
||||||
a value for the sort field.
|
a value for the sort field.
|
||||||
(Luc Vanlerberghe via Yonik, LUCENE-453)
|
(Luc Vanlerberghe via Yonik, LUCENE-453)
|
||||||
|
|
||||||
|
16. Fixed a sorting problem with MultiSearchers that can lead to
|
||||||
|
missing or duplicate docs due to equal docs sorting in an arbitrary order.
|
||||||
|
(Yonik Seeley, LUCENE-456)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
|
|
||||||
|
|
|
@ -157,6 +157,11 @@ extends PriorityQueue {
|
||||||
c = -c;
|
c = -c;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return c > 0;
|
|
||||||
|
// avoid random sort order that could lead to duplicates (bug #31241):
|
||||||
|
if (c == 0)
|
||||||
|
return docA.doc > docB.doc;
|
||||||
|
|
||||||
|
return c > 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,268 @@
|
||||||
|
package org.apache.lucene.search;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.Calendar;
|
||||||
|
import java.util.GregorianCalendar;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Random;
|
||||||
|
import java.util.TreeMap;
|
||||||
|
|
||||||
|
import junit.framework.Test;
|
||||||
|
import junit.framework.TestCase;
|
||||||
|
import junit.framework.TestSuite;
|
||||||
|
import junit.textui.TestRunner;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||||
|
import org.apache.lucene.document.DateTools;
|
||||||
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.document.Field;
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.IndexWriter;
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.store.RAMDirectory;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Unit test for sorting code.
|
||||||
|
*
|
||||||
|
* @author Martin Seitz (T-Systems)
|
||||||
|
*/
|
||||||
|
|
||||||
|
public class TestCustomSearcherSort
|
||||||
|
extends TestCase
|
||||||
|
implements Serializable {
|
||||||
|
|
||||||
|
private Directory index = null;
|
||||||
|
private Query query = null;
|
||||||
|
// reduced from 20000 to 2000 to speed up test...
|
||||||
|
private final static int INDEX_SIZE = 2000;
|
||||||
|
|
||||||
|
public TestCustomSearcherSort (String name) {
|
||||||
|
super (name);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void main (String[] argv) {
|
||||||
|
TestRunner.run (suite());
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Test suite() {
|
||||||
|
return new TestSuite (TestCustomSearcherSort.class);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// create an index for testing
|
||||||
|
private Directory getIndex()
|
||||||
|
throws IOException {
|
||||||
|
RAMDirectory indexStore = new RAMDirectory ();
|
||||||
|
IndexWriter writer = new IndexWriter (indexStore, new StandardAnalyzer(), true);
|
||||||
|
RandomGen random = new RandomGen();
|
||||||
|
for (int i=0; i<INDEX_SIZE; ++i) { // don't decrease; if to low the problem doesn't show up
|
||||||
|
Document doc = new Document();
|
||||||
|
if((i%5)!=0) { // some documents must not have an entry in the first sort field
|
||||||
|
doc.add (new Field("publicationDate_", random.getLuceneDate(), Field.Store.YES, Field.Index.UN_TOKENIZED));
|
||||||
|
}
|
||||||
|
if((i%7)==0) { // some documents to match the query (see below)
|
||||||
|
doc.add (new Field("content", "test", Field.Store.YES, Field.Index.TOKENIZED));
|
||||||
|
}
|
||||||
|
// every document has a defined 'mandant' field
|
||||||
|
doc.add(new Field("mandant", Integer.toString(i%3), Field.Store.YES, Field.Index.UN_TOKENIZED));
|
||||||
|
writer.addDocument (doc);
|
||||||
|
}
|
||||||
|
writer.optimize ();
|
||||||
|
writer.close ();
|
||||||
|
return indexStore;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create index and query for test cases.
|
||||||
|
*/
|
||||||
|
public void setUp() throws Exception {
|
||||||
|
index = getIndex();
|
||||||
|
query = new TermQuery( new Term("content", "test"));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Run the test using two CustomSearcher instances.
|
||||||
|
*/
|
||||||
|
public void testFieldSortCustomSearcher() throws Exception {
|
||||||
|
log("Run testFieldSortCustomSearcher");
|
||||||
|
// define the sort criteria
|
||||||
|
Sort custSort = new Sort(new SortField[] {
|
||||||
|
new SortField("publicationDate_"),
|
||||||
|
SortField.FIELD_SCORE
|
||||||
|
});
|
||||||
|
Searcher searcher = new CustomSearcher (index, 2);
|
||||||
|
// search and check hits
|
||||||
|
matchHits(searcher, custSort);
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Run the test using one CustomSearcher wrapped by a MultiSearcher.
|
||||||
|
*/
|
||||||
|
public void testFieldSortSingleSearcher() throws Exception {
|
||||||
|
log("Run testFieldSortSingleSearcher");
|
||||||
|
// define the sort criteria
|
||||||
|
Sort custSort = new Sort(new SortField[] {
|
||||||
|
new SortField("publicationDate_"),
|
||||||
|
SortField.FIELD_SCORE
|
||||||
|
});
|
||||||
|
Searcher searcher =
|
||||||
|
new MultiSearcher(new Searchable[] {
|
||||||
|
new CustomSearcher (index, 2)});
|
||||||
|
// search and check hits
|
||||||
|
matchHits(searcher, custSort);
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Run the test using two CustomSearcher instances.
|
||||||
|
*/
|
||||||
|
public void testFieldSortMultiCustomSearcher() throws Exception {
|
||||||
|
log("Run testFieldSortMultiCustomSearcher");
|
||||||
|
// define the sort criteria
|
||||||
|
Sort custSort = new Sort(new SortField[] {
|
||||||
|
new SortField("publicationDate_"),
|
||||||
|
SortField.FIELD_SCORE
|
||||||
|
});
|
||||||
|
Searcher searcher =
|
||||||
|
new MultiSearcher(new Searchable[] {
|
||||||
|
new CustomSearcher (index, 0),
|
||||||
|
new CustomSearcher (index, 2)});
|
||||||
|
// search and check hits
|
||||||
|
matchHits(searcher, custSort);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// make sure the documents returned by the search match the expected list
|
||||||
|
private void matchHits (Searcher searcher, Sort sort)
|
||||||
|
throws IOException {
|
||||||
|
// make a query without sorting first
|
||||||
|
Hits hitsByRank = searcher.search(query);
|
||||||
|
checkHits(hitsByRank, "Sort by rank: "); // check for duplicates
|
||||||
|
Map resultMap = new TreeMap();
|
||||||
|
// store hits in TreeMap - TreeMap does not allow duplicates; existing entries are silently overwritten
|
||||||
|
for(int hitid=0;hitid<hitsByRank.length(); ++hitid) {
|
||||||
|
resultMap.put(
|
||||||
|
new Integer(hitsByRank.id(hitid)), // Key: Lucene Document ID
|
||||||
|
new Integer(hitid)); // Value: Hits-Objekt Index
|
||||||
|
}
|
||||||
|
|
||||||
|
// now make a query using the sort criteria
|
||||||
|
Hits resultSort = searcher.search (query, sort);
|
||||||
|
checkHits(resultSort, "Sort by custom criteria: "); // check for duplicates
|
||||||
|
|
||||||
|
String lf = System.getProperty("line.separator", "\n");
|
||||||
|
// besides the sorting both sets of hits must be identical
|
||||||
|
for(int hitid=0;hitid<resultSort.length(); ++hitid) {
|
||||||
|
Integer idHitDate = new Integer(resultSort.id(hitid)); // document ID from sorted search
|
||||||
|
if(!resultMap.containsKey(idHitDate)) {
|
||||||
|
log("ID "+idHitDate+" not found. Possibliy a duplicate.");
|
||||||
|
}
|
||||||
|
assertTrue(resultMap.containsKey(idHitDate)); // same ID must be in the Map from the rank-sorted search
|
||||||
|
// every hit must appear once in both result sets --> remove it from the Map.
|
||||||
|
// At the end the Map must be empty!
|
||||||
|
resultMap.remove(idHitDate);
|
||||||
|
}
|
||||||
|
if(resultMap.size()==0) {
|
||||||
|
log("All hits matched");
|
||||||
|
} else {
|
||||||
|
log("Couldn't match "+resultMap.size()+" hits.");
|
||||||
|
}
|
||||||
|
assertEquals(resultMap.size(), 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check the hits for duplicates.
|
||||||
|
* @param hits
|
||||||
|
*/
|
||||||
|
private void checkHits(Hits hits, String prefix) {
|
||||||
|
if(hits!=null) {
|
||||||
|
Map idMap = new TreeMap();
|
||||||
|
for(int docnum=0;docnum<hits.length();++docnum) {
|
||||||
|
Integer luceneId = null;
|
||||||
|
try {
|
||||||
|
luceneId = new Integer(hits.id(docnum));
|
||||||
|
if(idMap.containsKey(luceneId)) {
|
||||||
|
StringBuffer message = new StringBuffer(prefix);
|
||||||
|
message.append("Duplicate key for hit index = ");
|
||||||
|
message.append(docnum);
|
||||||
|
message.append(", previous index = ");
|
||||||
|
message.append(((Integer)idMap.get(luceneId)).toString());
|
||||||
|
message.append(", Lucene ID = ");
|
||||||
|
message.append(luceneId);
|
||||||
|
log(message.toString());
|
||||||
|
} else {
|
||||||
|
idMap.put(luceneId, new Integer(docnum));
|
||||||
|
}
|
||||||
|
} catch(IOException ioe) {
|
||||||
|
StringBuffer message = new StringBuffer(prefix);
|
||||||
|
message.append("Error occurred for hit index = ");
|
||||||
|
message.append(docnum);
|
||||||
|
message.append(" (");
|
||||||
|
message.append(ioe.getMessage());
|
||||||
|
message.append(")");
|
||||||
|
log(message.toString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Simply write to console - choosen to be independant of log4j etc
|
||||||
|
private void log(String message) {
|
||||||
|
System.out.println(message);
|
||||||
|
}
|
||||||
|
|
||||||
|
public class CustomSearcher extends IndexSearcher {
|
||||||
|
private int switcher;
|
||||||
|
/**
|
||||||
|
* @param directory
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public CustomSearcher(Directory directory, int switcher) throws IOException {
|
||||||
|
super(directory);
|
||||||
|
this.switcher = switcher;
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* @param r
|
||||||
|
*/
|
||||||
|
public CustomSearcher(IndexReader r, int switcher) {
|
||||||
|
super(r);
|
||||||
|
this.switcher = switcher;
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* @param path
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public CustomSearcher(String path, int switcher) throws IOException {
|
||||||
|
super(path);
|
||||||
|
this.switcher = switcher;
|
||||||
|
}
|
||||||
|
/* (non-Javadoc)
|
||||||
|
* @see org.apache.lucene.search.Searchable#search(org.apache.lucene.search.Query, org.apache.lucene.search.Filter, int, org.apache.lucene.search.Sort)
|
||||||
|
*/
|
||||||
|
public TopFieldDocs search(Query query, Filter filter, int nDocs,
|
||||||
|
Sort sort) throws IOException {
|
||||||
|
BooleanQuery bq = new BooleanQuery();
|
||||||
|
bq.add(query, BooleanClause.Occur.MUST);
|
||||||
|
bq.add(new TermQuery(new Term("mandant", Integer.toString(switcher))), BooleanClause.Occur.MUST);
|
||||||
|
return super.search(bq, filter, nDocs, sort);
|
||||||
|
}
|
||||||
|
/* (non-Javadoc)
|
||||||
|
* @see org.apache.lucene.search.Searchable#search(org.apache.lucene.search.Query, org.apache.lucene.search.Filter, int)
|
||||||
|
*/
|
||||||
|
public TopDocs search(Query query, Filter filter, int nDocs)
|
||||||
|
throws IOException {
|
||||||
|
BooleanQuery bq = new BooleanQuery();
|
||||||
|
bq.add(query, BooleanClause.Occur.MUST);
|
||||||
|
bq.add(new TermQuery(new Term("mandant", Integer.toString(switcher))), BooleanClause.Occur.MUST);
|
||||||
|
return super.search(bq, filter, nDocs);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
private class RandomGen {
|
||||||
|
private Random random = new Random(0); // to generate some arbitrary contents
|
||||||
|
private Calendar base = new GregorianCalendar(1980, 1, 1);
|
||||||
|
|
||||||
|
// Just to generate some different Lucene Date strings
|
||||||
|
private String getLuceneDate() {
|
||||||
|
return DateTools.timeToString(base.getTimeInMillis() + random.nextInt() - Integer.MIN_VALUE, DateTools.Resolution.DAY);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue