mirror of https://github.com/apache/lucene.git
LUCENE-1096: Fixed Hits behavior when hits' docs are deleted along with iterating the hits.
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@606614 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
89fe185d71
commit
23da0335a5
|
@ -189,6 +189,14 @@ Bug fixes
|
|||
25. LUCENE-1042: Remove throwing of IOException in getTermFreqVector(int, String, TermVectorMapper) to be consistent
|
||||
with other getTermFreqVector calls. Also removed the throwing of the other IOException in that method to be consistent. (Karl Wettin via Grant Ingersoll)
|
||||
|
||||
26. LUCENE-1096: Fixed Hits behavior when hits' docs are deleted
|
||||
along with iterating the hits. Deleting docs already retrieved
|
||||
now works seamlessly. If docs not yet retrieved are deleted
|
||||
(e.g. from another thread), and then, relying on the initial
|
||||
Hits.length(), an application attempts to retrieve more hits
|
||||
than actually exist , a ConcurrentMidificationException
|
||||
is thrown. (Doron Cohen)
|
||||
|
||||
|
||||
New features
|
||||
|
||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.search;
|
|||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ConcurrentModificationException;
|
||||
import java.util.Vector;
|
||||
import java.util.Iterator;
|
||||
|
||||
|
@ -31,6 +32,12 @@ import org.apache.lucene.index.CorruptIndexException;
|
|||
* performance issues. If you need to iterate over many or all hits, consider
|
||||
* using the search method that takes a {@link HitCollector}.
|
||||
* </p>
|
||||
* <p><b>Note:</b> Deleting matching documents concurrently with traversing
|
||||
* the hits, might, when deleting hits that were not yet retrieved, decrease
|
||||
* {@link #length()}. In such case,
|
||||
* {@link java.util.ConcurrentModificationException ConcurrentModificationException}
|
||||
* is thrown when accessing hit <code>n</code> ≥ current_{@link #length()}
|
||||
* (but <code>n</code> < {@link #length()}_at_start).
|
||||
*/
|
||||
public final class Hits {
|
||||
private Weight weight;
|
||||
|
@ -45,12 +52,20 @@ public final class Hits {
|
|||
private HitDoc last; // tail of LRU cache
|
||||
private int numDocs = 0; // number cached
|
||||
private int maxDocs = 200; // max to cache
|
||||
|
||||
private int nDeletions; // # deleted docs in the index.
|
||||
private int lengthAtStart; // this is the number apps usually count on (although deletions can bring it down).
|
||||
private int nDeletedHits = 0; // # of already collected hits that were meanwhile deleted.
|
||||
|
||||
boolean debugCheckedForDeletions = false; // for test purposes.
|
||||
|
||||
Hits(Searcher s, Query q, Filter f) throws IOException {
|
||||
weight = q.weight(s);
|
||||
searcher = s;
|
||||
filter = f;
|
||||
nDeletions = countDeletions(s);
|
||||
getMoreDocs(50); // retrieve 100 initially
|
||||
lengthAtStart = length;
|
||||
}
|
||||
|
||||
Hits(Searcher s, Query q, Filter f, Sort o) throws IOException {
|
||||
|
@ -58,7 +73,18 @@ public final class Hits {
|
|||
searcher = s;
|
||||
filter = f;
|
||||
sort = o;
|
||||
nDeletions = countDeletions(s);
|
||||
getMoreDocs(50); // retrieve 100 initially
|
||||
lengthAtStart = length;
|
||||
}
|
||||
|
||||
// count # deletions, return -1 if unknown.
|
||||
private int countDeletions(Searcher s) throws IOException {
|
||||
int cnt = -1;
|
||||
if (s instanceof IndexSearcher) {
|
||||
cnt = s.maxDoc() - ((IndexSearcher) s).getIndexReader().numDocs();
|
||||
}
|
||||
return cnt;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -72,6 +98,7 @@ public final class Hits {
|
|||
|
||||
int n = min * 2; // double # retrieved
|
||||
TopDocs topDocs = (sort == null) ? searcher.search(weight, filter, n) : searcher.search(weight, filter, n, sort);
|
||||
|
||||
length = topDocs.totalHits;
|
||||
ScoreDoc[] scoreDocs = topDocs.scoreDocs;
|
||||
|
||||
|
@ -81,11 +108,36 @@ public final class Hits {
|
|||
scoreNorm = 1.0f / topDocs.getMaxScore();
|
||||
}
|
||||
|
||||
int start = hitDocs.size() - nDeletedHits;
|
||||
|
||||
// any new deletions?
|
||||
int nDels2 = countDeletions(searcher);
|
||||
debugCheckedForDeletions = false;
|
||||
if (nDeletions < 0 || nDels2 > nDeletions) {
|
||||
// either we cannot count deletions, or some "previously valid hits" might have been deleted, so find exact start point
|
||||
nDeletedHits = 0;
|
||||
debugCheckedForDeletions = true;
|
||||
int i2 = 0;
|
||||
for (int i1=0; i1<hitDocs.size() && i2<scoreDocs.length; i1++) {
|
||||
int id1 = ((HitDoc)hitDocs.get(i1)).id;
|
||||
int id2 = scoreDocs[i2].doc;
|
||||
if (id1 == id2) {
|
||||
i2++;
|
||||
} else {
|
||||
nDeletedHits ++;
|
||||
}
|
||||
}
|
||||
start = i2;
|
||||
}
|
||||
|
||||
int end = scoreDocs.length < length ? scoreDocs.length : length;
|
||||
for (int i = hitDocs.size(); i < end; i++) {
|
||||
length += nDeletedHits;
|
||||
for (int i = start; i < end; i++) {
|
||||
hitDocs.addElement(new HitDoc(scoreDocs[i].score * scoreNorm,
|
||||
scoreDocs[i].doc));
|
||||
}
|
||||
|
||||
nDeletions = nDels2;
|
||||
}
|
||||
|
||||
/** Returns the total number of hits available in this set. */
|
||||
|
@ -146,7 +198,7 @@ public final class Hits {
|
|||
}
|
||||
|
||||
private final HitDoc hitDoc(int n) throws IOException {
|
||||
if (n >= length) {
|
||||
if (n >= lengthAtStart) {
|
||||
throw new IndexOutOfBoundsException("Not a valid hit number: " + n);
|
||||
}
|
||||
|
||||
|
@ -154,6 +206,10 @@ public final class Hits {
|
|||
getMoreDocs(n);
|
||||
}
|
||||
|
||||
if (n >= length) {
|
||||
throw new ConcurrentModificationException("Not a valid hit number: " + n);
|
||||
}
|
||||
|
||||
return (HitDoc) hitDocs.elementAt(n);
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,180 @@
|
|||
package org.apache.lucene.search;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.ConcurrentModificationException;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
import org.apache.lucene.analysis.WhitespaceAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.Hits;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.RAMDirectory;
|
||||
|
||||
/**
|
||||
* Test Hits searches with interleaved deletions.
|
||||
*
|
||||
* See {@link http://issues.apache.org/jira/browse/LUCENE-1096}.
|
||||
*/
|
||||
public class TestSearchHitsWithDeletions extends TestCase {
|
||||
|
||||
private static boolean VERBOSE = false;
|
||||
private static final String TEXT_FIELD = "text";
|
||||
private static final int N = 16100;
|
||||
|
||||
private static Directory directory;
|
||||
|
||||
public void setUp() throws Exception {
|
||||
// Create an index writer.
|
||||
directory = new RAMDirectory();
|
||||
IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);
|
||||
for (int i=0; i<N; i++) {
|
||||
writer.addDocument(createDocument(i));
|
||||
}
|
||||
writer.optimize();
|
||||
writer.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* Deletions during search should not alter previously retrieved hits.
|
||||
*/
|
||||
public void testSearchHitsDeleteAll() throws Exception {
|
||||
doTestSearchHitsDeleteEvery(1,false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Deletions during search should not alter previously retrieved hits.
|
||||
*/
|
||||
public void testSearchHitsDeleteEvery2ndHit() throws Exception {
|
||||
doTestSearchHitsDeleteEvery(2,false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Deletions during search should not alter previously retrieved hits.
|
||||
*/
|
||||
public void testSearchHitsDeleteEvery4thHit() throws Exception {
|
||||
doTestSearchHitsDeleteEvery(4,false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Deletions during search should not alter previously retrieved hits.
|
||||
*/
|
||||
public void testSearchHitsDeleteEvery8thHit() throws Exception {
|
||||
doTestSearchHitsDeleteEvery(8,false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Deletions during search should not alter previously retrieved hits.
|
||||
*/
|
||||
public void testSearchHitsDeleteEvery90thHit() throws Exception {
|
||||
doTestSearchHitsDeleteEvery(90,false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Deletions during search should not alter previously retrieved hits,
|
||||
* and deletions that affect total number of hits should throw the
|
||||
* correct exception when trying to fetch "too many".
|
||||
*/
|
||||
public void testSearchHitsDeleteEvery8thHitAndInAdvance() throws Exception {
|
||||
doTestSearchHitsDeleteEvery(8,true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Verify that ok also with no deletions at all.
|
||||
*/
|
||||
public void testSearchHitsNoDeletes() throws Exception {
|
||||
doTestSearchHitsDeleteEvery(N+100,false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Deletions that affect total number of hits should throw the
|
||||
* correct exception when trying to fetch "too many".
|
||||
*/
|
||||
public void testSearchHitsDeleteInAdvance() throws Exception {
|
||||
doTestSearchHitsDeleteEvery(N+100,true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Intermittent deletions during search, should not alter previously retrieved hits.
|
||||
* (Using a debugger to verify that the check in Hits is performed only
|
||||
*/
|
||||
public void testSearchHitsDeleteIntermittent() throws Exception {
|
||||
doTestSearchHitsDeleteEvery(-1,false);
|
||||
}
|
||||
|
||||
|
||||
private void doTestSearchHitsDeleteEvery(int k, boolean deleteInFront) throws Exception {
|
||||
boolean intermittent = k<0;
|
||||
log("Test search hits with "+(intermittent ? "intermittent deletions." : "deletions of every "+k+" hit."));
|
||||
IndexSearcher searcher = new IndexSearcher(directory);
|
||||
IndexReader reader = searcher.getIndexReader();
|
||||
Query q = new TermQuery(new Term(TEXT_FIELD,"text")); // matching all docs
|
||||
Hits hits = searcher.search(q);
|
||||
log("Got "+hits.length()+" results");
|
||||
assertEquals("must match all "+N+" docs, not only "+hits.length()+" docs!",N,hits.length());
|
||||
if (deleteInFront) {
|
||||
log("deleting hits that was not yet retrieved!");
|
||||
reader.deleteDocument(reader.maxDoc()-1);
|
||||
reader.deleteDocument(reader.maxDoc()-2);
|
||||
reader.deleteDocument(reader.maxDoc()-3);
|
||||
}
|
||||
try {
|
||||
for (int i = 0; i < hits.length(); i++) {
|
||||
int id = hits.id(i);
|
||||
assertEquals("Hit "+i+" has doc id "+hits.id(i)+" instead of "+i,i,hits.id(i));
|
||||
if ((intermittent && (i==50 || i==250 || i==950)) || //100-yes, 200-no, 400-yes, 800-no, 1600-yes
|
||||
(!intermittent && (k<2 || (i>0 && i%k==0)))) {
|
||||
Document doc = hits.doc(id);
|
||||
log("Deleting hit "+i+" - doc "+doc+" with id "+id);
|
||||
reader.deleteDocument(id);
|
||||
}
|
||||
if (intermittent) {
|
||||
// check internal behavior of Hits (go 50 ahead of getMoreDocs points because the deletions cause to use more of the available hits)
|
||||
if (i==150 || i==450 || i==1650) {
|
||||
assertTrue("Hit "+i+": hits should have checked for deletions in last call to getMoreDocs()",hits.debugCheckedForDeletions);
|
||||
} else if (i==50 || i==250 || i==850) {
|
||||
assertFalse("Hit "+i+": hits should have NOT checked for deletions in last call to getMoreDocs()",hits.debugCheckedForDeletions);
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (ConcurrentModificationException e) {
|
||||
// this is the only valid exception, and only when deletng in front.
|
||||
assertTrue(e.getMessage()+" not expected unless deleting hits that were not yet seen!",deleteInFront);
|
||||
}
|
||||
searcher.close();
|
||||
}
|
||||
|
||||
private static Document createDocument(int id) {
|
||||
Document doc = new Document();
|
||||
doc.add(new Field(TEXT_FIELD, "text of document"+id, Field.Store.YES, Field.Index.TOKENIZED));
|
||||
return doc;
|
||||
}
|
||||
|
||||
private static void log (String s) {
|
||||
if (VERBOSE) {
|
||||
System.out.println(s);
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue