LUCENE-1096: Fixed Hits behavior when hits' docs are deleted along with iterating the hits.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@606614 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Doron Cohen 2007-12-23 20:50:29 +00:00
parent 89fe185d71
commit 23da0335a5
3 changed files with 246 additions and 2 deletions

View File

@ -189,6 +189,14 @@ Bug fixes
25. LUCENE-1042: Remove throwing of IOException in getTermFreqVector(int, String, TermVectorMapper) to be consistent
with other getTermFreqVector calls. Also removed the throwing of the other IOException in that method to be consistent. (Karl Wettin via Grant Ingersoll)
26. LUCENE-1096: Fixed Hits behavior when hits' docs are deleted
along with iterating the hits. Deleting docs already retrieved
now works seamlessly. If docs not yet retrieved are deleted
(e.g. from another thread), and then, relying on the initial
Hits.length(), an application attempts to retrieve more hits
than actually exist , a ConcurrentMidificationException
is thrown. (Doron Cohen)
New features

View File

@ -18,6 +18,7 @@ package org.apache.lucene.search;
*/
import java.io.IOException;
import java.util.ConcurrentModificationException;
import java.util.Vector;
import java.util.Iterator;
@ -31,6 +32,12 @@ import org.apache.lucene.index.CorruptIndexException;
* performance issues. If you need to iterate over many or all hits, consider
* using the search method that takes a {@link HitCollector}.
* </p>
* <p><b>Note:</b> Deleting matching documents concurrently with traversing
* the hits, might, when deleting hits that were not yet retrieved, decrease
* {@link #length()}. In such case,
* {@link java.util.ConcurrentModificationException ConcurrentModificationException}
* is thrown when accessing hit <code>n</code> &ge; current_{@link #length()}
* (but <code>n</code> &lt; {@link #length()}_at_start).
*/
public final class Hits {
private Weight weight;
@ -45,12 +52,20 @@ public final class Hits {
private HitDoc last; // tail of LRU cache
private int numDocs = 0; // number cached
private int maxDocs = 200; // max to cache
private int nDeletions; // # deleted docs in the index.
private int lengthAtStart; // this is the number apps usually count on (although deletions can bring it down).
private int nDeletedHits = 0; // # of already collected hits that were meanwhile deleted.
boolean debugCheckedForDeletions = false; // for test purposes.
Hits(Searcher s, Query q, Filter f) throws IOException {
weight = q.weight(s);
searcher = s;
filter = f;
nDeletions = countDeletions(s);
getMoreDocs(50); // retrieve 100 initially
lengthAtStart = length;
}
Hits(Searcher s, Query q, Filter f, Sort o) throws IOException {
@ -58,7 +73,18 @@ public final class Hits {
searcher = s;
filter = f;
sort = o;
nDeletions = countDeletions(s);
getMoreDocs(50); // retrieve 100 initially
lengthAtStart = length;
}
// count # deletions, return -1 if unknown.
private int countDeletions(Searcher s) throws IOException {
int cnt = -1;
if (s instanceof IndexSearcher) {
cnt = s.maxDoc() - ((IndexSearcher) s).getIndexReader().numDocs();
}
return cnt;
}
/**
@ -72,6 +98,7 @@ public final class Hits {
int n = min * 2; // double # retrieved
TopDocs topDocs = (sort == null) ? searcher.search(weight, filter, n) : searcher.search(weight, filter, n, sort);
length = topDocs.totalHits;
ScoreDoc[] scoreDocs = topDocs.scoreDocs;
@ -81,11 +108,36 @@ public final class Hits {
scoreNorm = 1.0f / topDocs.getMaxScore();
}
int start = hitDocs.size() - nDeletedHits;
// any new deletions?
int nDels2 = countDeletions(searcher);
debugCheckedForDeletions = false;
if (nDeletions < 0 || nDels2 > nDeletions) {
// either we cannot count deletions, or some "previously valid hits" might have been deleted, so find exact start point
nDeletedHits = 0;
debugCheckedForDeletions = true;
int i2 = 0;
for (int i1=0; i1<hitDocs.size() && i2<scoreDocs.length; i1++) {
int id1 = ((HitDoc)hitDocs.get(i1)).id;
int id2 = scoreDocs[i2].doc;
if (id1 == id2) {
i2++;
} else {
nDeletedHits ++;
}
}
start = i2;
}
int end = scoreDocs.length < length ? scoreDocs.length : length;
for (int i = hitDocs.size(); i < end; i++) {
length += nDeletedHits;
for (int i = start; i < end; i++) {
hitDocs.addElement(new HitDoc(scoreDocs[i].score * scoreNorm,
scoreDocs[i].doc));
}
nDeletions = nDels2;
}
/** Returns the total number of hits available in this set. */
@ -146,7 +198,7 @@ public final class Hits {
}
private final HitDoc hitDoc(int n) throws IOException {
if (n >= length) {
if (n >= lengthAtStart) {
throw new IndexOutOfBoundsException("Not a valid hit number: " + n);
}
@ -154,6 +206,10 @@ public final class Hits {
getMoreDocs(n);
}
if (n >= length) {
throw new ConcurrentModificationException("Not a valid hit number: " + n);
}
return (HitDoc) hitDocs.elementAt(n);
}

View File

@ -0,0 +1,180 @@
package org.apache.lucene.search;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.ConcurrentModificationException;
import junit.framework.TestCase;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
/**
* Test Hits searches with interleaved deletions.
*
* See {@link http://issues.apache.org/jira/browse/LUCENE-1096}.
*/
public class TestSearchHitsWithDeletions extends TestCase {
private static boolean VERBOSE = false;
private static final String TEXT_FIELD = "text";
private static final int N = 16100;
private static Directory directory;
public void setUp() throws Exception {
// Create an index writer.
directory = new RAMDirectory();
IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);
for (int i=0; i<N; i++) {
writer.addDocument(createDocument(i));
}
writer.optimize();
writer.close();
}
/**
* Deletions during search should not alter previously retrieved hits.
*/
public void testSearchHitsDeleteAll() throws Exception {
doTestSearchHitsDeleteEvery(1,false);
}
/**
* Deletions during search should not alter previously retrieved hits.
*/
public void testSearchHitsDeleteEvery2ndHit() throws Exception {
doTestSearchHitsDeleteEvery(2,false);
}
/**
* Deletions during search should not alter previously retrieved hits.
*/
public void testSearchHitsDeleteEvery4thHit() throws Exception {
doTestSearchHitsDeleteEvery(4,false);
}
/**
* Deletions during search should not alter previously retrieved hits.
*/
public void testSearchHitsDeleteEvery8thHit() throws Exception {
doTestSearchHitsDeleteEvery(8,false);
}
/**
* Deletions during search should not alter previously retrieved hits.
*/
public void testSearchHitsDeleteEvery90thHit() throws Exception {
doTestSearchHitsDeleteEvery(90,false);
}
/**
* Deletions during search should not alter previously retrieved hits,
* and deletions that affect total number of hits should throw the
* correct exception when trying to fetch "too many".
*/
public void testSearchHitsDeleteEvery8thHitAndInAdvance() throws Exception {
doTestSearchHitsDeleteEvery(8,true);
}
/**
* Verify that ok also with no deletions at all.
*/
public void testSearchHitsNoDeletes() throws Exception {
doTestSearchHitsDeleteEvery(N+100,false);
}
/**
* Deletions that affect total number of hits should throw the
* correct exception when trying to fetch "too many".
*/
public void testSearchHitsDeleteInAdvance() throws Exception {
doTestSearchHitsDeleteEvery(N+100,true);
}
/**
* Intermittent deletions during search, should not alter previously retrieved hits.
* (Using a debugger to verify that the check in Hits is performed only
*/
public void testSearchHitsDeleteIntermittent() throws Exception {
doTestSearchHitsDeleteEvery(-1,false);
}
private void doTestSearchHitsDeleteEvery(int k, boolean deleteInFront) throws Exception {
boolean intermittent = k<0;
log("Test search hits with "+(intermittent ? "intermittent deletions." : "deletions of every "+k+" hit."));
IndexSearcher searcher = new IndexSearcher(directory);
IndexReader reader = searcher.getIndexReader();
Query q = new TermQuery(new Term(TEXT_FIELD,"text")); // matching all docs
Hits hits = searcher.search(q);
log("Got "+hits.length()+" results");
assertEquals("must match all "+N+" docs, not only "+hits.length()+" docs!",N,hits.length());
if (deleteInFront) {
log("deleting hits that was not yet retrieved!");
reader.deleteDocument(reader.maxDoc()-1);
reader.deleteDocument(reader.maxDoc()-2);
reader.deleteDocument(reader.maxDoc()-3);
}
try {
for (int i = 0; i < hits.length(); i++) {
int id = hits.id(i);
assertEquals("Hit "+i+" has doc id "+hits.id(i)+" instead of "+i,i,hits.id(i));
if ((intermittent && (i==50 || i==250 || i==950)) || //100-yes, 200-no, 400-yes, 800-no, 1600-yes
(!intermittent && (k<2 || (i>0 && i%k==0)))) {
Document doc = hits.doc(id);
log("Deleting hit "+i+" - doc "+doc+" with id "+id);
reader.deleteDocument(id);
}
if (intermittent) {
// check internal behavior of Hits (go 50 ahead of getMoreDocs points because the deletions cause to use more of the available hits)
if (i==150 || i==450 || i==1650) {
assertTrue("Hit "+i+": hits should have checked for deletions in last call to getMoreDocs()",hits.debugCheckedForDeletions);
} else if (i==50 || i==250 || i==850) {
assertFalse("Hit "+i+": hits should have NOT checked for deletions in last call to getMoreDocs()",hits.debugCheckedForDeletions);
}
}
}
} catch (ConcurrentModificationException e) {
// this is the only valid exception, and only when deletng in front.
assertTrue(e.getMessage()+" not expected unless deleting hits that were not yet seen!",deleteInFront);
}
searcher.close();
}
private static Document createDocument(int id) {
Document doc = new Document();
doc.add(new Field(TEXT_FIELD, "text of document"+id, Field.Store.YES, Field.Index.TOKENIZED));
return doc;
}
private static void log (String s) {
if (VERBOSE) {
System.out.println(s);
}
}
}