LUCENE-6379: short circuit IndexWriter.deleteDocuments(MatchAllDocsQuery) to the much faster and schema-cleansing .deleteAll

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1670410 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2015-03-31 17:47:12 +00:00
parent 72e8c75c29
commit 041f9077c2
4 changed files with 92 additions and 4 deletions

View File

@ -41,6 +41,13 @@ New Features
faster intersection by avoiding loading positions in certain cases.
(Paul Elschot, Robert Muir via Mike McCandless)
Optimizations
* LUCENE-6379: IndexWriter.deleteDocuments(Query...) now detects if
one of the queries is MatchAllDocsQuery and just invokes the much
faster IndexWriter.deleteAll in that case (Robert Muir, Adrien
Grand, Mike McCandless)
======================= Lucene 5.1.0 =======================
New Features

View File

@ -32,8 +32,8 @@ import java.util.Iterator;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.List; import java.util.List;
import java.util.Locale; import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry; import java.util.Map.Entry;
import java.util.Map;
import java.util.Queue; import java.util.Queue;
import java.util.Set; import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicInteger;
@ -47,6 +47,7 @@ import org.apache.lucene.index.DocValuesUpdate.BinaryDocValuesUpdate;
import org.apache.lucene.index.DocValuesUpdate.NumericDocValuesUpdate; import org.apache.lucene.index.DocValuesUpdate.NumericDocValuesUpdate;
import org.apache.lucene.index.FieldInfos.FieldNumbers; import org.apache.lucene.index.FieldInfos.FieldNumbers;
import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
@ -1315,6 +1316,15 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
*/ */
public void deleteDocuments(Query... queries) throws IOException { public void deleteDocuments(Query... queries) throws IOException {
ensureOpen(); ensureOpen();
// LUCENE-6379: Specialize MatchAllDocsQuery
for(Query query : queries) {
if (query.getClass() == MatchAllDocsQuery.class) {
deleteAll();
return;
}
}
try { try {
if (docWriter.deleteQueries(queries)) { if (docWriter.deleteQueries(queries)) {
processEvents(true, false); processEvents(true, false);

View File

@ -30,7 +30,7 @@ import org.apache.lucene.util.ToStringUtils;
* A query that matches all documents. * A query that matches all documents.
* *
*/ */
public class MatchAllDocsQuery extends Query { public final class MatchAllDocsQuery extends Query {
private class MatchAllScorer extends Scorer { private class MatchAllScorer extends Scorer {
final float score; final float score;
@ -88,7 +88,7 @@ public class MatchAllDocsQuery extends Query {
private float queryWeight; private float queryWeight;
private float queryNorm; private float queryNorm;
public MatchAllDocsWeight(IndexSearcher searcher) { public MatchAllDocsWeight() {
super(MatchAllDocsQuery.this); super(MatchAllDocsQuery.this);
} }
@ -130,7 +130,7 @@ public class MatchAllDocsQuery extends Query {
@Override @Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) { public Weight createWeight(IndexSearcher searcher, boolean needsScores) {
return new MatchAllDocsWeight(searcher); return new MatchAllDocsWeight();
} }
@Override @Override

View File

@ -0,0 +1,71 @@
package org.apache.lucene.index;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
/**
 * Tests for {@link IndexWriter#deleteDocuments(org.apache.lucene.search.Query...)}
 * when one of the queries is a {@link MatchAllDocsQuery} (LUCENE-6379).
 */
public class TestIndexWriterDeleteByQuery extends LuceneTestCase {

  /**
   * LUCENE-6379: deleting by {@link MatchAllDocsQuery} must remove every
   * document and also reset the per-field schema, so that a field first
   * indexed without norms can afterwards be indexed with norms.
   *
   * @throws Exception if indexing or reading the index fails
   */
  public void testDeleteMatchAllDocsQuery() throws Exception {
    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
    Document doc = new Document();
    // Index "field" as a string field; norms are disabled for it (asserted below):
    doc.add(newStringField("field", "foo", Field.Store.NO));
    w.addDocument(doc);

    // Open a reader from the writer and check the starting state:
    // one live document, and "field" exists without norms.
    DirectoryReader r = DirectoryReader.open(w, true);
    FieldInfo fi = MultiFields.getMergedFieldInfos(r).fieldInfo("field");
    assertNotNull(fi);
    assertFalse(fi.hasNorms());
    assertEquals(1, r.numDocs());
    assertEquals(1, r.maxDoc());

    w.deleteDocuments(new MatchAllDocsQuery());

    // The reopened reader must see a fully empty index. maxDoc is 0 as well
    // as numDocs, i.e. the document is gone rather than merely marked deleted.
    DirectoryReader r2 = DirectoryReader.openIfChanged(r);
    r.close();
    assertNotNull(r2);
    assertEquals(0, r2.numDocs());
    assertEquals(0, r2.maxDoc());

    // Re-add the same field name, this time as a text field, to confirm the
    // earlier "no norms" setting did not survive the delete:
    doc = new Document();
    // Norms are enabled for this field (asserted below):
    doc.add(newTextField("field", "foo", Field.Store.NO));
    w.addDocument(doc);

    DirectoryReader r3 = DirectoryReader.openIfChanged(r2);
    r2.close();
    assertNotNull(r3);
    assertEquals(1, r3.numDocs());
    assertEquals(1, r3.maxDoc());

    // Make sure norms can come back to life for a field after deleting by MatchAllDocsQuery:
    fi = MultiFields.getMergedFieldInfos(r3).fieldInfo("field");
    assertNotNull(fi);
    assertTrue(fi.hasNorms());

    r3.close();
    w.close();
    dir.close();
  }
}