LUCENE-6379: short circuit IndexWriter.deleteDocuments(MatchAllDocsQuery) to the much faster and schema-cleansing .deleteAll

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1670410 13f79535-47bb-0310-9956-ffa450edef68
2015-03-31 17:47:12 +00:00 · 2015-03-31 17:47:12 +00:00 · 041f9077c2
parent 72e8c75c29
commit 041f9077c2
4 changed files with 92 additions and 4 deletions
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@ -41,6 +41,13 @@ New Features
  faster intersection by avoiding loading positions in certain cases.
  (Paul Elschot, Robert Muir via Mike McCandless)

+Optimizations
+
+* LUCENE-6379: IndexWriter.deleteDocuments(Query...) now detects if
+  one of the queries is MatchAllDocsQuery and just invokes the much
+  faster IndexWriter.deleteAll in that case (Robert Muir, Adrien
+  Grand, Mike McCandless)
+
 ======================= Lucene 5.1.0 =======================

 New Features
--- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
@ -32,8 +32,8 @@ import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Locale;
-import java.util.Map;
 import java.util.Map.Entry;
+import java.util.Map;
 import java.util.Queue;
 import java.util.Set;
 import java.util.concurrent.atomic.AtomicInteger;
@ -47,6 +47,7 @@ import org.apache.lucene.index.DocValuesUpdate.BinaryDocValuesUpdate;
 import org.apache.lucene.index.DocValuesUpdate.NumericDocValuesUpdate;
 import org.apache.lucene.index.FieldInfos.FieldNumbers;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.store.Directory;
@ -1315,6 +1316,15 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
   */
  public void deleteDocuments(Query... queries) throws IOException {
    ensureOpen();
+
+    // LUCENE-6379: Specialize MatchAllDocsQuery
+    for(Query query : queries) {
+      if (query.getClass() == MatchAllDocsQuery.class) {
+        deleteAll();
+        return;
+      }
+    }
+
    try {
      if (docWriter.deleteQueries(queries)) {
        processEvents(true, false);
--- a/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java
@ -30,7 +30,7 @@ import org.apache.lucene.util.ToStringUtils;
 * A query that matches all documents.
 *
 */
-public class MatchAllDocsQuery extends Query {
+public final class MatchAllDocsQuery extends Query {

  private class MatchAllScorer extends Scorer {
    final float score;
@ -88,7 +88,7 @@ public class MatchAllDocsQuery extends Query {
    private float queryWeight;
    private float queryNorm;

-    public MatchAllDocsWeight(IndexSearcher searcher) {
+    public MatchAllDocsWeight() {
      super(MatchAllDocsQuery.this);
    }

@ -130,7 +130,7 @@ public class MatchAllDocsQuery extends Query {

  @Override
  public Weight createWeight(IndexSearcher searcher, boolean needsScores) {
-    return new MatchAllDocsWeight(searcher);
+    return new MatchAllDocsWeight();
  }

  @Override
--- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterDeleteByQuery.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterDeleteByQuery.java
@ -0,0 +1,71 @@
+package org.apache.lucene.index;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.LuceneTestCase;
+
+public class TestIndexWriterDeleteByQuery extends LuceneTestCase {
+
+  // LUCENE-6379
+  public void testDeleteMatchAllDocsQuery() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+    Document doc = new Document();
+    // Norms are disabled:
+    doc.add(newStringField("field", "foo", Field.Store.NO));
+    w.addDocument(doc);
+    DirectoryReader r = DirectoryReader.open(w, true);
+    FieldInfo fi = MultiFields.getMergedFieldInfos(r).fieldInfo("field");
+    assertNotNull(fi);
+    assertFalse(fi.hasNorms());
+    assertEquals(1, r.numDocs());
+    assertEquals(1, r.maxDoc());
+
+    w.deleteDocuments(new MatchAllDocsQuery());
+    DirectoryReader r2 = DirectoryReader.openIfChanged(r);
+    r.close();
+
+    assertNotNull(r2);
+    assertEquals(0, r2.numDocs());
+    assertEquals(0, r2.maxDoc());
+
+    // Confirm the omitNorms bit is in fact no longer set:
+    doc = new Document();
+    // Norms are disabled:
+    doc.add(newTextField("field", "foo", Field.Store.NO));
+    w.addDocument(doc);
+
+    DirectoryReader r3 = DirectoryReader.openIfChanged(r2);
+    r2.close();
+    assertNotNull(r3);
+    assertEquals(1, r3.numDocs());
+    assertEquals(1, r3.maxDoc());
+
+    // Make sure norms can come back to life for a field after deleting by MatchAllDocsQuery:
+    fi = MultiFields.getMergedFieldInfos(r3).fieldInfo("field");    
+    assertNotNull(fi);
+    assertTrue(fi.hasNorms());
+    r3.close();
+    w.close();
+    dir.close();
+  }
+}