From 041f9077c2a0193b17cfa53ac61caa63a68a643a Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Tue, 31 Mar 2015 17:47:12 +0000 Subject: [PATCH] LUCENE-6379: short circuit IndexWriter.deleteDocuments(MatchAllDocsQuery) to the much faster and schema-cleansing .deleteAll git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1670410 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 7 ++ .../org/apache/lucene/index/IndexWriter.java | 12 +++- .../lucene/search/MatchAllDocsQuery.java | 6 +- .../index/TestIndexWriterDeleteByQuery.java | 71 +++++++++++++++++++ 4 files changed, 92 insertions(+), 4 deletions(-) create mode 100644 lucene/core/src/test/org/apache/lucene/index/TestIndexWriterDeleteByQuery.java diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index ab541b937b2..b97d5aa526b 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -41,6 +41,13 @@ New Features faster intersection by avoiding loading positions in certain cases. (Paul Elschot, Robert Muir via Mike McCandless) +Optimizations + +* LUCENE-6379: IndexWriter.deleteDocuments(Query...) now detects if + one of the queries is MatchAllDocsQuery and just invokes the much + faster IndexWriter.deleteAll in that case (Robert Muir, Adrien + Grand, Mike McCandless) + ======================= Lucene 5.1.0 ======================= New Features diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java index 2893eea53ca..a64be82aef2 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java @@ -32,8 +32,8 @@ import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Locale; -import java.util.Map; import java.util.Map.Entry; +import java.util.Map; import java.util.Queue; import java.util.Set; import java.util.concurrent.atomic.AtomicInteger; @@ -47,6 +47,7 @@ import org.apache.lucene.index.DocValuesUpdate.BinaryDocValuesUpdate; import org.apache.lucene.index.DocValuesUpdate.NumericDocValuesUpdate; import org.apache.lucene.index.FieldInfos.FieldNumbers; import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.store.Directory; @@ -1315,6 +1316,15 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable { */ public void deleteDocuments(Query... queries) throws IOException { ensureOpen(); + + // LUCENE-6379: Specialize MatchAllDocsQuery + for(Query query : queries) { + if (query.getClass() == MatchAllDocsQuery.class) { + deleteAll(); + return; + } + } + try { if (docWriter.deleteQueries(queries)) { processEvents(true, false); diff --git a/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java b/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java index 4089bc050e8..f13667ad401 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java @@ -30,7 +30,7 @@ import org.apache.lucene.util.ToStringUtils; * A query that matches all documents. * */ -public class MatchAllDocsQuery extends Query { +public final class MatchAllDocsQuery extends Query { private class MatchAllScorer extends Scorer { final float score; @@ -88,7 +88,7 @@ public class MatchAllDocsQuery extends Query { private float queryWeight; private float queryNorm; - public MatchAllDocsWeight(IndexSearcher searcher) { + public MatchAllDocsWeight() { super(MatchAllDocsQuery.this); } @@ -130,7 +130,7 @@ public class MatchAllDocsQuery extends Query { @Override public Weight createWeight(IndexSearcher searcher, boolean needsScores) { - return new MatchAllDocsWeight(searcher); + return new MatchAllDocsWeight(); } @Override diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterDeleteByQuery.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterDeleteByQuery.java new file mode 100644 index 00000000000..10023506298 --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterDeleteByQuery.java @@ -0,0 +1,71 @@ +package org.apache.lucene.index; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; + +public class TestIndexWriterDeleteByQuery extends LuceneTestCase { + + // LUCENE-6379 + public void testDeleteMatchAllDocsQuery() throws Exception { + Directory dir = newDirectory(); + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig()); + Document doc = new Document(); + // Norms are disabled: + doc.add(newStringField("field", "foo", Field.Store.NO)); + w.addDocument(doc); + DirectoryReader r = DirectoryReader.open(w, true); + FieldInfo fi = MultiFields.getMergedFieldInfos(r).fieldInfo("field"); + assertNotNull(fi); + assertFalse(fi.hasNorms()); + assertEquals(1, r.numDocs()); + assertEquals(1, r.maxDoc()); + + w.deleteDocuments(new MatchAllDocsQuery()); + DirectoryReader r2 = DirectoryReader.openIfChanged(r); + r.close(); + + assertNotNull(r2); + assertEquals(0, r2.numDocs()); + assertEquals(0, r2.maxDoc()); + + // Confirm the omitNorms bit is in fact no longer set: + doc = new Document(); + // Norms are disabled: + doc.add(newTextField("field", "foo", Field.Store.NO)); + w.addDocument(doc); + + DirectoryReader r3 = DirectoryReader.openIfChanged(r2); + r2.close(); + assertNotNull(r3); + assertEquals(1, r3.numDocs()); + assertEquals(1, r3.maxDoc()); + + // Make sure norms can come back to life for a field after deleting by MatchAllDocsQuery: + fi = MultiFields.getMergedFieldInfos(r3).fieldInfo("field"); + assertNotNull(fi); + assertTrue(fi.hasNorms()); + r3.close(); + w.close(); + dir.close(); + } +}