diff --git a/CHANGES.txt b/CHANGES.txt
index 0a0464c107a..56014d679a7 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -145,6 +145,13 @@ Optimizations
  4. LUCENE-1224: Short circuit FuzzyQuery.rewrite when input token length
     is small compared to minSimilarity. (Timo Nentwig, Mark Miller)
 
+ 5. LUCENE-1316: MatchAllDocsQuery now avoids the synchronized
+    IndexReader.isDeleted() call per document, by directly accessing
+    the underlying deletedDocs BitVector.  This improves performance
+    with non-readOnly readers, especially in a multi-threaded
+    environment.  (Todd Feak, Yonik Seeley, Jason Rutherglen via Mike
+    McCandless)
+
 Documentation
 
 Build
diff --git a/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java b/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
index 31106d0f797..186be49d0d4 100644
--- a/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
+++ b/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
@@ -31,7 +31,6 @@ import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldSelector;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
@@ -847,13 +846,20 @@ public class MemoryIndex implements Serializable {
       private boolean hasNext;
       private int cursor = 0;
       private ArrayIntList current;
+      private Term term;
 
       public void seek(Term term) {
+        this.term = term;
         if (DEBUG) System.err.println(".seek: " + term);
-        Info info = getInfo(term.field());
-        current = info == null ? null : info.getPositions(term.text());
-        hasNext = (current != null);
+        if (term == null) {
+          current = null;  // drop positions left over from an earlier seek, so freq() reports 1
+          hasNext = true;  // term==null means match all docs
+        } else {
+          Info info = getInfo(term.field());
+          current = info == null ? null : info.getPositions(term.text());
+          hasNext = (current != null);
+        }
         cursor = 0;
       }
 
       public void seek(TermEnum termEnum) {
@@ -867,7 +873,7 @@ public class MemoryIndex implements Serializable {
       }
 
       public int freq() {
-        int freq = current != null ? numPositions(current) : 0;
+        int freq = current != null ? numPositions(current) : (term == null ? 1 : 0);
         if (DEBUG) System.err.println(".freq: " + freq);
         return freq;
       }
diff --git a/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java b/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java
index 50059587f97..9ea7c5437f2 100644
--- a/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java
+++ b/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java
@@ -51,6 +51,7 @@ import org.apache.lucene.search.Query;
 import org.apache.lucene.search.Searcher;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.index.TermDocs;
 
 /**
 Verifies that Lucene MemoryIndex and RAMDirectory have the same behaviour,
@@ -282,7 +283,9 @@ public class MemoryIndexTest extends TestCase {
 //        new PatternAnalyzer(PatternAnalyzer.NON_WORD_PATTERN, true, stopWords),
 //        new SnowballAnalyzer("English", StopAnalyzer.ENGLISH_STOP_WORDS),
     };
-
+
+    boolean first = true;
+
     for (int iter=0; iter < iters; iter++) {
       System.out.println("\n########### iteration=" + iter);
       long start = System.currentTimeMillis();
@@ -306,6 +309,18 @@ public class MemoryIndexTest extends TestCase {
               boolean measureIndexing = false; // toggle this to measure query performance
               MemoryIndex memind = null;
               if (useMemIndex && !measureIndexing) memind = createMemoryIndex(doc);
+
+              if (first && memind != null) {  // memind stays null unless the line above created it
+                IndexSearcher s = memind.createSearcher();
+                TermDocs td = s.getIndexReader().termDocs(null);
+                assertTrue(td.next());
+                assertEquals(0, td.doc());
+                assertEquals(1, td.freq());
+                td.close();
+                s.close();
+                first = false;
+              }
+
               RAMDirectory ramind = null;
               if (useRAMIndex && !measureIndexing) ramind = createRAMIndex(doc);
 
diff --git a/src/java/org/apache/lucene/index/AllTermDocs.java b/src/java/org/apache/lucene/index/AllTermDocs.java
new file mode 100644
index 00000000000..c695cdb9447
--- /dev/null
+++ b/src/java/org/apache/lucene/index/AllTermDocs.java
@@ -0,0 +1,86 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.index;
+
+import org.apache.lucene.util.BitVector;
+import java.io.IOException;
+
+/**
+ * Walks all non-deleted documents of one segment, reporting freq=1 for each.
+ * The deleted-docs BitVector is read once, in the constructor; iteration itself takes no lock.
+ */
+class AllTermDocs implements TermDocs {
+  protected BitVector deletedDocs;  // null is treated as "no deletions"
+  protected int maxDoc;
+  protected int doc = -1;           // -1 = not yet positioned on a document
+
+  protected AllTermDocs(SegmentReader parent) {
+    synchronized (parent) {
+      this.deletedDocs = parent.deletedDocs;
+    }
+    this.maxDoc = parent.maxDoc();
+  }
+
+  /** Rewinds when term is null (match all docs); any other term is rejected. */
+  public void seek(Term term) throws IOException {
+    if (term != null) {
+      throw new UnsupportedOperationException();
+    }
+    doc = -1;
+  }
+
+  public void seek(TermEnum termEnum) throws IOException {
+    throw new UnsupportedOperationException();
+  }
+
+  public int doc() { return doc; }
+
+  public int freq() { return 1; }
+
+  public boolean next() throws IOException {
+    return skipTo(doc+1);
+  }
+
+  /** Reads the live docs that follow the current one into docs/freqs; returns how many. */
+  public int read(int[] docs, int[] freqs) throws IOException {
+    final int length = docs.length;
+    int i = 0;
+    while (i < length && doc < maxDoc - 1) {
+      doc++;  // advance first: never re-report the current doc, never emit the initial -1
+      if (deletedDocs == null || !deletedDocs.get(doc)) {
+        docs[i] = doc;
+        freqs[i] = 1;
+        ++i;
+      }
+    }
+    return i;
+  }
+
+  /** Positions on the first live document at or after target; false when none is left. */
+  public boolean skipTo(int target) throws IOException {
+    for (doc = target; doc < maxDoc; doc++) {
+      if (deletedDocs == null || !deletedDocs.get(doc)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /** Nothing to release. */
+  public void close() throws IOException {}
+}
diff --git a/src/java/org/apache/lucene/index/FilterIndexReader.java b/src/java/org/apache/lucene/index/FilterIndexReader.java
index 95daad5e42d..85e222a5c7f 100644
--- a/src/java/org/apache/lucene/index/FilterIndexReader.java
+++ b/src/java/org/apache/lucene/index/FilterIndexReader.java
@@ -198,6 +198,11 @@ public class FilterIndexReader extends IndexReader {
     return in.termDocs();
   }
 
+  public TermDocs termDocs(Term term) throws IOException {
+    ensureOpen();
+    return in.termDocs(term);
+  }
+
   public TermPositions termPositions() throws IOException {
     ensureOpen();
     return in.termPositions();
diff --git 
a/src/java/org/apache/lucene/index/IndexReader.java b/src/java/org/apache/lucene/index/IndexReader.java index 005fb4be594..04e757e1895 100644 --- a/src/java/org/apache/lucene/index/IndexReader.java +++ b/src/java/org/apache/lucene/index/IndexReader.java @@ -796,7 +796,9 @@ public abstract class IndexReader { /** Returns an enumeration of all the documents which contain * term. For each document, the document number, the frequency of - * the term in that document is also provided, for use in search scoring. + * the term in that document is also provided, for use in + * search scoring. If term is null, then all non-deleted + * docs are returned with freq=1. * Thus, this method implements the mapping: *