mirror of https://github.com/apache/lucene.git
LUCENE-1316: don't call synchronized IndexReader.isDeleted when scoring MatchAllDocsQuery
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@737513 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
  parent c10a419fa4
  commit c6f6f01643
CHANGES.txt
@@ -145,6 +145,13 @@ Optimizations
  4. LUCENE-1224: Short circuit FuzzyQuery.rewrite when input token length
     is small compared to minSimilarity. (Timo Nentwig, Mark Miller)
 
+ 5. LUCENE-1316: MatchAllDocsQuery now avoids the synchronized
+    IndexReader.isDeleted() call per document, by directly accessing
+    the underlying deleteDocs BitVector. This improves performance
+    with non-readOnly readers, especially in a multi-threaded
+    environment. (Todd Feak, Yonik Seeley, Jason Rutherglen via Mike
+    McCandless)
+
 Documentation
 
 Build

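The gist of the change, as a standalone sketch (illustrative only, not code from this commit; SnapshotSketch, readerLock, and the countLive* methods are made-up names, and java.util.BitSet stands in for Lucene's BitVector): instead of paying one synchronized IndexReader.isDeleted() call per candidate document, the scorer now copies the deleted-docs reference once, under the reader's lock, and scans it lock-free afterwards.

    import java.util.BitSet;

    // Hypothetical sketch of the locking pattern adopted in this commit.
    class SnapshotSketch {
      private final Object readerLock = new Object();
      private BitSet deletedDocs;          // may be null when nothing is deleted

      int countLiveOld(int maxDoc) {       // old pattern: lock per document
        int live = 0;
        for (int doc = 0; doc < maxDoc; doc++) {
          synchronized (readerLock) {      // contended on every iteration
            if (deletedDocs == null || !deletedDocs.get(doc)) live++;
          }
        }
        return live;
      }

      int countLiveNew(int maxDoc) {       // new pattern: lock once, then scan
        final BitSet snapshot;
        synchronized (readerLock) {        // single acquisition up front
          snapshot = deletedDocs;
        }
        int live = 0;
        for (int doc = 0; doc < maxDoc; doc++) {
          if (snapshot == null || !snapshot.get(doc)) live++;
        }
        return live;
      }
    }
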
MemoryIndex.java
@@ -31,7 +31,6 @@ import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldSelector;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
@@ -847,13 +846,19 @@ public class MemoryIndex implements Serializable {
     private boolean hasNext;
     private int cursor = 0;
     private ArrayIntList current;
+    private Term term;
 
     public void seek(Term term) {
+      this.term = term;
       if (DEBUG) System.err.println(".seek: " + term);
-      Info info = getInfo(term.field());
-      current = info == null ? null : info.getPositions(term.text());
-      hasNext = (current != null);
-      cursor = 0;
+      if (term == null) {
+        hasNext = true; // term==null means match all docs
+      } else {
+        Info info = getInfo(term.field());
+        current = info == null ? null : info.getPositions(term.text());
+        hasNext = (current != null);
+        cursor = 0;
+      }
     }
 
     public void seek(TermEnum termEnum) {
@@ -867,7 +872,7 @@ public class MemoryIndex implements Serializable {
     }
 
     public int freq() {
-      int freq = current != null ? numPositions(current) : 0;
+      int freq = current != null ? numPositions(current) : (term == null ? 1 : 0);
       if (DEBUG) System.err.println(".freq: " + freq);
       return freq;
     }

MemoryIndexTest.java
@@ -51,6 +51,7 @@ import org.apache.lucene.search.Query;
 import org.apache.lucene.search.Searcher;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.index.TermDocs;
 
 /**
 Verifies that Lucene MemoryIndex and RAMDirectory have the same behaviour,
@@ -282,7 +283,9 @@ public class MemoryIndexTest extends TestCase {
 //      new PatternAnalyzer(PatternAnalyzer.NON_WORD_PATTERN, true, stopWords),
 //      new SnowballAnalyzer("English", StopAnalyzer.ENGLISH_STOP_WORDS),
     };
-
+
+    boolean first = true;
+
     for (int iter=0; iter < iters; iter++) {
       System.out.println("\n########### iteration=" + iter);
       long start = System.currentTimeMillis();
@@ -306,6 +309,18 @@ public class MemoryIndexTest extends TestCase {
       boolean measureIndexing = false; // toggle this to measure query performance
       MemoryIndex memind = null;
       if (useMemIndex && !measureIndexing) memind = createMemoryIndex(doc);
+
+      if (first) {
+        IndexSearcher s = memind.createSearcher();
+        TermDocs td = s.getIndexReader().termDocs(null);
+        assertTrue(td.next());
+        assertEquals(0, td.doc());
+        assertEquals(1, td.freq());
+        td.close();
+        s.close();
+        first = false;
+      }
+
       RAMDirectory ramind = null;
       if (useRAMIndex && !measureIndexing) ramind = createRAMIndex(doc);
 

AllTermDocs.java
@@ -0,0 +1,86 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.index;
+
+import org.apache.lucene.util.BitVector;
+import java.io.IOException;
+
+class AllTermDocs implements TermDocs {
+  protected BitVector deletedDocs;
+  protected int maxDoc;
+  protected int doc = -1;
+
+  protected AllTermDocs(SegmentReader parent) {
+    synchronized (parent) {
+      this.deletedDocs = parent.deletedDocs;
+    }
+    this.maxDoc = parent.maxDoc();
+  }
+
+  public void seek(Term term) throws IOException {
+    if (term==null) {
+      doc = -1;
+    } else {
+      throw new UnsupportedOperationException();
+    }
+  }
+
+  public void seek(TermEnum termEnum) throws IOException {
+    throw new UnsupportedOperationException();
+  }
+
+  public int doc() {
+    return doc;
+  }
+
+  public int freq() {
+    return 1;
+  }
+
+  public boolean next() throws IOException {
+    return skipTo(doc+1);
+  }
+
+  public int read(int[] docs, int[] freqs) throws IOException {
+    final int length = docs.length;
+    int i = 0;
+    while (i < length && doc < maxDoc) {
+      if (deletedDocs == null || !deletedDocs.get(doc)) {
+        docs[i] = doc;
+        freqs[i] = 1;
+        ++i;
+      }
+      doc++;
+    }
+    return i;
+  }
+
+  public boolean skipTo(int target) throws IOException {
+    doc = target;
+    while (doc < maxDoc) {
+      if (deletedDocs == null || !deletedDocs.get(doc)) {
+        return true;
+      }
+      doc++;
+    }
+    return false;
+  }
+
+  public void close() throws IOException {
+  }
+}

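For orientation, a minimal usage sketch of the new null-term convention (not part of the commit; AllDocsScan is a hypothetical helper, and the Directory is assumed to be already populated): after this change, IndexReader.termDocs(null) enumerates every non-deleted document with freq()==1, which is exactly what AllTermDocs implements at the segment level. The tests added later in this commit exercise the same pattern.

    import java.io.IOException;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.TermDocs;
    import org.apache.lucene.store.Directory;

    // Hedged sketch against the 2.4-era API used throughout this commit.
    class AllDocsScan {
      static void printLiveDocs(Directory dir) throws IOException {
        IndexReader reader = IndexReader.open(dir);
        TermDocs td = reader.termDocs(null);  // null term == all non-deleted docs
        try {
          while (td.next()) {
            // freq() is always 1 for the all-docs enumeration
            System.out.println("doc=" + td.doc() + " freq=" + td.freq());
          }
        } finally {
          td.close();
          reader.close();
        }
      }
    }
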
FilterIndexReader.java
@@ -198,6 +198,11 @@ public class FilterIndexReader extends IndexReader {
     return in.termDocs();
   }
 
+  public TermDocs termDocs(Term term) throws IOException {
+    ensureOpen();
+    return in.termDocs(term);
+  }
+
   public TermPositions termPositions() throws IOException {
     ensureOpen();
     return in.termPositions();

IndexReader.java
@@ -796,7 +796,9 @@ public abstract class IndexReader {
 
   /** Returns an enumeration of all the documents which contain
    * <code>term</code>. For each document, the document number, the frequency of
-   * the term in that document is also provided, for use in search scoring.
+   * the term in that document is also provided, for use in
+   * search scoring. If term is null, then all non-deleted
+   * docs are returned with freq=1.
    * Thus, this method implements the mapping:
    * <p><ul>
    * Term => <docNum, freq><sup>*</sup>

MultiSegmentReader.java
@@ -19,7 +19,6 @@ package org.apache.lucene.index;
 
 import java.io.IOException;
 import java.util.Collection;
-import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
@@ -531,7 +530,7 @@ class MultiSegmentReader extends DirectoryIndexReader {
 
       readerTermDocs = new TermDocs[r.length];
     }
-    
+
     public int doc() {
       return base + current.doc();
     }
@@ -601,8 +600,6 @@ class MultiSegmentReader extends DirectoryIndexReader {
     }
 
     private TermDocs termDocs(int i) throws IOException {
-      if (term == null)
-        return null;
       TermDocs result = readerTermDocs[i];
       if (result == null)
         result = readerTermDocs[i] = termDocs(readers[i]);
@@ -612,7 +609,7 @@ class MultiSegmentReader extends DirectoryIndexReader {
 
     protected TermDocs termDocs(IndexReader reader)
       throws IOException {
-      return reader.termDocs();
+      return term==null ? reader.termDocs(null) : reader.termDocs();
     }
 
     public void close() throws IOException {

ParallelReader.java
@@ -523,7 +523,12 @@ public class ParallelReader extends IndexReader {
     protected TermDocs termDocs;
 
     public ParallelTermDocs() {}
-    public ParallelTermDocs(Term term) throws IOException { seek(term); }
+    public ParallelTermDocs(Term term) throws IOException {
+      if (term == null)
+        termDocs = readers.isEmpty() ? null : ((IndexReader)readers.get(0)).termDocs(null);
+      else
+        seek(term);
+    }
 
     public int doc() { return termDocs.doc(); }
     public int freq() { return termDocs.freq(); }

SegmentReader.java
@@ -724,6 +724,14 @@ class SegmentReader extends DirectoryIndexReader {
     return (deletedDocs != null && deletedDocs.get(n));
   }
 
+  public TermDocs termDocs(Term term) throws IOException {
+    if (term == null) {
+      return new AllTermDocs(this);
+    } else {
+      return super.termDocs(term);
+    }
+  }
+
   public TermDocs termDocs() throws IOException {
     ensureOpen();
     return new SegmentTermDocs(this);

SegmentTermDocs.java
@@ -46,7 +46,9 @@ class SegmentTermDocs implements TermDocs {
   protected SegmentTermDocs(SegmentReader parent) {
     this.parent = parent;
     this.freqStream = (IndexInput) parent.freqStream.clone();
-    this.deletedDocs = parent.deletedDocs;
+    synchronized (parent) {
+      this.deletedDocs = parent.deletedDocs;
+    }
     this.skipInterval = parent.tis.getSkipInterval();
     this.maxSkipLevels = parent.tis.getMaxSkipLevels();
   }

MatchAllDocsQuery.java
@@ -18,15 +18,11 @@ package org.apache.lucene.search;
  */
 
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.search.Explanation;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.Scorer;
-import org.apache.lucene.search.Searcher;
-import org.apache.lucene.search.Similarity;
-import org.apache.lucene.search.Weight;
+import org.apache.lucene.index.TermDocs;
 import org.apache.lucene.util.ToStringUtils;
 
 import java.util.Set;
+import java.io.IOException;
 
 /**
  * A query that matches all documents.
@@ -38,17 +34,13 @@ public class MatchAllDocsQuery extends Query {
   }
 
   private class MatchAllScorer extends Scorer {
-
-    final IndexReader reader;
-    int id;
-    final int maxId;
+    final TermDocs termDocs;
     final float score;
 
-    MatchAllScorer(IndexReader reader, Similarity similarity, Weight w) {
+    MatchAllScorer(IndexReader reader, Similarity similarity, Weight w) throws IOException
+    {
       super(similarity);
-      this.reader = reader;
-      id = -1;
-      maxId = reader.maxDoc() - 1;
+      this.termDocs = reader.termDocs(null);
       score = w.getValue();
     }
 
@@ -57,26 +49,19 @@ public class MatchAllDocsQuery extends Query {
     }
 
     public int doc() {
-      return id;
+      return termDocs.doc();
     }
 
-    public boolean next() {
-      while (id < maxId) {
-        id++;
-        if (!reader.isDeleted(id)) {
-          return true;
-        }
-      }
-      return false;
+    public boolean next() throws IOException {
+      return termDocs.next();
     }
 
     public float score() {
      return score;
    }
 
-    public boolean skipTo(int target) {
-      id = target - 1;
-      return next();
+    public boolean skipTo(int target) throws IOException {
+      return termDocs.skipTo(target);
    }
 
  }
@@ -112,7 +97,7 @@ public class MatchAllDocsQuery extends Query {
       queryWeight *= this.queryNorm;
     }
 
-    public Scorer scorer(IndexReader reader) {
+    public Scorer scorer(IndexReader reader) throws IOException {
       return new MatchAllScorer(reader, similarity, this);
     }
 

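To see the rewritten scorer end to end, a hedged sketch along the lines of the commit's own TestMatchAllDocsQuery (MatchAllSketch is a made-up class; the 2.4-era calls mirror those used in the diffs above):

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.MatchAllDocsQuery;
    import org.apache.lucene.search.TopDocs;
    import org.apache.lucene.store.RAMDirectory;

    // Sketch: every document matches; deletions are honored by the
    // TermDocs-backed scorer rather than by per-doc isDeleted() calls.
    class MatchAllSketch {
      public static void main(String[] args) throws Exception {
        RAMDirectory dir = new RAMDirectory();
        IndexWriter iw = new IndexWriter(dir, new StandardAnalyzer(), true,
                                         IndexWriter.MaxFieldLength.LIMITED);
        for (int i = 0; i < 3; i++) {
          Document doc = new Document();
          doc.add(new Field("key", "doc" + i, Field.Store.YES,
                            Field.Index.NOT_ANALYZED));
          iw.addDocument(doc);
        }
        iw.close();

        IndexSearcher searcher = new IndexSearcher(dir);
        TopDocs hits = searcher.search(new MatchAllDocsQuery(), null, 10);
        System.out.println("matched " + hits.totalHits + " docs");  // 3
        searcher.close();
      }
    }

Internally the scorer now just forwards doc()/next()/skipTo() to the TermDocs obtained from reader.termDocs(null), so deleted documents are skipped by AllTermDocs' BitVector scan.
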
TestFilterIndexReader.java
@@ -125,6 +125,15 @@ public class TestFilterIndexReader extends LuceneTestCase {
       assertTrue((positions.doc() % 2) == 1);
     }
 
+    int NUM_DOCS = 3;
+
+    TermDocs td = reader.termDocs(null);
+    for(int i=0;i<NUM_DOCS;i++) {
+      assertTrue(td.next());
+      assertEquals(i, td.doc());
+      assertEquals(1, td.freq());
+    }
+    td.close();
     reader.close();
     directory.close();
   }

TestMultiSegmentReader.java
@@ -149,6 +149,19 @@ public class TestMultiSegmentReader extends LuceneTestCase {
     mr.close();
   }
 
+  public void testAllTermDocs() throws IOException {
+    IndexReader reader = openReader();
+    int NUM_DOCS = 2;
+    TermDocs td = reader.termDocs(null);
+    for(int i=0;i<NUM_DOCS;i++) {
+      assertTrue(td.next());
+      assertEquals(i, td.doc());
+      assertEquals(1, td.freq());
+    }
+    td.close();
+    reader.close();
+  }
+
   private void addDoc(RAMDirectory ramDir1, String s, boolean create) throws IOException {
     IndexWriter iw = new IndexWriter(ramDir1, new StandardAnalyzer(), create, IndexWriter.MaxFieldLength.LIMITED);
     Document doc = new Document();

TestParallelReader.java
@@ -123,7 +123,7 @@ public class TestParallelReader extends LuceneTestCase {
 
   public void testIsCurrent() throws IOException {
     Directory dir1 = getDir1();
-    Directory dir2 = getDir1();
+    Directory dir2 = getDir2();
     ParallelReader pr = new ParallelReader();
     pr.add(IndexReader.open(dir1));
     pr.add(IndexReader.open(dir2));
@@ -147,7 +147,7 @@ public class TestParallelReader extends LuceneTestCase {
 
   public void testIsOptimized() throws IOException {
     Directory dir1 = getDir1();
-    Directory dir2 = getDir1();
+    Directory dir2 = getDir2();
 
     // add another document to ensure that the indexes are not optimized
     IndexWriter modifier = new IndexWriter(dir1, new StandardAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
@@ -194,6 +194,25 @@ public class TestParallelReader extends LuceneTestCase {
 
   }
 
+  public void testAllTermDocs() throws IOException {
+    Directory dir1 = getDir1();
+    Directory dir2 = getDir2();
+    ParallelReader pr = new ParallelReader();
+    pr.add(IndexReader.open(dir1));
+    pr.add(IndexReader.open(dir2));
+    int NUM_DOCS = 2;
+    TermDocs td = pr.termDocs(null);
+    for(int i=0;i<NUM_DOCS;i++) {
+      assertTrue(td.next());
+      assertEquals(i, td.doc());
+      assertEquals(1, td.freq());
+    }
+    td.close();
+    pr.close();
+    dir1.close();
+    dir2.close();
+  }
+
 
   private void queryTest(Query query) throws IOException {
     ScoreDoc[] parallelHits = parallel.search(query, null, 1000).scoreDocs;

TestMatchAllDocsQuery.java
@@ -36,6 +36,7 @@ public class TestMatchAllDocsQuery extends LuceneTestCase {
   public void testQuery() throws IOException {
     RAMDirectory dir = new RAMDirectory();
     IndexWriter iw = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
+    iw.setMaxBufferedDocs(2); // force multi-segment
     addDoc("one", iw);
     addDoc("two", iw);
     addDoc("three four", iw);