From c1496444b214c88174546ea4d629f3e9de0476e2 Mon Sep 17 00:00:00 2001
From: Michael Busch
Date: Thu, 26 Jul 2007 22:52:08 +0000
Subject: [PATCH] LUCENE-781: MultiReader fixed to not throw NPE if
 isCurrent(), isOptimized() or getVersion() is called.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@560024 13f79535-47bb-0310-9956-ffa450edef68
---
 CHANGES.txt                                   |   8 +
 .../org/apache/lucene/index/IndexReader.java  |   2 +-
 .../org/apache/lucene/index/MultiReader.java  | 449 +----------------
 .../lucene/index/MultiSegmentReader.java      | 475 ++++++++++++++++++
 .../apache/lucene/index/TestMultiReader.java  |  14 +-
 5 files changed, 506 insertions(+), 442 deletions(-)
 create mode 100644 src/java/org/apache/lucene/index/MultiSegmentReader.java

diff --git a/CHANGES.txt b/CHANGES.txt
index c8e93f6fe63..4578746ae9d 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -46,6 +46,14 @@ Bug fixes

     6. LUCENE-957: RAMDirectory fixed to properly handle directories
        larger than Integer.MAX_VALUE. (Doron Cohen)
+
+    7. LUCENE-781: MultiReader fixed to not throw NPE if isCurrent(),
+       isOptimized() or getVersion() is called. Separated MultiReader
+       into two classes: MultiSegmentReader extends IndexReader, is
+       package-protected and is created automatically by IndexReader.open()
+       if the index has multiple segments. The public MultiReader
+       now extends MultiSegmentReader and is intended to be used by users
+       who want to add their own subreaders. (Daniel Naber, Michael Busch)

 New features

diff --git a/src/java/org/apache/lucene/index/IndexReader.java b/src/java/org/apache/lucene/index/IndexReader.java
index 84d6fd9cacb..b2c007020cc 100644
--- a/src/java/org/apache/lucene/index/IndexReader.java
+++ b/src/java/org/apache/lucene/index/IndexReader.java
@@ -208,7 +208,7 @@ public abstract class IndexReader {
         }
       }

-      reader = new MultiReader(directory, infos, closeDirectory, readers);
+      reader = new MultiSegmentReader(directory, infos, closeDirectory, readers);
     }
     reader.deletionPolicy = deletionPolicy;
     return reader;
diff --git a/src/java/org/apache/lucene/index/MultiReader.java b/src/java/org/apache/lucene/index/MultiReader.java
index 3e1d8009c23..2b9eead3f3c 100644
--- a/src/java/org/apache/lucene/index/MultiReader.java
+++ b/src/java/org/apache/lucene/index/MultiReader.java
@@ -31,14 +31,7 @@ import java.util.Set;
  *
  * @version $Id$
  */
-public class MultiReader extends IndexReader {
-  private IndexReader[] subReaders;
-  private int[] starts;                           // 1st docno for each segment
-  private Hashtable normsCache = new Hashtable();
-  private int maxDoc = 0;
-  private int numDocs = -1;
-  private boolean hasDeletions = false;
-
+public class MultiReader extends MultiSegmentReader {
   /**
    * Construct a MultiReader aggregating the named set of (sub)readers.
    * Directory locking for delete, undeleteAll, and setNorm operations is
@@ -48,438 +41,28 @@ public class MultiReader extends IndexReader {
    * @throws IOException
    */
   public MultiReader(IndexReader[] subReaders) throws IOException {
-    super(subReaders.length == 0 ? null : subReaders[0].directory());
-    initialize(subReaders);
+    super(subReaders.length == 0 ? null : subReaders[0].directory(),
+          null, false, subReaders);
   }

-  /** Construct reading the named set of readers. */
-  MultiReader(Directory directory, SegmentInfos sis, boolean closeDirectory, IndexReader[] subReaders) {
-    super(directory, sis, closeDirectory);
-    initialize(subReaders);
-  }
-
-  private void initialize(IndexReader[] subReaders) {
-    this.subReaders = subReaders;
-    starts = new int[subReaders.length + 1];    // build starts array
+  /**
+   * Checks recursively if all subreaders are up to date.
+   */
+  public boolean isCurrent() throws CorruptIndexException, IOException {
     for (int i = 0; i < subReaders.length; i++) {
-      starts[i] = maxDoc;
-      maxDoc += subReaders[i].maxDoc();      // compute maxDocs
-
-      if (subReaders[i].hasDeletions())
-        hasDeletions = true;
-    }
-    starts[subReaders.length] = maxDoc;
-  }
-
-
-  public TermFreqVector[] getTermFreqVectors(int n) throws IOException {
-    ensureOpen();
-    int i = readerIndex(n);        // find segment num
-    return subReaders[i].getTermFreqVectors(n - starts[i]); // dispatch to segment
-  }
-
-  public TermFreqVector getTermFreqVector(int n, String field)
-      throws IOException {
-    ensureOpen();
-    int i = readerIndex(n);        // find segment num
-    return subReaders[i].getTermFreqVector(n - starts[i], field);
-  }
-
-
-  public void getTermFreqVector(int docNumber, String field, TermVectorMapper mapper) throws IOException {
-    ensureOpen();
-    int i = readerIndex(docNumber);        // find segment num
-    subReaders[i].getTermFreqVector(docNumber - starts[i], field, mapper);
-  }
-
-  public void getTermFreqVector(int docNumber, TermVectorMapper mapper) throws IOException {
-    ensureOpen();
-    int i = readerIndex(docNumber);        // find segment num
-    subReaders[i].getTermFreqVector(docNumber - starts[i], mapper);
-  }
-
-  public synchronized int numDocs() {
-    // Don't call ensureOpen() here (it could affect performance)
-    if (numDocs == -1) {        // check cache
-      int n = 0;                // cache miss--recompute
-      for (int i = 0; i < subReaders.length; i++)
-        n += subReaders[i].numDocs();      // sum from readers
-      numDocs = n;
-    }
-    return numDocs;
-  }
-
-  public int maxDoc() {
-    // Don't call ensureOpen() here (it could affect performance)
-    return maxDoc;
-  }
-
-  // inherit javadoc
-  public Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
-    ensureOpen();
-    int i = readerIndex(n);        // find segment num
-    return subReaders[i].document(n - starts[i], fieldSelector);    // dispatch to segment reader
-  }
-
-  public boolean isDeleted(int n) {
-    // Don't call ensureOpen() here (it could affect performance)
-    int i = readerIndex(n);        // find segment num
-    return subReaders[i].isDeleted(n - starts[i]);    // dispatch to segment reader
-  }
-
-  public boolean hasDeletions() {
-    // Don't call ensureOpen() here (it could affect performance)
-    return hasDeletions;
-  }
-
-  protected void doDelete(int n) throws CorruptIndexException, IOException {
-    numDocs = -1;                // invalidate cache
-    int i = readerIndex(n);      // find segment num
-    subReaders[i].deleteDocument(n - starts[i]);      // dispatch to segment reader
-    hasDeletions = true;
-  }
-
-  protected void doUndeleteAll() throws CorruptIndexException, IOException {
-    for (int i = 0; i < subReaders.length; i++)
-      subReaders[i].undeleteAll();
-
-    hasDeletions = false;
-    numDocs = -1;                // invalidate cache
-  }
-
-  private int readerIndex(int n) {    // find reader for doc n:
-    int lo = 0;                      // search starts array
-    int hi = subReaders.length - 1;  // for first element less
-
-    while (hi >= lo) {
-      int mid = (lo + hi) >> 1;
-      int midValue = starts[mid];
-      if (n < midValue)
-        hi = mid - 1;
-      else if (n > midValue)
-        lo = mid + 1;
-      else {                      // found a match
-        while (mid+1 < subReaders.length && starts[mid+1] == midValue) {
-          mid++;                  // scan to last match
-        }
-        return mid;
+      if (!subReaders[i].isCurrent()) {
+        return false;
       }
     }
-    return hi;
-  }
-
-  public boolean hasNorms(String field) throws IOException {
-    ensureOpen();
-    for (int i = 0; i < subReaders.length; i++) {
-      if (subReaders[i].hasNorms(field)) return true;
-    }
-    return false;
-  }
-
-  private byte[] ones;
-  private byte[] fakeNorms() {
-    if (ones==null) ones=SegmentReader.createFakeNorms(maxDoc());
-    return ones;
-  }
-
-  public synchronized byte[] norms(String field) throws IOException {
-    ensureOpen();
-    byte[] bytes = (byte[])normsCache.get(field);
-    if (bytes != null)
-      return bytes;        // cache hit
-    if (!hasNorms(field))
-      return fakeNorms();
-
-    bytes = new byte[maxDoc()];
-    for (int i = 0; i < subReaders.length; i++)
-      subReaders[i].norms(field, bytes, starts[i]);
-    normsCache.put(field, bytes);      // update cache
-    return bytes;
-  }
-
-  public synchronized void norms(String field, byte[] result, int offset)
-    throws IOException {
-    ensureOpen();
-    byte[] bytes = (byte[])normsCache.get(field);
-    if (bytes==null && !hasNorms(field)) bytes=fakeNorms();
-    if (bytes != null)                            // cache hit
-      System.arraycopy(bytes, 0, result, offset, maxDoc());
-
-    for (int i = 0; i < subReaders.length; i++)      // read from segments
-      subReaders[i].norms(field, result, offset + starts[i]);
-  }
-
-  protected void doSetNorm(int n, String field, byte value)
-    throws CorruptIndexException, IOException {
-    normsCache.remove(field);                         // clear cache
-    int i = readerIndex(n);                           // find segment num
-    subReaders[i].setNorm(n-starts[i], field, value); // dispatch
-  }
-
-  public TermEnum terms() throws IOException {
-    ensureOpen();
-    return new MultiTermEnum(subReaders, starts, null);
-  }
-
-  public TermEnum terms(Term term) throws IOException {
-    ensureOpen();
-    return new MultiTermEnum(subReaders, starts, term);
-  }
-
-  public int docFreq(Term t) throws IOException {
-    ensureOpen();
-    int total = 0;          // sum freqs in segments
-    for (int i = 0; i < subReaders.length; i++)
-      total += subReaders[i].docFreq(t);
-    return total;
-  }
-
-  public TermDocs termDocs() throws IOException {
-    ensureOpen();
-    return new MultiTermDocs(subReaders, starts);
-  }
-
-  public TermPositions termPositions() throws IOException {
-    ensureOpen();
-    return new MultiTermPositions(subReaders, starts);
-  }
-
-  protected void doCommit() throws IOException {
-    for (int i = 0; i < subReaders.length; i++)
-      subReaders[i].commit();
-  }
-
-  void startCommit() {
-    super.startCommit();
-    for (int i = 0; i < subReaders.length; i++) {
-      subReaders[i].startCommit();
-    }
-  }
-
-  void rollbackCommit() {
-    super.rollbackCommit();
-    for (int i = 0; i < subReaders.length; i++) {
-      subReaders[i].rollbackCommit();
-    }
-  }
-
-  protected synchronized void doClose() throws IOException {
-    for (int i = 0; i < subReaders.length; i++)
-      subReaders[i].close();
-  }
-
-  public Collection getFieldNames (IndexReader.FieldOption fieldNames) {
-    // maintain a unique set of field names
-    ensureOpen();
-    Set fieldSet = new HashSet();
-    for (int i = 0; i < subReaders.length; i++) {
-      IndexReader reader = subReaders[i];
-      Collection names = reader.getFieldNames(fieldNames);
-      fieldSet.addAll(names);
-    }
-    return fieldSet;
-  }
-
-}
-
-class MultiTermEnum extends TermEnum {
-  private SegmentMergeQueue queue;
-
-  private Term term;
-  private int docFreq;
-
-  public MultiTermEnum(IndexReader[] readers, int[] starts, Term t)
-    throws IOException {
-    queue = new SegmentMergeQueue(readers.length);
-    for (int i = 0; i < readers.length; i++) {
-      IndexReader reader = readers[i];
-      TermEnum termEnum;
-
-      if (t != null) {
-        termEnum = reader.terms(t);
-      } else
-        termEnum = reader.terms();
-
-      SegmentMergeInfo smi = new SegmentMergeInfo(starts[i], termEnum, reader);
-      if (t == null ? smi.next() : termEnum.term() != null)
-        queue.put(smi);          // initialize queue
-      else
-        smi.close();
-    }
-
-    if (t != null && queue.size() > 0) {
-      next();
-    }
-  }
-
-  public boolean next() throws IOException {
-    SegmentMergeInfo top = (SegmentMergeInfo)queue.top();
-    if (top == null) {
-      term = null;
-      return false;
-    }
-
-    term = top.term;
-    docFreq = 0;
-
-    while (top != null && term.compareTo(top.term) == 0) {
-      queue.pop();
-      docFreq += top.termEnum.docFreq();    // increment freq
-      if (top.next())
-        queue.put(top);          // restore queue
-      else
-        top.close();          // done with a segment
-      top = (SegmentMergeInfo)queue.top();
-    }
+
+    // all subreaders are up to date
     return true;
   }
-
-  public Term term() {
-    return term;
-  }
-
-  public int docFreq() {
-    return docFreq;
-  }
-
-  public void close() throws IOException {
-    queue.close();
-  }
-}

-class MultiTermDocs implements TermDocs {
-  protected IndexReader[] readers;
-  protected int[] starts;
-  protected Term term;
-
-  protected int base = 0;
-  protected int pointer = 0;
-
-  private TermDocs[] readerTermDocs;
-  protected TermDocs current;              // == readerTermDocs[pointer]
-
-  public MultiTermDocs(IndexReader[] r, int[] s) {
-    readers = r;
-    starts = s;
-
-    readerTermDocs = new TermDocs[r.length];
-  }
-
-  public int doc() {
-    return base + current.doc();
-  }
-  public int freq() {
-    return current.freq();
-  }
-
-  public void seek(Term term) {
-    this.term = term;
-    this.base = 0;
-    this.pointer = 0;
-    this.current = null;
-  }
-
-  public void seek(TermEnum termEnum) throws IOException {
-    seek(termEnum.term());
-  }
-
-  public boolean next() throws IOException {
-    for(;;) {
-      if (current!=null && current.next()) {
-        return true;
-      }
-      else if (pointer < readers.length) {
-        base = starts[pointer];
-        current = termDocs(pointer++);
-      } else {
-        return false;
-      }
-    }
-  }
-
-  /** Optimized implementation. */
-  public int read(final int[] docs, final int[] freqs) throws IOException {
-    while (true) {
-      while (current == null) {
-        if (pointer < readers.length) {      // try next segment
-          base = starts[pointer];
-          current = termDocs(pointer++);
-        } else {
-          return 0;
-        }
-      }
-      int end = current.read(docs, freqs);
-      if (end == 0) {          // none left in segment
-        current = null;
-      } else {            // got some
-        final int b = base;        // adjust doc numbers
-        for (int i = 0; i < end; i++)
-          docs[i] += b;
-        return end;
-      }
-    }
-  }
-
-  /* A Possible future optimization could skip entire segments */
-  public boolean skipTo(int target) throws IOException {
-    for(;;) {
-      if (current != null && current.skipTo(target-base)) {
-        return true;
-      } else if (pointer < readers.length) {
-        base = starts[pointer];
-        current = termDocs(pointer++);
-      } else
-        return false;
-    }
-  }
-
-  private TermDocs termDocs(int i) throws IOException {
-    if (term == null)
-      return null;
-    TermDocs result = readerTermDocs[i];
-    if (result == null)
-      result = readerTermDocs[i] = termDocs(readers[i]);
-    result.seek(term);
-    return result;
-  }
-
-  protected TermDocs termDocs(IndexReader reader)
-    throws IOException {
-    return reader.termDocs();
-  }
-
-  public void close() throws IOException {
-    for (int i = 0; i < readerTermDocs.length; i++) {
-      if (readerTermDocs[i] != null)
-        readerTermDocs[i].close();
-    }
-  }
-}
-
-class MultiTermPositions extends MultiTermDocs implements TermPositions {
-  public MultiTermPositions(IndexReader[] r, int[] s) {
-    super(r,s);
-  }
-
-  protected TermDocs termDocs(IndexReader reader) throws IOException {
-    return (TermDocs)reader.termPositions();
-  }
-
-  public int nextPosition() throws IOException {
-    return ((TermPositions)current).nextPosition();
-  }
-  public int getPayloadLength() {
-    return ((TermPositions)current).getPayloadLength();
-  }
-
-  public byte[] getPayload(byte[] data, int offset) throws IOException {
-    return ((TermPositions)current).getPayload(data, offset);
-  }
-
-
-  // TODO: Remove warning after API has been finalized
-  public boolean isPayloadAvailable() {
-    return ((TermPositions) current).isPayloadAvailable();
+  /** Not implemented.
+   * @throws UnsupportedOperationException
+   */
+  public long getVersion() {
+    throw new UnsupportedOperationException("MultiReader does not support this method.");
   }
 }
diff --git a/src/java/org/apache/lucene/index/MultiSegmentReader.java b/src/java/org/apache/lucene/index/MultiSegmentReader.java
new file mode 100644
index 00000000000..04b10605875
--- /dev/null
+++ b/src/java/org/apache/lucene/index/MultiSegmentReader.java
@@ -0,0 +1,475 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.FieldSelector;
+import org.apache.lucene.store.Directory;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Hashtable;
+import java.util.Set;
+
+/**
+ * An IndexReader which reads indexes with multiple segments.
+ */
+class MultiSegmentReader extends IndexReader {
+  protected IndexReader[] subReaders;
+  private int[] starts;                           // 1st docno for each segment
+  private Hashtable normsCache = new Hashtable();
+  private int maxDoc = 0;
+  private int numDocs = -1;
+  private boolean hasDeletions = false;
+
+  /** Construct reading the named set of readers. */
+  MultiSegmentReader(Directory directory, SegmentInfos sis, boolean closeDirectory, IndexReader[] subReaders) {
+    super(directory, sis, closeDirectory);
+    initialize(subReaders);
+  }
+
+  private void initialize(IndexReader[] subReaders) {
+    this.subReaders = subReaders;
+    starts = new int[subReaders.length + 1];    // build starts array
+    for (int i = 0; i < subReaders.length; i++) {
+      starts[i] = maxDoc;
+      maxDoc += subReaders[i].maxDoc();      // compute maxDocs
+
+      if (subReaders[i].hasDeletions())
+        hasDeletions = true;
+    }
+    starts[subReaders.length] = maxDoc;
+  }
+
+
+  public TermFreqVector[] getTermFreqVectors(int n) throws IOException {
+    ensureOpen();
+    int i = readerIndex(n);        // find segment num
+    return subReaders[i].getTermFreqVectors(n - starts[i]); // dispatch to segment
+  }
+
+  public TermFreqVector getTermFreqVector(int n, String field)
+      throws IOException {
+    ensureOpen();
+    int i = readerIndex(n);        // find segment num
+    return subReaders[i].getTermFreqVector(n - starts[i], field);
+  }
+
+
+  public void getTermFreqVector(int docNumber, String field, TermVectorMapper mapper) throws IOException {
+    ensureOpen();
+    int i = readerIndex(docNumber);        // find segment num
+    subReaders[i].getTermFreqVector(docNumber - starts[i], field, mapper);
+  }
+
+  public void getTermFreqVector(int docNumber, TermVectorMapper mapper) throws IOException {
+    ensureOpen();
+    int i = readerIndex(docNumber);        // find segment num
+    subReaders[i].getTermFreqVector(docNumber - starts[i], mapper);
+  }
+
+  public boolean isOptimized() {
+    return false;
+  }
+
+  public synchronized int numDocs() {
+    // Don't call ensureOpen() here (it could affect performance)
+    if (numDocs == -1) {        // check cache
+      int n = 0;                // cache miss--recompute
+      for (int i = 0; i < subReaders.length; i++)
+        n += subReaders[i].numDocs();      // sum from readers
+      numDocs = n;
+    }
+    return numDocs;
+  }
+
+  public int maxDoc() {
+    // Don't call ensureOpen() here (it could affect performance)
+    return maxDoc;
+  }
+
+  // inherit javadoc
+  public Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
+    ensureOpen();
+    int i = readerIndex(n);        // find segment num
+    return subReaders[i].document(n - starts[i], fieldSelector);    // dispatch to segment reader
+  }
+
+  public boolean isDeleted(int n) {
+    // Don't call ensureOpen() here (it could affect performance)
+    int i = readerIndex(n);        // find segment num
+    return subReaders[i].isDeleted(n - starts[i]);    // dispatch to segment reader
+  }
+
+  public boolean hasDeletions() {
+    // Don't call ensureOpen() here (it could affect performance)
+    return hasDeletions;
+  }
+
+  protected void doDelete(int n) throws CorruptIndexException, IOException {
+    numDocs = -1;                // invalidate cache
+    int i = readerIndex(n);      // find segment num
+    subReaders[i].deleteDocument(n - starts[i]);      // dispatch to segment reader
+    hasDeletions = true;
+  }
+
+  protected void doUndeleteAll() throws CorruptIndexException, IOException {
+    for (int i = 0; i < subReaders.length; i++)
+      subReaders[i].undeleteAll();
+
+    hasDeletions = false;
+    numDocs = -1;                // invalidate cache
+  }
+
+  private int readerIndex(int n) {    // find reader for doc n:
+    int lo = 0;                      // search starts array
+    int hi = subReaders.length - 1;  // for first element less
+
+    while (hi >= lo) {
+      int mid = (lo + hi) >> 1;
+      int midValue = starts[mid];
+      if (n < midValue)
+        hi = mid - 1;
+      else if (n > midValue)
+        lo = mid + 1;
+      else {                      // found a match
+        while (mid+1 < subReaders.length && starts[mid+1] == midValue) {
+          mid++;                  // scan to last match
+        }
+        return mid;
+      }
+    }
+    return hi;
+  }
+
+  public boolean hasNorms(String field) throws IOException {
+    ensureOpen();
+    for (int i = 0; i < subReaders.length; i++) {
+      if (subReaders[i].hasNorms(field)) return true;
+    }
+    return false;
+  }
+
+  private byte[] ones;
+  private byte[] fakeNorms() {
+    if (ones==null) ones=SegmentReader.createFakeNorms(maxDoc());
+    return ones;
+  }
+
+  public synchronized byte[] norms(String field) throws IOException {
+    ensureOpen();
+    byte[] bytes = (byte[])normsCache.get(field);
+    if (bytes != null)
+      return bytes;        // cache hit
+    if (!hasNorms(field))
+      return fakeNorms();
+
+    bytes = new byte[maxDoc()];
+    for (int i = 0; i < subReaders.length; i++)
+      subReaders[i].norms(field, bytes, starts[i]);
+    normsCache.put(field, bytes);      // update cache
+    return bytes;
+  }
+
+  public synchronized void norms(String field, byte[] result, int offset)
+    throws IOException {
+    ensureOpen();
+    byte[] bytes = (byte[])normsCache.get(field);
+    if (bytes==null && !hasNorms(field)) bytes=fakeNorms();
+    if (bytes != null)                            // cache hit
+      System.arraycopy(bytes, 0, result, offset, maxDoc());
+
+    for (int i = 0; i < subReaders.length; i++)      // read from segments
+      subReaders[i].norms(field, result, offset + starts[i]);
+  }
+
+  protected void doSetNorm(int n, String field, byte value)
+    throws CorruptIndexException, IOException {
+    normsCache.remove(field);                         // clear cache
+    int i = readerIndex(n);                           // find segment num
+    subReaders[i].setNorm(n-starts[i], field, value); // dispatch
+  }
+
+  public TermEnum terms() throws IOException {
+    ensureOpen();
+    return new MultiTermEnum(subReaders, starts, null);
+  }
+
+  public TermEnum terms(Term term) throws IOException {
+    ensureOpen();
+    return new MultiTermEnum(subReaders, starts, term);
+  }
+
+  public int docFreq(Term t) throws IOException {
+    ensureOpen();
+    int total = 0;          // sum freqs in segments
+    for (int i = 0; i < subReaders.length; i++)
+      total += subReaders[i].docFreq(t);
+    return total;
+  }
+
+  public TermDocs termDocs() throws IOException {
+    ensureOpen();
+    return new MultiTermDocs(subReaders, starts);
+  }
+
+  public TermPositions termPositions() throws IOException {
+    ensureOpen();
+    return new MultiTermPositions(subReaders, starts);
+  }
+
+  protected void doCommit() throws IOException {
+    for (int i = 0; i < subReaders.length; i++)
+      subReaders[i].commit();
+  }
+
+  void startCommit() {
+    super.startCommit();
+    for (int i = 0; i < subReaders.length; i++) {
+      subReaders[i].startCommit();
+    }
+  }
+
+  void rollbackCommit() {
+    super.rollbackCommit();
+    for (int i = 0; i < subReaders.length; i++) {
+      subReaders[i].rollbackCommit();
+    }
+  }
+
+  protected synchronized void doClose() throws IOException {
+    for (int i = 0; i < subReaders.length; i++)
+      subReaders[i].close();
+  }
+
+  public Collection getFieldNames (IndexReader.FieldOption fieldNames) {
+    // maintain a unique set of field names
+    ensureOpen();
+    Set fieldSet = new HashSet();
+    for (int i = 0; i < subReaders.length; i++) {
+      IndexReader reader = subReaders[i];
+      Collection names = reader.getFieldNames(fieldNames);
+      fieldSet.addAll(names);
+    }
+    return fieldSet;
+  }
+
+
+  static class MultiTermEnum extends TermEnum {
+    private SegmentMergeQueue queue;
+
+    private Term term;
+    private int docFreq;
+
+    public MultiTermEnum(IndexReader[] readers, int[] starts, Term t)
+      throws IOException {
+      queue = new SegmentMergeQueue(readers.length);
+      for (int i = 0; i < readers.length; i++) {
+        IndexReader reader = readers[i];
+        TermEnum termEnum;
+
+        if (t != null) {
+          termEnum = reader.terms(t);
+        } else
+          termEnum = reader.terms();
+
+        SegmentMergeInfo smi = new SegmentMergeInfo(starts[i], termEnum, reader);
+        if (t == null ? smi.next() : termEnum.term() != null)
+          queue.put(smi);          // initialize queue
+        else
+          smi.close();
+      }
+
+      if (t != null && queue.size() > 0) {
+        next();
+      }
+    }
+
+    public boolean next() throws IOException {
+      SegmentMergeInfo top = (SegmentMergeInfo)queue.top();
+      if (top == null) {
+        term = null;
+        return false;
+      }
+
+      term = top.term;
+      docFreq = 0;
+
+      while (top != null && term.compareTo(top.term) == 0) {
+        queue.pop();
+        docFreq += top.termEnum.docFreq();    // increment freq
+        if (top.next())
+          queue.put(top);          // restore queue
+        else
+          top.close();          // done with a segment
+        top = (SegmentMergeInfo)queue.top();
+      }
+      return true;
+    }
+
+    public Term term() {
+      return term;
+    }
+
+    public int docFreq() {
+      return docFreq;
+    }
+
+    public void close() throws IOException {
+      queue.close();
+    }
+  }
+
+  static class MultiTermDocs implements TermDocs {
+    protected IndexReader[] readers;
+    protected int[] starts;
+    protected Term term;
+
+    protected int base = 0;
+    protected int pointer = 0;
+
+    private TermDocs[] readerTermDocs;
+    protected TermDocs current;              // == readerTermDocs[pointer]
+
+    public MultiTermDocs(IndexReader[] r, int[] s) {
+      readers = r;
+      starts = s;
+
+      readerTermDocs = new TermDocs[r.length];
+    }
+
+    public int doc() {
+      return base + current.doc();
+    }
+    public int freq() {
+      return current.freq();
+    }
+
+    public void seek(Term term) {
+      this.term = term;
+      this.base = 0;
+      this.pointer = 0;
+      this.current = null;
+    }
+
+    public void seek(TermEnum termEnum) throws IOException {
+      seek(termEnum.term());
+    }
+
+    public boolean next() throws IOException {
+      for(;;) {
+        if (current!=null && current.next()) {
+          return true;
+        }
+        else if (pointer < readers.length) {
+          base = starts[pointer];
+          current = termDocs(pointer++);
+        } else {
+          return false;
+        }
+      }
+    }
+
+    /** Optimized implementation. */
+    public int read(final int[] docs, final int[] freqs) throws IOException {
+      while (true) {
+        while (current == null) {
+          if (pointer < readers.length) {      // try next segment
+            base = starts[pointer];
+            current = termDocs(pointer++);
+          } else {
+            return 0;
+          }
+        }
+        int end = current.read(docs, freqs);
+        if (end == 0) {          // none left in segment
+          current = null;
+        } else {            // got some
+          final int b = base;        // adjust doc numbers
+          for (int i = 0; i < end; i++)
+            docs[i] += b;
+          return end;
+        }
+      }
+    }
+
+    /* A possible future optimization could skip entire segments */
+    public boolean skipTo(int target) throws IOException {
+      for(;;) {
+        if (current != null && current.skipTo(target-base)) {
+          return true;
+        } else if (pointer < readers.length) {
+          base = starts[pointer];
+          current = termDocs(pointer++);
+        } else
+          return false;
+      }
+    }
+
+    private TermDocs termDocs(int i) throws IOException {
+      if (term == null)
+        return null;
+      TermDocs result = readerTermDocs[i];
+      if (result == null)
+        result = readerTermDocs[i] = termDocs(readers[i]);
+      result.seek(term);
+      return result;
+    }
+
+    protected TermDocs termDocs(IndexReader reader)
+      throws IOException {
+      return reader.termDocs();
+    }
+
+    public void close() throws IOException {
+      for (int i = 0; i < readerTermDocs.length; i++) {
+        if (readerTermDocs[i] != null)
+          readerTermDocs[i].close();
+      }
+    }
+  }
+
+  static class MultiTermPositions extends MultiTermDocs implements TermPositions {
+    public MultiTermPositions(IndexReader[] r, int[] s) {
+      super(r,s);
+    }
+
+    protected TermDocs termDocs(IndexReader reader) throws IOException {
+      return (TermDocs)reader.termPositions();
+    }
+
+    public int nextPosition() throws IOException {
+      return ((TermPositions)current).nextPosition();
+    }
+
+    public int getPayloadLength() {
+      return ((TermPositions)current).getPayloadLength();
+    }
+
+    public byte[] getPayload(byte[] data, int offset) throws IOException {
+      return ((TermPositions)current).getPayload(data, offset);
+    }
+
+
+    // TODO: Remove warning after API has been finalized
+    public boolean isPayloadAvailable() {
+      return ((TermPositions) current).isPayloadAvailable();
+    }
+  }
+}
diff --git a/src/test/org/apache/lucene/index/TestMultiReader.java b/src/test/org/apache/lucene/index/TestMultiReader.java
index 36c49c23656..bfd3b2ce168 100644
--- a/src/test/org/apache/lucene/index/TestMultiReader.java
+++ b/src/test/org/apache/lucene/index/TestMultiReader.java
@@ -61,7 +61,7 @@ public class TestMultiReader extends TestCase {

   public void testDocument() throws IOException {
     sis.read(dir);
-    MultiReader reader = new MultiReader(dir, sis, false, readers);
+    MultiSegmentReader reader = new MultiSegmentReader(dir, sis, false, readers);
     assertTrue(reader != null);
     Document newDoc1 = reader.document(0);
     assertTrue(newDoc1 != null);
@@ -76,7 +76,7 @@

   public void testUndeleteAll() throws IOException {
     sis.read(dir);
-    MultiReader reader = new MultiReader(dir, sis, false, readers);
+    MultiSegmentReader reader = new MultiSegmentReader(dir, sis, false, readers);
     assertTrue(reader != null);
     assertEquals( 2, reader.numDocs() );
     reader.deleteDocument(0);
@@ -88,7 +88,7 @@
     reader.commit();
     reader.close();
     sis.read(dir);
-    reader = new MultiReader(dir, sis, false, readers);
+    reader = new MultiSegmentReader(dir, sis, false, readers);
     assertEquals( 2, reader.numDocs() );

     reader.deleteDocument(0);
@@ -96,17 +96,17 @@
     reader.commit();
     reader.close();
     sis.read(dir);
-    reader = new MultiReader(dir, sis, false, readers);
+    reader = new MultiSegmentReader(dir, sis, false, readers);
     assertEquals( 1, reader.numDocs() );
   }

   public void testTermVectors() {
-    MultiReader reader = new MultiReader(dir, sis, false, readers);
+    MultiSegmentReader reader = new MultiSegmentReader(dir, sis, false, readers);
     assertTrue(reader != null);
   }

-  /* known to fail, see https://issues.apache.org/jira/browse/LUCENE-781
+
   public void testIsCurrent() throws IOException {
     RAMDirectory ramDir1=new RAMDirectory();
     addDoc(ramDir1, "test foo", true);
@@ -135,6 +135,4 @@
     iw.addDocument(doc);
     iw.close();
   }
-  */
-
 }
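--
Usage sketch (not part of the committed patch): after this change,
IndexReader.open() returns the package-private MultiSegmentReader on its
own whenever an index has more than one segment, while the public
MultiReader constructor is reserved for aggregating subreaders you open
yourself. The sketch below illustrates the intended calling pattern
against the 2.x API as described in the CHANGES.txt entry above; the
index paths and the MultiReaderUsage class name are hypothetical.

    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.MultiReader;

    public class MultiReaderUsage {
      public static void main(String[] args) throws Exception {
        // Open one reader per index; for a multi-segment index this now
        // yields a MultiSegmentReader behind the scenes.
        IndexReader r1 = IndexReader.open("/path/to/index1");
        IndexReader r2 = IndexReader.open("/path/to/index2");

        // Aggregate user-supplied subreaders with the public MultiReader.
        MultiReader multi = new MultiReader(new IndexReader[] { r1, r2 });

        // Fixed by LUCENE-781: checks every subreader instead of
        // dereferencing a null SegmentInfos and throwing an NPE.
        System.out.println("up to date: " + multi.isCurrent());

        // A single version number is undefined across separate indexes,
        // so this now fails fast instead of throwing an NPE.
        try {
          multi.getVersion();
        } catch (UnsupportedOperationException expected) {
          // expected for user-aggregated readers
        }

        multi.close();    // closes all subreaders
      }
    }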