mirror of https://github.com/apache/lucene.git
LUCENE-781: MultiReader fixed to not throw NPE if isCurrent(), isOptimized() or getVersion() is called.
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@560024 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent 02dd452026
commit c1496444b2
CHANGES.txt
@@ -46,6 +46,14 @@ Bug fixes
 
  6. LUCENE-957: RAMDirectory fixed to properly handle directories
     larger than Integer.MAX_VALUE. (Doron Cohen)
 
+ 7. LUCENE-781: MultiReader fixed to not throw NPE if isCurrent(),
+    isOptimized() or getVersion() is called. Separated MultiReader
+    into two classes: MultiSegmentReader extends IndexReader, is
+    package-protected and is created automatically by IndexReader.open()
+    in case the index has multiple segments. The public MultiReader
+    now extends MultiSegmentReader and is intended to be used by users
+    who want to add their own subreaders. (Daniel Naber, Michael Busch)
+
 New features
 
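What the split means in practice, sketched against the post-commit API: IndexReader.open() keeps working exactly as before, while user code that aggregates its own readers goes through the public MultiReader. The class name MultiReaderSketch, the directory setup, and the addDoc helper below are hypothetical scaffolding modeled on TestMultiReader, not part of the commit:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.store.RAMDirectory;

import java.io.IOException;

public class MultiReaderSketch {

  // Hypothetical helper, modeled on addDoc() in TestMultiReader.
  static void addDoc(RAMDirectory dir, String text, boolean create) throws IOException {
    IndexWriter iw = new IndexWriter(dir, new StandardAnalyzer(), create);
    Document doc = new Document();
    doc.add(new Field("body", text, Field.Store.YES, Field.Index.TOKENIZED));
    iw.addDocument(doc);
    iw.close();
  }

  public static void main(String[] args) throws IOException {
    RAMDirectory ramDir1 = new RAMDirectory();
    RAMDirectory ramDir2 = new RAMDirectory();
    addDoc(ramDir1, "test foo", true);
    addDoc(ramDir2, "test bar", true);

    // Unchanged entry point: IndexReader.open() now hands back the
    // package-protected MultiSegmentReader when an index has multiple
    // segments, so callers see no difference.
    IndexReader r1 = IndexReader.open(ramDir1);
    IndexReader r2 = IndexReader.open(ramDir2);

    // The public MultiReader is now only for aggregating your own
    // subreaders.
    MultiReader multi = new MultiReader(new IndexReader[] { r1, r2 });

    // Before this commit this call hit a NullPointerException; now it
    // asks every subreader. getVersion(), by contrast, fails loudly
    // with UnsupportedOperationException instead of an NPE.
    System.out.println(multi.isCurrent());  // true: nothing has changed yet

    multi.close();  // per the javadoc, also closes r1 and r2
  }
}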
IndexReader.java
@@ -208,7 +208,7 @@ public abstract class IndexReader {
          }
        }
 
-        reader = new MultiReader(directory, infos, closeDirectory, readers);
+        reader = new MultiSegmentReader(directory, infos, closeDirectory, readers);
      }
      reader.deletionPolicy = deletionPolicy;
      return reader;
MultiReader.java
@@ -31,14 +31,7 @@ import java.util.Set;
  *
  * @version $Id$
  */
-public class MultiReader extends IndexReader {
-  private IndexReader[] subReaders;
-  private int[] starts;                           // 1st docno for each segment
-  private Hashtable normsCache = new Hashtable();
-  private int maxDoc = 0;
-  private int numDocs = -1;
-  private boolean hasDeletions = false;
-
+public class MultiReader extends MultiSegmentReader {
 
   /**
    * <p>Construct a MultiReader aggregating the named set of (sub)readers.
    * Directory locking for delete, undeleteAll, and setNorm operations is
@@ -48,438 +41,28 @@ public class MultiReader extends IndexReader {
   * @throws IOException
   */
  public MultiReader(IndexReader[] subReaders) throws IOException {
-    super(subReaders.length == 0 ? null : subReaders[0].directory());
-    initialize(subReaders);
-  }
-
-  /** Construct reading the named set of readers. */
-  MultiReader(Directory directory, SegmentInfos sis, boolean closeDirectory, IndexReader[] subReaders) {
-    super(directory, sis, closeDirectory);
-    initialize(subReaders);
-  }
-
-  private void initialize(IndexReader[] subReaders) {
-    this.subReaders = subReaders;
-    starts = new int[subReaders.length + 1];    // build starts array
-    for (int i = 0; i < subReaders.length; i++) {
-      starts[i] = maxDoc;
-      maxDoc += subReaders[i].maxDoc();      // compute maxDocs
-
-      if (subReaders[i].hasDeletions())
-        hasDeletions = true;
-    }
-    starts[subReaders.length] = maxDoc;
-  }
-
-
-  public TermFreqVector[] getTermFreqVectors(int n) throws IOException {
-    ensureOpen();
-    int i = readerIndex(n);        // find segment num
-    return subReaders[i].getTermFreqVectors(n - starts[i]); // dispatch to segment
-  }
-
-  public TermFreqVector getTermFreqVector(int n, String field)
-      throws IOException {
-    ensureOpen();
-    int i = readerIndex(n);        // find segment num
-    return subReaders[i].getTermFreqVector(n - starts[i], field);
-  }
-
-
-  public void getTermFreqVector(int docNumber, String field, TermVectorMapper mapper) throws IOException {
-    ensureOpen();
-    int i = readerIndex(docNumber);        // find segment num
-    subReaders[i].getTermFreqVector(docNumber - starts[i], field, mapper);
-  }
-
-  public void getTermFreqVector(int docNumber, TermVectorMapper mapper) throws IOException {
-    ensureOpen();
-    int i = readerIndex(docNumber);        // find segment num
-    subReaders[i].getTermFreqVector(docNumber - starts[i], mapper);
-  }
-
-  public synchronized int numDocs() {
-    // Don't call ensureOpen() here (it could affect performance)
-    if (numDocs == -1) {        // check cache
-      int n = 0;                // cache miss--recompute
-      for (int i = 0; i < subReaders.length; i++)
-        n += subReaders[i].numDocs();      // sum from readers
-      numDocs = n;
-    }
-    return numDocs;
-  }
-
-  public int maxDoc() {
-    // Don't call ensureOpen() here (it could affect performance)
-    return maxDoc;
-  }
-
-  // inherit javadoc
-  public Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
-    ensureOpen();
-    int i = readerIndex(n);                          // find segment num
-    return subReaders[i].document(n - starts[i], fieldSelector);    // dispatch to segment reader
-  }
-
-  public boolean isDeleted(int n) {
-    // Don't call ensureOpen() here (it could affect performance)
-    int i = readerIndex(n);                           // find segment num
-    return subReaders[i].isDeleted(n - starts[i]);    // dispatch to segment reader
-  }
-
-  public boolean hasDeletions() {
-    // Don't call ensureOpen() here (it could affect performance)
-    return hasDeletions;
-  }
-
-  protected void doDelete(int n) throws CorruptIndexException, IOException {
-    numDocs = -1;                             // invalidate cache
-    int i = readerIndex(n);                   // find segment num
-    subReaders[i].deleteDocument(n - starts[i]);      // dispatch to segment reader
-    hasDeletions = true;
-  }
-
-  protected void doUndeleteAll() throws CorruptIndexException, IOException {
-    for (int i = 0; i < subReaders.length; i++)
-      subReaders[i].undeleteAll();
-
-    hasDeletions = false;
-    numDocs = -1;                                 // invalidate cache
-  }
-
-  private int readerIndex(int n) {    // find reader for doc n:
-    int lo = 0;                                      // search starts array
-    int hi = subReaders.length - 1;                  // for first element less
-
-    while (hi >= lo) {
-      int mid = (lo + hi) >> 1;
-      int midValue = starts[mid];
-      if (n < midValue)
-        hi = mid - 1;
-      else if (n > midValue)
-        lo = mid + 1;
-      else {                                      // found a match
-        while (mid+1 < subReaders.length && starts[mid+1] == midValue) {
-          mid++;                                  // scan to last match
-        }
-        return mid;
-      }
-    }
-    return hi;
-  }
-
-  public boolean hasNorms(String field) throws IOException {
-    ensureOpen();
-    for (int i = 0; i < subReaders.length; i++) {
-      if (subReaders[i].hasNorms(field)) return true;
-    }
-    return false;
-  }
-
-  private byte[] ones;
-  private byte[] fakeNorms() {
-    if (ones==null) ones=SegmentReader.createFakeNorms(maxDoc());
-    return ones;
-  }
-
-  public synchronized byte[] norms(String field) throws IOException {
-    ensureOpen();
-    byte[] bytes = (byte[])normsCache.get(field);
-    if (bytes != null)
-      return bytes;          // cache hit
-    if (!hasNorms(field))
-      return fakeNorms();
-
-    bytes = new byte[maxDoc()];
-    for (int i = 0; i < subReaders.length; i++)
-      subReaders[i].norms(field, bytes, starts[i]);
-    normsCache.put(field, bytes);      // update cache
-    return bytes;
-  }
-
-  public synchronized void norms(String field, byte[] result, int offset)
-    throws IOException {
-    ensureOpen();
-    byte[] bytes = (byte[])normsCache.get(field);
-    if (bytes==null && !hasNorms(field)) bytes=fakeNorms();
-    if (bytes != null)                            // cache hit
-      System.arraycopy(bytes, 0, result, offset, maxDoc());
-
-    for (int i = 0; i < subReaders.length; i++)      // read from segments
-      subReaders[i].norms(field, result, offset + starts[i]);
-  }
-
-  protected void doSetNorm(int n, String field, byte value)
-    throws CorruptIndexException, IOException {
-    normsCache.remove(field);                         // clear cache
-    int i = readerIndex(n);                           // find segment num
-    subReaders[i].setNorm(n-starts[i], field, value); // dispatch
-  }
-
-  public TermEnum terms() throws IOException {
-    ensureOpen();
-    return new MultiTermEnum(subReaders, starts, null);
-  }
-
-  public TermEnum terms(Term term) throws IOException {
-    ensureOpen();
-    return new MultiTermEnum(subReaders, starts, term);
-  }
-
-  public int docFreq(Term t) throws IOException {
-    ensureOpen();
-    int total = 0;          // sum freqs in segments
-    for (int i = 0; i < subReaders.length; i++)
-      total += subReaders[i].docFreq(t);
-    return total;
-  }
-
-  public TermDocs termDocs() throws IOException {
-    ensureOpen();
-    return new MultiTermDocs(subReaders, starts);
-  }
-
-  public TermPositions termPositions() throws IOException {
-    ensureOpen();
-    return new MultiTermPositions(subReaders, starts);
-  }
-
-  protected void doCommit() throws IOException {
-    for (int i = 0; i < subReaders.length; i++)
-      subReaders[i].commit();
-  }
-
-  void startCommit() {
-    super.startCommit();
-    for (int i = 0; i < subReaders.length; i++) {
-      subReaders[i].startCommit();
-    }
-  }
-
-  void rollbackCommit() {
-    super.rollbackCommit();
-    for (int i = 0; i < subReaders.length; i++) {
-      subReaders[i].rollbackCommit();
-    }
-  }
-
-  protected synchronized void doClose() throws IOException {
-    for (int i = 0; i < subReaders.length; i++)
-      subReaders[i].close();
-  }
-
-  public Collection getFieldNames (IndexReader.FieldOption fieldNames) {
-    // maintain a unique set of field names
-    ensureOpen();
-    Set fieldSet = new HashSet();
-    for (int i = 0; i < subReaders.length; i++) {
-      IndexReader reader = subReaders[i];
-      Collection names = reader.getFieldNames(fieldNames);
-      fieldSet.addAll(names);
-    }
-    return fieldSet;
-  }
-
-}
-
-class MultiTermEnum extends TermEnum {
-  private SegmentMergeQueue queue;
-
-  private Term term;
-  private int docFreq;
-
-  public MultiTermEnum(IndexReader[] readers, int[] starts, Term t)
-    throws IOException {
-    queue = new SegmentMergeQueue(readers.length);
-    for (int i = 0; i < readers.length; i++) {
-      IndexReader reader = readers[i];
-      TermEnum termEnum;
-
-      if (t != null) {
-        termEnum = reader.terms(t);
-      } else
-        termEnum = reader.terms();
-
-      SegmentMergeInfo smi = new SegmentMergeInfo(starts[i], termEnum, reader);
-      if (t == null ? smi.next() : termEnum.term() != null)
-        queue.put(smi);          // initialize queue
-      else
-        smi.close();
-    }
-
-    if (t != null && queue.size() > 0) {
-      next();
-    }
-  }
-
-  public boolean next() throws IOException {
-    SegmentMergeInfo top = (SegmentMergeInfo)queue.top();
-    if (top == null) {
-      term = null;
-      return false;
-    }
-
-    term = top.term;
-    docFreq = 0;
-
-    while (top != null && term.compareTo(top.term) == 0) {
-      queue.pop();
-      docFreq += top.termEnum.docFreq();    // increment freq
-      if (top.next())
-        queue.put(top);          // restore queue
-      else
-        top.close();          // done with a segment
-      top = (SegmentMergeInfo)queue.top();
-    }
-    return true;
-  }
-
-  public Term term() {
-    return term;
-  }
-
-  public int docFreq() {
-    return docFreq;
-  }
-
-  public void close() throws IOException {
-    queue.close();
-  }
-}
-
-class MultiTermDocs implements TermDocs {
-  protected IndexReader[] readers;
-  protected int[] starts;
-  protected Term term;
-
-  protected int base = 0;
-  protected int pointer = 0;
-
-  private TermDocs[] readerTermDocs;
-  protected TermDocs current;              // == readerTermDocs[pointer]
-
-  public MultiTermDocs(IndexReader[] r, int[] s) {
-    readers = r;
-    starts = s;
-
-    readerTermDocs = new TermDocs[r.length];
-  }
-
-  public int doc() {
-    return base + current.doc();
-  }
-  public int freq() {
-    return current.freq();
-  }
-
-  public void seek(Term term) {
-    this.term = term;
-    this.base = 0;
-    this.pointer = 0;
-    this.current = null;
-  }
-
-  public void seek(TermEnum termEnum) throws IOException {
-    seek(termEnum.term());
-  }
-
-  public boolean next() throws IOException {
-    for(;;) {
-      if (current!=null && current.next()) {
-        return true;
-      }
-      else if (pointer < readers.length) {
-        base = starts[pointer];
-        current = termDocs(pointer++);
-      } else {
-        return false;
-      }
-    }
-  }
-
-  /** Optimized implementation. */
-  public int read(final int[] docs, final int[] freqs) throws IOException {
-    while (true) {
-      while (current == null) {
-        if (pointer < readers.length) {      // try next segment
-          base = starts[pointer];
-          current = termDocs(pointer++);
-        } else {
-          return 0;
-        }
-      }
-      int end = current.read(docs, freqs);
-      if (end == 0) {          // none left in segment
-        current = null;
-      } else {            // got some
-        final int b = base;        // adjust doc numbers
-        for (int i = 0; i < end; i++)
-          docs[i] += b;
-        return end;
-      }
-    }
-  }
-
-  /* A Possible future optimization could skip entire segments */
-  public boolean skipTo(int target) throws IOException {
-    for(;;) {
-      if (current != null && current.skipTo(target-base)) {
-        return true;
-      } else if (pointer < readers.length) {
-        base = starts[pointer];
-        current = termDocs(pointer++);
-      } else
-        return false;
-    }
-  }
-
-  private TermDocs termDocs(int i) throws IOException {
-    if (term == null)
-      return null;
-    TermDocs result = readerTermDocs[i];
-    if (result == null)
-      result = readerTermDocs[i] = termDocs(readers[i]);
-    result.seek(term);
-    return result;
-  }
-
-  protected TermDocs termDocs(IndexReader reader)
-    throws IOException {
-    return reader.termDocs();
-  }
-
-  public void close() throws IOException {
-    for (int i = 0; i < readerTermDocs.length; i++) {
-      if (readerTermDocs[i] != null)
-        readerTermDocs[i].close();
-    }
-  }
-}
-
-class MultiTermPositions extends MultiTermDocs implements TermPositions {
-  public MultiTermPositions(IndexReader[] r, int[] s) {
-    super(r,s);
-  }
-
-  protected TermDocs termDocs(IndexReader reader) throws IOException {
-    return (TermDocs)reader.termPositions();
-  }
-
-  public int nextPosition() throws IOException {
-    return ((TermPositions)current).nextPosition();
-  }
-
-  public int getPayloadLength() {
-    return ((TermPositions)current).getPayloadLength();
-  }
-
-  public byte[] getPayload(byte[] data, int offset) throws IOException {
-    return ((TermPositions)current).getPayload(data, offset);
-  }
-
-
-  // TODO: Remove warning after API has been finalized
-  public boolean isPayloadAvailable() {
-    return ((TermPositions) current).isPayloadAvailable();
-  }
-}
+    super(subReaders.length == 0 ? null : subReaders[0].directory(),
+          null, false, subReaders);
+  }
+
+  /**
+   * Checks recursively if all subreaders are up to date.
+   */
+  public boolean isCurrent() throws CorruptIndexException, IOException {
+    for (int i = 0; i < subReaders.length; i++) {
+      if (!subReaders[i].isCurrent()) {
+        return false;
+      }
+    }
+
+    // all subreaders are up to date
+    return true;
+  }
+
+  /** Not implemented.
+   * @throws UnsupportedOperationException
+   */
+  public long getVersion() {
+    throw new UnsupportedOperationException("MultiReader does not support this method.");
+  }
+}
MultiSegmentReader.java (new file)
@@ -0,0 +1,475 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.FieldSelector;
+import org.apache.lucene.store.Directory;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Hashtable;
+import java.util.Set;
+
+/**
+ * An IndexReader which reads indexes with multiple segments.
+ */
+class MultiSegmentReader extends IndexReader {
+  protected IndexReader[] subReaders;
+  private int[] starts;                           // 1st docno for each segment
+  private Hashtable normsCache = new Hashtable();
+  private int maxDoc = 0;
+  private int numDocs = -1;
+  private boolean hasDeletions = false;
+
+  /** Construct reading the named set of readers. */
+  MultiSegmentReader(Directory directory, SegmentInfos sis, boolean closeDirectory, IndexReader[] subReaders) {
+    super(directory, sis, closeDirectory);
+    initialize(subReaders);
+  }
+
+  private void initialize(IndexReader[] subReaders) {
+    this.subReaders = subReaders;
+    starts = new int[subReaders.length + 1];    // build starts array
+    for (int i = 0; i < subReaders.length; i++) {
+      starts[i] = maxDoc;
+      maxDoc += subReaders[i].maxDoc();      // compute maxDocs
+
+      if (subReaders[i].hasDeletions())
+        hasDeletions = true;
+    }
+    starts[subReaders.length] = maxDoc;
+  }
+
+
+  public TermFreqVector[] getTermFreqVectors(int n) throws IOException {
+    ensureOpen();
+    int i = readerIndex(n);        // find segment num
+    return subReaders[i].getTermFreqVectors(n - starts[i]); // dispatch to segment
+  }
+
+  public TermFreqVector getTermFreqVector(int n, String field)
+      throws IOException {
+    ensureOpen();
+    int i = readerIndex(n);        // find segment num
+    return subReaders[i].getTermFreqVector(n - starts[i], field);
+  }
+
+
+  public void getTermFreqVector(int docNumber, String field, TermVectorMapper mapper) throws IOException {
+    ensureOpen();
+    int i = readerIndex(docNumber);        // find segment num
+    subReaders[i].getTermFreqVector(docNumber - starts[i], field, mapper);
+  }
+
+  public void getTermFreqVector(int docNumber, TermVectorMapper mapper) throws IOException {
+    ensureOpen();
+    int i = readerIndex(docNumber);        // find segment num
+    subReaders[i].getTermFreqVector(docNumber - starts[i], mapper);
+  }
+
+  public boolean isOptimized() {
+    return false;
+  }
+
+  public synchronized int numDocs() {
+    // Don't call ensureOpen() here (it could affect performance)
+    if (numDocs == -1) {        // check cache
+      int n = 0;                // cache miss--recompute
+      for (int i = 0; i < subReaders.length; i++)
+        n += subReaders[i].numDocs();      // sum from readers
+      numDocs = n;
+    }
+    return numDocs;
+  }
+
+  public int maxDoc() {
+    // Don't call ensureOpen() here (it could affect performance)
+    return maxDoc;
+  }
+
+  // inherit javadoc
+  public Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
+    ensureOpen();
+    int i = readerIndex(n);                          // find segment num
+    return subReaders[i].document(n - starts[i], fieldSelector);    // dispatch to segment reader
+  }
+
+  public boolean isDeleted(int n) {
+    // Don't call ensureOpen() here (it could affect performance)
+    int i = readerIndex(n);                           // find segment num
+    return subReaders[i].isDeleted(n - starts[i]);    // dispatch to segment reader
+  }
+
+  public boolean hasDeletions() {
+    // Don't call ensureOpen() here (it could affect performance)
+    return hasDeletions;
+  }
+
+  protected void doDelete(int n) throws CorruptIndexException, IOException {
+    numDocs = -1;                             // invalidate cache
+    int i = readerIndex(n);                   // find segment num
+    subReaders[i].deleteDocument(n - starts[i]);      // dispatch to segment reader
+    hasDeletions = true;
+  }
+
+  protected void doUndeleteAll() throws CorruptIndexException, IOException {
+    for (int i = 0; i < subReaders.length; i++)
+      subReaders[i].undeleteAll();
+
+    hasDeletions = false;
+    numDocs = -1;                                 // invalidate cache
+  }
+
+  private int readerIndex(int n) {    // find reader for doc n:
+    int lo = 0;                                      // search starts array
+    int hi = subReaders.length - 1;                  // for first element less
+
+    while (hi >= lo) {
+      int mid = (lo + hi) >> 1;
+      int midValue = starts[mid];
+      if (n < midValue)
+        hi = mid - 1;
+      else if (n > midValue)
+        lo = mid + 1;
+      else {                                      // found a match
+        while (mid+1 < subReaders.length && starts[mid+1] == midValue) {
+          mid++;                                  // scan to last match
+        }
+        return mid;
+      }
+    }
+    return hi;
+  }
+
+  public boolean hasNorms(String field) throws IOException {
+    ensureOpen();
+    for (int i = 0; i < subReaders.length; i++) {
+      if (subReaders[i].hasNorms(field)) return true;
+    }
+    return false;
+  }
+
+  private byte[] ones;
+  private byte[] fakeNorms() {
+    if (ones==null) ones=SegmentReader.createFakeNorms(maxDoc());
+    return ones;
+  }
+
+  public synchronized byte[] norms(String field) throws IOException {
+    ensureOpen();
+    byte[] bytes = (byte[])normsCache.get(field);
+    if (bytes != null)
+      return bytes;          // cache hit
+    if (!hasNorms(field))
+      return fakeNorms();
+
+    bytes = new byte[maxDoc()];
+    for (int i = 0; i < subReaders.length; i++)
+      subReaders[i].norms(field, bytes, starts[i]);
+    normsCache.put(field, bytes);      // update cache
+    return bytes;
+  }
+
+  public synchronized void norms(String field, byte[] result, int offset)
+    throws IOException {
+    ensureOpen();
+    byte[] bytes = (byte[])normsCache.get(field);
+    if (bytes==null && !hasNorms(field)) bytes=fakeNorms();
+    if (bytes != null)                            // cache hit
+      System.arraycopy(bytes, 0, result, offset, maxDoc());
+
+    for (int i = 0; i < subReaders.length; i++)      // read from segments
+      subReaders[i].norms(field, result, offset + starts[i]);
+  }
+
+  protected void doSetNorm(int n, String field, byte value)
+    throws CorruptIndexException, IOException {
+    normsCache.remove(field);                         // clear cache
+    int i = readerIndex(n);                           // find segment num
+    subReaders[i].setNorm(n-starts[i], field, value); // dispatch
+  }
+
+  public TermEnum terms() throws IOException {
+    ensureOpen();
+    return new MultiTermEnum(subReaders, starts, null);
+  }
+
+  public TermEnum terms(Term term) throws IOException {
+    ensureOpen();
+    return new MultiTermEnum(subReaders, starts, term);
+  }
+
+  public int docFreq(Term t) throws IOException {
+    ensureOpen();
+    int total = 0;          // sum freqs in segments
+    for (int i = 0; i < subReaders.length; i++)
+      total += subReaders[i].docFreq(t);
+    return total;
+  }
+
+  public TermDocs termDocs() throws IOException {
+    ensureOpen();
+    return new MultiTermDocs(subReaders, starts);
+  }
+
+  public TermPositions termPositions() throws IOException {
+    ensureOpen();
+    return new MultiTermPositions(subReaders, starts);
+  }
+
+  protected void doCommit() throws IOException {
+    for (int i = 0; i < subReaders.length; i++)
+      subReaders[i].commit();
+  }
+
+  void startCommit() {
+    super.startCommit();
+    for (int i = 0; i < subReaders.length; i++) {
+      subReaders[i].startCommit();
+    }
+  }
+
+  void rollbackCommit() {
+    super.rollbackCommit();
+    for (int i = 0; i < subReaders.length; i++) {
+      subReaders[i].rollbackCommit();
+    }
+  }
+
+  protected synchronized void doClose() throws IOException {
+    for (int i = 0; i < subReaders.length; i++)
+      subReaders[i].close();
+  }
+
+  public Collection getFieldNames (IndexReader.FieldOption fieldNames) {
+    // maintain a unique set of field names
+    ensureOpen();
+    Set fieldSet = new HashSet();
+    for (int i = 0; i < subReaders.length; i++) {
+      IndexReader reader = subReaders[i];
+      Collection names = reader.getFieldNames(fieldNames);
+      fieldSet.addAll(names);
+    }
+    return fieldSet;
+  }
+
+
+  static class MultiTermEnum extends TermEnum {
+    private SegmentMergeQueue queue;
+
+    private Term term;
+    private int docFreq;
+
+    public MultiTermEnum(IndexReader[] readers, int[] starts, Term t)
+      throws IOException {
+      queue = new SegmentMergeQueue(readers.length);
+      for (int i = 0; i < readers.length; i++) {
+        IndexReader reader = readers[i];
+        TermEnum termEnum;
+
+        if (t != null) {
+          termEnum = reader.terms(t);
+        } else
+          termEnum = reader.terms();
+
+        SegmentMergeInfo smi = new SegmentMergeInfo(starts[i], termEnum, reader);
+        if (t == null ? smi.next() : termEnum.term() != null)
+          queue.put(smi);          // initialize queue
+        else
+          smi.close();
+      }
+
+      if (t != null && queue.size() > 0) {
+        next();
+      }
+    }
+
+    public boolean next() throws IOException {
+      SegmentMergeInfo top = (SegmentMergeInfo)queue.top();
+      if (top == null) {
+        term = null;
+        return false;
+      }
+
+      term = top.term;
+      docFreq = 0;
+
+      while (top != null && term.compareTo(top.term) == 0) {
+        queue.pop();
+        docFreq += top.termEnum.docFreq();    // increment freq
+        if (top.next())
+          queue.put(top);          // restore queue
+        else
+          top.close();          // done with a segment
+        top = (SegmentMergeInfo)queue.top();
+      }
+      return true;
+    }
+
+    public Term term() {
+      return term;
+    }
+
+    public int docFreq() {
+      return docFreq;
+    }
+
+    public void close() throws IOException {
+      queue.close();
+    }
+  }
+
+  static class MultiTermDocs implements TermDocs {
+    protected IndexReader[] readers;
+    protected int[] starts;
+    protected Term term;
+
+    protected int base = 0;
+    protected int pointer = 0;
+
+    private TermDocs[] readerTermDocs;
+    protected TermDocs current;              // == readerTermDocs[pointer]
+
+    public MultiTermDocs(IndexReader[] r, int[] s) {
+      readers = r;
+      starts = s;
+
+      readerTermDocs = new TermDocs[r.length];
+    }
+
+    public int doc() {
+      return base + current.doc();
+    }
+    public int freq() {
+      return current.freq();
+    }
+
+    public void seek(Term term) {
+      this.term = term;
+      this.base = 0;
+      this.pointer = 0;
+      this.current = null;
+    }
+
+    public void seek(TermEnum termEnum) throws IOException {
+      seek(termEnum.term());
+    }
+
+    public boolean next() throws IOException {
+      for(;;) {
+        if (current!=null && current.next()) {
+          return true;
+        }
+        else if (pointer < readers.length) {
+          base = starts[pointer];
+          current = termDocs(pointer++);
+        } else {
+          return false;
+        }
+      }
+    }
+
+    /** Optimized implementation. */
+    public int read(final int[] docs, final int[] freqs) throws IOException {
+      while (true) {
+        while (current == null) {
+          if (pointer < readers.length) {      // try next segment
+            base = starts[pointer];
+            current = termDocs(pointer++);
+          } else {
+            return 0;
+          }
+        }
+        int end = current.read(docs, freqs);
+        if (end == 0) {          // none left in segment
+          current = null;
+        } else {            // got some
+          final int b = base;        // adjust doc numbers
+          for (int i = 0; i < end; i++)
+            docs[i] += b;
+          return end;
+        }
+      }
+    }
+
+    /* A Possible future optimization could skip entire segments */
+    public boolean skipTo(int target) throws IOException {
+      for(;;) {
+        if (current != null && current.skipTo(target-base)) {
+          return true;
+        } else if (pointer < readers.length) {
+          base = starts[pointer];
+          current = termDocs(pointer++);
+        } else
+          return false;
+      }
+    }
+
+    private TermDocs termDocs(int i) throws IOException {
+      if (term == null)
+        return null;
+      TermDocs result = readerTermDocs[i];
+      if (result == null)
+        result = readerTermDocs[i] = termDocs(readers[i]);
+      result.seek(term);
+      return result;
+    }
+
+    protected TermDocs termDocs(IndexReader reader)
+      throws IOException {
+      return reader.termDocs();
+    }
+
+    public void close() throws IOException {
+      for (int i = 0; i < readerTermDocs.length; i++) {
+        if (readerTermDocs[i] != null)
+          readerTermDocs[i].close();
+      }
+    }
+  }
+
+  static class MultiTermPositions extends MultiTermDocs implements TermPositions {
+    public MultiTermPositions(IndexReader[] r, int[] s) {
+      super(r,s);
+    }
+
+    protected TermDocs termDocs(IndexReader reader) throws IOException {
+      return (TermDocs)reader.termPositions();
+    }
+
+    public int nextPosition() throws IOException {
+      return ((TermPositions)current).nextPosition();
+    }
+
+    public int getPayloadLength() {
+      return ((TermPositions)current).getPayloadLength();
+    }
+
+    public byte[] getPayload(byte[] data, int offset) throws IOException {
+      return ((TermPositions)current).getPayload(data, offset);
+    }
+
+
+    // TODO: Remove warning after API has been finalized
+    public boolean isPayloadAvailable() {
+      return ((TermPositions) current).isPayloadAvailable();
+    }
+  }
+}
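The dispatch machinery that moves into MultiSegmentReader hinges on the starts array: starts[i] holds the first global document number of subreader i, and readerIndex() binary-searches it to route a global docno to a (subreader, local docno) pair. A self-contained sketch of just that mapping, with made-up maxDoc values; ReaderIndexSketch and its names are illustrative, not Lucene code:

public class ReaderIndexSketch {
  public static void main(String[] args) {
    // Suppose three subreaders with maxDoc() of 5, 3 and 4. starts holds
    // the first global docno of each segment, plus the total at the end.
    int[] starts = {0, 5, 8, 12};
    int numSubReaders = 3;

    // Global doc 6 should land in subreader 1 as local doc 1.
    int n = 6;
    int i = readerIndex(n, starts, numSubReaders);
    System.out.println("subreader " + i + ", local doc " + (n - starts[i]));
  }

  // Same binary search as MultiSegmentReader.readerIndex(): find the
  // last entry in starts that is <= n.
  static int readerIndex(int n, int[] starts, int numSubReaders) {
    int lo = 0;                          // search starts array
    int hi = numSubReaders - 1;          // for first element less than n
    while (hi >= lo) {
      int mid = (lo + hi) >> 1;
      int midValue = starts[mid];
      if (n < midValue)
        hi = mid - 1;
      else if (n > midValue)
        lo = mid + 1;
      else {                             // found a match
        while (mid + 1 < numSubReaders && starts[mid + 1] == midValue)
          mid++;                         // skip past empty subreaders
        return mid;
      }
    }
    return hi;
  }
}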
TestMultiReader.java
@@ -61,7 +61,7 @@ public class TestMultiReader extends TestCase {
 
   public void testDocument() throws IOException {
     sis.read(dir);
-    MultiReader reader = new MultiReader(dir, sis, false, readers);
+    MultiSegmentReader reader = new MultiSegmentReader(dir, sis, false, readers);
     assertTrue(reader != null);
     Document newDoc1 = reader.document(0);
     assertTrue(newDoc1 != null);
@@ -76,7 +76,7 @@ public class TestMultiReader extends TestCase {
 
   public void testUndeleteAll() throws IOException {
     sis.read(dir);
-    MultiReader reader = new MultiReader(dir, sis, false, readers);
+    MultiSegmentReader reader = new MultiSegmentReader(dir, sis, false, readers);
     assertTrue(reader != null);
     assertEquals( 2, reader.numDocs() );
     reader.deleteDocument(0);
@@ -88,7 +88,7 @@ public class TestMultiReader extends TestCase {
     reader.commit();
     reader.close();
     sis.read(dir);
-    reader = new MultiReader(dir, sis, false, readers);
+    reader = new MultiSegmentReader(dir, sis, false, readers);
     assertEquals( 2, reader.numDocs() );
 
     reader.deleteDocument(0);
@@ -96,17 +96,17 @@ public class TestMultiReader extends TestCase {
     reader.commit();
     reader.close();
     sis.read(dir);
-    reader = new MultiReader(dir, sis, false, readers);
+    reader = new MultiSegmentReader(dir, sis, false, readers);
     assertEquals( 1, reader.numDocs() );
   }
 
 
   public void testTermVectors() {
-    MultiReader reader = new MultiReader(dir, sis, false, readers);
+    MultiSegmentReader reader = new MultiSegmentReader(dir, sis, false, readers);
     assertTrue(reader != null);
   }
 
-  /* known to fail, see https://issues.apache.org/jira/browse/LUCENE-781
+
   public void testIsCurrent() throws IOException {
     RAMDirectory ramDir1=new RAMDirectory();
     addDoc(ramDir1, "test foo", true);
@@ -135,6 +135,4 @@ public class TestMultiReader extends TestCase {
     iw.addDocument(doc);
     iw.close();
   }
-  */
-
 }