mirror of https://github.com/apache/lucene.git
Changed IndexReader so that it can be subclassed.
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150110 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent ebd6dc86fa
commit f0d57d81a3
CHANGES.txt | 12
@@ -40,6 +40,18 @@ $Id$
 10. Added Locale setting to QueryParser, for use by date range parsing.
 
+11. Changed IndexReader so that it can be subclassed by classes
+    outside of its package.  Previously it had package-private
+    abstract methods.  Also modified the index merging code so that it
+    can work on an arbitrary IndexReader implementation, and added a
+    new method, IndexWriter.addIndexes(IndexReader[]), to take
+    advantage of this. (cutting)
+
+12. Added a limit to the number of clauses which may be added to a
+    BooleanQuery.  The default limit is 1024 clauses.  This should
+    stop most OutOfMemoryExceptions by prefix, wildcard and fuzzy
+    queries which run amok. (cutting)
+
 1.3 RC1
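For the clause limit in entry 12, a minimal sketch of what hitting the limit looks like from application code. Whether the limit is raised as a RuntimeException at add() time (the TooManyClauses name appears in later releases) is an assumption, not confirmed by this diff:

  import org.apache.lucene.index.Term;
  import org.apache.lucene.search.BooleanQuery;
  import org.apache.lucene.search.TermQuery;

  public class ClauseLimitDemo {
    public static void main(String[] args) {
      BooleanQuery query = new BooleanQuery();
      try {
        // Add more than the default limit of 1024 clauses, as an expanded
        // prefix, wildcard, or fuzzy query might.
        for (int i = 0; i < 2000; i++)
          query.add(new TermQuery(new Term("id", Integer.toString(i))),
                    false, false);        // optional clause (1.3-era add signature)
      } catch (RuntimeException e) {
        // Assumed failure mode: a runtime exception replaces the old
        // OutOfMemoryExceptions when a query expands beyond the limit.
        System.err.println("clause limit exceeded: " + e);
      }
    }
  }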
@@ -57,6 +57,8 @@ package org.apache.lucene.index;
 import java.util.Hashtable;
 import java.util.Vector;
 import java.util.Enumeration;
+import java.util.Collection;
+import java.util.Iterator;
 import java.io.IOException;
 
 import org.apache.lucene.document.Document;
@@ -92,11 +94,10 @@ final class FieldInfos {
     }
   }
 
-  /** Merges in information from another FieldInfos. */
-  final void add(FieldInfos other) {
-    for (int i = 0; i < other.size(); i++) {
-      FieldInfo fi = other.fieldInfo(i);
-      add(fi.name, fi.isIndexed);
+  final void add(Collection names, boolean isIndexed) {
+    Iterator i = names.iterator();
+    while (i.hasNext()) {
+      add((String)i.next(), isIndexed);
     }
   }
 
@@ -0,0 +1,149 @@
package org.apache.lucene.index;

/* ====================================================================
 * The Apache Software License, Version 1.1
 *
 * Copyright (c) 2003 The Apache Software Foundation.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Apache" and "Apache Software Foundation" and
 *    "Apache Lucene" must not be used to endorse or promote products
 *    derived from this software without prior written permission.  For
 *    written permission, please contact apache@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache",
 *    "Apache Lucene", nor may "Apache" appear in their name, without
 *    prior written permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 */

import java.io.IOException;
import java.util.Collection;

import org.apache.lucene.document.Document;

/** A <code>FilterIndexReader</code> contains another IndexReader, which it
 * uses as its basic source of data, possibly transforming the data along the
 * way or providing additional functionality.  The class
 * <code>FilterIndexReader</code> itself simply implements all abstract methods
 * of <code>IndexReader</code> with versions that pass all requests to the
 * contained index reader.  Subclasses of <code>FilterIndexReader</code> may
 * further override some of these methods and may also provide additional
 * methods and fields.
 */
public class FilterIndexReader extends IndexReader {

  /** Base class for filtering {@link TermDocs} implementations. */
  public static class FilterTermDocs implements TermDocs {
    protected TermDocs in;

    public FilterTermDocs(TermDocs in) { this.in = in; }

    public void seek(Term term) throws IOException { in.seek(term); }
    public void seek(TermEnum enum) throws IOException { in.seek(enum); }
    public int doc() { return in.doc(); }
    public int freq() { return in.freq(); }
    public boolean next() throws IOException { return in.next(); }
    public int read(int[] docs, int[] freqs) throws IOException {
      return in.read(docs, freqs);
    }
    public boolean skipTo(int i) throws IOException { return in.skipTo(i); }
    public void close() throws IOException { in.close(); }
  }

  /** Base class for filtering {@link TermPositions} implementations. */
  public static class FilterTermPositions
      extends FilterTermDocs implements TermPositions {

    public FilterTermPositions(TermPositions in) { super(in); }

    public int nextPosition() throws IOException {
      return ((TermPositions)in).nextPosition();
    }
  }

  /** Base class for filtering {@link TermEnum} implementations. */
  public static class FilterTermEnum extends TermEnum {
    protected TermEnum in;

    public FilterTermEnum(TermEnum in) { this.in = in; }

    public boolean next() throws IOException { return in.next(); }
    public Term term() { return in.term(); }
    public int docFreq() { return in.docFreq(); }
    public void close() throws IOException { in.close(); }
  }

  protected IndexReader in;

  public FilterIndexReader(IndexReader in) {
    super(in.directory());
    this.in = in;
  }

  public int numDocs() { return in.numDocs(); }
  public int maxDoc() { return in.maxDoc(); }

  public Document document(int n) throws IOException { return in.document(n); }

  public boolean isDeleted(int n) { return in.isDeleted(n); }
  public boolean hasDeletions() { return in.hasDeletions(); }

  public byte[] norms(String f) throws IOException { return in.norms(f); }

  public TermEnum terms() throws IOException { return in.terms(); }
  public TermEnum terms(Term t) throws IOException { return in.terms(t); }

  public int docFreq(Term t) throws IOException { return in.docFreq(t); }

  public TermDocs termDocs() throws IOException { return in.termDocs(); }
  public TermPositions termPositions() throws IOException {
    return in.termPositions();
  }

  protected void doDelete(int n) throws IOException { in.doDelete(n); }
  protected void doClose() throws IOException { in.doClose(); }

  public Collection getFieldNames() throws IOException {
    return in.getFieldNames();
  }
  public Collection getFieldNames(boolean indexed) throws IOException {
    return in.getFieldNames(indexed);
  }
}
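To illustrate the decorator contract described in the javadoc above, here is a minimal hypothetical subclass (not part of this commit) that counts stored-document fetches and delegates everything else:

  import java.io.IOException;
  import org.apache.lucene.document.Document;
  import org.apache.lucene.index.FilterIndexReader;
  import org.apache.lucene.index.IndexReader;

  class CountingIndexReader extends FilterIndexReader {
    private int fetches = 0;

    public CountingIndexReader(IndexReader in) {
      super(in);                  // all methods delegate to the wrapped reader
    }

    public Document document(int n) throws IOException {
      fetches++;                  // observe the call, then pass it through
      return in.document(n);
    }

    public int getFetchCount() { return fetches; }
  }

Because every other method forwards to in, such a wrapper can be used anywhere an IndexReader is expected, including the new IndexWriter.addIndexes(IndexReader[]) below.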
@@ -86,7 +86,7 @@ public abstract class IndexReader {
     segmentInfosAge = Long.MAX_VALUE;
   }
 
-  Directory directory;
+  private Directory directory;
   private Lock writeLock;
 
   // used to determine whether index has changed since reader was opened
@@ -131,6 +131,9 @@ public abstract class IndexReader {
     }
   }
 
+  /** Returns the directory this index resides in. */
+  public Directory directory() { return directory; }
+
   /** Returns the time the index in the named directory was last modified. */
   public static long lastModified(String directory) throws IOException {
     return lastModified(new File(directory));
@@ -194,6 +197,9 @@ public abstract class IndexReader {
   /** Returns true if document <i>n</i> has been deleted */
   public abstract boolean isDeleted(int n);
 
+  /** Returns true if any documents have been deleted */
+  public abstract boolean hasDeletions();
+
   /** Returns the byte-encoded normalization factor for the named field of
    * every document.  This is used by the search code to score documents.
    *
@@ -286,7 +292,10 @@ public abstract class IndexReader {
     doDelete(docNum);
   }
 
-  abstract void doDelete(int docNum) throws IOException;
+  /** Implements deletion of the document numbered <code>docNum</code>.
+   * Applications should call {@link #delete(int)} or {@link #delete(Term)}.
+   */
+  protected abstract void doDelete(int docNum) throws IOException;
 
   /** Deletes all documents containing <code>term</code>.
     This is useful if one uses a document field to hold a unique ID string for
@@ -323,7 +332,7 @@ public abstract class IndexReader {
   }
 
   /** Implements close. */
-  abstract void doClose() throws IOException;
+  protected abstract void doClose() throws IOException;
 
   /** Release the write lock, if needed. */
   protected final void finalize() throws IOException {
@@ -324,6 +324,37 @@ public class IndexWriter {
       optimize();                           // final cleanup
   }
 
+  /** Merges the provided indexes into this index.
+   * <p>After this completes, the index is optimized. */
+  public synchronized void addIndexes(IndexReader[] readers)
+      throws IOException {
+
+    optimize();                             // start with zero or 1 seg
+
+    String mergedName = newSegmentName();
+    SegmentMerger merger = new SegmentMerger(directory, mergedName, false);
+
+    if (segmentInfos.size() == 1)           // add existing index, if any
+      merger.add(new SegmentReader(segmentInfos.info(0)));
+
+    for (int i = 0; i < readers.length; i++)  // add new indexes
+      merger.add(readers[i]);
+
+    int docCount = merger.merge();          // merge 'em
+
+    segmentInfos.setSize(0);                // pop old infos & add new
+    segmentInfos.addElement(new SegmentInfo(mergedName, docCount, directory));
+
+    synchronized (directory) {              // in- & inter-process sync
+      new Lock.With(directory.makeLock("commit.lock")) {
+        public Object doBody() throws IOException {
+          segmentInfos.write(directory);    // commit changes
+          return null;
+        }
+      }.run();
+    }
+  }
+
   /** Merges all RAM-resident segments. */
   private final void flushRamSegments() throws IOException {
     int minSegment = segmentInfos.size()-1;
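A short sketch of the new method in use, merging a source index into a destination index; the directory paths are hypothetical:

  import org.apache.lucene.analysis.standard.StandardAnalyzer;
  import org.apache.lucene.index.IndexReader;
  import org.apache.lucene.index.IndexWriter;

  public class AddIndexesDemo {
    public static void main(String[] args) throws Exception {
      // Open the destination for appending (create == false).
      IndexWriter writer =
          new IndexWriter("/tmp/dest-index", new StandardAnalyzer(), false);

      // Any IndexReader implementation will do, including a
      // FilterIndexReader subclass, since the merger now works
      // against the public IndexReader interface.
      IndexReader reader = IndexReader.open("/tmp/source-index");

      writer.addIndexes(new IndexReader[] { reader });  // merges, then optimizes

      reader.close();
      writer.close();
    }
  }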
@@ -379,12 +410,12 @@ public class IndexWriter {
     for (int i = minSegment; i < segmentInfos.size(); i++) {
       SegmentInfo si = segmentInfos.info(i);
       if (infoStream != null)
         infoStream.print(" " + si.name + " (" + si.docCount + " docs)");
-      SegmentReader reader = new SegmentReader(si);
+      IndexReader reader = new SegmentReader(si);
       merger.add(reader);
-      if ((reader.directory == this.directory) ||  // if we own the directory
-          (reader.directory == this.ramDirectory))
-        segmentsToDelete.addElement(reader);       // queue segment for deletion
+      if ((reader.directory() == this.directory) ||  // if we own the directory
+          (reader.directory() == this.ramDirectory))
+        segmentsToDelete.addElement(reader);       // queue segment for deletion
       mergedDocCount += reader.numDocs();
     }
     if (infoStream != null) {
@@ -420,10 +451,10 @@ public class IndexWriter {
 
     for (int i = 0; i < segments.size(); i++) {
       SegmentReader reader = (SegmentReader)segments.elementAt(i);
-      if (reader.directory == this.directory)
+      if (reader.directory() == this.directory)
         deleteFiles(reader.files(), deletable);    // try to delete our files
       else
-        deleteFiles(reader.files(), reader.directory); // delete, eg, RAM files
+        deleteFiles(reader.files(), reader.directory()); // delete other files
     }
 
     writeDeleteableFiles(deletable);               // note files we can't delete
@@ -297,6 +297,11 @@ public class MultipleTermPositions
     throw new UnsupportedOperationException();
   }
 
+  public void seek(TermEnum termEnum) throws IOException {
+    throw new UnsupportedOperationException();
+  }
+
+
   /**
    * Describe <code>read</code> method here.
    *

@@ -311,4 +316,5 @@ public class MultipleTermPositions
   {
     throw new UnsupportedOperationException();
   }
+
 }
@@ -60,30 +60,29 @@ import org.apache.lucene.util.BitVector;
 final class SegmentMergeInfo {
   Term term;
   int base;
-  SegmentTermEnum termEnum;
-  SegmentReader reader;
-  SegmentTermPositions postings;
+  TermEnum termEnum;
+  IndexReader reader;
+  TermPositions postings;
   int[] docMap = null;                      // maps around deleted docs
 
-  SegmentMergeInfo(int b, SegmentTermEnum te, SegmentReader r)
+  SegmentMergeInfo(int b, TermEnum te, IndexReader r)
     throws IOException {
     base = b;
     reader = r;
     termEnum = te;
     term = te.term();
-    postings = new SegmentTermPositions(r);
+    postings = reader.termPositions();
 
-    if (reader.deletedDocs != null) {
-      // build array which maps document numbers around deletions
-      BitVector deletedDocs = reader.deletedDocs;
+    // build array which maps document numbers around deletions
+    if (reader.hasDeletions()) {
       int maxDoc = reader.maxDoc();
       docMap = new int[maxDoc];
       int j = 0;
       for (int i = 0; i < maxDoc; i++) {
-        if (deletedDocs.get(i))
-          docMap[i] = -1;
-        else
-          docMap[i] = j++;
+        if (reader.isDeleted(i))
+          docMap[i] = -1;
+        else
+          docMap[i] = j++;
       }
     }
   }
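To make the docMap construction above concrete, a standalone sketch (independent of the Lucene classes) for a five-document segment in which docs 1 and 3 are deleted:

  public class DocMapDemo {
    public static void main(String[] args) {
      boolean[] deleted = { false, true, false, true, false };  // docs 1 and 3 deleted
      int[] docMap = new int[deleted.length];
      int j = 0;                    // next document number in the merged segment
      for (int i = 0; i < deleted.length; i++) {
        if (deleted[i])
          docMap[i] = -1;           // deleted docs get no merged number
        else
          docMap[i] = j++;          // surviving docs are renumbered densely
      }
      // docMap is now [0, -1, 1, -1, 2]: old doc 2 becomes merged doc 1, etc.
      for (int i = 0; i < docMap.length; i++)
        System.out.println(i + " -> " + docMap[i]);
    }
  }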
@@ -83,29 +83,32 @@ final class SegmentMerger {
     useCompoundFile = compoundFile;
   }
 
-  final void add(SegmentReader reader) {
+  final void add(IndexReader reader) {
     readers.addElement(reader);
   }
 
-  final SegmentReader segmentReader(int i) {
-    return (SegmentReader)readers.elementAt(i);
+  final IndexReader segmentReader(int i) {
+    return (IndexReader)readers.elementAt(i);
   }
 
-  final void merge() throws IOException {
+  final int merge() throws IOException {
+    int value;
     try {
       mergeFields();
       mergeTerms();
-      mergeNorms();
+      value = mergeNorms();
 
     } finally {
       for (int i = 0; i < readers.size(); i++) {  // close readers
-        SegmentReader reader = (SegmentReader)readers.elementAt(i);
-        reader.close();
+        IndexReader reader = (IndexReader)readers.elementAt(i);
+        reader.close();
       }
     }
 
     if (useCompoundFile)
       createCompoundFile();
 
+    return value;
   }
 
   private final void createCompoundFile()
@@ -149,8 +152,9 @@ final class SegmentMerger {
   private final void mergeFields() throws IOException {
     fieldInfos = new FieldInfos();              // merge field names
     for (int i = 0; i < readers.size(); i++) {
-      SegmentReader reader = (SegmentReader)readers.elementAt(i);
-      fieldInfos.add(reader.fieldInfos);
+      IndexReader reader = (IndexReader)readers.elementAt(i);
+      fieldInfos.add(reader.getFieldNames(true), true);
+      fieldInfos.add(reader.getFieldNames(false), false);
     }
     fieldInfos.write(directory, segment + ".fnm");
 
@@ -158,12 +162,11 @@ final class SegmentMerger {
         new FieldsWriter(directory, segment, fieldInfos);
     try {
       for (int i = 0; i < readers.size(); i++) {
-        SegmentReader reader = (SegmentReader)readers.elementAt(i);
-        BitVector deletedDocs = reader.deletedDocs;
-        int maxDoc = reader.maxDoc();
-        for (int j = 0; j < maxDoc; j++)
-          if (deletedDocs == null || !deletedDocs.get(j)) // skip deleted docs
-            fieldsWriter.addDocument(reader.document(j));
+        IndexReader reader = (IndexReader)readers.elementAt(i);
+        int maxDoc = reader.maxDoc();
+        for (int j = 0; j < maxDoc; j++)
+          if (!reader.isDeleted(j))               // skip deleted docs
+            fieldsWriter.addDocument(reader.document(j));
       }
     } finally {
       fieldsWriter.close();
@@ -196,8 +199,8 @@ final class SegmentMerger {
     queue = new SegmentMergeQueue(readers.size());
     int base = 0;
     for (int i = 0; i < readers.size(); i++) {
-      SegmentReader reader = (SegmentReader)readers.elementAt(i);
-      SegmentTermEnum termEnum = (SegmentTermEnum)reader.terms();
+      IndexReader reader = (IndexReader)readers.elementAt(i);
+      TermEnum termEnum = reader.terms();
       SegmentMergeInfo smi = new SegmentMergeInfo(base, termEnum, reader);
       base += reader.numDocs();
       if (smi.next())
|
|||
termInfosWriter.add(smis[0].term, termInfo);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private final int appendPostings(SegmentMergeInfo[] smis, int n)
|
||||
throws IOException {
|
||||
int lastDoc = 0;
|
||||
int df = 0; // number of docs w/ term
|
||||
for (int i = 0; i < n; i++) {
|
||||
SegmentMergeInfo smi = smis[i];
|
||||
SegmentTermPositions postings = smi.postings;
|
||||
TermPositions postings = smi.postings;
|
||||
int base = smi.base;
|
||||
int[] docMap = smi.docMap;
|
||||
smi.termEnum.termInfo(termInfo);
|
||||
postings.seek(termInfo);
|
||||
postings.seek(smi.termEnum);
|
||||
while (postings.next()) {
|
||||
int doc;
|
||||
if (docMap == null)
|
||||
doc = base + postings.doc; // no deletions
|
||||
else
|
||||
doc = base + docMap[postings.doc]; // re-map around deletions
|
||||
int doc = postings.doc();
|
||||
if (docMap != null)
|
||||
doc = docMap[doc]; // map around deletions
|
||||
doc += base; // convert to merged space
|
||||
|
||||
if (doc < lastDoc)
|
||||
throw new IllegalStateException("docs out of order");
|
||||
|
||||
int docCode = (doc - lastDoc) << 1; // use low bit to flag freq=1
|
||||
lastDoc = doc;
|
||||
|
||||
int freq = postings.freq;
|
||||
|
||||
int freq = postings.freq();
|
||||
if (freq == 1) {
|
||||
freqOutput.writeVInt(docCode | 1); // write doc & freq=1
|
||||
} else {
|
||||
freqOutput.writeVInt(docCode); // write doc
|
||||
freqOutput.writeVInt(freq); // write frequency in doc
|
||||
}
|
||||
|
||||
|
||||
int lastPosition = 0; // write position deltas
|
||||
for (int j = 0; j < freq; j++) {
|
||||
int position = postings.nextPosition();
|
||||
for (int j = 0; j < freq; j++) {
|
||||
int position = postings.nextPosition();
|
||||
proxOutput.writeVInt(position - lastPosition);
|
||||
lastPosition = position;
|
||||
}
|
||||
|
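The docCode line above packs the document-number delta and a freq==1 flag into a single VInt, so single-occurrence postings need no separate frequency value. A standalone sketch of the encode/decode arithmetic:

  public class DocCodeDemo {
    public static void main(String[] args) {
      int lastDoc = 10, doc = 17, freq = 1;

      // Encode: shift the delta left one bit; the low bit flags freq == 1.
      int docCode = (doc - lastDoc) << 1;
      if (freq == 1)
        docCode |= 1;

      // Decode: the low bit recovers the flag, the rest recovers the delta.
      int delta = docCode >>> 1;
      boolean freqIsOne = (docCode & 1) != 0;
      System.out.println("delta=" + delta + " freqIsOne=" + freqIsOne);
      // prints: delta=7 freqIsOne=true
    }
  }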
@ -291,33 +292,31 @@ final class SegmentMerger {
|
|||
}
|
||||
return df;
|
||||
}
|
||||
|
||||
private final void mergeNorms() throws IOException {
|
||||
private final int mergeNorms() throws IOException {
|
||||
int docCount = 0;
|
||||
for (int i = 0; i < fieldInfos.size(); i++) {
|
||||
FieldInfo fi = fieldInfos.fieldInfo(i);
|
||||
if (fi.isIndexed) {
|
||||
OutputStream output = directory.createFile(segment + ".f" + i);
|
||||
try {
|
||||
for (int j = 0; j < readers.size(); j++) {
|
||||
SegmentReader reader = (SegmentReader)readers.elementAt(j);
|
||||
BitVector deletedDocs = reader.deletedDocs;
|
||||
InputStream input = reader.normStream(fi.name);
|
||||
OutputStream output = directory.createFile(segment + ".f" + i);
|
||||
try {
|
||||
for (int j = 0; j < readers.size(); j++) {
|
||||
IndexReader reader = (IndexReader)readers.elementAt(j);
|
||||
byte[] input = reader.norms(fi.name);
|
||||
int maxDoc = reader.maxDoc();
|
||||
try {
|
||||
for (int k = 0; k < maxDoc; k++) {
|
||||
byte norm = input != null ? input.readByte() : (byte)0;
|
||||
if (deletedDocs == null || !deletedDocs.get(k))
|
||||
output.writeByte(norm);
|
||||
for (int k = 0; k < maxDoc; k++) {
|
||||
byte norm = input != null ? input[k] : (byte)0;
|
||||
if (!reader.isDeleted(k)) {
|
||||
output.writeByte(norm);
|
||||
docCount++;
|
||||
}
|
||||
} finally {
|
||||
if (input != null)
|
||||
input.close();
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
output.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
output.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
return docCount;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@@ -110,9 +110,9 @@ final class SegmentReader extends IndexReader {
     segment = si.name;
 
     // Use compound file directory for some files, if it exists
-    Directory cfsDir = directory;
-    if (directory.fileExists(segment + ".cfs")) {
-      cfsReader = new CompoundFileReader(directory, segment + ".cfs");
+    Directory cfsDir = directory();
+    if (directory().fileExists(segment + ".cfs")) {
+      cfsReader = new CompoundFileReader(directory(), segment + ".cfs");
       cfsDir = cfsReader;
     }
 
@@ -124,7 +124,7 @@ final class SegmentReader extends IndexReader {
 
     // NOTE: the bitvector is stored using the regular directory, not cfs
     if (hasDeletions(si))
-      deletedDocs = new BitVector(directory, segment + ".del");
+      deletedDocs = new BitVector(directory(), segment + ".del");
 
     // make sure that all index files have been read or are kept open
     // so that if an index update removes them we'll still have them
@@ -133,16 +133,15 @@ final class SegmentReader extends IndexReader {
     openNorms(cfsDir);
   }
 
-
-  final synchronized void doClose() throws IOException {
+  protected final synchronized void doClose() throws IOException {
     if (deletedDocsDirty) {
-      synchronized (directory) {                 // in- & inter-process sync
-        new Lock.With(directory.makeLock(IndexWriter.COMMIT_LOCK_NAME),
+      synchronized (directory()) {               // in- & inter-process sync
+        new Lock.With(directory().makeLock(IndexWriter.COMMIT_LOCK_NAME),
                       IndexWriter.COMMIT_LOCK_TIMEOUT) {
           public Object doBody() throws IOException {
-            deletedDocs.write(directory, segment + ".tmp");
-            directory.renameFile(segment + ".tmp", segment + ".del");
-            directory.touchFile("segments");
+            deletedDocs.write(directory(), segment + ".tmp");
+            directory().renameFile(segment + ".tmp", segment + ".del");
+            directory().touchFile("segments");
             return null;
           }
         }.run();
@@ -164,18 +163,22 @@ final class SegmentReader extends IndexReader {
       cfsReader.close();
 
     if (closeDirectory)
-      directory.close();
+      directory().close();
   }
 
   static final boolean hasDeletions(SegmentInfo si) throws IOException {
     return si.dir.fileExists(si.name + ".del");
   }
 
+  public boolean hasDeletions() {
+    return deletedDocs != null;
+  }
+
   static final boolean usesCompoundFile(SegmentInfo si) throws IOException {
     return si.dir.fileExists(si.name + ".cfs");
   }
 
-  final synchronized void doDelete(int docNum) throws IOException {
+  protected final synchronized void doDelete(int docNum) throws IOException {
     if (deletedDocs == null)
       deletedDocs = new BitVector(maxDoc());
     deletedDocsDirty = true;
@@ -190,7 +193,7 @@ final class SegmentReader extends IndexReader {
 
     for (int i=0; i<ext.length; i++) {
       String name = segment + "." + ext[i];
-      if (directory.fileExists(name))
+      if (directory().fileExists(name))
         files.addElement(name);
     }
 
@@ -78,6 +78,15 @@ class SegmentTermDocs implements TermDocs {
     seek(ti);
   }
 
+  public void seek(TermEnum enum) throws IOException {
+    TermInfo ti;
+    if (enum instanceof SegmentTermEnum)          // optimized case
+      ti = ((SegmentTermEnum)enum).termInfo();
+    else                                          // punt case
+      ti = parent.tis.get(enum.term());
+    seek(ti);
+  }
+
   void seek(TermInfo ti) throws IOException {
     if (ti == null) {
       freqCount = 0;
@@ -106,6 +106,7 @@ extends SegmentTermDocs implements TermPositions {
 
   public final int read(final int[] docs, final int[] freqs)
       throws IOException {
-    throw new RuntimeException();
+    throw new UnsupportedOperationException();
   }
+
 }
@@ -76,6 +76,7 @@ final class SegmentsReader extends IndexReader
   private Hashtable normsCache = new Hashtable();
   private int maxDoc = 0;
   private int numDocs = -1;
+  private boolean hasDeletions = false;
 
   SegmentsReader(Directory directory, SegmentReader[] r) throws IOException {
     super(directory);
@@ -84,6 +85,9 @@ final class SegmentsReader extends IndexReader
     for (int i = 0; i < readers.length; i++) {
       starts[i] = maxDoc;
       maxDoc += readers[i].maxDoc();              // compute maxDocs
+
+      if (readers[i].hasDeletions())
+        hasDeletions = true;
     }
     starts[readers.length] = maxDoc;
   }
@@ -112,10 +116,13 @@ final class SegmentsReader extends IndexReader
     return readers[i].isDeleted(n - starts[i]);   // dispatch to segment reader
   }
 
-  final synchronized void doDelete(int n) throws IOException {
+  public boolean hasDeletions() { return hasDeletions; }
+
+  protected final synchronized void doDelete(int n) throws IOException {
     numDocs = -1;                                 // invalidate cache
     int i = readerIndex(n);                       // find segment num
     readers[i].doDelete(n - starts[i]);           // dispatch to segment reader
+    hasDeletions = true;
   }
 
   private final int readerIndex(int n) {          // find reader for doc n:
@@ -174,7 +181,7 @@ final class SegmentsReader extends IndexReader
     return new SegmentsTermPositions(readers, starts);
   }
 
-  final synchronized void doClose() throws IOException {
+  protected final synchronized void doClose() throws IOException {
     for (int i = 0; i < readers.length; i++)
       readers[i].close();
   }
@@ -309,6 +316,10 @@ class SegmentsTermDocs implements TermDocs {
     this.current = null;
   }
 
+  public void seek(TermEnum termEnum) throws IOException {
+    seek(termEnum.term());
+  }
+
   public final boolean next() throws IOException {
     if (current != null && current.next()) {
       return true;
@@ -389,4 +400,5 @@ class SegmentsTermPositions extends SegmentsTermDocs implements TermPositions {
   public final int nextPosition() throws IOException {
     return ((SegmentTermPositions)current).nextPosition();
   }
+
 }
@@ -71,6 +71,11 @@ public interface TermDocs {
    */
   void seek(Term term) throws IOException;
 
+  /** Sets this to the data for the current term in a {@link TermEnum}.
+   * This may be optimized in some implementations.
+   */
+  void seek(TermEnum termEnum) throws IOException;
+
   /** Returns the current document number.  <p> This is invalid until {@link
       #next()} is called for the first time.*/
   int doc();
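A sketch of the new overload in use, counting documents for each term of a hypothetical "contents" field; reader setup is elided, and this usage pattern is an illustration rather than code from this commit:

  // Assumes an already-open IndexReader named reader.
  TermEnum terms = reader.terms(new Term("contents", ""));
  TermDocs termDocs = reader.termDocs();
  try {
    while (terms.term() != null) {
      termDocs.seek(terms);        // seek by enum: SegmentTermDocs reuses the
                                   // enum's cached TermInfo instead of
                                   // re-searching the term dictionary
      int count = 0;
      while (termDocs.next())
        count++;
      System.out.println(terms.term() + " in " + count + " docs");
      if (!terms.next())
        break;
    }
  } finally {
    termDocs.close();
    terms.close();
  }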
@@ -0,0 +1,175 @@
package org.apache.lucene.index;

/* ====================================================================
 * The Apache Software License, Version 1.1
 *
 * Copyright (c) 2001, 2002, 2003 The Apache Software Foundation.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Apache" and "Apache Software Foundation" and
 *    "Apache Lucene" must not be used to endorse or promote products
 *    derived from this software without prior written permission.  For
 *    written permission, please contact apache@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache",
 *    "Apache Lucene", nor may "Apache" appear in their name, without
 *    prior written permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 */


import junit.framework.TestCase;
import junit.framework.TestSuite;
import junit.textui.TestRunner;
import junit.framework.TestResult;

import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;

import java.util.Collection;
import java.io.IOException;

public class TestFilterIndexReader extends TestCase {

  private static class TestReader extends FilterIndexReader {

    /** Filter that only permits terms containing 'e'. */
    private static class TestTermEnum extends FilterTermEnum {
      public TestTermEnum(TermEnum enum)
          throws IOException {
        super(enum);
      }

      /** Scan for terms containing the letter 'e'. */
      public boolean next() throws IOException {
        while (in.next()) {
          if (in.term().text().indexOf('e') != -1)
            return true;
        }
        return false;
      }
    }

    /** Filter that only returns odd numbered documents. */
    private static class TestTermPositions extends FilterTermPositions {
      public TestTermPositions(TermPositions in)
          throws IOException {
        super(in);
      }

      /** Scan for odd numbered documents. */
      public boolean next() throws IOException {
        while (in.next()) {
          if ((in.doc() % 2) == 1)
            return true;
        }
        return false;
      }
    }

    public TestReader(IndexReader reader) {
      super(reader);
    }

    /** Filter terms with TestTermEnum. */
    public TermEnum terms() throws IOException {
      return new TestTermEnum(in.terms());
    }

    /** Filter positions with TestTermPositions. */
    public TermPositions termPositions() throws IOException {
      return new TestTermPositions(in.termPositions());
    }
  }


  /** Main for running test case by itself. */
  public static void main(String args[]) {
    TestRunner.run (new TestSuite(TestFilterIndexReader.class));
  }

  /**
   * Tests the FilterIndexReader implementation.
   * @throws Exception on error
   */
  public void testFilterIndexReader() throws Exception {
    RAMDirectory directory = new RAMDirectory();
    IndexWriter writer =
        new IndexWriter(directory, new WhitespaceAnalyzer(), true);

    Document d1 = new Document();
    d1.add(Field.Text("default","one two"));
    writer.addDocument(d1);

    Document d2 = new Document();
    d2.add(Field.Text("default","one three"));
    writer.addDocument(d2);

    Document d3 = new Document();
    d3.add(Field.Text("default","two four"));
    writer.addDocument(d3);

    writer.close();

    IndexReader reader = new TestReader(IndexReader.open(directory));

    TermEnum terms = reader.terms();
    while (terms.next()) {
      assertTrue(terms.term().text().indexOf('e') != -1);
    }
    terms.close();

    TermPositions positions = reader.termPositions(new Term("default", "one"));
    while (positions.next()) {
      assertTrue((positions.doc() % 2) == 1);
    }

    reader.close();
  }
}