From f0d57d81a37d62083b86cce316b0b780f7a46e1b Mon Sep 17 00:00:00 2001 From: Doug Cutting Date: Tue, 21 Oct 2003 17:59:17 +0000 Subject: [PATCH] Changed IndexReader so that it can be subclassed. git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150110 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 12 ++ .../org/apache/lucene/index/FieldInfos.java | 11 +- .../lucene/index/FilterIndexReader.java | 149 +++++++++++++++ .../org/apache/lucene/index/IndexReader.java | 15 +- .../org/apache/lucene/index/IndexWriter.java | 47 ++++- .../lucene/index/MultipleTermPositions.java | 6 + .../apache/lucene/index/SegmentMergeInfo.java | 23 ++- .../apache/lucene/index/SegmentMerger.java | 103 +++++------ .../apache/lucene/index/SegmentReader.java | 31 ++-- .../apache/lucene/index/SegmentTermDocs.java | 9 + .../lucene/index/SegmentTermPositions.java | 3 +- .../apache/lucene/index/SegmentsReader.java | 16 +- .../org/apache/lucene/index/TermDocs.java | 5 + .../lucene/index/TestFilterIndexReader.java | 175 ++++++++++++++++++ 14 files changed, 508 insertions(+), 97 deletions(-) create mode 100644 src/java/org/apache/lucene/index/FilterIndexReader.java create mode 100644 src/test/org/apache/lucene/index/TestFilterIndexReader.java diff --git a/CHANGES.txt b/CHANGES.txt index 32b9aa12bb9..abddcbe3cad 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -40,6 +40,18 @@ $Id$ 10. Added Locale setting to QueryParser, for use by date range parsing. +11. Changed IndexReader so that it can be subclassed by classes + outside of its package. Previously it had package-private + abstract methods. Also modified the index merging code so that it + can work on an arbitrary IndexReader implementation, and added a + new method, IndexWriter.addIndexes(IndexReader[]), to take + advantage of this. (cutting) + +12. Added a limit to the number of clauses which may be added to a + BooleanQuery. The default limit is 1024 clauses. This should + stop most OutOfMemoryExceptions by prefix, wildcard and fuzzy + queries which run amok. (cutting) + 1.3 RC1 diff --git a/src/java/org/apache/lucene/index/FieldInfos.java b/src/java/org/apache/lucene/index/FieldInfos.java index 95748f0b10b..bc2bb9ce858 100644 --- a/src/java/org/apache/lucene/index/FieldInfos.java +++ b/src/java/org/apache/lucene/index/FieldInfos.java @@ -57,6 +57,8 @@ package org.apache.lucene.index; import java.util.Hashtable; import java.util.Vector; import java.util.Enumeration; +import java.util.Collection; +import java.util.Iterator; import java.io.IOException; import org.apache.lucene.document.Document; @@ -92,11 +94,10 @@ final class FieldInfos { } } - /** Merges in information from another FieldInfos. */ - final void add(FieldInfos other) { - for (int i = 0; i < other.size(); i++) { - FieldInfo fi = other.fieldInfo(i); - add(fi.name, fi.isIndexed); + final void add(Collection names, boolean isIndexed) { + Iterator i = names.iterator(); + while (i.hasNext()) { + add((String)i.next(), isIndexed); } } diff --git a/src/java/org/apache/lucene/index/FilterIndexReader.java b/src/java/org/apache/lucene/index/FilterIndexReader.java new file mode 100644 index 00000000000..ce33830cf8e --- /dev/null +++ b/src/java/org/apache/lucene/index/FilterIndexReader.java @@ -0,0 +1,149 @@ +package org.apache.lucene.index; + +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2003 The Apache Software Foundation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * . + */ + +import java.io.IOException; +import java.util.Collection; + +import org.apache.lucene.document.Document; + +/** A FilterIndexReader contains another IndexReader, which it + * uses as its basic source of data, possibly transforming the data along the + * way or providing additional functionality. The class + * FilterIndexReader itself simply implements all abstract methods + * of IndexReader with versions that pass all requests to the + * contained index reader. Subclasses of FilterIndexReader may + * further override some of these methods and may also provide additional + * methods and fields. +*/ +public class FilterIndexReader extends IndexReader { + + /** Base class for filtering {@link TermDocs} implementations. */ + public static class FilterTermDocs implements TermDocs { + protected TermDocs in; + + public FilterTermDocs(TermDocs in) { this.in = in; } + + public void seek(Term term) throws IOException { in.seek(term); } + public void seek(TermEnum enum) throws IOException { in.seek(enum); } + public int doc() { return in.doc(); } + public int freq() { return in.freq(); } + public boolean next() throws IOException { return in.next(); } + public int read(int[] docs, int[] freqs) throws IOException { + return in.read(docs, freqs); + } + public boolean skipTo(int i) throws IOException { return in.skipTo(i); } + public void close() throws IOException { in.close(); } + } + + /** Base class for filtering {@link TermPositions} implementations. */ + public static class FilterTermPositions + extends FilterTermDocs implements TermPositions { + + public FilterTermPositions(TermPositions in) { super(in); } + + public int nextPosition() throws IOException { + return ((TermPositions)in).nextPosition(); + } + } + + /** Base class for filtering {@link TermEnum} implementations. */ + public static class FilterTermEnum extends TermEnum { + protected TermEnum in; + + public FilterTermEnum(TermEnum in) { this.in = in; } + + public boolean next() throws IOException { return in.next(); } + public Term term() { return in.term(); } + public int docFreq() { return in.docFreq(); } + public void close() throws IOException { in.close(); } + } + + protected IndexReader in; + + public FilterIndexReader(IndexReader in) { + super(in.directory()); + this.in = in; + } + + public int numDocs() { return in.numDocs(); } + public int maxDoc() { return in.maxDoc(); } + + public Document document(int n) throws IOException {return in.document(n);} + + public boolean isDeleted(int n) { return in.isDeleted(n); } + public boolean hasDeletions() { return in.hasDeletions(); } + + public byte[] norms(String f) throws IOException { return in.norms(f); } + + public TermEnum terms() throws IOException { return in.terms(); } + public TermEnum terms(Term t) throws IOException { return in.terms(t); } + + public int docFreq(Term t) throws IOException { return in.docFreq(t); } + + public TermDocs termDocs() throws IOException { return in.termDocs(); } + public TermPositions termPositions() throws IOException { + return in.termPositions(); + } + + protected void doDelete(int n) throws IOException { in.doDelete(n); } + protected void doClose() throws IOException { in.doClose(); } + + public Collection getFieldNames() throws IOException { + return in.getFieldNames(); + } + public Collection getFieldNames(boolean indexed) throws IOException { + return in.getFieldNames(indexed); + } +} diff --git a/src/java/org/apache/lucene/index/IndexReader.java b/src/java/org/apache/lucene/index/IndexReader.java index 280183ae19d..7bd1fbe3606 100644 --- a/src/java/org/apache/lucene/index/IndexReader.java +++ b/src/java/org/apache/lucene/index/IndexReader.java @@ -86,7 +86,7 @@ public abstract class IndexReader { segmentInfosAge = Long.MAX_VALUE; } - Directory directory; + private Directory directory; private Lock writeLock; //used to determine whether index has chaged since reader was opened @@ -131,6 +131,9 @@ public abstract class IndexReader { } } + /** Returns the directory this index resides in. */ + public Directory directory() { return directory; } + /** Returns the time the index in the named directory was last modified. */ public static long lastModified(String directory) throws IOException { return lastModified(new File(directory)); @@ -194,6 +197,9 @@ public abstract class IndexReader { /** Returns true if document n has been deleted */ public abstract boolean isDeleted(int n); + /** Returns true if any documents have been deleted */ + public abstract boolean hasDeletions(); + /** Returns the byte-encoded normalization factor for the named field of * every document. This is used by the search code to score documents. * @@ -286,7 +292,10 @@ public abstract class IndexReader { doDelete(docNum); } - abstract void doDelete(int docNum) throws IOException; + /** Implements deletion of the document numbered docNum. + * Applications should call {@link #delete(int)} or {@link #delete(Term)}. + */ + protected abstract void doDelete(int docNum) throws IOException; /** Deletes all documents containing term. This is useful if one uses a document field to hold a unique ID string for @@ -323,7 +332,7 @@ public abstract class IndexReader { } /** Implements close. */ - abstract void doClose() throws IOException; + protected abstract void doClose() throws IOException; /** Release the write lock, if needed. */ protected final void finalize() throws IOException { diff --git a/src/java/org/apache/lucene/index/IndexWriter.java b/src/java/org/apache/lucene/index/IndexWriter.java index a0137bfe023..dea9a3cbed8 100644 --- a/src/java/org/apache/lucene/index/IndexWriter.java +++ b/src/java/org/apache/lucene/index/IndexWriter.java @@ -324,6 +324,37 @@ public class IndexWriter { optimize(); // final cleanup } + /** Merges the provided indexes into this index. + *

After this completes, the index is optimized. */ + public synchronized void addIndexes(IndexReader[] readers) + throws IOException { + + optimize(); // start with zero or 1 seg + + String mergedName = newSegmentName(); + SegmentMerger merger = new SegmentMerger(directory, mergedName, false); + + if (segmentInfos.size() == 1) // add existing index, if any + merger.add(new SegmentReader(segmentInfos.info(0))); + + for (int i = 0; i < readers.length; i++) // add new indexes + merger.add(readers[i]); + + int docCount = merger.merge(); // merge 'em + + segmentInfos.setSize(0); // pop old infos & add new + segmentInfos.addElement(new SegmentInfo(mergedName, docCount, directory)); + + synchronized (directory) { // in- & inter-process sync + new Lock.With(directory.makeLock("commit.lock")) { + public Object doBody() throws IOException { + segmentInfos.write(directory); // commit changes + return null; + } + }.run(); + } + } + /** Merges all RAM-resident segments. */ private final void flushRamSegments() throws IOException { int minSegment = segmentInfos.size()-1; @@ -379,12 +410,12 @@ public class IndexWriter { for (int i = minSegment; i < segmentInfos.size(); i++) { SegmentInfo si = segmentInfos.info(i); if (infoStream != null) - infoStream.print(" " + si.name + " (" + si.docCount + " docs)"); - SegmentReader reader = new SegmentReader(si); + infoStream.print(" " + si.name + " (" + si.docCount + " docs)"); + IndexReader reader = new SegmentReader(si); merger.add(reader); - if ((reader.directory == this.directory) || // if we own the directory - (reader.directory == this.ramDirectory)) - segmentsToDelete.addElement(reader); // queue segment for deletion + if ((reader.directory()==this.directory) || // if we own the directory + (reader.directory()==this.ramDirectory)) + segmentsToDelete.addElement(reader); // queue segment for deletion mergedDocCount += reader.numDocs(); } if (infoStream != null) { @@ -420,10 +451,10 @@ public class IndexWriter { for (int i = 0; i < segments.size(); i++) { SegmentReader reader = (SegmentReader)segments.elementAt(i); - if (reader.directory == this.directory) - deleteFiles(reader.files(), deletable); // try to delete our files + if (reader.directory() == this.directory) + deleteFiles(reader.files(), deletable); // try to delete our files else - deleteFiles(reader.files(), reader.directory); // delete, eg, RAM files + deleteFiles(reader.files(), reader.directory()); // delete other files } writeDeleteableFiles(deletable); // note files we can't delete diff --git a/src/java/org/apache/lucene/index/MultipleTermPositions.java b/src/java/org/apache/lucene/index/MultipleTermPositions.java index 45cc72a3d67..16c51c35c4a 100644 --- a/src/java/org/apache/lucene/index/MultipleTermPositions.java +++ b/src/java/org/apache/lucene/index/MultipleTermPositions.java @@ -297,6 +297,11 @@ public class MultipleTermPositions throw new UnsupportedOperationException(); } + public void seek(TermEnum termEnum) throws IOException { + throw new UnsupportedOperationException(); + } + + /** * Describe read method here. * @@ -311,4 +316,5 @@ public class MultipleTermPositions { throw new UnsupportedOperationException(); } + } diff --git a/src/java/org/apache/lucene/index/SegmentMergeInfo.java b/src/java/org/apache/lucene/index/SegmentMergeInfo.java index b3d581cd71c..c6bcb53ee0b 100644 --- a/src/java/org/apache/lucene/index/SegmentMergeInfo.java +++ b/src/java/org/apache/lucene/index/SegmentMergeInfo.java @@ -60,30 +60,29 @@ import org.apache.lucene.util.BitVector; final class SegmentMergeInfo { Term term; int base; - SegmentTermEnum termEnum; - SegmentReader reader; - SegmentTermPositions postings; + TermEnum termEnum; + IndexReader reader; + TermPositions postings; int[] docMap = null; // maps around deleted docs - SegmentMergeInfo(int b, SegmentTermEnum te, SegmentReader r) + SegmentMergeInfo(int b, TermEnum te, IndexReader r) throws IOException { base = b; reader = r; termEnum = te; term = te.term(); - postings = new SegmentTermPositions(r); + postings = reader.termPositions(); - if (reader.deletedDocs != null) { - // build array which maps document numbers around deletions - BitVector deletedDocs = reader.deletedDocs; + // build array which maps document numbers around deletions + if (reader.hasDeletions()) { int maxDoc = reader.maxDoc(); docMap = new int[maxDoc]; int j = 0; for (int i = 0; i < maxDoc; i++) { - if (deletedDocs.get(i)) - docMap[i] = -1; - else - docMap[i] = j++; + if (reader.isDeleted(i)) + docMap[i] = -1; + else + docMap[i] = j++; } } } diff --git a/src/java/org/apache/lucene/index/SegmentMerger.java b/src/java/org/apache/lucene/index/SegmentMerger.java index a507f5006b6..f8a07aa138b 100644 --- a/src/java/org/apache/lucene/index/SegmentMerger.java +++ b/src/java/org/apache/lucene/index/SegmentMerger.java @@ -83,29 +83,32 @@ final class SegmentMerger { useCompoundFile = compoundFile; } - final void add(SegmentReader reader) { + final void add(IndexReader reader) { readers.addElement(reader); } - final SegmentReader segmentReader(int i) { - return (SegmentReader)readers.elementAt(i); + final IndexReader segmentReader(int i) { + return (IndexReader)readers.elementAt(i); } - final void merge() throws IOException { + final int merge() throws IOException { + int value; try { mergeFields(); mergeTerms(); - mergeNorms(); + value = mergeNorms(); } finally { for (int i = 0; i < readers.size(); i++) { // close readers - SegmentReader reader = (SegmentReader)readers.elementAt(i); - reader.close(); + IndexReader reader = (IndexReader)readers.elementAt(i); + reader.close(); } } if (useCompoundFile) createCompoundFile(); + + return value; } private final void createCompoundFile() @@ -149,8 +152,9 @@ final class SegmentMerger { private final void mergeFields() throws IOException { fieldInfos = new FieldInfos(); // merge field names for (int i = 0; i < readers.size(); i++) { - SegmentReader reader = (SegmentReader)readers.elementAt(i); - fieldInfos.add(reader.fieldInfos); + IndexReader reader = (IndexReader)readers.elementAt(i); + fieldInfos.add(reader.getFieldNames(true), true); + fieldInfos.add(reader.getFieldNames(false), false); } fieldInfos.write(directory, segment + ".fnm"); @@ -158,12 +162,11 @@ final class SegmentMerger { new FieldsWriter(directory, segment, fieldInfos); try { for (int i = 0; i < readers.size(); i++) { - SegmentReader reader = (SegmentReader)readers.elementAt(i); - BitVector deletedDocs = reader.deletedDocs; - int maxDoc = reader.maxDoc(); - for (int j = 0; j < maxDoc; j++) - if (deletedDocs == null || !deletedDocs.get(j)) // skip deleted docs - fieldsWriter.addDocument(reader.document(j)); + IndexReader reader = (IndexReader)readers.elementAt(i); + int maxDoc = reader.maxDoc(); + for (int j = 0; j < maxDoc; j++) + if (!reader.isDeleted(j)) // skip deleted docs + fieldsWriter.addDocument(reader.document(j)); } } finally { fieldsWriter.close(); @@ -196,8 +199,8 @@ final class SegmentMerger { queue = new SegmentMergeQueue(readers.size()); int base = 0; for (int i = 0; i < readers.size(); i++) { - SegmentReader reader = (SegmentReader)readers.elementAt(i); - SegmentTermEnum termEnum = (SegmentTermEnum)reader.terms(); + IndexReader reader = (IndexReader)readers.elementAt(i); + TermEnum termEnum = reader.terms(); SegmentMergeInfo smi = new SegmentMergeInfo(base, termEnum, reader); base += reader.numDocs(); if (smi.next()) @@ -246,42 +249,40 @@ final class SegmentMerger { termInfosWriter.add(smis[0].term, termInfo); } } - + private final int appendPostings(SegmentMergeInfo[] smis, int n) throws IOException { int lastDoc = 0; int df = 0; // number of docs w/ term for (int i = 0; i < n; i++) { SegmentMergeInfo smi = smis[i]; - SegmentTermPositions postings = smi.postings; + TermPositions postings = smi.postings; int base = smi.base; int[] docMap = smi.docMap; - smi.termEnum.termInfo(termInfo); - postings.seek(termInfo); + postings.seek(smi.termEnum); while (postings.next()) { - int doc; - if (docMap == null) - doc = base + postings.doc; // no deletions - else - doc = base + docMap[postings.doc]; // re-map around deletions + int doc = postings.doc(); + if (docMap != null) + doc = docMap[doc]; // map around deletions + doc += base; // convert to merged space if (doc < lastDoc) throw new IllegalStateException("docs out of order"); int docCode = (doc - lastDoc) << 1; // use low bit to flag freq=1 lastDoc = doc; - - int freq = postings.freq; + + int freq = postings.freq(); if (freq == 1) { freqOutput.writeVInt(docCode | 1); // write doc & freq=1 } else { freqOutput.writeVInt(docCode); // write doc freqOutput.writeVInt(freq); // write frequency in doc } - + int lastPosition = 0; // write position deltas - for (int j = 0; j < freq; j++) { - int position = postings.nextPosition(); + for (int j = 0; j < freq; j++) { + int position = postings.nextPosition(); proxOutput.writeVInt(position - lastPosition); lastPosition = position; } @@ -291,33 +292,31 @@ final class SegmentMerger { } return df; } - - private final void mergeNorms() throws IOException { + private final int mergeNorms() throws IOException { + int docCount = 0; for (int i = 0; i < fieldInfos.size(); i++) { FieldInfo fi = fieldInfos.fieldInfo(i); if (fi.isIndexed) { - OutputStream output = directory.createFile(segment + ".f" + i); - try { - for (int j = 0; j < readers.size(); j++) { - SegmentReader reader = (SegmentReader)readers.elementAt(j); - BitVector deletedDocs = reader.deletedDocs; - InputStream input = reader.normStream(fi.name); + OutputStream output = directory.createFile(segment + ".f" + i); + try { + for (int j = 0; j < readers.size(); j++) { + IndexReader reader = (IndexReader)readers.elementAt(j); + byte[] input = reader.norms(fi.name); int maxDoc = reader.maxDoc(); - try { - for (int k = 0; k < maxDoc; k++) { - byte norm = input != null ? input.readByte() : (byte)0; - if (deletedDocs == null || !deletedDocs.get(k)) - output.writeByte(norm); + for (int k = 0; k < maxDoc; k++) { + byte norm = input != null ? input[k] : (byte)0; + if (!reader.isDeleted(k)) { + output.writeByte(norm); + docCount++; } - } finally { - if (input != null) - input.close(); - } - } - } finally { - output.close(); - } + } + } + } finally { + output.close(); + } } } + return docCount; } + } diff --git a/src/java/org/apache/lucene/index/SegmentReader.java b/src/java/org/apache/lucene/index/SegmentReader.java index a1590325157..c885d02161e 100644 --- a/src/java/org/apache/lucene/index/SegmentReader.java +++ b/src/java/org/apache/lucene/index/SegmentReader.java @@ -110,9 +110,9 @@ final class SegmentReader extends IndexReader { segment = si.name; // Use compound file directory for some files, if it exists - Directory cfsDir = directory; - if (directory.fileExists(segment + ".cfs")) { - cfsReader = new CompoundFileReader(directory, segment + ".cfs"); + Directory cfsDir = directory(); + if (directory().fileExists(segment + ".cfs")) { + cfsReader = new CompoundFileReader(directory(), segment + ".cfs"); cfsDir = cfsReader; } @@ -124,7 +124,7 @@ final class SegmentReader extends IndexReader { // NOTE: the bitvector is stored using the regular directory, not cfs if (hasDeletions(si)) - deletedDocs = new BitVector(directory, segment + ".del"); + deletedDocs = new BitVector(directory(), segment + ".del"); // make sure that all index files have been read or are kept open // so that if an index update removes them we'll still have them @@ -133,16 +133,15 @@ final class SegmentReader extends IndexReader { openNorms(cfsDir); } - - final synchronized void doClose() throws IOException { + protected final synchronized void doClose() throws IOException { if (deletedDocsDirty) { - synchronized (directory) { // in- & inter-process sync - new Lock.With(directory.makeLock(IndexWriter.COMMIT_LOCK_NAME), + synchronized (directory()) { // in- & inter-process sync + new Lock.With(directory().makeLock(IndexWriter.COMMIT_LOCK_NAME), IndexWriter.COMMIT_LOCK_TIMEOUT) { public Object doBody() throws IOException { - deletedDocs.write(directory, segment + ".tmp"); - directory.renameFile(segment + ".tmp", segment + ".del"); - directory.touchFile("segments"); + deletedDocs.write(directory(), segment + ".tmp"); + directory().renameFile(segment + ".tmp", segment + ".del"); + directory().touchFile("segments"); return null; } }.run(); @@ -164,18 +163,22 @@ final class SegmentReader extends IndexReader { cfsReader.close(); if (closeDirectory) - directory.close(); + directory().close(); } static final boolean hasDeletions(SegmentInfo si) throws IOException { return si.dir.fileExists(si.name + ".del"); } + public boolean hasDeletions() { + return deletedDocs != null; + } + static final boolean usesCompoundFile(SegmentInfo si) throws IOException { return si.dir.fileExists(si.name + ".cfs"); } - final synchronized void doDelete(int docNum) throws IOException { + protected final synchronized void doDelete(int docNum) throws IOException { if (deletedDocs == null) deletedDocs = new BitVector(maxDoc()); deletedDocsDirty = true; @@ -190,7 +193,7 @@ final class SegmentReader extends IndexReader { for (int i=0; i This is invalid until {@link #next()} is called for the first time.*/ int doc(); diff --git a/src/test/org/apache/lucene/index/TestFilterIndexReader.java b/src/test/org/apache/lucene/index/TestFilterIndexReader.java new file mode 100644 index 00000000000..fcd5c4d0645 --- /dev/null +++ b/src/test/org/apache/lucene/index/TestFilterIndexReader.java @@ -0,0 +1,175 @@ +package org.apache.lucene.index; + +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001, 2002, 2003 The Apache Software Foundation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Lucene" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Lucene", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * . + */ + + +import junit.framework.TestCase; +import junit.framework.TestSuite; +import junit.textui.TestRunner; +import junit.framework.TestResult; + +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Searcher; +import org.apache.lucene.search.Hits; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.analysis.WhitespaceAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; + +import java.util.Collection; +import java.io.IOException; + +public class TestFilterIndexReader extends TestCase { + + private static class TestReader extends FilterIndexReader { + + /** Filter that only permits terms containing 'e'.*/ + private static class TestTermEnum extends FilterTermEnum { + public TestTermEnum(TermEnum enum) + throws IOException { + super(enum); + } + + /** Scan for terms containing the letter 'e'.*/ + public boolean next() throws IOException { + while (in.next()) { + if (in.term().text().indexOf('e') != -1) + return true; + } + return false; + } + } + + /** Filter that only returns odd numbered documents. */ + private static class TestTermPositions extends FilterTermPositions { + public TestTermPositions(TermPositions in) + throws IOException { + super(in); + } + + /** Scan for odd numbered documents. */ + public boolean next() throws IOException { + while (in.next()) { + if ((in.doc() % 2) == 1) + return true; + } + return false; + } + } + + public TestReader(IndexReader reader) { + super(reader); + } + + /** Filter terms with TestTermEnum. */ + public TermEnum terms() throws IOException { + return new TestTermEnum(in.terms()); + } + + /** Filter positions with TestTermPositions. */ + public TermPositions termPositions() throws IOException { + return new TestTermPositions(in.termPositions()); + } + } + + + /** Main for running test case by itself. */ + public static void main(String args[]) { + TestRunner.run (new TestSuite(TestIndexReader.class)); + } + + /** + * Tests the IndexReader.getFieldNames implementation + * @throws Exception on error + */ + public void testFilterIndexReader() throws Exception { + RAMDirectory directory = new RAMDirectory(); + IndexWriter writer = + new IndexWriter(directory, new WhitespaceAnalyzer(), true); + + Document d1 = new Document(); + d1.add(Field.Text("default","one two")); + writer.addDocument(d1); + + Document d2 = new Document(); + d2.add(Field.Text("default","one three")); + writer.addDocument(d2); + + Document d3 = new Document(); + d3.add(Field.Text("default","two four")); + writer.addDocument(d3); + + writer.close(); + + IndexReader reader = new TestReader(IndexReader.open(directory)); + + TermEnum terms = reader.terms(); + while (terms.next()) { + assertTrue(terms.term().text().indexOf('e') != -1); + } + terms.close(); + + TermPositions positions = reader.termPositions(new Term("default", "one")); + while (positions.next()) { + assertTrue((positions.doc() % 2) == 1); + } + + reader.close(); + } +}