From 99fdf6af1f8312403be094a3c7b75db478b29453 Mon Sep 17 00:00:00 2001 From: Doug Cutting Date: Thu, 16 Sep 2004 21:13:37 +0000 Subject: [PATCH] Permit unbuffered index input. git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150517 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 18 +- .../lucene/index/CompoundFileReader.java | 27 ++- .../lucene/index/CompoundFileWriter.java | 6 +- .../org/apache/lucene/index/FieldInfos.java | 12 +- .../org/apache/lucene/index/FieldsReader.java | 10 +- .../org/apache/lucene/index/IndexWriter.java | 4 +- .../org/apache/lucene/index/SegmentInfos.java | 6 +- .../apache/lucene/index/SegmentReader.java | 18 +- .../apache/lucene/index/SegmentTermDocs.java | 10 +- .../apache/lucene/index/SegmentTermEnum.java | 8 +- .../lucene/index/SegmentTermPositions.java | 6 +- .../apache/lucene/index/TermInfosReader.java | 9 +- .../lucene/index/TermVectorsReader.java | 16 +- .../lucene/store/BufferedIndexInput.java | 109 +++++++++ .../org/apache/lucene/store/Directory.java | 13 +- .../org/apache/lucene/store/FSDirectory.java | 26 ++- .../org/apache/lucene/store/IndexInput.java | 156 +++++++++++++ .../org/apache/lucene/store/InputStream.java | 210 +----------------- .../org/apache/lucene/util/BitVector.java | 4 +- src/java/overview.html | 2 +- src/test/org/apache/lucene/StoreTest.java | 4 +- ...ckInputStream.java => MockIndexInput.java} | 12 +- .../apache/lucene/index/TestCompoundFile.java | 82 +++---- ...stInputStream.java => TestIndexInput.java} | 14 +- .../org/apache/lucene/store/_TestHelper.java | 6 +- 25 files changed, 450 insertions(+), 338 deletions(-) create mode 100644 src/java/org/apache/lucene/store/BufferedIndexInput.java create mode 100644 src/java/org/apache/lucene/store/IndexInput.java rename src/test/org/apache/lucene/index/{MockInputStream.java => MockIndexInput.java} (87%) rename src/test/org/apache/lucene/index/{TestInputStream.java => TestIndexInput.java} (65%) diff --git a/CHANGES.txt b/CHANGES.txt index 
abe64566cd1..35cba46c4b6 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -48,14 +48,18 @@ $Id$ 9. PhraseQuery and PhrasePrefixQuery now allow the explicit specification of relative positions. (Christoph Goller) - 10. QueryParser changes: Fix for ArrayIndexOutOfBoundsExceptions - (patch #9110); some unused method parameters removed; The ability - to specify a minimum similarity for FuzzyQuery has been added. - (Christoph Goller) +10. QueryParser changes: Fix for ArrayIndexOutOfBoundsExceptions + (patch #9110); some unused method parameters removed; The ability + to specify a minimum similarity for FuzzyQuery has been added. + (Christoph Goller) - 11. Added support for binary stored fields (patch #29370) - (Drew Farris and Bernhard Messer via Christoph) - +11. Added support for binary stored fields (patch #29370) + (Drew Farris and Bernhard Messer via Christoph) + +12. Permit unbuffered Directory implementations (e.g., using mmap). + InputStream is replaced by the new classes IndexInput and + BufferedIndexInput. InputStream is now deprecated and FSDirectory + is now subclassable. 
(cutting) 1.4.1 diff --git a/src/java/org/apache/lucene/index/CompoundFileReader.java b/src/java/org/apache/lucene/index/CompoundFileReader.java index 99b126dfb51..5e109d8de45 100644 --- a/src/java/org/apache/lucene/index/CompoundFileReader.java +++ b/src/java/org/apache/lucene/index/CompoundFileReader.java @@ -17,7 +17,8 @@ package org.apache.lucene.index; */ import org.apache.lucene.store.Directory; -import org.apache.lucene.store.InputStream; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.BufferedIndexInput; import org.apache.lucene.store.OutputStream; import org.apache.lucene.store.Lock; import java.util.HashMap; @@ -44,7 +45,7 @@ class CompoundFileReader extends Directory { private Directory directory; private String fileName; - private InputStream stream; + private IndexInput stream; private HashMap entries = new HashMap(); @@ -57,7 +58,7 @@ class CompoundFileReader extends Directory { boolean success = false; try { - stream = dir.openFile(name); + stream = dir.openInput(name); // read the directory and init files int count = stream.readVInt(); @@ -109,7 +110,7 @@ class CompoundFileReader extends Directory { stream = null; } - public synchronized InputStream openFile(String id) + public synchronized IndexInput openInput(String id) throws IOException { if (stream == null) @@ -119,7 +120,7 @@ class CompoundFileReader extends Directory { if (entry == null) throw new IOException("No sub-file with id " + id + " found"); - return new CSInputStream(stream, entry.offset, entry.length); + return new CSIndexInput(stream, entry.offset, entry.length); } /** Returns an array of strings, one for each file in the directory. */ @@ -182,21 +183,22 @@ class CompoundFileReader extends Directory { throw new UnsupportedOperationException(); } - /** Implementation of an InputStream that reads from a portion of the + /** Implementation of an IndexInput that reads from a portion of the * compound file. 
The visibility is left as "package" *only* because * this helps with testing since JUnit test cases in a different class * can then access package fields of this class. */ - static final class CSInputStream extends InputStream { + static final class CSIndexInput extends BufferedIndexInput { - InputStream base; + IndexInput base; long fileOffset; + long length; - CSInputStream(final InputStream base, final long fileOffset, final long length) + CSIndexInput(final IndexInput base, final long fileOffset, final long length) { this.base = base; this.fileOffset = fileOffset; - this.length = length; // variable in the superclass + this.length = length; } /** Expert: implements buffer refill. Reads bytes from the current @@ -226,5 +228,10 @@ class CompoundFileReader extends Directory { /** Closes the stream to further operations. */ public void close() {} + public long length() { + return length; + } + + } } diff --git a/src/java/org/apache/lucene/index/CompoundFileWriter.java b/src/java/org/apache/lucene/index/CompoundFileWriter.java index 2207ec71b06..10341be817d 100644 --- a/src/java/org/apache/lucene/index/CompoundFileWriter.java +++ b/src/java/org/apache/lucene/index/CompoundFileWriter.java @@ -18,7 +18,7 @@ package org.apache.lucene.index; import org.apache.lucene.store.Directory; import org.apache.lucene.store.OutputStream; -import org.apache.lucene.store.InputStream; +import org.apache.lucene.store.IndexInput; import java.util.LinkedList; import java.util.HashSet; import java.util.Iterator; @@ -196,11 +196,11 @@ final class CompoundFileWriter { private void copyFile(FileEntry source, OutputStream os, byte buffer[]) throws IOException { - InputStream is = null; + IndexInput is = null; try { long startPtr = os.getFilePointer(); - is = directory.openFile(source.file); + is = directory.openInput(source.file); long length = is.length(); long remainder = length; int chunk = buffer.length; diff --git a/src/java/org/apache/lucene/index/FieldInfos.java 
b/src/java/org/apache/lucene/index/FieldInfos.java index 050b71e3594..14bbed1ec81 100644 --- a/src/java/org/apache/lucene/index/FieldInfos.java +++ b/src/java/org/apache/lucene/index/FieldInfos.java @@ -24,7 +24,7 @@ import org.apache.lucene.document.Field; import org.apache.lucene.store.Directory; import org.apache.lucene.store.OutputStream; -import org.apache.lucene.store.InputStream; +import org.apache.lucene.store.IndexInput; /** Access to the Field Info file that describes document fields and whether or * not they are indexed. Each segment has a separate Field Info file. Objects @@ -42,13 +42,13 @@ final class FieldInfos { /** * Construct a FieldInfos object using the directory and the name of the file - * InputStream - * @param d The directory to open the InputStream from - * @param name The name of the file to open the InputStream from in the Directory + * IndexInput + * @param d The directory to open the IndexInput from + * @param name The name of the file to open the IndexInput from in the Directory * @throws IOException */ FieldInfos(Directory d, String name) throws IOException { - InputStream input = d.openFile(name); + IndexInput input = d.openInput(name); try { read(input); } finally { @@ -189,7 +189,7 @@ final class FieldInfos { } } - private void read(InputStream input) throws IOException { + private void read(IndexInput input) throws IOException { int size = input.readVInt();//read in the size for (int i = 0; i < size; i++) { String name = input.readString().intern(); diff --git a/src/java/org/apache/lucene/index/FieldsReader.java b/src/java/org/apache/lucene/index/FieldsReader.java index 08cfa712d84..c8f37873b84 100644 --- a/src/java/org/apache/lucene/index/FieldsReader.java +++ b/src/java/org/apache/lucene/index/FieldsReader.java @@ -21,7 +21,7 @@ import java.io.IOException; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.store.Directory; -import org.apache.lucene.store.InputStream; 
+import org.apache.lucene.store.IndexInput; /** * Class responsible for access to stored document fields. @@ -32,15 +32,15 @@ import org.apache.lucene.store.InputStream; */ final class FieldsReader { private FieldInfos fieldInfos; - private InputStream fieldsStream; - private InputStream indexStream; + private IndexInput fieldsStream; + private IndexInput indexStream; private int size; FieldsReader(Directory d, String segment, FieldInfos fn) throws IOException { fieldInfos = fn; - fieldsStream = d.openFile(segment + ".fdt"); - indexStream = d.openFile(segment + ".fdx"); + fieldsStream = d.openInput(segment + ".fdt"); + indexStream = d.openInput(segment + ".fdx"); size = (int)(indexStream.length() / 8); } diff --git a/src/java/org/apache/lucene/index/IndexWriter.java b/src/java/org/apache/lucene/index/IndexWriter.java index 6b0dcd741de..ff1d8dc8ae7 100644 --- a/src/java/org/apache/lucene/index/IndexWriter.java +++ b/src/java/org/apache/lucene/index/IndexWriter.java @@ -25,7 +25,7 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.Lock; -import org.apache.lucene.store.InputStream; +import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.OutputStream; import org.apache.lucene.search.Similarity; import org.apache.lucene.document.Document; @@ -606,7 +606,7 @@ public class IndexWriter { if (!directory.fileExists("deletable")) return result; - InputStream input = directory.openFile("deletable"); + IndexInput input = directory.openInput("deletable"); try { for (int i = input.readInt(); i > 0; i--) // read file names result.addElement(input.readString()); diff --git a/src/java/org/apache/lucene/index/SegmentInfos.java b/src/java/org/apache/lucene/index/SegmentInfos.java index b16ae8f0cba..e36d0b71839 100644 --- a/src/java/org/apache/lucene/index/SegmentInfos.java +++ b/src/java/org/apache/lucene/index/SegmentInfos.java @@ -19,7 +19,7 @@ package 
org.apache.lucene.index; import java.util.Vector; import java.io.IOException; import org.apache.lucene.store.Directory; -import org.apache.lucene.store.InputStream; +import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.OutputStream; final class SegmentInfos extends Vector { @@ -37,7 +37,7 @@ final class SegmentInfos extends Vector { public final void read(Directory directory) throws IOException { - InputStream input = directory.openFile("segments"); + IndexInput input = directory.openInput("segments"); try { int format = input.readInt(); if(format < 0){ // file contains explicit format info @@ -103,7 +103,7 @@ final class SegmentInfos extends Vector { public static long readCurrentVersion(Directory directory) throws IOException { - InputStream input = directory.openFile("segments"); + IndexInput input = directory.openInput("segments"); int format = 0; long version = 0; try { diff --git a/src/java/org/apache/lucene/index/SegmentReader.java b/src/java/org/apache/lucene/index/SegmentReader.java index 1c2345d2a24..90854ff1085 100644 --- a/src/java/org/apache/lucene/index/SegmentReader.java +++ b/src/java/org/apache/lucene/index/SegmentReader.java @@ -25,7 +25,7 @@ import java.util.Set; import java.util.Vector; import org.apache.lucene.document.Document; -import org.apache.lucene.store.InputStream; +import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.OutputStream; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BitVector; @@ -49,20 +49,20 @@ final class SegmentReader extends IndexReader { private boolean normsDirty = false; private boolean undeleteAll = false; - InputStream freqStream; - InputStream proxStream; + IndexInput freqStream; + IndexInput proxStream; // Compound File Reader when based on a compound file segment CompoundFileReader cfsReader = null; private class Norm { - public Norm(InputStream in, int number) + public Norm(IndexInput in, int number) { this.in = in; this.number = number; } - 
private InputStream in; + private IndexInput in; private byte[] bytes; private boolean dirty; private int number; @@ -123,8 +123,8 @@ final class SegmentReader extends IndexReader { // make sure that all index files have been read or are kept open // so that if an index update removes them we'll still have them - freqStream = cfsDir.openFile(segment + ".frq"); - proxStream = cfsDir.openFile(segment + ".prx"); + freqStream = cfsDir.openInput(segment + ".frq"); + proxStream = cfsDir.openInput(segment + ".prx"); openNorms(cfsDir); if (fieldInfos.hasVectors()) { // open term vector files only as needed @@ -363,7 +363,7 @@ final class SegmentReader extends IndexReader { return; } - InputStream normStream = (InputStream) norm.in.clone(); + IndexInput normStream = (IndexInput) norm.in.clone(); try { // read from disk normStream.seek(0); normStream.readBytes(bytes, offset, maxDoc()); @@ -383,7 +383,7 @@ final class SegmentReader extends IndexReader { fileName = segment + ".f" + fi.number; d = cfsDir; } - norms.put(fi.name, new Norm(d.openFile(fileName), fi.number)); + norms.put(fi.name, new Norm(d.openInput(fileName), fi.number)); } } } diff --git a/src/java/org/apache/lucene/index/SegmentTermDocs.java b/src/java/org/apache/lucene/index/SegmentTermDocs.java index 6261b762fd9..f7017f8eb3f 100644 --- a/src/java/org/apache/lucene/index/SegmentTermDocs.java +++ b/src/java/org/apache/lucene/index/SegmentTermDocs.java @@ -18,11 +18,11 @@ package org.apache.lucene.index; import java.io.IOException; import org.apache.lucene.util.BitVector; -import org.apache.lucene.store.InputStream; +import org.apache.lucene.store.IndexInput; class SegmentTermDocs implements TermDocs { protected SegmentReader parent; - private InputStream freqStream; + private IndexInput freqStream; private int count; private int df; private BitVector deletedDocs; @@ -32,7 +32,7 @@ class SegmentTermDocs implements TermDocs { private int skipInterval; private int numSkips; private int skipCount; - private 
InputStream skipStream; + private IndexInput skipStream; private int skipDoc; private long freqPointer; private long proxPointer; @@ -41,7 +41,7 @@ class SegmentTermDocs implements TermDocs { SegmentTermDocs(SegmentReader parent) { this.parent = parent; - this.freqStream = (InputStream) parent.freqStream.clone(); + this.freqStream = (IndexInput) parent.freqStream.clone(); this.deletedDocs = parent.deletedDocs; this.skipInterval = parent.tis.getSkipInterval(); } @@ -147,7 +147,7 @@ class SegmentTermDocs implements TermDocs { if (df >= skipInterval) { // optimized case if (skipStream == null) - skipStream = (InputStream) freqStream.clone(); // lazily clone + skipStream = (IndexInput) freqStream.clone(); // lazily clone if (!haveSkipped) { // lazily seek skip stream skipStream.seek(skipPointer); diff --git a/src/java/org/apache/lucene/index/SegmentTermEnum.java b/src/java/org/apache/lucene/index/SegmentTermEnum.java index 884bcfddfc7..22c26fee35b 100644 --- a/src/java/org/apache/lucene/index/SegmentTermEnum.java +++ b/src/java/org/apache/lucene/index/SegmentTermEnum.java @@ -17,10 +17,10 @@ package org.apache.lucene.index; */ import java.io.IOException; -import org.apache.lucene.store.InputStream; +import org.apache.lucene.store.IndexInput; final class SegmentTermEnum extends TermEnum implements Cloneable { - private InputStream input; + private IndexInput input; FieldInfos fieldInfos; long size; long position = -1; @@ -38,7 +38,7 @@ final class SegmentTermEnum extends TermEnum implements Cloneable { private char[] buffer = {}; - SegmentTermEnum(InputStream i, FieldInfos fis, boolean isi) + SegmentTermEnum(IndexInput i, FieldInfos fis, boolean isi) throws IOException { input = i; fieldInfos = fis; @@ -87,7 +87,7 @@ final class SegmentTermEnum extends TermEnum implements Cloneable { clone = (SegmentTermEnum) super.clone(); } catch (CloneNotSupportedException e) {} - clone.input = (InputStream) input.clone(); + clone.input = (IndexInput) input.clone(); clone.termInfo = 
new TermInfo(termInfo); if (term != null) clone.growBuffer(term.text.length()); diff --git a/src/java/org/apache/lucene/index/SegmentTermPositions.java b/src/java/org/apache/lucene/index/SegmentTermPositions.java index 8a83af361c1..a66a22ab84f 100644 --- a/src/java/org/apache/lucene/index/SegmentTermPositions.java +++ b/src/java/org/apache/lucene/index/SegmentTermPositions.java @@ -18,17 +18,17 @@ package org.apache.lucene.index; import java.io.IOException; -import org.apache.lucene.store.InputStream; +import org.apache.lucene.store.IndexInput; final class SegmentTermPositions extends SegmentTermDocs implements TermPositions { - private InputStream proxStream; + private IndexInput proxStream; private int proxCount; private int position; SegmentTermPositions(SegmentReader p) throws IOException { super(p); - this.proxStream = (InputStream)parent.proxStream.clone(); + this.proxStream = (IndexInput)parent.proxStream.clone(); } final void seek(TermInfo ti) throws IOException { diff --git a/src/java/org/apache/lucene/index/TermInfosReader.java b/src/java/org/apache/lucene/index/TermInfosReader.java index 4ad12a82754..17ac4667067 100644 --- a/src/java/org/apache/lucene/index/TermInfosReader.java +++ b/src/java/org/apache/lucene/index/TermInfosReader.java @@ -39,12 +39,17 @@ final class TermInfosReader { segment = seg; fieldInfos = fis; - origEnum = new SegmentTermEnum(directory.openFile(segment + ".tis"), + origEnum = new SegmentTermEnum(directory.openInput(segment + ".tis"), fieldInfos, false); size = origEnum.size; readIndex(); } + protected void finalize() { + // patch for pre-1.4.2 JVMs, whose ThreadLocals leak + enumerators.set(null); + } + public int getSkipInterval() { return origEnum.skipInterval; } @@ -74,7 +79,7 @@ final class TermInfosReader { private final void readIndex() throws IOException { SegmentTermEnum indexEnum = - new SegmentTermEnum(directory.openFile(segment + ".tii"), + new SegmentTermEnum(directory.openInput(segment + ".tii"), fieldInfos, true); 
try { int indexSize = (int)indexEnum.size; diff --git a/src/java/org/apache/lucene/index/TermVectorsReader.java b/src/java/org/apache/lucene/index/TermVectorsReader.java index e94b4f1f51e..56c288caff6 100644 --- a/src/java/org/apache/lucene/index/TermVectorsReader.java +++ b/src/java/org/apache/lucene/index/TermVectorsReader.java @@ -17,7 +17,7 @@ package org.apache.lucene.index; */ import org.apache.lucene.store.Directory; -import org.apache.lucene.store.InputStream; +import org.apache.lucene.store.IndexInput; import java.io.IOException; @@ -29,19 +29,19 @@ import java.io.IOException; class TermVectorsReader { private FieldInfos fieldInfos; - private InputStream tvx; - private InputStream tvd; - private InputStream tvf; + private IndexInput tvx; + private IndexInput tvd; + private IndexInput tvf; private int size; TermVectorsReader(Directory d, String segment, FieldInfos fieldInfos) throws IOException { if (d.fileExists(segment + TermVectorsWriter.TVX_EXTENSION)) { - tvx = d.openFile(segment + TermVectorsWriter.TVX_EXTENSION); + tvx = d.openInput(segment + TermVectorsWriter.TVX_EXTENSION); checkValidFormat(tvx); - tvd = d.openFile(segment + TermVectorsWriter.TVD_EXTENSION); + tvd = d.openInput(segment + TermVectorsWriter.TVD_EXTENSION); checkValidFormat(tvd); - tvf = d.openFile(segment + TermVectorsWriter.TVF_EXTENSION); + tvf = d.openInput(segment + TermVectorsWriter.TVF_EXTENSION); checkValidFormat(tvf); size = (int) tvx.length() / 8; } @@ -49,7 +49,7 @@ class TermVectorsReader { this.fieldInfos = fieldInfos; } - private void checkValidFormat(InputStream in) throws IOException + private void checkValidFormat(IndexInput in) throws IOException { int format = in.readInt(); if (format > TermVectorsWriter.FORMAT_VERSION) diff --git a/src/java/org/apache/lucene/store/BufferedIndexInput.java b/src/java/org/apache/lucene/store/BufferedIndexInput.java new file mode 100644 index 00000000000..91cefc171d0 --- /dev/null +++ 
b/src/java/org/apache/lucene/store/BufferedIndexInput.java @@ -0,0 +1,109 @@ +package org.apache.lucene.store; + +/** + * Copyright 2004 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +/** Base implementation class for buffered {@link IndexInput}. */ +public abstract class BufferedIndexInput extends IndexInput { + static final int BUFFER_SIZE = OutputStream.BUFFER_SIZE; + + private byte[] buffer; + + private long bufferStart = 0; // position in file of buffer + private int bufferLength = 0; // end of valid bytes + private int bufferPosition = 0; // next byte to read + + public byte readByte() throws IOException { + if (bufferPosition >= bufferLength) + refill(); + return buffer[bufferPosition++]; + } + + public void readBytes(byte[] b, int offset, int len) + throws IOException { + if (len < BUFFER_SIZE) { + for (int i = 0; i < len; i++) // read byte-by-byte + b[i + offset] = (byte)readByte(); + } else { // read all-at-once + long start = getFilePointer(); + seekInternal(start); + readInternal(b, offset, len); + + bufferStart = start + len; // adjust stream variables + bufferPosition = 0; + bufferLength = 0; // trigger refill() on read + } + } + + private void refill() throws IOException { + long start = bufferStart + bufferPosition; + long end = start + BUFFER_SIZE; + if (end > length()) // don't read past EOF + end = length(); + bufferLength = (int)(end - start); + if (bufferLength == 0) 
+ throw new IOException("read past EOF"); + + if (buffer == null) + buffer = new byte[BUFFER_SIZE]; // allocate buffer lazily + readInternal(buffer, 0, bufferLength); + + bufferStart = start; + bufferPosition = 0; + } + + /** Expert: implements buffer refill. Reads bytes from the current position + * in the input. + * @param b the array to read bytes into + * @param offset the offset in the array to start storing bytes + * @param length the number of bytes to read + */ + protected abstract void readInternal(byte[] b, int offset, int length) + throws IOException; + + public long getFilePointer() { return bufferStart + bufferPosition; } + + public void seek(long pos) throws IOException { + if (pos >= bufferStart && pos < (bufferStart + bufferLength)) + bufferPosition = (int)(pos - bufferStart); // seek within buffer + else { + bufferStart = pos; + bufferPosition = 0; + bufferLength = 0; // trigger refill() on read() + seekInternal(pos); + } + } + + /** Expert: implements seek. Sets current position in this file, where the + * next {@link #readInternal(byte[],int,int)} will occur. + * @see #readInternal(byte[],int,int) + */ + protected abstract void seekInternal(long pos) throws IOException; + + public Object clone() { + BufferedIndexInput clone = (BufferedIndexInput)super.clone(); + + if (buffer != null) { + clone.buffer = new byte[BUFFER_SIZE]; + System.arraycopy(buffer, 0, clone.buffer, 0, bufferLength); + } + + return clone; + } + +} diff --git a/src/java/org/apache/lucene/store/Directory.java b/src/java/org/apache/lucene/store/Directory.java index b51a9402671..ce8d0df0cf1 100644 --- a/src/java/org/apache/lucene/store/Directory.java +++ b/src/java/org/apache/lucene/store/Directory.java @@ -67,9 +67,18 @@ public abstract class Directory { public abstract OutputStream createFile(String name) throws IOException; + /** @deprecated use {@link #openInput(String)}.
*/ + public InputStream openFile(String name) throws IOException { + return (InputStream)openInput(name); + } + /** Returns a stream reading an existing file. */ - public abstract InputStream openFile(String name) - throws IOException; + public IndexInput openInput(String name) + throws IOException { + // default implementation for back compatibility + // this method should be abstract + return (IndexInput)openFile(name); + } /** Construct a {@link Lock}. * @param name the name of the lock file diff --git a/src/java/org/apache/lucene/store/FSDirectory.java b/src/java/org/apache/lucene/store/FSDirectory.java index dfb82198a88..40f2a9fa3a9 100644 --- a/src/java/org/apache/lucene/store/FSDirectory.java +++ b/src/java/org/apache/lucene/store/FSDirectory.java @@ -56,6 +56,19 @@ public class FSDirectory extends Directory { System.getProperty("org.apache.lucene.lockdir", System.getProperty("java.io.tmpdir")); + /** The default class which implements filesystem-based directories. */ + private static final Class IMPL; + static { + try { + String name = + System.getProperty("org.apache.lucene.FSDirectory.class", + FSDirectory.class.getName()); + IMPL = Class.forName(name); + } catch (ClassNotFoundException e) { + throw new RuntimeException(e); + } + } + private static MessageDigest DIGESTER; static { @@ -99,7 +112,12 @@ public class FSDirectory extends Directory { synchronized (DIRECTORIES) { dir = (FSDirectory)DIRECTORIES.get(file); if (dir == null) { - dir = new FSDirectory(file, create); + try { + dir = (FSDirectory)IMPL.newInstance(); + } catch (Exception e) { + throw new RuntimeException(e); + } + dir.init(file, create); DIRECTORIES.put(file, dir); } else if (create) { dir.create(); @@ -115,7 +133,9 @@ public class FSDirectory extends Directory { private int refCount; private File lockDir; - private FSDirectory(File path, boolean create) throws IOException { + protected FSDirectory() {}; // permit subclassing + + private void init(File path, boolean create) throws 
IOException { directory = path; if (LOCK_DIR == null) { @@ -360,7 +380,7 @@ public class FSDirectory extends Directory { /** For debug output. */ public String toString() { - return "FSDirectory@" + directory; + return this.getClass().getName() + "@" + directory; } } diff --git a/src/java/org/apache/lucene/store/IndexInput.java b/src/java/org/apache/lucene/store/IndexInput.java new file mode 100644 index 00000000000..3eac98a405b --- /dev/null +++ b/src/java/org/apache/lucene/store/IndexInput.java @@ -0,0 +1,156 @@ +package org.apache.lucene.store; + +/** + * Copyright 2004 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +/** Abstract base class for input from a file in a {@link Directory}. A + * random-access input stream. Used for all Lucene index input operations. + * @see Directory + */ +public abstract class IndexInput implements Cloneable { + private char[] chars; // used by readString() + + /** Reads and returns a single byte. + * @see OutputStream#writeByte(byte) + */ + public abstract byte readByte() throws IOException; + + /** Reads a specified number of bytes into an array at the specified offset. 
+ * @param b the array to read bytes into + * @param offset the offset in the array to start storing bytes + * @param len the number of bytes to read + * @see OutputStream#writeBytes(byte[],int) + */ + public abstract void readBytes(byte[] b, int offset, int len) + throws IOException; + + /** Reads four bytes and returns an int. + * @see OutputStream#writeInt(int) + */ + public int readInt() throws IOException { + return ((readByte() & 0xFF) << 24) | ((readByte() & 0xFF) << 16) + | ((readByte() & 0xFF) << 8) | (readByte() & 0xFF); + } + + /** Reads an int stored in variable-length format. Reads between one and + * five bytes. Smaller values take fewer bytes. Negative numbers are not + * supported. + * @see OutputStream#writeVInt(int) + */ + public int readVInt() throws IOException { + byte b = readByte(); + int i = b & 0x7F; + for (int shift = 7; (b & 0x80) != 0; shift += 7) { + b = readByte(); + i |= (b & 0x7F) << shift; + } + return i; + } + + /** Reads eight bytes and returns a long. + * @see OutputStream#writeLong(long) + */ + public long readLong() throws IOException { + return (((long)readInt()) << 32) | (readInt() & 0xFFFFFFFFL); + } + + /** Reads a long stored in variable-length format. Reads between one and + * nine bytes. Smaller values take fewer bytes. Negative numbers are not + * supported. */ + public long readVLong() throws IOException { + byte b = readByte(); + long i = b & 0x7F; + for (int shift = 7; (b & 0x80) != 0; shift += 7) { + b = readByte(); + i |= (b & 0x7FL) << shift; + } + return i; + } + + /** Reads a string. + * @see OutputStream#writeString(String) + */ + public String readString() throws IOException { + int length = readVInt(); + if (chars == null || length > chars.length) + chars = new char[length]; + readChars(chars, 0, length); + return new String(chars, 0, length); + } + + /** Reads UTF-8 encoded characters into an array. 
+ * @param buffer the array to read characters into + * @param start the offset in the array to start storing characters + * @param length the number of characters to read + * @see OutputStream#writeChars(String,int,int) + */ + public void readChars(char[] buffer, int start, int length) + throws IOException { + final int end = start + length; + for (int i = start; i < end; i++) { + byte b = readByte(); + if ((b & 0x80) == 0) + buffer[i] = (char)(b & 0x7F); + else if ((b & 0xE0) != 0xE0) { + buffer[i] = (char)(((b & 0x1F) << 6) + | (readByte() & 0x3F)); + } else + buffer[i] = (char)(((b & 0x0F) << 12) + | ((readByte() & 0x3F) << 6) + | (readByte() & 0x3F)); + } + } + + /** Closes the stream to further operations. */ + public abstract void close() throws IOException; + + /** Returns the current position in this file, where the next read will + * occur. + * @see #seek(long) + */ + public abstract long getFilePointer(); + + /** Sets current position in this file, where the next read will occur. + * @see #getFilePointer() + */ + public abstract void seek(long pos) throws IOException; + + /** The number of bytes in the file. */ + public abstract long length(); + + /** Returns a clone of this stream. + * + *

Clones of a stream access the same data, and are positioned at the same + * point as the stream they were cloned from. + * + *

Expert: Subclasses must ensure that clones may be positioned at + * different points in the input from each other and from the stream they + * were cloned from. + */ + public Object clone() { + IndexInput clone = null; + try { + clone = (IndexInput)super.clone(); + } catch (CloneNotSupportedException e) {} + + clone.chars = null; + + return clone; + } + +} diff --git a/src/java/org/apache/lucene/store/InputStream.java b/src/java/org/apache/lucene/store/InputStream.java index 0bdab03e084..e3d773fe28c 100644 --- a/src/java/org/apache/lucene/store/InputStream.java +++ b/src/java/org/apache/lucene/store/InputStream.java @@ -18,217 +18,13 @@ package org.apache.lucene.store; import java.io.IOException; -/** Abstract base class for input from a file in a {@link Directory}. A - * random-access input stream. Used for all Lucene index input operations. - * @see Directory - * @see OutputStream - */ -public abstract class InputStream implements Cloneable { - static final int BUFFER_SIZE = OutputStream.BUFFER_SIZE; - - private byte[] buffer; - private char[] chars; - - private long bufferStart = 0; // position in file of buffer - private int bufferLength = 0; // end of valid bytes - private int bufferPosition = 0; // next byte to read +/** @deprecated Use {@link IndexInput} or {@link BufferedIndexInput} instead.*/ +public abstract class InputStream extends BufferedIndexInput { protected long length; // set by subclasses - /** Reads and returns a single byte. - * @see OutputStream#writeByte(byte) - */ - public final byte readByte() throws IOException { - if (bufferPosition >= bufferLength) - refill(); - return buffer[bufferPosition++]; - } - - /** Reads a specified number of bytes into an array at the specified offset. 
- * @param b the array to read bytes into - * @param offset the offset in the array to start storing bytes - * @param len the number of bytes to read - * @see OutputStream#writeBytes(byte[],int) - */ - public final void readBytes(byte[] b, int offset, int len) - throws IOException { - if (len < BUFFER_SIZE) { - for (int i = 0; i < len; i++) // read byte-by-byte - b[i + offset] = (byte)readByte(); - } else { // read all-at-once - long start = getFilePointer(); - seekInternal(start); - readInternal(b, offset, len); - - bufferStart = start + len; // adjust stream variables - bufferPosition = 0; - bufferLength = 0; // trigger refill() on read - } - } - - /** Reads four bytes and returns an int. - * @see OutputStream#writeInt(int) - */ - public final int readInt() throws IOException { - return ((readByte() & 0xFF) << 24) | ((readByte() & 0xFF) << 16) - | ((readByte() & 0xFF) << 8) | (readByte() & 0xFF); - } - - /** Reads an int stored in variable-length format. Reads between one and - * five bytes. Smaller values take fewer bytes. Negative numbers are not - * supported. - * @see OutputStream#writeVInt(int) - */ - public final int readVInt() throws IOException { - byte b = readByte(); - int i = b & 0x7F; - for (int shift = 7; (b & 0x80) != 0; shift += 7) { - b = readByte(); - i |= (b & 0x7F) << shift; - } - return i; - } - - /** Reads eight bytes and returns a long. - * @see OutputStream#writeLong(long) - */ - public final long readLong() throws IOException { - return (((long)readInt()) << 32) | (readInt() & 0xFFFFFFFFL); - } - - /** Reads a long stored in variable-length format. Reads between one and - * nine bytes. Smaller values take fewer bytes. Negative numbers are not - * supported. */ - public final long readVLong() throws IOException { - byte b = readByte(); - long i = b & 0x7F; - for (int shift = 7; (b & 0x80) != 0; shift += 7) { - b = readByte(); - i |= (b & 0x7FL) << shift; - } - return i; - } - - /** Reads a string. 
- * @see OutputStream#writeString(String) - */ - public final String readString() throws IOException { - int length = readVInt(); - if (chars == null || length > chars.length) - chars = new char[length]; - readChars(chars, 0, length); - return new String(chars, 0, length); - } - - /** Reads UTF-8 encoded characters into an array. - * @param buffer the array to read characters into - * @param start the offset in the array to start storing characters - * @param length the number of characters to read - * @see OutputStream#writeChars(String,int,int) - */ - public final void readChars(char[] buffer, int start, int length) - throws IOException { - final int end = start + length; - for (int i = start; i < end; i++) { - byte b = readByte(); - if ((b & 0x80) == 0) - buffer[i] = (char)(b & 0x7F); - else if ((b & 0xE0) != 0xE0) { - buffer[i] = (char)(((b & 0x1F) << 6) - | (readByte() & 0x3F)); - } else - buffer[i] = (char)(((b & 0x0F) << 12) - | ((readByte() & 0x3F) << 6) - | (readByte() & 0x3F)); - } - } - - - private void refill() throws IOException { - long start = bufferStart + bufferPosition; - long end = start + BUFFER_SIZE; - if (end > length) // don't read past EOF - end = length; - bufferLength = (int)(end - start); - if (bufferLength == 0) - throw new IOException("read past EOF"); - - if (buffer == null) - buffer = new byte[BUFFER_SIZE]; // allocate buffer lazily - readInternal(buffer, 0, bufferLength); - - bufferStart = start; - bufferPosition = 0; - } - - /** Expert: implements buffer refill. Reads bytes from the current position - * in the input. - * @param b the array to read bytes into - * @param offset the offset in the array to start storing bytes - * @param length the number of bytes to read - */ - protected abstract void readInternal(byte[] b, int offset, int length) - throws IOException; - - /** Closes the stream to futher operations. 
*/ - public abstract void close() throws IOException; - - /** Returns the current position in this file, where the next read will - * occur. - * @see #seek(long) - */ - public final long getFilePointer() { - return bufferStart + bufferPosition; - } - - /** Sets current position in this file, where the next read will occur. - * @see #getFilePointer() - */ - public final void seek(long pos) throws IOException { - if (pos >= bufferStart && pos < (bufferStart + bufferLength)) - bufferPosition = (int)(pos - bufferStart); // seek within buffer - else { - bufferStart = pos; - bufferPosition = 0; - bufferLength = 0; // trigger refill() on read() - seekInternal(pos); - } - } - - /** Expert: implements seek. Sets current position in this file, where the - * next {@link #readInternal(byte[],int,int)} will occur. - * @see #readInternal(byte[],int,int) - */ - protected abstract void seekInternal(long pos) throws IOException; - - /** The number of bytes in the file. */ - public final long length() { + public long length() { return length; } - /** Returns a clone of this stream. - * - *

Clones of a stream access the same data, and are positioned at the same - * point as the stream they were cloned from. - * - *

Expert: Subclasses must ensure that clones may be positioned at - * different points in the input from each other and from the stream they - * were cloned from. - */ - public Object clone() { - InputStream clone = null; - try { - clone = (InputStream)super.clone(); - } catch (CloneNotSupportedException e) {} - - if (buffer != null) { - clone.buffer = new byte[BUFFER_SIZE]; - System.arraycopy(buffer, 0, clone.buffer, 0, bufferLength); - } - - clone.chars = null; - - return clone; - } - } diff --git a/src/java/org/apache/lucene/util/BitVector.java b/src/java/org/apache/lucene/util/BitVector.java index 2b9773f2fc3..c65dd68ff0b 100644 --- a/src/java/org/apache/lucene/util/BitVector.java +++ b/src/java/org/apache/lucene/util/BitVector.java @@ -19,7 +19,7 @@ package org.apache.lucene.util; import java.io.IOException; import org.apache.lucene.store.Directory; -import org.apache.lucene.store.InputStream; +import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.OutputStream; /** Optimized implementation of a vector of bits. This is more-or-less like @@ -122,7 +122,7 @@ public final class BitVector { d, as written by the {@link #write} method. */ public BitVector(Directory d, String name) throws IOException { - InputStream input = d.openFile(name); + IndexInput input = d.openInput(name); try { size = input.readInt(); // read size count = input.readInt(); // read count diff --git a/src/java/overview.html b/src/java/overview.html index 3c071c263d5..d4672a0ee34 100644 --- a/src/java/overview.html +++ b/src/java/overview.html @@ -17,7 +17,7 @@ and PriorityQueue. org.apache.lucene.store defines an abstract class for storing persistent data, the Directory, a collection of named files written by an OutputStream -and read by an InputStream.  +and read by an IndexInput.  Two implementations are provided, FSDirectory, which uses a file system directory to store files, and RAMDirectory which implements files as memory-resident data structures. 
diff --git a/src/test/org/apache/lucene/StoreTest.java b/src/test/org/apache/lucene/StoreTest.java index 9b7c8341c25..1bffaf5c87b 100644 --- a/src/test/org/apache/lucene/StoreTest.java +++ b/src/test/org/apache/lucene/StoreTest.java @@ -17,7 +17,7 @@ package org.apache.lucene; */ import org.apache.lucene.store.Directory; -import org.apache.lucene.store.InputStream; +import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.OutputStream; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.RAMDirectory; @@ -84,7 +84,7 @@ class StoreTest { byte b = (byte)(gen.nextInt() & 0x7F); //System.out.println("reading " + name + " with " + length + " of " + b); - InputStream file = store.openFile(name); + IndexInput file = store.openInput(name); if (file.length() != length) throw new Exception("length incorrect"); diff --git a/src/test/org/apache/lucene/index/MockInputStream.java b/src/test/org/apache/lucene/index/MockIndexInput.java similarity index 87% rename from src/test/org/apache/lucene/index/MockInputStream.java rename to src/test/org/apache/lucene/index/MockIndexInput.java index a5069670fb7..a2475e15759 100644 --- a/src/test/org/apache/lucene/index/MockInputStream.java +++ b/src/test/org/apache/lucene/index/MockIndexInput.java @@ -16,15 +16,16 @@ package org.apache.lucene.index; * limitations under the License. 
*/ -import org.apache.lucene.store.InputStream; +import org.apache.lucene.store.BufferedIndexInput; import java.io.IOException; -public class MockInputStream extends InputStream { +public class MockIndexInput extends BufferedIndexInput { private byte[] buffer; private int pointer = 0; + private long length; - public MockInputStream(byte[] bytes) { + public MockIndexInput(byte[] bytes) { buffer = bytes; length = bytes.length; } @@ -53,4 +54,9 @@ public class MockInputStream extends InputStream { protected void seekInternal(long pos) throws IOException { pointer = (int) pos; } + + public long length() { + return length; + } + } diff --git a/src/test/org/apache/lucene/index/TestCompoundFile.java b/src/test/org/apache/lucene/index/TestCompoundFile.java index a6f93cd57a7..338c3175a87 100644 --- a/src/test/org/apache/lucene/index/TestCompoundFile.java +++ b/src/test/org/apache/lucene/index/TestCompoundFile.java @@ -24,7 +24,7 @@ import junit.framework.TestSuite; import junit.textui.TestRunner; import org.apache.lucene.store.OutputStream; import org.apache.lucene.store.Directory; -import org.apache.lucene.store.InputStream; +import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store._TestHelper; @@ -94,8 +94,8 @@ public class TestCompoundFile extends TestCase private void assertSameStreams(String msg, - InputStream expected, - InputStream test) + IndexInput expected, + IndexInput test) throws IOException { assertNotNull(msg + " null expected", expected); @@ -120,8 +120,8 @@ public class TestCompoundFile extends TestCase private void assertSameStreams(String msg, - InputStream expected, - InputStream actual, + IndexInput expected, + IndexInput actual, long seekTo) throws IOException { @@ -136,8 +136,8 @@ public class TestCompoundFile extends TestCase private void assertSameSeekBehavior(String msg, - InputStream expected, - InputStream actual) + IndexInput expected, + IndexInput actual) throws IOException { // seek 
to 0 @@ -199,8 +199,8 @@ public class TestCompoundFile extends TestCase csw.close(); CompoundFileReader csr = new CompoundFileReader(dir, name + ".cfs"); - InputStream expected = dir.openFile(name); - InputStream actual = csr.openFile(name); + IndexInput expected = dir.openInput(name); + IndexInput actual = csr.openInput(name); assertSameStreams(name, expected, actual); assertSameSeekBehavior(name, expected, actual); expected.close(); @@ -223,15 +223,15 @@ public class TestCompoundFile extends TestCase csw.close(); CompoundFileReader csr = new CompoundFileReader(dir, "d.csf"); - InputStream expected = dir.openFile("d1"); - InputStream actual = csr.openFile("d1"); + IndexInput expected = dir.openInput("d1"); + IndexInput actual = csr.openInput("d1"); assertSameStreams("d1", expected, actual); assertSameSeekBehavior("d1", expected, actual); expected.close(); actual.close(); - expected = dir.openFile("d2"); - actual = csr.openFile("d2"); + expected = dir.openInput("d2"); + actual = csr.openInput("d2"); assertSameStreams("d2", expected, actual); assertSameSeekBehavior("d2", expected, actual); expected.close(); @@ -279,8 +279,8 @@ public class TestCompoundFile extends TestCase CompoundFileReader csr = new CompoundFileReader(dir, "test.cfs"); for (int i=0; i