LUCENE-888: increase internal buffer sizes used during indexing to improve overall indexing speed
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@542561 13f79535-47bb-0310-9956-ffa450edef68
parent 4ef95040cb
commit 3e09257c09

CHANGES.txt | 11 +++++++++++
@@ -65,6 +65,11 @@ API Changes
     This patch also enables the tests in QueryUtils again that check for docid
     order. (Paul Elschot, Doron Cohen, Michael Busch)
 
+12. LUCENE-888: Added Directory.openInput(File path, int bufferSize)
+    to optionally specify the size of the read buffer.  Also added
+    BufferedIndexInput.setBufferSize(int) to change the buffer size.
+    (Mike McCandless)
+
 Bug fixes
 
@@ -196,6 +201,12 @@ Optimizations
     Together with LUCENE-888 this will allow to adjust the buffer size
     dynamically. (Paul Elschot, Michael Busch)
 
+ 6. LUCENE-888: Increase buffer sizes inside CompoundFileWriter and
+    BufferedIndexOutput.  Also increase buffer size in
+    BufferedIndexInput, but only when used during merging.  Together,
+    these increases yield 10-18% overall performance gain vs the
+    previous 1K defaults.  (Mike McCandless)
+
 Documentation
 
  1. LUCENE 791 && INFRA-1173: Infrastructure moved the Wiki to
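The two hooks described in the API-change entry above compose like this from a caller's perspective; a minimal sketch assuming the Lucene 2.2-era API, with the index path and file name purely illustrative:

    import java.io.File;
    import java.io.IOException;
    import org.apache.lucene.store.BufferedIndexInput;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.store.IndexInput;

    public class BufferSizeUsage {
      public static void main(String[] args) throws IOException {
        // Hypothetical index directory and segment file, for illustration only.
        Directory dir = FSDirectory.getDirectory(new File("/tmp/index"));
        IndexInput in = dir.openInput("_0.frq", 4096);   // 4 KB instead of the 1 KB default

        // The buffer can also be resized on the live stream; FSDirectory
        // hands back a BufferedIndexInput subclass, so the cast holds here.
        ((BufferedIndexInput) in).setBufferSize(8192);

        in.close();
        dir.close();
      }
    }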

CompoundFileReader.java

@@ -37,6 +37,8 @@ import java.io.IOException;
  */
 class CompoundFileReader extends Directory {
 
+    private int readBufferSize;
+
     private static final class FileEntry {
         long offset;
         long length;
@@ -51,16 +53,21 @@ class CompoundFileReader extends Directory {
     private HashMap entries = new HashMap();
 
 
-    public CompoundFileReader(Directory dir, String name)
+    public CompoundFileReader(Directory dir, String name) throws IOException {
+        this(dir, name, BufferedIndexInput.BUFFER_SIZE);
+    }
+
+    public CompoundFileReader(Directory dir, String name, int readBufferSize)
         throws IOException
     {
         directory = dir;
         fileName = name;
+        this.readBufferSize = readBufferSize;
 
         boolean success = false;
 
         try {
-            stream = dir.openInput(name);
+            stream = dir.openInput(name, readBufferSize);
 
             // read the directory and init files
             int count = stream.readVInt();
@@ -114,6 +121,13 @@ class CompoundFileReader extends Directory {
 
     public synchronized IndexInput openInput(String id)
         throws IOException
+    {
+        // Default to readBufferSize passed in when we were opened
+        return openInput(id, readBufferSize);
+    }
+
+    public synchronized IndexInput openInput(String id, int readBufferSize)
+        throws IOException
     {
         if (stream == null)
             throw new IOException("Stream closed");
@@ -122,7 +136,7 @@ class CompoundFileReader extends Directory {
         if (entry == null)
             throw new IOException("No sub-file with id " + id + " found");
 
-        return new CSIndexInput(stream, entry.offset, entry.length);
+        return new CSIndexInput(stream, entry.offset, entry.length, readBufferSize);
     }
 
     /** Returns an array of strings, one for each file in the directory. */
@@ -198,6 +212,12 @@ class CompoundFileReader extends Directory {
 
         CSIndexInput(final IndexInput base, final long fileOffset, final long length)
         {
+            this(base, fileOffset, length, BufferedIndexInput.BUFFER_SIZE);
+        }
+
+        CSIndexInput(final IndexInput base, final long fileOffset, final long length, int readBufferSize)
+        {
+            super(readBufferSize);
             this.base = base;
             this.fileOffset = fileOffset;
             this.length = length;

CompoundFileWriter.java

@@ -161,7 +161,7 @@ final class CompoundFileWriter {
 
         // Open the files and copy their data into the stream.
         // Remember the locations of each file's data section.
-        byte buffer[] = new byte[1024];
+        byte buffer[] = new byte[16384];
         it = entries.iterator();
         while(it.hasNext()) {
             FileEntry fe = (FileEntry) it.next();
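The jump to 16384 matters because the copy loop above does one read/write round trip per buffer fill. A back-of-the-envelope check, where the 64 MB sub-file size is a hypothetical:

    public class CopyCostEstimate {
      public static void main(String[] args) {
        long subFileBytes = 64L << 20;               // hypothetical 64 MB sub-file
        System.out.println(subFileBytes / 1024);     // 65536 copy iterations at 1 KB
        System.out.println(subFileBytes / 16384);    // 4096 copy iterations at 16 KB
      }
    }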

FieldsReader.java

@@ -22,6 +22,7 @@ import org.apache.lucene.document.*;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.AlreadyClosedException;
+import org.apache.lucene.store.BufferedIndexInput;
 
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
@@ -53,11 +54,15 @@ final class FieldsReader {
   private ThreadLocal fieldsStreamTL = new ThreadLocal();
 
   FieldsReader(Directory d, String segment, FieldInfos fn) throws IOException {
+    this(d, segment, fn, BufferedIndexInput.BUFFER_SIZE);
+  }
+
+  FieldsReader(Directory d, String segment, FieldInfos fn, int readBufferSize) throws IOException {
     fieldInfos = fn;
 
-    cloneableFieldsStream = d.openInput(segment + ".fdt");
+    cloneableFieldsStream = d.openInput(segment + ".fdt", readBufferSize);
     fieldsStream = (IndexInput)cloneableFieldsStream.clone();
-    indexStream = d.openInput(segment + ".fdx");
+    indexStream = d.openInput(segment + ".fdx", readBufferSize);
     size = (int) (indexStream.length() / 8);
   }
 

IndexWriter.java

@@ -203,6 +203,14 @@ public class IndexWriter {
    */
   public final static int DEFAULT_TERM_INDEX_INTERVAL = 128;
 
+  // The normal read buffer size defaults to 1024, but
+  // increasing this during merging seems to yield
+  // performance gains.  However we don't want to increase
+  // it too much because there are quite a few
+  // BufferedIndexInputs created during merging.  See
+  // LUCENE-888 for details.
+  private final static int MERGE_READ_BUFFER_SIZE = 4096;
+
   private Directory directory;  // where this index resides
   private Analyzer analyzer;    // how to analyze text
 
@@ -1824,7 +1832,7 @@ public class IndexWriter {
       SegmentInfo si = sourceSegments.info(i);
       if (infoStream != null)
         infoStream.print(" " + si.name + " (" + si.docCount + " docs)");
-      IndexReader reader = SegmentReader.get(si); // no need to set deleter (yet)
+      IndexReader reader = SegmentReader.get(si, MERGE_READ_BUFFER_SIZE); // no need to set deleter (yet)
       merger.add(reader);
       if (reader.directory() == this.ramDirectory) {
         ramSegmentsToDelete.add(si);
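The trade-off described in the MERGE_READ_BUFFER_SIZE comment is easy to quantify: every segment in a merge keeps several BufferedIndexInputs open at once, so buffer memory scales with the merge width. A rough estimate; the per-segment stream count here is an assumption for illustration, not a figure from the commit:

    public class MergeBufferEstimate {
      public static void main(String[] args) {
        int segmentsInMerge = 10;     // e.g. the default mergeFactor
        int streamsPerSegment = 7;    // assumed: .frq, .prx, .fdt, .fdx, .tis, .tii, .nrm
        System.out.println(segmentsInMerge * streamsPerSegment * 4096);    // ~280 KB at 4 KB buffers
        System.out.println(segmentsInMerge * streamsPerSegment * 16384);   // ~1.1 MB at 16 KB buffers
      }
    }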

SegmentReader.java

@@ -23,6 +23,7 @@ import org.apache.lucene.search.DefaultSimilarity;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.BufferedIndexInput;
 import org.apache.lucene.util.BitVector;
 
 import java.io.IOException;
@@ -127,7 +128,15 @@ class SegmentReader extends IndexReader {
    * @throws IOException if there is a low-level IO error
    */
   public static SegmentReader get(SegmentInfo si) throws CorruptIndexException, IOException {
-    return get(si.dir, si, null, false, false);
+    return get(si.dir, si, null, false, false, BufferedIndexInput.BUFFER_SIZE);
+  }
+
+  /**
+   * @throws CorruptIndexException if the index is corrupt
+   * @throws IOException if there is a low-level IO error
+   */
+  public static SegmentReader get(SegmentInfo si, int readBufferSize) throws CorruptIndexException, IOException {
+    return get(si.dir, si, null, false, false, readBufferSize);
   }
 
   /**
@@ -136,7 +145,7 @@ class SegmentReader extends IndexReader {
    */
   public static SegmentReader get(SegmentInfos sis, SegmentInfo si,
                                   boolean closeDir) throws CorruptIndexException, IOException {
-    return get(si.dir, si, sis, closeDir, true);
+    return get(si.dir, si, sis, closeDir, true, BufferedIndexInput.BUFFER_SIZE);
   }
 
   /**
@@ -145,7 +154,8 @@ class SegmentReader extends IndexReader {
    */
   public static SegmentReader get(Directory dir, SegmentInfo si,
                                   SegmentInfos sis,
-                                  boolean closeDir, boolean ownDir)
+                                  boolean closeDir, boolean ownDir,
+                                  int readBufferSize)
     throws CorruptIndexException, IOException {
     SegmentReader instance;
     try {
@@ -154,11 +164,11 @@ class SegmentReader extends IndexReader {
       throw new RuntimeException("cannot load SegmentReader class: " + e, e);
     }
     instance.init(dir, sis, closeDir, ownDir);
-    instance.initialize(si);
+    instance.initialize(si, readBufferSize);
     return instance;
   }
 
-  private void initialize(SegmentInfo si) throws CorruptIndexException, IOException {
+  private void initialize(SegmentInfo si, int readBufferSize) throws CorruptIndexException, IOException {
     segment = si.name;
     this.si = si;
 
@@ -168,20 +178,20 @@ class SegmentReader extends IndexReader {
       // Use compound file directory for some files, if it exists
       Directory cfsDir = directory();
       if (si.getUseCompoundFile()) {
-        cfsReader = new CompoundFileReader(directory(), segment + ".cfs");
+        cfsReader = new CompoundFileReader(directory(), segment + ".cfs", readBufferSize);
         cfsDir = cfsReader;
       }
 
       // No compound file exists - use the multi-file format
       fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");
-      fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos);
+      fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos, readBufferSize);
 
       // Verify two sources of "maxDoc" agree:
       if (fieldsReader.size() != si.docCount) {
         throw new CorruptIndexException("doc counts differ for segment " + si.name + ": fieldsReader shows " + fieldsReader.size() + " but segmentInfo shows " + si.docCount);
       }
 
-      tis = new TermInfosReader(cfsDir, segment, fieldInfos);
+      tis = new TermInfosReader(cfsDir, segment, fieldInfos, readBufferSize);
 
       // NOTE: the bitvector is stored using the regular directory, not cfs
       if (hasDeletions(si)) {
@@ -195,12 +205,12 @@ class SegmentReader extends IndexReader {
 
       // make sure that all index files have been read or are kept open
       // so that if an index update removes them we'll still have them
-      freqStream = cfsDir.openInput(segment + ".frq");
-      proxStream = cfsDir.openInput(segment + ".prx");
-      openNorms(cfsDir);
+      freqStream = cfsDir.openInput(segment + ".frq", readBufferSize);
+      proxStream = cfsDir.openInput(segment + ".prx", readBufferSize);
+      openNorms(cfsDir, readBufferSize);
 
       if (fieldInfos.hasVectors()) { // open term vector files only as needed
-        termVectorsReaderOrig = new TermVectorsReader(cfsDir, segment, fieldInfos);
+        termVectorsReaderOrig = new TermVectorsReader(cfsDir, segment, fieldInfos, readBufferSize);
       }
       success = true;
     } finally {
@@ -482,7 +492,7 @@ class SegmentReader extends IndexReader {
   }
 
 
-  private void openNorms(Directory cfsDir) throws IOException {
+  private void openNorms(Directory cfsDir, int readBufferSize) throws IOException {
     long nextNormSeek = SegmentMerger.NORMS_HEADER.length; //skip header (header unused for now)
     int maxDoc = maxDoc();
     for (int i = 0; i < fieldInfos.size(); i++) {
@@ -502,7 +512,7 @@ class SegmentReader extends IndexReader {
         if (singleNormFile) {
           normSeek = nextNormSeek;
           if (singleNormStream==null) {
-            singleNormStream = d.openInput(fileName);
+            singleNormStream = d.openInput(fileName, readBufferSize);
           }
           // All norms in the .nrm file can share a single IndexInput since
           // they are only used in a synchronized context.

TermInfosReader.java

@@ -20,6 +20,7 @@ package org.apache.lucene.index;
 import java.io.IOException;
 
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.BufferedIndexInput;
 
 /** This stores a monotonically increasing set of <Term, TermInfo> pairs in a
  * Directory.  Pairs are accessed either by Term or by ordinal position the
@@ -42,16 +43,21 @@ final class TermInfosReader {
 
   TermInfosReader(Directory dir, String seg, FieldInfos fis)
        throws CorruptIndexException, IOException {
+    this(dir, seg, fis, BufferedIndexInput.BUFFER_SIZE);
+  }
+
+  TermInfosReader(Directory dir, String seg, FieldInfos fis, int readBufferSize)
+       throws CorruptIndexException, IOException {
     directory = dir;
     segment = seg;
     fieldInfos = fis;
 
-    origEnum = new SegmentTermEnum(directory.openInput(segment + ".tis"),
+    origEnum = new SegmentTermEnum(directory.openInput(segment + ".tis", readBufferSize),
                                    fieldInfos, false);
     size = origEnum.size;
 
     indexEnum =
-      new SegmentTermEnum(directory.openInput(segment + ".tii"),
+      new SegmentTermEnum(directory.openInput(segment + ".tii", readBufferSize),
                           fieldInfos, true);
   }
 

TermVectorsReader.java

@@ -19,6 +19,7 @@ package org.apache.lucene.index;
 
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.BufferedIndexInput;
 
 import java.io.IOException;
 
@@ -37,13 +38,18 @@ class TermVectorsReader implements Cloneable {
   private int tvfFormat;
 
   TermVectorsReader(Directory d, String segment, FieldInfos fieldInfos)
+    throws CorruptIndexException, IOException {
+    this(d, segment, fieldInfos, BufferedIndexInput.BUFFER_SIZE);
+  }
+
+  TermVectorsReader(Directory d, String segment, FieldInfos fieldInfos, int readBufferSize)
     throws CorruptIndexException, IOException {
     if (d.fileExists(segment + TermVectorsWriter.TVX_EXTENSION)) {
-      tvx = d.openInput(segment + TermVectorsWriter.TVX_EXTENSION);
+      tvx = d.openInput(segment + TermVectorsWriter.TVX_EXTENSION, readBufferSize);
       checkValidFormat(tvx);
-      tvd = d.openInput(segment + TermVectorsWriter.TVD_EXTENSION);
+      tvd = d.openInput(segment + TermVectorsWriter.TVD_EXTENSION, readBufferSize);
       tvdFormat = checkValidFormat(tvd);
-      tvf = d.openInput(segment + TermVectorsWriter.TVF_EXTENSION);
+      tvf = d.openInput(segment + TermVectorsWriter.TVF_EXTENSION, readBufferSize);
       tvfFormat = checkValidFormat(tvf);
       size = (int) tvx.length() / 8;
     }

BufferedIndexInput.java

@@ -21,7 +21,11 @@ import java.io.IOException;
 
 /** Base implementation class for buffered {@link IndexInput}. */
 public abstract class BufferedIndexInput extends IndexInput {
-  static final int BUFFER_SIZE = BufferedIndexOutput.BUFFER_SIZE;
+
+  /** Default buffer size */
+  public static final int BUFFER_SIZE = 1024;
+
+  private int bufferSize = BUFFER_SIZE;
 
   private byte[] buffer;
 
@@ -35,6 +39,50 @@ public abstract class BufferedIndexInput extends IndexInput {
     return buffer[bufferPosition++];
   }
 
+  public BufferedIndexInput() {}
+
+  /** Inits BufferedIndexInput with a specific bufferSize */
+  public BufferedIndexInput(int bufferSize) {
+    checkBufferSize(bufferSize);
+    this.bufferSize = bufferSize;
+  }
+
+  /** Change the buffer size used by this IndexInput */
+  public void setBufferSize(int newSize) {
+    assert bufferSize == buffer.length;
+    if (newSize != bufferSize) {
+      checkBufferSize(newSize);
+      bufferSize = newSize;
+      if (buffer != null) {
+        // Resize the existing buffer and carefully save as
+        // many bytes as possible starting from the current
+        // bufferPosition
+        byte[] newBuffer = new byte[newSize];
+        final int leftInBuffer = bufferLength-bufferPosition;
+        final int numToCopy;
+        if (leftInBuffer > newSize)
+          numToCopy = newSize;
+        else
+          numToCopy = leftInBuffer;
+        System.arraycopy(buffer, bufferPosition, newBuffer, 0, numToCopy);
+        bufferStart += bufferPosition;
+        bufferPosition = 0;
+        bufferLength = numToCopy;
+        buffer = newBuffer;
+      }
+    }
+  }
+
+  /** Returns buffer size.  @see #setBufferSize */
+  public int getBufferSize() {
+    return bufferSize;
+  }
+
+  private void checkBufferSize(int bufferSize) {
+    if (bufferSize <= 0)
+      throw new IllegalArgumentException("bufferSize must be greater than 0 (got " + bufferSize + ")");
+  }
+
   public void readBytes(byte[] b, int offset, int len) throws IOException {
     if(len <= (bufferLength-bufferPosition)){
       // the buffer contains enough data to satistfy this request
@@ -51,7 +99,7 @@ public abstract class BufferedIndexInput extends IndexInput {
       bufferPosition += available;
     }
     // and now, read the remaining 'len' bytes:
-    if(len<BUFFER_SIZE){
+    if(len<bufferSize){
       // If the amount left to read is small enough, do it in the usual
       // buffered way: fill the buffer and copy from it:
       refill();
@@ -81,7 +129,7 @@ public abstract class BufferedIndexInput extends IndexInput {
 
   private void refill() throws IOException {
     long start = bufferStart + bufferPosition;
-    long end = start + BUFFER_SIZE;
+    long end = start + bufferSize;
     if (end > length())  // don't read past EOF
       end = length();
     bufferLength = (int)(end - start);
@@ -89,7 +137,7 @@ public abstract class BufferedIndexInput extends IndexInput {
       throw new IOException("read past EOF");
 
     if (buffer == null) {
-      buffer = new byte[BUFFER_SIZE];  // allocate buffer lazily
+      buffer = new byte[bufferSize];  // allocate buffer lazily
       seekInternal(bufferStart);
     }
     readInternal(buffer, 0, bufferLength);
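The resize path in setBufferSize() is careful to carry over any unread bytes and to leave the file pointer untouched. A minimal in-memory subclass makes that observable in isolation; this is a sketch against the 2.2-era abstract methods (readInternal/seekInternal), not code from the commit:

    import java.io.IOException;
    import org.apache.lucene.store.BufferedIndexInput;

    public class SetBufferSizeDemo extends BufferedIndexInput {
      private final byte[] data;

      SetBufferSizeDemo(byte[] data, int bufferSize) {
        super(bufferSize);
        this.data = data;
      }

      // Contract: supply bytes starting at the current file pointer.
      protected void readInternal(byte[] b, int offset, int length) throws IOException {
        long start = getFilePointer();
        if (start + length > data.length)
          throw new IOException("read past EOF");
        System.arraycopy(data, (int) start, b, offset, length);
      }

      protected void seekInternal(long pos) {}  // position is derived in readInternal

      public long length() { return data.length; }
      public void close() {}

      public static void main(String[] args) throws IOException {
        byte[] data = new byte[2048];
        for (int i = 0; i < data.length; i++) data[i] = (byte) i;
        SetBufferSizeDemo in = new SetBufferSizeDemo(data, 1024);
        in.readByte();          // forces a 1 KB fill; file pointer is now 1
        in.setBufferSize(64);   // shrink: up to 64 unread bytes are carried over
        System.out.println(in.getFilePointer());  // still 1
        System.out.println(in.readByte());        // 1, served from the carried bytes
        in.close();
      }
    }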

BufferedIndexOutput.java

@@ -21,7 +21,7 @@ import java.io.IOException;
 
 /** Base implementation class for buffered {@link IndexOutput}. */
 public abstract class BufferedIndexOutput extends IndexOutput {
-  static final int BUFFER_SIZE = 1024;
+  static final int BUFFER_SIZE = 16384;
 
   private final byte[] buffer = new byte[BUFFER_SIZE];
   private long bufferStart = 0;           // position in file of buffer

Directory.java

@@ -88,6 +88,17 @@ public abstract class Directory {
   public abstract IndexInput openInput(String name)
     throws IOException;
 
+  /** Returns a stream reading an existing file, with the
+   *  specified read buffer size.  The particular Directory
+   *  implementation may ignore the buffer size.  Currently
+   *  the only Directory implementations that respect this
+   *  parameter are {@link FSDirectory} and {@link
+   *  org.apache.lucene.index.CompoundFileReader}.
+   */
+  public IndexInput openInput(String name, int bufferSize) throws IOException {
+    return openInput(name);
+  }
+
   /** Construct a {@link Lock}.
    * @param name the name of the lock file
    */

FSDirectory.java

@@ -435,11 +435,16 @@ public class FSDirectory extends Directory {
     return new FSIndexOutput(file);
   }
 
-  /** Returns a stream reading an existing file. */
+  // Inherit javadoc
   public IndexInput openInput(String name) throws IOException {
     return new FSIndexInput(new File(directory, name));
   }
 
+  // Inherit javadoc
+  public IndexInput openInput(String name, int bufferSize) throws IOException {
+    return new FSIndexInput(new File(directory, name), bufferSize);
+  }
+
   /**
    * So we can do some byte-to-hexchar conversion below
    */
@@ -523,6 +528,11 @@ public class FSDirectory extends Directory {
     boolean isClone;
 
     public FSIndexInput(File path) throws IOException {
+      this(path, BufferedIndexInput.BUFFER_SIZE);
+    }
+
+    public FSIndexInput(File path, int bufferSize) throws IOException {
+      super(bufferSize);
       file = new Descriptor(path, "r");
     }
 

RAMDirectory.java

@@ -170,7 +170,7 @@ public class RAMDirectory extends Directory implements Serializable {
 
   /** Return total size in bytes of all files in this
    * directory.  This is currently quantized to
-   * BufferedIndexOutput.BUFFER_SIZE. */
+   * RAMOutputStream.BUFFER_SIZE. */
   public synchronized final long sizeInBytes() {
     ensureOpen();
     return sizeInBytes;

RAMInputStream.java

@@ -26,7 +26,7 @@ import java.io.IOException;
  */
 
 class RAMInputStream extends IndexInput implements Cloneable {
-  static final int BUFFER_SIZE = BufferedIndexOutput.BUFFER_SIZE;
+  static final int BUFFER_SIZE = RAMOutputStream.BUFFER_SIZE;
 
   private RAMFile file;
   private long length;

RAMOutputStream.java

@@ -26,7 +26,7 @@ import java.io.IOException;
  */
 
 public class RAMOutputStream extends IndexOutput {
-  static final int BUFFER_SIZE = BufferedIndexOutput.BUFFER_SIZE;
+  static final int BUFFER_SIZE = 1024;
 
   private RAMFile file;
 

MockRAMDirectory.java

@@ -190,7 +190,7 @@ public class MockRAMDirectory extends RAMDirectory {
   /** Like getRecomputedSizeInBytes(), but, uses actual file
    *  lengths rather than buffer allocations (which are
    *  quantized up to nearest
-   *  BufferedIndexOutput.BUFFER_SIZE (now 1024) bytes.
+   *  RAMOutputStream.BUFFER_SIZE (now 1024) bytes.
    */
 
   final long getRecomputedActualSizeInBytes() {

TestBufferedIndexInput.java

@@ -1,6 +1,21 @@
 package org.apache.lucene.store;
 
 import java.io.IOException;
+import java.io.File;
+import java.util.List;
+import java.util.Random;
+import java.util.ArrayList;
+import java.util.Iterator;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.search.Hits;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.util._TestUtil;
 
 import junit.framework.TestCase;
 
@@ -122,4 +137,133 @@ public class TestBufferedIndexInput extends TestCase {
       return len;
     }
   }
+
+  public void testSetBufferSize() throws IOException {
+    File indexDir = new File(System.getProperty("tempDir"), "testSetBufferSize");
+    MockFSDirectory dir = new MockFSDirectory(indexDir);
+    try {
+      IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
+      writer.setUseCompoundFile(false);
+      for(int i=0;i<37;i++) {
+        Document doc = new Document();
+        doc.add(new Field("content", "aaa bbb ccc ddd" + i, Field.Store.YES, Field.Index.TOKENIZED));
+        doc.add(new Field("id", "" + i, Field.Store.YES, Field.Index.TOKENIZED));
+        writer.addDocument(doc);
+      }
+      writer.close();
+
+      dir.allIndexInputs.clear();
+
+      IndexReader reader = IndexReader.open(dir);
+      Term aaa = new Term("content", "aaa");
+      Term bbb = new Term("content", "bbb");
+      Term ccc = new Term("content", "ccc");
+      assertEquals(reader.docFreq(ccc), 37);
+      reader.deleteDocument(0);
+      assertEquals(reader.docFreq(aaa), 37);
+      dir.tweakBufferSizes();
+      reader.deleteDocument(4);
+      assertEquals(reader.docFreq(bbb), 37);
+      dir.tweakBufferSizes();
+
+      IndexSearcher searcher = new IndexSearcher(reader);
+      Hits hits = searcher.search(new TermQuery(bbb));
+      dir.tweakBufferSizes();
+      assertEquals(35, hits.length());
+      dir.tweakBufferSizes();
+      hits = searcher.search(new TermQuery(new Term("id", "33")));
+      dir.tweakBufferSizes();
+      assertEquals(1, hits.length());
+      hits = searcher.search(new TermQuery(aaa));
+      dir.tweakBufferSizes();
+      assertEquals(35, hits.length());
+      searcher.close();
+      reader.close();
+    } finally {
+      _TestUtil.rmDir(indexDir);
+    }
+  }
+
+  private static class MockFSDirectory extends Directory {
+
+    List allIndexInputs = new ArrayList();
+
+    Random rand = new Random();
+
+    private Directory dir;
+
+    public MockFSDirectory(File path) throws IOException {
+      lockFactory = new NoLockFactory();
+      dir = FSDirectory.getDirectory(path);
+    }
+
+    public IndexInput openInput(String name) throws IOException {
+      return openInput(name, BufferedIndexInput.BUFFER_SIZE);
+    }
+
+    public void tweakBufferSizes() {
+      Iterator it = allIndexInputs.iterator();
+      int count = 0;
+      while(it.hasNext()) {
+        BufferedIndexInput bii = (BufferedIndexInput) it.next();
+        int bufferSize = 1024+(int) Math.abs(rand.nextInt() % 32768);
+        bii.setBufferSize(bufferSize);
+        count++;
+      }
+      //System.out.println("tweak'd " + count + " buffer sizes");
+    }
+
+    public IndexInput openInput(String name, int bufferSize) throws IOException {
+      // Make random changes to buffer size
+      bufferSize = 1+(int) Math.abs(rand.nextInt() % 10);
+      IndexInput f = dir.openInput(name, bufferSize);
+      allIndexInputs.add(f);
+      return f;
+    }
+
+    public IndexOutput createOutput(String name) throws IOException {
+      return dir.createOutput(name);
+    }
+
+    public void close() throws IOException {
+      dir.close();
+    }
+
+    public void deleteFile(String name)
+      throws IOException
+    {
+      dir.deleteFile(name);
+    }
+    public void touchFile(String name)
+      throws IOException
+    {
+      dir.touchFile(name);
+    }
+    public long fileModified(String name)
+      throws IOException
+    {
+      return dir.fileModified(name);
+    }
+    public boolean fileExists(String name)
+      throws IOException
+    {
+      return dir.fileExists(name);
+    }
+    public String[] list()
+      throws IOException
+    {
+      return dir.list();
+    }
+
+    public long fileLength(String name) throws IOException {
+      return dir.fileLength(name);
+    }
+    public void renameFile(String from, String to)
+      throws IOException
+    {
+      dir.renameFile(from, to);
+    }
+
+  }
 }