RAMDirectory.sizeInBytes, public flushRamSegments: LUCENE-709

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@478014 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yonik Seeley 2006-11-22 02:47:49 +00:00
parent 5cf957527f
commit 9ba7a8e612
7 changed files with 253 additions and 68 deletions

View File

@ -67,6 +67,12 @@ New features
characters in terms via a unicode escape of the form \uXXXX characters in terms via a unicode escape of the form \uXXXX
(Michael Busch via Yonik Seeley) (Michael Busch via Yonik Seeley)
9. LUCENE-709: Added RAMDirectory.sizeInBytes(), IndexWriter.ramSizeInBytes()
and IndexWriter.flushRamSegments(), allowing applications to
control the amount of memory used to buffer documents.
(Chuck Williams via Yonik Seeley)
API Changes API Changes
1. LUCENE-438: Remove "final" from Token, implement Cloneable, allow 1. LUCENE-438: Remove "final" from Token, implement Cloneable, allow

View File

@ -99,9 +99,9 @@ public class IndexWriter {
private Similarity similarity = Similarity.getDefault(); // how to normalize private Similarity similarity = Similarity.getDefault(); // how to normalize
private SegmentInfos segmentInfos = new SegmentInfos(); // the segments private SegmentInfos segmentInfos = new SegmentInfos(); // the segments
private SegmentInfos ramSegmentInfos = new SegmentInfos(); // the segments in ramDirectory private SegmentInfos ramSegmentInfos = new SegmentInfos(); // the segments in ramDirectory
private final Directory ramDirectory = new RAMDirectory(); // for temp segs private final RAMDirectory ramDirectory = new RAMDirectory(); // for temp segs
private IndexFileDeleter deleter; private IndexFileDeleter deleter;
private Lock writeLock; private Lock writeLock;
@ -827,14 +827,28 @@ public class IndexWriter {
} }
} }
/** Merges all RAM-resident segments, then may merge segments. */ /** Expert: Flushes all RAM-resident segments (buffered documents), then may merge segments. */
private final void flushRamSegments() throws IOException { public final synchronized void flushRamSegments() throws IOException {
if (ramSegmentInfos.size() > 0) { if (ramSegmentInfos.size() > 0) {
mergeSegments(ramSegmentInfos, 0, ramSegmentInfos.size()); mergeSegments(ramSegmentInfos, 0, ramSegmentInfos.size());
maybeMergeSegments(minMergeDocs); maybeMergeSegments(minMergeDocs);
} }
} }
/** Expert: Return the total size of all index files currently cached in memory.
* Useful for size management with flushRamDocs()
*/
public final long ramSizeInBytes() {
return ramDirectory.sizeInBytes();
}
/** Expert: Return the number of documents whose segments are currently cached in memory.
* Useful when calling flushRamSegments()
*/
public final synchronized int numRamDocs() {
return ramSegmentInfos.size();
}
/** Incremental segment merger. */ /** Incremental segment merger. */
private final void maybeMergeSegments(int startUpperBound) throws IOException { private final void maybeMergeSegments(int startUpperBound) throws IOException {
long lowerBound = -1; long lowerBound = -1;

View File

@ -21,12 +21,11 @@ import java.io.IOException;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.io.File; import java.io.File;
import java.io.Serializable; import java.io.Serializable;
import java.util.Hashtable; import java.util.Collection;
import java.util.Enumeration; import java.util.Enumeration;
import java.util.HashMap;
import org.apache.lucene.store.Directory; import java.util.Iterator;
import org.apache.lucene.store.IndexInput; import java.util.Set;
import org.apache.lucene.store.IndexOutput;
/** /**
* A memory-resident {@link Directory} implementation. Locking * A memory-resident {@link Directory} implementation. Locking
@ -39,7 +38,14 @@ public final class RAMDirectory extends Directory implements Serializable {
private static final long serialVersionUID = 1l; private static final long serialVersionUID = 1l;
Hashtable files = new Hashtable(); private HashMap fileMap = new HashMap();
private Set fileNames = fileMap.keySet();
private Collection files = fileMap.values();
long sizeInBytes = 0;
// *****
// Lock acquisition sequence: RAMDirectory, then RAMFile
// *****
/** Constructs an empty {@link Directory}. */ /** Constructs an empty {@link Directory}. */
public RAMDirectory() { public RAMDirectory() {
@ -107,85 +113,144 @@ public final class RAMDirectory extends Directory implements Serializable {
/** Returns an array of strings, one for each file in the directory. */ /** Returns an array of strings, one for each file in the directory. */
public synchronized final String[] list() { public synchronized final String[] list() {
String[] result = new String[files.size()]; String[] result = new String[fileNames.size()];
int i = 0; int i = 0;
Enumeration names = files.keys(); Iterator it = fileNames.iterator();
while (names.hasMoreElements()) while (it.hasNext())
result[i++] = (String)names.nextElement(); result[i++] = (String)it.next();
return result; return result;
} }
/** Returns true iff the named file exists in this directory. */ /** Returns true iff the named file exists in this directory. */
public final boolean fileExists(String name) { public final boolean fileExists(String name) {
RAMFile file = (RAMFile)files.get(name); RAMFile file;
synchronized (this) {
file = (RAMFile)fileMap.get(name);
}
return file != null; return file != null;
} }
/** Returns the time the named file was last modified. */ /** Returns the time the named file was last modified.
public final long fileModified(String name) { * @throws IOException if the file does not exist
RAMFile file = (RAMFile)files.get(name); */
return file.lastModified; public final long fileModified(String name) throws IOException {
RAMFile file;
synchronized (this) {
file = (RAMFile)fileMap.get(name);
}
if (file==null)
throw new FileNotFoundException(name);
return file.getLastModified();
} }
/** Set the modified time of an existing file to now. */ /** Set the modified time of an existing file to now.
public void touchFile(String name) { * @throws IOException if the file does not exist
// final boolean MONITOR = false; */
public void touchFile(String name) throws IOException {
RAMFile file = (RAMFile)files.get(name); RAMFile file;
synchronized (this) {
file = (RAMFile)fileMap.get(name);
}
if (file==null)
throw new FileNotFoundException(name);
long ts2, ts1 = System.currentTimeMillis(); long ts2, ts1 = System.currentTimeMillis();
do { do {
try { try {
Thread.sleep(0, 1); Thread.sleep(0, 1);
} catch (InterruptedException e) {} } catch (InterruptedException e) {}
ts2 = System.currentTimeMillis(); ts2 = System.currentTimeMillis();
// if (MONITOR) {
// count++;
// }
} while(ts1 == ts2); } while(ts1 == ts2);
file.lastModified = ts2; file.setLastModified(ts2);
// if (MONITOR)
// System.out.println("SLEEP COUNT: " + count);
} }
/** Returns the length in bytes of a file in the directory. */ /** Returns the length in bytes of a file in the directory.
public final long fileLength(String name) { * @throws IOException if the file does not exist
RAMFile file = (RAMFile)files.get(name); */
return file.length; public final long fileLength(String name) throws IOException {
RAMFile file;
synchronized (this) {
file = (RAMFile)fileMap.get(name);
}
if (file==null)
throw new FileNotFoundException(name);
return file.getLength();
}
/** Return total size in bytes of all files in this directory */
public synchronized final long sizeInBytes() {
return sizeInBytes;
}
/** Provided for testing purposes. Use sizeInBytes() instead. */
public synchronized final long getRecomputedSizeInBytes() {
long size = 0;
Iterator it = files.iterator();
while (it.hasNext())
size += ((RAMFile) it.next()).getSizeInBytes();
return size;
} }
/** Removes an existing file in the directory. */ /** Removes an existing file in the directory.
public final void deleteFile(String name) { * @throws IOException if the file does not exist
files.remove(name); */
public synchronized final void deleteFile(String name) throws IOException {
RAMFile file = (RAMFile)fileMap.get(name);
if (file!=null) {
fileMap.remove(name);
file.directory = null;
sizeInBytes -= file.sizeInBytes; // updates to RAMFile.sizeInBytes synchronized on directory
} else
throw new FileNotFoundException(name);
} }
/** Removes an existing file in the directory. */ /** Removes an existing file in the directory.
public final void renameFile(String from, String to) { * @throws IOException if from does not exist
RAMFile file = (RAMFile)files.get(from); */
files.remove(from); public synchronized final void renameFile(String from, String to) throws IOException {
files.put(to, file); RAMFile fromFile = (RAMFile)fileMap.get(from);
if (fromFile==null)
throw new FileNotFoundException(from);
RAMFile toFile = (RAMFile)fileMap.get(to);
if (toFile!=null) {
sizeInBytes -= toFile.sizeInBytes; // updates to RAMFile.sizeInBytes synchronized on directory
toFile.directory = null;
}
fileMap.remove(from);
fileMap.put(to, fromFile);
} }
/** Creates a new, empty file in the directory with the given name. /** Creates a new, empty file in the directory with the given name. Returns a stream writing this file. */
Returns a stream writing this file. */
public final IndexOutput createOutput(String name) { public final IndexOutput createOutput(String name) {
RAMFile file = new RAMFile(); RAMFile file = new RAMFile(this);
files.put(name, file); synchronized (this) {
RAMFile existing = (RAMFile)fileMap.get(name);
if (existing!=null) {
sizeInBytes -= existing.sizeInBytes;
existing.directory = null;
}
fileMap.put(name, file);
}
return new RAMOutputStream(file); return new RAMOutputStream(file);
} }
/** Returns a stream reading an existing file. */ /** Returns a stream reading an existing file. */
public final IndexInput openInput(String name) throws IOException { public final IndexInput openInput(String name) throws IOException {
RAMFile file = (RAMFile)files.get(name); RAMFile file;
if (file == null) { synchronized (this) {
throw new FileNotFoundException(name); file = (RAMFile)fileMap.get(name);
} }
if (file == null)
throw new FileNotFoundException(name);
return new RAMInputStream(file); return new RAMInputStream(file);
} }
/** Closes the store to future operations, releasing associated memory. */ /** Closes the store to future operations, releasing associated memory. */
public final void close() { public final void close() {
fileMap = null;
fileNames = null;
files = null; files = null;
} }
} }

View File

@ -17,14 +17,66 @@ package org.apache.lucene.store;
* limitations under the License. * limitations under the License.
*/ */
import java.util.Vector; import java.util.ArrayList;
import java.io.Serializable; import java.io.Serializable;
class RAMFile implements Serializable { class RAMFile implements Serializable {
private static final long serialVersionUID = 1l; private static final long serialVersionUID = 1l;
Vector buffers = new Vector(); // Direct read-only access to state supported for streams since a writing stream implies no other concurrent streams
ArrayList buffers = new ArrayList();
long length; long length;
long lastModified = System.currentTimeMillis(); RAMDirectory directory;
long sizeInBytes; // Only maintained if in a directory; updates synchronized on directory
// This is publicly modifiable via Directory.touchFile(), so direct access not supported
private long lastModified = System.currentTimeMillis();
// File used as buffer, in no RAMDirectory
RAMFile() {}
RAMFile(RAMDirectory directory) {
this.directory = directory;
}
// For non-stream access from thread that might be concurrent with writing
synchronized long getLength() {
return length;
}
synchronized void setLength(long length) {
this.length = length;
}
// For non-stream access from thread that might be concurrent with writing
synchronized long getLastModified() {
return lastModified;
}
synchronized void setLastModified(long lastModified) {
this.lastModified = lastModified;
}
// Only one writing stream with no concurrent reading streams, so no file synchronization required
final byte[] addBuffer(int size) {
byte[] buffer = new byte[size];
if (directory!=null)
synchronized (directory) { // Ensure addition of buffer and adjustment to directory size are atomic wrt directory
buffers.add(buffer);
directory.sizeInBytes += size;
sizeInBytes += size;
}
else
buffers.add(buffer);
return buffer;
}
// Only valid if in a directory
long getSizeInBytes() {
synchronized (directory) {
return sizeInBytes;
}
}
} }

View File

@ -41,7 +41,7 @@ class RAMInputStream extends BufferedIndexInput implements Cloneable {
int bufferOffset = (int)(start%BUFFER_SIZE); int bufferOffset = (int)(start%BUFFER_SIZE);
int bytesInBuffer = BUFFER_SIZE - bufferOffset; int bytesInBuffer = BUFFER_SIZE - bufferOffset;
int bytesToCopy = bytesInBuffer >= remainder ? remainder : bytesInBuffer; int bytesToCopy = bytesInBuffer >= remainder ? remainder : bytesInBuffer;
byte[] buffer = (byte[])file.buffers.elementAt(bufferNumber); byte[] buffer = (byte[])file.buffers.get(bufferNumber);
System.arraycopy(buffer, bufferOffset, dest, destOffset, bytesToCopy); System.arraycopy(buffer, bufferOffset, dest, destOffset, bytesToCopy);
destOffset += bytesToCopy; destOffset += bytesToCopy;
start += bytesToCopy; start += bytesToCopy;

View File

@ -50,7 +50,7 @@ public class RAMOutputStream extends BufferedIndexOutput {
if (nextPos > end) { // at the last buffer if (nextPos > end) { // at the last buffer
length = (int)(end - pos); length = (int)(end - pos);
} }
out.writeBytes((byte[])file.buffers.elementAt(buffer++), length); out.writeBytes((byte[])file.buffers.get(buffer++), length);
pos = nextPos; pos = nextPos;
} }
} }
@ -63,7 +63,7 @@ public class RAMOutputStream extends BufferedIndexOutput {
throw new RuntimeException(e.toString()); throw new RuntimeException(e.toString());
} }
file.length = 0; file.setLength(0);
} }
public void flushBuffer(byte[] src, int len) { public void flushBuffer(byte[] src, int len) {
@ -76,12 +76,10 @@ public class RAMOutputStream extends BufferedIndexOutput {
int remainInSrcBuffer = len - bufferPos; int remainInSrcBuffer = len - bufferPos;
int bytesToCopy = bytesInBuffer >= remainInSrcBuffer ? remainInSrcBuffer : bytesInBuffer; int bytesToCopy = bytesInBuffer >= remainInSrcBuffer ? remainInSrcBuffer : bytesInBuffer;
if (bufferNumber == file.buffers.size()) { if (bufferNumber == file.buffers.size())
buffer = new byte[BUFFER_SIZE]; buffer = file.addBuffer(BUFFER_SIZE);
file.buffers.addElement(buffer); else
} else { buffer = (byte[]) file.buffers.get(bufferNumber);
buffer = (byte[]) file.buffers.elementAt(bufferNumber);
}
System.arraycopy(src, bufferPos, buffer, bufferOffset, bytesToCopy); System.arraycopy(src, bufferPos, buffer, bufferOffset, bytesToCopy);
bufferPos += bytesToCopy; bufferPos += bytesToCopy;
@ -89,9 +87,9 @@ public class RAMOutputStream extends BufferedIndexOutput {
} }
if (pointer > file.length) if (pointer > file.length)
file.length = pointer; file.setLength(pointer);
file.lastModified = System.currentTimeMillis(); file.setLastModified(System.currentTimeMillis());
} }
public void close() throws IOException { public void close() throws IOException {

View File

@ -64,7 +64,6 @@ public class TestRAMDirectory extends TestCase {
writer.addDocument(doc); writer.addDocument(doc);
} }
assertEquals(docsToAdd, writer.docCount()); assertEquals(docsToAdd, writer.docCount());
writer.optimize();
writer.close(); writer.close();
} }
@ -73,9 +72,12 @@ public class TestRAMDirectory extends TestCase {
Directory dir = FSDirectory.getDirectory(indexDir, false); Directory dir = FSDirectory.getDirectory(indexDir, false);
RAMDirectory ramDir = new RAMDirectory(dir); RAMDirectory ramDir = new RAMDirectory(dir);
// close the underlaying directory and delete the index // close the underlaying directory
dir.close(); dir.close();
// Check size
assertEquals(ramDir.sizeInBytes(), ramDir.getRecomputedSizeInBytes());
// open reader to test document count // open reader to test document count
IndexReader reader = IndexReader.open(ramDir); IndexReader reader = IndexReader.open(ramDir);
assertEquals(docsToAdd, reader.numDocs()); assertEquals(docsToAdd, reader.numDocs());
@ -98,6 +100,9 @@ public class TestRAMDirectory extends TestCase {
RAMDirectory ramDir = new RAMDirectory(indexDir); RAMDirectory ramDir = new RAMDirectory(indexDir);
// Check size
assertEquals(ramDir.sizeInBytes(), ramDir.getRecomputedSizeInBytes());
// open reader to test document count // open reader to test document count
IndexReader reader = IndexReader.open(ramDir); IndexReader reader = IndexReader.open(ramDir);
assertEquals(docsToAdd, reader.numDocs()); assertEquals(docsToAdd, reader.numDocs());
@ -120,6 +125,9 @@ public class TestRAMDirectory extends TestCase {
RAMDirectory ramDir = new RAMDirectory(indexDir.getCanonicalPath()); RAMDirectory ramDir = new RAMDirectory(indexDir.getCanonicalPath());
// Check size
assertEquals(ramDir.sizeInBytes(), ramDir.getRecomputedSizeInBytes());
// open reader to test document count // open reader to test document count
IndexReader reader = IndexReader.open(ramDir); IndexReader reader = IndexReader.open(ramDir);
assertEquals(docsToAdd, reader.numDocs()); assertEquals(docsToAdd, reader.numDocs());
@ -137,6 +145,48 @@ public class TestRAMDirectory extends TestCase {
reader.close(); reader.close();
searcher.close(); searcher.close();
} }
private final int numThreads = 50;
private final int docsPerThread = 40;
public void testRAMDirectorySize() throws IOException, InterruptedException {
final RAMDirectory ramDir = new RAMDirectory(indexDir.getCanonicalPath());
final IndexWriter writer = new IndexWriter(ramDir, new WhitespaceAnalyzer(), false);
writer.optimize();
assertEquals(ramDir.sizeInBytes(), ramDir.getRecomputedSizeInBytes());
Thread[] threads = new Thread[numThreads];
for (int i=0; i<numThreads; i++) {
final int num = i;
threads[i] = new Thread(){
public void run() {
for (int j=1; j<docsPerThread; j++) {
Document doc = new Document();
doc.add(new Field("sizeContent", English.intToEnglish(num*docsPerThread+j).trim(), Field.Store.YES, Field.Index.UN_TOKENIZED));
try {
writer.addDocument(doc);
} catch (IOException e) {
throw new RuntimeException(e);
}
synchronized (ramDir) {
assertEquals(ramDir.sizeInBytes(), ramDir.getRecomputedSizeInBytes());
}
}
}
};
}
for (int i=0; i<numThreads; i++)
threads[i].start();
for (int i=0; i<numThreads; i++)
threads[i].join();
writer.optimize();
assertEquals(ramDir.sizeInBytes(), ramDir.getRecomputedSizeInBytes());
writer.close();
}
public void tearDown() { public void tearDown() {
// cleanup // cleanup