LUCENE-880: Fixed DocumentWriter to close the TokenStreams after it has written the postings.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@538892 13f79535-47bb-0310-9956-ffa450edef68
Michael Busch 2007-05-17 12:38:43 +00:00
parent b5e09521b5
commit 56547a1f36
3 changed files with 210 additions and 71 deletions

CHANGES.txt

@@ -120,6 +120,10 @@ Bug fixes
was set has no effect - it is masked by the similarity of the MultiSearcher. This is as
designed, because MultiSearcher operates on Searchables (not Searchers). (Doron Cohen)
15. LUCENE-880: Fixed DocumentWriter to close the TokenStreams after it
has written the postings. Then the resources associated with the
TokenStreams can safely be released. (Michael Busch)
New features
1. LUCENE-759: Added two n-gram-producing TokenFilters.

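The finally block added to DocumentWriter below follows a common close-all idiom: every TokenStream is closed even if an earlier close() fails, and only the first IOException is rethrown once all streams have been visited. The following is a minimal, self-contained sketch of that idiom; the Stream interface and StreamCloser class are hypothetical stand-ins, not Lucene API.

import java.io.IOException;
import java.util.Iterator;
import java.util.List;

// Hypothetical stand-in for anything with a close() that may throw.
interface Stream {
    void close() throws IOException;
}

final class StreamCloser {
    // Close every stream; remember only the first IOException and rethrow it
    // after all streams have been given a chance to close.
    static void closeAll(List streams) throws IOException {
        IOException first = null;
        Iterator it = streams.iterator();
        while (it.hasNext()) {
            try {
                ((Stream) it.next()).close();
            } catch (IOException e) {
                if (first == null) {
                    first = e;
                }
            }
        }
        streams.clear();
        if (first != null) {
            throw first;
        }
    }
}

DocumentWriter applies the same pattern to its openTokenStreams list, so a failure while closing one stream does not prevent the remaining streams from being closed.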
DocumentWriter.java

@@ -35,6 +35,8 @@ import java.util.BitSet;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
final class DocumentWriter {
private Analyzer analyzer;
@@ -84,6 +86,8 @@ final class DocumentWriter {
fieldBoosts = new float[fieldInfos.size()]; // init fieldBoosts
Arrays.fill(fieldBoosts, doc.getBoost());
try {
// Before we write the FieldInfos we invert the Document. The reason is that
// during inversion the TokenStreams of tokenized fields are being processed
// and we might encounter tokens that have payloads associated with them. In
@@ -123,7 +127,26 @@
// write norms of indexed fields
writeNorms(segment);
} finally {
// close TokenStreams
IOException ex = null;
Iterator it = openTokenStreams.iterator();
while (it.hasNext()) {
try {
((TokenStream) it.next()).close();
} catch (IOException e) {
// remember only the first exception, but keep closing the remaining streams
if (ex == null) {
ex = e;
}
}
}
openTokenStreams.clear();
if (ex != null) {
throw ex;
}
}
}
// Keys are Terms, values are Postings.
@@ -138,6 +161,10 @@ final class DocumentWriter {
// then we enable payloads for that field.
private BitSet fieldStoresPayloads;
// Keep references of the token streams. We must close them after
// the postings are written to the segment.
private List openTokenStreams = new LinkedList();
// Tokenizes the fields of a document into Postings.
private final void invertDocument(Document doc)
throws IOException {
@@ -181,10 +208,13 @@
stream = analyzer.tokenStream(fieldName, reader);
}
// remember this TokenStream, we must close it later
openTokenStreams.add(stream);
// reset the TokenStream to the first token
stream.reset();
try {
Token lastToken = null;
for (Token t = stream.next(); t != null; t = stream.next()) {
position += (t.getPositionIncrement() - 1);
@@ -213,10 +243,6 @@
if(lastToken != null)
offset += lastToken.endOffset() + 1;
} finally {
stream.close();
}
}
fieldLengths[fieldNumber] = length; // save field length

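With streams now closed by DocumentWriter after the postings are written, a TokenStream handed to the writer (for example through Field(String, TokenStream), as in the test below) can own an external resource and release it in close(). The following is a small hypothetical sketch against the 2.x Token/TokenStream API used in this diff; LineTokenStream is not part of Lucene.

import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;

import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;

// Hypothetical stream that owns a Reader; each line becomes one token.
class LineTokenStream extends TokenStream {
    private final BufferedReader reader;
    private int offset = 0;

    LineTokenStream(Reader input) {
        this.reader = new BufferedReader(input);
    }

    public Token next() throws IOException {
        String line = reader.readLine();
        if (line == null) {
            return null;
        }
        Token t = new Token(line, offset, offset + line.length());
        offset += line.length() + 1;
        return t;
    }

    public void close() throws IOException {
        // Safe to release the resource here: the indexer now guarantees
        // that close() is called once the postings have been written.
        reader.close();
    }
}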
TestPayloads.java

@@ -20,7 +20,9 @@ package org.apache.lucene.index;
import java.io.File;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
@@ -319,10 +321,15 @@
}
private byte[] generateRandomData(int n) {
Random rnd = new Random();
byte[] data = new byte[n];
private static Random rnd = new Random();
private static void generateRandomData(byte[] data) {
rnd.nextBytes(data);
}
private static byte[] generateRandomData(int n) {
byte[] data = new byte[n];
generateRandomData(data);
return data;
}
@@ -440,4 +447,106 @@
return nextToken;
}
}
public void testThreadSafety() throws IOException {
final int numThreads = 5;
final int numDocs = 50;
final ByteArrayPool pool = new ByteArrayPool(numThreads, 5);
Directory dir = new RAMDirectory();
final IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer());
final String field = "test";
Thread[] ingesters = new Thread[numThreads];
for (int i = 0; i < numThreads; i++) {
ingesters[i] = new Thread() {
public void run() {
try {
for (int j = 0; j < numDocs; j++) {
Document d = new Document();
d.add(new Field(field, new PoolingPayloadTokenStream(pool)));
writer.addDocument(d);
}
} catch (IOException e) {
fail(e.toString());
}
}
};
ingesters[i].start();
}
for (int i = 0; i < numThreads; i++) {
try {
ingesters[i].join();
} catch (InterruptedException e) {}
}
writer.close();
IndexReader reader = IndexReader.open(dir);
TermEnum terms = reader.terms();
while (terms.next()) {
TermPositions tp = reader.termPositions(terms.term());
while(tp.next()) {
int freq = tp.freq();
for (int i = 0; i < freq; i++) {
tp.nextPosition();
String s = new String(tp.getPayload(new byte[5], 0));
assertEquals(s, terms.term().text);
}
}
tp.close();
}
terms.close();
reader.close();
assertEquals(pool.size(), numThreads);
}
private static class PoolingPayloadTokenStream extends TokenStream {
private byte[] payload;
private boolean first;
private ByteArrayPool pool;
PoolingPayloadTokenStream(ByteArrayPool pool) {
this.pool = pool;
payload = pool.get();
generateRandomData(payload);
first = true;
}
public Token next() throws IOException {
if (!first) return null;
first = false; // emit exactly one token per stream
Token t = new Token(new String(payload), 0, 0);
t.setPayload(new Payload(payload));
return t;
}
public void close() throws IOException {
pool.release(payload);
}
}
private static class ByteArrayPool {
private List pool;
ByteArrayPool(int capacity, int size) {
pool = new ArrayList();
for (int i = 0; i < capacity; i++) {
pool.add(new byte[size]);
}
}
synchronized byte[] get() {
return (byte[]) pool.remove(0);
}
synchronized void release(byte[] b) {
pool.add(b);
}
synchronized int size() {
return pool.size();
}
}
}