Remove synchronization from TermVectors

(Patch #30736)


git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150569 13f79535-47bb-0310-9956-ffa450edef68
Christoph Goller 2004-10-06 10:40:23 +00:00
parent 3e955f8c77
commit 4953fa5c71
4 changed files with 248 additions and 19 deletions
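
In outline, the patch drops the synchronized modifiers from TermVectorsReader.get(...), makes the reader Cloneable, and has SegmentReader hand each thread its own clone (with cloned IndexInput streams) through a ThreadLocal, so concurrent term-vector reads no longer contend on a single lock. A minimal, self-contained sketch of the same pattern follows; the Resource and ResourcePool names are illustrative stand-ins for TermVectorsReader and SegmentReader, not Lucene classes.

// Sketch only: the ThreadLocal-clone pattern this patch applies, with
// hypothetical Resource / ResourcePool names standing in for
// TermVectorsReader / SegmentReader.
public class ResourcePool {

  // A stateful reader: cloning duplicates its position state, the way
  // TermVectorsReader.clone() duplicates its IndexInput streams.
  static class Resource implements Cloneable {
    private int position = 0;            // per-clone state, like a file pointer

    int readAt(int pos) {
      position = pos;                    // safe without locks: every thread
      return position * 2;               // only ever touches its own clone
    }

    protected Object clone() {
      try {
        return super.clone();
      } catch (CloneNotSupportedException e) {
        throw new RuntimeException(e);   // cannot happen, we implement Cloneable
      }
    }
  }

  private final Resource original = new Resource();
  private final ThreadLocal perThread = new ThreadLocal();

  // One lazily created clone per thread; mirrors SegmentReader.getTermVectorsReader().
  Resource get() {
    Resource r = (Resource) perThread.get();
    if (r == null) {
      r = (Resource) original.clone();
      perThread.set(r);
    }
    return r;
  }

  public static void main(String[] args) throws InterruptedException {
    final ResourcePool pool = new ResourcePool();
    Runnable task = new Runnable() {
      public void run() {
        for (int i = 0; i < 1000; i++)
          pool.get().readAt(i);          // no synchronized blocks anywhere
      }
    };
    Thread a = new Thread(task);
    Thread b = new Thread(task);
    a.start(); b.start();
    a.join(); b.join();
    System.out.println("concurrent reads finished without shared locking");
  }
}

Each thread pays for one clone up front and then reads with no monitor at all, which is exactly the trade the diffs below make for term vectors.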

IndexReader.java

@@ -532,7 +532,7 @@ public abstract class IndexReader {
   protected abstract void doClose() throws IOException;
 
   /** Release the write lock, if needed. */
-  protected final void finalize() {
+  protected void finalize() {
     if (writeLock != null) {
       writeLock.release();   // release write lock
       writeLock = null;

SegmentReader.java

@@ -43,7 +43,8 @@ class SegmentReader extends IndexReader {
   private FieldsReader fieldsReader;
 
   TermInfosReader tis;
-  TermVectorsReader termVectorsReader;
+  TermVectorsReader termVectorsReaderOrig = null;
+  ThreadLocal termVectorsLocal = new ThreadLocal();
 
   BitVector deletedDocs = null;
   private boolean deletedDocsDirty = false;
@@ -156,9 +157,15 @@ class SegmentReader extends IndexReader {
     openNorms(cfsDir);
 
     if (fieldInfos.hasVectors()) { // open term vector files only as needed
-      termVectorsReader = new TermVectorsReader(cfsDir, segment, fieldInfos);
+      termVectorsReaderOrig = new TermVectorsReader(cfsDir, segment, fieldInfos);
     }
   }
 
+  protected void finalize() {
+    // patch for pre-1.4.2 JVMs, whose ThreadLocals leak
+    termVectorsLocal.set(null);
+    super.finalize();
+  }
+
   protected void doCommit() throws IOException {
     if (deletedDocsDirty) {   // re-write deleted
@@ -193,8 +200,8 @@ class SegmentReader extends IndexReader {
     closeNorms();
 
-    if (termVectorsReader != null)
-      termVectorsReader.close();
+    if (termVectorsReaderOrig != null)
+      termVectorsReaderOrig.close();
 
     if (cfsReader != null)
       cfsReader.close();
@@ -456,6 +463,19 @@ class SegmentReader extends IndexReader {
     }
   }
 
+  /**
+   * Create a clone from the initial TermVectorsReader and store it in the ThreadLocal.
+   * @return TermVectorsReader
+   */
+  private TermVectorsReader getTermVectorsReader() {
+    TermVectorsReader tvReader = (TermVectorsReader)termVectorsLocal.get();
+    if (tvReader == null) {
+      tvReader = (TermVectorsReader)termVectorsReaderOrig.clone();
+      termVectorsLocal.set(tvReader);
+    }
+    return tvReader;
+  }
+
   /** Return a term frequency vector for the specified document and field. The
    *  vector returned contains term numbers and frequencies for all terms in
    *  the specified field of this document, if the field had storeTermVector
@@ -465,9 +485,10 @@ class SegmentReader extends IndexReader {
   public TermFreqVector getTermFreqVector(int docNumber, String field) throws IOException {
     // Check if this field is invalid or has no stored term vector
     FieldInfo fi = fieldInfos.fieldInfo(field);
-    if (fi == null || !fi.storeTermVector || termVectorsReader == null)
+    if (fi == null || !fi.storeTermVector || termVectorsReaderOrig == null)
       return null;
 
+    TermVectorsReader termVectorsReader = getTermVectorsReader();
     return termVectorsReader.get(docNumber, field);
   }
@@ -480,9 +501,10 @@ class SegmentReader extends IndexReader {
    * @throws IOException
    */
   public TermFreqVector[] getTermFreqVectors(int docNumber) throws IOException {
-    if (termVectorsReader == null)
+    if (termVectorsReaderOrig == null)
       return null;
 
+    TermVectorsReader termVectorsReader = getTermVectorsReader();
     return termVectorsReader.get(docNumber);
   }
 }

TermVectorsReader.java

@@ -22,11 +22,9 @@ import org.apache.lucene.store.IndexInput;
 import java.io.IOException;
 
 /**
- * FIXME: relax synchro!
- *
  * @version $Id$
  */
-class TermVectorsReader {
+class TermVectorsReader implements Cloneable {
   private FieldInfos fieldInfos;
 
   private IndexInput tvx;
@@ -86,9 +84,9 @@ class TermVectorsReader {
    * @param docNum The document number to retrieve the vector for
    * @param field The field within the document to retrieve
    * @return The TermFreqVector for the document and field or null if there is no termVector for this field.
-   * @throws IOException
+   * @throws IOException if there is an error reading the term vector files
    */
-  synchronized TermFreqVector get(int docNum, String field) throws IOException {
+  TermFreqVector get(int docNum, String field) throws IOException {
     // Check if no term vectors are available for this segment at all
     int fieldNumber = fieldInfos.fieldNumber(field);
     TermFreqVector result = null;
@@ -137,13 +135,14 @@ class TermVectorsReader {
     return result;
   }
 
   /**
-   * Return all term vectors stored for this document or null if there are no term vectors
-   * for the document.
-   * @throws IOException
+   * Return all term vectors stored for this document or null if they could not be read in.
+   *
+   * @param docNum The document number to retrieve the vector for
+   * @return All term frequency vectors
+   * @throws IOException if there is an error reading the term vector files
    */
-  synchronized TermFreqVector[] get(int docNum) throws IOException {
+  TermFreqVector[] get(int docNum) throws IOException {
     TermFreqVector[] result = null;
     // Check if no term vectors are available for this segment at all
     if (tvx != null) {
@@ -295,4 +294,16 @@ class TermVectorsReader {
     return tv;
   }
 
+  protected Object clone() {
+    TermVectorsReader clone = null;
+    try {
+      clone = (TermVectorsReader) super.clone();
+    } catch (CloneNotSupportedException e) {}
+
+    clone.tvx = (IndexInput) tvx.clone();
+    clone.tvd = (IndexInput) tvd.clone();
+    clone.tvf = (IndexInput) tvf.clone();
+    return clone;
+  }
+
 }

TestMultiThreadTermVectors.java

@@ -0,0 +1,196 @@
package org.apache.lucene.search;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import junit.framework.TestCase;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.TermFreqVector;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.English;
/**
* @author Bernhard Messer
* @version $rcs = ' $Id$ ' ;
*/
public class TestMultiThreadTermVectors extends TestCase {
private RAMDirectory directory = new RAMDirectory();
public int numDocs = 100;
public int numThreads = 3;
public TestMultiThreadTermVectors(String s) {
super(s);
}
public void setUp() throws Exception {
IndexWriter writer
= new IndexWriter(directory, new SimpleAnalyzer(), true);
//writer.setUseCompoundFile(false);
//writer.infoStream = System.out;
for (int i = 0; i < numDocs; i++) {
Document doc = new Document();
Field fld = new Field("field", English.intToEnglish(i), Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.YES);
doc.add(fld);
writer.addDocument(doc);
}
writer.close();
}
public void test() {
IndexReader reader = null;
try {
reader = IndexReader.open(directory);
for(int i = 1; i <= numThreads; i++)
testTermPositionVectors(reader, i);
}
catch (IOException ioe) {
fail(ioe.getMessage());
}
finally {
if (reader != null) {
try {
/** close the opened reader */
reader.close();
} catch (IOException ioe) {
ioe.printStackTrace();
}
}
}
}
public void testTermPositionVectors(final IndexReader reader, int threadCount) {
MultiThreadTermVectorsReader[] mtr = new MultiThreadTermVectorsReader[threadCount];
for (int i = 0; i < threadCount; i++) {
mtr[i] = new MultiThreadTermVectorsReader();
mtr[i].init(reader);
}
/** run until all threads finished */
int threadsAlive = mtr.length;
while (threadsAlive > 0) {
try {
//System.out.println("Threads alive");
Thread.sleep(10);
threadsAlive = mtr.length;
for (int i = 0; i < mtr.length; i++) {
if (mtr[i].isAlive() == true) {
break;
}
threadsAlive--;
}
} catch (InterruptedException ie) {}
}
long totalTime = 0L;
for (int i = 0; i < mtr.length; i++) {
totalTime += mtr[i].timeElapsed;
mtr[i] = null;
}
//System.out.println("threadcount: " + mtr.length + " average term vector time: " + totalTime/mtr.length);
}
}
class MultiThreadTermVectorsReader implements Runnable {
private IndexReader reader = null;
private Thread t = null;
private final int runsToDo = 100;
long timeElapsed = 0;
public void init(IndexReader reader) {
this.reader = reader;
timeElapsed = 0;
t=new Thread(this);
t.start();
}
public boolean isAlive() {
if (t == null) return false;
return t.isAlive();
}
public void run() {
try {
// run the test 100 times
for (int i = 0; i < runsToDo; i++)
testTermVectors();
}
catch (Exception e) {
e.printStackTrace();
}
return;
}
private void testTermVectors() throws Exception {
// check:
int numDocs = reader.numDocs();
long start = 0L;
for (int docId = 0; docId < numDocs; docId++) {
start = System.currentTimeMillis();
TermFreqVector [] vectors = reader.getTermFreqVectors(docId);
timeElapsed += System.currentTimeMillis()-start;
// verify vectors result
verifyVectors(vectors, docId);
start = System.currentTimeMillis();
TermFreqVector vector = reader.getTermFreqVector(docId, "field");
timeElapsed += System.currentTimeMillis()-start;
vectors = new TermFreqVector[1];
vectors[0] = vector;
verifyVectors(vectors, docId);
}
}
private void verifyVectors(TermFreqVector[] vectors, int num) {
StringBuffer temp = new StringBuffer();
String[] terms = null;
for (int i = 0; i < vectors.length; i++) {
terms = vectors[i].getTerms();
for (int z = 0; z < terms.length; z++) {
temp.append(terms[z]);
}
}
if (!English.intToEnglish(num).trim().equals(temp.toString().trim()))
System.out.println("worng term result");
}
}