mirror of https://github.com/apache/lucene.git
Remove synchronization from TermVectors
(Patch #30736) git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150569 13f79535-47bb-0310-9956-ffa450edef68
parent 3e955f8c77
commit 4953fa5c71
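In short: TermVectorsReader.get() drops its synchronized modifier. SegmentReader now keeps one pristine reader (termVectorsReaderOrig) and hands every thread its own clone through a ThreadLocal, so concurrent term-vector lookups no longer serialize on a single shared reader. A minimal sketch of that pattern, with made-up names standing in for the Lucene classes changed in the diff below:

  // Sketch only: PerThreadReaders and CursorReader are hypothetical names; the patch
  // applies this idea to SegmentReader and TermVectorsReader as shown further down.
  class PerThreadReaders {
    private final CursorReader original = new CursorReader();  // shared prototype, opened once
    private final ThreadLocal local = new ThreadLocal();       // lazily holds one clone per thread

    CursorReader getReader() {
      CursorReader r = (CursorReader) local.get();
      if (r == null) {                                         // first call on this thread
        r = (CursorReader) original.clone();
        local.set(r);
      }
      return r;                                                // safe to use without locking
    }

    static class CursorReader implements Cloneable {
      int position;                                            // per-clone state (a file pointer, say)
      public Object clone() {
        try {
          return super.clone();
        } catch (CloneNotSupportedException e) {
          throw new RuntimeException(e);                       // cannot happen: we are Cloneable
        }
      }
    }
  }

Clones are created lazily the first time a thread asks for term vectors; the original is used only as a prototype and, as in doClose() below, to close the underlying files.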
IndexReader.java

@@ -532,7 +532,7 @@ public abstract class IndexReader {
   protected abstract void doClose() throws IOException;

   /** Release the write lock, if needed. */
-  protected final void finalize() {
+  protected void finalize() {
     if (writeLock != null) {
       writeLock.release();                        // release write lock
       writeLock = null;
SegmentReader.java

@@ -43,7 +43,8 @@ class SegmentReader extends IndexReader {
   private FieldsReader fieldsReader;

   TermInfosReader tis;
-  TermVectorsReader termVectorsReader;
+  TermVectorsReader termVectorsReaderOrig = null;
+  ThreadLocal termVectorsLocal = new ThreadLocal();

   BitVector deletedDocs = null;
   private boolean deletedDocsDirty = false;
@@ -156,9 +157,15 @@ class SegmentReader extends IndexReader {
     openNorms(cfsDir);

     if (fieldInfos.hasVectors()) { // open term vector files only as needed
-      termVectorsReader = new TermVectorsReader(cfsDir, segment, fieldInfos);
+      termVectorsReaderOrig = new TermVectorsReader(cfsDir, segment, fieldInfos);
     }
   }

+  protected void finalize() {
+    // patch for pre-1.4.2 JVMs, whose ThreadLocals leak
+    termVectorsLocal.set(null);
+    super.finalize();
+  }
+
   protected void doCommit() throws IOException {
     if (deletedDocsDirty) {               // re-write deleted
@@ -193,8 +200,8 @@ class SegmentReader extends IndexReader {

     closeNorms();

-    if (termVectorsReader != null)
-      termVectorsReader.close();
+    if (termVectorsReaderOrig != null)
+      termVectorsReaderOrig.close();

     if (cfsReader != null)
       cfsReader.close();
@@ -456,6 +463,19 @@ class SegmentReader extends IndexReader {
     }
   }

+  /**
+   * Create a clone from the initial TermVectorsReader and store it in the ThreadLocal.
+   * @return TermVectorsReader
+   */
+  private TermVectorsReader getTermVectorsReader() {
+    TermVectorsReader tvReader = (TermVectorsReader)termVectorsLocal.get();
+    if (tvReader == null) {
+      tvReader = (TermVectorsReader)termVectorsReaderOrig.clone();
+      termVectorsLocal.set(tvReader);
+    }
+    return tvReader;
+  }
+
   /** Return a term frequency vector for the specified document and field. The
    *  vector returned contains term numbers and frequencies for all terms in
    *  the specified field of this document, if the field had storeTermVector
@@ -465,9 +485,10 @@ class SegmentReader extends IndexReader {
   public TermFreqVector getTermFreqVector(int docNumber, String field) throws IOException {
     // Check if this field is invalid or has no stored term vector
     FieldInfo fi = fieldInfos.fieldInfo(field);
-    if (fi == null || !fi.storeTermVector || termVectorsReader == null)
+    if (fi == null || !fi.storeTermVector || termVectorsReaderOrig == null)
       return null;

+    TermVectorsReader termVectorsReader = getTermVectorsReader();
     return termVectorsReader.get(docNumber, field);
   }

@@ -480,9 +501,10 @@ class SegmentReader extends IndexReader {
    * @throws IOException
    */
   public TermFreqVector[] getTermFreqVectors(int docNumber) throws IOException {
-    if (termVectorsReader == null)
+    if (termVectorsReaderOrig == null)
       return null;

+    TermVectorsReader termVectorsReader = getTermVectorsReader();
     return termVectorsReader.get(docNumber);
   }
 }
TermVectorsReader.java

@@ -22,11 +22,9 @@ import org.apache.lucene.store.IndexInput;
 import java.io.IOException;

 /**
- * FIXME: relax synchro!
- *
  * @version $Id$
  */
-class TermVectorsReader {
+class TermVectorsReader implements Cloneable {
   private FieldInfos fieldInfos;

   private IndexInput tvx;
@@ -86,9 +84,9 @@ class TermVectorsReader {
   * @param docNum The document number to retrieve the vector for
   * @param field The field within the document to retrieve
   * @return The TermFreqVector for the document and field or null if there is no termVector for this field.
-  * @throws IOException
+  * @throws IOException if there is an error reading the term vector files
   */
-  synchronized TermFreqVector get(int docNum, String field) throws IOException {
+  TermFreqVector get(int docNum, String field) throws IOException {
     // Check if no term vectors are available for this segment at all
     int fieldNumber = fieldInfos.fieldNumber(field);
     TermFreqVector result = null;
|
@ -137,13 +135,14 @@ class TermVectorsReader {
|
|||
return result;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Return all term vectors stored for this document or null if there are no term vectors
|
||||
* for the document.
|
||||
* @throws IOException
|
||||
* Return all term vectors stored for this document or null if the could not be read in.
|
||||
*
|
||||
* @param docNum The document number to retrieve the vector for
|
||||
* @return All term frequency vectors
|
||||
* @throws IOException if there is an error reading the term vector files
|
||||
*/
|
||||
synchronized TermFreqVector[] get(int docNum) throws IOException {
|
||||
TermFreqVector[] get(int docNum) throws IOException {
|
||||
TermFreqVector[] result = null;
|
||||
// Check if no term vectors are available for this segment at all
|
||||
if (tvx != null) {
|
||||
|
@ -295,4 +294,16 @@ class TermVectorsReader {
|
|||
return tv;
|
||||
}
|
||||
|
||||
protected Object clone() {
|
||||
TermVectorsReader clone = null;
|
||||
try {
|
||||
clone = (TermVectorsReader) super.clone();
|
||||
} catch (CloneNotSupportedException e) {}
|
||||
|
||||
clone.tvx = (IndexInput) tvx.clone();
|
||||
clone.tvd = (IndexInput) tvd.clone();
|
||||
clone.tvf = (IndexInput) tvf.clone();
|
||||
|
||||
return clone;
|
||||
}
|
||||
}
|
||||
|
|
|
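The super.clone() above is shallow, so the tvx, tvd and tvf streams are cloned explicitly: each IndexInput clone keeps its own file pointer over the same underlying data, which is what lets per-thread reader clones seek and read independently. A hedged illustration of that property (the class and method names here are made up, and it assumes IndexInput at this revision already exposes public clone(), seek() and getFilePointer() as sketched):

  import java.io.IOException;

  import org.apache.lucene.store.IndexInput;

  class IndexInputCloneSketch {
    // Given an already-opened stream, show that a clone seeks independently of the original.
    static void demo(IndexInput original) throws IOException {
      IndexInput copy = (IndexInput) original.clone();
      copy.seek(42);                                                    // moves only the clone's pointer
      System.out.println("clone at " + copy.getFilePointer());          // 42
      System.out.println("original at " + original.getFilePointer());   // unchanged by the clone's seek
    }
  }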
TestMultiThreadTermVectors.java (new file)

@@ -0,0 +1,196 @@
package org.apache.lucene.search;

/**
 * Copyright 2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;

import junit.framework.TestCase;

import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.TermFreqVector;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.English;

/**
 * @author Bernhard Messer
 * @version $rcs = ' $Id$ ' ;
 */
public class TestMultiThreadTermVectors extends TestCase {
  private RAMDirectory directory = new RAMDirectory();
  public int numDocs = 100;
  public int numThreads = 3;

  public TestMultiThreadTermVectors(String s) {
    super(s);
  }

  public void setUp() throws Exception {
    IndexWriter writer
      = new IndexWriter(directory, new SimpleAnalyzer(), true);
    //writer.setUseCompoundFile(false);
    //writer.infoStream = System.out;
    for (int i = 0; i < numDocs; i++) {
      Document doc = new Document();
      Field fld = new Field("field", English.intToEnglish(i), Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.YES);
      doc.add(fld);
      writer.addDocument(doc);
    }
    writer.close();
  }

  public void test() {
    IndexReader reader = null;

    try {
      reader = IndexReader.open(directory);
      for(int i = 1; i <= numThreads; i++)
        testTermPositionVectors(reader, i);
    }
    catch (IOException ioe) {
      fail(ioe.getMessage());
    }
    finally {
      if (reader != null) {
        try {
          /** close the opened reader */
          reader.close();
        } catch (IOException ioe) {
          ioe.printStackTrace();
        }
      }
    }
  }

  public void testTermPositionVectors(final IndexReader reader, int threadCount) {
    MultiThreadTermVectorsReader[] mtr = new MultiThreadTermVectorsReader[threadCount];
    for (int i = 0; i < threadCount; i++) {
      mtr[i] = new MultiThreadTermVectorsReader();
      mtr[i].init(reader);
    }

    /** run until all threads finished */
    int threadsAlive = mtr.length;
    while (threadsAlive > 0) {
      try {
        //System.out.println("Threads alive");
        Thread.sleep(10);
        threadsAlive = mtr.length;
        for (int i = 0; i < mtr.length; i++) {
          if (mtr[i].isAlive() == true) {
            break;
          }
          threadsAlive--;
        }
      } catch (InterruptedException ie) {}
    }

    long totalTime = 0L;
    for (int i = 0; i < mtr.length; i++) {
      totalTime += mtr[i].timeElapsed;
      mtr[i] = null;
    }

    //System.out.println("threadcount: " + mtr.length + " average term vector time: " + totalTime/mtr.length);
  }

}

class MultiThreadTermVectorsReader implements Runnable {

  private IndexReader reader = null;
  private Thread t = null;

  private final int runsToDo = 100;
  long timeElapsed = 0;

  public void init(IndexReader reader) {
    this.reader = reader;
    timeElapsed = 0;
    t=new Thread(this);
    t.start();
  }

  public boolean isAlive() {
    if (t == null) return false;

    return t.isAlive();
  }

  public void run() {
    try {
      // run the test 100 times
      for (int i = 0; i < runsToDo; i++)
        testTermVectors();
    }
    catch (Exception e) {
      e.printStackTrace();
    }
    return;
  }

  private void testTermVectors() throws Exception {
    // check:
    int numDocs = reader.numDocs();
    long start = 0L;
    for (int docId = 0; docId < numDocs; docId++) {
      start = System.currentTimeMillis();
      TermFreqVector [] vectors = reader.getTermFreqVectors(docId);
      timeElapsed += System.currentTimeMillis()-start;

      // verify vectors result
      verifyVectors(vectors, docId);

      start = System.currentTimeMillis();
      TermFreqVector vector = reader.getTermFreqVector(docId, "field");
      timeElapsed += System.currentTimeMillis()-start;

      vectors = new TermFreqVector[1];
      vectors[0] = vector;

      verifyVectors(vectors, docId);
    }
  }

  private void verifyVectors(TermFreqVector[] vectors, int num) {
    StringBuffer temp = new StringBuffer();
    String[] terms = null;
    for (int i = 0; i < vectors.length; i++) {
      terms = vectors[i].getTerms();
      for (int z = 0; z < terms.length; z++) {
        temp.append(terms[z]);
      }
    }

    if (!English.intToEnglish(num).trim().equals(temp.toString().trim()))
      System.out.println("wrong term result");
  }
}