mirror of https://github.com/apache/lucene.git
Remove synchronization from TermVectors
(Patch #30736) git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150569 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
3e955f8c77
commit
4953fa5c71
|
@ -532,7 +532,7 @@ public abstract class IndexReader {
|
||||||
protected abstract void doClose() throws IOException;
|
protected abstract void doClose() throws IOException;
|
||||||
|
|
||||||
/** Release the write lock, if needed. */
|
/** Release the write lock, if needed. */
|
||||||
protected final void finalize() {
|
protected void finalize() {
|
||||||
if (writeLock != null) {
|
if (writeLock != null) {
|
||||||
writeLock.release(); // release write lock
|
writeLock.release(); // release write lock
|
||||||
writeLock = null;
|
writeLock = null;
|
||||||
|
|
|
@ -43,7 +43,8 @@ class SegmentReader extends IndexReader {
|
||||||
private FieldsReader fieldsReader;
|
private FieldsReader fieldsReader;
|
||||||
|
|
||||||
TermInfosReader tis;
|
TermInfosReader tis;
|
||||||
TermVectorsReader termVectorsReader;
|
TermVectorsReader termVectorsReaderOrig = null;
|
||||||
|
ThreadLocal termVectorsLocal = new ThreadLocal();
|
||||||
|
|
||||||
BitVector deletedDocs = null;
|
BitVector deletedDocs = null;
|
||||||
private boolean deletedDocsDirty = false;
|
private boolean deletedDocsDirty = false;
|
||||||
|
@ -156,10 +157,16 @@ class SegmentReader extends IndexReader {
|
||||||
openNorms(cfsDir);
|
openNorms(cfsDir);
|
||||||
|
|
||||||
if (fieldInfos.hasVectors()) { // open term vector files only as needed
|
if (fieldInfos.hasVectors()) { // open term vector files only as needed
|
||||||
termVectorsReader = new TermVectorsReader(cfsDir, segment, fieldInfos);
|
termVectorsReaderOrig = new TermVectorsReader(cfsDir, segment, fieldInfos);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected void finalize() {
|
||||||
|
// patch for pre-1.4.2 JVMs, whose ThreadLocals leak
|
||||||
|
termVectorsLocal.set(null);
|
||||||
|
super.finalize();
|
||||||
|
}
|
||||||
|
|
||||||
protected void doCommit() throws IOException {
|
protected void doCommit() throws IOException {
|
||||||
if (deletedDocsDirty) { // re-write deleted
|
if (deletedDocsDirty) { // re-write deleted
|
||||||
deletedDocs.write(directory(), segment + ".tmp");
|
deletedDocs.write(directory(), segment + ".tmp");
|
||||||
|
@ -193,8 +200,8 @@ class SegmentReader extends IndexReader {
|
||||||
|
|
||||||
closeNorms();
|
closeNorms();
|
||||||
|
|
||||||
if (termVectorsReader != null)
|
if (termVectorsReaderOrig != null)
|
||||||
termVectorsReader.close();
|
termVectorsReaderOrig.close();
|
||||||
|
|
||||||
if (cfsReader != null)
|
if (cfsReader != null)
|
||||||
cfsReader.close();
|
cfsReader.close();
|
||||||
|
@ -456,6 +463,19 @@ class SegmentReader extends IndexReader {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a clone from the initial TermVectorsReader and store it in the ThreadLocal.
|
||||||
|
* @return TermVectorsReader
|
||||||
|
*/
|
||||||
|
private TermVectorsReader getTermVectorsReader() {
|
||||||
|
TermVectorsReader tvReader = (TermVectorsReader)termVectorsLocal.get();
|
||||||
|
if (tvReader == null) {
|
||||||
|
tvReader = (TermVectorsReader)termVectorsReaderOrig.clone();
|
||||||
|
termVectorsLocal.set(tvReader);
|
||||||
|
}
|
||||||
|
return tvReader;
|
||||||
|
}
|
||||||
|
|
||||||
/** Return a term frequency vector for the specified document and field. The
|
/** Return a term frequency vector for the specified document and field. The
|
||||||
* vector returned contains term numbers and frequencies for all terms in
|
* vector returned contains term numbers and frequencies for all terms in
|
||||||
* the specified field of this document, if the field had storeTermVector
|
* the specified field of this document, if the field had storeTermVector
|
||||||
|
@ -465,9 +485,10 @@ class SegmentReader extends IndexReader {
|
||||||
public TermFreqVector getTermFreqVector(int docNumber, String field) throws IOException {
|
public TermFreqVector getTermFreqVector(int docNumber, String field) throws IOException {
|
||||||
// Check if this field is invalid or has no stored term vector
|
// Check if this field is invalid or has no stored term vector
|
||||||
FieldInfo fi = fieldInfos.fieldInfo(field);
|
FieldInfo fi = fieldInfos.fieldInfo(field);
|
||||||
if (fi == null || !fi.storeTermVector || termVectorsReader == null)
|
if (fi == null || !fi.storeTermVector || termVectorsReaderOrig == null)
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
|
TermVectorsReader termVectorsReader = getTermVectorsReader();
|
||||||
return termVectorsReader.get(docNumber, field);
|
return termVectorsReader.get(docNumber, field);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -480,9 +501,10 @@ class SegmentReader extends IndexReader {
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
public TermFreqVector[] getTermFreqVectors(int docNumber) throws IOException {
|
public TermFreqVector[] getTermFreqVectors(int docNumber) throws IOException {
|
||||||
if (termVectorsReader == null)
|
if (termVectorsReaderOrig == null)
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
|
TermVectorsReader termVectorsReader = getTermVectorsReader();
|
||||||
return termVectorsReader.get(docNumber);
|
return termVectorsReader.get(docNumber);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,11 +22,9 @@ import org.apache.lucene.store.IndexInput;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* FIXME: relax synchro!
|
|
||||||
*
|
|
||||||
* @version $Id$
|
* @version $Id$
|
||||||
*/
|
*/
|
||||||
class TermVectorsReader {
|
class TermVectorsReader implements Cloneable {
|
||||||
private FieldInfos fieldInfos;
|
private FieldInfos fieldInfos;
|
||||||
|
|
||||||
private IndexInput tvx;
|
private IndexInput tvx;
|
||||||
|
@ -86,9 +84,9 @@ class TermVectorsReader {
|
||||||
* @param docNum The document number to retrieve the vector for
|
* @param docNum The document number to retrieve the vector for
|
||||||
* @param field The field within the document to retrieve
|
* @param field The field within the document to retrieve
|
||||||
* @return The TermFreqVector for the document and field or null if there is no termVector for this field.
|
* @return The TermFreqVector for the document and field or null if there is no termVector for this field.
|
||||||
* @throws IOException
|
* @throws IOException if there is an error reading the term vector files
|
||||||
*/
|
*/
|
||||||
synchronized TermFreqVector get(int docNum, String field) throws IOException {
|
TermFreqVector get(int docNum, String field) throws IOException {
|
||||||
// Check if no term vectors are available for this segment at all
|
// Check if no term vectors are available for this segment at all
|
||||||
int fieldNumber = fieldInfos.fieldNumber(field);
|
int fieldNumber = fieldInfos.fieldNumber(field);
|
||||||
TermFreqVector result = null;
|
TermFreqVector result = null;
|
||||||
|
@ -137,13 +135,14 @@ class TermVectorsReader {
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return all term vectors stored for this document or null if there are no term vectors
|
* Return all term vectors stored for this document or null if the could not be read in.
|
||||||
* for the document.
|
*
|
||||||
* @throws IOException
|
* @param docNum The document number to retrieve the vector for
|
||||||
|
* @return All term frequency vectors
|
||||||
|
* @throws IOException if there is an error reading the term vector files
|
||||||
*/
|
*/
|
||||||
synchronized TermFreqVector[] get(int docNum) throws IOException {
|
TermFreqVector[] get(int docNum) throws IOException {
|
||||||
TermFreqVector[] result = null;
|
TermFreqVector[] result = null;
|
||||||
// Check if no term vectors are available for this segment at all
|
// Check if no term vectors are available for this segment at all
|
||||||
if (tvx != null) {
|
if (tvx != null) {
|
||||||
|
@ -295,4 +294,16 @@ class TermVectorsReader {
|
||||||
return tv;
|
return tv;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected Object clone() {
|
||||||
|
TermVectorsReader clone = null;
|
||||||
|
try {
|
||||||
|
clone = (TermVectorsReader) super.clone();
|
||||||
|
} catch (CloneNotSupportedException e) {}
|
||||||
|
|
||||||
|
clone.tvx = (IndexInput) tvx.clone();
|
||||||
|
clone.tvd = (IndexInput) tvd.clone();
|
||||||
|
clone.tvf = (IndexInput) tvf.clone();
|
||||||
|
|
||||||
|
return clone;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,196 @@
|
||||||
|
package org.apache.lucene.search;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copyright 2004 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.SimpleAnalyzer;
|
||||||
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.document.Field;
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.IndexWriter;
|
||||||
|
import org.apache.lucene.index.TermFreqVector;
|
||||||
|
import org.apache.lucene.store.RAMDirectory;
|
||||||
|
import org.apache.lucene.util.English;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author Bernhard Messer
|
||||||
|
* @version $rcs = ' $Id$ ' ;
|
||||||
|
*/
|
||||||
|
public class TestMultiThreadTermVectors extends TestCase {
|
||||||
|
private RAMDirectory directory = new RAMDirectory();
|
||||||
|
public int numDocs = 100;
|
||||||
|
public int numThreads = 3;
|
||||||
|
|
||||||
|
public TestMultiThreadTermVectors(String s) {
|
||||||
|
super(s);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setUp() throws Exception {
|
||||||
|
IndexWriter writer
|
||||||
|
= new IndexWriter(directory, new SimpleAnalyzer(), true);
|
||||||
|
//writer.setUseCompoundFile(false);
|
||||||
|
//writer.infoStream = System.out;
|
||||||
|
for (int i = 0; i < numDocs; i++) {
|
||||||
|
Document doc = new Document();
|
||||||
|
Field fld = new Field("field", English.intToEnglish(i), Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.YES);
|
||||||
|
doc.add(fld);
|
||||||
|
writer.addDocument(doc);
|
||||||
|
}
|
||||||
|
writer.close();
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public void test() {
|
||||||
|
|
||||||
|
IndexReader reader = null;
|
||||||
|
|
||||||
|
try {
|
||||||
|
reader = IndexReader.open(directory);
|
||||||
|
for(int i = 1; i <= numThreads; i++)
|
||||||
|
testTermPositionVectors(reader, i);
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
catch (IOException ioe) {
|
||||||
|
fail(ioe.getMessage());
|
||||||
|
}
|
||||||
|
finally {
|
||||||
|
if (reader != null) {
|
||||||
|
try {
|
||||||
|
/** close the opened reader */
|
||||||
|
reader.close();
|
||||||
|
} catch (IOException ioe) {
|
||||||
|
ioe.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testTermPositionVectors(final IndexReader reader, int threadCount) {
|
||||||
|
MultiThreadTermVectorsReader[] mtr = new MultiThreadTermVectorsReader[threadCount];
|
||||||
|
for (int i = 0; i < threadCount; i++) {
|
||||||
|
mtr[i] = new MultiThreadTermVectorsReader();
|
||||||
|
mtr[i].init(reader);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** run until all threads finished */
|
||||||
|
int threadsAlive = mtr.length;
|
||||||
|
while (threadsAlive > 0) {
|
||||||
|
try {
|
||||||
|
//System.out.println("Threads alive");
|
||||||
|
Thread.sleep(10);
|
||||||
|
threadsAlive = mtr.length;
|
||||||
|
for (int i = 0; i < mtr.length; i++) {
|
||||||
|
if (mtr[i].isAlive() == true) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
threadsAlive--;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (InterruptedException ie) {}
|
||||||
|
}
|
||||||
|
|
||||||
|
long totalTime = 0L;
|
||||||
|
for (int i = 0; i < mtr.length; i++) {
|
||||||
|
totalTime += mtr[i].timeElapsed;
|
||||||
|
mtr[i] = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
//System.out.println("threadcount: " + mtr.length + " average term vector time: " + totalTime/mtr.length);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
class MultiThreadTermVectorsReader implements Runnable {
|
||||||
|
|
||||||
|
private IndexReader reader = null;
|
||||||
|
private Thread t = null;
|
||||||
|
|
||||||
|
private final int runsToDo = 100;
|
||||||
|
long timeElapsed = 0;
|
||||||
|
|
||||||
|
|
||||||
|
public void init(IndexReader reader) {
|
||||||
|
this.reader = reader;
|
||||||
|
timeElapsed = 0;
|
||||||
|
t=new Thread(this);
|
||||||
|
t.start();
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isAlive() {
|
||||||
|
if (t == null) return false;
|
||||||
|
|
||||||
|
return t.isAlive();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void run() {
|
||||||
|
try {
|
||||||
|
// run the test 100 times
|
||||||
|
for (int i = 0; i < runsToDo; i++)
|
||||||
|
testTermVectors();
|
||||||
|
}
|
||||||
|
catch (Exception e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void testTermVectors() throws Exception {
|
||||||
|
// check:
|
||||||
|
int numDocs = reader.numDocs();
|
||||||
|
long start = 0L;
|
||||||
|
for (int docId = 0; docId < numDocs; docId++) {
|
||||||
|
start = System.currentTimeMillis();
|
||||||
|
TermFreqVector [] vectors = reader.getTermFreqVectors(docId);
|
||||||
|
timeElapsed += System.currentTimeMillis()-start;
|
||||||
|
|
||||||
|
// verify vectors result
|
||||||
|
verifyVectors(vectors, docId);
|
||||||
|
|
||||||
|
start = System.currentTimeMillis();
|
||||||
|
TermFreqVector vector = reader.getTermFreqVector(docId, "field");
|
||||||
|
timeElapsed += System.currentTimeMillis()-start;
|
||||||
|
|
||||||
|
vectors = new TermFreqVector[1];
|
||||||
|
vectors[0] = vector;
|
||||||
|
|
||||||
|
verifyVectors(vectors, docId);
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void verifyVectors(TermFreqVector[] vectors, int num) {
|
||||||
|
StringBuffer temp = new StringBuffer();
|
||||||
|
String[] terms = null;
|
||||||
|
for (int i = 0; i < vectors.length; i++) {
|
||||||
|
terms = vectors[i].getTerms();
|
||||||
|
for (int z = 0; z < terms.length; z++) {
|
||||||
|
temp.append(terms[z]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!English.intToEnglish(num).trim().equals(temp.toString().trim()))
|
||||||
|
System.out.println("worng term result");
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue