mirror of https://github.com/apache/lucene.git
LUCENE-3234: add phraseLimit parameter for FVH speed up
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1139995 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
88689125fa
commit
9d6f451871
|
@ -55,7 +55,16 @@ Bug Fixes
|
|||
|
||||
======================= Lucene 3.x (not yet released) ================
|
||||
|
||||
(No changes)
|
||||
New Features
|
||||
|
||||
* LUCENE-3234: provide a limit on phrase analysis in FastVectorHighlighter for
|
||||
highlighting speed up. Use FastVectorHighlighter.setPhraseLimit() to set limit
|
||||
(e.g. 5000). (Mike Sokolov via Koji Sekiguchi)
|
||||
|
||||
API Changes
|
||||
|
||||
Bug Fixes
|
||||
|
||||
|
||||
======================= Lucene 3.3.0 =======================
|
||||
|
||||
|
|
|
@ -35,6 +35,7 @@ public class FastVectorHighlighter {
|
|||
private final boolean fieldMatch;
|
||||
private final FragListBuilder fragListBuilder;
|
||||
private final FragmentsBuilder fragmentsBuilder;
|
||||
private int phraseLimit = Integer.MAX_VALUE;
|
||||
|
||||
/**
|
||||
* the default constructor.
|
||||
|
@ -173,7 +174,7 @@ public class FastVectorHighlighter {
|
|||
final FieldQuery fieldQuery, IndexReader reader, int docId,
|
||||
String fieldName, int fragCharSize ) throws IOException {
|
||||
FieldTermStack fieldTermStack = new FieldTermStack( reader, docId, fieldName, fieldQuery );
|
||||
FieldPhraseList fieldPhraseList = new FieldPhraseList( fieldTermStack, fieldQuery );
|
||||
FieldPhraseList fieldPhraseList = new FieldPhraseList( fieldTermStack, fieldQuery, phraseLimit );
|
||||
return fragListBuilder.createFieldFragList( fieldPhraseList, fragCharSize );
|
||||
}
|
||||
|
||||
|
@ -190,4 +191,15 @@ public class FastVectorHighlighter {
|
|||
* @return whether fieldMatch or not
|
||||
*/
|
||||
public boolean isFieldMatch(){ return fieldMatch; }
|
||||
|
||||
/**
|
||||
* @return the maximum number of phrases to analyze when searching for the highest-scoring phrase.
|
||||
*/
|
||||
public int getPhraseLimit () { return phraseLimit; }
|
||||
|
||||
/**
|
||||
* set the maximum number of phrases to analyze when searching for the highest-scoring phrase.
|
||||
* The default is unlimited (Integer.MAX_VALUE). Do not pass a negative number: no phrases would be analyzed.
|
||||
*/
|
||||
public void setPhraseLimit (int phraseLimit) { this.phraseLimit = phraseLimit; }
|
||||
}
|
||||
|
|
|
@ -32,19 +32,30 @@ public class FieldPhraseList {
|
|||
LinkedList<WeightedPhraseInfo> phraseList = new LinkedList<WeightedPhraseInfo>();
|
||||
|
||||
/**
|
||||
* a constructor.
|
||||
* create a FieldPhraseList that has no limit on the number of phrases to analyze
|
||||
*
|
||||
* @param fieldTermStack FieldTermStack object
|
||||
* @param fieldQuery FieldQuery object
|
||||
*/
|
||||
public FieldPhraseList( FieldTermStack fieldTermStack, FieldQuery fieldQuery){
|
||||
this (fieldTermStack, fieldQuery, Integer.MAX_VALUE);
|
||||
}
|
||||
|
||||
/**
|
||||
* a constructor.
|
||||
*
|
||||
* @param fieldTermStack FieldTermStack object
|
||||
* @param fieldQuery FieldQuery object
|
||||
* @param phraseLimit maximum size of phraseList
|
||||
*/
|
||||
public FieldPhraseList( FieldTermStack fieldTermStack, FieldQuery fieldQuery, int phraseLimit){
|
||||
final String field = fieldTermStack.getFieldName();
|
||||
|
||||
LinkedList<TermInfo> phraseCandidate = new LinkedList<TermInfo>();
|
||||
QueryPhraseMap currMap = null;
|
||||
QueryPhraseMap nextMap = null;
|
||||
while( !fieldTermStack.isEmpty() ){
|
||||
|
||||
while( !fieldTermStack.isEmpty() && (phraseList.size() < phraseLimit) )
|
||||
{
|
||||
phraseCandidate.clear();
|
||||
|
||||
TermInfo ti = fieldTermStack.pop();
|
||||
|
|
|
@ -188,4 +188,34 @@ public class FieldPhraseListTest extends AbstractTestCase {
|
|||
assertEquals( 1, fpl.phraseList.size() );
|
||||
assertEquals( "sppeeeed(1.0)((88,93))", fpl.phraseList.get( 0 ).toString() );
|
||||
}
|
||||
|
||||
/* This test shows a big speedup from limiting the number of analyzed phrases in
|
||||
* this bad case for FieldPhraseList */
|
||||
/* But it is not reliable as a unit test since it is timing-dependent
|
||||
public void testManyRepeatedTerms() throws Exception {
|
||||
long t = System.currentTimeMillis();
|
||||
testManyTermsWithLimit (-1);
|
||||
long t1 = System.currentTimeMillis();
|
||||
testManyTermsWithLimit (1);
|
||||
long t2 = System.currentTimeMillis();
|
||||
assertTrue ((t2-t1) * 1000 < t1-t);
|
||||
}
|
||||
private void testManyTermsWithLimit (int limit) throws Exception {
|
||||
StringBuilder buf = new StringBuilder ();
|
||||
for (int i = 0; i < 16000; i++) {
|
||||
buf.append("a b c ");
|
||||
}
|
||||
make1d1fIndex( buf.toString());
|
||||
|
||||
Query query = tq("a");
|
||||
FieldQuery fq = new FieldQuery( query, true, true );
|
||||
FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
|
||||
FieldPhraseList fpl = new FieldPhraseList( stack, fq, limit);
|
||||
if (limit < 0 || limit > 16000)
|
||||
assertEquals( 16000, fpl.phraseList.size() );
|
||||
else
|
||||
assertEquals( limit, fpl.phraseList.size() );
|
||||
assertEquals( "a(1.0)((0,1))", fpl.phraseList.get( 0 ).toString() );
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
|
|
@ -254,6 +254,9 @@ New Features
|
|||
|
||||
* SOLR-2458: post.jar enhanced to handle JSON, CSV and <optimize> (janhoy)
|
||||
|
||||
* LUCENE-3234: add a new parameter hl.phraseLimit for FastVectorHighlighter speed up.
|
||||
(Mike Sokolov via koji)
|
||||
|
||||
Optimizations
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -45,6 +45,7 @@ public interface HighlightParams {
|
|||
public static final String USE_FVH = HIGHLIGHT + ".useFastVectorHighlighter";
|
||||
public static final String TAG_PRE = HIGHLIGHT + ".tag.pre";
|
||||
public static final String TAG_POST = HIGHLIGHT + ".tag.post";
|
||||
public static final String PHRASE_LIMIT = HIGHLIGHT + ".phraseLimit";
|
||||
public static final String MULTI_VALUED_SEPARATOR = HIGHLIGHT + ".multiValuedSeparatorChar";
|
||||
|
||||
// Formatter
|
||||
|
|
|
@ -362,6 +362,7 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
|
|||
params.getBool( HighlightParams.USE_PHRASE_HIGHLIGHTER, true ),
|
||||
// FVH cannot process the hl.requireFieldMatch parameter on a per-field basis
|
||||
params.getBool( HighlightParams.FIELD_MATCH, false ) );
|
||||
fvh.setPhraseLimit(params.getInt(HighlightParams.PHRASE_LIMIT, Integer.MAX_VALUE));
|
||||
FieldQuery fieldQuery = fvh.getFieldQuery( query );
|
||||
|
||||
// Highlight each document
|
||||
|
|
Loading…
Reference in New Issue