mirror of https://github.com/apache/lucene.git
Fix for Lucene-1500 - new exception added to Highlighter API to handle TokenStreams with Tokens that exceed given text length
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@758460 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
126f4b18d3
commit
73a02ec6fe
|
@ -65,6 +65,10 @@ API Changes
|
|||
|
||||
11. LUCENE-1561: Renamed Field.omitTf to Field.omitTermFreqAndPositions
|
||||
(Otis Gospodnetic via Mike McCandless)
|
||||
|
||||
12. LUCENE-1500: Added new InvalidTokenOffsetsException to Highlighter methods
|
||||
to denote issues when offsets in TokenStream tokens exceed the length of the
|
||||
provided text. (Mark Harwood)
|
||||
|
||||
Bug fixes
|
||||
|
||||
|
|
|
@ -74,9 +74,10 @@ public class Highlighter
|
|||
* @param fieldName Name of field used to influence analyzer's tokenization policy
|
||||
*
|
||||
* @return highlighted text fragment or null if no terms found
|
||||
* @throws InvalidTokenOffsetsException thrown if any token's endOffset exceeds the provided text's length
|
||||
*/
|
||||
public final String getBestFragment(Analyzer analyzer, String fieldName,String text)
|
||||
throws IOException
|
||||
throws IOException, InvalidTokenOffsetsException
|
||||
{
|
||||
TokenStream tokenStream = analyzer.tokenStream(fieldName, new StringReader(text));
|
||||
return getBestFragment(tokenStream, text);
|
||||
|
@ -96,9 +97,10 @@ public class Highlighter
|
|||
* @param text text to highlight terms in
|
||||
*
|
||||
* @return highlighted text fragment or null if no terms found
|
||||
* @throws InvalidTokenOffsetsException thrown if any token's endOffset exceeds the provided text's length
|
||||
*/
|
||||
public final String getBestFragment(TokenStream tokenStream, String text)
|
||||
throws IOException
|
||||
throws IOException, InvalidTokenOffsetsException
|
||||
{
|
||||
String[] results = getBestFragments(tokenStream,text, 1);
|
||||
if (results.length > 0)
|
||||
|
@ -120,12 +122,13 @@ public class Highlighter
|
|||
* @deprecated This method incorrectly hardcodes the choice of fieldname. Use the
|
||||
* method of the same name that takes a fieldname.
|
||||
* @return highlighted text fragments (between 0 and maxNumFragments number of fragments)
|
||||
* @throws InvalidTokenOffsetsException thrown if any token's endOffset exceeds the provided text's length
|
||||
*/
|
||||
public final String[] getBestFragments(
|
||||
Analyzer analyzer,
|
||||
String text,
|
||||
int maxNumFragments)
|
||||
throws IOException
|
||||
throws IOException, InvalidTokenOffsetsException
|
||||
{
|
||||
TokenStream tokenStream = analyzer.tokenStream("field", new StringReader(text));
|
||||
return getBestFragments(tokenStream, text, maxNumFragments);
|
||||
|
@ -142,13 +145,14 @@ public class Highlighter
|
|||
* @param maxNumFragments the maximum number of fragments.
|
||||
*
|
||||
* @return highlighted text fragments (between 0 and maxNumFragments number of fragments)
|
||||
* @throws InvalidTokenOffsetsException thrown if any token's endOffset exceeds the provided text's length
|
||||
*/
|
||||
public final String[] getBestFragments(
|
||||
Analyzer analyzer,
|
||||
String fieldName,
|
||||
String text,
|
||||
int maxNumFragments)
|
||||
throws IOException
|
||||
throws IOException, InvalidTokenOffsetsException
|
||||
{
|
||||
TokenStream tokenStream = analyzer.tokenStream(fieldName, new StringReader(text));
|
||||
return getBestFragments(tokenStream, text, maxNumFragments);
|
||||
|
@ -165,12 +169,13 @@ public class Highlighter
|
|||
* @param maxNumFragments the maximum number of fragments.
|
||||
*
|
||||
* @return highlighted text fragments (between 0 and maxNumFragments number of fragments)
|
||||
* @throws InvalidTokenOffsetsException thrown if any token's endOffset exceeds the provided text's length
|
||||
*/
|
||||
public final String[] getBestFragments(
|
||||
TokenStream tokenStream,
|
||||
String text,
|
||||
int maxNumFragments)
|
||||
throws IOException
|
||||
throws IOException, InvalidTokenOffsetsException
|
||||
{
|
||||
maxNumFragments = Math.max(1, maxNumFragments); //sanity check
|
||||
|
||||
|
@ -198,13 +203,14 @@ public class Highlighter
|
|||
* @param maxNumFragments
|
||||
* @param mergeContiguousFragments
|
||||
* @throws IOException
|
||||
* @throws InvalidTokenOffsetsException thrown if any token's endOffset exceeds the provided text's length
|
||||
*/
|
||||
public final TextFragment[] getBestTextFragments(
|
||||
TokenStream tokenStream,
|
||||
String text,
|
||||
boolean mergeContiguousFragments,
|
||||
int maxNumFragments)
|
||||
throws IOException
|
||||
throws IOException, InvalidTokenOffsetsException
|
||||
{
|
||||
ArrayList docFrags = new ArrayList();
|
||||
StringBuffer newText=new StringBuffer();
|
||||
|
@ -230,6 +236,14 @@ public class Highlighter
|
|||
(nextToken!= null)&&(nextToken.startOffset()< maxDocCharsToAnalyze);
|
||||
nextToken = tokenStream.next(reusableToken))
|
||||
{
|
||||
if( (nextToken.endOffset()>text.length())
|
||||
||
|
||||
(nextToken.startOffset()>text.length())
|
||||
)
|
||||
{
|
||||
throw new InvalidTokenOffsetsException("Token "+nextToken.toString()
|
||||
+" exceeds length of provided text sized "+text.length());
|
||||
}
|
||||
if((tokenGroup.numTokens>0)&&(tokenGroup.isDistinct(nextToken)))
|
||||
{
|
||||
//the current token is distinct from previous tokens -
|
||||
|
@ -452,13 +466,14 @@ public class Highlighter
|
|||
* @param separator the separator used to intersperse the document fragments (typically "...")
|
||||
*
|
||||
* @return highlighted text
|
||||
* @throws InvalidTokenOffsetsException thrown if any token's endOffset exceeds the provided text's length
|
||||
*/
|
||||
public final String getBestFragments(
|
||||
TokenStream tokenStream,
|
||||
String text,
|
||||
int maxNumFragments,
|
||||
String separator)
|
||||
throws IOException
|
||||
throws IOException, InvalidTokenOffsetsException
|
||||
{
|
||||
String sections[] = getBestFragments(tokenStream,text, maxNumFragments);
|
||||
StringBuffer result = new StringBuffer();
|
||||
|
|
|
@ -0,0 +1,31 @@
|
|||
package org.apache.lucene.search.highlight;
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Exception thrown if TokenStream Tokens are incompatible with provided text
|
||||
*
|
||||
*/
|
||||
public class InvalidTokenOffsetsException extends Exception
|
||||
{
|
||||
|
||||
public InvalidTokenOffsetsException(String message)
|
||||
{
|
||||
super(message);
|
||||
}
|
||||
|
||||
}
|
|
@ -136,9 +136,10 @@ public class HighlighterTest extends TestCase implements Formatter {
|
|||
|
||||
/**
|
||||
* This method intended for use with <tt>testHighlightingWithDefaultField()</tt>
|
||||
* @throws InvalidTokenOffsetsException
|
||||
*/
|
||||
private static String highlightField(Query query, String fieldName, String text)
|
||||
throws IOException {
|
||||
throws IOException, InvalidTokenOffsetsException {
|
||||
CachingTokenFilter tokenStream = new CachingTokenFilter(new StandardAnalyzer().tokenStream(
|
||||
fieldName, new StringReader(text)));
|
||||
// Assuming "<B>", "</B>" used to highlight
|
||||
|
@ -1291,7 +1292,7 @@ public class HighlighterTest extends TestCase implements Formatter {
|
|||
private Directory dir = new RAMDirectory();
|
||||
private Analyzer a = new WhitespaceAnalyzer();
|
||||
|
||||
public void testWeightedTermsWithDeletes() throws IOException, ParseException {
|
||||
public void testWeightedTermsWithDeletes() throws IOException, ParseException, InvalidTokenOffsetsException {
|
||||
makeIndex();
|
||||
deleteDocument();
|
||||
searchIndex();
|
||||
|
@ -1321,7 +1322,7 @@ public class HighlighterTest extends TestCase implements Formatter {
|
|||
writer.close();
|
||||
}
|
||||
|
||||
private void searchIndex() throws IOException, ParseException {
|
||||
private void searchIndex() throws IOException, ParseException, InvalidTokenOffsetsException {
|
||||
String q = "t_text1:random";
|
||||
QueryParser parser = new QueryParser( "t_text1", a );
|
||||
Query query = parser.parse( q );
|
||||
|
|
Loading…
Reference in New Issue