From 9a930b88061f8f6662a28afbbfdc8ac9b712ced4 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Fri, 6 Sep 2013 10:52:47 +0000 Subject: [PATCH] Revert LUCENE-4734. git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1520536 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 3 - .../vectorhighlight/FieldPhraseList.java | 82 ++++---- .../search/vectorhighlight/FieldQuery.java | 130 ++----------- .../vectorhighlight/FieldTermStack.java | 7 - .../FastVectorHighlighterTest.java | 181 +----------------- .../vectorhighlight/FieldPhraseListTest.java | 26 +-- .../vectorhighlight/FieldQueryTest.java | 4 +- 7 files changed, 65 insertions(+), 368 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index cb0c1b98b70..c61cc7f7c5e 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -112,9 +112,6 @@ Bug Fixes * LUCENE-5116: IndexWriter.addIndexes(IndexReader...) should drop empty (or all deleted) segments. (Robert Muir, Shai Erera) -* LUCENE-4734: Add FastVectorHighlighter support for proximity queries and - phrase queries with gaps or overlapping terms. (Ryan Lauck, Adrien Grand) - * LUCENE-5132: Spatial RecursivePrefixTree Contains predicate will throw an NPE when there's no indexed data and maybe in other circumstances too. (David Smiley) diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java b/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java index ae24de9a14f..74cceb471a3 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java @@ -60,50 +60,50 @@ public class FieldPhraseList { public FieldPhraseList( FieldTermStack fieldTermStack, FieldQuery fieldQuery, int phraseLimit ){ final String field = fieldTermStack.getFieldName(); - QueryPhraseMap qpm = fieldQuery.getRootMap(field); - if (qpm != null) { - LinkedList phraseCandidate = new LinkedList(); - extractPhrases(fieldTermStack.termList, qpm, phraseCandidate, 0); - assert phraseCandidate.size() == 0; - } - } + LinkedList phraseCandidate = new LinkedList(); + QueryPhraseMap currMap = null; + QueryPhraseMap nextMap = null; + while( !fieldTermStack.isEmpty() && (phraseList.size() < phraseLimit) ) + { + phraseCandidate.clear(); - void extractPhrases(LinkedList terms, QueryPhraseMap currMap, LinkedList phraseCandidate, int longest) { - if (phraseCandidate.size() > 1 && phraseCandidate.getLast().getPosition() - phraseCandidate.getFirst().getPosition() > currMap.getMaxPhraseWindow()) { - return; - } - if (terms.isEmpty()) { - if (longest > 0) { - addIfNoOverlap( new WeightedPhraseInfo( phraseCandidate.subList(0, longest), currMap.getBoost(), currMap.getTermOrPhraseNumber() ) ); - } - return; - } - ArrayList samePositionTerms = new ArrayList(); - do { - samePositionTerms.add(terms.pop()); - } while (!terms.isEmpty() && terms.get(0).getPosition() == samePositionTerms.get(0).getPosition()); + TermInfo ti = fieldTermStack.pop(); + currMap = fieldQuery.getFieldTermMap( field, ti.getText() ); - // try all next terms at the same position - for (TermInfo nextTerm : samePositionTerms) { - QueryPhraseMap nextMap = currMap.getTermMap(nextTerm.getText()); - if (nextMap != null) { - phraseCandidate.add(nextTerm); - int l = longest; - if(nextMap.isValidTermOrPhrase( phraseCandidate ) ){ - l = phraseCandidate.size(); + // if not found, discard top TermInfo from stack, then try next element + if( currMap == null ) continue; + + // if found, search the longest phrase + phraseCandidate.add( ti ); + while( true ){ + ti = fieldTermStack.pop(); + nextMap = null; + if( ti != null ) + nextMap = currMap.getTermMap( ti.getText() ); + if( ti == null || nextMap == null ){ + if( ti != null ) + fieldTermStack.push( ti ); + if( currMap.isValidTermOrPhrase( phraseCandidate ) ){ + addIfNoOverlap( new WeightedPhraseInfo( phraseCandidate, currMap.getBoost(), currMap.getTermOrPhraseNumber() ) ); + } + else{ + while( phraseCandidate.size() > 1 ){ + fieldTermStack.push( phraseCandidate.removeLast() ); + currMap = fieldQuery.searchPhrase( field, phraseCandidate ); + if( currMap != null ){ + addIfNoOverlap( new WeightedPhraseInfo( phraseCandidate, currMap.getBoost(), currMap.getTermOrPhraseNumber() ) ); + break; + } + } + } + break; + } + else{ + phraseCandidate.add( ti ); + currMap = nextMap; } - extractPhrases(terms, nextMap, phraseCandidate, l); - phraseCandidate.removeLast(); } } - - // ignore the next term - extractPhrases(terms, currMap, phraseCandidate, longest); - - // add terms back - for (TermInfo nextTerm : samePositionTerms) { - terms.push(nextTerm); - } } public void addIfNoOverlap( WeightedPhraseInfo wpi ){ @@ -159,11 +159,11 @@ public class FieldPhraseList { return termsInfos; } - public WeightedPhraseInfo( List terms, float boost ){ + public WeightedPhraseInfo( LinkedList terms, float boost ){ this( terms, boost, 0 ); } - public WeightedPhraseInfo( List terms, float boost, int seqnum ){ + public WeightedPhraseInfo( LinkedList terms, float boost, int seqnum ){ this.boost = boost; this.seqnum = seqnum; diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java b/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java index ae3b5d13894..c0284794446 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java @@ -17,8 +17,6 @@ package org.apache.lucene.search.vectorhighlight; */ import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; @@ -30,6 +28,7 @@ import java.util.Set; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; +import org.apache.lucene.queries.CommonTermsQuery; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.ConstantScoreQuery; @@ -40,7 +39,6 @@ import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.vectorhighlight.FieldTermStack.TermInfo; -import org.apache.lucene.util.InPlaceMergeSorter; /** * FieldQuery breaks down query object into terms/phrases and keeps @@ -62,8 +60,6 @@ public class FieldQuery { // The maximum number of different matching terms accumulated from any one MultiTermQuery private static final int MAX_MTQ_TERMS = 1024; - - private int maxPhraseWindow = 1; FieldQuery( Query query, IndexReader reader, boolean phraseHighlight, boolean fieldMatch ) throws IOException { this.fieldMatch = fieldMatch; @@ -334,8 +330,7 @@ public class FieldQuery { return root.searchPhrase( phraseCandidate ); } - /** Get the root map for the given field name. */ - public QueryPhraseMap getRootMap( String fieldName ){ + private QueryPhraseMap getRootMap( String fieldName ){ return rootMaps.get( fieldMatch ? fieldName : null ); } @@ -352,7 +347,6 @@ public class FieldQuery { boolean terminal; int slop; // valid if terminal == true and phraseHighlight == true float boost; // valid if terminal == true - int[] positions; // valid if terminal == true int termOrPhraseNumber; // valid if terminal == true FieldQuery fieldQuery; Map subMap = new HashMap(); @@ -375,117 +369,38 @@ public class FieldQuery { return map; } - void add( Query query, IndexReader reader ) { + void add( Query query, IndexReader reader ) { if( query instanceof TermQuery ){ addTerm( ((TermQuery)query).getTerm(), query.getBoost() ); } else if( query instanceof PhraseQuery ){ PhraseQuery pq = (PhraseQuery)query; - final Term[] terms = pq.getTerms(); - final int[] positions = pq.getPositions(); - new InPlaceMergeSorter() { - - @Override - protected void swap(int i, int j) { - Term tmpTerm = terms[i]; - terms[i] = terms[j]; - terms[j] = tmpTerm; - - int tmpPos = positions[i]; - positions[i] = positions[j]; - positions[j] = tmpPos; - } - - @Override - protected int compare(int i, int j) { - return positions[i] - positions[j]; - } - }.sort(0, terms.length); - - addToMap(pq, terms, positions, 0, subMap, pq.getSlop()); + Term[] terms = pq.getTerms(); + Map map = subMap; + QueryPhraseMap qpm = null; + for( Term term : terms ){ + qpm = getOrNewMap( map, term.text() ); + map = qpm.subMap; + } + qpm.markTerminal( pq.getSlop(), pq.getBoost() ); } else throw new RuntimeException( "query \"" + query.toString() + "\" must be flatten first." ); } - - private int numTermsAtSamePosition(int[] positions, int i) { - int numTermsAtSamePosition = 1; - for (int j = i + 1; j < positions.length; ++j) { - if (positions[j] == positions[i]) { - ++numTermsAtSamePosition; - } - } - return numTermsAtSamePosition; - } - - private void addToMap(PhraseQuery pq, Term[] terms, int[] positions, int i, Map map, int slop) { - int numTermsAtSamePosition = numTermsAtSamePosition(positions, i); - for (int j = 0; j < numTermsAtSamePosition; ++j) { - QueryPhraseMap qpm = getOrNewMap(map, terms[i + j].text()); - if (i + numTermsAtSamePosition == terms.length) { - qpm.markTerminal(pq.getSlop(), pq.getBoost(), uniquePositions(positions)); - } else { - addToMap(pq, terms, positions, i + numTermsAtSamePosition, qpm.subMap, slop); - } - } - if (slop > 2 && i + numTermsAtSamePosition < terms.length) { - Term[] otherTerms = Arrays.copyOf(terms, terms.length); - int[] otherPositions = Arrays.copyOf(positions, positions.length); - final int nextTermAtSamePosition = numTermsAtSamePosition(positions, i + numTermsAtSamePosition); - System.arraycopy(terms, i + numTermsAtSamePosition, otherTerms, i, nextTermAtSamePosition); - System.arraycopy(positions, i + numTermsAtSamePosition, otherPositions, i, nextTermAtSamePosition); - System.arraycopy(terms, i, otherTerms, i + nextTermAtSamePosition, numTermsAtSamePosition); - System.arraycopy(positions, i, otherPositions, i + nextTermAtSamePosition, numTermsAtSamePosition); - addToMap(pq, otherTerms, otherPositions, i, map, slop - 2); - } - } - - private int[] uniquePositions(int[] positions) { - int uniqueCount = 1; - for (int i = 1; i < positions.length; ++i) { - if (positions[i] != positions[i - 1]) { - ++uniqueCount; - } - } - if (uniqueCount == positions.length) { - return positions; - } - int[] result = new int[uniqueCount]; - result[0] = positions[0]; - for (int i = 1, j = 1; i < positions.length; ++i) { - if (positions[i] != positions[i - 1]) { - result[j++] = positions[i]; - } - } - return result; - } - + public QueryPhraseMap getTermMap( String term ){ return subMap.get( term ); } private void markTerminal( float boost ){ - markTerminal( 0, boost, null ); + markTerminal( 0, boost ); } - private void markTerminal( int slop, float boost, int[] positions ){ - if (slop > this.slop || (slop == this.slop && boost > this.boost)) { - this.terminal = true; - this.slop = slop; - this.boost = boost; - this.termOrPhraseNumber = fieldQuery.nextTermOrPhraseNumber(); - this.positions = positions; - if (positions != null) { - fieldQuery.maxPhraseWindow = Math.max(fieldQuery.maxPhraseWindow, slop + positions[positions.length-1] - positions[0]); - } - } - } - - /** - * The max phrase window based on the actual phrase positions and slop. - */ - int getMaxPhraseWindow() { - return fieldQuery.maxPhraseWindow; + private void markTerminal( int slop, float boost ){ + this.terminal = true; + this.slop = slop; + this.boost = boost; + this.termOrPhraseNumber = fieldQuery.nextTermOrPhraseNumber(); } public boolean isTerminal(){ @@ -520,20 +435,15 @@ public class FieldQuery { // if the candidate is a term, it is valid if( phraseCandidate.size() == 1 ) return true; - - assert phraseCandidate.size() == positions.length; // else check whether the candidate is valid phrase // compare position-gaps between terms to slop int pos = phraseCandidate.get( 0 ).getPosition(); - int totalDistance = 0; for( int i = 1; i < phraseCandidate.size(); i++ ){ int nextPos = phraseCandidate.get( i ).getPosition(); - final int expectedDelta = positions[i] - positions[i - 1]; - final int actualDelta = nextPos - pos; - totalDistance += Math.abs(expectedDelta - actualDelta); + if( Math.abs( nextPos - pos - 1 ) > slop ) return false; pos = nextPos; } - return totalDistance <= slop; + return true; } } } diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java b/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java index 51998aa3360..1375cfdac47 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java @@ -144,13 +144,6 @@ public class FieldTermStack { return termList.poll(); } - /** - * Return the top TermInfo object of the stack without removing it. - */ - public TermInfo peek() { - return termList.peek(); - } - /** * @param termInfo the TermInfo object to be put on the top of the stack */ diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java b/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java index b1c917192dc..718c7eda46c 100644 --- a/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java +++ b/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java @@ -16,18 +16,10 @@ package org.apache.lucene.search.vectorhighlight; * limitations under the License. */ import java.io.IOException; -import java.io.Reader; -import java.util.Arrays; -import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockTokenFilter; import org.apache.lucene.analysis.MockTokenizer; -import org.apache.lucene.analysis.TokenFilter; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.Tokenizer; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; @@ -35,24 +27,20 @@ import org.apache.lucene.document.TextField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.queries.CommonTermsQuery; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.PhraseQuery; -import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.util._TestUtil; public class FastVectorHighlighterTest extends LuceneTestCase { - - private static final String FIELD = "text"; + public void testSimpleHighlightTest() throws IOException { Directory dir = newDirectory(); @@ -299,171 +287,4 @@ public class FastVectorHighlighterTest extends LuceneTestCase { writer.close(); dir.close(); } - - public void testLotsOfPhrases() throws IOException { - Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET))); - FieldType type = new FieldType(TextField.TYPE_STORED); - type.setStoreTermVectorOffsets(true); - type.setStoreTermVectorPositions(true); - type.setStoreTermVectors(true); - type.freeze(); - String[] terms = { "org", "apache", "lucene"}; - int iters = 1000; // don't let it go too big, or jenkins will stack overflow: atLeast(1000); - StringBuilder builder = new StringBuilder(); - for (int i = 0; i < iters; i++) { - builder.append(terms[random().nextInt(terms.length)]).append(" "); - if (random().nextInt(6) == 3) { - builder.append("solr").append(" "); - } - } - Document doc = new Document(); - Field field = new Field("field", builder.toString(), type); - doc.add(field); - writer.addDocument(doc); - PhraseQuery query = new PhraseQuery(); - query.add(new Term("field", "org")); - query.add(new Term("field", "apache")); - query.add(new Term("field", "lucene")); - - - FastVectorHighlighter highlighter = new FastVectorHighlighter(); - IndexReader reader = DirectoryReader.open(writer, true); - IndexSearcher searcher = newSearcher(reader); - TopDocs hits = searcher.search(query, 10); - assertEquals(1, hits.totalHits); - FieldQuery fieldQuery = highlighter.getFieldQuery(query, reader); - String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, hits.scoreDocs[0].doc, "field", 1000, 1); - for (int i = 0; i < bestFragments.length; i++) { - String result = bestFragments[i].replaceAll("org apache lucene", "FOOBAR"); - assertFalse(result.contains("org apache lucene")); - } - reader.close(); - writer.close(); - dir.close(); - } - - public void testOverlappingPhrases() throws IOException { - final Analyzer analyzer = new Analyzer() { - - @Override - protected TokenStreamComponents createComponents(String fieldName, Reader reader) { - final Tokenizer source = new MockTokenizer(reader); - TokenStream sink = source; - sink = new SynonymFilter(sink); - return new TokenStreamComponents(source, sink); - } - - }; - final Directory directory = newDirectory(); - RandomIndexWriter iw = new RandomIndexWriter(random(), directory, analyzer); - Document doc = new Document(); - FieldType withVectors = new FieldType(TextField.TYPE_STORED); - withVectors.setStoreTermVectors(true); - withVectors.setStoreTermVectorPositions(true); - withVectors.setStoreTermVectorOffsets(true); - doc.add(new Field(FIELD, "a b c", withVectors)); - iw.addDocument(doc); - DirectoryReader ir = iw.getReader(); - - // Disjunction of two overlapping phrase queries - final PhraseQuery pq1 = new PhraseQuery(); - pq1.add(new Term(FIELD, "a"), 0); - pq1.add(new Term(FIELD, "b"), 1); - pq1.add(new Term(FIELD, "c"), 2); - - final PhraseQuery pq2 = new PhraseQuery(); - pq2.add(new Term(FIELD, "a"), 0); - pq2.add(new Term(FIELD, "B"), 1); - pq2.add(new Term(FIELD, "c"), 2); - - final BooleanQuery bq = new BooleanQuery(); - bq.add(pq1, Occur.SHOULD); - bq.add(pq2, Occur.SHOULD); - - // Single phrase query with two terms at the same position - final PhraseQuery pq = new PhraseQuery(); - pq.add(new Term(FIELD, "a"), 0); - pq.add(new Term(FIELD, "b"), 1); - pq.add(new Term(FIELD, "B"), 1); - pq.add(new Term(FIELD, "c"), 2); - - for (Query query : Arrays.asList(pq1, pq2, bq, pq)) { - assertEquals(1, new IndexSearcher(ir).search(bq, 1).totalHits); - - FastVectorHighlighter highlighter = new FastVectorHighlighter(); - FieldQuery fieldQuery = highlighter.getFieldQuery(query, ir); - String[] bestFragments = highlighter.getBestFragments(fieldQuery, ir, 0, FIELD, 1000, 1); - assertEquals("a b c", bestFragments[0]); - } - - ir.close(); - iw.close(); - directory.close(); - } - - public void testPhraseWithGap() throws IOException { - final Directory directory = newDirectory(); - RandomIndexWriter iw = new RandomIndexWriter(random(), directory, new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)); - Document doc = new Document(); - FieldType withVectors = new FieldType(TextField.TYPE_STORED); - withVectors.setStoreTermVectors(true); - withVectors.setStoreTermVectorPositions(true); - withVectors.setStoreTermVectorOffsets(true); - doc.add(new Field(FIELD, "a b c", withVectors)); - iw.addDocument(doc); - DirectoryReader ir = iw.getReader(); - - final PhraseQuery pq = new PhraseQuery(); - pq.add(new Term(FIELD, "c"), 2); - pq.add(new Term(FIELD, "a"), 0); - - assertEquals(1, new IndexSearcher(ir).search(pq, 1).totalHits); - - FastVectorHighlighter highlighter = new FastVectorHighlighter(); - FieldQuery fieldQuery = highlighter.getFieldQuery(pq, ir); - String[] bestFragments = highlighter.getBestFragments(fieldQuery, ir, 0, FIELD, 1000, 1); - assertEquals("a b c", bestFragments[0]); - - ir.close(); - iw.close(); - directory.close(); - } - - // Simple token filter that adds 'B' as a synonym of 'b' - private static class SynonymFilter extends TokenFilter { - - final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); - final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class); - - State pending; - - protected SynonymFilter(TokenStream input) { - super(input); - } - - @Override - public boolean incrementToken() throws IOException { - if (pending != null) { - restoreState(pending); - termAtt.setEmpty().append('B'); - posIncAtt.setPositionIncrement(0); - pending = null; - return true; - } - if (!input.incrementToken()) { - return false; - } - if (termAtt.toString().equals("b")) { - pending = captureState(); - } - return true; - } - - @Override - public void reset() throws IOException { - super.reset(); - pending = null; - } - } } diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldPhraseListTest.java b/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldPhraseListTest.java index 879b6b595a0..45054e6d4e1 100644 --- a/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldPhraseListTest.java +++ b/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldPhraseListTest.java @@ -120,31 +120,7 @@ public class FieldPhraseListTest extends AbstractTestCase { assertEquals( 4, fpl.phraseList.get( 0 ).getStartOffset() ); assertEquals( 9, fpl.phraseList.get( 0 ).getEndOffset() ); } - - public void testProximityPhraseReverse() throws Exception { - make1d1fIndex( "z a a b c" ); - - FieldQuery fq = new FieldQuery( pqF( 2F, 3, "c", "a" ), true, true ); - FieldTermStack stack = new FieldTermStack( reader, 0, F, fq ); - FieldPhraseList fpl = new FieldPhraseList( stack, fq ); - assertEquals( 1, fpl.phraseList.size() ); - assertEquals( "ac(2.0)((4,5)(8,9))", fpl.phraseList.get( 0 ).toString() ); - assertEquals( 4, fpl.phraseList.get( 0 ).getStartOffset() ); - assertEquals( 9, fpl.phraseList.get( 0 ).getEndOffset() ); - } - - public void testProximityPhraseWithRepeatedTerms() throws Exception { - make1d1fIndex( "z a a b b z d" ); - - FieldQuery fq = new FieldQuery( pqF( 2F, 2, "a", "b", "d" ), true, true ); - FieldTermStack stack = new FieldTermStack( reader, 0, F, fq ); - FieldPhraseList fpl = new FieldPhraseList( stack, fq ); - assertEquals( 1, fpl.phraseList.size() ); - assertEquals( "abd(2.0)((4,7)(12,13))", fpl.phraseList.get( 0 ).toString() ); - assertEquals( 4, fpl.phraseList.get( 0 ).getStartOffset() ); - assertEquals( 13, fpl.phraseList.get( 0 ).getEndOffset() ); - } - + public void test2PhrasesOverlap() throws Exception { make1d1fIndex( "d a b c d" ); diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java b/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java index e82cb362940..f381fa7e482 100644 --- a/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java +++ b/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java @@ -863,8 +863,8 @@ public class FieldQueryTest extends AbstractTestCase { phraseCandidate.add( new TermInfo( "c", 4, 5, 4, 1 ) ); assertNull( fq.searchPhrase( F, phraseCandidate ) ); - // "a b c"~2 - query = pqF( 1F, 2, "a", "b", "c" ); + // "a b c"~1 + query = pqF( 1F, 1, "a", "b", "c" ); // phraseHighlight = true, fieldMatch = true fq = new FieldQuery( query, true, true );