mirror of https://github.com/apache/lucene.git
Revert LUCENE-4734.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1520536 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
da16af7519
commit
9a930b8806
|
@ -112,9 +112,6 @@ Bug Fixes
|
||||||
* LUCENE-5116: IndexWriter.addIndexes(IndexReader...) should drop empty (or all
|
* LUCENE-5116: IndexWriter.addIndexes(IndexReader...) should drop empty (or all
|
||||||
deleted) segments. (Robert Muir, Shai Erera)
|
deleted) segments. (Robert Muir, Shai Erera)
|
||||||
|
|
||||||
* LUCENE-4734: Add FastVectorHighlighter support for proximity queries and
|
|
||||||
phrase queries with gaps or overlapping terms. (Ryan Lauck, Adrien Grand)
|
|
||||||
|
|
||||||
* LUCENE-5132: Spatial RecursivePrefixTree Contains predicate will throw an NPE
|
* LUCENE-5132: Spatial RecursivePrefixTree Contains predicate will throw an NPE
|
||||||
when there's no indexed data and maybe in other circumstances too. (David Smiley)
|
when there's no indexed data and maybe in other circumstances too. (David Smiley)
|
||||||
|
|
||||||
|
|
|
@ -60,50 +60,50 @@ public class FieldPhraseList {
|
||||||
public FieldPhraseList( FieldTermStack fieldTermStack, FieldQuery fieldQuery, int phraseLimit ){
|
public FieldPhraseList( FieldTermStack fieldTermStack, FieldQuery fieldQuery, int phraseLimit ){
|
||||||
final String field = fieldTermStack.getFieldName();
|
final String field = fieldTermStack.getFieldName();
|
||||||
|
|
||||||
QueryPhraseMap qpm = fieldQuery.getRootMap(field);
|
LinkedList<TermInfo> phraseCandidate = new LinkedList<TermInfo>();
|
||||||
if (qpm != null) {
|
QueryPhraseMap currMap = null;
|
||||||
LinkedList<TermInfo> phraseCandidate = new LinkedList<TermInfo>();
|
QueryPhraseMap nextMap = null;
|
||||||
extractPhrases(fieldTermStack.termList, qpm, phraseCandidate, 0);
|
while( !fieldTermStack.isEmpty() && (phraseList.size() < phraseLimit) )
|
||||||
assert phraseCandidate.size() == 0;
|
{
|
||||||
}
|
phraseCandidate.clear();
|
||||||
}
|
|
||||||
|
|
||||||
void extractPhrases(LinkedList<TermInfo> terms, QueryPhraseMap currMap, LinkedList<TermInfo> phraseCandidate, int longest) {
|
TermInfo ti = fieldTermStack.pop();
|
||||||
if (phraseCandidate.size() > 1 && phraseCandidate.getLast().getPosition() - phraseCandidate.getFirst().getPosition() > currMap.getMaxPhraseWindow()) {
|
currMap = fieldQuery.getFieldTermMap( field, ti.getText() );
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (terms.isEmpty()) {
|
|
||||||
if (longest > 0) {
|
|
||||||
addIfNoOverlap( new WeightedPhraseInfo( phraseCandidate.subList(0, longest), currMap.getBoost(), currMap.getTermOrPhraseNumber() ) );
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
ArrayList<TermInfo> samePositionTerms = new ArrayList<TermInfo>();
|
|
||||||
do {
|
|
||||||
samePositionTerms.add(terms.pop());
|
|
||||||
} while (!terms.isEmpty() && terms.get(0).getPosition() == samePositionTerms.get(0).getPosition());
|
|
||||||
|
|
||||||
// try all next terms at the same position
|
// if not found, discard top TermInfo from stack, then try next element
|
||||||
for (TermInfo nextTerm : samePositionTerms) {
|
if( currMap == null ) continue;
|
||||||
QueryPhraseMap nextMap = currMap.getTermMap(nextTerm.getText());
|
|
||||||
if (nextMap != null) {
|
// if found, search the longest phrase
|
||||||
phraseCandidate.add(nextTerm);
|
phraseCandidate.add( ti );
|
||||||
int l = longest;
|
while( true ){
|
||||||
if(nextMap.isValidTermOrPhrase( phraseCandidate ) ){
|
ti = fieldTermStack.pop();
|
||||||
l = phraseCandidate.size();
|
nextMap = null;
|
||||||
|
if( ti != null )
|
||||||
|
nextMap = currMap.getTermMap( ti.getText() );
|
||||||
|
if( ti == null || nextMap == null ){
|
||||||
|
if( ti != null )
|
||||||
|
fieldTermStack.push( ti );
|
||||||
|
if( currMap.isValidTermOrPhrase( phraseCandidate ) ){
|
||||||
|
addIfNoOverlap( new WeightedPhraseInfo( phraseCandidate, currMap.getBoost(), currMap.getTermOrPhraseNumber() ) );
|
||||||
|
}
|
||||||
|
else{
|
||||||
|
while( phraseCandidate.size() > 1 ){
|
||||||
|
fieldTermStack.push( phraseCandidate.removeLast() );
|
||||||
|
currMap = fieldQuery.searchPhrase( field, phraseCandidate );
|
||||||
|
if( currMap != null ){
|
||||||
|
addIfNoOverlap( new WeightedPhraseInfo( phraseCandidate, currMap.getBoost(), currMap.getTermOrPhraseNumber() ) );
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
else{
|
||||||
|
phraseCandidate.add( ti );
|
||||||
|
currMap = nextMap;
|
||||||
}
|
}
|
||||||
extractPhrases(terms, nextMap, phraseCandidate, l);
|
|
||||||
phraseCandidate.removeLast();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ignore the next term
|
|
||||||
extractPhrases(terms, currMap, phraseCandidate, longest);
|
|
||||||
|
|
||||||
// add terms back
|
|
||||||
for (TermInfo nextTerm : samePositionTerms) {
|
|
||||||
terms.push(nextTerm);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void addIfNoOverlap( WeightedPhraseInfo wpi ){
|
public void addIfNoOverlap( WeightedPhraseInfo wpi ){
|
||||||
|
@ -159,11 +159,11 @@ public class FieldPhraseList {
|
||||||
return termsInfos;
|
return termsInfos;
|
||||||
}
|
}
|
||||||
|
|
||||||
public WeightedPhraseInfo( List<TermInfo> terms, float boost ){
|
public WeightedPhraseInfo( LinkedList<TermInfo> terms, float boost ){
|
||||||
this( terms, boost, 0 );
|
this( terms, boost, 0 );
|
||||||
}
|
}
|
||||||
|
|
||||||
public WeightedPhraseInfo( List<TermInfo> terms, float boost, int seqnum ){
|
public WeightedPhraseInfo( LinkedList<TermInfo> terms, float boost, int seqnum ){
|
||||||
this.boost = boost;
|
this.boost = boost;
|
||||||
this.seqnum = seqnum;
|
this.seqnum = seqnum;
|
||||||
|
|
||||||
|
|
|
@ -17,8 +17,6 @@ package org.apache.lucene.search.vectorhighlight;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
|
@ -30,6 +28,7 @@ import java.util.Set;
|
||||||
|
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.queries.CommonTermsQuery;
|
||||||
import org.apache.lucene.search.BooleanClause;
|
import org.apache.lucene.search.BooleanClause;
|
||||||
import org.apache.lucene.search.BooleanQuery;
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
import org.apache.lucene.search.ConstantScoreQuery;
|
import org.apache.lucene.search.ConstantScoreQuery;
|
||||||
|
@ -40,7 +39,6 @@ import org.apache.lucene.search.PhraseQuery;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.search.TermQuery;
|
import org.apache.lucene.search.TermQuery;
|
||||||
import org.apache.lucene.search.vectorhighlight.FieldTermStack.TermInfo;
|
import org.apache.lucene.search.vectorhighlight.FieldTermStack.TermInfo;
|
||||||
import org.apache.lucene.util.InPlaceMergeSorter;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* FieldQuery breaks down query object into terms/phrases and keeps
|
* FieldQuery breaks down query object into terms/phrases and keeps
|
||||||
|
@ -62,8 +60,6 @@ public class FieldQuery {
|
||||||
|
|
||||||
// The maximum number of different matching terms accumulated from any one MultiTermQuery
|
// The maximum number of different matching terms accumulated from any one MultiTermQuery
|
||||||
private static final int MAX_MTQ_TERMS = 1024;
|
private static final int MAX_MTQ_TERMS = 1024;
|
||||||
|
|
||||||
private int maxPhraseWindow = 1;
|
|
||||||
|
|
||||||
FieldQuery( Query query, IndexReader reader, boolean phraseHighlight, boolean fieldMatch ) throws IOException {
|
FieldQuery( Query query, IndexReader reader, boolean phraseHighlight, boolean fieldMatch ) throws IOException {
|
||||||
this.fieldMatch = fieldMatch;
|
this.fieldMatch = fieldMatch;
|
||||||
|
@ -334,8 +330,7 @@ public class FieldQuery {
|
||||||
return root.searchPhrase( phraseCandidate );
|
return root.searchPhrase( phraseCandidate );
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Get the root map for the given field name. */
|
private QueryPhraseMap getRootMap( String fieldName ){
|
||||||
public QueryPhraseMap getRootMap( String fieldName ){
|
|
||||||
return rootMaps.get( fieldMatch ? fieldName : null );
|
return rootMaps.get( fieldMatch ? fieldName : null );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -352,7 +347,6 @@ public class FieldQuery {
|
||||||
boolean terminal;
|
boolean terminal;
|
||||||
int slop; // valid if terminal == true and phraseHighlight == true
|
int slop; // valid if terminal == true and phraseHighlight == true
|
||||||
float boost; // valid if terminal == true
|
float boost; // valid if terminal == true
|
||||||
int[] positions; // valid if terminal == true
|
|
||||||
int termOrPhraseNumber; // valid if terminal == true
|
int termOrPhraseNumber; // valid if terminal == true
|
||||||
FieldQuery fieldQuery;
|
FieldQuery fieldQuery;
|
||||||
Map<String, QueryPhraseMap> subMap = new HashMap<String, QueryPhraseMap>();
|
Map<String, QueryPhraseMap> subMap = new HashMap<String, QueryPhraseMap>();
|
||||||
|
@ -375,117 +369,38 @@ public class FieldQuery {
|
||||||
return map;
|
return map;
|
||||||
}
|
}
|
||||||
|
|
||||||
void add( Query query, IndexReader reader ) {
|
void add( Query query, IndexReader reader ) {
|
||||||
if( query instanceof TermQuery ){
|
if( query instanceof TermQuery ){
|
||||||
addTerm( ((TermQuery)query).getTerm(), query.getBoost() );
|
addTerm( ((TermQuery)query).getTerm(), query.getBoost() );
|
||||||
}
|
}
|
||||||
else if( query instanceof PhraseQuery ){
|
else if( query instanceof PhraseQuery ){
|
||||||
PhraseQuery pq = (PhraseQuery)query;
|
PhraseQuery pq = (PhraseQuery)query;
|
||||||
final Term[] terms = pq.getTerms();
|
Term[] terms = pq.getTerms();
|
||||||
final int[] positions = pq.getPositions();
|
Map<String, QueryPhraseMap> map = subMap;
|
||||||
new InPlaceMergeSorter() {
|
QueryPhraseMap qpm = null;
|
||||||
|
for( Term term : terms ){
|
||||||
@Override
|
qpm = getOrNewMap( map, term.text() );
|
||||||
protected void swap(int i, int j) {
|
map = qpm.subMap;
|
||||||
Term tmpTerm = terms[i];
|
}
|
||||||
terms[i] = terms[j];
|
qpm.markTerminal( pq.getSlop(), pq.getBoost() );
|
||||||
terms[j] = tmpTerm;
|
|
||||||
|
|
||||||
int tmpPos = positions[i];
|
|
||||||
positions[i] = positions[j];
|
|
||||||
positions[j] = tmpPos;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected int compare(int i, int j) {
|
|
||||||
return positions[i] - positions[j];
|
|
||||||
}
|
|
||||||
}.sort(0, terms.length);
|
|
||||||
|
|
||||||
addToMap(pq, terms, positions, 0, subMap, pq.getSlop());
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
throw new RuntimeException( "query \"" + query.toString() + "\" must be flatten first." );
|
throw new RuntimeException( "query \"" + query.toString() + "\" must be flatten first." );
|
||||||
}
|
}
|
||||||
|
|
||||||
private int numTermsAtSamePosition(int[] positions, int i) {
|
|
||||||
int numTermsAtSamePosition = 1;
|
|
||||||
for (int j = i + 1; j < positions.length; ++j) {
|
|
||||||
if (positions[j] == positions[i]) {
|
|
||||||
++numTermsAtSamePosition;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return numTermsAtSamePosition;
|
|
||||||
}
|
|
||||||
|
|
||||||
private void addToMap(PhraseQuery pq, Term[] terms, int[] positions, int i, Map<String, QueryPhraseMap> map, int slop) {
|
|
||||||
int numTermsAtSamePosition = numTermsAtSamePosition(positions, i);
|
|
||||||
for (int j = 0; j < numTermsAtSamePosition; ++j) {
|
|
||||||
QueryPhraseMap qpm = getOrNewMap(map, terms[i + j].text());
|
|
||||||
if (i + numTermsAtSamePosition == terms.length) {
|
|
||||||
qpm.markTerminal(pq.getSlop(), pq.getBoost(), uniquePositions(positions));
|
|
||||||
} else {
|
|
||||||
addToMap(pq, terms, positions, i + numTermsAtSamePosition, qpm.subMap, slop);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (slop > 2 && i + numTermsAtSamePosition < terms.length) {
|
|
||||||
Term[] otherTerms = Arrays.copyOf(terms, terms.length);
|
|
||||||
int[] otherPositions = Arrays.copyOf(positions, positions.length);
|
|
||||||
final int nextTermAtSamePosition = numTermsAtSamePosition(positions, i + numTermsAtSamePosition);
|
|
||||||
System.arraycopy(terms, i + numTermsAtSamePosition, otherTerms, i, nextTermAtSamePosition);
|
|
||||||
System.arraycopy(positions, i + numTermsAtSamePosition, otherPositions, i, nextTermAtSamePosition);
|
|
||||||
System.arraycopy(terms, i, otherTerms, i + nextTermAtSamePosition, numTermsAtSamePosition);
|
|
||||||
System.arraycopy(positions, i, otherPositions, i + nextTermAtSamePosition, numTermsAtSamePosition);
|
|
||||||
addToMap(pq, otherTerms, otherPositions, i, map, slop - 2);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private int[] uniquePositions(int[] positions) {
|
|
||||||
int uniqueCount = 1;
|
|
||||||
for (int i = 1; i < positions.length; ++i) {
|
|
||||||
if (positions[i] != positions[i - 1]) {
|
|
||||||
++uniqueCount;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (uniqueCount == positions.length) {
|
|
||||||
return positions;
|
|
||||||
}
|
|
||||||
int[] result = new int[uniqueCount];
|
|
||||||
result[0] = positions[0];
|
|
||||||
for (int i = 1, j = 1; i < positions.length; ++i) {
|
|
||||||
if (positions[i] != positions[i - 1]) {
|
|
||||||
result[j++] = positions[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
public QueryPhraseMap getTermMap( String term ){
|
public QueryPhraseMap getTermMap( String term ){
|
||||||
return subMap.get( term );
|
return subMap.get( term );
|
||||||
}
|
}
|
||||||
|
|
||||||
private void markTerminal( float boost ){
|
private void markTerminal( float boost ){
|
||||||
markTerminal( 0, boost, null );
|
markTerminal( 0, boost );
|
||||||
}
|
}
|
||||||
|
|
||||||
private void markTerminal( int slop, float boost, int[] positions ){
|
private void markTerminal( int slop, float boost ){
|
||||||
if (slop > this.slop || (slop == this.slop && boost > this.boost)) {
|
this.terminal = true;
|
||||||
this.terminal = true;
|
this.slop = slop;
|
||||||
this.slop = slop;
|
this.boost = boost;
|
||||||
this.boost = boost;
|
this.termOrPhraseNumber = fieldQuery.nextTermOrPhraseNumber();
|
||||||
this.termOrPhraseNumber = fieldQuery.nextTermOrPhraseNumber();
|
|
||||||
this.positions = positions;
|
|
||||||
if (positions != null) {
|
|
||||||
fieldQuery.maxPhraseWindow = Math.max(fieldQuery.maxPhraseWindow, slop + positions[positions.length-1] - positions[0]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* The max phrase window based on the actual phrase positions and slop.
|
|
||||||
*/
|
|
||||||
int getMaxPhraseWindow() {
|
|
||||||
return fieldQuery.maxPhraseWindow;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean isTerminal(){
|
public boolean isTerminal(){
|
||||||
|
@ -520,20 +435,15 @@ public class FieldQuery {
|
||||||
// if the candidate is a term, it is valid
|
// if the candidate is a term, it is valid
|
||||||
if( phraseCandidate.size() == 1 ) return true;
|
if( phraseCandidate.size() == 1 ) return true;
|
||||||
|
|
||||||
|
|
||||||
assert phraseCandidate.size() == positions.length;
|
|
||||||
// else check whether the candidate is valid phrase
|
// else check whether the candidate is valid phrase
|
||||||
// compare position-gaps between terms to slop
|
// compare position-gaps between terms to slop
|
||||||
int pos = phraseCandidate.get( 0 ).getPosition();
|
int pos = phraseCandidate.get( 0 ).getPosition();
|
||||||
int totalDistance = 0;
|
|
||||||
for( int i = 1; i < phraseCandidate.size(); i++ ){
|
for( int i = 1; i < phraseCandidate.size(); i++ ){
|
||||||
int nextPos = phraseCandidate.get( i ).getPosition();
|
int nextPos = phraseCandidate.get( i ).getPosition();
|
||||||
final int expectedDelta = positions[i] - positions[i - 1];
|
if( Math.abs( nextPos - pos - 1 ) > slop ) return false;
|
||||||
final int actualDelta = nextPos - pos;
|
|
||||||
totalDistance += Math.abs(expectedDelta - actualDelta);
|
|
||||||
pos = nextPos;
|
pos = nextPos;
|
||||||
}
|
}
|
||||||
return totalDistance <= slop;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -144,13 +144,6 @@ public class FieldTermStack {
|
||||||
return termList.poll();
|
return termList.poll();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Return the top TermInfo object of the stack without removing it.
|
|
||||||
*/
|
|
||||||
public TermInfo peek() {
|
|
||||||
return termList.peek();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param termInfo the TermInfo object to be put on the top of the stack
|
* @param termInfo the TermInfo object to be put on the top of the stack
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -16,18 +16,10 @@ package org.apache.lucene.search.vectorhighlight;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.Reader;
|
|
||||||
import java.util.Arrays;
|
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
|
||||||
import org.apache.lucene.analysis.MockAnalyzer;
|
import org.apache.lucene.analysis.MockAnalyzer;
|
||||||
import org.apache.lucene.analysis.MockTokenFilter;
|
import org.apache.lucene.analysis.MockTokenFilter;
|
||||||
import org.apache.lucene.analysis.MockTokenizer;
|
import org.apache.lucene.analysis.MockTokenizer;
|
||||||
import org.apache.lucene.analysis.TokenFilter;
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
|
||||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.document.FieldType;
|
import org.apache.lucene.document.FieldType;
|
||||||
|
@ -35,24 +27,20 @@ import org.apache.lucene.document.TextField;
|
||||||
import org.apache.lucene.index.DirectoryReader;
|
import org.apache.lucene.index.DirectoryReader;
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.IndexWriter;
|
import org.apache.lucene.index.IndexWriter;
|
||||||
import org.apache.lucene.index.RandomIndexWriter;
|
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.queries.CommonTermsQuery;
|
import org.apache.lucene.queries.CommonTermsQuery;
|
||||||
import org.apache.lucene.search.BooleanClause.Occur;
|
import org.apache.lucene.search.BooleanClause.Occur;
|
||||||
import org.apache.lucene.search.BooleanQuery;
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
import org.apache.lucene.search.IndexSearcher;
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
import org.apache.lucene.search.PhraseQuery;
|
import org.apache.lucene.search.PhraseQuery;
|
||||||
import org.apache.lucene.search.Query;
|
|
||||||
import org.apache.lucene.search.TermQuery;
|
import org.apache.lucene.search.TermQuery;
|
||||||
import org.apache.lucene.search.TopDocs;
|
import org.apache.lucene.search.TopDocs;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
import org.apache.lucene.util._TestUtil;
|
|
||||||
|
|
||||||
|
|
||||||
public class FastVectorHighlighterTest extends LuceneTestCase {
|
public class FastVectorHighlighterTest extends LuceneTestCase {
|
||||||
|
|
||||||
private static final String FIELD = "text";
|
|
||||||
|
|
||||||
public void testSimpleHighlightTest() throws IOException {
|
public void testSimpleHighlightTest() throws IOException {
|
||||||
Directory dir = newDirectory();
|
Directory dir = newDirectory();
|
||||||
|
@ -299,171 +287,4 @@ public class FastVectorHighlighterTest extends LuceneTestCase {
|
||||||
writer.close();
|
writer.close();
|
||||||
dir.close();
|
dir.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testLotsOfPhrases() throws IOException {
|
|
||||||
Directory dir = newDirectory();
|
|
||||||
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET)));
|
|
||||||
FieldType type = new FieldType(TextField.TYPE_STORED);
|
|
||||||
type.setStoreTermVectorOffsets(true);
|
|
||||||
type.setStoreTermVectorPositions(true);
|
|
||||||
type.setStoreTermVectors(true);
|
|
||||||
type.freeze();
|
|
||||||
String[] terms = { "org", "apache", "lucene"};
|
|
||||||
int iters = 1000; // don't let it go too big, or jenkins will stack overflow: atLeast(1000);
|
|
||||||
StringBuilder builder = new StringBuilder();
|
|
||||||
for (int i = 0; i < iters; i++) {
|
|
||||||
builder.append(terms[random().nextInt(terms.length)]).append(" ");
|
|
||||||
if (random().nextInt(6) == 3) {
|
|
||||||
builder.append("solr").append(" ");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Document doc = new Document();
|
|
||||||
Field field = new Field("field", builder.toString(), type);
|
|
||||||
doc.add(field);
|
|
||||||
writer.addDocument(doc);
|
|
||||||
PhraseQuery query = new PhraseQuery();
|
|
||||||
query.add(new Term("field", "org"));
|
|
||||||
query.add(new Term("field", "apache"));
|
|
||||||
query.add(new Term("field", "lucene"));
|
|
||||||
|
|
||||||
|
|
||||||
FastVectorHighlighter highlighter = new FastVectorHighlighter();
|
|
||||||
IndexReader reader = DirectoryReader.open(writer, true);
|
|
||||||
IndexSearcher searcher = newSearcher(reader);
|
|
||||||
TopDocs hits = searcher.search(query, 10);
|
|
||||||
assertEquals(1, hits.totalHits);
|
|
||||||
FieldQuery fieldQuery = highlighter.getFieldQuery(query, reader);
|
|
||||||
String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, hits.scoreDocs[0].doc, "field", 1000, 1);
|
|
||||||
for (int i = 0; i < bestFragments.length; i++) {
|
|
||||||
String result = bestFragments[i].replaceAll("<b>org apache lucene</b>", "FOOBAR");
|
|
||||||
assertFalse(result.contains("org apache lucene"));
|
|
||||||
}
|
|
||||||
reader.close();
|
|
||||||
writer.close();
|
|
||||||
dir.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testOverlappingPhrases() throws IOException {
|
|
||||||
final Analyzer analyzer = new Analyzer() {
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
|
||||||
final Tokenizer source = new MockTokenizer(reader);
|
|
||||||
TokenStream sink = source;
|
|
||||||
sink = new SynonymFilter(sink);
|
|
||||||
return new TokenStreamComponents(source, sink);
|
|
||||||
}
|
|
||||||
|
|
||||||
};
|
|
||||||
final Directory directory = newDirectory();
|
|
||||||
RandomIndexWriter iw = new RandomIndexWriter(random(), directory, analyzer);
|
|
||||||
Document doc = new Document();
|
|
||||||
FieldType withVectors = new FieldType(TextField.TYPE_STORED);
|
|
||||||
withVectors.setStoreTermVectors(true);
|
|
||||||
withVectors.setStoreTermVectorPositions(true);
|
|
||||||
withVectors.setStoreTermVectorOffsets(true);
|
|
||||||
doc.add(new Field(FIELD, "a b c", withVectors));
|
|
||||||
iw.addDocument(doc);
|
|
||||||
DirectoryReader ir = iw.getReader();
|
|
||||||
|
|
||||||
// Disjunction of two overlapping phrase queries
|
|
||||||
final PhraseQuery pq1 = new PhraseQuery();
|
|
||||||
pq1.add(new Term(FIELD, "a"), 0);
|
|
||||||
pq1.add(new Term(FIELD, "b"), 1);
|
|
||||||
pq1.add(new Term(FIELD, "c"), 2);
|
|
||||||
|
|
||||||
final PhraseQuery pq2 = new PhraseQuery();
|
|
||||||
pq2.add(new Term(FIELD, "a"), 0);
|
|
||||||
pq2.add(new Term(FIELD, "B"), 1);
|
|
||||||
pq2.add(new Term(FIELD, "c"), 2);
|
|
||||||
|
|
||||||
final BooleanQuery bq = new BooleanQuery();
|
|
||||||
bq.add(pq1, Occur.SHOULD);
|
|
||||||
bq.add(pq2, Occur.SHOULD);
|
|
||||||
|
|
||||||
// Single phrase query with two terms at the same position
|
|
||||||
final PhraseQuery pq = new PhraseQuery();
|
|
||||||
pq.add(new Term(FIELD, "a"), 0);
|
|
||||||
pq.add(new Term(FIELD, "b"), 1);
|
|
||||||
pq.add(new Term(FIELD, "B"), 1);
|
|
||||||
pq.add(new Term(FIELD, "c"), 2);
|
|
||||||
|
|
||||||
for (Query query : Arrays.asList(pq1, pq2, bq, pq)) {
|
|
||||||
assertEquals(1, new IndexSearcher(ir).search(bq, 1).totalHits);
|
|
||||||
|
|
||||||
FastVectorHighlighter highlighter = new FastVectorHighlighter();
|
|
||||||
FieldQuery fieldQuery = highlighter.getFieldQuery(query, ir);
|
|
||||||
String[] bestFragments = highlighter.getBestFragments(fieldQuery, ir, 0, FIELD, 1000, 1);
|
|
||||||
assertEquals("<b>a b c</b>", bestFragments[0]);
|
|
||||||
}
|
|
||||||
|
|
||||||
ir.close();
|
|
||||||
iw.close();
|
|
||||||
directory.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testPhraseWithGap() throws IOException {
|
|
||||||
final Directory directory = newDirectory();
|
|
||||||
RandomIndexWriter iw = new RandomIndexWriter(random(), directory, new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false));
|
|
||||||
Document doc = new Document();
|
|
||||||
FieldType withVectors = new FieldType(TextField.TYPE_STORED);
|
|
||||||
withVectors.setStoreTermVectors(true);
|
|
||||||
withVectors.setStoreTermVectorPositions(true);
|
|
||||||
withVectors.setStoreTermVectorOffsets(true);
|
|
||||||
doc.add(new Field(FIELD, "a b c", withVectors));
|
|
||||||
iw.addDocument(doc);
|
|
||||||
DirectoryReader ir = iw.getReader();
|
|
||||||
|
|
||||||
final PhraseQuery pq = new PhraseQuery();
|
|
||||||
pq.add(new Term(FIELD, "c"), 2);
|
|
||||||
pq.add(new Term(FIELD, "a"), 0);
|
|
||||||
|
|
||||||
assertEquals(1, new IndexSearcher(ir).search(pq, 1).totalHits);
|
|
||||||
|
|
||||||
FastVectorHighlighter highlighter = new FastVectorHighlighter();
|
|
||||||
FieldQuery fieldQuery = highlighter.getFieldQuery(pq, ir);
|
|
||||||
String[] bestFragments = highlighter.getBestFragments(fieldQuery, ir, 0, FIELD, 1000, 1);
|
|
||||||
assertEquals("<b>a</b> b <b>c</b>", bestFragments[0]);
|
|
||||||
|
|
||||||
ir.close();
|
|
||||||
iw.close();
|
|
||||||
directory.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Simple token filter that adds 'B' as a synonym of 'b'
|
|
||||||
private static class SynonymFilter extends TokenFilter {
|
|
||||||
|
|
||||||
final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
|
||||||
final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
|
|
||||||
|
|
||||||
State pending;
|
|
||||||
|
|
||||||
protected SynonymFilter(TokenStream input) {
|
|
||||||
super(input);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean incrementToken() throws IOException {
|
|
||||||
if (pending != null) {
|
|
||||||
restoreState(pending);
|
|
||||||
termAtt.setEmpty().append('B');
|
|
||||||
posIncAtt.setPositionIncrement(0);
|
|
||||||
pending = null;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
if (!input.incrementToken()) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (termAtt.toString().equals("b")) {
|
|
||||||
pending = captureState();
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void reset() throws IOException {
|
|
||||||
super.reset();
|
|
||||||
pending = null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -120,31 +120,7 @@ public class FieldPhraseListTest extends AbstractTestCase {
|
||||||
assertEquals( 4, fpl.phraseList.get( 0 ).getStartOffset() );
|
assertEquals( 4, fpl.phraseList.get( 0 ).getStartOffset() );
|
||||||
assertEquals( 9, fpl.phraseList.get( 0 ).getEndOffset() );
|
assertEquals( 9, fpl.phraseList.get( 0 ).getEndOffset() );
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testProximityPhraseReverse() throws Exception {
|
|
||||||
make1d1fIndex( "z a a b c" );
|
|
||||||
|
|
||||||
FieldQuery fq = new FieldQuery( pqF( 2F, 3, "c", "a" ), true, true );
|
|
||||||
FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
|
|
||||||
FieldPhraseList fpl = new FieldPhraseList( stack, fq );
|
|
||||||
assertEquals( 1, fpl.phraseList.size() );
|
|
||||||
assertEquals( "ac(2.0)((4,5)(8,9))", fpl.phraseList.get( 0 ).toString() );
|
|
||||||
assertEquals( 4, fpl.phraseList.get( 0 ).getStartOffset() );
|
|
||||||
assertEquals( 9, fpl.phraseList.get( 0 ).getEndOffset() );
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testProximityPhraseWithRepeatedTerms() throws Exception {
|
|
||||||
make1d1fIndex( "z a a b b z d" );
|
|
||||||
|
|
||||||
FieldQuery fq = new FieldQuery( pqF( 2F, 2, "a", "b", "d" ), true, true );
|
|
||||||
FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
|
|
||||||
FieldPhraseList fpl = new FieldPhraseList( stack, fq );
|
|
||||||
assertEquals( 1, fpl.phraseList.size() );
|
|
||||||
assertEquals( "abd(2.0)((4,7)(12,13))", fpl.phraseList.get( 0 ).toString() );
|
|
||||||
assertEquals( 4, fpl.phraseList.get( 0 ).getStartOffset() );
|
|
||||||
assertEquals( 13, fpl.phraseList.get( 0 ).getEndOffset() );
|
|
||||||
}
|
|
||||||
|
|
||||||
public void test2PhrasesOverlap() throws Exception {
|
public void test2PhrasesOverlap() throws Exception {
|
||||||
make1d1fIndex( "d a b c d" );
|
make1d1fIndex( "d a b c d" );
|
||||||
|
|
||||||
|
|
|
@ -863,8 +863,8 @@ public class FieldQueryTest extends AbstractTestCase {
|
||||||
phraseCandidate.add( new TermInfo( "c", 4, 5, 4, 1 ) );
|
phraseCandidate.add( new TermInfo( "c", 4, 5, 4, 1 ) );
|
||||||
assertNull( fq.searchPhrase( F, phraseCandidate ) );
|
assertNull( fq.searchPhrase( F, phraseCandidate ) );
|
||||||
|
|
||||||
// "a b c"~2
|
// "a b c"~1
|
||||||
query = pqF( 1F, 2, "a", "b", "c" );
|
query = pqF( 1F, 1, "a", "b", "c" );
|
||||||
|
|
||||||
// phraseHighlight = true, fieldMatch = true
|
// phraseHighlight = true, fieldMatch = true
|
||||||
fq = new FieldQuery( query, true, true );
|
fq = new FieldQuery( query, true, true );
|
||||||
|
|
Loading…
Reference in New Issue