mirror of https://github.com/apache/lucene.git
LUCENE-1095: option added to StopFilter and QueryParser to consider position increments.
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@607591 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
b367e863e6
commit
f4639c0ab0
|
@ -266,6 +266,14 @@ New features
|
|||
|
||||
11. LUCENE-1019: CustomScoreQuery enhanced to support multiple
|
||||
ValueSource queries. (Kyle Maxwell via Doron Cohen)
|
||||
|
||||
12. LUCENE-1095: Added an option to StopFilter to increase
|
||||
positionIncrement of the token succeeding a stopped token.
|
||||
Disabled by default. Similar option added to QueryParser
|
||||
to consider token positions when creating PhraseQuery
|
||||
and MultiPhraseQuery. Disabled by default (so by default
|
||||
the query parser ignores position increments).
|
||||
(Doron Cohen)
|
||||
|
||||
|
||||
Optimizations
|
||||
|
|
|
@ -27,7 +27,10 @@ import java.util.Set;
|
|||
|
||||
public final class StopFilter extends TokenFilter {
|
||||
|
||||
private static boolean ENABLE_POSITION_INCREMENTS_DEFAULT = false;
|
||||
|
||||
private final CharArraySet stopWords;
|
||||
private boolean enablePositionIncrements = ENABLE_POSITION_INCREMENTS_DEFAULT;
|
||||
|
||||
/**
|
||||
* Construct a token stream filtering the given input.
|
||||
|
@ -111,11 +114,58 @@ public final class StopFilter extends TokenFilter {
|
|||
*/
|
||||
public final Token next(Token result) throws IOException {
|
||||
// return the first non-stop word found
|
||||
int skippedPositions = 0;
|
||||
while((result = input.next(result)) != null) {
|
||||
if (!stopWords.contains(result.termBuffer(), 0, result.termLength))
|
||||
if (!stopWords.contains(result.termBuffer(), 0, result.termLength)) {
|
||||
if (enablePositionIncrements) {
|
||||
result.setPositionIncrement(result.getPositionIncrement() + skippedPositions);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
skippedPositions += result.getPositionIncrement();
|
||||
}
|
||||
// reached EOS -- return null
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* @see #setEnablePositionIncrementsDefault(boolean)
|
||||
*/
|
||||
public static boolean getEnablePositionIncrementsDefault() {
|
||||
return ENABLE_POSITION_INCREMENTS_DEFAULT;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the default position increments behavior of every StopFilter created from now on.
|
||||
* <p>
|
||||
* Note: behavior of a single StopFilter instance can be modified
|
||||
* with {@link #setEnablePositionIncrements(boolean)}.
|
||||
* This static method allows control over behavior of classes using StopFilters internally,
|
||||
* for example {@link org.apache.lucene.analysis.standard.StandardAnalyzer StandardAnalyzer}.
|
||||
* <p>
|
||||
* Default : false.
|
||||
* @see #setEnablePositionIncrements(boolean)
|
||||
*/
|
||||
public static void setEnablePositionIncrementsDefault(boolean defaultValue) {
|
||||
ENABLE_POSITION_INCREMENTS_DEFAULT = defaultValue;
|
||||
}
|
||||
|
||||
/**
|
||||
* @see #setEnablePositionIncrements(boolean)
|
||||
*/
|
||||
public boolean getEnablePositionIncrements() {
|
||||
return enablePositionIncrements;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set to <code>true</code> to make <b>this</b> StopFilter apply position increments to the tokens it returns.
|
||||
* <p>
|
||||
* When set, when a token is stopped (omitted), the position increment of
|
||||
* the following token is incremented.
|
||||
* <p>
|
||||
* Default: see {@link #setEnablePositionIncrementsDefault(boolean)}.
|
||||
*/
|
||||
public void setEnablePositionIncrements(boolean enable) {
|
||||
this.enablePositionIncrements = enable;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -100,6 +100,7 @@ public class QueryParser implements QueryParserConstants {
|
|||
boolean lowercaseExpandedTerms = true;
|
||||
boolean useOldRangeQuery= false;
|
||||
boolean allowLeadingWildcard = false;
|
||||
boolean enablePositionIncrements = false;
|
||||
|
||||
Analyzer analyzer;
|
||||
String field;
|
||||
|
@ -234,12 +235,33 @@ public class QueryParser implements QueryParserConstants {
|
|||
}
|
||||
|
||||
/**
|
||||
* @see #setAllowLeadingWildcard
|
||||
* @see #setAllowLeadingWildcard(boolean)
|
||||
*/
|
||||
public boolean getAllowLeadingWildcard() {
|
||||
return allowLeadingWildcard;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set to <code>true</code> to enable position increments in result query.
|
||||
* <p>
|
||||
* When set, result phrase and multi-phrase queries will
|
||||
* be aware of position increments.
|
||||
* Useful when e.g. a StopFilter increases the position increment of
|
||||
* the token that follows an omitted token.
|
||||
* <p>
|
||||
* Default: false.
|
||||
*/
|
||||
public void setEnablePositionIncrements(boolean enable) {
|
||||
this.enablePositionIncrements = enable;
|
||||
}
|
||||
|
||||
/**
|
||||
* @see #setEnablePositionIncrements(boolean)
|
||||
*/
|
||||
public boolean getEnablePositionIncrements() {
|
||||
return enablePositionIncrements;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the boolean operator of the QueryParser.
|
||||
* In default mode (<code>OR_OPERATOR</code>) terms without any modifiers
|
||||
|
@ -478,27 +500,42 @@ public class QueryParser implements QueryParserConstants {
|
|||
MultiPhraseQuery mpq = new MultiPhraseQuery();
|
||||
mpq.setSlop(phraseSlop);
|
||||
List multiTerms = new ArrayList();
|
||||
int position = -1;
|
||||
for (int i = 0; i < v.size(); i++) {
|
||||
t = (org.apache.lucene.analysis.Token) v.elementAt(i);
|
||||
if (t.getPositionIncrement() == 1 && multiTerms.size() > 0) {
|
||||
mpq.add((Term[])multiTerms.toArray(new Term[0]));
|
||||
if (t.getPositionIncrement() > 0 && multiTerms.size() > 0) {
|
||||
if (enablePositionIncrements) {
|
||||
mpq.add((Term[])multiTerms.toArray(new Term[0]),position);
|
||||
} else {
|
||||
mpq.add((Term[])multiTerms.toArray(new Term[0]));
|
||||
}
|
||||
multiTerms.clear();
|
||||
}
|
||||
position += t.getPositionIncrement();
|
||||
multiTerms.add(new Term(field, t.termText()));
|
||||
}
|
||||
mpq.add((Term[])multiTerms.toArray(new Term[0]));
|
||||
if (enablePositionIncrements) {
|
||||
mpq.add((Term[])multiTerms.toArray(new Term[0]),position);
|
||||
} else {
|
||||
mpq.add((Term[])multiTerms.toArray(new Term[0]));
|
||||
}
|
||||
return mpq;
|
||||
}
|
||||
}
|
||||
else {
|
||||
PhraseQuery q = new PhraseQuery();
|
||||
q.setSlop(phraseSlop);
|
||||
PhraseQuery pq = new PhraseQuery();
|
||||
pq.setSlop(phraseSlop);
|
||||
int position = -1;
|
||||
for (int i = 0; i < v.size(); i++) {
|
||||
q.add(new Term(field, ((org.apache.lucene.analysis.Token)
|
||||
v.elementAt(i)).termText()));
|
||||
|
||||
t = (org.apache.lucene.analysis.Token) v.elementAt(i);
|
||||
if (enablePositionIncrements) {
|
||||
position += t.getPositionIncrement();
|
||||
pq.add(new Term(field, t.termText()),position);
|
||||
} else {
|
||||
pq.add(new Term(field, t.termText()));
|
||||
}
|
||||
}
|
||||
return q;
|
||||
return pq;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1262,12 +1299,6 @@ public class QueryParser implements QueryParserConstants {
|
|||
finally { jj_save(0, xla); }
|
||||
}
|
||||
|
||||
final private boolean jj_3R_3() {
|
||||
if (jj_scan_token(STAR)) return true;
|
||||
if (jj_scan_token(COLON)) return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
final private boolean jj_3R_2() {
|
||||
if (jj_scan_token(TERM)) return true;
|
||||
if (jj_scan_token(COLON)) return true;
|
||||
|
@ -1284,6 +1315,12 @@ public class QueryParser implements QueryParserConstants {
|
|||
return false;
|
||||
}
|
||||
|
||||
final private boolean jj_3R_3() {
|
||||
if (jj_scan_token(STAR)) return true;
|
||||
if (jj_scan_token(COLON)) return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
public QueryParserTokenManager token_source;
|
||||
public Token token, jj_nt;
|
||||
private int jj_ntk;
|
||||
|
|
|
@ -124,6 +124,7 @@ public class QueryParser {
|
|||
boolean lowercaseExpandedTerms = true;
|
||||
boolean useOldRangeQuery= false;
|
||||
boolean allowLeadingWildcard = false;
|
||||
boolean enablePositionIncrements = false;
|
||||
|
||||
Analyzer analyzer;
|
||||
String field;
|
||||
|
@ -258,12 +259,33 @@ public class QueryParser {
|
|||
}
|
||||
|
||||
/**
|
||||
* @see #setAllowLeadingWildcard
|
||||
* @see #setAllowLeadingWildcard(boolean)
|
||||
*/
|
||||
public boolean getAllowLeadingWildcard() {
|
||||
return allowLeadingWildcard;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set to <code>true</code> to enable position increments in result query.
|
||||
* <p>
|
||||
* When set, result phrase and multi-phrase queries will
|
||||
* be aware of position increments.
|
||||
* Useful when e.g. a StopFilter increases the position increment of
|
||||
* the token that follows an omitted token.
|
||||
* <p>
|
||||
* Default: false.
|
||||
*/
|
||||
public void setEnablePositionIncrements(boolean enable) {
|
||||
this.enablePositionIncrements = enable;
|
||||
}
|
||||
|
||||
/**
|
||||
* @see #setEnablePositionIncrements(boolean)
|
||||
*/
|
||||
public boolean getEnablePositionIncrements() {
|
||||
return enablePositionIncrements;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the boolean operator of the QueryParser.
|
||||
* In default mode (<code>OR_OPERATOR</code>) terms without any modifiers
|
||||
|
@ -502,27 +524,42 @@ public class QueryParser {
|
|||
MultiPhraseQuery mpq = new MultiPhraseQuery();
|
||||
mpq.setSlop(phraseSlop);
|
||||
List multiTerms = new ArrayList();
|
||||
int position = -1;
|
||||
for (int i = 0; i < v.size(); i++) {
|
||||
t = (org.apache.lucene.analysis.Token) v.elementAt(i);
|
||||
if (t.getPositionIncrement() == 1 && multiTerms.size() > 0) {
|
||||
mpq.add((Term[])multiTerms.toArray(new Term[0]));
|
||||
if (t.getPositionIncrement() > 0 && multiTerms.size() > 0) {
|
||||
if (enablePositionIncrements) {
|
||||
mpq.add((Term[])multiTerms.toArray(new Term[0]),position);
|
||||
} else {
|
||||
mpq.add((Term[])multiTerms.toArray(new Term[0]));
|
||||
}
|
||||
multiTerms.clear();
|
||||
}
|
||||
position += t.getPositionIncrement();
|
||||
multiTerms.add(new Term(field, t.termText()));
|
||||
}
|
||||
mpq.add((Term[])multiTerms.toArray(new Term[0]));
|
||||
if (enablePositionIncrements) {
|
||||
mpq.add((Term[])multiTerms.toArray(new Term[0]),position);
|
||||
} else {
|
||||
mpq.add((Term[])multiTerms.toArray(new Term[0]));
|
||||
}
|
||||
return mpq;
|
||||
}
|
||||
}
|
||||
else {
|
||||
PhraseQuery q = new PhraseQuery();
|
||||
q.setSlop(phraseSlop);
|
||||
PhraseQuery pq = new PhraseQuery();
|
||||
pq.setSlop(phraseSlop);
|
||||
int position = -1;
|
||||
for (int i = 0; i < v.size(); i++) {
|
||||
q.add(new Term(field, ((org.apache.lucene.analysis.Token)
|
||||
v.elementAt(i)).termText()));
|
||||
|
||||
t = (org.apache.lucene.analysis.Token) v.elementAt(i);
|
||||
if (enablePositionIncrements) {
|
||||
position += t.getPositionIncrement();
|
||||
pq.add(new Term(field, t.termText()),position);
|
||||
} else {
|
||||
pq.add(new Term(field, t.termText()));
|
||||
}
|
||||
}
|
||||
return q;
|
||||
return pq;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -64,7 +64,33 @@ public class TestStopAnalyzer extends LuceneTestCase {
|
|||
while ((token = stream.next()) != null) {
|
||||
String text = token.termText();
|
||||
assertFalse(stopWordsSet.contains(text));
|
||||
assertEquals(1,token.getPositionIncrement()); // by default stop tokenizer does not apply increments.
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public void testStopListPositions() throws IOException {
|
||||
boolean defaultEnable = StopFilter.getEnablePositionIncrementsDefault();
|
||||
StopFilter.setEnablePositionIncrementsDefault(true);
|
||||
try {
|
||||
Set stopWordsSet = new HashSet();
|
||||
stopWordsSet.add("good");
|
||||
stopWordsSet.add("test");
|
||||
stopWordsSet.add("analyzer");
|
||||
StopAnalyzer newStop = new StopAnalyzer((String[])stopWordsSet.toArray(new String[3]));
|
||||
StringReader reader = new StringReader("This is a good test of the english stop analyzer with positions");
|
||||
int expectedIncr[] = { 1, 1, 1, 3, 1, 1, 1, 2, 1};
|
||||
TokenStream stream = newStop.tokenStream("test", reader);
|
||||
assertNotNull(stream);
|
||||
Token token = null;
|
||||
int i = 0;
|
||||
while ((token = stream.next()) != null) {
|
||||
String text = token.termText();
|
||||
assertFalse(stopWordsSet.contains(text));
|
||||
assertEquals(expectedIncr[i++],token.getPositionIncrement());
|
||||
}
|
||||
} finally {
|
||||
StopFilter.setEnablePositionIncrementsDefault(defaultEnable);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -16,10 +16,12 @@ package org.apache.lucene.analysis;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.util.English;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.StringReader;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
|
@ -27,6 +29,8 @@ import java.util.Set;
|
|||
*/
|
||||
public class TestStopFilter extends LuceneTestCase {
|
||||
|
||||
private final static boolean VERBOSE = false;
|
||||
|
||||
// other StopFilter functionality is already tested by TestStopAnalyzer
|
||||
|
||||
public void testExactCase() throws IOException {
|
||||
|
@ -56,4 +60,69 @@ public class TestStopFilter extends LuceneTestCase {
|
|||
assertEquals(null, stream.next());
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Position increments applied by StopFilter with and without enabling this option.
|
||||
*/
|
||||
public void testStopPositons() throws IOException {
|
||||
StringBuffer sb = new StringBuffer();
|
||||
ArrayList a = new ArrayList();
|
||||
for (int i=0; i<20; i++) {
|
||||
String w = English.intToEnglish(i).trim();
|
||||
sb.append(w).append(" ");
|
||||
if (i%3 != 0) a.add(w);
|
||||
}
|
||||
log(sb.toString());
|
||||
String stopWords[] = (String[]) a.toArray(new String[0]);
|
||||
for (int i=0; i<a.size(); i++) log("Stop: "+stopWords[i]);
|
||||
Set stopSet = StopFilter.makeStopSet(stopWords);
|
||||
// with increments
|
||||
StringReader reader = new StringReader(sb.toString());
|
||||
StopFilter stpf = new StopFilter(new WhitespaceTokenizer(reader), stopSet);
|
||||
doTestStopPositons(stpf,true);
|
||||
// without increments
|
||||
reader = new StringReader(sb.toString());
|
||||
stpf = new StopFilter(new WhitespaceTokenizer(reader), stopSet);
|
||||
doTestStopPositons(stpf,false);
|
||||
// with increments, concatenating two stop filters
|
||||
ArrayList a0 = new ArrayList();
|
||||
ArrayList a1 = new ArrayList();
|
||||
for (int i=0; i<a.size(); i++) {
|
||||
if (i%2==0) {
|
||||
a0.add(a.get(i));
|
||||
} else {
|
||||
a1.add(a.get(i));
|
||||
}
|
||||
}
|
||||
String stopWords0[] = (String[]) a0.toArray(new String[0]);
|
||||
for (int i=0; i<a0.size(); i++) log("Stop0: "+stopWords0[i]);
|
||||
String stopWords1[] = (String[]) a1.toArray(new String[0]);
|
||||
for (int i=0; i<a1.size(); i++) log("Stop1: "+stopWords1[i]);
|
||||
Set stopSet0 = StopFilter.makeStopSet(stopWords0);
|
||||
Set stopSet1 = StopFilter.makeStopSet(stopWords1);
|
||||
reader = new StringReader(sb.toString());
|
||||
StopFilter stpf0 = new StopFilter(new WhitespaceTokenizer(reader), stopSet0); // first part of the set
|
||||
stpf0.setEnablePositionIncrements(true);
|
||||
StopFilter stpf01 = new StopFilter(stpf0, stopSet1); // two stop filters concatenated!
|
||||
doTestStopPositons(stpf01,true);
|
||||
}
|
||||
|
||||
private void doTestStopPositons(StopFilter stpf, boolean enableIcrements) throws IOException {
|
||||
log("---> test with enable-increments-"+(enableIcrements?"enabled":"disabled"));
|
||||
stpf.setEnablePositionIncrements(enableIcrements);
|
||||
for (int i=0; i<20; i+=3) {
|
||||
Token t = stpf.next();
|
||||
log("Token "+i+": "+t);
|
||||
String w = English.intToEnglish(i).trim();
|
||||
assertEquals("expecting token "+i+" to be "+w,w,t.termText());
|
||||
assertEquals("all but first token must have position increment of 3",enableIcrements?(i==0?1:3):1,t.getPositionIncrement());
|
||||
}
|
||||
assertNull(stpf.next());
|
||||
}
|
||||
|
||||
// print debug info depending on VERBOSE
|
||||
private static void log(String s) {
|
||||
if (VERBOSE) {
|
||||
System.out.println(s);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -838,19 +838,42 @@ public class TestQueryParser extends LuceneTestCase {
|
|||
public void testStopwords() throws Exception {
|
||||
QueryParser qp = new QueryParser("a", new StopAnalyzer(new String[]{"the", "foo"}));
|
||||
Query result = qp.parse("a:the OR a:foo");
|
||||
assertTrue("result is null and it shouldn't be", result != null);
|
||||
assertNotNull("result is null and it shouldn't be", result);
|
||||
assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery);
|
||||
assertTrue(((BooleanQuery) result).clauses().size() + " does not equal: " + 0, ((BooleanQuery) result).clauses().size() == 0);
|
||||
result = qp.parse("a:woo OR a:the");
|
||||
assertTrue("result is null and it shouldn't be", result != null);
|
||||
assertNotNull("result is null and it shouldn't be", result);
|
||||
assertTrue("result is not a TermQuery", result instanceof TermQuery);
|
||||
result = qp.parse("(fieldX:xxxxx OR fieldy:xxxxxxxx)^2 AND (fieldx:the OR fieldy:foo)");
|
||||
assertTrue("result is null and it shouldn't be", result != null);
|
||||
assertNotNull("result is null and it shouldn't be", result);
|
||||
assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery);
|
||||
System.out.println("Result: " + result);
|
||||
assertTrue(((BooleanQuery) result).clauses().size() + " does not equal: " + 2, ((BooleanQuery) result).clauses().size() == 2);
|
||||
}
|
||||
|
||||
public void testPositionIncrement() throws Exception {
|
||||
boolean dflt = StopFilter.getEnablePositionIncrementsDefault();
|
||||
StopFilter.setEnablePositionIncrementsDefault(true);
|
||||
try {
|
||||
QueryParser qp = new QueryParser("a", new StopAnalyzer(new String[]{"the", "in", "are", "this"}));
|
||||
qp.setEnablePositionIncrements(true);
|
||||
String qtxt = "\"the words in poisitions pos02578 are stopped in this phrasequery\"";
|
||||
// 0 2 5 7 8
|
||||
int expectedPositions[] = {1,3,4,6,9};
|
||||
PhraseQuery pq = (PhraseQuery) qp.parse(qtxt);
|
||||
//System.out.println("Query text: "+qtxt);
|
||||
//System.out.println("Result: "+pq);
|
||||
Term t[] = pq.getTerms();
|
||||
int pos[] = pq.getPositions();
|
||||
for (int i = 0; i < t.length; i++) {
|
||||
//System.out.println(i+". "+t[i]+" pos: "+pos[i]);
|
||||
assertEquals("term "+i+" = "+t[i]+" has wrong term-position!",expectedPositions[i],pos[i]);
|
||||
}
|
||||
} finally {
|
||||
StopFilter.setEnablePositionIncrementsDefault(dflt);
|
||||
}
|
||||
}
|
||||
|
||||
public void testMatchAllDocs() throws Exception {
|
||||
QueryParser qp = new QueryParser("field", new WhitespaceAnalyzer());
|
||||
assertEquals(new MatchAllDocsQuery(), qp.parse("*:*"));
|
||||
|
|
|
@ -19,11 +19,14 @@ package org.apache.lucene.search;
|
|||
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.queryParser.QueryParser;
|
||||
import org.apache.lucene.search.PhraseQuery;
|
||||
import org.apache.lucene.search.Hits;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.store.RAMDirectory;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.StopAnalyzer;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.WhitespaceAnalyzer;
|
||||
|
@ -80,6 +83,20 @@ public class TestPositionIncrement extends LuceneTestCase {
|
|||
hits = searcher.search(q);
|
||||
assertEquals(0, hits.length());
|
||||
|
||||
// same as previous, just specify positions explicitly.
|
||||
q = new PhraseQuery();
|
||||
q.add(new Term("field", "1"),0);
|
||||
q.add(new Term("field", "2"),1);
|
||||
hits = searcher.search(q);
|
||||
assertEquals(0, hits.length());
|
||||
|
||||
// specifying correct positions should find the phrase.
|
||||
q = new PhraseQuery();
|
||||
q.add(new Term("field", "1"),0);
|
||||
q.add(new Term("field", "2"),2);
|
||||
hits = searcher.search(q);
|
||||
assertEquals(1, hits.length());
|
||||
|
||||
q = new PhraseQuery();
|
||||
q.add(new Term("field", "2"));
|
||||
q.add(new Term("field", "3"));
|
||||
|
@ -92,6 +109,28 @@ public class TestPositionIncrement extends LuceneTestCase {
|
|||
hits = searcher.search(q);
|
||||
assertEquals(0, hits.length());
|
||||
|
||||
// phrase query would find it when correct positions are specified.
|
||||
q = new PhraseQuery();
|
||||
q.add(new Term("field", "3"),0);
|
||||
q.add(new Term("field", "4"),0);
|
||||
hits = searcher.search(q);
|
||||
assertEquals(1, hits.length());
|
||||
|
||||
// phrase query should fail for non existing searched term
|
||||
// even if another searched term exists at the same searched position.
|
||||
q = new PhraseQuery();
|
||||
q.add(new Term("field", "3"),0);
|
||||
q.add(new Term("field", "9"),0);
|
||||
hits = searcher.search(q);
|
||||
assertEquals(0, hits.length());
|
||||
|
||||
// multi-phrase query should succeed for a non-existing searched term
|
||||
// because another searched term exists at the same searched position.
|
||||
MultiPhraseQuery mq = new MultiPhraseQuery();
|
||||
mq.add(new Term[]{new Term("field", "3"),new Term("field", "9")},0);
|
||||
hits = searcher.search(mq);
|
||||
assertEquals(1, hits.length());
|
||||
|
||||
q = new PhraseQuery();
|
||||
q.add(new Term("field", "2"));
|
||||
q.add(new Term("field", "4"));
|
||||
|
@ -115,6 +154,50 @@ public class TestPositionIncrement extends LuceneTestCase {
|
|||
q.add(new Term("field", "5"));
|
||||
hits = searcher.search(q);
|
||||
assertEquals(0, hits.length());
|
||||
|
||||
// analyzer to introduce stopwords and increment gaps
|
||||
Analyzer stpa = new Analyzer() {
|
||||
final WhitespaceAnalyzer a = new WhitespaceAnalyzer();
|
||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
||||
TokenStream ts = a.tokenStream(fieldName,reader);
|
||||
return new StopFilter(ts,new String[]{"stop"});
|
||||
}
|
||||
};
|
||||
|
||||
// should not find "1 2" because there is a gap of 1 in the index
|
||||
QueryParser qp = new QueryParser("field",stpa);
|
||||
q = (PhraseQuery) qp.parse("\"1 2\"");
|
||||
hits = searcher.search(q);
|
||||
assertEquals(0, hits.length());
|
||||
|
||||
// omitted stop word cannot help because stop filter swallows the increments.
|
||||
q = (PhraseQuery) qp.parse("\"1 stop 2\"");
|
||||
hits = searcher.search(q);
|
||||
assertEquals(0, hits.length());
|
||||
|
||||
// query parser alone won't help, because stop filter swallows the increments.
|
||||
qp.setEnablePositionIncrements(true);
|
||||
q = (PhraseQuery) qp.parse("\"1 stop 2\"");
|
||||
hits = searcher.search(q);
|
||||
assertEquals(0, hits.length());
|
||||
|
||||
boolean dflt = StopFilter.getEnablePositionIncrementsDefault();
|
||||
try {
|
||||
// stop filter alone won't help, because query parser swallows the increments.
|
||||
qp.setEnablePositionIncrements(false);
|
||||
StopFilter.setEnablePositionIncrementsDefault(true);
|
||||
q = (PhraseQuery) qp.parse("\"1 stop 2\"");
|
||||
hits = searcher.search(q);
|
||||
assertEquals(0, hits.length());
|
||||
|
||||
// when both qp and stopFilter propagate increments, we should find the doc.
|
||||
qp.setEnablePositionIncrements(true);
|
||||
q = (PhraseQuery) qp.parse("\"1 stop 2\"");
|
||||
hits = searcher.search(q);
|
||||
assertEquals(1, hits.length());
|
||||
} finally {
|
||||
StopFilter.setEnablePositionIncrementsDefault(dflt);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
Loading…
Reference in New Issue