mirror of https://github.com/apache/lucene.git
LUCENE-3284: Decoupled remaining module/contrib tests from QueryParser
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1144566 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
5c2923a1de
commit
f16f395a30
lucene/contrib/highlighter/src/test/org/apache/lucene/search
highlight
vectorhighlight
modules
analysis/common/src/test/org/apache/lucene/analysis
queries/src/test/org/apache/lucene/queries
|
@ -53,24 +53,13 @@ import org.apache.lucene.index.IndexWriterConfig;
|
|||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||
import org.apache.lucene.queryParser.ParseException;
|
||||
import org.apache.lucene.queryParser.QueryParser;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.FilteredQuery;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.MultiPhraseQuery;
|
||||
import org.apache.lucene.search.MultiTermQuery;
|
||||
import org.apache.lucene.search.NumericRangeQuery;
|
||||
import org.apache.lucene.search.PhraseQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.TermRangeFilter;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.search.WildcardQuery;
|
||||
import org.apache.lucene.search.*;
|
||||
import org.apache.lucene.search.BooleanClause.Occur;
|
||||
import org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner;
|
||||
import org.apache.lucene.search.regex.RegexQuery;
|
||||
import org.apache.lucene.search.spans.*;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.automaton.BasicAutomata;
|
||||
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
|
||||
import org.apache.lucene.util.automaton.RegExp;
|
||||
|
@ -102,8 +91,12 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
|||
|
||||
public void testQueryScorerHits() throws Exception {
|
||||
Analyzer analyzer = new MockAnalyzer(random, MockTokenizer.SIMPLE, true);
|
||||
QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, FIELD_NAME, analyzer);
|
||||
query = qp.parse("\"very long\"");
|
||||
|
||||
PhraseQuery phraseQuery = new PhraseQuery();
|
||||
phraseQuery.add(new Term(FIELD_NAME, "very"));
|
||||
phraseQuery.add(new Term(FIELD_NAME, "long"));
|
||||
|
||||
query = phraseQuery;
|
||||
searcher = new IndexSearcher(ramDir, true);
|
||||
TopDocs hits = searcher.search(query, 10);
|
||||
|
||||
|
@ -133,12 +126,15 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
|||
|
||||
String s1 = "I call our world Flatland, not because we call it so,";
|
||||
|
||||
QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, FIELD_NAME, new MockAnalyzer(random, MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true));
|
||||
|
||||
// Verify that a query against the default field results in text being
|
||||
// highlighted
|
||||
// regardless of the field name.
|
||||
Query q = parser.parse("\"world Flatland\"~3");
|
||||
|
||||
PhraseQuery q = new PhraseQuery();
|
||||
q.setSlop(3);
|
||||
q.add(new Term(FIELD_NAME, "world"));
|
||||
q.add(new Term(FIELD_NAME, "flatland"));
|
||||
|
||||
String expected = "I call our <B>world</B> <B>Flatland</B>, not because we call it so,";
|
||||
String observed = highlightField(q, "SOME_FIELD_NAME", s1);
|
||||
if (VERBOSE) System.out.println("Expected: \"" + expected + "\n" + "Observed: \"" + observed);
|
||||
|
@ -150,7 +146,11 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
|||
// when the query field name differs from the name of the field being
|
||||
// highlighted,
|
||||
// which in this example happens to be the default field name.
|
||||
q = parser.parse("text:\"world Flatland\"~3");
|
||||
q = new PhraseQuery();
|
||||
q.setSlop(3);
|
||||
q.add(new Term("text", "world"));
|
||||
q.add(new Term("text", "flatland"));
|
||||
|
||||
expected = s1;
|
||||
observed = highlightField(q, FIELD_NAME, s1);
|
||||
if (VERBOSE) System.out.println("Expected: \"" + expected + "\n" + "Observed: \"" + observed);
|
||||
|
@ -177,7 +177,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
|||
}
|
||||
|
||||
public void testSimpleSpanHighlighter() throws Exception {
|
||||
doSearching("Kennedy");
|
||||
doSearching(new TermQuery(new Term(FIELD_NAME, "kennedy")));
|
||||
|
||||
int maxNumFragmentsRequired = 2;
|
||||
|
||||
|
@ -202,23 +202,49 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
|||
// LUCENE-1752
|
||||
public void testRepeatingTermsInMultBooleans() throws Exception {
|
||||
String content = "x y z a b c d e f g b c g";
|
||||
String ph1 = "\"a b c d\"";
|
||||
String ph2 = "\"b c g\"";
|
||||
String f1 = "f1";
|
||||
String f2 = "f2";
|
||||
String f1c = f1 + ":";
|
||||
String f2c = f2 + ":";
|
||||
String q = "(" + f1c + ph1 + " OR " + f2c + ph1 + ") AND (" + f1c + ph2
|
||||
+ " OR " + f2c + ph2 + ")";
|
||||
Analyzer analyzer = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false);
|
||||
QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, f1, analyzer);
|
||||
Query query = qp.parse(q);
|
||||
|
||||
QueryScorer scorer = new QueryScorer(query, f1);
|
||||
PhraseQuery f1ph1 = new PhraseQuery();
|
||||
f1ph1.add(new Term(f1, "a"));
|
||||
f1ph1.add(new Term(f1, "b"));
|
||||
f1ph1.add(new Term(f1, "c"));
|
||||
f1ph1.add(new Term(f1, "d"));
|
||||
|
||||
PhraseQuery f2ph1 = new PhraseQuery();
|
||||
f2ph1.add(new Term(f2, "a"));
|
||||
f2ph1.add(new Term(f2, "b"));
|
||||
f2ph1.add(new Term(f2, "c"));
|
||||
f2ph1.add(new Term(f2, "d"));
|
||||
|
||||
PhraseQuery f1ph2 = new PhraseQuery();
|
||||
f1ph2.add(new Term(f1, "b"));
|
||||
f1ph2.add(new Term(f1, "c"));
|
||||
f1ph2.add(new Term(f1, "g"));
|
||||
|
||||
PhraseQuery f2ph2 = new PhraseQuery();
|
||||
f2ph2.add(new Term(f2, "b"));
|
||||
f2ph2.add(new Term(f2, "c"));
|
||||
f2ph2.add(new Term(f2, "g"));
|
||||
|
||||
BooleanQuery booleanQuery = new BooleanQuery();
|
||||
BooleanQuery leftChild = new BooleanQuery();
|
||||
leftChild.add(f1ph1, Occur.SHOULD);
|
||||
leftChild.add(f2ph1, Occur.SHOULD);
|
||||
booleanQuery.add(leftChild, Occur.MUST);
|
||||
|
||||
BooleanQuery rightChild = new BooleanQuery();
|
||||
rightChild.add(f1ph2, Occur.SHOULD);
|
||||
rightChild.add(f2ph2, Occur.SHOULD);
|
||||
booleanQuery.add(rightChild, Occur.MUST);
|
||||
|
||||
QueryScorer scorer = new QueryScorer(booleanQuery, f1);
|
||||
scorer.setExpandMultiTermQuery(false);
|
||||
|
||||
Highlighter h = new Highlighter(this, scorer);
|
||||
|
||||
Analyzer analyzer = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false);
|
||||
|
||||
h.getBestFragment(analyzer, f1, content);
|
||||
|
||||
assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
|
||||
|
@ -226,7 +252,11 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
|||
}
|
||||
|
||||
public void testSimpleQueryScorerPhraseHighlighting() throws Exception {
|
||||
doSearching("\"very long and contains\"");
|
||||
PhraseQuery phraseQuery = new PhraseQuery();
|
||||
phraseQuery.add(new Term(FIELD_NAME, "very"));
|
||||
phraseQuery.add(new Term(FIELD_NAME, "long"));
|
||||
phraseQuery.add(new Term(FIELD_NAME, "contains"), 3);
|
||||
doSearching(phraseQuery);
|
||||
|
||||
int maxNumFragmentsRequired = 2;
|
||||
|
||||
|
@ -248,7 +278,14 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
|||
numHighlights == 3);
|
||||
|
||||
numHighlights = 0;
|
||||
doSearching("\"This piece of text refers to Kennedy\"");
|
||||
|
||||
phraseQuery = new PhraseQuery();
|
||||
phraseQuery.add(new Term(FIELD_NAME, "piece"), 1);
|
||||
phraseQuery.add(new Term(FIELD_NAME, "text"), 3);
|
||||
phraseQuery.add(new Term(FIELD_NAME, "refers"), 4);
|
||||
phraseQuery.add(new Term(FIELD_NAME, "kennedy"), 6);
|
||||
|
||||
doSearching(phraseQuery);
|
||||
|
||||
maxNumFragmentsRequired = 2;
|
||||
|
||||
|
@ -270,7 +307,14 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
|||
numHighlights == 4);
|
||||
|
||||
numHighlights = 0;
|
||||
doSearching("\"lets is a the lets is a the lets is a the lets\"");
|
||||
|
||||
phraseQuery = new PhraseQuery();
|
||||
phraseQuery.add(new Term(FIELD_NAME, "lets"));
|
||||
phraseQuery.add(new Term(FIELD_NAME, "lets"), 4);
|
||||
phraseQuery.add(new Term(FIELD_NAME, "lets"), 8);
|
||||
phraseQuery.add(new Term(FIELD_NAME, "lets"), 12);
|
||||
|
||||
doSearching(phraseQuery);
|
||||
|
||||
maxNumFragmentsRequired = 2;
|
||||
|
||||
|
@ -366,7 +410,12 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
|||
}
|
||||
|
||||
public void testSimpleQueryScorerPhraseHighlighting2() throws Exception {
|
||||
doSearching("\"text piece long\"~5");
|
||||
PhraseQuery phraseQuery = new PhraseQuery();
|
||||
phraseQuery.setSlop(5);
|
||||
phraseQuery.add(new Term(FIELD_NAME, "text"));
|
||||
phraseQuery.add(new Term(FIELD_NAME, "piece"));
|
||||
phraseQuery.add(new Term(FIELD_NAME, "long"));
|
||||
doSearching(phraseQuery);
|
||||
|
||||
int maxNumFragmentsRequired = 2;
|
||||
|
||||
|
@ -388,7 +437,11 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
|||
}
|
||||
|
||||
public void testSimpleQueryScorerPhraseHighlighting3() throws Exception {
|
||||
doSearching("\"x y z\"");
|
||||
PhraseQuery phraseQuery = new PhraseQuery();
|
||||
phraseQuery.add(new Term(FIELD_NAME, "x"));
|
||||
phraseQuery.add(new Term(FIELD_NAME, "y"));
|
||||
phraseQuery.add(new Term(FIELD_NAME, "z"));
|
||||
doSearching(phraseQuery);
|
||||
|
||||
int maxNumFragmentsRequired = 2;
|
||||
|
||||
|
@ -410,7 +463,12 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
|||
}
|
||||
|
||||
public void testSimpleSpanFragmenter() throws Exception {
|
||||
doSearching("\"piece of text that is very long\"");
|
||||
PhraseQuery phraseQuery = new PhraseQuery();
|
||||
phraseQuery.add(new Term(FIELD_NAME, "piece"));
|
||||
phraseQuery.add(new Term(FIELD_NAME, "text"), 2);
|
||||
phraseQuery.add(new Term(FIELD_NAME, "very"), 5);
|
||||
phraseQuery.add(new Term(FIELD_NAME, "long"), 6);
|
||||
doSearching(phraseQuery);
|
||||
|
||||
int maxNumFragmentsRequired = 2;
|
||||
|
||||
|
@ -428,8 +486,12 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
|||
if (VERBOSE) System.out.println("\t" + result);
|
||||
|
||||
}
|
||||
|
||||
doSearching("\"been shot\"");
|
||||
|
||||
phraseQuery = new PhraseQuery();
phraseQuery.add(new Term(FIELD_NAME, "been"));
phraseQuery.add(new Term(FIELD_NAME, "shot"));

// Search with the phrase that was just built. Passing `query` here would re-run
// whatever query was searched previously and leave this phrase unused.
doSearching(phraseQuery);
|
||||
|
||||
maxNumFragmentsRequired = 2;
|
||||
|
||||
|
@ -451,7 +513,16 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
|||
|
||||
// position sensitive query added after position insensitive query
|
||||
public void testPosTermStdTerm() throws Exception {
|
||||
doSearching("y \"x y z\"");
|
||||
BooleanQuery booleanQuery = new BooleanQuery();
|
||||
booleanQuery.add(new TermQuery(new Term(FIELD_NAME, "y")), Occur.SHOULD);
|
||||
|
||||
PhraseQuery phraseQuery = new PhraseQuery();
|
||||
phraseQuery.add(new Term(FIELD_NAME, "x"));
|
||||
phraseQuery.add(new Term(FIELD_NAME, "y"));
|
||||
phraseQuery.add(new Term(FIELD_NAME, "z"));
|
||||
booleanQuery.add(phraseQuery, Occur.SHOULD);
|
||||
|
||||
doSearching(booleanQuery);
|
||||
|
||||
int maxNumFragmentsRequired = 2;
|
||||
|
||||
|
@ -525,7 +596,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
|||
}
|
||||
|
||||
public void testSimpleQueryTermScorerHighlighter() throws Exception {
|
||||
doSearching("Kennedy");
|
||||
doSearching(new TermQuery(new Term(FIELD_NAME, "kennedy")));
|
||||
Highlighter highlighter = new Highlighter(new QueryTermScorer(query));
|
||||
highlighter.setTextFragmenter(new SimpleFragmenter(40));
|
||||
int maxNumFragmentsRequired = 2;
|
||||
|
@ -591,7 +662,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
|||
@Override
|
||||
public void run() throws Exception {
|
||||
numHighlights = 0;
|
||||
doSearching("Kennedy");
|
||||
doSearching(new TermQuery(new Term(FIELD_NAME, "kennedy")));
|
||||
doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
|
||||
assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
|
||||
numHighlights == 4);
|
||||
|
@ -607,7 +678,9 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
|||
@Override
|
||||
public void run() throws Exception {
|
||||
numHighlights = 0;
|
||||
doSearching("Kinnedy~0.5");
|
||||
FuzzyQuery fuzzyQuery = new FuzzyQuery(new Term(FIELD_NAME, "kinnedy"), 0.5f);
|
||||
fuzzyQuery.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
|
||||
doSearching(fuzzyQuery);
|
||||
doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this, true);
|
||||
assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
|
||||
numHighlights == 5);
|
||||
|
@ -623,7 +696,9 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
|||
@Override
|
||||
public void run() throws Exception {
|
||||
numHighlights = 0;
|
||||
doSearching("K?nnedy");
|
||||
WildcardQuery wildcardQuery = new WildcardQuery(new Term(FIELD_NAME, "k?nnedy"));
|
||||
wildcardQuery.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
|
||||
doSearching(wildcardQuery);
|
||||
doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
|
||||
assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
|
||||
numHighlights == 4);
|
||||
|
@ -639,7 +714,9 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
|||
@Override
|
||||
public void run() throws Exception {
|
||||
numHighlights = 0;
|
||||
doSearching("K*dy");
|
||||
WildcardQuery wildcardQuery = new WildcardQuery(new Term(FIELD_NAME, "k*dy"));
|
||||
wildcardQuery.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
|
||||
doSearching(wildcardQuery);
|
||||
doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
|
||||
assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
|
||||
numHighlights == 5);
|
||||
|
@ -660,9 +737,15 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
|||
// Need to explicitly set the QueryParser property to use TermRangeQuery
|
||||
// rather
|
||||
// than RangeFilters
|
||||
QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, FIELD_NAME, analyzer);
|
||||
parser.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
|
||||
query = parser.parse(queryString);
|
||||
|
||||
TermRangeQuery rangeQuery = new TermRangeQuery(
|
||||
FIELD_NAME,
|
||||
new BytesRef("kannedy"),
|
||||
new BytesRef("kznnedy"),
|
||||
true, true);
|
||||
rangeQuery.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
|
||||
|
||||
query = rangeQuery;
|
||||
doSearching(query);
|
||||
|
||||
doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
|
||||
|
@ -772,7 +855,10 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
|||
@Override
|
||||
public void run() throws Exception {
|
||||
numHighlights = 0;
|
||||
doSearching("\"John Kennedy\"");
|
||||
PhraseQuery phraseQuery = new PhraseQuery();
|
||||
phraseQuery.add(new Term(FIELD_NAME, "john"));
|
||||
phraseQuery.add(new Term(FIELD_NAME, "kennedy"));
|
||||
doSearching(phraseQuery);
|
||||
doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
|
||||
// Currently highlights "John" and "Kennedy" separately
|
||||
assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
|
||||
|
@ -874,7 +960,13 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
|||
@Override
|
||||
public void run() throws Exception {
|
||||
numHighlights = 0;
|
||||
doSearching("John Kenn*");
|
||||
BooleanQuery booleanQuery = new BooleanQuery();
|
||||
booleanQuery.add(new TermQuery(new Term(FIELD_NAME, "john")), Occur.SHOULD);
|
||||
PrefixQuery prefixQuery = new PrefixQuery(new Term(FIELD_NAME, "kenn"));
|
||||
prefixQuery.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
|
||||
booleanQuery.add(prefixQuery, Occur.SHOULD);
|
||||
|
||||
doSearching(booleanQuery);
|
||||
doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
|
||||
assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
|
||||
numHighlights == 5);
|
||||
|
@ -890,7 +982,12 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
|||
@Override
|
||||
public void run() throws Exception {
|
||||
numHighlights = 0;
|
||||
doSearching("JFK OR Kennedy");
|
||||
|
||||
BooleanQuery query = new BooleanQuery();
|
||||
query.add(new TermQuery(new Term(FIELD_NAME, "jfk")), Occur.SHOULD);
|
||||
query.add(new TermQuery(new Term(FIELD_NAME, "kennedy")), Occur.SHOULD);
|
||||
|
||||
doSearching(query);
|
||||
doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
|
||||
assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
|
||||
numHighlights == 5);
|
||||
|
@ -905,7 +1002,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
|||
|
||||
@Override
|
||||
public void run() throws Exception {
|
||||
doSearching("Kennedy");
|
||||
doSearching(new TermQuery(new Term(FIELD_NAME, "kennedy")));
|
||||
numHighlights = 0;
|
||||
for (int i = 0; i < hits.totalHits; i++) {
|
||||
String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
|
||||
|
@ -1006,11 +1103,13 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
|||
HashMap<String,String> synonyms = new HashMap<String,String>();
|
||||
synonyms.put("football", "soccer,footie");
|
||||
Analyzer analyzer = new SynonymAnalyzer(synonyms);
|
||||
String srchkey = "football";
|
||||
|
||||
String s = "football-soccer in the euro 2004 footie competition";
|
||||
QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, "bookid", analyzer);
|
||||
Query query = parser.parse(srchkey);
|
||||
|
||||
BooleanQuery query = new BooleanQuery();
|
||||
query.add(new TermQuery(new Term("bookid", "football")), Occur.SHOULD);
|
||||
query.add(new TermQuery(new Term("bookid", "soccer")), Occur.SHOULD);
|
||||
query.add(new TermQuery(new Term("bookid", "footie")), Occur.SHOULD);
|
||||
|
||||
TokenStream tokenStream = analyzer.tokenStream(null, new StringReader(s));
|
||||
|
||||
|
@ -1037,7 +1136,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
|||
@Override
|
||||
public void run() throws Exception {
|
||||
numHighlights = 0;
|
||||
doSearching("Kennedy");
|
||||
doSearching(new TermQuery(new Term(FIELD_NAME, "kennedy")));
|
||||
// new Highlighter(HighlighterTest.this, new QueryTermScorer(query));
|
||||
|
||||
for (int i = 0; i < hits.totalHits; i++) {
|
||||
|
@ -1061,7 +1160,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
|||
@Override
|
||||
public void run() throws Exception {
|
||||
|
||||
doSearching("Kennedy");
|
||||
doSearching(new TermQuery(new Term(FIELD_NAME, "kennedy")));
|
||||
|
||||
for (int i = 0; i < hits.totalHits; i++) {
|
||||
String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
|
||||
|
@ -1102,7 +1201,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
|||
@Override
|
||||
public void run() throws Exception {
|
||||
numHighlights = 0;
|
||||
doSearching("meat");
|
||||
doSearching(new TermQuery(new Term(FIELD_NAME, "meat")));
|
||||
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(texts[0]));
|
||||
Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream,
|
||||
HighlighterTest.this);// new Highlighter(this, new
|
||||
|
@ -1199,9 +1298,11 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
|||
if (searcher != null) searcher.close();
|
||||
searcher = new IndexSearcher(ramDir, true);
|
||||
Analyzer analyzer = new MockAnalyzer(random, MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true);
|
||||
|
||||
BooleanQuery query = new BooleanQuery();
|
||||
query.add(new WildcardQuery(new Term(FIELD_NAME, "jf?")), Occur.SHOULD);
|
||||
query.add(new WildcardQuery(new Term(FIELD_NAME, "kenned*")), Occur.SHOULD);
|
||||
|
||||
QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, FIELD_NAME, analyzer);
|
||||
Query query = parser.parse("JF? or Kenned*");
|
||||
if (VERBOSE) System.out.println("Searching with primitive query");
|
||||
// forget to set this and...
|
||||
// query=query.rewrite(reader);
|
||||
|
@ -1243,7 +1344,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
|||
|
||||
@Override
|
||||
public void run() throws Exception {
|
||||
doSearching("AnInvalidQueryWhichShouldYieldNoResults");
|
||||
doSearching(new TermQuery(new Term(FIELD_NAME, "aninvalidquerywhichshouldyieldnoresults")));
|
||||
|
||||
for (String text : texts) {
|
||||
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
|
||||
|
@ -1313,8 +1414,10 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
|||
@Override
|
||||
public void run() throws Exception {
|
||||
String docMainText = "fred is one of the people";
|
||||
QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, FIELD_NAME, analyzer);
|
||||
Query query = parser.parse("fred category:people");
|
||||
|
||||
BooleanQuery query = new BooleanQuery();
|
||||
query.add(new TermQuery(new Term(FIELD_NAME, "fred")), Occur.SHOULD);
|
||||
query.add(new TermQuery(new Term("category", "people")), Occur.SHOULD);
|
||||
|
||||
// highlighting respects fieldnames used in query
|
||||
|
||||
|
@ -1453,64 +1556,68 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
|||
Highlighter highlighter;
|
||||
String result;
|
||||
|
||||
query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("foo");
|
||||
query = new TermQuery(new Term("text", "foo"));
|
||||
highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
|
||||
result = highlighter.getBestFragments(getTS2(), s, 3, "...");
|
||||
assertEquals("Hi-Speed10 <B>foo</B>", result);
|
||||
|
||||
query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("10");
|
||||
query = new TermQuery(new Term("text", "10"));
|
||||
highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
|
||||
result = highlighter.getBestFragments(getTS2(), s, 3, "...");
|
||||
assertEquals("Hi-Speed<B>10</B> foo", result);
|
||||
|
||||
query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("hi");
|
||||
query = new TermQuery(new Term("text", "hi"));
|
||||
highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
|
||||
result = highlighter.getBestFragments(getTS2(), s, 3, "...");
|
||||
assertEquals("<B>Hi</B>-Speed10 foo", result);
|
||||
|
||||
query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("speed");
|
||||
query = new TermQuery(new Term("text", "speed"));
|
||||
highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
|
||||
result = highlighter.getBestFragments(getTS2(), s, 3, "...");
|
||||
assertEquals("Hi-<B>Speed</B>10 foo", result);
|
||||
|
||||
query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("hispeed");
|
||||
query = new TermQuery(new Term("text", "hispeed"));
|
||||
highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
|
||||
result = highlighter.getBestFragments(getTS2(), s, 3, "...");
|
||||
assertEquals("<B>Hi-Speed</B>10 foo", result);
|
||||
|
||||
query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("hi speed");
|
||||
BooleanQuery booleanQuery = new BooleanQuery();
|
||||
booleanQuery.add(new TermQuery(new Term("text", "hi")), Occur.SHOULD);
|
||||
booleanQuery.add(new TermQuery(new Term("text", "speed")), Occur.SHOULD);
|
||||
|
||||
query = booleanQuery;
|
||||
highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
|
||||
result = highlighter.getBestFragments(getTS2(), s, 3, "...");
|
||||
assertEquals("<B>Hi-Speed</B>10 foo", result);
|
||||
|
||||
// ///////////////// same tests, just put the bigger overlapping token
|
||||
// first
|
||||
query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("foo");
|
||||
query = new TermQuery(new Term("text", "foo"));
|
||||
highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
|
||||
result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
|
||||
assertEquals("Hi-Speed10 <B>foo</B>", result);
|
||||
|
||||
query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("10");
|
||||
query = new TermQuery(new Term("text", "10"));
|
||||
highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
|
||||
result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
|
||||
assertEquals("Hi-Speed<B>10</B> foo", result);
|
||||
|
||||
query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("hi");
|
||||
query = new TermQuery(new Term("text", "hi"));
|
||||
highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
|
||||
result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
|
||||
assertEquals("<B>Hi</B>-Speed10 foo", result);
|
||||
|
||||
query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("speed");
|
||||
query = new TermQuery(new Term("text", "speed"));
|
||||
highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
|
||||
result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
|
||||
assertEquals("Hi-<B>Speed</B>10 foo", result);
|
||||
|
||||
query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("hispeed");
|
||||
query = new TermQuery(new Term("text", "hispeed"));
|
||||
highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
|
||||
result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
|
||||
assertEquals("<B>Hi-Speed</B>10 foo", result);
|
||||
|
||||
query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("hi speed");
|
||||
query = booleanQuery;
|
||||
highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
|
||||
result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
|
||||
assertEquals("<B>Hi-Speed</B>10 foo", result);
|
||||
|
@ -1554,9 +1661,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
|||
}
|
||||
|
||||
private void searchIndex() throws IOException, ParseException, InvalidTokenOffsetsException {
|
||||
String q = "t_text1:random";
|
||||
QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, "t_text1", a );
|
||||
Query query = parser.parse( q );
|
||||
Query query = new TermQuery(new Term("t_text1", "random"));
|
||||
IndexSearcher searcher = new IndexSearcher( dir, true );
|
||||
// This scorer can return negative idf -> null fragment
|
||||
Scorer scorer = new QueryTermScorer( query, searcher.getIndexReader(), "t_text1" );
|
||||
|
@ -1608,14 +1713,6 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
|||
return "<B>" + originalText + "</B>";
|
||||
}
|
||||
|
||||
public void doSearching(String queryString) throws Exception {
|
||||
QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, FIELD_NAME, analyzer);
|
||||
parser.setEnablePositionIncrements(true);
|
||||
parser.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
|
||||
query = parser.parse(queryString);
|
||||
doSearching(query);
|
||||
}
|
||||
|
||||
public void doSearching(Query unReWrittenQuery) throws Exception {
|
||||
if (searcher != null) searcher.close();
|
||||
searcher = new IndexSearcher(ramDir, true);
|
||||
|
|
|
@ -19,7 +19,10 @@ package org.apache.lucene.search.vectorhighlight;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.io.StringReader;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
|
@ -28,6 +31,7 @@ import org.apache.lucene.analysis.TokenStream;
|
|||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.Field.Index;
|
||||
|
@ -44,6 +48,7 @@ import org.apache.lucene.search.PhraseQuery;
|
|||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
public abstract class AbstractTestCase extends LuceneTestCase {
|
||||
|
@ -56,9 +61,7 @@ public abstract class AbstractTestCase extends LuceneTestCase {
|
|||
protected Analyzer analyzerB;
|
||||
protected Analyzer analyzerK;
|
||||
protected IndexReader reader;
|
||||
protected QueryParser paW;
|
||||
protected QueryParser paB;
|
||||
|
||||
|
||||
protected static final String[] shortMVValues = {
|
||||
"",
|
||||
"",
|
||||
|
@ -90,8 +93,6 @@ public abstract class AbstractTestCase extends LuceneTestCase {
|
|||
analyzerW = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false);
|
||||
analyzerB = new BigramAnalyzer();
|
||||
analyzerK = new MockAnalyzer(random, MockTokenizer.KEYWORD, false);
|
||||
paW = new QueryParser(TEST_VERSION_CURRENT, F, analyzerW );
|
||||
paB = new QueryParser(TEST_VERSION_CURRENT, F, analyzerB );
|
||||
dir = newDirectory();
|
||||
}
|
||||
|
||||
|
@ -172,6 +173,33 @@ public abstract class AbstractTestCase extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
protected List<BytesRef> analyze(String text, String field, Analyzer analyzer) throws IOException {
|
||||
List<BytesRef> bytesRefs = new ArrayList<BytesRef>();
|
||||
|
||||
TokenStream tokenStream = analyzer.reusableTokenStream(field, new StringReader(text));
|
||||
TermToBytesRefAttribute termAttribute = tokenStream.getAttribute(TermToBytesRefAttribute.class);
|
||||
|
||||
BytesRef bytesRef = termAttribute.getBytesRef();
|
||||
|
||||
while (tokenStream.incrementToken()) {
|
||||
termAttribute.fillBytesRef();
|
||||
bytesRefs.add(new BytesRef(bytesRef));
|
||||
}
|
||||
|
||||
tokenStream.end();
|
||||
tokenStream.close();
|
||||
|
||||
return bytesRefs;
|
||||
}
|
||||
|
||||
protected PhraseQuery toPhraseQuery(List<BytesRef> bytesRefs, String field) {
|
||||
PhraseQuery phraseQuery = new PhraseQuery();
|
||||
for (BytesRef bytesRef : bytesRefs) {
|
||||
phraseQuery.add(new Term(field, bytesRef));
|
||||
}
|
||||
return phraseQuery;
|
||||
}
|
||||
|
||||
static final class BigramAnalyzer extends Analyzer {
|
||||
@Override
|
||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
||||
|
|
|
@ -22,19 +22,33 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.PhraseQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.BooleanClause.Occur;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.vectorhighlight.FieldQuery.QueryPhraseMap;
|
||||
import org.apache.lucene.search.vectorhighlight.FieldTermStack.TermInfo;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
public class FieldQueryTest extends AbstractTestCase {
|
||||
|
||||
public void testFlattenBoolean() throws Exception {
|
||||
Query query = paW.parse( "A AND B OR C NOT (D AND E)" );
|
||||
FieldQuery fq = new FieldQuery( query, true, true );
|
||||
BooleanQuery booleanQuery = new BooleanQuery();
|
||||
booleanQuery.add(new TermQuery(new Term(F, "A")), Occur.MUST);
|
||||
booleanQuery.add(new TermQuery(new Term(F, "B")), Occur.MUST);
|
||||
booleanQuery.add(new TermQuery(new Term(F, "C")), Occur.SHOULD);
|
||||
|
||||
BooleanQuery innerQuery = new BooleanQuery();
|
||||
innerQuery.add(new TermQuery(new Term(F, "D")), Occur.MUST);
|
||||
innerQuery.add(new TermQuery(new Term(F, "E")), Occur.MUST);
|
||||
booleanQuery.add(innerQuery, Occur.MUST_NOT);
|
||||
|
||||
FieldQuery fq = new FieldQuery(booleanQuery, true, true );
|
||||
Set<Query> flatQueries = new HashSet<Query>();
|
||||
fq.flatten( query, flatQueries );
|
||||
fq.flatten(booleanQuery, flatQueries);
|
||||
assertCollectionQueries( flatQueries, tq( "A" ), tq( "B" ), tq( "C" ) );
|
||||
}
|
||||
|
||||
|
@ -47,15 +61,25 @@ public class FieldQueryTest extends AbstractTestCase {
|
|||
}
|
||||
|
||||
public void testFlattenTermAndPhrase() throws Exception {
|
||||
Query query = paW.parse( "A AND \"B C\"" );
|
||||
FieldQuery fq = new FieldQuery( query, true, true );
|
||||
BooleanQuery booleanQuery = new BooleanQuery();
|
||||
booleanQuery.add(new TermQuery(new Term(F, "A")), Occur.MUST);
|
||||
PhraseQuery phraseQuery = new PhraseQuery();
|
||||
phraseQuery.add(new Term(F, "B"));
|
||||
phraseQuery.add(new Term(F, "C"));
|
||||
booleanQuery.add(phraseQuery, Occur.MUST);
|
||||
|
||||
FieldQuery fq = new FieldQuery(booleanQuery, true, true );
|
||||
Set<Query> flatQueries = new HashSet<Query>();
|
||||
fq.flatten( query, flatQueries );
|
||||
fq.flatten(booleanQuery, flatQueries);
|
||||
assertCollectionQueries( flatQueries, tq( "A" ), pqF( "B", "C" ) );
|
||||
}
|
||||
|
||||
public void testFlattenTermAndPhrase2gram() throws Exception {
|
||||
Query query = paB.parse( "AA AND \"BCD\" OR \"EFGH\"" );
|
||||
BooleanQuery query = new BooleanQuery();
|
||||
query.add(new TermQuery(new Term(F, "AA")), Occur.MUST);
|
||||
query.add(toPhraseQuery(analyze("BCD", F, analyzerB), F), Occur.MUST);
|
||||
query.add(toPhraseQuery(analyze("EFGH", F, analyzerB), F), Occur.SHOULD);
|
||||
|
||||
FieldQuery fq = new FieldQuery( query, true, true );
|
||||
Set<Query> flatQueries = new HashSet<Query>();
|
||||
fq.flatten( query, flatQueries );
|
||||
|
@ -232,7 +256,16 @@ public class FieldQueryTest extends AbstractTestCase {
|
|||
}
|
||||
|
||||
public void testGetTermSet() throws Exception {
|
||||
Query query = paW.parse( "A AND B OR x:C NOT (D AND E)" );
|
||||
BooleanQuery query = new BooleanQuery();
|
||||
query.add(new TermQuery(new Term(F, "A")), Occur.MUST);
|
||||
query.add(new TermQuery(new Term(F, "B")), Occur.MUST);
|
||||
query.add(new TermQuery(new Term("x", "C")), Occur.SHOULD);
|
||||
|
||||
BooleanQuery innerQuery = new BooleanQuery();
|
||||
innerQuery.add(new TermQuery(new Term(F, "D")), Occur.MUST);
|
||||
innerQuery.add(new TermQuery(new Term(F, "E")), Occur.MUST);
|
||||
query.add(innerQuery, Occur.MUST_NOT);
|
||||
|
||||
FieldQuery fq = new FieldQuery( query, true, true );
|
||||
assertEquals( 2, fq.termSetMap.size() );
|
||||
Set<String> termSet = fq.getTermSet( F );
|
||||
|
@ -679,8 +712,10 @@ public class FieldQueryTest extends AbstractTestCase {
|
|||
}
|
||||
|
||||
public void testQueryPhraseMapOverlap2gram() throws Exception {
|
||||
Query query = paB.parse( "\"abc\" AND \"bcd\"" );
|
||||
|
||||
BooleanQuery query = new BooleanQuery();
|
||||
query.add(toPhraseQuery(analyze("abc", F, analyzerB), F), Occur.MUST);
|
||||
query.add(toPhraseQuery(analyze("bcd", F, analyzerB), F), Occur.MUST);
|
||||
|
||||
// phraseHighlight = true, fieldMatch = true
|
||||
FieldQuery fq = new FieldQuery( query, true, true );
|
||||
Map<String, QueryPhraseMap> map = fq.rootMaps;
|
||||
|
|
|
@ -17,12 +17,20 @@ package org.apache.lucene.search.vectorhighlight;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
|
||||
public class ScoreOrderFragmentsBuilderTest extends AbstractTestCase {
|
||||
|
||||
public void test3Frags() throws Exception {
|
||||
FieldFragList ffl = ffl( "a c", "a b b b b b b b b b b b a b a b b b b b c a a b b" );
|
||||
BooleanQuery query = new BooleanQuery();
|
||||
query.add(new TermQuery(new Term(F, "a")), BooleanClause.Occur.SHOULD);
|
||||
query.add(new TermQuery(new Term(F, "c")), BooleanClause.Occur.SHOULD);
|
||||
|
||||
FieldFragList ffl = ffl(query, "a b b b b b b b b b b b a b a b b b b b c a a b b" );
|
||||
ScoreOrderFragmentsBuilder sofb = new ScoreOrderFragmentsBuilder();
|
||||
String[] f = sofb.createFragments( reader, 0, F, ffl, 3 );
|
||||
assertEquals( 3, f.length );
|
||||
|
@ -32,9 +40,8 @@ public class ScoreOrderFragmentsBuilderTest extends AbstractTestCase {
|
|||
assertEquals( "<b>a</b> b b b b b b b b b ", f[2] );
|
||||
}
|
||||
|
||||
private FieldFragList ffl( String queryValue, String indexValue ) throws Exception {
|
||||
private FieldFragList ffl(Query query, String indexValue ) throws Exception {
|
||||
make1d1fIndex( indexValue );
|
||||
Query query = paW.parse( queryValue );
|
||||
FieldQuery fq = new FieldQuery( query, true, true );
|
||||
FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
|
||||
FieldPhraseList fpl = new FieldPhraseList( stack, fq );
|
||||
|
|
|
@ -17,20 +17,21 @@ package org.apache.lucene.search.vectorhighlight;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.*;
|
||||
|
||||
public class SimpleFragListBuilderTest extends AbstractTestCase {
|
||||
|
||||
public void testNullFieldFragList() throws Exception {
|
||||
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
||||
FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "b c d" ), 100 );
|
||||
FieldFragList ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "b c d" ), 100 );
|
||||
assertEquals( 0, ffl.getFragInfos().size() );
|
||||
}
|
||||
|
||||
public void testTooSmallFragSize() throws Exception {
|
||||
try{
|
||||
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
||||
sflb.createFieldFragList( fpl( "a", "b c d" ), SimpleFragListBuilder.MIN_FRAG_CHAR_SIZE - 1 );
|
||||
sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "b c d" ), SimpleFragListBuilder.MIN_FRAG_CHAR_SIZE - 1 );
|
||||
fail( "IllegalArgumentException must be thrown" );
|
||||
}
|
||||
catch ( IllegalArgumentException expected ) {
|
||||
|
@ -39,14 +40,19 @@ public class SimpleFragListBuilderTest extends AbstractTestCase {
|
|||
|
||||
public void testSmallerFragSizeThanTermQuery() throws Exception {
|
||||
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
||||
FieldFragList ffl = sflb.createFieldFragList( fpl( "abcdefghijklmnopqrs", "abcdefghijklmnopqrs" ), SimpleFragListBuilder.MIN_FRAG_CHAR_SIZE );
|
||||
FieldFragList ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "abcdefghijklmnopqrs")), "abcdefghijklmnopqrs" ), SimpleFragListBuilder.MIN_FRAG_CHAR_SIZE );
|
||||
assertEquals( 1, ffl.getFragInfos().size() );
|
||||
assertEquals( "subInfos=(abcdefghijklmnopqrs((0,19)))/1.0(0,19)", ffl.getFragInfos().get( 0 ).toString() );
|
||||
}
|
||||
|
||||
public void testSmallerFragSizeThanPhraseQuery() throws Exception {
|
||||
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
||||
FieldFragList ffl = sflb.createFieldFragList( fpl( "\"abcdefgh jklmnopqrs\"", "abcdefgh jklmnopqrs" ), SimpleFragListBuilder.MIN_FRAG_CHAR_SIZE );
|
||||
|
||||
PhraseQuery phraseQuery = new PhraseQuery();
|
||||
phraseQuery.add(new Term(F, "abcdefgh"));
|
||||
phraseQuery.add(new Term(F, "jklmnopqrs"));
|
||||
|
||||
FieldFragList ffl = sflb.createFieldFragList( fpl(phraseQuery, "abcdefgh jklmnopqrs" ), SimpleFragListBuilder.MIN_FRAG_CHAR_SIZE );
|
||||
assertEquals( 1, ffl.getFragInfos().size() );
|
||||
if (VERBOSE) System.out.println( ffl.getFragInfos().get( 0 ).toString() );
|
||||
assertEquals( "subInfos=(abcdefghjklmnopqrs((0,21)))/1.0(0,21)", ffl.getFragInfos().get( 0 ).toString() );
|
||||
|
@ -54,39 +60,39 @@ public class SimpleFragListBuilderTest extends AbstractTestCase {
|
|||
|
||||
public void test1TermIndex() throws Exception {
|
||||
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
||||
FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "a" ), 100 );
|
||||
FieldFragList ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "a" ), 100 );
|
||||
assertEquals( 1, ffl.getFragInfos().size() );
|
||||
assertEquals( "subInfos=(a((0,1)))/1.0(0,100)", ffl.getFragInfos().get( 0 ).toString() );
|
||||
}
|
||||
|
||||
public void test2TermsIndex1Frag() throws Exception {
|
||||
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
||||
FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "a a" ), 100 );
|
||||
FieldFragList ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "a a" ), 100 );
|
||||
assertEquals( 1, ffl.getFragInfos().size() );
|
||||
assertEquals( "subInfos=(a((0,1))a((2,3)))/2.0(0,100)", ffl.getFragInfos().get( 0 ).toString() );
|
||||
|
||||
ffl = sflb.createFieldFragList( fpl( "a", "a b b b b b b b b a" ), 20 );
|
||||
ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "a b b b b b b b b a" ), 20 );
|
||||
assertEquals( 1, ffl.getFragInfos().size() );
|
||||
assertEquals( "subInfos=(a((0,1))a((18,19)))/2.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
|
||||
|
||||
ffl = sflb.createFieldFragList( fpl( "a", "b b b b a b b b b a" ), 20 );
|
||||
ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "b b b b a b b b b a" ), 20 );
|
||||
assertEquals( 1, ffl.getFragInfos().size() );
|
||||
assertEquals( "subInfos=(a((8,9))a((18,19)))/2.0(2,22)", ffl.getFragInfos().get( 0 ).toString() );
|
||||
}
|
||||
|
||||
public void test2TermsIndex2Frags() throws Exception {
|
||||
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
||||
FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "a b b b b b b b b b b b b b a" ), 20 );
|
||||
FieldFragList ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "a b b b b b b b b b b b b b a" ), 20 );
|
||||
assertEquals( 2, ffl.getFragInfos().size() );
|
||||
assertEquals( "subInfos=(a((0,1)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
|
||||
assertEquals( "subInfos=(a((28,29)))/1.0(22,42)", ffl.getFragInfos().get( 1 ).toString() );
|
||||
|
||||
ffl = sflb.createFieldFragList( fpl( "a", "a b b b b b b b b b b b b a" ), 20 );
|
||||
ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "a b b b b b b b b b b b b a" ), 20 );
|
||||
assertEquals( 2, ffl.getFragInfos().size() );
|
||||
assertEquals( "subInfos=(a((0,1)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
|
||||
assertEquals( "subInfos=(a((26,27)))/1.0(20,40)", ffl.getFragInfos().get( 1 ).toString() );
|
||||
|
||||
ffl = sflb.createFieldFragList( fpl( "a", "a b b b b b b b b b a" ), 20 );
|
||||
ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "a b b b b b b b b b a" ), 20 );
|
||||
assertEquals( 2, ffl.getFragInfos().size() );
|
||||
assertEquals( "subInfos=(a((0,1)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
|
||||
assertEquals( "subInfos=(a((20,21)))/1.0(20,40)", ffl.getFragInfos().get( 1 ).toString() );
|
||||
|
@ -94,41 +100,56 @@ public class SimpleFragListBuilderTest extends AbstractTestCase {
|
|||
|
||||
public void test2TermsQuery() throws Exception {
|
||||
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
||||
FieldFragList ffl = sflb.createFieldFragList( fpl( "a b", "c d e" ), 20 );
|
||||
|
||||
BooleanQuery booleanQuery = new BooleanQuery();
|
||||
booleanQuery.add(new TermQuery(new Term(F, "a")), BooleanClause.Occur.SHOULD);
|
||||
booleanQuery.add(new TermQuery(new Term(F, "b")), BooleanClause.Occur.SHOULD);
|
||||
|
||||
FieldFragList ffl = sflb.createFieldFragList( fpl(booleanQuery, "c d e" ), 20 );
|
||||
assertEquals( 0, ffl.getFragInfos().size() );
|
||||
|
||||
ffl = sflb.createFieldFragList( fpl( "a b", "d b c" ), 20 );
|
||||
ffl = sflb.createFieldFragList( fpl(booleanQuery, "d b c" ), 20 );
|
||||
assertEquals( 1, ffl.getFragInfos().size() );
|
||||
assertEquals( "subInfos=(b((2,3)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
|
||||
|
||||
ffl = sflb.createFieldFragList( fpl( "a b", "a b c" ), 20 );
|
||||
ffl = sflb.createFieldFragList( fpl(booleanQuery, "a b c" ), 20 );
|
||||
assertEquals( 1, ffl.getFragInfos().size() );
|
||||
assertEquals( "subInfos=(a((0,1))b((2,3)))/2.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
|
||||
}
|
||||
|
||||
public void testPhraseQuery() throws Exception {
|
||||
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
||||
FieldFragList ffl = sflb.createFieldFragList( fpl( "\"a b\"", "c d e" ), 20 );
|
||||
|
||||
PhraseQuery phraseQuery = new PhraseQuery();
|
||||
phraseQuery.add(new Term(F, "a"));
|
||||
phraseQuery.add(new Term(F, "b"));
|
||||
|
||||
FieldFragList ffl = sflb.createFieldFragList( fpl(phraseQuery, "c d e" ), 20 );
|
||||
assertEquals( 0, ffl.getFragInfos().size() );
|
||||
|
||||
ffl = sflb.createFieldFragList( fpl( "\"a b\"", "a c b" ), 20 );
|
||||
ffl = sflb.createFieldFragList( fpl(phraseQuery, "a c b" ), 20 );
|
||||
assertEquals( 0, ffl.getFragInfos().size() );
|
||||
|
||||
ffl = sflb.createFieldFragList( fpl( "\"a b\"", "a b c" ), 20 );
|
||||
ffl = sflb.createFieldFragList( fpl(phraseQuery, "a b c" ), 20 );
|
||||
assertEquals( 1, ffl.getFragInfos().size() );
|
||||
assertEquals( "subInfos=(ab((0,3)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
|
||||
}
|
||||
|
||||
public void testPhraseQuerySlop() throws Exception {
|
||||
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
||||
FieldFragList ffl = sflb.createFieldFragList( fpl( "\"a b\"~1", "a c b" ), 20 );
|
||||
|
||||
PhraseQuery phraseQuery = new PhraseQuery();
|
||||
phraseQuery.setSlop(1);
|
||||
phraseQuery.add(new Term(F, "a"));
|
||||
phraseQuery.add(new Term(F, "b"));
|
||||
|
||||
FieldFragList ffl = sflb.createFieldFragList( fpl(phraseQuery, "a c b" ), 20 );
|
||||
assertEquals( 1, ffl.getFragInfos().size() );
|
||||
assertEquals( "subInfos=(ab((0,1)(4,5)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
|
||||
}
|
||||
|
||||
private FieldPhraseList fpl( String queryValue, String indexValue ) throws Exception {
|
||||
private FieldPhraseList fpl(Query query, String indexValue ) throws Exception {
|
||||
make1d1fIndex( indexValue );
|
||||
Query query = paW.parse( queryValue );
|
||||
FieldQuery fq = new FieldQuery( query, true, true );
|
||||
FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
|
||||
return new FieldPhraseList( stack, fq );
|
||||
|
|
|
@ -26,13 +26,17 @@ import org.apache.lucene.index.IndexReader;
|
|||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
|
||||
|
||||
public class SimpleFragmentsBuilderTest extends AbstractTestCase {
|
||||
|
||||
public void test1TermIndex() throws Exception {
|
||||
FieldFragList ffl = ffl( "a", "a" );
|
||||
FieldFragList ffl = ffl(new TermQuery(new Term(F, "a")), "a" );
|
||||
SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
|
||||
assertEquals( "<b>a</b> ", sfb.createFragment( reader, 0, F, ffl ) );
|
||||
|
||||
|
@ -42,7 +46,7 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase {
|
|||
}
|
||||
|
||||
public void test2Frags() throws Exception {
|
||||
FieldFragList ffl = ffl( "a", "a b b b b b b b b b b b a b a b" );
|
||||
FieldFragList ffl = ffl(new TermQuery(new Term(F, "a")), "a b b b b b b b b b b b a b a b" );
|
||||
SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
|
||||
String[] f = sfb.createFragments( reader, 0, F, ffl, 3 );
|
||||
// 3 snippets requested, but should be 2
|
||||
|
@ -52,7 +56,11 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase {
|
|||
}
|
||||
|
||||
public void test3Frags() throws Exception {
|
||||
FieldFragList ffl = ffl( "a c", "a b b b b b b b b b b b a b a b b b b b c a a b b" );
|
||||
BooleanQuery booleanQuery = new BooleanQuery();
|
||||
booleanQuery.add(new TermQuery(new Term(F, "a")), BooleanClause.Occur.SHOULD);
|
||||
booleanQuery.add(new TermQuery(new Term(F, "c")), BooleanClause.Occur.SHOULD);
|
||||
|
||||
FieldFragList ffl = ffl(booleanQuery, "a b b b b b b b b b b b a b a b b b b b c a a b b" );
|
||||
SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
|
||||
String[] f = sfb.createFragments( reader, 0, F, ffl, 3 );
|
||||
assertEquals( 3, f.length );
|
||||
|
@ -62,7 +70,7 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase {
|
|||
}
|
||||
|
||||
public void testTagsAndEncoder() throws Exception {
|
||||
FieldFragList ffl = ffl( "a", "<h1> a </h1>" );
|
||||
FieldFragList ffl = ffl(new TermQuery(new Term(F, "a")), "<h1> a </h1>" );
|
||||
SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
|
||||
String[] preTags = { "[" };
|
||||
String[] postTags = { "]" };
|
||||
|
@ -70,9 +78,8 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase {
|
|||
sfb.createFragment( reader, 0, F, ffl, preTags, postTags, new SimpleHTMLEncoder() ) );
|
||||
}
|
||||
|
||||
private FieldFragList ffl( String queryValue, String indexValue ) throws Exception {
|
||||
private FieldFragList ffl(Query query, String indexValue ) throws Exception {
|
||||
make1d1fIndex( indexValue );
|
||||
Query query = paW.parse( queryValue );
|
||||
FieldQuery fq = new FieldQuery( query, true, true );
|
||||
FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
|
||||
FieldPhraseList fpl = new FieldPhraseList( stack, fq );
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
package org.apache.lucene.search.vectorhighlight;
|
||||
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
|
@ -23,27 +25,26 @@ public class SingleFragListBuilderTest extends AbstractTestCase {
|
|||
|
||||
public void testNullFieldFragList() throws Exception {
|
||||
SingleFragListBuilder sflb = new SingleFragListBuilder();
|
||||
FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "b c d" ), 100 );
|
||||
FieldFragList ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "b c d" ), 100 );
|
||||
assertEquals( 0, ffl.getFragInfos().size() );
|
||||
}
|
||||
|
||||
public void testShortFieldFragList() throws Exception {
|
||||
SingleFragListBuilder sflb = new SingleFragListBuilder();
|
||||
FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "a b c d" ), 100 );
|
||||
FieldFragList ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "a b c d" ), 100 );
|
||||
assertEquals( 1, ffl.getFragInfos().size() );
|
||||
assertEquals( "subInfos=(a((0,1)))/1.0(0,2147483647)", ffl.getFragInfos().get( 0 ).toString() );
|
||||
}
|
||||
|
||||
public void testLongFieldFragList() throws Exception {
|
||||
SingleFragListBuilder sflb = new SingleFragListBuilder();
|
||||
FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "a b c d", "a b c d e f g h i", "j k l m n o p q r s t u v w x y z a b c", "d e f g" ), 100 );
|
||||
FieldFragList ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "a b c d", "a b c d e f g h i", "j k l m n o p q r s t u v w x y z a b c", "d e f g" ), 100 );
|
||||
assertEquals( 1, ffl.getFragInfos().size() );
|
||||
assertEquals( "subInfos=(a((0,1))a((8,9))a((60,61)))/3.0(0,2147483647)", ffl.getFragInfos().get( 0 ).toString() );
|
||||
}
|
||||
|
||||
private FieldPhraseList fpl( String queryValue, String... indexValues ) throws Exception {
|
||||
private FieldPhraseList fpl(Query query, String... indexValues ) throws Exception {
|
||||
make1dmfIndex( indexValues );
|
||||
Query query = paW.parse( queryValue );
|
||||
FieldQuery fq = new FieldQuery( query, true, true );
|
||||
FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
|
||||
return new FieldPhraseList( stack, fq );
|
||||
|
|
|
@ -16,7 +16,6 @@ package org.apache.lucene.analysis.query;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.io.StringReader;
|
||||
|
||||
|
@ -25,19 +24,12 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
|||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.core.LetterTokenizer;
|
||||
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
|
||||
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.queryParser.ParseException;
|
||||
import org.apache.lucene.queryParser.QueryParser;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.store.RAMDirectory;
|
||||
|
||||
public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
|
||||
|
@ -74,22 +66,14 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
|
|||
super.tearDown();
|
||||
}
|
||||
|
||||
//Helper method to query
|
||||
private int search(Analyzer a, String queryString) throws IOException, ParseException {
|
||||
QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "repetitiveField", a);
|
||||
Query q = qp.parse(queryString);
|
||||
IndexSearcher searcher = newSearcher(reader);
|
||||
int hits = searcher.search(q, null, 1000).totalHits;
|
||||
searcher.close();
|
||||
return hits;
|
||||
}
|
||||
|
||||
public void testUninitializedAnalyzer() throws Exception {
|
||||
//Note: no calls to "addStopWord"
|
||||
String query = "variedField:quick repetitiveField:boring";
|
||||
int numHits1 = search(protectedAnalyzer, query);
|
||||
int numHits2 = search(appAnalyzer, query);
|
||||
assertEquals("No filtering test", numHits1, numHits2);
|
||||
// Note: no calls to "addStopWord"
|
||||
// query = "variedField:quick repetitiveField:boring";
|
||||
TokenStream protectedTokenStream = protectedAnalyzer.reusableTokenStream("variedField", new StringReader("quick"));
|
||||
assertTokenStreamContents(protectedTokenStream, new String[]{"quick"});
|
||||
|
||||
protectedTokenStream = protectedAnalyzer.reusableTokenStream("repetitiveField", new StringReader("boring"));
|
||||
assertTokenStreamContents(protectedTokenStream, new String[]{"boring"});
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -97,36 +81,41 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
|
|||
*/
|
||||
public void testDefaultAddStopWordsIndexReader() throws Exception {
|
||||
protectedAnalyzer.addStopWords(reader);
|
||||
int numHits = search(protectedAnalyzer, "repetitiveField:boring");
|
||||
assertEquals("Default filter should remove all docs", 0, numHits);
|
||||
}
|
||||
TokenStream protectedTokenStream = protectedAnalyzer.reusableTokenStream("repetitiveField", new StringReader("boring"));
|
||||
|
||||
assertTokenStreamContents(protectedTokenStream, new String[0]); // Default stop word filtering will remove boring
|
||||
}
|
||||
|
||||
/*
|
||||
* Test method for 'org.apache.lucene.analysis.QueryAutoStopWordAnalyzer.addStopWords(IndexReader, int)'
|
||||
*/
|
||||
public void testAddStopWordsIndexReaderInt() throws Exception {
|
||||
protectedAnalyzer.addStopWords(reader, 1f / 2f);
|
||||
int numHits = search(protectedAnalyzer, "repetitiveField:boring");
|
||||
assertEquals("A filter on terms in > one half of docs remove boring docs", 0, numHits);
|
||||
|
||||
numHits = search(protectedAnalyzer, "repetitiveField:vaguelyboring");
|
||||
assertTrue("A filter on terms in > half of docs should not remove vaguelyBoring docs", numHits > 1);
|
||||
TokenStream protectedTokenStream = protectedAnalyzer.reusableTokenStream("repetitiveField", new StringReader("boring"));
|
||||
// A filter on terms in > one half of docs remove boring
|
||||
assertTokenStreamContents(protectedTokenStream, new String[0]);
|
||||
|
||||
protectedTokenStream = protectedAnalyzer.reusableTokenStream("repetitiveField", new StringReader("vaguelyboring"));
|
||||
// A filter on terms in > half of docs should not remove vaguelyBoring
|
||||
assertTokenStreamContents(protectedTokenStream, new String[]{"vaguelyboring"});
|
||||
|
||||
protectedAnalyzer.addStopWords(reader, 1f / 4f);
|
||||
numHits = search(protectedAnalyzer, "repetitiveField:vaguelyboring");
|
||||
assertEquals("A filter on terms in > quarter of docs should remove vaguelyBoring docs", 0, numHits);
|
||||
protectedTokenStream = protectedAnalyzer.reusableTokenStream("repetitiveField", new StringReader("vaguelyboring"));
|
||||
// A filter on terms in > quarter of docs should remove vaguelyBoring
|
||||
assertTokenStreamContents(protectedTokenStream, new String[0]);
|
||||
}
|
||||
|
||||
|
||||
public void testAddStopWordsIndexReaderStringFloat() throws Exception {
|
||||
protectedAnalyzer.addStopWords(reader, "variedField", 1f / 2f);
|
||||
int numHits = search(protectedAnalyzer, "repetitiveField:boring");
|
||||
assertTrue("A filter on one Field should not affect queris on another", numHits > 0);
|
||||
TokenStream protectedTokenStream = protectedAnalyzer.reusableTokenStream("repetitiveField", new StringReader("boring"));
|
||||
// A filter on one Field should not affect queries on another
|
||||
assertTokenStreamContents(protectedTokenStream, new String[]{"boring"});
|
||||
|
||||
protectedAnalyzer.addStopWords(reader, "repetitiveField", 1f / 2f);
|
||||
numHits = search(protectedAnalyzer, "repetitiveField:boring");
|
||||
assertEquals("A filter on the right Field should affect queries on it", numHits, 0);
|
||||
protectedTokenStream = protectedAnalyzer.reusableTokenStream("repetitiveField", new StringReader("boring"));
|
||||
// A filter on the right Field should affect queries on it
|
||||
assertTokenStreamContents(protectedTokenStream, new String[0]);
|
||||
}
|
||||
|
||||
public void testAddStopWordsIndexReaderStringInt() throws Exception {
|
||||
|
@ -144,12 +133,14 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
|
|||
|
||||
public void testNoFieldNamePollution() throws Exception {
|
||||
protectedAnalyzer.addStopWords(reader, "repetitiveField", 10);
|
||||
int numHits = search(protectedAnalyzer, "repetitiveField:boring");
|
||||
assertEquals("Check filter set up OK", 0, numHits);
|
||||
|
||||
numHits = search(protectedAnalyzer, "variedField:boring");
|
||||
assertTrue("Filter should not prevent stopwords in one field being used in another ", numHits > 0);
|
||||
TokenStream protectedTokenStream = protectedAnalyzer.reusableTokenStream("repetitiveField", new StringReader("boring"));
|
||||
// Check filter set up OK
|
||||
assertTokenStreamContents(protectedTokenStream, new String[0]);
|
||||
|
||||
protectedTokenStream = protectedAnalyzer.reusableTokenStream("variedField", new StringReader("boring"));
|
||||
// Filter should not prevent stopwords in one field being used in another
|
||||
assertTokenStreamContents(protectedTokenStream, new String[]{"boring"});
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -170,10 +161,12 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
|
|||
public void testWrappingNonReusableAnalyzer() throws Exception {
|
||||
QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, new NonreusableAnalyzer());
|
||||
a.addStopWords(reader, 10);
|
||||
int numHits = search(a, "repetitiveField:boring");
|
||||
assertTrue(numHits == 0);
|
||||
numHits = search(a, "repetitiveField:vaguelyboring");
|
||||
assertTrue(numHits == 0);
|
||||
|
||||
TokenStream tokenStream = a.reusableTokenStream("repetitiveField", new StringReader("boring"));
|
||||
assertTokenStreamContents(tokenStream, new String[0]);
|
||||
|
||||
tokenStream = a.reusableTokenStream("repetitiveField", new StringReader("vaguelyboring"));
|
||||
assertTokenStreamContents(tokenStream, new String[0]);
|
||||
}
|
||||
|
||||
public void testTokenStream() throws Exception {
|
||||
|
|
|
@ -32,14 +32,7 @@ import org.apache.lucene.document.Field;
|
|||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.queryParser.QueryParser;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.PhraseQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.*;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.RAMDirectory;
|
||||
|
||||
|
@ -82,16 +75,6 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
|
|||
return new IndexSearcher(dir, true);
|
||||
}
|
||||
|
||||
protected ScoreDoc[] queryParsingTest(Analyzer analyzer, String qs) throws Exception {
|
||||
searcher = setUpSearcher(analyzer);
|
||||
|
||||
QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "content", analyzer);
|
||||
|
||||
Query q = qp.parse(qs);
|
||||
|
||||
return searcher.search(q, null, 1000).scoreDocs;
|
||||
}
|
||||
|
||||
protected void compareRanks(ScoreDoc[] hits, int[] ranks) throws Exception {
|
||||
assertEquals(ranks.length, hits.length);
|
||||
for (int i = 0; i < ranks.length; i++) {
|
||||
|
@ -99,51 +82,6 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Will not work on an index without unigrams, since QueryParser automatically
|
||||
* tokenizes on whitespace.
|
||||
*/
|
||||
public void testShingleAnalyzerWrapperQueryParsing() throws Exception {
|
||||
ScoreDoc[] hits = queryParsingTest(new ShingleAnalyzerWrapper
|
||||
(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), 2),
|
||||
"test sentence");
|
||||
int[] ranks = new int[] { 1, 2, 0 };
|
||||
compareRanks(hits, ranks);
|
||||
}
|
||||
|
||||
/*
|
||||
* This one fails with an exception.
|
||||
*/
|
||||
public void testShingleAnalyzerWrapperPhraseQueryParsingFails() throws Exception {
|
||||
ScoreDoc[] hits = queryParsingTest(new ShingleAnalyzerWrapper
|
||||
(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), 2),
|
||||
"\"this sentence\"");
|
||||
int[] ranks = new int[] { 0 };
|
||||
compareRanks(hits, ranks);
|
||||
}
|
||||
|
||||
/*
|
||||
* This one works, actually.
|
||||
*/
|
||||
public void testShingleAnalyzerWrapperPhraseQueryParsing() throws Exception {
|
||||
ScoreDoc[] hits = queryParsingTest(new ShingleAnalyzerWrapper
|
||||
(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), 2),
|
||||
"\"test sentence\"");
|
||||
int[] ranks = new int[] { 1 };
|
||||
compareRanks(hits, ranks);
|
||||
}
|
||||
|
||||
/*
|
||||
* Same as above, is tokenized without using the analyzer.
|
||||
*/
|
||||
public void testShingleAnalyzerWrapperRequiredQueryParsing() throws Exception {
|
||||
ScoreDoc[] hits = queryParsingTest(new ShingleAnalyzerWrapper
|
||||
(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), 2),
|
||||
"+test +sentence");
|
||||
int[] ranks = new int[] { 1, 2 };
|
||||
compareRanks(hits, ranks);
|
||||
}
|
||||
|
||||
/*
|
||||
* This shows how to construct a phrase query containing shingles.
|
||||
*/
|
||||
|
@ -153,8 +91,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
|
|||
|
||||
PhraseQuery q = new PhraseQuery();
|
||||
|
||||
TokenStream ts = analyzer.tokenStream("content",
|
||||
new StringReader("this sentence"));
|
||||
TokenStream ts = analyzer.tokenStream("content", new StringReader("this sentence"));
|
||||
int j = -1;
|
||||
|
||||
PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class);
|
||||
|
@ -183,8 +120,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
|
|||
|
||||
BooleanQuery q = new BooleanQuery();
|
||||
|
||||
TokenStream ts = analyzer.tokenStream("content",
|
||||
new StringReader("test sentence"));
|
||||
TokenStream ts = analyzer.tokenStream("content", new StringReader("test sentence"));
|
||||
|
||||
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
|
||||
|
||||
|
|
|
@ -20,11 +20,7 @@ package org.apache.lucene.queries;
|
|||
import org.apache.lucene.queries.function.FunctionQuery;
|
||||
import org.apache.lucene.queries.function.FunctionTestSetup;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
import org.apache.lucene.queries.function.valuesource.ByteFieldSource;
|
||||
import org.apache.lucene.queries.function.valuesource.FloatFieldSource;
|
||||
import org.apache.lucene.queries.function.valuesource.IntFieldSource;
|
||||
import org.apache.lucene.queries.function.valuesource.ShortFieldSource;
|
||||
import org.apache.lucene.queryParser.QueryParser;
|
||||
import org.apache.lucene.search.*;
|
||||
import org.apache.lucene.search.cache.*;
|
||||
import org.junit.BeforeClass;
|
||||
|
@ -198,9 +194,10 @@ public class TestCustomScoreQuery extends FunctionTestSetup {
|
|||
|
||||
@Test
|
||||
public void testCustomExternalQuery() throws Exception {
|
||||
QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, TEXT_FIELD,anlzr);
|
||||
String qtxt = "first aid text"; // from the doc texts in FunctionQuerySetup.
|
||||
Query q1 = qp.parse(qtxt);
|
||||
BooleanQuery q1 = new BooleanQuery();
|
||||
q1.add(new TermQuery(new Term(TEXT_FIELD, "first")), BooleanClause.Occur.SHOULD);
|
||||
q1.add(new TermQuery(new Term(TEXT_FIELD, "aid")), BooleanClause.Occur.SHOULD);
|
||||
q1.add(new TermQuery(new Term(TEXT_FIELD, "text")), BooleanClause.Occur.SHOULD);
|
||||
|
||||
final Query q = new CustomExternalQuery(q1);
|
||||
log(q);
|
||||
|
@ -243,11 +240,12 @@ public class TestCustomScoreQuery extends FunctionTestSetup {
|
|||
FunctionQuery functionQuery = new FunctionQuery(valueSource);
|
||||
float boost = (float) dboost;
|
||||
IndexSearcher s = new IndexSearcher(dir, true);
|
||||
QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, TEXT_FIELD, anlzr);
|
||||
String qtxt = "first aid text"; // from the doc texts in FunctionQuerySetup.
|
||||
|
||||
// regular (boolean) query.
|
||||
Query q1 = qp.parse(qtxt);
|
||||
BooleanQuery q1 = new BooleanQuery();
|
||||
q1.add(new TermQuery(new Term(TEXT_FIELD, "first")), BooleanClause.Occur.SHOULD);
|
||||
q1.add(new TermQuery(new Term(TEXT_FIELD, "aid")), BooleanClause.Occur.SHOULD);
|
||||
q1.add(new TermQuery(new Term(TEXT_FIELD, "text")), BooleanClause.Occur.SHOULD);
|
||||
log(q1);
|
||||
|
||||
// custom query, that should score the same as q1.
|
||||
|
|
Loading…
Reference in New Issue