Merged with trunk up to r1144714

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/solr2452@1144715 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Steven Rowe 2011-07-09 18:50:54 +00:00
commit 9e020991ef
38 changed files with 786 additions and 285 deletions

View File

@@ -421,6 +421,8 @@ New features
 * LUCENE-2862: Added TermsEnum.totalTermFreq() and
   Terms.getSumTotalTermFreq(). (Mike McCandless, Robert Muir)

+* LUCENE-3290: Added Terms.getSumDocFreq() (Mike McCandless, Robert Muir)
+
 * LUCENE-3003: Added new expert class oal.index.DocTermsOrd,
   refactored from Solr's UnInvertedField, for accessing term ords for
   multi-valued fields, per document. This is similar to FieldCache in
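A minimal sketch of reading the new statistics from the two entries above on a trunk reader of this era (method names are taken from the CHANGES entries; exact signatures on this branch may differ, and imports from oal.index are omitted):

    // Hedged sketch: corpus-level term statistics (LUCENE-2862, LUCENE-3290).
    static void printFieldStats(IndexReader reader, String field) throws IOException {
      Terms terms = MultiFields.getTerms(reader, field);
      if (terms == null) return;
      System.out.println("sumTotalTermFreq=" + terms.getSumTotalTermFreq()); // total tokens indexed for the field
      System.out.println("sumDocFreq=" + terms.getSumDocFreq());             // total term-document postings
      TermsEnum termsEnum = terms.iterator();
      while (termsEnum.next() != null) {
        long ttf = termsEnum.totalTermFreq(); // occurrences of the current term across all docs
      }
    }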
@@ -512,6 +514,11 @@ Bug fixes
   causing the file to sometimes be larger than it needed to be. (Mike
   McCandless)

+New Features
+
+* LUCENE-3290: Added FieldInvertState.numUniqueTerms
+  (Mike McCandless, Robert Muir)
+
 Optimizations

 * LUCENE-3201, LUCENE-3218: CompoundFileSystem code has been consolidated

View File

@@ -53,24 +53,13 @@ import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
 import org.apache.lucene.queryParser.ParseException;
-import org.apache.lucene.queryParser.QueryParser;
-import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.FilteredQuery;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.MultiPhraseQuery;
-import org.apache.lucene.search.MultiTermQuery;
-import org.apache.lucene.search.NumericRangeQuery;
-import org.apache.lucene.search.PhraseQuery;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.TermRangeFilter;
-import org.apache.lucene.search.TopDocs;
-import org.apache.lucene.search.WildcardQuery;
+import org.apache.lucene.search.*;
 import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner;
 import org.apache.lucene.search.regex.RegexQuery;
 import org.apache.lucene.search.spans.*;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.automaton.BasicAutomata;
 import org.apache.lucene.util.automaton.CharacterRunAutomaton;
 import org.apache.lucene.util.automaton.RegExp;
@@ -102,8 +91,12 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
   public void testQueryScorerHits() throws Exception {
     Analyzer analyzer = new MockAnalyzer(random, MockTokenizer.SIMPLE, true);
-    QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, FIELD_NAME, analyzer);
-    query = qp.parse("\"very long\"");
+    PhraseQuery phraseQuery = new PhraseQuery();
+    phraseQuery.add(new Term(FIELD_NAME, "very"));
+    phraseQuery.add(new Term(FIELD_NAME, "long"));
+    query = phraseQuery;

     searcher = new IndexSearcher(ramDir, true);
     TopDocs hits = searcher.search(query, 10);
@@ -133,12 +126,15 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
     String s1 = "I call our world Flatland, not because we call it so,";
-    QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, FIELD_NAME, new MockAnalyzer(random, MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true));

     // Verify that a query against the default field results in text being
     // highlighted
     // regardless of the field name.
-    Query q = parser.parse("\"world Flatland\"~3");
+    PhraseQuery q = new PhraseQuery();
+    q.setSlop(3);
+    q.add(new Term(FIELD_NAME, "world"));
+    q.add(new Term(FIELD_NAME, "flatland"));

     String expected = "I call our <B>world</B> <B>Flatland</B>, not because we call it so,";
     String observed = highlightField(q, "SOME_FIELD_NAME", s1);
     if (VERBOSE) System.out.println("Expected: \"" + expected + "\n" + "Observed: \"" + observed);
@@ -150,7 +146,11 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
     // when the query field name differs from the name of the field being
     // highlighted,
     // which in this example happens to be the default field name.
-    q = parser.parse("text:\"world Flatland\"~3");
+    q = new PhraseQuery();
+    q.setSlop(3);
+    q.add(new Term("text", "world"));
+    q.add(new Term("text", "flatland"));

     expected = s1;
     observed = highlightField(q, FIELD_NAME, s1);
     if (VERBOSE) System.out.println("Expected: \"" + expected + "\n" + "Observed: \"" + observed);
@@ -177,7 +177,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
   }

   public void testSimpleSpanHighlighter() throws Exception {
-    doSearching("Kennedy");
+    doSearching(new TermQuery(new Term(FIELD_NAME, "kennedy")));

     int maxNumFragmentsRequired = 2;
@@ -202,23 +202,49 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
   // LUCENE-1752
   public void testRepeatingTermsInMultBooleans() throws Exception {
     String content = "x y z a b c d e f g b c g";
-    String ph1 = "\"a b c d\"";
-    String ph2 = "\"b c g\"";
     String f1 = "f1";
     String f2 = "f2";
-    String f1c = f1 + ":";
-    String f2c = f2 + ":";
-    String q = "(" + f1c + ph1 + " OR " + f2c + ph1 + ") AND (" + f1c + ph2
-        + " OR " + f2c + ph2 + ")";
-    Analyzer analyzer = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false);
-    QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, f1, analyzer);
-    Query query = qp.parse(q);
-    QueryScorer scorer = new QueryScorer(query, f1);
+
+    PhraseQuery f1ph1 = new PhraseQuery();
+    f1ph1.add(new Term(f1, "a"));
+    f1ph1.add(new Term(f1, "b"));
+    f1ph1.add(new Term(f1, "c"));
+    f1ph1.add(new Term(f1, "d"));
+
+    PhraseQuery f2ph1 = new PhraseQuery();
+    f2ph1.add(new Term(f2, "a"));
+    f2ph1.add(new Term(f2, "b"));
+    f2ph1.add(new Term(f2, "c"));
+    f2ph1.add(new Term(f2, "d"));
+
+    PhraseQuery f1ph2 = new PhraseQuery();
+    f1ph2.add(new Term(f1, "b"));
+    f1ph2.add(new Term(f1, "c"));
+    f1ph2.add(new Term(f1, "g"));
+
+    PhraseQuery f2ph2 = new PhraseQuery();
+    f2ph2.add(new Term(f2, "b"));
+    f2ph2.add(new Term(f2, "c"));
+    f2ph2.add(new Term(f2, "g"));
+
+    BooleanQuery booleanQuery = new BooleanQuery();
+    BooleanQuery leftChild = new BooleanQuery();
+    leftChild.add(f1ph1, Occur.SHOULD);
+    leftChild.add(f2ph1, Occur.SHOULD);
+    booleanQuery.add(leftChild, Occur.MUST);
+    BooleanQuery rightChild = new BooleanQuery();
+    rightChild.add(f1ph2, Occur.SHOULD);
+    rightChild.add(f2ph2, Occur.SHOULD);
+    booleanQuery.add(rightChild, Occur.MUST);
+
+    QueryScorer scorer = new QueryScorer(booleanQuery, f1);
     scorer.setExpandMultiTermQuery(false);

     Highlighter h = new Highlighter(this, scorer);

+    Analyzer analyzer = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false);
     h.getBestFragment(analyzer, f1, content);

     assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
@@ -226,7 +252,11 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
   }

   public void testSimpleQueryScorerPhraseHighlighting() throws Exception {
-    doSearching("\"very long and contains\"");
+    PhraseQuery phraseQuery = new PhraseQuery();
+    phraseQuery.add(new Term(FIELD_NAME, "very"));
+    phraseQuery.add(new Term(FIELD_NAME, "long"));
+    phraseQuery.add(new Term(FIELD_NAME, "contains"), 3);
+    doSearching(phraseQuery);

     int maxNumFragmentsRequired = 2;
@@ -248,7 +278,14 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
         numHighlights == 3);

     numHighlights = 0;
-    doSearching("\"This piece of text refers to Kennedy\"");
+
+    phraseQuery = new PhraseQuery();
+    phraseQuery.add(new Term(FIELD_NAME, "piece"), 1);
+    phraseQuery.add(new Term(FIELD_NAME, "text"), 3);
+    phraseQuery.add(new Term(FIELD_NAME, "refers"), 4);
+    phraseQuery.add(new Term(FIELD_NAME, "kennedy"), 6);
+
+    doSearching(phraseQuery);

     maxNumFragmentsRequired = 2;
@@ -270,7 +307,14 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
         numHighlights == 4);

     numHighlights = 0;
-    doSearching("\"lets is a the lets is a the lets is a the lets\"");
+
+    phraseQuery = new PhraseQuery();
+    phraseQuery.add(new Term(FIELD_NAME, "lets"));
+    phraseQuery.add(new Term(FIELD_NAME, "lets"), 4);
+    phraseQuery.add(new Term(FIELD_NAME, "lets"), 8);
+    phraseQuery.add(new Term(FIELD_NAME, "lets"), 12);
+
+    doSearching(phraseQuery);

     maxNumFragmentsRequired = 2;
@@ -366,7 +410,12 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
   }

   public void testSimpleQueryScorerPhraseHighlighting2() throws Exception {
-    doSearching("\"text piece long\"~5");
+    PhraseQuery phraseQuery = new PhraseQuery();
+    phraseQuery.setSlop(5);
+    phraseQuery.add(new Term(FIELD_NAME, "text"));
+    phraseQuery.add(new Term(FIELD_NAME, "piece"));
+    phraseQuery.add(new Term(FIELD_NAME, "long"));
+    doSearching(phraseQuery);

     int maxNumFragmentsRequired = 2;
@@ -388,7 +437,11 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
   }

   public void testSimpleQueryScorerPhraseHighlighting3() throws Exception {
-    doSearching("\"x y z\"");
+    PhraseQuery phraseQuery = new PhraseQuery();
+    phraseQuery.add(new Term(FIELD_NAME, "x"));
+    phraseQuery.add(new Term(FIELD_NAME, "y"));
+    phraseQuery.add(new Term(FIELD_NAME, "z"));
+    doSearching(phraseQuery);

     int maxNumFragmentsRequired = 2;
@@ -410,7 +463,12 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
   }

   public void testSimpleSpanFragmenter() throws Exception {
-    doSearching("\"piece of text that is very long\"");
+    PhraseQuery phraseQuery = new PhraseQuery();
+    phraseQuery.add(new Term(FIELD_NAME, "piece"));
+    phraseQuery.add(new Term(FIELD_NAME, "text"), 2);
+    phraseQuery.add(new Term(FIELD_NAME, "very"), 5);
+    phraseQuery.add(new Term(FIELD_NAME, "long"), 6);
+    doSearching(phraseQuery);

     int maxNumFragmentsRequired = 2;
@ -428,8 +486,12 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
if (VERBOSE) System.out.println("\t" + result); if (VERBOSE) System.out.println("\t" + result);
} }
doSearching("\"been shot\""); phraseQuery = new PhraseQuery();
phraseQuery.add(new Term(FIELD_NAME, "been"));
phraseQuery.add(new Term(FIELD_NAME, "shot"));
doSearching(query);
maxNumFragmentsRequired = 2; maxNumFragmentsRequired = 2;
@@ -451,7 +513,16 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
   // position sensitive query added after position insensitive query
   public void testPosTermStdTerm() throws Exception {
-    doSearching("y \"x y z\"");
+    BooleanQuery booleanQuery = new BooleanQuery();
+    booleanQuery.add(new TermQuery(new Term(FIELD_NAME, "y")), Occur.SHOULD);
+
+    PhraseQuery phraseQuery = new PhraseQuery();
+    phraseQuery.add(new Term(FIELD_NAME, "x"));
+    phraseQuery.add(new Term(FIELD_NAME, "y"));
+    phraseQuery.add(new Term(FIELD_NAME, "z"));
+    booleanQuery.add(phraseQuery, Occur.SHOULD);
+
+    doSearching(booleanQuery);

     int maxNumFragmentsRequired = 2;
@@ -525,7 +596,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
   }

   public void testSimpleQueryTermScorerHighlighter() throws Exception {
-    doSearching("Kennedy");
+    doSearching(new TermQuery(new Term(FIELD_NAME, "kennedy")));
     Highlighter highlighter = new Highlighter(new QueryTermScorer(query));
     highlighter.setTextFragmenter(new SimpleFragmenter(40));
     int maxNumFragmentsRequired = 2;
@@ -591,7 +662,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
       @Override
       public void run() throws Exception {
         numHighlights = 0;
-        doSearching("Kennedy");
+        doSearching(new TermQuery(new Term(FIELD_NAME, "kennedy")));
         doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
         assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
             numHighlights == 4);
@@ -607,7 +678,9 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
       @Override
       public void run() throws Exception {
         numHighlights = 0;
-        doSearching("Kinnedy~0.5");
+        FuzzyQuery fuzzyQuery = new FuzzyQuery(new Term(FIELD_NAME, "kinnedy"), 0.5f);
+        fuzzyQuery.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
+        doSearching(fuzzyQuery);
         doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this, true);
         assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
             numHighlights == 5);
@@ -623,7 +696,9 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
       @Override
       public void run() throws Exception {
         numHighlights = 0;
-        doSearching("K?nnedy");
+        WildcardQuery wildcardQuery = new WildcardQuery(new Term(FIELD_NAME, "k?nnedy"));
+        wildcardQuery.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
+        doSearching(wildcardQuery);
         doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
         assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
             numHighlights == 4);
@@ -639,7 +714,9 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
       @Override
       public void run() throws Exception {
         numHighlights = 0;
-        doSearching("K*dy");
+        WildcardQuery wildcardQuery = new WildcardQuery(new Term(FIELD_NAME, "k*dy"));
+        wildcardQuery.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
+        doSearching(wildcardQuery);
         doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
         assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
             numHighlights == 5);
@@ -660,9 +737,15 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
         // Need to explicitly set the QueryParser property to use TermRangeQuery
         // rather
         // than RangeFilters
-        QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, FIELD_NAME, analyzer);
-        parser.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
-        query = parser.parse(queryString);
+        TermRangeQuery rangeQuery = new TermRangeQuery(
+            FIELD_NAME,
+            new BytesRef("kannedy"),
+            new BytesRef("kznnedy"),
+            true, true);
+        rangeQuery.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
+        query = rangeQuery;
+
         doSearching(query);
         doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
@@ -772,7 +855,10 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
       @Override
       public void run() throws Exception {
         numHighlights = 0;
-        doSearching("\"John Kennedy\"");
+        PhraseQuery phraseQuery = new PhraseQuery();
+        phraseQuery.add(new Term(FIELD_NAME, "john"));
+        phraseQuery.add(new Term(FIELD_NAME, "kennedy"));
+        doSearching(phraseQuery);
         doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
         // Currently highlights "John" and "Kennedy" separately
         assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
@@ -874,7 +960,13 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
       @Override
       public void run() throws Exception {
         numHighlights = 0;
-        doSearching("John Kenn*");
+        BooleanQuery booleanQuery = new BooleanQuery();
+        booleanQuery.add(new TermQuery(new Term(FIELD_NAME, "john")), Occur.SHOULD);
+        PrefixQuery prefixQuery = new PrefixQuery(new Term(FIELD_NAME, "kenn"));
+        prefixQuery.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
+        booleanQuery.add(prefixQuery, Occur.SHOULD);
+
+        doSearching(booleanQuery);
         doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
         assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
             numHighlights == 5);
@@ -890,7 +982,12 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
       @Override
       public void run() throws Exception {
         numHighlights = 0;
-        doSearching("JFK OR Kennedy");
+
+        BooleanQuery query = new BooleanQuery();
+        query.add(new TermQuery(new Term(FIELD_NAME, "jfk")), Occur.SHOULD);
+        query.add(new TermQuery(new Term(FIELD_NAME, "kennedy")), Occur.SHOULD);
+
+        doSearching(query);
         doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
         assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
             numHighlights == 5);
@@ -905,7 +1002,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
       @Override
       public void run() throws Exception {
-        doSearching("Kennedy");
+        doSearching(new TermQuery(new Term(FIELD_NAME, "kennedy")));
         numHighlights = 0;
         for (int i = 0; i < hits.totalHits; i++) {
           String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
@@ -1006,11 +1103,13 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
     HashMap<String,String> synonyms = new HashMap<String,String>();
     synonyms.put("football", "soccer,footie");
     Analyzer analyzer = new SynonymAnalyzer(synonyms);
-    String srchkey = "football";

     String s = "football-soccer in the euro 2004 footie competition";
-    QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, "bookid", analyzer);
-    Query query = parser.parse(srchkey);
+
+    BooleanQuery query = new BooleanQuery();
+    query.add(new TermQuery(new Term("bookid", "football")), Occur.SHOULD);
+    query.add(new TermQuery(new Term("bookid", "soccer")), Occur.SHOULD);
+    query.add(new TermQuery(new Term("bookid", "footie")), Occur.SHOULD);

     TokenStream tokenStream = analyzer.tokenStream(null, new StringReader(s));
@@ -1037,7 +1136,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
       @Override
       public void run() throws Exception {
         numHighlights = 0;
-        doSearching("Kennedy");
+        doSearching(new TermQuery(new Term(FIELD_NAME, "kennedy")));
         // new Highlighter(HighlighterTest.this, new QueryTermScorer(query));

         for (int i = 0; i < hits.totalHits; i++) {
@@ -1061,7 +1160,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
       @Override
       public void run() throws Exception {
-        doSearching("Kennedy");
+        doSearching(new TermQuery(new Term(FIELD_NAME, "kennedy")));

         for (int i = 0; i < hits.totalHits; i++) {
           String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
@@ -1102,7 +1201,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
       @Override
       public void run() throws Exception {
         numHighlights = 0;
-        doSearching("meat");
+        doSearching(new TermQuery(new Term(FIELD_NAME, "meat")));
         TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(texts[0]));
         Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream,
             HighlighterTest.this);// new Highlighter(this, new
@@ -1199,9 +1298,11 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
     if (searcher != null) searcher.close();
     searcher = new IndexSearcher(ramDir, true);
     Analyzer analyzer = new MockAnalyzer(random, MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true);

-    QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, FIELD_NAME, analyzer);
-    Query query = parser.parse("JF? or Kenned*");
+    BooleanQuery query = new BooleanQuery();
+    query.add(new WildcardQuery(new Term(FIELD_NAME, "jf?")), Occur.SHOULD);
+    query.add(new WildcardQuery(new Term(FIELD_NAME, "kenned*")), Occur.SHOULD);
+
     if (VERBOSE) System.out.println("Searching with primitive query");
     // forget to set this and...
     // query=query.rewrite(reader);
@@ -1243,7 +1344,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
       @Override
       public void run() throws Exception {
-        doSearching("AnInvalidQueryWhichShouldYieldNoResults");
+        doSearching(new TermQuery(new Term(FIELD_NAME, "aninvalidquerywhichshouldyieldnoresults")));

         for (String text : texts) {
           TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
@@ -1313,8 +1414,10 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
       @Override
       public void run() throws Exception {
         String docMainText = "fred is one of the people";
-        QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, FIELD_NAME, analyzer);
-        Query query = parser.parse("fred category:people");
+
+        BooleanQuery query = new BooleanQuery();
+        query.add(new TermQuery(new Term(FIELD_NAME, "fred")), Occur.SHOULD);
+        query.add(new TermQuery(new Term("category", "people")), Occur.SHOULD);

         // highlighting respects fieldnames used in query
@@ -1453,64 +1556,68 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
     Highlighter highlighter;
     String result;

-    query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("foo");
+    query = new TermQuery(new Term("text", "foo"));
     highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
     result = highlighter.getBestFragments(getTS2(), s, 3, "...");
     assertEquals("Hi-Speed10 <B>foo</B>", result);

-    query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("10");
+    query = new TermQuery(new Term("text", "10"));
     highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
     result = highlighter.getBestFragments(getTS2(), s, 3, "...");
     assertEquals("Hi-Speed<B>10</B> foo", result);

-    query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("hi");
+    query = new TermQuery(new Term("text", "hi"));
     highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
     result = highlighter.getBestFragments(getTS2(), s, 3, "...");
     assertEquals("<B>Hi</B>-Speed10 foo", result);

-    query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("speed");
+    query = new TermQuery(new Term("text", "speed"));
     highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
     result = highlighter.getBestFragments(getTS2(), s, 3, "...");
     assertEquals("Hi-<B>Speed</B>10 foo", result);

-    query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("hispeed");
+    query = new TermQuery(new Term("text", "hispeed"));
     highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
     result = highlighter.getBestFragments(getTS2(), s, 3, "...");
     assertEquals("<B>Hi-Speed</B>10 foo", result);

-    query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("hi speed");
+    BooleanQuery booleanQuery = new BooleanQuery();
+    booleanQuery.add(new TermQuery(new Term("text", "hi")), Occur.SHOULD);
+    booleanQuery.add(new TermQuery(new Term("text", "speed")), Occur.SHOULD);
+
+    query = booleanQuery;
     highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
     result = highlighter.getBestFragments(getTS2(), s, 3, "...");
     assertEquals("<B>Hi-Speed</B>10 foo", result);

     // ///////////////// same tests, just put the bigger overlapping token
     // first
-    query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("foo");
+    query = new TermQuery(new Term("text", "foo"));
     highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
     result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
     assertEquals("Hi-Speed10 <B>foo</B>", result);

-    query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("10");
+    query = new TermQuery(new Term("text", "10"));
     highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
     result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
     assertEquals("Hi-Speed<B>10</B> foo", result);

-    query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("hi");
+    query = new TermQuery(new Term("text", "hi"));
     highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
     result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
     assertEquals("<B>Hi</B>-Speed10 foo", result);

-    query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("speed");
+    query = new TermQuery(new Term("text", "speed"));
     highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
     result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
     assertEquals("Hi-<B>Speed</B>10 foo", result);

-    query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("hispeed");
+    query = new TermQuery(new Term("text", "hispeed"));
     highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
     result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
     assertEquals("<B>Hi-Speed</B>10 foo", result);

-    query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("hi speed");
+    query = booleanQuery;
     highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
     result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
     assertEquals("<B>Hi-Speed</B>10 foo", result);
@@ -1554,9 +1661,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
   }

   private void searchIndex() throws IOException, ParseException, InvalidTokenOffsetsException {
-    String q = "t_text1:random";
-    QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, "t_text1", a );
-    Query query = parser.parse( q );
+    Query query = new TermQuery(new Term("t_text1", "random"));
     IndexSearcher searcher = new IndexSearcher( dir, true );
     // This scorer can return negative idf -> null fragment
     Scorer scorer = new QueryTermScorer( query, searcher.getIndexReader(), "t_text1" );
@@ -1608,14 +1713,6 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
     return "<B>" + originalText + "</B>";
   }

-  public void doSearching(String queryString) throws Exception {
-    QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, FIELD_NAME, analyzer);
-    parser.setEnablePositionIncrements(true);
-    parser.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
-    query = parser.parse(queryString);
-    doSearching(query);
-  }
-
   public void doSearching(Query unReWrittenQuery) throws Exception {
     if (searcher != null) searcher.close();
     searcher = new IndexSearcher(ramDir, true);

View File

@@ -19,7 +19,10 @@ package org.apache.lucene.search.vectorhighlight;
 import java.io.IOException;
 import java.io.Reader;
+import java.io.StringReader;
+import java.util.ArrayList;
 import java.util.Collection;
+import java.util.List;

 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
@@ -28,6 +31,7 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.Field.Index;
@@ -44,6 +48,7 @@ import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;

 public abstract class AbstractTestCase extends LuceneTestCase {
@@ -56,9 +61,7 @@ public abstract class AbstractTestCase extends LuceneTestCase {
   protected Analyzer analyzerB;
   protected Analyzer analyzerK;
   protected IndexReader reader;
-  protected QueryParser paW;
-  protected QueryParser paB;

   protected static final String[] shortMVValues = {
     "",
     "",
@@ -90,8 +93,6 @@ public abstract class AbstractTestCase extends LuceneTestCase {
     analyzerW = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false);
     analyzerB = new BigramAnalyzer();
     analyzerK = new MockAnalyzer(random, MockTokenizer.KEYWORD, false);
-    paW = new QueryParser(TEST_VERSION_CURRENT, F, analyzerW );
-    paB = new QueryParser(TEST_VERSION_CURRENT, F, analyzerB );
     dir = newDirectory();
   }
@@ -172,6 +173,33 @@ public abstract class AbstractTestCase extends LuceneTestCase {
     }
   }

+  protected List<BytesRef> analyze(String text, String field, Analyzer analyzer) throws IOException {
+    List<BytesRef> bytesRefs = new ArrayList<BytesRef>();
+
+    TokenStream tokenStream = analyzer.reusableTokenStream(field, new StringReader(text));
+    TermToBytesRefAttribute termAttribute = tokenStream.getAttribute(TermToBytesRefAttribute.class);
+
+    BytesRef bytesRef = termAttribute.getBytesRef();
+
+    while (tokenStream.incrementToken()) {
+      termAttribute.fillBytesRef();
+      bytesRefs.add(new BytesRef(bytesRef));
+    }
+
+    tokenStream.end();
+    tokenStream.close();
+
+    return bytesRefs;
+  }
+
+  protected PhraseQuery toPhraseQuery(List<BytesRef> bytesRefs, String field) {
+    PhraseQuery phraseQuery = new PhraseQuery();
+    for (BytesRef bytesRef : bytesRefs) {
+      phraseQuery.add(new Term(field, bytesRef));
+    }
+    return phraseQuery;
+  }
+
   static final class BigramAnalyzer extends Analyzer {
     @Override
     public TokenStream tokenStream(String fieldName, Reader reader) {

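The two helpers added above replace the removed paW/paB parsers: analyze() collects the terms an analyzer produces for a piece of text, and toPhraseQuery() turns that term list into a PhraseQuery. A typical call, as the bigram tests in FieldQueryTest below use it:

    PhraseQuery pq = toPhraseQuery(analyze("BCD", F, analyzerB), F); // bigram phrase: "BC", "CD"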
View File

@@ -22,19 +22,33 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;

+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.vectorhighlight.FieldQuery.QueryPhraseMap;
 import org.apache.lucene.search.vectorhighlight.FieldTermStack.TermInfo;
+import org.apache.lucene.util.BytesRef;

 public class FieldQueryTest extends AbstractTestCase {

   public void testFlattenBoolean() throws Exception {
-    Query query = paW.parse( "A AND B OR C NOT (D AND E)" );
-    FieldQuery fq = new FieldQuery( query, true, true );
+    BooleanQuery booleanQuery = new BooleanQuery();
+    booleanQuery.add(new TermQuery(new Term(F, "A")), Occur.MUST);
+    booleanQuery.add(new TermQuery(new Term(F, "B")), Occur.MUST);
+    booleanQuery.add(new TermQuery(new Term(F, "C")), Occur.SHOULD);
+
+    BooleanQuery innerQuery = new BooleanQuery();
+    innerQuery.add(new TermQuery(new Term(F, "D")), Occur.MUST);
+    innerQuery.add(new TermQuery(new Term(F, "E")), Occur.MUST);
+    booleanQuery.add(innerQuery, Occur.MUST_NOT);
+
+    FieldQuery fq = new FieldQuery(booleanQuery, true, true );
     Set<Query> flatQueries = new HashSet<Query>();
-    fq.flatten( query, flatQueries );
+    fq.flatten(booleanQuery, flatQueries);
     assertCollectionQueries( flatQueries, tq( "A" ), tq( "B" ), tq( "C" ) );
   }
@@ -47,15 +61,25 @@ public class FieldQueryTest extends AbstractTestCase {
   }

   public void testFlattenTermAndPhrase() throws Exception {
-    Query query = paW.parse( "A AND \"B C\"" );
-    FieldQuery fq = new FieldQuery( query, true, true );
+    BooleanQuery booleanQuery = new BooleanQuery();
+    booleanQuery.add(new TermQuery(new Term(F, "A")), Occur.MUST);
+
+    PhraseQuery phraseQuery = new PhraseQuery();
+    phraseQuery.add(new Term(F, "B"));
+    phraseQuery.add(new Term(F, "C"));
+    booleanQuery.add(phraseQuery, Occur.MUST);
+
+    FieldQuery fq = new FieldQuery(booleanQuery, true, true );
     Set<Query> flatQueries = new HashSet<Query>();
-    fq.flatten( query, flatQueries );
+    fq.flatten(booleanQuery, flatQueries);
     assertCollectionQueries( flatQueries, tq( "A" ), pqF( "B", "C" ) );
   }

   public void testFlattenTermAndPhrase2gram() throws Exception {
-    Query query = paB.parse( "AA AND \"BCD\" OR \"EFGH\"" );
+    BooleanQuery query = new BooleanQuery();
+    query.add(new TermQuery(new Term(F, "AA")), Occur.MUST);
+    query.add(toPhraseQuery(analyze("BCD", F, analyzerB), F), Occur.MUST);
+    query.add(toPhraseQuery(analyze("EFGH", F, analyzerB), F), Occur.SHOULD);
+
     FieldQuery fq = new FieldQuery( query, true, true );
     Set<Query> flatQueries = new HashSet<Query>();
     fq.flatten( query, flatQueries );
@@ -232,7 +256,16 @@ public class FieldQueryTest extends AbstractTestCase {
   }

   public void testGetTermSet() throws Exception {
-    Query query = paW.parse( "A AND B OR x:C NOT (D AND E)" );
+    BooleanQuery query = new BooleanQuery();
+    query.add(new TermQuery(new Term(F, "A")), Occur.MUST);
+    query.add(new TermQuery(new Term(F, "B")), Occur.MUST);
+    query.add(new TermQuery(new Term("x", "C")), Occur.SHOULD);
+
+    BooleanQuery innerQuery = new BooleanQuery();
+    innerQuery.add(new TermQuery(new Term(F, "D")), Occur.MUST);
+    innerQuery.add(new TermQuery(new Term(F, "E")), Occur.MUST);
+    query.add(innerQuery, Occur.MUST_NOT);
+
     FieldQuery fq = new FieldQuery( query, true, true );
     assertEquals( 2, fq.termSetMap.size() );
     Set<String> termSet = fq.getTermSet( F );
@@ -679,8 +712,10 @@ public class FieldQueryTest extends AbstractTestCase {
   }

   public void testQueryPhraseMapOverlap2gram() throws Exception {
-    Query query = paB.parse( "\"abc\" AND \"bcd\"" );
+    BooleanQuery query = new BooleanQuery();
+    query.add(toPhraseQuery(analyze("abc", F, analyzerB), F), Occur.MUST);
+    query.add(toPhraseQuery(analyze("bcd", F, analyzerB), F), Occur.MUST);

     // phraseHighlight = true, fieldMatch = true
     FieldQuery fq = new FieldQuery( query, true, true );
     Map<String, QueryPhraseMap> map = fq.rootMaps;

View File

@@ -17,12 +17,20 @@ package org.apache.lucene.search.vectorhighlight;
  * limitations under the License.
  */

+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;

 public class ScoreOrderFragmentsBuilderTest extends AbstractTestCase {

   public void test3Frags() throws Exception {
-    FieldFragList ffl = ffl( "a c", "a b b b b b b b b b b b a b a b b b b b c a a b b" );
+    BooleanQuery query = new BooleanQuery();
+    query.add(new TermQuery(new Term(F, "a")), BooleanClause.Occur.SHOULD);
+    query.add(new TermQuery(new Term(F, "c")), BooleanClause.Occur.SHOULD);
+
+    FieldFragList ffl = ffl(query, "a b b b b b b b b b b b a b a b b b b b c a a b b" );
     ScoreOrderFragmentsBuilder sofb = new ScoreOrderFragmentsBuilder();
     String[] f = sofb.createFragments( reader, 0, F, ffl, 3 );
     assertEquals( 3, f.length );
@@ -32,9 +40,8 @@ public class ScoreOrderFragmentsBuilderTest extends AbstractTestCase {
     assertEquals( "<b>a</b> b b b b b b b b b ", f[2] );
   }

-  private FieldFragList ffl( String queryValue, String indexValue ) throws Exception {
+  private FieldFragList ffl(Query query, String indexValue ) throws Exception {
     make1d1fIndex( indexValue );
-    Query query = paW.parse( queryValue );
     FieldQuery fq = new FieldQuery( query, true, true );
     FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
     FieldPhraseList fpl = new FieldPhraseList( stack, fq );

View File

@@ -17,20 +17,21 @@ package org.apache.lucene.search.vectorhighlight;
  * limitations under the License.
  */

-import org.apache.lucene.search.Query;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.*;

 public class SimpleFragListBuilderTest extends AbstractTestCase {

   public void testNullFieldFragList() throws Exception {
     SimpleFragListBuilder sflb = new SimpleFragListBuilder();
-    FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "b c d" ), 100 );
+    FieldFragList ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "b c d" ), 100 );
     assertEquals( 0, ffl.getFragInfos().size() );
   }

   public void testTooSmallFragSize() throws Exception {
     try{
       SimpleFragListBuilder sflb = new SimpleFragListBuilder();
-      sflb.createFieldFragList( fpl( "a", "b c d" ), SimpleFragListBuilder.MIN_FRAG_CHAR_SIZE - 1 );
+      sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "b c d" ), SimpleFragListBuilder.MIN_FRAG_CHAR_SIZE - 1 );
       fail( "IllegalArgumentException must be thrown" );
     }
     catch ( IllegalArgumentException expected ) {
@@ -39,14 +40,19 @@ public class SimpleFragListBuilderTest extends AbstractTestCase {

   public void testSmallerFragSizeThanTermQuery() throws Exception {
     SimpleFragListBuilder sflb = new SimpleFragListBuilder();
-    FieldFragList ffl = sflb.createFieldFragList( fpl( "abcdefghijklmnopqrs", "abcdefghijklmnopqrs" ), SimpleFragListBuilder.MIN_FRAG_CHAR_SIZE );
+    FieldFragList ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "abcdefghijklmnopqrs")), "abcdefghijklmnopqrs" ), SimpleFragListBuilder.MIN_FRAG_CHAR_SIZE );
     assertEquals( 1, ffl.getFragInfos().size() );
     assertEquals( "subInfos=(abcdefghijklmnopqrs((0,19)))/1.0(0,19)", ffl.getFragInfos().get( 0 ).toString() );
   }

   public void testSmallerFragSizeThanPhraseQuery() throws Exception {
     SimpleFragListBuilder sflb = new SimpleFragListBuilder();
-    FieldFragList ffl = sflb.createFieldFragList( fpl( "\"abcdefgh jklmnopqrs\"", "abcdefgh jklmnopqrs" ), SimpleFragListBuilder.MIN_FRAG_CHAR_SIZE );
+
+    PhraseQuery phraseQuery = new PhraseQuery();
+    phraseQuery.add(new Term(F, "abcdefgh"));
+    phraseQuery.add(new Term(F, "jklmnopqrs"));
+
+    FieldFragList ffl = sflb.createFieldFragList( fpl(phraseQuery, "abcdefgh jklmnopqrs" ), SimpleFragListBuilder.MIN_FRAG_CHAR_SIZE );
     assertEquals( 1, ffl.getFragInfos().size() );
     if (VERBOSE) System.out.println( ffl.getFragInfos().get( 0 ).toString() );
     assertEquals( "subInfos=(abcdefghjklmnopqrs((0,21)))/1.0(0,21)", ffl.getFragInfos().get( 0 ).toString() );
@@ -54,39 +60,39 @@ public class SimpleFragListBuilderTest extends AbstractTestCase {

   public void test1TermIndex() throws Exception {
     SimpleFragListBuilder sflb = new SimpleFragListBuilder();
-    FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "a" ), 100 );
+    FieldFragList ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "a" ), 100 );
     assertEquals( 1, ffl.getFragInfos().size() );
     assertEquals( "subInfos=(a((0,1)))/1.0(0,100)", ffl.getFragInfos().get( 0 ).toString() );
   }

   public void test2TermsIndex1Frag() throws Exception {
     SimpleFragListBuilder sflb = new SimpleFragListBuilder();
-    FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "a a" ), 100 );
+    FieldFragList ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "a a" ), 100 );
     assertEquals( 1, ffl.getFragInfos().size() );
     assertEquals( "subInfos=(a((0,1))a((2,3)))/2.0(0,100)", ffl.getFragInfos().get( 0 ).toString() );

-    ffl = sflb.createFieldFragList( fpl( "a", "a b b b b b b b b a" ), 20 );
+    ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "a b b b b b b b b a" ), 20 );
     assertEquals( 1, ffl.getFragInfos().size() );
     assertEquals( "subInfos=(a((0,1))a((18,19)))/2.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );

-    ffl = sflb.createFieldFragList( fpl( "a", "b b b b a b b b b a" ), 20 );
+    ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "b b b b a b b b b a" ), 20 );
     assertEquals( 1, ffl.getFragInfos().size() );
     assertEquals( "subInfos=(a((8,9))a((18,19)))/2.0(2,22)", ffl.getFragInfos().get( 0 ).toString() );
   }

   public void test2TermsIndex2Frags() throws Exception {
     SimpleFragListBuilder sflb = new SimpleFragListBuilder();
-    FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "a b b b b b b b b b b b b b a" ), 20 );
+    FieldFragList ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "a b b b b b b b b b b b b b a" ), 20 );
     assertEquals( 2, ffl.getFragInfos().size() );
     assertEquals( "subInfos=(a((0,1)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
     assertEquals( "subInfos=(a((28,29)))/1.0(22,42)", ffl.getFragInfos().get( 1 ).toString() );

-    ffl = sflb.createFieldFragList( fpl( "a", "a b b b b b b b b b b b b a" ), 20 );
+    ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "a b b b b b b b b b b b b a" ), 20 );
     assertEquals( 2, ffl.getFragInfos().size() );
     assertEquals( "subInfos=(a((0,1)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
     assertEquals( "subInfos=(a((26,27)))/1.0(20,40)", ffl.getFragInfos().get( 1 ).toString() );

-    ffl = sflb.createFieldFragList( fpl( "a", "a b b b b b b b b b a" ), 20 );
+    ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "a b b b b b b b b b a" ), 20 );
     assertEquals( 2, ffl.getFragInfos().size() );
     assertEquals( "subInfos=(a((0,1)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
     assertEquals( "subInfos=(a((20,21)))/1.0(20,40)", ffl.getFragInfos().get( 1 ).toString() );
@@ -94,41 +100,56 @@ public class SimpleFragListBuilderTest extends AbstractTestCase {

   public void test2TermsQuery() throws Exception {
     SimpleFragListBuilder sflb = new SimpleFragListBuilder();
-    FieldFragList ffl = sflb.createFieldFragList( fpl( "a b", "c d e" ), 20 );
+
+    BooleanQuery booleanQuery = new BooleanQuery();
+    booleanQuery.add(new TermQuery(new Term(F, "a")), BooleanClause.Occur.SHOULD);
+    booleanQuery.add(new TermQuery(new Term(F, "b")), BooleanClause.Occur.SHOULD);
+
+    FieldFragList ffl = sflb.createFieldFragList( fpl(booleanQuery, "c d e" ), 20 );
     assertEquals( 0, ffl.getFragInfos().size() );

-    ffl = sflb.createFieldFragList( fpl( "a b", "d b c" ), 20 );
+    ffl = sflb.createFieldFragList( fpl(booleanQuery, "d b c" ), 20 );
     assertEquals( 1, ffl.getFragInfos().size() );
     assertEquals( "subInfos=(b((2,3)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );

-    ffl = sflb.createFieldFragList( fpl( "a b", "a b c" ), 20 );
+    ffl = sflb.createFieldFragList( fpl(booleanQuery, "a b c" ), 20 );
     assertEquals( 1, ffl.getFragInfos().size() );
     assertEquals( "subInfos=(a((0,1))b((2,3)))/2.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
   }

   public void testPhraseQuery() throws Exception {
     SimpleFragListBuilder sflb = new SimpleFragListBuilder();
-    FieldFragList ffl = sflb.createFieldFragList( fpl( "\"a b\"", "c d e" ), 20 );
+
+    PhraseQuery phraseQuery = new PhraseQuery();
+    phraseQuery.add(new Term(F, "a"));
+    phraseQuery.add(new Term(F, "b"));
+
+    FieldFragList ffl = sflb.createFieldFragList( fpl(phraseQuery, "c d e" ), 20 );
     assertEquals( 0, ffl.getFragInfos().size() );

-    ffl = sflb.createFieldFragList( fpl( "\"a b\"", "a c b" ), 20 );
+    ffl = sflb.createFieldFragList( fpl(phraseQuery, "a c b" ), 20 );
     assertEquals( 0, ffl.getFragInfos().size() );

-    ffl = sflb.createFieldFragList( fpl( "\"a b\"", "a b c" ), 20 );
+    ffl = sflb.createFieldFragList( fpl(phraseQuery, "a b c" ), 20 );
     assertEquals( 1, ffl.getFragInfos().size() );
     assertEquals( "subInfos=(ab((0,3)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
   }

   public void testPhraseQuerySlop() throws Exception {
     SimpleFragListBuilder sflb = new SimpleFragListBuilder();
-    FieldFragList ffl = sflb.createFieldFragList( fpl( "\"a b\"~1", "a c b" ), 20 );
+
+    PhraseQuery phraseQuery = new PhraseQuery();
+    phraseQuery.setSlop(1);
+    phraseQuery.add(new Term(F, "a"));
+    phraseQuery.add(new Term(F, "b"));
+
+    FieldFragList ffl = sflb.createFieldFragList( fpl(phraseQuery, "a c b" ), 20 );
     assertEquals( 1, ffl.getFragInfos().size() );
     assertEquals( "subInfos=(ab((0,1)(4,5)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
   }

-  private FieldPhraseList fpl( String queryValue, String indexValue ) throws Exception {
+  private FieldPhraseList fpl(Query query, String indexValue ) throws Exception {
     make1d1fIndex( indexValue );
-    Query query = paW.parse( queryValue );
     FieldQuery fq = new FieldQuery( query, true, true );
     FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
     return new FieldPhraseList( stack, fq );

View File

@ -26,13 +26,17 @@ import org.apache.lucene.index.IndexReader;
  import org.apache.lucene.index.IndexWriter;
  import org.apache.lucene.index.IndexWriterConfig;
  import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+ import org.apache.lucene.index.Term;
+ import org.apache.lucene.search.BooleanClause;
+ import org.apache.lucene.search.BooleanQuery;
  import org.apache.lucene.search.Query;
+ import org.apache.lucene.search.TermQuery;
  import org.apache.lucene.search.highlight.SimpleHTMLEncoder;

  public class SimpleFragmentsBuilderTest extends AbstractTestCase {

  public void test1TermIndex() throws Exception {
-   FieldFragList ffl = ffl( "a", "a" );
+   FieldFragList ffl = ffl(new TermQuery(new Term(F, "a")), "a" );
    SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
    assertEquals( "<b>a</b> ", sfb.createFragment( reader, 0, F, ffl ) );
@ -42,7 +46,7 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase {
  }

  public void test2Frags() throws Exception {
-   FieldFragList ffl = ffl( "a", "a b b b b b b b b b b b a b a b" );
+   FieldFragList ffl = ffl(new TermQuery(new Term(F, "a")), "a b b b b b b b b b b b a b a b" );
    SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
    String[] f = sfb.createFragments( reader, 0, F, ffl, 3 );
    // 3 snippets requested, but should be 2
@ -52,7 +56,11 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase {
  }

  public void test3Frags() throws Exception {
-   FieldFragList ffl = ffl( "a c", "a b b b b b b b b b b b a b a b b b b b c a a b b" );
+   BooleanQuery booleanQuery = new BooleanQuery();
+   booleanQuery.add(new TermQuery(new Term(F, "a")), BooleanClause.Occur.SHOULD);
+   booleanQuery.add(new TermQuery(new Term(F, "c")), BooleanClause.Occur.SHOULD);
+   FieldFragList ffl = ffl(booleanQuery, "a b b b b b b b b b b b a b a b b b b b c a a b b" );
    SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
    String[] f = sfb.createFragments( reader, 0, F, ffl, 3 );
    assertEquals( 3, f.length );
@ -62,7 +70,7 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase {
  }

  public void testTagsAndEncoder() throws Exception {
-   FieldFragList ffl = ffl( "a", "<h1> a </h1>" );
+   FieldFragList ffl = ffl(new TermQuery(new Term(F, "a")), "<h1> a </h1>" );
    SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
    String[] preTags = { "[" };
    String[] postTags = { "]" };
@ -70,9 +78,8 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase {
    sfb.createFragment( reader, 0, F, ffl, preTags, postTags, new SimpleHTMLEncoder() ) );
  }

- private FieldFragList ffl( String queryValue, String indexValue ) throws Exception {
+ private FieldFragList ffl(Query query, String indexValue ) throws Exception {
    make1d1fIndex( indexValue );
-   Query query = paW.parse( queryValue );
    FieldQuery fq = new FieldQuery( query, true, true );
    FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
    FieldPhraseList fpl = new FieldPhraseList( stack, fq );

View File

@ -1,6 +1,8 @@
  package org.apache.lucene.search.vectorhighlight;

+ import org.apache.lucene.index.Term;
  import org.apache.lucene.search.Query;
+ import org.apache.lucene.search.TermQuery;

  /**
   * Licensed to the Apache Software Foundation (ASF) under one or more
@ -23,27 +25,26 @@ public class SingleFragListBuilderTest extends AbstractTestCase {

  public void testNullFieldFragList() throws Exception {
    SingleFragListBuilder sflb = new SingleFragListBuilder();
-   FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "b c d" ), 100 );
+   FieldFragList ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "b c d" ), 100 );
    assertEquals( 0, ffl.getFragInfos().size() );
  }

  public void testShortFieldFragList() throws Exception {
    SingleFragListBuilder sflb = new SingleFragListBuilder();
-   FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "a b c d" ), 100 );
+   FieldFragList ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "a b c d" ), 100 );
    assertEquals( 1, ffl.getFragInfos().size() );
    assertEquals( "subInfos=(a((0,1)))/1.0(0,2147483647)", ffl.getFragInfos().get( 0 ).toString() );
  }

  public void testLongFieldFragList() throws Exception {
    SingleFragListBuilder sflb = new SingleFragListBuilder();
-   FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "a b c d", "a b c d e f g h i", "j k l m n o p q r s t u v w x y z a b c", "d e f g" ), 100 );
+   FieldFragList ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "a b c d", "a b c d e f g h i", "j k l m n o p q r s t u v w x y z a b c", "d e f g" ), 100 );
    assertEquals( 1, ffl.getFragInfos().size() );
    assertEquals( "subInfos=(a((0,1))a((8,9))a((60,61)))/3.0(0,2147483647)", ffl.getFragInfos().get( 0 ).toString() );
  }

- private FieldPhraseList fpl( String queryValue, String... indexValues ) throws Exception {
+ private FieldPhraseList fpl(Query query, String... indexValues ) throws Exception {
    make1dmfIndex( indexValues );
-   Query query = paW.parse( queryValue );
    FieldQuery fq = new FieldQuery( query, true, true );
    FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
    return new FieldPhraseList( stack, fq );

View File

@ -426,6 +426,12 @@ public class InstantiatedIndexReader extends IndexReader {
    public long getSumTotalTermFreq() {
      return sumTotalTermFreq;
    }

+   // TODO: support this?
+   @Override
+   public long getSumDocFreq() {
+     return -1;
+   }

    @Override
    public Comparator<BytesRef> getComparator() {

View File

@ -842,6 +842,12 @@ public class MemoryIndex {
    public long getSumTotalTermFreq() {
      return info.getSumTotalTermFreq();
    }

+   @Override
+   public long getSumDocFreq() throws IOException {
+     // each term has df=1
+     return info.sortedTerms.length;
+   }
  };
  }
}
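Since a MemoryIndex always holds exactly one document, every term has docFreq == 1 and getSumDocFreq() collapses to the distinct-term count, which is why the override above can simply return sortedTerms.length. A minimal sketch of that invariant, assuming the existing MemoryIndex.addField(String, String, Analyzer) API; field name and text are invented for illustration:

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.memory.MemoryIndex;

public class MemoryIndexSumDocFreqSketch {
  // Adds one field with three distinct terms; each term can only occur in
  // the single in-memory document, so sumDocFreq over that field is 3.
  public static void addDoc(MemoryIndex index, Analyzer analyzer) {
    index.addField("content", "quick brown fox", analyzer);
  }
}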

View File

@ -128,6 +128,11 @@ public final class FieldCacheRewriteMethod extends MultiTermQuery.RewriteMethod
    public long getSumTotalTermFreq() {
      return -1;
    }

+   @Override
+   public long getSumDocFreq() throws IOException {
+     return -1;
+   }
  });

  assert termsEnum != null;

View File

@ -691,7 +691,7 @@ public class CheckIndex {
      Comparator<BytesRef> termComp = terms.getComparator();

      long sumTotalTermFreq = 0;
+     long sumDocFreq = 0;
      while(true) {

        final BytesRef term = terms.next();
@ -712,6 +712,7 @@ public class CheckIndex {
        final int docFreq = terms.docFreq();
        status.totFreq += docFreq;
+       sumDocFreq += docFreq;

        docs = terms.docs(liveDocs, docs);
        postings = terms.docsAndPositions(liveDocs, postings);
@ -879,6 +880,13 @@ public class CheckIndex {
          throw new RuntimeException("sumTotalTermFreq for field " + field + "=" + v + " != recomputed sumTotalTermFreq=" + sumTotalTermFreq);
        }
      }

+     if (sumDocFreq != 0) {
+       final long v = fields.terms(field).getSumDocFreq();
+       if (v != -1 && sumDocFreq != v) {
+         throw new RuntimeException("sumDocFreq for field " + field + "=" + v + " != recomputed sumDocFreq=" + sumDocFreq);
+       }
+     }

      // Test seek to last term:
      if (lastTerm != null) {
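For completeness, a sketch of how the new cross-check gets exercised; this is just the stock CheckIndex entry point (no new flags are involved), and a sumDocFreq mismatch surfaces as a broken segment in the returned Status rather than as an uncaught exception:

import java.io.File;
import org.apache.lucene.index.CheckIndex;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class CheckIndexSketch {
  public static void main(String[] args) throws Exception {
    Directory dir = FSDirectory.open(new File(args[0]));
    CheckIndex checker = new CheckIndex(dir);
    // The term-index pass now recomputes sumDocFreq per field and compares
    // it against Terms.getSumDocFreq(), as in the hunk above.
    CheckIndex.Status status = checker.checkIndex();
    System.out.println("index clean? " + status.clean);
  }
}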

View File

@ -31,6 +31,7 @@ public final class FieldInvertState {
  int numOverlap;
  int offset;
  int maxTermFrequency;
+ int uniqueTermCount;
  float boost;
  AttributeSource attributeSource;
@ -55,6 +56,7 @@ public final class FieldInvertState {
    numOverlap = 0;
    offset = 0;
    maxTermFrequency = 0;
+   uniqueTermCount = 0;
    boost = docBoost;
    attributeSource = null;
  }
@ -122,6 +124,13 @@ public final class FieldInvertState {
    return maxTermFrequency;
  }

+ /**
+  * Return the number of unique terms encountered in this field.
+  */
+ public int getUniqueTermCount() {
+   return uniqueTermCount;
+ }

  public AttributeSource getAttributeSource() {
    return attributeSource;
  }
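A sketch of how the new statistic can be consumed, mirroring the TestSimilarity used in TestUniqueTermCount further down in this commit; at this point on trunk computeNorm still returns a single byte, and the Math.min clamp is an added safety assumption rather than part of the patch:

import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.search.DefaultSimilarity;

public class UniqueTermCountSimilarity extends DefaultSimilarity {
  // Encode the number of distinct terms in the field into the norm byte,
  // instead of the usual length normalization.
  @Override
  public byte computeNorm(FieldInvertState state) {
    return (byte) Math.min(255, state.getUniqueTermCount());
  }
}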

View File

@ -105,6 +105,11 @@ public class FilterIndexReader extends IndexReader {
    public long getSumTotalTermFreq() throws IOException {
      return in.getSumTotalTermFreq();
    }

+   @Override
+   public long getSumDocFreq() throws IOException {
+     return in.getSumDocFreq();
+   }
  }

  /** Base class for filtering {@link TermsEnum} implementations. */

View File

@ -134,6 +134,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
      writeProx(termID, fieldState.position);
    }
    fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
+   fieldState.uniqueTermCount++;
  }

  @Override
@ -151,6 +152,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
        termsHashPerField.writeVInt(0, postings.lastDocCodes[termID]);
        postings.lastDocCodes[termID] = docState.docID - postings.lastDocIDs[termID];
        postings.lastDocIDs[termID] = docState.docID;
+       fieldState.uniqueTermCount++;
      }
    } else {
      if (docState.docID != postings.lastDocIDs[termID]) {
@ -171,6 +173,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
        postings.lastDocCodes[termID] = (docState.docID - postings.lastDocIDs[termID]) << 1;
        postings.lastDocIDs[termID] = docState.docID;
        writeProx(termID, fieldState.position);
+       fieldState.uniqueTermCount++;
      } else {
        fieldState.maxTermFrequency = Math.max(fieldState.maxTermFrequency, ++postings.docFreqs[termID]);
        writeProx(termID, fieldState.position-postings.lastPositions[termID]);
@ -251,6 +254,8 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
    final ByteSliceReader prox = new ByteSliceReader();

    long sumTotalTermFreq = 0;
+   long sumDocFreq = 0;
    for (int i = 0; i < numTerms; i++) {
      final int termID = termIDs[i];
      // Get BytesRef
@ -389,9 +394,10 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
      }
      termsConsumer.finishTerm(text, new TermStats(numDocs, totTF));
      sumTotalTermFreq += totTF;
+     sumDocFreq += numDocs;
    }

-   termsConsumer.finish(sumTotalTermFreq);
+   termsConsumer.finish(sumTotalTermFreq, sumDocFreq);
  }
}

View File

@ -88,6 +88,19 @@ public final class MultiTerms extends Terms {
    }
    return sum;
  }

+ @Override
+ public long getSumDocFreq() throws IOException {
+   long sum = 0;
+   for(Terms terms : subs) {
+     final long v = terms.getSumDocFreq();
+     if (v == -1) {
+       return -1;
+     }
+     sum += v;
+   }
+   return sum;
+ }

  @Override
  public Comparator<BytesRef> getComparator() {

View File

@ -132,6 +132,13 @@ public abstract class Terms {
   *  into account. */
  public abstract long getSumTotalTermFreq() throws IOException;

+ /** Returns the sum of {@link #docFreq(BytesRef)} for
+  *  all terms in this field, or -1 if this measure isn't
+  *  stored by the codec. Note that, just like other term
+  *  measures, this measure does not take deleted documents
+  *  into account. */
+ public abstract long getSumDocFreq() throws IOException;

  /**
   * Returns a thread-private {@link TermsEnum} instance. Obtaining
   * {@link TermsEnum} from this method might be more efficient than using
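The contract is easy to state as code: a brute-force pass over the TermsEnum must agree with the stored statistic whenever the codec supports it. A small sketch of that check (helper class and method name are invented here; the loop itself mirrors assertSumDocFreq in the new TestSumDocFreq below):

import java.io.IOException;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;

public final class SumDocFreqSketch {
  // Recompute the statistic by iterating every term in the field; a codec
  // that stores it should return the same value from getSumDocFreq(),
  // or -1 if it does not support the statistic.
  public static long recompute(Terms terms) throws IOException {
    long sum = 0;
    TermsEnum termsEnum = terms.iterator();
    while (termsEnum.next() != null) {
      sum += termsEnum.docFreq();
    }
    return sum;
  }
}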

View File

@ -137,8 +137,9 @@ public class BlockTermsReader extends FieldsProducer {
        final long termsStartPointer = in.readVLong();
        final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
        final long sumTotalTermFreq = fieldInfo.omitTermFreqAndPositions ? -1 : in.readVLong();
+       final long sumDocFreq = in.readVLong();
        assert !fields.containsKey(fieldInfo.name);
-       fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, termsStartPointer, sumTotalTermFreq));
+       fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, termsStartPointer, sumTotalTermFreq, sumDocFreq));
      }
      success = true;
    } finally {
@ -245,13 +246,15 @@ public class BlockTermsReader extends FieldsProducer {
    final FieldInfo fieldInfo;
    final long termsStartPointer;
    final long sumTotalTermFreq;
+   final long sumDocFreq;

-   FieldReader(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq) {
+   FieldReader(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq, long sumDocFreq) {
      assert numTerms > 0;
      this.fieldInfo = fieldInfo;
      this.numTerms = numTerms;
      this.termsStartPointer = termsStartPointer;
      this.sumTotalTermFreq = sumTotalTermFreq;
+     this.sumDocFreq = sumDocFreq;
    }

    @Override
@ -279,6 +282,11 @@ public class BlockTermsReader extends FieldsProducer {
      return sumTotalTermFreq;
    }

+   @Override
+   public long getSumDocFreq() throws IOException {
+     return sumDocFreq;
+   }

    // Iterates through terms in this field
    private final class SegmentTermsEnum extends TermsEnum {
      private final IndexInput in;

View File

@ -132,6 +132,7 @@ public class BlockTermsWriter extends FieldsConsumer {
          if (!field.fieldInfo.omitTermFreqAndPositions) {
            out.writeVLong(field.sumTotalTermFreq);
          }
+         out.writeVLong(field.sumDocFreq);
      }
    }
    writeTrailer(dirStart);
@ -157,6 +158,7 @@ public class BlockTermsWriter extends FieldsConsumer {
    private long numTerms;
    private final TermsIndexWriterBase.FieldWriter fieldIndexWriter;
    long sumTotalTermFreq;
+   long sumDocFreq;

    private TermEntry[] pendingTerms;
@ -231,7 +233,7 @@ public class BlockTermsWriter extends FieldsConsumer {
    // Finishes all terms in this field
    @Override
-   public void finish(long sumTotalTermFreq) throws IOException {
+   public void finish(long sumTotalTermFreq, long sumDocFreq) throws IOException {
      if (pendingCount > 0) {
        flushBlock();
      }
@ -239,6 +241,7 @@ public class BlockTermsWriter extends FieldsConsumer {
      out.writeVInt(0);
      this.sumTotalTermFreq = sumTotalTermFreq;
+     this.sumDocFreq = sumDocFreq;
      fieldIndexWriter.finish(out.getFilePointer());
    }

View File

@ -41,7 +41,7 @@ public abstract class TermsConsumer {
  public abstract void finishTerm(BytesRef text, TermStats stats) throws IOException;

  /** Called when we are done adding terms to this field */
- public abstract void finish(long sumTotalTermFreq) throws IOException;
+ public abstract void finish(long sumTotalTermFreq, long sumDocFreq) throws IOException;

  /** Return the BytesRef Comparator used to sort terms
   *  before feeding to this API. */
@ -56,7 +56,8 @@ public abstract class TermsConsumer {
    BytesRef term;
    assert termsEnum != null;
    long sumTotalTermFreq = 0;
-   long sumDF = 0;
+   long sumDocFreq = 0;
+   long sumDFsinceLastAbortCheck = 0;

    if (mergeState.fieldInfo.omitTermFreqAndPositions) {
      if (docsEnum == null) {
@ -74,10 +75,11 @@ public abstract class TermsConsumer {
        final TermStats stats = postingsConsumer.merge(mergeState, docsEnum);
        if (stats.docFreq > 0) {
          finishTerm(term, stats);
-         sumDF += stats.docFreq;
-         if (sumDF > 60000) {
-           mergeState.checkAbort.work(sumDF/5.0);
-           sumDF = 0;
+         sumDFsinceLastAbortCheck += stats.docFreq;
+         sumDocFreq += stats.docFreq;
+         if (sumDFsinceLastAbortCheck > 60000) {
+           mergeState.checkAbort.work(sumDFsinceLastAbortCheck/5.0);
+           sumDFsinceLastAbortCheck = 0;
          }
        }
      }
@ -105,16 +107,17 @@ public abstract class TermsConsumer {
        if (stats.docFreq > 0) {
          finishTerm(term, stats);
          sumTotalTermFreq += stats.totalTermFreq;
-         sumDF += stats.docFreq;
-         if (sumDF > 60000) {
-           mergeState.checkAbort.work(sumDF/5.0);
-           sumDF = 0;
+         sumDFsinceLastAbortCheck += stats.docFreq;
+         sumDocFreq += stats.docFreq;
+         if (sumDFsinceLastAbortCheck > 60000) {
+           mergeState.checkAbort.work(sumDFsinceLastAbortCheck/5.0);
+           sumDFsinceLastAbortCheck = 0;
          }
        }
      }
    }
  }
- finish(sumTotalTermFreq);
+ finish(sumTotalTermFreq, sumDocFreq);
  }
}

View File

@ -219,13 +219,14 @@ public class MemoryCodec extends Codec {
    }

    @Override
-   public void finish(long sumTotalTermFreq) throws IOException {
+   public void finish(long sumTotalTermFreq, long sumDocFreq) throws IOException {
      if (termCount > 0) {
        out.writeVInt(termCount);
        out.writeVInt(field.number);
        if (!field.omitTermFreqAndPositions) {
          out.writeVLong(sumTotalTermFreq);
        }
+       out.writeVLong(sumDocFreq);
        builder.finish().save(out);
        if (VERBOSE) System.out.println("finish field=" + field.name + " fp=" + out.getFilePointer());
      }
@ -683,6 +684,7 @@ public class MemoryCodec extends Codec {
  private final static class TermsReader extends Terms {
    private final long sumTotalTermFreq;
+   private final long sumDocFreq;
    private FST<BytesRef> fst;
    private final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
    private final FieldInfo field;
@ -695,6 +697,7 @@ public class MemoryCodec extends Codec {
      } else {
        sumTotalTermFreq = 0;
      }
+     sumDocFreq = in.readVLong();
      fst = new FST<BytesRef>(in, outputs);
    }
@ -704,6 +707,11 @@ public class MemoryCodec extends Codec {
      return sumTotalTermFreq;
    }

+   @Override
+   public long getSumDocFreq() throws IOException {
+     return sumDocFreq;
+   }

    @Override
    public TermsEnum iterator() {
      return new FSTTermsEnum(field, fst);

View File

@ -266,6 +266,11 @@ public class PreFlexFields extends FieldsProducer {
    public long getSumTotalTermFreq() {
      return -1;
    }

+   @Override
+   public long getSumDocFreq() throws IOException {
+     return -1;
+   }
  }

  private class PreTermsEnum extends TermsEnum {

View File

@ -463,6 +463,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
    private final long termsStart;
    private final boolean omitTF;
    private long sumTotalTermFreq;
+   private long sumDocFreq;
    private FST<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> fst;
    private int termCount;
    private final BytesRef scratch = new BytesRef(10);
@ -500,6 +501,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
        break;
      } else if (scratch.startsWith(DOC)) {
        docFreq++;
+       sumDocFreq++;
      } else if (scratch.startsWith(POS)) {
        totalTermFreq++;
      } else if (scratch.startsWith(TERM)) {
@ -554,6 +556,11 @@ class SimpleTextFieldsReader extends FieldsProducer {
    public long getSumTotalTermFreq() {
      return sumTotalTermFreq;
    }

+   @Override
+   public long getSumDocFreq() throws IOException {
+     return sumDocFreq;
+   }
  }

  @Override

View File

@ -89,7 +89,7 @@ class SimpleTextFieldsWriter extends FieldsConsumer {
  }

  @Override
- public void finish(long sumTotalTermFreq) throws IOException {
+ public void finish(long sumTotalTermFreq, long sumDocFreq) throws IOException {
  }

  @Override

View File

@ -195,7 +195,7 @@ class PreFlexFieldsWriter extends FieldsConsumer {
  }

  @Override
- public void finish(long sumTotalTermCount) throws IOException {
+ public void finish(long sumTotalTermCount, long sumDocFreq) throws IOException {
  }

  @Override

View File

@ -102,6 +102,7 @@ public class TestExternalCodecs extends LuceneTestCase {
    final String field;
    final SortedMap<String,RAMTerm> termToDocs = new TreeMap<String,RAMTerm>();
    long sumTotalTermFreq;
+   long sumDocFreq;

    RAMField(String field) {
      this.field = field;
@ -116,6 +117,11 @@ public class TestExternalCodecs extends LuceneTestCase {
    public long getSumTotalTermFreq() {
      return sumTotalTermFreq;
    }

+   @Override
+   public long getSumDocFreq() throws IOException {
+     return sumDocFreq;
+   }

    @Override
    public TermsEnum iterator() {
@ -204,8 +210,9 @@ public class TestExternalCodecs extends LuceneTestCase {
    }

    @Override
-   public void finish(long sumTotalTermFreq) {
+   public void finish(long sumTotalTermFreq, long sumDocFreq) {
      field.sumTotalTermFreq = sumTotalTermFreq;
+     field.sumDocFreq = sumDocFreq;
    }
  }

View File

@ -455,9 +455,12 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
    File indexDir = new File(LuceneTestCase.TEMP_DIR, dirName);
    _TestUtil.rmDir(indexDir);
    Directory dir = newFSDirectory(indexDir);
-   IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(10);
-   ((LogMergePolicy) conf.getMergePolicy()).setUseCompoundFile(doCFS);
+   LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
+   mp.setUseCompoundFile(doCFS);
+   mp.setNoCFSRatio(1.0);
+   // TODO: remove randomness
+   IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
+     .setMaxBufferedDocs(10).setMergePolicy(mp);
    IndexWriter writer = new IndexWriter(dir, conf);

    for(int i=0;i<35;i++) {
@ -471,8 +474,12 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
    if (!optimized) {
      // open fresh writer so we get no prx file in the added segment
-     conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(10);
-     ((LogMergePolicy) conf.getMergePolicy()).setUseCompoundFile(doCFS);
+     mp = new LogByteSizeMergePolicy();
+     mp.setUseCompoundFile(doCFS);
+     mp.setNoCFSRatio(1.0);
+     // TODO: remove randomness
+     conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
+       .setMaxBufferedDocs(10).setMergePolicy(mp);
      writer = new IndexWriter(dir, conf);
      addNoProxDoc(writer);
      writer.close();

View File

@ -101,10 +101,12 @@ public class TestCodecs extends LuceneTestCase {
      Arrays.sort(terms);
      final TermsConsumer termsConsumer = consumer.addField(fieldInfo);
      long sumTotalTermCount = 0;
+     long sumDF = 0;
      for (final TermData term : terms) {
+       sumDF += term.docs.length;
        sumTotalTermCount += term.write(termsConsumer);
      }
-     termsConsumer.finish(sumTotalTermCount);
+     termsConsumer.finish(sumTotalTermCount, sumDF);
    }
  }

View File

@ -0,0 +1,101 @@
package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
/**
* Tests {@link Terms#getSumDocFreq()}
* @lucene.experimental
*/
public class TestSumDocFreq extends LuceneTestCase {
public void testSumDocFreq() throws Exception {
final int numDocs = atLeast(500);
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random, dir);
Document doc = new Document();
Field field1 = newField("foo", "", Field.Index.ANALYZED);
Field field2 = newField("bar", "", Field.Index.ANALYZED);
doc.add(field1);
doc.add(field2);
for (int i = 0; i < numDocs; i++) {
char ch1 = (char) _TestUtil.nextInt(random, 'a', 'z');
char ch2 = (char) _TestUtil.nextInt(random, 'a', 'z');
field1.setValue("" + ch1 + " " + ch2);
ch1 = (char) _TestUtil.nextInt(random, 'a', 'z');
ch2 = (char) _TestUtil.nextInt(random, 'a', 'z');
field2.setValue("" + ch1 + " " + ch2);
writer.addDocument(doc);
}
IndexReader ir = writer.getReader();
writer.close();
assertSumDocFreq(ir);
ir.close();
ir = IndexReader.open(dir, false);
int numDeletions = atLeast(20);
for (int i = 0; i < numDeletions; i++) {
ir.deleteDocument(random.nextInt(ir.maxDoc()));
}
ir.close();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
w.optimize();
w.close();
ir = IndexReader.open(dir, true);
assertSumDocFreq(ir);
ir.close();
dir.close();
}
private void assertSumDocFreq(IndexReader ir) throws Exception {
// compute sumDocFreq across all fields
Fields fields = MultiFields.getFields(ir);
FieldsEnum fieldEnum = fields.iterator();
String f = null;
while ((f = fieldEnum.next()) != null) {
Terms terms = fields.terms(f);
long sumDocFreq = terms.getSumDocFreq();
if (sumDocFreq == -1) {
if (VERBOSE) {
System.out.println("skipping field: " + f + ", codec does not support sumDocFreq");
}
continue;
}
long computedSumDocFreq = 0;
TermsEnum termsEnum = terms.iterator();
while (termsEnum.next() != null) {
computedSumDocFreq += termsEnum.docFreq();
}
assertEquals(computedSumDocFreq, sumDocFreq);
}
}
}

View File

@ -0,0 +1,108 @@
package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.ArrayList;
import java.util.HashSet;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.DefaultSimilarityProvider;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
/**
* Tests the uniqueTermCount statistic in FieldInvertState
*/
public class TestUniqueTermCount extends LuceneTestCase {
Directory dir;
IndexReader reader;
/* expected uniqueTermCount values for our documents */
ArrayList<Integer> expected = new ArrayList<Integer>();
@Override
public void setUp() throws Exception {
super.setUp();
dir = newDirectory();
IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer(random, MockTokenizer.SIMPLE, true)).setMergePolicy(newLogMergePolicy());
config.setSimilarityProvider(new DefaultSimilarityProvider() {
@Override
public Similarity get(String field) {
return new TestSimilarity();
}
});
RandomIndexWriter writer = new RandomIndexWriter(random, dir, config);
Document doc = new Document();
Field foo = newField("foo", "", Field.Store.NO, Field.Index.ANALYZED);
doc.add(foo);
for (int i = 0; i < 100; i++) {
foo.setValue(addValue());
writer.addDocument(doc);
}
reader = writer.getReader();
writer.close();
}
@Override
public void tearDown() throws Exception {
reader.close();
dir.close();
super.tearDown();
}
public void test() throws Exception {
byte fooNorms[] = MultiNorms.norms(reader, "foo");
for (int i = 0; i < reader.maxDoc(); i++)
assertEquals(expected.get(i).intValue(), fooNorms[i] & 0xff);
}
/**
 * Makes a bunch of single-char tokens (the max # unique terms will be at most 26).
 * Puts the # unique terms into expected, to be checked against the norm.
 */
private String addValue() {
StringBuilder sb = new StringBuilder();
HashSet<String> terms = new HashSet<String>();
int num = _TestUtil.nextInt(random, 0, 255);
for (int i = 0; i < num; i++) {
sb.append(' ');
char term = (char) _TestUtil.nextInt(random, 'a', 'z');
sb.append(term);
terms.add("" + term);
}
expected.add(terms.size());
return sb.toString();
}
/**
 * Simple similarity that encodes uniqueTermCount directly as a byte
 */
class TestSimilarity extends DefaultSimilarity {
@Override
public byte computeNorm(FieldInvertState state) {
return (byte) state.getUniqueTermCount();
}
}
}

View File

@ -16,7 +16,6 @@ package org.apache.lucene.analysis.query;
   * limitations under the License.
   */

- import java.io.IOException;
  import java.io.Reader;
  import java.io.StringReader;
@ -25,19 +24,12 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
  import org.apache.lucene.analysis.MockAnalyzer;
  import org.apache.lucene.analysis.MockTokenizer;
  import org.apache.lucene.analysis.TokenStream;
- import org.apache.lucene.analysis.core.LetterTokenizer;
- import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
- import org.apache.lucene.analysis.core.WhitespaceTokenizer;
  import org.apache.lucene.document.Document;
  import org.apache.lucene.document.Field;
  import org.apache.lucene.index.IndexReader;
  import org.apache.lucene.index.IndexWriter;
  import org.apache.lucene.index.IndexWriterConfig;
  import org.apache.lucene.index.Term;
- import org.apache.lucene.queryParser.ParseException;
- import org.apache.lucene.queryParser.QueryParser;
- import org.apache.lucene.search.IndexSearcher;
- import org.apache.lucene.search.Query;
  import org.apache.lucene.store.RAMDirectory;

  public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
@ -74,22 +66,14 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
    super.tearDown();
  }

- //Helper method to query
- private int search(Analyzer a, String queryString) throws IOException, ParseException {
-   QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "repetitiveField", a);
-   Query q = qp.parse(queryString);
-   IndexSearcher searcher = newSearcher(reader);
-   int hits = searcher.search(q, null, 1000).totalHits;
-   searcher.close();
-   return hits;
- }

  public void testUninitializedAnalyzer() throws Exception {
-   //Note: no calls to "addStopWord"
-   String query = "variedField:quick repetitiveField:boring";
-   int numHits1 = search(protectedAnalyzer, query);
-   int numHits2 = search(appAnalyzer, query);
-   assertEquals("No filtering test", numHits1, numHits2);
+   // Note: no calls to "addStopWord"
+   // query = "variedField:quick repetitiveField:boring";
+   TokenStream protectedTokenStream = protectedAnalyzer.reusableTokenStream("variedField", new StringReader("quick"));
+   assertTokenStreamContents(protectedTokenStream, new String[]{"quick"});
+
+   protectedTokenStream = protectedAnalyzer.reusableTokenStream("repetitiveField", new StringReader("boring"));
+   assertTokenStreamContents(protectedTokenStream, new String[]{"boring"});
  }

  /*
@ -97,36 +81,41 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
   */
  public void testDefaultAddStopWordsIndexReader() throws Exception {
    protectedAnalyzer.addStopWords(reader);
-   int numHits = search(protectedAnalyzer, "repetitiveField:boring");
-   assertEquals("Default filter should remove all docs", 0, numHits);
+   TokenStream protectedTokenStream = protectedAnalyzer.reusableTokenStream("repetitiveField", new StringReader("boring"));
+   assertTokenStreamContents(protectedTokenStream, new String[0]); // Default stop word filtering will remove boring
  }

  /*
   * Test method for 'org.apache.lucene.analysis.QueryAutoStopWordAnalyzer.addStopWords(IndexReader, int)'
   */
  public void testAddStopWordsIndexReaderInt() throws Exception {
    protectedAnalyzer.addStopWords(reader, 1f / 2f);
-   int numHits = search(protectedAnalyzer, "repetitiveField:boring");
-   assertEquals("A filter on terms in > one half of docs remove boring docs", 0, numHits);
-   numHits = search(protectedAnalyzer, "repetitiveField:vaguelyboring");
-   assertTrue("A filter on terms in > half of docs should not remove vaguelyBoring docs", numHits > 1);
+   TokenStream protectedTokenStream = protectedAnalyzer.reusableTokenStream("repetitiveField", new StringReader("boring"));
+   // A filter on terms in > one half of docs remove boring
+   assertTokenStreamContents(protectedTokenStream, new String[0]);
+
+   protectedTokenStream = protectedAnalyzer.reusableTokenStream("repetitiveField", new StringReader("vaguelyboring"));
+   // A filter on terms in > half of docs should not remove vaguelyBoring
+   assertTokenStreamContents(protectedTokenStream, new String[]{"vaguelyboring"});

    protectedAnalyzer.addStopWords(reader, 1f / 4f);
-   numHits = search(protectedAnalyzer, "repetitiveField:vaguelyboring");
-   assertEquals("A filter on terms in > quarter of docs should remove vaguelyBoring docs", 0, numHits);
+   protectedTokenStream = protectedAnalyzer.reusableTokenStream("repetitiveField", new StringReader("vaguelyboring"));
+   // A filter on terms in > quarter of docs should remove vaguelyBoring
+   assertTokenStreamContents(protectedTokenStream, new String[0]);
  }

  public void testAddStopWordsIndexReaderStringFloat() throws Exception {
    protectedAnalyzer.addStopWords(reader, "variedField", 1f / 2f);
-   int numHits = search(protectedAnalyzer, "repetitiveField:boring");
-   assertTrue("A filter on one Field should not affect queris on another", numHits > 0);
+   TokenStream protectedTokenStream = protectedAnalyzer.reusableTokenStream("repetitiveField", new StringReader("boring"));
+   // A filter on one Field should not affect queries on another
+   assertTokenStreamContents(protectedTokenStream, new String[]{"boring"});

    protectedAnalyzer.addStopWords(reader, "repetitiveField", 1f / 2f);
-   numHits = search(protectedAnalyzer, "repetitiveField:boring");
-   assertEquals("A filter on the right Field should affect queries on it", numHits, 0);
+   protectedTokenStream = protectedAnalyzer.reusableTokenStream("repetitiveField", new StringReader("boring"));
+   // A filter on the right Field should affect queries on it
+   assertTokenStreamContents(protectedTokenStream, new String[0]);
  }

  public void testAddStopWordsIndexReaderStringInt() throws Exception {
@ -144,12 +133,14 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
  public void testNoFieldNamePollution() throws Exception {
    protectedAnalyzer.addStopWords(reader, "repetitiveField", 10);
-   int numHits = search(protectedAnalyzer, "repetitiveField:boring");
-   assertEquals("Check filter set up OK", 0, numHits);
-   numHits = search(protectedAnalyzer, "variedField:boring");
-   assertTrue("Filter should not prevent stopwords in one field being used in another ", numHits > 0);
+   TokenStream protectedTokenStream = protectedAnalyzer.reusableTokenStream("repetitiveField", new StringReader("boring"));
+   // Check filter set up OK
+   assertTokenStreamContents(protectedTokenStream, new String[0]);
+
+   protectedTokenStream = protectedAnalyzer.reusableTokenStream("variedField", new StringReader("boring"));
+   // Filter should not prevent stopwords in one field being used in another
+   assertTokenStreamContents(protectedTokenStream, new String[]{"boring"});
  }

  /*
@ -170,10 +161,12 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
  public void testWrappingNonReusableAnalyzer() throws Exception {
    QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, new NonreusableAnalyzer());
    a.addStopWords(reader, 10);
-   int numHits = search(a, "repetitiveField:boring");
-   assertTrue(numHits == 0);
-   numHits = search(a, "repetitiveField:vaguelyboring");
-   assertTrue(numHits == 0);
+   TokenStream tokenStream = a.reusableTokenStream("repetitiveField", new StringReader("boring"));
+   assertTokenStreamContents(tokenStream, new String[0]);
+
+   tokenStream = a.reusableTokenStream("repetitiveField", new StringReader("vaguelyboring"));
+   assertTokenStreamContents(tokenStream, new String[0]);
  }

  public void testTokenStream() throws Exception {

View File

@ -32,14 +32,7 @@ import org.apache.lucene.document.Field;
  import org.apache.lucene.index.IndexWriter;
  import org.apache.lucene.index.IndexWriterConfig;
  import org.apache.lucene.index.Term;
- import org.apache.lucene.queryParser.QueryParser;
- import org.apache.lucene.search.BooleanClause;
- import org.apache.lucene.search.BooleanQuery;
- import org.apache.lucene.search.IndexSearcher;
- import org.apache.lucene.search.PhraseQuery;
- import org.apache.lucene.search.Query;
- import org.apache.lucene.search.ScoreDoc;
- import org.apache.lucene.search.TermQuery;
+ import org.apache.lucene.search.*;
  import org.apache.lucene.store.Directory;
  import org.apache.lucene.store.RAMDirectory;
@ -82,16 +75,6 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
    return new IndexSearcher(dir, true);
  }

- protected ScoreDoc[] queryParsingTest(Analyzer analyzer, String qs) throws Exception {
-   searcher = setUpSearcher(analyzer);
-   QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "content", analyzer);
-   Query q = qp.parse(qs);
-   return searcher.search(q, null, 1000).scoreDocs;
- }

  protected void compareRanks(ScoreDoc[] hits, int[] ranks) throws Exception {
    assertEquals(ranks.length, hits.length);
    for (int i = 0; i < ranks.length; i++) {
@ -99,51 +82,6 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
    }
  }

- /*
-  * Will not work on an index without unigrams, since QueryParser automatically
-  * tokenizes on whitespace.
-  */
- public void testShingleAnalyzerWrapperQueryParsing() throws Exception {
-   ScoreDoc[] hits = queryParsingTest(new ShingleAnalyzerWrapper
-     (new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), 2),
-     "test sentence");
-   int[] ranks = new int[] { 1, 2, 0 };
-   compareRanks(hits, ranks);
- }
-
- /*
-  * This one fails with an exception.
-  */
- public void testShingleAnalyzerWrapperPhraseQueryParsingFails() throws Exception {
-   ScoreDoc[] hits = queryParsingTest(new ShingleAnalyzerWrapper
-     (new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), 2),
-     "\"this sentence\"");
-   int[] ranks = new int[] { 0 };
-   compareRanks(hits, ranks);
- }
-
- /*
-  * This one works, actually.
-  */
- public void testShingleAnalyzerWrapperPhraseQueryParsing() throws Exception {
-   ScoreDoc[] hits = queryParsingTest(new ShingleAnalyzerWrapper
-     (new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), 2),
-     "\"test sentence\"");
-   int[] ranks = new int[] { 1 };
-   compareRanks(hits, ranks);
- }
-
- /*
-  * Same as above, is tokenized without using the analyzer.
-  */
- public void testShingleAnalyzerWrapperRequiredQueryParsing() throws Exception {
-   ScoreDoc[] hits = queryParsingTest(new ShingleAnalyzerWrapper
-     (new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), 2),
-     "+test +sentence");
-   int[] ranks = new int[] { 1, 2 };
-   compareRanks(hits, ranks);
- }

  /*
   * This shows how to construct a phrase query containing shingles.
   */
@ -153,8 +91,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
    PhraseQuery q = new PhraseQuery();

-   TokenStream ts = analyzer.tokenStream("content",
-     new StringReader("this sentence"));
+   TokenStream ts = analyzer.tokenStream("content", new StringReader("this sentence"));
    int j = -1;

    PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class);
@ -183,8 +120,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
    BooleanQuery q = new BooleanQuery();

-   TokenStream ts = analyzer.tokenStream("content",
-     new StringReader("test sentence"));
+   TokenStream ts = analyzer.tokenStream("content", new StringReader("test sentence"));
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);

View File

@ -20,11 +20,7 @@ package org.apache.lucene.queries;
  import org.apache.lucene.queries.function.FunctionQuery;
  import org.apache.lucene.queries.function.FunctionTestSetup;
  import org.apache.lucene.queries.function.ValueSource;
- import org.apache.lucene.queries.function.valuesource.ByteFieldSource;
  import org.apache.lucene.queries.function.valuesource.FloatFieldSource;
- import org.apache.lucene.queries.function.valuesource.IntFieldSource;
- import org.apache.lucene.queries.function.valuesource.ShortFieldSource;
- import org.apache.lucene.queryParser.QueryParser;
  import org.apache.lucene.search.*;
  import org.apache.lucene.search.cache.*;
  import org.junit.BeforeClass;
@ -198,9 +194,10 @@ public class TestCustomScoreQuery extends FunctionTestSetup {
  @Test
  public void testCustomExternalQuery() throws Exception {
-   QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, TEXT_FIELD,anlzr);
-   String qtxt = "first aid text"; // from the doc texts in FunctionQuerySetup.
-   Query q1 = qp.parse(qtxt);
+   BooleanQuery q1 = new BooleanQuery();
+   q1.add(new TermQuery(new Term(TEXT_FIELD, "first")), BooleanClause.Occur.SHOULD);
+   q1.add(new TermQuery(new Term(TEXT_FIELD, "aid")), BooleanClause.Occur.SHOULD);
+   q1.add(new TermQuery(new Term(TEXT_FIELD, "text")), BooleanClause.Occur.SHOULD);

    final Query q = new CustomExternalQuery(q1);
    log(q);
@ -243,11 +240,12 @@ public class TestCustomScoreQuery extends FunctionTestSetup {
    FunctionQuery functionQuery = new FunctionQuery(valueSource);
    float boost = (float) dboost;
    IndexSearcher s = new IndexSearcher(dir, true);
-   QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, TEXT_FIELD, anlzr);
-   String qtxt = "first aid text"; // from the doc texts in FunctionQuerySetup.

    // regular (boolean) query.
-   Query q1 = qp.parse(qtxt);
+   BooleanQuery q1 = new BooleanQuery();
+   q1.add(new TermQuery(new Term(TEXT_FIELD, "first")), BooleanClause.Occur.SHOULD);
+   q1.add(new TermQuery(new Term(TEXT_FIELD, "aid")), BooleanClause.Occur.SHOULD);
+   q1.add(new TermQuery(new Term(TEXT_FIELD, "text")), BooleanClause.Occur.SHOULD);
    log(q1);

    // custom query, that should score the same as q1.

View File

@ -256,7 +256,13 @@ Other Changes
  * LUCENE-2883: FunctionQuery, DocValues (and its impls), ValueSource (and its
    impls) and BoostedQuery have been consolidated into the queries module. They
    can now be found at o.a.l.queries.function.

+ * SOLR-2027: FacetField.getValues() now returns an empty list if there are no
+   values, instead of null (Chris Male)
+
+ * SOLR-1825: SolrQuery.addFacetQuery now enables facets automatically, like
+   addFacetField (Chris Male)

  Documentation
  ----------------------

View File

@ -292,6 +292,7 @@ public class SolrQuery extends ModifiableSolrParams
   */
  public SolrQuery addFacetQuery(String f) {
    this.add(FacetParams.FACET_QUERY, f);
+   this.set(FacetParams.FACET, true);
    return this;
  }
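From the client side this means the explicit facet switch becomes redundant; a short sketch of the SOLR-1825 behavior (the field and range below are invented for illustration):

import org.apache.solr.client.solrj.SolrQuery;

public class FacetQuerySketch {
  public static SolrQuery buildFacetedQuery() {
    SolrQuery query = new SolrQuery("*:*");
    // addFacetQuery now sets facet=true itself, matching addFacetField;
    // the previously required query.setFacet(true) call can be dropped.
    query.addFacetQuery("price:[0 TO 100]");
    return query;
  }
}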

View File

@ -19,6 +19,7 @@ package org.apache.solr.client.solrj.response;
  import java.io.Serializable;
  import java.util.ArrayList;
+ import java.util.Collections;
  import java.util.Date;
  import java.util.List;
@ -145,7 +146,7 @@ import org.apache.solr.client.solrj.util.ClientUtils;
  }

  public List<Count> getValues() {
-   return _values;
+   return _values == null ? Collections.<Count>emptyList() : _values;
  }

  public int getValueCount()
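With SOLR-2027 callers no longer need a null guard before iterating; a minimal usage sketch:

import org.apache.solr.client.solrj.response.FacetField;

public class FacetValuesSketch {
  // getValues() now returns an empty list instead of null, so iteration
  // is always safe even when no facet values came back.
  public static void printCounts(FacetField facetField) {
    for (FacetField.Count count : facetField.getValues()) {
      System.out.println(count.getName() + ": " + count.getCount());
    }
  }
}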

View File

@ -195,4 +195,10 @@ public class SolrQueryTest extends LuceneTestCase {
    q.setTermsRegexFlag("multiline");
    assertEquals(2, q.getTermsRegexFlags().length);
  }

+ public void testAddFacetQuery() {
+   SolrQuery solrQuery = new SolrQuery();
+   solrQuery.addFacetQuery("field:value");
+   assertTrue("Adding a Facet Query should enable facets", solrQuery.getBool(FacetParams.FACET));
+ }
}

View File

@ -0,0 +1,33 @@
package org.apache.solr.client.solrj.response;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.util.LuceneTestCase;
public class FacetFieldTest extends LuceneTestCase {
public void testGetValues() {
FacetField facetField = new FacetField("field");
assertNotNull(facetField.getValues());
assertEquals(0, facetField.getValues().size());
facetField.add("value", 1);
assertEquals(1, facetField.getValues().size());
}
}