diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 7d76af0b813..a4c3d2556f4 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -421,6 +421,8 @@ New features
* LUCENE-2862: Added TermsEnum.totalTermFreq() and
Terms.getSumTotalTermFreq(). (Mike McCandless, Robert Muir)
+* LUCENE-3290: Added Terms.getSumDocFreq() (Mike McCandless, Robert Muir)
+
* LUCENE-3003: Added new expert class oal.index.DocTermsOrd,
refactored from Solr's UnInvertedField, for accessing term ords for
multi-valued fields, per document. This is similar to FieldCache in
@@ -512,6 +514,11 @@ Bug fixes
causing the file to sometimes be larger than it needed to be. (Mike
McCandless)
+New Features
+
+* LUCENE-3290: Added FieldInvertState.getUniqueTermCount()
+  (Mike McCandless, Robert Muir)
+
Optimizations
* LUCENE-3201, LUCENE-3218: CompoundFileSystem code has been consolidated
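
For context, a minimal sketch (not part of the patch) of reading the two aggregate
statistics named in these entries side by side; MultiFields.getTerms and the field
name passed in are assumptions for illustration:

    import java.io.IOException;

    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.MultiFields;
    import org.apache.lucene.index.Terms;

    public class TermStatsSketch {
      // Each getter returns -1 when the codec does not store that measure.
      public static void printStats(IndexReader reader, String field) throws IOException {
        Terms terms = MultiFields.getTerms(reader, field);
        if (terms != null) {
          System.out.println("sumTotalTermFreq=" + terms.getSumTotalTermFreq());
          System.out.println("sumDocFreq=" + terms.getSumDocFreq());
        }
      }
    }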
diff --git a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
index b66a7b1ed70..396b29ed58c 100644
--- a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
+++ b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
@@ -53,24 +53,13 @@ import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.queryParser.ParseException;
-import org.apache.lucene.queryParser.QueryParser;
-import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.FilteredQuery;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.MultiPhraseQuery;
-import org.apache.lucene.search.MultiTermQuery;
-import org.apache.lucene.search.NumericRangeQuery;
-import org.apache.lucene.search.PhraseQuery;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.TermRangeFilter;
-import org.apache.lucene.search.TopDocs;
-import org.apache.lucene.search.WildcardQuery;
+import org.apache.lucene.search.*;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner;
import org.apache.lucene.search.regex.RegexQuery;
import org.apache.lucene.search.spans.*;
import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.automaton.BasicAutomata;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.apache.lucene.util.automaton.RegExp;
@@ -102,8 +91,12 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
public void testQueryScorerHits() throws Exception {
Analyzer analyzer = new MockAnalyzer(random, MockTokenizer.SIMPLE, true);
- QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, FIELD_NAME, analyzer);
- query = qp.parse("\"very long\"");
+
+ PhraseQuery phraseQuery = new PhraseQuery();
+ phraseQuery.add(new Term(FIELD_NAME, "very"));
+ phraseQuery.add(new Term(FIELD_NAME, "long"));
+
+ query = phraseQuery;
searcher = new IndexSearcher(ramDir, true);
TopDocs hits = searcher.search(query, 10);
@@ -133,12 +126,15 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
String s1 = "I call our world Flatland, not because we call it so,";
- QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, FIELD_NAME, new MockAnalyzer(random, MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true));
-
// Verify that a query against the default field results in text being
// highlighted
// regardless of the field name.
- Query q = parser.parse("\"world Flatland\"~3");
+
+ PhraseQuery q = new PhraseQuery();
+ q.setSlop(3);
+ q.add(new Term(FIELD_NAME, "world"));
+ q.add(new Term(FIELD_NAME, "flatland"));
+
String expected = "I call our world Flatland, not because we call it so,";
String observed = highlightField(q, "SOME_FIELD_NAME", s1);
if (VERBOSE) System.out.println("Expected: \"" + expected + "\n" + "Observed: \"" + observed);
@@ -150,7 +146,11 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
// when the query field name differs from the name of the field being
// highlighted,
// which in this example happens to be the default field name.
- q = parser.parse("text:\"world Flatland\"~3");
+ q = new PhraseQuery();
+ q.setSlop(3);
+ q.add(new Term("text", "world"));
+ q.add(new Term("text", "flatland"));
+
expected = s1;
observed = highlightField(q, FIELD_NAME, s1);
if (VERBOSE) System.out.println("Expected: \"" + expected + "\n" + "Observed: \"" + observed);
@@ -177,7 +177,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
}
public void testSimpleSpanHighlighter() throws Exception {
- doSearching("Kennedy");
+ doSearching(new TermQuery(new Term(FIELD_NAME, "kennedy")));
int maxNumFragmentsRequired = 2;
@@ -202,23 +202,49 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
// LUCENE-1752
public void testRepeatingTermsInMultBooleans() throws Exception {
String content = "x y z a b c d e f g b c g";
- String ph1 = "\"a b c d\"";
- String ph2 = "\"b c g\"";
String f1 = "f1";
String f2 = "f2";
- String f1c = f1 + ":";
- String f2c = f2 + ":";
- String q = "(" + f1c + ph1 + " OR " + f2c + ph1 + ") AND (" + f1c + ph2
- + " OR " + f2c + ph2 + ")";
- Analyzer analyzer = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false);
- QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, f1, analyzer);
- Query query = qp.parse(q);
- QueryScorer scorer = new QueryScorer(query, f1);
+ PhraseQuery f1ph1 = new PhraseQuery();
+ f1ph1.add(new Term(f1, "a"));
+ f1ph1.add(new Term(f1, "b"));
+ f1ph1.add(new Term(f1, "c"));
+ f1ph1.add(new Term(f1, "d"));
+
+ PhraseQuery f2ph1 = new PhraseQuery();
+ f2ph1.add(new Term(f2, "a"));
+ f2ph1.add(new Term(f2, "b"));
+ f2ph1.add(new Term(f2, "c"));
+ f2ph1.add(new Term(f2, "d"));
+
+ PhraseQuery f1ph2 = new PhraseQuery();
+ f1ph2.add(new Term(f1, "b"));
+ f1ph2.add(new Term(f1, "c"));
+ f1ph2.add(new Term(f1, "g"));
+
+ PhraseQuery f2ph2 = new PhraseQuery();
+ f2ph2.add(new Term(f2, "b"));
+ f2ph2.add(new Term(f2, "c"));
+ f2ph2.add(new Term(f2, "g"));
+
+ BooleanQuery booleanQuery = new BooleanQuery();
+ BooleanQuery leftChild = new BooleanQuery();
+ leftChild.add(f1ph1, Occur.SHOULD);
+ leftChild.add(f2ph1, Occur.SHOULD);
+ booleanQuery.add(leftChild, Occur.MUST);
+
+ BooleanQuery rightChild = new BooleanQuery();
+ rightChild.add(f1ph2, Occur.SHOULD);
+ rightChild.add(f2ph2, Occur.SHOULD);
+ booleanQuery.add(rightChild, Occur.MUST);
+
+ QueryScorer scorer = new QueryScorer(booleanQuery, f1);
scorer.setExpandMultiTermQuery(false);
Highlighter h = new Highlighter(this, scorer);
+ Analyzer analyzer = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false);
+
h.getBestFragment(analyzer, f1, content);
assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
@@ -226,7 +252,11 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
}
public void testSimpleQueryScorerPhraseHighlighting() throws Exception {
- doSearching("\"very long and contains\"");
+ PhraseQuery phraseQuery = new PhraseQuery();
+ phraseQuery.add(new Term(FIELD_NAME, "very"));
+ phraseQuery.add(new Term(FIELD_NAME, "long"));
+ phraseQuery.add(new Term(FIELD_NAME, "contains"), 3);
+ doSearching(phraseQuery);
int maxNumFragmentsRequired = 2;
@@ -248,7 +278,14 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
numHighlights == 3);
numHighlights = 0;
- doSearching("\"This piece of text refers to Kennedy\"");
+
+ phraseQuery = new PhraseQuery();
+ phraseQuery.add(new Term(FIELD_NAME, "piece"), 1);
+ phraseQuery.add(new Term(FIELD_NAME, "text"), 3);
+ phraseQuery.add(new Term(FIELD_NAME, "refers"), 4);
+ phraseQuery.add(new Term(FIELD_NAME, "kennedy"), 6);
+
+ doSearching(phraseQuery);
maxNumFragmentsRequired = 2;
@@ -270,7 +307,14 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
numHighlights == 4);
numHighlights = 0;
- doSearching("\"lets is a the lets is a the lets is a the lets\"");
+
+ phraseQuery = new PhraseQuery();
+ phraseQuery.add(new Term(FIELD_NAME, "lets"));
+ phraseQuery.add(new Term(FIELD_NAME, "lets"), 4);
+ phraseQuery.add(new Term(FIELD_NAME, "lets"), 8);
+ phraseQuery.add(new Term(FIELD_NAME, "lets"), 12);
+
+ doSearching(phraseQuery);
maxNumFragmentsRequired = 2;
@@ -366,7 +410,12 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
}
public void testSimpleQueryScorerPhraseHighlighting2() throws Exception {
- doSearching("\"text piece long\"~5");
+ PhraseQuery phraseQuery = new PhraseQuery();
+ phraseQuery.setSlop(5);
+ phraseQuery.add(new Term(FIELD_NAME, "text"));
+ phraseQuery.add(new Term(FIELD_NAME, "piece"));
+ phraseQuery.add(new Term(FIELD_NAME, "long"));
+ doSearching(phraseQuery);
int maxNumFragmentsRequired = 2;
@@ -388,7 +437,11 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
}
public void testSimpleQueryScorerPhraseHighlighting3() throws Exception {
- doSearching("\"x y z\"");
+ PhraseQuery phraseQuery = new PhraseQuery();
+ phraseQuery.add(new Term(FIELD_NAME, "x"));
+ phraseQuery.add(new Term(FIELD_NAME, "y"));
+ phraseQuery.add(new Term(FIELD_NAME, "z"));
+ doSearching(phraseQuery);
int maxNumFragmentsRequired = 2;
@@ -410,7 +463,12 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
}
public void testSimpleSpanFragmenter() throws Exception {
- doSearching("\"piece of text that is very long\"");
+ PhraseQuery phraseQuery = new PhraseQuery();
+ phraseQuery.add(new Term(FIELD_NAME, "piece"));
+ phraseQuery.add(new Term(FIELD_NAME, "text"), 2);
+ phraseQuery.add(new Term(FIELD_NAME, "very"), 5);
+ phraseQuery.add(new Term(FIELD_NAME, "long"), 6);
+ doSearching(phraseQuery);
int maxNumFragmentsRequired = 2;
@@ -428,8 +486,12 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
if (VERBOSE) System.out.println("\t" + result);
}
-
- doSearching("\"been shot\"");
+
+ phraseQuery = new PhraseQuery();
+ phraseQuery.add(new Term(FIELD_NAME, "been"));
+ phraseQuery.add(new Term(FIELD_NAME, "shot"));
+
+      doSearching(phraseQuery);
maxNumFragmentsRequired = 2;
@@ -451,7 +513,16 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
// position sensitive query added after position insensitive query
public void testPosTermStdTerm() throws Exception {
- doSearching("y \"x y z\"");
+ BooleanQuery booleanQuery = new BooleanQuery();
+ booleanQuery.add(new TermQuery(new Term(FIELD_NAME, "y")), Occur.SHOULD);
+
+ PhraseQuery phraseQuery = new PhraseQuery();
+ phraseQuery.add(new Term(FIELD_NAME, "x"));
+ phraseQuery.add(new Term(FIELD_NAME, "y"));
+ phraseQuery.add(new Term(FIELD_NAME, "z"));
+ booleanQuery.add(phraseQuery, Occur.SHOULD);
+
+ doSearching(booleanQuery);
int maxNumFragmentsRequired = 2;
@@ -525,7 +596,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
}
public void testSimpleQueryTermScorerHighlighter() throws Exception {
- doSearching("Kennedy");
+ doSearching(new TermQuery(new Term(FIELD_NAME, "kennedy")));
Highlighter highlighter = new Highlighter(new QueryTermScorer(query));
highlighter.setTextFragmenter(new SimpleFragmenter(40));
int maxNumFragmentsRequired = 2;
@@ -591,7 +662,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
@Override
public void run() throws Exception {
numHighlights = 0;
- doSearching("Kennedy");
+ doSearching(new TermQuery(new Term(FIELD_NAME, "kennedy")));
doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
numHighlights == 4);
@@ -607,7 +678,9 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
@Override
public void run() throws Exception {
numHighlights = 0;
- doSearching("Kinnedy~0.5");
+ FuzzyQuery fuzzyQuery = new FuzzyQuery(new Term(FIELD_NAME, "kinnedy"), 0.5f);
+ fuzzyQuery.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
+ doSearching(fuzzyQuery);
doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this, true);
assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
numHighlights == 5);
@@ -623,7 +696,9 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
@Override
public void run() throws Exception {
numHighlights = 0;
- doSearching("K?nnedy");
+ WildcardQuery wildcardQuery = new WildcardQuery(new Term(FIELD_NAME, "k?nnedy"));
+ wildcardQuery.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
+ doSearching(wildcardQuery);
doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
numHighlights == 4);
@@ -639,7 +714,9 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
@Override
public void run() throws Exception {
numHighlights = 0;
- doSearching("K*dy");
+ WildcardQuery wildcardQuery = new WildcardQuery(new Term(FIELD_NAME, "k*dy"));
+ wildcardQuery.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
+ doSearching(wildcardQuery);
doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
numHighlights == 5);
@@ -660,9 +737,15 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
// Need to explicitly set the QueryParser property to use TermRangeQuery
// rather
// than RangeFilters
- QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, FIELD_NAME, analyzer);
- parser.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
- query = parser.parse(queryString);
+
+ TermRangeQuery rangeQuery = new TermRangeQuery(
+ FIELD_NAME,
+ new BytesRef("kannedy"),
+ new BytesRef("kznnedy"),
+ true, true);
+ rangeQuery.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
+
+ query = rangeQuery;
doSearching(query);
doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
@@ -772,7 +855,10 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
@Override
public void run() throws Exception {
numHighlights = 0;
- doSearching("\"John Kennedy\"");
+ PhraseQuery phraseQuery = new PhraseQuery();
+ phraseQuery.add(new Term(FIELD_NAME, "john"));
+ phraseQuery.add(new Term(FIELD_NAME, "kennedy"));
+ doSearching(phraseQuery);
doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
// Currently highlights "John" and "Kennedy" separately
assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
@@ -874,7 +960,13 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
@Override
public void run() throws Exception {
numHighlights = 0;
- doSearching("John Kenn*");
+ BooleanQuery booleanQuery = new BooleanQuery();
+ booleanQuery.add(new TermQuery(new Term(FIELD_NAME, "john")), Occur.SHOULD);
+ PrefixQuery prefixQuery = new PrefixQuery(new Term(FIELD_NAME, "kenn"));
+ prefixQuery.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
+ booleanQuery.add(prefixQuery, Occur.SHOULD);
+
+ doSearching(booleanQuery);
doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
numHighlights == 5);
@@ -890,7 +982,12 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
@Override
public void run() throws Exception {
numHighlights = 0;
- doSearching("JFK OR Kennedy");
+
+ BooleanQuery query = new BooleanQuery();
+ query.add(new TermQuery(new Term(FIELD_NAME, "jfk")), Occur.SHOULD);
+ query.add(new TermQuery(new Term(FIELD_NAME, "kennedy")), Occur.SHOULD);
+
+ doSearching(query);
doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
numHighlights == 5);
@@ -905,7 +1002,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
@Override
public void run() throws Exception {
- doSearching("Kennedy");
+ doSearching(new TermQuery(new Term(FIELD_NAME, "kennedy")));
numHighlights = 0;
for (int i = 0; i < hits.totalHits; i++) {
String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
@@ -1006,11 +1103,13 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
     HashMap<String,String> synonyms = new HashMap<String,String>();
synonyms.put("football", "soccer,footie");
Analyzer analyzer = new SynonymAnalyzer(synonyms);
- String srchkey = "football";
String s = "football-soccer in the euro 2004 footie competition";
- QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, "bookid", analyzer);
- Query query = parser.parse(srchkey);
+
+ BooleanQuery query = new BooleanQuery();
+ query.add(new TermQuery(new Term("bookid", "football")), Occur.SHOULD);
+ query.add(new TermQuery(new Term("bookid", "soccer")), Occur.SHOULD);
+ query.add(new TermQuery(new Term("bookid", "footie")), Occur.SHOULD);
TokenStream tokenStream = analyzer.tokenStream(null, new StringReader(s));
@@ -1037,7 +1136,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
@Override
public void run() throws Exception {
numHighlights = 0;
- doSearching("Kennedy");
+ doSearching(new TermQuery(new Term(FIELD_NAME, "kennedy")));
// new Highlighter(HighlighterTest.this, new QueryTermScorer(query));
for (int i = 0; i < hits.totalHits; i++) {
@@ -1061,7 +1160,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
@Override
public void run() throws Exception {
- doSearching("Kennedy");
+ doSearching(new TermQuery(new Term(FIELD_NAME, "kennedy")));
for (int i = 0; i < hits.totalHits; i++) {
String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
@@ -1102,7 +1201,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
@Override
public void run() throws Exception {
numHighlights = 0;
- doSearching("meat");
+ doSearching(new TermQuery(new Term(FIELD_NAME, "meat")));
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(texts[0]));
Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream,
HighlighterTest.this);// new Highlighter(this, new
@@ -1199,9 +1298,11 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
if (searcher != null) searcher.close();
searcher = new IndexSearcher(ramDir, true);
Analyzer analyzer = new MockAnalyzer(random, MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true);
+
+ BooleanQuery query = new BooleanQuery();
+ query.add(new WildcardQuery(new Term(FIELD_NAME, "jf?")), Occur.SHOULD);
+ query.add(new WildcardQuery(new Term(FIELD_NAME, "kenned*")), Occur.SHOULD);
- QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, FIELD_NAME, analyzer);
- Query query = parser.parse("JF? or Kenned*");
if (VERBOSE) System.out.println("Searching with primitive query");
// forget to set this and...
// query=query.rewrite(reader);
@@ -1243,7 +1344,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
@Override
public void run() throws Exception {
- doSearching("AnInvalidQueryWhichShouldYieldNoResults");
+ doSearching(new TermQuery(new Term(FIELD_NAME, "aninvalidquerywhichshouldyieldnoresults")));
for (String text : texts) {
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
@@ -1313,8 +1414,10 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
@Override
public void run() throws Exception {
String docMainText = "fred is one of the people";
- QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, FIELD_NAME, analyzer);
- Query query = parser.parse("fred category:people");
+
+ BooleanQuery query = new BooleanQuery();
+ query.add(new TermQuery(new Term(FIELD_NAME, "fred")), Occur.SHOULD);
+ query.add(new TermQuery(new Term("category", "people")), Occur.SHOULD);
// highlighting respects fieldnames used in query
@@ -1453,64 +1556,68 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
Highlighter highlighter;
String result;
- query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("foo");
+ query = new TermQuery(new Term("text", "foo"));
highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
result = highlighter.getBestFragments(getTS2(), s, 3, "...");
assertEquals("Hi-Speed10 foo", result);
- query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("10");
+ query = new TermQuery(new Term("text", "10"));
highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
result = highlighter.getBestFragments(getTS2(), s, 3, "...");
assertEquals("Hi-Speed10 foo", result);
- query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("hi");
+ query = new TermQuery(new Term("text", "hi"));
highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
result = highlighter.getBestFragments(getTS2(), s, 3, "...");
assertEquals("Hi-Speed10 foo", result);
- query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("speed");
+ query = new TermQuery(new Term("text", "speed"));
highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
result = highlighter.getBestFragments(getTS2(), s, 3, "...");
assertEquals("Hi-Speed10 foo", result);
- query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("hispeed");
+ query = new TermQuery(new Term("text", "hispeed"));
highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
result = highlighter.getBestFragments(getTS2(), s, 3, "...");
assertEquals("Hi-Speed10 foo", result);
- query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("hi speed");
+ BooleanQuery booleanQuery = new BooleanQuery();
+ booleanQuery.add(new TermQuery(new Term("text", "hi")), Occur.SHOULD);
+ booleanQuery.add(new TermQuery(new Term("text", "speed")), Occur.SHOULD);
+
+ query = booleanQuery;
highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
result = highlighter.getBestFragments(getTS2(), s, 3, "...");
assertEquals("Hi-Speed10 foo", result);
// ///////////////// same tests, just put the bigger overlapping token
// first
- query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("foo");
+ query = new TermQuery(new Term("text", "foo"));
highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
assertEquals("Hi-Speed10 foo", result);
- query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("10");
+ query = new TermQuery(new Term("text", "10"));
highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
assertEquals("Hi-Speed10 foo", result);
- query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("hi");
+ query = new TermQuery(new Term("text", "hi"));
highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
assertEquals("Hi-Speed10 foo", result);
- query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("speed");
+ query = new TermQuery(new Term("text", "speed"));
highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
assertEquals("Hi-Speed10 foo", result);
- query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("hispeed");
+ query = new TermQuery(new Term("text", "hispeed"));
highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
assertEquals("Hi-Speed10 foo", result);
- query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("hi speed");
+ query = booleanQuery;
highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
assertEquals("Hi-Speed10 foo", result);
@@ -1554,9 +1661,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
}
private void searchIndex() throws IOException, ParseException, InvalidTokenOffsetsException {
- String q = "t_text1:random";
- QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, "t_text1", a );
- Query query = parser.parse( q );
+ Query query = new TermQuery(new Term("t_text1", "random"));
IndexSearcher searcher = new IndexSearcher( dir, true );
// This scorer can return negative idf -> null fragment
Scorer scorer = new QueryTermScorer( query, searcher.getIndexReader(), "t_text1" );
@@ -1608,14 +1713,6 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
return "" + originalText + "";
}
- public void doSearching(String queryString) throws Exception {
- QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, FIELD_NAME, analyzer);
- parser.setEnablePositionIncrements(true);
- parser.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
- query = parser.parse(queryString);
- doSearching(query);
- }
-
public void doSearching(Query unReWrittenQuery) throws Exception {
if (searcher != null) searcher.close();
searcher = new IndexSearcher(ramDir, true);
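
The substitution this file makes everywhere, restated as one standalone sketch: each
QueryParser.parse call becomes a hand-built query, so the analysis step (here,
lowercasing) must also be done by hand; the helper name is hypothetical:

    import org.apache.lucene.index.Term;
    import org.apache.lucene.search.PhraseQuery;

    final class ParserFreeQueries {
      // Hand-built equivalent of parsing "\"world Flatland\"~3" with a
      // lowercasing analyzer: slop and pre-analyzed terms are supplied directly.
      static PhraseQuery worldFlatland(String field) {
        PhraseQuery q = new PhraseQuery();
        q.setSlop(3);
        q.add(new Term(field, "world"));
        q.add(new Term(field, "flatland"));
        return q;
      }
    }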
diff --git a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
index 0f19ebfd459..d7fbe0b0909 100644
--- a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
+++ b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
@@ -19,7 +19,10 @@ package org.apache.lucene.search.vectorhighlight;
import java.io.IOException;
import java.io.Reader;
+import java.io.StringReader;
+import java.util.ArrayList;
import java.util.Collection;
+import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
@@ -28,6 +31,7 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
@@ -44,6 +48,7 @@ import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
public abstract class AbstractTestCase extends LuceneTestCase {
@@ -56,9 +61,7 @@ public abstract class AbstractTestCase extends LuceneTestCase {
protected Analyzer analyzerB;
protected Analyzer analyzerK;
protected IndexReader reader;
- protected QueryParser paW;
- protected QueryParser paB;
-
+
protected static final String[] shortMVValues = {
"",
"",
@@ -90,8 +93,6 @@ public abstract class AbstractTestCase extends LuceneTestCase {
analyzerW = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false);
analyzerB = new BigramAnalyzer();
analyzerK = new MockAnalyzer(random, MockTokenizer.KEYWORD, false);
- paW = new QueryParser(TEST_VERSION_CURRENT, F, analyzerW );
- paB = new QueryParser(TEST_VERSION_CURRENT, F, analyzerB );
dir = newDirectory();
}
@@ -172,6 +173,33 @@ public abstract class AbstractTestCase extends LuceneTestCase {
}
}
+  protected List<BytesRef> analyze(String text, String field, Analyzer analyzer) throws IOException {
+    List<BytesRef> bytesRefs = new ArrayList<BytesRef>();
+
+ TokenStream tokenStream = analyzer.reusableTokenStream(field, new StringReader(text));
+ TermToBytesRefAttribute termAttribute = tokenStream.getAttribute(TermToBytesRefAttribute.class);
+
+ BytesRef bytesRef = termAttribute.getBytesRef();
+
+ while (tokenStream.incrementToken()) {
+ termAttribute.fillBytesRef();
+ bytesRefs.add(new BytesRef(bytesRef));
+ }
+
+ tokenStream.end();
+ tokenStream.close();
+
+ return bytesRefs;
+ }
+
+  protected PhraseQuery toPhraseQuery(List<BytesRef> bytesRefs, String field) {
+ PhraseQuery phraseQuery = new PhraseQuery();
+ for (BytesRef bytesRef : bytesRefs) {
+ phraseQuery.add(new Term(field, bytesRef));
+ }
+ return phraseQuery;
+ }
+
static final class BigramAnalyzer extends Analyzer {
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
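
A usage sketch for the two helpers added above, as a subclass of AbstractTestCase
would combine them (the method name is hypothetical; the behavior matches the
removed paB.parse("\"BCD\"") pattern):

    // Run the bigram analyzer over the text, then turn the resulting terms
    // into a PhraseQuery on field F.
    protected PhraseQuery bigramPhrase(String text) throws IOException {
      return toPhraseQuery(analyze(text, F, analyzerB), F);
    }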
diff --git a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java
index 42924fdc8c9..ac0e69ddd81 100644
--- a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java
+++ b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java
@@ -22,19 +22,33 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.vectorhighlight.FieldQuery.QueryPhraseMap;
import org.apache.lucene.search.vectorhighlight.FieldTermStack.TermInfo;
+import org.apache.lucene.util.BytesRef;
public class FieldQueryTest extends AbstractTestCase {
public void testFlattenBoolean() throws Exception {
- Query query = paW.parse( "A AND B OR C NOT (D AND E)" );
- FieldQuery fq = new FieldQuery( query, true, true );
+ BooleanQuery booleanQuery = new BooleanQuery();
+ booleanQuery.add(new TermQuery(new Term(F, "A")), Occur.MUST);
+ booleanQuery.add(new TermQuery(new Term(F, "B")), Occur.MUST);
+ booleanQuery.add(new TermQuery(new Term(F, "C")), Occur.SHOULD);
+
+ BooleanQuery innerQuery = new BooleanQuery();
+ innerQuery.add(new TermQuery(new Term(F, "D")), Occur.MUST);
+ innerQuery.add(new TermQuery(new Term(F, "E")), Occur.MUST);
+ booleanQuery.add(innerQuery, Occur.MUST_NOT);
+
+ FieldQuery fq = new FieldQuery(booleanQuery, true, true );
     Set<Query> flatQueries = new HashSet<Query>();
- fq.flatten( query, flatQueries );
+ fq.flatten(booleanQuery, flatQueries);
assertCollectionQueries( flatQueries, tq( "A" ), tq( "B" ), tq( "C" ) );
}
@@ -47,15 +61,25 @@ public class FieldQueryTest extends AbstractTestCase {
}
public void testFlattenTermAndPhrase() throws Exception {
- Query query = paW.parse( "A AND \"B C\"" );
- FieldQuery fq = new FieldQuery( query, true, true );
+ BooleanQuery booleanQuery = new BooleanQuery();
+ booleanQuery.add(new TermQuery(new Term(F, "A")), Occur.MUST);
+ PhraseQuery phraseQuery = new PhraseQuery();
+ phraseQuery.add(new Term(F, "B"));
+ phraseQuery.add(new Term(F, "C"));
+ booleanQuery.add(phraseQuery, Occur.MUST);
+
+ FieldQuery fq = new FieldQuery(booleanQuery, true, true );
     Set<Query> flatQueries = new HashSet<Query>();
- fq.flatten( query, flatQueries );
+ fq.flatten(booleanQuery, flatQueries);
assertCollectionQueries( flatQueries, tq( "A" ), pqF( "B", "C" ) );
}
public void testFlattenTermAndPhrase2gram() throws Exception {
- Query query = paB.parse( "AA AND \"BCD\" OR \"EFGH\"" );
+ BooleanQuery query = new BooleanQuery();
+ query.add(new TermQuery(new Term(F, "AA")), Occur.MUST);
+ query.add(toPhraseQuery(analyze("BCD", F, analyzerB), F), Occur.MUST);
+ query.add(toPhraseQuery(analyze("EFGH", F, analyzerB), F), Occur.SHOULD);
+
FieldQuery fq = new FieldQuery( query, true, true );
     Set<Query> flatQueries = new HashSet<Query>();
fq.flatten( query, flatQueries );
@@ -232,7 +256,16 @@ public class FieldQueryTest extends AbstractTestCase {
}
public void testGetTermSet() throws Exception {
- Query query = paW.parse( "A AND B OR x:C NOT (D AND E)" );
+ BooleanQuery query = new BooleanQuery();
+ query.add(new TermQuery(new Term(F, "A")), Occur.MUST);
+ query.add(new TermQuery(new Term(F, "B")), Occur.MUST);
+ query.add(new TermQuery(new Term("x", "C")), Occur.SHOULD);
+
+ BooleanQuery innerQuery = new BooleanQuery();
+ innerQuery.add(new TermQuery(new Term(F, "D")), Occur.MUST);
+ innerQuery.add(new TermQuery(new Term(F, "E")), Occur.MUST);
+ query.add(innerQuery, Occur.MUST_NOT);
+
FieldQuery fq = new FieldQuery( query, true, true );
assertEquals( 2, fq.termSetMap.size() );
     Set<String> termSet = fq.getTermSet( F );
@@ -679,8 +712,10 @@ public class FieldQueryTest extends AbstractTestCase {
}
public void testQueryPhraseMapOverlap2gram() throws Exception {
- Query query = paB.parse( "\"abc\" AND \"bcd\"" );
-
+ BooleanQuery query = new BooleanQuery();
+ query.add(toPhraseQuery(analyze("abc", F, analyzerB), F), Occur.MUST);
+ query.add(toPhraseQuery(analyze("bcd", F, analyzerB), F), Occur.MUST);
+
// phraseHighlight = true, fieldMatch = true
FieldQuery fq = new FieldQuery( query, true, true );
     Map<String, QueryPhraseMap> map = fq.rootMaps;
diff --git a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/ScoreOrderFragmentsBuilderTest.java b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/ScoreOrderFragmentsBuilderTest.java
index 31bc37d7e5a..961bd4cbe8d 100644
--- a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/ScoreOrderFragmentsBuilderTest.java
+++ b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/ScoreOrderFragmentsBuilderTest.java
@@ -17,12 +17,20 @@ package org.apache.lucene.search.vectorhighlight;
* limitations under the License.
*/
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
public class ScoreOrderFragmentsBuilderTest extends AbstractTestCase {
public void test3Frags() throws Exception {
- FieldFragList ffl = ffl( "a c", "a b b b b b b b b b b b a b a b b b b b c a a b b" );
+ BooleanQuery query = new BooleanQuery();
+ query.add(new TermQuery(new Term(F, "a")), BooleanClause.Occur.SHOULD);
+ query.add(new TermQuery(new Term(F, "c")), BooleanClause.Occur.SHOULD);
+
+ FieldFragList ffl = ffl(query, "a b b b b b b b b b b b a b a b b b b b c a a b b" );
ScoreOrderFragmentsBuilder sofb = new ScoreOrderFragmentsBuilder();
String[] f = sofb.createFragments( reader, 0, F, ffl, 3 );
assertEquals( 3, f.length );
@@ -32,9 +40,8 @@ public class ScoreOrderFragmentsBuilderTest extends AbstractTestCase {
assertEquals( "a b b b b b b b b b ", f[2] );
}
- private FieldFragList ffl( String queryValue, String indexValue ) throws Exception {
+ private FieldFragList ffl(Query query, String indexValue ) throws Exception {
make1d1fIndex( indexValue );
- Query query = paW.parse( queryValue );
FieldQuery fq = new FieldQuery( query, true, true );
FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
FieldPhraseList fpl = new FieldPhraseList( stack, fq );
diff --git a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilderTest.java b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilderTest.java
index b8d2a2b1ce5..20d0949a0e9 100644
--- a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilderTest.java
+++ b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilderTest.java
@@ -17,20 +17,21 @@ package org.apache.lucene.search.vectorhighlight;
* limitations under the License.
*/
-import org.apache.lucene.search.Query;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.*;
public class SimpleFragListBuilderTest extends AbstractTestCase {
public void testNullFieldFragList() throws Exception {
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
- FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "b c d" ), 100 );
+ FieldFragList ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "b c d" ), 100 );
assertEquals( 0, ffl.getFragInfos().size() );
}
public void testTooSmallFragSize() throws Exception {
try{
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
- sflb.createFieldFragList( fpl( "a", "b c d" ), SimpleFragListBuilder.MIN_FRAG_CHAR_SIZE - 1 );
+ sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "b c d" ), SimpleFragListBuilder.MIN_FRAG_CHAR_SIZE - 1 );
fail( "IllegalArgumentException must be thrown" );
}
catch ( IllegalArgumentException expected ) {
@@ -39,14 +40,19 @@ public class SimpleFragListBuilderTest extends AbstractTestCase {
public void testSmallerFragSizeThanTermQuery() throws Exception {
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
- FieldFragList ffl = sflb.createFieldFragList( fpl( "abcdefghijklmnopqrs", "abcdefghijklmnopqrs" ), SimpleFragListBuilder.MIN_FRAG_CHAR_SIZE );
+ FieldFragList ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "abcdefghijklmnopqrs")), "abcdefghijklmnopqrs" ), SimpleFragListBuilder.MIN_FRAG_CHAR_SIZE );
assertEquals( 1, ffl.getFragInfos().size() );
assertEquals( "subInfos=(abcdefghijklmnopqrs((0,19)))/1.0(0,19)", ffl.getFragInfos().get( 0 ).toString() );
}
public void testSmallerFragSizeThanPhraseQuery() throws Exception {
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
- FieldFragList ffl = sflb.createFieldFragList( fpl( "\"abcdefgh jklmnopqrs\"", "abcdefgh jklmnopqrs" ), SimpleFragListBuilder.MIN_FRAG_CHAR_SIZE );
+
+ PhraseQuery phraseQuery = new PhraseQuery();
+ phraseQuery.add(new Term(F, "abcdefgh"));
+ phraseQuery.add(new Term(F, "jklmnopqrs"));
+
+ FieldFragList ffl = sflb.createFieldFragList( fpl(phraseQuery, "abcdefgh jklmnopqrs" ), SimpleFragListBuilder.MIN_FRAG_CHAR_SIZE );
assertEquals( 1, ffl.getFragInfos().size() );
if (VERBOSE) System.out.println( ffl.getFragInfos().get( 0 ).toString() );
assertEquals( "subInfos=(abcdefghjklmnopqrs((0,21)))/1.0(0,21)", ffl.getFragInfos().get( 0 ).toString() );
@@ -54,39 +60,39 @@ public class SimpleFragListBuilderTest extends AbstractTestCase {
public void test1TermIndex() throws Exception {
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
- FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "a" ), 100 );
+ FieldFragList ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "a" ), 100 );
assertEquals( 1, ffl.getFragInfos().size() );
assertEquals( "subInfos=(a((0,1)))/1.0(0,100)", ffl.getFragInfos().get( 0 ).toString() );
}
public void test2TermsIndex1Frag() throws Exception {
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
- FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "a a" ), 100 );
+ FieldFragList ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "a a" ), 100 );
assertEquals( 1, ffl.getFragInfos().size() );
assertEquals( "subInfos=(a((0,1))a((2,3)))/2.0(0,100)", ffl.getFragInfos().get( 0 ).toString() );
- ffl = sflb.createFieldFragList( fpl( "a", "a b b b b b b b b a" ), 20 );
+ ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "a b b b b b b b b a" ), 20 );
assertEquals( 1, ffl.getFragInfos().size() );
assertEquals( "subInfos=(a((0,1))a((18,19)))/2.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
- ffl = sflb.createFieldFragList( fpl( "a", "b b b b a b b b b a" ), 20 );
+ ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "b b b b a b b b b a" ), 20 );
assertEquals( 1, ffl.getFragInfos().size() );
assertEquals( "subInfos=(a((8,9))a((18,19)))/2.0(2,22)", ffl.getFragInfos().get( 0 ).toString() );
}
public void test2TermsIndex2Frags() throws Exception {
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
- FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "a b b b b b b b b b b b b b a" ), 20 );
+ FieldFragList ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "a b b b b b b b b b b b b b a" ), 20 );
assertEquals( 2, ffl.getFragInfos().size() );
assertEquals( "subInfos=(a((0,1)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
assertEquals( "subInfos=(a((28,29)))/1.0(22,42)", ffl.getFragInfos().get( 1 ).toString() );
- ffl = sflb.createFieldFragList( fpl( "a", "a b b b b b b b b b b b b a" ), 20 );
+ ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "a b b b b b b b b b b b b a" ), 20 );
assertEquals( 2, ffl.getFragInfos().size() );
assertEquals( "subInfos=(a((0,1)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
assertEquals( "subInfos=(a((26,27)))/1.0(20,40)", ffl.getFragInfos().get( 1 ).toString() );
- ffl = sflb.createFieldFragList( fpl( "a", "a b b b b b b b b b a" ), 20 );
+ ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "a b b b b b b b b b a" ), 20 );
assertEquals( 2, ffl.getFragInfos().size() );
assertEquals( "subInfos=(a((0,1)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
assertEquals( "subInfos=(a((20,21)))/1.0(20,40)", ffl.getFragInfos().get( 1 ).toString() );
@@ -94,41 +100,56 @@ public class SimpleFragListBuilderTest extends AbstractTestCase {
public void test2TermsQuery() throws Exception {
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
- FieldFragList ffl = sflb.createFieldFragList( fpl( "a b", "c d e" ), 20 );
+
+ BooleanQuery booleanQuery = new BooleanQuery();
+ booleanQuery.add(new TermQuery(new Term(F, "a")), BooleanClause.Occur.SHOULD);
+ booleanQuery.add(new TermQuery(new Term(F, "b")), BooleanClause.Occur.SHOULD);
+
+ FieldFragList ffl = sflb.createFieldFragList( fpl(booleanQuery, "c d e" ), 20 );
assertEquals( 0, ffl.getFragInfos().size() );
- ffl = sflb.createFieldFragList( fpl( "a b", "d b c" ), 20 );
+ ffl = sflb.createFieldFragList( fpl(booleanQuery, "d b c" ), 20 );
assertEquals( 1, ffl.getFragInfos().size() );
assertEquals( "subInfos=(b((2,3)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
- ffl = sflb.createFieldFragList( fpl( "a b", "a b c" ), 20 );
+ ffl = sflb.createFieldFragList( fpl(booleanQuery, "a b c" ), 20 );
assertEquals( 1, ffl.getFragInfos().size() );
assertEquals( "subInfos=(a((0,1))b((2,3)))/2.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
}
public void testPhraseQuery() throws Exception {
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
- FieldFragList ffl = sflb.createFieldFragList( fpl( "\"a b\"", "c d e" ), 20 );
+
+ PhraseQuery phraseQuery = new PhraseQuery();
+ phraseQuery.add(new Term(F, "a"));
+ phraseQuery.add(new Term(F, "b"));
+
+ FieldFragList ffl = sflb.createFieldFragList( fpl(phraseQuery, "c d e" ), 20 );
assertEquals( 0, ffl.getFragInfos().size() );
- ffl = sflb.createFieldFragList( fpl( "\"a b\"", "a c b" ), 20 );
+ ffl = sflb.createFieldFragList( fpl(phraseQuery, "a c b" ), 20 );
assertEquals( 0, ffl.getFragInfos().size() );
- ffl = sflb.createFieldFragList( fpl( "\"a b\"", "a b c" ), 20 );
+ ffl = sflb.createFieldFragList( fpl(phraseQuery, "a b c" ), 20 );
assertEquals( 1, ffl.getFragInfos().size() );
assertEquals( "subInfos=(ab((0,3)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
}
public void testPhraseQuerySlop() throws Exception {
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
- FieldFragList ffl = sflb.createFieldFragList( fpl( "\"a b\"~1", "a c b" ), 20 );
+
+ PhraseQuery phraseQuery = new PhraseQuery();
+ phraseQuery.setSlop(1);
+ phraseQuery.add(new Term(F, "a"));
+ phraseQuery.add(new Term(F, "b"));
+
+ FieldFragList ffl = sflb.createFieldFragList( fpl(phraseQuery, "a c b" ), 20 );
assertEquals( 1, ffl.getFragInfos().size() );
assertEquals( "subInfos=(ab((0,1)(4,5)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
}
- private FieldPhraseList fpl( String queryValue, String indexValue ) throws Exception {
+ private FieldPhraseList fpl(Query query, String indexValue ) throws Exception {
make1d1fIndex( indexValue );
- Query query = paW.parse( queryValue );
FieldQuery fq = new FieldQuery( query, true, true );
FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
return new FieldPhraseList( stack, fq );
diff --git a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java
index 72c17d8ac24..795a7884224 100644
--- a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java
+++ b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java
@@ -26,13 +26,17 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
public class SimpleFragmentsBuilderTest extends AbstractTestCase {
public void test1TermIndex() throws Exception {
- FieldFragList ffl = ffl( "a", "a" );
+ FieldFragList ffl = ffl(new TermQuery(new Term(F, "a")), "a" );
SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
assertEquals( "a ", sfb.createFragment( reader, 0, F, ffl ) );
@@ -42,7 +46,7 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase {
}
public void test2Frags() throws Exception {
- FieldFragList ffl = ffl( "a", "a b b b b b b b b b b b a b a b" );
+ FieldFragList ffl = ffl(new TermQuery(new Term(F, "a")), "a b b b b b b b b b b b a b a b" );
SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
String[] f = sfb.createFragments( reader, 0, F, ffl, 3 );
// 3 snippets requested, but should be 2
@@ -52,7 +56,11 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase {
}
public void test3Frags() throws Exception {
- FieldFragList ffl = ffl( "a c", "a b b b b b b b b b b b a b a b b b b b c a a b b" );
+ BooleanQuery booleanQuery = new BooleanQuery();
+ booleanQuery.add(new TermQuery(new Term(F, "a")), BooleanClause.Occur.SHOULD);
+ booleanQuery.add(new TermQuery(new Term(F, "c")), BooleanClause.Occur.SHOULD);
+
+ FieldFragList ffl = ffl(booleanQuery, "a b b b b b b b b b b b a b a b b b b b c a a b b" );
SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
String[] f = sfb.createFragments( reader, 0, F, ffl, 3 );
assertEquals( 3, f.length );
@@ -62,7 +70,7 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase {
}
public void testTagsAndEncoder() throws Exception {
- FieldFragList ffl = ffl( "a", " a
" );
+ FieldFragList ffl = ffl(new TermQuery(new Term(F, "a")), " a
" );
SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
String[] preTags = { "[" };
String[] postTags = { "]" };
@@ -70,9 +78,8 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase {
sfb.createFragment( reader, 0, F, ffl, preTags, postTags, new SimpleHTMLEncoder() ) );
}
- private FieldFragList ffl( String queryValue, String indexValue ) throws Exception {
+ private FieldFragList ffl(Query query, String indexValue ) throws Exception {
make1d1fIndex( indexValue );
- Query query = paW.parse( queryValue );
FieldQuery fq = new FieldQuery( query, true, true );
FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
FieldPhraseList fpl = new FieldPhraseList( stack, fq );
diff --git a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SingleFragListBuilderTest.java b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SingleFragListBuilderTest.java
index 1857ef90e24..0f42bae8b96 100644
--- a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SingleFragListBuilderTest.java
+++ b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SingleFragListBuilderTest.java
@@ -1,6 +1,8 @@
package org.apache.lucene.search.vectorhighlight;
+import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -23,27 +25,26 @@ public class SingleFragListBuilderTest extends AbstractTestCase {
public void testNullFieldFragList() throws Exception {
SingleFragListBuilder sflb = new SingleFragListBuilder();
- FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "b c d" ), 100 );
+ FieldFragList ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "b c d" ), 100 );
assertEquals( 0, ffl.getFragInfos().size() );
}
public void testShortFieldFragList() throws Exception {
SingleFragListBuilder sflb = new SingleFragListBuilder();
- FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "a b c d" ), 100 );
+ FieldFragList ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "a b c d" ), 100 );
assertEquals( 1, ffl.getFragInfos().size() );
assertEquals( "subInfos=(a((0,1)))/1.0(0,2147483647)", ffl.getFragInfos().get( 0 ).toString() );
}
public void testLongFieldFragList() throws Exception {
SingleFragListBuilder sflb = new SingleFragListBuilder();
- FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "a b c d", "a b c d e f g h i", "j k l m n o p q r s t u v w x y z a b c", "d e f g" ), 100 );
+ FieldFragList ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "a b c d", "a b c d e f g h i", "j k l m n o p q r s t u v w x y z a b c", "d e f g" ), 100 );
assertEquals( 1, ffl.getFragInfos().size() );
assertEquals( "subInfos=(a((0,1))a((8,9))a((60,61)))/3.0(0,2147483647)", ffl.getFragInfos().get( 0 ).toString() );
}
- private FieldPhraseList fpl( String queryValue, String... indexValues ) throws Exception {
+ private FieldPhraseList fpl(Query query, String... indexValues ) throws Exception {
make1dmfIndex( indexValues );
- Query query = paW.parse( queryValue );
FieldQuery fq = new FieldQuery( query, true, true );
FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
return new FieldPhraseList( stack, fq );
diff --git a/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java b/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java
index 57aca7a109a..b8b10477a34 100644
--- a/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java
+++ b/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java
@@ -426,6 +426,12 @@ public class InstantiatedIndexReader extends IndexReader {
public long getSumTotalTermFreq() {
return sumTotalTermFreq;
}
+
+ // TODO: support this?
+ @Override
+ public long getSumDocFreq() {
+ return -1;
+ }
@Override
   public Comparator<BytesRef> getComparator() {
diff --git a/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java b/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
index 0c93f3feedb..d6e7e09e50d 100644
--- a/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
+++ b/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
@@ -842,6 +842,12 @@ public class MemoryIndex {
public long getSumTotalTermFreq() {
return info.getSumTotalTermFreq();
}
+
+ @Override
+ public long getSumDocFreq() throws IOException {
+ // each term has df=1
+ return info.sortedTerms.length;
+ }
};
}
}
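
Why df=1 holds above: a MemoryIndex always contains exactly one document, so each
distinct term occurs in at most one document and the sum collapses to the unique
term count. A hedged check (MockAnalyzer and the reader plumbing are assumptions):

    import org.apache.lucene.analysis.MockAnalyzer;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.Terms;
    import org.apache.lucene.index.memory.MemoryIndex;

    public class MemoryIndexSumDocFreqSketch {
      public static void main(String[] args) throws Exception {
        MemoryIndex mi = new MemoryIndex();
        mi.addField("f", "a b a c", new MockAnalyzer(new java.util.Random(42)));
        IndexReader r = mi.createSearcher().getIndexReader();
        Terms terms = r.fields().terms("f");
        // Three unique terms {a, b, c}, each with docFreq 1 in the single doc:
        System.out.println(terms.getSumDocFreq()); // expected: 3
      }
    }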
diff --git a/lucene/contrib/queries/src/java/org/apache/lucene/search/FieldCacheRewriteMethod.java b/lucene/contrib/queries/src/java/org/apache/lucene/search/FieldCacheRewriteMethod.java
index 8539f855f9f..8a999d628db 100644
--- a/lucene/contrib/queries/src/java/org/apache/lucene/search/FieldCacheRewriteMethod.java
+++ b/lucene/contrib/queries/src/java/org/apache/lucene/search/FieldCacheRewriteMethod.java
@@ -128,6 +128,11 @@ public final class FieldCacheRewriteMethod extends MultiTermQuery.RewriteMethod
public long getSumTotalTermFreq() {
return -1;
}
+
+ @Override
+ public long getSumDocFreq() throws IOException {
+ return -1;
+ }
});
assert termsEnum != null;
diff --git a/lucene/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/src/java/org/apache/lucene/index/CheckIndex.java
index a3fec4d2f58..4b0164f549d 100644
--- a/lucene/src/java/org/apache/lucene/index/CheckIndex.java
+++ b/lucene/src/java/org/apache/lucene/index/CheckIndex.java
@@ -691,7 +691,7 @@ public class CheckIndex {
       Comparator<BytesRef> termComp = terms.getComparator();
long sumTotalTermFreq = 0;
-
+ long sumDocFreq = 0;
while(true) {
final BytesRef term = terms.next();
@@ -712,6 +712,7 @@ public class CheckIndex {
final int docFreq = terms.docFreq();
status.totFreq += docFreq;
+ sumDocFreq += docFreq;
docs = terms.docs(liveDocs, docs);
postings = terms.docsAndPositions(liveDocs, postings);
@@ -879,6 +880,13 @@ public class CheckIndex {
throw new RuntimeException("sumTotalTermFreq for field " + field + "=" + v + " != recomputed sumTotalTermFreq=" + sumTotalTermFreq);
}
}
+
+ if (sumDocFreq != 0) {
+ final long v = fields.terms(field).getSumDocFreq();
+ if (v != -1 && sumDocFreq != v) {
+ throw new RuntimeException("sumDocFreq for field " + field + "=" + v + " != recomputed sumDocFreq=" + sumDocFreq);
+ }
+ }
// Test seek to last term:
if (lastTerm != null) {
diff --git a/lucene/src/java/org/apache/lucene/index/FieldInvertState.java b/lucene/src/java/org/apache/lucene/index/FieldInvertState.java
index 8c4e92ad4ea..0b54500c71d 100644
--- a/lucene/src/java/org/apache/lucene/index/FieldInvertState.java
+++ b/lucene/src/java/org/apache/lucene/index/FieldInvertState.java
@@ -31,6 +31,7 @@ public final class FieldInvertState {
int numOverlap;
int offset;
int maxTermFrequency;
+ int uniqueTermCount;
float boost;
AttributeSource attributeSource;
@@ -55,6 +56,7 @@ public final class FieldInvertState {
numOverlap = 0;
offset = 0;
maxTermFrequency = 0;
+ uniqueTermCount = 0;
boost = docBoost;
attributeSource = null;
}
@@ -122,6 +124,13 @@ public final class FieldInvertState {
return maxTermFrequency;
}
+ /**
+ * Return the number of unique terms encountered in this field.
+ */
+ public int getUniqueTermCount() {
+ return uniqueTermCount;
+ }
+
public AttributeSource getAttributeSource() {
return attributeSource;
}
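
A sketch of what the new counter enables, written against the 3.x-style
computeNorm(String, FieldInvertState) override point (the Similarity API differs
across branches, so the signature is an assumption): a norm that decays with the
number of distinct terms rather than total field length.

    import org.apache.lucene.index.FieldInvertState;
    import org.apache.lucene.search.DefaultSimilarity;

    public class UniqueTermCountSimilarity extends DefaultSimilarity {
      @Override
      public float computeNorm(String field, FieldInvertState state) {
        // Use distinct terms instead of getLength(); guard against an
        // empty field so the norm stays finite.
        int numUnique = Math.max(1, state.getUniqueTermCount());
        return state.getBoost() * (float) (1.0 / Math.sqrt(numUnique));
      }
    }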
diff --git a/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java b/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java
index e7d2190774a..d57591a6b15 100644
--- a/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java
+++ b/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java
@@ -105,6 +105,11 @@ public class FilterIndexReader extends IndexReader {
public long getSumTotalTermFreq() throws IOException {
return in.getSumTotalTermFreq();
}
+
+ @Override
+ public long getSumDocFreq() throws IOException {
+ return in.getSumDocFreq();
+ }
}
/** Base class for filtering {@link TermsEnum} implementations. */
diff --git a/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java b/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java
index a770c9011bf..ddc4e441e71 100644
--- a/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java
+++ b/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java
@@ -134,6 +134,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
writeProx(termID, fieldState.position);
}
fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
+ fieldState.uniqueTermCount++;
}
@Override
@@ -151,6 +152,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
termsHashPerField.writeVInt(0, postings.lastDocCodes[termID]);
postings.lastDocCodes[termID] = docState.docID - postings.lastDocIDs[termID];
postings.lastDocIDs[termID] = docState.docID;
+ fieldState.uniqueTermCount++;
}
} else {
if (docState.docID != postings.lastDocIDs[termID]) {
@@ -171,6 +173,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
postings.lastDocCodes[termID] = (docState.docID - postings.lastDocIDs[termID]) << 1;
postings.lastDocIDs[termID] = docState.docID;
writeProx(termID, fieldState.position);
+ fieldState.uniqueTermCount++;
} else {
fieldState.maxTermFrequency = Math.max(fieldState.maxTermFrequency, ++postings.docFreqs[termID]);
writeProx(termID, fieldState.position-postings.lastPositions[termID]);
@@ -251,6 +254,8 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
final ByteSliceReader prox = new ByteSliceReader();
long sumTotalTermFreq = 0;
+ long sumDocFreq = 0;
+
for (int i = 0; i < numTerms; i++) {
final int termID = termIDs[i];
// Get BytesRef
@@ -389,9 +394,10 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
}
termsConsumer.finishTerm(text, new TermStats(numDocs, totTF));
sumTotalTermFreq += totTF;
+ sumDocFreq += numDocs;
}
- termsConsumer.finish(sumTotalTermFreq);
+ termsConsumer.finish(sumTotalTermFreq, sumDocFreq);
}
}
diff --git a/lucene/src/java/org/apache/lucene/index/MultiTerms.java b/lucene/src/java/org/apache/lucene/index/MultiTerms.java
index 2da5db54df8..2785170dfb3 100644
--- a/lucene/src/java/org/apache/lucene/index/MultiTerms.java
+++ b/lucene/src/java/org/apache/lucene/index/MultiTerms.java
@@ -88,6 +88,19 @@ public final class MultiTerms extends Terms {
}
return sum;
}
+
+ @Override
+ public long getSumDocFreq() throws IOException {
+ long sum = 0;
+ for(Terms terms : subs) {
+ final long v = terms.getSumDocFreq();
+ if (v == -1) {
+ return -1;
+ }
+ sum += v;
+ }
+ return sum;
+ }
@Override
public Comparator<BytesRef> getComparator() {
diff --git a/lucene/src/java/org/apache/lucene/index/Terms.java b/lucene/src/java/org/apache/lucene/index/Terms.java
index b7600c8a784..b7a27e0d0c9 100644
--- a/lucene/src/java/org/apache/lucene/index/Terms.java
+++ b/lucene/src/java/org/apache/lucene/index/Terms.java
@@ -132,6 +132,13 @@ public abstract class Terms {
* into account. */
public abstract long getSumTotalTermFreq() throws IOException;
+ /** Returns the sum of {@link #docFreq(BytesRef)} for
+ * all terms in this field, or -1 if this measure isn't
+ * stored by the codec. Note that, just like other term
+ * measures, this measure does not take deleted documents
+ * into account. */
+ public abstract long getSumDocFreq() throws IOException;
+
/**
* Returns a thread-private {@link TermsEnum} instance. Obtaining
* {@link TermsEnum} from this method might be more efficient than using
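Together with the existing getSumTotalTermFreq(), the new statistic enables simple corpus-level ratios: sumTotalTermFreq / sumDocFreq is the average within-document frequency of a term occurrence. A hedged sketch (class and method names are illustrative, not part of the patch):

    import java.io.IOException;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.MultiFields;
    import org.apache.lucene.index.Terms;

    final class FieldStats {
      // Average within-document frequency of a term for this field,
      // or -1 when the codec does not store the required statistics.
      static double avgTermFreqPerPosting(IndexReader reader, String field) throws IOException {
        Terms terms = MultiFields.getFields(reader).terms(field);
        if (terms == null) {
          return -1;
        }
        long sumDocFreq = terms.getSumDocFreq();
        long sumTotalTermFreq = terms.getSumTotalTermFreq();
        if (sumDocFreq <= 0 || sumTotalTermFreq == -1) {
          return -1;
        }
        return (double) sumTotalTermFreq / sumDocFreq;
      }
    }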
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java b/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java
index 19c280b0c7a..a970d57966e 100644
--- a/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java
+++ b/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java
@@ -137,8 +137,9 @@ public class BlockTermsReader extends FieldsProducer {
final long termsStartPointer = in.readVLong();
final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
final long sumTotalTermFreq = fieldInfo.omitTermFreqAndPositions ? -1 : in.readVLong();
+ final long sumDocFreq = in.readVLong();
assert !fields.containsKey(fieldInfo.name);
- fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, termsStartPointer, sumTotalTermFreq));
+ fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, termsStartPointer, sumTotalTermFreq, sumDocFreq));
}
success = true;
} finally {
@@ -245,13 +246,15 @@ public class BlockTermsReader extends FieldsProducer {
final FieldInfo fieldInfo;
final long termsStartPointer;
final long sumTotalTermFreq;
+ final long sumDocFreq;
- FieldReader(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq) {
+ FieldReader(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq, long sumDocFreq) {
assert numTerms > 0;
this.fieldInfo = fieldInfo;
this.numTerms = numTerms;
this.termsStartPointer = termsStartPointer;
this.sumTotalTermFreq = sumTotalTermFreq;
+ this.sumDocFreq = sumDocFreq;
}
@Override
@@ -279,6 +282,11 @@ public class BlockTermsReader extends FieldsProducer {
return sumTotalTermFreq;
}
+ @Override
+ public long getSumDocFreq() throws IOException {
+ return sumDocFreq;
+ }
+
// Iterates through terms in this field
private final class SegmentTermsEnum extends TermsEnum {
private final IndexInput in;
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsWriter.java b/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsWriter.java
index 926a6af6301..89ab114919e 100644
--- a/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsWriter.java
+++ b/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsWriter.java
@@ -132,6 +132,7 @@ public class BlockTermsWriter extends FieldsConsumer {
if (!field.fieldInfo.omitTermFreqAndPositions) {
out.writeVLong(field.sumTotalTermFreq);
}
+ out.writeVLong(field.sumDocFreq);
}
}
writeTrailer(dirStart);
@@ -157,6 +158,7 @@ public class BlockTermsWriter extends FieldsConsumer {
private long numTerms;
private final TermsIndexWriterBase.FieldWriter fieldIndexWriter;
long sumTotalTermFreq;
+ long sumDocFreq;
private TermEntry[] pendingTerms;
@@ -231,7 +233,7 @@ public class BlockTermsWriter extends FieldsConsumer {
// Finishes all terms in this field
@Override
- public void finish(long sumTotalTermFreq) throws IOException {
+ public void finish(long sumTotalTermFreq, long sumDocFreq) throws IOException {
if (pendingCount > 0) {
flushBlock();
}
@@ -239,6 +241,7 @@ public class BlockTermsWriter extends FieldsConsumer {
out.writeVInt(0);
this.sumTotalTermFreq = sumTotalTermFreq;
+ this.sumDocFreq = sumDocFreq;
fieldIndexWriter.finish(out.getFilePointer());
}
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/TermsConsumer.java b/lucene/src/java/org/apache/lucene/index/codecs/TermsConsumer.java
index 875da3bbb63..01280154cac 100644
--- a/lucene/src/java/org/apache/lucene/index/codecs/TermsConsumer.java
+++ b/lucene/src/java/org/apache/lucene/index/codecs/TermsConsumer.java
@@ -41,7 +41,7 @@ public abstract class TermsConsumer {
public abstract void finishTerm(BytesRef text, TermStats stats) throws IOException;
/** Called when we are done adding terms to this field */
- public abstract void finish(long sumTotalTermFreq) throws IOException;
+ public abstract void finish(long sumTotalTermFreq, long sumDocFreq) throws IOException;
/** Return the BytesRef Comparator used to sort terms
* before feeding to this API. */
@@ -56,7 +56,8 @@ public abstract class TermsConsumer {
BytesRef term;
assert termsEnum != null;
long sumTotalTermFreq = 0;
- long sumDF = 0;
+ long sumDocFreq = 0;
+ long sumDFsinceLastAbortCheck = 0;
if (mergeState.fieldInfo.omitTermFreqAndPositions) {
if (docsEnum == null) {
@@ -74,10 +75,11 @@ public abstract class TermsConsumer {
final TermStats stats = postingsConsumer.merge(mergeState, docsEnum);
if (stats.docFreq > 0) {
finishTerm(term, stats);
- sumDF += stats.docFreq;
- if (sumDF > 60000) {
- mergeState.checkAbort.work(sumDF/5.0);
- sumDF = 0;
+ sumDFsinceLastAbortCheck += stats.docFreq;
+ sumDocFreq += stats.docFreq;
+ if (sumDFsinceLastAbortCheck > 60000) {
+ mergeState.checkAbort.work(sumDFsinceLastAbortCheck/5.0);
+ sumDFsinceLastAbortCheck = 0;
}
}
}
@@ -105,16 +107,17 @@ public abstract class TermsConsumer {
if (stats.docFreq > 0) {
finishTerm(term, stats);
sumTotalTermFreq += stats.totalTermFreq;
- sumDF += stats.docFreq;
- if (sumDF > 60000) {
- mergeState.checkAbort.work(sumDF/5.0);
- sumDF = 0;
+ sumDFsinceLastAbortCheck += stats.docFreq;
+ sumDocFreq += stats.docFreq;
+ if (sumDFsinceLastAbortCheck > 60000) {
+ mergeState.checkAbort.work(sumDFsinceLastAbortCheck/5.0);
+ sumDFsinceLastAbortCheck = 0;
}
}
}
}
}
- finish(sumTotalTermFreq);
+ finish(sumTotalTermFreq, sumDocFreq);
}
}
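The rename in the merge loop above is not cosmetic: the old sumDF counter was zeroed after every checkAbort.work() call, so it could never double as the statistic handed to finish(). The patch therefore keeps two accumulators; roughly (names and the perTermDocFreqs source are illustrative):

    long sumDocFreq = 0;            // true running total, passed to finish()
    long sinceLastAbortCheck = 0;   // drained periodically for abort checks
    for (long docFreq : perTermDocFreqs) {
      sumDocFreq += docFreq;
      sinceLastAbortCheck += docFreq;
      if (sinceLastAbortCheck > 60000) {
        checkAbort.work(sinceLastAbortCheck / 5.0);
        sinceLastAbortCheck = 0;
      }
    }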
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/memory/MemoryCodec.java b/lucene/src/java/org/apache/lucene/index/codecs/memory/MemoryCodec.java
index dbe6265900a..e36994fe990 100644
--- a/lucene/src/java/org/apache/lucene/index/codecs/memory/MemoryCodec.java
+++ b/lucene/src/java/org/apache/lucene/index/codecs/memory/MemoryCodec.java
@@ -219,13 +219,14 @@ public class MemoryCodec extends Codec {
}
@Override
- public void finish(long sumTotalTermFreq) throws IOException {
+ public void finish(long sumTotalTermFreq, long sumDocFreq) throws IOException {
if (termCount > 0) {
out.writeVInt(termCount);
out.writeVInt(field.number);
if (!field.omitTermFreqAndPositions) {
out.writeVLong(sumTotalTermFreq);
}
+ out.writeVLong(sumDocFreq);
builder.finish().save(out);
if (VERBOSE) System.out.println("finish field=" + field.name + " fp=" + out.getFilePointer());
}
@@ -683,6 +684,7 @@ public class MemoryCodec extends Codec {
private final static class TermsReader extends Terms {
private final long sumTotalTermFreq;
+ private final long sumDocFreq;
private FST<BytesRef> fst;
private final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
private final FieldInfo field;
@@ -695,6 +697,7 @@ public class MemoryCodec extends Codec {
} else {
sumTotalTermFreq = 0;
}
+ sumDocFreq = in.readVLong();
fst = new FST<BytesRef>(in, outputs);
}
@@ -704,6 +707,11 @@ public class MemoryCodec extends Codec {
return sumTotalTermFreq;
}
+ @Override
+ public long getSumDocFreq() throws IOException {
+ return sumDocFreq;
+ }
+
@Override
public TermsEnum iterator() {
return new FSTTermsEnum(field, fst);
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java b/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java
index ac3962d50e4..a994b468527 100644
--- a/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java
+++ b/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java
@@ -266,6 +266,11 @@ public class PreFlexFields extends FieldsProducer {
public long getSumTotalTermFreq() {
return -1;
}
+
+ @Override
+ public long getSumDocFreq() throws IOException {
+ return -1;
+ }
}
private class PreTermsEnum extends TermsEnum {
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java b/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java
index 34451939966..e2a37f6b199 100644
--- a/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java
+++ b/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java
@@ -463,6 +463,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
private final long termsStart;
private final boolean omitTF;
private long sumTotalTermFreq;
+ private long sumDocFreq;
private FST<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> fst;
private int termCount;
private final BytesRef scratch = new BytesRef(10);
@@ -500,6 +501,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
break;
} else if (scratch.startsWith(DOC)) {
docFreq++;
+ sumDocFreq++;
} else if (scratch.startsWith(POS)) {
totalTermFreq++;
} else if (scratch.startsWith(TERM)) {
@@ -554,6 +556,11 @@ class SimpleTextFieldsReader extends FieldsProducer {
public long getSumTotalTermFreq() {
return sumTotalTermFreq;
}
+
+ @Override
+ public long getSumDocFreq() throws IOException {
+ return sumDocFreq;
+ }
}
@Override
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsWriter.java b/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsWriter.java
index f822ec6a72c..49ed1f5c786 100644
--- a/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsWriter.java
+++ b/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsWriter.java
@@ -89,7 +89,7 @@ class SimpleTextFieldsWriter extends FieldsConsumer {
}
@Override
- public void finish(long sumTotalTermFreq) throws IOException {
+ public void finish(long sumTotalTermFreq, long sumDocFreq) throws IOException {
}
@Override
diff --git a/lucene/src/test-framework/org/apache/lucene/index/codecs/preflexrw/PreFlexFieldsWriter.java b/lucene/src/test-framework/org/apache/lucene/index/codecs/preflexrw/PreFlexFieldsWriter.java
index 4950cf97ea9..e10323146dd 100644
--- a/lucene/src/test-framework/org/apache/lucene/index/codecs/preflexrw/PreFlexFieldsWriter.java
+++ b/lucene/src/test-framework/org/apache/lucene/index/codecs/preflexrw/PreFlexFieldsWriter.java
@@ -195,7 +195,7 @@ class PreFlexFieldsWriter extends FieldsConsumer {
}
@Override
- public void finish(long sumTotalTermCount) throws IOException {
+ public void finish(long sumTotalTermCount, long sumDocFreq) throws IOException {
}
@Override
diff --git a/lucene/src/test/org/apache/lucene/TestExternalCodecs.java b/lucene/src/test/org/apache/lucene/TestExternalCodecs.java
index 50485853819..b3cebe05567 100644
--- a/lucene/src/test/org/apache/lucene/TestExternalCodecs.java
+++ b/lucene/src/test/org/apache/lucene/TestExternalCodecs.java
@@ -102,6 +102,7 @@ public class TestExternalCodecs extends LuceneTestCase {
final String field;
final SortedMap<String,RAMTerm> termToDocs = new TreeMap<String,RAMTerm>();
long sumTotalTermFreq;
+ long sumDocFreq;
RAMField(String field) {
this.field = field;
@@ -116,6 +117,11 @@ public class TestExternalCodecs extends LuceneTestCase {
public long getSumTotalTermFreq() {
return sumTotalTermFreq;
}
+
+ @Override
+ public long getSumDocFreq() throws IOException {
+ return sumDocFreq;
+ }
@Override
public TermsEnum iterator() {
@@ -204,8 +210,9 @@ public class TestExternalCodecs extends LuceneTestCase {
}
@Override
- public void finish(long sumTotalTermFreq) {
+ public void finish(long sumTotalTermFreq, long sumDocFreq) {
field.sumTotalTermFreq = sumTotalTermFreq;
+ field.sumDocFreq = sumDocFreq;
}
}
diff --git a/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java b/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
index 5a629dacd85..b3d91e2d80d 100644
--- a/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
+++ b/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
@@ -455,9 +455,12 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
File indexDir = new File(LuceneTestCase.TEMP_DIR, dirName);
_TestUtil.rmDir(indexDir);
Directory dir = newFSDirectory(indexDir);
-
- IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(10);
- ((LogMergePolicy) conf.getMergePolicy()).setUseCompoundFile(doCFS);
+ LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
+ mp.setUseCompoundFile(doCFS);
+ mp.setNoCFSRatio(1.0);
+ // TODO: remove randomness
+ IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
+ .setMaxBufferedDocs(10).setMergePolicy(mp);
IndexWriter writer = new IndexWriter(dir, conf);
for(int i=0;i<35;i++) {
@@ -471,8 +474,12 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
if (!optimized) {
// open fresh writer so we get no prx file in the added segment
- conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(10);
- ((LogMergePolicy) conf.getMergePolicy()).setUseCompoundFile(doCFS);
+ mp = new LogByteSizeMergePolicy();
+ mp.setUseCompoundFile(doCFS);
+ mp.setNoCFSRatio(1.0);
+ // TODO: remove randomness
+ conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
+ .setMaxBufferedDocs(10).setMergePolicy(mp);
writer = new IndexWriter(dir, conf);
addNoProxDoc(writer);
writer.close();
diff --git a/lucene/src/test/org/apache/lucene/index/TestCodecs.java b/lucene/src/test/org/apache/lucene/index/TestCodecs.java
index fb702291507..c655d8a6b03 100644
--- a/lucene/src/test/org/apache/lucene/index/TestCodecs.java
+++ b/lucene/src/test/org/apache/lucene/index/TestCodecs.java
@@ -101,10 +101,12 @@ public class TestCodecs extends LuceneTestCase {
Arrays.sort(terms);
final TermsConsumer termsConsumer = consumer.addField(fieldInfo);
long sumTotalTermCount = 0;
+ long sumDF = 0;
for (final TermData term : terms) {
+ sumDF += term.docs.length;
sumTotalTermCount += term.write(termsConsumer);
}
- termsConsumer.finish(sumTotalTermCount);
+ termsConsumer.finish(sumTotalTermCount, sumDF);
}
}
diff --git a/lucene/src/test/org/apache/lucene/index/TestSumDocFreq.java b/lucene/src/test/org/apache/lucene/index/TestSumDocFreq.java
new file mode 100644
index 00000000000..92d579f70df
--- /dev/null
+++ b/lucene/src/test/org/apache/lucene/index/TestSumDocFreq.java
@@ -0,0 +1,101 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util._TestUtil;
+
+/**
+ * Tests {@link Terms#getSumDocFreq()}
+ * @lucene.experimental
+ */
+public class TestSumDocFreq extends LuceneTestCase {
+
+ public void testSumDocFreq() throws Exception {
+ final int numDocs = atLeast(500);
+
+ Directory dir = newDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(random, dir);
+
+ Document doc = new Document();
+ Field field1 = newField("foo", "", Field.Index.ANALYZED);
+ Field field2 = newField("bar", "", Field.Index.ANALYZED);
+ doc.add(field1);
+ doc.add(field2);
+ for (int i = 0; i < numDocs; i++) {
+ char ch1 = (char) _TestUtil.nextInt(random, 'a', 'z');
+ char ch2 = (char) _TestUtil.nextInt(random, 'a', 'z');
+ field1.setValue("" + ch1 + " " + ch2);
+ ch1 = (char) _TestUtil.nextInt(random, 'a', 'z');
+ ch2 = (char) _TestUtil.nextInt(random, 'a', 'z');
+ field2.setValue("" + ch1 + " " + ch2);
+ writer.addDocument(doc);
+ }
+
+ IndexReader ir = writer.getReader();
+ writer.close();
+
+ assertSumDocFreq(ir);
+ ir.close();
+
+ ir = IndexReader.open(dir, false);
+ int numDeletions = atLeast(20);
+ for (int i = 0; i < numDeletions; i++) {
+ ir.deleteDocument(random.nextInt(ir.maxDoc()));
+ }
+ ir.close();
+
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
+ w.optimize();
+ w.close();
+
+ ir = IndexReader.open(dir, true);
+ assertSumDocFreq(ir);
+ ir.close();
+
+ dir.close();
+ }
+
+ private void assertSumDocFreq(IndexReader ir) throws Exception {
+ // compute sumDocFreq across all fields
+ Fields fields = MultiFields.getFields(ir);
+ FieldsEnum fieldEnum = fields.iterator();
+ String f = null;
+ while ((f = fieldEnum.next()) != null) {
+ Terms terms = fields.terms(f);
+ long sumDocFreq = terms.getSumDocFreq();
+ if (sumDocFreq == -1) {
+ if (VERBOSE) {
+ System.out.println("skipping field: " + f + ", codec does not support sumDocFreq");
+ }
+ continue;
+ }
+
+ long computedSumDocFreq = 0;
+ TermsEnum termsEnum = terms.iterator();
+ while (termsEnum.next() != null) {
+ computedSumDocFreq += termsEnum.docFreq();
+ }
+ assertEquals(computedSumDocFreq, sumDocFreq);
+ }
+ }
+}
diff --git a/lucene/src/test/org/apache/lucene/index/TestUniqueTermCount.java b/lucene/src/test/org/apache/lucene/index/TestUniqueTermCount.java
new file mode 100644
index 00000000000..3fd64fa4926
--- /dev/null
+++ b/lucene/src/test/org/apache/lucene/index/TestUniqueTermCount.java
@@ -0,0 +1,108 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.ArrayList;
+import java.util.HashSet;
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.search.DefaultSimilarity;
+import org.apache.lucene.search.DefaultSimilarityProvider;
+import org.apache.lucene.search.Similarity;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util._TestUtil;
+
+/**
+ * Tests the uniqueTermCount statistic in FieldInvertState
+ */
+public class TestUniqueTermCount extends LuceneTestCase {
+ Directory dir;
+ IndexReader reader;
+ /* expected uniqueTermCount values for our documents */
+ ArrayList<Integer> expected = new ArrayList<Integer>();
+
+ @Override
+ public void setUp() throws Exception {
+ super.setUp();
+ dir = newDirectory();
+ IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT,
+ new MockAnalyzer(random, MockTokenizer.SIMPLE, true)).setMergePolicy(newLogMergePolicy());
+ config.setSimilarityProvider(new DefaultSimilarityProvider() {
+ @Override
+ public Similarity get(String field) {
+ return new TestSimilarity();
+ }
+ });
+ RandomIndexWriter writer = new RandomIndexWriter(random, dir, config);
+ Document doc = new Document();
+ Field foo = newField("foo", "", Field.Store.NO, Field.Index.ANALYZED);
+ doc.add(foo);
+ for (int i = 0; i < 100; i++) {
+ foo.setValue(addValue());
+ writer.addDocument(doc);
+ }
+ reader = writer.getReader();
+ writer.close();
+ }
+
+ @Override
+ public void tearDown() throws Exception {
+ reader.close();
+ dir.close();
+ super.tearDown();
+ }
+
+ public void test() throws Exception {
+ byte fooNorms[] = MultiNorms.norms(reader, "foo");
+ for (int i = 0; i < reader.maxDoc(); i++)
+ assertEquals(expected.get(i).intValue(), fooNorms[i] & 0xff);
+ }
+
+ /**
+ * Makes a bunch of single-char tokens (at most 26 unique terms per document) and
+ * puts the number of unique terms into expected, to be checked against the norm.
+ */
+ private String addValue() {
+ StringBuilder sb = new StringBuilder();
+ HashSet<String> terms = new HashSet<String>();
+ int num = _TestUtil.nextInt(random, 0, 255);
+ for (int i = 0; i < num; i++) {
+ sb.append(' ');
+ char term = (char) _TestUtil.nextInt(random, 'a', 'z');
+ sb.append(term);
+ terms.add("" + term);
+ }
+ expected.add(terms.size());
+ return sb.toString();
+ }
+
+ /**
+ * Simple similarity that encodes uniqueTermCount directly as a byte
+ */
+ class TestSimilarity extends DefaultSimilarity {
+
+ @Override
+ public byte computeNorm(FieldInvertState state) {
+ return (byte) state.getUniqueTermCount();
+ }
+ }
+}
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java
index c97d4ab6bfa..422938b6bd5 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java
@@ -16,7 +16,6 @@ package org.apache.lucene.analysis.query;
* limitations under the License.
*/
-import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
@@ -25,19 +24,12 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.LetterTokenizer;
-import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
-import org.apache.lucene.queryParser.ParseException;
-import org.apache.lucene.queryParser.QueryParser;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.Query;
import org.apache.lucene.store.RAMDirectory;
public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
@@ -74,22 +66,14 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
super.tearDown();
}
- //Helper method to query
- private int search(Analyzer a, String queryString) throws IOException, ParseException {
- QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "repetitiveField", a);
- Query q = qp.parse(queryString);
- IndexSearcher searcher = newSearcher(reader);
- int hits = searcher.search(q, null, 1000).totalHits;
- searcher.close();
- return hits;
- }
-
public void testUninitializedAnalyzer() throws Exception {
- //Note: no calls to "addStopWord"
- String query = "variedField:quick repetitiveField:boring";
- int numHits1 = search(protectedAnalyzer, query);
- int numHits2 = search(appAnalyzer, query);
- assertEquals("No filtering test", numHits1, numHits2);
+ // Note: no calls to "addStopWord"
+ // query = "variedField:quick repetitiveField:boring";
+ TokenStream protectedTokenStream = protectedAnalyzer.reusableTokenStream("variedField", new StringReader("quick"));
+ assertTokenStreamContents(protectedTokenStream, new String[]{"quick"});
+
+ protectedTokenStream = protectedAnalyzer.reusableTokenStream("repetitiveField", new StringReader("boring"));
+ assertTokenStreamContents(protectedTokenStream, new String[]{"boring"});
}
/*
@@ -97,36 +81,41 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
*/
public void testDefaultAddStopWordsIndexReader() throws Exception {
protectedAnalyzer.addStopWords(reader);
- int numHits = search(protectedAnalyzer, "repetitiveField:boring");
- assertEquals("Default filter should remove all docs", 0, numHits);
- }
+ TokenStream protectedTokenStream = protectedAnalyzer.reusableTokenStream("repetitiveField", new StringReader("boring"));
+ assertTokenStreamContents(protectedTokenStream, new String[0]); // Default stop word filtering will remove boring
+ }
/*
* Test method for 'org.apache.lucene.analysis.QueryAutoStopWordAnalyzer.addStopWords(IndexReader, int)'
*/
public void testAddStopWordsIndexReaderInt() throws Exception {
protectedAnalyzer.addStopWords(reader, 1f / 2f);
- int numHits = search(protectedAnalyzer, "repetitiveField:boring");
- assertEquals("A filter on terms in > one half of docs remove boring docs", 0, numHits);
- numHits = search(protectedAnalyzer, "repetitiveField:vaguelyboring");
- assertTrue("A filter on terms in > half of docs should not remove vaguelyBoring docs", numHits > 1);
+ TokenStream protectedTokenStream = protectedAnalyzer.reusableTokenStream("repetitiveField", new StringReader("boring"));
+ // A filter on terms in > one half of docs removes boring
+ assertTokenStreamContents(protectedTokenStream, new String[0]);
+
+ protectedTokenStream = protectedAnalyzer.reusableTokenStream("repetitiveField", new StringReader("vaguelyboring"));
+ // A filter on terms in > half of docs should not remove vaguelyBoring
+ assertTokenStreamContents(protectedTokenStream, new String[]{"vaguelyboring"});
protectedAnalyzer.addStopWords(reader, 1f / 4f);
- numHits = search(protectedAnalyzer, "repetitiveField:vaguelyboring");
- assertEquals("A filter on terms in > quarter of docs should remove vaguelyBoring docs", 0, numHits);
+ protectedTokenStream = protectedAnalyzer.reusableTokenStream("repetitiveField", new StringReader("vaguelyboring"));
+ // A filter on terms in > a quarter of docs should remove vaguelyBoring
+ assertTokenStreamContents(protectedTokenStream, new String[0]);
}
-
public void testAddStopWordsIndexReaderStringFloat() throws Exception {
protectedAnalyzer.addStopWords(reader, "variedField", 1f / 2f);
- int numHits = search(protectedAnalyzer, "repetitiveField:boring");
- assertTrue("A filter on one Field should not affect queris on another", numHits > 0);
+ TokenStream protectedTokenStream = protectedAnalyzer.reusableTokenStream("repetitiveField", new StringReader("boring"));
+ // A filter on one Field should not affect queries on another
+ assertTokenStreamContents(protectedTokenStream, new String[]{"boring"});
protectedAnalyzer.addStopWords(reader, "repetitiveField", 1f / 2f);
- numHits = search(protectedAnalyzer, "repetitiveField:boring");
- assertEquals("A filter on the right Field should affect queries on it", numHits, 0);
+ protectedTokenStream = protectedAnalyzer.reusableTokenStream("repetitiveField", new StringReader("boring"));
+ // A filter on the right Field should affect queries on it
+ assertTokenStreamContents(protectedTokenStream, new String[0]);
}
public void testAddStopWordsIndexReaderStringInt() throws Exception {
@@ -144,12 +133,14 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
public void testNoFieldNamePollution() throws Exception {
protectedAnalyzer.addStopWords(reader, "repetitiveField", 10);
- int numHits = search(protectedAnalyzer, "repetitiveField:boring");
- assertEquals("Check filter set up OK", 0, numHits);
- numHits = search(protectedAnalyzer, "variedField:boring");
- assertTrue("Filter should not prevent stopwords in one field being used in another ", numHits > 0);
+ TokenStream protectedTokenStream = protectedAnalyzer.reusableTokenStream("repetitiveField", new StringReader("boring"));
+ // Check filter set up OK
+ assertTokenStreamContents(protectedTokenStream, new String[0]);
+ protectedTokenStream = protectedAnalyzer.reusableTokenStream("variedField", new StringReader("boring"));
+ // Filter should not prevent stopwords in one field being used in another
+ assertTokenStreamContents(protectedTokenStream, new String[]{"boring"});
}
/*
@@ -170,10 +161,12 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
public void testWrappingNonReusableAnalyzer() throws Exception {
QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, new NonreusableAnalyzer());
a.addStopWords(reader, 10);
- int numHits = search(a, "repetitiveField:boring");
- assertTrue(numHits == 0);
- numHits = search(a, "repetitiveField:vaguelyboring");
- assertTrue(numHits == 0);
+
+ TokenStream tokenStream = a.reusableTokenStream("repetitiveField", new StringReader("boring"));
+ assertTokenStreamContents(tokenStream, new String[0]);
+
+ tokenStream = a.reusableTokenStream("repetitiveField", new StringReader("vaguelyboring"));
+ assertTokenStreamContents(tokenStream, new String[0]);
}
public void testTokenStream() throws Exception {
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java
index 3cf4ac22aae..9d2d6870c82 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java
@@ -32,14 +32,7 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
-import org.apache.lucene.queryParser.QueryParser;
-import org.apache.lucene.search.BooleanClause;
-import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.PhraseQuery;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.ScoreDoc;
-import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
@@ -82,16 +75,6 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
return new IndexSearcher(dir, true);
}
- protected ScoreDoc[] queryParsingTest(Analyzer analyzer, String qs) throws Exception {
- searcher = setUpSearcher(analyzer);
-
- QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "content", analyzer);
-
- Query q = qp.parse(qs);
-
- return searcher.search(q, null, 1000).scoreDocs;
- }
-
protected void compareRanks(ScoreDoc[] hits, int[] ranks) throws Exception {
assertEquals(ranks.length, hits.length);
for (int i = 0; i < ranks.length; i++) {
@@ -99,51 +82,6 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
}
}
- /*
- * Will not work on an index without unigrams, since QueryParser automatically
- * tokenizes on whitespace.
- */
- public void testShingleAnalyzerWrapperQueryParsing() throws Exception {
- ScoreDoc[] hits = queryParsingTest(new ShingleAnalyzerWrapper
- (new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), 2),
- "test sentence");
- int[] ranks = new int[] { 1, 2, 0 };
- compareRanks(hits, ranks);
- }
-
- /*
- * This one fails with an exception.
- */
- public void testShingleAnalyzerWrapperPhraseQueryParsingFails() throws Exception {
- ScoreDoc[] hits = queryParsingTest(new ShingleAnalyzerWrapper
- (new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), 2),
- "\"this sentence\"");
- int[] ranks = new int[] { 0 };
- compareRanks(hits, ranks);
- }
-
- /*
- * This one works, actually.
- */
- public void testShingleAnalyzerWrapperPhraseQueryParsing() throws Exception {
- ScoreDoc[] hits = queryParsingTest(new ShingleAnalyzerWrapper
- (new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), 2),
- "\"test sentence\"");
- int[] ranks = new int[] { 1 };
- compareRanks(hits, ranks);
- }
-
- /*
- * Same as above, is tokenized without using the analyzer.
- */
- public void testShingleAnalyzerWrapperRequiredQueryParsing() throws Exception {
- ScoreDoc[] hits = queryParsingTest(new ShingleAnalyzerWrapper
- (new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), 2),
- "+test +sentence");
- int[] ranks = new int[] { 1, 2 };
- compareRanks(hits, ranks);
- }
-
/*
* This shows how to construct a phrase query containing shingles.
*/
@@ -153,8 +91,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
PhraseQuery q = new PhraseQuery();
- TokenStream ts = analyzer.tokenStream("content",
- new StringReader("this sentence"));
+ TokenStream ts = analyzer.tokenStream("content", new StringReader("this sentence"));
int j = -1;
PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class);
@@ -183,8 +120,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
BooleanQuery q = new BooleanQuery();
- TokenStream ts = analyzer.tokenStream("content",
- new StringReader("test sentence"));
+ TokenStream ts = analyzer.tokenStream("content", new StringReader("test sentence"));
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
diff --git a/modules/queries/src/test/org/apache/lucene/queries/TestCustomScoreQuery.java b/modules/queries/src/test/org/apache/lucene/queries/TestCustomScoreQuery.java
index fc69bb812fe..779d296b532 100755
--- a/modules/queries/src/test/org/apache/lucene/queries/TestCustomScoreQuery.java
+++ b/modules/queries/src/test/org/apache/lucene/queries/TestCustomScoreQuery.java
@@ -20,11 +20,7 @@ package org.apache.lucene.queries;
import org.apache.lucene.queries.function.FunctionQuery;
import org.apache.lucene.queries.function.FunctionTestSetup;
import org.apache.lucene.queries.function.ValueSource;
-import org.apache.lucene.queries.function.valuesource.ByteFieldSource;
import org.apache.lucene.queries.function.valuesource.FloatFieldSource;
-import org.apache.lucene.queries.function.valuesource.IntFieldSource;
-import org.apache.lucene.queries.function.valuesource.ShortFieldSource;
-import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.search.cache.*;
import org.junit.BeforeClass;
@@ -198,9 +194,10 @@ public class TestCustomScoreQuery extends FunctionTestSetup {
@Test
public void testCustomExternalQuery() throws Exception {
- QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, TEXT_FIELD,anlzr);
- String qtxt = "first aid text"; // from the doc texts in FunctionQuerySetup.
- Query q1 = qp.parse(qtxt);
+ BooleanQuery q1 = new BooleanQuery();
+ q1.add(new TermQuery(new Term(TEXT_FIELD, "first")), BooleanClause.Occur.SHOULD);
+ q1.add(new TermQuery(new Term(TEXT_FIELD, "aid")), BooleanClause.Occur.SHOULD);
+ q1.add(new TermQuery(new Term(TEXT_FIELD, "text")), BooleanClause.Occur.SHOULD);
final Query q = new CustomExternalQuery(q1);
log(q);
@@ -243,11 +240,12 @@ public class TestCustomScoreQuery extends FunctionTestSetup {
FunctionQuery functionQuery = new FunctionQuery(valueSource);
float boost = (float) dboost;
IndexSearcher s = new IndexSearcher(dir, true);
- QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, TEXT_FIELD, anlzr);
- String qtxt = "first aid text"; // from the doc texts in FunctionQuerySetup.
// regular (boolean) query.
- Query q1 = qp.parse(qtxt);
+ BooleanQuery q1 = new BooleanQuery();
+ q1.add(new TermQuery(new Term(TEXT_FIELD, "first")), BooleanClause.Occur.SHOULD);
+ q1.add(new TermQuery(new Term(TEXT_FIELD, "aid")), BooleanClause.Occur.SHOULD);
+ q1.add(new TermQuery(new Term(TEXT_FIELD, "text")), BooleanClause.Occur.SHOULD);
log(q1);
// custom query, that should score the same as q1.
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 6761975e542..3b6ce8aa5cf 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -256,7 +256,13 @@ Other Changes
* LUCENE-2883: FunctionQuery, DocValues (and its impls), ValueSource (and its
impls) and BoostedQuery have been consolidated into the queries module. They
- can now be found at o.a.l.queries.function.
+ can now be found at o.a.l.queries.function.
+
+* SOLR-2027: FacetField.getValues() now returns an empty list if there are no
+ values, instead of null (Chris Male)
+
+* SOLR-1825: SolrQuery.addFacetQuery now enables facets automatically, like
+ addFacetField (Chris Male)
Documentation
----------------------
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/SolrQuery.java b/solr/solrj/src/java/org/apache/solr/client/solrj/SolrQuery.java
index a7228f0da9d..5771028741f 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/SolrQuery.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/SolrQuery.java
@@ -292,6 +292,7 @@ public class SolrQuery extends ModifiableSolrParams
*/
public SolrQuery addFacetQuery(String f) {
this.add(FacetParams.FACET_QUERY, f);
+ this.set(FacetParams.FACET, true);
return this;
}
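Client code that adds a facet query no longer needs to enable faceting separately. A small SolrJ sketch (the query strings are illustrative):

    import org.apache.solr.client.solrj.SolrQuery;

    SolrQuery q = new SolrQuery("*:*");
    q.addFacetQuery("price:[0 TO 100]"); // now also sets facet=true
    // Previously the extra call below was required:
    // q.setFacet(true);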
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/response/FacetField.java b/solr/solrj/src/java/org/apache/solr/client/solrj/response/FacetField.java
index 6b653487344..f46fd97a8cf 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/response/FacetField.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/response/FacetField.java
@@ -19,6 +19,7 @@ package org.apache.solr.client.solrj.response;
import java.io.Serializable;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.Date;
import java.util.List;
@@ -145,7 +146,7 @@ import org.apache.solr.client.solrj.util.ClientUtils;
}
public List<Count> getValues() {
- return _values;
+ return _values == null ? Collections.<Count>emptyList() : _values;
}
public int getValueCount()
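Because getValues() now returns an empty list instead of null, callers can iterate unconditionally. A sketch, assuming a QueryResponse named response and a hypothetical facet field name:

    import org.apache.solr.client.solrj.response.FacetField;

    FacetField ff = response.getFacetField("category"); // "category" is illustrative
    for (FacetField.Count count : ff.getValues()) {     // no null check needed
      System.out.println(count.getName() + " -> " + count.getCount());
    }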
diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/SolrQueryTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/SolrQueryTest.java
index 9c24c4d9c0c..4eedeb9a57c 100644
--- a/solr/solrj/src/test/org/apache/solr/client/solrj/SolrQueryTest.java
+++ b/solr/solrj/src/test/org/apache/solr/client/solrj/SolrQueryTest.java
@@ -195,4 +195,10 @@ public class SolrQueryTest extends LuceneTestCase {
q.setTermsRegexFlag("multiline");
assertEquals(2, q.getTermsRegexFlags().length);
}
+
+ public void testAddFacetQuery() {
+ SolrQuery solrQuery = new SolrQuery();
+ solrQuery.addFacetQuery("field:value");
+ assertTrue("Adding a Facet Query should enable facets", solrQuery.getBool(FacetParams.FACET));
+ }
}
diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/response/FacetFieldTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/response/FacetFieldTest.java
new file mode 100644
index 00000000000..beaab3b2d3d
--- /dev/null
+++ b/solr/solrj/src/test/org/apache/solr/client/solrj/response/FacetFieldTest.java
@@ -0,0 +1,33 @@
+package org.apache.solr.client.solrj.response;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.LuceneTestCase;
+
+public class FacetFieldTest extends LuceneTestCase {
+
+ public void testGetValues() {
+ FacetField facetField = new FacetField("field");
+
+ assertNotNull(facetField.getValues());
+ assertEquals(0, facetField.getValues().size());
+
+ facetField.add("value", 1);
+ assertEquals(1, facetField.getValues().size());
+ }
+}