mirror of https://github.com/apache/lucene.git
Merged with trunk up to r1144714
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/solr2452@1144715 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
commit
9e020991ef
|
@ -421,6 +421,8 @@ New features
|
||||||
* LUCENE-2862: Added TermsEnum.totalTermFreq() and
|
* LUCENE-2862: Added TermsEnum.totalTermFreq() and
|
||||||
Terms.getSumTotalTermFreq(). (Mike McCandless, Robert Muir)
|
Terms.getSumTotalTermFreq(). (Mike McCandless, Robert Muir)
|
||||||
|
|
||||||
|
* LUCENE-3290: Added Terms.getSumDocFreq() (Mike McCandless, Robert Muir)
|
||||||
|
|
||||||
* LUCENE-3003: Added new expert class oal.index.DocTermsOrd,
|
* LUCENE-3003: Added new expert class oal.index.DocTermsOrd,
|
||||||
refactored from Solr's UnInvertedField, for accessing term ords for
|
refactored from Solr's UnInvertedField, for accessing term ords for
|
||||||
multi-valued fields, per document. This is similar to FieldCache in
|
multi-valued fields, per document. This is similar to FieldCache in
|
||||||
|
@ -512,6 +514,11 @@ Bug fixes
|
||||||
causing the file to sometimes be larger than it needed to be. (Mike
|
causing the file to sometimes be larger than it needed to be. (Mike
|
||||||
McCandless)
|
McCandless)
|
||||||
|
|
||||||
|
New Features
|
||||||
|
|
||||||
|
* LUCENE-3290: Added FieldInvertState.numUniqueTerms
|
||||||
|
(Mike McCandless, Robert Muir)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
|
|
||||||
* LUCENE-3201, LUCENE-3218: CompoundFileSystem code has been consolidated
|
* LUCENE-3201, LUCENE-3218: CompoundFileSystem code has been consolidated
|
||||||
|
|
|
@ -53,24 +53,13 @@ import org.apache.lucene.index.IndexWriterConfig;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||||
import org.apache.lucene.queryParser.ParseException;
|
import org.apache.lucene.queryParser.ParseException;
|
||||||
import org.apache.lucene.queryParser.QueryParser;
|
import org.apache.lucene.search.*;
|
||||||
import org.apache.lucene.search.BooleanQuery;
|
|
||||||
import org.apache.lucene.search.FilteredQuery;
|
|
||||||
import org.apache.lucene.search.IndexSearcher;
|
|
||||||
import org.apache.lucene.search.MultiPhraseQuery;
|
|
||||||
import org.apache.lucene.search.MultiTermQuery;
|
|
||||||
import org.apache.lucene.search.NumericRangeQuery;
|
|
||||||
import org.apache.lucene.search.PhraseQuery;
|
|
||||||
import org.apache.lucene.search.Query;
|
|
||||||
import org.apache.lucene.search.TermQuery;
|
|
||||||
import org.apache.lucene.search.TermRangeFilter;
|
|
||||||
import org.apache.lucene.search.TopDocs;
|
|
||||||
import org.apache.lucene.search.WildcardQuery;
|
|
||||||
import org.apache.lucene.search.BooleanClause.Occur;
|
import org.apache.lucene.search.BooleanClause.Occur;
|
||||||
import org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner;
|
import org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner;
|
||||||
import org.apache.lucene.search.regex.RegexQuery;
|
import org.apache.lucene.search.regex.RegexQuery;
|
||||||
import org.apache.lucene.search.spans.*;
|
import org.apache.lucene.search.spans.*;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.automaton.BasicAutomata;
|
import org.apache.lucene.util.automaton.BasicAutomata;
|
||||||
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
|
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
|
||||||
import org.apache.lucene.util.automaton.RegExp;
|
import org.apache.lucene.util.automaton.RegExp;
|
||||||
|
@ -102,8 +91,12 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
||||||
|
|
||||||
public void testQueryScorerHits() throws Exception {
|
public void testQueryScorerHits() throws Exception {
|
||||||
Analyzer analyzer = new MockAnalyzer(random, MockTokenizer.SIMPLE, true);
|
Analyzer analyzer = new MockAnalyzer(random, MockTokenizer.SIMPLE, true);
|
||||||
QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, FIELD_NAME, analyzer);
|
|
||||||
query = qp.parse("\"very long\"");
|
PhraseQuery phraseQuery = new PhraseQuery();
|
||||||
|
phraseQuery.add(new Term(FIELD_NAME, "very"));
|
||||||
|
phraseQuery.add(new Term(FIELD_NAME, "long"));
|
||||||
|
|
||||||
|
query = phraseQuery;
|
||||||
searcher = new IndexSearcher(ramDir, true);
|
searcher = new IndexSearcher(ramDir, true);
|
||||||
TopDocs hits = searcher.search(query, 10);
|
TopDocs hits = searcher.search(query, 10);
|
||||||
|
|
||||||
|
@ -133,12 +126,15 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
||||||
|
|
||||||
String s1 = "I call our world Flatland, not because we call it so,";
|
String s1 = "I call our world Flatland, not because we call it so,";
|
||||||
|
|
||||||
QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, FIELD_NAME, new MockAnalyzer(random, MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true));
|
|
||||||
|
|
||||||
// Verify that a query against the default field results in text being
|
// Verify that a query against the default field results in text being
|
||||||
// highlighted
|
// highlighted
|
||||||
// regardless of the field name.
|
// regardless of the field name.
|
||||||
Query q = parser.parse("\"world Flatland\"~3");
|
|
||||||
|
PhraseQuery q = new PhraseQuery();
|
||||||
|
q.setSlop(3);
|
||||||
|
q.add(new Term(FIELD_NAME, "world"));
|
||||||
|
q.add(new Term(FIELD_NAME, "flatland"));
|
||||||
|
|
||||||
String expected = "I call our <B>world</B> <B>Flatland</B>, not because we call it so,";
|
String expected = "I call our <B>world</B> <B>Flatland</B>, not because we call it so,";
|
||||||
String observed = highlightField(q, "SOME_FIELD_NAME", s1);
|
String observed = highlightField(q, "SOME_FIELD_NAME", s1);
|
||||||
if (VERBOSE) System.out.println("Expected: \"" + expected + "\n" + "Observed: \"" + observed);
|
if (VERBOSE) System.out.println("Expected: \"" + expected + "\n" + "Observed: \"" + observed);
|
||||||
|
@ -150,7 +146,11 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
||||||
// when the query field name differs from the name of the field being
|
// when the query field name differs from the name of the field being
|
||||||
// highlighted,
|
// highlighted,
|
||||||
// which in this example happens to be the default field name.
|
// which in this example happens to be the default field name.
|
||||||
q = parser.parse("text:\"world Flatland\"~3");
|
q = new PhraseQuery();
|
||||||
|
q.setSlop(3);
|
||||||
|
q.add(new Term("text", "world"));
|
||||||
|
q.add(new Term("text", "flatland"));
|
||||||
|
|
||||||
expected = s1;
|
expected = s1;
|
||||||
observed = highlightField(q, FIELD_NAME, s1);
|
observed = highlightField(q, FIELD_NAME, s1);
|
||||||
if (VERBOSE) System.out.println("Expected: \"" + expected + "\n" + "Observed: \"" + observed);
|
if (VERBOSE) System.out.println("Expected: \"" + expected + "\n" + "Observed: \"" + observed);
|
||||||
|
@ -177,7 +177,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testSimpleSpanHighlighter() throws Exception {
|
public void testSimpleSpanHighlighter() throws Exception {
|
||||||
doSearching("Kennedy");
|
doSearching(new TermQuery(new Term(FIELD_NAME, "kennedy")));
|
||||||
|
|
||||||
int maxNumFragmentsRequired = 2;
|
int maxNumFragmentsRequired = 2;
|
||||||
|
|
||||||
|
@ -202,23 +202,49 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
||||||
// LUCENE-1752
|
// LUCENE-1752
|
||||||
public void testRepeatingTermsInMultBooleans() throws Exception {
|
public void testRepeatingTermsInMultBooleans() throws Exception {
|
||||||
String content = "x y z a b c d e f g b c g";
|
String content = "x y z a b c d e f g b c g";
|
||||||
String ph1 = "\"a b c d\"";
|
|
||||||
String ph2 = "\"b c g\"";
|
|
||||||
String f1 = "f1";
|
String f1 = "f1";
|
||||||
String f2 = "f2";
|
String f2 = "f2";
|
||||||
String f1c = f1 + ":";
|
|
||||||
String f2c = f2 + ":";
|
|
||||||
String q = "(" + f1c + ph1 + " OR " + f2c + ph1 + ") AND (" + f1c + ph2
|
|
||||||
+ " OR " + f2c + ph2 + ")";
|
|
||||||
Analyzer analyzer = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false);
|
|
||||||
QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, f1, analyzer);
|
|
||||||
Query query = qp.parse(q);
|
|
||||||
|
|
||||||
QueryScorer scorer = new QueryScorer(query, f1);
|
PhraseQuery f1ph1 = new PhraseQuery();
|
||||||
|
f1ph1.add(new Term(f1, "a"));
|
||||||
|
f1ph1.add(new Term(f1, "b"));
|
||||||
|
f1ph1.add(new Term(f1, "c"));
|
||||||
|
f1ph1.add(new Term(f1, "d"));
|
||||||
|
|
||||||
|
PhraseQuery f2ph1 = new PhraseQuery();
|
||||||
|
f2ph1.add(new Term(f2, "a"));
|
||||||
|
f2ph1.add(new Term(f2, "b"));
|
||||||
|
f2ph1.add(new Term(f2, "c"));
|
||||||
|
f2ph1.add(new Term(f2, "d"));
|
||||||
|
|
||||||
|
PhraseQuery f1ph2 = new PhraseQuery();
|
||||||
|
f1ph2.add(new Term(f1, "b"));
|
||||||
|
f1ph2.add(new Term(f1, "c"));
|
||||||
|
f1ph2.add(new Term(f1, "g"));
|
||||||
|
|
||||||
|
PhraseQuery f2ph2 = new PhraseQuery();
|
||||||
|
f2ph2.add(new Term(f2, "b"));
|
||||||
|
f2ph2.add(new Term(f2, "c"));
|
||||||
|
f2ph2.add(new Term(f2, "g"));
|
||||||
|
|
||||||
|
BooleanQuery booleanQuery = new BooleanQuery();
|
||||||
|
BooleanQuery leftChild = new BooleanQuery();
|
||||||
|
leftChild.add(f1ph1, Occur.SHOULD);
|
||||||
|
leftChild.add(f2ph1, Occur.SHOULD);
|
||||||
|
booleanQuery.add(leftChild, Occur.MUST);
|
||||||
|
|
||||||
|
BooleanQuery rightChild = new BooleanQuery();
|
||||||
|
rightChild.add(f1ph2, Occur.SHOULD);
|
||||||
|
rightChild.add(f2ph2, Occur.SHOULD);
|
||||||
|
booleanQuery.add(rightChild, Occur.MUST);
|
||||||
|
|
||||||
|
QueryScorer scorer = new QueryScorer(booleanQuery, f1);
|
||||||
scorer.setExpandMultiTermQuery(false);
|
scorer.setExpandMultiTermQuery(false);
|
||||||
|
|
||||||
Highlighter h = new Highlighter(this, scorer);
|
Highlighter h = new Highlighter(this, scorer);
|
||||||
|
|
||||||
|
Analyzer analyzer = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false);
|
||||||
|
|
||||||
h.getBestFragment(analyzer, f1, content);
|
h.getBestFragment(analyzer, f1, content);
|
||||||
|
|
||||||
assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
|
assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
|
||||||
|
@ -226,7 +252,11 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testSimpleQueryScorerPhraseHighlighting() throws Exception {
|
public void testSimpleQueryScorerPhraseHighlighting() throws Exception {
|
||||||
doSearching("\"very long and contains\"");
|
PhraseQuery phraseQuery = new PhraseQuery();
|
||||||
|
phraseQuery.add(new Term(FIELD_NAME, "very"));
|
||||||
|
phraseQuery.add(new Term(FIELD_NAME, "long"));
|
||||||
|
phraseQuery.add(new Term(FIELD_NAME, "contains"), 3);
|
||||||
|
doSearching(phraseQuery);
|
||||||
|
|
||||||
int maxNumFragmentsRequired = 2;
|
int maxNumFragmentsRequired = 2;
|
||||||
|
|
||||||
|
@ -248,7 +278,14 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
||||||
numHighlights == 3);
|
numHighlights == 3);
|
||||||
|
|
||||||
numHighlights = 0;
|
numHighlights = 0;
|
||||||
doSearching("\"This piece of text refers to Kennedy\"");
|
|
||||||
|
phraseQuery = new PhraseQuery();
|
||||||
|
phraseQuery.add(new Term(FIELD_NAME, "piece"), 1);
|
||||||
|
phraseQuery.add(new Term(FIELD_NAME, "text"), 3);
|
||||||
|
phraseQuery.add(new Term(FIELD_NAME, "refers"), 4);
|
||||||
|
phraseQuery.add(new Term(FIELD_NAME, "kennedy"), 6);
|
||||||
|
|
||||||
|
doSearching(phraseQuery);
|
||||||
|
|
||||||
maxNumFragmentsRequired = 2;
|
maxNumFragmentsRequired = 2;
|
||||||
|
|
||||||
|
@ -270,7 +307,14 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
||||||
numHighlights == 4);
|
numHighlights == 4);
|
||||||
|
|
||||||
numHighlights = 0;
|
numHighlights = 0;
|
||||||
doSearching("\"lets is a the lets is a the lets is a the lets\"");
|
|
||||||
|
phraseQuery = new PhraseQuery();
|
||||||
|
phraseQuery.add(new Term(FIELD_NAME, "lets"));
|
||||||
|
phraseQuery.add(new Term(FIELD_NAME, "lets"), 4);
|
||||||
|
phraseQuery.add(new Term(FIELD_NAME, "lets"), 8);
|
||||||
|
phraseQuery.add(new Term(FIELD_NAME, "lets"), 12);
|
||||||
|
|
||||||
|
doSearching(phraseQuery);
|
||||||
|
|
||||||
maxNumFragmentsRequired = 2;
|
maxNumFragmentsRequired = 2;
|
||||||
|
|
||||||
|
@ -366,7 +410,12 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testSimpleQueryScorerPhraseHighlighting2() throws Exception {
|
public void testSimpleQueryScorerPhraseHighlighting2() throws Exception {
|
||||||
doSearching("\"text piece long\"~5");
|
PhraseQuery phraseQuery = new PhraseQuery();
|
||||||
|
phraseQuery.setSlop(5);
|
||||||
|
phraseQuery.add(new Term(FIELD_NAME, "text"));
|
||||||
|
phraseQuery.add(new Term(FIELD_NAME, "piece"));
|
||||||
|
phraseQuery.add(new Term(FIELD_NAME, "long"));
|
||||||
|
doSearching(phraseQuery);
|
||||||
|
|
||||||
int maxNumFragmentsRequired = 2;
|
int maxNumFragmentsRequired = 2;
|
||||||
|
|
||||||
|
@ -388,7 +437,11 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testSimpleQueryScorerPhraseHighlighting3() throws Exception {
|
public void testSimpleQueryScorerPhraseHighlighting3() throws Exception {
|
||||||
doSearching("\"x y z\"");
|
PhraseQuery phraseQuery = new PhraseQuery();
|
||||||
|
phraseQuery.add(new Term(FIELD_NAME, "x"));
|
||||||
|
phraseQuery.add(new Term(FIELD_NAME, "y"));
|
||||||
|
phraseQuery.add(new Term(FIELD_NAME, "z"));
|
||||||
|
doSearching(phraseQuery);
|
||||||
|
|
||||||
int maxNumFragmentsRequired = 2;
|
int maxNumFragmentsRequired = 2;
|
||||||
|
|
||||||
|
@ -410,7 +463,12 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testSimpleSpanFragmenter() throws Exception {
|
public void testSimpleSpanFragmenter() throws Exception {
|
||||||
doSearching("\"piece of text that is very long\"");
|
PhraseQuery phraseQuery = new PhraseQuery();
|
||||||
|
phraseQuery.add(new Term(FIELD_NAME, "piece"));
|
||||||
|
phraseQuery.add(new Term(FIELD_NAME, "text"), 2);
|
||||||
|
phraseQuery.add(new Term(FIELD_NAME, "very"), 5);
|
||||||
|
phraseQuery.add(new Term(FIELD_NAME, "long"), 6);
|
||||||
|
doSearching(phraseQuery);
|
||||||
|
|
||||||
int maxNumFragmentsRequired = 2;
|
int maxNumFragmentsRequired = 2;
|
||||||
|
|
||||||
|
@ -429,7 +487,11 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
doSearching("\"been shot\"");
|
phraseQuery = new PhraseQuery();
|
||||||
|
phraseQuery.add(new Term(FIELD_NAME, "been"));
|
||||||
|
phraseQuery.add(new Term(FIELD_NAME, "shot"));
|
||||||
|
|
||||||
|
doSearching(phraseQuery); // search the "been shot" phrase built just above; the 'query' field still holds the previous search
|
||||||
|
|
||||||
maxNumFragmentsRequired = 2;
|
maxNumFragmentsRequired = 2;
|
||||||
|
|
||||||
|
@ -451,7 +513,16 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
||||||
|
|
||||||
// position sensitive query added after position insensitive query
|
// position sensitive query added after position insensitive query
|
||||||
public void testPosTermStdTerm() throws Exception {
|
public void testPosTermStdTerm() throws Exception {
|
||||||
doSearching("y \"x y z\"");
|
BooleanQuery booleanQuery = new BooleanQuery();
|
||||||
|
booleanQuery.add(new TermQuery(new Term(FIELD_NAME, "y")), Occur.SHOULD);
|
||||||
|
|
||||||
|
PhraseQuery phraseQuery = new PhraseQuery();
|
||||||
|
phraseQuery.add(new Term(FIELD_NAME, "x"));
|
||||||
|
phraseQuery.add(new Term(FIELD_NAME, "y"));
|
||||||
|
phraseQuery.add(new Term(FIELD_NAME, "z"));
|
||||||
|
booleanQuery.add(phraseQuery, Occur.SHOULD);
|
||||||
|
|
||||||
|
doSearching(booleanQuery);
|
||||||
|
|
||||||
int maxNumFragmentsRequired = 2;
|
int maxNumFragmentsRequired = 2;
|
||||||
|
|
||||||
|
@ -525,7 +596,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testSimpleQueryTermScorerHighlighter() throws Exception {
|
public void testSimpleQueryTermScorerHighlighter() throws Exception {
|
||||||
doSearching("Kennedy");
|
doSearching(new TermQuery(new Term(FIELD_NAME, "kennedy")));
|
||||||
Highlighter highlighter = new Highlighter(new QueryTermScorer(query));
|
Highlighter highlighter = new Highlighter(new QueryTermScorer(query));
|
||||||
highlighter.setTextFragmenter(new SimpleFragmenter(40));
|
highlighter.setTextFragmenter(new SimpleFragmenter(40));
|
||||||
int maxNumFragmentsRequired = 2;
|
int maxNumFragmentsRequired = 2;
|
||||||
|
@ -591,7 +662,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
||||||
@Override
|
@Override
|
||||||
public void run() throws Exception {
|
public void run() throws Exception {
|
||||||
numHighlights = 0;
|
numHighlights = 0;
|
||||||
doSearching("Kennedy");
|
doSearching(new TermQuery(new Term(FIELD_NAME, "kennedy")));
|
||||||
doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
|
doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
|
||||||
assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
|
assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
|
||||||
numHighlights == 4);
|
numHighlights == 4);
|
||||||
|
@ -607,7 +678,9 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
||||||
@Override
|
@Override
|
||||||
public void run() throws Exception {
|
public void run() throws Exception {
|
||||||
numHighlights = 0;
|
numHighlights = 0;
|
||||||
doSearching("Kinnedy~0.5");
|
FuzzyQuery fuzzyQuery = new FuzzyQuery(new Term(FIELD_NAME, "kinnedy"), 0.5f);
|
||||||
|
fuzzyQuery.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
|
||||||
|
doSearching(fuzzyQuery);
|
||||||
doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this, true);
|
doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this, true);
|
||||||
assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
|
assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
|
||||||
numHighlights == 5);
|
numHighlights == 5);
|
||||||
|
@ -623,7 +696,9 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
||||||
@Override
|
@Override
|
||||||
public void run() throws Exception {
|
public void run() throws Exception {
|
||||||
numHighlights = 0;
|
numHighlights = 0;
|
||||||
doSearching("K?nnedy");
|
WildcardQuery wildcardQuery = new WildcardQuery(new Term(FIELD_NAME, "k?nnedy"));
|
||||||
|
wildcardQuery.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
|
||||||
|
doSearching(wildcardQuery);
|
||||||
doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
|
doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
|
||||||
assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
|
assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
|
||||||
numHighlights == 4);
|
numHighlights == 4);
|
||||||
|
@ -639,7 +714,9 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
||||||
@Override
|
@Override
|
||||||
public void run() throws Exception {
|
public void run() throws Exception {
|
||||||
numHighlights = 0;
|
numHighlights = 0;
|
||||||
doSearching("K*dy");
|
WildcardQuery wildcardQuery = new WildcardQuery(new Term(FIELD_NAME, "k*dy"));
|
||||||
|
wildcardQuery.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
|
||||||
|
doSearching(wildcardQuery);
|
||||||
doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
|
doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
|
||||||
assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
|
assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
|
||||||
numHighlights == 5);
|
numHighlights == 5);
|
||||||
|
@ -660,9 +737,15 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
||||||
// Need to explicitly set the QueryParser property to use TermRangeQuery
|
// Need to explicitly set the QueryParser property to use TermRangeQuery
|
||||||
// rather
|
// rather
|
||||||
// than RangeFilters
|
// than RangeFilters
|
||||||
QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, FIELD_NAME, analyzer);
|
|
||||||
parser.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
|
TermRangeQuery rangeQuery = new TermRangeQuery(
|
||||||
query = parser.parse(queryString);
|
FIELD_NAME,
|
||||||
|
new BytesRef("kannedy"),
|
||||||
|
new BytesRef("kznnedy"),
|
||||||
|
true, true);
|
||||||
|
rangeQuery.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
|
||||||
|
|
||||||
|
query = rangeQuery;
|
||||||
doSearching(query);
|
doSearching(query);
|
||||||
|
|
||||||
doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
|
doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
|
||||||
|
@ -772,7 +855,10 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
||||||
@Override
|
@Override
|
||||||
public void run() throws Exception {
|
public void run() throws Exception {
|
||||||
numHighlights = 0;
|
numHighlights = 0;
|
||||||
doSearching("\"John Kennedy\"");
|
PhraseQuery phraseQuery = new PhraseQuery();
|
||||||
|
phraseQuery.add(new Term(FIELD_NAME, "john"));
|
||||||
|
phraseQuery.add(new Term(FIELD_NAME, "kennedy"));
|
||||||
|
doSearching(phraseQuery);
|
||||||
doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
|
doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
|
||||||
// Currently highlights "John" and "Kennedy" separately
|
// Currently highlights "John" and "Kennedy" separately
|
||||||
assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
|
assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
|
||||||
|
@ -874,7 +960,13 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
||||||
@Override
|
@Override
|
||||||
public void run() throws Exception {
|
public void run() throws Exception {
|
||||||
numHighlights = 0;
|
numHighlights = 0;
|
||||||
doSearching("John Kenn*");
|
BooleanQuery booleanQuery = new BooleanQuery();
|
||||||
|
booleanQuery.add(new TermQuery(new Term(FIELD_NAME, "john")), Occur.SHOULD);
|
||||||
|
PrefixQuery prefixQuery = new PrefixQuery(new Term(FIELD_NAME, "kenn"));
|
||||||
|
prefixQuery.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
|
||||||
|
booleanQuery.add(prefixQuery, Occur.SHOULD);
|
||||||
|
|
||||||
|
doSearching(booleanQuery);
|
||||||
doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
|
doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
|
||||||
assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
|
assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
|
||||||
numHighlights == 5);
|
numHighlights == 5);
|
||||||
|
@ -890,7 +982,12 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
||||||
@Override
|
@Override
|
||||||
public void run() throws Exception {
|
public void run() throws Exception {
|
||||||
numHighlights = 0;
|
numHighlights = 0;
|
||||||
doSearching("JFK OR Kennedy");
|
|
||||||
|
BooleanQuery query = new BooleanQuery();
|
||||||
|
query.add(new TermQuery(new Term(FIELD_NAME, "jfk")), Occur.SHOULD);
|
||||||
|
query.add(new TermQuery(new Term(FIELD_NAME, "kennedy")), Occur.SHOULD);
|
||||||
|
|
||||||
|
doSearching(query);
|
||||||
doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
|
doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
|
||||||
assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
|
assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
|
||||||
numHighlights == 5);
|
numHighlights == 5);
|
||||||
|
@ -905,7 +1002,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void run() throws Exception {
|
public void run() throws Exception {
|
||||||
doSearching("Kennedy");
|
doSearching(new TermQuery(new Term(FIELD_NAME, "kennedy")));
|
||||||
numHighlights = 0;
|
numHighlights = 0;
|
||||||
for (int i = 0; i < hits.totalHits; i++) {
|
for (int i = 0; i < hits.totalHits; i++) {
|
||||||
String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
|
String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
|
||||||
|
@ -1006,11 +1103,13 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
||||||
HashMap<String,String> synonyms = new HashMap<String,String>();
|
HashMap<String,String> synonyms = new HashMap<String,String>();
|
||||||
synonyms.put("football", "soccer,footie");
|
synonyms.put("football", "soccer,footie");
|
||||||
Analyzer analyzer = new SynonymAnalyzer(synonyms);
|
Analyzer analyzer = new SynonymAnalyzer(synonyms);
|
||||||
String srchkey = "football";
|
|
||||||
|
|
||||||
String s = "football-soccer in the euro 2004 footie competition";
|
String s = "football-soccer in the euro 2004 footie competition";
|
||||||
QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, "bookid", analyzer);
|
|
||||||
Query query = parser.parse(srchkey);
|
BooleanQuery query = new BooleanQuery();
|
||||||
|
query.add(new TermQuery(new Term("bookid", "football")), Occur.SHOULD);
|
||||||
|
query.add(new TermQuery(new Term("bookid", "soccer")), Occur.SHOULD);
|
||||||
|
query.add(new TermQuery(new Term("bookid", "footie")), Occur.SHOULD);
|
||||||
|
|
||||||
TokenStream tokenStream = analyzer.tokenStream(null, new StringReader(s));
|
TokenStream tokenStream = analyzer.tokenStream(null, new StringReader(s));
|
||||||
|
|
||||||
|
@ -1037,7 +1136,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
||||||
@Override
|
@Override
|
||||||
public void run() throws Exception {
|
public void run() throws Exception {
|
||||||
numHighlights = 0;
|
numHighlights = 0;
|
||||||
doSearching("Kennedy");
|
doSearching(new TermQuery(new Term(FIELD_NAME, "kennedy")));
|
||||||
// new Highlighter(HighlighterTest.this, new QueryTermScorer(query));
|
// new Highlighter(HighlighterTest.this, new QueryTermScorer(query));
|
||||||
|
|
||||||
for (int i = 0; i < hits.totalHits; i++) {
|
for (int i = 0; i < hits.totalHits; i++) {
|
||||||
|
@ -1061,7 +1160,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
||||||
@Override
|
@Override
|
||||||
public void run() throws Exception {
|
public void run() throws Exception {
|
||||||
|
|
||||||
doSearching("Kennedy");
|
doSearching(new TermQuery(new Term(FIELD_NAME, "kennedy")));
|
||||||
|
|
||||||
for (int i = 0; i < hits.totalHits; i++) {
|
for (int i = 0; i < hits.totalHits; i++) {
|
||||||
String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
|
String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
|
||||||
|
@ -1102,7 +1201,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
||||||
@Override
|
@Override
|
||||||
public void run() throws Exception {
|
public void run() throws Exception {
|
||||||
numHighlights = 0;
|
numHighlights = 0;
|
||||||
doSearching("meat");
|
doSearching(new TermQuery(new Term(FIELD_NAME, "meat")));
|
||||||
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(texts[0]));
|
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(texts[0]));
|
||||||
Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream,
|
Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream,
|
||||||
HighlighterTest.this);// new Highlighter(this, new
|
HighlighterTest.this);// new Highlighter(this, new
|
||||||
|
@ -1200,8 +1299,10 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
||||||
searcher = new IndexSearcher(ramDir, true);
|
searcher = new IndexSearcher(ramDir, true);
|
||||||
Analyzer analyzer = new MockAnalyzer(random, MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true);
|
Analyzer analyzer = new MockAnalyzer(random, MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true);
|
||||||
|
|
||||||
QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, FIELD_NAME, analyzer);
|
BooleanQuery query = new BooleanQuery();
|
||||||
Query query = parser.parse("JF? or Kenned*");
|
query.add(new WildcardQuery(new Term(FIELD_NAME, "jf?")), Occur.SHOULD);
|
||||||
|
query.add(new WildcardQuery(new Term(FIELD_NAME, "kenned*")), Occur.SHOULD);
|
||||||
|
|
||||||
if (VERBOSE) System.out.println("Searching with primitive query");
|
if (VERBOSE) System.out.println("Searching with primitive query");
|
||||||
// forget to set this and...
|
// forget to set this and...
|
||||||
// query=query.rewrite(reader);
|
// query=query.rewrite(reader);
|
||||||
|
@ -1243,7 +1344,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void run() throws Exception {
|
public void run() throws Exception {
|
||||||
doSearching("AnInvalidQueryWhichShouldYieldNoResults");
|
doSearching(new TermQuery(new Term(FIELD_NAME, "aninvalidquerywhichshouldyieldnoresults")));
|
||||||
|
|
||||||
for (String text : texts) {
|
for (String text : texts) {
|
||||||
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
|
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
|
||||||
|
@ -1313,8 +1414,10 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
||||||
@Override
|
@Override
|
||||||
public void run() throws Exception {
|
public void run() throws Exception {
|
||||||
String docMainText = "fred is one of the people";
|
String docMainText = "fred is one of the people";
|
||||||
QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, FIELD_NAME, analyzer);
|
|
||||||
Query query = parser.parse("fred category:people");
|
BooleanQuery query = new BooleanQuery();
|
||||||
|
query.add(new TermQuery(new Term(FIELD_NAME, "fred")), Occur.SHOULD);
|
||||||
|
query.add(new TermQuery(new Term("category", "people")), Occur.SHOULD);
|
||||||
|
|
||||||
// highlighting respects fieldnames used in query
|
// highlighting respects fieldnames used in query
|
||||||
|
|
||||||
|
@ -1453,64 +1556,68 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
||||||
Highlighter highlighter;
|
Highlighter highlighter;
|
||||||
String result;
|
String result;
|
||||||
|
|
||||||
query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("foo");
|
query = new TermQuery(new Term("text", "foo"));
|
||||||
highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
|
highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
|
||||||
result = highlighter.getBestFragments(getTS2(), s, 3, "...");
|
result = highlighter.getBestFragments(getTS2(), s, 3, "...");
|
||||||
assertEquals("Hi-Speed10 <B>foo</B>", result);
|
assertEquals("Hi-Speed10 <B>foo</B>", result);
|
||||||
|
|
||||||
query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("10");
|
query = new TermQuery(new Term("text", "10"));
|
||||||
highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
|
highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
|
||||||
result = highlighter.getBestFragments(getTS2(), s, 3, "...");
|
result = highlighter.getBestFragments(getTS2(), s, 3, "...");
|
||||||
assertEquals("Hi-Speed<B>10</B> foo", result);
|
assertEquals("Hi-Speed<B>10</B> foo", result);
|
||||||
|
|
||||||
query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("hi");
|
query = new TermQuery(new Term("text", "hi"));
|
||||||
highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
|
highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
|
||||||
result = highlighter.getBestFragments(getTS2(), s, 3, "...");
|
result = highlighter.getBestFragments(getTS2(), s, 3, "...");
|
||||||
assertEquals("<B>Hi</B>-Speed10 foo", result);
|
assertEquals("<B>Hi</B>-Speed10 foo", result);
|
||||||
|
|
||||||
query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("speed");
|
query = new TermQuery(new Term("text", "speed"));
|
||||||
highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
|
highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
|
||||||
result = highlighter.getBestFragments(getTS2(), s, 3, "...");
|
result = highlighter.getBestFragments(getTS2(), s, 3, "...");
|
||||||
assertEquals("Hi-<B>Speed</B>10 foo", result);
|
assertEquals("Hi-<B>Speed</B>10 foo", result);
|
||||||
|
|
||||||
query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("hispeed");
|
query = new TermQuery(new Term("text", "hispeed"));
|
||||||
highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
|
highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
|
||||||
result = highlighter.getBestFragments(getTS2(), s, 3, "...");
|
result = highlighter.getBestFragments(getTS2(), s, 3, "...");
|
||||||
assertEquals("<B>Hi-Speed</B>10 foo", result);
|
assertEquals("<B>Hi-Speed</B>10 foo", result);
|
||||||
|
|
||||||
query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("hi speed");
|
BooleanQuery booleanQuery = new BooleanQuery();
|
||||||
|
booleanQuery.add(new TermQuery(new Term("text", "hi")), Occur.SHOULD);
|
||||||
|
booleanQuery.add(new TermQuery(new Term("text", "speed")), Occur.SHOULD);
|
||||||
|
|
||||||
|
query = booleanQuery;
|
||||||
highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
|
highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
|
||||||
result = highlighter.getBestFragments(getTS2(), s, 3, "...");
|
result = highlighter.getBestFragments(getTS2(), s, 3, "...");
|
||||||
assertEquals("<B>Hi-Speed</B>10 foo", result);
|
assertEquals("<B>Hi-Speed</B>10 foo", result);
|
||||||
|
|
||||||
// ///////////////// same tests, just put the bigger overlapping token
|
// ///////////////// same tests, just put the bigger overlapping token
|
||||||
// first
|
// first
|
||||||
query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("foo");
|
query = new TermQuery(new Term("text", "foo"));
|
||||||
highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
|
highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
|
||||||
result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
|
result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
|
||||||
assertEquals("Hi-Speed10 <B>foo</B>", result);
|
assertEquals("Hi-Speed10 <B>foo</B>", result);
|
||||||
|
|
||||||
query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("10");
|
query = new TermQuery(new Term("text", "10"));
|
||||||
highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
|
highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
|
||||||
result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
|
result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
|
||||||
assertEquals("Hi-Speed<B>10</B> foo", result);
|
assertEquals("Hi-Speed<B>10</B> foo", result);
|
||||||
|
|
||||||
query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("hi");
|
query = new TermQuery(new Term("text", "hi"));
|
||||||
highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
|
highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
|
||||||
result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
|
result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
|
||||||
assertEquals("<B>Hi</B>-Speed10 foo", result);
|
assertEquals("<B>Hi</B>-Speed10 foo", result);
|
||||||
|
|
||||||
query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("speed");
|
query = new TermQuery(new Term("text", "speed"));
|
||||||
highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
|
highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
|
||||||
result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
|
result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
|
||||||
assertEquals("Hi-<B>Speed</B>10 foo", result);
|
assertEquals("Hi-<B>Speed</B>10 foo", result);
|
||||||
|
|
||||||
query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("hispeed");
|
query = new TermQuery(new Term("text", "hispeed"));
|
||||||
highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
|
highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
|
||||||
result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
|
result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
|
||||||
assertEquals("<B>Hi-Speed</B>10 foo", result);
|
assertEquals("<B>Hi-Speed</B>10 foo", result);
|
||||||
|
|
||||||
query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("hi speed");
|
query = booleanQuery;
|
||||||
highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
|
highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
|
||||||
result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
|
result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
|
||||||
assertEquals("<B>Hi-Speed</B>10 foo", result);
|
assertEquals("<B>Hi-Speed</B>10 foo", result);
|
||||||
|
@ -1554,9 +1661,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
||||||
}
|
}
|
||||||
|
|
||||||
private void searchIndex() throws IOException, ParseException, InvalidTokenOffsetsException {
|
private void searchIndex() throws IOException, ParseException, InvalidTokenOffsetsException {
|
||||||
String q = "t_text1:random";
|
Query query = new TermQuery(new Term("t_text1", "random"));
|
||||||
QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, "t_text1", a );
|
|
||||||
Query query = parser.parse( q );
|
|
||||||
IndexSearcher searcher = new IndexSearcher( dir, true );
|
IndexSearcher searcher = new IndexSearcher( dir, true );
|
||||||
// This scorer can return negative idf -> null fragment
|
// This scorer can return negative idf -> null fragment
|
||||||
Scorer scorer = new QueryTermScorer( query, searcher.getIndexReader(), "t_text1" );
|
Scorer scorer = new QueryTermScorer( query, searcher.getIndexReader(), "t_text1" );
|
||||||
|
@ -1608,14 +1713,6 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
||||||
return "<B>" + originalText + "</B>";
|
return "<B>" + originalText + "</B>";
|
||||||
}
|
}
|
||||||
|
|
||||||
public void doSearching(String queryString) throws Exception {
|
|
||||||
QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, FIELD_NAME, analyzer);
|
|
||||||
parser.setEnablePositionIncrements(true);
|
|
||||||
parser.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
|
|
||||||
query = parser.parse(queryString);
|
|
||||||
doSearching(query);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void doSearching(Query unReWrittenQuery) throws Exception {
|
public void doSearching(Query unReWrittenQuery) throws Exception {
|
||||||
if (searcher != null) searcher.close();
|
if (searcher != null) searcher.close();
|
||||||
searcher = new IndexSearcher(ramDir, true);
|
searcher = new IndexSearcher(ramDir, true);
|
||||||
|
|
|
@ -19,7 +19,10 @@ package org.apache.lucene.search.vectorhighlight;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
|
import java.io.StringReader;
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.MockAnalyzer;
|
import org.apache.lucene.analysis.MockAnalyzer;
|
||||||
|
@ -28,6 +31,7 @@ import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||||
|
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.document.Field.Index;
|
import org.apache.lucene.document.Field.Index;
|
||||||
|
@ -44,6 +48,7 @@ import org.apache.lucene.search.PhraseQuery;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.search.TermQuery;
|
import org.apache.lucene.search.TermQuery;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
|
||||||
public abstract class AbstractTestCase extends LuceneTestCase {
|
public abstract class AbstractTestCase extends LuceneTestCase {
|
||||||
|
@ -56,8 +61,6 @@ public abstract class AbstractTestCase extends LuceneTestCase {
|
||||||
protected Analyzer analyzerB;
|
protected Analyzer analyzerB;
|
||||||
protected Analyzer analyzerK;
|
protected Analyzer analyzerK;
|
||||||
protected IndexReader reader;
|
protected IndexReader reader;
|
||||||
protected QueryParser paW;
|
|
||||||
protected QueryParser paB;
|
|
||||||
|
|
||||||
protected static final String[] shortMVValues = {
|
protected static final String[] shortMVValues = {
|
||||||
"",
|
"",
|
||||||
|
@ -90,8 +93,6 @@ public abstract class AbstractTestCase extends LuceneTestCase {
|
||||||
analyzerW = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false);
|
analyzerW = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false);
|
||||||
analyzerB = new BigramAnalyzer();
|
analyzerB = new BigramAnalyzer();
|
||||||
analyzerK = new MockAnalyzer(random, MockTokenizer.KEYWORD, false);
|
analyzerK = new MockAnalyzer(random, MockTokenizer.KEYWORD, false);
|
||||||
paW = new QueryParser(TEST_VERSION_CURRENT, F, analyzerW );
|
|
||||||
paB = new QueryParser(TEST_VERSION_CURRENT, F, analyzerB );
|
|
||||||
dir = newDirectory();
|
dir = newDirectory();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -172,6 +173,33 @@ public abstract class AbstractTestCase extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected List<BytesRef> analyze(String text, String field, Analyzer analyzer) throws IOException {
|
||||||
|
List<BytesRef> bytesRefs = new ArrayList<BytesRef>();
|
||||||
|
|
||||||
|
TokenStream tokenStream = analyzer.reusableTokenStream(field, new StringReader(text));
|
||||||
|
TermToBytesRefAttribute termAttribute = tokenStream.getAttribute(TermToBytesRefAttribute.class);
|
||||||
|
|
||||||
|
BytesRef bytesRef = termAttribute.getBytesRef();
|
||||||
|
|
||||||
|
while (tokenStream.incrementToken()) {
|
||||||
|
termAttribute.fillBytesRef();
|
||||||
|
bytesRefs.add(new BytesRef(bytesRef));
|
||||||
|
}
|
||||||
|
|
||||||
|
tokenStream.end();
|
||||||
|
tokenStream.close();
|
||||||
|
|
||||||
|
return bytesRefs;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected PhraseQuery toPhraseQuery(List<BytesRef> bytesRefs, String field) {
|
||||||
|
PhraseQuery phraseQuery = new PhraseQuery();
|
||||||
|
for (BytesRef bytesRef : bytesRefs) {
|
||||||
|
phraseQuery.add(new Term(field, bytesRef));
|
||||||
|
}
|
||||||
|
return phraseQuery;
|
||||||
|
}
|
||||||
|
|
||||||
static final class BigramAnalyzer extends Analyzer {
|
static final class BigramAnalyzer extends Analyzer {
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStream tokenStream(String fieldName, Reader reader) {
|
||||||
|
|
|
@ -22,19 +22,33 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.search.BooleanQuery;
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
|
import org.apache.lucene.search.PhraseQuery;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.search.BooleanClause.Occur;
|
import org.apache.lucene.search.BooleanClause.Occur;
|
||||||
|
import org.apache.lucene.search.TermQuery;
|
||||||
import org.apache.lucene.search.vectorhighlight.FieldQuery.QueryPhraseMap;
|
import org.apache.lucene.search.vectorhighlight.FieldQuery.QueryPhraseMap;
|
||||||
import org.apache.lucene.search.vectorhighlight.FieldTermStack.TermInfo;
|
import org.apache.lucene.search.vectorhighlight.FieldTermStack.TermInfo;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
|
||||||
public class FieldQueryTest extends AbstractTestCase {
|
public class FieldQueryTest extends AbstractTestCase {
|
||||||
|
|
||||||
public void testFlattenBoolean() throws Exception {
|
public void testFlattenBoolean() throws Exception {
|
||||||
Query query = paW.parse( "A AND B OR C NOT (D AND E)" );
|
BooleanQuery booleanQuery = new BooleanQuery();
|
||||||
FieldQuery fq = new FieldQuery( query, true, true );
|
booleanQuery.add(new TermQuery(new Term(F, "A")), Occur.MUST);
|
||||||
|
booleanQuery.add(new TermQuery(new Term(F, "B")), Occur.MUST);
|
||||||
|
booleanQuery.add(new TermQuery(new Term(F, "C")), Occur.SHOULD);
|
||||||
|
|
||||||
|
BooleanQuery innerQuery = new BooleanQuery();
|
||||||
|
innerQuery.add(new TermQuery(new Term(F, "D")), Occur.MUST);
|
||||||
|
innerQuery.add(new TermQuery(new Term(F, "E")), Occur.MUST);
|
||||||
|
booleanQuery.add(innerQuery, Occur.MUST_NOT);
|
||||||
|
|
||||||
|
FieldQuery fq = new FieldQuery(booleanQuery, true, true );
|
||||||
Set<Query> flatQueries = new HashSet<Query>();
|
Set<Query> flatQueries = new HashSet<Query>();
|
||||||
fq.flatten( query, flatQueries );
|
fq.flatten(booleanQuery, flatQueries);
|
||||||
assertCollectionQueries( flatQueries, tq( "A" ), tq( "B" ), tq( "C" ) );
|
assertCollectionQueries( flatQueries, tq( "A" ), tq( "B" ), tq( "C" ) );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -47,15 +61,25 @@ public class FieldQueryTest extends AbstractTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testFlattenTermAndPhrase() throws Exception {
|
public void testFlattenTermAndPhrase() throws Exception {
|
||||||
Query query = paW.parse( "A AND \"B C\"" );
|
BooleanQuery booleanQuery = new BooleanQuery();
|
||||||
FieldQuery fq = new FieldQuery( query, true, true );
|
booleanQuery.add(new TermQuery(new Term(F, "A")), Occur.MUST);
|
||||||
|
PhraseQuery phraseQuery = new PhraseQuery();
|
||||||
|
phraseQuery.add(new Term(F, "B"));
|
||||||
|
phraseQuery.add(new Term(F, "C"));
|
||||||
|
booleanQuery.add(phraseQuery, Occur.MUST);
|
||||||
|
|
||||||
|
FieldQuery fq = new FieldQuery(booleanQuery, true, true );
|
||||||
Set<Query> flatQueries = new HashSet<Query>();
|
Set<Query> flatQueries = new HashSet<Query>();
|
||||||
fq.flatten( query, flatQueries );
|
fq.flatten(booleanQuery, flatQueries);
|
||||||
assertCollectionQueries( flatQueries, tq( "A" ), pqF( "B", "C" ) );
|
assertCollectionQueries( flatQueries, tq( "A" ), pqF( "B", "C" ) );
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testFlattenTermAndPhrase2gram() throws Exception {
|
public void testFlattenTermAndPhrase2gram() throws Exception {
|
||||||
Query query = paB.parse( "AA AND \"BCD\" OR \"EFGH\"" );
|
BooleanQuery query = new BooleanQuery();
|
||||||
|
query.add(new TermQuery(new Term(F, "AA")), Occur.MUST);
|
||||||
|
query.add(toPhraseQuery(analyze("BCD", F, analyzerB), F), Occur.MUST);
|
||||||
|
query.add(toPhraseQuery(analyze("EFGH", F, analyzerB), F), Occur.SHOULD);
|
||||||
|
|
||||||
FieldQuery fq = new FieldQuery( query, true, true );
|
FieldQuery fq = new FieldQuery( query, true, true );
|
||||||
Set<Query> flatQueries = new HashSet<Query>();
|
Set<Query> flatQueries = new HashSet<Query>();
|
||||||
fq.flatten( query, flatQueries );
|
fq.flatten( query, flatQueries );
|
||||||
|
@ -232,7 +256,16 @@ public class FieldQueryTest extends AbstractTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testGetTermSet() throws Exception {
|
public void testGetTermSet() throws Exception {
|
||||||
Query query = paW.parse( "A AND B OR x:C NOT (D AND E)" );
|
BooleanQuery query = new BooleanQuery();
|
||||||
|
query.add(new TermQuery(new Term(F, "A")), Occur.MUST);
|
||||||
|
query.add(new TermQuery(new Term(F, "B")), Occur.MUST);
|
||||||
|
query.add(new TermQuery(new Term("x", "C")), Occur.SHOULD);
|
||||||
|
|
||||||
|
BooleanQuery innerQuery = new BooleanQuery();
|
||||||
|
innerQuery.add(new TermQuery(new Term(F, "D")), Occur.MUST);
|
||||||
|
innerQuery.add(new TermQuery(new Term(F, "E")), Occur.MUST);
|
||||||
|
query.add(innerQuery, Occur.MUST_NOT);
|
||||||
|
|
||||||
FieldQuery fq = new FieldQuery( query, true, true );
|
FieldQuery fq = new FieldQuery( query, true, true );
|
||||||
assertEquals( 2, fq.termSetMap.size() );
|
assertEquals( 2, fq.termSetMap.size() );
|
||||||
Set<String> termSet = fq.getTermSet( F );
|
Set<String> termSet = fq.getTermSet( F );
|
||||||
|
@ -679,7 +712,9 @@ public class FieldQueryTest extends AbstractTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testQueryPhraseMapOverlap2gram() throws Exception {
|
public void testQueryPhraseMapOverlap2gram() throws Exception {
|
||||||
Query query = paB.parse( "\"abc\" AND \"bcd\"" );
|
BooleanQuery query = new BooleanQuery();
|
||||||
|
query.add(toPhraseQuery(analyze("abc", F, analyzerB), F), Occur.MUST);
|
||||||
|
query.add(toPhraseQuery(analyze("bcd", F, analyzerB), F), Occur.MUST);
|
||||||
|
|
||||||
// phraseHighlight = true, fieldMatch = true
|
// phraseHighlight = true, fieldMatch = true
|
||||||
FieldQuery fq = new FieldQuery( query, true, true );
|
FieldQuery fq = new FieldQuery( query, true, true );
|
||||||
|
|
|
@ -17,12 +17,20 @@ package org.apache.lucene.search.vectorhighlight;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.search.BooleanClause;
|
||||||
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
|
import org.apache.lucene.search.TermQuery;
|
||||||
|
|
||||||
public class ScoreOrderFragmentsBuilderTest extends AbstractTestCase {
|
public class ScoreOrderFragmentsBuilderTest extends AbstractTestCase {
|
||||||
|
|
||||||
public void test3Frags() throws Exception {
|
public void test3Frags() throws Exception {
|
||||||
FieldFragList ffl = ffl( "a c", "a b b b b b b b b b b b a b a b b b b b c a a b b" );
|
BooleanQuery query = new BooleanQuery();
|
||||||
|
query.add(new TermQuery(new Term(F, "a")), BooleanClause.Occur.SHOULD);
|
||||||
|
query.add(new TermQuery(new Term(F, "c")), BooleanClause.Occur.SHOULD);
|
||||||
|
|
||||||
|
FieldFragList ffl = ffl(query, "a b b b b b b b b b b b a b a b b b b b c a a b b" );
|
||||||
ScoreOrderFragmentsBuilder sofb = new ScoreOrderFragmentsBuilder();
|
ScoreOrderFragmentsBuilder sofb = new ScoreOrderFragmentsBuilder();
|
||||||
String[] f = sofb.createFragments( reader, 0, F, ffl, 3 );
|
String[] f = sofb.createFragments( reader, 0, F, ffl, 3 );
|
||||||
assertEquals( 3, f.length );
|
assertEquals( 3, f.length );
|
||||||
|
@ -32,9 +40,8 @@ public class ScoreOrderFragmentsBuilderTest extends AbstractTestCase {
|
||||||
assertEquals( "<b>a</b> b b b b b b b b b ", f[2] );
|
assertEquals( "<b>a</b> b b b b b b b b b ", f[2] );
|
||||||
}
|
}
|
||||||
|
|
||||||
private FieldFragList ffl( String queryValue, String indexValue ) throws Exception {
|
private FieldFragList ffl(Query query, String indexValue ) throws Exception {
|
||||||
make1d1fIndex( indexValue );
|
make1d1fIndex( indexValue );
|
||||||
Query query = paW.parse( queryValue );
|
|
||||||
FieldQuery fq = new FieldQuery( query, true, true );
|
FieldQuery fq = new FieldQuery( query, true, true );
|
||||||
FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
|
FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
|
||||||
FieldPhraseList fpl = new FieldPhraseList( stack, fq );
|
FieldPhraseList fpl = new FieldPhraseList( stack, fq );
|
||||||
|
|
|
@ -17,20 +17,21 @@ package org.apache.lucene.search.vectorhighlight;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.search.*;
|
||||||
|
|
||||||
public class SimpleFragListBuilderTest extends AbstractTestCase {
|
public class SimpleFragListBuilderTest extends AbstractTestCase {
|
||||||
|
|
||||||
public void testNullFieldFragList() throws Exception {
|
public void testNullFieldFragList() throws Exception {
|
||||||
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
||||||
FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "b c d" ), 100 );
|
FieldFragList ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "b c d" ), 100 );
|
||||||
assertEquals( 0, ffl.getFragInfos().size() );
|
assertEquals( 0, ffl.getFragInfos().size() );
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testTooSmallFragSize() throws Exception {
|
public void testTooSmallFragSize() throws Exception {
|
||||||
try{
|
try{
|
||||||
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
||||||
sflb.createFieldFragList( fpl( "a", "b c d" ), SimpleFragListBuilder.MIN_FRAG_CHAR_SIZE - 1 );
|
sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "b c d" ), SimpleFragListBuilder.MIN_FRAG_CHAR_SIZE - 1 );
|
||||||
fail( "IllegalArgumentException must be thrown" );
|
fail( "IllegalArgumentException must be thrown" );
|
||||||
}
|
}
|
||||||
catch ( IllegalArgumentException expected ) {
|
catch ( IllegalArgumentException expected ) {
|
||||||
|
@ -39,14 +40,19 @@ public class SimpleFragListBuilderTest extends AbstractTestCase {
|
||||||
|
|
||||||
public void testSmallerFragSizeThanTermQuery() throws Exception {
|
public void testSmallerFragSizeThanTermQuery() throws Exception {
|
||||||
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
||||||
FieldFragList ffl = sflb.createFieldFragList( fpl( "abcdefghijklmnopqrs", "abcdefghijklmnopqrs" ), SimpleFragListBuilder.MIN_FRAG_CHAR_SIZE );
|
FieldFragList ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "abcdefghijklmnopqrs")), "abcdefghijklmnopqrs" ), SimpleFragListBuilder.MIN_FRAG_CHAR_SIZE );
|
||||||
assertEquals( 1, ffl.getFragInfos().size() );
|
assertEquals( 1, ffl.getFragInfos().size() );
|
||||||
assertEquals( "subInfos=(abcdefghijklmnopqrs((0,19)))/1.0(0,19)", ffl.getFragInfos().get( 0 ).toString() );
|
assertEquals( "subInfos=(abcdefghijklmnopqrs((0,19)))/1.0(0,19)", ffl.getFragInfos().get( 0 ).toString() );
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testSmallerFragSizeThanPhraseQuery() throws Exception {
|
public void testSmallerFragSizeThanPhraseQuery() throws Exception {
|
||||||
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
||||||
FieldFragList ffl = sflb.createFieldFragList( fpl( "\"abcdefgh jklmnopqrs\"", "abcdefgh jklmnopqrs" ), SimpleFragListBuilder.MIN_FRAG_CHAR_SIZE );
|
|
||||||
|
PhraseQuery phraseQuery = new PhraseQuery();
|
||||||
|
phraseQuery.add(new Term(F, "abcdefgh"));
|
||||||
|
phraseQuery.add(new Term(F, "jklmnopqrs"));
|
||||||
|
|
||||||
|
FieldFragList ffl = sflb.createFieldFragList( fpl(phraseQuery, "abcdefgh jklmnopqrs" ), SimpleFragListBuilder.MIN_FRAG_CHAR_SIZE );
|
||||||
assertEquals( 1, ffl.getFragInfos().size() );
|
assertEquals( 1, ffl.getFragInfos().size() );
|
||||||
if (VERBOSE) System.out.println( ffl.getFragInfos().get( 0 ).toString() );
|
if (VERBOSE) System.out.println( ffl.getFragInfos().get( 0 ).toString() );
|
||||||
assertEquals( "subInfos=(abcdefghjklmnopqrs((0,21)))/1.0(0,21)", ffl.getFragInfos().get( 0 ).toString() );
|
assertEquals( "subInfos=(abcdefghjklmnopqrs((0,21)))/1.0(0,21)", ffl.getFragInfos().get( 0 ).toString() );
|
||||||
|
@ -54,39 +60,39 @@ public class SimpleFragListBuilderTest extends AbstractTestCase {
|
||||||
|
|
||||||
public void test1TermIndex() throws Exception {
|
public void test1TermIndex() throws Exception {
|
||||||
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
||||||
FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "a" ), 100 );
|
FieldFragList ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "a" ), 100 );
|
||||||
assertEquals( 1, ffl.getFragInfos().size() );
|
assertEquals( 1, ffl.getFragInfos().size() );
|
||||||
assertEquals( "subInfos=(a((0,1)))/1.0(0,100)", ffl.getFragInfos().get( 0 ).toString() );
|
assertEquals( "subInfos=(a((0,1)))/1.0(0,100)", ffl.getFragInfos().get( 0 ).toString() );
|
||||||
}
|
}
|
||||||
|
|
||||||
public void test2TermsIndex1Frag() throws Exception {
|
public void test2TermsIndex1Frag() throws Exception {
|
||||||
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
||||||
FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "a a" ), 100 );
|
FieldFragList ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "a a" ), 100 );
|
||||||
assertEquals( 1, ffl.getFragInfos().size() );
|
assertEquals( 1, ffl.getFragInfos().size() );
|
||||||
assertEquals( "subInfos=(a((0,1))a((2,3)))/2.0(0,100)", ffl.getFragInfos().get( 0 ).toString() );
|
assertEquals( "subInfos=(a((0,1))a((2,3)))/2.0(0,100)", ffl.getFragInfos().get( 0 ).toString() );
|
||||||
|
|
||||||
ffl = sflb.createFieldFragList( fpl( "a", "a b b b b b b b b a" ), 20 );
|
ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "a b b b b b b b b a" ), 20 );
|
||||||
assertEquals( 1, ffl.getFragInfos().size() );
|
assertEquals( 1, ffl.getFragInfos().size() );
|
||||||
assertEquals( "subInfos=(a((0,1))a((18,19)))/2.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
|
assertEquals( "subInfos=(a((0,1))a((18,19)))/2.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
|
||||||
|
|
||||||
ffl = sflb.createFieldFragList( fpl( "a", "b b b b a b b b b a" ), 20 );
|
ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "b b b b a b b b b a" ), 20 );
|
||||||
assertEquals( 1, ffl.getFragInfos().size() );
|
assertEquals( 1, ffl.getFragInfos().size() );
|
||||||
assertEquals( "subInfos=(a((8,9))a((18,19)))/2.0(2,22)", ffl.getFragInfos().get( 0 ).toString() );
|
assertEquals( "subInfos=(a((8,9))a((18,19)))/2.0(2,22)", ffl.getFragInfos().get( 0 ).toString() );
|
||||||
}
|
}
|
||||||
|
|
||||||
public void test2TermsIndex2Frags() throws Exception {
|
public void test2TermsIndex2Frags() throws Exception {
|
||||||
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
||||||
FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "a b b b b b b b b b b b b b a" ), 20 );
|
FieldFragList ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "a b b b b b b b b b b b b b a" ), 20 );
|
||||||
assertEquals( 2, ffl.getFragInfos().size() );
|
assertEquals( 2, ffl.getFragInfos().size() );
|
||||||
assertEquals( "subInfos=(a((0,1)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
|
assertEquals( "subInfos=(a((0,1)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
|
||||||
assertEquals( "subInfos=(a((28,29)))/1.0(22,42)", ffl.getFragInfos().get( 1 ).toString() );
|
assertEquals( "subInfos=(a((28,29)))/1.0(22,42)", ffl.getFragInfos().get( 1 ).toString() );
|
||||||
|
|
||||||
ffl = sflb.createFieldFragList( fpl( "a", "a b b b b b b b b b b b b a" ), 20 );
|
ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "a b b b b b b b b b b b b a" ), 20 );
|
||||||
assertEquals( 2, ffl.getFragInfos().size() );
|
assertEquals( 2, ffl.getFragInfos().size() );
|
||||||
assertEquals( "subInfos=(a((0,1)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
|
assertEquals( "subInfos=(a((0,1)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
|
||||||
assertEquals( "subInfos=(a((26,27)))/1.0(20,40)", ffl.getFragInfos().get( 1 ).toString() );
|
assertEquals( "subInfos=(a((26,27)))/1.0(20,40)", ffl.getFragInfos().get( 1 ).toString() );
|
||||||
|
|
||||||
ffl = sflb.createFieldFragList( fpl( "a", "a b b b b b b b b b a" ), 20 );
|
ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "a b b b b b b b b b a" ), 20 );
|
||||||
assertEquals( 2, ffl.getFragInfos().size() );
|
assertEquals( 2, ffl.getFragInfos().size() );
|
||||||
assertEquals( "subInfos=(a((0,1)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
|
assertEquals( "subInfos=(a((0,1)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
|
||||||
assertEquals( "subInfos=(a((20,21)))/1.0(20,40)", ffl.getFragInfos().get( 1 ).toString() );
|
assertEquals( "subInfos=(a((20,21)))/1.0(20,40)", ffl.getFragInfos().get( 1 ).toString() );
|
||||||
|
@ -94,41 +100,56 @@ public class SimpleFragListBuilderTest extends AbstractTestCase {
|
||||||
|
|
||||||
public void test2TermsQuery() throws Exception {
|
public void test2TermsQuery() throws Exception {
|
||||||
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
||||||
FieldFragList ffl = sflb.createFieldFragList( fpl( "a b", "c d e" ), 20 );
|
|
||||||
|
BooleanQuery booleanQuery = new BooleanQuery();
|
||||||
|
booleanQuery.add(new TermQuery(new Term(F, "a")), BooleanClause.Occur.SHOULD);
|
||||||
|
booleanQuery.add(new TermQuery(new Term(F, "b")), BooleanClause.Occur.SHOULD);
|
||||||
|
|
||||||
|
FieldFragList ffl = sflb.createFieldFragList( fpl(booleanQuery, "c d e" ), 20 );
|
||||||
assertEquals( 0, ffl.getFragInfos().size() );
|
assertEquals( 0, ffl.getFragInfos().size() );
|
||||||
|
|
||||||
ffl = sflb.createFieldFragList( fpl( "a b", "d b c" ), 20 );
|
ffl = sflb.createFieldFragList( fpl(booleanQuery, "d b c" ), 20 );
|
||||||
assertEquals( 1, ffl.getFragInfos().size() );
|
assertEquals( 1, ffl.getFragInfos().size() );
|
||||||
assertEquals( "subInfos=(b((2,3)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
|
assertEquals( "subInfos=(b((2,3)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
|
||||||
|
|
||||||
ffl = sflb.createFieldFragList( fpl( "a b", "a b c" ), 20 );
|
ffl = sflb.createFieldFragList( fpl(booleanQuery, "a b c" ), 20 );
|
||||||
assertEquals( 1, ffl.getFragInfos().size() );
|
assertEquals( 1, ffl.getFragInfos().size() );
|
||||||
assertEquals( "subInfos=(a((0,1))b((2,3)))/2.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
|
assertEquals( "subInfos=(a((0,1))b((2,3)))/2.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testPhraseQuery() throws Exception {
|
public void testPhraseQuery() throws Exception {
|
||||||
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
||||||
FieldFragList ffl = sflb.createFieldFragList( fpl( "\"a b\"", "c d e" ), 20 );
|
|
||||||
|
PhraseQuery phraseQuery = new PhraseQuery();
|
||||||
|
phraseQuery.add(new Term(F, "a"));
|
||||||
|
phraseQuery.add(new Term(F, "b"));
|
||||||
|
|
||||||
|
FieldFragList ffl = sflb.createFieldFragList( fpl(phraseQuery, "c d e" ), 20 );
|
||||||
assertEquals( 0, ffl.getFragInfos().size() );
|
assertEquals( 0, ffl.getFragInfos().size() );
|
||||||
|
|
||||||
ffl = sflb.createFieldFragList( fpl( "\"a b\"", "a c b" ), 20 );
|
ffl = sflb.createFieldFragList( fpl(phraseQuery, "a c b" ), 20 );
|
||||||
assertEquals( 0, ffl.getFragInfos().size() );
|
assertEquals( 0, ffl.getFragInfos().size() );
|
||||||
|
|
||||||
ffl = sflb.createFieldFragList( fpl( "\"a b\"", "a b c" ), 20 );
|
ffl = sflb.createFieldFragList( fpl(phraseQuery, "a b c" ), 20 );
|
||||||
assertEquals( 1, ffl.getFragInfos().size() );
|
assertEquals( 1, ffl.getFragInfos().size() );
|
||||||
assertEquals( "subInfos=(ab((0,3)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
|
assertEquals( "subInfos=(ab((0,3)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testPhraseQuerySlop() throws Exception {
|
public void testPhraseQuerySlop() throws Exception {
|
||||||
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
||||||
FieldFragList ffl = sflb.createFieldFragList( fpl( "\"a b\"~1", "a c b" ), 20 );
|
|
||||||
|
PhraseQuery phraseQuery = new PhraseQuery();
|
||||||
|
phraseQuery.setSlop(1);
|
||||||
|
phraseQuery.add(new Term(F, "a"));
|
||||||
|
phraseQuery.add(new Term(F, "b"));
|
||||||
|
|
||||||
|
FieldFragList ffl = sflb.createFieldFragList( fpl(phraseQuery, "a c b" ), 20 );
|
||||||
assertEquals( 1, ffl.getFragInfos().size() );
|
assertEquals( 1, ffl.getFragInfos().size() );
|
||||||
assertEquals( "subInfos=(ab((0,1)(4,5)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
|
assertEquals( "subInfos=(ab((0,1)(4,5)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
|
||||||
}
|
}
|
||||||
|
|
||||||
private FieldPhraseList fpl( String queryValue, String indexValue ) throws Exception {
|
private FieldPhraseList fpl(Query query, String indexValue ) throws Exception {
|
||||||
make1d1fIndex( indexValue );
|
make1d1fIndex( indexValue );
|
||||||
Query query = paW.parse( queryValue );
|
|
||||||
FieldQuery fq = new FieldQuery( query, true, true );
|
FieldQuery fq = new FieldQuery( query, true, true );
|
||||||
FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
|
FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
|
||||||
return new FieldPhraseList( stack, fq );
|
return new FieldPhraseList( stack, fq );
|
||||||
|
|
|
@ -26,13 +26,17 @@ import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.IndexWriter;
|
import org.apache.lucene.index.IndexWriter;
|
||||||
import org.apache.lucene.index.IndexWriterConfig;
|
import org.apache.lucene.index.IndexWriterConfig;
|
||||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.search.BooleanClause;
|
||||||
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
|
import org.apache.lucene.search.TermQuery;
|
||||||
import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
|
import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
|
||||||
|
|
||||||
public class SimpleFragmentsBuilderTest extends AbstractTestCase {
|
public class SimpleFragmentsBuilderTest extends AbstractTestCase {
|
||||||
|
|
||||||
public void test1TermIndex() throws Exception {
|
public void test1TermIndex() throws Exception {
|
||||||
FieldFragList ffl = ffl( "a", "a" );
|
FieldFragList ffl = ffl(new TermQuery(new Term(F, "a")), "a" );
|
||||||
SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
|
SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
|
||||||
assertEquals( "<b>a</b> ", sfb.createFragment( reader, 0, F, ffl ) );
|
assertEquals( "<b>a</b> ", sfb.createFragment( reader, 0, F, ffl ) );
|
||||||
|
|
||||||
|
@ -42,7 +46,7 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void test2Frags() throws Exception {
|
public void test2Frags() throws Exception {
|
||||||
FieldFragList ffl = ffl( "a", "a b b b b b b b b b b b a b a b" );
|
FieldFragList ffl = ffl(new TermQuery(new Term(F, "a")), "a b b b b b b b b b b b a b a b" );
|
||||||
SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
|
SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
|
||||||
String[] f = sfb.createFragments( reader, 0, F, ffl, 3 );
|
String[] f = sfb.createFragments( reader, 0, F, ffl, 3 );
|
||||||
// 3 snippets requested, but should be 2
|
// 3 snippets requested, but should be 2
|
||||||
|
@ -52,7 +56,11 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void test3Frags() throws Exception {
|
public void test3Frags() throws Exception {
|
||||||
FieldFragList ffl = ffl( "a c", "a b b b b b b b b b b b a b a b b b b b c a a b b" );
|
BooleanQuery booleanQuery = new BooleanQuery();
|
||||||
|
booleanQuery.add(new TermQuery(new Term(F, "a")), BooleanClause.Occur.SHOULD);
|
||||||
|
booleanQuery.add(new TermQuery(new Term(F, "c")), BooleanClause.Occur.SHOULD);
|
||||||
|
|
||||||
|
FieldFragList ffl = ffl(booleanQuery, "a b b b b b b b b b b b a b a b b b b b c a a b b" );
|
||||||
SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
|
SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
|
||||||
String[] f = sfb.createFragments( reader, 0, F, ffl, 3 );
|
String[] f = sfb.createFragments( reader, 0, F, ffl, 3 );
|
||||||
assertEquals( 3, f.length );
|
assertEquals( 3, f.length );
|
||||||
|
@ -62,7 +70,7 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testTagsAndEncoder() throws Exception {
|
public void testTagsAndEncoder() throws Exception {
|
||||||
FieldFragList ffl = ffl( "a", "<h1> a </h1>" );
|
FieldFragList ffl = ffl(new TermQuery(new Term(F, "a")), "<h1> a </h1>" );
|
||||||
SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
|
SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
|
||||||
String[] preTags = { "[" };
|
String[] preTags = { "[" };
|
||||||
String[] postTags = { "]" };
|
String[] postTags = { "]" };
|
||||||
|
@ -70,9 +78,8 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase {
|
||||||
sfb.createFragment( reader, 0, F, ffl, preTags, postTags, new SimpleHTMLEncoder() ) );
|
sfb.createFragment( reader, 0, F, ffl, preTags, postTags, new SimpleHTMLEncoder() ) );
|
||||||
}
|
}
|
||||||
|
|
||||||
private FieldFragList ffl( String queryValue, String indexValue ) throws Exception {
|
private FieldFragList ffl(Query query, String indexValue ) throws Exception {
|
||||||
make1d1fIndex( indexValue );
|
make1d1fIndex( indexValue );
|
||||||
Query query = paW.parse( queryValue );
|
|
||||||
FieldQuery fq = new FieldQuery( query, true, true );
|
FieldQuery fq = new FieldQuery( query, true, true );
|
||||||
FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
|
FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
|
||||||
FieldPhraseList fpl = new FieldPhraseList( stack, fq );
|
FieldPhraseList fpl = new FieldPhraseList( stack, fq );
|
||||||
|
|
|
@ -1,6 +1,8 @@
|
||||||
package org.apache.lucene.search.vectorhighlight;
|
package org.apache.lucene.search.vectorhighlight;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
|
import org.apache.lucene.search.TermQuery;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
@ -23,27 +25,26 @@ public class SingleFragListBuilderTest extends AbstractTestCase {
|
||||||
|
|
||||||
public void testNullFieldFragList() throws Exception {
|
public void testNullFieldFragList() throws Exception {
|
||||||
SingleFragListBuilder sflb = new SingleFragListBuilder();
|
SingleFragListBuilder sflb = new SingleFragListBuilder();
|
||||||
FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "b c d" ), 100 );
|
FieldFragList ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "b c d" ), 100 );
|
||||||
assertEquals( 0, ffl.getFragInfos().size() );
|
assertEquals( 0, ffl.getFragInfos().size() );
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testShortFieldFragList() throws Exception {
|
public void testShortFieldFragList() throws Exception {
|
||||||
SingleFragListBuilder sflb = new SingleFragListBuilder();
|
SingleFragListBuilder sflb = new SingleFragListBuilder();
|
||||||
FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "a b c d" ), 100 );
|
FieldFragList ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "a b c d" ), 100 );
|
||||||
assertEquals( 1, ffl.getFragInfos().size() );
|
assertEquals( 1, ffl.getFragInfos().size() );
|
||||||
assertEquals( "subInfos=(a((0,1)))/1.0(0,2147483647)", ffl.getFragInfos().get( 0 ).toString() );
|
assertEquals( "subInfos=(a((0,1)))/1.0(0,2147483647)", ffl.getFragInfos().get( 0 ).toString() );
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testLongFieldFragList() throws Exception {
|
public void testLongFieldFragList() throws Exception {
|
||||||
SingleFragListBuilder sflb = new SingleFragListBuilder();
|
SingleFragListBuilder sflb = new SingleFragListBuilder();
|
||||||
FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "a b c d", "a b c d e f g h i", "j k l m n o p q r s t u v w x y z a b c", "d e f g" ), 100 );
|
FieldFragList ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "a b c d", "a b c d e f g h i", "j k l m n o p q r s t u v w x y z a b c", "d e f g" ), 100 );
|
||||||
assertEquals( 1, ffl.getFragInfos().size() );
|
assertEquals( 1, ffl.getFragInfos().size() );
|
||||||
assertEquals( "subInfos=(a((0,1))a((8,9))a((60,61)))/3.0(0,2147483647)", ffl.getFragInfos().get( 0 ).toString() );
|
assertEquals( "subInfos=(a((0,1))a((8,9))a((60,61)))/3.0(0,2147483647)", ffl.getFragInfos().get( 0 ).toString() );
|
||||||
}
|
}
|
||||||
|
|
||||||
private FieldPhraseList fpl( String queryValue, String... indexValues ) throws Exception {
|
private FieldPhraseList fpl(Query query, String... indexValues ) throws Exception {
|
||||||
make1dmfIndex( indexValues );
|
make1dmfIndex( indexValues );
|
||||||
Query query = paW.parse( queryValue );
|
|
||||||
FieldQuery fq = new FieldQuery( query, true, true );
|
FieldQuery fq = new FieldQuery( query, true, true );
|
||||||
FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
|
FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
|
||||||
return new FieldPhraseList( stack, fq );
|
return new FieldPhraseList( stack, fq );
|
||||||
|
|
|
@ -427,6 +427,12 @@ public class InstantiatedIndexReader extends IndexReader {
|
||||||
return sumTotalTermFreq;
|
return sumTotalTermFreq;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: support this?
|
||||||
|
@Override
|
||||||
|
public long getSumDocFreq() {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Comparator<BytesRef> getComparator() {
|
public Comparator<BytesRef> getComparator() {
|
||||||
return BytesRef.getUTF8SortedAsUnicodeComparator();
|
return BytesRef.getUTF8SortedAsUnicodeComparator();
|
||||||
|
|
|
@ -842,6 +842,12 @@ public class MemoryIndex {
|
||||||
public long getSumTotalTermFreq() {
|
public long getSumTotalTermFreq() {
|
||||||
return info.getSumTotalTermFreq();
|
return info.getSumTotalTermFreq();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long getSumDocFreq() throws IOException {
|
||||||
|
// each term has df=1
|
||||||
|
return info.sortedTerms.length;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -128,6 +128,11 @@ public final class FieldCacheRewriteMethod extends MultiTermQuery.RewriteMethod
|
||||||
public long getSumTotalTermFreq() {
|
public long getSumTotalTermFreq() {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long getSumDocFreq() throws IOException {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
assert termsEnum != null;
|
assert termsEnum != null;
|
||||||
|
|
|
@ -691,7 +691,7 @@ public class CheckIndex {
|
||||||
Comparator<BytesRef> termComp = terms.getComparator();
|
Comparator<BytesRef> termComp = terms.getComparator();
|
||||||
|
|
||||||
long sumTotalTermFreq = 0;
|
long sumTotalTermFreq = 0;
|
||||||
|
long sumDocFreq = 0;
|
||||||
while(true) {
|
while(true) {
|
||||||
|
|
||||||
final BytesRef term = terms.next();
|
final BytesRef term = terms.next();
|
||||||
|
@ -712,6 +712,7 @@ public class CheckIndex {
|
||||||
|
|
||||||
final int docFreq = terms.docFreq();
|
final int docFreq = terms.docFreq();
|
||||||
status.totFreq += docFreq;
|
status.totFreq += docFreq;
|
||||||
|
sumDocFreq += docFreq;
|
||||||
|
|
||||||
docs = terms.docs(liveDocs, docs);
|
docs = terms.docs(liveDocs, docs);
|
||||||
postings = terms.docsAndPositions(liveDocs, postings);
|
postings = terms.docsAndPositions(liveDocs, postings);
|
||||||
|
@ -880,6 +881,13 @@ public class CheckIndex {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (sumDocFreq != 0) {
|
||||||
|
final long v = fields.terms(field).getSumDocFreq();
|
||||||
|
if (v != -1 && sumDocFreq != v) {
|
||||||
|
throw new RuntimeException("sumDocFreq for field " + field + "=" + v + " != recomputed sumDocFreq=" + sumDocFreq);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Test seek to last term:
|
// Test seek to last term:
|
||||||
if (lastTerm != null) {
|
if (lastTerm != null) {
|
||||||
if (terms.seekCeil(lastTerm) != TermsEnum.SeekStatus.FOUND) {
|
if (terms.seekCeil(lastTerm) != TermsEnum.SeekStatus.FOUND) {
|
||||||
|
|
|
@ -31,6 +31,7 @@ public final class FieldInvertState {
|
||||||
int numOverlap;
|
int numOverlap;
|
||||||
int offset;
|
int offset;
|
||||||
int maxTermFrequency;
|
int maxTermFrequency;
|
||||||
|
int uniqueTermCount;
|
||||||
float boost;
|
float boost;
|
||||||
AttributeSource attributeSource;
|
AttributeSource attributeSource;
|
||||||
|
|
||||||
|
@ -55,6 +56,7 @@ public final class FieldInvertState {
|
||||||
numOverlap = 0;
|
numOverlap = 0;
|
||||||
offset = 0;
|
offset = 0;
|
||||||
maxTermFrequency = 0;
|
maxTermFrequency = 0;
|
||||||
|
uniqueTermCount = 0;
|
||||||
boost = docBoost;
|
boost = docBoost;
|
||||||
attributeSource = null;
|
attributeSource = null;
|
||||||
}
|
}
|
||||||
|
@ -122,6 +124,13 @@ public final class FieldInvertState {
|
||||||
return maxTermFrequency;
|
return maxTermFrequency;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return the number of unique terms encountered in this field.
|
||||||
|
*/
|
||||||
|
public int getUniqueTermCount() {
|
||||||
|
return uniqueTermCount;
|
||||||
|
}
|
||||||
|
|
||||||
public AttributeSource getAttributeSource() {
|
public AttributeSource getAttributeSource() {
|
||||||
return attributeSource;
|
return attributeSource;
|
||||||
}
|
}
|
||||||
|
|
|
@ -105,6 +105,11 @@ public class FilterIndexReader extends IndexReader {
|
||||||
public long getSumTotalTermFreq() throws IOException {
|
public long getSumTotalTermFreq() throws IOException {
|
||||||
return in.getSumTotalTermFreq();
|
return in.getSumTotalTermFreq();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long getSumDocFreq() throws IOException {
|
||||||
|
return in.getSumDocFreq();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Base class for filtering {@link TermsEnum} implementations. */
|
/** Base class for filtering {@link TermsEnum} implementations. */
|
||||||
|
|
|
@ -134,6 +134,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
|
||||||
writeProx(termID, fieldState.position);
|
writeProx(termID, fieldState.position);
|
||||||
}
|
}
|
||||||
fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
|
fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
|
||||||
|
fieldState.uniqueTermCount++;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -151,6 +152,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
|
||||||
termsHashPerField.writeVInt(0, postings.lastDocCodes[termID]);
|
termsHashPerField.writeVInt(0, postings.lastDocCodes[termID]);
|
||||||
postings.lastDocCodes[termID] = docState.docID - postings.lastDocIDs[termID];
|
postings.lastDocCodes[termID] = docState.docID - postings.lastDocIDs[termID];
|
||||||
postings.lastDocIDs[termID] = docState.docID;
|
postings.lastDocIDs[termID] = docState.docID;
|
||||||
|
fieldState.uniqueTermCount++;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (docState.docID != postings.lastDocIDs[termID]) {
|
if (docState.docID != postings.lastDocIDs[termID]) {
|
||||||
|
@ -171,6 +173,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
|
||||||
postings.lastDocCodes[termID] = (docState.docID - postings.lastDocIDs[termID]) << 1;
|
postings.lastDocCodes[termID] = (docState.docID - postings.lastDocIDs[termID]) << 1;
|
||||||
postings.lastDocIDs[termID] = docState.docID;
|
postings.lastDocIDs[termID] = docState.docID;
|
||||||
writeProx(termID, fieldState.position);
|
writeProx(termID, fieldState.position);
|
||||||
|
fieldState.uniqueTermCount++;
|
||||||
} else {
|
} else {
|
||||||
fieldState.maxTermFrequency = Math.max(fieldState.maxTermFrequency, ++postings.docFreqs[termID]);
|
fieldState.maxTermFrequency = Math.max(fieldState.maxTermFrequency, ++postings.docFreqs[termID]);
|
||||||
writeProx(termID, fieldState.position-postings.lastPositions[termID]);
|
writeProx(termID, fieldState.position-postings.lastPositions[termID]);
|
||||||
|
@ -251,6 +254,8 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
|
||||||
final ByteSliceReader prox = new ByteSliceReader();
|
final ByteSliceReader prox = new ByteSliceReader();
|
||||||
|
|
||||||
long sumTotalTermFreq = 0;
|
long sumTotalTermFreq = 0;
|
||||||
|
long sumDocFreq = 0;
|
||||||
|
|
||||||
for (int i = 0; i < numTerms; i++) {
|
for (int i = 0; i < numTerms; i++) {
|
||||||
final int termID = termIDs[i];
|
final int termID = termIDs[i];
|
||||||
// Get BytesRef
|
// Get BytesRef
|
||||||
|
@ -389,9 +394,10 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
|
||||||
}
|
}
|
||||||
termsConsumer.finishTerm(text, new TermStats(numDocs, totTF));
|
termsConsumer.finishTerm(text, new TermStats(numDocs, totTF));
|
||||||
sumTotalTermFreq += totTF;
|
sumTotalTermFreq += totTF;
|
||||||
|
sumDocFreq += numDocs;
|
||||||
}
|
}
|
||||||
|
|
||||||
termsConsumer.finish(sumTotalTermFreq);
|
termsConsumer.finish(sumTotalTermFreq, sumDocFreq);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -89,6 +89,19 @@ public final class MultiTerms extends Terms {
|
||||||
return sum;
|
return sum;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long getSumDocFreq() throws IOException {
|
||||||
|
long sum = 0;
|
||||||
|
for(Terms terms : subs) {
|
||||||
|
final long v = terms.getSumDocFreq();
|
||||||
|
if (v == -1) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
sum += v;
|
||||||
|
}
|
||||||
|
return sum;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Comparator<BytesRef> getComparator() {
|
public Comparator<BytesRef> getComparator() {
|
||||||
return termComp;
|
return termComp;
|
||||||
|
|
|
@ -132,6 +132,13 @@ public abstract class Terms {
|
||||||
* into account. */
|
* into account. */
|
||||||
public abstract long getSumTotalTermFreq() throws IOException;
|
public abstract long getSumTotalTermFreq() throws IOException;
|
||||||
|
|
||||||
|
/** Returns the sum of {@link #docFreq(BytesRef)} for
|
||||||
|
* all terms in this field, or -1 if this measure isn't
|
||||||
|
* stored by the codec. Note that, just like other term
|
||||||
|
* measures, this measure does not take deleted documents
|
||||||
|
* into account. */
|
||||||
|
public abstract long getSumDocFreq() throws IOException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a thread-private {@link TermsEnum} instance. Obtaining
|
* Returns a thread-private {@link TermsEnum} instance. Obtaining
|
||||||
* {@link TermsEnum} from this method might be more efficient than using
|
* {@link TermsEnum} from this method might be more efficient than using
|
||||||
|
|
|
@ -137,8 +137,9 @@ public class BlockTermsReader extends FieldsProducer {
|
||||||
final long termsStartPointer = in.readVLong();
|
final long termsStartPointer = in.readVLong();
|
||||||
final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
|
final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
|
||||||
final long sumTotalTermFreq = fieldInfo.omitTermFreqAndPositions ? -1 : in.readVLong();
|
final long sumTotalTermFreq = fieldInfo.omitTermFreqAndPositions ? -1 : in.readVLong();
|
||||||
|
final long sumDocFreq = in.readVLong();
|
||||||
assert !fields.containsKey(fieldInfo.name);
|
assert !fields.containsKey(fieldInfo.name);
|
||||||
fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, termsStartPointer, sumTotalTermFreq));
|
fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, termsStartPointer, sumTotalTermFreq, sumDocFreq));
|
||||||
}
|
}
|
||||||
success = true;
|
success = true;
|
||||||
} finally {
|
} finally {
|
||||||
|
@ -245,13 +246,15 @@ public class BlockTermsReader extends FieldsProducer {
|
||||||
final FieldInfo fieldInfo;
|
final FieldInfo fieldInfo;
|
||||||
final long termsStartPointer;
|
final long termsStartPointer;
|
||||||
final long sumTotalTermFreq;
|
final long sumTotalTermFreq;
|
||||||
|
final long sumDocFreq;
|
||||||
|
|
||||||
FieldReader(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq) {
|
FieldReader(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq, long sumDocFreq) {
|
||||||
assert numTerms > 0;
|
assert numTerms > 0;
|
||||||
this.fieldInfo = fieldInfo;
|
this.fieldInfo = fieldInfo;
|
||||||
this.numTerms = numTerms;
|
this.numTerms = numTerms;
|
||||||
this.termsStartPointer = termsStartPointer;
|
this.termsStartPointer = termsStartPointer;
|
||||||
this.sumTotalTermFreq = sumTotalTermFreq;
|
this.sumTotalTermFreq = sumTotalTermFreq;
|
||||||
|
this.sumDocFreq = sumDocFreq;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -279,6 +282,11 @@ public class BlockTermsReader extends FieldsProducer {
|
||||||
return sumTotalTermFreq;
|
return sumTotalTermFreq;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long getSumDocFreq() throws IOException {
|
||||||
|
return sumDocFreq;
|
||||||
|
}
|
||||||
|
|
||||||
// Iterates through terms in this field
|
// Iterates through terms in this field
|
||||||
private final class SegmentTermsEnum extends TermsEnum {
|
private final class SegmentTermsEnum extends TermsEnum {
|
||||||
private final IndexInput in;
|
private final IndexInput in;
|
||||||
|
|
|
@ -132,6 +132,7 @@ public class BlockTermsWriter extends FieldsConsumer {
|
||||||
if (!field.fieldInfo.omitTermFreqAndPositions) {
|
if (!field.fieldInfo.omitTermFreqAndPositions) {
|
||||||
out.writeVLong(field.sumTotalTermFreq);
|
out.writeVLong(field.sumTotalTermFreq);
|
||||||
}
|
}
|
||||||
|
out.writeVLong(field.sumDocFreq);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
writeTrailer(dirStart);
|
writeTrailer(dirStart);
|
||||||
|
@ -157,6 +158,7 @@ public class BlockTermsWriter extends FieldsConsumer {
|
||||||
private long numTerms;
|
private long numTerms;
|
||||||
private final TermsIndexWriterBase.FieldWriter fieldIndexWriter;
|
private final TermsIndexWriterBase.FieldWriter fieldIndexWriter;
|
||||||
long sumTotalTermFreq;
|
long sumTotalTermFreq;
|
||||||
|
long sumDocFreq;
|
||||||
|
|
||||||
private TermEntry[] pendingTerms;
|
private TermEntry[] pendingTerms;
|
||||||
|
|
||||||
|
@ -231,7 +233,7 @@ public class BlockTermsWriter extends FieldsConsumer {
|
||||||
|
|
||||||
// Finishes all terms in this field
|
// Finishes all terms in this field
|
||||||
@Override
|
@Override
|
||||||
public void finish(long sumTotalTermFreq) throws IOException {
|
public void finish(long sumTotalTermFreq, long sumDocFreq) throws IOException {
|
||||||
if (pendingCount > 0) {
|
if (pendingCount > 0) {
|
||||||
flushBlock();
|
flushBlock();
|
||||||
}
|
}
|
||||||
|
@ -239,6 +241,7 @@ public class BlockTermsWriter extends FieldsConsumer {
|
||||||
out.writeVInt(0);
|
out.writeVInt(0);
|
||||||
|
|
||||||
this.sumTotalTermFreq = sumTotalTermFreq;
|
this.sumTotalTermFreq = sumTotalTermFreq;
|
||||||
|
this.sumDocFreq = sumDocFreq;
|
||||||
fieldIndexWriter.finish(out.getFilePointer());
|
fieldIndexWriter.finish(out.getFilePointer());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -41,7 +41,7 @@ public abstract class TermsConsumer {
|
||||||
public abstract void finishTerm(BytesRef text, TermStats stats) throws IOException;
|
public abstract void finishTerm(BytesRef text, TermStats stats) throws IOException;
|
||||||
|
|
||||||
/** Called when we are done adding terms to this field */
|
/** Called when we are done adding terms to this field */
|
||||||
public abstract void finish(long sumTotalTermFreq) throws IOException;
|
public abstract void finish(long sumTotalTermFreq, long sumDocFreq) throws IOException;
|
||||||
|
|
||||||
/** Return the BytesRef Comparator used to sort terms
|
/** Return the BytesRef Comparator used to sort terms
|
||||||
* before feeding to this API. */
|
* before feeding to this API. */
|
||||||
|
@ -56,7 +56,8 @@ public abstract class TermsConsumer {
|
||||||
BytesRef term;
|
BytesRef term;
|
||||||
assert termsEnum != null;
|
assert termsEnum != null;
|
||||||
long sumTotalTermFreq = 0;
|
long sumTotalTermFreq = 0;
|
||||||
long sumDF = 0;
|
long sumDocFreq = 0;
|
||||||
|
long sumDFsinceLastAbortCheck = 0;
|
||||||
|
|
||||||
if (mergeState.fieldInfo.omitTermFreqAndPositions) {
|
if (mergeState.fieldInfo.omitTermFreqAndPositions) {
|
||||||
if (docsEnum == null) {
|
if (docsEnum == null) {
|
||||||
|
@ -74,10 +75,11 @@ public abstract class TermsConsumer {
|
||||||
final TermStats stats = postingsConsumer.merge(mergeState, docsEnum);
|
final TermStats stats = postingsConsumer.merge(mergeState, docsEnum);
|
||||||
if (stats.docFreq > 0) {
|
if (stats.docFreq > 0) {
|
||||||
finishTerm(term, stats);
|
finishTerm(term, stats);
|
||||||
sumDF += stats.docFreq;
|
sumDFsinceLastAbortCheck += stats.docFreq;
|
||||||
if (sumDF > 60000) {
|
sumDocFreq += stats.docFreq;
|
||||||
mergeState.checkAbort.work(sumDF/5.0);
|
if (sumDFsinceLastAbortCheck > 60000) {
|
||||||
sumDF = 0;
|
mergeState.checkAbort.work(sumDFsinceLastAbortCheck/5.0);
|
||||||
|
sumDFsinceLastAbortCheck = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -105,16 +107,17 @@ public abstract class TermsConsumer {
|
||||||
if (stats.docFreq > 0) {
|
if (stats.docFreq > 0) {
|
||||||
finishTerm(term, stats);
|
finishTerm(term, stats);
|
||||||
sumTotalTermFreq += stats.totalTermFreq;
|
sumTotalTermFreq += stats.totalTermFreq;
|
||||||
sumDF += stats.docFreq;
|
sumDFsinceLastAbortCheck += stats.docFreq;
|
||||||
if (sumDF > 60000) {
|
sumDocFreq += stats.docFreq;
|
||||||
mergeState.checkAbort.work(sumDF/5.0);
|
if (sumDFsinceLastAbortCheck > 60000) {
|
||||||
sumDF = 0;
|
mergeState.checkAbort.work(sumDFsinceLastAbortCheck/5.0);
|
||||||
|
sumDFsinceLastAbortCheck = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
finish(sumTotalTermFreq);
|
finish(sumTotalTermFreq, sumDocFreq);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -219,13 +219,14 @@ public class MemoryCodec extends Codec {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void finish(long sumTotalTermFreq) throws IOException {
|
public void finish(long sumTotalTermFreq, long sumDocFreq) throws IOException {
|
||||||
if (termCount > 0) {
|
if (termCount > 0) {
|
||||||
out.writeVInt(termCount);
|
out.writeVInt(termCount);
|
||||||
out.writeVInt(field.number);
|
out.writeVInt(field.number);
|
||||||
if (!field.omitTermFreqAndPositions) {
|
if (!field.omitTermFreqAndPositions) {
|
||||||
out.writeVLong(sumTotalTermFreq);
|
out.writeVLong(sumTotalTermFreq);
|
||||||
}
|
}
|
||||||
|
out.writeVLong(sumDocFreq);
|
||||||
builder.finish().save(out);
|
builder.finish().save(out);
|
||||||
if (VERBOSE) System.out.println("finish field=" + field.name + " fp=" + out.getFilePointer());
|
if (VERBOSE) System.out.println("finish field=" + field.name + " fp=" + out.getFilePointer());
|
||||||
}
|
}
|
||||||
|
@ -683,6 +684,7 @@ public class MemoryCodec extends Codec {
|
||||||
private final static class TermsReader extends Terms {
|
private final static class TermsReader extends Terms {
|
||||||
|
|
||||||
private final long sumTotalTermFreq;
|
private final long sumTotalTermFreq;
|
||||||
|
private final long sumDocFreq;
|
||||||
private FST<BytesRef> fst;
|
private FST<BytesRef> fst;
|
||||||
private final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
|
private final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
|
||||||
private final FieldInfo field;
|
private final FieldInfo field;
|
||||||
|
@ -695,6 +697,7 @@ public class MemoryCodec extends Codec {
|
||||||
} else {
|
} else {
|
||||||
sumTotalTermFreq = 0;
|
sumTotalTermFreq = 0;
|
||||||
}
|
}
|
||||||
|
sumDocFreq = in.readVLong();
|
||||||
|
|
||||||
fst = new FST<BytesRef>(in, outputs);
|
fst = new FST<BytesRef>(in, outputs);
|
||||||
}
|
}
|
||||||
|
@ -704,6 +707,11 @@ public class MemoryCodec extends Codec {
|
||||||
return sumTotalTermFreq;
|
return sumTotalTermFreq;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long getSumDocFreq() throws IOException {
|
||||||
|
return sumDocFreq;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TermsEnum iterator() {
|
public TermsEnum iterator() {
|
||||||
return new FSTTermsEnum(field, fst);
|
return new FSTTermsEnum(field, fst);
|
||||||
|
|
|
@ -266,6 +266,11 @@ public class PreFlexFields extends FieldsProducer {
|
||||||
public long getSumTotalTermFreq() {
|
public long getSumTotalTermFreq() {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long getSumDocFreq() throws IOException {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private class PreTermsEnum extends TermsEnum {
|
private class PreTermsEnum extends TermsEnum {
|
||||||
|
|
|
@ -463,6 +463,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
||||||
private final long termsStart;
|
private final long termsStart;
|
||||||
private final boolean omitTF;
|
private final boolean omitTF;
|
||||||
private long sumTotalTermFreq;
|
private long sumTotalTermFreq;
|
||||||
|
private long sumDocFreq;
|
||||||
private FST<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> fst;
|
private FST<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> fst;
|
||||||
private int termCount;
|
private int termCount;
|
||||||
private final BytesRef scratch = new BytesRef(10);
|
private final BytesRef scratch = new BytesRef(10);
|
||||||
|
@ -500,6 +501,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
||||||
break;
|
break;
|
||||||
} else if (scratch.startsWith(DOC)) {
|
} else if (scratch.startsWith(DOC)) {
|
||||||
docFreq++;
|
docFreq++;
|
||||||
|
sumDocFreq++;
|
||||||
} else if (scratch.startsWith(POS)) {
|
} else if (scratch.startsWith(POS)) {
|
||||||
totalTermFreq++;
|
totalTermFreq++;
|
||||||
} else if (scratch.startsWith(TERM)) {
|
} else if (scratch.startsWith(TERM)) {
|
||||||
|
@ -554,6 +556,11 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
||||||
public long getSumTotalTermFreq() {
|
public long getSumTotalTermFreq() {
|
||||||
return sumTotalTermFreq;
|
return sumTotalTermFreq;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long getSumDocFreq() throws IOException {
|
||||||
|
return sumDocFreq;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -89,7 +89,7 @@ class SimpleTextFieldsWriter extends FieldsConsumer {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void finish(long sumTotalTermFreq) throws IOException {
|
public void finish(long sumTotalTermFreq, long sumDocFreq) throws IOException {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -195,7 +195,7 @@ class PreFlexFieldsWriter extends FieldsConsumer {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void finish(long sumTotalTermCount) throws IOException {
|
public void finish(long sumTotalTermCount, long sumDocFreq) throws IOException {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -102,6 +102,7 @@ public class TestExternalCodecs extends LuceneTestCase {
|
||||||
final String field;
|
final String field;
|
||||||
final SortedMap<String,RAMTerm> termToDocs = new TreeMap<String,RAMTerm>();
|
final SortedMap<String,RAMTerm> termToDocs = new TreeMap<String,RAMTerm>();
|
||||||
long sumTotalTermFreq;
|
long sumTotalTermFreq;
|
||||||
|
long sumDocFreq;
|
||||||
|
|
||||||
RAMField(String field) {
|
RAMField(String field) {
|
||||||
this.field = field;
|
this.field = field;
|
||||||
|
@ -117,6 +118,11 @@ public class TestExternalCodecs extends LuceneTestCase {
|
||||||
return sumTotalTermFreq;
|
return sumTotalTermFreq;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long getSumDocFreq() throws IOException {
|
||||||
|
return sumDocFreq;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TermsEnum iterator() {
|
public TermsEnum iterator() {
|
||||||
return new RAMTermsEnum(RAMOnlyCodec.RAMField.this);
|
return new RAMTermsEnum(RAMOnlyCodec.RAMField.this);
|
||||||
|
@ -204,8 +210,9 @@ public class TestExternalCodecs extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void finish(long sumTotalTermFreq) {
|
public void finish(long sumTotalTermFreq, long sumDocFreq) {
|
||||||
field.sumTotalTermFreq = sumTotalTermFreq;
|
field.sumTotalTermFreq = sumTotalTermFreq;
|
||||||
|
field.sumDocFreq = sumDocFreq;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -455,9 +455,12 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
|
||||||
File indexDir = new File(LuceneTestCase.TEMP_DIR, dirName);
|
File indexDir = new File(LuceneTestCase.TEMP_DIR, dirName);
|
||||||
_TestUtil.rmDir(indexDir);
|
_TestUtil.rmDir(indexDir);
|
||||||
Directory dir = newFSDirectory(indexDir);
|
Directory dir = newFSDirectory(indexDir);
|
||||||
|
LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
|
||||||
IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(10);
|
mp.setUseCompoundFile(doCFS);
|
||||||
((LogMergePolicy) conf.getMergePolicy()).setUseCompoundFile(doCFS);
|
mp.setNoCFSRatio(1.0);
|
||||||
|
// TODO: remove randomness
|
||||||
|
IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
|
||||||
|
.setMaxBufferedDocs(10).setMergePolicy(mp);
|
||||||
IndexWriter writer = new IndexWriter(dir, conf);
|
IndexWriter writer = new IndexWriter(dir, conf);
|
||||||
|
|
||||||
for(int i=0;i<35;i++) {
|
for(int i=0;i<35;i++) {
|
||||||
|
@ -471,8 +474,12 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
|
||||||
|
|
||||||
if (!optimized) {
|
if (!optimized) {
|
||||||
// open fresh writer so we get no prx file in the added segment
|
// open fresh writer so we get no prx file in the added segment
|
||||||
conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(10);
|
mp = new LogByteSizeMergePolicy();
|
||||||
((LogMergePolicy) conf.getMergePolicy()).setUseCompoundFile(doCFS);
|
mp.setUseCompoundFile(doCFS);
|
||||||
|
mp.setNoCFSRatio(1.0);
|
||||||
|
// TODO: remove randomness
|
||||||
|
conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
|
||||||
|
.setMaxBufferedDocs(10).setMergePolicy(mp);
|
||||||
writer = new IndexWriter(dir, conf);
|
writer = new IndexWriter(dir, conf);
|
||||||
addNoProxDoc(writer);
|
addNoProxDoc(writer);
|
||||||
writer.close();
|
writer.close();
|
||||||
|
|
|
@ -101,10 +101,12 @@ public class TestCodecs extends LuceneTestCase {
|
||||||
Arrays.sort(terms);
|
Arrays.sort(terms);
|
||||||
final TermsConsumer termsConsumer = consumer.addField(fieldInfo);
|
final TermsConsumer termsConsumer = consumer.addField(fieldInfo);
|
||||||
long sumTotalTermCount = 0;
|
long sumTotalTermCount = 0;
|
||||||
|
long sumDF = 0;
|
||||||
for (final TermData term : terms) {
|
for (final TermData term : terms) {
|
||||||
|
sumDF += term.docs.length;
|
||||||
sumTotalTermCount += term.write(termsConsumer);
|
sumTotalTermCount += term.write(termsConsumer);
|
||||||
}
|
}
|
||||||
termsConsumer.finish(sumTotalTermCount);
|
termsConsumer.finish(sumTotalTermCount, sumDF);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,101 @@
|
||||||
|
package org.apache.lucene.index;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.MockAnalyzer;
|
||||||
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.document.Field;
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
import org.apache.lucene.util._TestUtil;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests {@link Terms#getSumDocFreq()}
|
||||||
|
* @lucene.experimental
|
||||||
|
*/
|
||||||
|
public class TestSumDocFreq extends LuceneTestCase {
|
||||||
|
|
||||||
|
public void testSumDocFreq() throws Exception {
|
||||||
|
final int numDocs = atLeast(500);
|
||||||
|
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
RandomIndexWriter writer = new RandomIndexWriter(random, dir);
|
||||||
|
|
||||||
|
Document doc = new Document();
|
||||||
|
Field field1 = newField("foo", "", Field.Index.ANALYZED);
|
||||||
|
Field field2 = newField("bar", "", Field.Index.ANALYZED);
|
||||||
|
doc.add(field1);
|
||||||
|
doc.add(field2);
|
||||||
|
for (int i = 0; i < numDocs; i++) {
|
||||||
|
char ch1 = (char) _TestUtil.nextInt(random, 'a', 'z');
|
||||||
|
char ch2 = (char) _TestUtil.nextInt(random, 'a', 'z');
|
||||||
|
field1.setValue("" + ch1 + " " + ch2);
|
||||||
|
ch1 = (char) _TestUtil.nextInt(random, 'a', 'z');
|
||||||
|
ch2 = (char) _TestUtil.nextInt(random, 'a', 'z');
|
||||||
|
field2.setValue("" + ch1 + " " + ch2);
|
||||||
|
writer.addDocument(doc);
|
||||||
|
}
|
||||||
|
|
||||||
|
IndexReader ir = writer.getReader();
|
||||||
|
writer.close();
|
||||||
|
|
||||||
|
assertSumDocFreq(ir);
|
||||||
|
ir.close();
|
||||||
|
|
||||||
|
ir = IndexReader.open(dir, false);
|
||||||
|
int numDeletions = atLeast(20);
|
||||||
|
for (int i = 0; i < numDeletions; i++) {
|
||||||
|
ir.deleteDocument(random.nextInt(ir.maxDoc()));
|
||||||
|
}
|
||||||
|
ir.close();
|
||||||
|
|
||||||
|
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
|
||||||
|
w.optimize();
|
||||||
|
w.close();
|
||||||
|
|
||||||
|
ir = IndexReader.open(dir, true);
|
||||||
|
assertSumDocFreq(ir);
|
||||||
|
ir.close();
|
||||||
|
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void assertSumDocFreq(IndexReader ir) throws Exception {
|
||||||
|
// compute sumDocFreq across all fields
|
||||||
|
Fields fields = MultiFields.getFields(ir);
|
||||||
|
FieldsEnum fieldEnum = fields.iterator();
|
||||||
|
String f = null;
|
||||||
|
while ((f = fieldEnum.next()) != null) {
|
||||||
|
Terms terms = fields.terms(f);
|
||||||
|
long sumDocFreq = terms.getSumDocFreq();
|
||||||
|
if (sumDocFreq == -1) {
|
||||||
|
if (VERBOSE) {
|
||||||
|
System.out.println("skipping field: " + f + ", codec does not support sumDocFreq");
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
long computedSumDocFreq = 0;
|
||||||
|
TermsEnum termsEnum = terms.iterator();
|
||||||
|
while (termsEnum.next() != null) {
|
||||||
|
computedSumDocFreq += termsEnum.docFreq();
|
||||||
|
}
|
||||||
|
assertEquals(computedSumDocFreq, sumDocFreq);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,108 @@
|
||||||
|
package org.apache.lucene.index;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashSet;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.MockAnalyzer;
|
||||||
|
import org.apache.lucene.analysis.MockTokenizer;
|
||||||
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.document.Field;
|
||||||
|
import org.apache.lucene.search.DefaultSimilarity;
|
||||||
|
import org.apache.lucene.search.DefaultSimilarityProvider;
|
||||||
|
import org.apache.lucene.search.Similarity;
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
import org.apache.lucene.util._TestUtil;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests the uniqueTermCount statistic in FieldInvertState
|
||||||
|
*/
|
||||||
|
public class TestUniqueTermCount extends LuceneTestCase {
|
||||||
|
Directory dir;
|
||||||
|
IndexReader reader;
|
||||||
|
/* expected uniqueTermCount values for our documents */
|
||||||
|
ArrayList<Integer> expected = new ArrayList<Integer>();
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setUp() throws Exception {
|
||||||
|
super.setUp();
|
||||||
|
dir = newDirectory();
|
||||||
|
IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT,
|
||||||
|
new MockAnalyzer(random, MockTokenizer.SIMPLE, true)).setMergePolicy(newLogMergePolicy());
|
||||||
|
config.setSimilarityProvider(new DefaultSimilarityProvider() {
|
||||||
|
@Override
|
||||||
|
public Similarity get(String field) {
|
||||||
|
return new TestSimilarity();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
RandomIndexWriter writer = new RandomIndexWriter(random, dir, config);
|
||||||
|
Document doc = new Document();
|
||||||
|
Field foo = newField("foo", "", Field.Store.NO, Field.Index.ANALYZED);
|
||||||
|
doc.add(foo);
|
||||||
|
for (int i = 0; i < 100; i++) {
|
||||||
|
foo.setValue(addValue());
|
||||||
|
writer.addDocument(doc);
|
||||||
|
}
|
||||||
|
reader = writer.getReader();
|
||||||
|
writer.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void tearDown() throws Exception {
|
||||||
|
reader.close();
|
||||||
|
dir.close();
|
||||||
|
super.tearDown();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void test() throws Exception {
|
||||||
|
byte fooNorms[] = MultiNorms.norms(reader, "foo");
|
||||||
|
for (int i = 0; i < reader.maxDoc(); i++)
|
||||||
|
assertEquals(expected.get(i).intValue(), fooNorms[i] & 0xff);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Makes a bunch of single-char tokens (so there are at most 26 unique terms).
|
||||||
|
* Puts the number of unique terms into expected, to be checked against the norm.
|
||||||
|
*/
|
||||||
|
private String addValue() {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
HashSet<String> terms = new HashSet<String>();
|
||||||
|
int num = _TestUtil.nextInt(random, 0, 255);
|
||||||
|
for (int i = 0; i < num; i++) {
|
||||||
|
sb.append(' ');
|
||||||
|
char term = (char) _TestUtil.nextInt(random, 'a', 'z');
|
||||||
|
sb.append(term);
|
||||||
|
terms.add("" + term);
|
||||||
|
}
|
||||||
|
expected.add(terms.size());
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Simple similarity that encodes the unique term count directly as a byte
|
||||||
|
*/
|
||||||
|
class TestSimilarity extends DefaultSimilarity {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public byte computeNorm(FieldInvertState state) {
|
||||||
|
return (byte) state.getUniqueTermCount();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -16,7 +16,6 @@ package org.apache.lucene.analysis.query;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
import java.io.StringReader;
|
import java.io.StringReader;
|
||||||
|
|
||||||
|
@ -25,19 +24,12 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||||
import org.apache.lucene.analysis.MockAnalyzer;
|
import org.apache.lucene.analysis.MockAnalyzer;
|
||||||
import org.apache.lucene.analysis.MockTokenizer;
|
import org.apache.lucene.analysis.MockTokenizer;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.core.LetterTokenizer;
|
|
||||||
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
|
|
||||||
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.IndexWriter;
|
import org.apache.lucene.index.IndexWriter;
|
||||||
import org.apache.lucene.index.IndexWriterConfig;
|
import org.apache.lucene.index.IndexWriterConfig;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.queryParser.ParseException;
|
|
||||||
import org.apache.lucene.queryParser.QueryParser;
|
|
||||||
import org.apache.lucene.search.IndexSearcher;
|
|
||||||
import org.apache.lucene.search.Query;
|
|
||||||
import org.apache.lucene.store.RAMDirectory;
|
import org.apache.lucene.store.RAMDirectory;
|
||||||
|
|
||||||
public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
|
public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
|
||||||
|
@ -74,22 +66,14 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
|
||||||
super.tearDown();
|
super.tearDown();
|
||||||
}
|
}
|
||||||
|
|
||||||
//Helper method to query
|
|
||||||
private int search(Analyzer a, String queryString) throws IOException, ParseException {
|
|
||||||
QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "repetitiveField", a);
|
|
||||||
Query q = qp.parse(queryString);
|
|
||||||
IndexSearcher searcher = newSearcher(reader);
|
|
||||||
int hits = searcher.search(q, null, 1000).totalHits;
|
|
||||||
searcher.close();
|
|
||||||
return hits;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testUninitializedAnalyzer() throws Exception {
|
public void testUninitializedAnalyzer() throws Exception {
|
||||||
//Note: no calls to "addStopWord"
|
// Note: no calls to "addStopWord"
|
||||||
String query = "variedField:quick repetitiveField:boring";
|
// query = "variedField:quick repetitiveField:boring";
|
||||||
int numHits1 = search(protectedAnalyzer, query);
|
TokenStream protectedTokenStream = protectedAnalyzer.reusableTokenStream("variedField", new StringReader("quick"));
|
||||||
int numHits2 = search(appAnalyzer, query);
|
assertTokenStreamContents(protectedTokenStream, new String[]{"quick"});
|
||||||
assertEquals("No filtering test", numHits1, numHits2);
|
|
||||||
|
protectedTokenStream = protectedAnalyzer.reusableTokenStream("repetitiveField", new StringReader("boring"));
|
||||||
|
assertTokenStreamContents(protectedTokenStream, new String[]{"boring"});
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -97,36 +81,41 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
|
||||||
*/
|
*/
|
||||||
public void testDefaultAddStopWordsIndexReader() throws Exception {
|
public void testDefaultAddStopWordsIndexReader() throws Exception {
|
||||||
protectedAnalyzer.addStopWords(reader);
|
protectedAnalyzer.addStopWords(reader);
|
||||||
int numHits = search(protectedAnalyzer, "repetitiveField:boring");
|
TokenStream protectedTokenStream = protectedAnalyzer.reusableTokenStream("repetitiveField", new StringReader("boring"));
|
||||||
assertEquals("Default filter should remove all docs", 0, numHits);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
assertTokenStreamContents(protectedTokenStream, new String[0]); // Default stop word filtering will remove boring
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Test method for 'org.apache.lucene.analysis.QueryAutoStopWordAnalyzer.addStopWords(IndexReader, int)'
|
* Test method for 'org.apache.lucene.analysis.QueryAutoStopWordAnalyzer.addStopWords(IndexReader, int)'
|
||||||
*/
|
*/
|
||||||
public void testAddStopWordsIndexReaderInt() throws Exception {
|
public void testAddStopWordsIndexReaderInt() throws Exception {
|
||||||
protectedAnalyzer.addStopWords(reader, 1f / 2f);
|
protectedAnalyzer.addStopWords(reader, 1f / 2f);
|
||||||
int numHits = search(protectedAnalyzer, "repetitiveField:boring");
|
|
||||||
assertEquals("A filter on terms in > one half of docs remove boring docs", 0, numHits);
|
|
||||||
|
|
||||||
numHits = search(protectedAnalyzer, "repetitiveField:vaguelyboring");
|
TokenStream protectedTokenStream = protectedAnalyzer.reusableTokenStream("repetitiveField", new StringReader("boring"));
|
||||||
assertTrue("A filter on terms in > half of docs should not remove vaguelyBoring docs", numHits > 1);
|
// A filter on terms in > one half of docs removes boring
|
||||||
|
assertTokenStreamContents(protectedTokenStream, new String[0]);
|
||||||
|
|
||||||
|
protectedTokenStream = protectedAnalyzer.reusableTokenStream("repetitiveField", new StringReader("vaguelyboring"));
|
||||||
|
// A filter on terms in > half of docs should not remove vaguelyBoring
|
||||||
|
assertTokenStreamContents(protectedTokenStream, new String[]{"vaguelyboring"});
|
||||||
|
|
||||||
protectedAnalyzer.addStopWords(reader, 1f / 4f);
|
protectedAnalyzer.addStopWords(reader, 1f / 4f);
|
||||||
numHits = search(protectedAnalyzer, "repetitiveField:vaguelyboring");
|
protectedTokenStream = protectedAnalyzer.reusableTokenStream("repetitiveField", new StringReader("vaguelyboring"));
|
||||||
assertEquals("A filter on terms in > quarter of docs should remove vaguelyBoring docs", 0, numHits);
|
// A filter on terms in > quarter of docs should remove vaguelyBoring
|
||||||
|
assertTokenStreamContents(protectedTokenStream, new String[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public void testAddStopWordsIndexReaderStringFloat() throws Exception {
|
public void testAddStopWordsIndexReaderStringFloat() throws Exception {
|
||||||
protectedAnalyzer.addStopWords(reader, "variedField", 1f / 2f);
|
protectedAnalyzer.addStopWords(reader, "variedField", 1f / 2f);
|
||||||
int numHits = search(protectedAnalyzer, "repetitiveField:boring");
|
TokenStream protectedTokenStream = protectedAnalyzer.reusableTokenStream("repetitiveField", new StringReader("boring"));
|
||||||
assertTrue("A filter on one Field should not affect queris on another", numHits > 0);
|
// A filter on one Field should not affect queries on another
|
||||||
|
assertTokenStreamContents(protectedTokenStream, new String[]{"boring"});
|
||||||
|
|
||||||
protectedAnalyzer.addStopWords(reader, "repetitiveField", 1f / 2f);
|
protectedAnalyzer.addStopWords(reader, "repetitiveField", 1f / 2f);
|
||||||
numHits = search(protectedAnalyzer, "repetitiveField:boring");
|
protectedTokenStream = protectedAnalyzer.reusableTokenStream("repetitiveField", new StringReader("boring"));
|
||||||
assertEquals("A filter on the right Field should affect queries on it", numHits, 0);
|
// A filter on the right Field should affect queries on it
|
||||||
|
assertTokenStreamContents(protectedTokenStream, new String[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testAddStopWordsIndexReaderStringInt() throws Exception {
|
public void testAddStopWordsIndexReaderStringInt() throws Exception {
|
||||||
|
@ -144,12 +133,14 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
|
||||||
|
|
||||||
public void testNoFieldNamePollution() throws Exception {
|
public void testNoFieldNamePollution() throws Exception {
|
||||||
protectedAnalyzer.addStopWords(reader, "repetitiveField", 10);
|
protectedAnalyzer.addStopWords(reader, "repetitiveField", 10);
|
||||||
int numHits = search(protectedAnalyzer, "repetitiveField:boring");
|
|
||||||
assertEquals("Check filter set up OK", 0, numHits);
|
|
||||||
|
|
||||||
numHits = search(protectedAnalyzer, "variedField:boring");
|
TokenStream protectedTokenStream = protectedAnalyzer.reusableTokenStream("repetitiveField", new StringReader("boring"));
|
||||||
assertTrue("Filter should not prevent stopwords in one field being used in another ", numHits > 0);
|
// Check filter set up OK
|
||||||
|
assertTokenStreamContents(protectedTokenStream, new String[0]);
|
||||||
|
|
||||||
|
protectedTokenStream = protectedAnalyzer.reusableTokenStream("variedField", new StringReader("boring"));
|
||||||
|
// Filter should not prevent stopwords in one field being used in another
|
||||||
|
assertTokenStreamContents(protectedTokenStream, new String[]{"boring"});
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -170,10 +161,12 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
|
||||||
public void testWrappingNonReusableAnalyzer() throws Exception {
|
public void testWrappingNonReusableAnalyzer() throws Exception {
|
||||||
QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, new NonreusableAnalyzer());
|
QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, new NonreusableAnalyzer());
|
||||||
a.addStopWords(reader, 10);
|
a.addStopWords(reader, 10);
|
||||||
int numHits = search(a, "repetitiveField:boring");
|
|
||||||
assertTrue(numHits == 0);
|
TokenStream tokenStream = a.reusableTokenStream("repetitiveField", new StringReader("boring"));
|
||||||
numHits = search(a, "repetitiveField:vaguelyboring");
|
assertTokenStreamContents(tokenStream, new String[0]);
|
||||||
assertTrue(numHits == 0);
|
|
||||||
|
tokenStream = a.reusableTokenStream("repetitiveField", new StringReader("vaguelyboring"));
|
||||||
|
assertTokenStreamContents(tokenStream, new String[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testTokenStream() throws Exception {
|
public void testTokenStream() throws Exception {
|
||||||
|
|
|
@ -32,14 +32,7 @@ import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.index.IndexWriter;
|
import org.apache.lucene.index.IndexWriter;
|
||||||
import org.apache.lucene.index.IndexWriterConfig;
|
import org.apache.lucene.index.IndexWriterConfig;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.queryParser.QueryParser;
|
import org.apache.lucene.search.*;
|
||||||
import org.apache.lucene.search.BooleanClause;
|
|
||||||
import org.apache.lucene.search.BooleanQuery;
|
|
||||||
import org.apache.lucene.search.IndexSearcher;
|
|
||||||
import org.apache.lucene.search.PhraseQuery;
|
|
||||||
import org.apache.lucene.search.Query;
|
|
||||||
import org.apache.lucene.search.ScoreDoc;
|
|
||||||
import org.apache.lucene.search.TermQuery;
|
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.store.RAMDirectory;
|
import org.apache.lucene.store.RAMDirectory;
|
||||||
|
|
||||||
|
@ -82,16 +75,6 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
|
||||||
return new IndexSearcher(dir, true);
|
return new IndexSearcher(dir, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected ScoreDoc[] queryParsingTest(Analyzer analyzer, String qs) throws Exception {
|
|
||||||
searcher = setUpSearcher(analyzer);
|
|
||||||
|
|
||||||
QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "content", analyzer);
|
|
||||||
|
|
||||||
Query q = qp.parse(qs);
|
|
||||||
|
|
||||||
return searcher.search(q, null, 1000).scoreDocs;
|
|
||||||
}
|
|
||||||
|
|
||||||
protected void compareRanks(ScoreDoc[] hits, int[] ranks) throws Exception {
|
protected void compareRanks(ScoreDoc[] hits, int[] ranks) throws Exception {
|
||||||
assertEquals(ranks.length, hits.length);
|
assertEquals(ranks.length, hits.length);
|
||||||
for (int i = 0; i < ranks.length; i++) {
|
for (int i = 0; i < ranks.length; i++) {
|
||||||
|
@ -99,51 +82,6 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Will not work on an index without unigrams, since QueryParser automatically
|
|
||||||
* tokenizes on whitespace.
|
|
||||||
*/
|
|
||||||
public void testShingleAnalyzerWrapperQueryParsing() throws Exception {
|
|
||||||
ScoreDoc[] hits = queryParsingTest(new ShingleAnalyzerWrapper
|
|
||||||
(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), 2),
|
|
||||||
"test sentence");
|
|
||||||
int[] ranks = new int[] { 1, 2, 0 };
|
|
||||||
compareRanks(hits, ranks);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* This one fails with an exception.
|
|
||||||
*/
|
|
||||||
public void testShingleAnalyzerWrapperPhraseQueryParsingFails() throws Exception {
|
|
||||||
ScoreDoc[] hits = queryParsingTest(new ShingleAnalyzerWrapper
|
|
||||||
(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), 2),
|
|
||||||
"\"this sentence\"");
|
|
||||||
int[] ranks = new int[] { 0 };
|
|
||||||
compareRanks(hits, ranks);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* This one works, actually.
|
|
||||||
*/
|
|
||||||
public void testShingleAnalyzerWrapperPhraseQueryParsing() throws Exception {
|
|
||||||
ScoreDoc[] hits = queryParsingTest(new ShingleAnalyzerWrapper
|
|
||||||
(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), 2),
|
|
||||||
"\"test sentence\"");
|
|
||||||
int[] ranks = new int[] { 1 };
|
|
||||||
compareRanks(hits, ranks);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Same as above, is tokenized without using the analyzer.
|
|
||||||
*/
|
|
||||||
public void testShingleAnalyzerWrapperRequiredQueryParsing() throws Exception {
|
|
||||||
ScoreDoc[] hits = queryParsingTest(new ShingleAnalyzerWrapper
|
|
||||||
(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), 2),
|
|
||||||
"+test +sentence");
|
|
||||||
int[] ranks = new int[] { 1, 2 };
|
|
||||||
compareRanks(hits, ranks);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This shows how to construct a phrase query containing shingles.
|
* This shows how to construct a phrase query containing shingles.
|
||||||
*/
|
*/
|
||||||
|
@ -153,8 +91,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
|
||||||
|
|
||||||
PhraseQuery q = new PhraseQuery();
|
PhraseQuery q = new PhraseQuery();
|
||||||
|
|
||||||
TokenStream ts = analyzer.tokenStream("content",
|
TokenStream ts = analyzer.tokenStream("content", new StringReader("this sentence"));
|
||||||
new StringReader("this sentence"));
|
|
||||||
int j = -1;
|
int j = -1;
|
||||||
|
|
||||||
PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class);
|
PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class);
|
||||||
|
@ -183,8 +120,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
|
||||||
|
|
||||||
BooleanQuery q = new BooleanQuery();
|
BooleanQuery q = new BooleanQuery();
|
||||||
|
|
||||||
TokenStream ts = analyzer.tokenStream("content",
|
TokenStream ts = analyzer.tokenStream("content", new StringReader("test sentence"));
|
||||||
new StringReader("test sentence"));
|
|
||||||
|
|
||||||
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
|
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
|
||||||
|
|
||||||
|
|
|
@ -20,11 +20,7 @@ package org.apache.lucene.queries;
|
||||||
import org.apache.lucene.queries.function.FunctionQuery;
|
import org.apache.lucene.queries.function.FunctionQuery;
|
||||||
import org.apache.lucene.queries.function.FunctionTestSetup;
|
import org.apache.lucene.queries.function.FunctionTestSetup;
|
||||||
import org.apache.lucene.queries.function.ValueSource;
|
import org.apache.lucene.queries.function.ValueSource;
|
||||||
import org.apache.lucene.queries.function.valuesource.ByteFieldSource;
|
|
||||||
import org.apache.lucene.queries.function.valuesource.FloatFieldSource;
|
import org.apache.lucene.queries.function.valuesource.FloatFieldSource;
|
||||||
import org.apache.lucene.queries.function.valuesource.IntFieldSource;
|
|
||||||
import org.apache.lucene.queries.function.valuesource.ShortFieldSource;
|
|
||||||
import org.apache.lucene.queryParser.QueryParser;
|
|
||||||
import org.apache.lucene.search.*;
|
import org.apache.lucene.search.*;
|
||||||
import org.apache.lucene.search.cache.*;
|
import org.apache.lucene.search.cache.*;
|
||||||
import org.junit.BeforeClass;
|
import org.junit.BeforeClass;
|
||||||
|
@ -198,9 +194,10 @@ public class TestCustomScoreQuery extends FunctionTestSetup {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testCustomExternalQuery() throws Exception {
|
public void testCustomExternalQuery() throws Exception {
|
||||||
QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, TEXT_FIELD,anlzr);
|
BooleanQuery q1 = new BooleanQuery();
|
||||||
String qtxt = "first aid text"; // from the doc texts in FunctionQuerySetup.
|
q1.add(new TermQuery(new Term(TEXT_FIELD, "first")), BooleanClause.Occur.SHOULD);
|
||||||
Query q1 = qp.parse(qtxt);
|
q1.add(new TermQuery(new Term(TEXT_FIELD, "aid")), BooleanClause.Occur.SHOULD);
|
||||||
|
q1.add(new TermQuery(new Term(TEXT_FIELD, "text")), BooleanClause.Occur.SHOULD);
|
||||||
|
|
||||||
final Query q = new CustomExternalQuery(q1);
|
final Query q = new CustomExternalQuery(q1);
|
||||||
log(q);
|
log(q);
|
||||||
|
@ -243,11 +240,12 @@ public class TestCustomScoreQuery extends FunctionTestSetup {
|
||||||
FunctionQuery functionQuery = new FunctionQuery(valueSource);
|
FunctionQuery functionQuery = new FunctionQuery(valueSource);
|
||||||
float boost = (float) dboost;
|
float boost = (float) dboost;
|
||||||
IndexSearcher s = new IndexSearcher(dir, true);
|
IndexSearcher s = new IndexSearcher(dir, true);
|
||||||
QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, TEXT_FIELD, anlzr);
|
|
||||||
String qtxt = "first aid text"; // from the doc texts in FunctionQuerySetup.
|
|
||||||
|
|
||||||
// regular (boolean) query.
|
// regular (boolean) query.
|
||||||
Query q1 = qp.parse(qtxt);
|
BooleanQuery q1 = new BooleanQuery();
|
||||||
|
q1.add(new TermQuery(new Term(TEXT_FIELD, "first")), BooleanClause.Occur.SHOULD);
|
||||||
|
q1.add(new TermQuery(new Term(TEXT_FIELD, "aid")), BooleanClause.Occur.SHOULD);
|
||||||
|
q1.add(new TermQuery(new Term(TEXT_FIELD, "text")), BooleanClause.Occur.SHOULD);
|
||||||
log(q1);
|
log(q1);
|
||||||
|
|
||||||
// custom query, that should score the same as q1.
|
// custom query, that should score the same as q1.
|
||||||
|
|
|
@ -258,6 +258,12 @@ Other Changes
|
||||||
impls) and BoostedQuery have been consolidated into the queries module. They
|
impls) and BoostedQuery have been consolidated into the queries module. They
|
||||||
can now be found at o.a.l.queries.function.
|
can now be found at o.a.l.queries.function.
|
||||||
|
|
||||||
|
* SOLR-2027: FacetField.getValues() now returns an empty list if there are no
|
||||||
|
values, instead of null (Chris Male)
|
||||||
|
|
||||||
|
* SOLR-1825: SolrQuery.addFacetQuery now enables facets automatically, like
|
||||||
|
addFacetField (Chris Male)
|
||||||
|
|
||||||
Documentation
|
Documentation
|
||||||
----------------------
|
----------------------
|
||||||
|
|
||||||
|
|
|
@ -292,6 +292,7 @@ public class SolrQuery extends ModifiableSolrParams
|
||||||
*/
|
*/
|
||||||
public SolrQuery addFacetQuery(String f) {
|
public SolrQuery addFacetQuery(String f) {
|
||||||
this.add(FacetParams.FACET_QUERY, f);
|
this.add(FacetParams.FACET_QUERY, f);
|
||||||
|
this.set(FacetParams.FACET, true);
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.solr.client.solrj.response;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
import java.util.Date;
|
import java.util.Date;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
@ -145,7 +146,7 @@ import org.apache.solr.client.solrj.util.ClientUtils;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<Count> getValues() {
|
public List<Count> getValues() {
|
||||||
return _values;
|
return _values == null ? Collections.<Count>emptyList() : _values;
|
||||||
}
|
}
|
||||||
|
|
||||||
public int getValueCount()
|
public int getValueCount()
|
||||||
|
|
|
@ -195,4 +195,10 @@ public class SolrQueryTest extends LuceneTestCase {
|
||||||
q.setTermsRegexFlag("multiline");
|
q.setTermsRegexFlag("multiline");
|
||||||
assertEquals(2, q.getTermsRegexFlags().length);
|
assertEquals(2, q.getTermsRegexFlags().length);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testAddFacetQuery() {
|
||||||
|
SolrQuery solrQuery = new SolrQuery();
|
||||||
|
solrQuery.addFacetQuery("field:value");
|
||||||
|
assertTrue("Adding a Facet Query should enable facets", solrQuery.getBool(FacetParams.FACET));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,33 @@
|
||||||
|
package org.apache.solr.client.solrj.response;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
|
||||||
|
public class FacetFieldTest extends LuceneTestCase {
|
||||||
|
|
||||||
|
public void testGetValues() {
|
||||||
|
FacetField facetField = new FacetField("field");
|
||||||
|
|
||||||
|
assertNotNull(facetField.getValues());
|
||||||
|
assertEquals(0, facetField.getValues().size());
|
||||||
|
|
||||||
|
facetField.add("value", 1);
|
||||||
|
assertEquals(1, facetField.getValues().size());
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue