LUCENE-6352: Improved tests for global ordinal join

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1671774 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Martijn van Groningen 2015-04-07 09:37:20 +00:00
parent dbf9d54fd9
commit 344ce28214
3 changed files with 63 additions and 51 deletions

View File

@ -171,7 +171,7 @@ abstract class GlobalOrdinalsWithScoreCollector implements Collector {
@Override @Override
public float score(int globalOrdinal) { public float score(int globalOrdinal) {
return scores.getScore(globalOrdinal) / occurrences.getOccurence(globalOrdinal); return scores.getScore(globalOrdinal) / occurrences.getOccurrence(globalOrdinal);
} }
} }
@ -238,7 +238,7 @@ abstract class GlobalOrdinalsWithScoreCollector implements Collector {
occurrences[offset]++; occurrences[offset]++;
} }
public int getOccurence(int globalOrdinal) { public int getOccurrence(int globalOrdinal) {
int block = globalOrdinal / arraySize; int block = globalOrdinal / arraySize;
int offset = globalOrdinal % arraySize; int offset = globalOrdinal % arraySize;
int[] occurrences = blocks[block]; int[] occurrences = blocks[block];

View File

@ -123,7 +123,7 @@ final class GlobalOrdinalsWithScoreQuery extends Query {
final float score; final float score;
if (globalOrds != null) { if (globalOrds != null) {
long globalOrd = globalOrds.getGlobalOrds(context.ord).get(segmentOrd); long globalOrd = globalOrds.getGlobalOrds(context.ord).get(segmentOrd);
score = collector.scores.getScore((int) globalOrd); score = collector.score((int) globalOrd);
} else { } else {
score = collector.score(segmentOrd); score = collector.score(segmentOrd);
} }

View File

@ -17,6 +17,7 @@ package org.apache.lucene.search.join;
* limitations under the License. * limitations under the License.
*/ */
import com.carrotsearch.randomizedtesting.generators.RandomInts;
import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
@ -25,7 +26,6 @@ import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.TextField; import org.apache.lucene.document.TextField;
import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReader;
@ -285,14 +285,10 @@ public class TestJoinUtil extends LuceneTestCase {
RandomIndexWriter w = new RandomIndexWriter( RandomIndexWriter w = new RandomIndexWriter(
random(), random(),
dir, dir,
newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false)).setMergePolicy(newLogMergePolicy()) newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false))
); );
IndexIterationContext context = createContext(100, w, false, true); IndexIterationContext context = createContext(512, w, false, true);
IndexReader topLevelReader = w.getReader();
w.forceMerge(1);
w.close();
IndexReader topLevelReader = DirectoryReader.open(dir);
SortedDocValues[] values = new SortedDocValues[topLevelReader.leaves().size()]; SortedDocValues[] values = new SortedDocValues[topLevelReader.leaves().size()];
for (LeafReaderContext leadContext : topLevelReader.leaves()) { for (LeafReaderContext leadContext : topLevelReader.leaves()) {
@ -301,47 +297,55 @@ public class TestJoinUtil extends LuceneTestCase {
context.ordinalMap = MultiDocValues.OrdinalMap.build( context.ordinalMap = MultiDocValues.OrdinalMap.build(
topLevelReader.getCoreCacheKey(), values, PackedInts.DEFAULT topLevelReader.getCoreCacheKey(), values, PackedInts.DEFAULT
); );
int searchIters = 10;
IndexSearcher indexSearcher = newSearcher(topLevelReader); IndexSearcher indexSearcher = newSearcher(topLevelReader);
for (int i = 0; i < searchIters; i++) {
if (VERBOSE) {
System.out.println("search iter=" + i);
}
int r = random().nextInt(context.randomUniqueValues.length);
boolean from = context.randomFrom[r];
String randomValue = context.randomUniqueValues[r];
BitSet expectedResult = createExpectedResult(randomValue, from, indexSearcher.getIndexReader(), context);
int r = random().nextInt(context.randomUniqueValues.length); final Query actualQuery = new TermQuery(new Term("value", randomValue));
boolean from = context.randomFrom[r]; if (VERBOSE) {
String randomValue = context.randomUniqueValues[r]; System.out.println("actualQuery=" + actualQuery);
BitSet expectedResult = createExpectedResult(randomValue, from, indexSearcher.getIndexReader(), context); }
final ScoreMode scoreMode = ScoreMode.values()[random().nextInt(ScoreMode.values().length)];
if (VERBOSE) {
System.out.println("scoreMode=" + scoreMode);
}
final Query actualQuery = new TermQuery(new Term("value", randomValue)); final Query joinQuery;
if (VERBOSE) { if (from) {
System.out.println("actualQuery=" + actualQuery); BooleanQuery fromQuery = new BooleanQuery();
} fromQuery.add(new TermQuery(new Term("type", "from")), BooleanClause.Occur.FILTER);
final ScoreMode scoreMode = ScoreMode.values()[random().nextInt(ScoreMode.values().length)]; fromQuery.add(actualQuery, BooleanClause.Occur.MUST);
if (VERBOSE) { Query toQuery = new TermQuery(new Term("type", "to"));
System.out.println("scoreMode=" + scoreMode); joinQuery = JoinUtil.createJoinQuery("join_field", fromQuery, toQuery, indexSearcher, scoreMode, context.ordinalMap);
} else {
BooleanQuery fromQuery = new BooleanQuery();
fromQuery.add(new TermQuery(new Term("type", "to")), BooleanClause.Occur.FILTER);
fromQuery.add(actualQuery, BooleanClause.Occur.MUST);
Query toQuery = new TermQuery(new Term("type", "from"));
joinQuery = JoinUtil.createJoinQuery("join_field", fromQuery, toQuery, indexSearcher, scoreMode, context.ordinalMap);
}
if (VERBOSE) {
System.out.println("joinQuery=" + joinQuery);
}
final BitSet actualResult = new FixedBitSet(indexSearcher.getIndexReader().maxDoc());
final TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.create(10);
indexSearcher.search(joinQuery, MultiCollector.wrap(new BitSetCollector(actualResult), topScoreDocCollector));
assertBitSet(expectedResult, actualResult, indexSearcher);
TopDocs expectedTopDocs = createExpectedTopDocs(randomValue, from, scoreMode, context);
TopDocs actualTopDocs = topScoreDocCollector.topDocs();
assertTopDocs(expectedTopDocs, actualTopDocs, scoreMode, indexSearcher, joinQuery);
} }
final Query joinQuery; w.close();
if (from) {
BooleanQuery fromQuery = new BooleanQuery();
fromQuery.add(new TermQuery(new Term("type", "from")), BooleanClause.Occur.FILTER);
fromQuery.add(actualQuery, BooleanClause.Occur.MUST);
Query toQuery = new TermQuery(new Term("type", "to"));
joinQuery = JoinUtil.createJoinQuery("join_field", fromQuery, toQuery, indexSearcher, scoreMode, context.ordinalMap);
} else {
BooleanQuery fromQuery = new BooleanQuery();
fromQuery.add(new TermQuery(new Term("type", "to")), BooleanClause.Occur.FILTER);
fromQuery.add(actualQuery, BooleanClause.Occur.MUST);
Query toQuery = new TermQuery(new Term("type", "from"));
joinQuery = JoinUtil.createJoinQuery("join_field", fromQuery, toQuery, indexSearcher, scoreMode, context.ordinalMap);
}
if (VERBOSE) {
System.out.println("joinQuery=" + joinQuery);
}
final BitSet actualResult = new FixedBitSet(indexSearcher.getIndexReader().maxDoc());
final TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.create(10);
indexSearcher.search(joinQuery, MultiCollector.wrap(new BitSetCollector(actualResult), topScoreDocCollector));
assertBitSet(expectedResult, actualResult, indexSearcher);
TopDocs expectedTopDocs = createExpectedTopDocs(randomValue, from, scoreMode, context);
TopDocs actualTopDocs = topScoreDocCollector.topDocs();
assertTopDocs(expectedTopDocs, actualTopDocs, scoreMode, indexSearcher, joinQuery);
topLevelReader.close(); topLevelReader.close();
dir.close(); dir.close();
} }
@ -699,11 +703,14 @@ public class TestJoinUtil extends LuceneTestCase {
} }
assertEquals(expectedTopDocs.getMaxScore(), actualTopDocs.getMaxScore(), 0.0f); assertEquals(expectedTopDocs.getMaxScore(), actualTopDocs.getMaxScore(), 0.0f);
for (int i = 0; i < expectedTopDocs.scoreDocs.length; i++) { if (VERBOSE) {
if (VERBOSE) { for (int i = 0; i < expectedTopDocs.scoreDocs.length; i++) {
System.out.printf(Locale.ENGLISH, "Expected doc: %d | Actual doc: %d\n", expectedTopDocs.scoreDocs[i].doc, actualTopDocs.scoreDocs[i].doc); System.out.printf(Locale.ENGLISH, "Expected doc: %d | Actual doc: %d\n", expectedTopDocs.scoreDocs[i].doc, actualTopDocs.scoreDocs[i].doc);
System.out.printf(Locale.ENGLISH, "Expected score: %f | Actual score: %f\n", expectedTopDocs.scoreDocs[i].score, actualTopDocs.scoreDocs[i].score); System.out.printf(Locale.ENGLISH, "Expected score: %f | Actual score: %f\n", expectedTopDocs.scoreDocs[i].score, actualTopDocs.scoreDocs[i].score);
} }
}
for (int i = 0; i < expectedTopDocs.scoreDocs.length; i++) {
assertEquals(expectedTopDocs.scoreDocs[i].doc, actualTopDocs.scoreDocs[i].doc); assertEquals(expectedTopDocs.scoreDocs[i].doc, actualTopDocs.scoreDocs[i].doc);
assertEquals(expectedTopDocs.scoreDocs[i].score, actualTopDocs.scoreDocs[i].score, 0.0f); assertEquals(expectedTopDocs.scoreDocs[i].score, actualTopDocs.scoreDocs[i].score, 0.0f);
Explanation explanation = indexSearcher.explain(joinQuery, expectedTopDocs.scoreDocs[i].doc); Explanation explanation = indexSearcher.explain(joinQuery, expectedTopDocs.scoreDocs[i].doc);
@ -721,7 +728,7 @@ public class TestJoinUtil extends LuceneTestCase {
} }
IndexIterationContext context = new IndexIterationContext(); IndexIterationContext context = new IndexIterationContext();
int numRandomValues = nDocs / 2; int numRandomValues = nDocs / RandomInts.randomIntBetween(random(), 2, 10);
context.randomUniqueValues = new String[numRandomValues]; context.randomUniqueValues = new String[numRandomValues];
Set<String> trackSet = new HashSet<>(); Set<String> trackSet = new HashSet<>();
context.randomFrom = new boolean[numRandomValues]; context.randomFrom = new boolean[numRandomValues];
@ -743,7 +750,7 @@ public class TestJoinUtil extends LuceneTestCase {
int randomI = random().nextInt(context.randomUniqueValues.length); int randomI = random().nextInt(context.randomUniqueValues.length);
String value = context.randomUniqueValues[randomI]; String value = context.randomUniqueValues[randomI];
Document document = new Document(); Document document = new Document();
document.add(newTextField(random(), "id", id, Field.Store.NO)); document.add(newTextField(random(), "id", id, Field.Store.YES));
document.add(newTextField(random(), "value", value, Field.Store.NO)); document.add(newTextField(random(), "value", value, Field.Store.NO));
boolean from = context.randomFrom[randomI]; boolean from = context.randomFrom[randomI];
@ -812,6 +819,11 @@ public class TestJoinUtil extends LuceneTestCase {
} }
} }
if (random().nextBoolean()) {
fromWriter.forceMerge(1);
toWriter.forceMerge(1);
}
// Pre-compute all possible hits for all unique random values. On top of this also compute all possible score for // Pre-compute all possible hits for all unique random values. On top of this also compute all possible score for
// any ScoreMode. // any ScoreMode.
IndexSearcher fromSearcher = newSearcher(fromWriter.getReader()); IndexSearcher fromSearcher = newSearcher(fromWriter.getReader());