small cleanups to TestGrouping to try to make it easier to understand

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1179203 13f79535-47bb-0310-9956-ffa450edef68
Michael McCandless 2011-10-05 12:52:23 +00:00
parent e910534b7a
commit 86124e2667
1 changed file with 97 additions and 58 deletions


@@ -384,6 +384,11 @@ public class TestGrouping extends LuceneTestCase {
                                                  new MockAnalyzer(random)));
 
     final List<List<Document>> updateDocs = new ArrayList<List<Document>>();
+
+    FieldType groupEndType = new FieldType(StringField.TYPE_UNSTORED);
+    groupEndType.setIndexOptions(IndexOptions.DOCS_ONLY);
+    groupEndType.setOmitNorms(true);
+
     //System.out.println("TEST: index groups");
     for(BytesRef group : groupValues) {
       final List<Document> docs = new ArrayList<Document>();
@@ -401,10 +406,7 @@ public class TestGrouping extends LuceneTestCase {
         //System.out.println("TEST: doc content=" + groupValue.content + " group=" + (groupValue.group == null ? "null" : groupValue.group.utf8ToString()) + " sort1=" + groupValue.sort1.utf8ToString() + " id=" + groupValue.id);
       }
       // So we can pull filter marking last doc in block:
-      FieldType ft = new FieldType(StringField.TYPE_UNSTORED);
-      ft.setIndexOptions(IndexOptions.DOCS_ONLY);
-      ft.setOmitNorms(true);
-      final Field groupEnd = newField("groupend", "x", ft);
+      final Field groupEnd = newField("groupend", "x", groupEndType);
       docs.get(docs.size()-1).add(groupEnd);
       // Add as a doc block:
       w.addDocuments(docs);
@@ -456,7 +458,7 @@ public class TestGrouping extends LuceneTestCase {
       }
     }
   }
 
   public void testRandom() throws Exception {
     for(int iter=0;iter<3;iter++) {
@ -560,8 +562,8 @@ public class TestGrouping extends LuceneTestCase {
// NOTE: intentional but temporary field cache insanity!
final int[] docIDToID = FieldCache.DEFAULT.getInts(r, "id");
IndexReader r2 = null;
Directory dir2 = null;
IndexReader rBlocks = null;
Directory dirBlocks = null;
try {
final IndexSearcher s = newSearcher(r);
@@ -584,15 +586,15 @@ public class TestGrouping extends LuceneTestCase {
 
         // Build 2nd index, where docs are added in blocks by
         // group, so we can use single pass collector
-        dir2 = newDirectory();
-        r2 = getDocBlockReader(dir2, groupDocs);
+        dirBlocks = newDirectory();
+        rBlocks = getDocBlockReader(dirBlocks, groupDocs);
         final Filter lastDocInBlock = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("groupend", "x"))));
-        final int[] docIDToID2 = FieldCache.DEFAULT.getInts(r2, "id");
+        final int[] docIDToIDBlocks = FieldCache.DEFAULT.getInts(rBlocks, "id");
 
-        final IndexSearcher s2 = newSearcher(r2);
-        final ShardState shards2 = new ShardState(s2);
+        final IndexSearcher sBlocks = newSearcher(rBlocks);
+        final ShardState shardsBlocks = new ShardState(sBlocks);
 
-        // Reader2 only increases maxDoc() vs reader, which
+        // ReaderBlocks only increases maxDoc() vs reader, which
         // means a monotonic shift in scores, so we can
         // reliably remap them w/ Map:
         final Map<String,Map<Float,Float>> scoreMap = new HashMap<String,Map<Float,Float>>();
@@ -605,14 +607,14 @@ public class TestGrouping extends LuceneTestCase {
           final Map<Float,Float> termScoreMap = new HashMap<Float,Float>();
           scoreMap.put("real"+contentID, termScoreMap);
           //System.out.println("term=real" + contentID + " dfold=" + s.docFreq(new Term("content", "real"+contentID)) +
-          //" dfnew=" + s2.docFreq(new Term("content", "real"+contentID)));
-          final ScoreDoc[] hits = s2.search(new TermQuery(new Term("content", "real"+contentID)), numDocs).scoreDocs;
+          //" dfnew=" + sBlocks.docFreq(new Term("content", "real"+contentID)));
+          final ScoreDoc[] hits = sBlocks.search(new TermQuery(new Term("content", "real"+contentID)), numDocs).scoreDocs;
           for(ScoreDoc hit : hits) {
-            final GroupDoc gd = groupDocsByID[docIDToID2[hit.doc]];
+            final GroupDoc gd = groupDocsByID[docIDToIDBlocks[hit.doc]];
             assertTrue(gd.score2 == 0.0);
             gd.score2 = hit.score;
-            assertEquals(gd.id, docIDToID2[hit.doc]);
-            //System.out.println(" score=" + gd.score + " score2=" + hit.score + " id=" + docIDToID2[hit.doc]);
+            assertEquals(gd.id, docIDToIDBlocks[hit.doc]);
+            //System.out.println(" score=" + gd.score + " score2=" + hit.score + " id=" + docIDToIDBlocks[hit.doc]);
             termScoreMap.put(gd.score, gd.score2);
           }
         }
@@ -620,7 +622,7 @@ public class TestGrouping extends LuceneTestCase {
 
         for(int searchIter=0;searchIter<100;searchIter++) {
           if (VERBOSE) {
-            System.out.println("TEST: searchIter=" + searchIter);
+            System.out.println("\nTEST: searchIter=" + searchIter);
           }
 
           final String searchTerm = "real" + random.nextInt(3);
@@ -657,7 +659,7 @@ public class TestGrouping extends LuceneTestCase {
           final boolean doCache = random.nextBoolean();
           final boolean doAllGroups = random.nextBoolean();
           if (VERBOSE) {
-            System.out.println("TEST: groupSort=" + groupSort + " docSort=" + docSort + " searchTerm=" + searchTerm + " topNGroups=" + topNGroups + " groupOffset=" + groupOffset + " docOffset=" + docOffset + " doCache=" + doCache + " docsPerGroup=" + docsPerGroup + " doAllGroups=" + doAllGroups + " getScores=" + getScores + " getMaxScores=" + getMaxScores);
+            System.out.println("TEST: groupSort=" + groupSort + " docSort=" + docSort + " searchTerm=" + searchTerm + " dF=" + r.docFreq("content", new BytesRef(searchTerm)) +" dFBlock=" + rBlocks.docFreq("content", new BytesRef(searchTerm)) + " topNGroups=" + topNGroups + " groupOffset=" + groupOffset + " docOffset=" + docOffset + " doCache=" + doCache + " docsPerGroup=" + docsPerGroup + " doAllGroups=" + doAllGroups + " getScores=" + getScores + " getMaxScores=" + getMaxScores);
           }
 
           final TermAllGroupsCollector allGroupsCollector;
@@ -700,8 +702,8 @@ public class TestGrouping extends LuceneTestCase {
           }
 
           // Search top reader:
-          final Query q = new TermQuery(new Term("content", searchTerm));
-          s.search(q, c);
+          final Query query = new TermQuery(new Term("content", searchTerm));
+          s.search(query, c);
 
           if (doCache && !useWrappingCollector) {
             if (cCache.isCached()) {
@@ -713,37 +715,34 @@ public class TestGrouping extends LuceneTestCase {
              }
            } else {
              // Replay by re-running search:
-              s.search(new TermQuery(new Term("content", searchTerm)), c1);
+              s.search(query, c1);
              if (doAllGroups) {
-                s.search(new TermQuery(new Term("content", searchTerm)), allGroupsCollector);
+                s.search(query, allGroupsCollector);
              }
            }
          }
 
          // Get 1st pass top groups
          final Collection<SearchGroup<BytesRef>> topGroups = c1.getTopGroups(groupOffset, fillFields);
-          final TopGroups groupsResult;
 
          if (VERBOSE) {
-            System.out.println("TEST: topGroups:");
+            System.out.println("TEST: first pass topGroups");
            if (topGroups == null) {
              System.out.println(" null");
            } else {
-              for(SearchGroup<BytesRef> groupx : topGroups) {
-                System.out.println(" " + groupToString(groupx.groupValue) + " sort=" + Arrays.toString(groupx.sortValues));
+              for (SearchGroup<BytesRef> searchGroup : topGroups) {
+                System.out.println(" " + (searchGroup.groupValue == null ? "null" : searchGroup.groupValue) + ": " + Arrays.deepToString(searchGroup.sortValues));
              }
            }
          }
 
-          final TopGroups<BytesRef> topGroupsShards = searchShards(s, shards.subSearchers, q, groupSort, docSort, groupOffset, topNGroups, docOffset, docsPerGroup, getScores, getMaxScores);
+          // Get 1st pass top groups using shards
+          final TopGroups<BytesRef> topGroupsShards = searchShards(s, shards.subSearchers, query, groupSort, docSort, groupOffset, topNGroups, docOffset, docsPerGroup, getScores, getMaxScores);
+          final TopGroups<BytesRef> groupsResult;
+
          if (topGroups != null) {
-            if (VERBOSE) {
-              System.out.println("TEST: topGroups");
-              for (SearchGroup<BytesRef> searchGroup : topGroups) {
-                System.out.println(" " + (searchGroup.groupValue == null ? "null" : searchGroup.groupValue.utf8ToString()) + ": " + Arrays.deepToString(searchGroup.sortValues));
-              }
-            }
+
            // Get 2nd pass grouped result:
            final TermSecondPassGroupingCollector c2 = new TermSecondPassGroupingCollector("group", topGroups, groupSort, docSort, docOffset+docsPerGroup, getScores, getMaxScores, fillFields);
            if (doCache) {
              if (cCache.isCached()) {
@@ -755,10 +754,10 @@ public class TestGrouping extends LuceneTestCase {
                if (VERBOSE) {
                  System.out.println("TEST: cache was too large");
                }
-                s.search(new TermQuery(new Term("content", searchTerm)), c2);
+                s.search(query, c2);
              }
            } else {
-              s.search(new TermQuery(new Term("content", searchTerm)), c2);
+              s.search(query, c2);
            }
 
            if (doAllGroups) {
@@ -780,15 +779,34 @@ public class TestGrouping extends LuceneTestCase {
              if (expectedGroups == null) {
                System.out.println("TEST: no expected groups");
              } else {
-                System.out.println("TEST: expected groups");
+                System.out.println("TEST: expected groups totalGroupedHitCount=" + expectedGroups.totalGroupedHitCount);
                for(GroupDocs<BytesRef> gd : expectedGroups.groups) {
-                  System.out.println(" group=" + (gd.groupValue == null ? "null" : gd.groupValue.utf8ToString()));
+                  System.out.println(" group=" + (gd.groupValue == null ? "null" : gd.groupValue) + " totalHits=" + gd.totalHits);
                  for(ScoreDoc sd : gd.scoreDocs) {
                    System.out.println(" id=" + sd.doc + " score=" + sd.score);
                  }
                }
              }
+
+              if (groupsResult == null) {
+                System.out.println("TEST: no matched groups");
+              } else {
+                System.out.println("TEST: matched groups totalGroupedHitCount=" + groupsResult.totalGroupedHitCount);
+                for(GroupDocs<BytesRef> gd : groupsResult.groups) {
+                  System.out.println(" group=" + (gd.groupValue == null ? "null" : gd.groupValue) + " totalHits=" + gd.totalHits);
+                  for(ScoreDoc sd : gd.scoreDocs) {
+                    System.out.println(" id=" + docIDToID[sd.doc] + " score=" + sd.score);
+                  }
+                }
+
+                if (searchIter == 14) {
+                  for(int docIDX=0;docIDX<s.maxDoc();docIDX++) {
+                    System.out.println("ID=" + docIDToID[docIDX] + " explain=" + s.explain(query, docIDX));
+                  }
+                }
+              }
            }
 
            assertEquals(docIDToID, expectedGroups, groupsResult, true, true, true, getScores);
+
            // Confirm merged shards match:
@@ -808,18 +826,39 @@ public class TestGrouping extends LuceneTestCase {
              allGroupsCollector2 = null;
              c4 = c3;
            }
 
-            s2.search(new TermQuery(new Term("content", searchTerm)), c4);
+            // Get block grouping result:
+            sBlocks.search(query, c4);
            @SuppressWarnings("unchecked")
-            final TopGroups<BytesRef> tempTopGroups2 = c3.getTopGroups(docSort, groupOffset, docOffset, docOffset+docsPerGroup, fillFields);
-            final TopGroups groupsResult2;
-            if (doAllGroups && tempTopGroups2 != null) {
-              assertEquals((int) tempTopGroups2.totalGroupCount, allGroupsCollector2.getGroupCount());
-              groupsResult2 = new TopGroups<BytesRef>(tempTopGroups2, allGroupsCollector2.getGroupCount());
+            final TopGroups<BytesRef> tempTopGroupsBlocks = c3.getTopGroups(docSort, groupOffset, docOffset, docOffset+docsPerGroup, fillFields);
+            final TopGroups<BytesRef> groupsResultBlocks;
+            if (doAllGroups && tempTopGroupsBlocks != null) {
+              assertEquals((int) tempTopGroupsBlocks.totalGroupCount, allGroupsCollector2.getGroupCount());
+              groupsResultBlocks = new TopGroups<BytesRef>(tempTopGroupsBlocks, allGroupsCollector2.getGroupCount());
            } else {
-              groupsResult2 = tempTopGroups2;
+              groupsResultBlocks = tempTopGroupsBlocks;
            }
-            final TopGroups<BytesRef> topGroupsBlockShards = searchShards(s2, shards2.subSearchers, q, groupSort, docSort, groupOffset, topNGroups, docOffset, docsPerGroup, getScores, getMaxScores);
+
+            if (VERBOSE) {
+              if (groupsResultBlocks == null) {
+                System.out.println("TEST: no block groups");
+              } else {
+                System.out.println("TEST: block groups totalGroupedHitCount=" + groupsResultBlocks.totalGroupedHitCount);
+                boolean first = true;
+                for(GroupDocs<BytesRef> gd : groupsResultBlocks.groups) {
+                  System.out.println(" group=" + (gd.groupValue == null ? "null" : gd.groupValue.utf8ToString()) + " totalHits=" + gd.totalHits);
+                  for(ScoreDoc sd : gd.scoreDocs) {
+                    System.out.println(" id=" + docIDToIDBlocks[sd.doc] + " score=" + sd.score);
+                    if (first) {
+                      System.out.println("explain: " + sBlocks.explain(query, sd.doc));
+                      first = false;
+                    }
+                  }
+                }
+              }
+            }
+
+            // Get shard'd block grouping result:
+            final TopGroups<BytesRef> topGroupsBlockShards = searchShards(sBlocks, shardsBlocks.subSearchers, query, groupSort, docSort, groupOffset, topNGroups, docOffset, docsPerGroup, getScores, getMaxScores);
 
            if (expectedGroups != null) {
              // Fixup scores for reader2
@@ -862,23 +901,23 @@ public class TestGrouping extends LuceneTestCase {
              }
            }
 
-            assertEquals(docIDToID2, expectedGroups, groupsResult2, false, true, true, getScores);
-            assertEquals(docIDToID2, expectedGroups, topGroupsBlockShards, false, false, fillFields, getScores);
+            assertEquals(docIDToIDBlocks, expectedGroups, groupsResultBlocks, false, true, true, getScores);
+            assertEquals(docIDToIDBlocks, expectedGroups, topGroupsBlockShards, false, false, fillFields, getScores);
          }
 
          s.close();
-          s2.close();
+          sBlocks.close();
        } finally {
          FieldCache.DEFAULT.purge(r);
-          if (r2 != null) {
-            FieldCache.DEFAULT.purge(r2);
+          if (rBlocks != null) {
+            FieldCache.DEFAULT.purge(rBlocks);
          }
        }
 
        r.close();
        dir.close();
-        r2.close();
-        dir2.close();
+        rBlocks.close();
+        dirBlocks.close();
      }
    }
@@ -931,7 +970,7 @@ public class TestGrouping extends LuceneTestCase {
      if (VERBOSE) {
        System.out.println(" shard " + shardIDX + " s=" + subSearchers[shardIDX] + " " + topGroups.size() + " groups:");
        for(SearchGroup<BytesRef> group : topGroups) {
-          System.out.println(" " + groupToString(group.groupValue) + " sort=" + Arrays.toString(group.sortValues));
+          System.out.println(" " + groupToString(group.groupValue) + " groupSort=" + Arrays.toString(group.sortValues));
        }
      }
      shardGroups.add(topGroups);
@@ -945,7 +984,7 @@ public class TestGrouping extends LuceneTestCase {
        System.out.println(" null");
      } else {
        for(SearchGroup<BytesRef> group : mergedTopGroups) {
-          System.out.println(" " + groupToString(group.groupValue) + " groupSort=" + Arrays.toString(group.sortValues));
+          System.out.println(" " + groupToString(group.groupValue) + " groupSort=" + Arrays.toString(group.sortValues));
        }
      }
    }
@@ -1039,7 +1078,7 @@ public class TestGrouping extends LuceneTestCase {
 
    @Override
    public String toString() {
-      return "ShardSearcher(" + ctx[0] + ")";
+      return "ShardSearcher(" + ctx[0].reader + ")";
    }
  }
}
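
For context, the test above verifies three ways of computing the same grouped hits: the two-pass term collectors on the main index, the same search split across shards, and a single-pass collector over the "blocks" index built by getDocBlockReader. A minimal sketch of the two-pass flow, assuming the grouping module's API as of this commit (c1/c2 mirror the names in the diff; s, query, and the sort/offset/flag variables stand in for the test's locals):

    // First pass: find the top N groups matching the query.
    final TermFirstPassGroupingCollector c1 =
        new TermFirstPassGroupingCollector("group", groupSort, groupOffset+topNGroups);
    s.search(query, c1);
    final Collection<SearchGroup<BytesRef>> topGroups = c1.getTopGroups(groupOffset, fillFields);

    if (topGroups != null) {
      // Second pass: collect the top docs within each of those groups.
      final TermSecondPassGroupingCollector c2 = new TermSecondPassGroupingCollector("group", topGroups, groupSort, docSort, docOffset+docsPerGroup, getScores, getMaxScores, fillFields);
      s.search(query, c2);
      final TopGroups<BytesRef> groupsResult = c2.getTopGroups(docOffset);
    }

The "groupend" marker hoisted in the first hunk supports the single-pass path: each group's docs are indexed as one contiguous block, and a cached filter on the marker term identifies the last doc of every block for the block-based collector. A sketch under the same assumptions (w and groupedDocs are illustrative stand-ins; newField is the LuceneTestCase helper used in the diff):

    // Index each group's docs as one contiguous block; the last doc of the
    // block carries the "groupend" marker field:
    for(List<Document> block : groupedDocs) {
      block.get(block.size()-1).add(newField("groupend", "x", groupEndType));
      w.addDocuments(block);
    }

    // Cached filter matching the last doc of every block, as built in the
    // diff; this is what the single-pass block grouping collector consumes:
    final Filter lastDocInBlock = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("groupend", "x"))));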