diff --git a/lucene/src/java/org/apache/lucene/index/TermsHashPerField.java b/lucene/src/java/org/apache/lucene/index/TermsHashPerField.java index f3d705e4433..1e8df8beef6 100644 --- a/lucene/src/java/org/apache/lucene/index/TermsHashPerField.java +++ b/lucene/src/java/org/apache/lucene/index/TermsHashPerField.java @@ -181,9 +181,9 @@ final class TermsHashPerField extends InvertedDocConsumerPerField { // term text into textStart address // Get the text & hash of this term. int termID; - try{ - termID = bytesHash.add(termBytesRef, termAtt.fillBytesRef()); - }catch (MaxBytesLengthExceededException e) { + try { + termID = bytesHash.add(termBytesRef, termAtt.fillBytesRef()); + } catch (MaxBytesLengthExceededException e) { // Not enough room in current block // Just skip this term, to remain as robust as // possible during indexing. A TokenFilter diff --git a/modules/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java b/modules/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java index 06a7c988452..7eb26fd9617 100644 --- a/modules/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java +++ b/modules/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java @@ -212,7 +212,7 @@ public class BlockGroupingCollector extends Collector { // Swap pending scores final float[] savScores = og.scores; og.scores = pendingSubScores; - pendingSubScores = og.scores; + pendingSubScores = savScores; } og.readerContext = currentReaderContext; //og.groupOrd = lastGroupOrd; diff --git a/modules/grouping/src/java/org/apache/lucene/search/grouping/TermFirstPassGroupingCollector.java b/modules/grouping/src/java/org/apache/lucene/search/grouping/TermFirstPassGroupingCollector.java index d194d0ed1d0..2ac341fc2d6 100644 --- a/modules/grouping/src/java/org/apache/lucene/search/grouping/TermFirstPassGroupingCollector.java +++ b/modules/grouping/src/java/org/apache/lucene/search/grouping/TermFirstPassGroupingCollector.java @@ -26,7 +26,7 @@ import java.io.IOException; /** * Concrete implementation of {@link AbstractFirstPassGroupingCollector} that groups based on - * field values and more specifically uses {@link org.apache.lucene.search.FieldCache.DocTerms} + * field values and more specifically uses {@link org.apache.lucene.search.FieldCache.DocTermsIndex} * to collect groups. * * @lucene.experimental diff --git a/modules/grouping/src/java/org/apache/lucene/search/grouping/TermSecondPassGroupingCollector.java b/modules/grouping/src/java/org/apache/lucene/search/grouping/TermSecondPassGroupingCollector.java index 40d91b811a7..bf81f98ed90 100644 --- a/modules/grouping/src/java/org/apache/lucene/search/grouping/TermSecondPassGroupingCollector.java +++ b/modules/grouping/src/java/org/apache/lucene/search/grouping/TermSecondPassGroupingCollector.java @@ -27,7 +27,7 @@ import java.util.Collection; /** * Concrete implementation of {@link AbstractSecondPassGroupingCollector} that groups based on - * field values and more specifically uses {@link org.apache.lucene.search.FieldCache.DocTerms} + * field values and more specifically uses {@link org.apache.lucene.search.FieldCache.DocTermsIndex} * to collect grouped docs. * * @lucene.experimental diff --git a/modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java b/modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java index 87745f64a18..2a4bcbcec61 100644 --- a/modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java +++ b/modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java @@ -154,7 +154,10 @@ public class TestGrouping extends LuceneTestCase { final BytesRef group; final BytesRef sort1; final BytesRef sort2; + // content must be "realN ..." final String content; + float score; + float score2; public GroupDoc(int id, BytesRef group, BytesRef sort1, BytesRef sort2, String content) { this.id = id; @@ -167,16 +170,21 @@ public class TestGrouping extends LuceneTestCase { private Sort getRandomSort() { final List sortFields = new ArrayList(); - if (random.nextBoolean()) { + if (random.nextInt(7) == 2) { + sortFields.add(SortField.FIELD_SCORE); + } else { if (random.nextBoolean()) { + if (random.nextBoolean()) { + sortFields.add(new SortField("sort1", SortField.STRING, random.nextBoolean())); + } else { + sortFields.add(new SortField("sort2", SortField.STRING, random.nextBoolean())); + } + } else if (random.nextBoolean()) { sortFields.add(new SortField("sort1", SortField.STRING, random.nextBoolean())); - } else { sortFields.add(new SortField("sort2", SortField.STRING, random.nextBoolean())); } - } else if (random.nextBoolean()) { - sortFields.add(new SortField("sort1", SortField.STRING, random.nextBoolean())); - sortFields.add(new SortField("sort2", SortField.STRING, random.nextBoolean())); } + // Break ties: sortFields.add(new SortField("id", SortField.INT)); return new Sort(sortFields.toArray(new SortField[sortFields.size()])); } @@ -188,7 +196,15 @@ public class TestGrouping extends LuceneTestCase { public int compare(GroupDoc d1, GroupDoc d2) { for(SortField sf : sortFields) { final int cmp; - if (sf.getField().equals("sort1")) { + if (sf.getType() == SortField.SCORE) { + if (d1.score > d2.score) { + cmp = -1; + } else if (d1.score < d2.score) { + cmp = 1; + } else { + cmp = 0; + } + } else if (sf.getField().equals("sort1")) { cmp = d1.sort1.compareTo(d2.sort1); } else if (sf.getField().equals("sort2")) { cmp = d1.sort2.compareTo(d2.sort2); @@ -213,7 +229,9 @@ public class TestGrouping extends LuceneTestCase { for(int fieldIDX=0;fieldIDX c; final SortField sf = sortFields[fieldIDX]; - if (sf.getField().equals("sort1")) { + if (sf.getType() == SortField.SCORE) { + c = new Float(d.score); + } else if (sf.getField().equals("sort1")) { c = d.sort1; } else if (sf.getField().equals("sort2")) { c = d.sort2; @@ -237,17 +255,17 @@ public class TestGrouping extends LuceneTestCase { */ private TopGroups slowGrouping(GroupDoc[] groupDocs, - String searchTerm, - boolean fillFields, - boolean getScores, - boolean getMaxScores, - boolean doAllGroups, - Sort groupSort, - Sort docSort, - int topNGroups, - int docsPerGroup, - int groupOffset, - int docOffset) { + String searchTerm, + boolean fillFields, + boolean getScores, + boolean getMaxScores, + boolean doAllGroups, + Sort groupSort, + Sort docSort, + int topNGroups, + int docsPerGroup, + int groupOffset, + int docOffset) { final Comparator groupSortComp = getComparator(groupSort); @@ -262,11 +280,11 @@ public class TestGrouping extends LuceneTestCase { //System.out.println("TEST: slowGrouping"); for(GroupDoc d : groupDocs) { // TODO: would be better to filter by searchTerm before sorting! - if (!d.content.equals(searchTerm)) { + if (!d.content.startsWith(searchTerm)) { continue; } totalHitCount++; - //System.out.println(" match id=" + d.id); + //System.out.println(" match id=" + d.id + " score=" + d.score); if (doAllGroups) { if (!knownGroups.contains(d.group)) { @@ -312,9 +330,9 @@ public class TestGrouping extends LuceneTestCase { final GroupDoc d = docs.get(docIDX); final FieldDoc fd; if (fillFields) { - fd = new FieldDoc(d.id, 0.0f, fillFields(d, docSort)); + fd = new FieldDoc(d.id, getScores ? d.score : Float.NaN, fillFields(d, docSort)); } else { - fd = new FieldDoc(d.id, 0.0f); + fd = new FieldDoc(d.id, getScores ? d.score : Float.NaN); } hits[docIDX-docOffset] = fd; } @@ -373,7 +391,7 @@ public class TestGrouping extends LuceneTestCase { doc.add(newField("sort1", groupValue.sort1.utf8ToString(), Field.Index.NOT_ANALYZED)); doc.add(newField("sort2", groupValue.sort2.utf8ToString(), Field.Index.NOT_ANALYZED)); doc.add(new NumericField("id").setIntValue(groupValue.id)); - doc.add(newField("content", groupValue.content, Field.Index.NOT_ANALYZED)); + doc.add(newField("content", groupValue.content, Field.Index.ANALYZED)); //System.out.println("TEST: doc content=" + groupValue.content + " group=" + (groupValue.group == null ? "null" : groupValue.group.utf8ToString()) + " sort1=" + groupValue.sort1.utf8ToString() + " id=" + groupValue.id); } // So we can pull filter marking last doc in block: @@ -421,7 +439,22 @@ public class TestGrouping extends LuceneTestCase { groups.add(new BytesRef(_TestUtil.randomRealisticUnicodeString(random))); //groups.add(new BytesRef(_TestUtil.randomSimpleString(random))); } - final String[] contentStrings = new String[] {"a", "b", "c", "d"}; + final String[] contentStrings = new String[_TestUtil.nextInt(random, 2, 20)]; + if (VERBOSE) { + System.out.println("TEST: create fake content"); + } + for(int contentIDX=0;contentIDX scoreMap = new HashMap(); + + // Tricky: must separately set .score2, because the doc + // block index was created with possible deletions! + for(int contentID=0;contentID<3;contentID++) { + //System.out.println("term=real" + contentID + " dfold=" + s.docFreq(new Term("content", "real"+contentID)) + + //" dfnew=" + s2.docFreq(new Term("content", "real"+contentID))); + final ScoreDoc[] hits = s2.search(new TermQuery(new Term("content", "real"+contentID)), numDocs).scoreDocs; + for(ScoreDoc hit : hits) { + final GroupDoc gd = groupDocsByID[docIDToID2[hit.doc]]; + assertTrue(gd.score2 == 0.0); + gd.score2 = hit.score; + assertEquals(gd.id, docIDToID2[hit.doc]); + //System.out.println(" score=" + hit.score + " id=" + docIDToID2[hit.doc]); + scoreMap.put(gd.score, gd.score2); + } + } + for(int searchIter=0;searchIter<100;searchIter++) { if (VERBOSE) { System.out.println("TEST: searchIter=" + searchIter); } - final String searchTerm = contentStrings[random.nextInt(contentStrings.length)]; + final String searchTerm = "real" + random.nextInt(3); final boolean fillFields = random.nextBoolean(); - final boolean getScores = random.nextBoolean(); + boolean getScores = random.nextBoolean(); final boolean getMaxScores = random.nextBoolean(); final Sort groupSort = getRandomSort(); //final Sort groupSort = new Sort(new SortField[] {new SortField("sort1", SortField.STRING), new SortField("id", SortField.INT)}); // TODO: also test null (= sort by relevance) final Sort docSort = getRandomSort(); + for(SortField sf : docSort.getSort()) { + if (sf.getType() == SortField.SCORE) { + getScores = true; + } + } + + for(SortField sf : groupSort.getSort()) { + if (sf.getType() == SortField.SCORE) { + getScores = true; + } + } + final int topNGroups = _TestUtil.nextInt(random, 1, 30); //final int topNGroups = 4; final int docsPerGroup = _TestUtil.nextInt(random, 1, 50); + final int groupOffset = _TestUtil.nextInt(random, 0, (topNGroups-1)/2); //final int groupOffset = 0; @@ -523,7 +612,7 @@ public class TestGrouping extends LuceneTestCase { final boolean doCache = random.nextBoolean(); final boolean doAllGroups = random.nextBoolean(); if (VERBOSE) { - System.out.println("TEST: groupSort=" + groupSort + " docSort=" + docSort + " searchTerm=" + searchTerm + " topNGroups=" + topNGroups + " groupOffset=" + groupOffset + " docOffset=" + docOffset + " doCache=" + doCache + " docsPerGroup=" + docsPerGroup + " doAllGroups=" + doAllGroups); + System.out.println("TEST: groupSort=" + groupSort + " docSort=" + docSort + " searchTerm=" + searchTerm + " topNGroups=" + topNGroups + " groupOffset=" + groupOffset + " docOffset=" + docOffset + " doCache=" + doCache + " docsPerGroup=" + docsPerGroup + " doAllGroups=" + doAllGroups + " getScores=" + getScores + " getMaxScores=" + getMaxScores); } final TermAllGroupsCollector allGroupsCollector; @@ -636,13 +725,12 @@ public class TestGrouping extends LuceneTestCase { for(GroupDocs gd : expectedGroups.groups) { System.out.println(" group=" + (gd.groupValue == null ? "null" : gd.groupValue.utf8ToString())); for(ScoreDoc sd : gd.scoreDocs) { - System.out.println(" id=" + sd.doc); + System.out.println(" id=" + sd.doc + " score=" + sd.score); } } } } - // NOTE: intentional but temporary field cache insanity! - assertEquals(docIDToID, expectedGroups, groupsResult, true); + assertEquals(docIDToID, expectedGroups, groupsResult, true, getScores); final boolean needsScores = getScores || getMaxScores || docSort == null; final BlockGroupingCollector c3 = new BlockGroupingCollector(groupSort, groupOffset+topNGroups, needsScores, lastDocInBlock); @@ -665,11 +753,53 @@ public class TestGrouping extends LuceneTestCase { } else { groupsResult2 = tempTopGroups2; } - assertEquals(docIDToID2, expectedGroups, groupsResult2, false); + + if (expectedGroups != null) { + // Fixup scores for reader2 + for (GroupDocs groupDocsHits : expectedGroups.groups) { + for(ScoreDoc hit : groupDocsHits.scoreDocs) { + final GroupDoc gd = groupDocsByID[hit.doc]; + assertEquals(gd.id, hit.doc); + //System.out.println("fixup score " + hit.score + " to " + gd.score2 + " vs " + gd.score); + hit.score = gd.score2; + } + } + + final SortField[] sortFields = groupSort.getSort(); + for(int groupSortIDX=0;groupSortIDX