fix GroupFacetCollectorTest

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1436008 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2013-01-20 23:49:06 +00:00
parent 4c20299604
commit b1ad844149
6 changed files with 102 additions and 48 deletions

View File

@ -161,7 +161,7 @@ class DiskDocValuesProducer extends DocValuesProducer {
long address = bytes.offset + docID * (long)bytes.maxLength;
try {
data.seek(address);
if (result.length < bytes.maxLength) {
if (result.bytes.length < bytes.maxLength) {
result.offset = 0;
result.bytes = new byte[bytes.maxLength];
}
@ -185,7 +185,7 @@ class DiskDocValuesProducer extends DocValuesProducer {
int length = (int) (endAddress - startAddress);
try {
data.seek(startAddress);
if (result.length < length) {
if (result.bytes.length < length) {
result.offset = 0;
result.bytes = new byte[length];
}

View File

@ -44,6 +44,8 @@ final class DocValuesProcessor extends StoredFieldsConsumer {
@Override
void finishDocument() {
// nocommit catch missing DV fields here? else we have
// null/"" depending on how docs landed in segments?
}
@Override

View File

@ -97,8 +97,6 @@ final class SegmentCoreReaders {
}
};
// nocommit norms too
private final Set<CoreClosedListener> coreClosedListeners =
Collections.synchronizedSet(new LinkedHashSet<CoreClosedListener>());
@ -132,6 +130,7 @@ final class SegmentCoreReaders {
// TODO: since we don't write any norms file if there are no norms,
// kinda jaky to assume the codec handles the case of no norms file at all gracefully?!
// nocommit shouldn't need null check:
assert codec.docValuesFormat() != null;
if (codec.docValuesFormat() != null) {
if (fieldInfos.hasDocValues()) {
simpleDVProducer = codec.docValuesFormat().fieldsProducer(segmentReadState);
@ -255,6 +254,8 @@ final class SegmentCoreReaders {
return null;
}
assert simpleDVProducer != null;
// nocommit change to assert != null!!
if (simpleDVProducer == null) {
return null;

View File

@ -176,10 +176,16 @@ public abstract class SortedDocValues extends BinaryDocValues {
} else if (cmp > 0) {
high = mid - 1;
} else {
// nocommit is this the right way... else caller can
// pass this spare down to DiskDV, which will then
// "use" our byte[] ...
spare.bytes = BytesRef.EMPTY_BYTES;
return mid; // key found
}
}
// nocommit is this the right way...
spare.bytes = BytesRef.EMPTY_BYTES;
return -(low + 1); // key not found.
}
}

View File

@ -33,6 +33,7 @@ public abstract class AbstractGroupingTestCase extends LuceneTestCase {
// For that reason we don't generate empty string
// groups.
randomValue = _TestUtil.randomRealisticUnicodeString(random());
//randomValue = _TestUtil.randomSimpleString(random());
} while ("".equals(randomValue));
return randomValue;
}

View File

@ -88,7 +88,7 @@ public class GroupFacetCollectorTest extends AbstractGroupingTestCase {
w.addDocument(doc);
IndexSearcher indexSearcher = new IndexSearcher(w.getReader());
AbstractGroupFacetCollector groupedAirportFacetCollector = createRandomCollector(groupField, "airport", null, false, useDv);
AbstractGroupFacetCollector groupedAirportFacetCollector = createRandomCollector(useDv ? "hotel_dv" : "hotel", useDv ? "airport_dv" : "airport", null, false);
indexSearcher.search(new MatchAllDocsQuery(), groupedAirportFacetCollector);
TermGroupFacetCollector.GroupedFacetResult airportResult = groupedAirportFacetCollector.mergeSegmentResults(10, 0, false);
assertEquals(3, airportResult.getTotalCount());
@ -102,7 +102,7 @@ public class GroupFacetCollectorTest extends AbstractGroupingTestCase {
assertEquals(1, entries.get(1).getCount());
AbstractGroupFacetCollector groupedDurationFacetCollector = createRandomCollector(groupField, "duration", null, false, useDv);
AbstractGroupFacetCollector groupedDurationFacetCollector = createRandomCollector(useDv ? "hotel_dv" : "hotel", useDv ? "duration_dv" : "duration", null, false);
indexSearcher.search(new MatchAllDocsQuery(), groupedDurationFacetCollector);
TermGroupFacetCollector.GroupedFacetResult durationResult = groupedDurationFacetCollector.mergeSegmentResults(10, 0, false);
assertEquals(4, durationResult.getTotalCount());
@ -118,6 +118,10 @@ public class GroupFacetCollectorTest extends AbstractGroupingTestCase {
// 5
doc = new Document();
addField(doc, groupField, "b", useDv);
// missing airport
if (useDv) {
addField(doc, "airport", "", useDv);
}
addField(doc, "duration", "5", useDv);
w.addDocument(doc);
@ -144,20 +148,28 @@ public class GroupFacetCollectorTest extends AbstractGroupingTestCase {
indexSearcher.getIndexReader().close();
indexSearcher = new IndexSearcher(w.getReader());
groupedAirportFacetCollector = createRandomCollector(groupField, "airport", null, true, useDv);
groupedAirportFacetCollector = createRandomCollector(useDv ? "hotel_dv" : "hotel", useDv ? "airport_dv" : "airport", null, !useDv);
indexSearcher.search(new MatchAllDocsQuery(), groupedAirportFacetCollector);
airportResult = groupedAirportFacetCollector.mergeSegmentResults(3, 0, true);
assertEquals(5, airportResult.getTotalCount());
assertEquals(1, airportResult.getTotalMissingCount());
entries = airportResult.getFacetEntries(1, 2);
assertEquals(2, entries.size());
assertEquals("bru", entries.get(0).getValue().utf8ToString());
assertEquals(2, entries.get(0).getCount());
assertEquals("dus", entries.get(1).getValue().utf8ToString());
assertEquals(1, entries.get(1).getCount());
if (useDv) {
assertEquals(6, airportResult.getTotalCount());
assertEquals(0, airportResult.getTotalMissingCount());
assertEquals("bru", entries.get(0).getValue().utf8ToString());
assertEquals(2, entries.get(0).getCount());
assertEquals("", entries.get(1).getValue().utf8ToString());
assertEquals(1, entries.get(1).getCount());
} else {
assertEquals(5, airportResult.getTotalCount());
assertEquals(1, airportResult.getTotalMissingCount());
assertEquals("bru", entries.get(0).getValue().utf8ToString());
assertEquals(2, entries.get(0).getCount());
assertEquals("dus", entries.get(1).getValue().utf8ToString());
assertEquals(1, entries.get(1).getCount());
}
groupedDurationFacetCollector = createRandomCollector(groupField, "duration", null, false, useDv);
groupedDurationFacetCollector = createRandomCollector(useDv ? "hotel_dv" : "hotel", useDv ? "duration_dv" : "duration", null, false);
indexSearcher.search(new MatchAllDocsQuery(), groupedDurationFacetCollector);
durationResult = groupedDurationFacetCollector.mergeSegmentResults(10, 2, true);
assertEquals(5, durationResult.getTotalCount());
@ -184,22 +196,35 @@ public class GroupFacetCollectorTest extends AbstractGroupingTestCase {
indexSearcher.getIndexReader().close();
indexSearcher = new IndexSearcher(w.getReader());
groupedAirportFacetCollector = createRandomCollector(groupField, "airport", null, false, useDv);
groupedAirportFacetCollector = createRandomCollector(useDv ? "hotel_dv" : "hotel", useDv ? "airport_dv" : "airport", null, false);
indexSearcher.search(new MatchAllDocsQuery(), groupedAirportFacetCollector);
airportResult = groupedAirportFacetCollector.mergeSegmentResults(10, 0, false);
assertEquals(7, airportResult.getTotalCount());
assertEquals(1, airportResult.getTotalMissingCount());
entries = airportResult.getFacetEntries(0, 10);
assertEquals(3, entries.size());
assertEquals("ams", entries.get(0).getValue().utf8ToString());
assertEquals(2, entries.get(0).getCount());
assertEquals("bru", entries.get(1).getValue().utf8ToString());
assertEquals(3, entries.get(1).getCount());
assertEquals("dus", entries.get(2).getValue().utf8ToString());
assertEquals(2, entries.get(2).getCount());
if (useDv) {
assertEquals(8, airportResult.getTotalCount());
assertEquals(0, airportResult.getTotalMissingCount());
assertEquals(4, entries.size());
assertEquals("", entries.get(0).getValue().utf8ToString());
assertEquals(1, entries.get(0).getCount());
assertEquals("ams", entries.get(1).getValue().utf8ToString());
assertEquals(2, entries.get(1).getCount());
assertEquals("bru", entries.get(2).getValue().utf8ToString());
assertEquals(3, entries.get(2).getCount());
assertEquals("dus", entries.get(3).getValue().utf8ToString());
assertEquals(2, entries.get(3).getCount());
} else {
assertEquals(7, airportResult.getTotalCount());
assertEquals(1, airportResult.getTotalMissingCount());
assertEquals(3, entries.size());
assertEquals("ams", entries.get(0).getValue().utf8ToString());
assertEquals(2, entries.get(0).getCount());
assertEquals("bru", entries.get(1).getValue().utf8ToString());
assertEquals(3, entries.get(1).getCount());
assertEquals("dus", entries.get(2).getValue().utf8ToString());
assertEquals(2, entries.get(2).getCount());
}
groupedDurationFacetCollector = createRandomCollector(groupField, "duration", "1", false, useDv);
groupedDurationFacetCollector = createRandomCollector(useDv ? "hotel_dv" : "hotel", useDv ? "duration_dv" : "duration", "1", false);
indexSearcher.search(new MatchAllDocsQuery(), groupedDurationFacetCollector);
durationResult = groupedDurationFacetCollector.mergeSegmentResults(10, 0, true);
assertEquals(5, durationResult.getTotalCount());
@ -235,13 +260,13 @@ public class GroupFacetCollectorTest extends AbstractGroupingTestCase {
// 0
Document doc = new Document();
addField(doc, "x", "x", useDv);
doc.add(new StringField("x", "x", Field.Store.NO));
w.addDocument(doc);
// 1
doc = new Document();
addField(doc, groupField, "a", useDv);
addField(doc, "airport", "ams", useDv);
doc.add(new StringField("airport", "ams", Field.Store.NO));
w.addDocument(doc);
w.commit();
@ -250,43 +275,44 @@ public class GroupFacetCollectorTest extends AbstractGroupingTestCase {
// 2
doc = new Document();
addField(doc, groupField, "a", useDv);
addField(doc, "airport", "ams", useDv);
doc.add(new StringField("airport", "ams", Field.Store.NO));
w.addDocument(doc);
// 3
doc = new Document();
addField(doc, groupField, "a", useDv);
addField(doc, "airport", "dus", useDv);
doc.add(new StringField("airport", "dus", Field.Store.NO));
w.addDocument(doc);
// 4
doc = new Document();
addField(doc, groupField, "b", useDv);
addField(doc, "airport", "ams", useDv);
doc.add(new StringField("airport", "ams", Field.Store.NO));
w.addDocument(doc);
// 5
doc = new Document();
addField(doc, groupField, "b", useDv);
addField(doc, "airport", "ams", useDv);
doc.add(new StringField("airport", "ams", Field.Store.NO));
w.addDocument(doc);
// 6
doc = new Document();
addField(doc, groupField, "b", useDv);
addField(doc, "airport", "ams", useDv);
doc.add(new StringField("airport", "ams", Field.Store.NO));
w.addDocument(doc);
w.commit();
// 7
doc = new Document();
addField(doc, "x", "x", useDv);
doc.add(new StringField("x", "x", Field.Store.NO));
w.addDocument(doc);
w.commit();
w.close();
IndexSearcher indexSearcher = new IndexSearcher(DirectoryReader.open(dir));
AbstractGroupFacetCollector groupedAirportFacetCollector = createRandomCollector(groupField, "airport", null, true, useDv);
AbstractGroupFacetCollector groupedAirportFacetCollector = createRandomCollector(groupField, "airport", null, true);
indexSearcher.search(new MatchAllDocsQuery(), groupedAirportFacetCollector);
TermGroupFacetCollector.GroupedFacetResult airportResult = groupedAirportFacetCollector.mergeSegmentResults(10, 0, false);
assertEquals(3, airportResult.getTotalCount());
@ -306,7 +332,7 @@ public class GroupFacetCollectorTest extends AbstractGroupingTestCase {
private void addField(Document doc, String field, String value, boolean canUseIDV) {
doc.add(new StringField(field, value, Field.Store.NO));
if (canUseIDV) {
doc.add(new SortedBytesDocValuesField(field, new BytesRef(value)));
doc.add(new SortedBytesDocValuesField(field + "_dv", new BytesRef(value)));
}
}
@ -318,8 +344,15 @@ public class GroupFacetCollectorTest extends AbstractGroupingTestCase {
IndexContext context = createIndexContext(multipleFacetsPerDocument);
final IndexSearcher searcher = newSearcher(context.indexReader);
if (VERBOSE) {
System.out.println("TEST: searcher=" + searcher);
}
for (int searchIter = 0; searchIter < 100; searchIter++) {
boolean useDv = context.useDV && random.nextBoolean();
if (VERBOSE) {
System.out.println("TEST: searchIter=" + searchIter);
}
boolean useDv = !multipleFacetsPerDocument && context.useDV && random.nextBoolean();
String searchTerm = context.contentStrings[random.nextInt(context.contentStrings.length)];
int limit = random.nextInt(context.facetValues.size());
int offset = random.nextInt(context.facetValues.size() - limit);
@ -342,7 +375,7 @@ public class GroupFacetCollectorTest extends AbstractGroupingTestCase {
}
GroupedFacetResult expectedFacetResult = createExpectedFacetResult(searchTerm, context, offset, limit, minCount, orderByCount, facetPrefix);
AbstractGroupFacetCollector groupFacetCollector = createRandomCollector(useDv ? "group_dv" : "group", useDv ? "facet_dv" : "facet", facetPrefix, multipleFacetsPerDocument, useDv);
AbstractGroupFacetCollector groupFacetCollector = createRandomCollector(useDv ? "group_dv" : "group", useDv ? "facet_dv" : "facet", facetPrefix, multipleFacetsPerDocument);
searcher.search(new TermQuery(new Term("content", searchTerm)), groupFacetCollector);
TermGroupFacetCollector.GroupedFacetResult actualFacetResult = groupFacetCollector.mergeSegmentResults(size, minCount, orderByCount);
@ -350,6 +383,7 @@ public class GroupFacetCollectorTest extends AbstractGroupingTestCase {
List<TermGroupFacetCollector.FacetEntry> actualFacetEntries = actualFacetResult.getFacetEntries(offset, limit);
if (VERBOSE) {
System.out.println("Use DV: " + useDv);
System.out.println("Collector: " + groupFacetCollector.getClass().getSimpleName());
System.out.println("Num group: " + context.numGroups);
System.out.println("Num doc: " + context.numDocs);
@ -367,7 +401,7 @@ public class GroupFacetCollectorTest extends AbstractGroupingTestCase {
System.out.println("\n=== Expected: \n");
System.out.println("Total count " + expectedFacetResult.getTotalCount());
System.out.println("Total missing count " + expectedFacetResult.getTotalMissingCount());
int counter = 1;
int counter = 0;
for (TermGroupFacetCollector.FacetEntry expectedFacetEntry : expectedFacetEntries) {
System.out.println(
String.format(Locale.ROOT,
@ -380,7 +414,7 @@ public class GroupFacetCollectorTest extends AbstractGroupingTestCase {
System.out.println("\n=== Actual: \n");
System.out.println("Total count " + actualFacetResult.getTotalCount());
System.out.println("Total missing count " + actualFacetResult.getTotalMissingCount());
counter = 1;
counter = 0;
for (TermGroupFacetCollector.FacetEntry actualFacetEntry : actualFacetEntries) {
System.out.println(
String.format(Locale.ROOT,
@ -391,15 +425,15 @@ public class GroupFacetCollectorTest extends AbstractGroupingTestCase {
}
System.out.println("\n===================================================================================");
}
assertEquals(expectedFacetResult.getTotalCount(), actualFacetResult.getTotalCount());
assertEquals(expectedFacetResult.getTotalMissingCount(), actualFacetResult.getTotalMissingCount());
assertEquals(expectedFacetEntries.size(), actualFacetEntries.size());
for (int i = 0; i < expectedFacetEntries.size(); i++) {
TermGroupFacetCollector.FacetEntry expectedFacetEntry = expectedFacetEntries.get(i);
TermGroupFacetCollector.FacetEntry actualFacetEntry = actualFacetEntries.get(i);
assertEquals(expectedFacetEntry.getValue().utf8ToString() + " != " + actualFacetEntry.getValue().utf8ToString(), expectedFacetEntry.getValue(), actualFacetEntry.getValue());
assertEquals(expectedFacetEntry.getCount() + " != " + actualFacetEntry.getCount(), expectedFacetEntry.getCount(), actualFacetEntry.getCount());
assertEquals("i=" + i + ": " + expectedFacetEntry.getValue().utf8ToString() + " != " + actualFacetEntry.getValue().utf8ToString(), expectedFacetEntry.getValue(), actualFacetEntry.getValue());
assertEquals("i=" + i + ": " + expectedFacetEntry.getCount() + " != " + actualFacetEntry.getCount(), expectedFacetEntry.getCount(), actualFacetEntry.getCount());
}
}
@ -447,7 +481,7 @@ public class GroupFacetCollectorTest extends AbstractGroupingTestCase {
)
);
boolean canUseDV = true;
boolean useDv = canUseDV && random.nextBoolean();
boolean useDv = canUseDV && !multipleFacetValuesPerDocument && random.nextBoolean();
Document doc = new Document();
Document docNoGroup = new Document();
@ -463,6 +497,7 @@ public class GroupFacetCollectorTest extends AbstractGroupingTestCase {
docNoFacet.add(group);
Field[] facetFields;
if (useDv) {
assert !multipleFacetValuesPerDocument;
facetFields = new Field[2];
facetFields[0] = newStringField("facet", "", Field.Store.NO);
doc.add(facetFields[0]);
@ -507,7 +542,11 @@ public class GroupFacetCollectorTest extends AbstractGroupingTestCase {
if (random.nextInt(24) == 17) {
// So we test the "doc doesn't have the group'd
// field" case:
groupValue = null;
if (useDv) {
groupValue = "";
} else {
groupValue = null;
}
} else {
groupValue = groups.get(random.nextInt(groups.size()));
}
@ -519,7 +558,7 @@ public class GroupFacetCollectorTest extends AbstractGroupingTestCase {
Map<String, Set<String>> facetToGroups = searchTermToFacetToGroups.get(contentStr);
List<String> facetVals = new ArrayList<String>();
if (random.nextInt(24) != 18) {
if (useDv || random.nextInt(24) != 18) {
if (useDv) {
String facetValue = facetValues.get(random.nextInt(facetValues.size()));
uniqueFacetValues.add(facetValue);
@ -571,6 +610,9 @@ public class GroupFacetCollectorTest extends AbstractGroupingTestCase {
groupDc.setBytesValue(new BytesRef(groupValue));
}
group.setStringValue(groupValue);
} else if (useDv) {
// DV cannot have missing values:
groupDc.setBytesValue(new BytesRef());
}
content.setStringValue(contentStr);
if (groupValue == null && facetVals.isEmpty()) {
@ -660,8 +702,10 @@ public class GroupFacetCollectorTest extends AbstractGroupingTestCase {
return new GroupedFacetResult(totalCount, totalMissCount, entriesResult);
}
private AbstractGroupFacetCollector createRandomCollector(String groupField, String facetField, String facetPrefix, boolean multipleFacetsPerDocument, boolean useDv) {
private AbstractGroupFacetCollector createRandomCollector(String groupField, String facetField, String facetPrefix, boolean multipleFacetsPerDocument) {
BytesRef facetPrefixBR = facetPrefix == null ? null : new BytesRef(facetPrefix);
// DocValues cannot be multi-valued:
assert !multipleFacetsPerDocument || !groupField.endsWith("_dv");
return TermGroupFacetCollector.createTermGroupFacetCollector(groupField, facetField, multipleFacetsPerDocument, facetPrefixBR, random().nextInt(1024));
}