LUCENE-5016: Sampling can break FacetResult labeling

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1487807 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Shai Erera 2013-05-30 10:36:59 +00:00
parent b4bf678713
commit 376b331651
6 changed files with 110 additions and 12 deletions

View File

@ -118,6 +118,11 @@ Bug Fixes
for scoringQueries. Instead use QueryValueSource to safely wrap arbitrary
queries and use them with CustomScoreQuery. (John Wang, Robert Muir)
* LUCENE-5016: SamplingAccumulator returned inconsistent label if asked to
aggregate a non-existing category. Also fixed a bug in RangeAccumulator if
some readers did not have the requested numeric DV field.
(Rob Audenaerde, Shai Erera)
Optimizations
* LUCENE-4936: Improve numeric doc values compression in case all values share

View File

@ -64,7 +64,7 @@ public class RangeAccumulator extends FacetsAccumulator {
throw new IllegalArgumentException("only flat (dimension only) CategoryPath is allowed");
}
RangeFacetRequest<?> rfr = (RangeFacetRequest) fr;
RangeFacetRequest<?> rfr = (RangeFacetRequest<?>) fr;
requests.add(new RangeSet(rfr.ranges, rfr.categoryPath.components[0]));
}
@ -86,8 +86,11 @@ public class RangeAccumulator extends FacetsAccumulator {
RangeSet ranges = requests.get(i);
int[] counts = new int[ranges.ranges.length];
for(MatchingDocs hits : matchingDocs) {
for (MatchingDocs hits : matchingDocs) {
NumericDocValues ndv = hits.context.reader().getNumericDocValues(ranges.field);
if (ndv == null) {
continue; // no numeric values for this field in this reader
}
final int length = hits.bits.length();
int doc = 0;
while (doc < length && (doc = hits.bits.nextSetBit(doc)) != -1) {

View File

@ -209,7 +209,7 @@ public abstract class Sampler {
super(orig.categoryPath, num);
this.orig = orig;
setDepth(orig.getDepth());
setNumLabel(orig.getNumLabel());
setNumLabel(0); // don't label anything as we're over-sampling
setResultMode(orig.getResultMode());
setSortOrder(orig.getSortOrder());
}

View File

@ -87,7 +87,7 @@ public class SamplingAccumulator extends StandardFacetsAccumulator {
List<FacetResult> sampleRes = super.accumulate(docids);
List<FacetResult> fixedRes = new ArrayList<FacetResult>();
List<FacetResult> results = new ArrayList<FacetResult>();
for (FacetResult fres : sampleRes) {
// for sure fres is not null because this is guaranteed by the delegee.
PartitionsFacetResultsHandler frh = createFacetResultsHandler(fres.getFacetRequest());
@ -104,13 +104,18 @@ public class SamplingAccumulator extends StandardFacetsAccumulator {
}
// final labeling if allowed (because labeling is a costly operation)
frh.labelResult(fres);
fixedRes.add(fres); // add to final results
if (fres.getFacetResultNode().ordinal == TaxonomyReader.INVALID_ORDINAL) {
// category does not exist, add an empty result
results.add(emptyResult(fres.getFacetResultNode().ordinal, fres.getFacetRequest()));
} else {
frh.labelResult(fres);
results.add(fres);
}
}
searchParams = original; // Back to original params
return fixedRes;
return results;
}
@Override

View File

@ -10,6 +10,7 @@ import org.apache.lucene.facet.sampling.Sampler.SampleResult;
import org.apache.lucene.facet.search.FacetResult;
import org.apache.lucene.facet.search.ScoredDocIDs;
import org.apache.lucene.facet.search.StandardFacetsAccumulator;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -62,7 +63,7 @@ public class SamplingWrapper extends StandardFacetsAccumulator {
List<FacetResult> sampleRes = delegee.accumulate(sampleSet.docids);
List<FacetResult> fixedRes = new ArrayList<FacetResult>();
List<FacetResult> results = new ArrayList<FacetResult>();
SampleFixer sampleFixer = sampler.samplingParams.getSampleFixer();
for (FacetResult fres : sampleRes) {
@ -80,15 +81,20 @@ public class SamplingWrapper extends StandardFacetsAccumulator {
}
// final labeling if allowed (because labeling is a costly operation)
frh.labelResult(fres);
fixedRes.add(fres); // add to final results
if (fres.getFacetResultNode().ordinal == TaxonomyReader.INVALID_ORDINAL) {
// category does not exist, add an empty result
results.add(emptyResult(fres.getFacetResultNode().ordinal, fres.getFacetRequest()));
} else {
frh.labelResult(fres);
results.add(fres);
}
}
if (shouldOversample) {
delegee.searchParams = original; // Back to original params
}
return fixedRes;
return results;
}
@Override

View File

@ -17,8 +17,20 @@ import org.apache.lucene.facet.params.CategoryListParams;
import org.apache.lucene.facet.params.FacetIndexingParams;
import org.apache.lucene.facet.params.FacetSearchParams;
import org.apache.lucene.facet.params.PerDimensionIndexingParams;
import org.apache.lucene.facet.range.LongRange;
import org.apache.lucene.facet.range.RangeAccumulator;
import org.apache.lucene.facet.range.RangeFacetRequest;
import org.apache.lucene.facet.sampling.RandomSampler;
import org.apache.lucene.facet.sampling.Sampler;
import org.apache.lucene.facet.sampling.SamplingAccumulator;
import org.apache.lucene.facet.sampling.SamplingParams;
import org.apache.lucene.facet.sampling.SamplingWrapper;
import org.apache.lucene.facet.sampling.TakmiSampleFixer;
import org.apache.lucene.facet.search.FacetRequest.ResultMode;
import org.apache.lucene.facet.sortedset.SortedSetDocValuesAccumulator;
import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
@ -385,4 +397,71 @@ public class TestFacetsCollector extends FacetTestCase {
IOUtils.close(taxo, taxoDir, r, indexDir);
}
@Test
public void testLabeling() throws Exception {
Directory indexDir = newDirectory(), taxoDir = newDirectory();
// create the index
IndexWriter indexWriter = new IndexWriter(indexDir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
FacetFields facetFields = new FacetFields(taxoWriter);
Document doc = new Document();
facetFields.addFields(doc, Arrays.asList(new CategoryPath("A/1", '/')));
indexWriter.addDocument(doc);
IOUtils.close(indexWriter, taxoWriter);
DirectoryReader indexReader = DirectoryReader.open(indexDir);
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
IndexSearcher searcher = new IndexSearcher(indexReader);
// ask to count a non-existing category to test labeling
FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(new CategoryPath("B"), 5));
final SamplingParams sampleParams = new SamplingParams();
sampleParams.setMaxSampleSize(100);
sampleParams.setMinSampleSize(100);
sampleParams.setSamplingThreshold(100);
sampleParams.setOversampleFactor(1.0d);
if (random().nextBoolean()) {
sampleParams.setSampleFixer(new TakmiSampleFixer(indexReader, taxoReader, fsp));
}
final Sampler sampler = new RandomSampler(sampleParams, random());
FacetsAccumulator[] accumulators = new FacetsAccumulator[] {
new FacetsAccumulator(fsp, indexReader, taxoReader),
new StandardFacetsAccumulator(fsp, indexReader, taxoReader),
new SamplingAccumulator(sampler, fsp, indexReader, taxoReader),
new AdaptiveFacetsAccumulator(fsp, indexReader, taxoReader),
new SamplingWrapper(new StandardFacetsAccumulator(fsp, indexReader, taxoReader), sampler)
};
for (FacetsAccumulator fa : accumulators) {
FacetsCollector fc = FacetsCollector.create(fa);
searcher.search(new MatchAllDocsQuery(), fc);
List<FacetResult> facetResults = fc.getFacetResults();
assertNotNull(facetResults);
assertEquals("incorrect label returned for " + fa, fsp.facetRequests.get(0).categoryPath, facetResults.get(0).getFacetResultNode().label);
}
try {
// SortedSetDocValuesAccumulator cannot even be created in such state
assertNull(new SortedSetDocValuesAccumulator(fsp, new SortedSetDocValuesReaderState(indexReader)));
// if this ever changes, make sure FacetResultNode is labeled correctly
fail("should not have succeeded to execute a request over a category which wasn't indexed as SortedSetDVField");
} catch (IllegalArgumentException e) {
// expected
}
fsp = new FacetSearchParams(new RangeFacetRequest<LongRange>("f", new LongRange("grr", 0, true, 1, true)));
RangeAccumulator ra = new RangeAccumulator(fsp, indexReader);
FacetsCollector fc = FacetsCollector.create(ra);
searcher.search(new MatchAllDocsQuery(), fc);
List<FacetResult> facetResults = fc.getFacetResults();
assertNotNull(facetResults);
assertEquals("incorrect label returned for RangeAccumulator", fsp.facetRequests.get(0).categoryPath, facetResults.get(0).getFacetResultNode().label);
IOUtils.close(indexReader, taxoReader);
IOUtils.close(indexDir, taxoDir);
}
}