From aa28aff77e86e34cd6cbd5416adf7a00df027783 Mon Sep 17 00:00:00 2001 From: Shai Erera Date: Mon, 23 Jul 2012 10:58:56 +0000 Subject: [PATCH] LUCENE-4234: Exception when FacetsCollector is used with ScoreFacetRequest git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1364576 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 3 + .../lucene/facet/search/FacetsCollector.java | 6 +- .../facet/search/ScoredDocIdCollector.java | 14 +-- .../facet/search/TestFacetsCollector.java | 88 +++++++++++++++++++ 4 files changed, 103 insertions(+), 8 deletions(-) create mode 100644 lucene/facet/src/test/org/apache/lucene/facet/search/TestFacetsCollector.java diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index c0c588a43b5..b3baa80d422 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -93,6 +93,9 @@ Bug Fixes all queries. Made Scorer.freq() abstract. (Koji Sekiguchi, Mike McCandless, Robert Muir) +* LUCENE-4234: Exception when FacetsCollector is used with ScoreFacetRequest, + and the number of matching documents is too large. (Gilad Barkai via Shai Erera) + Build * LUCENE-4094: Support overriding file.encoding on forked test JVMs diff --git a/lucene/facet/src/java/org/apache/lucene/facet/search/FacetsCollector.java b/lucene/facet/src/java/org/apache/lucene/facet/search/FacetsCollector.java index c5e732fd173..3bd67f001af 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/search/FacetsCollector.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/search/FacetsCollector.java @@ -72,12 +72,14 @@ public class FacetsCollector extends Collector { protected ScoredDocIdCollector initScoredDocCollector( FacetSearchParams facetSearchParams, IndexReader indexReader, TaxonomyReader taxonomyReader) { + boolean scoresNeeded = false; for (FacetRequest frq : facetSearchParams.getFacetRequests()) { if (frq.requireDocumentScore()) { - return ScoredDocIdCollector.create(1000, true); + scoresNeeded = true; + break; } } - return ScoredDocIdCollector.create(indexReader.maxDoc(), false); + return ScoredDocIdCollector.create(indexReader.maxDoc(), scoresNeeded); } /** diff --git a/lucene/facet/src/java/org/apache/lucene/facet/search/ScoredDocIdCollector.java b/lucene/facet/src/java/org/apache/lucene/facet/search/ScoredDocIdCollector.java index 85e1f2a1478..6dd7e1db843 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/search/ScoredDocIdCollector.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/search/ScoredDocIdCollector.java @@ -9,7 +9,7 @@ import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.Scorer; import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.OpenBitSet; +import org.apache.lucene.util.FixedBitSet; /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -52,7 +52,7 @@ public abstract class ScoredDocIdCollector extends Collector { @Override public void collect(int doc) { - docIds.fastSet(docBase + doc); + docIds.set(docBase + doc); ++numDocIds; } @@ -103,7 +103,9 @@ public abstract class ScoredDocIdCollector extends Collector { @SuppressWarnings("synthetic-access") public ScoringDocIdCollector(int maxDoc) { super(maxDoc); - scores = new float[maxDoc]; + // only matching documents have an entry in the scores array. Therefore start with + // a small array and grow when needed. + scores = new float[64]; } @Override @@ -111,7 +113,7 @@ public abstract class ScoredDocIdCollector extends Collector { @Override public void collect(int doc) throws IOException { - docIds.fastSet(docBase + doc); + docIds.set(docBase + doc); float score = this.scorer.score(); if (numDocIds >= scores.length) { @@ -167,7 +169,7 @@ public abstract class ScoredDocIdCollector extends Collector { protected int numDocIds; protected int docBase; - protected final OpenBitSet docIds; + protected final FixedBitSet docIds; /** * Creates a new {@link ScoredDocIdCollector} with the given parameters. @@ -187,7 +189,7 @@ public abstract class ScoredDocIdCollector extends Collector { private ScoredDocIdCollector(int maxDoc) { numDocIds = 0; - docIds = new OpenBitSet(maxDoc); + docIds = new FixedBitSet(maxDoc); } /** Returns the default score used when scoring is disabled. */ diff --git a/lucene/facet/src/test/org/apache/lucene/facet/search/TestFacetsCollector.java b/lucene/facet/src/test/org/apache/lucene/facet/search/TestFacetsCollector.java new file mode 100644 index 00000000000..8004f3e91bf --- /dev/null +++ b/lucene/facet/src/test/org/apache/lucene/facet/search/TestFacetsCollector.java @@ -0,0 +1,88 @@ +package org.apache.lucene.facet.search; + +import java.util.Arrays; +import java.util.List; + +import org.apache.lucene.analysis.core.KeywordAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.StringField; +import org.apache.lucene.facet.index.CategoryDocumentBuilder; +import org.apache.lucene.facet.search.params.FacetSearchParams; +import org.apache.lucene.facet.search.params.ScoreFacetRequest; +import org.apache.lucene.facet.search.results.FacetResult; +import org.apache.lucene.facet.taxonomy.CategoryPath; +import org.apache.lucene.facet.taxonomy.TaxonomyWriter; +import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader; +import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.MultiCollector; +import org.apache.lucene.search.TopScoreDocCollector; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.LuceneTestCase; +import org.junit.Test; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class TestFacetsCollector extends LuceneTestCase { + + @Test + public void testFacetsWithDocScore() throws Exception { + Directory indexDir = newDirectory(); + Directory taxoDir = newDirectory(); + + TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxoDir); + IndexWriter iw = new IndexWriter(indexDir, new IndexWriterConfig( + TEST_VERSION_CURRENT, new KeywordAnalyzer())); + + CategoryDocumentBuilder cdb = new CategoryDocumentBuilder(taxonomyWriter); + Iterable cats = Arrays.asList(new CategoryPath("a")); + for(int i = atLeast(2000); i > 0; --i) { + Document doc = new Document(); + doc.add(new StringField("f", "v", Store.NO)); + cdb.setCategoryPaths(cats); + iw.addDocument(cdb.build(doc)); + } + + taxonomyWriter.close(); + iw.close(); + + FacetSearchParams sParams = new FacetSearchParams(); + sParams.addFacetRequest(new ScoreFacetRequest(new CategoryPath("a"), 10)); + + DirectoryReader r = DirectoryReader.open(indexDir); + DirectoryTaxonomyReader taxo = new DirectoryTaxonomyReader(taxoDir); + + FacetsCollector fc = new FacetsCollector(sParams, r, taxo); + TopScoreDocCollector topDocs = TopScoreDocCollector.create(10, false); + new IndexSearcher(r).search(new MatchAllDocsQuery(), MultiCollector.wrap(fc, topDocs)); + + List res = fc.getFacetResults(); + double value = res.get(0).getFacetResultNode().getValue(); + double expected = topDocs.topDocs().getMaxScore() * r.numDocs(); + assertEquals(expected, value, 1E-10); + + IOUtils.close(taxo, taxoDir, r, indexDir); + } + +}