mirror of https://github.com/apache/lucene.git
LUCENE-5426: allow customization of SortedSetDocValuesReaderState for Lucene doc values faceting
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1565167 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
b436fd64cb
commit
9074bf8832
|
@ -135,6 +135,10 @@ New Features
|
|||
* LUCENE-5410: Add fuzzy and near support via '~' operator to SimpleQueryParser.
|
||||
(Lee Hinman via Robert Muir)
|
||||
|
||||
* LUCENE-5426: Make SortedSetDocValuesReaderState abstract to allow
|
||||
custom implementations for Lucene doc values faceting (John Wang via
|
||||
Mike McCandless)
|
||||
|
||||
Build
|
||||
|
||||
* LUCENE-5217,LUCENE-5420: Maven config: get dependencies from Ant+Ivy config;
|
||||
|
|
|
@ -28,6 +28,7 @@ import org.apache.lucene.facet.FacetResult;
|
|||
import org.apache.lucene.facet.Facets;
|
||||
import org.apache.lucene.facet.FacetsCollector;
|
||||
import org.apache.lucene.facet.FacetsConfig;
|
||||
import org.apache.lucene.facet.sortedset.DefaultSortedSetDocValuesReaderState;
|
||||
import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetCounts;
|
||||
import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField;
|
||||
import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState;
|
||||
|
@ -88,7 +89,7 @@ public class SimpleSortedSetFacetsExample {
|
|||
private List<FacetResult> search() throws IOException {
|
||||
DirectoryReader indexReader = DirectoryReader.open(indexDir);
|
||||
IndexSearcher searcher = new IndexSearcher(indexReader);
|
||||
SortedSetDocValuesReaderState state = new SortedSetDocValuesReaderState(indexReader);
|
||||
SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(indexReader);
|
||||
|
||||
// Aggregates the facet counts
|
||||
FacetsCollector fc = new FacetsCollector();
|
||||
|
@ -113,7 +114,7 @@ public class SimpleSortedSetFacetsExample {
|
|||
private FacetResult drillDown() throws IOException {
|
||||
DirectoryReader indexReader = DirectoryReader.open(indexDir);
|
||||
IndexSearcher searcher = new IndexSearcher(indexReader);
|
||||
SortedSetDocValuesReaderState state = new SortedSetDocValuesReaderState(indexReader);
|
||||
SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(indexReader);
|
||||
|
||||
// Now user drills down on Publish Year/2010:
|
||||
DrillDownQuery q = new DrillDownQuery(config);
|
||||
|
|
|
@ -0,0 +1,137 @@
|
|||
package org.apache.lucene.facet.sortedset;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.facet.FacetsConfig;
|
||||
import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState.OrdRange;
|
||||
import org.apache.lucene.index.AtomicReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.SlowCompositeReaderWrapper;
|
||||
import org.apache.lucene.index.SortedSetDocValues;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Default implementation of {@link SortedSetDocValuesReaderState}
|
||||
*/
|
||||
public class DefaultSortedSetDocValuesReaderState extends SortedSetDocValuesReaderState {
|
||||
|
||||
private final String field;
|
||||
private final AtomicReader topReader;
|
||||
private final int valueCount;
|
||||
|
||||
/** {@link IndexReader} passed to the constructor. */
|
||||
public final IndexReader origReader;
|
||||
|
||||
private final Map<String,OrdRange> prefixToOrdRange = new HashMap<String,OrdRange>();
|
||||
|
||||
/** Creates this, pulling doc values from the default {@link
|
||||
* FacetsConfig#DEFAULT_INDEX_FIELD_NAME}. */
|
||||
public DefaultSortedSetDocValuesReaderState(IndexReader reader) throws IOException {
|
||||
this(reader, FacetsConfig.DEFAULT_INDEX_FIELD_NAME);
|
||||
}
|
||||
|
||||
/** Creates this, pulling doc values from the specified
|
||||
* field. */
|
||||
public DefaultSortedSetDocValuesReaderState(IndexReader reader, String field) throws IOException {
|
||||
this.field = field;
|
||||
this.origReader = reader;
|
||||
|
||||
// We need this to create thread-safe MultiSortedSetDV
|
||||
// per collector:
|
||||
topReader = SlowCompositeReaderWrapper.wrap(reader);
|
||||
SortedSetDocValues dv = topReader.getSortedSetDocValues(field);
|
||||
if (dv == null) {
|
||||
throw new IllegalArgumentException("field \"" + field + "\" was not indexed with SortedSetDocValues");
|
||||
}
|
||||
if (dv.getValueCount() > Integer.MAX_VALUE) {
|
||||
throw new IllegalArgumentException("can only handle valueCount < Integer.MAX_VALUE; got " + dv.getValueCount());
|
||||
}
|
||||
valueCount = (int) dv.getValueCount();
|
||||
|
||||
// TODO: we can make this more efficient if eg we can be
|
||||
// "involved" when OrdinalMap is being created? Ie see
|
||||
// each term/ord it's assigning as it goes...
|
||||
String lastDim = null;
|
||||
int startOrd = -1;
|
||||
BytesRef spare = new BytesRef();
|
||||
|
||||
// TODO: this approach can work for full hierarchy?;
|
||||
// TaxoReader can't do this since ords are not in
|
||||
// "sorted order" ... but we should generalize this to
|
||||
// support arbitrary hierarchy:
|
||||
for(int ord=0;ord<valueCount;ord++) {
|
||||
dv.lookupOrd(ord, spare);
|
||||
String[] components = FacetsConfig.stringToPath(spare.utf8ToString());
|
||||
if (components.length != 2) {
|
||||
throw new IllegalArgumentException("this class can only handle 2 level hierarchy (dim/value); got: " + Arrays.toString(components) + " " + spare.utf8ToString());
|
||||
}
|
||||
if (!components[0].equals(lastDim)) {
|
||||
if (lastDim != null) {
|
||||
prefixToOrdRange.put(lastDim, new OrdRange(startOrd, ord-1));
|
||||
}
|
||||
startOrd = ord;
|
||||
lastDim = components[0];
|
||||
}
|
||||
}
|
||||
|
||||
if (lastDim != null) {
|
||||
prefixToOrdRange.put(lastDim, new OrdRange(startOrd, valueCount-1));
|
||||
}
|
||||
}
|
||||
|
||||
/** Return top-level doc values. */
|
||||
@Override
|
||||
public SortedSetDocValues getDocValues() throws IOException {
|
||||
return topReader.getSortedSetDocValues(field);
|
||||
}
|
||||
|
||||
/** Returns mapping from prefix to {@link OrdRange}. */
|
||||
@Override
|
||||
public Map<String,OrdRange> getPrefixToOrdRange() {
|
||||
return prefixToOrdRange;
|
||||
}
|
||||
|
||||
/** Returns the {@link OrdRange} for this dimension. */
|
||||
@Override
|
||||
public OrdRange getOrdRange(String dim) {
|
||||
return prefixToOrdRange.get(dim);
|
||||
}
|
||||
|
||||
/** Indexed field we are reading. */
|
||||
@Override
|
||||
public String getField() {
|
||||
return field;
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReader getOrigReader() {
|
||||
return origReader;
|
||||
}
|
||||
|
||||
/** Number of unique labels. */
|
||||
@Override
|
||||
public int getSize() {
|
||||
return valueCount;
|
||||
}
|
||||
|
||||
}
|
|
@ -71,8 +71,8 @@ public class SortedSetDocValuesFacetCounts extends Facets {
|
|||
throws IOException {
|
||||
this.state = state;
|
||||
this.field = state.getField();
|
||||
dv = state.getDocValues();
|
||||
counts = new int[state.getSize()];
|
||||
dv = state.getDocValues();
|
||||
//System.out.println("field=" + field);
|
||||
count(hits.getMatchingDocs());
|
||||
}
|
||||
|
@ -158,6 +158,8 @@ public class SortedSetDocValuesFacetCounts extends Facets {
|
|||
} else {
|
||||
ordinalMap = null;
|
||||
}
|
||||
|
||||
IndexReader origReader = state.getOrigReader();
|
||||
|
||||
for(MatchingDocs hits : matchingDocs) {
|
||||
|
||||
|
@ -167,7 +169,7 @@ public class SortedSetDocValuesFacetCounts extends Facets {
|
|||
// the top-level reader passed to the
|
||||
// SortedSetDocValuesReaderState, else cryptic
|
||||
// AIOOBE can happen:
|
||||
if (ReaderUtil.getTopLevelContext(hits.context).reader() != state.origReader) {
|
||||
if (ReaderUtil.getTopLevelContext(hits.context).reader() != origReader) {
|
||||
throw new IllegalStateException("the SortedSetDocValuesReaderState provided to this class does not match the reader being searched; you must create a new SortedSetDocValuesReaderState every time you open a new IndexReader");
|
||||
}
|
||||
|
||||
|
|
|
@ -45,14 +45,7 @@ import org.apache.lucene.util.BytesRef;
|
|||
* so you should create it once and re-use that one instance
|
||||
* for a given {@link IndexReader}. */
|
||||
|
||||
public final class SortedSetDocValuesReaderState {
|
||||
|
||||
private final String field;
|
||||
private final AtomicReader topReader;
|
||||
private final int valueCount;
|
||||
|
||||
/** {@link IndexReader} passed to the constructor. */
|
||||
public final IndexReader origReader;
|
||||
public abstract class SortedSetDocValuesReaderState {
|
||||
|
||||
/** Holds start/end range of ords, which maps to one
|
||||
* dimension (someday we may generalize it to map to
|
||||
|
@ -70,86 +63,25 @@ public final class SortedSetDocValuesReaderState {
|
|||
}
|
||||
}
|
||||
|
||||
private final Map<String,OrdRange> prefixToOrdRange = new HashMap<String,OrdRange>();
|
||||
|
||||
/** Creates this, pulling doc values from the default {@link
|
||||
* FacetsConfig#DEFAULT_INDEX_FIELD_NAME}. */
|
||||
public SortedSetDocValuesReaderState(IndexReader reader) throws IOException {
|
||||
this(reader, FacetsConfig.DEFAULT_INDEX_FIELD_NAME);
|
||||
/** Sole constructor. */
|
||||
protected SortedSetDocValuesReaderState() {
|
||||
}
|
||||
|
||||
/** Creates this, pulling doc values from the specified
|
||||
* field. */
|
||||
public SortedSetDocValuesReaderState(IndexReader reader, String field) throws IOException {
|
||||
|
||||
this.field = field;
|
||||
this.origReader = reader;
|
||||
|
||||
// We need this to create thread-safe MultiSortedSetDV
|
||||
// per collector:
|
||||
topReader = SlowCompositeReaderWrapper.wrap(reader);
|
||||
SortedSetDocValues dv = topReader.getSortedSetDocValues(field);
|
||||
if (dv == null) {
|
||||
throw new IllegalArgumentException("field \"" + field + "\" was not indexed with SortedSetDocValues");
|
||||
}
|
||||
if (dv.getValueCount() > Integer.MAX_VALUE) {
|
||||
throw new IllegalArgumentException("can only handle valueCount < Integer.MAX_VALUE; got " + dv.getValueCount());
|
||||
}
|
||||
valueCount = (int) dv.getValueCount();
|
||||
|
||||
// TODO: we can make this more efficient if eg we can be
|
||||
// "involved" when OrdinalMap is being created? Ie see
|
||||
// each term/ord it's assigning as it goes...
|
||||
String lastDim = null;
|
||||
int startOrd = -1;
|
||||
BytesRef spare = new BytesRef();
|
||||
|
||||
// TODO: this approach can work for full hierarchy?;
|
||||
// TaxoReader can't do this since ords are not in
|
||||
// "sorted order" ... but we should generalize this to
|
||||
// support arbitrary hierarchy:
|
||||
for(int ord=0;ord<valueCount;ord++) {
|
||||
dv.lookupOrd(ord, spare);
|
||||
String[] components = FacetsConfig.stringToPath(spare.utf8ToString());
|
||||
if (components.length != 2) {
|
||||
throw new IllegalArgumentException("this class can only handle 2 level hierarchy (dim/value); got: " + Arrays.toString(components) + " " + spare.utf8ToString());
|
||||
}
|
||||
if (!components[0].equals(lastDim)) {
|
||||
if (lastDim != null) {
|
||||
prefixToOrdRange.put(lastDim, new OrdRange(startOrd, ord-1));
|
||||
}
|
||||
startOrd = ord;
|
||||
lastDim = components[0];
|
||||
}
|
||||
}
|
||||
|
||||
if (lastDim != null) {
|
||||
prefixToOrdRange.put(lastDim, new OrdRange(startOrd, valueCount-1));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** Return top-level doc values. */
|
||||
public SortedSetDocValues getDocValues() throws IOException {
|
||||
return topReader.getSortedSetDocValues(field);
|
||||
}
|
||||
|
||||
/** Returns mapping from prefix to {@link OrdRange}. */
|
||||
public Map<String,OrdRange> getPrefixToOrdRange() {
|
||||
return prefixToOrdRange;
|
||||
}
|
||||
|
||||
/** Returns the {@link OrdRange} for this dimension. */
|
||||
public OrdRange getOrdRange(String dim) {
|
||||
return prefixToOrdRange.get(dim);
|
||||
}
|
||||
|
||||
public abstract SortedSetDocValues getDocValues() throws IOException;
|
||||
|
||||
/** Indexed field we are reading. */
|
||||
public String getField() {
|
||||
return field;
|
||||
}
|
||||
|
||||
public abstract String getField();
|
||||
|
||||
/** Returns the {@link OrdRange} for this dimension. */
|
||||
public abstract OrdRange getOrdRange(String dim);
|
||||
|
||||
/** Returns mapping from prefix to {@link OrdRange}. */
|
||||
public abstract Map<String,OrdRange> getPrefixToOrdRange();
|
||||
|
||||
/** Returns top-level index reader. */
|
||||
public abstract IndexReader getOrigReader();
|
||||
|
||||
/** Number of unique labels. */
|
||||
public int getSize() {
|
||||
return valueCount;
|
||||
}
|
||||
public abstract int getSize();
|
||||
}
|
||||
|
|
|
@ -32,6 +32,7 @@ import org.apache.lucene.document.Document;
|
|||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.facet.DrillSideways.DrillSidewaysResult;
|
||||
import org.apache.lucene.facet.sortedset.DefaultSortedSetDocValuesReaderState;
|
||||
import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField;
|
||||
import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
|
@ -552,7 +553,7 @@ public class TestDrillSideways extends FacetTestCase {
|
|||
IndexSearcher s = newSearcher(r);
|
||||
|
||||
if (doUseDV) {
|
||||
sortedSetDVState = new SortedSetDocValuesReaderState(s.getIndexReader());
|
||||
sortedSetDVState = new DefaultSortedSetDocValuesReaderState(s.getIndexReader());
|
||||
} else {
|
||||
sortedSetDVState = null;
|
||||
}
|
||||
|
|
|
@ -74,7 +74,7 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
|
|||
IndexSearcher searcher = newSearcher(writer.getReader());
|
||||
|
||||
// Per-top-reader state:
|
||||
SortedSetDocValuesReaderState state = new SortedSetDocValuesReaderState(searcher.getIndexReader());
|
||||
SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.getIndexReader());
|
||||
|
||||
FacetsCollector c = new FacetsCollector();
|
||||
|
||||
|
@ -110,7 +110,7 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
|
|||
writer.addDocument(config.build(doc));
|
||||
|
||||
IndexReader r = writer.getReader();
|
||||
SortedSetDocValuesReaderState state = new SortedSetDocValuesReaderState(r);
|
||||
SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(r);
|
||||
|
||||
doc = new Document();
|
||||
doc.add(new SortedSetDocValuesFacetField("a", "bar"));
|
||||
|
@ -176,7 +176,7 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
|
|||
writer.close();
|
||||
|
||||
// Per-top-reader state:
|
||||
SortedSetDocValuesReaderState state = new SortedSetDocValuesReaderState(searcher.getIndexReader());
|
||||
SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.getIndexReader());
|
||||
|
||||
FacetsCollector c = new FacetsCollector();
|
||||
searcher.search(new MatchAllDocsQuery(), c);
|
||||
|
@ -221,7 +221,7 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
|
|||
writer.close();
|
||||
|
||||
// Per-top-reader state:
|
||||
SortedSetDocValuesReaderState state = new SortedSetDocValuesReaderState(searcher.getIndexReader());
|
||||
SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.getIndexReader());
|
||||
|
||||
FacetsCollector c = new FacetsCollector();
|
||||
searcher.search(new MatchAllDocsQuery(), c);
|
||||
|
@ -256,7 +256,7 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
|
|||
IndexSearcher searcher = new IndexSearcher(SlowCompositeReaderWrapper.wrap(writer.getReader()));
|
||||
|
||||
// Per-top-reader state:
|
||||
SortedSetDocValuesReaderState state = new SortedSetDocValuesReaderState(searcher.getIndexReader());
|
||||
SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.getIndexReader());
|
||||
|
||||
FacetsCollector c = new FacetsCollector();
|
||||
searcher.search(new MatchAllDocsQuery(), c);
|
||||
|
@ -295,7 +295,7 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
|
|||
IndexSearcher searcher = newSearcher(w.getReader());
|
||||
|
||||
// Per-top-reader state:
|
||||
SortedSetDocValuesReaderState state = new SortedSetDocValuesReaderState(searcher.getIndexReader());
|
||||
SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.getIndexReader());
|
||||
|
||||
int iters = atLeast(100);
|
||||
for(int iter=0;iter<iters;iter++) {
|
||||
|
|
Loading…
Reference in New Issue