mirror of https://github.com/apache/lucene.git
Field names in FacetsConfig. State manager refreshes ok.
This commit is contained in:
parent
d7b39629eb
commit
e2e848ea61
|
@ -26,10 +26,10 @@ import org.apache.lucene.facet.FacetResult;
|
||||||
import org.apache.lucene.facet.Facets;
|
import org.apache.lucene.facet.Facets;
|
||||||
import org.apache.lucene.facet.FacetsCollector;
|
import org.apache.lucene.facet.FacetsCollector;
|
||||||
import org.apache.lucene.facet.FacetsConfig;
|
import org.apache.lucene.facet.FacetsConfig;
|
||||||
import org.apache.lucene.facet.sortedset.DefaultSortedSetDocValuesReaderState;
|
|
||||||
import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetCounts;
|
import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetCounts;
|
||||||
import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField;
|
import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField;
|
||||||
import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState;
|
import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState;
|
||||||
|
import org.apache.lucene.facet.sortedset.SsdvReaderStatesManager;
|
||||||
import org.apache.lucene.index.DirectoryReader;
|
import org.apache.lucene.index.DirectoryReader;
|
||||||
import org.apache.lucene.index.IndexWriter;
|
import org.apache.lucene.index.IndexWriter;
|
||||||
import org.apache.lucene.index.IndexWriterConfig;
|
import org.apache.lucene.index.IndexWriterConfig;
|
||||||
|
@ -88,8 +88,8 @@ public class SimpleSortedSetFacetsExample {
|
||||||
private List<FacetResult> search() throws IOException {
|
private List<FacetResult> search() throws IOException {
|
||||||
DirectoryReader indexReader = DirectoryReader.open(indexDir);
|
DirectoryReader indexReader = DirectoryReader.open(indexDir);
|
||||||
IndexSearcher searcher = new IndexSearcher(indexReader);
|
IndexSearcher searcher = new IndexSearcher(indexReader);
|
||||||
SortedSetDocValuesReaderState state =
|
SsdvReaderStatesManager statesManager = new SsdvReaderStatesManager(config);
|
||||||
new DefaultSortedSetDocValuesReaderState(indexReader, config);
|
SortedSetDocValuesReaderState state = statesManager.getReaderState(indexReader);
|
||||||
|
|
||||||
// Aggregates the facet counts
|
// Aggregates the facet counts
|
||||||
FacetsCollector fc = new FacetsCollector();
|
FacetsCollector fc = new FacetsCollector();
|
||||||
|
@ -114,8 +114,8 @@ public class SimpleSortedSetFacetsExample {
|
||||||
private FacetResult drillDown() throws IOException {
|
private FacetResult drillDown() throws IOException {
|
||||||
DirectoryReader indexReader = DirectoryReader.open(indexDir);
|
DirectoryReader indexReader = DirectoryReader.open(indexDir);
|
||||||
IndexSearcher searcher = new IndexSearcher(indexReader);
|
IndexSearcher searcher = new IndexSearcher(indexReader);
|
||||||
SortedSetDocValuesReaderState state =
|
SsdvReaderStatesManager statesManager = new SsdvReaderStatesManager(config);
|
||||||
new DefaultSortedSetDocValuesReaderState(indexReader, config);
|
SortedSetDocValuesReaderState state = statesManager.getReaderState(indexReader);
|
||||||
|
|
||||||
// Now user drills down on Publish Year/2010:
|
// Now user drills down on Publish Year/2010:
|
||||||
DrillDownQuery q = new DrillDownQuery(config);
|
DrillDownQuery q = new DrillDownQuery(config);
|
||||||
|
|
|
@ -67,6 +67,9 @@ public class FacetsConfig {
|
||||||
// int/float/bytes in a single indexed field:
|
// int/float/bytes in a single indexed field:
|
||||||
private final Map<String, String> assocDimTypes = new ConcurrentHashMap<>();
|
private final Map<String, String> assocDimTypes = new ConcurrentHashMap<>();
|
||||||
|
|
||||||
|
/** All the fields used for faceting. */
|
||||||
|
private final Set<String> fieldNames = new HashSet<>();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Drill down terms indexing option to control whether dimension and sub-path terms should be
|
* Drill down terms indexing option to control whether dimension and sub-path terms should be
|
||||||
* indexed.
|
* indexed.
|
||||||
|
@ -230,6 +233,14 @@ public class FacetsConfig {
|
||||||
return fieldTypes;
|
return fieldTypes;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return the names of all fields used for faceting. This is only populated for fields that are
|
||||||
|
* encountered by {@link #build(TaxonomyWriter, Document)}.
|
||||||
|
*/
|
||||||
|
public Set<String> getFieldNames() {
|
||||||
|
return fieldNames;
|
||||||
|
}
|
||||||
|
|
||||||
private static void checkSeen(Set<String> seenDims, String dim) {
|
private static void checkSeen(Set<String> seenDims, String dim) {
|
||||||
if (seenDims.contains(dim)) {
|
if (seenDims.contains(dim)) {
|
||||||
throw new IllegalArgumentException(
|
throw new IllegalArgumentException(
|
||||||
|
@ -272,6 +283,7 @@ public class FacetsConfig {
|
||||||
if (field.fieldType() == FacetField.TYPE) {
|
if (field.fieldType() == FacetField.TYPE) {
|
||||||
FacetField facetField = (FacetField) field;
|
FacetField facetField = (FacetField) field;
|
||||||
FacetsConfig.DimConfig dimConfig = getDimConfig(facetField.dim);
|
FacetsConfig.DimConfig dimConfig = getDimConfig(facetField.dim);
|
||||||
|
fieldNames.add(dimConfig.indexFieldName);
|
||||||
if (dimConfig.multiValued == false) {
|
if (dimConfig.multiValued == false) {
|
||||||
checkSeen(seenDims, facetField.dim);
|
checkSeen(seenDims, facetField.dim);
|
||||||
}
|
}
|
||||||
|
@ -283,6 +295,7 @@ public class FacetsConfig {
|
||||||
if (field.fieldType() == SortedSetDocValuesFacetField.TYPE) {
|
if (field.fieldType() == SortedSetDocValuesFacetField.TYPE) {
|
||||||
SortedSetDocValuesFacetField facetField = (SortedSetDocValuesFacetField) field;
|
SortedSetDocValuesFacetField facetField = (SortedSetDocValuesFacetField) field;
|
||||||
FacetsConfig.DimConfig dimConfig = getDimConfig(facetField.dim);
|
FacetsConfig.DimConfig dimConfig = getDimConfig(facetField.dim);
|
||||||
|
fieldNames.add(dimConfig.indexFieldName);
|
||||||
if (dimConfig.multiValued == false) {
|
if (dimConfig.multiValued == false) {
|
||||||
checkSeen(seenDims, facetField.dim);
|
checkSeen(seenDims, facetField.dim);
|
||||||
}
|
}
|
||||||
|
@ -295,6 +308,7 @@ public class FacetsConfig {
|
||||||
if (field.fieldType() == AssociationFacetField.TYPE) {
|
if (field.fieldType() == AssociationFacetField.TYPE) {
|
||||||
AssociationFacetField facetField = (AssociationFacetField) field;
|
AssociationFacetField facetField = (AssociationFacetField) field;
|
||||||
FacetsConfig.DimConfig dimConfig = getDimConfig(facetField.dim);
|
FacetsConfig.DimConfig dimConfig = getDimConfig(facetField.dim);
|
||||||
|
fieldNames.add(dimConfig.indexFieldName);
|
||||||
if (dimConfig.multiValued == false) {
|
if (dimConfig.multiValued == false) {
|
||||||
checkSeen(seenDims, facetField.dim);
|
checkSeen(seenDims, facetField.dim);
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,11 +17,12 @@
|
||||||
package org.apache.lucene.facet.sortedset;
|
package org.apache.lucene.facet.sortedset;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.UncheckedIOException;
|
import java.util.HashMap;
|
||||||
|
import java.util.HashSet;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import org.apache.lucene.facet.FacetsConfig;
|
import org.apache.lucene.facet.FacetsConfig;
|
||||||
import org.apache.lucene.search.IndexSearcher;
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This class demonstrates how Lucene could help a user build SSDV ordinal maps eagerly, on refresh.
|
* This class demonstrates how Lucene could help a user build SSDV ordinal maps eagerly, on refresh.
|
||||||
|
@ -29,48 +30,60 @@ import org.apache.lucene.search.IndexSearcher;
|
||||||
*
|
*
|
||||||
* <p>1. A centralized store of SSDV reader states.
|
* <p>1. A centralized store of SSDV reader states.
|
||||||
*
|
*
|
||||||
* <p>2. A method to rebuild all reader states the user needs.
|
* <p>2. A mechanism to rebuild all reader states the user needs.
|
||||||
*/
|
*/
|
||||||
public class SsdvReaderStatesManager {
|
public class SsdvReaderStatesManager {
|
||||||
/**
|
/** Use the {@link FacetsConfig} to find out which fields are used for faceting. */
|
||||||
* The only attribute is a mapping of SSDV fields to reader states. Each reader state will be a
|
FacetsConfig facetsConfig;
|
||||||
* {@link DefaultSortedSetDocValuesReaderState} with an ordinal map for one of the fields.
|
|
||||||
*/
|
|
||||||
private Map<String, SortedSetDocValuesReaderState> fieldToReaderState;
|
|
||||||
|
|
||||||
/** No arguments in the constructor. */
|
|
||||||
SsdvReaderStatesManager() {}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The application code would have a set of fields the user wants to facet on. They pass this set
|
* The values in this map are mappings of fields to reader states. Each reader state will be a
|
||||||
* here once, on application start-up.
|
* {@link DefaultSortedSetDocValuesReaderState} with an ordinal map for the field. These mappings
|
||||||
|
* are keyed by {@link IndexReader} to account for multiple active index readers during refresh.
|
||||||
*/
|
*/
|
||||||
public void registerFields(Set<String> fields) {
|
private final Map<IndexReader, Map<String, SortedSetDocValuesReaderState>> readerStates =
|
||||||
fields.forEach(field -> fieldToReaderState.put(field, null));
|
new HashMap<>();
|
||||||
|
|
||||||
|
/** Accept a {@link FacetsConfig}. */
|
||||||
|
public SsdvReaderStatesManager(FacetsConfig facetsConfig) {
|
||||||
|
this.facetsConfig = facetsConfig;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Here we create corresponding reader states for all the fields. Each time the user does a
|
* Retrieve and, if necessary, create reader states for all the fields in the {@link
|
||||||
* refresh, they would also call this method, which ensures they will be using up-to-date ordinal
|
* FacetsConfig}.
|
||||||
* maps until the next refresh.
|
|
||||||
*/
|
*/
|
||||||
public void loadReaderStates(IndexSearcher searcher, FacetsConfig facetsConfig) {
|
public Set<SortedSetDocValuesReaderState> getReaderStates(IndexReader indexReader)
|
||||||
fieldToReaderState.replaceAll(
|
throws IOException {
|
||||||
(field, readerState) -> {
|
Set<SortedSetDocValuesReaderState> states = new HashSet<>();
|
||||||
try {
|
for (String field : facetsConfig.getFieldNames()) {
|
||||||
return new DefaultSortedSetDocValuesReaderState(
|
states.add(getReaderState(indexReader, field));
|
||||||
searcher.getIndexReader(), field, facetsConfig);
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new UncheckedIOException(e);
|
|
||||||
}
|
}
|
||||||
});
|
return states;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Retrieve and, if necessary, create the reader state for the default field. */
|
||||||
|
public SortedSetDocValuesReaderState getReaderState(IndexReader indexReader) throws IOException {
|
||||||
|
return getReaderState(indexReader, FacetsConfig.DEFAULT_INDEX_FIELD_NAME);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* When the user wants to facet on a field, they call getReaderState on that field and pass the
|
* Retrieve and, if necessary, create the reader state for a named field. Store the reader state
|
||||||
* resulting reader state to the {@link SortedSetDocValuesFacetCounts} constructor.
|
* for re-use.
|
||||||
*/
|
*/
|
||||||
public SortedSetDocValuesReaderState getReaderState(String field) {
|
public SortedSetDocValuesReaderState getReaderState(IndexReader indexReader, String field)
|
||||||
return fieldToReaderState.get(field);
|
throws IOException {
|
||||||
|
Map<String, SortedSetDocValuesReaderState> states = readerStates.get(indexReader);
|
||||||
|
if (states == null) {
|
||||||
|
states = new HashMap<>();
|
||||||
|
readerStates.put(indexReader, states);
|
||||||
|
}
|
||||||
|
|
||||||
|
SortedSetDocValuesReaderState state = states.get(field);
|
||||||
|
if (state == null) {
|
||||||
|
state = new DefaultSortedSetDocValuesReaderState(indexReader, field, facetsConfig);
|
||||||
|
states.put(field, state);
|
||||||
|
}
|
||||||
|
return state;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,8 +16,11 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.lucene.facet;
|
package org.apache.lucene.facet;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
import java.util.Set;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField;
|
||||||
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
|
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
|
||||||
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
|
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
|
||||||
import org.apache.lucene.index.DirectoryReader;
|
import org.apache.lucene.index.DirectoryReader;
|
||||||
|
@ -115,4 +118,21 @@ public class TestFacetsConfig extends FacetTestCase {
|
||||||
|
|
||||||
assertTrue(config.getDimConfig("foobar").hierarchical);
|
assertTrue(config.getDimConfig("foobar").hierarchical);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testGetFieldNames() throws IOException {
|
||||||
|
FacetsConfig config = new FacetsConfig();
|
||||||
|
config.setIndexFieldName("a", "not-$facets");
|
||||||
|
Document doc;
|
||||||
|
|
||||||
|
doc = new Document();
|
||||||
|
doc.add(new SortedSetDocValuesFacetField("a", "1"));
|
||||||
|
config.build(doc);
|
||||||
|
assertEquals(Set.of("not-$facets"), config.getFieldNames());
|
||||||
|
|
||||||
|
doc = new Document();
|
||||||
|
doc.add(new SortedSetDocValuesFacetField("b", "1"));
|
||||||
|
config.build(doc);
|
||||||
|
assertEquals(
|
||||||
|
Set.of("not-$facets", FacetsConfig.DEFAULT_INDEX_FIELD_NAME), config.getFieldNames());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue