Field names in FacetsConfig. State manager refreshes ok.

This commit is contained in:
stefanvodita 2023-03-11 19:03:09 +00:00
parent d7b39629eb
commit e2e848ea61
4 changed files with 84 additions and 37 deletions

View File

@ -26,10 +26,10 @@ import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.sortedset.DefaultSortedSetDocValuesReaderState;
import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetCounts;
import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField;
import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState;
import org.apache.lucene.facet.sortedset.SsdvReaderStatesManager;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
@ -88,8 +88,8 @@ public class SimpleSortedSetFacetsExample {
private List<FacetResult> search() throws IOException {
DirectoryReader indexReader = DirectoryReader.open(indexDir);
IndexSearcher searcher = new IndexSearcher(indexReader);
SortedSetDocValuesReaderState state =
new DefaultSortedSetDocValuesReaderState(indexReader, config);
SsdvReaderStatesManager statesManager = new SsdvReaderStatesManager(config);
SortedSetDocValuesReaderState state = statesManager.getReaderState(indexReader);
// Aggregates the facet counts
FacetsCollector fc = new FacetsCollector();
@ -114,8 +114,8 @@ public class SimpleSortedSetFacetsExample {
private FacetResult drillDown() throws IOException {
DirectoryReader indexReader = DirectoryReader.open(indexDir);
IndexSearcher searcher = new IndexSearcher(indexReader);
SortedSetDocValuesReaderState state =
new DefaultSortedSetDocValuesReaderState(indexReader, config);
SsdvReaderStatesManager statesManager = new SsdvReaderStatesManager(config);
SortedSetDocValuesReaderState state = statesManager.getReaderState(indexReader);
// Now user drills down on Publish Year/2010:
DrillDownQuery q = new DrillDownQuery(config);

View File

@ -67,6 +67,9 @@ public class FacetsConfig {
// int/float/bytes in a single indexed field:
private final Map<String, String> assocDimTypes = new ConcurrentHashMap<>();
/**
 * All the index field names used for faceting. Entries are added while documents are built, so
 * this is a concurrent set (matching the ConcurrentHashMap-backed state above) in case documents
 * are built from several threads at once.
 */
private final Set<String> fieldNames = ConcurrentHashMap.newKeySet();
/**
* Drill down terms indexing option to control whether dimension and sub-path terms should be
* indexed.
@ -230,6 +233,14 @@ public class FacetsConfig {
return fieldTypes;
}
/**
 * Return the names of all index fields used for faceting, as a read-only view. A field name is
 * only recorded once a facet field that maps to it has been encountered by {@link
 * #build(TaxonomyWriter, Document)}, so the view can grow as more documents are built.
 *
 * @return an unmodifiable view of the recorded index field names; attempts to modify it throw
 *     {@link UnsupportedOperationException}
 */
public Set<String> getFieldNames() {
  // Hand out a read-only view so callers cannot alter the config's internal bookkeeping.
  // Fully qualified to avoid depending on this file's import block.
  return java.util.Collections.unmodifiableSet(fieldNames);
}
private static void checkSeen(Set<String> seenDims, String dim) {
if (seenDims.contains(dim)) {
throw new IllegalArgumentException(
@ -272,6 +283,7 @@ public class FacetsConfig {
if (field.fieldType() == FacetField.TYPE) {
FacetField facetField = (FacetField) field;
FacetsConfig.DimConfig dimConfig = getDimConfig(facetField.dim);
fieldNames.add(dimConfig.indexFieldName);
if (dimConfig.multiValued == false) {
checkSeen(seenDims, facetField.dim);
}
@ -283,6 +295,7 @@ public class FacetsConfig {
if (field.fieldType() == SortedSetDocValuesFacetField.TYPE) {
SortedSetDocValuesFacetField facetField = (SortedSetDocValuesFacetField) field;
FacetsConfig.DimConfig dimConfig = getDimConfig(facetField.dim);
fieldNames.add(dimConfig.indexFieldName);
if (dimConfig.multiValued == false) {
checkSeen(seenDims, facetField.dim);
}
@ -295,6 +308,7 @@ public class FacetsConfig {
if (field.fieldType() == AssociationFacetField.TYPE) {
AssociationFacetField facetField = (AssociationFacetField) field;
FacetsConfig.DimConfig dimConfig = getDimConfig(facetField.dim);
fieldNames.add(dimConfig.indexFieldName);
if (dimConfig.multiValued == false) {
checkSeen(seenDims, facetField.dim);
}

View File

@ -17,11 +17,12 @@
package org.apache.lucene.facet.sortedset;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.index.IndexReader;
/**
* This class demonstrates how Lucene could help a user build SSDV ordinal maps eagerly, on refresh.
@ -29,48 +30,60 @@ import org.apache.lucene.search.IndexSearcher;
*
* <p>1. A centralized store of SSDV reader states.
*
* <p>2. A method to rebuild all reader states the user needs.
* <p>2. A mechanism to rebuild all reader states the user needs.
*/
public class SsdvReaderStatesManager {
/**
* The only attribute is a mapping of SSDV fields to reader states. Each reader state will be a
* {@link DefaultSortedSetDocValuesReaderState} with an ordinal map for one of the fields.
*/
private Map<String, SortedSetDocValuesReaderState> fieldToReaderState;
/** No arguments in the constructor. */
SsdvReaderStatesManager() {}
/** Use the {@link FacetsConfig} to find out which fields are used for faceting. */
FacetsConfig facetsConfig;
/**
* The application code would have a set of fields the user wants to facet on. They pass this set
* here once, on application start-up.
* The values in this map are mappings of fields to reader states. Each reader state will be a
* {@link DefaultSortedSetDocValuesReaderState} with an ordinal map for the field. These mappings
* are keyed by {@link IndexReader} to account for multiple active index readers during refresh.
*/
public void registerFields(Set<String> fields) {
fields.forEach(field -> fieldToReaderState.put(field, null));
private final Map<IndexReader, Map<String, SortedSetDocValuesReaderState>> readerStates =
new HashMap<>();
/**
 * Create a manager bound to the given configuration.
 *
 * @param facetsConfig the {@link FacetsConfig} this manager keeps and later consults when
 *     creating reader states
 */
public SsdvReaderStatesManager(FacetsConfig facetsConfig) {
  this.facetsConfig = facetsConfig;
}
/**
* Here we create corresponding reader states for all the fields. Each time the user does a
* refresh, they would also call this method, which ensures they will be using up-to-date ordinal
* maps until the next refresh.
* Retrieve and, if necessary, create reader states for all the fields in the {@link
* FacetsConfig}.
*/
public void loadReaderStates(IndexSearcher searcher, FacetsConfig facetsConfig) {
fieldToReaderState.replaceAll(
(field, readerState) -> {
try {
return new DefaultSortedSetDocValuesReaderState(
searcher.getIndexReader(), field, facetsConfig);
} catch (IOException e) {
throw new UncheckedIOException(e);
}
});
public Set<SortedSetDocValuesReaderState> getReaderStates(IndexReader indexReader)
    throws IOException {
  final Set<SortedSetDocValuesReaderState> result = new HashSet<>();
  // One reader state per field name currently reported by the facets config.
  for (final String fieldName : facetsConfig.getFieldNames()) {
    final SortedSetDocValuesReaderState fieldState = getReaderState(indexReader, fieldName);
    result.add(fieldState);
  }
  return result;
}
/**
 * Convenience overload that targets {@link FacetsConfig#DEFAULT_INDEX_FIELD_NAME}: returns the
 * reader state for that field on the given reader, creating it first if it does not exist yet.
 *
 * @param indexReader the reader the state belongs to
 * @return the reader state for the default facet field
 * @throws IOException if the reader state has to be created and that fails
 */
public SortedSetDocValuesReaderState getReaderState(IndexReader indexReader) throws IOException {
  final String defaultField = FacetsConfig.DEFAULT_INDEX_FIELD_NAME;
  return getReaderState(indexReader, defaultField);
}
/**
* When the user wants to facet on a field, they call getReaderState on that field and pass the
* resulting reader state to the {@link SortedSetDocValuesFacetCounts} constructor.
* Retrieve and, if necessary, create the reader state for a named field. Store the reader state
* for re-use.
*/
public SortedSetDocValuesReaderState getReaderState(String field) {
return fieldToReaderState.get(field);
public SortedSetDocValuesReaderState getReaderState(IndexReader indexReader, String field)
    throws IOException {
  // Fetch the per-reader cache of field -> reader state, creating an empty one on first use.
  // NOTE(review): nothing visible here ever removes a reader's entry from readerStates;
  // confirm how entries for readers that are no longer in use are meant to be released.
  final Map<String, SortedSetDocValuesReaderState> perField =
      readerStates.computeIfAbsent(indexReader, reader -> new HashMap<>());

  final SortedSetDocValuesReaderState cached = perField.get(field);
  if (cached != null) {
    return cached;
  }

  // Cache miss: build the state for this reader/field pair and remember it for re-use.
  final SortedSetDocValuesReaderState created =
      new DefaultSortedSetDocValuesReaderState(indexReader, field, facetsConfig);
  perField.put(field, created);
  return created;
}
}

View File

@ -16,8 +16,11 @@
*/
package org.apache.lucene.facet;
import java.io.IOException;
import java.util.Arrays;
import java.util.Set;
import org.apache.lucene.document.Document;
import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.index.DirectoryReader;
@ -115,4 +118,21 @@ public class TestFacetsConfig extends FacetTestCase {
assertTrue(config.getDimConfig("foobar").hierarchical);
}
public void testGetFieldNames() throws IOException {
  final String customField = "not-$facets";
  final FacetsConfig config = new FacetsConfig();
  config.setIndexFieldName("a", customField);

  // Dimension "a" was mapped to the custom index field, so only that name is reported.
  final Document first = new Document();
  first.add(new SortedSetDocValuesFacetField("a", "1"));
  config.build(first);
  assertEquals(Set.of(customField), config.getFieldNames());

  // Dimension "b" has no explicit mapping; the default index field name is reported as well.
  final Document second = new Document();
  second.add(new SortedSetDocValuesFacetField("b", "1"));
  config.build(second);
  final Set<String> expected = Set.of(customField, FacetsConfig.DEFAULT_INDEX_FIELD_NAME);
  assertEquals(expected, config.getFieldNames());
}
}