Track field names in FacetsConfig. Reader state manager now handles refreshes.

This commit is contained in:
stefanvodita 2023-03-11 19:03:09 +00:00
parent d7b39629eb
commit e2e848ea61
4 changed files with 84 additions and 37 deletions

View File

@ -26,10 +26,10 @@ import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.Facets; import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.FacetsCollector; import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsConfig; import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.sortedset.DefaultSortedSetDocValuesReaderState;
import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetCounts; import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetCounts;
import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField; import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField;
import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState; import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState;
import org.apache.lucene.facet.sortedset.SsdvReaderStatesManager;
import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig;
@ -88,8 +88,8 @@ public class SimpleSortedSetFacetsExample {
private List<FacetResult> search() throws IOException { private List<FacetResult> search() throws IOException {
DirectoryReader indexReader = DirectoryReader.open(indexDir); DirectoryReader indexReader = DirectoryReader.open(indexDir);
IndexSearcher searcher = new IndexSearcher(indexReader); IndexSearcher searcher = new IndexSearcher(indexReader);
SortedSetDocValuesReaderState state = SsdvReaderStatesManager statesManager = new SsdvReaderStatesManager(config);
new DefaultSortedSetDocValuesReaderState(indexReader, config); SortedSetDocValuesReaderState state = statesManager.getReaderState(indexReader);
// Aggregates the facet counts // Aggregates the facet counts
FacetsCollector fc = new FacetsCollector(); FacetsCollector fc = new FacetsCollector();
@ -114,8 +114,8 @@ public class SimpleSortedSetFacetsExample {
private FacetResult drillDown() throws IOException { private FacetResult drillDown() throws IOException {
DirectoryReader indexReader = DirectoryReader.open(indexDir); DirectoryReader indexReader = DirectoryReader.open(indexDir);
IndexSearcher searcher = new IndexSearcher(indexReader); IndexSearcher searcher = new IndexSearcher(indexReader);
SortedSetDocValuesReaderState state = SsdvReaderStatesManager statesManager = new SsdvReaderStatesManager(config);
new DefaultSortedSetDocValuesReaderState(indexReader, config); SortedSetDocValuesReaderState state = statesManager.getReaderState(indexReader);
// Now user drills down on Publish Year/2010: // Now user drills down on Publish Year/2010:
DrillDownQuery q = new DrillDownQuery(config); DrillDownQuery q = new DrillDownQuery(config);

View File

@ -67,6 +67,9 @@ public class FacetsConfig {
// int/float/bytes in a single indexed field: // int/float/bytes in a single indexed field:
private final Map<String, String> assocDimTypes = new ConcurrentHashMap<>(); private final Map<String, String> assocDimTypes = new ConcurrentHashMap<>();
/** All the fields used for faceting. */
private final Set<String> fieldNames = new HashSet<>();
/** /**
* Drill down terms indexing option to control whether dimension and sub-path terms should be * Drill down terms indexing option to control whether dimension and sub-path terms should be
* indexed. * indexed.
@ -230,6 +233,14 @@ public class FacetsConfig {
return fieldTypes; return fieldTypes;
} }
/**
 * Return the names of all fields used for faceting. This is only populated for fields that are
 * encountered by {@link #build(TaxonomyWriter, Document)}.
 *
 * <p>NOTE(review): the internal set itself is returned rather than a copy, so callers see names
 * added later and are able to modify it; confirm whether an unmodifiable view is wanted.
 *
 * @return the set of index field names recorded so far
 */
public Set<String> getFieldNames() {
return fieldNames;
}
private static void checkSeen(Set<String> seenDims, String dim) { private static void checkSeen(Set<String> seenDims, String dim) {
if (seenDims.contains(dim)) { if (seenDims.contains(dim)) {
throw new IllegalArgumentException( throw new IllegalArgumentException(
@ -272,6 +283,7 @@ public class FacetsConfig {
if (field.fieldType() == FacetField.TYPE) { if (field.fieldType() == FacetField.TYPE) {
FacetField facetField = (FacetField) field; FacetField facetField = (FacetField) field;
FacetsConfig.DimConfig dimConfig = getDimConfig(facetField.dim); FacetsConfig.DimConfig dimConfig = getDimConfig(facetField.dim);
fieldNames.add(dimConfig.indexFieldName);
if (dimConfig.multiValued == false) { if (dimConfig.multiValued == false) {
checkSeen(seenDims, facetField.dim); checkSeen(seenDims, facetField.dim);
} }
@ -283,6 +295,7 @@ public class FacetsConfig {
if (field.fieldType() == SortedSetDocValuesFacetField.TYPE) { if (field.fieldType() == SortedSetDocValuesFacetField.TYPE) {
SortedSetDocValuesFacetField facetField = (SortedSetDocValuesFacetField) field; SortedSetDocValuesFacetField facetField = (SortedSetDocValuesFacetField) field;
FacetsConfig.DimConfig dimConfig = getDimConfig(facetField.dim); FacetsConfig.DimConfig dimConfig = getDimConfig(facetField.dim);
fieldNames.add(dimConfig.indexFieldName);
if (dimConfig.multiValued == false) { if (dimConfig.multiValued == false) {
checkSeen(seenDims, facetField.dim); checkSeen(seenDims, facetField.dim);
} }
@ -295,6 +308,7 @@ public class FacetsConfig {
if (field.fieldType() == AssociationFacetField.TYPE) { if (field.fieldType() == AssociationFacetField.TYPE) {
AssociationFacetField facetField = (AssociationFacetField) field; AssociationFacetField facetField = (AssociationFacetField) field;
FacetsConfig.DimConfig dimConfig = getDimConfig(facetField.dim); FacetsConfig.DimConfig dimConfig = getDimConfig(facetField.dim);
fieldNames.add(dimConfig.indexFieldName);
if (dimConfig.multiValued == false) { if (dimConfig.multiValued == false) {
checkSeen(seenDims, facetField.dim); checkSeen(seenDims, facetField.dim);
} }

View File

@ -17,11 +17,12 @@
package org.apache.lucene.facet.sortedset; package org.apache.lucene.facet.sortedset;
import java.io.IOException; import java.io.IOException;
import java.io.UncheckedIOException; import java.util.HashMap;
import java.util.HashSet;
import java.util.Map; import java.util.Map;
import java.util.Set; import java.util.Set;
import org.apache.lucene.facet.FacetsConfig; import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.index.IndexReader;
/** /**
* This class demonstrates how Lucene could help a user build SSDV ordinal maps eagerly, on refresh. * This class demonstrates how Lucene could help a user build SSDV ordinal maps eagerly, on refresh.
@ -29,48 +30,60 @@ import org.apache.lucene.search.IndexSearcher;
* *
* <p>1. A centralized store of SSDV reader states. * <p>1. A centralized store of SSDV reader states.
* *
* <p>2. A method to rebuild all reader states the user needs. * <p>2. A mechanism to rebuild all reader states the user needs.
*/ */
public class SsdvReaderStatesManager { public class SsdvReaderStatesManager {
/** /** Use the {@link FacetsConfig} to find out which fields are used for faceting. */
* The only attribute is a mapping of SSDV fields to reader states. Each reader state will be a FacetsConfig facetsConfig;
* {@link DefaultSortedSetDocValuesReaderState} with an ordinal map for one of the fields.
*/
private Map<String, SortedSetDocValuesReaderState> fieldToReaderState;
/** No arguments in the constructor. */
SsdvReaderStatesManager() {}
/** /**
* The application code would have a set of fields the user wants to facet on. They pass this set * The values in this map are mappings of fields to reader states. Each reader state will be a
* here once, on application start-up. * {@link DefaultSortedSetDocValuesReaderState} with an ordinal map for the field. These mappings
* are keyed by {@link IndexReader} to account for multiple active index readers during refresh.
*/ */
public void registerFields(Set<String> fields) { private final Map<IndexReader, Map<String, SortedSetDocValuesReaderState>> readerStates =
fields.forEach(field -> fieldToReaderState.put(field, null)); new HashMap<>();
/** Accept a {@link FacetsConfig}. */
public SsdvReaderStatesManager(FacetsConfig facetsConfig) {
this.facetsConfig = facetsConfig;
} }
/** /**
* Here we create corresponding reader states for all the fields. Each time the user does a * Retrieve and, if necessary, create reader states for all the fields in the {@link
* refresh, they would also call this method, which ensures they will be using up-to-date ordinal * FacetsConfig}.
* maps until the next refresh.
*/ */
public void loadReaderStates(IndexSearcher searcher, FacetsConfig facetsConfig) { public Set<SortedSetDocValuesReaderState> getReaderStates(IndexReader indexReader)
fieldToReaderState.replaceAll( throws IOException {
(field, readerState) -> { Set<SortedSetDocValuesReaderState> states = new HashSet<>();
try { for (String field : facetsConfig.getFieldNames()) {
return new DefaultSortedSetDocValuesReaderState( states.add(getReaderState(indexReader, field));
searcher.getIndexReader(), field, facetsConfig);
} catch (IOException e) {
throw new UncheckedIOException(e);
} }
}); return states;
}
/** Retrieve and, if necessary, create the reader state for the default field. */
public SortedSetDocValuesReaderState getReaderState(IndexReader indexReader) throws IOException {
return getReaderState(indexReader, FacetsConfig.DEFAULT_INDEX_FIELD_NAME);
} }
/** /**
* When the user wants to facet on a field, they call getReaderState on that field and pass the * Retrieve and, if necessary, create the reader state for a named field. Store the reader state
* resulting reader state to the {@link SortedSetDocValuesFacetCounts} constructor. * for re-use.
*/ */
public SortedSetDocValuesReaderState getReaderState(String field) { public SortedSetDocValuesReaderState getReaderState(IndexReader indexReader, String field)
return fieldToReaderState.get(field); throws IOException {
Map<String, SortedSetDocValuesReaderState> states = readerStates.get(indexReader);
if (states == null) {
states = new HashMap<>();
readerStates.put(indexReader, states);
}
SortedSetDocValuesReaderState state = states.get(field);
if (state == null) {
state = new DefaultSortedSetDocValuesReaderState(indexReader, field, facetsConfig);
states.put(field, state);
}
return state;
} }
} }

View File

@ -16,8 +16,11 @@
*/ */
package org.apache.lucene.facet; package org.apache.lucene.facet;
import java.io.IOException;
import java.util.Arrays; import java.util.Arrays;
import java.util.Set;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader; import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter; import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.DirectoryReader;
@ -115,4 +118,21 @@ public class TestFacetsConfig extends FacetTestCase {
assertTrue(config.getDimConfig("foobar").hierarchical); assertTrue(config.getDimConfig("foobar").hierarchical);
} }
/**
 * Checks that {@link FacetsConfig#getFieldNames()} accumulates index field names as documents
 * are built: first the custom field configured for dimension "a", then additionally the default
 * field for dimension "b", which gets no explicit field name in this test.
 */
public void testGetFieldNames() throws IOException {
FacetsConfig config = new FacetsConfig();
// Route dimension "a" to a non-default index field.
config.setIndexFieldName("a", "not-$facets");
Document doc;
doc = new Document();
doc.add(new SortedSetDocValuesFacetField("a", "1"));
config.build(doc);
// Only the field for "a" has been seen so far.
assertEquals(Set.of("not-$facets"), config.getFieldNames());
doc = new Document();
doc.add(new SortedSetDocValuesFacetField("b", "1"));
config.build(doc);
// "b" has no custom field name, so the default field is expected alongside the first one.
assertEquals(
Set.of("not-$facets", FacetsConfig.DEFAULT_INDEX_FIELD_NAME), config.getFieldNames());
}
} }