From e2e848ea61c98e5c0ba3f711c2305d6ccd4f589e Mon Sep 17 00:00:00 2001 From: stefanvodita Date: Sat, 11 Mar 2023 19:03:09 +0000 Subject: [PATCH] Field names in FacetsConfig. State manager refreshes ok. --- .../facet/SimpleSortedSetFacetsExample.java | 10 +-- .../org/apache/lucene/facet/FacetsConfig.java | 14 ++++ .../sortedset/SsdvReaderStatesManager.java | 77 +++++++++++-------- .../apache/lucene/facet/TestFacetsConfig.java | 20 +++++ 4 files changed, 84 insertions(+), 37 deletions(-) diff --git a/lucene/demo/src/java/org/apache/lucene/demo/facet/SimpleSortedSetFacetsExample.java b/lucene/demo/src/java/org/apache/lucene/demo/facet/SimpleSortedSetFacetsExample.java index 32b7545b311..4a8197d77f8 100644 --- a/lucene/demo/src/java/org/apache/lucene/demo/facet/SimpleSortedSetFacetsExample.java +++ b/lucene/demo/src/java/org/apache/lucene/demo/facet/SimpleSortedSetFacetsExample.java @@ -26,10 +26,10 @@ import org.apache.lucene.facet.FacetResult; import org.apache.lucene.facet.Facets; import org.apache.lucene.facet.FacetsCollector; import org.apache.lucene.facet.FacetsConfig; -import org.apache.lucene.facet.sortedset.DefaultSortedSetDocValuesReaderState; import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetCounts; import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField; import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState; +import org.apache.lucene.facet.sortedset.SsdvReaderStatesManager; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; @@ -88,8 +88,8 @@ public class SimpleSortedSetFacetsExample { private List search() throws IOException { DirectoryReader indexReader = DirectoryReader.open(indexDir); IndexSearcher searcher = new IndexSearcher(indexReader); - SortedSetDocValuesReaderState state = - new DefaultSortedSetDocValuesReaderState(indexReader, config); + SsdvReaderStatesManager statesManager = new SsdvReaderStatesManager(config); 
+ SortedSetDocValuesReaderState state = statesManager.getReaderState(indexReader); // Aggregatses the facet counts FacetsCollector fc = new FacetsCollector(); @@ -114,8 +114,8 @@ public class SimpleSortedSetFacetsExample { private FacetResult drillDown() throws IOException { DirectoryReader indexReader = DirectoryReader.open(indexDir); IndexSearcher searcher = new IndexSearcher(indexReader); - SortedSetDocValuesReaderState state = - new DefaultSortedSetDocValuesReaderState(indexReader, config); + SsdvReaderStatesManager statesManager = new SsdvReaderStatesManager(config); + SortedSetDocValuesReaderState state = statesManager.getReaderState(indexReader); // Now user drills down on Publish Year/2010: DrillDownQuery q = new DrillDownQuery(config); diff --git a/lucene/facet/src/java/org/apache/lucene/facet/FacetsConfig.java b/lucene/facet/src/java/org/apache/lucene/facet/FacetsConfig.java index da52c049627..7bb5395ea05 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/FacetsConfig.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/FacetsConfig.java @@ -67,6 +67,9 @@ public class FacetsConfig { // int/float/bytes in a single indexed field: private final Map assocDimTypes = new ConcurrentHashMap<>(); + /** All the fields used for faceting. */ + private final Set fieldNames = new HashSet<>(); + /** * Drill down terms indexing option to control whether dimension and sub-path terms should be * indexed. @@ -230,6 +233,14 @@ public class FacetsConfig { return fieldTypes; } + /** + * Return the names of all fields used for faceting. This is only populated for fields that are + * encountered by {@link #build(TaxonomyWriter, Document)}. 
+ */ + public Set getFieldNames() { + return fieldNames; + } + private static void checkSeen(Set seenDims, String dim) { if (seenDims.contains(dim)) { throw new IllegalArgumentException( @@ -272,6 +283,7 @@ public class FacetsConfig { if (field.fieldType() == FacetField.TYPE) { FacetField facetField = (FacetField) field; FacetsConfig.DimConfig dimConfig = getDimConfig(facetField.dim); + fieldNames.add(dimConfig.indexFieldName); if (dimConfig.multiValued == false) { checkSeen(seenDims, facetField.dim); } @@ -283,6 +295,7 @@ public class FacetsConfig { if (field.fieldType() == SortedSetDocValuesFacetField.TYPE) { SortedSetDocValuesFacetField facetField = (SortedSetDocValuesFacetField) field; FacetsConfig.DimConfig dimConfig = getDimConfig(facetField.dim); + fieldNames.add(dimConfig.indexFieldName); if (dimConfig.multiValued == false) { checkSeen(seenDims, facetField.dim); } @@ -295,6 +308,7 @@ public class FacetsConfig { if (field.fieldType() == AssociationFacetField.TYPE) { AssociationFacetField facetField = (AssociationFacetField) field; FacetsConfig.DimConfig dimConfig = getDimConfig(facetField.dim); + fieldNames.add(dimConfig.indexFieldName); if (dimConfig.multiValued == false) { checkSeen(seenDims, facetField.dim); } diff --git a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SsdvReaderStatesManager.java b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SsdvReaderStatesManager.java index c169e449905..687028c9273 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SsdvReaderStatesManager.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SsdvReaderStatesManager.java @@ -17,11 +17,12 @@ package org.apache.lucene.facet.sortedset; import java.io.IOException; -import java.io.UncheckedIOException; +import java.util.HashMap; +import java.util.HashSet; import java.util.Map; import java.util.Set; import org.apache.lucene.facet.FacetsConfig; -import org.apache.lucene.search.IndexSearcher; +import 
org.apache.lucene.index.IndexReader; /** * This class demonstrates how Lucene could help a user build SSDV ordinal maps eagerly, on refresh. @@ -29,48 +30,60 @@ import org.apache.lucene.search.IndexSearcher; * *

<p>1. A centralized store of SSDV reader states. * - *
<p>2. A method to rebuild all reader states the user needs. + *
<p>
2. A mechanism to rebuild all reader states the user needs. */ public class SsdvReaderStatesManager { - /** - * The only attribute is a mapping of SSDV fields to reader states. Each reader state will be a - * {@link DefaultSortedSetDocValuesReaderState} with an ordinal map for one of the fields. - */ - private Map fieldToReaderState; - - /** No arguments in the constructor. */ - SsdvReaderStatesManager() {} + /** Use the {@link FacetsConfig} to find out which fields are used for faceting. */ + FacetsConfig facetsConfig; /** - * The application code would have a set of fields the user wants to facet on. They pass this set - * here once, on application start-up. + * The values in this map are mappings of fields to reader states. Each reader state will be a + * {@link DefaultSortedSetDocValuesReaderState} with an ordinal map for the field. These mappings + * are keyed by {@link IndexReader} to account for multiple active index readers during refresh. */ - public void registerFields(Set fields) { - fields.forEach(field -> fieldToReaderState.put(field, null)); + private final Map> readerStates = + new HashMap<>(); + + /** Accept a {@link FacetsConfig}. */ + public SsdvReaderStatesManager(FacetsConfig facetsConfig) { + this.facetsConfig = facetsConfig; } /** - * Here we create corresponding reader states for all the fields. Each time the user does a - * refresh, they would also call this method, which ensures they will be using up-to-date ordinal - * maps until the next refresh. + * Retrieve and, if necessary, create reader states for all the fields in the {@link + * FacetsConfig}. 
*/ - public void loadReaderStates(IndexSearcher searcher, FacetsConfig facetsConfig) { - fieldToReaderState.replaceAll( - (field, readerState) -> { - try { - return new DefaultSortedSetDocValuesReaderState( - searcher.getIndexReader(), field, facetsConfig); - } catch (IOException e) { - throw new UncheckedIOException(e); - } - }); + public Set getReaderStates(IndexReader indexReader) + throws IOException { + Set states = new HashSet<>(); + for (String field : facetsConfig.getFieldNames()) { + states.add(getReaderState(indexReader, field)); + } + return states; + } + + /** Retrieve and, if necessary, create the reader state for the default field. */ + public SortedSetDocValuesReaderState getReaderState(IndexReader indexReader) throws IOException { + return getReaderState(indexReader, FacetsConfig.DEFAULT_INDEX_FIELD_NAME); } /** - * When the user wants to facet on a field, they call getReaderState on that field and pass the - * resulting reader state to the {@link SortedSetDocValuesFacetCounts} constructor. + * Retrieve and, if necessary, create the reader state for a named field. Store the reader state + * for re-use. 
*/ - public SortedSetDocValuesReaderState getReaderState(String field) { - return fieldToReaderState.get(field); + public SortedSetDocValuesReaderState getReaderState(IndexReader indexReader, String field) + throws IOException { + Map states = readerStates.get(indexReader); + if (states == null) { + states = new HashMap<>(); + readerStates.put(indexReader, states); + } + + SortedSetDocValuesReaderState state = states.get(field); + if (state == null) { + state = new DefaultSortedSetDocValuesReaderState(indexReader, field, facetsConfig); + states.put(field, state); + } + return state; } } diff --git a/lucene/facet/src/test/org/apache/lucene/facet/TestFacetsConfig.java b/lucene/facet/src/test/org/apache/lucene/facet/TestFacetsConfig.java index 590ed65ead1..720f2e28d52 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/TestFacetsConfig.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/TestFacetsConfig.java @@ -16,8 +16,11 @@ */ package org.apache.lucene.facet; +import java.io.IOException; import java.util.Arrays; +import java.util.Set; import org.apache.lucene.document.Document; +import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField; import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader; import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter; import org.apache.lucene.index.DirectoryReader; @@ -115,4 +118,21 @@ public class TestFacetsConfig extends FacetTestCase { assertTrue(config.getDimConfig("foobar").hierarchical); } + + public void testGetFieldNames() throws IOException { + FacetsConfig config = new FacetsConfig(); + config.setIndexFieldName("a", "not-$facets"); + Document doc; + + doc = new Document(); + doc.add(new SortedSetDocValuesFacetField("a", "1")); + config.build(doc); + assertEquals(Set.of("not-$facets"), config.getFieldNames()); + + doc = new Document(); + doc.add(new SortedSetDocValuesFacetField("b", "1")); + config.build(doc); + assertEquals( + Set.of("not-$facets", 
FacetsConfig.DEFAULT_INDEX_FIELD_NAME), config.getFieldNames()); + } }