Field names in FacetsConfig. State manager refreshes ok.

2023-03-11 19:03:09 +00:00 · 2023-03-11 19:03:09 +00:00 · e2e848ea61
parent d7b39629eb
commit e2e848ea61
4 changed files with 84 additions and 37 deletions
--- a/lucene/demo/src/java/org/apache/lucene/demo/facet/SimpleSortedSetFacetsExample.java
+++ b/lucene/demo/src/java/org/apache/lucene/demo/facet/SimpleSortedSetFacetsExample.java
@ -26,10 +26,10 @@ import org.apache.lucene.facet.FacetResult;
 import org.apache.lucene.facet.Facets;
 import org.apache.lucene.facet.FacetsCollector;
 import org.apache.lucene.facet.FacetsConfig;
-import org.apache.lucene.facet.sortedset.DefaultSortedSetDocValuesReaderState;
 import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetCounts;
 import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField;
 import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState;
+import org.apache.lucene.facet.sortedset.SsdvReaderStatesManager;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
@ -88,8 +88,8 @@ public class SimpleSortedSetFacetsExample {
  private List<FacetResult> search() throws IOException {
    DirectoryReader indexReader = DirectoryReader.open(indexDir);
    IndexSearcher searcher = new IndexSearcher(indexReader);
-    SortedSetDocValuesReaderState state =
-        new DefaultSortedSetDocValuesReaderState(indexReader, config);
+    SsdvReaderStatesManager statesManager = new SsdvReaderStatesManager(config);
+    SortedSetDocValuesReaderState state = statesManager.getReaderState(indexReader);

    // Aggregatses the facet counts
    FacetsCollector fc = new FacetsCollector();
@ -114,8 +114,8 @@ public class SimpleSortedSetFacetsExample {
  private FacetResult drillDown() throws IOException {
    DirectoryReader indexReader = DirectoryReader.open(indexDir);
    IndexSearcher searcher = new IndexSearcher(indexReader);
-    SortedSetDocValuesReaderState state =
-        new DefaultSortedSetDocValuesReaderState(indexReader, config);
+    SsdvReaderStatesManager statesManager = new SsdvReaderStatesManager(config);
+    SortedSetDocValuesReaderState state = statesManager.getReaderState(indexReader);

    // Now user drills down on Publish Year/2010:
    DrillDownQuery q = new DrillDownQuery(config);
--- a/lucene/facet/src/java/org/apache/lucene/facet/FacetsConfig.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/FacetsConfig.java
@ -67,6 +67,9 @@ public class FacetsConfig {
  // int/float/bytes in a single indexed field:
  private final Map<String, String> assocDimTypes = new ConcurrentHashMap<>();

+  /** All the fields used for faceting. Concurrent, like the other per-config maps here. */
+  private final Set<String> fieldNames = ConcurrentHashMap.newKeySet();
+
  /**
   * Drill down terms indexing option to control whether dimension and sub-path terms should be
   * indexed.
@ -230,6 +233,14 @@ public class FacetsConfig {
    return fieldTypes;
  }

+  /**
+   * Return an unmodifiable view of the names of all fields used for faceting. This is only
+   * populated for fields that are encountered by {@link #build(TaxonomyWriter, Document)}.
+   */
+  public Set<String> getFieldNames() {
+    return java.util.Collections.unmodifiableSet(fieldNames);
+  }
+
  private static void checkSeen(Set<String> seenDims, String dim) {
    if (seenDims.contains(dim)) {
      throw new IllegalArgumentException(
@ -272,6 +283,7 @@ public class FacetsConfig {
      if (field.fieldType() == FacetField.TYPE) {
        FacetField facetField = (FacetField) field;
        FacetsConfig.DimConfig dimConfig = getDimConfig(facetField.dim);
+        fieldNames.add(dimConfig.indexFieldName);
        if (dimConfig.multiValued == false) {
          checkSeen(seenDims, facetField.dim);
        }
@ -283,6 +295,7 @@ public class FacetsConfig {
      if (field.fieldType() == SortedSetDocValuesFacetField.TYPE) {
        SortedSetDocValuesFacetField facetField = (SortedSetDocValuesFacetField) field;
        FacetsConfig.DimConfig dimConfig = getDimConfig(facetField.dim);
+        fieldNames.add(dimConfig.indexFieldName);
        if (dimConfig.multiValued == false) {
          checkSeen(seenDims, facetField.dim);
        }
@ -295,6 +308,7 @@ public class FacetsConfig {
      if (field.fieldType() == AssociationFacetField.TYPE) {
        AssociationFacetField facetField = (AssociationFacetField) field;
        FacetsConfig.DimConfig dimConfig = getDimConfig(facetField.dim);
+        fieldNames.add(dimConfig.indexFieldName);
        if (dimConfig.multiValued == false) {
          checkSeen(seenDims, facetField.dim);
        }
--- a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SsdvReaderStatesManager.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SsdvReaderStatesManager.java
@ -17,11 +17,12 @@
 package org.apache.lucene.facet.sortedset;

 import java.io.IOException;
-import java.io.UncheckedIOException;
+import java.util.HashMap;
+import java.util.HashSet;
 import java.util.Map;
 import java.util.Set;
 import org.apache.lucene.facet.FacetsConfig;
-import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.index.IndexReader;

 /**
 * This class demonstrates how Lucene could help a user build SSDV ordinal maps eagerly, on refresh.
@ -29,48 +30,60 @@ import org.apache.lucene.search.IndexSearcher;
 *
 * <p>1. A centralized store of SSDV reader states.
 *
- * <p>2. A method to rebuild all reader states the user needs.
+ * <p>2. A mechanism to rebuild all reader states the user needs.
 */
 public class SsdvReaderStatesManager {
-  /**
-   * The only attribute is a mapping of SSDV fields to reader states. Each reader state will be a
-   * {@link DefaultSortedSetDocValuesReaderState} with an ordinal map for one of the fields.
-   */
-  private Map<String, SortedSetDocValuesReaderState> fieldToReaderState;
-
-  /** No arguments in the constructor. */
-  SsdvReaderStatesManager() {}
+  /** Use the {@link FacetsConfig} to find out which fields are used for faceting. */
+  private final FacetsConfig facetsConfig;

  /**
-   * The application code would have a set of fields the user wants to facet on. They pass this set
-   * here once, on application start-up.
+   * The values in this map are mappings of fields to reader states. Each reader state will be a
+   * {@link DefaultSortedSetDocValuesReaderState} with an ordinal map for the field. These mappings
+   * are keyed by {@link IndexReader} to account for multiple active index readers during refresh.
   */
-  public void registerFields(Set<String> fields) {
-    fields.forEach(field -> fieldToReaderState.put(field, null));
+  private final Map<IndexReader, Map<String, SortedSetDocValuesReaderState>> readerStates =
+      new HashMap<>();
+
+  /** Accept a {@link FacetsConfig}. */
+  public SsdvReaderStatesManager(FacetsConfig facetsConfig) {
+    this.facetsConfig = facetsConfig;
  }

  /**
-   * Here we create corresponding reader states for all the fields. Each time the user does a
-   * refresh, they would also call this method, which ensures they will be using up-to-date ordinal
-   * maps until the next refresh.
+   * Retrieve and, if necessary, create reader states for all the fields in the {@link
+   * FacetsConfig}.
   */
-  public void loadReaderStates(IndexSearcher searcher, FacetsConfig facetsConfig) {
-    fieldToReaderState.replaceAll(
-        (field, readerState) -> {
-          try {
-            return new DefaultSortedSetDocValuesReaderState(
-                searcher.getIndexReader(), field, facetsConfig);
-          } catch (IOException e) {
-            throw new UncheckedIOException(e);
-          }
-        });
+  public Set<SortedSetDocValuesReaderState> getReaderStates(IndexReader indexReader)
+      throws IOException {
+    Set<SortedSetDocValuesReaderState> states = new HashSet<>();
+    // NOTE(review): FacetsConfig#build records the index field of every facet field type it sees
+    // (taxonomy, sorted-set and association), so this loop is not limited to SSDV fields —
+    // confirm that requesting an SSDV reader state for each of them is intended.
+    for (String field : facetsConfig.getFieldNames()) {
+      states.add(getReaderState(indexReader, field));
+    }
+    return states;
+  }
+
+  /**
+   * Retrieve and, if necessary, create the reader state for the default facet field, {@link
+   * FacetsConfig#DEFAULT_INDEX_FIELD_NAME}. Delegates to {@link #getReaderState(IndexReader,
+   * String)}.
+   *
+   * @param indexReader the reader whose state is requested
+   * @throws IOException if the two-argument overload throws it
+   */
+  public SortedSetDocValuesReaderState getReaderState(IndexReader indexReader) throws IOException {
+    return getReaderState(indexReader, FacetsConfig.DEFAULT_INDEX_FIELD_NAME);
   }

  /**
-   * When the user wants to facet on a field, they call getReaderState on that field and pass the
-   * resulting reader state to the {@link SortedSetDocValuesFacetCounts} constructor.
+   * Retrieve and, if necessary, create the reader state for a named field. Store the reader state
+   * for re-use.
   */
-  public SortedSetDocValuesReaderState getReaderState(String field) {
-    return fieldToReaderState.get(field);
+  public SortedSetDocValuesReaderState getReaderState(IndexReader indexReader, String field)
+      throws IOException {
+    Map<String, SortedSetDocValuesReaderState> perReader =
+        readerStates.computeIfAbsent(indexReader, unused -> new HashMap<>());
+
+    SortedSetDocValuesReaderState cached = perReader.get(field);
+    if (cached != null) {
+      return cached;
+    }
+    // Creating the state can throw IOException, so it is built outside of any lambda.
+    SortedSetDocValuesReaderState created =
+        new DefaultSortedSetDocValuesReaderState(indexReader, field, facetsConfig);
+    perReader.put(field, created);
+    return created;
   }
 }
--- a/lucene/facet/src/test/org/apache/lucene/facet/TestFacetsConfig.java
+++ b/lucene/facet/src/test/org/apache/lucene/facet/TestFacetsConfig.java
@ -16,8 +16,11 @@
 */
 package org.apache.lucene.facet;

+import java.io.IOException;
 import java.util.Arrays;
+import java.util.Set;
 import org.apache.lucene.document.Document;
+import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField;
 import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
 import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
 import org.apache.lucene.index.DirectoryReader;
@ -115,4 +118,21 @@ public class TestFacetsConfig extends FacetTestCase {

    assertTrue(config.getDimConfig("foobar").hierarchical);
  }
+
+  public void testGetFieldNames() throws IOException {
+    FacetsConfig config = new FacetsConfig();
+    config.setIndexFieldName("a", "not-$facets");
+    // Dimension "a" is mapped to a custom index field, so only that field is recorded.
+    Document customFieldDoc = new Document();
+    customFieldDoc.add(new SortedSetDocValuesFacetField("a", "1"));
+    config.build(customFieldDoc);
+    assertEquals(Set.of("not-$facets"), config.getFieldNames());
+
+    // Dimension "b" has no mapping, so the default index field is recorded as well.
+    Document defaultFieldDoc = new Document();
+    defaultFieldDoc.add(new SortedSetDocValuesFacetField("b", "1"));
+    config.build(defaultFieldDoc);
+    assertEquals(
+        Set.of("not-$facets", FacetsConfig.DEFAULT_INDEX_FIELD_NAME), config.getFieldNames());
+  }
 }