LUCENE-5387: Improve FacetsConfig.build

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1562491 13f79535-47bb-0310-9956-ffa450edef68
Shai Erera 2014-01-29 16:09:16 +00:00
parent 8a60a5e6a2
commit 30f1be17e8
8 changed files with 61 additions and 78 deletions

CHANGES.txt

@@ -214,7 +214,9 @@ Bug fixes
 API Changes
 
 * LUCENE-5339: The facet module was simplified/reworked to make the
-  APIs more approachable to new users. (Shai Erera, Gilad Barkai, Rob
+  APIs more approachable to new users. Note: when migrating to the new
+  API, you must pass the Document that is returned from FacetsConfig.build()
+  to IndexWriter.addDocument(). (Shai Erera, Gilad Barkai, Rob
   Muir, Mike McCandless)
 
 * LUCENE-5395: The SpatialArgsParser now only reads WKT, no more "lat, lon"
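
For anyone following the migration note above, a minimal sketch of the new pattern (the indexWriter, taxoWriter and config variables are illustrative, assumed to be an open IndexWriter, TaxonomyWriter and FacetsConfig; they are not part of this commit):

import org.apache.lucene.document.Document;
import org.apache.lucene.facet.FacetField;
import org.apache.lucene.facet.FacetsConfig;

FacetsConfig config = new FacetsConfig();
Document doc = new Document();
doc.add(new FacetField("Author", "Bob"));
// Index the document that build() returns, not the original:
indexWriter.addDocument(config.build(taxoWriter, doc));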

AddDocTask.java

@@ -17,13 +17,11 @@ package org.apache.lucene.benchmark.byTask.tasks;
  * limitations under the License.
  */
 
 import java.text.NumberFormat;
 import java.util.Locale;
 
 import org.apache.lucene.benchmark.byTask.PerfRunData;
 import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
 import org.apache.lucene.document.Document;
-import org.apache.lucene.index.IndexDocument;
 
 /**
  * Add a document, optionally of a certain size.
@@ -42,7 +40,7 @@ public class AddDocTask extends PerfTask {
    * volatile data passed between setup(), doLogic(), tearDown().
    * the doc is created at setup() and added at doLogic().
    */
-  protected IndexDocument doc = null;
+  protected Document doc = null;
 
   @Override
   public void setup() throws Exception {

AddFacetedDocTask.java

@@ -22,14 +22,8 @@ import java.util.List;
 
 import org.apache.lucene.benchmark.byTask.PerfRunData;
 import org.apache.lucene.benchmark.byTask.feeds.FacetSource;
 import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
 import org.apache.lucene.facet.FacetField;
 import org.apache.lucene.facet.FacetsConfig;
-import org.apache.lucene.facet.taxonomy.FacetLabel;
-import org.apache.lucene.index.IndexDocument;
-import org.apache.lucene.index.IndexableField;
-import org.apache.lucene.index.StorableField;
 
 /**
  * Add a faceted document.
@@ -83,7 +77,7 @@ public class AddFacetedDocTask extends AddDocTask {
       List<FacetField> facets = new ArrayList<FacetField>();
       getRunData().getFacetSource().getNextFacets(facets);
       for(FacetField ff : facets) {
-        ((Document) doc).add(ff);
+        doc.add(ff);
       }
       doc = config.build(getRunData().getTaxonomyWriter(), doc);
     }

FacetField.java

@@ -19,12 +19,17 @@ package org.apache.lucene.facet;
 
 import java.util.Arrays;
 
-import org.apache.lucene.document.Document; // javadoc
+import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 
-/** Add an instance of this to your {@link Document} for
- *  every facet label. */
+/**
+ * Add an instance of this to your {@link Document} for every facet label.
+ *
+ * <p>
+ * <b>NOTE:</b> you must call {@link FacetsConfig#build(Document)} before
+ * you add the document to IndexWriter.
+ */
 public class FacetField extends Field {
   static final FieldType TYPE = new FieldType();
   static {
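
To make the new NOTE concrete, a hedged example of correct usage (the "Publish Date" dimension and the indexWriter/taxoWriter variables are made up for illustration):

FacetsConfig config = new FacetsConfig();
config.setHierarchical("Publish Date", true);

Document doc = new Document();
doc.add(new FacetField("Publish Date", "2014", "1", "29"));

// Wrong: indexWriter.addDocument(doc) -- the FacetField placeholder is not
// meant to be indexed as-is; no drill-down terms or facet doc values would
// be created.
// Right: index the translated document that build() returns:
indexWriter.addDocument(config.build(taxoWriter, doc));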

FacetsConfig.java

@@ -28,7 +28,9 @@ import java.util.Set;
 import java.util.concurrent.ConcurrentHashMap;
 
 import org.apache.lucene.document.BinaryDocValuesField;
 import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.SortedSetDocValuesField;
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField;
@@ -37,10 +39,7 @@ import org.apache.lucene.facet.taxonomy.FacetLabel;
 import org.apache.lucene.facet.taxonomy.FloatAssociationFacetField;
 import org.apache.lucene.facet.taxonomy.IntAssociationFacetField;
 import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
-import org.apache.lucene.index.IndexDocument;
 import org.apache.lucene.index.IndexableField;
-import org.apache.lucene.index.IndexableFieldType;
-import org.apache.lucene.index.StorableField;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IntsRef;
@@ -166,17 +165,27 @@ public class FacetsConfig {
     seenDims.add(dim);
   }
 
-  /** Translates any added {@link FacetField}s into normal
-   *  fields for indexing; only use this version if you
-   *  did not add any taxonomy-based fields ({@link
-   *  FacetField} or {@link AssociationFacetField}) */
-  public IndexDocument build(IndexDocument doc) throws IOException {
+  /**
+   * Translates any added {@link FacetField}s into normal fields for indexing;
+   * only use this version if you did not add any taxonomy-based fields (
+   * {@link FacetField} or {@link AssociationFacetField}).
+   *
+   * <p>
+   * <b>NOTE:</b> you should add the returned document to IndexWriter, not the
+   * input one!
+   */
+  public Document build(Document doc) throws IOException {
     return build(null, doc);
   }
 
-  /** Translates any added {@link FacetField}s into normal
-   *  fields for indexing. */
-  public IndexDocument build(TaxonomyWriter taxoWriter, IndexDocument doc) throws IOException {
+  /**
+   * Translates any added {@link FacetField}s into normal fields for indexing.
+   *
+   * <p>
+   * <b>NOTE:</b> you should add the returned document to IndexWriter, not the
+   * input one!
+   */
+  public Document build(TaxonomyWriter taxoWriter, Document doc) throws IOException {
     // Find all FacetFields, collated by the actual field:
     Map<String,List<FacetField>> byField = new HashMap<String,List<FacetField>>();
@@ -188,7 +197,7 @@ public class FacetsConfig {
     Set<String> seenDims = new HashSet<String>();
 
-    for(IndexableField field : doc.indexableFields()) {
+    for (IndexableField field : doc.indexableFields()) {
       if (field.fieldType() == FacetField.TYPE) {
         FacetField facetField = (FacetField) field;
         FacetsConfig.DimConfig dimConfig = getDimConfig(facetField.dim);
@@ -260,47 +269,28 @@ public class FacetsConfig {
       }
     }
 
-    List<Field> addedIndexedFields = new ArrayList<Field>();
-    List<Field> addedStoredFields = new ArrayList<Field>();
+    Document result = new Document();
 
-    processFacetFields(taxoWriter, byField, addedIndexedFields, addedStoredFields);
-    processSSDVFacetFields(dvByField, addedIndexedFields, addedStoredFields);
-    processAssocFacetFields(taxoWriter, assocByField, addedIndexedFields, addedStoredFields);
+    processFacetFields(taxoWriter, byField, result);
+    processSSDVFacetFields(dvByField, result);
+    processAssocFacetFields(taxoWriter, assocByField, result);
 
-    //System.out.println("add stored: " + addedStoredFields);
 
-    final List<IndexableField> allIndexedFields = new ArrayList<IndexableField>();
-    for(IndexableField field : doc.indexableFields()) {
-      IndexableFieldType ft = field.fieldType();
+    for (Field field : doc.getFields()) {
+      FieldType ft = field.fieldType();
       if (ft != FacetField.TYPE && ft != SortedSetDocValuesFacetField.TYPE && ft != AssociationFacetField.TYPE) {
-        allIndexedFields.add(field);
+        result.add(field);
       }
     }
 
-    allIndexedFields.addAll(addedIndexedFields);
-    final List<StorableField> allStoredFields = new ArrayList<StorableField>();
-    for(StorableField field : doc.storableFields()) {
-      allStoredFields.add(field);
-    }
-    allStoredFields.addAll(addedStoredFields);
-    //System.out.println("all indexed: " + allIndexedFields);
-    //System.out.println("all stored: " + allStoredFields);
-    return new IndexDocument() {
-      @Override
-      public Iterable<IndexableField> indexableFields() {
-        return allIndexedFields;
-      }
-      @Override
-      public Iterable<StorableField> storableFields() {
-        return allStoredFields;
-      }
-    };
+    return result;
   }
 
-  private void processFacetFields(TaxonomyWriter taxoWriter, Map<String,List<FacetField>> byField, List<Field> addedIndexedFields, List<Field> addedStoredFields) throws IOException {
+  private void processFacetFields(TaxonomyWriter taxoWriter, Map<String,List<FacetField>> byField, Document doc) throws IOException {
 
     for(Map.Entry<String,List<FacetField>> ent : byField.entrySet()) {
@@ -345,18 +335,18 @@ public class FacetsConfig {
       }
 
       // Drill down:
-      for(int i=1;i<=cp.length;i++) {
-        addedIndexedFields.add(new StringField(indexFieldName, pathToString(cp.components, i), Field.Store.NO));
+      for (int i=1;i<=cp.length;i++) {
+        doc.add(new StringField(indexFieldName, pathToString(cp.components, i), Field.Store.NO));
       }
 
       // Facet counts:
       // DocValues are considered stored fields:
-      addedStoredFields.add(new BinaryDocValuesField(indexFieldName, dedupAndEncode(ordinals)));
+      doc.add(new BinaryDocValuesField(indexFieldName, dedupAndEncode(ordinals)));
     }
   }
 
-  private void processSSDVFacetFields(Map<String,List<SortedSetDocValuesFacetField>> byField, List<Field> addedIndexedFields, List<Field> addedStoredFields) throws IOException {
+  private void processSSDVFacetFields(Map<String,List<SortedSetDocValuesFacetField>> byField, Document doc) throws IOException {
     //System.out.println("process SSDV: " + byField);
 
     for(Map.Entry<String,List<SortedSetDocValuesFacetField>> ent : byField.entrySet()) {
@@ -369,18 +359,19 @@ public class FacetsConfig {
         //System.out.println("add " + fullPath);
 
         // For facet counts:
-        addedStoredFields.add(new SortedSetDocValuesField(indexFieldName, new BytesRef(fullPath)));
+        doc.add(new SortedSetDocValuesField(indexFieldName, new BytesRef(fullPath)));
 
         // For drill-down:
-        addedIndexedFields.add(new StringField(indexFieldName, fullPath, Field.Store.NO));
-        addedIndexedFields.add(new StringField(indexFieldName, facetField.dim, Field.Store.NO));
+        doc.add(new StringField(indexFieldName, fullPath, Field.Store.NO));
+        doc.add(new StringField(indexFieldName, facetField.dim, Field.Store.NO));
       }
     }
   }
 
-  private void processAssocFacetFields(TaxonomyWriter taxoWriter, Map<String,List<AssociationFacetField>> byField,
-      List<Field> addedIndexedFields, List<Field> addedStoredFields) throws IOException {
-    for(Map.Entry<String,List<AssociationFacetField>> ent : byField.entrySet()) {
+  private void processAssocFacetFields(TaxonomyWriter taxoWriter,
+      Map<String,List<AssociationFacetField>> byField, Document doc)
+      throws IOException {
+    for (Map.Entry<String,List<AssociationFacetField>> ent : byField.entrySet()) {
       byte[] bytes = new byte[16];
       int upto = 0;
       String indexFieldName = ent.getKey();
@@ -402,7 +393,7 @@ public class FacetsConfig {
         System.arraycopy(field.assoc.bytes, field.assoc.offset, bytes, upto, field.assoc.length);
         upto += field.assoc.length;
       }
-      addedStoredFields.add(new BinaryDocValuesField(indexFieldName, new BytesRef(bytes, 0, upto)));
+      doc.add(new BinaryDocValuesField(indexFieldName, new BytesRef(bytes, 0, upto)));
     }
   }
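
One consequence of the rewrite above worth calling out: build() now copies every non-facet field of the input document into a fresh Document (the getFields() loop), so the input is left untouched and still holds the raw facet placeholders. A hedged sketch of what this means for callers (field names and the config/taxoWriter/indexWriter variables are illustrative):

Document doc = new Document();
doc.add(new StringField("id", "42", Field.Store.YES));
doc.add(new FacetField("Author", "Lisa"));

Document built = config.build(taxoWriter, doc);
// 'built' holds the "id" field plus the translated facet fields
// (drill-down StringFields and a BinaryDocValuesField);
// 'doc' still holds the raw FacetField and should not be indexed.
indexWriter.addDocument(built);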

TestFacetsConfig.java

@@ -24,7 +24,6 @@ import org.apache.lucene.document.Document;
 import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
 import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
 import org.apache.lucene.index.DirectoryReader;
-import org.apache.lucene.index.IndexDocument;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.MatchAllDocsQuery;
@@ -65,10 +64,10 @@ public class TestFacetsConfig extends FacetTestCase {
     FacetsConfig facetsConfig = new FacetsConfig();
     Document doc = new Document();
     doc.add(new FacetField("a", "b"));
-    IndexDocument facetDoc = facetsConfig.build(taxoWriter, doc);
+    doc = facetsConfig.build(taxoWriter, doc);
 
     // these two addDocument() used to fail
-    indexWriter.addDocument(facetDoc);
-    indexWriter.addDocument(facetDoc);
+    indexWriter.addDocument(doc);
+    indexWriter.addDocument(doc);
 
     IOUtils.close(indexWriter, taxoWriter);
 
     DirectoryReader indexReader = DirectoryReader.open(indexDir);
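
The doubled addDocument() above is the point of the test: because build() now returns a plain Document, the built document can be passed to the writer any number of times, which is what used to fail with the old anonymous IndexDocument. In sketch form (variable names as in the test):

Document built = facetsConfig.build(taxoWriter, doc);
indexWriter.addDocument(built); // fine
indexWriter.addDocument(built); // also fine now; this second add used to fail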

IndexAndTaxonomyReplicationClientTest.java

@@ -20,7 +20,6 @@ package org.apache.lucene.replicator;
 import java.io.Closeable;
 import java.io.File;
 import java.io.IOException;
-import java.util.Collections;
 import java.util.HashMap;
 import java.util.concurrent.Callable;
 import java.util.concurrent.atomic.AtomicInteger;
@@ -28,7 +27,6 @@ import java.util.concurrent.atomic.AtomicInteger;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.facet.DrillDownQuery;
 import org.apache.lucene.facet.FacetField;
-import org.apache.lucene.facet.FacetResult;
 import org.apache.lucene.facet.Facets;
 import org.apache.lucene.facet.FacetsCollector;
 import org.apache.lucene.facet.FacetsConfig;
@@ -38,7 +36,6 @@ import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
 import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
 import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
 import org.apache.lucene.index.DirectoryReader;
-import org.apache.lucene.index.IndexDocument;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.SnapshotDeletionPolicy;
@@ -180,7 +177,7 @@ public class IndexAndTaxonomyReplicationClientTest extends ReplicatorTestCase {
     return new IndexAndTaxonomyRevision(publishIndexWriter, publishTaxoWriter);
   }
 
-  private IndexDocument newDocument(TaxonomyWriter taxoWriter, int id) throws IOException {
+  private Document newDocument(TaxonomyWriter taxoWriter, int id) throws IOException {
     Document doc = new Document();
     doc.add(new FacetField("A", Integer.toString(id, 16)));
     return config.build(publishTaxoWriter, doc);

IndexAndTaxonomyRevisionTest.java

@@ -19,17 +19,14 @@ package org.apache.lucene.replicator;
 import java.io.IOException;
 import java.io.InputStream;
 
-import java.util.Collections;
 import java.util.List;
-import java.util.Map.Entry;
 import java.util.Map;
+import java.util.Map.Entry;
 
 import org.apache.lucene.document.Document;
 import org.apache.lucene.facet.FacetField;
 import org.apache.lucene.facet.FacetsConfig;
-import org.apache.lucene.facet.taxonomy.FacetLabel;
 import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
-import org.apache.lucene.index.IndexDocument;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
@@ -43,7 +40,7 @@ import org.junit.Test;
 public class IndexAndTaxonomyRevisionTest extends ReplicatorTestCase {
 
-  private IndexDocument newDocument(TaxonomyWriter taxoWriter) throws IOException {
+  private Document newDocument(TaxonomyWriter taxoWriter) throws IOException {
     FacetsConfig config = new FacetsConfig();
     Document doc = new Document();
     doc.add(new FacetField("A", "1"));