LUCENE-5387: Improve FacetsConfig.build

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1562491 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Shai Erera 2014-01-29 16:09:16 +00:00
parent 8a60a5e6a2
commit 30f1be17e8
8 changed files with 61 additions and 78 deletions

View File

@ -214,7 +214,9 @@ Bug fixes
API Changes API Changes
* LUCENE-5339: The facet module was simplified/reworked to make the * LUCENE-5339: The facet module was simplified/reworked to make the
APIs more approachable to new users. (Shai Erera, Gilad Barkai, Rob APIs more approachable to new users. Note: when migrating to the new
API, you must pass the Document that is returned from FacetsConfig.build()
to IndexWriter.addDocument(). (Shai Erera, Gilad Barkai, Rob
Muir, Mike McCandless) Muir, Mike McCandless)
* LUCENE-5395: The SpatialArgsParser now only reads WKT, no more "lat, lon" * LUCENE-5395: The SpatialArgsParser now only reads WKT, no more "lat, lon"

View File

@ -17,13 +17,11 @@ package org.apache.lucene.benchmark.byTask.tasks;
* limitations under the License. * limitations under the License.
*/ */
import java.text.NumberFormat;
import java.util.Locale; import java.util.Locale;
import org.apache.lucene.benchmark.byTask.PerfRunData; import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.feeds.DocMaker; import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexDocument;
/** /**
* Add a document, optionally of a certain size. * Add a document, optionally of a certain size.
@ -42,7 +40,7 @@ public class AddDocTask extends PerfTask {
* volatile data passed between setup(), doLogic(), tearDown(). * volatile data passed between setup(), doLogic(), tearDown().
* the doc is created at setup() and added at doLogic(). * the doc is created at setup() and added at doLogic().
*/ */
protected IndexDocument doc = null; protected Document doc = null;
@Override @Override
public void setup() throws Exception { public void setup() throws Exception {

View File

@ -22,14 +22,8 @@ import java.util.List;
import org.apache.lucene.benchmark.byTask.PerfRunData; import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.feeds.FacetSource; import org.apache.lucene.benchmark.byTask.feeds.FacetSource;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.facet.FacetField; import org.apache.lucene.facet.FacetField;
import org.apache.lucene.facet.FacetsConfig; import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.index.IndexDocument;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.StorableField;
/** /**
* Add a faceted document. * Add a faceted document.
@ -83,7 +77,7 @@ public class AddFacetedDocTask extends AddDocTask {
List<FacetField> facets = new ArrayList<FacetField>(); List<FacetField> facets = new ArrayList<FacetField>();
getRunData().getFacetSource().getNextFacets(facets); getRunData().getFacetSource().getNextFacets(facets);
for(FacetField ff : facets) { for(FacetField ff : facets) {
((Document) doc).add(ff); doc.add(ff);
} }
doc = config.build(getRunData().getTaxonomyWriter(), doc); doc = config.build(getRunData().getTaxonomyWriter(), doc);
} }

View File

@ -19,12 +19,17 @@ package org.apache.lucene.facet;
import java.util.Arrays; import java.util.Arrays;
import org.apache.lucene.document.Document; // javadoc import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType; import org.apache.lucene.document.FieldType;
/** Add an instance of this to your {@link Document} for /**
* every facet label. */ * Add an instance of this to your {@link Document} for every facet label.
*
* <p>
* <b>NOTE:</b> you must call {@link FacetsConfig#build(Document)} before
* you add the document to IndexWriter.
*/
public class FacetField extends Field { public class FacetField extends Field {
static final FieldType TYPE = new FieldType(); static final FieldType TYPE = new FieldType();
static { static {

View File

@ -28,7 +28,9 @@ import java.util.Set;
import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentHashMap;
import org.apache.lucene.document.BinaryDocValuesField; import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StringField; import org.apache.lucene.document.StringField;
import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField; import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField;
@ -37,10 +39,7 @@ import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.facet.taxonomy.FloatAssociationFacetField; import org.apache.lucene.facet.taxonomy.FloatAssociationFacetField;
import org.apache.lucene.facet.taxonomy.IntAssociationFacetField; import org.apache.lucene.facet.taxonomy.IntAssociationFacetField;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter; import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.index.IndexDocument;
import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.IndexableFieldType;
import org.apache.lucene.index.StorableField;
import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef; import org.apache.lucene.util.IntsRef;
@ -166,17 +165,27 @@ public class FacetsConfig {
seenDims.add(dim); seenDims.add(dim);
} }
/** Translates any added {@link FacetField}s into normal /**
* fields for indexing; only use this version if you * Translates any added {@link FacetField}s into normal fields for indexing;
* did not add any taxonomy-based fields ({@link * only use this version if you did not add any taxonomy-based fields (
* FacetField} or {@link AssociationFacetField}) */ * {@link FacetField} or {@link AssociationFacetField}).
public IndexDocument build(IndexDocument doc) throws IOException { *
* <p>
* <b>NOTE:</b> you should add the returned document to IndexWriter, not the
* input one!
*/
public Document build(Document doc) throws IOException {
return build(null, doc); return build(null, doc);
} }
/** Translates any added {@link FacetField}s into normal /**
* fields for indexing. */ * Translates any added {@link FacetField}s into normal fields for indexing.
public IndexDocument build(TaxonomyWriter taxoWriter, IndexDocument doc) throws IOException { *
* <p>
* <b>NOTE:</b> you should add the returned document to IndexWriter, not the
* input one!
*/
public Document build(TaxonomyWriter taxoWriter, Document doc) throws IOException {
// Find all FacetFields, collated by the actual field: // Find all FacetFields, collated by the actual field:
Map<String,List<FacetField>> byField = new HashMap<String,List<FacetField>>(); Map<String,List<FacetField>> byField = new HashMap<String,List<FacetField>>();
@ -188,7 +197,7 @@ public class FacetsConfig {
Set<String> seenDims = new HashSet<String>(); Set<String> seenDims = new HashSet<String>();
for(IndexableField field : doc.indexableFields()) { for (IndexableField field : doc.indexableFields()) {
if (field.fieldType() == FacetField.TYPE) { if (field.fieldType() == FacetField.TYPE) {
FacetField facetField = (FacetField) field; FacetField facetField = (FacetField) field;
FacetsConfig.DimConfig dimConfig = getDimConfig(facetField.dim); FacetsConfig.DimConfig dimConfig = getDimConfig(facetField.dim);
@ -260,47 +269,28 @@ public class FacetsConfig {
} }
} }
List<Field> addedIndexedFields = new ArrayList<Field>(); Document result = new Document();
List<Field> addedStoredFields = new ArrayList<Field>();
processFacetFields(taxoWriter, byField, addedIndexedFields, addedStoredFields); processFacetFields(taxoWriter, byField, result);
processSSDVFacetFields(dvByField, addedIndexedFields, addedStoredFields); processSSDVFacetFields(dvByField, result);
processAssocFacetFields(taxoWriter, assocByField, addedIndexedFields, addedStoredFields); processAssocFacetFields(taxoWriter, assocByField, result);
//System.out.println("add stored: " + addedStoredFields); //System.out.println("add stored: " + addedStoredFields);
final List<IndexableField> allIndexedFields = new ArrayList<IndexableField>(); for (Field field : doc.getFields()) {
for(IndexableField field : doc.indexableFields()) { FieldType ft = field.fieldType();
IndexableFieldType ft = field.fieldType();
if (ft != FacetField.TYPE && ft != SortedSetDocValuesFacetField.TYPE && ft != AssociationFacetField.TYPE) { if (ft != FacetField.TYPE && ft != SortedSetDocValuesFacetField.TYPE && ft != AssociationFacetField.TYPE) {
allIndexedFields.add(field); result.add(field);
} }
} }
allIndexedFields.addAll(addedIndexedFields);
final List<StorableField> allStoredFields = new ArrayList<StorableField>();
for(StorableField field : doc.storableFields()) {
allStoredFields.add(field);
}
allStoredFields.addAll(addedStoredFields);
//System.out.println("all indexed: " + allIndexedFields); //System.out.println("all indexed: " + allIndexedFields);
//System.out.println("all stored: " + allStoredFields); //System.out.println("all stored: " + allStoredFields);
return new IndexDocument() { return result;
@Override
public Iterable<IndexableField> indexableFields() {
return allIndexedFields;
}
@Override
public Iterable<StorableField> storableFields() {
return allStoredFields;
}
};
} }
private void processFacetFields(TaxonomyWriter taxoWriter, Map<String,List<FacetField>> byField, List<Field> addedIndexedFields, List<Field> addedStoredFields) throws IOException { private void processFacetFields(TaxonomyWriter taxoWriter, Map<String,List<FacetField>> byField, Document doc) throws IOException {
for(Map.Entry<String,List<FacetField>> ent : byField.entrySet()) { for(Map.Entry<String,List<FacetField>> ent : byField.entrySet()) {
@ -345,18 +335,18 @@ public class FacetsConfig {
} }
// Drill down: // Drill down:
for(int i=1;i<=cp.length;i++) { for (int i=1;i<=cp.length;i++) {
addedIndexedFields.add(new StringField(indexFieldName, pathToString(cp.components, i), Field.Store.NO)); doc.add(new StringField(indexFieldName, pathToString(cp.components, i), Field.Store.NO));
} }
} }
// Facet counts: // Facet counts:
// DocValues are considered stored fields: // DocValues are considered stored fields:
addedStoredFields.add(new BinaryDocValuesField(indexFieldName, dedupAndEncode(ordinals))); doc.add(new BinaryDocValuesField(indexFieldName, dedupAndEncode(ordinals)));
} }
} }
private void processSSDVFacetFields(Map<String,List<SortedSetDocValuesFacetField>> byField, List<Field> addedIndexedFields, List<Field> addedStoredFields) throws IOException { private void processSSDVFacetFields(Map<String,List<SortedSetDocValuesFacetField>> byField, Document doc) throws IOException {
//System.out.println("process SSDV: " + byField); //System.out.println("process SSDV: " + byField);
for(Map.Entry<String,List<SortedSetDocValuesFacetField>> ent : byField.entrySet()) { for(Map.Entry<String,List<SortedSetDocValuesFacetField>> ent : byField.entrySet()) {
@ -369,18 +359,19 @@ public class FacetsConfig {
//System.out.println("add " + fullPath); //System.out.println("add " + fullPath);
// For facet counts: // For facet counts:
addedStoredFields.add(new SortedSetDocValuesField(indexFieldName, new BytesRef(fullPath))); doc.add(new SortedSetDocValuesField(indexFieldName, new BytesRef(fullPath)));
// For drill-down: // For drill-down:
addedIndexedFields.add(new StringField(indexFieldName, fullPath, Field.Store.NO)); doc.add(new StringField(indexFieldName, fullPath, Field.Store.NO));
addedIndexedFields.add(new StringField(indexFieldName, facetField.dim, Field.Store.NO)); doc.add(new StringField(indexFieldName, facetField.dim, Field.Store.NO));
} }
} }
} }
private void processAssocFacetFields(TaxonomyWriter taxoWriter, Map<String,List<AssociationFacetField>> byField, private void processAssocFacetFields(TaxonomyWriter taxoWriter,
List<Field> addedIndexedFields, List<Field> addedStoredFields) throws IOException { Map<String,List<AssociationFacetField>> byField, Document doc)
for(Map.Entry<String,List<AssociationFacetField>> ent : byField.entrySet()) { throws IOException {
for (Map.Entry<String,List<AssociationFacetField>> ent : byField.entrySet()) {
byte[] bytes = new byte[16]; byte[] bytes = new byte[16];
int upto = 0; int upto = 0;
String indexFieldName = ent.getKey(); String indexFieldName = ent.getKey();
@ -402,7 +393,7 @@ public class FacetsConfig {
System.arraycopy(field.assoc.bytes, field.assoc.offset, bytes, upto, field.assoc.length); System.arraycopy(field.assoc.bytes, field.assoc.offset, bytes, upto, field.assoc.length);
upto += field.assoc.length; upto += field.assoc.length;
} }
addedStoredFields.add(new BinaryDocValuesField(indexFieldName, new BytesRef(bytes, 0, upto))); doc.add(new BinaryDocValuesField(indexFieldName, new BytesRef(bytes, 0, upto)));
} }
} }

View File

@ -24,7 +24,6 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader; import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter; import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexDocument;
import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MatchAllDocsQuery;
@ -65,10 +64,10 @@ public class TestFacetsConfig extends FacetTestCase {
FacetsConfig facetsConfig = new FacetsConfig(); FacetsConfig facetsConfig = new FacetsConfig();
Document doc = new Document(); Document doc = new Document();
doc.add(new FacetField("a", "b")); doc.add(new FacetField("a", "b"));
IndexDocument facetDoc = facetsConfig.build(taxoWriter, doc); doc = facetsConfig.build(taxoWriter, doc);
// these two addDocument() used to fail // these two addDocument() used to fail
indexWriter.addDocument(facetDoc); indexWriter.addDocument(doc);
indexWriter.addDocument(facetDoc); indexWriter.addDocument(doc);
IOUtils.close(indexWriter, taxoWriter); IOUtils.close(indexWriter, taxoWriter);
DirectoryReader indexReader = DirectoryReader.open(indexDir); DirectoryReader indexReader = DirectoryReader.open(indexDir);

View File

@ -20,7 +20,6 @@ package org.apache.lucene.replicator;
import java.io.Closeable; import java.io.Closeable;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
import java.util.concurrent.Callable; import java.util.concurrent.Callable;
import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicInteger;
@ -28,7 +27,6 @@ import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.facet.DrillDownQuery; import org.apache.lucene.facet.DrillDownQuery;
import org.apache.lucene.facet.FacetField; import org.apache.lucene.facet.FacetField;
import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.Facets; import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.FacetsCollector; import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsConfig; import org.apache.lucene.facet.FacetsConfig;
@ -38,7 +36,6 @@ import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader; import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter; import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexDocument;
import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.SnapshotDeletionPolicy; import org.apache.lucene.index.SnapshotDeletionPolicy;
@ -180,7 +177,7 @@ public class IndexAndTaxonomyReplicationClientTest extends ReplicatorTestCase {
return new IndexAndTaxonomyRevision(publishIndexWriter, publishTaxoWriter); return new IndexAndTaxonomyRevision(publishIndexWriter, publishTaxoWriter);
} }
private IndexDocument newDocument(TaxonomyWriter taxoWriter, int id) throws IOException { private Document newDocument(TaxonomyWriter taxoWriter, int id) throws IOException {
Document doc = new Document(); Document doc = new Document();
doc.add(new FacetField("A", Integer.toString(id, 16))); doc.add(new FacetField("A", Integer.toString(id, 16)));
return config.build(publishTaxoWriter, doc); return config.build(publishTaxoWriter, doc);

View File

@ -19,17 +19,14 @@ package org.apache.lucene.replicator;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.util.Collections;
import java.util.List; import java.util.List;
import java.util.Map.Entry;
import java.util.Map; import java.util.Map;
import java.util.Map.Entry;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.facet.FacetField; import org.apache.lucene.facet.FacetField;
import org.apache.lucene.facet.FacetsConfig; import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter; import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.index.IndexDocument;
import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig;
@ -43,7 +40,7 @@ import org.junit.Test;
public class IndexAndTaxonomyRevisionTest extends ReplicatorTestCase { public class IndexAndTaxonomyRevisionTest extends ReplicatorTestCase {
private IndexDocument newDocument(TaxonomyWriter taxoWriter) throws IOException { private Document newDocument(TaxonomyWriter taxoWriter) throws IOException {
FacetsConfig config = new FacetsConfig(); FacetsConfig config = new FacetsConfig();
Document doc = new Document(); Document doc = new Document();
doc.add(new FacetField("A", "1")); doc.add(new FacetField("A", "1"));