LUCENE-5339: Gilad's feedback, improve javadocs

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5339@1546008 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2013-11-27 12:07:30 +00:00
parent b3a77738fa
commit 825a02356d
23 changed files with 195 additions and 98 deletions

1
TODO
View File

@ -4,7 +4,6 @@ TODO
- allow path.length==0? - allow path.length==0?
- make a variant/sugar of FacetsConfig.build that just updates an existing doc? - make a variant/sugar of FacetsConfig.build that just updates an existing doc?
- need test coverage of utility search methods - need test coverage of utility search methods
- move DocumentBuilder.build -> FacetsConfig.build
- getSpecificValue for a dim isn't reliable - getSpecificValue for a dim isn't reliable
- we could put more stuff into the "schema", e.g. this field is - we could put more stuff into the "schema", e.g. this field is
sorted-set-DV and that one is taxo? sorted-set-DV and that one is taxo?

View File

@ -240,7 +240,7 @@
<!-- core: problems --> <!-- core: problems -->
<check-missing-javadocs dir="build/docs/demo" level="method"/> <check-missing-javadocs dir="build/docs/demo" level="method"/>
<check-missing-javadocs dir="build/docs/expressions" level="method"/> <check-missing-javadocs dir="build/docs/expressions" level="method"/>
<!-- facet: problems --> <check-missing-javadocs dir="build/docs/facet" level="method"/>
<!-- grouping: problems --> <!-- grouping: problems -->
<!-- highlighter: problems --> <!-- highlighter: problems -->
<check-missing-javadocs dir="build/docs/join" level="method"/> <check-missing-javadocs dir="build/docs/join" level="method"/>

View File

@ -19,12 +19,20 @@ package org.apache.lucene.facet;
import java.util.Arrays; import java.util.Arrays;
import org.apache.lucene.document.Document; // javadocs
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType; import org.apache.lucene.document.FieldType;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
/** Associates an arbitrary byte[] with the added facet /** Add an instance of this to your {@link Document} to add
* path. */ * a facet label associated with an arbitrary byte[].
* This will require a custom {@link Facets}
* implementation at search time; see {@link
* IntAssociationFacetField} and {@link
* FloatAssociationFacetField} to use existing {@link
* Facets} implementations.
*
* @lucene.experimental */
public class AssociationFacetField extends Field { public class AssociationFacetField extends Field {
static final FieldType TYPE = new FieldType(); static final FieldType TYPE = new FieldType();
static { static {
@ -35,6 +43,8 @@ public class AssociationFacetField extends Field {
protected final String[] path; protected final String[] path;
protected final BytesRef assoc; protected final BytesRef assoc;
/** Creates this from {@code dim} and {@code path} and an
* association */
public AssociationFacetField(BytesRef assoc, String dim, String... path) { public AssociationFacetField(BytesRef assoc, String dim, String... path) {
super("dummy", TYPE); super("dummy", TYPE);
this.dim = dim; this.dim = dim;

View File

@ -21,7 +21,9 @@ import java.io.IOException;
import java.util.Map; import java.util.Map;
import java.util.WeakHashMap; import java.util.WeakHashMap;
import org.apache.lucene.codecs.DocValuesFormat; // javadocs
import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.BinaryDocValues; // javadocs
import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef; import org.apache.lucene.util.IntsRef;

View File

@ -33,14 +33,14 @@ import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.Filter; import org.apache.lucene.search.Filter;
import org.apache.lucene.search.FilteredQuery; import org.apache.lucene.search.FilteredQuery;
import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.NumericRangeQuery; // javadocs
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TermQuery;
/** /**
* A {@link Query} for drill-down over {@link FacetLabel categories}. You * A {@link Query} for drill-down over facet categories. You
* should call {@link #add(FacetLabel...)} for every group of categories you * should call {@link #add(String, String...)} for every group of categories you
* want to drill-down over. Each category in the group is {@code OR'ed} with * want to drill-down over.
* the others, and groups are {@code AND'ed}.
* <p> * <p>
* <b>NOTE:</b> if you choose to create your own {@link Query} by calling * <b>NOTE:</b> if you choose to create your own {@link Query} by calling
* {@link #term}, it is recommended to wrap it with {@link ConstantScoreQuery} * {@link #term}, it is recommended to wrap it with {@link ConstantScoreQuery}
@ -51,6 +51,7 @@ import org.apache.lucene.search.TermQuery;
*/ */
public final class DrillDownQuery extends Query { public final class DrillDownQuery extends Query {
/** Creates a drill-down term. */
public static Term term(String field, String dim, String... path) { public static Term term(String field, String dim, String... path) {
return new Term(field, FacetsConfig.pathToString(dim, path)); return new Term(field, FacetsConfig.pathToString(dim, path));
} }
@ -96,21 +97,17 @@ public final class DrillDownQuery extends Query {
this.config = config; this.config = config;
} }
/** /** Creates a new {@code DrillDownQuery} without a base query,
* Creates a new {@code DrillDownQuery} without a base query, * to perform a pure browsing query (equivalent to using
* to perform a pure browsing query (equivalent to using * {@link MatchAllDocsQuery} as base). */
* {@link MatchAllDocsQuery} as base).
*/
public DrillDownQuery(FacetsConfig config) { public DrillDownQuery(FacetsConfig config) {
this(config, null); this(config, null);
} }
/** /** Creates a new {@code DrillDownQuery} over the given base query. Can be
* Creates a new {@code DrillDownQuery} over the given base query. Can be * {@code null}, in which case the result {@link Query} from
* {@code null}, in which case the result {@link Query} from * {@link #rewrite(IndexReader)} will be a pure browsing query, filtering on
* {@link #rewrite(IndexReader)} will be a pure browsing query, filtering on * the added categories only. */
* the added categories only.
*/
public DrillDownQuery(FacetsConfig config, Query baseQuery) { public DrillDownQuery(FacetsConfig config, Query baseQuery) {
query = new BooleanQuery(true); // disable coord query = new BooleanQuery(true); // disable coord
if (baseQuery != null) { if (baseQuery != null) {
@ -139,10 +136,9 @@ public final class DrillDownQuery extends Query {
} }
/** Adds one dimension of drill downs; if you pass the same /** Adds one dimension of drill downs; if you pass the same
* dimension again, it's OR'd with the previous * dimension more than once it is OR'd with the previous
* constraints on that dimension, and all dimensions are * constraints on that dimension, and all dimensions are
* AND'd against each other and the base query. */ * AND'd against each other and the base query. */
// nocommit can we remove FacetLabel here?
public void add(String dim, String... path) { public void add(String dim, String... path) {
if (drillDownDims.containsKey(dim)) { if (drillDownDims.containsKey(dim)) {
@ -165,7 +161,9 @@ public final class DrillDownQuery extends Query {
/** Expert: add a custom drill-down subQuery. Use this /** Expert: add a custom drill-down subQuery. Use this
* when you have a separate way to drill-down on the * when you have a separate way to drill-down on the
* dimension than the indexed facet ordinals. */ * dimension than the indexed facet ordinals (for
* example, use a {@link NumericRangeQuery} to drill down
* after {@link RangeFacetCounts}). */
public void add(String dim, Query subQuery) { public void add(String dim, Query subQuery) {
// TODO: we should use FilteredQuery? // TODO: we should use FilteredQuery?

View File

@ -65,7 +65,6 @@ import org.apache.lucene.search.Weight;
* *
* @lucene.experimental * @lucene.experimental
*/ */
public class DrillSideways { public class DrillSideways {
protected final IndexSearcher searcher; protected final IndexSearcher searcher;
@ -73,27 +72,21 @@ public class DrillSideways {
protected final SortedSetDocValuesReaderState state; protected final SortedSetDocValuesReaderState state;
protected final FacetsConfig config; protected final FacetsConfig config;
/** /** Create a new {@code DrillSideways} instance. */
* Create a new {@code DrillSideways} instance, assuming the categories were
* indexed with {@link FacetFields}.
*/
public DrillSideways(IndexSearcher searcher, FacetsConfig config, TaxonomyReader taxoReader) { public DrillSideways(IndexSearcher searcher, FacetsConfig config, TaxonomyReader taxoReader) {
this(searcher, config, taxoReader, null); this(searcher, config, taxoReader, null);
} }
/** /** Create a new {@code DrillSideways} instance, assuming the categories were
* Create a new {@code DrillSideways} instance, assuming the categories were * indexed with {@link SortedSetDocValuesFacetField}. */
* indexed with {@link SortedSetDocValuesFacetFields}.
*/
public DrillSideways(IndexSearcher searcher, FacetsConfig config, SortedSetDocValuesReaderState state) { public DrillSideways(IndexSearcher searcher, FacetsConfig config, SortedSetDocValuesReaderState state) {
this(searcher, config, null, state); this(searcher, config, null, state);
} }
/** /** Create a new {@code DrillSideways} instance, where some
* Create a new {@code DrillSideways} instance, where some * dimensions were indexed with {@link
* dimensions are sorted set facets and others are * SortedSetDocValuesFacetField} and others were indexed
* taxononmy facets. * with {@link FacetField}. */
*/
public DrillSideways(IndexSearcher searcher, FacetsConfig config, TaxonomyReader taxoReader, SortedSetDocValuesReaderState state) { public DrillSideways(IndexSearcher searcher, FacetsConfig config, TaxonomyReader taxoReader, SortedSetDocValuesReaderState state) {
this.searcher = searcher; this.searcher = searcher;
this.config = config; this.config = config;

View File

@ -19,20 +19,27 @@ package org.apache.lucene.facet;
import java.util.Arrays; import java.util.Arrays;
import org.apache.lucene.document.Document; // javadoc
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType; import org.apache.lucene.document.FieldType;
/** Add an instance of this to your Document for every facet /** Add an instance of this to your {@link Document} for
* label. */ * every facet label. */
public class FacetField extends Field { public class FacetField extends Field {
static final FieldType TYPE = new FieldType(); static final FieldType TYPE = new FieldType();
static { static {
TYPE.setIndexed(true); TYPE.setIndexed(true);
TYPE.freeze(); TYPE.freeze();
} }
/** Dimension for this field. */
public final String dim; public final String dim;
/** Path for this field. */
public final String[] path; public final String[] path;
/** Creates this from {@code dim} and
* {@code path}. */
public FacetField(String dim, String... path) { public FacetField(String dim, String... path) {
super("dummy", TYPE); super("dummy", TYPE);
this.dim = dim; this.dim = dim;

View File

@ -27,8 +27,7 @@ public final class FacetResult {
* the topN. */ * the topN. */
public final Number value; public final Number value;
/** How many labels were populated under the requested /** How many child labels were encountered. */
* path. */
public final int childCount; public final int childCount;
/** Child counts. */ /** Child counts. */

View File

@ -31,21 +31,25 @@ import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.search.TopFieldDocs; import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.search.TopScoreDocCollector; import org.apache.lucene.search.TopScoreDocCollector;
/** Common base class for all facets implementations.
*
* @lucene.experimental */
public abstract class Facets { public abstract class Facets {
/** Returns the topN child labels under the specified /** Returns the topN child labels under the specified
* path. Returns null if the specified path doesn't * path. Returns null if the specified path doesn't
* exist. */ * exist or if this dimension was never seen. */
public abstract FacetResult getTopChildren(int topN, String dim, String... path) throws IOException; public abstract FacetResult getTopChildren(int topN, String dim, String... path) throws IOException;
/** Return the count for a specific path. Returns -1 if /** Return the count or value
* for a specific path. Returns -1 if
* this path doesn't exist, else the count. */ * this path doesn't exist, else the count or value. */
public abstract Number getSpecificValue(String dim, String... path) throws IOException; public abstract Number getSpecificValue(String dim, String... path) throws IOException;
/** Returns topN labels for any dimension that had hits, /** Returns topN labels for any dimension that had hits,
* sorted by the number of hits that dimension matched; * sorted by the number of hits that dimension matched;
* this is used for "sparse" faceting, where many * this is used for "sparse" faceting, where many
* different dimensions were indexed depending on the * different dimensions were indexed, for example
* type of document. */ * depending on the type of document. */
public abstract List<FacetResult> getAllDims(int topN) throws IOException; public abstract List<FacetResult> getAllDims(int topN) throws IOException;
// nocommit where to move? // nocommit where to move?
@ -55,7 +59,7 @@ public abstract class Facets {
* also collect results into a {@link * also collect results into a {@link
* FacetsCollector} for faceting. */ * FacetsCollector} for faceting. */
public static TopDocs search(IndexSearcher searcher, Query q, int topN, FacetsCollector sfc) throws IOException { public static TopDocs search(IndexSearcher searcher, Query q, int topN, FacetsCollector sfc) throws IOException {
// nocommit can we pass the "right" boolean for // TODO: can we pass the "right" boolean for
// in-order...? we'd need access to the protected // in-order...? we'd need access to the protected
// IS.search methods taking Weight... could use // IS.search methods taking Weight... could use
// reflection... // reflection...

View File

@ -41,15 +41,16 @@ import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef; import org.apache.lucene.util.IntsRef;
/** By default a dimension is flat, single valued and does /** Records per-dimension configuration. By default a
* dimension is flat, single valued and does
* not require count for the dimension; use * not require count for the dimension; use
* the setters in this class to change these settings for * the setters in this class to change these settings for
* any dims. * each dim.
* *
* <p><b>NOTE</b>: this configuration is not saved into the * <p><b>NOTE</b>: this configuration is not saved into the
* index, but it's vital, and up to the application to * index, but it's vital, and up to the application to
* ensure, that at search time the provided FacetsConfig * ensure, that at search time the provided {@code
* matches what was used during indexing. * FacetsConfig} matches what was used during indexing.
* *
* @lucene.experimental */ * @lucene.experimental */
public class FacetsConfig { public class FacetsConfig {
@ -64,7 +65,9 @@ public class FacetsConfig {
private final TaxonomyWriter taxoWriter; private final TaxonomyWriter taxoWriter;
/** @lucene.internal */ /** Holds the configuration for one dimension
*
* @lucene.experimental */
// nocommit expose this to the user, vs the setters? // nocommit expose this to the user, vs the setters?
public static final class DimConfig { public static final class DimConfig {
/** True if this dimension is hierarchical. */ /** True if this dimension is hierarchical. */
@ -82,16 +85,22 @@ public class FacetsConfig {
String indexFieldName = DEFAULT_INDEX_FIELD_NAME; String indexFieldName = DEFAULT_INDEX_FIELD_NAME;
} }
/** Default per-dimension configuration. */
public final static DimConfig DEFAULT_DIM_CONFIG = new DimConfig();
/** Default constructor. */
public FacetsConfig() { public FacetsConfig() {
this(null); this(null);
} }
/** Use this constructor at index time, with the provided
* {@link TaxonomyWriter}, and then use the {@link
* #build} method to index documents. */
public FacetsConfig(TaxonomyWriter taxoWriter) { public FacetsConfig(TaxonomyWriter taxoWriter) {
this.taxoWriter = taxoWriter; this.taxoWriter = taxoWriter;
} }
public final static DimConfig DEFAULT_DIM_CONFIG = new DimConfig(); /** Get the current configuration for a dimension. */
public DimConfig getDimConfig(String dimName) { public DimConfig getDimConfig(String dimName) {
DimConfig ft = fieldTypes.get(dimName); DimConfig ft = fieldTypes.get(dimName);
if (ft == null) { if (ft == null) {
@ -100,7 +109,8 @@ public class FacetsConfig {
return ft; return ft;
} }
// nocommit maybe setDimConfig instead? /** Pass {@code true} if this dimension is hierarchical
* (has depth &gt; 1 paths). */
public synchronized void setHierarchical(String dimName, boolean v) { public synchronized void setHierarchical(String dimName, boolean v) {
DimConfig ft = fieldTypes.get(dimName); DimConfig ft = fieldTypes.get(dimName);
if (ft == null) { if (ft == null) {
@ -110,6 +120,8 @@ public class FacetsConfig {
ft.hierarchical = v; ft.hierarchical = v;
} }
/** Pass {@code true} if this dimension may have more than
* one value per document. */
public synchronized void setMultiValued(String dimName, boolean v) { public synchronized void setMultiValued(String dimName, boolean v) {
DimConfig ft = fieldTypes.get(dimName); DimConfig ft = fieldTypes.get(dimName);
if (ft == null) { if (ft == null) {
@ -119,6 +131,9 @@ public class FacetsConfig {
ft.multiValued = v; ft.multiValued = v;
} }
/** Pass {@code true} if at search time you require
* accurate counts of the dimension, i.e. how many
* hits have this dimension. */
public synchronized void setRequireDimCount(String dimName, boolean v) { public synchronized void setRequireDimCount(String dimName, boolean v) {
DimConfig ft = fieldTypes.get(dimName); DimConfig ft = fieldTypes.get(dimName);
if (ft == null) { if (ft == null) {
@ -128,6 +143,9 @@ public class FacetsConfig {
ft.requireDimCount = v; ft.requireDimCount = v;
} }
/** Specify which index field name should hold the
* ordinals for this dimension; this is only used by the
* taxonomy based facet methods. */
public synchronized void setIndexFieldName(String dimName, String indexFieldName) { public synchronized void setIndexFieldName(String dimName, String indexFieldName) {
DimConfig ft = fieldTypes.get(dimName); DimConfig ft = fieldTypes.get(dimName);
if (ft == null) { if (ft == null) {
@ -149,7 +167,7 @@ public class FacetsConfig {
} }
/** Translates any added {@link FacetField}s into normal /** Translates any added {@link FacetField}s into normal
* fields for indexing */ * fields for indexing. */
public IndexDocument build(IndexDocument doc) throws IOException { public IndexDocument build(IndexDocument doc) throws IOException {
// Find all FacetFields, collated by the actual field: // Find all FacetFields, collated by the actual field:
Map<String,List<FacetField>> byField = new HashMap<String,List<FacetField>>(); Map<String,List<FacetField>> byField = new HashMap<String,List<FacetField>>();
@ -442,8 +460,7 @@ public class FacetsConfig {
// Escapes any occurrence of the path component inside the label: // Escapes any occurrence of the path component inside the label:
private static final char ESCAPE_CHAR = '\u001E'; private static final char ESCAPE_CHAR = '\u001E';
/** Turns a path into a string without stealing any /** Turns a dim + path into an encoded string. */
* characters. */
public static String pathToString(String dim, String[] path) { public static String pathToString(String dim, String[] path) {
String[] fullPath = new String[1+path.length]; String[] fullPath = new String[1+path.length];
fullPath[0] = dim; fullPath[0] = dim;
@ -451,10 +468,13 @@ public class FacetsConfig {
return pathToString(fullPath, fullPath.length); return pathToString(fullPath, fullPath.length);
} }
/** Turns a full path (dim as the first element) into an encoded string. */
public static String pathToString(String[] path) { public static String pathToString(String[] path) {
return pathToString(path, path.length); return pathToString(path, path.length);
} }
/** Turns the first {@code length} elements of {@code
* path} into an encoded string. */
public static String pathToString(String[] path, int length) { public static String pathToString(String[] path, int length) {
// nocommit .... too anal? shouldn't we allow drill // nocommit .... too anal? shouldn't we allow drill
// down on just dim, to get all docs that have that // down on just dim, to get all docs that have that
@ -486,9 +506,9 @@ public class FacetsConfig {
return sb.toString(); return sb.toString();
} }
/** Turns a result from previous call to {@link /** Turns an encoded string (from a previous call to {@link
* #pathToString} back into the original {@code String[]} * #pathToString}) back into the original {@code
* without stealing any characters. */ * String[]}. */
public static String[] stringToPath(String s) { public static String[] stringToPath(String s) {
List<String> parts = new ArrayList<String>(); List<String> parts = new ArrayList<String>();
int length = s.length(); int length = s.length();

View File

@ -28,14 +28,24 @@ import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.FixedBitSet;
// nocommit jdoc that this assumes/requires the default encoding /** Computes facets counts, assuming the default encoding
* into DocValues was used.
*
* @lucene.experimental */
public class FastTaxonomyFacetCounts extends TaxonomyFacets { public class FastTaxonomyFacetCounts extends TaxonomyFacets {
private final int[] counts; private final int[] counts;
/** Create {@code FastTaxonomyFacetCounts}, which also
* counts all facet labels. */
public FastTaxonomyFacetCounts(TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc) throws IOException { public FastTaxonomyFacetCounts(TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc) throws IOException {
this(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, taxoReader, config, fc); this(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, taxoReader, config, fc);
} }
/** Create {@code FastTaxonomyFacetCounts}, using the
* specified {@code indexFieldName} for ordinals. Use
* this if you had set {@link
* FacetsConfig#setIndexFieldName} to change the index
* field name for certain dimensions. */
public FastTaxonomyFacetCounts(String indexFieldName, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc) throws IOException { public FastTaxonomyFacetCounts(String indexFieldName, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc) throws IOException {
super(indexFieldName, taxoReader, config); super(indexFieldName, taxoReader, config);
counts = new int[taxoReader.getSize()]; counts = new int[taxoReader.getSize()];
@ -106,8 +116,6 @@ public class FastTaxonomyFacetCounts extends TaxonomyFacets {
return sum; return sum;
} }
/** Return the count for a specific path. Returns -1 if
* this path doesn't exist, else the count. */
@Override @Override
public Number getSpecificValue(String dim, String... path) throws IOException { public Number getSpecificValue(String dim, String... path) throws IOException {
verifyDim(dim); verifyDim(dim);
@ -120,6 +128,7 @@ public class FastTaxonomyFacetCounts extends TaxonomyFacets {
@Override @Override
public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException { public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException {
// TODO: can we factor this out?
if (topN <= 0) { if (topN <= 0) {
throw new IllegalArgumentException("topN must be > 0 (got: " + topN + ")"); throw new IllegalArgumentException("topN must be > 0 (got: " + topN + ")");
} }

View File

@ -19,24 +19,31 @@ package org.apache.lucene.facet;
import java.util.Arrays; import java.util.Arrays;
import org.apache.lucene.document.Document; // javadocs
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType; import org.apache.lucene.document.FieldType;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
/** Associates an arbitrary float with the added facet /** Add an instance of this to your {@link Document} to add
* path, encoding the float into a 4-byte BytesRef. */ * a facet label associated with a float. Use {@link
* TaxonomyFacetSumFloatAssociations} to aggregate float values
* per facet label at search time.
*
* @lucene.experimental */
public class FloatAssociationFacetField extends AssociationFacetField { public class FloatAssociationFacetField extends AssociationFacetField {
/** Utility ctor: associates an int value (translates it /** Creates this from {@code dim} and {@code path} and a
* to 4-byte BytesRef). */ * float association */
public FloatAssociationFacetField(float assoc, String dim, String... path) { public FloatAssociationFacetField(float assoc, String dim, String... path) {
super(floatToBytesRef(assoc), dim, path); super(floatToBytesRef(assoc), dim, path);
} }
/** Encodes a {@code float} as a 4-byte {@link BytesRef}. */
public static BytesRef floatToBytesRef(float v) { public static BytesRef floatToBytesRef(float v) {
return IntAssociationFacetField.intToBytesRef(Float.floatToIntBits(v)); return IntAssociationFacetField.intToBytesRef(Float.floatToIntBits(v));
} }
/** Decodes a previously encoded {@code float}. */
public static float bytesRefToFloat(BytesRef b) { public static float bytesRefToFloat(BytesRef b) {
return Float.intBitsToFloat(IntAssociationFacetField.bytesRefToInt(b)); return Float.intBitsToFloat(IntAssociationFacetField.bytesRefToInt(b));
} }

View File

@ -19,20 +19,27 @@ package org.apache.lucene.facet;
import java.util.Arrays; import java.util.Arrays;
import org.apache.lucene.document.Document; // javadocs
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType; import org.apache.lucene.document.FieldType;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
/** Associates an arbitrary int with the added facet /** Add an instance of this to your {@link Document} to add
* path, encoding the int into a 4-byte BytesRef. */ * a facet label associated with an int. Use {@link
* TaxonomyFacetSumIntAssociations} to aggregate int values
* per facet label at search time.
*
* @lucene.experimental */
public class IntAssociationFacetField extends AssociationFacetField { public class IntAssociationFacetField extends AssociationFacetField {
/** Utility ctor: associates an int value (translates it /** Creates this from {@code dim} and {@code path} and an
* to 4-byte BytesRef). */ * int association */
public IntAssociationFacetField(int assoc, String dim, String... path) { public IntAssociationFacetField(int assoc, String dim, String... path) {
super(intToBytesRef(assoc), dim, path); super(intToBytesRef(assoc), dim, path);
} }
/** Encodes an {@code int} as a 4-byte {@link BytesRef},
* big-endian. */
public static BytesRef intToBytesRef(int v) { public static BytesRef intToBytesRef(int v) {
byte[] bytes = new byte[4]; byte[] bytes = new byte[4];
// big-endian: // big-endian:
@ -43,6 +50,7 @@ public class IntAssociationFacetField extends AssociationFacetField {
return new BytesRef(bytes); return new BytesRef(bytes);
} }
/** Decodes a previously encoded {@code int}. */
public static int bytesRefToInt(BytesRef b) { public static int bytesRefToInt(BytesRef b) {
return ((b.bytes[b.offset]&0xFF) << 24) | return ((b.bytes[b.offset]&0xFF) << 24) |
((b.bytes[b.offset+1]&0xFF) << 16) | ((b.bytes[b.offset+1]&0xFF) << 16) |

View File

@ -43,5 +43,8 @@ public final class LabelAndValue {
return label.equals(other.label) && value.equals(other.value); return label.equals(other.label) && value.equals(other.value);
} }
// nocommit hashCode @Override
public int hashCode() {
return label.hashCode() + 1439 * value.hashCode();
}
} }

View File

@ -17,11 +17,10 @@ package org.apache.lucene.facet;
* limitations under the License. * limitations under the License.
*/ */
/** Represents a single labelled range, one facet label in /** Represents a single labeled range, one facet label in
* the facets computed by {@link RangeAccumulator}. * the facets computed by {@link RangeFacetCounts}.
* *
* @lucene.experimental */ * @lucene.experimental */
public abstract class Range { public abstract class Range {
public final String label; public final String label;

View File

@ -27,19 +27,28 @@ import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.LongFieldSource; import org.apache.lucene.queries.function.valuesource.LongFieldSource;
/** /** {@link Facets} implementation that computes counts for
* accumulates counts for provided ranges. * dynamic ranges from a provided {@link ValueSource}. Use
*/ * this for dimensions that change in real-time (e.g. a
* relative time based dimension like "Past day", "Past 2
* days", etc.) or that change for each user (e.g. a
* distance dimension like "&lt; 1 km", "&lt; 2 km", etc.).
*
* @lucene.experimental */
public class RangeFacetCounts extends Facets { public class RangeFacetCounts extends Facets {
private final Range[] ranges; private final Range[] ranges;
private final int[] counts; private final int[] counts;
private final String field; private final String field;
private int totCount; private int totCount;
/** Create {@code RangeFacetCounts}, using {@link
* LongFieldSource} from the specified field. */
public RangeFacetCounts(String field, FacetsCollector hits, Range... ranges) throws IOException { public RangeFacetCounts(String field, FacetsCollector hits, Range... ranges) throws IOException {
this(field, new LongFieldSource(field), hits, ranges); this(field, new LongFieldSource(field), hits, ranges);
} }
/** Create {@code RangeFacetCounts}, using the provided
* {@link ValueSource}. */
public RangeFacetCounts(String field, ValueSource valueSource, FacetsCollector hits, Range... ranges) throws IOException { public RangeFacetCounts(String field, ValueSource valueSource, FacetsCollector hits, Range... ranges) throws IOException {
this.ranges = ranges; this.ranges = ranges;
this.field = field; this.field = field;

View File

@ -71,13 +71,14 @@ public final class SortedSetDocValuesReaderState {
private final Map<String,OrdRange> prefixToOrdRange = new HashMap<String,OrdRange>(); private final Map<String,OrdRange> prefixToOrdRange = new HashMap<String,OrdRange>();
/** Creates this, pulling doc values from the default {@link
* FacetsConfig#DEFAULT_INDEX_FIELD_NAME}. */
public SortedSetDocValuesReaderState(IndexReader reader) throws IOException { public SortedSetDocValuesReaderState(IndexReader reader) throws IOException {
this(reader, FacetsConfig.DEFAULT_INDEX_FIELD_NAME); this(reader, FacetsConfig.DEFAULT_INDEX_FIELD_NAME);
} }
/** Create an instance, scanning the {@link /** Creates this, pulling doc values from the specified
* SortedSetDocValues} from the provided reader, with * field. */
* default {@link FacetIndexingParams}. */
public SortedSetDocValuesReaderState(IndexReader reader, String field) throws IOException { public SortedSetDocValuesReaderState(IndexReader reader, String field) throws IOException {
this.field = field; this.field = field;

View File

@ -30,13 +30,19 @@ import org.apache.lucene.util.IntsRef;
/** Reads from any {@link OrdinalsReader}; use {@link /** Reads from any {@link OrdinalsReader}; use {@link
* FastTaxonomyFacetCounts} if you are just using the * FastTaxonomyFacetCounts} if you are just using the
* default encoding from {@link BinaryDocValues}. */ * default encoding from {@link BinaryDocValues}.
*
* @lucene.experimental */
// nocommit remove & add specialized Cached variation only? // nocommit remove & add specialized Cached variation only?
public class TaxonomyFacetCounts extends TaxonomyFacets { public class TaxonomyFacetCounts extends TaxonomyFacets {
private final OrdinalsReader ordinalsReader; private final OrdinalsReader ordinalsReader;
private final int[] counts; private final int[] counts;
/** Create {@code TaxonomyFacetCounts}, which also
* counts all facet labels. Use this for a non-default
* {@link OrdinalsReader}; otherwise use {@link
* FastTaxonomyFacetCounts}. */
public TaxonomyFacetCounts(OrdinalsReader ordinalsReader, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc) throws IOException { public TaxonomyFacetCounts(OrdinalsReader ordinalsReader, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc) throws IOException {
super(ordinalsReader.getIndexFieldName(), taxoReader, config); super(ordinalsReader.getIndexFieldName(), taxoReader, config);
this.ordinalsReader = ordinalsReader; this.ordinalsReader = ordinalsReader;
@ -89,8 +95,6 @@ public class TaxonomyFacetCounts extends TaxonomyFacets {
return sum; return sum;
} }
/** Return the count for a specific path. Returns -1 if
* this path doesn't exist, else the count. */
@Override @Override
public Number getSpecificValue(String dim, String... path) throws IOException { public Number getSpecificValue(String dim, String... path) throws IOException {
verifyDim(dim); verifyDim(dim);
@ -103,6 +107,7 @@ public class TaxonomyFacetCounts extends TaxonomyFacets {
@Override @Override
public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException { public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException {
// TODO: can we factor this out?
if (topN <= 0) { if (topN <= 0) {
throw new IllegalArgumentException("topN must be > 0 (got: " + topN + ")"); throw new IllegalArgumentException("topN must be > 0 (got: " + topN + ")");
} }

View File

@ -27,14 +27,22 @@ import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.FixedBitSet;
// nocommit jdoc that this assumes/requires the default encoding /** Aggregates sum of float values previously indexed with
* {@link FloatAssociationFacetField}, assuming the default
* encoding.
*
* @lucene.experimental */
public class TaxonomyFacetSumFloatAssociations extends TaxonomyFacets { public class TaxonomyFacetSumFloatAssociations extends TaxonomyFacets {
private final float[] values; private final float[] values;
/** Create {@code TaxonomyFacetSumFloatAssociations} against
* the default index field. */
public TaxonomyFacetSumFloatAssociations(TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc) throws IOException { public TaxonomyFacetSumFloatAssociations(TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc) throws IOException {
this(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, taxoReader, config, fc); this(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, taxoReader, config, fc);
} }
/** Create {@code TaxonomyFacetSumFloatAssociations} against
* the specified index field. */
public TaxonomyFacetSumFloatAssociations(String indexFieldName, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc) throws IOException { public TaxonomyFacetSumFloatAssociations(String indexFieldName, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc) throws IOException {
super(indexFieldName, taxoReader, config); super(indexFieldName, taxoReader, config);
values = new float[taxoReader.getSize()]; values = new float[taxoReader.getSize()];
@ -80,8 +88,6 @@ public class TaxonomyFacetSumFloatAssociations extends TaxonomyFacets {
} }
} }
/** Return the count for a specific path. Returns -1 if
* this path doesn't exist, else the count. */
@Override @Override
public Number getSpecificValue(String dim, String... path) throws IOException { public Number getSpecificValue(String dim, String... path) throws IOException {
verifyDim(dim); verifyDim(dim);
@ -94,6 +100,7 @@ public class TaxonomyFacetSumFloatAssociations extends TaxonomyFacets {
@Override @Override
public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException { public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException {
// TODO: can we factor this out?
if (topN <= 0) { if (topN <= 0) {
throw new IllegalArgumentException("topN must be > 0 (got: " + topN + ")"); throw new IllegalArgumentException("topN must be > 0 (got: " + topN + ")");
} }

View File

@ -27,14 +27,22 @@ import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.FixedBitSet;
// nocommit jdoc that this assumes/requires the default encoding /** Aggregates sum of int values previously indexed with
* {@link IntAssociationFacetField}, assuming the default
* encoding.
*
* @lucene.experimental */
public class TaxonomyFacetSumIntAssociations extends TaxonomyFacets { public class TaxonomyFacetSumIntAssociations extends TaxonomyFacets {
private final int[] values; private final int[] values;
/** Create {@code TaxonomyFacetSumIntAssociations} against
* the default index field. */
public TaxonomyFacetSumIntAssociations(TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc) throws IOException { public TaxonomyFacetSumIntAssociations(TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc) throws IOException {
this(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, taxoReader, config, fc); this(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, taxoReader, config, fc);
} }
/** Create {@code TaxonomyFacetSumIntAssociations} against
* the specified index field. */
public TaxonomyFacetSumIntAssociations(String indexFieldName, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc) throws IOException { public TaxonomyFacetSumIntAssociations(String indexFieldName, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc) throws IOException {
super(indexFieldName, taxoReader, config); super(indexFieldName, taxoReader, config);
values = new int[taxoReader.getSize()]; values = new int[taxoReader.getSize()];
@ -80,8 +88,6 @@ public class TaxonomyFacetSumIntAssociations extends TaxonomyFacets {
} }
} }
/** Return the count for a specific path. Returns -1 if
* this path doesn't exist, else the count. */
@Override @Override
public Number getSpecificValue(String dim, String... path) throws IOException { public Number getSpecificValue(String dim, String... path) throws IOException {
verifyDim(dim); verifyDim(dim);
@ -94,6 +100,7 @@ public class TaxonomyFacetSumIntAssociations extends TaxonomyFacets {
@Override @Override
public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException { public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException {
// TODO: can we factor this out?
if (topN <= 0) { if (topN <= 0) {
throw new IllegalArgumentException("topN must be > 0 (got: " + topN + ")"); throw new IllegalArgumentException("topN must be > 0 (got: " + topN + ")");
} }

View File

@ -34,8 +34,9 @@ import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IntsRef; import org.apache.lucene.util.IntsRef;
/** Aggregates sum of values from a {@link ValueSource}, for /** Aggregates sum of values from a {@link ValueSource}, for
* each facet label. */ * each facet label.
*
* @lucene.experimental */
public class TaxonomyFacetSumValueSource extends TaxonomyFacets { public class TaxonomyFacetSumValueSource extends TaxonomyFacets {
private final float[] values; private final float[] values;
private final OrdinalsReader ordinalsReader; private final OrdinalsReader ordinalsReader;
@ -141,6 +142,7 @@ public class TaxonomyFacetSumValueSource extends TaxonomyFacets {
@Override @Override
public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException { public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException {
// TODO: can we factor this out?
if (topN <= 0) { if (topN <= 0) {
throw new IllegalArgumentException("topN must be > 0 (got: " + topN + ")"); throw new IllegalArgumentException("topN must be > 0 (got: " + topN + ")");
} }

View File

@ -27,23 +27,19 @@ import org.apache.lucene.facet.taxonomy.ParallelTaxonomyArrays;
import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.facet.taxonomy.TaxonomyReader;
/** Base class for all taxonomy-based facets impls. */ /** Base class for all taxonomy-based facets impls. */
abstract class TaxonomyFacets extends Facets { public abstract class TaxonomyFacets extends Facets {
protected final String indexFieldName; protected final String indexFieldName;
protected final TaxonomyReader taxoReader; protected final TaxonomyReader taxoReader;
protected final FacetsConfig config; protected final FacetsConfig config;
protected final int[] children; protected final int[] children;
protected final int[] parents;
protected final int[] siblings; protected final int[] siblings;
/** Creates the base facets state from the index field name,
* the taxonomy reader and the facets config. */
protected TaxonomyFacets(String indexFieldName, TaxonomyReader taxoReader, FacetsConfig config) throws IOException { protected TaxonomyFacets(String indexFieldName, TaxonomyReader taxoReader, FacetsConfig config) throws IOException {
this.indexFieldName = indexFieldName; this.indexFieldName = indexFieldName;
this.taxoReader = taxoReader; this.taxoReader = taxoReader;
this.config = config; this.config = config;
ParallelTaxonomyArrays pta = taxoReader.getParallelTaxonomyArrays(); ParallelTaxonomyArrays pta = taxoReader.getParallelTaxonomyArrays();
children = pta.children(); children = pta.children();
parents = pta.parents();
siblings = pta.siblings(); siblings = pta.siblings();
} }

View File

@ -21,4 +21,16 @@
<body> <body>
<h1>faceted search</h1> <h1>faceted search</h1>
</body> </body>
<p>
This module provides multiple methods for computing facet counts and
value aggregations.
</p>
<p>
The taxonomy-based methods ({@link FastTaxonomyFacetCounts},
{@link TaxonomyFacetSumIntAssociations}, and the other taxonomy
facets classes) compute their results using a taxonomy reader.
</p>
<p>
The Facets.search utility methods are useful for doing an "ordinary"
search (sorting by score, or by a specified Sort) but also
collecting into a FacetsCollector for subsequent faceting.
</p>
</html> </html>