mirror of https://github.com/apache/lucene.git
LUCENE-5339: Gilad's feedback, improve javadocs
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5339@1546008 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
b3a77738fa
commit
825a02356d
1
TODO
1
TODO
|
@ -4,7 +4,6 @@ TODO
|
|||
- allow path.length==0?
|
||||
- make a variant/sugar of FacetsConfig.build that just updates an existing doc?
|
||||
- need test coverage of utility search methods
|
||||
- move DocumentBuilder.build -> FacetsConfig.build
|
||||
- getSpecificValue for a dim isn't reliable
|
||||
- we could put more stuff into the "schema", e.g. this field is
|
||||
sorted-set-DV and that one is taxo?
|
||||
|
|
|
@ -240,7 +240,7 @@
|
|||
<!-- core: problems -->
|
||||
<check-missing-javadocs dir="build/docs/demo" level="method"/>
|
||||
<check-missing-javadocs dir="build/docs/expressions" level="method"/>
|
||||
<!-- facet: problems -->
|
||||
<check-missing-javadocs dir="build/docs/facet" level="method"/>
|
||||
<!-- grouping: problems -->
|
||||
<!-- highlighter: problems -->
|
||||
<check-missing-javadocs dir="build/docs/join" level="method"/>
|
||||
|
|
|
@ -19,12 +19,20 @@ package org.apache.lucene.facet;
|
|||
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.apache.lucene.document.Document; // javadocs
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/** Associates an arbitrary byte[] with the added facet
|
||||
* path. */
|
||||
/** Add an instance of this to your {@link Document} to add
|
||||
* a facet label associated with an arbitrary byte[].
|
||||
* This will require a custom {@link Facets}
|
||||
* implementation at search time; see {@link
|
||||
* IntAssociationFacetField} and {@link
|
||||
* FloatAssociationFacetField} to use existing {@link
|
||||
* Facets} implementations.
|
||||
*
|
||||
* @lucene.experimental */
|
||||
public class AssociationFacetField extends Field {
|
||||
static final FieldType TYPE = new FieldType();
|
||||
static {
|
||||
|
@ -35,6 +43,8 @@ public class AssociationFacetField extends Field {
|
|||
protected final String[] path;
|
||||
protected final BytesRef assoc;
|
||||
|
||||
/** Creates this from {@code dim} and {@code path} and an
|
||||
* association */
|
||||
public AssociationFacetField(BytesRef assoc, String dim, String... path) {
|
||||
super("dummy", TYPE);
|
||||
this.dim = dim;
|
||||
|
|
|
@ -21,7 +21,9 @@ import java.io.IOException;
|
|||
import java.util.Map;
|
||||
import java.util.WeakHashMap;
|
||||
|
||||
import org.apache.lucene.codecs.DocValuesFormat; // javadocs
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.index.BinaryDocValues; // javadocs
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IntsRef;
|
||||
|
|
|
@ -33,14 +33,14 @@ import org.apache.lucene.search.ConstantScoreQuery;
|
|||
import org.apache.lucene.search.Filter;
|
||||
import org.apache.lucene.search.FilteredQuery;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.search.NumericRangeQuery; // javadocs
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
|
||||
/**
|
||||
* A {@link Query} for drill-down over {@link FacetLabel categories}. You
|
||||
* should call {@link #add(FacetLabel...)} for every group of categories you
|
||||
* want to drill-down over. Each category in the group is {@code OR'ed} with
|
||||
* the others, and groups are {@code AND'ed}.
|
||||
* A {@link Query} for drill-down over facet categories. You
|
||||
* should call {@link #add(String, String...)} for every group of categories you
|
||||
* want to drill-down over.
|
||||
* <p>
|
||||
* <b>NOTE:</b> if you choose to create your own {@link Query} by calling
|
||||
* {@link #term}, it is recommended to wrap it with {@link ConstantScoreQuery}
|
||||
|
@ -51,6 +51,7 @@ import org.apache.lucene.search.TermQuery;
|
|||
*/
|
||||
public final class DrillDownQuery extends Query {
|
||||
|
||||
/** Creates a drill-down term. */
|
||||
public static Term term(String field, String dim, String... path) {
|
||||
return new Term(field, FacetsConfig.pathToString(dim, path));
|
||||
}
|
||||
|
@ -96,21 +97,17 @@ public final class DrillDownQuery extends Query {
|
|||
this.config = config;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new {@code DrillDownQuery} without a base query,
|
||||
* to perform a pure browsing query (equivalent to using
|
||||
* {@link MatchAllDocsQuery} as base).
|
||||
*/
|
||||
/** Creates a new {@code DrillDownQuery} without a base query,
|
||||
* to perform a pure browsing query (equivalent to using
|
||||
* {@link MatchAllDocsQuery} as base). */
|
||||
public DrillDownQuery(FacetsConfig config) {
|
||||
this(config, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new {@code DrillDownQuery} over the given base query. Can be
|
||||
* {@code null}, in which case the result {@link Query} from
|
||||
* {@link #rewrite(IndexReader)} will be a pure browsing query, filtering on
|
||||
* the added categories only.
|
||||
*/
|
||||
/** Creates a new {@code DrillDownQuery} over the given base query. Can be
|
||||
* {@code null}, in which case the result {@link Query} from
|
||||
* {@link #rewrite(IndexReader)} will be a pure browsing query, filtering on
|
||||
* the added categories only. */
|
||||
public DrillDownQuery(FacetsConfig config, Query baseQuery) {
|
||||
query = new BooleanQuery(true); // disable coord
|
||||
if (baseQuery != null) {
|
||||
|
@ -139,10 +136,9 @@ public final class DrillDownQuery extends Query {
|
|||
}
|
||||
|
||||
/** Adds one dimension of drill downs; if you pass the same
|
||||
* dimension again, it's OR'd with the previous
|
||||
* constraints on that dimension, and all dimensions are
|
||||
* dimension more than once it is OR'd with the previous
|
||||
* cofnstraints on that dimension, and all dimensions are
|
||||
* AND'd against each other and the base query. */
|
||||
// nocommit can we remove FacetLabel here?
|
||||
public void add(String dim, String... path) {
|
||||
|
||||
if (drillDownDims.containsKey(dim)) {
|
||||
|
@ -165,7 +161,9 @@ public final class DrillDownQuery extends Query {
|
|||
|
||||
/** Expert: add a custom drill-down subQuery. Use this
|
||||
* when you have a separate way to drill-down on the
|
||||
* dimension than the indexed facet ordinals. */
|
||||
* dimension than the indexed facet ordinals (for
|
||||
* example, use a {@link NumericRangeQuery} to drill down
|
||||
* after{@link RangeFacetCounts}. */
|
||||
public void add(String dim, Query subQuery) {
|
||||
|
||||
// TODO: we should use FilteredQuery?
|
||||
|
|
|
@ -65,7 +65,6 @@ import org.apache.lucene.search.Weight;
|
|||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
|
||||
public class DrillSideways {
|
||||
|
||||
protected final IndexSearcher searcher;
|
||||
|
@ -73,27 +72,21 @@ public class DrillSideways {
|
|||
protected final SortedSetDocValuesReaderState state;
|
||||
protected final FacetsConfig config;
|
||||
|
||||
/**
|
||||
* Create a new {@code DrillSideways} instance, assuming the categories were
|
||||
* indexed with {@link FacetFields}.
|
||||
*/
|
||||
/** Create a new {@code DrillSideways} instance. */
|
||||
public DrillSideways(IndexSearcher searcher, FacetsConfig config, TaxonomyReader taxoReader) {
|
||||
this(searcher, config, taxoReader, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new {@code DrillSideways} instance, assuming the categories were
|
||||
* indexed with {@link SortedSetDocValuesFacetFields}.
|
||||
*/
|
||||
/** Create a new {@code DrillSideways} instance, assuming the categories were
|
||||
* indexed with {@link SortedSetDocValuesFacetField}. */
|
||||
public DrillSideways(IndexSearcher searcher, FacetsConfig config, SortedSetDocValuesReaderState state) {
|
||||
this(searcher, config, null, state);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new {@code DrillSideways} instance, where some
|
||||
* dimensions are sorted set facets and others are
|
||||
* taxononmy facets.
|
||||
*/
|
||||
/** Create a new {@code DrillSideways} instance, where some
|
||||
* dimensions were indexed with {@link
|
||||
* SortedSetDocValuesFacetField} and others were indexed
|
||||
* with {@link FacetField}. */
|
||||
public DrillSideways(IndexSearcher searcher, FacetsConfig config, TaxonomyReader taxoReader, SortedSetDocValuesReaderState state) {
|
||||
this.searcher = searcher;
|
||||
this.config = config;
|
||||
|
|
|
@ -19,20 +19,27 @@ package org.apache.lucene.facet;
|
|||
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.apache.lucene.document.Document; // javadoc
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
|
||||
/** Add an instance of this to your Document for every facet
|
||||
* label. */
|
||||
/** Add an instance of this to your {@link Document} for
|
||||
* every facet label. */
|
||||
public class FacetField extends Field {
|
||||
static final FieldType TYPE = new FieldType();
|
||||
static {
|
||||
TYPE.setIndexed(true);
|
||||
TYPE.freeze();
|
||||
}
|
||||
|
||||
/** Dimension for this field. */
|
||||
public final String dim;
|
||||
|
||||
/** Path for this field. */
|
||||
public final String[] path;
|
||||
|
||||
/** Creates the this from {@code dim} and
|
||||
* {@code path}. */
|
||||
public FacetField(String dim, String... path) {
|
||||
super("dummy", TYPE);
|
||||
this.dim = dim;
|
||||
|
|
|
@ -27,8 +27,7 @@ public final class FacetResult {
|
|||
* the topN. */
|
||||
public final Number value;
|
||||
|
||||
/** How many labels were populated under the requested
|
||||
* path. */
|
||||
/** How many child labels were encountered. */
|
||||
public final int childCount;
|
||||
|
||||
/** Child counts. */
|
||||
|
|
|
@ -31,21 +31,25 @@ import org.apache.lucene.search.TopFieldCollector;
|
|||
import org.apache.lucene.search.TopFieldDocs;
|
||||
import org.apache.lucene.search.TopScoreDocCollector;
|
||||
|
||||
/** Common base class for all facets implementations.
|
||||
*
|
||||
* @lucene.experimental */
|
||||
public abstract class Facets {
|
||||
/** Returns the topN child labels under the specified
|
||||
* path. Returns null if the specified path doesn't
|
||||
* exist. */
|
||||
* exist or if this dimension was never seen. */
|
||||
public abstract FacetResult getTopChildren(int topN, String dim, String... path) throws IOException;
|
||||
|
||||
/** Return the count for a specific path. Returns -1 if
|
||||
/** Return the count or value
|
||||
* for a specific path. Returns -1 if
|
||||
* this path doesn't exist, else the count. */
|
||||
public abstract Number getSpecificValue(String dim, String... path) throws IOException;
|
||||
|
||||
/** Returns topN labels for any dimension that had hits,
|
||||
* sorted by the number of hits that dimension matched;
|
||||
* this is used for "sparse" faceting, where many
|
||||
* different dimensions were indexed depending on the
|
||||
* type of document. */
|
||||
* different dimensions were indexed, for example
|
||||
* depending on the type of document. */
|
||||
public abstract List<FacetResult> getAllDims(int topN) throws IOException;
|
||||
|
||||
// nocommit where to move?
|
||||
|
@ -55,7 +59,7 @@ public abstract class Facets {
|
|||
* also collect results into a {@link
|
||||
* FacetsCollector} for faceting. */
|
||||
public static TopDocs search(IndexSearcher searcher, Query q, int topN, FacetsCollector sfc) throws IOException {
|
||||
// nocommit can we pass the "right" boolean for
|
||||
// TODO: can we pass the "right" boolean for
|
||||
// in-order...? we'd need access to the protected
|
||||
// IS.search methods taking Weight... could use
|
||||
// reflection...
|
||||
|
|
|
@ -41,15 +41,16 @@ import org.apache.lucene.util.ArrayUtil;
|
|||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IntsRef;
|
||||
|
||||
/** By default a dimension is flat, single valued and does
|
||||
/** Records per-dimension configuration. By default a
|
||||
* dimension is flat, single valued and does
|
||||
* not require count for the dimension; use
|
||||
* the setters in this class to change these settings for
|
||||
* any dims.
|
||||
* each dim.
|
||||
*
|
||||
* <p><b>NOTE</b>: this configuration is not saved into the
|
||||
* index, but it's vital, and up to the application to
|
||||
* ensure, that at search time the provided FacetsConfig
|
||||
* matches what was used during indexing.
|
||||
* ensure, that at search time the provided {@code
|
||||
* FacetsConfig} matches what was used during indexing.
|
||||
*
|
||||
* @lucene.experimental */
|
||||
public class FacetsConfig {
|
||||
|
@ -64,7 +65,9 @@ public class FacetsConfig {
|
|||
|
||||
private final TaxonomyWriter taxoWriter;
|
||||
|
||||
/** @lucene.internal */
|
||||
/** Holds the configuration for one dimension
|
||||
*
|
||||
* @lucene.experimental */
|
||||
// nocommit expose this to the user, vs the setters?
|
||||
public static final class DimConfig {
|
||||
/** True if this dimension is hierarchical. */
|
||||
|
@ -82,16 +85,22 @@ public class FacetsConfig {
|
|||
String indexFieldName = DEFAULT_INDEX_FIELD_NAME;
|
||||
}
|
||||
|
||||
/** Default per-dimension configuration. */
|
||||
public final static DimConfig DEFAULT_DIM_CONFIG = new DimConfig();
|
||||
|
||||
/** Default constructor. */
|
||||
public FacetsConfig() {
|
||||
this(null);
|
||||
}
|
||||
|
||||
/** Use this constructor at index time, with the provided
|
||||
* {@link TaxonomyWriter}, and then use the {@link
|
||||
* #build} method to index documents. */
|
||||
public FacetsConfig(TaxonomyWriter taxoWriter) {
|
||||
this.taxoWriter = taxoWriter;
|
||||
}
|
||||
|
||||
public final static DimConfig DEFAULT_DIM_CONFIG = new DimConfig();
|
||||
|
||||
/** Get the current configuration for a dimension. */
|
||||
public DimConfig getDimConfig(String dimName) {
|
||||
DimConfig ft = fieldTypes.get(dimName);
|
||||
if (ft == null) {
|
||||
|
@ -100,7 +109,8 @@ public class FacetsConfig {
|
|||
return ft;
|
||||
}
|
||||
|
||||
// nocommit maybe setDimConfig instead?
|
||||
/** Pass {@code true} if this dimension is hierarchical
|
||||
* (has depth > 1 paths). */
|
||||
public synchronized void setHierarchical(String dimName, boolean v) {
|
||||
DimConfig ft = fieldTypes.get(dimName);
|
||||
if (ft == null) {
|
||||
|
@ -110,6 +120,8 @@ public class FacetsConfig {
|
|||
ft.hierarchical = v;
|
||||
}
|
||||
|
||||
/** Pass {@code true} if this dimension may have more than
|
||||
* one value per document. */
|
||||
public synchronized void setMultiValued(String dimName, boolean v) {
|
||||
DimConfig ft = fieldTypes.get(dimName);
|
||||
if (ft == null) {
|
||||
|
@ -119,6 +131,9 @@ public class FacetsConfig {
|
|||
ft.multiValued = v;
|
||||
}
|
||||
|
||||
/** Pass {@code true} if at search time you require
|
||||
* accurate counts of the dimension, i.e. how many
|
||||
* hits have this dimension. */
|
||||
public synchronized void setRequireDimCount(String dimName, boolean v) {
|
||||
DimConfig ft = fieldTypes.get(dimName);
|
||||
if (ft == null) {
|
||||
|
@ -128,6 +143,9 @@ public class FacetsConfig {
|
|||
ft.requireDimCount = v;
|
||||
}
|
||||
|
||||
/** Specify which index field name should hold the
|
||||
* ordinals for this dimension; this is only used by the
|
||||
* taxonomy based facet methods. */
|
||||
public synchronized void setIndexFieldName(String dimName, String indexFieldName) {
|
||||
DimConfig ft = fieldTypes.get(dimName);
|
||||
if (ft == null) {
|
||||
|
@ -149,7 +167,7 @@ public class FacetsConfig {
|
|||
}
|
||||
|
||||
/** Translates any added {@link FacetField}s into normal
|
||||
* fields for indexing */
|
||||
* fields for indexing. */
|
||||
public IndexDocument build(IndexDocument doc) throws IOException {
|
||||
// Find all FacetFields, collated by the actual field:
|
||||
Map<String,List<FacetField>> byField = new HashMap<String,List<FacetField>>();
|
||||
|
@ -442,8 +460,7 @@ public class FacetsConfig {
|
|||
// Escapes any occurrence of the path component inside the label:
|
||||
private static final char ESCAPE_CHAR = '\u001E';
|
||||
|
||||
/** Turns a path into a string without stealing any
|
||||
* characters. */
|
||||
/** Turns a dim + path into an encoded string. */
|
||||
public static String pathToString(String dim, String[] path) {
|
||||
String[] fullPath = new String[1+path.length];
|
||||
fullPath[0] = dim;
|
||||
|
@ -451,10 +468,13 @@ public class FacetsConfig {
|
|||
return pathToString(fullPath, fullPath.length);
|
||||
}
|
||||
|
||||
/** Turns a dim + path into an encoded string. */
|
||||
public static String pathToString(String[] path) {
|
||||
return pathToString(path, path.length);
|
||||
}
|
||||
|
||||
/** Turns the first {@code} length elements of {@code
|
||||
* path} into an encoded string. */
|
||||
public static String pathToString(String[] path, int length) {
|
||||
// nocommit .... too anal? shouldn't we allow drill
|
||||
// down on just dim, to get all docs that have that
|
||||
|
@ -486,9 +506,9 @@ public class FacetsConfig {
|
|||
return sb.toString();
|
||||
}
|
||||
|
||||
/** Turns a result from previous call to {@link
|
||||
* #pathToString} back into the original {@code String[]}
|
||||
* without stealing any characters. */
|
||||
/** Turns an encoded string (from a previous call to {@link
|
||||
* #pathToString}) back into the original {@code
|
||||
* String[]}. */
|
||||
public static String[] stringToPath(String s) {
|
||||
List<String> parts = new ArrayList<String>();
|
||||
int length = s.length();
|
||||
|
|
|
@ -28,14 +28,24 @@ import org.apache.lucene.index.BinaryDocValues;
|
|||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
|
||||
// nocommit jdoc that this assumes/requires the default encoding
|
||||
/** Computes facets counts, assuming the default encoding
|
||||
* into DocValues was used.
|
||||
*
|
||||
* @lucene.experimental */
|
||||
public class FastTaxonomyFacetCounts extends TaxonomyFacets {
|
||||
private final int[] counts;
|
||||
|
||||
/** Create {@code FastTaxonomyFacetCounts}, which also
|
||||
* counts all facet labels. */
|
||||
public FastTaxonomyFacetCounts(TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc) throws IOException {
|
||||
this(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, taxoReader, config, fc);
|
||||
}
|
||||
|
||||
/** Create {@code FastTaxonomyFacetCounts}, using the
|
||||
* specified {@code indexFieldName} for ordinals. Use
|
||||
* this if you had set {@link
|
||||
* FacetsConfig#setIndexFieldName} to change the index
|
||||
* field name for certain dimensions. */
|
||||
public FastTaxonomyFacetCounts(String indexFieldName, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc) throws IOException {
|
||||
super(indexFieldName, taxoReader, config);
|
||||
counts = new int[taxoReader.getSize()];
|
||||
|
@ -106,8 +116,6 @@ public class FastTaxonomyFacetCounts extends TaxonomyFacets {
|
|||
return sum;
|
||||
}
|
||||
|
||||
/** Return the count for a specific path. Returns -1 if
|
||||
* this path doesn't exist, else the count. */
|
||||
@Override
|
||||
public Number getSpecificValue(String dim, String... path) throws IOException {
|
||||
verifyDim(dim);
|
||||
|
@ -120,6 +128,7 @@ public class FastTaxonomyFacetCounts extends TaxonomyFacets {
|
|||
|
||||
@Override
|
||||
public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException {
|
||||
// TODO: can we factor this out?
|
||||
if (topN <= 0) {
|
||||
throw new IllegalArgumentException("topN must be > 0 (got: " + topN + ")");
|
||||
}
|
||||
|
|
|
@ -19,24 +19,31 @@ package org.apache.lucene.facet;
|
|||
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.apache.lucene.document.Document; // javadocs
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/** Associates an arbitrary float with the added facet
|
||||
* path, encoding the float into a 4-byte BytesRef. */
|
||||
/** Add an instance of this to your {@link Document} to add
|
||||
* a facet label associated with a float. Use {@link
|
||||
* TaxonomyFacetSumFloatAssociations} to aggregate float values
|
||||
* per facet label at search time.
|
||||
*
|
||||
* @lucene.experimental */
|
||||
public class FloatAssociationFacetField extends AssociationFacetField {
|
||||
|
||||
/** Utility ctor: associates an int value (translates it
|
||||
* to 4-byte BytesRef). */
|
||||
/** Creates this from {@code dim} and {@code path} and a
|
||||
* float association */
|
||||
public FloatAssociationFacetField(float assoc, String dim, String... path) {
|
||||
super(floatToBytesRef(assoc), dim, path);
|
||||
}
|
||||
|
||||
/** Encodes a {@code float} as a 4-byte {@link BytesRef}. */
|
||||
public static BytesRef floatToBytesRef(float v) {
|
||||
return IntAssociationFacetField.intToBytesRef(Float.floatToIntBits(v));
|
||||
}
|
||||
|
||||
/** Decodes a previously encoded {@code float}. */
|
||||
public static float bytesRefToFloat(BytesRef b) {
|
||||
return Float.intBitsToFloat(IntAssociationFacetField.bytesRefToInt(b));
|
||||
}
|
||||
|
|
|
@ -19,20 +19,27 @@ package org.apache.lucene.facet;
|
|||
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.apache.lucene.document.Document; // javadocs
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/** Associates an arbitrary int with the added facet
|
||||
* path, encoding the int into a 4-byte BytesRef. */
|
||||
/** Add an instance of this to your {@link Document} to add
|
||||
* a facet label associated with an int. Use {@link
|
||||
* TaxonomyFacetSumIntAssociations} to aggregate int values
|
||||
* per facet label at search time.
|
||||
*
|
||||
* @lucene.experimental */
|
||||
public class IntAssociationFacetField extends AssociationFacetField {
|
||||
|
||||
/** Utility ctor: associates an int value (translates it
|
||||
* to 4-byte BytesRef). */
|
||||
/** Creates this from {@code dim} and {@code path} and an
|
||||
* int association */
|
||||
public IntAssociationFacetField(int assoc, String dim, String... path) {
|
||||
super(intToBytesRef(assoc), dim, path);
|
||||
}
|
||||
|
||||
/** Encodes an {@code int} as a 4-byte {@link BytesRef},
|
||||
* big-endian. */
|
||||
public static BytesRef intToBytesRef(int v) {
|
||||
byte[] bytes = new byte[4];
|
||||
// big-endian:
|
||||
|
@ -43,6 +50,7 @@ public class IntAssociationFacetField extends AssociationFacetField {
|
|||
return new BytesRef(bytes);
|
||||
}
|
||||
|
||||
/** Decodes a previously encoded {@code int}. */
|
||||
public static int bytesRefToInt(BytesRef b) {
|
||||
return ((b.bytes[b.offset]&0xFF) << 24) |
|
||||
((b.bytes[b.offset+1]&0xFF) << 16) |
|
||||
|
|
|
@ -43,5 +43,8 @@ public final class LabelAndValue {
|
|||
return label.equals(other.label) && value.equals(other.value);
|
||||
}
|
||||
|
||||
// nocommit hashCode
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return label.hashCode() + 1439 * value.hashCode();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,11 +17,10 @@ package org.apache.lucene.facet;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/** Represents a single labelled range, one facet label in
|
||||
* the facets computed by {@link RangeAccumulator}.
|
||||
/** Represents a single labeled range, one facet label in
|
||||
* the facets computed by {@link RangeFacetCounts}.
|
||||
*
|
||||
* @lucene.experimental */
|
||||
|
||||
public abstract class Range {
|
||||
public final String label;
|
||||
|
||||
|
|
|
@ -27,19 +27,28 @@ import org.apache.lucene.queries.function.FunctionValues;
|
|||
import org.apache.lucene.queries.function.ValueSource;
|
||||
import org.apache.lucene.queries.function.valuesource.LongFieldSource;
|
||||
|
||||
/**
|
||||
* accumulates counts for provided ranges.
|
||||
*/
|
||||
/** {@link Facets} implementation that computes counts for
|
||||
* dynamic ranges from a provided {@link ValueSource}. Use
|
||||
* this for dimensions that change in real-time (e.g. a
|
||||
* relative time based dimension like "Past day", "Past 2
|
||||
* days", etc.) or that change for each user (e.g. a
|
||||
* distance dimension like "< 1 km", "< 2 km", etc.).
|
||||
*
|
||||
* @lucene.experimental */
|
||||
public class RangeFacetCounts extends Facets {
|
||||
private final Range[] ranges;
|
||||
private final int[] counts;
|
||||
private final String field;
|
||||
private int totCount;
|
||||
|
||||
/** Create {@code RangeFacetCounts}, using {@link
|
||||
* LongFieldSource} from the specified field. */
|
||||
public RangeFacetCounts(String field, FacetsCollector hits, Range... ranges) throws IOException {
|
||||
this(field, new LongFieldSource(field), hits, ranges);
|
||||
}
|
||||
|
||||
/** Create {@code RangeFacetCounts}, using the provided
|
||||
* {@link ValueSource}. */
|
||||
public RangeFacetCounts(String field, ValueSource valueSource, FacetsCollector hits, Range... ranges) throws IOException {
|
||||
this.ranges = ranges;
|
||||
this.field = field;
|
||||
|
|
|
@ -71,13 +71,14 @@ public final class SortedSetDocValuesReaderState {
|
|||
|
||||
private final Map<String,OrdRange> prefixToOrdRange = new HashMap<String,OrdRange>();
|
||||
|
||||
/** Creates this, pulling doc values from the default {@link
|
||||
* FacetsConfig#DEFAULT_INDEX_FIELD_NAME}. */
|
||||
public SortedSetDocValuesReaderState(IndexReader reader) throws IOException {
|
||||
this(reader, FacetsConfig.DEFAULT_INDEX_FIELD_NAME);
|
||||
}
|
||||
|
||||
/** Create an instance, scanning the {@link
|
||||
* SortedSetDocValues} from the provided reader, with
|
||||
* default {@link FacetIndexingParams}. */
|
||||
/** Creates this, pulling doc values from the specified
|
||||
* field. */
|
||||
public SortedSetDocValuesReaderState(IndexReader reader, String field) throws IOException {
|
||||
|
||||
this.field = field;
|
||||
|
|
|
@ -30,13 +30,19 @@ import org.apache.lucene.util.IntsRef;
|
|||
|
||||
/** Reads from any {@link OrdinalsReader}; use {@link
|
||||
* FastTaxonomyFacetCounts} if you are just using the
|
||||
* default encoding from {@link BinaryDocValues}. */
|
||||
* default encoding from {@link BinaryDocValues}.
|
||||
*
|
||||
* @lucene.experimental */
|
||||
|
||||
// nocommit remove & add specialized Cached variation only?
|
||||
public class TaxonomyFacetCounts extends TaxonomyFacets {
|
||||
private final OrdinalsReader ordinalsReader;
|
||||
private final int[] counts;
|
||||
|
||||
/** Create {@code TaxonomyFacetCounts}, which also
|
||||
* counts all facet labels. Use this for a non-default
|
||||
* {@link OrdinalsReader}; otherwise use {@link
|
||||
* FastTaxonomyFacetCounts}. */
|
||||
public TaxonomyFacetCounts(OrdinalsReader ordinalsReader, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc) throws IOException {
|
||||
super(ordinalsReader.getIndexFieldName(), taxoReader, config);
|
||||
this.ordinalsReader = ordinalsReader;
|
||||
|
@ -89,8 +95,6 @@ public class TaxonomyFacetCounts extends TaxonomyFacets {
|
|||
return sum;
|
||||
}
|
||||
|
||||
/** Return the count for a specific path. Returns -1 if
|
||||
* this path doesn't exist, else the count. */
|
||||
@Override
|
||||
public Number getSpecificValue(String dim, String... path) throws IOException {
|
||||
verifyDim(dim);
|
||||
|
@ -103,6 +107,7 @@ public class TaxonomyFacetCounts extends TaxonomyFacets {
|
|||
|
||||
@Override
|
||||
public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException {
|
||||
// TODO: can we factor this out?
|
||||
if (topN <= 0) {
|
||||
throw new IllegalArgumentException("topN must be > 0 (got: " + topN + ")");
|
||||
}
|
||||
|
|
|
@ -27,14 +27,22 @@ import org.apache.lucene.index.BinaryDocValues;
|
|||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
|
||||
// nocommit jdoc that this assumes/requires the default encoding
|
||||
/** Aggregates sum of int values previously indexed with
|
||||
* {@link FloatAssociationFacetField}, assuming the default
|
||||
* encoding.
|
||||
*
|
||||
* @lucene.experimental */
|
||||
public class TaxonomyFacetSumFloatAssociations extends TaxonomyFacets {
|
||||
private final float[] values;
|
||||
|
||||
/** Create {@code TaxonomyFacetSumFloatAssociations} against
|
||||
* the default index field. */
|
||||
public TaxonomyFacetSumFloatAssociations(TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc) throws IOException {
|
||||
this(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, taxoReader, config, fc);
|
||||
}
|
||||
|
||||
/** Create {@code TaxonomyFacetSumFloatAssociations} against
|
||||
* the specified index field. */
|
||||
public TaxonomyFacetSumFloatAssociations(String indexFieldName, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc) throws IOException {
|
||||
super(indexFieldName, taxoReader, config);
|
||||
values = new float[taxoReader.getSize()];
|
||||
|
@ -80,8 +88,6 @@ public class TaxonomyFacetSumFloatAssociations extends TaxonomyFacets {
|
|||
}
|
||||
}
|
||||
|
||||
/** Return the count for a specific path. Returns -1 if
|
||||
* this path doesn't exist, else the count. */
|
||||
@Override
|
||||
public Number getSpecificValue(String dim, String... path) throws IOException {
|
||||
verifyDim(dim);
|
||||
|
@ -94,6 +100,7 @@ public class TaxonomyFacetSumFloatAssociations extends TaxonomyFacets {
|
|||
|
||||
@Override
|
||||
public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException {
|
||||
// TODO: can we factor this out?
|
||||
if (topN <= 0) {
|
||||
throw new IllegalArgumentException("topN must be > 0 (got: " + topN + ")");
|
||||
}
|
||||
|
|
|
@ -27,14 +27,22 @@ import org.apache.lucene.index.BinaryDocValues;
|
|||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
|
||||
// nocommit jdoc that this assumes/requires the default encoding
|
||||
/** Aggregates sum of int values previously indexed with
|
||||
* {@link IntAssociationFacetField}, assuming the default
|
||||
* encoding.
|
||||
*
|
||||
* @lucene.experimental */
|
||||
public class TaxonomyFacetSumIntAssociations extends TaxonomyFacets {
|
||||
private final int[] values;
|
||||
|
||||
/** Create {@code TaxonomyFacetSumIntAssociations} against
|
||||
* the default index field. */
|
||||
public TaxonomyFacetSumIntAssociations(TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc) throws IOException {
|
||||
this(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, taxoReader, config, fc);
|
||||
}
|
||||
|
||||
/** Create {@code TaxonomyFacetSumIntAssociations} against
|
||||
* the specified index field. */
|
||||
public TaxonomyFacetSumIntAssociations(String indexFieldName, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc) throws IOException {
|
||||
super(indexFieldName, taxoReader, config);
|
||||
values = new int[taxoReader.getSize()];
|
||||
|
@ -80,8 +88,6 @@ public class TaxonomyFacetSumIntAssociations extends TaxonomyFacets {
|
|||
}
|
||||
}
|
||||
|
||||
/** Return the count for a specific path. Returns -1 if
|
||||
* this path doesn't exist, else the count. */
|
||||
@Override
|
||||
public Number getSpecificValue(String dim, String... path) throws IOException {
|
||||
verifyDim(dim);
|
||||
|
@ -94,6 +100,7 @@ public class TaxonomyFacetSumIntAssociations extends TaxonomyFacets {
|
|||
|
||||
@Override
|
||||
public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException {
|
||||
// TODO: can we factor this out?
|
||||
if (topN <= 0) {
|
||||
throw new IllegalArgumentException("topN must be > 0 (got: " + topN + ")");
|
||||
}
|
||||
|
|
|
@ -34,8 +34,9 @@ import org.apache.lucene.util.FixedBitSet;
|
|||
import org.apache.lucene.util.IntsRef;
|
||||
|
||||
/** Aggregates sum of values from a {@link ValueSource}, for
|
||||
* each facet label. */
|
||||
|
||||
* each facet label.
|
||||
*
|
||||
* @lucene.experimental */
|
||||
public class TaxonomyFacetSumValueSource extends TaxonomyFacets {
|
||||
private final float[] values;
|
||||
private final OrdinalsReader ordinalsReader;
|
||||
|
@ -141,6 +142,7 @@ public class TaxonomyFacetSumValueSource extends TaxonomyFacets {
|
|||
|
||||
@Override
|
||||
public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException {
|
||||
// TODO: can we factor this out?
|
||||
if (topN <= 0) {
|
||||
throw new IllegalArgumentException("topN must be > 0 (got: " + topN + ")");
|
||||
}
|
||||
|
|
|
@ -27,23 +27,19 @@ import org.apache.lucene.facet.taxonomy.ParallelTaxonomyArrays;
|
|||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
|
||||
/** Base class for all taxonomy-based facets impls. */
|
||||
abstract class TaxonomyFacets extends Facets {
|
||||
public abstract class TaxonomyFacets extends Facets {
|
||||
protected final String indexFieldName;
|
||||
protected final TaxonomyReader taxoReader;
|
||||
protected final FacetsConfig config;
|
||||
protected final int[] children;
|
||||
protected final int[] parents;
|
||||
protected final int[] siblings;
|
||||
|
||||
/** Sole parameter is the field name that holds the facet
|
||||
* counts. */
|
||||
protected TaxonomyFacets(String indexFieldName, TaxonomyReader taxoReader, FacetsConfig config) throws IOException {
|
||||
this.indexFieldName = indexFieldName;
|
||||
this.taxoReader = taxoReader;
|
||||
this.config = config;
|
||||
ParallelTaxonomyArrays pta = taxoReader.getParallelTaxonomyArrays();
|
||||
children = pta.children();
|
||||
parents = pta.parents();
|
||||
siblings = pta.siblings();
|
||||
}
|
||||
|
||||
|
|
|
@ -21,4 +21,16 @@
|
|||
<body>
|
||||
<h1>faceted search</h1>
|
||||
</body>
|
||||
<p>
|
||||
This module provides multiple methods for computing facet counts and
|
||||
value aggregations.
|
||||
</p>
|
||||
<p>
|
||||
The taxonomy-based methods ({@link FastTaxonomyFacetCounts},
|
||||
{@link TaxonomyFacetSumIntAssociations
|
||||
The Facets.search utility methods are useful for doing an "ordinary"
|
||||
search (sorting by score, or by a specified Sort) but also
|
||||
collecting into a FacetsCollector for subsequent faceting.
|
||||
|
||||
<p>
|
||||
</html>
|
Loading…
Reference in New Issue