SOLR-12398: Add Heatmap facet option to JSON Facet API.

* moved the preponderance of the implementation from SpatialHeatmapFacets (used by SimpleFacets) into the new API.
This commit is contained in:
David Smiley 2018-06-25 22:37:13 -04:00
parent 1d85cd7838
commit 095f9eb90d
11 changed files with 902 additions and 498 deletions

View File

@ -88,6 +88,8 @@ New Features
* SOLR-12506: Add SolrJ support for the modify collection API. (shalin)
* SOLR-12398: The JSON Facet API now supports type=heatmap facets, just as classic faceting does. (David Smiley)
Bug Fixes
----------------------

View File

@ -16,257 +16,53 @@
*/
package org.apache.solr.handler.component;
import javax.imageio.ImageIO;
import javax.imageio.ImageReader;
import javax.imageio.spi.ImageReaderSpi;
import javax.imageio.stream.ImageInputStream;
import javax.imageio.stream.ImageInputStreamImpl;
import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.AbstractList;
import java.util.Iterator;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import org.apache.lucene.spatial.prefix.HeatmapFacetCounter;
import org.apache.lucene.spatial.prefix.PrefixTreeStrategy;
import org.apache.lucene.spatial.query.SpatialArgs;
import org.apache.lucene.spatial.query.SpatialOperation;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.schema.AbstractSpatialPrefixTreeFieldType;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.RptWithGeometrySpatialField;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.schema.SpatialRecursivePrefixTreeFieldType;
import org.apache.solr.search.BitDocSet;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.DistanceUnits;
import org.apache.solr.util.SpatialUtils;
import org.locationtech.spatial4j.context.SpatialContext;
import org.locationtech.spatial4j.shape.Shape;
import org.apache.solr.search.facet.FacetHeatmap;
import org.apache.solr.search.facet.FacetMerger;
import org.apache.solr.search.facet.FacetRequest;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/** A 2D spatial faceting summary of a rectangular region. Used by {@link org.apache.solr.handler.component.FacetComponent}
 * and {@link org.apache.solr.request.SimpleFacets}.
 * @see FacetHeatmap
 */
public class SpatialHeatmapFacets {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
//underneath facet_counts we put this here:
public static final String RESPONSE_KEY = "facet_heatmaps";
public static final String FORMAT_PNG = "png";
public static final String FORMAT_INTS2D = "ints2D";
//note: if we change or add more formats, remember to update the javadoc on the format param
//TODO for more format ideas, see formatCountsAndAddToNL
public static final double DEFAULT_DIST_ERR_PCT = 0.15;
/** Called by {@link org.apache.solr.request.SimpleFacets} to compute heatmap facets. */
public static NamedList<Object> getHeatmapForField(String fieldKey, String fieldName, ResponseBuilder rb, SolrParams params, DocSet docSet) throws IOException {
//get the strategy from the field type
final SchemaField schemaField = rb.req.getSchema().getField(fieldName);
final FieldType type = schemaField.getType();
final PrefixTreeStrategy strategy;
final DistanceUnits distanceUnits;
// note: the two instanceof conditions is not ideal, versus one. If we start needing to add more then refactor.
if ((type instanceof AbstractSpatialPrefixTreeFieldType)) {
AbstractSpatialPrefixTreeFieldType rptType = (AbstractSpatialPrefixTreeFieldType) type;
strategy = (PrefixTreeStrategy) rptType.getStrategy(fieldName);
distanceUnits = rptType.getDistanceUnits();
} else if (type instanceof RptWithGeometrySpatialField) {
RptWithGeometrySpatialField rptSdvType = (RptWithGeometrySpatialField) type;
strategy = rptSdvType.getStrategy(fieldName).getIndexStrategy();
distanceUnits = rptSdvType.getDistanceUnits();
} else {
//FYI we support the term query one too but few people use that one
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "heatmap field needs to be of type "
+ SpatialRecursivePrefixTreeFieldType.class + " or " + RptWithGeometrySpatialField.class);
}
final SpatialContext ctx = strategy.getSpatialContext();
//get the bbox (query Rectangle)
String geomStr = params.getFieldParam(fieldKey, FacetParams.FACET_HEATMAP_GEOM);
final Shape boundsShape = geomStr == null ? ctx.getWorldBounds() : SpatialUtils.parseGeomSolrException(geomStr, ctx);
//get the grid level (possibly indirectly via distErr or distErrPct)
final int gridLevel;
Integer gridLevelObj = params.getFieldInt(fieldKey, FacetParams.FACET_HEATMAP_LEVEL);
final int maxGridLevel = strategy.getGrid().getMaxLevels();
if (gridLevelObj != null) {
gridLevel = gridLevelObj;
if (gridLevel <= 0 || gridLevel > maxGridLevel) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
FacetParams.FACET_HEATMAP_LEVEL +" should be > 0 and <= " + maxGridLevel);
}
} else {
//SpatialArgs has utility methods to resolve a 'distErr' from optionally set distErr & distErrPct. Arguably that
// should be refactored to feel less weird than using it like this.
SpatialArgs spatialArgs = new SpatialArgs(SpatialOperation.Intersects/*ignored*/,
boundsShape == null ? ctx.getWorldBounds() : boundsShape);
final Double distErrObj = params.getFieldDouble(fieldKey, FacetParams.FACET_HEATMAP_DIST_ERR);
if (distErrObj != null) {
// convert distErr units based on configured units
spatialArgs.setDistErr(distErrObj * distanceUnits.multiplierFromThisUnitToDegrees());
}
spatialArgs.setDistErrPct(params.getFieldDouble(fieldKey, FacetParams.FACET_HEATMAP_DIST_ERR_PCT));
double distErr = spatialArgs.resolveDistErr(ctx, DEFAULT_DIST_ERR_PCT);
if (distErr <= 0) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
FacetParams.FACET_HEATMAP_DIST_ERR_PCT + " or " + FacetParams.FACET_HEATMAP_DIST_ERR
+ " should be > 0 or instead provide " + FacetParams.FACET_HEATMAP_LEVEL + "=" + maxGridLevel
+ " if you insist on maximum detail");
}
//The SPT (grid) can lookup a grid level satisfying an error distance constraint
gridLevel = strategy.getGrid().getLevelForDistance(distErr);
}
//Compute!
final HeatmapFacetCounter.Heatmap heatmap;
try {
heatmap = HeatmapFacetCounter.calcFacets(
strategy,
rb.req.getSearcher().getTopReaderContext(),
getTopAcceptDocs(docSet, rb.req.getSearcher()), // turn DocSet into Bits
boundsShape,
gridLevel,
params.getFieldInt(fieldKey, FacetParams.FACET_HEATMAP_MAX_CELLS, 100_000) // will throw if exceeded
);
} catch (IllegalArgumentException e) {//e.g. too many cells
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e.toString(), e);
}
//Populate response
NamedList<Object> result = new NamedList<>();
result.add("gridLevel", gridLevel);
result.add("columns", heatmap.columns);
result.add("rows", heatmap.rows);
result.add("minX", heatmap.region.getMinX());
result.add("maxX", heatmap.region.getMaxX());
result.add("minY", heatmap.region.getMinY());
result.add("maxY", heatmap.region.getMaxY());
boolean hasNonZero = false;
for (int count : heatmap.counts) {
if (count > 0) {
hasNonZero = true;
break;
}
}
formatCountsAndAddToNL(fieldKey, rb, params, heatmap.columns, heatmap.rows, hasNonZero ? heatmap.counts : null, result);
return result;
final FacetRequest facetRequest = createHeatmapRequest(fieldKey, fieldName, rb, params);
return (NamedList) facetRequest.process(rb.req, docSet);
}
/**
 * Adapts {@code docSet} into a top-level {@link Bits} "accept docs" filter for
 * {@link HeatmapFacetCounter#calcFacets}, choosing the cheapest representation available.
 * Returns null when the set is exactly the searcher's live-doc set, which means
 * "match everything" to the caller.
 */
private static Bits getTopAcceptDocs(DocSet docSet, SolrIndexSearcher searcher) throws IOException {
if (searcher.getLiveDocSet() == docSet) {
return null; // means match everything (all live docs). This can speedup things a lot.
} else if (docSet.size() == 0) {
return new Bits.MatchNoBits(searcher.maxDoc()); // can speedup things a lot
} else if (docSet instanceof BitDocSet) {
return ((BitDocSet) docSet).getBits(); // already a bitset; expose it directly (no copy)
} else {
// Fallback: materialize the doc ids into a FixedBitSet sized to the whole index.
// TODO DocSetBase.calcBits ought to be at DocSet level?
FixedBitSet bits = new FixedBitSet(searcher.maxDoc());
for (DocIterator iter = docSet.iterator(); iter.hasNext();) {
bits.set(iter.nextDoc());
}
return bits;
}
}
private static FacetRequest createHeatmapRequest(String fieldKey, String fieldName, ResponseBuilder rb, SolrParams params) {
Map<String, Object> jsonFacet = new HashMap<>();
jsonFacet.put("type", "heatmap");
jsonFacet.put("field", fieldName);
// jsonFacets has typed values, unlike SolrParams which is all string
jsonFacet.put(FacetHeatmap.GEOM_PARAM, params.getFieldParam(fieldKey, FacetParams.FACET_HEATMAP_GEOM));
jsonFacet.put(FacetHeatmap.LEVEL_PARAM, params.getFieldInt(fieldKey, FacetParams.FACET_HEATMAP_LEVEL));
jsonFacet.put(FacetHeatmap.DIST_ERR_PCT_PARAM, params.getFieldDouble(fieldKey, FacetParams.FACET_HEATMAP_DIST_ERR_PCT));
jsonFacet.put(FacetHeatmap.DIST_ERR_PARAM, params.getFieldDouble(fieldKey, FacetParams.FACET_HEATMAP_DIST_ERR));
jsonFacet.put(FacetHeatmap.MAX_CELLS_PARAM, params.getFieldInt(fieldKey, FacetParams.FACET_HEATMAP_MAX_CELLS));
jsonFacet.put(FacetHeatmap.FORMAT_PARAM, params.getFieldParam(fieldKey, FacetParams.FACET_HEATMAP_FORMAT));
/**
 * Formats the raw heatmap counts per the requested format param and adds them to
 * {@code result} under the key {@code "counts_" + format}.
 *
 * @param counts the grid counts, or null to signify an all-zero grid (formatted value is null)
 * @throws SolrException BAD_REQUEST if the format param is not a recognized format
 */
private static void formatCountsAndAddToNL(String fieldKey, ResponseBuilder rb, SolrParams params,
int columns, int rows, int[] counts, NamedList<Object> result) {
final String format = params.getFieldParam(fieldKey, FacetParams.FACET_HEATMAP_FORMAT, FORMAT_INTS2D);
final Object countsVal;
switch (format) {
case FORMAT_INTS2D: //A List of List of Integers. Good for small heatmaps and ease of consumption
countsVal = counts != null ? asInts2D(columns, rows, counts) : null;
break;
case FORMAT_PNG: //A PNG graphic; compressed. Good for large & dense heatmaps; hard to consume.
countsVal = counts != null ? asPngBytes(columns, rows, counts, rb) : null;
break;
//TODO case skipList: //A sequence of values; negative values are actually how many 0's to insert.
// Good for small or large but sparse heatmaps.
//TODO auto choose png or skipList; use skipList when < ~25% full or <= ~512 cells
// remember to augment error list below when we add more formats.
default:
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"format should be " + FORMAT_INTS2D + " or " + FORMAT_PNG);
}
result.add("counts_" + format, countsVal);
}
/**
 * Exposes the heatmap counts as a lazy 2D {@code List} view (no copy is made, saving memory).
 * The underlying {@code counts} array is column-major and bottom-up; the returned view is
 * oriented naturally for human/developer viewing: one row at a time, top-down.
 * A row that is entirely zero is represented as {@code null} instead of a list of zeroes.
 */
static List<List<Integer>> asInts2D(final int columns, final int rows, final int[] counts) {
  return new AbstractList<List<Integer>>() {
    @Override
    public int size() {
      return rows;
    }

    @Override
    public List<Integer> get(final int rowIdx) {
      // The view is top-down but counts is bottom-up, so flip the 'y' coordinate.
      final int y = rows - 1 - rowIdx;
      // Scan the row; an all-zero row becomes null.
      boolean allZero = true;
      for (int col = 0; col < columns && allZero; col++) {
        allZero = counts[col * rows + y] == 0;
      }
      if (allZero) {
        return null;
      }
      // Lazy view over this one row.
      return new AbstractList<Integer>() {
        @Override
        public int size() {
          return columns;
        }

        @Override
        public Integer get(int colIdx) {
          return counts[colIdx * rows + y];
        }
      };
    }
  };
}
//package access for tests
static byte[] asPngBytes(final int columns, final int rows, final int[] counts, ResponseBuilder rb) {
long startTimeNano = System.nanoTime();
BufferedImage image = PngHelper.newImage(columns, rows);
for (int c = 0; c < columns; c++) {
for (int r = 0; r < rows; r++) {
PngHelper.writeCountAtColumnRow(image, rows, c, r, counts[c * rows + r]);
}
}
byte[] bytes = PngHelper.writeImage(image);
long durationMs = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startTimeNano);
log.debug("heatmap nativeSize={} pngSize={} pngTime={}", (counts.length * 4), bytes.length, durationMs);
if (rb != null && rb.isDebugTimings()) {
rb.addDebug(durationMs, "timing", "heatmap png generation");
}
return bytes;
return FacetRequest.parseOneFacetReq(rb.req, jsonFacet);
}
//
@ -301,7 +97,7 @@ public class SpatialHeatmapFacets {
newLocalParams.add(facet.localParams);
}
// Set format to PNG; it's the only one we parse
newLocalParams.set(FacetParams.FACET_HEATMAP_FORMAT, FORMAT_PNG);
newLocalParams.set(FacetParams.FACET_HEATMAP_FORMAT, FacetHeatmap.FORMAT_PNG);
sreq.params.add(FacetParams.FACET_HEATMAP,
newLocalParams.toLocalParamsString() + facet.facetOn);
}
@ -324,37 +120,10 @@ public class SpatialHeatmapFacets {
log.error("received heatmap for field/key {} that we weren't expecting", fieldKey);
continue;
}
facet.counts = addPngToIntArray((byte[]) shardNamedList.remove("counts_" + FORMAT_PNG), facet.counts);
if (facet.namedList == null) {
// First shard
facet.namedList = shardNamedList;
} else {
assert facet.namedList.equals(shardNamedList);
}
facet.jsonFacetMerger.merge(shardNamedList, null);//merge context not needed (null)
}
}
//package access for tests
/**
 * Decodes a shard's PNG-encoded counts and accumulates them into {@code counts}.
 * Package access for tests.
 *
 * @param pngBytes PNG-encoded grid from a shard, or null (no-op; returns {@code counts} as-is)
 * @param counts running totals in column-major order, or null to allocate a new array
 * @return the accumulated counts array (possibly newly allocated)
 */
static int[] addPngToIntArray(byte[] pngBytes, int[] counts) {
if (pngBytes == null) {
return counts;
}
//read PNG
final BufferedImage image = PngHelper.readImage(pngBytes);
int columns = image.getWidth();
int rows = image.getHeight();
if (counts == null) {
counts = new int[columns * rows];
} else {
assert counts.length == columns * rows; // all shards must agree on grid dimensions
}
for (int c = 0; c < columns; c++) {
for (int r = 0; r < rows; r++) {
counts[c * rows + r] += PngHelper.getCountAtColumnRow(image, rows, c, r);
}
}
return counts;
}
/** Called by FacetComponent's impl of
* {@link org.apache.solr.handler.component.SearchComponent#finishStage(ResponseBuilder)}. */
@ -362,13 +131,7 @@ public class SpatialHeatmapFacets {
NamedList<NamedList<Object>> result = new SimpleOrderedMap<>();
for (Map.Entry<String, HeatmapFacet> entry : heatmapInfos.entrySet()) {
final HeatmapFacet facet = entry.getValue();
final NamedList<Object> namedList = facet.namedList;
if (namedList == null) {
continue;//should never happen but play it safe
}
formatCountsAndAddToNL(entry.getKey(), rb, SolrParams.wrapDefaults(facet.localParams, rb.req.getParams()),
(int) namedList.get("columns"), (int) namedList.get("rows"), facet.counts, namedList);
result.add(entry.getKey(), namedList);
result.add(entry.getKey(), (NamedList<Object>) facet.jsonFacetMerger.getMergedResult());
}
return result;
}
@ -378,125 +141,17 @@ public class SpatialHeatmapFacets {
/**
 * Per-field heatmap facet state tracked by FacetComponent across distributed stages.
 */
public static class HeatmapFacet extends FacetComponent.FacetBase {
//note: 'public' following-suit with FacetBase & existing subclasses... though should this really be?
//Holds response NamedList for this field, with counts pulled out. Taken from 1st shard response.
public NamedList<Object> namedList;
//Like Heatmap.counts in Lucene spatial, although null if it would be all-0.
public int[] counts;
// Merger from the JSON Facet API impl; accumulates per-shard heatmap responses.
public FacetMerger jsonFacetMerger;
public HeatmapFacet(ResponseBuilder rb, String facetStr) {
super(rb, FacetParams.FACET_HEATMAP, facetStr);
//note: logic in super (FacetBase) is partially redundant with SimpleFacet.parseParams :-(
final SolrParams params = SolrParams.wrapDefaults(localParams, rb.req.getParams());
final FacetRequest heatmapRequest = createHeatmapRequest(getKey(), facetOn, rb, params);
jsonFacetMerger = heatmapRequest.createFacetMerger(null);
}
}
//
// PngHelper
//
//package access for tests
/**
 * Helpers that (ab)use PNG as a compact, lossless transport encoding for a 2D int array of
 * heatmap counts: each pixel's 4-byte ABGR value stores one count. Not intended as a
 * human-viewable image. Package access for tests.
 */
static class PngHelper {

  static final ImageReaderSpi imageReaderSpi;//thread-safe
  static {
    final Iterator<ImageReader> imageReaders = ImageIO.getImageReadersByFormatName("png");
    if (!imageReaders.hasNext()) {
      // fixed typo: "neaded" -> "needed"
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Can't find png image reader, needed for heatmaps!");
    }
    ImageReader imageReader = imageReaders.next();
    imageReaderSpi = imageReader.getOriginatingProvider();
  }

  /**
   * Decodes the given PNG bytes into a {@link BufferedImage}.
   *
   * @throws SolrException SERVER_ERROR wrapping any underlying IOException
   */
  static BufferedImage readImage(final byte[] bytes) {
    // Wrap ImageInputStream around the bytes. We could use MemoryCacheImageInputStream but it will
    // cache the data which is quite unnecessary given we have it all in-memory already.
    ImageInputStream imageInputStream = new ImageInputStreamImpl() {
      //TODO re-use this instance; superclass has 8KB buffer.

      @Override
      public int read() throws IOException {
        checkClosed();
        bitOffset = 0;
        if (streamPos >= bytes.length) {
          return -1;
        } else {
          return bytes[(int) streamPos++];
        }
      }

      @Override
      public int read(byte[] b, int off, int len) throws IOException {
        checkClosed();
        bitOffset = 0;
        if (streamPos >= bytes.length) {
          return -1;
        } else {
          int copyLen = Math.min(len, bytes.length - (int) streamPos);
          System.arraycopy(bytes, (int) streamPos, b, off, copyLen);
          streamPos += copyLen;
          return copyLen;
        }
      }

      @Override
      public long length() {
        return bytes.length;
      }

      @Override
      public boolean isCached() {
        return true;
      }

      @Override
      public boolean isCachedMemory() {
        return true;
      }
    };
    try {
      //TODO can/should we re-use an imageReader instance on FacetInfo?
      ImageReader imageReader = imageReaderSpi.createReaderInstance();
      imageReader.setInput(imageInputStream,
          false,//forwardOnly
          true);//ignoreMetadata
      return imageReader.read(0);//read first & only image
    } catch (IOException e) {
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Problem reading png heatmap: " + e);
    }
  }

  /**
   * Encodes the image as PNG bytes.
   *
   * @throws SolrException SERVER_ERROR wrapping any underlying IOException
   */
  static byte[] writeImage(BufferedImage image) {
    ByteArrayOutputStream baos = new ByteArrayOutputStream(
        // initialize to roughly 1/4th the size a native int would take per-pixel
        image.getWidth() * image.getHeight() + 1024
    );
    try {
      ImageIO.write(image, FORMAT_PNG, baos);
    } catch (IOException e) {
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "While generating PNG: " + e);
    }
    //too bad we can't access the raw byte[]; this copies to a new one
    return baos.toByteArray();
  }

  // We abuse the image for storing integers (4 bytes), and so we need a 4-byte ABGR.
  // first (low) byte is blue, next byte is green, next byte red, and last (high) byte is alpha.
  static BufferedImage newImage(int columns, int rows) {
    return new BufferedImage(columns, rows, BufferedImage.TYPE_4BYTE_ABGR);
  }

  // 'y' dimension goes top-down, so invert.
  // Alpha channel is high byte; 0 means transparent. So XOR those bits with '1' so that we need
  // to have counts > 16M before the picture starts to fade
  static void writeCountAtColumnRow(BufferedImage image, int rows, int c, int r, int val) {
    image.setRGB(c, rows - 1 - r, val ^ 0xFF_00_00_00);
  }

  static int getCountAtColumnRow(BufferedImage image, int rows, int c, int r) {
    return image.getRGB(c, rows - 1 - r) ^ 0xFF_00_00_00;
  }
}
// Note: originally there was a lot more code here but it migrated to the JSON Facet API as "FacetHeatmap"
}

View File

@ -90,7 +90,7 @@ import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SortedIntDocSet;
import org.apache.solr.search.SyntaxError;
import org.apache.solr.search.facet.FacetDebugInfo;
import org.apache.solr.search.facet.FacetProcessor;
import org.apache.solr.search.facet.FacetRequest;
import org.apache.solr.search.grouping.GroupingSpecification;
import org.apache.solr.util.BoundedTreeSet;
import org.apache.solr.util.DefaultSolrThreadFactory;
@ -565,29 +565,20 @@ public class SimpleFacets {
}
jsonFacet.put(SORT, sortVal );
Map<String, Object> topLevel = new HashMap<>();
topLevel.put(field, jsonFacet);
topLevel.put("processEmpty", true);
FacetProcessor fproc = FacetProcessor.createProcessor(rb.req, topLevel, // rb.getResults().docSet
docs );
//TODO do we handle debug? Should probably already be handled by the legacy code
fproc.process();
Object resObj = FacetRequest.parseOneFacetReq(req, jsonFacet).process(req, docs);
//Go through the response to build the expected output for SimpleFacets
Object res = fproc.getResponse();
counts = new NamedList<Integer>();
if(res != null) {
SimpleOrderedMap<Object> som = (SimpleOrderedMap<Object>)res;
SimpleOrderedMap<Object> asdf = (SimpleOrderedMap<Object>) som.get(field);
counts = new NamedList<>();
if(resObj != null) {
NamedList<Object> res = (NamedList<Object>) resObj;
List<SimpleOrderedMap<Object>> buckets = (List<SimpleOrderedMap<Object>>)asdf.get("buckets");
for(SimpleOrderedMap<Object> b : buckets) {
List<NamedList<Object>> buckets = (List<NamedList<Object>>)res.get("buckets");
for(NamedList<Object> b : buckets) {
counts.add(b.get("val").toString(), (Integer)b.get("count"));
}
if(missing) {
SimpleOrderedMap<Object> missingCounts = (SimpleOrderedMap<Object>) asdf.get("missing");
NamedList<Object> missingCounts = (NamedList<Object>) res.get("missing");
counts.add(null, (Integer)missingCounts.get("count"));
}
}

View File

@ -0,0 +1,520 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search.facet;
import javax.imageio.ImageIO;
import javax.imageio.ImageReader;
import javax.imageio.spi.ImageReaderSpi;
import javax.imageio.stream.ImageInputStream;
import javax.imageio.stream.ImageInputStreamImpl;
import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.AbstractList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import com.google.common.annotations.VisibleForTesting;
import org.apache.lucene.spatial.prefix.HeatmapFacetCounter;
import org.apache.lucene.spatial.prefix.PrefixTreeStrategy;
import org.apache.lucene.spatial.query.SpatialArgs;
import org.apache.lucene.spatial.query.SpatialOperation;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.schema.AbstractSpatialPrefixTreeFieldType;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.RptWithGeometrySpatialField;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.schema.SpatialRecursivePrefixTreeFieldType;
import org.apache.solr.search.BitDocSet;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.DistanceUnits;
import org.apache.solr.util.SpatialUtils;
import org.locationtech.spatial4j.context.SpatialContext;
import org.locationtech.spatial4j.shape.Shape;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* JSON Facet API request for a 2D spatial summary of a rectangular region.
*
* @see HeatmapFacetCounter
* @version 7.5.0
*/
@SuppressWarnings("WeakerAccess")
public class FacetHeatmap extends FacetRequest {
// note: much of this code was moved from SpatialHeatmapFacets (SimpleFacets API)
/** @see org.apache.solr.common.params.FacetParams#FACET_HEATMAP_GEOM */
public static final String GEOM_PARAM = "geom";
/** @see org.apache.solr.common.params.FacetParams#FACET_HEATMAP_LEVEL */
public static final String LEVEL_PARAM = "gridLevel";
/** @see org.apache.solr.common.params.FacetParams#FACET_HEATMAP_DIST_ERR_PCT */
public static final String DIST_ERR_PCT_PARAM = "distErrPct";
/** @see org.apache.solr.common.params.FacetParams#FACET_HEATMAP_DIST_ERR */
public static final String DIST_ERR_PARAM = "distErr";
/** @see org.apache.solr.common.params.FacetParams#FACET_HEATMAP_MAX_CELLS */
public static final String MAX_CELLS_PARAM = "maxCells";
/** @see org.apache.solr.common.params.FacetParams#FACET_HEATMAP_FORMAT */
public static final String FORMAT_PARAM = "format";
public static final String FORMAT_PNG = "png";
public static final String FORMAT_INTS2D = "ints2D";
//note: if we change or add more formats, remember to update the javadoc on the format param
//TODO for more format ideas, see formatCountsVal()
public static final double DEFAULT_DIST_ERR_PCT = 0.15;
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
/**
 * Parses the JSON Facet API arguments for a heatmap facet into a {@link FacetHeatmap}.
 * Resolves the spatial strategy from the field type, the bounding geometry, and the grid
 * level (directly via gridLevel, or derived from distErr/distErrPct).
 */
static class Parser extends FacetParser<FacetHeatmap> {
Parser(FacetParser parent, String key) {
super(parent, key);
}
/**
 * @param argsObj the JSON args; must be a Map
 * @throws SolrException (via err) on missing/invalid arguments or an unsupported field type
 */
public FacetHeatmap parse(Object argsObj) {
assert facet == null;
if (!(argsObj instanceof Map)) {
throw err("Missing heatmap arguments");
}
@SuppressWarnings("unchecked")
Map<String, Object> argsMap = (Map<String, Object>) argsObj;
String fieldName = getField(argsMap);
//get the strategy from the field type
final SchemaField schemaField = getSchema().getField(fieldName);
final FieldType type = schemaField.getType();
final PrefixTreeStrategy strategy;
final DistanceUnits distanceUnits;
// note: the two instanceof conditions is not ideal, versus one. If we start needing to add more then refactor.
if ((type instanceof AbstractSpatialPrefixTreeFieldType)) {
AbstractSpatialPrefixTreeFieldType rptType = (AbstractSpatialPrefixTreeFieldType) type;
strategy = (PrefixTreeStrategy) rptType.getStrategy(fieldName);
distanceUnits = rptType.getDistanceUnits();
} else if (type instanceof RptWithGeometrySpatialField) {
RptWithGeometrySpatialField rptSdvType = (RptWithGeometrySpatialField) type;
strategy = rptSdvType.getStrategy(fieldName).getIndexStrategy();
distanceUnits = rptSdvType.getDistanceUnits();
} else {
//FYI we support the term query one too but few people use that one
throw err("heatmap field needs to be of type " + SpatialRecursivePrefixTreeFieldType.class + " or " + RptWithGeometrySpatialField.class);
}
final SpatialContext ctx = strategy.getSpatialContext();
//get the bbox (query Rectangle); defaults to the world bounds when no geom is given
String geomStr = getString(argsMap, GEOM_PARAM, null);
final Shape boundsShape = geomStr == null ? ctx.getWorldBounds() : SpatialUtils.parseGeomSolrException(geomStr, ctx);
//get the grid level (possibly indirectly via distErr or distErrPct)
final int gridLevel;
final Long gridLevelObj = getLongOrNull(argsMap, LEVEL_PARAM, false);
final int maxGridLevel = strategy.getGrid().getMaxLevels();
if (gridLevelObj != null) {
gridLevel = gridLevelObj.intValue();
if (gridLevel <= 0 || gridLevel > maxGridLevel) {
throw err(LEVEL_PARAM +" should be > 0 and <= " + maxGridLevel);
}
} else {
//SpatialArgs has utility methods to resolve a 'distErr' from optionally set distErr & distErrPct. Arguably that
// should be refactored to feel less weird than using it like this.
SpatialArgs spatialArgs = new SpatialArgs(SpatialOperation.Intersects/*ignored*/,
boundsShape == null ? ctx.getWorldBounds() : boundsShape);
final Double distErrObj = getDoubleOrNull(argsMap, DIST_ERR_PARAM, false);
if (distErrObj != null) {
// convert distErr units based on configured units
spatialArgs.setDistErr(distErrObj * distanceUnits.multiplierFromThisUnitToDegrees());
}
spatialArgs.setDistErrPct(getDoubleOrNull(argsMap, DIST_ERR_PCT_PARAM, false));
double distErr = spatialArgs.resolveDistErr(ctx, DEFAULT_DIST_ERR_PCT);
if (distErr <= 0) {
throw err(DIST_ERR_PCT_PARAM + " or " + DIST_ERR_PARAM
+ " should be > 0 or instead provide " + LEVEL_PARAM + "=" + maxGridLevel
+ " if you insist on maximum detail");
}
//The SPT (grid) can lookup a grid level satisfying an error distance constraint
gridLevel = strategy.getGrid().getLevelForDistance(distErr);
}
final int maxCells = (int) getLong(argsMap, MAX_CELLS_PARAM, 100_000);// will throw later if exceeded
final String format = getString(argsMap, FORMAT_PARAM, FORMAT_INTS2D);
if (!format.equals(FORMAT_INTS2D) && !format.equals(FORMAT_PNG)) {
throw err("format should be " + FORMAT_INTS2D + " or " + FORMAT_PNG);
}
this.facet = new FacetHeatmap(argsMap, strategy, boundsShape, gridLevel, maxCells, format);
parseCommonParams(argsObj); // e.g. domain change
return this.facet;
}
}//class Parser
private final Map<String, Object> argsMap;
private final PrefixTreeStrategy strategy;
private final Shape boundsShape;
private final int gridLevel;
private final int maxCells;
private final String format;
/**
 * @param argsMap the original JSON args, kept for {@link #getFacetDescription()}
 * @param strategy the spatial indexing strategy resolved from the field type
 * @param boundsShape the query rectangle to summarize
 * @param gridLevel the resolved grid level (validated by the Parser)
 * @param maxCells cap on grid cells; exceeded -> error at compute time
 * @param format FORMAT_INTS2D or FORMAT_PNG (validated by the Parser)
 */
FacetHeatmap(Map<String, Object> argsMap, PrefixTreeStrategy strategy, Shape boundsShape, int gridLevel, int maxCells, String format) {
this.argsMap = argsMap;
this.strategy = strategy;
this.boundsShape = boundsShape;
this.gridLevel = gridLevel;
this.maxCells = maxCells;
this.format = format;
}
//TODO perhaps all FacetRequest objs should have this?
/** Returns the raw JSON argument map this facet request was parsed from. */
@Override
public Map<String, Object> getFacetDescription() {
return argsMap;
}
/**
 * Creates the processor that computes the heatmap via {@link HeatmapFacetCounter#calcFacets}
 * and populates the response (gridLevel, columns/rows, bounding box, and formatted counts).
 * Shard responses always use PNG format regardless of the requested format (see merger).
 */
@Override
public FacetProcessor createFacetProcessor(FacetContext fcontext) {
return new FacetProcessor(fcontext, this) {
@Override
public void process() throws IOException {
super.process(); // handles domain changes
//Compute!
final HeatmapFacetCounter.Heatmap heatmap;
try {
heatmap = HeatmapFacetCounter.calcFacets(
strategy,
fcontext.searcher.getTopReaderContext(),
getTopAcceptDocs(fcontext.base, fcontext.searcher), // turn DocSet into Bits
boundsShape,
gridLevel,
maxCells);
} catch (IllegalArgumentException e) {//e.g. too many cells
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e.toString(), e);
}
//Populate response
response = new SimpleOrderedMap();
response.add("gridLevel", gridLevel);
response.add("columns", heatmap.columns);
response.add("rows", heatmap.rows);
response.add("minX", heatmap.region.getMinX());
response.add("maxX", heatmap.region.getMaxX());
response.add("minY", heatmap.region.getMinY());
response.add("maxY", heatmap.region.getMaxY());
//A shard request will always be a PNG
String format = fcontext.isShard() ? FORMAT_PNG : FacetHeatmap.this.format;
response.add("counts_" + format, formatCountsVal(format, heatmap.columns, heatmap.rows, heatmap.counts, fcontext.getDebugInfo()));
// note: we do not call processStats or processSubs as it's not supported yet
}
// Adapts docSet to a top-level Bits filter, using the cheapest representation available;
// null means "match everything" (all live docs).
//TODO this is a general utility that should go elsewhere? DocSetUtil? Then should DocSetBase.getBits go away?
private Bits getTopAcceptDocs(DocSet docSet, SolrIndexSearcher searcher) throws IOException {
if (docSet.size() == searcher.numDocs()) {
return null; // means match everything (all live docs). This can speedup things a lot.
} else if (docSet.size() == 0) {
return new Bits.MatchNoBits(searcher.maxDoc()); // can speedup things a lot
} else if (docSet instanceof BitDocSet) {
return ((BitDocSet) docSet).getBits();
} else {
// TODO DocSetBase.getBits ought to be at DocSet level? Though it doesn't know maxDoc but it could?
FixedBitSet bits = new FixedBitSet(searcher.maxDoc());
for (DocIterator iter = docSet.iterator(); iter.hasNext();) {
bits.set(iter.nextDoc());
}
return bits;
}
}
};
}
/**
 * Formats the raw counts array per {@code format}. Returns null when {@code counts} is null
 * or entirely zero (the response then carries a null counts value).
 *
 * @throws SolrException BAD_REQUEST for an unrecognized format
 */
private static Object formatCountsVal(String format, int columns, int rows, int[] counts, FacetDebugInfo debugInfo) {
if (counts == null) {
return null;
}
// An all-zero grid is represented as null rather than an all-zero payload.
boolean hasNonZero = false;
for (int count : counts) {
if (count > 0) {
hasNonZero = true;
break;
}
}
if (!hasNonZero) {
return null;
}
switch (format) {
case FORMAT_INTS2D: //A List of List of Integers. Good for small heatmaps and ease of consumption
return asInts2D(columns, rows, counts);
case FORMAT_PNG: //A PNG graphic; compressed. Good for large & dense heatmaps; hard to consume.
return asPngBytes(columns, rows, counts, debugInfo);
//TODO case UTFGRID https://github.com/mapbox/utfgrid-spec
//TODO case skipList: //A sequence of values; negative values are actually how many 0's to insert.
// Good for small or large but sparse heatmaps.
//TODO auto choose png or skipList; use skipList when < ~25% full or <= ~512 cells
// remember to augment error list below when we add more formats.
default:
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unknown format: " + format);
}
}
@Override
public FacetMerger createFacetMerger(Object prototype) {
  return new FacetMerger() {
    NamedList<Object> mergedResult; // first shard's response; counts are accumulated separately and added at the end
    int[] mergedCounts;

    // note: there appears to be no mechanism to modify the shard requests in this API. If we could, we'd
    // change the format to png. Instead, we have the facet processor recognize it's a shard request and ignore
    // the requested format, which seems like a hack.

    @SuppressWarnings("unchecked")
    @Override
    public void merge(Object facetResult, Context mcontext) {
      NamedList<Object> shardResult = (NamedList<Object>) facetResult;
      // pull the PNG out of the shard response and accumulate its per-cell counts
      byte[] pngBytes = (byte[]) shardResult.remove("counts_" + FORMAT_PNG);
      mergedCounts = addPngToIntArray(pngBytes, mergedCounts);
      if (mergedResult == null) {
        mergedResult = shardResult; // keep the first shard's metadata (gridLevel, columns, rows, ...)
      }
    }

    @Override
    public void finish(Context mcontext) {
      //nothing to do; we have no sub-facets
    }

    @Override
    public Object getMergedResult() {
      mergedResult.add("counts_" + format, formatCountsVal(
          format, (Integer) mergedResult.get("columns"), (Integer) mergedResult.get("rows"), mergedCounts, null));//TODO where debugInfo?
      return mergedResult;
    }
  };
}
@VisibleForTesting
static int[] addPngToIntArray(byte[] pngBytes, int[] counts) {
  // Decodes a shard's PNG-encoded heatmap and accumulates every cell into 'counts',
  // allocating the array on first use. Returns 'counts' (possibly newly allocated).
  if (pngBytes == null) {
    return counts;
  }
  final BufferedImage image = PngHelper.readImage(pngBytes);
  final int columns = image.getWidth();
  final int rows = image.getHeight();
  if (counts == null) {
    counts = new int[columns * rows];
  } else {
    assert counts.length == columns * rows;
  }
  // 'counts' is column-major: cell (c, r) lives at counts[c * rows + r]
  for (int r = 0; r < rows; r++) {
    for (int c = 0; c < columns; c++) {
      counts[c * rows + r] += PngHelper.getCountAtColumnRow(image, rows, c, r);
    }
  }
  return counts;
}
@VisibleForTesting
static List<List<Integer>> asInts2D(final int columns, final int rows, final int[] counts) {
  // Exposes the column-major, bottom-up 'counts' array as a row-major, top-down List of
  // rows -- the natural orientation for human/developer viewing. Rows that are entirely
  // zero are reported as null. This is a lazy view over 'counts' (no copy), saving memory.
  return new AbstractList<List<Integer>>() {
    @Override
    public int size() {
      return rows;
    }

    @Override
    public List<Integer> get(final int rowIdx) {
      // the view is top-down while heatmap.counts is bottom-up, so flip the 'y' coordinate
      final int y = rows - rowIdx - 1;
      boolean allZero = true;
      for (int c = 0; c < columns; c++) {
        if (counts[c * rows + y] > 0) {
          allZero = false;
          break;
        }
      }
      if (allZero) {
        return null; // convention: an all-zero row is rendered as null
      }
      return new AbstractList<Integer>() {
        @Override
        public int size() {
          return columns;
        }

        @Override
        public Integer get(int columnIdx) {
          return counts[columnIdx * rows + y];
        }
      };
    }
  };
}
@VisibleForTesting
static byte[] asPngBytes(final int columns, final int rows, final int[] counts, FacetDebugInfo debugInfo) {
  // Encodes the count grid as a PNG: one pixel per cell, with each pixel's 4-byte value
  // used as a raw int counter (see PngHelper). Timing is logged and optionally recorded
  // in the facet debug output.
  final long startTimeNano = System.nanoTime();
  final BufferedImage image = PngHelper.newImage(columns, rows);
  for (int r = 0; r < rows; r++) {
    for (int c = 0; c < columns; c++) {
      PngHelper.writeCountAtColumnRow(image, rows, c, r, counts[c * rows + r]);
    }
  }
  final byte[] bytes = PngHelper.writeImage(image);
  final long durationMs = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startTimeNano);
  log.debug("heatmap nativeSize={} pngSize={} pngTime={}", (counts.length * 4), bytes.length, durationMs);
  if (debugInfo != null) {
    debugInfo.putInfoItem("heatmap png timing", durationMs);
  }
  return bytes;
}
@VisibleForTesting
static class PngHelper {
  // Utility for round-tripping a heatmap's int[] counts through a PNG: each grid cell becomes
  // one pixel whose 4-byte ABGR value is (ab)used as a raw 32-bit counter.

  static final ImageReaderSpi imageReaderSpi;//thread-safe
  static {
    final Iterator<ImageReader> imageReaders = ImageIO.getImageReadersByFormatName("png");
    if (!imageReaders.hasNext()) {
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Can't find png image reader, needed for heatmaps!");
    }
    ImageReader imageReader = imageReaders.next();
    imageReaderSpi = imageReader.getOriginatingProvider();
  }

  /**
   * Decodes the given PNG bytes into a {@link BufferedImage}.
   * Throws a SERVER_ERROR {@link SolrException} if decoding fails.
   */
  static BufferedImage readImage(final byte[] bytes) {
    // Wrap ImageInputStream around the bytes. We could use MemoryCacheImageInputStream but it will
    // cache the data which is quite unnecessary given we have it all in-memory already.
    ImageInputStream imageInputStream = new ImageInputStreamImpl() {
      //TODO re-use this instance; superclass has 8KB buffer.

      @Override
      public int read() throws IOException {
        checkClosed();
        bitOffset = 0;
        if (streamPos >= bytes.length) {
          return -1; // EOF
        } else {
          return bytes[(int) streamPos++];
        }
      }

      @Override
      public int read(byte[] b, int off, int len) throws IOException {
        checkClosed();
        bitOffset = 0;
        if (streamPos >= bytes.length) {
          return -1; // EOF
        } else {
          int copyLen = Math.min(len, bytes.length - (int)streamPos);
          System.arraycopy(bytes, (int)streamPos, b, off, copyLen);
          streamPos += copyLen;
          return copyLen;
        }
      }

      @Override
      public long length() {
        return bytes.length;
      }

      @Override
      public boolean isCached() {
        return true;
      }

      @Override
      public boolean isCachedMemory() {
        return true;
      }
    };
    try {
      //TODO can/should we re-use an imageReader instance on FacetInfo?
      ImageReader imageReader = imageReaderSpi.createReaderInstance();

      imageReader.setInput(imageInputStream,
          false,//forwardOnly
          true);//ignoreMetadata
      return imageReader.read(0);//read first & only image
    } catch (IOException e) {
      // pass 'e' as the cause so the underlying stack trace isn't lost
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Problem reading png heatmap: " + e, e);
    }
  }

  /**
   * Encodes the image as PNG bytes.
   * Throws a SERVER_ERROR {@link SolrException} on failure.
   */
  static byte[] writeImage(BufferedImage image) {
    ByteArrayOutputStream baos = new ByteArrayOutputStream(
        // initialize to roughly 1/4th the size a native int would take per-pixel
        image.getWidth() * image.getHeight() + 1024
    );
    try {
      ImageIO.write(image, FORMAT_PNG, baos);
    } catch (IOException e) {
      // pass 'e' as the cause so the underlying stack trace isn't lost
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "While generating PNG: " + e, e);
    }
    //too bad we can't access the raw byte[]; this copies to a new one
    return baos.toByteArray();
  }

  // We abuse the image for storing integers (4 bytes), and so we need a 4-byte ABGR.
  // first (low) byte is blue, next byte is green, next byte red, and last (high) byte is alpha.
  static BufferedImage newImage(int columns, int rows) {
    return new BufferedImage(columns, rows, BufferedImage.TYPE_4BYTE_ABGR);
  }

  // 'y' dimension goes top-down, so invert.
  // Alpha channel is high byte; 0 means transparent. So XOR those bits with '1' so that we need
  // to have counts > 16M before the picture starts to fade
  static void writeCountAtColumnRow(BufferedImage image, int rows, int c, int r, int val) {
    image.setRGB(c, rows - 1 - r, val ^ 0xFF_00_00_00);
  }

  static int getCountAtColumnRow(BufferedImage image, int rows, int c, int r) {
    return image.getRGB(c, rows - 1 - r) ^ 0xFF_00_00_00;
  }
}
}

View File

@ -37,8 +37,6 @@ import org.apache.solr.handler.component.SearchComponent;
import org.apache.solr.handler.component.ShardRequest;
import org.apache.solr.handler.component.ShardResponse;
import org.apache.solr.search.QueryContext;
import org.apache.solr.search.SyntaxError;
import org.apache.solr.util.RTimer;
import org.noggit.CharArr;
import org.noggit.JSONWriter;
import org.noggit.ObjectBuilder;
@ -98,13 +96,7 @@ public class FacetModule extends SearchComponent {
rb.setNeedDocSet(true);
// Parse the facet in the prepare phase?
FacetParser parser = new FacetTopParser(rb.req);
FacetRequest facetRequest = null;
try {
facetRequest = parser.parse(jsonFacet);
} catch (SyntaxError syntaxError) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, syntaxError);
}
FacetRequest facetRequest = FacetRequest.parse(rb.req, jsonFacet);
FacetComponentState fcState = new FacetComponentState();
fcState.rb = rb;
@ -138,30 +130,17 @@ public class FacetModule extends SearchComponent {
fcontext.flags |= FacetContext.SKIP_FACET; // the root bucket should have been received from all shards previously
}
}
FacetProcessor fproc = facetState.facetRequest.createFacetProcessor(fcontext);
if (rb.isDebug()) {
FacetDebugInfo fdebug = new FacetDebugInfo();
fcontext.setDebugInfo(fdebug);
fdebug.setReqDescription(facetState.facetRequest.getFacetDescription());
fdebug.setProcessor(fproc.getClass().getSimpleName());
final RTimer timer = new RTimer();
fproc.process();
long timeElapsed = (long) timer.getTime();
fdebug.setElapse(timeElapsed);
fdebug.putInfoItem("domainSize", (long)fcontext.base.size());
rb.req.getContext().put("FacetDebugInfo", fdebug);
} else {
fproc.process();
}
rb.rsp.add("facets", fproc.getResponse());
final Object results = facetState.facetRequest.process(fcontext);
rb.rsp.add("facets", results);
}
private void clearFaceting(List<ShardRequest> outgoing) {
// turn off faceting for requests not marked as being for faceting refinements
for (ShardRequest sreq : outgoing) {

View File

@ -33,19 +33,17 @@ import org.apache.lucene.search.Query;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.handler.component.ResponseBuilder;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrRequestInfo;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.BitDocSet;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.QParser;
import org.apache.solr.search.QueryContext;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SyntaxError;
import org.apache.solr.search.facet.SlotAcc.SlotContext;
import org.apache.solr.util.RTimer;
/** Base abstraction for a class that computes facets. This is fairly internal to the module. */
public abstract class FacetProcessor<FacetRequestT extends FacetRequest> {
SimpleOrderedMap<Object> response;
FacetContext fcontext;
@ -56,27 +54,6 @@ public abstract class FacetProcessor<FacetRequestT extends FacetRequest> {
SlotAcc[] accs;
CountSlotAcc countAcc;
/** factory method for invoking json facet framework as whole.
* Note: this is currently only used from SimpleFacets, not from JSON Facet API itself. */
public static FacetProcessor<?> createProcessor(SolrQueryRequest req,
Map<String, Object> params, DocSet docs){
FacetParser parser = new FacetTopParser(req);
FacetRequest facetRequest = null;
try {
facetRequest = parser.parse(params);
} catch (SyntaxError syntaxError) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, syntaxError);
}
FacetContext fcontext = new FacetContext();
fcontext.base = docs;
fcontext.req = req;
fcontext.searcher = req.getSearcher();
fcontext.qcontext = QueryContext.newContext(fcontext.searcher);
return facetRequest.createFacetProcessor(fcontext);
}
FacetProcessor(FacetContext fcontext, FacetRequestT freq) {
this.fcontext = fcontext;
this.freq = freq;
@ -201,9 +178,7 @@ public abstract class FacetProcessor<FacetRequestT extends FacetRequest> {
return;
}
// TODO: somehow remove responsebuilder dependency
ResponseBuilder rb = SolrRequestInfo.getRequestInfo().getResponseBuilder();
Map tagMap = (Map) rb.req.getContext().get("tags");
Map tagMap = (Map) fcontext.req.getContext().get("tags");
if (tagMap == null) {
// no filters were tagged
return;
@ -229,6 +204,9 @@ public abstract class FacetProcessor<FacetRequestT extends FacetRequest> {
List<Query> qlist = new ArrayList<>();
// TODO: somehow remove responsebuilder dependency
ResponseBuilder rb = SolrRequestInfo.getRequestInfo().getResponseBuilder();
// add the base query
if (!excludeSet.containsKey(rb.getQuery())) {
qlist.add(rb.getQuery());
@ -484,27 +462,16 @@ public abstract class FacetProcessor<FacetRequestT extends FacetRequest> {
FacetContext subContext = fcontext.sub(filter, domain);
subContext.facetInfo = facetInfoSub;
if (!skip) subContext.flags &= ~FacetContext.SKIP_FACET; // turn off the skip flag if we're not skipping this bucket
FacetProcessor subProcessor = subRequest.createFacetProcessor(subContext);
if (fcontext.getDebugInfo() != null) { // if fcontext.debugInfo != null, it means rb.debug() == true
FacetDebugInfo fdebug = new FacetDebugInfo();
subContext.setDebugInfo(fdebug);
fcontext.getDebugInfo().addChild(fdebug);
fdebug.setReqDescription(subRequest.getFacetDescription());
fdebug.setProcessor(subProcessor.getClass().getSimpleName());
if (subContext.filter != null) fdebug.setFilter(subContext.filter.toString());
final RTimer timer = new RTimer();
subProcessor.process();
long timeElapsed = (long) timer.getTime();
fdebug.setElapse(timeElapsed);
fdebug.putInfoItem("domainSize", (long)subContext.base.size());
} else {
subProcessor.process();
}
response.add( sub.getKey(), subProcessor.getResponse() );
Object result = subRequest.process(subContext);
response.add( sub.getKey(), result);
}
}

View File

@ -16,6 +16,13 @@
*/
package org.apache.solr.search.facet;
import java.io.IOException;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import org.apache.lucene.search.Query;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.FacetParams;
@ -24,17 +31,28 @@ import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.search.*;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.FunctionQParser;
import org.apache.solr.search.FunctionQParserPlugin;
import org.apache.solr.search.JoinQParserPlugin;
import org.apache.solr.search.QParser;
import org.apache.solr.search.QueryContext;
import org.apache.solr.search.SolrConstantScoreQuery;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SyntaxError;
import org.apache.solr.search.join.GraphQuery;
import org.apache.solr.search.join.GraphQueryParser;
import java.io.IOException;
import java.util.*;
import org.apache.solr.util.RTimer;
import static org.apache.solr.common.params.CommonParams.SORT;
import static org.apache.solr.search.facet.FacetRequest.RefineMethod.NONE;
/**
* A request to do facets/stats that might itself be composed of sub-FacetRequests.
* This is a cornerstone of the facet module.
*
* @see #parse(SolrQueryRequest, Map)
*/
public abstract class FacetRequest {
public static enum SortDirection {
@ -237,6 +255,39 @@ public abstract class FacetRequest {
}
/**
* Factory method to parse a facet request tree. The outer keys are arbitrary labels and their values are
* facet request specifications. Will throw a {@link SolrException} if it fails to parse.
* @param req the overall request
* @param params a typed parameter structure (unlike SolrParams which are all string values).
*/
public static FacetRequest parse(SolrQueryRequest req, Map<String, Object> params) {
FacetParser parser = new FacetTopParser(req);
try {
return parser.parse(params);
} catch (SyntaxError syntaxError) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, syntaxError);
}
}
//TODO it would be nice if there was no distinction. If the top level request had "type" as special then there wouldn't be a need.
/**
* Factory method to parse out a rooted facet request tree that would normally go one level below a label.
* The params must contain a "type".
* This is intended to be useful externally, such as by {@link org.apache.solr.request.SimpleFacets}.
* @param req the overall request
* @param params a typed parameter structure (unlike SolrParams which are all string values).
*/
public static FacetRequest parseOneFacetReq(SolrQueryRequest req, Map<String, Object> params) {
FacetParser parser = new FacetTopParser(req);
try {
return (FacetRequest) parser.parseFacetOrStat("", params);
} catch (SyntaxError syntaxError) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, syntaxError);
}
}
public FacetRequest() {
facetStats = new LinkedHashMap<>();
subFacets = new LinkedHashMap<>();
@ -294,7 +345,46 @@ public abstract class FacetRequest {
s += "}";
return s;
}
/**
* Process this facet request against the given domain of docs.
* Note: this is currently used externally by {@link org.apache.solr.request.SimpleFacets}.
*/
public final Object process(SolrQueryRequest req, DocSet domain) throws IOException {
//TODO check for FacetDebugInfo? and if so set on fcontext
// rb.req.getContext().get("FacetDebugInfo");
//TODO should the SolrQueryRequest be held on the FacetRequest? It was created from parse(req,...) so is known.
FacetContext fcontext = new FacetContext();
fcontext.base = domain;
fcontext.req = req;
fcontext.searcher = req.getSearcher();
fcontext.qcontext = QueryContext.newContext(fcontext.searcher);
return process(fcontext);
}
/** Process the request with the facet context settings, a parameter-object. */
final Object process(FacetContext fcontext) throws IOException {
FacetProcessor facetProcessor = createFacetProcessor(fcontext);
FacetDebugInfo debugInfo = fcontext.getDebugInfo();
if (debugInfo == null) {
facetProcessor.process();
} else {
if (fcontext.filter != null) {
debugInfo.setFilter(fcontext.filter.toString());
}
debugInfo.setReqDescription(getFacetDescription());
debugInfo.setProcessor(getClass().getSimpleName());
debugInfo.putInfoItem("domainSize", (long) fcontext.base.size());
RTimer timer = new RTimer();
facetProcessor.process();
debugInfo.setElapse((long) timer.getTime());
}
return facetProcessor.getResponse(); // note: not captured in elapsed time above; good/bad?
}
public abstract FacetProcessor createFacetProcessor(FacetContext fcontext);
public abstract FacetMerger createFacetMerger(Object prototype);
@ -453,12 +543,16 @@ abstract class FacetParser<FacetRequestT extends FacetRequest> {
public Object parseFacetOrStat(String key, String type, Object args) throws SyntaxError {
// TODO: a place to register all these facet types?
if ("field".equals(type) || "terms".equals(type)) {
return parseFieldFacet(key, args);
} else if ("query".equals(type)) {
return parseQueryFacet(key, args);
} else if ("range".equals(type)) {
return parseRangeFacet(key, args);
switch (type) {
case "field":
case "terms":
return new FacetFieldParser(this, key).parse(args);
case "query":
return new FacetQueryParser(this, key).parse(args);
case "range":
return new FacetRangeParser(this, key).parse(args);
case "heatmap":
return new FacetHeatmap.Parser(this, key).parse(args);
}
AggValueSource stat = parseStat(key, type, args);
@ -468,23 +562,6 @@ abstract class FacetParser<FacetRequestT extends FacetRequest> {
return stat;
}
FacetField parseFieldFacet(String key, Object args) throws SyntaxError {
FacetFieldParser parser = new FacetFieldParser(this, key);
return parser.parse(args);
}
FacetQuery parseQueryFacet(String key, Object args) throws SyntaxError {
FacetQueryParser parser = new FacetQueryParser(this, key);
return parser.parse(args);
}
FacetRange parseRangeFacet(String key, Object args) throws SyntaxError {
FacetRangeParser parser = new FacetRangeParser(this, key);
return parser.parse(args);
}
public Object parseStringFacetOrStat(String key, String s) throws SyntaxError {
// "avg(myfield)"
return parseStringStat(key, s);
@ -623,6 +700,21 @@ abstract class FacetParser<FacetRequestT extends FacetRequest> {
return ((Number)o).longValue();
}
public Double getDoubleOrNull(Map<String,Object> args, String paramName, boolean required) {
Object o = args.get(paramName);
if (o == null) {
if (required) {
throw err("Missing required parameter '" + paramName + "'");
}
return null;
}
if (!(o instanceof Number)) {
throw err("Expected double type for param '" + paramName + "' but got " + o);
}
return ((Number)o).doubleValue();
}
public boolean getBoolean(Map<String,Object> args, String paramName, boolean defVal) {
Object o = args.get(paramName);
if (o == null) {

View File

@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.component;
package org.apache.solr.search.facet;
import java.util.Arrays;
import java.util.List;
@ -30,6 +30,7 @@ import org.apache.solr.common.util.NamedList;
import org.junit.BeforeClass;
import org.junit.Test;
/** Test Heatmap Facets (both impls) */
public class SpatialHeatmapFacetsTest extends BaseDistributedSearchTestCase {
private static final String FIELD = "srpt_quad";
@ -42,9 +43,10 @@ public class SpatialHeatmapFacetsTest extends BaseDistributedSearchTestCase {
System.setProperty("java.awt.headless", "true");
}
/** Tests SimpleFacets/Classic faceting implementation of heatmaps */
@SuppressWarnings("unchecked")
@Test
public void test() throws Exception {
public void testClassicFacets() throws Exception { // AKA SimpleFacets
handle.clear();
handle.put("QTime", SKIPVAL);
handle.put("timestamp", SKIPVAL);
@ -165,20 +167,152 @@ public class SpatialHeatmapFacetsTest extends BaseDistributedSearchTestCase {
Object v = getHmObj(query(params(baseParams, FacetParams.FACET_HEATMAP_FORMAT, "png"))).get("counts_png");
assertTrue(v instanceof byte[]);
//simply test we can read the image
assertNotNull(SpatialHeatmapFacets.PngHelper.readImage((byte[]) v));
assertNotNull(FacetHeatmap.PngHelper.readImage((byte[]) v));
//good enough for this test method
}
private NamedList getHmObj(QueryResponse response) {
return (NamedList) response.getResponse().findRecursive("facet_counts", "facet_heatmaps", FIELD);
}
private ModifiableSolrParams params(SolrParams baseParams, String... moreParams) {
final ModifiableSolrParams params = new ModifiableSolrParams(baseParams);
params.add(params(moreParams));//actually replaces
return params;
}
/** Tests JSON Facet module implementation of heatmaps. */
@SuppressWarnings("unchecked")
@Test
public void testJsonFacets() throws Exception {
/*
THIS IS THE MOSTLY SAME CODE as above with tweaks to request it using the JSON Facet approach.
Near-duplication is sad; not clear if one test doing both is better -- would be awkward
*/
handle.clear();
handle.put("QTime", SKIPVAL);
handle.put("timestamp", SKIPVAL);
handle.put("maxScore", SKIPVAL);
SolrParams baseParams = params("q", "*:*", "rows", "0");
final String testBox = "[\"50 50\" TO \"180 90\"]";//top-right somewhere on edge (whatever)
// ------ Index data
index("id", "0", FIELD, "ENVELOPE(100, 120, 80, 40)");// on right side
index("id", "1", FIELD, "ENVELOPE(-120, -110, 80, 20)");// on left side (outside heatmap)
index("id", "3", FIELD, "POINT(70 60)");//just left of BOX 0
index("id", "4", FIELD, "POINT(91 89)");//just outside box 0 (above it) near pole,
commit();
//----- Test gridLevel derivation
try {
query(params(baseParams, "json.facet", "{f1:{type:heatmap, f:" + FIELD + ", geom:'" + testBox + "', distErr:0}}"));
fail();
} catch (SolrException e) {
assertEquals(SolrException.ErrorCode.BAD_REQUEST.code, e.code());
}
try {
query(params(baseParams, "json.facet", "{f1:{type:heatmap, f:" + FIELD + ", geom:'" + testBox + "', distErrPct:0}}"));
fail();
} catch (SolrException e) {
assertEquals(SolrException.ErrorCode.BAD_REQUEST.code, e.code());
}
// Monkeying with these params changes the gridLevel in different directions. We don't test the exact
// computation here; that's not _that_ relevant, and is Lucene spatial's job (not Solr) any way.
assertEquals(7, getHmObj(query(params(baseParams,
"json.facet", "{f1:{type:heatmap, f:" + FIELD + ", geom:'" + testBox + "'}}"))).get("gridLevel"));//default
assertEquals(3, getHmObj(query(params(baseParams,
"json.facet", "{f1:{type:heatmap, f:" + FIELD + ", geom:'" + testBox + "', gridLevel:3}}"))).get("gridLevel"));
assertEquals(2, getHmObj(query(params(baseParams,
"json.facet", "{f1:{type:heatmap, f:" + FIELD + ", geom:'" + testBox + "', distErr:100}}"))).get("gridLevel"));
//TODO test impact of distance units
assertEquals(9, getHmObj(query(params(baseParams,
"json.facet", "{f1:{type:heatmap, f:" + FIELD + ", geom:'" + testBox + "', distErrPct:0.05}}"))).get("gridLevel"));
assertEquals(6, getHmObj(query(params(baseParams,
"json.facet", "{f1:{type:heatmap, f:" + FIELD + ", distErrPct:0.10}}"))).get("gridLevel"));
// ----- Search
// this test simply has some 0's, nulls, 1's and a 2 in there.
NamedList hmObj = getHmObj(query(params(baseParams,
"json.facet", "{f1:{type:heatmap, f:" + FIELD + " geom:'[\"50 20\" TO \"180 90\"]', gridLevel:4}}")));
List<List<Integer>> counts = (List<List<Integer>>) hmObj.get("counts_ints2D");
List<List<Integer>> expectedCounts1 = Arrays.asList(
Arrays.asList(0, 0, 2, 1, 0, 0),
Arrays.asList(0, 0, 1, 1, 0, 0),
Arrays.asList(0, 1, 1, 1, 0, 0),
Arrays.asList(0, 0, 1, 1, 0, 0),
Arrays.asList(0, 0, 1, 1, 0, 0),
null,
null
);
assertEquals( expectedCounts1, counts);
// now this time we add a filter query and exclude it
QueryResponse response = query(params(baseParams,
"fq", "{!tag=excludeme}id:0", // filter to only be id:0
"json.facet", "{f1:{type:heatmap, excludeTags:['excludeme'], f:" + FIELD + ", geom:'[\"50 20\" TO \"180 90\"]', gridLevel:4}}"));
assertEquals(1, response.getResults().getNumFound());// because of our 'fq'
hmObj = getHmObj(response);
counts = (List<List<Integer>>) hmObj.get("counts_ints2D");
assertEquals( expectedCounts1, counts);
{
// impractical example but nonetheless encloses the points of both doc3 and doc4 (both of which are points)
final String jsonHeatmap = "facet:{hm:{type:heatmap, f:" + FIELD + ", geom:'MULTIPOINT(70 60, 91 89)', distErrPct:0.2}}";
response = query(params(baseParams,
"json.facet", "{" +
"q1:{type:query, q:'id:3', " + jsonHeatmap + " }, " +
"q2:{type:query, q:'id:4', " + jsonHeatmap + " } " +
"}"));
{
final NamedList q1Res = (NamedList) response.getResponse().findRecursive("facets", "q1");
assertEquals("1", q1Res.get("count").toString());
final NamedList q2Res = (NamedList) response.getResponse().findRecursive("facets", "q2");
assertEquals("1", q2Res.get("count").toString());
// essentially, these will differ only in the heatmap counts but otherwise will be the same
assertNotNull(compare(q1Res, q2Res, flags, handle));
}
}
// test using a circle input shape
hmObj = getHmObj(query(params(baseParams,
"json.facet", "{f1:{type:heatmap, f:" + FIELD + ", geom:'BUFFER(POINT(110 40), 7)', gridLevel:7}}")));
counts = (List<List<Integer>>) hmObj.get("counts_ints2D");
assertEquals(
Arrays.asList(
Arrays.asList(0, 1, 1, 1, 1, 1, 1, 0),//curved; we have a 0
Arrays.asList(0, 1, 1, 1, 1, 1, 1, 0),//curved; we have a 0
Arrays.asList(0, 1, 1, 1, 1, 1, 1, 0),//curved; we have a 0
Arrays.asList(1, 1, 1, 1, 1, 1, 1, 1),
Arrays.asList(1, 1, 1, 1, 1, 1, 1, 1),
Arrays.asList(1, 1, 1, 1, 1, 1, 1, 1),
null, null, null, null, null//no data here (below edge of rect 0)
),
counts
);
// Search in no-where ville and get null counts
assertNull(getHmObj(query(params(baseParams,
"json.facet", "{f1:{type:heatmap, f:" + FIELD + ", geom:'ENVELOPE(0, 10, -80, -90)'}}"))).get("counts_ints2D"));
Object v = getHmObj(query(params(baseParams,
"json.facet", "{f1:{type:heatmap, f:" + FIELD + ", format:png }}"))).get("counts_png");
assertTrue(v instanceof byte[]);
//simply test we can read the image
assertNotNull(FacetHeatmap.PngHelper.readImage((byte[]) v));
//good enough for this test method
}
private NamedList getHmObj(QueryResponse response) {
// classic faceting
final NamedList classicResp = (NamedList) response.getResponse().findRecursive("facet_counts", "facet_heatmaps", FIELD);
if (classicResp != null) {
return classicResp;
}
// JSON Facet
return (NamedList) response.getResponse().findRecursive("facets", "f1");
}
@Test
@Repeat(iterations = 3)
public void testPng() {
@ -197,14 +331,14 @@ public class SpatialHeatmapFacetsTest extends BaseDistributedSearchTestCase {
}
}
// Round-trip
final byte[] bytes = SpatialHeatmapFacets.asPngBytes(columns, rows, counts, null);
final byte[] bytes = FacetHeatmap.asPngBytes(columns, rows, counts, null);
int[] countsOut = random().nextBoolean() ? new int[columns * rows] : null;
int base = 0;
if (countsOut != null) {
base = 9;
Arrays.fill(countsOut, base);
}
countsOut = SpatialHeatmapFacets.addPngToIntArray(bytes, countsOut);
countsOut = FacetHeatmap.addPngToIntArray(bytes, countsOut);
// Test equal
assertEquals(counts.length, countsOut.length);
for (int i = 0; i < countsOut.length; i++) {

View File

@ -300,6 +300,52 @@ By default, the ranges used to compute range faceting between `start` and `end`
|facet |Aggregations, metrics, or nested facets that will be calculated for every returned bucket
|===
== Heatmap Facet
The heatmap facet generates a 2D grid of facet counts for documents having spatial data in each grid cell.
This feature is primarily documented in the <<spatial-search.adoc#heatmap-faceting,spatial>> section of the reference guide.
The key parameters are `type` to specify `heatmap` and `field` to indicate a spatial RPT field.
The rest of the parameter names use the same names and semantics mirroring
facet.heatmap query-parameter style faceting, albeit without the "facet.heatmap." prefix.
For example `geom` here corresponds to `facet.heatmap.geom` in a facet.heatmap command.
Like the other facet types, heatmaps may have a custom domain (e.g. to exclude filters) and they can be subordinate to
other facet types like a query facet. However, unlike those, a heatmap facet cannot have facets or stats hanging
beneath it -- at least not yet.
Here's an example query:
[source,json]
----
{
hm : {
type : heatmap,
field : points_srpt,
geom : "[-49.492,-180 TO 64.701,73.125]",
distErrPct : 0.5
}
}
----
And the facet response will look like:
[source,json]
----
{
"facets":{
"count":145725,
"hm":{
"gridLevel":1,
"columns":6,
"rows":4,
"minX":-180.0,
"maxX":90.0,
"minY":-90.0,
"maxY":90.0,
"counts_ints2D":[[68,1270,459,5359,39456,1713],[123,10472,13620,7777,18376,6239],[88,6,3898,989,1314,255],[0,0,30,1,0,1]]
}}}
----
== Filtering Facets
One can filter the domain *before* faceting via the `filter` keyword in the `domain` block of the facet.

View File

@ -375,10 +375,13 @@ When using this field type, you will likely _not_ want to mark the field as stor
The RPT field supports generating a 2D grid of facet counts for documents having spatial data in each grid cell. For high-detail grids, this can be used to plot points, and for lesser detail it can be used for heatmap generation. The grid cells are determined at index-time based on RPT's configuration. At facet counting time, the indexed cells in the region of interest are traversed and a grid of counters corresponding to each cell are incremented. Solr can return the data in a straight-forward 2D array of integers or in a PNG which compresses better for larger data sets but must be decoded.
The heatmap feature is accessed from Solr's faceting feature. As a part of faceting, it supports the `key` local parameter as well as excluding tagged filter queries, just like other types of faceting do. This allows multiple heatmaps to be returned on the same field with different filters.
The heatmap feature is accessible both from Solr's standard faceting feature, plus the newer more flexible <<json-facet-api.adoc#heatmap-facet,JSON Facet API>>.
We'll proceed now with standard faceting.
As a part of faceting, it supports the `key` local parameter as well as excluding tagged filter queries, just like other types of faceting do.
This allows multiple heatmaps to be returned on the same field with different filters.
`facet`::
Set to `true` to enable faceting.
Set to `true` to enable standard faceting.
`facet.heatmap`::
The field name of type RPT.

View File

@ -481,7 +481,9 @@ public abstract class BaseDistributedSearchTestCase extends SolrTestCaseJ4 {
*/
protected void indexDoc(SolrInputDocument doc) throws IOException, SolrServerException {
controlClient.add(doc);
if (shardCount == 0) {//mostly for temp debugging
return;
}
int which = (doc.getField(id).toString().hashCode() & 0x7fffffff) % clients.size();
SolrClient client = clients.get(which);
client.add(doc);
@ -599,6 +601,10 @@ public abstract class BaseDistributedSearchTestCase extends SolrTestCaseJ4 {
final QueryResponse controlRsp = controlClient.query(params);
validateControlData(controlRsp);
if (shardCount == 0) {//mostly for temp debugging
return controlRsp;
}
params.remove("distrib");
if (setDistribParams) setDistributedParams(params);
@ -872,6 +878,15 @@ public abstract class BaseDistributedSearchTestCase extends SolrTestCaseJ4 {
}
// equivalent integer numbers
if ((a instanceof Integer || a instanceof Long) && (b instanceof Integer || b instanceof Long)) {
if (((Number)a).longValue() == ((Number)b).longValue()) {
return null;
} else {
return ":" + a + "!=" + b;
}
}
if ((flags & FUZZY) != 0) {
if ((a instanceof Double && b instanceof Double)) {
double aaa = ((Double) a).doubleValue();