diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 5ce9411f84a..1d7bb9ccd6f 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -88,6 +88,8 @@ New Features
 
 * SOLR-12506: Add SolrJ support for the modify collection API. (shalin)
 
+* SOLR-12398: The JSON Facet API now supports type=heatmap facets, just as classic faceting does. (David Smiley)
+
 Bug Fixes
 ----------------------
 
diff --git a/solr/core/src/java/org/apache/solr/handler/component/SpatialHeatmapFacets.java b/solr/core/src/java/org/apache/solr/handler/component/SpatialHeatmapFacets.java
index 0a39e5b1b5f..8814953bc15 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/SpatialHeatmapFacets.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/SpatialHeatmapFacets.java
@@ -16,257 +16,53 @@
  */
 package org.apache.solr.handler.component;
 
-import javax.imageio.ImageIO;
-import javax.imageio.ImageReader;
-import javax.imageio.spi.ImageReaderSpi;
-import javax.imageio.stream.ImageInputStream;
-import javax.imageio.stream.ImageInputStreamImpl;
-import java.awt.image.BufferedImage;
-import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.lang.invoke.MethodHandles;
-import java.util.AbstractList;
-import java.util.Iterator;
+import java.util.HashMap;
 import java.util.LinkedHashMap;
-import java.util.List;
 import java.util.Map;
-import java.util.concurrent.TimeUnit;
 
-import org.apache.lucene.spatial.prefix.HeatmapFacetCounter;
-import org.apache.lucene.spatial.prefix.PrefixTreeStrategy;
-import org.apache.lucene.spatial.query.SpatialArgs;
-import org.apache.lucene.spatial.query.SpatialOperation;
-import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.FixedBitSet;
-import org.apache.solr.common.SolrException;
 import org.apache.solr.common.params.FacetParams;
 import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.common.util.SimpleOrderedMap;
-import org.apache.solr.schema.AbstractSpatialPrefixTreeFieldType;
-import org.apache.solr.schema.FieldType;
-import org.apache.solr.schema.RptWithGeometrySpatialField;
-import org.apache.solr.schema.SchemaField;
-import org.apache.solr.schema.SpatialRecursivePrefixTreeFieldType;
-import org.apache.solr.search.BitDocSet;
-import org.apache.solr.search.DocIterator;
 import org.apache.solr.search.DocSet;
-import org.apache.solr.search.SolrIndexSearcher;
-import org.apache.solr.util.DistanceUnits;
-import org.apache.solr.util.SpatialUtils;
-import org.locationtech.spatial4j.context.SpatialContext;
-import org.locationtech.spatial4j.shape.Shape;
+import org.apache.solr.search.facet.FacetHeatmap;
+import org.apache.solr.search.facet.FacetMerger;
+import org.apache.solr.search.facet.FacetRequest;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 /** A 2D spatial faceting summary of a rectangular region. Used by {@link org.apache.solr.handler.component.FacetComponent}
- * and {@link org.apache.solr.request.SimpleFacets}. */
+ * and {@link org.apache.solr.request.SimpleFacets}.
+ * @see FacetHeatmap
+ */
 public class SpatialHeatmapFacets {
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
 
   //underneath facet_counts we put this here:
   public static final String RESPONSE_KEY = "facet_heatmaps";
 
-  public static final String FORMAT_PNG = "png";
-  public static final String FORMAT_INTS2D = "ints2D";
-  //note: if we change or add more formats, remember to update the javadoc on the format param
-  //TODO for more format ideas, see formatCountsAndAddToNL
-
-  public static final double DEFAULT_DIST_ERR_PCT = 0.15;
-
   /** Called by {@link org.apache.solr.request.SimpleFacets} to compute heatmap facets. */
   public static NamedList getHeatmapForField(String fieldKey, String fieldName, ResponseBuilder rb, SolrParams params, DocSet docSet) throws IOException {
-    //get the strategy from the field type
-    final SchemaField schemaField = rb.req.getSchema().getField(fieldName);
-    final FieldType type = schemaField.getType();
-
-    final PrefixTreeStrategy strategy;
-    final DistanceUnits distanceUnits;
-    // note: the two instanceof conditions is not ideal, versus one. If we start needing to add more then refactor.
-    if ((type instanceof AbstractSpatialPrefixTreeFieldType)) {
-      AbstractSpatialPrefixTreeFieldType rptType = (AbstractSpatialPrefixTreeFieldType) type;
-      strategy = (PrefixTreeStrategy) rptType.getStrategy(fieldName);
-      distanceUnits = rptType.getDistanceUnits();
-    } else if (type instanceof RptWithGeometrySpatialField) {
-      RptWithGeometrySpatialField rptSdvType = (RptWithGeometrySpatialField) type;
-      strategy = rptSdvType.getStrategy(fieldName).getIndexStrategy();
-      distanceUnits = rptSdvType.getDistanceUnits();
-    } else {
-      //FYI we support the term query one too but few people use that one
-      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "heatmap field needs to be of type "
-          + SpatialRecursivePrefixTreeFieldType.class + " or " + RptWithGeometrySpatialField.class);
-    }
-
-    final SpatialContext ctx = strategy.getSpatialContext();
-
-    //get the bbox (query Rectangle)
-    String geomStr = params.getFieldParam(fieldKey, FacetParams.FACET_HEATMAP_GEOM);
-    final Shape boundsShape = geomStr == null ? ctx.getWorldBounds() : SpatialUtils.parseGeomSolrException(geomStr, ctx);
-
-    //get the grid level (possibly indirectly via distErr or distErrPct)
-    final int gridLevel;
-    Integer gridLevelObj = params.getFieldInt(fieldKey, FacetParams.FACET_HEATMAP_LEVEL);
-    final int maxGridLevel = strategy.getGrid().getMaxLevels();
-    if (gridLevelObj != null) {
-      gridLevel = gridLevelObj;
-      if (gridLevel <= 0 || gridLevel > maxGridLevel) {
-        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
-            FacetParams.FACET_HEATMAP_LEVEL +" should be > 0 and <= " + maxGridLevel);
-      }
-    } else {
-      //SpatialArgs has utility methods to resolve a 'distErr' from optionally set distErr & distErrPct. Arguably that
-      // should be refactored to feel less weird than using it like this.
-      SpatialArgs spatialArgs = new SpatialArgs(SpatialOperation.Intersects/*ignored*/,
-          boundsShape == null ? ctx.getWorldBounds() : boundsShape);
-      final Double distErrObj = params.getFieldDouble(fieldKey, FacetParams.FACET_HEATMAP_DIST_ERR);
-      if (distErrObj != null) {
-        // convert distErr units based on configured units
-        spatialArgs.setDistErr(distErrObj * distanceUnits.multiplierFromThisUnitToDegrees());
-      }
-      spatialArgs.setDistErrPct(params.getFieldDouble(fieldKey, FacetParams.FACET_HEATMAP_DIST_ERR_PCT));
-      double distErr = spatialArgs.resolveDistErr(ctx, DEFAULT_DIST_ERR_PCT);
-      if (distErr <= 0) {
-        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
-            FacetParams.FACET_HEATMAP_DIST_ERR_PCT + " or " + FacetParams.FACET_HEATMAP_DIST_ERR
-                + " should be > 0 or instead provide " + FacetParams.FACET_HEATMAP_LEVEL + "=" + maxGridLevel
-                + " if you insist on maximum detail");
-      }
-      //The SPT (grid) can lookup a grid level satisfying an error distance constraint
-      gridLevel = strategy.getGrid().getLevelForDistance(distErr);
-    }
-
-    //Compute!
-    final HeatmapFacetCounter.Heatmap heatmap;
-    try {
-      heatmap = HeatmapFacetCounter.calcFacets(
-          strategy,
-          rb.req.getSearcher().getTopReaderContext(),
-          getTopAcceptDocs(docSet, rb.req.getSearcher()), // turn DocSet into Bits
-          boundsShape,
-          gridLevel,
-          params.getFieldInt(fieldKey, FacetParams.FACET_HEATMAP_MAX_CELLS, 100_000) // will throw if exceeded
-      );
-    } catch (IllegalArgumentException e) {//e.g. too many cells
-      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e.toString(), e);
-    }
-
-    //Populate response
-    NamedList result = new NamedList<>();
-    result.add("gridLevel", gridLevel);
-    result.add("columns", heatmap.columns);
-    result.add("rows", heatmap.rows);
-    result.add("minX", heatmap.region.getMinX());
-    result.add("maxX", heatmap.region.getMaxX());
-    result.add("minY", heatmap.region.getMinY());
-    result.add("maxY", heatmap.region.getMaxY());
-
-    boolean hasNonZero = false;
-    for (int count : heatmap.counts) {
-      if (count > 0) {
-        hasNonZero = true;
-        break;
-      }
-    }
-    formatCountsAndAddToNL(fieldKey, rb, params, heatmap.columns, heatmap.rows, hasNonZero ? heatmap.counts : null, result);
-
-    return result;
+    final FacetRequest facetRequest = createHeatmapRequest(fieldKey, fieldName, rb, params);
+    return (NamedList) facetRequest.process(rb.req, docSet);
   }
 
-  private static Bits getTopAcceptDocs(DocSet docSet, SolrIndexSearcher searcher) throws IOException {
-    if (searcher.getLiveDocSet() == docSet) {
-      return null; // means match everything (all live docs). This can speedup things a lot.
-    } else if (docSet.size() == 0) {
-      return new Bits.MatchNoBits(searcher.maxDoc()); // can speedup things a lot
-    } else if (docSet instanceof BitDocSet) {
-      return ((BitDocSet) docSet).getBits();
-    } else {
-      // TODO DocSetBase.calcBits ought to be at DocSet level?
-      FixedBitSet bits = new FixedBitSet(searcher.maxDoc());
-      for (DocIterator iter = docSet.iterator(); iter.hasNext();) {
-        bits.set(iter.nextDoc());
-      }
-      return bits;
-    }
-  }
 
+  private static FacetRequest createHeatmapRequest(String fieldKey, String fieldName, ResponseBuilder rb, SolrParams params) {
+    Map jsonFacet = new HashMap<>();
+    jsonFacet.put("type", "heatmap");
+    jsonFacet.put("field", fieldName);
+    // jsonFacets has typed values, unlike SolrParams which is all string
+    jsonFacet.put(FacetHeatmap.GEOM_PARAM, params.getFieldParam(fieldKey, FacetParams.FACET_HEATMAP_GEOM));
+    jsonFacet.put(FacetHeatmap.LEVEL_PARAM, params.getFieldInt(fieldKey, FacetParams.FACET_HEATMAP_LEVEL));
+    jsonFacet.put(FacetHeatmap.DIST_ERR_PCT_PARAM, params.getFieldDouble(fieldKey, FacetParams.FACET_HEATMAP_DIST_ERR_PCT));
+    jsonFacet.put(FacetHeatmap.DIST_ERR_PARAM, params.getFieldDouble(fieldKey, FacetParams.FACET_HEATMAP_DIST_ERR));
+    jsonFacet.put(FacetHeatmap.MAX_CELLS_PARAM, params.getFieldInt(fieldKey, FacetParams.FACET_HEATMAP_MAX_CELLS));
+    jsonFacet.put(FacetHeatmap.FORMAT_PARAM, params.getFieldParam(fieldKey, FacetParams.FACET_HEATMAP_FORMAT));
 
-  private static void formatCountsAndAddToNL(String fieldKey, ResponseBuilder rb, SolrParams params,
-                                             int columns, int rows, int[] counts, NamedList result) {
-    final String format = params.getFieldParam(fieldKey, FacetParams.FACET_HEATMAP_FORMAT, FORMAT_INTS2D);
-    final Object countsVal;
-    switch (format) {
-      case FORMAT_INTS2D: //A List of List of Integers. Good for small heatmaps and ease of consumption
-        countsVal = counts != null ? asInts2D(columns, rows, counts) : null;
-        break;
-      case FORMAT_PNG: //A PNG graphic; compressed. Good for large & dense heatmaps; hard to consume.
-        countsVal = counts != null ? asPngBytes(columns, rows, counts, rb) : null;
-        break;
-      //TODO case skipList: //A sequence of values; negative values are actually how many 0's to insert.
-      //  Good for small or large but sparse heatmaps.
-      //TODO auto choose png or skipList; use skipList when < ~25% full or <= ~512 cells
-      //  remember to augment error list below when we add more formats.
-      default:
-        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
-            "format should be " + FORMAT_INTS2D + " or " + FORMAT_PNG);
-    }
-    result.add("counts_" + format, countsVal);
-  }
-
-  static List> asInts2D(final int columns, final int rows, final int[] counts) {
-    //Returns a view versus returning a copy. This saves memory.
-    //The data is oriented naturally for human/developer viewing: one row at a time top-down
-    return new AbstractList>() {
-      @Override
-      public List get(final int rowIdx) {//top-down remember; the heatmap.counts is bottom up
-        //check if all zeroes and return null if so
-        boolean hasNonZero = false;
-        int y = rows - rowIdx - 1;//flip direction for 'y'
-        for (int c = 0; c < columns; c++) {
-          if (counts[c * rows + y] > 0) {
-            hasNonZero = true;
-            break;
-          }
-        }
-        if (!hasNonZero) {
-          return null;
-        }
-
-        return new AbstractList() {
-          @Override
-          public Integer get(int columnIdx) {
-            return counts[columnIdx * rows + y];
-          }
-
-          @Override
-          public int size() {
-            return columns;
-          }
-        };
-      }
-
-      @Override
-      public int size() {
-        return rows;
-      }
-    };
-  }
-
-  //package access for tests
-  static byte[] asPngBytes(final int columns, final int rows, final int[] counts, ResponseBuilder rb) {
-    long startTimeNano = System.nanoTime();
-    BufferedImage image = PngHelper.newImage(columns, rows);
-    for (int c = 0; c < columns; c++) {
-      for (int r = 0; r < rows; r++) {
-        PngHelper.writeCountAtColumnRow(image, rows, c, r, counts[c * rows + r]);
-      }
-    }
-    byte[] bytes = PngHelper.writeImage(image);
-    long durationMs = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startTimeNano);
-    log.debug("heatmap nativeSize={} pngSize={} pngTime={}", (counts.length * 4), bytes.length, durationMs);
-    if (rb != null && rb.isDebugTimings()) {
-      rb.addDebug(durationMs, "timing", "heatmap png generation");
-    }
-    return bytes;
+    return FacetRequest.parseOneFacetReq(rb.req, jsonFacet);
   }
 
   //
@@ -301,7 +97,7 @@ public class SpatialHeatmapFacets {
         newLocalParams.add(facet.localParams);
       }
       // Set format to PNG; it's the only one we parse
-      newLocalParams.set(FacetParams.FACET_HEATMAP_FORMAT, FORMAT_PNG);
+      newLocalParams.set(FacetParams.FACET_HEATMAP_FORMAT, FacetHeatmap.FORMAT_PNG);
       sreq.params.add(FacetParams.FACET_HEATMAP, newLocalParams.toLocalParamsString() + facet.facetOn);
     }
 
@@ -324,37 +120,10 @@ public class SpatialHeatmapFacets {
         log.error("received heatmap for field/key {} that we weren't expecting", fieldKey);
         continue;
       }
-      facet.counts = addPngToIntArray((byte[]) shardNamedList.remove("counts_" + FORMAT_PNG), facet.counts);
-      if (facet.namedList == null) {
-        // First shard
-        facet.namedList = shardNamedList;
-      } else {
-        assert facet.namedList.equals(shardNamedList);
-      }
+      facet.jsonFacetMerger.merge(shardNamedList, null);//merge context not needed (null)
     }
   }
 
-  //package access for tests
-  static int[] addPngToIntArray(byte[] pngBytes, int[] counts) {
-    if (pngBytes == null) {
-      return counts;
-    }
-    //read PNG
-    final BufferedImage image = PngHelper.readImage(pngBytes);
-    int columns = image.getWidth();
-    int rows = image.getHeight();
-    if (counts == null) {
-      counts = new int[columns * rows];
-    } else {
-      assert counts.length == columns * rows;
-    }
-    for (int c = 0; c < columns; c++) {
-      for (int r = 0; r < rows; r++) {
-        counts[c * rows + r] += PngHelper.getCountAtColumnRow(image, rows, c, r);
-      }
-    }
-    return counts;
-  }
 
   /** Called by FacetComponent's impl of
    * {@link org.apache.solr.handler.component.SearchComponent#finishStage(ResponseBuilder)}.
   */
@@ -362,13 +131,7 @@ public class SpatialHeatmapFacets {
     NamedList> result = new SimpleOrderedMap<>();
     for (Map.Entry entry : heatmapInfos.entrySet()) {
       final HeatmapFacet facet = entry.getValue();
-      final NamedList namedList = facet.namedList;
-      if (namedList == null) {
-        continue;//should never happen but play it safe
-      }
-      formatCountsAndAddToNL(entry.getKey(), rb, SolrParams.wrapDefaults(facet.localParams, rb.req.getParams()),
-          (int) namedList.get("columns"), (int) namedList.get("rows"), facet.counts, namedList);
-      result.add(entry.getKey(), namedList);
+      result.add(entry.getKey(), (NamedList) facet.jsonFacetMerger.getMergedResult());
     }
     return result;
   }
@@ -378,125 +141,17 @@ public class SpatialHeatmapFacets {
   public static class HeatmapFacet extends FacetComponent.FacetBase {
     //note: 'public' following-suit with FacetBase & existing subclasses... though should this really be?
-    //Holds response NamedList for this field, with counts pulled out. Taken from 1st shard response.
-    public NamedList namedList;
-    //Like Heatmap.counts in Lucene spatial, although null if it would be all-0.
-    public int[] counts;
+    public FacetMerger jsonFacetMerger;
 
     public HeatmapFacet(ResponseBuilder rb, String facetStr) {
       super(rb, FacetParams.FACET_HEATMAP, facetStr);
       //note: logic in super (FacetBase) is partially redundant with SimpleFacet.parseParams :-(
+      final SolrParams params = SolrParams.wrapDefaults(localParams, rb.req.getParams());
+      final FacetRequest heatmapRequest = createHeatmapRequest(getKey(), facetOn, rb, params);
+      jsonFacetMerger = heatmapRequest.createFacetMerger(null);
     }
   }
 
-  //
-  // PngHelper
-  //
-
-  //package access for tests
-  static class PngHelper {
-
-    static final ImageReaderSpi imageReaderSpi;//thread-safe
-    static {
-      final Iterator imageReaders = ImageIO.getImageReadersByFormatName("png");
-      if (!imageReaders.hasNext()) {
-        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Can't find png image reader, neaded for heatmaps!");
-      }
-      ImageReader imageReader = imageReaders.next();
-      imageReaderSpi = imageReader.getOriginatingProvider();
-    }
-
-    static BufferedImage readImage(final byte[] bytes) {
-      // Wrap ImageInputStream around the bytes. We could use MemoryCacheImageInputStream but it will
-      // cache the data which is quite unnecessary given we have it all in-memory already.
-      ImageInputStream imageInputStream = new ImageInputStreamImpl() {
-        //TODO re-use this instance; superclass has 8KB buffer.
-
-        @Override
-        public int read() throws IOException {
-          checkClosed();
-          bitOffset = 0;
-          if (streamPos >= bytes.length) {
-            return -1;
-          } else {
-            return bytes[(int) streamPos++];
-          }
-        }
-
-        @Override
-        public int read(byte[] b, int off, int len) throws IOException {
-          checkClosed();
-          bitOffset = 0;
-          if (streamPos >= bytes.length) {
-            return -1;
-          } else {
-            int copyLen = Math.min(len, bytes.length - (int)streamPos);
-            System.arraycopy(bytes, (int)streamPos, b, off, copyLen);
-            streamPos += copyLen;
-            return copyLen;
-          }
-        }
-
-        @Override
-        public long length() {
-          return bytes.length;
-        }
-
-        @Override
-        public boolean isCached() {
-          return true;
-        }
-
-        @Override
-        public boolean isCachedMemory() {
-          return true;
-        }
-      };
-      try {
-        //TODO can/should we re-use an imageReader instance on FacetInfo?
-        ImageReader imageReader = imageReaderSpi.createReaderInstance();
-        imageReader.setInput(imageInputStream,
-            false,//forwardOnly
-            true);//ignoreMetadata
-        return imageReader.read(0);//read first & only image
-      } catch (IOException e) {
-        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Problem reading png heatmap: " + e);
-      }
-    }
-
-    static byte[] writeImage(BufferedImage image) {
-      ByteArrayOutputStream baos = new ByteArrayOutputStream(
-          // initialize to roughly 1/4th the size a native int would take per-pixel
-          image.getWidth() * image.getHeight() + 1024
-      );
-      try {
-        ImageIO.write(image, FORMAT_PNG, baos);
-      } catch (IOException e) {
-        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "While generating PNG: " + e);
-      }
-      //too bad we can't access the raw byte[]; this copies to a new one
-      return baos.toByteArray();
-    }
-
-    // We abuse the image for storing integers (4 bytes), and so we need a 4-byte ABGR.
-    // first (low) byte is blue, next byte is green, next byte red, and last (high) byte is alpha.
-    static BufferedImage newImage(int columns, int rows) {
-      return new BufferedImage(columns, rows, BufferedImage.TYPE_4BYTE_ABGR);
-    }
-
-    // 'y' dimension goes top-down, so invert.
-    // Alpha chanel is high byte; 0 means transparent. So XOR those bits with '1' so that we need
-    // to have counts > 16M before the picture starts to fade
-
-    static void writeCountAtColumnRow(BufferedImage image, int rows, int c, int r, int val) {
-      image.setRGB(c, rows - 1 - r, val ^ 0xFF_00_00_00);
-    }
-
-    static int getCountAtColumnRow(BufferedImage image, int rows, int c, int r) {
-      return image.getRGB(c, rows - 1 - r) ^ 0xFF_00_00_00;
-    }
-
-  }
+  // Note: originally there was a lot more code here but it migrated to the JSON Facet API as "FacetHeatmap"
 
 }
diff --git a/solr/core/src/java/org/apache/solr/request/SimpleFacets.java b/solr/core/src/java/org/apache/solr/request/SimpleFacets.java
index b5d97d2ac31..2cc6ec49684 100644
--- a/solr/core/src/java/org/apache/solr/request/SimpleFacets.java
+++ b/solr/core/src/java/org/apache/solr/request/SimpleFacets.java
@@ -90,7 +90,7 @@ import org.apache.solr.search.SolrIndexSearcher;
 import org.apache.solr.search.SortedIntDocSet;
 import org.apache.solr.search.SyntaxError;
 import org.apache.solr.search.facet.FacetDebugInfo;
-import org.apache.solr.search.facet.FacetProcessor;
+import org.apache.solr.search.facet.FacetRequest;
 import org.apache.solr.search.grouping.GroupingSpecification;
 import org.apache.solr.util.BoundedTreeSet;
 import org.apache.solr.util.DefaultSolrThreadFactory;
@@ -565,29 +565,20 @@ public class SimpleFacets {
       }
       jsonFacet.put(SORT, sortVal );
 
-      Map topLevel = new HashMap<>();
-      topLevel.put(field, jsonFacet);
-
-      topLevel.put("processEmpty", true);
-
-      FacetProcessor fproc = FacetProcessor.createProcessor(rb.req, topLevel, // rb.getResults().docSet
-          docs ); //TODO do we handle debug? Should probably already be handled by the legacy code
-      fproc.process();
+      Object resObj = FacetRequest.parseOneFacetReq(req, jsonFacet).process(req, docs);
 
       //Go through the response to build the expected output for SimpleFacets
-      Object res = fproc.getResponse();
-      counts = new NamedList();
-      if(res != null) {
-        SimpleOrderedMap som = (SimpleOrderedMap)res;
-        SimpleOrderedMap asdf = (SimpleOrderedMap) som.get(field);
+      counts = new NamedList<>();
+      if(resObj != null) {
+        NamedList res = (NamedList) resObj;
 
-        List> buckets = (List>)asdf.get("buckets");
-        for(SimpleOrderedMap b : buckets) {
+        List> buckets = (List>)res.get("buckets");
+        for(NamedList b : buckets) {
           counts.add(b.get("val").toString(), (Integer)b.get("count"));
         }
 
         if(missing) {
-          SimpleOrderedMap missingCounts = (SimpleOrderedMap) asdf.get("missing");
+          NamedList missingCounts = (NamedList) res.get("missing");
           counts.add(null, (Integer)missingCounts.get("count"));
         }
       }
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetHeatmap.java b/solr/core/src/java/org/apache/solr/search/facet/FacetHeatmap.java
new file mode 100644
index 00000000000..b5e481a1760
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetHeatmap.java
@@ -0,0 +1,520 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.solr.search.facet; + +import javax.imageio.ImageIO; +import javax.imageio.ImageReader; +import javax.imageio.spi.ImageReaderSpi; +import javax.imageio.stream.ImageInputStream; +import javax.imageio.stream.ImageInputStreamImpl; +import java.awt.image.BufferedImage; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.lang.invoke.MethodHandles; +import java.util.AbstractList; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.lucene.spatial.prefix.HeatmapFacetCounter; +import org.apache.lucene.spatial.prefix.PrefixTreeStrategy; +import org.apache.lucene.spatial.query.SpatialArgs; +import org.apache.lucene.spatial.query.SpatialOperation; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.FixedBitSet; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.common.util.SimpleOrderedMap; +import org.apache.solr.schema.AbstractSpatialPrefixTreeFieldType; +import org.apache.solr.schema.FieldType; +import org.apache.solr.schema.RptWithGeometrySpatialField; +import org.apache.solr.schema.SchemaField; +import org.apache.solr.schema.SpatialRecursivePrefixTreeFieldType; +import org.apache.solr.search.BitDocSet; +import org.apache.solr.search.DocIterator; +import org.apache.solr.search.DocSet; +import org.apache.solr.search.SolrIndexSearcher; +import org.apache.solr.util.DistanceUnits; +import org.apache.solr.util.SpatialUtils; +import org.locationtech.spatial4j.context.SpatialContext; +import org.locationtech.spatial4j.shape.Shape; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * JSON Facet API request for a 2D spatial summary of a rectangular region. 
+ *
+ * @see HeatmapFacetCounter
+ * @version 7.5.0
+ */
+@SuppressWarnings("WeakerAccess")
+public class FacetHeatmap extends FacetRequest {
+
+  // note: much of this code was moved from SpatialHeatmapFacets (SimpleFacets API)
+
+  /** @see org.apache.solr.common.params.FacetParams#FACET_HEATMAP_GEOM */
+  public static final String GEOM_PARAM = "geom";
+
+  /** @see org.apache.solr.common.params.FacetParams#FACET_HEATMAP_LEVEL */
+  public static final String LEVEL_PARAM = "gridLevel";
+
+  /** @see org.apache.solr.common.params.FacetParams#FACET_HEATMAP_DIST_ERR_PCT */
+  public static final String DIST_ERR_PCT_PARAM = "distErrPct";
+
+  /** @see org.apache.solr.common.params.FacetParams#FACET_HEATMAP_DIST_ERR */
+  public static final String DIST_ERR_PARAM = "distErr";
+
+  /** @see org.apache.solr.common.params.FacetParams#FACET_HEATMAP_MAX_CELLS */
+  public static final String MAX_CELLS_PARAM = "maxCells";
+
+  /** @see org.apache.solr.common.params.FacetParams#FACET_HEATMAP_FORMAT */
+  public static final String FORMAT_PARAM = "format";
+
+  public static final String FORMAT_PNG = "png";
+  public static final String FORMAT_INTS2D = "ints2D";
+  //note: if we change or add more formats, remember to update the javadoc on the format param
+  //TODO for more format ideas, see formatCountsVal()
+
+  public static final double DEFAULT_DIST_ERR_PCT = 0.15;
+
+  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  static class Parser extends FacetParser {
+    Parser(FacetParser parent, String key) {
+      super(parent, key);
+    }
+
+    public FacetHeatmap parse(Object argsObj) {
+      assert facet == null;
+
+      if (!(argsObj instanceof Map)) {
+        throw err("Missing heatmap arguments");
+      }
+
+      @SuppressWarnings("unchecked")
+      Map argsMap = (Map) argsObj;
+      String fieldName = getField(argsMap);
+
+      //get the strategy from the field type
+      final SchemaField schemaField = getSchema().getField(fieldName);
+      final FieldType type = schemaField.getType();
+
+      final PrefixTreeStrategy strategy;
+      final DistanceUnits distanceUnits;
+      // note: the two instanceof conditions are not ideal, versus one. If we start needing to add more then refactor.
+      if ((type instanceof AbstractSpatialPrefixTreeFieldType)) {
+        AbstractSpatialPrefixTreeFieldType rptType = (AbstractSpatialPrefixTreeFieldType) type;
+        strategy = (PrefixTreeStrategy) rptType.getStrategy(fieldName);
+        distanceUnits = rptType.getDistanceUnits();
+      } else if (type instanceof RptWithGeometrySpatialField) {
+        RptWithGeometrySpatialField rptSdvType = (RptWithGeometrySpatialField) type;
+        strategy = rptSdvType.getStrategy(fieldName).getIndexStrategy();
+        distanceUnits = rptSdvType.getDistanceUnits();
+      } else {
+        //FYI we support the term query one too but few people use that one
+        throw err("heatmap field needs to be of type " + SpatialRecursivePrefixTreeFieldType.class + " or " + RptWithGeometrySpatialField.class);
+      }
+
+      final SpatialContext ctx = strategy.getSpatialContext();
+
+      //get the bbox (query Rectangle)
+      String geomStr = getString(argsMap, GEOM_PARAM, null);
+      final Shape boundsShape = geomStr == null ? ctx.getWorldBounds() : SpatialUtils.parseGeomSolrException(geomStr, ctx);
+
+      //get the grid level (possibly indirectly via distErr or distErrPct)
+      final int gridLevel;
+      final Long gridLevelObj = getLongOrNull(argsMap, LEVEL_PARAM, false);
+      final int maxGridLevel = strategy.getGrid().getMaxLevels();
+      if (gridLevelObj != null) {
+        gridLevel = gridLevelObj.intValue();
+        if (gridLevel <= 0 || gridLevel > maxGridLevel) {
+          throw err(LEVEL_PARAM +" should be > 0 and <= " + maxGridLevel);
+        }
+      } else {
+        //SpatialArgs has utility methods to resolve a 'distErr' from optionally set distErr & distErrPct. Arguably that
+        // should be refactored to feel less weird than using it like this.
+        SpatialArgs spatialArgs = new SpatialArgs(SpatialOperation.Intersects/*ignored*/,
+            boundsShape == null ? ctx.getWorldBounds() : boundsShape);
+        final Double distErrObj = getDoubleOrNull(argsMap, DIST_ERR_PARAM, false);
+        if (distErrObj != null) {
+          // convert distErr units based on configured units
+          spatialArgs.setDistErr(distErrObj * distanceUnits.multiplierFromThisUnitToDegrees());
+        }
+        spatialArgs.setDistErrPct(getDoubleOrNull(argsMap, DIST_ERR_PCT_PARAM, false));
+        double distErr = spatialArgs.resolveDistErr(ctx, DEFAULT_DIST_ERR_PCT);
+        if (distErr <= 0) {
+          throw err(DIST_ERR_PCT_PARAM + " or " + DIST_ERR_PARAM
+              + " should be > 0 or instead provide " + LEVEL_PARAM + "=" + maxGridLevel
+              + " if you insist on maximum detail");
+        }
+        //The SPT (grid) can lookup a grid level satisfying an error distance constraint
+        gridLevel = strategy.getGrid().getLevelForDistance(distErr);
+      }
+
+      final int maxCells = (int) getLong(argsMap, MAX_CELLS_PARAM, 100_000);// will throw later if exceeded
+
+      final String format = getString(argsMap, FORMAT_PARAM, FORMAT_INTS2D);
+      if (!format.equals(FORMAT_INTS2D) && !format.equals(FORMAT_PNG)) {
+        throw err("format should be " + FORMAT_INTS2D + " or " + FORMAT_PNG);
+      }
+
+      this.facet = new FacetHeatmap(argsMap, strategy, boundsShape, gridLevel, maxCells, format);
+
+      parseCommonParams(argsObj); // e.g. domain change
+
+      return this.facet;
+    }
+
+  }//class Parser
+
+  private final Map argsMap;
+  private final PrefixTreeStrategy strategy;
+  private final Shape boundsShape;
+  private final int gridLevel;
+  private final int maxCells;
+  private final String format;
+
+  FacetHeatmap(Map argsMap, PrefixTreeStrategy strategy, Shape boundsShape, int gridLevel, int maxCells, String format) {
+    this.argsMap = argsMap;
+    this.strategy = strategy;
+    this.boundsShape = boundsShape;
+    this.gridLevel = gridLevel;
+    this.maxCells = maxCells;
+    this.format = format;
+  }
+
+  //TODO perhaps all FacetRequest objs should have this?
+  @Override
+  public Map getFacetDescription() {
+    return argsMap;
+  }
+
+  @Override
+  public FacetProcessor createFacetProcessor(FacetContext fcontext) {
+    return new FacetProcessor(fcontext, this) {
+      @Override
+      public void process() throws IOException {
+        super.process(); // handles domain changes
+
+        //Compute!
+        final HeatmapFacetCounter.Heatmap heatmap;
+        try {
+          heatmap = HeatmapFacetCounter.calcFacets(
+              strategy,
+              fcontext.searcher.getTopReaderContext(),
+              getTopAcceptDocs(fcontext.base, fcontext.searcher), // turn DocSet into Bits
+              boundsShape,
+              gridLevel,
+              maxCells);
+        } catch (IllegalArgumentException e) {//e.g. too many cells
+          throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e.toString(), e);
+        }
+
+        //Populate response
+        response = new SimpleOrderedMap();
+        response.add("gridLevel", gridLevel);
+        response.add("columns", heatmap.columns);
+        response.add("rows", heatmap.rows);
+        response.add("minX", heatmap.region.getMinX());
+        response.add("maxX", heatmap.region.getMaxX());
+        response.add("minY", heatmap.region.getMinY());
+        response.add("maxY", heatmap.region.getMaxY());
+
+        //A shard request will always be a PNG
+        String format = fcontext.isShard() ? FORMAT_PNG : FacetHeatmap.this.format;
+
+        response.add("counts_" + format, formatCountsVal(format, heatmap.columns, heatmap.rows, heatmap.counts, fcontext.getDebugInfo()));
+
+        // note: we do not call processStats or processSubs as it's not supported yet
+      }
+
+      //TODO this is a general utility that should go elsewhere? DocSetUtil? Then should DocSetBase.getBits go away?
+      private Bits getTopAcceptDocs(DocSet docSet, SolrIndexSearcher searcher) throws IOException {
+        if (docSet.size() == searcher.numDocs()) {
+          return null; // means match everything (all live docs). This can speedup things a lot.
+        } else if (docSet.size() == 0) {
+          return new Bits.MatchNoBits(searcher.maxDoc()); // can speedup things a lot
+        } else if (docSet instanceof BitDocSet) {
+          return ((BitDocSet) docSet).getBits();
+        } else {
+          // TODO DocSetBase.getBits ought to be at DocSet level? Though it doesn't know maxDoc but it could?
+          FixedBitSet bits = new FixedBitSet(searcher.maxDoc());
+          for (DocIterator iter = docSet.iterator(); iter.hasNext();) {
+            bits.set(iter.nextDoc());
+          }
+          return bits;
+        }
+      }
+
+    };
+  }
+
+  private static Object formatCountsVal(String format, int columns, int rows, int[] counts, FacetDebugInfo debugInfo) {
+    if (counts == null) {
+      return null;
+    }
+    boolean hasNonZero = false;
+    for (int count : counts) {
+      if (count > 0) {
+        hasNonZero = true;
+        break;
+      }
+    }
+    if (!hasNonZero) {
+      return null;
+    }
+
+    switch (format) {
+      case FORMAT_INTS2D: //A List of List of Integers. Good for small heatmaps and ease of consumption
+        return asInts2D(columns, rows, counts);
+      case FORMAT_PNG: //A PNG graphic; compressed. Good for large & dense heatmaps; hard to consume.
+        return asPngBytes(columns, rows, counts, debugInfo);
+
+      //TODO case UTFGRID https://github.com/mapbox/utfgrid-spec
+      //TODO case skipList: //A sequence of values; negative values are actually how many 0's to insert.
+      //  Good for small or large but sparse heatmaps.
+      //TODO auto choose png or skipList; use skipList when < ~25% full or <= ~512 cells
+      //  remember to augment error list below when we add more formats.
+      default:
+        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unknown format: " + format);
+    }
+  }
+
+  @Override
+  public FacetMerger createFacetMerger(Object prototype) {
+    return new FacetMerger() {
+      NamedList mergedResult; // except counts, which we add in when done
+      int[] counts;
+
+      // note: there appears to be no mechanism to modify the shard requests in this API. If we could, we'd
+      // change the format to png. Instead, we have the facet processor recognize it's a shard request and ignore
+      // the requested format, which seems like a hack.
+
+      @SuppressWarnings("unchecked")
+      @Override
+      public void merge(Object facetResult, Context mcontext) {
+        NamedList facetResultNL = (NamedList) facetResult;
+        counts = addPngToIntArray((byte[]) facetResultNL.remove("counts_" + FORMAT_PNG), counts);
+        if (mergedResult == null) {
+          mergedResult = facetResultNL;
+        }
+      }
+
+      @Override
+      public void finish(Context mcontext) {
+        //nothing to do; we have no sub-facets
+      }
+
+      @Override
+      public Object getMergedResult() {
+        mergedResult.add("counts_" + format, formatCountsVal(
+            format, (Integer) mergedResult.get("columns"), (Integer) mergedResult.get("rows"), counts, null));//TODO where debugInfo?
+        return mergedResult;
+      }
+    };
+  }
+
+  @VisibleForTesting
+  static int[] addPngToIntArray(byte[] pngBytes, int[] counts) {
+    if (pngBytes == null) {
+      return counts;
+    }
+    //read PNG
+    final BufferedImage image = PngHelper.readImage(pngBytes);
+    int columns = image.getWidth();
+    int rows = image.getHeight();
+    if (counts == null) {
+      counts = new int[columns * rows];
+    } else {
+      assert counts.length == columns * rows;
+    }
+    for (int c = 0; c < columns; c++) {
+      for (int r = 0; r < rows; r++) {
+        counts[c * rows + r] += PngHelper.getCountAtColumnRow(image, rows, c, r);
+      }
+    }
+    return counts;
+  }
+
+  @VisibleForTesting
+  static List> asInts2D(final int columns, final int rows, final int[] counts) {
+    //Returns a view versus returning a copy. This saves memory.
+    //The data is oriented naturally for human/developer viewing: one row at a time top-down
+    return new AbstractList>() {
+      @Override
+      public List get(final int rowIdx) {//top-down remember; the heatmap.counts is bottom up
+        //check if all zeroes and return null if so
+        boolean hasNonZero = false;
+        int y = rows - rowIdx - 1;//flip direction for 'y'
+        for (int c = 0; c < columns; c++) {
+          if (counts[c * rows + y] > 0) {
+            hasNonZero = true;
+            break;
+          }
+        }
+        if (!hasNonZero) {
+          return null;
+        }
+
+        return new AbstractList() {
+          @Override
+          public Integer get(int columnIdx) {
+            return counts[columnIdx * rows + y];
+          }
+
+          @Override
+          public int size() {
+            return columns;
+          }
+        };
+      }
+
+      @Override
+      public int size() {
+        return rows;
+      }
+    };
+  }
+
+  @VisibleForTesting
+  static byte[] asPngBytes(final int columns, final int rows, final int[] counts, FacetDebugInfo debugInfo) {
+    long startTimeNano = System.nanoTime();
+    BufferedImage image = PngHelper.newImage(columns, rows);
+    for (int c = 0; c < columns; c++) {
+      for (int r = 0; r < rows; r++) {
+        PngHelper.writeCountAtColumnRow(image, rows, c, r, counts[c * rows + r]);
+      }
+    }
+    byte[] bytes = PngHelper.writeImage(image);
+    long durationMs = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startTimeNano);
+    log.debug("heatmap nativeSize={} pngSize={} pngTime={}", (counts.length * 4), bytes.length, durationMs);
+    if (debugInfo != null) {
+      debugInfo.putInfoItem("heatmap png timing", durationMs);
+    }
+    return bytes;
+  }
+
+  @VisibleForTesting
+  static class PngHelper {
+
+    static final ImageReaderSpi imageReaderSpi;//thread-safe
+    static {
+      final Iterator imageReaders = ImageIO.getImageReadersByFormatName("png");
+      if (!imageReaders.hasNext()) {
+        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Can't find png image reader, needed for heatmaps!");
+      }
+      ImageReader imageReader = imageReaders.next();
+      imageReaderSpi = imageReader.getOriginatingProvider();
+    }
+
+    static BufferedImage readImage(final byte[] bytes) {
+      // Wrap ImageInputStream around the bytes. We could use MemoryCacheImageInputStream but it will
+      // cache the data which is quite unnecessary given we have it all in-memory already.
+      ImageInputStream imageInputStream = new ImageInputStreamImpl() {
+        //TODO re-use this instance; superclass has 8KB buffer.
+
+        @Override
+        public int read() throws IOException {
+          checkClosed();
+          bitOffset = 0;
+          if (streamPos >= bytes.length) {
+            return -1;
+          } else {
+            return bytes[(int) streamPos++];
+          }
+        }
+
+        @Override
+        public int read(byte[] b, int off, int len) throws IOException {
+          checkClosed();
+          bitOffset = 0;
+          if (streamPos >= bytes.length) {
+            return -1;
+          } else {
+            int copyLen = Math.min(len, bytes.length - (int)streamPos);
+            System.arraycopy(bytes, (int)streamPos, b, off, copyLen);
+            streamPos += copyLen;
+            return copyLen;
+          }
+        }
+
+        @Override
+        public long length() {
+          return bytes.length;
+        }
+
+        @Override
+        public boolean isCached() {
+          return true;
+        }
+
+        @Override
+        public boolean isCachedMemory() {
+          return true;
+        }
+      };
+      try {
+        //TODO can/should we re-use an imageReader instance on FacetInfo?
+        ImageReader imageReader = imageReaderSpi.createReaderInstance();
+
+        imageReader.setInput(imageInputStream,
+            false,//forwardOnly
+            true);//ignoreMetadata
+        return imageReader.read(0);//read first & only image
+      } catch (IOException e) {
+        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Problem reading png heatmap: " + e);
+      }
+    }
+
+    static byte[] writeImage(BufferedImage image) {
+      ByteArrayOutputStream baos = new ByteArrayOutputStream(
+          // initialize to roughly 1/4th the size a native int would take per-pixel
+          image.getWidth() * image.getHeight() + 1024
+      );
+      try {
+        ImageIO.write(image, FORMAT_PNG, baos);
+      } catch (IOException e) {
+        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "While generating PNG: " + e);
+      }
+      //too bad we can't access the raw byte[]; this copies to a new one
+      return baos.toByteArray();
+    }
+
+    // We abuse the image for storing integers (4 bytes), and so we need a 4-byte ABGR.
+    // first (low) byte is blue, next byte is green, next byte red, and last (high) byte is alpha.
+    static BufferedImage newImage(int columns, int rows) {
+      return new BufferedImage(columns, rows, BufferedImage.TYPE_4BYTE_ABGR);
+    }
+
+    // 'y' dimension goes top-down, so invert.
+    // Alpha channel is high byte; 0 means transparent. So XOR those bits with '1' so that we need
+    // to have counts > 16M before the picture starts to fade
+
+    static void writeCountAtColumnRow(BufferedImage image, int rows, int c, int r, int val) {
+      image.setRGB(c, rows - 1 - r, val ^ 0xFF_00_00_00);
+    }
+
+    static int getCountAtColumnRow(BufferedImage image, int rows, int c, int r) {
+      return image.getRGB(c, rows - 1 - r) ^ 0xFF_00_00_00;
+    }
+
+  }
+
+}
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetModule.java b/solr/core/src/java/org/apache/solr/search/facet/FacetModule.java
index 0e4266efcdc..6b4b303670b 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/FacetModule.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetModule.java
@@ -37,8 +37,6 @@ import org.apache.solr.handler.component.SearchComponent;
 import org.apache.solr.handler.component.ShardRequest;
 import org.apache.solr.handler.component.ShardResponse;
 import org.apache.solr.search.QueryContext;
-import org.apache.solr.search.SyntaxError;
-import org.apache.solr.util.RTimer;
 import org.noggit.CharArr;
 import org.noggit.JSONWriter;
 import org.noggit.ObjectBuilder;
@@ -98,13 +96,7 @@ public class FacetModule extends SearchComponent {
     rb.setNeedDocSet(true);
 
     // Parse the facet in the prepare phase?
-    FacetParser parser = new FacetTopParser(rb.req);
-    FacetRequest facetRequest = null;
-    try {
-      facetRequest = parser.parse(jsonFacet);
-    } catch (SyntaxError syntaxError) {
-      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, syntaxError);
-    }
+    FacetRequest facetRequest = FacetRequest.parse(rb.req, jsonFacet);
 
     FacetComponentState fcState = new FacetComponentState();
     fcState.rb = rb;
@@ -138,30 +130,17 @@ public class FacetModule extends SearchComponent {
         fcontext.flags |= FacetContext.SKIP_FACET; // the root bucket should have been received from all shards previously
       }
     }
-
-    FacetProcessor fproc = facetState.facetRequest.createFacetProcessor(fcontext);
 
     if (rb.isDebug()) {
       FacetDebugInfo fdebug = new FacetDebugInfo();
      fcontext.setDebugInfo(fdebug);
-      fdebug.setReqDescription(facetState.facetRequest.getFacetDescription());
-      fdebug.setProcessor(fproc.getClass().getSimpleName());
-
-      final RTimer timer = new RTimer();
-      fproc.process();
-      long timeElapsed = (long) timer.getTime();
-      fdebug.setElapse(timeElapsed);
-      fdebug.putInfoItem("domainSize", (long)fcontext.base.size());
       rb.req.getContext().put("FacetDebugInfo", fdebug);
-    } else {
-      fproc.process();
     }
-
-    rb.rsp.add("facets", fproc.getResponse());
+
+    final Object results = facetState.facetRequest.process(fcontext);
+    rb.rsp.add("facets", results);
   }
-
-
   private void clearFaceting(List outgoing) {
     // turn off faceting for requests not marked as being for faceting refinements
     for (ShardRequest sreq : outgoing) {
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetProcessor.java b/solr/core/src/java/org/apache/solr/search/facet/FacetProcessor.java
index 454b85fe08b..15a3ccb2220 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/FacetProcessor.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetProcessor.java
@@ -33,19 +33,17 @@ import org.apache.lucene.search.Query;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.util.SimpleOrderedMap;
 import org.apache.solr.handler.component.ResponseBuilder;
-import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.request.SolrRequestInfo;
 import org.apache.solr.schema.SchemaField;
 import org.apache.solr.search.BitDocSet;
 import org.apache.solr.search.DocIterator;
 import org.apache.solr.search.DocSet;
 import org.apache.solr.search.QParser;
-import org.apache.solr.search.QueryContext;
 import org.apache.solr.search.SolrIndexSearcher;
 import org.apache.solr.search.SyntaxError;
 import org.apache.solr.search.facet.SlotAcc.SlotContext;
-import org.apache.solr.util.RTimer;
 
+/** Base abstraction for a class that computes facets. This is fairly internal to the module. */
 public abstract class FacetProcessor {
   SimpleOrderedMap response;
   FacetContext fcontext;
@@ -56,27 +54,6 @@ public abstract class FacetProcessor {
   SlotAcc[] accs;
   CountSlotAcc countAcc;
 
-  /** factory method for invoking json facet framework as whole.
-   * Note: this is currently only used from SimpleFacets, not from JSON Facet API itself. */
-  public static FacetProcessor createProcessor(SolrQueryRequest req,
-                                               Map params, DocSet docs){
-    FacetParser parser = new FacetTopParser(req);
-    FacetRequest facetRequest = null;
-    try {
-      facetRequest = parser.parse(params);
-    } catch (SyntaxError syntaxError) {
-      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, syntaxError);
-    }
-
-    FacetContext fcontext = new FacetContext();
-    fcontext.base = docs;
-    fcontext.req = req;
-    fcontext.searcher = req.getSearcher();
-    fcontext.qcontext = QueryContext.newContext(fcontext.searcher);
-
-    return facetRequest.createFacetProcessor(fcontext);
-  }
-
   FacetProcessor(FacetContext fcontext, FacetRequestT freq) {
     this.fcontext = fcontext;
     this.freq = freq;
@@ -201,9 +178,7 @@ public abstract class FacetProcessor {
       return;
     }
 
-    // TODO: somehow remove responsebuilder dependency
-    ResponseBuilder rb = SolrRequestInfo.getRequestInfo().getResponseBuilder();
-    Map tagMap = (Map) rb.req.getContext().get("tags");
+    Map tagMap = (Map) fcontext.req.getContext().get("tags");
     if (tagMap == null) {
       // no filters were tagged
       return;
@@ -229,6 +204,9 @@ public abstract class FacetProcessor {
 
     List qlist = new ArrayList<>();
 
+    // TODO: somehow remove responsebuilder dependency
+    ResponseBuilder rb = SolrRequestInfo.getRequestInfo().getResponseBuilder();
+
     // add the base query
     if (!excludeSet.containsKey(rb.getQuery())) {
       qlist.add(rb.getQuery());
@@ -484,27 +462,16 @@ public abstract class FacetProcessor {
       FacetContext subContext = fcontext.sub(filter, domain);
       subContext.facetInfo = facetInfoSub;
       if (!skip) subContext.flags &= ~FacetContext.SKIP_FACET;  // turn off the skip flag if we're not skipping this bucket
-      FacetProcessor subProcessor = subRequest.createFacetProcessor(subContext);
 
       if (fcontext.getDebugInfo() != null) {   // if fcontext.debugInfo != null, it means rb.debug() == true
         FacetDebugInfo fdebug = new FacetDebugInfo();
         subContext.setDebugInfo(fdebug);
         fcontext.getDebugInfo().addChild(fdebug);
-
-        fdebug.setReqDescription(subRequest.getFacetDescription());
-        fdebug.setProcessor(subProcessor.getClass().getSimpleName());
-        if (subContext.filter != null) fdebug.setFilter(subContext.filter.toString());
-
-        final RTimer timer = new RTimer();
-        subProcessor.process();
-        long timeElapsed = (long) timer.getTime();
-        fdebug.setElapse(timeElapsed);
-        fdebug.putInfoItem("domainSize", (long)subContext.base.size());
-      } else {
-        subProcessor.process();
       }
 
-      response.add( sub.getKey(), subProcessor.getResponse() );
+      Object result = subRequest.process(subContext);
+
+      response.add( sub.getKey(), result);
     }
   }
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java b/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java
index ddf2e981985..00ff44a16ff 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java
@@ -16,6 +16,13 @@
  */
 package org.apache.solr.search.facet;
 
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.EnumSet;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+
 import org.apache.lucene.search.Query;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.params.FacetParams;
@@ -24,17 +31,28 @@ import org.apache.solr.common.util.NamedList;
 import org.apache.solr.common.util.StrUtils;
 import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.schema.IndexSchema;
-import org.apache.solr.search.*;
+import org.apache.solr.search.DocSet;
+import org.apache.solr.search.FunctionQParser;
+import org.apache.solr.search.FunctionQParserPlugin;
+import org.apache.solr.search.JoinQParserPlugin;
+import org.apache.solr.search.QParser;
+import org.apache.solr.search.QueryContext;
+import org.apache.solr.search.SolrConstantScoreQuery;
+import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.search.SyntaxError;
 import org.apache.solr.search.join.GraphQuery;
 import org.apache.solr.search.join.GraphQueryParser;
-
-import java.io.IOException;
-import java.util.*;
+import org.apache.solr.util.RTimer;
 
 import static org.apache.solr.common.params.CommonParams.SORT;
 import static org.apache.solr.search.facet.FacetRequest.RefineMethod.NONE;
 
-
+/**
+ * A request to do facets/stats that might itself be composed of sub-FacetRequests.
+ * This is a cornerstone of the facet module.
+ *
+ * @see #parse(SolrQueryRequest, Map)
+ */
 public abstract class FacetRequest {
 
   public static enum SortDirection {
@@ -237,6 +255,39 @@ public abstract class FacetRequest {
   }
 
+  /**
+   * Factory method to parse a facet request tree.  The outer keys are arbitrary labels and their values are
+   * facet request specifications. Will throw a {@link SolrException} if it fails to parse.
+   * @param req the overall request
+   * @param params a typed parameter structure (unlike SolrParams which are all string values).
+   */
+  public static FacetRequest parse(SolrQueryRequest req, Map params) {
+    FacetParser parser = new FacetTopParser(req);
+    try {
+      return parser.parse(params);
+    } catch (SyntaxError syntaxError) {
+      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, syntaxError);
+    }
+  }
+
+  //TODO it would be nice if there was no distinction.  If the top level request had "type" as special then there wouldn't be a need.
+
+  /**
+   * Factory method to parse out a rooted facet request tree that would normally go one level below a label.
+   * The params must contain a "type".
+   * This is intended to be useful externally, such as by {@link org.apache.solr.request.SimpleFacets}.
+   * @param req the overall request
+   * @param params a typed parameter structure (unlike SolrParams which are all string values).
+   */
+  public static FacetRequest parseOneFacetReq(SolrQueryRequest req, Map params) {
+    FacetParser parser = new FacetTopParser(req);
+    try {
+      return (FacetRequest) parser.parseFacetOrStat("", params);
+    } catch (SyntaxError syntaxError) {
+      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, syntaxError);
+    }
+  }
+
   public FacetRequest() {
     facetStats = new LinkedHashMap<>();
     subFacets = new LinkedHashMap<>();
@@ -294,7 +345,46 @@ public abstract class FacetRequest {
     s += "}";
     return s;
   }
-
+
+  /**
+   * Process this facet request against the given domain of docs.
+   * Note: this is currently used externally by {@link org.apache.solr.request.SimpleFacets}.
+   */
+  public final Object process(SolrQueryRequest req, DocSet domain) throws IOException {
+    //TODO check for FacetDebugInfo?  and if so set on fcontext
+    //  rb.req.getContext().get("FacetDebugInfo");
+    //TODO should the SolrQueryRequest be held on the FacetRequest?  It was created from parse(req,...) so is known.
+    FacetContext fcontext = new FacetContext();
+    fcontext.base = domain;
+    fcontext.req = req;
+    fcontext.searcher = req.getSearcher();
+    fcontext.qcontext = QueryContext.newContext(fcontext.searcher);
+
+    return process(fcontext);
+  }
+
+  /** Process the request with the facet context settings, a parameter-object. */
+  final Object process(FacetContext fcontext) throws IOException {
+    FacetProcessor facetProcessor = createFacetProcessor(fcontext);
+
+    FacetDebugInfo debugInfo = fcontext.getDebugInfo();
+    if (debugInfo == null) {
+      facetProcessor.process();
+    } else {
+      if (fcontext.filter != null) {
+        debugInfo.setFilter(fcontext.filter.toString());
+      }
+      debugInfo.setReqDescription(getFacetDescription());
+      debugInfo.setProcessor(getClass().getSimpleName());
+      debugInfo.putInfoItem("domainSize", (long) fcontext.base.size());
+      RTimer timer = new RTimer();
+      facetProcessor.process();
+      debugInfo.setElapse((long) timer.getTime());
+    }
+
+    return facetProcessor.getResponse(); // note: not captured in elapsed time above; good/bad?
+  }
+
   public abstract FacetProcessor createFacetProcessor(FacetContext fcontext);
 
   public abstract FacetMerger createFacetMerger(Object prototype);
@@ -453,12 +543,16 @@ abstract class FacetParser {
 
   public Object parseFacetOrStat(String key, String type, Object args) throws SyntaxError {
     // TODO: a place to register all these facet types?
-    if ("field".equals(type) || "terms".equals(type)) {
-      return parseFieldFacet(key, args);
-    } else if ("query".equals(type)) {
-      return parseQueryFacet(key, args);
-    } else if ("range".equals(type)) {
-      return parseRangeFacet(key, args);
+    switch (type) {
+      case "field":
+      case "terms":
+        return new FacetFieldParser(this, key).parse(args);
+      case "query":
+        return new FacetQueryParser(this, key).parse(args);
+      case "range":
+        return new FacetRangeParser(this, key).parse(args);
+      case "heatmap":
+        return new FacetHeatmap.Parser(this, key).parse(args);
     }
 
     AggValueSource stat = parseStat(key, type, args);
@@ -468,23 +562,6 @@ abstract class FacetParser {
     return stat;
   }
 
-
-
-  FacetField parseFieldFacet(String key, Object args) throws SyntaxError {
-    FacetFieldParser parser = new FacetFieldParser(this, key);
-    return parser.parse(args);
-  }
-
-  FacetQuery parseQueryFacet(String key, Object args) throws SyntaxError {
-    FacetQueryParser parser = new FacetQueryParser(this, key);
-    return parser.parse(args);
-  }
-
-  FacetRange parseRangeFacet(String key, Object args) throws SyntaxError {
-    FacetRangeParser parser = new FacetRangeParser(this, key);
-    return parser.parse(args);
-  }
-
   public Object parseStringFacetOrStat(String key, String s) throws SyntaxError {
     // "avg(myfield)"
     return parseStringStat(key, s);
@@ -623,6 +700,21 @@ abstract class FacetParser {
     return ((Number)o).longValue();
   }
 
+  public Double getDoubleOrNull(Map args, String paramName, boolean required) {
+    Object o = args.get(paramName);
+    if (o == null) {
+      if (required) {
+        throw err("Missing required parameter '" + paramName + "'");
+      }
+      return null;
+    }
+    if (!(o instanceof Number)) {
+      throw err("Expected double type for param '" + paramName + "' but got " + o);
+    }
+
+    return ((Number)o).doubleValue();
+  }
+
   public boolean getBoolean(Map args, String paramName, boolean defVal) {
     Object o = args.get(paramName);
     if (o == null) {
diff --git a/solr/core/src/test/org/apache/solr/handler/component/SpatialHeatmapFacetsTest.java b/solr/core/src/test/org/apache/solr/search/facet/SpatialHeatmapFacetsTest.java
similarity index 57%
rename from solr/core/src/test/org/apache/solr/handler/component/SpatialHeatmapFacetsTest.java
rename to solr/core/src/test/org/apache/solr/search/facet/SpatialHeatmapFacetsTest.java
index 8d66b07e72e..5af7bb6a0a6 100644
--- a/solr/core/src/test/org/apache/solr/handler/component/SpatialHeatmapFacetsTest.java
+++ b/solr/core/src/test/org/apache/solr/search/facet/SpatialHeatmapFacetsTest.java
@@ -14,7 +14,7 @@
  * See the License for the specific language governing permissions and
 * limitations under the License.
  */
-package org.apache.solr.handler.component;
+package org.apache.solr.search.facet;
 
 import java.util.Arrays;
 import java.util.List;
@@ -30,6 +30,7 @@ import org.apache.solr.common.util.NamedList;
 import org.junit.BeforeClass;
 import org.junit.Test;
 
+/** Test Heatmap Facets (both impls) */
 public class SpatialHeatmapFacetsTest extends BaseDistributedSearchTestCase {
   private static final String FIELD = "srpt_quad";
 
@@ -42,9 +43,10 @@ public class SpatialHeatmapFacetsTest extends BaseDistributedSearchTestCase {
     System.setProperty("java.awt.headless", "true");
   }
 
+  /** Tests SimpleFacets/Classic faceting implementation of heatmaps */
   @SuppressWarnings("unchecked")
   @Test
-  public void test() throws Exception {
+  public void testClassicFacets() throws Exception { // AKA SimpleFacets
     handle.clear();
     handle.put("QTime", SKIPVAL);
     handle.put("timestamp", SKIPVAL);
@@ -165,20 +167,152 @@ public class SpatialHeatmapFacetsTest extends BaseDistributedSearchTestCase {
     Object v = getHmObj(query(params(baseParams, FacetParams.FACET_HEATMAP_FORMAT, "png"))).get("counts_png");
     assertTrue(v instanceof byte[]);
     //simply test we can read the image
-    assertNotNull(SpatialHeatmapFacets.PngHelper.readImage((byte[]) v));
+    assertNotNull(FacetHeatmap.PngHelper.readImage((byte[]) v));
     //good enough for this test method
   }
 
-  private NamedList getHmObj(QueryResponse response) {
-    return (NamedList) response.getResponse().findRecursive("facet_counts", "facet_heatmaps", FIELD);
-  }
-
   private ModifiableSolrParams params(SolrParams baseParams, String... moreParams) {
     final ModifiableSolrParams params = new ModifiableSolrParams(baseParams);
     params.add(params(moreParams));//actually replaces
     return params;
   }
 
+  /** Tests JSON Facet module implementation of heatmaps. */
+  @SuppressWarnings("unchecked")
+  @Test
+  public void testJsonFacets() throws Exception {
+    /*
+      THIS IS THE MOSTLY SAME CODE as above with tweaks to request it using the JSON Facet approach.
+        Near-duplication is sad; not clear if one test doing both is better -- would be awkward
+     */
+    handle.clear();
+    handle.put("QTime", SKIPVAL);
+    handle.put("timestamp", SKIPVAL);
+    handle.put("maxScore", SKIPVAL);
+
+    SolrParams baseParams = params("q", "*:*", "rows", "0");
+
+    final String testBox = "[\"50 50\" TO \"180 90\"]";//top-right somewhere on edge (whatever)
+
+    // ------ Index data
+
+    index("id", "0", FIELD, "ENVELOPE(100, 120, 80, 40)");// on right side
+    index("id", "1", FIELD, "ENVELOPE(-120, -110, 80, 20)");// on left side (outside heatmap)
+    index("id", "3", FIELD, "POINT(70 60)");//just left of BOX 0
+    index("id", "4", FIELD, "POINT(91 89)");//just outside box 0 (above it) near pole,
+
+    commit();
+
+    //----- Test gridLevel derivation
+    try {
+      query(params(baseParams, "json.facet", "{f1:{type:heatmap, f:" + FIELD + ", geom:'" + testBox + "', distErr:0}}"));
+      fail();
+    } catch (SolrException e) {
+      assertEquals(SolrException.ErrorCode.BAD_REQUEST.code, e.code());
+    }
+    try {
+      query(params(baseParams, "json.facet", "{f1:{type:heatmap, f:" + FIELD + ", geom:'" + testBox + "', distErrPct:0}}"));
+      fail();
+    } catch (SolrException e) {
+      assertEquals(SolrException.ErrorCode.BAD_REQUEST.code, e.code());
+    }
+    // Monkeying with these params changes the gridLevel in different directions. We don't test the exact
+    // computation here; that's not _that_ relevant, and is Lucene spatial's job (not Solr) any way.
+ assertEquals(7, getHmObj(query(params(baseParams, + "json.facet", "{f1:{type:heatmap, f:" + FIELD + ", geom:'" + testBox + "'}}"))).get("gridLevel"));//default + assertEquals(3, getHmObj(query(params(baseParams, + "json.facet", "{f1:{type:heatmap, f:" + FIELD + ", geom:'" + testBox + "', gridLevel:3}}"))).get("gridLevel")); + assertEquals(2, getHmObj(query(params(baseParams, + "json.facet", "{f1:{type:heatmap, f:" + FIELD + ", geom:'" + testBox + "', distErr:100}}"))).get("gridLevel")); + //TODO test impact of distance units + assertEquals(9, getHmObj(query(params(baseParams, + "json.facet", "{f1:{type:heatmap, f:" + FIELD + ", geom:'" + testBox + "', distErrPct:0.05}}"))).get("gridLevel")); + assertEquals(6, getHmObj(query(params(baseParams, + "json.facet", "{f1:{type:heatmap, f:" + FIELD + ", distErrPct:0.10}}"))).get("gridLevel")); + + // ----- Search + // this test simply has some 0's, nulls, 1's and a 2 in there. + NamedList hmObj = getHmObj(query(params(baseParams, + "json.facet", "{f1:{type:heatmap, f:" + FIELD + " geom:'[\"50 20\" TO \"180 90\"]', gridLevel:4}}"))); + List> counts = (List>) hmObj.get("counts_ints2D"); + List> expectedCounts1 = Arrays.asList( + Arrays.asList(0, 0, 2, 1, 0, 0), + Arrays.asList(0, 0, 1, 1, 0, 0), + Arrays.asList(0, 1, 1, 1, 0, 0), + Arrays.asList(0, 0, 1, 1, 0, 0), + Arrays.asList(0, 0, 1, 1, 0, 0), + null, + null + ); + assertEquals( expectedCounts1, counts); + + // now this time we add a filter query and exclude it + QueryResponse response = query(params(baseParams, + "fq", "{!tag=excludeme}id:0", // filter to only be id:0 + "json.facet", "{f1:{type:heatmap, excludeTags:['excludeme'], f:" + FIELD + ", geom:'[\"50 20\" TO \"180 90\"]', gridLevel:4}}")); + + assertEquals(1, response.getResults().getNumFound());// because of our 'fq' + hmObj = getHmObj(response); + counts = (List>) hmObj.get("counts_ints2D"); + assertEquals( expectedCounts1, counts); + + { + // impractical example but nonetheless encloses the points of both doc3 and doc4 (both of which are points) + final String jsonHeatmap = "facet:{hm:{type:heatmap, f:" + FIELD + ", geom:'MULTIPOINT(70 60, 91 89)', distErrPct:0.2}}"; + response = query(params(baseParams, + "json.facet", "{" + + "q1:{type:query, q:'id:3', " + jsonHeatmap + " }, " + + "q2:{type:query, q:'id:4', " + jsonHeatmap + " } " + + "}")); + { + final NamedList q1Res = (NamedList) response.getResponse().findRecursive("facets", "q1"); + assertEquals("1", q1Res.get("count").toString()); + final NamedList q2Res = (NamedList) response.getResponse().findRecursive("facets", "q2"); + assertEquals("1", q2Res.get("count").toString()); + // essentially, these will differ only in the heatmap counts but otherwise will be the same + assertNotNull(compare(q1Res, q2Res, flags, handle)); + } + } + + // test using a circle input shape + hmObj = getHmObj(query(params(baseParams, + "json.facet", "{f1:{type:heatmap, f:" + FIELD + ", geom:'BUFFER(POINT(110 40), 7)', gridLevel:7}}"))); + counts = (List>) hmObj.get("counts_ints2D"); + assertEquals( + Arrays.asList( + Arrays.asList(0, 1, 1, 1, 1, 1, 1, 0),//curved; we have a 0 + Arrays.asList(0, 1, 1, 1, 1, 1, 1, 0),//curved; we have a 0 + Arrays.asList(0, 1, 1, 1, 1, 1, 1, 0),//curved; we have a 0 + Arrays.asList(1, 1, 1, 1, 1, 1, 1, 1), + Arrays.asList(1, 1, 1, 1, 1, 1, 1, 1), + Arrays.asList(1, 1, 1, 1, 1, 1, 1, 1), + null, null, null, null, null//no data here (below edge of rect 0) + ), + counts + ); + + // Search in no-where ville and get null counts + assertNull(getHmObj(query(params(baseParams, 
+ "json.facet", "{f1:{type:heatmap, f:" + FIELD + ", geom:'ENVELOPE(0, 10, -80, -90)'}}"))).get("counts_ints2D")); + + Object v = getHmObj(query(params(baseParams, + "json.facet", "{f1:{type:heatmap, f:" + FIELD + ", format:png }}"))).get("counts_png"); + assertTrue(v instanceof byte[]); + //simply test we can read the image + assertNotNull(FacetHeatmap.PngHelper.readImage((byte[]) v)); + //good enough for this test method + } + + private NamedList getHmObj(QueryResponse response) { + // classic faceting + final NamedList classicResp = (NamedList) response.getResponse().findRecursive("facet_counts", "facet_heatmaps", FIELD); + if (classicResp != null) { + return classicResp; + } + // JSON Facet + return (NamedList) response.getResponse().findRecursive("facets", "f1"); + } + @Test @Repeat(iterations = 3) public void testPng() { @@ -197,14 +331,14 @@ public class SpatialHeatmapFacetsTest extends BaseDistributedSearchTestCase { } } // Round-trip - final byte[] bytes = SpatialHeatmapFacets.asPngBytes(columns, rows, counts, null); + final byte[] bytes = FacetHeatmap.asPngBytes(columns, rows, counts, null); int[] countsOut = random().nextBoolean() ? new int[columns * rows] : null; int base = 0; if (countsOut != null) { base = 9; Arrays.fill(countsOut, base); } - countsOut = SpatialHeatmapFacets.addPngToIntArray(bytes, countsOut); + countsOut = FacetHeatmap.addPngToIntArray(bytes, countsOut); // Test equal assertEquals(counts.length, countsOut.length); for (int i = 0; i < countsOut.length; i++) { diff --git a/solr/solr-ref-guide/src/json-facet-api.adoc b/solr/solr-ref-guide/src/json-facet-api.adoc index a7766b22ab1..21287a02c9e 100644 --- a/solr/solr-ref-guide/src/json-facet-api.adoc +++ b/solr/solr-ref-guide/src/json-facet-api.adoc @@ -300,6 +300,52 @@ By default, the ranges used to compute range faceting between `start` and `end` |facet |Aggregations, metrics, or nested facets that will be calculated for every returned bucket |=== +== Heatmap Facet + +The heatmap facet generates a 2D grid of facet counts for documents having spatial data in each grid cell. + +This feature is primarily documented in the <> section of the reference guide. +The key parameters are `type` to specify `heatmap` and `field` to indicate a spatial RPT field. +The rest of the parameter names use the same names and semantics mirroring + facet.heatmap query-parameter style faceting, albeit without the "facet.heatmap." prefix. +For example `geom` here corresponds to `facet.heatmap.geom` in a facet.heatmap command. + +Like the other facet types, heatmaps may have a custom domain (e.g. to exclude filters) and they can be subordinate to +other facet types like a query facet. However, unlike those, a heatmap facet cannot have facets or stats hang beneath -- +at least not yet. + +Here's an example query: +[source,java] +---- +{ + hm : { + type : heatmap, + field : points_srpt, + geom : "[-49.492,-180 TO 64.701,73.125]", + distErrPct : 0.5 + } +} +---- + +And the facet response will look like: +[source,json] +---- +{ +"facets":{ + "count":145725, + "hm":{ + "gridLevel":1, + "columns":6, + "rows":4, + "minX":-180.0, + "maxX":90.0, + "minY":-90.0, + "maxY":90.0, + "counts_ints2D":[[68,1270,459,5359,39456,1713],[123,10472,13620,7777,18376,6239],[88,6,3898,989,1314,255],[0,0,30,1,0,1]] + }}} +---- + + == Filtering Facets One can filter the domain *before* faceting via the `filter` keyword in the `domain` block of the facet. 
diff --git a/solr/solr-ref-guide/src/spatial-search.adoc b/solr/solr-ref-guide/src/spatial-search.adoc
index 3e4eaab7688..47d19c4b090 100644
--- a/solr/solr-ref-guide/src/spatial-search.adoc
+++ b/solr/solr-ref-guide/src/spatial-search.adoc
@@ -375,10 +375,13 @@ When using this field type, you will likely _not_ want to mark the field as stor
 The RPT field supports generating a 2D grid of facet counts for documents having spatial data in each grid cell. For high-detail grids, this can be used to plot points, and for lesser detail it can be used for heatmap generation. The grid cells are determined at index-time based on RPT's configuration. At facet counting time, the indexed cells in the region of interest are traversed and a grid of counters corresponding to each cell are incremented. Solr can return the data in a straight-forward 2D array of integers or in a PNG which compresses better for larger data sets but must be decoded.

-The heatmap feature is accessed from Solr's faceting feature. As a part of faceting, it supports the `key` local parameter as well as excluding tagged filter queries, just like other types of faceting do. This allows multiple heatmaps to be returned on the same field with different filters.
+The heatmap feature is accessible both from Solr's standard faceting feature and from the newer, more flexible <<json-facet-api.adoc#heatmap-facet,JSON Facet API>>.
+The rest of this section describes the standard faceting approach.
+As a part of faceting, it supports the `key` local parameter as well as excluding tagged filter queries, just like other types of faceting do.
+This allows multiple heatmaps to be returned on the same field with different filters.

 `facet`::
-Set to `true` to enable faceting.
+Set to `true` to enable standard faceting.

 `facet.heatmap`::
 The field name of type RPT.
diff --git a/solr/test-framework/src/java/org/apache/solr/BaseDistributedSearchTestCase.java b/solr/test-framework/src/java/org/apache/solr/BaseDistributedSearchTestCase.java
index 253ba405dd9..4a675811257 100644
--- a/solr/test-framework/src/java/org/apache/solr/BaseDistributedSearchTestCase.java
+++ b/solr/test-framework/src/java/org/apache/solr/BaseDistributedSearchTestCase.java
@@ -481,7 +481,9 @@ public abstract class BaseDistributedSearchTestCase extends SolrTestCaseJ4 {
 */
 protected void indexDoc(SolrInputDocument doc) throws IOException, SolrServerException {
 controlClient.add(doc);
-
+ if (shardCount == 0) {//mostly for temp debugging
+ return;
+ }
 int which = (doc.getField(id).toString().hashCode() & 0x7fffffff) % clients.size();
 SolrClient client = clients.get(which);
 client.add(doc);
@@ -599,6 +601,10 @@ public abstract class BaseDistributedSearchTestCase extends SolrTestCaseJ4 {
 final QueryResponse controlRsp = controlClient.query(params);
 validateControlData(controlRsp);

+ if (shardCount == 0) {//mostly for temp debugging
+ return controlRsp;
+ }
+
 params.remove("distrib");
 if (setDistribParams) setDistributedParams(params);

@@ -872,6 +878,15 @@ public abstract class BaseDistributedSearchTestCase extends SolrTestCaseJ4 {
 }

+ // equivalent integer numbers
+ if ((a instanceof Integer || a instanceof Long) && (b instanceof Integer || b instanceof Long)) {
+ if (((Number)a).longValue() == ((Number)b).longValue()) {
+ return null;
+ } else {
+ return ":" + a + "!=" + b;
+ }
+ }
+
 if ((flags & FUZZY) != 0) {
 if ((a instanceof Double && b instanceof Double)) {
 double aaa = ((Double) a).doubleValue();