mirror of https://github.com/apache/lucene.git
LUCENE-5339: small opto for range facets, and factor out base class; put longHashCode back
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5339@1547511 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
034595f5e9
commit
8da43e722e
|
@ -31,8 +31,8 @@ import org.apache.lucene.util.Bits;
|
|||
|
||||
/** Represents a range over double values. */
|
||||
public final class DoubleRange extends Range {
|
||||
private final double minIncl;
|
||||
private final double maxIncl;
|
||||
final double minIncl;
|
||||
final double maxIncl;
|
||||
|
||||
public final double min;
|
||||
public final double max;
|
||||
|
|
|
@ -45,11 +45,7 @@ import org.apache.lucene.queries.function.valuesource.FloatFieldSource; // javad
|
|||
* pass just the field name).
|
||||
*
|
||||
* @lucene.experimental */
|
||||
public class DoubleRangeFacetCounts extends Facets {
|
||||
private final DoubleRange[] ranges;
|
||||
private final int[] counts;
|
||||
private final String field;
|
||||
private int totCount;
|
||||
public class DoubleRangeFacetCounts extends RangeFacetCounts {
|
||||
|
||||
/** Create {@code DoubleRangeFacetCounts}, using {@link
|
||||
* DoubleFieldSource} from the specified field. */
|
||||
|
@ -60,14 +56,22 @@ public class DoubleRangeFacetCounts extends Facets {
|
|||
/** Create {@code DoubleRangeFacetCounts}, using the provided
|
||||
* {@link ValueSource}. */
|
||||
public DoubleRangeFacetCounts(String field, ValueSource valueSource, FacetsCollector hits, DoubleRange... ranges) throws IOException {
|
||||
this.ranges = ranges;
|
||||
this.field = field;
|
||||
counts = new int[ranges.length];
|
||||
super(field, ranges);
|
||||
count(valueSource, hits.getMatchingDocs());
|
||||
}
|
||||
|
||||
private void count(ValueSource valueSource, List<MatchingDocs> matchingDocs) throws IOException {
|
||||
|
||||
DoubleRange[] ranges = (DoubleRange[]) this.ranges;
|
||||
|
||||
// Compute min & max over all ranges:
|
||||
double minIncl = Double.POSITIVE_INFINITY;
|
||||
double maxIncl = Double.NEGATIVE_INFINITY;
|
||||
for(DoubleRange range : ranges) {
|
||||
minIncl = Math.min(minIncl, range.minIncl);
|
||||
maxIncl = Math.max(maxIncl, range.maxIncl);
|
||||
}
|
||||
|
||||
// TODO: test if this is faster (in the past it was
|
||||
// faster to do MatchingDocs on the inside) ... see
|
||||
// patches on LUCENE-4965):
|
||||
|
@ -81,6 +85,10 @@ public class DoubleRangeFacetCounts extends Facets {
|
|||
if (fv.exists(doc)) {
|
||||
|
||||
double v = fv.doubleVal(doc);
|
||||
if (v < minIncl || v > maxIncl) {
|
||||
doc++;
|
||||
continue;
|
||||
}
|
||||
|
||||
// TODO: if all ranges are non-overlapping, we
|
||||
// should instead do a bin-search up front
|
||||
|
@ -98,35 +106,4 @@ public class DoubleRangeFacetCounts extends Facets {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
// nocommit all args are ... unused ... this doesn't "fit"
|
||||
// very well:
|
||||
|
||||
@Override
|
||||
public FacetResult getTopChildren(int topN, String dim, String... path) {
|
||||
if (dim.equals(field) == false) {
|
||||
throw new IllegalArgumentException("invalid dim \"" + dim + "\"; should be \"" + field + "\"");
|
||||
}
|
||||
if (path.length != 0) {
|
||||
throw new IllegalArgumentException("path.length should be 0");
|
||||
}
|
||||
LabelAndValue[] labelValues = new LabelAndValue[counts.length];
|
||||
for(int i=0;i<counts.length;i++) {
|
||||
// nocommit can we add the range into this?
|
||||
labelValues[i] = new LabelAndValue(ranges[i].label, counts[i]);
|
||||
}
|
||||
|
||||
return new FacetResult(dim, path, totCount, labelValues, labelValues.length);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Number getSpecificValue(String dim, String... path) throws IOException {
|
||||
// nocommit we could impl this?
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<FacetResult> getAllDims(int topN) throws IOException {
|
||||
return Collections.singletonList(getTopChildren(topN, null));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -31,8 +31,8 @@ import org.apache.lucene.util.Bits;
|
|||
|
||||
/** Represents a range over long values. */
|
||||
public final class LongRange extends Range {
|
||||
private final long minIncl;
|
||||
private final long maxIncl;
|
||||
final long minIncl;
|
||||
final long maxIncl;
|
||||
|
||||
public final long min;
|
||||
public final long max;
|
||||
|
|
|
@ -35,13 +35,9 @@ import org.apache.lucene.queries.function.valuesource.LongFieldSource;
|
|||
* distance dimension like "< 1 km", "< 2 km", etc.).
|
||||
*
|
||||
* @lucene.experimental */
|
||||
public class LongRangeFacetCounts extends Facets {
|
||||
private final LongRange[] ranges;
|
||||
private final int[] counts;
|
||||
private final String field;
|
||||
private int totCount;
|
||||
public class LongRangeFacetCounts extends RangeFacetCounts {
|
||||
|
||||
/** Create {@code RangeFacetCounts}, using {@link
|
||||
/** Create {@code LongRangeFacetCounts}, using {@link
|
||||
* LongFieldSource} from the specified field. */
|
||||
public LongRangeFacetCounts(String field, FacetsCollector hits, LongRange... ranges) throws IOException {
|
||||
this(field, new LongFieldSource(field), hits, ranges);
|
||||
|
@ -50,14 +46,22 @@ public class LongRangeFacetCounts extends Facets {
|
|||
/** Create {@code LongRangeFacetCounts}, using the provided
|
||||
* {@link ValueSource}. */
|
||||
public LongRangeFacetCounts(String field, ValueSource valueSource, FacetsCollector hits, LongRange... ranges) throws IOException {
|
||||
this.ranges = ranges;
|
||||
this.field = field;
|
||||
counts = new int[ranges.length];
|
||||
super(field, ranges);
|
||||
count(valueSource, hits.getMatchingDocs());
|
||||
}
|
||||
|
||||
private void count(ValueSource valueSource, List<MatchingDocs> matchingDocs) throws IOException {
|
||||
|
||||
LongRange[] ranges = (LongRange[]) this.ranges;
|
||||
|
||||
// Compute min & max over all ranges:
|
||||
long minIncl = Long.MAX_VALUE;
|
||||
long maxIncl = Long.MIN_VALUE;
|
||||
for(LongRange range : ranges) {
|
||||
minIncl = Math.min(minIncl, range.minIncl);
|
||||
maxIncl = Math.max(maxIncl, range.maxIncl);
|
||||
}
|
||||
|
||||
// TODO: test if this is faster (in the past it was
|
||||
// faster to do MatchingDocs on the inside) ... see
|
||||
// patches on LUCENE-4965):
|
||||
|
@ -71,6 +75,10 @@ public class LongRangeFacetCounts extends Facets {
|
|||
if (fv.exists(doc)) {
|
||||
|
||||
long v = fv.longVal(doc);
|
||||
if (v < minIncl || v > maxIncl) {
|
||||
doc++;
|
||||
continue;
|
||||
}
|
||||
|
||||
// TODO: if all ranges are non-overlapping, we
|
||||
// should instead do a bin-search up front
|
||||
|
@ -88,35 +96,4 @@ public class LongRangeFacetCounts extends Facets {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
// nocommit all args are ... unused ... this doesn't "fit"
|
||||
// very well:
|
||||
|
||||
@Override
|
||||
public FacetResult getTopChildren(int topN, String dim, String... path) {
|
||||
if (dim.equals(field) == false) {
|
||||
throw new IllegalArgumentException("invalid dim \"" + dim + "\"; should be \"" + field + "\"");
|
||||
}
|
||||
if (path.length != 0) {
|
||||
throw new IllegalArgumentException("path.length should be 0");
|
||||
}
|
||||
LabelAndValue[] labelValues = new LabelAndValue[counts.length];
|
||||
for(int i=0;i<counts.length;i++) {
|
||||
// nocommit can we add the range into this?
|
||||
labelValues[i] = new LabelAndValue(ranges[i].label, counts[i]);
|
||||
}
|
||||
|
||||
return new FacetResult(dim, path, totCount, labelValues, labelValues.length);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Number getSpecificValue(String dim, String... path) throws IOException {
|
||||
// nocommit we could impl this?
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<FacetResult> getAllDims(int topN) throws IOException {
|
||||
return Collections.singletonList(getTopChildren(topN, null));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,70 @@
|
|||
package org.apache.lucene.facet;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
/** Base class for range faceting.
|
||||
*
|
||||
* @lucene.experimental */
|
||||
abstract class RangeFacetCounts extends Facets {
|
||||
protected final Range[] ranges;
|
||||
protected final int[] counts;
|
||||
protected final String field;
|
||||
protected int totCount;
|
||||
|
||||
/** Create {@code RangeFacetCounts}, using {@link
|
||||
* LongFieldSource} from the specified field. */
|
||||
protected RangeFacetCounts(String field, Range[] ranges) throws IOException {
|
||||
this.field = field;
|
||||
this.ranges = ranges;
|
||||
counts = new int[ranges.length];
|
||||
}
|
||||
|
||||
// nocommit all args are ... unused ... this doesn't "fit"
|
||||
// very well:
|
||||
|
||||
@Override
|
||||
public FacetResult getTopChildren(int topN, String dim, String... path) {
|
||||
if (dim.equals(field) == false) {
|
||||
throw new IllegalArgumentException("invalid dim \"" + dim + "\"; should be \"" + field + "\"");
|
||||
}
|
||||
if (path.length != 0) {
|
||||
throw new IllegalArgumentException("path.length should be 0");
|
||||
}
|
||||
LabelAndValue[] labelValues = new LabelAndValue[counts.length];
|
||||
for(int i=0;i<counts.length;i++) {
|
||||
labelValues[i] = new LabelAndValue(ranges[i].label, counts[i]);
|
||||
}
|
||||
return new FacetResult(dim, path, totCount, labelValues, labelValues.length);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Number getSpecificValue(String dim, String... path) throws IOException {
|
||||
// TODO: should we impl this?
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<FacetResult> getAllDims(int topN) throws IOException {
|
||||
return Collections.singletonList(getTopChildren(topN, null));
|
||||
}
|
||||
}
|
|
@ -41,9 +41,6 @@ public class FacetLabel implements Comparable<FacetLabel> {
|
|||
*/
|
||||
public final static int MAX_CATEGORY_PATH_LENGTH = (BYTE_BLOCK_SIZE - 2) / 4;
|
||||
|
||||
/** An empty {@link FacetLabel} */
|
||||
//public static final FacetLabel EMPTY = new FacetLabel();
|
||||
|
||||
/**
|
||||
* The components of this {@link FacetLabel}. Note that this array may be
|
||||
* shared with other {@link FacetLabel} instances, e.g. as a result of
|
||||
|
@ -114,8 +111,12 @@ public class FacetLabel implements Comparable<FacetLabel> {
|
|||
final int len = length < other.length ? length : other.length;
|
||||
for (int i = 0, j = 0; i < len; i++, j++) {
|
||||
int cmp = components[i].compareTo(other.components[j]);
|
||||
if (cmp < 0) return -1; // this is 'before'
|
||||
if (cmp > 0) return 1; // this is 'after'
|
||||
if (cmp < 0) {
|
||||
return -1; // this is 'before'
|
||||
}
|
||||
if (cmp > 0) {
|
||||
return 1; // this is 'after'
|
||||
}
|
||||
}
|
||||
|
||||
// one is a prefix of the other
|
||||
|
@ -156,6 +157,23 @@ public class FacetLabel implements Comparable<FacetLabel> {
|
|||
return hash;
|
||||
}
|
||||
|
||||
/** Calculate a 64-bit hash function for this path. This
|
||||
* is necessary for {@link NameHashIntCacheLRU} (the
|
||||
* default cache impl for {@link
|
||||
* LruTaxonomyWriterCache}) to reduce the chance of
|
||||
* "silent but deadly" collisions. */
|
||||
public long longHashCode() {
|
||||
if (length == 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
long hash = length;
|
||||
for (int i = 0; i < length; i++) {
|
||||
hash = hash * 65599 + components[i].hashCode();
|
||||
}
|
||||
return hash;
|
||||
}
|
||||
|
||||
/** Returns a sub-path of this path up to {@code length} components. */
|
||||
public FacetLabel subpath(final int length) {
|
||||
if (length >= this.length || length < 0) {
|
||||
|
|
|
@ -36,12 +36,12 @@ public class NameHashIntCacheLRU extends NameIntCacheLRU {
|
|||
|
||||
@Override
|
||||
Object key(FacetLabel name) {
|
||||
return new Integer(name.hashCode());
|
||||
return new Long(name.longHashCode());
|
||||
}
|
||||
|
||||
@Override
|
||||
Object key(FacetLabel name, int prefixLen) {
|
||||
return new Integer(name.subpath(prefixLen).hashCode());
|
||||
return new Long(name.subpath(prefixLen).longHashCode());
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -107,6 +107,13 @@ public class TestFacetLabel extends FacetTestCase {
|
|||
assertEquals(new FacetLabel("hello", "world").hashCode(), new FacetLabel("hello", "world").hashCode());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testLongHashCode() {
|
||||
assertEquals(new FacetLabel().longHashCode(), new FacetLabel().longHashCode());
|
||||
assertFalse(new FacetLabel().longHashCode() == new FacetLabel("hi").longHashCode());
|
||||
assertEquals(new FacetLabel("hello", "world").longHashCode(), new FacetLabel("hello", "world").longHashCode());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testArrayConstructor() {
|
||||
FacetLabel p = new FacetLabel("hello", "world", "yo");
|
||||
|
|
|
@ -128,14 +128,23 @@ public class TestConcurrentFacetedIndexing extends FacetTestCase {
|
|||
for (Thread t : indexThreads) t.join();
|
||||
|
||||
DirectoryTaxonomyReader tr = new DirectoryTaxonomyReader(tw);
|
||||
assertEquals("mismatch number of categories", values.size() + 1, tr.getSize()); // +1 for root category
|
||||
// +1 for root category
|
||||
if (values.size() + 1 != tr.getSize()) {
|
||||
for(String value : values.keySet()) {
|
||||
FacetLabel label = new FacetLabel(FacetsConfig.stringToPath(value));
|
||||
if (tr.getOrdinal(label) == -1) {
|
||||
System.out.println("FAIL: path=" + label + " not recognized");
|
||||
}
|
||||
}
|
||||
fail("mismatch number of categories");
|
||||
}
|
||||
int[] parents = tr.getParallelTaxonomyArrays().parents();
|
||||
for (String cat : values.keySet()) {
|
||||
FacetLabel cp = new FacetLabel(FacetsConfig.stringToPath(cat));
|
||||
assertTrue("category not found " + cp, tr.getOrdinal(cp) > 0);
|
||||
int level = cp.length;
|
||||
int parentOrd = 0; // for root, parent is always virtual ROOT (ord=0)
|
||||
FacetLabel path = new FacetLabel();
|
||||
FacetLabel path = null;
|
||||
for (int i = 0; i < level; i++) {
|
||||
path = cp.subpath(i + 1);
|
||||
int ord = tr.getOrdinal(path);
|
||||
|
|
|
@ -256,6 +256,9 @@ public class TestDirectoryTaxonomyWriter extends FacetTestCase {
|
|||
// this is slower than CL2O, but less memory consuming, and exercises finding categories on disk too.
|
||||
cache = new LruTaxonomyWriterCache(ncats / 10);
|
||||
}
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: use cache=" + cache);
|
||||
}
|
||||
final DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE, cache);
|
||||
Thread[] addThreads = new Thread[atLeast(4)];
|
||||
for (int z = 0; z < addThreads.length; z++) {
|
||||
|
@ -291,7 +294,17 @@ public class TestDirectoryTaxonomyWriter extends FacetTestCase {
|
|||
tw.close();
|
||||
|
||||
DirectoryTaxonomyReader dtr = new DirectoryTaxonomyReader(dir);
|
||||
assertEquals("mismatch number of categories", values.size() + 1, dtr.getSize()); // +1 for root category
|
||||
// +1 for root category
|
||||
if (values.size() + 1 != dtr.getSize()) {
|
||||
for(String value : values.keySet()) {
|
||||
FacetLabel label = new FacetLabel(FacetsConfig.stringToPath(value));
|
||||
if (dtr.getOrdinal(label) == -1) {
|
||||
System.out.println("FAIL: path=" + label + " not recognized");
|
||||
}
|
||||
}
|
||||
fail("mismatch number of categories");
|
||||
}
|
||||
|
||||
int[] parents = dtr.getParallelTaxonomyArrays().parents();
|
||||
for (String cat : values.keySet()) {
|
||||
FacetLabel cp = new FacetLabel(FacetsConfig.stringToPath(cat));
|
||||
|
@ -306,9 +319,8 @@ public class TestDirectoryTaxonomyWriter extends FacetTestCase {
|
|||
parentOrd = ord; // next level should have this parent
|
||||
}
|
||||
}
|
||||
dtr.close();
|
||||
|
||||
dir.close();
|
||||
|
||||
IOUtils.close(dtr, dir);
|
||||
}
|
||||
|
||||
private long getEpoch(Directory taxoDir) throws IOException {
|
||||
|
|
Loading…
Reference in New Issue