mirror of https://github.com/apache/lucene.git
Merged /lucene/dev/trunk:r1435377-1436565
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1436566 13f79535-47bb-0310-9956-ffa450edef68
commit 1801cac63e
@@ -29,6 +29,15 @@ Changes in backwards compatibility policy
  support in-memory caching, CategoryListCache was removed too.
  (Shai Erera, Michael McCandless)

* LUCENE-4697: FacetResultNode is now a concrete class with public members
  (instead of getter methods). (Shai Erera)

* LUCENE-4600: FacetsCollector is now an abstract class with two
  implementations: StandardFacetsCollector (the old version of
  FacetsCollector) and CountingFacetsCollector. FacetsCollector.create()
  returns the most optimized collector for the given parameters.
  (Shai Erera, Michael McCandless)

Optimizations

* LUCENE-4687: BloomFilterPostingsFormat now lazily initializes delegate

@@ -49,6 +58,13 @@ New Features

* LUCENE-4686: New specialized DGapVInt8IntEncoder for facets (now the
  default). (Shai Erera)

* LUCENE-4703: Add simple PrintTaxonomyStats tool to see summary
  information about the facets taxonomy index. (Mike McCandless)

* LUCENE-4599: New oal.codecs.compressing.CompressingTermVectorsFormat which
  compresses term vectors into chunks of documents similarly to
  CompressingStoredFieldsFormat. (Adrien Grand)

======================= Lucene 4.1.0 =======================

Changes in backwards compatibility policy
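The LUCENE-4600 entry above makes FacetsCollector.create() the preferred entry point for faceted search. A minimal sketch of how a caller might drive it follows; the class names are the ones mentioned in the entry, but the packages (imports omitted), the FacetSearchParams construction and the exact create(...) argument list are assumptions not shown in this commit, so treat it as illustrative only.

  // Sketch only: count the top-10 "Author" facets over all documents.
  void countAuthors(IndexSearcher searcher, TaxonomyReader taxoReader) throws IOException {
    // FacetSearchParams construction varies across 4.x releases; shown here as assumed.
    FacetSearchParams fsp = new FacetSearchParams(
        new CountFacetRequest(new CategoryPath("Author"), 10));
    // create() is expected to return CountingFacetsCollector or StandardFacetsCollector,
    // whichever is the most optimized collector for the given parameters.
    FacetsCollector fc = FacetsCollector.create(fsp, searcher.getIndexReader(), taxoReader);
    searcher.search(new MatchAllDocsQuery(), fc);
    for (FacetResult result : fc.getFacetResults()) {
      System.out.println(result);
    }
  }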
@@ -18,8 +18,9 @@ package org.apache.lucene.benchmark.byTask.feeds;
 */

import java.io.IOException;
import java.util.List;

import org.apache.lucene.facet.associations.CategoryAssociationsContainer;
import org.apache.lucene.facet.taxonomy.CategoryPath;

/**
 * Source items for facets.

@@ -29,12 +30,11 @@ import org.apache.lucene.facet.associations.CategoryAssociationsContainer;
public abstract class FacetSource extends ContentItemsSource {

  /**
   * Returns the next {@link CategoryAssociationsContainer facets content item}.
   * Implementations must account for multi-threading, as multiple threads can
   * call this method simultaneously.
   * Fills the next facets content items in the given list. Implementations must
   * account for multi-threading, as multiple threads can call this method
   * simultaneously.
   */
  public abstract CategoryAssociationsContainer getNextFacets(CategoryAssociationsContainer facets)
      throws NoMoreDataException, IOException;
  public abstract void getNextFacets(List<CategoryPath> facets) throws NoMoreDataException, IOException;

  @Override
  public void resetInputs() throws IOException {
@@ -18,10 +18,10 @@ package org.apache.lucene.benchmark.byTask.feeds;
 */

import java.io.IOException;
import java.util.List;
import java.util.Random;

import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.facet.associations.CategoryAssociationsContainer;
import org.apache.lucene.facet.taxonomy.CategoryPath;

/**

@@ -29,42 +29,38 @@ import org.apache.lucene.facet.taxonomy.CategoryPath;
 * <p>
 * Supports the following parameters:
 * <ul>
 * <li><b>rand.seed</b> - defines the seed to initialize Random with (default: <b>13</b>).
 * <li><b>rand.seed</b> - defines the seed to initialize {@link Random} with
 * (default: <b>13</b>).
 * <li><b>max.doc.facets</b> - maximal #facets per doc (default: <b>10</b>).
 * Actual number of facets in a certain doc would be anything between 1 and that number.
 * <li><b>max.facet.depth</b> - maximal #components in a facet (default: <b>3</b>).
 * Actual number of components in a certain facet would be anything between 1 and that number.
 * Actual number of facets in a certain doc would be anything between 1 and that
 * number.
 * <li><b>max.facet.depth</b> - maximal #components in a facet (default:
 * <b>3</b>). Actual number of components in a certain facet would be anything
 * between 1 and that number.
 * </ul>
 */
public class RandomFacetSource extends FacetSource {

  Random random;

  private int maxDocFacets = 10;
  private int maxFacetDepth = 3;
  private Random random;
  private int maxDocFacets;
  private int maxFacetDepth;
  private int maxValue = maxDocFacets * maxFacetDepth;

  @Override
  public CategoryAssociationsContainer getNextFacets(CategoryAssociationsContainer facets)
      throws NoMoreDataException, IOException {
    if (facets == null) {
      facets = new CategoryAssociationsContainer();
    } else {
      facets.clear();
    }
    int numFacets = 1 + random.nextInt(maxDocFacets-1); // at least one facet to each doc
  public void getNextFacets(List<CategoryPath> facets) throws NoMoreDataException, IOException {
    facets.clear();
    int numFacets = 1 + random.nextInt(maxDocFacets); // at least one facet to each doc
    for (int i = 0; i < numFacets; i++) {
      int depth = 1 + random.nextInt(maxFacetDepth - 1); // depth 0 is not useful
      int depth = 1 + random.nextInt(maxFacetDepth); // depth 0 is not useful
      String[] components = new String[depth];
      for (int k = 0; k < depth; k++) {
        components[k] = Integer.toString(random.nextInt(maxValue));
        addItem();
      }
      CategoryPath cp = new CategoryPath(components);
      facets.setAssociation(cp, null);
      facets.add(cp);
      addBytes(cp.toString().length()); // very rough approximation
    }
    return facets;
  }

  @Override

@@ -76,8 +72,8 @@ public class RandomFacetSource extends FacetSource {
  public void setConfig(Config config) {
    super.setConfig(config);
    random = new Random(config.get("rand.seed", 13));
    maxDocFacets = config.get("max.doc.facets", 200);
    maxFacetDepth = config.get("max.facet.depth", 10);
    maxDocFacets = config.get("max.doc.facets", 10);
    maxFacetDepth = config.get("max.facet.depth", 3);
    maxValue = maxDocFacets * maxFacetDepth;
  }
}
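The javadoc above lists the three properties RandomFacetSource reads in setConfig(). A small hypothetical driver showing how they reach the source through the benchmark Config class; normally the byTask framework builds the Config from an algorithm file, and the Config(Properties) constructor used here is an assumption (imports omitted):

  List<CategoryPath> sampleFacets() throws Exception {
    Properties props = new Properties();
    props.setProperty("rand.seed", "42");      // default: 13
    props.setProperty("max.doc.facets", "5");  // default: 10
    props.setProperty("max.facet.depth", "2"); // default: 3
    RandomFacetSource source = new RandomFacetSource();
    source.setConfig(new Config(props));
    List<CategoryPath> facets = new ArrayList<CategoryPath>();
    // fills between 1 and max.doc.facets paths, each 1..max.facet.depth components deep
    source.getNextFacets(facets);
    return facets;
  }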
@@ -17,49 +17,56 @@ package org.apache.lucene.benchmark.byTask.tasks;
 * limitations under the License.
 */

import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.feeds.FacetSource;
import org.apache.lucene.facet.associations.CategoryAssociationsContainer;
import org.apache.lucene.facet.index.FacetFields;
import org.apache.lucene.facet.taxonomy.CategoryPath;

/**
 * Add a faceted document.
 * <p>
 * Config properties:
 * <ul>
 * <li><b>with.facets</b>=<tells whether to actually add any facets to the document| Default: true>
 * <br>This config property allows to easily compare the performance of adding docs with and without facets.
 * Note that facets are created even when this is false, just that they are not added to the document (nor to the taxonomy).
 * </ul>
 * <li><b>with.facets</b>=<tells whether to actually add any facets to the
 * document| Default: true> <br>
 * This config property allows to easily compare the performance of adding docs
 * with and without facets. Note that facets are created even when this is
 * false, just that they are not added to the document (nor to the taxonomy).
 * </ul>
 * <p>
 * See {@link AddDocTask} for general document parameters and configuration.
 * <p>
 * Makes use of the {@link FacetSource} in effect - see {@link PerfRunData} for facet source settings.
 * Makes use of the {@link FacetSource} in effect - see {@link PerfRunData} for
 * facet source settings.
 */
public class AddFacetedDocTask extends AddDocTask {

  private final List<CategoryPath> facets = new ArrayList<CategoryPath>();
  private FacetFields facetFields;

  public AddFacetedDocTask(PerfRunData runData) {
    super(runData);
  }

  private CategoryAssociationsContainer facets = null;
  private FacetFields facetFields = null;
  private boolean withFacets = true;

  @Override
  public void setup() throws Exception {
    super.setup();
    // create the facets even if they should not be added - allows to measure the effect of just adding facets
    facets = getRunData().getFacetSource().getNextFacets(facets);
    withFacets = getRunData().getConfig().get("with.facets", true);
    if (withFacets) {
      facetFields = new FacetFields(getRunData().getTaxonomyWriter());
    if (facetFields == null) {
      boolean withFacets = getRunData().getConfig().get("with.facets", true);
      if (withFacets) {
        FacetSource facetsSource = getRunData().getFacetSource();
        facetFields = withFacets ? new FacetFields(getRunData().getTaxonomyWriter()) : null;
        facetsSource.getNextFacets(facets);
      }
    }
  }

  @Override
  protected String getLogMessage(int recsCount) {
    if (!withFacets) {
    if (facetFields == null) {
      return super.getLogMessage(recsCount);
    }
    return super.getLogMessage(recsCount)+ " with facets";

@@ -67,7 +74,7 @@ public class AddFacetedDocTask extends AddDocTask {

  @Override
  public int doLogic() throws Exception {
    if (withFacets) {
    if (facetFields != null) {
      facetFields.addFields(doc, facets);
    }
    return super.doLogic();
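The with.facets property documented above exists so the same algorithm can be run with and without facet indexing for comparison. A hypothetical fragment of a byTask algorithm file; with.facets is the property added in this class, while the comment style, task name and task invocation syntax are assumptions about the surrounding benchmark framework:

  with.facets=false
  # ... content source, directory and analyzer properties as usual ...
  { "AddFacetedDocs" AddFacetedDoc > : 10000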
@@ -205,6 +205,7 @@
        <exclude name="queryparser/classes/java/org/apache/lucene/queryparser/classic/QueryParserTokenManager.class"/>
        <exclude name="queryparser/classes/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParserTokenManager.class"/>
        <exclude name="queryparser/classes/java/org/apache/lucene/queryparser/surround/parser/QueryParserTokenManager.class"/>
        <exclude name="facet/classes/java/org/apache/lucene/facet/util/PrintTaxonomyStats.class"/>
      </fileset>
    </forbidden-apis>
  </target>
@@ -0,0 +1,102 @@
package org.apache.lucene.codecs.compressing;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;

import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.codecs.TermVectorsWriter;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;

/**
 * A {@link TermVectorsFormat} that compresses chunks of documents together in
 * order to improve the compression ratio.
 * @lucene.experimental
 */
public final class CompressingTermVectorsFormat extends TermVectorsFormat {

  private final String formatName;
  private final String segmentSuffix;
  private final CompressionMode compressionMode;
  private final int chunkSize;

  /**
   * Create a new {@link CompressingTermVectorsFormat}.
   * <p>
   * <code>formatName</code> is the name of the format. This name will be used
   * in the file formats to perform
   * {@link CodecUtil#checkHeader(org.apache.lucene.store.DataInput, String, int, int) codec header checks}.
   * <p>
   * The <code>compressionMode</code> parameter allows you to choose between
   * compression algorithms that have various compression and decompression
   * speeds so that you can pick the one that best fits your indexing and
   * searching throughput. You should never instantiate two
   * {@link CompressingTermVectorsFormat}s that have the same name but
   * different {@link CompressionMode}s.
   * <p>
   * <code>chunkSize</code> is the minimum byte size of a chunk of documents.
   * Higher values of <code>chunkSize</code> should improve the compression
   * ratio but will require more memory at indexing time and might make document
   * loading a little slower (depending on the size of your OS cache compared
   * to the size of your index).
   *
   * @param formatName the name of the {@link StoredFieldsFormat}
   * @param segmentSuffix a suffix to append to files created by this format
   * @param compressionMode the {@link CompressionMode} to use
   * @param chunkSize the minimum number of bytes of a single chunk of stored documents
   * @see CompressionMode
   */
  public CompressingTermVectorsFormat(String formatName, String segmentSuffix,
      CompressionMode compressionMode, int chunkSize) {
    this.formatName = formatName;
    this.segmentSuffix = segmentSuffix;
    this.compressionMode = compressionMode;
    if (chunkSize < 1) {
      throw new IllegalArgumentException("chunkSize must be >= 1");
    }
    this.chunkSize = chunkSize;
  }

  @Override
  public TermVectorsReader vectorsReader(Directory directory,
      SegmentInfo segmentInfo, FieldInfos fieldInfos, IOContext context)
      throws IOException {
    return new CompressingTermVectorsReader(directory, segmentInfo, segmentSuffix,
        fieldInfos, context, formatName, compressionMode);
  }

  @Override
  public TermVectorsWriter vectorsWriter(Directory directory,
      SegmentInfo segmentInfo, IOContext context) throws IOException {
    return new CompressingTermVectorsWriter(directory, segmentInfo, segmentSuffix,
        context, formatName, compressionMode, chunkSize);
  }

  @Override
  public String toString() {
    return getClass().getSimpleName() + "(compressionMode=" + compressionMode
        + ", chunkSize=" + chunkSize + ")";
  }

}
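The constructor documented above is the only public surface of the new format: a codec supplies a format name, an optional segment suffix, a CompressionMode and a chunk size. A sketch of wiring it into a custom codec follows; the FilterCodec/Lucene41Codec plumbing and all names here are illustrative, not part of this commit, with constructor arguments following the javadoc above:

  import org.apache.lucene.codecs.FilterCodec;
  import org.apache.lucene.codecs.TermVectorsFormat;
  import org.apache.lucene.codecs.compressing.CompressingTermVectorsFormat;
  import org.apache.lucene.codecs.compressing.CompressionMode;
  import org.apache.lucene.codecs.lucene41.Lucene41Codec;

  public final class CompressedVectorsCodec extends FilterCodec {
    // keep the format name stable for a given CompressionMode (see the javadoc above)
    private final TermVectorsFormat vectorsFormat =
        new CompressingTermVectorsFormat("CompressedVectorsData", "", CompressionMode.FAST, 1 << 14);

    public CompressedVectorsCodec() {
      super("CompressedVectorsCodec", new Lucene41Codec());
    }

    @Override
    public TermVectorsFormat termVectorsFormat() {
      return vectorsFormat;
    }
  }

A larger chunkSize trades indexing memory and some document-load latency for a better compression ratio, as described in the javadoc.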
File diff suppressed because it is too large
@ -0,0 +1,818 @@
|
|||
package org.apache.lucene.codecs.compressing;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayDeque;
|
||||
import java.util.Arrays;
|
||||
import java.util.Comparator;
|
||||
import java.util.Deque;
|
||||
import java.util.Iterator;
|
||||
import java.util.SortedSet;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.codecs.TermVectorsReader;
|
||||
import org.apache.lucene.codecs.TermVectorsWriter;
|
||||
import org.apache.lucene.index.AtomicReader;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.Fields;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.MergeState;
|
||||
import org.apache.lucene.index.SegmentInfo;
|
||||
import org.apache.lucene.index.SegmentReader;
|
||||
import org.apache.lucene.store.DataInput;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
import org.apache.lucene.util.packed.BlockPackedWriter;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
|
||||
/**
|
||||
* {@link TermVectorsWriter} for {@link CompressingTermVectorsFormat}.
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public final class CompressingTermVectorsWriter extends TermVectorsWriter {
|
||||
|
||||
static final String VECTORS_EXTENSION = "tvd";
|
||||
static final String VECTORS_INDEX_EXTENSION = "tvx";
|
||||
|
||||
static final String CODEC_SFX_IDX = "Index";
|
||||
static final String CODEC_SFX_DAT = "Data";
|
||||
|
||||
static final int VERSION_START = 0;
|
||||
static final int VERSION_CURRENT = VERSION_START;
|
||||
|
||||
static final int BLOCK_SIZE = 64;
|
||||
|
||||
static final int POSITIONS = 0x01;
|
||||
static final int OFFSETS = 0x02;
|
||||
static final int PAYLOADS = 0x04;
|
||||
static final int FLAGS_BITS = PackedInts.bitsRequired(POSITIONS | OFFSETS | PAYLOADS);
|
||||
|
||||
private final Directory directory;
|
||||
private final String segment;
|
||||
private final String segmentSuffix;
|
||||
private CompressingStoredFieldsIndexWriter indexWriter;
|
||||
private IndexOutput vectorsStream;
|
||||
|
||||
private final CompressionMode compressionMode;
|
||||
private final Compressor compressor;
|
||||
private final int chunkSize;
|
||||
|
||||
/** a pending doc */
|
||||
private class DocData {
|
||||
final int numFields;
|
||||
final Deque<FieldData> fields;
|
||||
final int posStart, offStart, payStart;
|
||||
DocData(int numFields, int posStart, int offStart, int payStart) {
|
||||
this.numFields = numFields;
|
||||
this.fields = new ArrayDeque<FieldData>(numFields);
|
||||
this.posStart = posStart;
|
||||
this.offStart = offStart;
|
||||
this.payStart = payStart;
|
||||
}
|
||||
FieldData addField(int fieldNum, int numTerms, boolean positions, boolean offsets, boolean payloads) {
|
||||
final FieldData field;
|
||||
if (fields.isEmpty()) {
|
||||
field = new FieldData(fieldNum, numTerms, positions, offsets, payloads, posStart, offStart, payStart);
|
||||
} else {
|
||||
final FieldData last = fields.getLast();
|
||||
final int posStart = last.posStart + (last.hasPositions ? last.totalPositions : 0);
|
||||
final int offStart = last.offStart + (last.hasOffsets ? last.totalPositions : 0);
|
||||
final int payStart = last.payStart + (last.hasPayloads ? last.totalPositions : 0);
|
||||
field = new FieldData(fieldNum, numTerms, positions, offsets, payloads, posStart, offStart, payStart);
|
||||
}
|
||||
fields.add(field);
|
||||
return field;
|
||||
}
|
||||
}
|
||||
|
||||
private DocData addDocData(int numVectorFields) {
|
||||
FieldData last = null;
|
||||
for (Iterator<DocData> it = pendingDocs.descendingIterator(); it.hasNext(); ) {
|
||||
final DocData doc = it.next();
|
||||
if (!doc.fields.isEmpty()) {
|
||||
last = doc.fields.getLast();
|
||||
break;
|
||||
}
|
||||
}
|
||||
final DocData doc;
|
||||
if (last == null) {
|
||||
doc = new DocData(numVectorFields, 0, 0, 0);
|
||||
} else {
|
||||
final int posStart = last.posStart + (last.hasPositions ? last.totalPositions : 0);
|
||||
final int offStart = last.offStart + (last.hasOffsets ? last.totalPositions : 0);
|
||||
final int payStart = last.payStart + (last.hasPayloads ? last.totalPositions : 0);
|
||||
doc = new DocData(numVectorFields, posStart, offStart, payStart);
|
||||
}
|
||||
pendingDocs.add(doc);
|
||||
return doc;
|
||||
}
|
||||
|
||||
/** a pending field */
|
||||
private class FieldData {
|
||||
final boolean hasPositions, hasOffsets, hasPayloads;
|
||||
final int fieldNum, flags, numTerms;
|
||||
final int[] freqs, prefixLengths, suffixLengths;
|
||||
final int posStart, offStart, payStart;
|
||||
int totalPositions;
|
||||
int ord;
|
||||
FieldData(int fieldNum, int numTerms, boolean positions, boolean offsets, boolean payloads,
|
||||
int posStart, int offStart, int payStart) {
|
||||
this.fieldNum = fieldNum;
|
||||
this.numTerms = numTerms;
|
||||
this.hasPositions = positions;
|
||||
this.hasOffsets = offsets;
|
||||
this.hasPayloads = payloads;
|
||||
this.flags = (positions ? POSITIONS : 0) | (offsets ? OFFSETS : 0) | (payloads ? PAYLOADS : 0);
|
||||
this.freqs = new int[numTerms];
|
||||
this.prefixLengths = new int[numTerms];
|
||||
this.suffixLengths = new int[numTerms];
|
||||
this.posStart = posStart;
|
||||
this.offStart = offStart;
|
||||
this.payStart = payStart;
|
||||
totalPositions = 0;
|
||||
ord = 0;
|
||||
}
|
||||
void addTerm(int freq, int prefixLength, int suffixLength) {
|
||||
freqs[ord] = freq;
|
||||
prefixLengths[ord] = prefixLength;
|
||||
suffixLengths[ord] = suffixLength;
|
||||
++ord;
|
||||
}
|
||||
void addPosition(int position, int startOffset, int length, int payloadLength) {
|
||||
if (hasPositions) {
|
||||
if (posStart + totalPositions == positionsBuf.length) {
|
||||
positionsBuf = ArrayUtil.grow(positionsBuf);
|
||||
}
|
||||
positionsBuf[posStart + totalPositions] = position;
|
||||
}
|
||||
if (hasOffsets) {
|
||||
if (offStart + totalPositions == startOffsetsBuf.length) {
|
||||
final int newLength = ArrayUtil.oversize(offStart + totalPositions, 4);
|
||||
startOffsetsBuf = Arrays.copyOf(startOffsetsBuf, newLength);
|
||||
lengthsBuf = Arrays.copyOf(lengthsBuf, newLength);
|
||||
}
|
||||
startOffsetsBuf[offStart + totalPositions] = startOffset;
|
||||
lengthsBuf[offStart + totalPositions] = length;
|
||||
}
|
||||
if (hasPayloads) {
|
||||
if (payStart + totalPositions == payloadLengthsBuf.length) {
|
||||
payloadLengthsBuf = ArrayUtil.grow(payloadLengthsBuf);
|
||||
}
|
||||
payloadLengthsBuf[payStart + totalPositions] = payloadLength;
|
||||
}
|
||||
++totalPositions;
|
||||
}
|
||||
}
|
||||
|
||||
private int numDocs; // total number of docs seen
|
||||
private final Deque<DocData> pendingDocs; // pending docs
|
||||
private DocData curDoc; // current document
|
||||
private FieldData curField; // current field
|
||||
private final BytesRef lastTerm;
|
||||
private int[] positionsBuf, startOffsetsBuf, lengthsBuf, payloadLengthsBuf;
|
||||
private final GrowableByteArrayDataOutput termSuffixes; // buffered term suffixes
|
||||
private final GrowableByteArrayDataOutput payloadBytes; // buffered term payloads
|
||||
private final BlockPackedWriter writer;
|
||||
|
||||
/** Sole constructor. */
|
||||
public CompressingTermVectorsWriter(Directory directory, SegmentInfo si, String segmentSuffix, IOContext context,
|
||||
String formatName, CompressionMode compressionMode, int chunkSize) throws IOException {
|
||||
assert directory != null;
|
||||
this.directory = directory;
|
||||
this.segment = si.name;
|
||||
this.segmentSuffix = segmentSuffix;
|
||||
this.compressionMode = compressionMode;
|
||||
this.compressor = compressionMode.newCompressor();
|
||||
this.chunkSize = chunkSize;
|
||||
|
||||
numDocs = 0;
|
||||
pendingDocs = new ArrayDeque<DocData>();
|
||||
termSuffixes = new GrowableByteArrayDataOutput(ArrayUtil.oversize(chunkSize, 1));
|
||||
payloadBytes = new GrowableByteArrayDataOutput(ArrayUtil.oversize(1, 1));
|
||||
lastTerm = new BytesRef(ArrayUtil.oversize(30, 1));
|
||||
|
||||
boolean success = false;
|
||||
IndexOutput indexStream = directory.createOutput(IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_INDEX_EXTENSION), context);
|
||||
try {
|
||||
vectorsStream = directory.createOutput(IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_EXTENSION), context);
|
||||
|
||||
final String codecNameIdx = formatName + CODEC_SFX_IDX;
|
||||
final String codecNameDat = formatName + CODEC_SFX_DAT;
|
||||
CodecUtil.writeHeader(indexStream, codecNameIdx, VERSION_CURRENT);
|
||||
CodecUtil.writeHeader(vectorsStream, codecNameDat, VERSION_CURRENT);
|
||||
assert CodecUtil.headerLength(codecNameDat) == vectorsStream.getFilePointer();
|
||||
assert CodecUtil.headerLength(codecNameIdx) == indexStream.getFilePointer();
|
||||
|
||||
indexWriter = new CompressingStoredFieldsIndexWriter(indexStream);
|
||||
indexStream = null;
|
||||
|
||||
vectorsStream.writeVInt(PackedInts.VERSION_CURRENT);
|
||||
vectorsStream.writeVInt(chunkSize);
|
||||
writer = new BlockPackedWriter(vectorsStream, BLOCK_SIZE);
|
||||
|
||||
positionsBuf = new int[1024];
|
||||
startOffsetsBuf = new int[1024];
|
||||
lengthsBuf = new int[1024];
|
||||
payloadLengthsBuf = new int[1024];
|
||||
|
||||
success = true;
|
||||
} finally {
|
||||
if (!success) {
|
||||
IOUtils.closeWhileHandlingException(indexStream);
|
||||
abort();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
try {
|
||||
IOUtils.close(vectorsStream, indexWriter);
|
||||
} finally {
|
||||
vectorsStream = null;
|
||||
indexWriter = null;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void abort() {
|
||||
IOUtils.closeWhileHandlingException(this);
|
||||
IOUtils.deleteFilesIgnoringExceptions(directory,
|
||||
IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_EXTENSION),
|
||||
IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_INDEX_EXTENSION));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void startDocument(int numVectorFields) throws IOException {
|
||||
curDoc = addDocData(numVectorFields);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void finishDocument() throws IOException {
|
||||
// append the payload bytes of the doc after its terms
|
||||
termSuffixes.writeBytes(payloadBytes.bytes, payloadBytes.length);
|
||||
payloadBytes.length = 0;
|
||||
++numDocs;
|
||||
if (triggerFlush()) {
|
||||
flush();
|
||||
}
|
||||
curDoc = null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void startField(FieldInfo info, int numTerms, boolean positions,
|
||||
boolean offsets, boolean payloads) throws IOException {
|
||||
curField = curDoc.addField(info.number, numTerms, positions, offsets, payloads);
|
||||
lastTerm.length = 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void finishField() throws IOException {
|
||||
curField = null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void startTerm(BytesRef term, int freq) throws IOException {
|
||||
assert freq >= 1;
|
||||
final int prefix = StringHelper.bytesDifference(lastTerm, term);
|
||||
curField.addTerm(freq, prefix, term.length - prefix);
|
||||
termSuffixes.writeBytes(term.bytes, term.offset + prefix, term.length - prefix);
|
||||
// copy last term
|
||||
if (lastTerm.bytes.length < term.length) {
|
||||
lastTerm.bytes = new byte[ArrayUtil.oversize(term.length, 1)];
|
||||
}
|
||||
lastTerm.offset = 0;
|
||||
lastTerm.length = term.length;
|
||||
System.arraycopy(term.bytes, term.offset, lastTerm.bytes, 0, term.length);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void addPosition(int position, int startOffset, int endOffset,
|
||||
BytesRef payload) throws IOException {
|
||||
assert curField.flags != 0;
|
||||
curField.addPosition(position, startOffset, endOffset - startOffset, payload == null ? 0 : payload.length);
|
||||
if (curField.hasPayloads && payload != null) {
|
||||
payloadBytes.writeBytes(payload.bytes, payload.offset, payload.length);
|
||||
}
|
||||
}
|
||||
|
||||
private boolean triggerFlush() {
|
||||
return termSuffixes.length >= chunkSize || pendingDocs.size() >= chunkSize;
|
||||
}
|
||||
|
||||
private void flush() throws IOException {
|
||||
final int chunkDocs = pendingDocs.size();
|
||||
assert chunkDocs > 0 : chunkDocs;
|
||||
|
||||
// write the index file
|
||||
indexWriter.writeIndex(chunkDocs, vectorsStream.getFilePointer());
|
||||
|
||||
final int docBase = numDocs - chunkDocs;
|
||||
vectorsStream.writeVInt(docBase);
|
||||
vectorsStream.writeVInt(chunkDocs);
|
||||
|
||||
// total number of fields of the chunk
|
||||
final int totalFields = flushNumFields(chunkDocs);
|
||||
|
||||
if (totalFields > 0) {
|
||||
// unique field numbers (sorted)
|
||||
final int[] fieldNums = flushFieldNums();
|
||||
// offsets in the array of unique field numbers
|
||||
flushFields(totalFields, fieldNums);
|
||||
// flags (does the field have positions, offsets, payloads?)
|
||||
flushFlags(totalFields, fieldNums);
|
||||
// number of terms of each field
|
||||
flushNumTerms(totalFields);
|
||||
// prefix and suffix lengths for each field
|
||||
flushTermLengths();
|
||||
// term freqs - 1 (because termFreq is always >=1) for each term
|
||||
flushTermFreqs();
|
||||
// positions for all terms, when enabled
|
||||
flushPositions();
|
||||
// offsets for all terms, when enabled
|
||||
flushOffsets(fieldNums);
|
||||
// payload lengths for all terms, when enabled
|
||||
flushPayloadLengths();
|
||||
|
||||
// compress terms and payloads and write them to the output
|
||||
compressor.compress(termSuffixes.bytes, 0, termSuffixes.length, vectorsStream);
|
||||
}
|
||||
|
||||
// reset
|
||||
pendingDocs.clear();
|
||||
curDoc = null;
|
||||
curField = null;
|
||||
termSuffixes.length = 0;
|
||||
}
|
||||
|
||||
private int flushNumFields(int chunkDocs) throws IOException {
|
||||
if (chunkDocs == 1) {
|
||||
final int numFields = pendingDocs.getFirst().numFields;
|
||||
vectorsStream.writeVInt(numFields);
|
||||
return numFields;
|
||||
} else {
|
||||
writer.reset(vectorsStream);
|
||||
int totalFields = 0;
|
||||
for (DocData dd : pendingDocs) {
|
||||
writer.add(dd.numFields);
|
||||
totalFields += dd.numFields;
|
||||
}
|
||||
writer.finish();
|
||||
return totalFields;
|
||||
}
|
||||
}
|
||||
|
||||
/** Returns a sorted array containing unique field numbers */
|
||||
private int[] flushFieldNums() throws IOException {
|
||||
SortedSet<Integer> fieldNums = new TreeSet<Integer>();
|
||||
for (DocData dd : pendingDocs) {
|
||||
for (FieldData fd : dd.fields) {
|
||||
fieldNums.add(fd.fieldNum);
|
||||
}
|
||||
}
|
||||
|
||||
final int numDistinctFields = fieldNums.size();
|
||||
assert numDistinctFields > 0;
|
||||
final int bitsRequired = PackedInts.bitsRequired(fieldNums.last());
|
||||
final int token = (Math.min(numDistinctFields - 1, 0x07) << 5) | bitsRequired;
|
||||
vectorsStream.writeByte((byte) token);
|
||||
if (numDistinctFields - 1 >= 0x07) {
|
||||
vectorsStream.writeVInt(numDistinctFields - 1 - 0x07);
|
||||
}
|
||||
final PackedInts.Writer writer = PackedInts.getWriterNoHeader(vectorsStream, PackedInts.Format.PACKED, fieldNums.size(), bitsRequired, 1);
|
||||
for (Integer fieldNum : fieldNums) {
|
||||
writer.add(fieldNum);
|
||||
}
|
||||
writer.finish();
|
||||
|
||||
int[] fns = new int[fieldNums.size()];
|
||||
int i = 0;
|
||||
for (Integer key : fieldNums) {
|
||||
fns[i++] = key;
|
||||
}
|
||||
return fns;
|
||||
}
|
||||
|
||||
private void flushFields(int totalFields, int[] fieldNums) throws IOException {
|
||||
final PackedInts.Writer writer = PackedInts.getWriterNoHeader(vectorsStream, PackedInts.Format.PACKED, totalFields, PackedInts.bitsRequired(fieldNums.length - 1), 1);
|
||||
for (DocData dd : pendingDocs) {
|
||||
for (FieldData fd : dd.fields) {
|
||||
final int fieldNumIndex = Arrays.binarySearch(fieldNums, fd.fieldNum);
|
||||
assert fieldNumIndex >= 0;
|
||||
writer.add(fieldNumIndex);
|
||||
}
|
||||
}
|
||||
writer.finish();
|
||||
}
|
||||
|
||||
private void flushFlags(int totalFields, int[] fieldNums) throws IOException {
|
||||
// check if fields always have the same flags
|
||||
boolean nonChangingFlags = true;
|
||||
int[] fieldFlags = new int[fieldNums.length];
|
||||
Arrays.fill(fieldFlags, -1);
|
||||
outer:
|
||||
for (DocData dd : pendingDocs) {
|
||||
for (FieldData fd : dd.fields) {
|
||||
final int fieldNumOff = Arrays.binarySearch(fieldNums, fd.fieldNum);
|
||||
assert fieldNumOff >= 0;
|
||||
if (fieldFlags[fieldNumOff] == -1) {
|
||||
fieldFlags[fieldNumOff] = fd.flags;
|
||||
} else if (fieldFlags[fieldNumOff] != fd.flags) {
|
||||
nonChangingFlags = false;
|
||||
break outer;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (nonChangingFlags) {
|
||||
// write one flag per field num
|
||||
vectorsStream.writeVInt(0);
|
||||
final PackedInts.Writer writer = PackedInts.getWriterNoHeader(vectorsStream, PackedInts.Format.PACKED, fieldFlags.length, FLAGS_BITS, 1);
|
||||
for (int flags : fieldFlags) {
|
||||
assert flags >= 0;
|
||||
writer.add(flags);
|
||||
}
|
||||
assert writer.ord() == fieldFlags.length - 1;
|
||||
writer.finish();
|
||||
} else {
|
||||
// write one flag for every field instance
|
||||
vectorsStream.writeVInt(1);
|
||||
final PackedInts.Writer writer = PackedInts.getWriterNoHeader(vectorsStream, PackedInts.Format.PACKED, totalFields, FLAGS_BITS, 1);
|
||||
for (DocData dd : pendingDocs) {
|
||||
for (FieldData fd : dd.fields) {
|
||||
writer.add(fd.flags);
|
||||
}
|
||||
}
|
||||
assert writer.ord() == totalFields - 1;
|
||||
writer.finish();
|
||||
}
|
||||
}
|
||||
|
||||
private void flushNumTerms(int totalFields) throws IOException {
|
||||
int maxNumTerms = 0;
|
||||
for (DocData dd : pendingDocs) {
|
||||
for (FieldData fd : dd.fields) {
|
||||
maxNumTerms |= fd.numTerms;
|
||||
}
|
||||
}
|
||||
final int bitsRequired = PackedInts.bitsRequired(maxNumTerms);
|
||||
vectorsStream.writeVInt(bitsRequired);
|
||||
final PackedInts.Writer writer = PackedInts.getWriterNoHeader(
|
||||
vectorsStream, PackedInts.Format.PACKED, totalFields, bitsRequired, 1);
|
||||
for (DocData dd : pendingDocs) {
|
||||
for (FieldData fd : dd.fields) {
|
||||
writer.add(fd.numTerms);
|
||||
}
|
||||
}
|
||||
assert writer.ord() == totalFields - 1;
|
||||
writer.finish();
|
||||
}
|
||||
|
||||
private void flushTermLengths() throws IOException {
|
||||
writer.reset(vectorsStream);
|
||||
for (DocData dd : pendingDocs) {
|
||||
for (FieldData fd : dd.fields) {
|
||||
for (int i = 0; i < fd.numTerms; ++i) {
|
||||
writer.add(fd.prefixLengths[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
writer.finish();
|
||||
writer.reset(vectorsStream);
|
||||
for (DocData dd : pendingDocs) {
|
||||
for (FieldData fd : dd.fields) {
|
||||
for (int i = 0; i < fd.numTerms; ++i) {
|
||||
writer.add(fd.suffixLengths[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
writer.finish();
|
||||
}
|
||||
|
||||
private void flushTermFreqs() throws IOException {
|
||||
writer.reset(vectorsStream);
|
||||
for (DocData dd : pendingDocs) {
|
||||
for (FieldData fd : dd.fields) {
|
||||
for (int i = 0; i < fd.numTerms; ++i) {
|
||||
writer.add(fd.freqs[i] - 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
writer.finish();
|
||||
}
|
||||
|
||||
private void flushPositions() throws IOException {
|
||||
writer.reset(vectorsStream);
|
||||
for (DocData dd : pendingDocs) {
|
||||
for (FieldData fd : dd.fields) {
|
||||
if (fd.hasPositions) {
|
||||
int pos = 0;
|
||||
for (int i = 0; i < fd.numTerms; ++i) {
|
||||
int previousPosition = 0;
|
||||
for (int j = 0; j < fd.freqs[i]; ++j) {
|
||||
final int position = positionsBuf[fd .posStart + pos++];
|
||||
writer.add(position - previousPosition);
|
||||
previousPosition = position;
|
||||
}
|
||||
}
|
||||
assert pos == fd.totalPositions;
|
||||
}
|
||||
}
|
||||
}
|
||||
writer.finish();
|
||||
}
|
||||
|
||||
private void flushOffsets(int[] fieldNums) throws IOException {
|
||||
boolean hasOffsets = false;
|
||||
long[] sumPos = new long[fieldNums.length];
|
||||
long[] sumOffsets = new long[fieldNums.length];
|
||||
for (DocData dd : pendingDocs) {
|
||||
for (FieldData fd : dd.fields) {
|
||||
hasOffsets |= fd.hasOffsets;
|
||||
if (fd.hasOffsets && fd.hasPositions) {
|
||||
final int fieldNumOff = Arrays.binarySearch(fieldNums, fd.fieldNum);
|
||||
int pos = 0;
|
||||
for (int i = 0; i < fd.numTerms; ++i) {
|
||||
int previousPos = 0;
|
||||
int previousOff = 0;
|
||||
for (int j = 0; j < fd.freqs[i]; ++j) {
|
||||
final int position = positionsBuf[fd.posStart + pos];
|
||||
final int startOffset = startOffsetsBuf[fd.offStart + pos];
|
||||
sumPos[fieldNumOff] += position - previousPos;
|
||||
sumOffsets[fieldNumOff] += startOffset - previousOff;
|
||||
previousPos = position;
|
||||
previousOff = startOffset;
|
||||
++pos;
|
||||
}
|
||||
}
|
||||
assert pos == fd.totalPositions;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!hasOffsets) {
|
||||
// nothing to do
|
||||
return;
|
||||
}
|
||||
|
||||
final float[] charsPerTerm = new float[fieldNums.length];
|
||||
for (int i = 0; i < fieldNums.length; ++i) {
|
||||
charsPerTerm[i] = (sumPos[i] <= 0 || sumOffsets[i] <= 0) ? 0 : (float) ((double) sumOffsets[i] / sumPos[i]);
|
||||
}
|
||||
|
||||
// start offsets
|
||||
for (int i = 0; i < fieldNums.length; ++i) {
|
||||
vectorsStream.writeInt(Float.floatToRawIntBits(charsPerTerm[i]));
|
||||
}
|
||||
|
||||
writer.reset(vectorsStream);
|
||||
for (DocData dd : pendingDocs) {
|
||||
for (FieldData fd : dd.fields) {
|
||||
if ((fd.flags & OFFSETS) != 0) {
|
||||
final int fieldNumOff = Arrays.binarySearch(fieldNums, fd.fieldNum);
|
||||
final float cpt = charsPerTerm[fieldNumOff];
|
||||
int pos = 0;
|
||||
for (int i = 0; i < fd.numTerms; ++i) {
|
||||
int previousPos = 0;
|
||||
int previousOff = 0;
|
||||
for (int j = 0; j < fd.freqs[i]; ++j) {
|
||||
final int position = fd.hasPositions ? positionsBuf[fd.posStart + pos] : 0;
|
||||
final int startOffset = startOffsetsBuf[fd.offStart + pos];
|
||||
writer.add(startOffset - previousOff - (int) (cpt * (position - previousPos)));
|
||||
previousPos = position;
|
||||
previousOff = startOffset;
|
||||
++pos;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
writer.finish();
|
||||
|
||||
// lengths
|
||||
writer.reset(vectorsStream);
|
||||
for (DocData dd : pendingDocs) {
|
||||
for (FieldData fd : dd.fields) {
|
||||
if ((fd.flags & OFFSETS) != 0) {
|
||||
int pos = 0;
|
||||
for (int i = 0; i < fd.numTerms; ++i) {
|
||||
for (int j = 0; j < fd.freqs[i]; ++j) {
|
||||
writer.add(lengthsBuf[fd.offStart + pos++] - fd.prefixLengths[i] - fd.suffixLengths[i]);
|
||||
}
|
||||
}
|
||||
assert pos == fd.totalPositions;
|
||||
}
|
||||
}
|
||||
}
|
||||
writer.finish();
|
||||
}
|
||||
|
||||
private void flushPayloadLengths() throws IOException {
|
||||
writer.reset(vectorsStream);
|
||||
for (DocData dd : pendingDocs) {
|
||||
for (FieldData fd : dd.fields) {
|
||||
if (fd.hasPayloads) {
|
||||
for (int i = 0; i < fd.totalPositions; ++i) {
|
||||
writer.add(payloadLengthsBuf[fd.payStart + i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
writer.finish();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void finish(FieldInfos fis, int numDocs) throws IOException {
|
||||
if (!pendingDocs.isEmpty()) {
|
||||
flush();
|
||||
}
|
||||
if (numDocs != this.numDocs) {
|
||||
throw new RuntimeException("Wrote " + this.numDocs + " docs, finish called with numDocs=" + numDocs);
|
||||
}
|
||||
indexWriter.finish(numDocs);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Comparator<BytesRef> getComparator() {
|
||||
return BytesRef.getUTF8SortedAsUnicodeComparator();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void addProx(int numProx, DataInput positions, DataInput offsets)
|
||||
throws IOException {
|
||||
assert (curField.hasPositions) == (positions != null);
|
||||
assert (curField.hasOffsets) == (offsets != null);
|
||||
|
||||
if (curField.hasPositions) {
|
||||
final int posStart = curField.posStart + curField.totalPositions;
|
||||
if (posStart + numProx > positionsBuf.length) {
|
||||
positionsBuf = ArrayUtil.grow(positionsBuf, posStart + numProx);
|
||||
}
|
||||
int position = 0;
|
||||
if (curField.hasPayloads) {
|
||||
final int payStart = curField.payStart + curField.totalPositions;
|
||||
if (payStart + numProx > payloadLengthsBuf.length) {
|
||||
payloadLengthsBuf = ArrayUtil.grow(payloadLengthsBuf, payStart + numProx);
|
||||
}
|
||||
for (int i = 0; i < numProx; ++i) {
|
||||
final int code = positions.readVInt();
|
||||
if ((code & 1) != 0) {
|
||||
// This position has a payload
|
||||
final int payloadLength = positions.readVInt();
|
||||
payloadLengthsBuf[payStart + i] = payloadLength;
|
||||
payloadBytes.copyBytes(positions, payloadLength);
|
||||
} else {
|
||||
payloadLengthsBuf[payStart + i] = 0;
|
||||
}
|
||||
position += code >>> 1;
|
||||
positionsBuf[posStart + i] = position;
|
||||
}
|
||||
} else {
|
||||
for (int i = 0; i < numProx; ++i) {
|
||||
position += (positions.readVInt() >>> 1);
|
||||
positionsBuf[posStart + i] = position;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (curField.hasOffsets) {
|
||||
final int offStart = curField.offStart + curField.totalPositions;
|
||||
if (offStart + numProx > startOffsetsBuf.length) {
|
||||
final int newLength = ArrayUtil.oversize(offStart + numProx, 4);
|
||||
startOffsetsBuf = Arrays.copyOf(startOffsetsBuf, newLength);
|
||||
lengthsBuf = Arrays.copyOf(lengthsBuf, newLength);
|
||||
}
|
||||
int lastOffset = 0, startOffset, endOffset;
|
||||
for (int i = 0; i < numProx; ++i) {
|
||||
startOffset = lastOffset + offsets.readVInt();
|
||||
endOffset = startOffset + offsets.readVInt();
|
||||
lastOffset = endOffset;
|
||||
startOffsetsBuf[offStart + i] = startOffset;
|
||||
lengthsBuf[offStart + i] = endOffset - startOffset;
|
||||
}
|
||||
}
|
||||
|
||||
curField.totalPositions += numProx;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int merge(MergeState mergeState) throws IOException {
|
||||
int docCount = 0;
|
||||
int idx = 0;
|
||||
|
||||
for (AtomicReader reader : mergeState.readers) {
|
||||
final SegmentReader matchingSegmentReader = mergeState.matchingSegmentReaders[idx++];
|
||||
CompressingTermVectorsReader matchingVectorsReader = null;
|
||||
if (matchingSegmentReader != null) {
|
||||
final TermVectorsReader vectorsReader = matchingSegmentReader.getTermVectorsReader();
|
||||
// we can only bulk-copy if the matching reader is also a CompressingTermVectorsReader
|
||||
if (vectorsReader != null && vectorsReader instanceof CompressingTermVectorsReader) {
|
||||
matchingVectorsReader = (CompressingTermVectorsReader) vectorsReader;
|
||||
}
|
||||
}
|
||||
|
||||
final int maxDoc = reader.maxDoc();
|
||||
final Bits liveDocs = reader.getLiveDocs();
|
||||
|
||||
if (matchingVectorsReader == null
|
||||
|| matchingVectorsReader.getCompressionMode() != compressionMode
|
||||
|| matchingVectorsReader.getChunkSize() != chunkSize
|
||||
|| matchingVectorsReader.getPackedIntsVersion() != PackedInts.VERSION_CURRENT) {
|
||||
// naive merge...
|
||||
for (int i = nextLiveDoc(0, liveDocs, maxDoc); i < maxDoc; i = nextLiveDoc(i + 1, liveDocs, maxDoc)) {
|
||||
final Fields vectors = reader.getTermVectors(i);
|
||||
addAllDocVectors(vectors, mergeState);
|
||||
++docCount;
|
||||
mergeState.checkAbort.work(300);
|
||||
}
|
||||
} else {
|
||||
final CompressingStoredFieldsIndexReader index = matchingVectorsReader.getIndex();
|
||||
final IndexInput vectorsStream = matchingVectorsReader.getVectorsStream();
|
||||
for (int i = nextLiveDoc(0, liveDocs, maxDoc); i < maxDoc; ) {
|
||||
if (pendingDocs.isEmpty()
|
||||
&& (i == 0 || index.getStartPointer(i - 1) < index.getStartPointer(i))) { // start of a chunk
|
||||
final long startPointer = index.getStartPointer(i);
|
||||
vectorsStream.seek(startPointer);
|
||||
final int docBase = vectorsStream.readVInt();
|
||||
final int chunkDocs = vectorsStream.readVInt();
|
||||
assert docBase + chunkDocs <= matchingSegmentReader.maxDoc();
|
||||
if (docBase + chunkDocs < matchingSegmentReader.maxDoc()
|
||||
&& nextDeletedDoc(docBase, liveDocs, docBase + chunkDocs) == docBase + chunkDocs) {
|
||||
final long chunkEnd = index.getStartPointer(docBase + chunkDocs);
|
||||
final long chunkLength = chunkEnd - vectorsStream.getFilePointer();
|
||||
indexWriter.writeIndex(chunkDocs, this.vectorsStream.getFilePointer());
|
||||
this.vectorsStream.writeVInt(docCount);
|
||||
this.vectorsStream.writeVInt(chunkDocs);
|
||||
this.vectorsStream.copyBytes(vectorsStream, chunkLength);
|
||||
docCount += chunkDocs;
|
||||
this.numDocs += chunkDocs;
|
||||
mergeState.checkAbort.work(300 * chunkDocs);
|
||||
i = nextLiveDoc(docBase + chunkDocs, liveDocs, maxDoc);
|
||||
} else {
|
||||
for (; i < docBase + chunkDocs; i = nextLiveDoc(i + 1, liveDocs, maxDoc)) {
|
||||
final Fields vectors = reader.getTermVectors(i);
|
||||
addAllDocVectors(vectors, mergeState);
|
||||
++docCount;
|
||||
mergeState.checkAbort.work(300);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
final Fields vectors = reader.getTermVectors(i);
|
||||
addAllDocVectors(vectors, mergeState);
|
||||
++docCount;
|
||||
mergeState.checkAbort.work(300);
|
||||
i = nextLiveDoc(i + 1, liveDocs, maxDoc);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
finish(mergeState.fieldInfos, docCount);
|
||||
return docCount;
|
||||
}
|
||||
|
||||
private static int nextLiveDoc(int doc, Bits liveDocs, int maxDoc) {
|
||||
if (liveDocs == null) {
|
||||
return doc;
|
||||
}
|
||||
while (doc < maxDoc && !liveDocs.get(doc)) {
|
||||
++doc;
|
||||
}
|
||||
return doc;
|
||||
}
|
||||
|
||||
private static int nextDeletedDoc(int doc, Bits liveDocs, int maxDoc) {
|
||||
if (liveDocs == null) {
|
||||
return maxDoc;
|
||||
}
|
||||
while (doc < maxDoc && liveDocs.get(doc)) {
|
||||
++doc;
|
||||
}
|
||||
return doc;
|
||||
}
|
||||
|
||||
}
|
|
@@ -71,11 +71,12 @@ public final class BlockPackedReader {
    return i;
  }

  final DataInput in;
  DataInput in;
  final int packedIntsVersion;
  final long valueCount;
  long valueCount;
  final int blockSize;
  final LongsRef values;
  final long[] values;
  final LongsRef valuesRef;
  byte[] blocks;
  int off;
  long ord;

@@ -87,10 +88,17 @@ public final class BlockPackedReader {
   */
  public BlockPackedReader(DataInput in, int packedIntsVersion, int blockSize, long valueCount) {
    checkBlockSize(blockSize);
    this.in = in;
    this.packedIntsVersion = packedIntsVersion;
    this.blockSize = blockSize;
    this.values = new LongsRef(blockSize);
    this.values = new long[blockSize];
    this.valuesRef = new LongsRef(this.values, 0, 0);
    reset(in, valueCount);
  }

  /** Reset the current reader to wrap a stream of <code>valueCount</code>
   * values contained in <code>in</code>. The block size remains unchanged. */
  public void reset(DataInput in, long valueCount) {
    this.in = in;
    assert valueCount >= 0;
    this.valueCount = valueCount;
    off = blockSize;

@@ -159,9 +167,15 @@ public final class BlockPackedReader {

  /** Read the next value. */
  public long next() throws IOException {
    next(1);
    assert values.length == 1;
    return values.longs[values.offset];
    if (ord == valueCount) {
      throw new EOFException();
    }
    if (off == blockSize) {
      refill();
    }
    final long value = values[off++];
    ++ord;
    return value;
  }

  /** Read between <tt>1</tt> and <code>count</code> values. */

@@ -177,11 +191,11 @@ public final class BlockPackedReader {
    count = Math.min(count, blockSize - off);
    count = (int) Math.min(count, valueCount - ord);

    values.offset = off;
    values.length = count;
    valuesRef.offset = off;
    valuesRef.length = count;
    off += count;
    ord += count;
    return values;
    return valuesRef;
  }

  private void refill() throws IOException {

@@ -195,7 +209,7 @@ public final class BlockPackedReader {
    assert minEquals0 || minValue != 0;

    if (bitsPerValue == 0) {
      Arrays.fill(values.longs, minValue);
      Arrays.fill(values, minValue);
    } else {
      final PackedInts.Decoder decoder = PackedInts.getDecoder(PackedInts.Format.PACKED, packedIntsVersion, bitsPerValue);
      final int iterations = blockSize / decoder.valueCount();

@@ -208,11 +222,11 @@ public final class BlockPackedReader {
      final int blocksCount = (int) PackedInts.Format.PACKED.byteCount(packedIntsVersion, valueCount, bitsPerValue);
      in.readBytes(blocks, 0, blocksCount);

      decoder.decode(blocks, 0, values.longs, 0, iterations);
      decoder.decode(blocks, 0, values, 0, iterations);

      if (minValue != 0) {
        for (int i = 0; i < valueCount; ++i) {
          values.longs[i] += minValue;
          values[i] += minValue;
        }
      }
    }

@@ -62,7 +62,7 @@ public final class BlockPackedWriter {
    out.writeByte((byte) i);
  }

  final DataOutput out;
  DataOutput out;
  final long[] values;
  byte[] blocks;
  int off;

@@ -75,8 +75,14 @@ public final class BlockPackedWriter {
   */
  public BlockPackedWriter(DataOutput out, int blockSize) {
    checkBlockSize(blockSize);
    this.out = out;
    reset(out);
    values = new long[blockSize];
  }

  /** Reset this writer to wrap <code>out</code>. The block size remains unchanged. */
  public void reset(DataOutput out) {
    assert out != null;
    this.out = out;
    off = 0;
    ord = 0L;
    finished = false;

@@ -99,7 +105,8 @@ public final class BlockPackedWriter {
  }

  /** Flush all buffered data to disk. This instance is not usable anymore
   * after this method has been called. */
   * after this method has been called until {@link #reset(DataOutput)} has
   * been called. */
  public void finish() throws IOException {
    checkNotFinished();
    if (off > 0) {
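With the reset() methods added above, BlockPackedWriter and BlockPackedReader instances can be reused across streams instead of being reallocated. A small round-trip sketch; ByteArrayDataOutput/ByteArrayDataInput are used only to have a concrete DataOutput/DataInput pair, the buffer size is arbitrary, and imports from org.apache.lucene.store and org.apache.lucene.util.packed are omitted:

  static void roundTrip() throws IOException {
    long[] values = new long[] {3, 1, 4, 1, 5, 9, 2, 6};
    byte[] buffer = new byte[1024]; // plenty for this toy example

    ByteArrayDataOutput out = new ByteArrayDataOutput(buffer);
    BlockPackedWriter writer = new BlockPackedWriter(out, 64); // 64 = block size, as in the term vectors writer
    for (long v : values) {
      writer.add(v);
    }
    writer.finish(); // flushes the last partial block; the instance is usable again after reset(DataOutput)

    ByteArrayDataInput in = new ByteArrayDataInput(buffer);
    BlockPackedReader reader = new BlockPackedReader(in, PackedInts.VERSION_CURRENT, 64, values.length);
    for (int i = 0; i < values.length; ++i) {
      long v = reader.next(); // values come back in insertion order
      assert v == values[i];
    }
    // reader.reset(otherInput, otherCount) would rewrap the same instance without reallocating its buffers
  }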
@@ -1500,7 +1500,15 @@ public class TestIndexWriter extends LuceneTestCase {
      doc.add(newField("c", "val", customType));
      writer.addDocument(doc);
      // Adding just one document does not call flush yet.
      assertEquals("only the stored and term vector files should exist in the directory", 5 + extraFileCount, dir.listAll().length);
      int computedExtraFileCount = 0;
      for (String file : dir.listAll()) {
        if (file.lastIndexOf('.') < 0
            // don't count stored fields and term vectors in
            || !Arrays.asList("fdx", "fdt", "tvx", "tvd", "tvf").contains(file.substring(file.lastIndexOf('.') + 1))) {
          ++computedExtraFileCount;
        }
      }
      assertEquals("only the stored and term vector files should exist in the directory", extraFileCount, computedExtraFileCount);

      doc = new Document();
      doc.add(newField("c", "val", customType));
@ -17,21 +17,38 @@ package org.apache.lucene.index;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.StringReader;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.analysis.CannedTokenStream;
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.Field.Store;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.document.IntField;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.AttributeImpl;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.generators.RandomInts;
|
||||
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
|
||||
|
||||
public class TestPayloadsOnVectors extends LuceneTestCase {
|
||||
|
||||
|
@ -141,4 +158,314 @@ public class TestPayloadsOnVectors extends LuceneTestCase {
|
|||
writer.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
// custom impl to test cases that are forbidden by the default OffsetAttribute impl
|
||||
static class PermissiveOffsetAttributeImpl extends AttributeImpl implements OffsetAttribute {
|
||||
|
||||
int start, end;
|
||||
|
||||
@Override
|
||||
public int startOffset() {
|
||||
return start;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int endOffset() {
|
||||
return end;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setOffset(int startOffset, int endOffset) {
|
||||
// no check!
|
||||
start = startOffset;
|
||||
end = endOffset;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void clear() {
|
||||
start = end = 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
if (other == this) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (other instanceof PermissiveOffsetAttributeImpl) {
|
||||
PermissiveOffsetAttributeImpl o = (PermissiveOffsetAttributeImpl) other;
|
||||
return o.start == start && o.end == end;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return start + 31 * end;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void copyTo(AttributeImpl target) {
|
||||
OffsetAttribute t = (OffsetAttribute) target;
|
||||
t.setOffset(start, end);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static BytesRef randomPayload() {
|
||||
final int len = random().nextInt(5);
|
||||
if (len == 0) {
|
||||
return null;
|
||||
}
|
||||
final BytesRef payload = new BytesRef(len);
|
||||
random().nextBytes(payload.bytes);
|
||||
payload.length = len;
|
||||
return payload;
|
||||
}
|
||||
|
||||
class RandomTokenStream extends TokenStream {
|
||||
|
||||
final String[] terms;
|
||||
final int[] positionsIncrements;
|
||||
final int[] positions;
|
||||
final int[] startOffsets, endOffsets;
|
||||
final BytesRef[] payloads;
|
||||
|
||||
final Map<Integer, Set<Integer>> positionToTerms;
|
||||
final Map<Integer, Set<Integer>> startOffsetToTerms;
|
||||
|
||||
final CharTermAttribute termAtt;
|
||||
final PositionIncrementAttribute piAtt;
|
||||
final OffsetAttribute oAtt;
|
||||
final PayloadAttribute pAtt;
|
||||
int i = 0;
|
||||
|
||||
RandomTokenStream(int len, String[] sampleTerms, boolean weird) {
|
||||
terms = new String[len];
|
||||
positionsIncrements = new int[len];
|
||||
positions = new int[len];
|
||||
startOffsets = new int[len];
|
||||
endOffsets = new int[len];
|
||||
payloads = new BytesRef[len];
|
||||
for (int i = 0; i < len; ++i) {
|
||||
terms[i] = RandomPicks.randomFrom(random(), sampleTerms);
|
||||
if (weird) {
|
||||
positionsIncrements[i] = random().nextInt(1 << 18);
|
||||
startOffsets[i] = random().nextInt();
|
||||
endOffsets[i] = random().nextInt();
|
||||
} else if (i == 0) {
|
||||
positionsIncrements[i] = _TestUtil.nextInt(random(), 1, 1 << 5);
|
||||
startOffsets[i] = _TestUtil.nextInt(random(), 0, 1 << 16);
|
||||
endOffsets[i] = startOffsets[i] + _TestUtil.nextInt(random(), 0, rarely() ? 1 << 10 : 20);
|
||||
} else {
|
||||
positionsIncrements[i] = _TestUtil.nextInt(random(), 0, 1 << 5);
|
||||
startOffsets[i] = startOffsets[i-1] + _TestUtil.nextInt(random(), 0, 1 << 16);
|
||||
endOffsets[i] = startOffsets[i] + _TestUtil.nextInt(random(), 0, rarely() ? 1 << 10 : 20);
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < len; ++i) {
|
||||
if (i == 0) {
|
||||
positions[i] = positionsIncrements[i] - 1;
|
||||
} else {
|
||||
positions[i] = positions[i - 1] + positionsIncrements[i];
|
||||
}
|
||||
}
|
||||
if (rarely()) {
|
||||
Arrays.fill(payloads, randomPayload());
|
||||
} else {
|
||||
for (int i = 0; i < len; ++i) {
|
||||
payloads[i] = randomPayload();
|
||||
}
|
||||
}
|
||||
|
||||
positionToTerms = new HashMap<Integer, Set<Integer>>();
|
||||
startOffsetToTerms = new HashMap<Integer, Set<Integer>>();
|
||||
for (int i = 0; i < len; ++i) {
|
||||
if (!positionToTerms.containsKey(positions[i])) {
|
||||
positionToTerms.put(positions[i], new HashSet<Integer>(1));
|
||||
}
|
||||
positionToTerms.get(positions[i]).add(i);
|
||||
if (!startOffsetToTerms.containsKey(startOffsets[i])) {
|
||||
startOffsetToTerms.put(startOffsets[i], new HashSet<Integer>(1));
|
||||
}
|
||||
startOffsetToTerms.get(startOffsets[i]).add(i);
|
||||
}
|
||||
|
||||
addAttributeImpl(new PermissiveOffsetAttributeImpl());
|
||||
|
||||
termAtt = addAttribute(CharTermAttribute.class);
|
||||
piAtt = addAttribute(PositionIncrementAttribute.class);
|
||||
oAtt = addAttribute(OffsetAttribute.class);
|
||||
pAtt = addAttribute(PayloadAttribute.class);
|
||||
}
|
||||
|
||||
@Override
|
||||
public final boolean incrementToken() throws IOException {
|
||||
if (i < terms.length) {
|
||||
termAtt.setLength(0).append(terms[i]);
|
||||
piAtt.setPositionIncrement(positionsIncrements[i]);
|
||||
oAtt.setOffset(startOffsets[i], endOffsets[i]);
|
||||
pAtt.setPayload(payloads[i]);
|
||||
++i;
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static FieldType randomFieldType() {
|
||||
FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
|
||||
ft.setStoreTermVectors(true);
|
||||
ft.setStoreTermVectorPositions(random().nextBoolean());
|
||||
ft.setStoreTermVectorOffsets(random().nextBoolean());
|
||||
if (random().nextBoolean()) {
|
||||
ft.setStoreTermVectorPositions(true);
|
||||
ft.setStoreTermVectorPayloads(true);
|
||||
}
|
||||
ft.freeze();
|
||||
return ft;
|
||||
}
|
||||
|
||||
public void testRandomVectors() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig iwConf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
|
||||
iwConf.setMaxBufferedDocs(RandomInts.randomIntBetween(random(), 2, 30));
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf);
|
||||
String[] sampleTerms = new String[RandomInts.randomIntBetween(random(), 20, 50)];
|
||||
for (int i = 0; i < sampleTerms.length; ++i) {
|
||||
sampleTerms[i] = _TestUtil.randomUnicodeString(random());
|
||||
}
|
||||
FieldType ft = randomFieldType();
|
||||
// generate random documents and index them
|
||||
final String[] fieldNames = new String[_TestUtil.nextInt(random(), 1, 200)];
|
||||
for (int i = 0; i < fieldNames.length; ++i) {
|
||||
String fieldName;
|
||||
do {
|
||||
fieldName = _TestUtil.randomSimpleString(random());
|
||||
} while ("id".equals(fieldName));
|
||||
fieldNames[i] = fieldName;
|
||||
}
|
||||
final int numDocs = _TestUtil.nextInt(random(), 10, 100);
|
||||
@SuppressWarnings("unchecked")
|
||||
final Map<String, RandomTokenStream>[] fieldValues = new Map[numDocs];
|
||||
for (int i = 0; i < numDocs; ++i) {
|
||||
fieldValues[i] = new HashMap<String, RandomTokenStream>();
|
||||
final int numFields = _TestUtil.nextInt(random(), 0, rarely() ? fieldNames.length : 5);
|
||||
for (int j = 0; j < numFields; ++j) {
|
||||
final String fieldName = fieldNames[(i+j*31) % fieldNames.length];
|
||||
final int tokenStreamLen = _TestUtil.nextInt(random(), 1, rarely() ? 300 : 5);
|
||||
fieldValues[i].put(fieldName, new RandomTokenStream(tokenStreamLen, sampleTerms, rarely()));
|
||||
}
|
||||
}
|
||||
|
||||
// index them
|
||||
for (int i = 0; i < numDocs; ++i) {
|
||||
Document doc = new Document();
|
||||
doc.add(new IntField("id", i, Store.YES));
|
||||
for (Map.Entry<String, RandomTokenStream> entry : fieldValues[i].entrySet()) {
|
||||
doc.add(new Field(entry.getKey(), entry.getValue(), ft));
|
||||
}
|
||||
iw.addDocument(doc);
|
||||
}
|
||||
|
||||
iw.commit();
|
||||
// make sure the format can merge
|
||||
iw.forceMerge(2);
|
||||
|
||||
// read term vectors
|
||||
final DirectoryReader reader = DirectoryReader.open(dir);
|
||||
for (int i = 0; i < 100; ++i) {
|
||||
final int docID = random().nextInt(numDocs);
|
||||
final Map<String, RandomTokenStream> fvs = fieldValues[reader.document(docID).getField("id").numericValue().intValue()];
|
||||
final Fields fields = reader.getTermVectors(docID);
|
||||
if (fvs.isEmpty()) {
|
||||
assertNull(fields);
|
||||
} else {
|
||||
Set<String> fns = new HashSet<String>();
|
||||
for (String field : fields) {
|
||||
fns.add(field);
|
||||
}
|
||||
assertEquals(fields.size(), fns.size());
|
||||
assertEquals(fvs.keySet(), fns);
|
||||
for (String field : fields) {
|
||||
final RandomTokenStream tk = fvs.get(field);
|
||||
assert tk != null;
|
||||
final Terms terms = fields.terms(field);
|
||||
assertEquals(ft.storeTermVectorPositions(), terms.hasPositions());
|
||||
assertEquals(ft.storeTermVectorOffsets(), terms.hasOffsets());
|
||||
assertEquals(1, terms.getDocCount());
|
||||
final TermsEnum termsEnum = terms.iterator(null);
|
||||
while (termsEnum.next() != null) {
|
||||
assertEquals(1, termsEnum.docFreq());
|
||||
final DocsAndPositionsEnum docsAndPositionsEnum = termsEnum.docsAndPositions(null, null);
|
||||
final DocsEnum docsEnum = docsAndPositionsEnum == null ? termsEnum.docs(null, null) : docsAndPositionsEnum;
|
||||
if (ft.storeTermVectorOffsets() || ft.storeTermVectorPositions()) {
|
||||
assertNotNull(docsAndPositionsEnum);
|
||||
}
|
||||
assertEquals(0, docsEnum.nextDoc());
|
||||
if (terms.hasPositions() || terms.hasOffsets()) {
|
||||
final int freq = docsEnum.freq();
|
||||
assertTrue(freq >= 1);
|
||||
if (docsAndPositionsEnum != null) {
|
||||
for (int k = 0; k < freq; ++k) {
|
||||
final int position = docsAndPositionsEnum.nextPosition();
|
||||
final Set<Integer> indexes;
|
||||
if (terms.hasPositions()) {
|
||||
indexes = tk.positionToTerms.get(position);
|
||||
assertNotNull(tk.positionToTerms.keySet().toString() + " does not contain " + position, indexes);
|
||||
} else {
|
||||
indexes = tk.startOffsetToTerms.get(docsAndPositionsEnum.startOffset());
|
||||
assertNotNull(indexes);
|
||||
}
|
||||
if (terms.hasPositions()) {
|
||||
boolean foundPosition = false;
|
||||
for (int index : indexes) {
|
||||
if (new BytesRef(tk.terms[index]).equals(termsEnum.term()) && tk.positions[index] == position) {
|
||||
foundPosition = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
assertTrue(foundPosition);
|
||||
}
|
||||
if (terms.hasOffsets()) {
|
||||
boolean foundOffset = false;
|
||||
for (int index : indexes) {
|
||||
if (new BytesRef(tk.terms[index]).equals(termsEnum.term()) && tk.startOffsets[index] == docsAndPositionsEnum.startOffset() && tk.endOffsets[index] == docsAndPositionsEnum.endOffset()) {
|
||||
foundOffset = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
assertTrue(foundOffset);
|
||||
}
|
||||
if (terms.hasPayloads()) {
|
||||
boolean foundPayload = false;
|
||||
for (int index : indexes) {
|
||||
if (new BytesRef(tk.terms[index]).equals(termsEnum.term()) && equals(tk.payloads[index], docsAndPositionsEnum.getPayload())) {
|
||||
foundPayload = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
assertTrue(foundPayload);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
assertEquals(DocsEnum.NO_MORE_DOCS, docsEnum.nextDoc());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
IOUtils.close(reader, iw, dir);
|
||||
}
|
||||
|
||||
private static boolean equals(Object o1, Object o2) {
|
||||
if (o1 == null) {
|
||||
return o2 == null;
|
||||
} else {
|
||||
return o1.equals(o2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -39,8 +39,6 @@
|
|||
<path id="test.classpath">
|
||||
<path refid="test.base.classpath" />
|
||||
<pathelement location="${build.dir}/classes/examples" />
|
||||
<!-- TODO, cut over tests to MockAnalyzer etc and nuke this dependency -->
|
||||
<pathelement path="${analyzers-common.jar}" />
|
||||
</path>
|
||||
|
||||
<path id="classpath">
|
||||
|
|
|
@ -109,8 +109,7 @@ public class MultiCLSearcher {
|
|||
// behavior - in those
|
||||
// situations other, more low-level interfaces are available, as
|
||||
// demonstrated in other search examples.
|
||||
FacetsCollector facetsCollector = new FacetsCollector(
|
||||
facetSearchParams, indexReader, taxo);
|
||||
FacetsCollector facetsCollector = FacetsCollector.create(facetSearchParams, indexReader, taxo);
|
||||
|
||||
// perform documents search and facets accumulation
|
||||
searcher.search(q, MultiCollector.wrap(topDocsCollector, facetsCollector));
|
||||
|
|
|
@ -104,9 +104,9 @@ public class SimpleSearcher {
|
|||
}
|
||||
|
||||
// Faceted search parameters indicate which facets we are interested in
|
||||
FacetSearchParams facetSearchParams = new FacetSearchParams(Arrays.asList(facetRequests), indexingParams);
|
||||
FacetSearchParams facetSearchParams = new FacetSearchParams(indexingParams, facetRequests);
|
||||
|
||||
FacetsCollector facetsCollector = new FacetsCollector(facetSearchParams, indexReader, taxoReader);
|
||||
FacetsCollector facetsCollector = FacetsCollector.create(facetSearchParams, indexReader, taxoReader);
|
||||
|
||||
// perform documents search and facets accumulation
|
||||
searcher.search(q, MultiCollector.wrap(topDocsCollector, facetsCollector));
|
||||
|
@ -148,9 +148,9 @@ public class SimpleSearcher {
|
|||
|
||||
// assume the user is interested in the second sub-result
|
||||
// (just take the second sub-result returned by the iterator - we know there are 3 results!)
|
||||
Iterator<? extends FacetResultNode> resIterator = fres.getFacetResultNode().getSubResults().iterator();
|
||||
Iterator<? extends FacetResultNode> resIterator = fres.getFacetResultNode().subResults.iterator();
|
||||
resIterator.next(); // skip first result
|
||||
CategoryPath categoryOfInterest = resIterator.next().getLabel();
|
||||
CategoryPath categoryOfInterest = resIterator.next().label;
|
||||
|
||||
// drill-down preparation: turn the base query into a drill-down query for the category of interest
|
||||
Query q2 = DrillDown.query(indexingParams, baseQuery, categoryOfInterest);
|
||||
|
|
|
@ -48,7 +48,7 @@ public class FacetIndexingParams {
|
|||
|
||||
/**
|
||||
* A {@link FacetIndexingParams} which fixes {@link OrdinalPolicy} to
|
||||
* {@link OrdinalPolicy#NO_PARENTS}. This is a singleton equivalent to new
|
||||
* {@link OrdinalPolicy#ALL_PARENTS}. This is a singleton equivalent to new
|
||||
* {@link #FacetIndexingParams()}.
|
||||
*/
|
||||
public static final FacetIndexingParams ALL_PARENTS = new FacetIndexingParams();
|
||||
|
|
|
@ -37,8 +37,8 @@ import org.apache.lucene.index.IndexReader;
|
|||
* <p>
|
||||
* Note: Sampling accumulation (Accumulation over a sampled-set of the results),
|
||||
* does not guarantee accurate values for
|
||||
* {@link FacetResult#getNumValidDescendants()} &
|
||||
* {@link FacetResultNode#getResidue()}.
|
||||
* {@link FacetResult#getNumValidDescendants()} and
|
||||
* {@link FacetResultNode#residue}.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
|
|
|
@ -0,0 +1,346 @@
|
|||
package org.apache.lucene.facet.search;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map.Entry;
|
||||
|
||||
import org.apache.lucene.facet.index.categorypolicy.OrdinalPolicy;
|
||||
import org.apache.lucene.facet.index.params.CategoryListParams;
|
||||
import org.apache.lucene.facet.index.params.FacetIndexingParams;
|
||||
import org.apache.lucene.facet.search.params.CountFacetRequest;
|
||||
import org.apache.lucene.facet.search.params.FacetRequest;
|
||||
import org.apache.lucene.facet.search.params.FacetRequest.SortBy;
|
||||
import org.apache.lucene.facet.search.params.FacetRequest.SortOrder;
|
||||
import org.apache.lucene.facet.search.params.FacetSearchParams;
|
||||
import org.apache.lucene.facet.search.results.FacetResult;
|
||||
import org.apache.lucene.facet.search.results.FacetResultNode;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
import org.apache.lucene.facet.taxonomy.directory.ParallelTaxonomyArrays;
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.DocValues.Source;
|
||||
import org.apache.lucene.search.Collector;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.PriorityQueue;
|
||||
import org.apache.lucene.util.encoding.DGapVInt8IntDecoder;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A {@link Collector} which counts facets associated with matching documents.
|
||||
* This {@link Collector} can be used only in the following conditions:
|
||||
* <ul>
|
||||
* <li>All {@link FacetRequest requests} must be {@link CountFacetRequest}, with
|
||||
* their {@link FacetRequest#getDepth() depth} equals to 1, and
|
||||
* {@link FacetRequest#getNumLabel()} must be ≥
|
||||
* {@link FacetRequest#getNumResults()}. Also, their sorting options must be
|
||||
* {@link SortOrder#DESCENDING} and {@link SortBy#VALUE} (although ties are
|
||||
* broken by ordinals).
|
||||
* <li>Partitions should be disabled (
|
||||
* {@link FacetIndexingParams#getPartitionSize()} should return
|
||||
* Integer.MAX_VALUE).
|
||||
* <li>There can be only one {@link CategoryListParams} in the
|
||||
* {@link FacetIndexingParams}, with {@link DGapVInt8IntDecoder}.
|
||||
* </ul>
|
||||
*
|
||||
* <p>
|
||||
* <b>NOTE:</b> this collector uses {@link DocValues#getSource()} by default,
|
||||
* which pre-loads the values into memory. If your application cannot afford the
|
||||
* RAM, you should use
|
||||
* {@link #CountingFacetsCollector(FacetSearchParams, TaxonomyReader, FacetArrays, boolean)}
|
||||
* and request a direct source (corresponding to
|
||||
* {@link DocValues#getDirectSource()}).
|
||||
*
|
||||
* <p>
|
||||
* <b>NOTE:</b> this collector supports category lists that were indexed with
|
||||
* {@link OrdinalPolicy#NO_PARENTS}, by counting up the parents too, after
|
||||
* resolving the leaf counts. Note though that it is your responsibility to
|
||||
* ensure that no document was indexed with two categories that
|
||||
* share a common parent; otherwise the parent's count will be wrong.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class CountingFacetsCollector extends FacetsCollector {
|
||||
|
||||
private final FacetSearchParams fsp;
|
||||
private final TaxonomyReader taxoReader;
|
||||
private final BytesRef buf = new BytesRef(32);
|
||||
private final FacetArrays facetArrays;
|
||||
private final int[] counts;
|
||||
private final String facetsField;
|
||||
private final boolean useDirectSource;
|
||||
private final HashMap<Source,FixedBitSet> matchingDocs = new HashMap<Source,FixedBitSet>();
|
||||
|
||||
private DocValues facetsValues;
|
||||
private FixedBitSet bits;
|
||||
|
||||
public CountingFacetsCollector(FacetSearchParams fsp, TaxonomyReader taxoReader) {
|
||||
this(fsp, taxoReader, new FacetArrays(taxoReader.getSize()), false);
|
||||
}
|
||||
|
||||
public CountingFacetsCollector(FacetSearchParams fsp, TaxonomyReader taxoReader, FacetArrays facetArrays,
|
||||
boolean useDirectSource) {
|
||||
assert facetArrays.arrayLength >= taxoReader.getSize() : "too small facet array";
|
||||
assert assertParams(fsp) == null : assertParams(fsp);
|
||||
|
||||
this.fsp = fsp;
|
||||
this.taxoReader = taxoReader;
|
||||
this.facetArrays = facetArrays;
|
||||
this.counts = facetArrays.getIntArray();
|
||||
this.facetsField = fsp.indexingParams.getCategoryListParams(null).field;
|
||||
this.useDirectSource = useDirectSource;
|
||||
}
|
||||
|
||||
/**
|
||||
* Asserts that this {@link FacetsCollector} can handle the given
|
||||
* {@link FacetSearchParams}. Returns {@code null} if true, otherwise an error
|
||||
* message.
|
||||
*/
|
||||
static String assertParams(FacetSearchParams fsp) {
|
||||
// verify that all facet requests are CountFacetRequest
|
||||
for (FacetRequest fr : fsp.facetRequests) {
|
||||
if (!(fr instanceof CountFacetRequest)) {
|
||||
return "all FacetRequests must be CountFacetRequest";
|
||||
}
|
||||
if (fr.getDepth() != 1) {
|
||||
return "all requests must be of depth 1";
|
||||
}
|
||||
if (fr.getNumLabel() < fr.getNumResults()) {
|
||||
return "this Collector always labels all requested results";
|
||||
}
|
||||
if (fr.getSortOrder() != SortOrder.DESCENDING) {
|
||||
return "this Collector always sorts results in descending order";
|
||||
}
|
||||
if (fr.getSortBy() != SortBy.VALUE) {
|
||||
return "this Collector always sorts by results' values";
|
||||
}
|
||||
}
|
||||
|
||||
// verify that there's only one CategoryListParams
|
||||
List<CategoryListParams> clps = fsp.indexingParams.getAllCategoryListParams();
|
||||
if (clps.size() != 1) {
|
||||
return "this Collector supports only one CategoryListParams";
|
||||
}
|
||||
|
||||
// verify DGapVInt decoder
|
||||
CategoryListParams clp = clps.get(0);
|
||||
if (clp.createEncoder().createMatchingDecoder().getClass() != DGapVInt8IntDecoder.class) {
|
||||
return "this Collector supports only DGap + VInt encoding";
|
||||
}
|
||||
|
||||
// verify that partitions are disabled
|
||||
if (fsp.indexingParams.getPartitionSize() != Integer.MAX_VALUE) {
|
||||
return "this Collector does not support partitions";
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setNextReader(AtomicReaderContext context) throws IOException {
|
||||
facetsValues = context.reader().docValues(facetsField);
|
||||
if (facetsValues != null) {
|
||||
Source facetSource = useDirectSource ? facetsValues.getDirectSource() : facetsValues.getSource();
|
||||
bits = new FixedBitSet(context.reader().maxDoc());
|
||||
matchingDocs.put(facetSource, bits);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void collect(int doc) throws IOException {
|
||||
if (facetsValues == null) {
|
||||
return;
|
||||
}
|
||||
|
||||
bits.set(doc);
|
||||
}
|
||||
|
||||
private void countFacets() {
|
||||
for (Entry<Source,FixedBitSet> entry : matchingDocs.entrySet()) {
|
||||
Source facetsSource = entry.getKey();
|
||||
FixedBitSet bits = entry.getValue();
|
||||
int doc = 0;
|
||||
int length = bits.length();
|
||||
while (doc < length && (doc = bits.nextSetBit(doc)) != -1) {
|
||||
facetsSource.getBytes(doc, buf);
|
||||
if (buf.length > 0) {
|
||||
// this document has facets
|
||||
int upto = buf.offset + buf.length;
|
||||
int ord = 0;
|
||||
int offset = buf.offset;
|
||||
int prev = 0;
|
||||
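// Each document's facets are stored DGap+VInt encoded: every ordinal is a
// delta (gap) from the previous ordinal, and each gap is written as a
// variable-length byte sequence in which only the last byte has its sign
// bit clear (b >= 0). For example, the ordinals {5, 9, 132} are stored as
// the gaps 5, 4 and 123, one byte each since all three fit in 7 bits.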
while (offset < upto) {
|
||||
byte b = buf.bytes[offset++];
|
||||
if (b >= 0) {
|
||||
prev = ord = ((ord << 7) | b) + prev;
|
||||
counts[ord]++;
|
||||
ord = 0;
|
||||
} else {
|
||||
ord = (ord << 7) | (b & 0x7F);
|
||||
}
|
||||
}
|
||||
}
|
||||
++doc;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void countParents(int[] parents) {
|
||||
// counts[0] is the count of ROOT, which we don't care about and counts[1]
|
||||
// can only update counts[0], so we don't bother to visit it either. Also,
|
||||
// since parents always have lower ordinals than their children, we traverse
|
||||
// the array backwards. this also allows us to update just the immediate
|
||||
// parent's count (actually, otherwise it would be a mistake).
|
||||
for (int i = counts.length - 1; i > 1; i--) {
|
||||
int count = counts[i];
|
||||
if (count > 0) {
|
||||
int parent = parents[i];
|
||||
if (parent != 0) {
|
||||
counts[parent] += count;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized List<FacetResult> getFacetResults() throws IOException {
|
||||
try {
|
||||
// first, count matching documents' facets
|
||||
countFacets();
|
||||
|
||||
ParallelTaxonomyArrays arrays = taxoReader.getParallelTaxonomyArrays();
|
||||
|
||||
if (fsp.indexingParams.getOrdinalPolicy() == OrdinalPolicy.NO_PARENTS) {
|
||||
// need to count parents
|
||||
countParents(arrays.parents());
|
||||
}
|
||||
|
||||
// compute top-K
|
||||
final int[] children = arrays.children();
|
||||
final int[] siblings = arrays.siblings();
|
||||
List<FacetResult> res = new ArrayList<FacetResult>();
|
||||
for (FacetRequest fr : fsp.facetRequests) {
|
||||
int rootOrd = taxoReader.getOrdinal(fr.categoryPath);
|
||||
if (rootOrd == TaxonomyReader.INVALID_ORDINAL) { // category does not exist
|
||||
continue;
|
||||
}
|
||||
FacetResultNode root = new FacetResultNode();
|
||||
root.ordinal = rootOrd;
|
||||
root.label = fr.categoryPath;
|
||||
root.value = counts[rootOrd];
|
||||
if (fr.getNumResults() > taxoReader.getSize()) {
|
||||
// specialize this case, user is interested in all available results
|
||||
ArrayList<FacetResultNode> nodes = new ArrayList<FacetResultNode>();
|
||||
int child = children[rootOrd];
|
||||
while (child != TaxonomyReader.INVALID_ORDINAL) {
|
||||
int count = counts[child];
|
||||
if (count > 0) {
|
||||
FacetResultNode node = new FacetResultNode();
|
||||
node.label = taxoReader.getPath(child);
|
||||
node.value = count;
|
||||
nodes.add(node);
|
||||
}
|
||||
child = siblings[child];
|
||||
}
|
||||
root.residue = 0;
|
||||
root.subResults = nodes;
|
||||
res.add(new FacetResult(fr, root, nodes.size()));
|
||||
continue;
|
||||
}
|
||||
|
||||
// since we use sentinel objects, we cannot reuse PQ. but that's ok because it's not big
|
||||
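// The queue is pre-populated with sentinel nodes (value 0), so a child
// ordinal enters the top-K only if its count beats the current minimum;
// whatever falls out of (or never enters) the queue is added to residue.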
FacetResultNodeQueue pq = new FacetResultNodeQueue(fr.getNumResults(), true);
|
||||
FacetResultNode top = pq.top();
|
||||
int child = children[rootOrd];
|
||||
int numResults = 0; // count the number of results
|
||||
int residue = 0;
|
||||
while (child != TaxonomyReader.INVALID_ORDINAL) {
|
||||
int count = counts[child];
|
||||
if (count > top.value) {
|
||||
residue += top.value;
|
||||
top.value = count;
|
||||
top.ordinal = child;
|
||||
top = pq.updateTop();
|
||||
++numResults;
|
||||
} else {
|
||||
residue += count;
|
||||
}
|
||||
child = siblings[child];
|
||||
}
|
||||
|
||||
// pop() the least (sentinel) elements
|
||||
int pqsize = pq.size();
|
||||
int size = numResults < pqsize ? numResults : pqsize;
|
||||
for (int i = pqsize - size; i > 0; i--) { pq.pop(); }
|
||||
|
||||
// create the FacetResultNodes.
|
||||
FacetResultNode[] subResults = new FacetResultNode[size];
|
||||
for (int i = size - 1; i >= 0; i--) {
|
||||
FacetResultNode node = pq.pop();
|
||||
node.label = taxoReader.getPath(node.ordinal);
|
||||
subResults[i] = node;
|
||||
}
|
||||
root.residue = residue;
|
||||
root.subResults = Arrays.asList(subResults);
|
||||
res.add(new FacetResult(fr, root, size));
|
||||
}
|
||||
return res;
|
||||
} finally {
|
||||
facetArrays.free();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean acceptsDocsOutOfOrder() {
|
||||
// the actual work is done post-collection, so we always support out-of-order.
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setScorer(Scorer scorer) throws IOException {
|
||||
}
|
||||
|
||||
// TODO: review ResultSortUtils queues and check if we can reuse any of them here
|
||||
// and then alleviate the SortOrder/SortBy constraint
|
||||
private static class FacetResultNodeQueue extends PriorityQueue<FacetResultNode> {
|
||||
|
||||
public FacetResultNodeQueue(int maxSize, boolean prepopulate) {
|
||||
super(maxSize, prepopulate);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected FacetResultNode getSentinelObject() {
|
||||
return new FacetResultNode();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean lessThan(FacetResultNode a, FacetResultNode b) {
|
||||
if (a.value < b.value) return true;
|
||||
if (a.value > b.value) return false;
|
||||
// both have the same value, break tie by ordinal
|
||||
return a.ordinal < b.ordinal;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
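A minimal usage sketch of the API above, not part of this change set: it assumes an open IndexSearcher `searcher`, DirectoryReader `indexReader` and TaxonomyReader `taxoReader`, and the "Author" dimension is only a placeholder.

// (imports from org.apache.lucene.facet.* and org.apache.lucene.search.* assumed)
// Count the top-10 "Author" facets over all matching documents.
FacetSearchParams fsp = new FacetSearchParams(FacetIndexingParams.ALL_PARENTS,
    new CountFacetRequest(new CategoryPath("Author"), 10));
// create() picks CountingFacetsCollector when the requests allow it,
// otherwise it falls back to StandardFacetsCollector.
FacetsCollector facetsCollector = FacetsCollector.create(fsp, indexReader, taxoReader);
TopScoreDocCollector topDocsCollector = TopScoreDocCollector.create(10, true);
searcher.search(new MatchAllDocsQuery(), MultiCollector.wrap(topDocsCollector, facetsCollector));
for (FacetResult fr : facetsCollector.getFacetResults()) {
  FacetResultNode root = fr.getFacetResultNode();
  for (FacetResultNode node : root.subResults) { // public members, see LUCENE-4697
    System.out.println(node.label + ": " + node.value);
  }
}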
@ -47,7 +47,7 @@ public final class DrillDown {
|
|||
* @see #term(FacetIndexingParams, CategoryPath)
|
||||
*/
|
||||
public static final Term term(FacetSearchParams sParams, CategoryPath path) {
|
||||
return term(sParams.getFacetIndexingParams(), path);
|
||||
return term(sParams.indexingParams, path);
|
||||
}
|
||||
|
||||
/** Return a drill-down {@link Term} for a category. */
|
||||
|
@ -103,7 +103,7 @@ public final class DrillDown {
|
|||
* @see #query(FacetIndexingParams, Query, CategoryPath...)
|
||||
*/
|
||||
public static final Query query(FacetSearchParams sParams, Query baseQuery, CategoryPath... paths) {
|
||||
return query(sParams.getFacetIndexingParams(), baseQuery, paths);
|
||||
return query(sParams.indexingParams, baseQuery, paths);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -120,7 +120,7 @@ public abstract class FacetResultsHandler {
|
|||
* rendered facet results, fixed their counts, and now it is needed
|
||||
* to sort the results differently according to the fixed counts.
|
||||
* @param facetResult result to be rearranged.
|
||||
* @see FacetResultNode#setValue(double)
|
||||
* @see FacetResultNode#value
|
||||
*/
|
||||
public abstract FacetResult rearrangeFacetResult(FacetResult facetResult);
|
||||
|
||||
|
|
|
@ -138,7 +138,7 @@ public abstract class FacetsAccumulator {
|
|||
|
||||
/** check if all requests are complementable */
|
||||
protected boolean mayComplement() {
|
||||
for (FacetRequest freq:searchParams.getFacetRequests()) {
|
||||
for (FacetRequest freq:searchParams.facetRequests) {
|
||||
if (!freq.supportsComplements()) {
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -3,15 +3,13 @@ package org.apache.lucene.facet.search;
|
|||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.search.Collector;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
|
||||
import org.apache.lucene.facet.search.params.FacetRequest;
|
||||
import org.apache.lucene.facet.search.params.FacetSearchParams;
|
||||
import org.apache.lucene.facet.search.results.FacetResult;
|
||||
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.search.Collector;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
|
@ -31,109 +29,35 @@ import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
|||
*/
|
||||
|
||||
/**
|
||||
* Collector for facet accumulation.
|
||||
* A {@link Collector} which executes faceted search and computes the weight of
|
||||
* requested facets. To get the facet results you should call
|
||||
* {@link #getFacetResults()}.
|
||||
* {@link #create(FacetSearchParams, IndexReader, TaxonomyReader)} returns the
|
||||
* most optimized {@link FacetsCollector} for the given parameters.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class FacetsCollector extends Collector {
|
||||
|
||||
protected final FacetsAccumulator facetsAccumulator;
|
||||
private ScoredDocIdCollector scoreDocIdCollector;
|
||||
private List<FacetResult> results;
|
||||
private Object resultsGuard;
|
||||
|
||||
public abstract class FacetsCollector extends Collector {
|
||||
|
||||
/**
|
||||
* Create a collector for accumulating facets while collecting documents
|
||||
* during search.
|
||||
*
|
||||
* @param facetSearchParams
|
||||
* faceted search parameters defining which facets are required and
|
||||
* how.
|
||||
* @param indexReader
|
||||
* searched index.
|
||||
* @param taxonomyReader
|
||||
* taxonomy containing the facets.
|
||||
* Returns the most optimized {@link FacetsCollector} for the given search
|
||||
* parameters. The returned {@link FacetsCollector} is guaranteed to satisfy
|
||||
* the requested parameters.
|
||||
*/
|
||||
public FacetsCollector(FacetSearchParams facetSearchParams,
|
||||
IndexReader indexReader, TaxonomyReader taxonomyReader) {
|
||||
facetsAccumulator = initFacetsAccumulator(facetSearchParams, indexReader, taxonomyReader);
|
||||
scoreDocIdCollector = initScoredDocCollector(facetSearchParams, indexReader, taxonomyReader);
|
||||
resultsGuard = new Object();
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a {@link ScoredDocIdCollector} to be used as the first phase of
|
||||
* the facet collection. If all facetRequests do not require the
|
||||
* document score, a ScoredDocIdCollector which does not store the document
|
||||
* scores would be returned. Otherwise a SDIC which does store the documents
|
||||
* will be returned, having an initial allocated space for 1000 such
|
||||
* documents' scores.
|
||||
*/
|
||||
protected ScoredDocIdCollector initScoredDocCollector(
|
||||
FacetSearchParams facetSearchParams, IndexReader indexReader,
|
||||
TaxonomyReader taxonomyReader) {
|
||||
boolean scoresNeeded = false;
|
||||
for (FacetRequest frq : facetSearchParams.getFacetRequests()) {
|
||||
if (frq.requireDocumentScore()) {
|
||||
scoresNeeded = true;
|
||||
break;
|
||||
}
|
||||
public static FacetsCollector create(FacetSearchParams fsp, IndexReader indexReader, TaxonomyReader taxoReader) {
|
||||
if (CountingFacetsCollector.assertParams(fsp) == null) {
|
||||
return new CountingFacetsCollector(fsp, taxoReader);
|
||||
}
|
||||
return ScoredDocIdCollector.create(indexReader.maxDoc(), scoresNeeded);
|
||||
|
||||
return new StandardFacetsCollector(fsp, indexReader, taxoReader);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Create the {@link FacetsAccumulator} to be used. Default is
|
||||
* {@link StandardFacetsAccumulator}. Called once at the constructor of the collector.
|
||||
*
|
||||
* @param facetSearchParams
|
||||
* The search params.
|
||||
* @param indexReader
|
||||
* A reader to the index to search in.
|
||||
* @param taxonomyReader
|
||||
* A reader to the active taxonomy.
|
||||
* @return The {@link FacetsAccumulator} to use.
|
||||
* Returns a {@link FacetResult} per {@link FacetRequest} set in
|
||||
* {@link FacetSearchParams}. Note that if one of the {@link FacetRequest
|
||||
* requests} is for a {@link CategoryPath} that does not exist in the taxonomy,
|
||||
* no matching {@link FacetResult} will be returned.
|
||||
*/
|
||||
protected FacetsAccumulator initFacetsAccumulator(FacetSearchParams facetSearchParams,
|
||||
IndexReader indexReader,
|
||||
TaxonomyReader taxonomyReader) {
|
||||
return new StandardFacetsAccumulator(facetSearchParams, indexReader, taxonomyReader);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return accumulated facets results (according to faceted search parameters)
|
||||
* for collected documents.
|
||||
* @throws IOException on error
|
||||
*/
|
||||
public List<FacetResult> getFacetResults() throws IOException {
|
||||
synchronized (resultsGuard) { // over protection
|
||||
if (results == null) {
|
||||
// lazy creation but just once
|
||||
results = facetsAccumulator.accumulate(scoreDocIdCollector.getScoredDocIDs());
|
||||
scoreDocIdCollector = null;
|
||||
}
|
||||
return results;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean acceptsDocsOutOfOrder() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void collect(int doc) throws IOException {
|
||||
scoreDocIdCollector.collect(doc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setNextReader(AtomicReaderContext context) throws IOException {
|
||||
scoreDocIdCollector.setNextReader(context);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setScorer(Scorer scorer) throws IOException {
|
||||
scoreDocIdCollector.setScorer(scorer);
|
||||
}
|
||||
public abstract List<FacetResult> getFacetResults() throws IOException;
|
||||
|
||||
}
|
||||
|
|
|
@ -32,8 +32,8 @@ import org.apache.lucene.facet.search.sampling.Sampler.SampleResult;
|
|||
* <p>
|
||||
* Note: Sampling accumulation (Accumulation over a sampled-set of the results),
|
||||
* does not guarantee accurate values for
|
||||
* {@link FacetResult#getNumValidDescendants()} &
|
||||
* {@link FacetResultNode#getResidue()}.
|
||||
* {@link FacetResult#getNumValidDescendants()} and
|
||||
* {@link FacetResultNode#residue}.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
|
|
|
@ -87,7 +87,7 @@ public class StandardFacetsAccumulator extends FacetsAccumulator {
|
|||
this.facetArrays = facetArrays;
|
||||
// can only be computed later when docids size is known
|
||||
isUsingComplements = false;
|
||||
partitionSize = PartitionsUtils.partitionSize(searchParams.getFacetIndexingParams(), taxonomyReader);
|
||||
partitionSize = PartitionsUtils.partitionSize(searchParams.indexingParams, taxonomyReader);
|
||||
maxPartitions = (int) Math.ceil(this.taxonomyReader.getSize() / (double) partitionSize);
|
||||
accumulateGuard = new Object();
|
||||
}
|
||||
|
@ -95,7 +95,7 @@ public class StandardFacetsAccumulator extends FacetsAccumulator {
|
|||
public StandardFacetsAccumulator(FacetSearchParams searchParams,
|
||||
IndexReader indexReader, TaxonomyReader taxonomyReader) {
|
||||
this(searchParams, indexReader, taxonomyReader, new FacetArrays(
|
||||
PartitionsUtils.partitionSize(searchParams.getFacetIndexingParams(), taxonomyReader)));
|
||||
PartitionsUtils.partitionSize(searchParams.indexingParams, taxonomyReader)));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -112,7 +112,7 @@ public class StandardFacetsAccumulator extends FacetsAccumulator {
|
|||
if (isUsingComplements) {
|
||||
try {
|
||||
totalFacetCounts = TotalFacetCountsCache.getSingleton().getTotalCounts(indexReader, taxonomyReader,
|
||||
searchParams.getFacetIndexingParams());
|
||||
searchParams.indexingParams);
|
||||
if (totalFacetCounts != null) {
|
||||
docids = ScoredDocIdsUtils.getComplementSet(docids, indexReader);
|
||||
} else {
|
||||
|
@ -159,7 +159,7 @@ public class StandardFacetsAccumulator extends FacetsAccumulator {
|
|||
// In this implementation merges happen after each partition,
|
||||
// but other impl could merge only at the end.
|
||||
final HashSet<FacetRequest> handledRequests = new HashSet<FacetRequest>();
|
||||
for (FacetRequest fr : searchParams.getFacetRequests()) {
|
||||
for (FacetRequest fr : searchParams.facetRequests) {
|
||||
// Handle and merge only facet requests which were not already handled.
|
||||
if (handledRequests.add(fr)) {
|
||||
FacetResultsHandler frHndlr = fr.createFacetResultsHandler(taxonomyReader);
|
||||
|
@ -178,7 +178,7 @@ public class StandardFacetsAccumulator extends FacetsAccumulator {
|
|||
|
||||
// gather results from all requests into a list for returning them
|
||||
List<FacetResult> res = new ArrayList<FacetResult>();
|
||||
for (FacetRequest fr : searchParams.getFacetRequests()) {
|
||||
for (FacetRequest fr : searchParams.facetRequests) {
|
||||
FacetResultsHandler frHndlr = fr.createFacetResultsHandler(taxonomyReader);
|
||||
IntermediateFacetResult tmpResult = fr2tmpRes.get(fr);
|
||||
if (tmpResult == null) {
|
||||
|
@ -321,8 +321,8 @@ public class StandardFacetsAccumulator extends FacetsAccumulator {
|
|||
|
||||
HashMap<CategoryListIterator, Aggregator> categoryLists = new HashMap<CategoryListIterator, Aggregator>();
|
||||
|
||||
FacetIndexingParams indexingParams = searchParams.getFacetIndexingParams();
|
||||
for (FacetRequest facetRequest : searchParams.getFacetRequests()) {
|
||||
FacetIndexingParams indexingParams = searchParams.indexingParams;
|
||||
for (FacetRequest facetRequest : searchParams.facetRequests) {
|
||||
Aggregator categoryAggregator = facetRequest.createAggregator(isUsingComplements, facetArrays, taxonomyReader);
|
||||
|
||||
CategoryListIterator cli = indexingParams.getCategoryListParams(facetRequest.categoryPath).createCategoryListIterator(partition);
|
||||
|
|
|
@ -0,0 +1,139 @@
|
|||
package org.apache.lucene.facet.search;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.facet.index.categorypolicy.OrdinalPolicy;
|
||||
import org.apache.lucene.facet.index.params.CategoryListParams;
|
||||
import org.apache.lucene.facet.search.params.FacetRequest;
|
||||
import org.apache.lucene.facet.search.params.FacetSearchParams;
|
||||
import org.apache.lucene.facet.search.results.FacetResult;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A {@link FacetsCollector} which allows initializing e.g.
|
||||
* {@link FacetsAccumulator}. Supports facet partitions, generic
|
||||
* {@link FacetRequest facet requests}, {@link CategoryListParams} etc.
|
||||
*
|
||||
* <p>
|
||||
* <b>NOTE:</b> this collector, with the default {@link FacetsAccumulator} does
|
||||
* not support category lists which were indexed with
|
||||
* {@link OrdinalPolicy#NO_PARENTS}.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class StandardFacetsCollector extends FacetsCollector {
|
||||
|
||||
protected final FacetsAccumulator facetsAccumulator;
|
||||
private ScoredDocIdCollector scoreDocIdCollector;
|
||||
private List<FacetResult> results;
|
||||
private Object resultsGuard;
|
||||
|
||||
/**
|
||||
* Create a collector for accumulating facets while collecting documents
|
||||
* during search.
|
||||
*
|
||||
* @param facetSearchParams
|
||||
* faceted search parameters defining which facets are required and
|
||||
* how.
|
||||
* @param indexReader
|
||||
* searched index.
|
||||
* @param taxonomyReader
|
||||
* taxonomy containing the facets.
|
||||
*/
|
||||
public StandardFacetsCollector(FacetSearchParams facetSearchParams, IndexReader indexReader, TaxonomyReader taxonomyReader) {
|
||||
facetsAccumulator = initFacetsAccumulator(facetSearchParams, indexReader, taxonomyReader);
|
||||
scoreDocIdCollector = initScoredDocCollector(facetSearchParams, indexReader, taxonomyReader);
|
||||
resultsGuard = new Object();
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a {@link ScoredDocIdCollector} to be used as the first phase of
|
||||
* the facet collection. If all facetRequests do not require the
|
||||
* document score, a ScoredDocIdCollector which does not store the document
|
||||
* scores would be returned. Otherwise a SDIC which does store the documents
|
||||
* will be returned, having an initial allocated space for 1000 such
|
||||
* documents' scores.
|
||||
*/
|
||||
protected ScoredDocIdCollector initScoredDocCollector(FacetSearchParams facetSearchParams, IndexReader indexReader,
|
||||
TaxonomyReader taxonomyReader) {
|
||||
boolean scoresNeeded = false;
|
||||
for (FacetRequest frq : facetSearchParams.facetRequests) {
|
||||
if (frq.requireDocumentScore()) {
|
||||
scoresNeeded = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return ScoredDocIdCollector.create(indexReader.maxDoc(), scoresNeeded);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create the {@link FacetsAccumulator} to be used. Default is
|
||||
* {@link StandardFacetsAccumulator}. Called once at the constructor of the collector.
|
||||
*
|
||||
* @param facetSearchParams
|
||||
* The search params.
|
||||
* @param indexReader
|
||||
* A reader to the index to search in.
|
||||
* @param taxonomyReader
|
||||
* A reader to the active taxonomy.
|
||||
* @return The {@link FacetsAccumulator} to use.
|
||||
*/
|
||||
protected FacetsAccumulator initFacetsAccumulator(FacetSearchParams facetSearchParams, IndexReader indexReader,
|
||||
TaxonomyReader taxonomyReader) {
|
||||
return new StandardFacetsAccumulator(facetSearchParams, indexReader, taxonomyReader);
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<FacetResult> getFacetResults() throws IOException {
|
||||
synchronized (resultsGuard) { // over protection
|
||||
if (results == null) {
|
||||
// lazy creation but just once
|
||||
results = facetsAccumulator.accumulate(scoreDocIdCollector.getScoredDocIDs());
|
||||
scoreDocIdCollector = null;
|
||||
}
|
||||
return results;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean acceptsDocsOutOfOrder() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void collect(int doc) throws IOException {
|
||||
scoreDocIdCollector.collect(doc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setNextReader(AtomicReaderContext context) throws IOException {
|
||||
scoreDocIdCollector.setNextReader(context);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setScorer(Scorer scorer) throws IOException {
|
||||
scoreDocIdCollector.setScorer(scorer);
|
||||
}
|
||||
|
||||
}
|
|
@ -7,7 +7,6 @@ import org.apache.lucene.facet.search.params.FacetRequest;
|
|||
import org.apache.lucene.facet.search.results.FacetResult;
|
||||
import org.apache.lucene.facet.search.results.FacetResultNode;
|
||||
import org.apache.lucene.facet.search.results.IntermediateFacetResult;
|
||||
import org.apache.lucene.facet.search.results.MutableFacetResultNode;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
import org.apache.lucene.facet.taxonomy.directory.ParallelTaxonomyArrays;
|
||||
import org.apache.lucene.facet.util.ResultSortUtils;
|
||||
|
@ -64,8 +63,7 @@ public class TopKFacetResultsHandler extends FacetResultsHandler {
|
|||
}
|
||||
|
||||
// TODO (Facet): should initial value of "residue" depend on aggregator if not sum?
|
||||
MutableFacetResultNode parentResultNode =
|
||||
new MutableFacetResultNode(ordinal, value);
|
||||
FacetResultNode parentResultNode = new FacetResultNode(ordinal, value);
|
||||
|
||||
Heap<FacetResultNode> heap = ResultSortUtils.createSuitableHeap(facetRequest);
|
||||
int totalFacets = heapDescendants(ordinal, heap, parentResultNode, facetArrays, offset);
|
||||
|
@ -80,7 +78,7 @@ public class TopKFacetResultsHandler extends FacetResultsHandler {
|
|||
public IntermediateFacetResult mergeResults(IntermediateFacetResult... tmpResults) throws IOException {
|
||||
|
||||
int ordinal = taxonomyReader.getOrdinal(facetRequest.categoryPath);
|
||||
MutableFacetResultNode resNode = new MutableFacetResultNode(ordinal, 0);
|
||||
FacetResultNode resNode = new FacetResultNode(ordinal, 0);
|
||||
|
||||
int totalFacets = 0;
|
||||
Heap<FacetResultNode> heap = null;
|
||||
|
@ -91,7 +89,7 @@ public class TopKFacetResultsHandler extends FacetResultsHandler {
|
|||
TopKFacetResult fres = (TopKFacetResult) tmpFres;
|
||||
totalFacets += fres.getNumValidDescendants();
|
||||
// set the value for the result node representing the facet request
|
||||
resNode.increaseValue(fres.getFacetResultNode().getValue());
|
||||
resNode.value += fres.getFacetResultNode().value;
|
||||
Heap<FacetResultNode> tmpHeap = fres.getHeap();
|
||||
if (heap == null) {
|
||||
heap = tmpHeap;
|
||||
|
@ -102,7 +100,7 @@ public class TopKFacetResultsHandler extends FacetResultsHandler {
|
|||
|
||||
FacetResultNode a = heap.insertWithOverflow(tmpHeap.pop());
|
||||
if (a != null) {
|
||||
resNode.increaseResidue(a.getResidue());
|
||||
resNode.residue += a.residue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -119,8 +117,8 @@ public class TopKFacetResultsHandler extends FacetResultsHandler {
|
|||
* they join the overall priority queue pq of size K.
|
||||
* @return total number of descendants considered here by pq, excluding ordinal itself.
|
||||
*/
|
||||
private int heapDescendants(int ordinal, Heap<FacetResultNode> pq,
|
||||
MutableFacetResultNode parentResultNode, FacetArrays facetArrays, int offset) throws IOException {
|
||||
private int heapDescendants(int ordinal, Heap<FacetResultNode> pq, FacetResultNode parentResultNode,
|
||||
FacetArrays facetArrays, int offset) throws IOException {
|
||||
int partitionSize = facetArrays.arrayLength;
|
||||
int endOffset = offset + partitionSize;
|
||||
ParallelTaxonomyArrays childrenArray = taxonomyReader.getParallelTaxonomyArrays();
|
||||
|
@ -172,16 +170,20 @@ public class TopKFacetResultsHandler extends FacetResultsHandler {
|
|||
if (value != 0 && !Double.isNaN(value)) {
|
||||
// Count current ordinal -- the TOS
|
||||
if (reusable == null) {
|
||||
reusable = new MutableFacetResultNode(tosOrdinal, value);
|
||||
reusable = new FacetResultNode(tosOrdinal, value);
|
||||
} else {
|
||||
// it is safe to cast since reusable was created here.
|
||||
((MutableFacetResultNode)reusable).reset(tosOrdinal, value);
|
||||
reusable.ordinal = tosOrdinal;
|
||||
reusable.value = value;
|
||||
reusable.subResults.clear();
|
||||
reusable.label = null;
|
||||
reusable.residue = 0;
|
||||
}
|
||||
++childrenCounter;
|
||||
reusable = pq.insertWithOverflow(reusable);
|
||||
if (reusable != null) {
|
||||
// TODO (Facet): is other logic (not add) needed, per aggregator?
|
||||
parentResultNode.increaseResidue(reusable.getValue());
|
||||
parentResultNode.residue += reusable.value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -205,9 +207,12 @@ public class TopKFacetResultsHandler extends FacetResultsHandler {
|
|||
TopKFacetResult res = (TopKFacetResult) tmpResult; // cast is safe by contract of this class
|
||||
if (res != null) {
|
||||
Heap<FacetResultNode> heap = res.getHeap();
|
||||
MutableFacetResultNode resNode = (MutableFacetResultNode)res.getFacetResultNode(); // cast safe too
|
||||
FacetResultNode resNode = res.getFacetResultNode();
|
||||
if (resNode.subResults == FacetResultNode.EMPTY_SUB_RESULTS) {
|
||||
resNode.subResults = new ArrayList<FacetResultNode>();
|
||||
}
|
||||
for (int i = heap.size(); i > 0; i--) {
|
||||
resNode.insertSubResult(heap.pop());
|
||||
resNode.subResults.add(0, heap.pop());
|
||||
}
|
||||
}
|
||||
return res;
|
||||
|
@ -218,8 +223,8 @@ public class TopKFacetResultsHandler extends FacetResultsHandler {
|
|||
TopKFacetResult res = (TopKFacetResult) facetResult; // cast is safe by contract of this class
|
||||
Heap<FacetResultNode> heap = res.getHeap();
|
||||
heap.clear(); // just to be safe
|
||||
MutableFacetResultNode topFrn = (MutableFacetResultNode) res.getFacetResultNode(); // safe cast
|
||||
for (FacetResultNode frn : topFrn.getSubResults()) {
|
||||
FacetResultNode topFrn = res.getFacetResultNode();
|
||||
for (FacetResultNode frn : topFrn.subResults) {
|
||||
heap.add(frn);
|
||||
}
|
||||
int size = heap.size();
|
||||
|
@ -227,23 +232,22 @@ public class TopKFacetResultsHandler extends FacetResultsHandler {
|
|||
for (int i = heap.size(); i > 0; i--) {
|
||||
subResults.add(0,heap.pop());
|
||||
}
|
||||
topFrn.setSubResults(subResults);
|
||||
topFrn.subResults = subResults;
|
||||
return res;
|
||||
}
|
||||
|
||||
@Override
|
||||
// label top K sub results
|
||||
public void labelResult(FacetResult facetResult) throws IOException {
|
||||
if (facetResult != null) { // any result to label?
|
||||
FacetResultNode facetResultNode = facetResult.getFacetResultNode();
|
||||
if (facetResultNode != null) { // any result to label?
|
||||
facetResultNode.getLabel(taxonomyReader);
|
||||
facetResultNode.label = taxonomyReader.getPath(facetResultNode.ordinal);
|
||||
int num2label = facetRequest.getNumLabel();
|
||||
for (FacetResultNode frn : facetResultNode.getSubResults()) {
|
||||
for (FacetResultNode frn : facetResultNode.subResults) {
|
||||
if (--num2label < 0) {
|
||||
break;
|
||||
}
|
||||
frn.getLabel(taxonomyReader);
|
||||
frn.label = taxonomyReader.getPath(frn.ordinal);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
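For illustration only, a sketch using the now-public FacetResultNode members as in the labeling code above; printNode is a hypothetical helper, not part of this change:

// Recursively dump a labeled facet result tree using the public fields.
static void printNode(FacetResultNode node, int depth) {
  // label may be null if the node was not labeled (see FacetRequest.getNumLabel())
  String label = node.label == null ? "<unlabeled>" : node.label.toString();
  for (int i = 0; i < depth; i++) System.out.print("  ");
  System.out.println(label + " = " + node.value + " (residue=" + node.residue + ")");
  for (FacetResultNode child : node.subResults) {
    printNode(child, depth + 1);
  }
}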
@ -267,7 +271,7 @@ public class TopKFacetResultsHandler extends FacetResultsHandler {
|
|||
* @param facetResultNode top result node for this facet result.
|
||||
* @param totalFacets - number of children of the targetFacet, up till the requested depth.
|
||||
*/
|
||||
TopKFacetResult(FacetRequest facetRequest, MutableFacetResultNode facetResultNode, int totalFacets) {
|
||||
TopKFacetResult(FacetRequest facetRequest, FacetResultNode facetResultNode, int totalFacets) {
|
||||
super(facetRequest, facetResultNode, totalFacets);
|
||||
}
|
||||
|
||||
|
|
|
@ -9,7 +9,6 @@ import org.apache.lucene.facet.search.params.FacetRequest.SortOrder;
|
|||
import org.apache.lucene.facet.search.results.FacetResult;
|
||||
import org.apache.lucene.facet.search.results.FacetResultNode;
|
||||
import org.apache.lucene.facet.search.results.IntermediateFacetResult;
|
||||
import org.apache.lucene.facet.search.results.MutableFacetResultNode;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
import org.apache.lucene.facet.taxonomy.directory.ParallelTaxonomyArrays;
|
||||
import org.apache.lucene.util.PriorityQueue;
|
||||
|
@ -39,7 +38,7 @@ import org.apache.lucene.util.collections.IntToObjectMap;
|
|||
* subtree of the taxonomy tree. Its root node,
|
||||
* {@link FacetResult#getFacetResultNode()}, is the facet specified by
|
||||
* {@link FacetRequest#categoryPath}, and the enumerated children,
|
||||
* {@link FacetResultNode#getSubResults()}, of each node in that
|
||||
* {@link FacetResultNode#subResults}, of each node in that
|
||||
* {@link FacetResult} are the top K ( = {@link FacetRequest#getNumResults()})
|
||||
* among its children in the taxonomy. Top in the sense
|
||||
* {@link FacetRequest#getSortBy()}, which can be by the values aggregated in
|
||||
|
@ -70,8 +69,7 @@ import org.apache.lucene.util.collections.IntToObjectMap;
|
|||
*/
|
||||
public class TopKInEachNodeHandler extends FacetResultsHandler {
|
||||
|
||||
public TopKInEachNodeHandler(TaxonomyReader taxonomyReader,
|
||||
FacetRequest facetRequest) {
|
||||
public TopKInEachNodeHandler(TaxonomyReader taxonomyReader, FacetRequest facetRequest) {
|
||||
super(taxonomyReader, facetRequest);
|
||||
}
|
||||
|
||||
|
@ -546,7 +544,7 @@ public class TopKInEachNodeHandler extends FacetResultsHandler {
|
|||
|
||||
@Override
|
||||
protected boolean lessThan(FacetResultNode arg1, FacetResultNode arg2) {
|
||||
return merger.leftGoesNow(arg2.getOrdinal(), arg2.getValue(), arg1.getOrdinal(), arg1.getValue());
|
||||
return merger.leftGoesNow(arg2.ordinal, arg2.value, arg1.ordinal, arg1.value);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -718,14 +716,11 @@ public class TopKInEachNodeHandler extends FacetResultsHandler {
|
|||
if (node == null) {
|
||||
return;
|
||||
}
|
||||
node.getLabel(this.taxonomyReader); // attach a label -- category path -- to the node
|
||||
if (null == node.getSubResults()) {
|
||||
return; // if node has no children -- done
|
||||
}
|
||||
node.label = taxonomyReader.getPath(node.ordinal);
|
||||
|
||||
// otherwise, label the first numToLabel of these children, and recursively -- their children.
|
||||
// label the first numToLabel of these children, and recursively -- their children.
|
||||
int numLabeled = 0;
|
||||
for (FacetResultNode frn : node.getSubResults()) {
|
||||
for (FacetResultNode frn : node.subResults) {
|
||||
// go over the children of node from first to last, no more than numToLabel of them
|
||||
recursivelyLabel(frn, numToLabel);
|
||||
if (++numLabeled >= numToLabel) {
|
||||
|
@ -743,24 +738,23 @@ public class TopKInEachNodeHandler extends FacetResultsHandler {
|
|||
public FacetResult rearrangeFacetResult(FacetResult facetResult) {
|
||||
PriorityQueue<FacetResultNode> nodesHeap =
|
||||
new ResultNodeHeap(this.facetRequest.getNumResults(), this.getSuitableACComparator());
|
||||
MutableFacetResultNode topFrn = (MutableFacetResultNode) facetResult.getFacetResultNode(); // safe cast
|
||||
FacetResultNode topFrn = facetResult.getFacetResultNode();
|
||||
rearrangeChilrenOfNode(topFrn, nodesHeap);
|
||||
return facetResult;
|
||||
}
|
||||
|
||||
private void rearrangeChilrenOfNode(FacetResultNode node,
|
||||
PriorityQueue<FacetResultNode> nodesHeap) {
|
||||
private void rearrangeChilrenOfNode(FacetResultNode node, PriorityQueue<FacetResultNode> nodesHeap) {
|
||||
nodesHeap.clear(); // just to be safe
|
||||
for (FacetResultNode frn : node.getSubResults()) {
|
||||
for (FacetResultNode frn : node.subResults) {
|
||||
nodesHeap.add(frn);
|
||||
}
|
||||
int size = nodesHeap.size();
|
||||
ArrayList<FacetResultNode> subResults = new ArrayList<FacetResultNode>(size);
|
||||
while (nodesHeap.size()>0) {
|
||||
subResults.add(0,nodesHeap.pop());
|
||||
while (nodesHeap.size() > 0) {
|
||||
subResults.add(0, nodesHeap.pop());
|
||||
}
|
||||
((MutableFacetResultNode)node).setSubResults(subResults);
|
||||
for (FacetResultNode frn : node.getSubResults()) {
|
||||
node.subResults = subResults;
|
||||
for (FacetResultNode frn : node.subResults) {
|
||||
rearrangeChilrenOfNode(frn, nodesHeap);
|
||||
}
|
||||
|
||||
|
@ -777,13 +771,13 @@ public class TopKInEachNodeHandler extends FacetResultsHandler {
|
|||
if (tmp.isRootNodeIncluded) {
|
||||
value = tmp.rootNodeValue;
|
||||
}
|
||||
MutableFacetResultNode root = generateNode (ordinal, value, tmp.mapToAACOs);
|
||||
FacetResultNode root = generateNode(ordinal, value, tmp.mapToAACOs);
|
||||
return new FacetResult (tmp.facetRequest, root, tmp.totalNumOfFacetsConsidered);
|
||||
|
||||
}
|
||||
|
||||
private MutableFacetResultNode generateNode (int ordinal, double val, IntToObjectMap<AACO> mapToAACOs) {
|
||||
MutableFacetResultNode node = new MutableFacetResultNode(ordinal, val);
|
||||
private FacetResultNode generateNode(int ordinal, double val, IntToObjectMap<AACO> mapToAACOs) {
|
||||
FacetResultNode node = new FacetResultNode(ordinal, val);
|
||||
AACO aaco = mapToAACOs.get(ordinal);
|
||||
if (null == aaco) {
|
||||
return node;
|
||||
|
@ -792,8 +786,8 @@ public class TopKInEachNodeHandler extends FacetResultsHandler {
|
|||
for (int i = 0; i < aaco.ordinals.length; i++) {
|
||||
list.add(generateNode(aaco.ordinals[i], aaco.values[i], mapToAACOs));
|
||||
}
|
||||
node.setSubResults(list);
|
||||
node.setResidue(aaco.residue);
|
||||
node.subResults = list;
|
||||
node.residue = aaco.residue;
|
||||
return node;
|
||||
}
|
||||
|
||||
|
|
|
@ -8,9 +8,7 @@ import java.io.File;
|
|||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import org.apache.lucene.facet.index.params.CategoryListParams;
|
||||
|
@ -150,14 +148,13 @@ public class TotalFacetCounts {
|
|||
}
|
||||
|
||||
// needed because FacetSearchParams does not allow empty FacetRequests
|
||||
private static final List<FacetRequest> DUMMY_REQ = Arrays.asList(
|
||||
new FacetRequest[] { new CountFacetRequest(CategoryPath.EMPTY, 1) });
|
||||
private static final FacetRequest DUMMY_REQ = new CountFacetRequest(CategoryPath.EMPTY, 1);
|
||||
|
||||
static TotalFacetCounts compute(final IndexReader indexReader, final TaxonomyReader taxonomy,
|
||||
final FacetIndexingParams facetIndexingParams) throws IOException {
|
||||
int partitionSize = PartitionsUtils.partitionSize(facetIndexingParams, taxonomy);
|
||||
final int[][] counts = new int[(int) Math.ceil(taxonomy.getSize() /(float) partitionSize)][partitionSize];
|
||||
FacetSearchParams newSearchParams = new FacetSearchParams(DUMMY_REQ, facetIndexingParams);
|
||||
FacetSearchParams newSearchParams = new FacetSearchParams(facetIndexingParams, DUMMY_REQ);
|
||||
//createAllListsSearchParams(facetIndexingParams, this.totalCounts);
|
||||
FacetsAccumulator fe = new StandardFacetsAccumulator(newSearchParams, indexReader, taxonomy) {
|
||||
@Override
|
||||
|
|
|
@@ -35,8 +35,8 @@ import org.apache.lucene.facet.index.params.FacetIndexingParams;
 */
public class FacetSearchParams {

  protected final FacetIndexingParams indexingParams;
  protected final List<FacetRequest> facetRequests;
  public final FacetIndexingParams indexingParams;
  public final List<FacetRequest> facetRequests;

  /**
   * Initializes with the given {@link FacetRequest requests} and default

@@ -57,6 +57,14 @@ public class FacetSearchParams {
  public FacetSearchParams(List<FacetRequest> facetRequests) {
    this(facetRequests, FacetIndexingParams.ALL_PARENTS);
  }

  /**
   * Initilizes with the given {@link FacetRequest requests} and
   * {@link FacetIndexingParams}.
   */
  public FacetSearchParams(FacetIndexingParams indexingParams, FacetRequest... facetRequests) {
    this(Arrays.asList(facetRequests), indexingParams);
  }

  /**
   * Initilizes with the given {@link FacetRequest requests} and

@@ -66,24 +74,8 @@ public class FacetSearchParams {
    if (facetRequests == null || facetRequests.size() == 0) {
      throw new IllegalArgumentException("at least one FacetRequest must be defined");
    }
    this.indexingParams = indexingParams;
    this.facetRequests = facetRequests;
  }

  /**
   * Returns the {@link FacetIndexingParams} that were passed to the
   * constructor.
   */
  public FacetIndexingParams getFacetIndexingParams() {
    return indexingParams;
  }

  /**
   * Returns the list of {@link FacetRequest facet requests} that were passed to
   * the constructor.
   */
  public List<FacetRequest> getFacetRequests() {
    return facetRequests;
    this.indexingParams = indexingParams;
  }

  @Override

@@ -92,10 +84,10 @@ public class FacetSearchParams {
    final char NEWLINE = '\n';

    StringBuilder sb = new StringBuilder("IndexingParams: ");
    sb.append(NEWLINE).append(TAB).append(getFacetIndexingParams());
    sb.append(NEWLINE).append(TAB).append(indexingParams);

    sb.append(NEWLINE).append("FacetRequests:");
    for (FacetRequest facetRequest : getFacetRequests()) {
    for (FacetRequest facetRequest : facetRequests) {
      sb.append(NEWLINE).append(TAB).append(facetRequest);
    }
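For orientation, a minimal usage sketch (not part of the patch) of the reworked FacetSearchParams: requests and indexing params are now read through the public final fields rather than getters, and the new varargs constructor takes the indexing params first. The "Author" dimension is only an illustrative placeholder.

    // illustrative only -- assumes the usual org.apache.lucene.facet.* imports
    FacetSearchParams fsp = new FacetSearchParams(
        FacetIndexingParams.ALL_PARENTS,                       // indexing params first, then the requests
        new CountFacetRequest(new CategoryPath("Author"), 10)); // hypothetical dimension
    FacetIndexingParams fip = fsp.indexingParams;               // replaces getFacetIndexingParams()
    List<FacetRequest> requests = fsp.facetRequests;            // replaces getFacetRequests()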
@@ -1,10 +1,11 @@
package org.apache.lucene.facet.search.results;

import java.io.IOException;
import java.util.Collections;
import java.util.List;

import org.apache.lucene.facet.search.FacetResultsHandler;
import org.apache.lucene.facet.search.params.CountFacetRequest;
import org.apache.lucene.facet.search.params.FacetRequest;
import org.apache.lucene.facet.search.sampling.SampleFixer;
import org.apache.lucene.facet.search.params.FacetRequest.ResultMode;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;

@@ -26,85 +27,86 @@ import org.apache.lucene.facet.taxonomy.TaxonomyReader;
 */

/**
 * Result of faceted search for a certain taxonomy node.
 * Result of faceted search for a certain taxonomy node. This class serves as a
 * bin of different attributes of the result node, such as its {@link #ordinal}
 * as well as {@link #label}. You are not expected to modify those values.
 *
 * @lucene.experimental
 */
public interface FacetResultNode {
public class FacetResultNode {

  public static final List<FacetResultNode> EMPTY_SUB_RESULTS = Collections.emptyList();

  /** The category ordinal of this node. */
  public int ordinal;

  /**
   * String representation of this facet result node.
   * Use with caution: might return a very long string.
   * @param prefix prefix for each result line
   */
  public String toString(String prefix);

  /**
   * Ordinal of the category of this result.
   */
  public int getOrdinal();

  /**
   * Category path of the category of this result, or null if not computed,
   * because the application did not request to compute it.
   * To force computing the label in case not yet computed use
   * {@link #getLabel(TaxonomyReader)}.
   * @see FacetRequest#getNumLabel()
   * @see #getLabel(TaxonomyReader)
   */
  public CategoryPath getLabel();

  /**
   * Category path of the category of this result.
   * If not already computed, will be computed now.
   * <p>
   * Use with <b>caution</b>: loading a label for results is costly, performance wise.
   * Therefore force labels loading only when really needed.
   * @param taxonomyReader taxonomy reader for forcing (lazy) labeling of this result.
   * @throws IOException on error
   * @see FacetRequest#getNumLabel()
   */
  public CategoryPath getLabel(TaxonomyReader taxonomyReader) throws IOException;

  /**
   * Value of this result - usually either count or a value derived from some
   * computing on the association of it.
   */
  public double getValue();

  /**
   * Value of screened out sub results.
   * The {@link CategoryPath label} of this result. May be {@code null} if not
   * computed, in which case use {@link TaxonomyReader#getPath(int)} to label
   * it.
   * <p>
   * If only part of valid results are returned, e.g. because top K were requested,
   * provide info on "what else is there under this result node".
   * <b>NOTE:</b> by default, all nodes are labeled. Only when
   * {@link FacetRequest#getNumLabel()} <
   * {@link FacetRequest#getNumResults()} there will be unlabeled nodes.
   */
  public double getResidue();
  public CategoryPath label;

  /**
   * The value of this result. Its actual type depends on the
   * {@link FacetRequest} used (e.g. in case of {@link CountFacetRequest} it is
   * {@code int}).
   */
  public double value;

  /**
   * Contained sub results.
   * These are either child facets, if a tree result was requested, or simply descendants, in case
   * tree result was not requested. In the first case, all returned are both descendants of
   * this node in the taxonomy and siblings of each other in the taxonomy.
   * In the latter case they are only guaranteed to be descendants of
   * this node in the taxonomy.
   * The total value of screened out sub results. If only part of the results
   * were returned (usually because only the top-K categories are requested),
   * then this provides information on "what else is there under this result
   * node".
   */
  public Iterable<? extends FacetResultNode> getSubResults();

  public double residue;

  /**
   * Number of sub results
   */
  public int getNumSubResults();

  /**
   * Expert: Set a new value for this result node.
   * The sub-results of this result. If {@link FacetRequest#getResultMode()} is
   * {@link ResultMode#PER_NODE_IN_TREE}, every sub result denotes an immediate
   * child of this node. Otherwise, it is a descendant of any level.
   * <p>
   * Allows to modify the value of this facet node.
   * Used for example to tune a sampled value, e.g. by
   * {@link SampleFixer#fixResult(org.apache.lucene.facet.search.ScoredDocIDs, FacetResult)}
   * @param value the new value to set
   * @see #getValue()
   * @see FacetResultsHandler#rearrangeFacetResult(FacetResult)
   * <b>NOTE:</b> this member should not be {@code null}. To denote that a
   * result does not have sub results, set it to {@link #EMPTY_SUB_RESULTS} (or
   * don't modify it).
   */
  public void setValue(double value);
  public List<FacetResultNode> subResults = EMPTY_SUB_RESULTS;

}
  public FacetResultNode() {
    // empty constructor
  }

  public FacetResultNode(int ordinal, double value) {
    this.ordinal = ordinal;
    this.value = value;
  }

  @Override
  public String toString() {
    return toString("");
  }

  /** Returns a String representation of this facet result node. */
  public String toString(String prefix) {
    StringBuilder sb = new StringBuilder(prefix);
    if (label == null) {
      sb.append("not labeled (ordinal=").append(ordinal).append(")");
    } else {
      sb.append(label.toString());
    }
    sb.append(" (").append(Double.toString(value)).append(")");
    if (residue > 0) {
      sb.append(" (residue=").append(residue).append(")");
    }
    for (FacetResultNode sub : subResults) {
      sb.append("\n").append(prefix).append(sub.toString(prefix + " "));
    }
    return sb.toString();
  }

}
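A small sketch (illustrative, not part of this commit) of consuming the now-concrete FacetResultNode through its public members; it mirrors what toString(String) above does and assumes a FacetResult obtained from a FacetsCollector.

    // illustrative only: recursively print a facet result tree via the public fields
    static void dump(FacetResultNode node, String indent) {
      String name = node.label == null ? "ordinal=" + node.ordinal : node.label.toString();
      System.out.println(indent + name + " (" + node.value + ")"); // value replaces getValue()
      for (FacetResultNode child : node.subResults) {              // subResults replaces getSubResults()
        dump(child, indent + "  ");
      }
    }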
@ -1,353 +0,0 @@
|
|||
package org.apache.lucene.facet.search.results;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Mutable implementation for Result of faceted search for a certain taxonomy node.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class MutableFacetResultNode implements FacetResultNode {
|
||||
|
||||
/**
|
||||
* Empty sub results to be returned when there are no results.
|
||||
* We never return null, so that code using this can remain simpler.
|
||||
*/
|
||||
private static final ArrayList<FacetResultNode> EMPTY_SUB_RESULTS = new ArrayList<FacetResultNode>();
|
||||
|
||||
private int ordinal;
|
||||
private CategoryPath label = null;
|
||||
private double value;
|
||||
private double residue;
|
||||
private List<FacetResultNode> subResults;
|
||||
|
||||
/**
|
||||
* Create a Facet Result Node.
|
||||
*
|
||||
* @param ordinal
|
||||
* ordinal in the taxonomy of the category of this result.
|
||||
* @param value
|
||||
* value this result.
|
||||
*/
|
||||
public MutableFacetResultNode(int ordinal, double value) {
|
||||
this(ordinal, value, 0, null, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Reset a facet Result Node.
|
||||
* <p>
|
||||
* Used at the population of facet results, not intended for regular use by
|
||||
* applications.
|
||||
*
|
||||
* @param ordinal
|
||||
* ordinal in the taxonomy of the category of this result.
|
||||
* @param value
|
||||
* value of this result.
|
||||
*/
|
||||
public void reset(int ordinal, double value) {
|
||||
this.ordinal = ordinal;
|
||||
this.value = value;
|
||||
if (subResults != null) {
|
||||
subResults.clear();
|
||||
}
|
||||
label = null;
|
||||
residue = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a Facet Result Node.
|
||||
*
|
||||
* @param ordinal
|
||||
* ordinal in the taxonomy of the category of this result.
|
||||
* @param value
|
||||
* value of this result.
|
||||
* @param residue
|
||||
* Value of screened out sub results.
|
||||
* @param label
|
||||
* label of the category path of this result.
|
||||
* @param subResults
|
||||
* - sub results, usually descendants, sometimes child results, of
|
||||
* this result - depending on the request.
|
||||
*/
|
||||
public MutableFacetResultNode(int ordinal, double value, double residue,
|
||||
CategoryPath label, List<FacetResultNode> subResults) {
|
||||
this.ordinal = ordinal;
|
||||
this.value = value;
|
||||
this.residue = residue;
|
||||
this.label = label;
|
||||
this.subResults = subResults;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a mutable facet result node from another result node
|
||||
* @param other other result node to copy from
|
||||
* @param takeSubResults set to true to take also sub results of other node
|
||||
*/
|
||||
public MutableFacetResultNode(FacetResultNode other, boolean takeSubResults) {
|
||||
this(other.getOrdinal(), other.getValue(), other.getResidue(), other
|
||||
.getLabel(), takeSubResults ? resultsToList(other.getSubResults())
|
||||
: null);
|
||||
}
|
||||
|
||||
private static List<FacetResultNode> resultsToList(
|
||||
Iterable<? extends FacetResultNode> subResults) {
|
||||
if (subResults == null) {
|
||||
return null;
|
||||
}
|
||||
ArrayList<FacetResultNode> res = new ArrayList<FacetResultNode>();
|
||||
for (FacetResultNode r : subResults) {
|
||||
res.add(r);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return toString("");
|
||||
}
|
||||
|
||||
/**
|
||||
* Number of sub results.
|
||||
*/
|
||||
private int numSubResults() {
|
||||
if (subResults == null) {
|
||||
return 0;
|
||||
}
|
||||
return subResults.size();
|
||||
}
|
||||
|
||||
/*
|
||||
* (non-Javadoc)
|
||||
*
|
||||
* @see
|
||||
* org.apache.lucene.facet.search.results2.FacetResultNode#toString(java.lang.
|
||||
* String)
|
||||
*/
|
||||
@Override
|
||||
public String toString(String prefix) {
|
||||
StringBuilder sb = new StringBuilder(prefix);
|
||||
|
||||
sb.append("Facet Result Node with ").append(numSubResults()).append(
|
||||
" sub result nodes.\n");
|
||||
|
||||
// label
|
||||
sb.append(prefix).append("Name: ").append(getLabel()).append("\n");
|
||||
|
||||
// value
|
||||
sb.append(prefix).append("Value: ").append(value).append("\n");
|
||||
|
||||
// residue
|
||||
sb.append(prefix).append("Residue: ").append(residue).append("\n");
|
||||
|
||||
if (subResults != null) {
|
||||
int i = 0;
|
||||
for (FacetResultNode subRes : subResults) {
|
||||
sb.append("\n").append(prefix).append("Subresult #").append(i++)
|
||||
.append("\n").append(subRes.toString(prefix + "\t"));
|
||||
}
|
||||
}
|
||||
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public final int getOrdinal() {
|
||||
return ordinal;
|
||||
}
|
||||
|
||||
@Override
|
||||
public final CategoryPath getLabel() {
|
||||
return label;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the label of the category of this result.
|
||||
* @param label the label to set.
|
||||
* @see #getLabel()
|
||||
*/
|
||||
public void setLabel(CategoryPath label) {
|
||||
this.label = label;
|
||||
}
|
||||
|
||||
@Override
|
||||
public final double getValue() {
|
||||
return value;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the value of this result.
|
||||
*
|
||||
* @param value
|
||||
* the value to set
|
||||
* @see #getValue()
|
||||
*/
|
||||
@Override
|
||||
public void setValue(double value) {
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
/**
|
||||
* increase the value for this result.
|
||||
* @param addedValue the value to add
|
||||
* @see #getValue()
|
||||
*/
|
||||
public void increaseValue(double addedValue) {
|
||||
this.value += addedValue;
|
||||
}
|
||||
|
||||
@Override
|
||||
public final double getResidue() {
|
||||
return residue;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the residue.
|
||||
* @param residue the residue to set
|
||||
* @see #getResidue()
|
||||
*/
|
||||
public void setResidue(double residue) {
|
||||
this.residue = residue;
|
||||
}
|
||||
|
||||
/**
|
||||
* increase the residue for this result.
|
||||
* @param addedResidue the residue to add
|
||||
* @see #getResidue()
|
||||
*/
|
||||
public void increaseResidue(double addedResidue) {
|
||||
this.residue += addedResidue;
|
||||
}
|
||||
|
||||
@Override
|
||||
public final Iterable<? extends FacetResultNode> getSubResults() {
|
||||
return subResults != null ? subResults : EMPTY_SUB_RESULTS;
|
||||
}
|
||||
|
||||
/**
|
||||
* Trim sub results to a given size.
|
||||
* <p>
|
||||
* Note: Although the {@link #getResidue()} is not guaranteed to be
|
||||
* accurate, it is worth fixing it, as possible, by taking under account the
|
||||
* trimmed sub-nodes.
|
||||
*/
|
||||
public void trimSubResults(int size) {
|
||||
if (subResults == null || subResults.size() == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
ArrayList<FacetResultNode> trimmed = new ArrayList<FacetResultNode>(size);
|
||||
for (int i = 0; i < subResults.size() && i < size; i++) {
|
||||
MutableFacetResultNode trimmedNode = toImpl(subResults.get(i));
|
||||
trimmedNode.trimSubResults(size);
|
||||
trimmed.add(trimmedNode);
|
||||
}
|
||||
|
||||
/*
|
||||
* If we are trimming, it means Sampling is in effect and the extra
|
||||
* (over-sampled) results are being trimmed. Although the residue is not
|
||||
* guaranteed to be accurate for Sampling, we try our best to fix it.
|
||||
* The node's residue now will take under account the sub-nodes we're
|
||||
* trimming.
|
||||
*/
|
||||
for (int i = size; i < subResults.size(); i++) {
|
||||
increaseResidue(subResults.get(i).getValue());
|
||||
}
|
||||
|
||||
subResults = trimmed;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the sub results.
|
||||
* @param subResults the sub-results to set
|
||||
*/
|
||||
public void setSubResults(List<FacetResultNode> subResults) {
|
||||
this.subResults = subResults;
|
||||
}
|
||||
|
||||
/**
|
||||
* Append a sub result (as last).
|
||||
* @param subRes sub-result to be appended
|
||||
*/
|
||||
public void appendSubResult(FacetResultNode subRes) {
|
||||
if (subResults == null) {
|
||||
subResults = new ArrayList<FacetResultNode>();
|
||||
}
|
||||
subResults.add(subRes);
|
||||
}
|
||||
|
||||
/**
|
||||
* Insert sub result (as first).
|
||||
* @param subRes sub-result to be inserted
|
||||
*/
|
||||
public void insertSubResult(FacetResultNode subRes) {
|
||||
if (subResults == null) {
|
||||
subResults = new ArrayList<FacetResultNode>();
|
||||
}
|
||||
subResults.add(0, subRes);
|
||||
}
|
||||
|
||||
/*
|
||||
* (non-Javadoc)
|
||||
*
|
||||
* @see
|
||||
* org.apache.lucene.facet.search.results.FacetResultNode#getLabel(org.apache.lucene
|
||||
* .facet.taxonomy.TaxonomyReader)
|
||||
*/
|
||||
@Override
|
||||
public final CategoryPath getLabel(TaxonomyReader taxonomyReader)
|
||||
throws IOException {
|
||||
if (label == null) {
|
||||
label = taxonomyReader.getPath(ordinal);
|
||||
}
|
||||
return label;
|
||||
}
|
||||
|
||||
/*
|
||||
* (non-Javadoc)
|
||||
*
|
||||
* @see org.apache.lucene.facet.search.results.FacetResultNode#getNumSubResults()
|
||||
*/
|
||||
@Override
|
||||
public final int getNumSubResults() {
|
||||
return subResults == null ? 0 : subResults.size();
|
||||
}
|
||||
|
||||
/**
|
||||
* Internal utility: turn a result node into an implementation class
|
||||
* with richer API that allows modifying it.
|
||||
* <p>
|
||||
* In case that input result node is already of an implementation
|
||||
* class only casting is done, but in any case we pay the price
|
||||
* of checking "instance of".
|
||||
* @param frn facet result node to be turned into an implementation class object
|
||||
*/
|
||||
public static MutableFacetResultNode toImpl(FacetResultNode frn) {
|
||||
if (frn instanceof MutableFacetResultNode) {
|
||||
return (MutableFacetResultNode) frn;
|
||||
}
|
||||
return new MutableFacetResultNode(frn, true);
|
||||
}
|
||||
|
||||
}
|
|
@@ -11,7 +11,6 @@ import org.apache.lucene.facet.search.params.FacetRequest;
import org.apache.lucene.facet.search.params.FacetSearchParams;
import org.apache.lucene.facet.search.results.FacetResult;
import org.apache.lucene.facet.search.results.FacetResultNode;
import org.apache.lucene.facet.search.results.MutableFacetResultNode;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.IndexReader;

@@ -41,7 +40,7 @@ import org.apache.lucene.index.IndexReader;
 * Note: Sampling accumulation (Accumulation over a sampled-set of the results),
 * does not guarantee accurate values for
 * {@link FacetResult#getNumValidDescendants()} &
 * {@link FacetResultNode#getResidue()}.
 * {@link FacetResultNode#residue}.
 *
 * @lucene.experimental
 */

@@ -169,12 +168,39 @@ public abstract class Sampler {

    FacetRequest origFrq = sampledFreq.orig;

    MutableFacetResultNode trimmedRootNode = MutableFacetResultNode.toImpl(facetResult.getFacetResultNode());
    trimmedRootNode.trimSubResults(origFrq.getNumResults());
    FacetResultNode trimmedRootNode = facetResult.getFacetResultNode();
    trimSubResults(trimmedRootNode, origFrq.getNumResults());

    return new FacetResult(origFrq, trimmedRootNode, facetResult.getNumValidDescendants());
  }

  /** Trim sub results to a given size. */
  private void trimSubResults(FacetResultNode node, int size) {
    if (node.subResults == FacetResultNode.EMPTY_SUB_RESULTS || node.subResults.size() == 0) {
      return;
    }

    ArrayList<FacetResultNode> trimmed = new ArrayList<FacetResultNode>(size);
    for (int i = 0; i < node.subResults.size() && i < size; i++) {
      FacetResultNode trimmedNode = node.subResults.get(i);
      trimSubResults(trimmedNode, size);
      trimmed.add(trimmedNode);
    }

    /*
     * If we are trimming, it means Sampling is in effect and the extra
     * (over-sampled) results are being trimmed. Although the residue is not
     * guaranteed to be accurate for Sampling, we try our best to fix it.
     * The node's residue now will take under account the sub-nodes we're
     * trimming.
     */
    for (int i = size; i < node.subResults.size(); i++) {
      node.residue += node.subResults.get(i).value;
    }

    node.subResults = trimmed;
  }

  /**
   * Over-sampled search params, wrapping each request with an over-sampled one.
   */

@@ -184,11 +210,11 @@ public abstract class Sampler {
    double overSampleFactor = getSamplingParams().getOversampleFactor();
    if (overSampleFactor > 1) { // any factoring to do?
      List<FacetRequest> facetRequests = new ArrayList<FacetRequest>();
      for (FacetRequest frq : original.getFacetRequests()) {
      for (FacetRequest frq : original.facetRequests) {
        int overSampledNumResults = (int) Math.ceil(frq.getNumResults() * overSampleFactor);
        facetRequests.add(new OverSampledFacetRequest(frq, overSampledNumResults));
      }
      res = new FacetSearchParams(facetRequests, original.getFacetIndexingParams());
      res = new FacetSearchParams(facetRequests, original.indexingParams);
    }
    return res;
  }
@@ -49,7 +49,7 @@ import org.apache.lucene.index.IndexReader;
 * Note: Sampling accumulation (Accumulation over a sampled-set of the results),
 * does not guarantee accurate values for
 * {@link FacetResult#getNumValidDescendants()} &
 * {@link FacetResultNode#getResidue()}.
 * {@link FacetResultNode#residue}.
 *
 * @see Sampler
 * @lucene.experimental

@@ -74,10 +74,9 @@ class TakmiSampleFixer implements SampleFixer {
   * docids in effect
   * @throws IOException If there is a low-level I/O error.
   */
  private void fixResultNode(FacetResultNode facetResNode, ScoredDocIDs docIds)
      throws IOException {
  private void fixResultNode(FacetResultNode facetResNode, ScoredDocIDs docIds) throws IOException {
    recount(facetResNode, docIds);
    for (FacetResultNode frn : facetResNode.getSubResults()) {
    for (FacetResultNode frn : facetResNode.subResults) {
      fixResultNode(frn, docIds);
    }
  }

@@ -101,7 +100,10 @@ class TakmiSampleFixer implements SampleFixer {
     * facet results was exercise, we need to calculate them anyway, so
     * in essence sampling with recounting spends some extra cycles for
     * labeling results for which labels are not required. */
    CategoryPath catPath = fresNode.getLabel(taxonomyReader); // force labeling
    if (fresNode.label == null) {
      fresNode.label = taxonomyReader.getPath(fresNode.ordinal);
    }
    CategoryPath catPath = fresNode.label;

    Term drillDownTerm = DrillDown.term(searchParams, catPath);
    // TODO (Facet): avoid Multi*?

@@ -109,8 +111,7 @@ class TakmiSampleFixer implements SampleFixer {
    int updatedCount = countIntersection(MultiFields.getTermDocsEnum(indexReader, liveDocs,
        drillDownTerm.field(), drillDownTerm.bytes(),
        0), docIds.iterator());

    fresNode.setValue(updatedCount);
    fresNode.value = updatedCount;
  }

  /**
@@ -48,12 +48,19 @@ public class CategoryPath implements Comparable<CategoryPath> {

  // Used by subpath
  private CategoryPath(CategoryPath copyFrom, int prefixLen) {
    // while the code which calls this method is safe, at some point a test
    // tripped on AIOOBE in toString, but we failed to reproduce. adding the
    // assert as a safety check.
    assert prefixLen > 0 && prefixLen <= copyFrom.components.length :
        "prefixLen cannot be negative nor larger than the given components' length: prefixLen=" + prefixLen
        + " components.length=" + copyFrom.components.length;
    this.components = copyFrom.components;
    length = prefixLen;
  }

  /** Construct from the given path components. */
  public CategoryPath(String... components) {
    assert components.length > 0 : "use CategoryPath.EMPTY to create an empty path";
    this.components = components;
    length = components.length;
  }
@@ -14,7 +14,6 @@ import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.document.Document;

@@ -30,7 +29,7 @@ import org.apache.lucene.facet.taxonomy.writercache.cl2o.Cl2oTaxonomyWriterCache
import org.apache.lucene.facet.taxonomy.writercache.lru.LruTaxonomyWriterCache;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.CorruptIndexException; // javadocs
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;

@@ -45,7 +44,7 @@ import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException; // javadocs
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.NativeFSLockFactory;
import org.apache.lucene.store.SimpleFSLockFactory;
import org.apache.lucene.util.BytesRef;

@@ -303,8 +302,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {

    // Make sure we use a MergePolicy which always merges adjacent segments and thus
    // keeps the doc IDs ordered as well (this is crucial for the taxonomy index).
    return new IndexWriterConfig(Version.LUCENE_50,
        new KeywordAnalyzer()).setOpenMode(openMode).setMergePolicy(
    return new IndexWriterConfig(Version.LUCENE_50, null).setOpenMode(openMode).setMergePolicy(
        new LogByteSizeMergePolicy());
  }
@@ -0,0 +1,98 @@
package org.apache.lucene.facet.util;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.File;
import java.io.IOException;
import java.io.PrintStream;

import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.ParallelTaxonomyArrays;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/** Prints how many ords are under each dimension. */

public class PrintTaxonomyStats {

  public static void main(String[] args) throws IOException {
    boolean printTree = false;
    String path = null;
    for(int i=0;i<args.length;i++) {
      if (args[i].equals("-printTree")) {
        printTree = true;
      } else {
        path = args[i];
      }
    }
    if (args.length != (printTree ? 2 : 1)) {
      System.out.println("\nUsage: java -classpath ... org.apache.lucene.facet.util.PrintTaxonomyStats [-printTree] /path/to/taxononmy/index\n");
      System.exit(1);
    }
    Directory dir = FSDirectory.open(new File(path));
    TaxonomyReader r = new DirectoryTaxonomyReader(dir);
    printStats(r, System.out, printTree);
    r.close();
    dir.close();
  }

  public static void printStats(TaxonomyReader r, PrintStream out, boolean printTree) throws IOException {
    ParallelTaxonomyArrays arrays = r.getParallelTaxonomyArrays();
    //int[] parents = arrays.parents();
    int[] children = arrays.children();
    int[] siblings = arrays.siblings();
    out.println(r.getSize() + " total categories.");

    int childOrd = children[TaxonomyReader.ROOT_ORDINAL];
    while(childOrd != -1) {
      CategoryPath cp = r.getPath(childOrd);
      int childOrd2 = children[childOrd];
      int numImmediateChildren = 0;
      while(childOrd2 != -1) {
        numImmediateChildren++;
        childOrd2 = siblings[childOrd2];
      }
      out.println("/" + cp + ": " + numImmediateChildren + " immediate children; " + (1+countAllChildren(r, childOrd, children, siblings)) + " total categories");
      if (printTree) {
        printAllChildren(out, r, childOrd, children, siblings, " ", 1);
      }
      childOrd = siblings[childOrd];
    }
  }

  private static int countAllChildren(TaxonomyReader r, int ord, int[] children, int[] siblings) throws IOException {
    int childOrd = children[ord];
    int count = 0;
    while(childOrd != -1) {
      count += 1+countAllChildren(r, childOrd, children, siblings);
      childOrd = siblings[childOrd];
    }
    return count;
  }

  private static void printAllChildren(PrintStream out, TaxonomyReader r, int ord, int[] children, int[] siblings, String indent, int depth) throws IOException {
    int childOrd = children[ord];
    while(childOrd != -1) {
      out.println(indent + "/" + r.getPath(childOrd).components[depth]);
      printAllChildren(out, r, childOrd, children, siblings, indent + " ", depth+1);
      childOrd = siblings[childOrd];
    }
  }
}
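The command-line form is shown in main() above; the following is a hedged sketch of driving the same tool programmatically. The taxonomy index path is a placeholder.

    // illustrative only: print summary stats for an existing taxonomy index
    Directory taxoDir = FSDirectory.open(new File("/path/to/taxonomy/index")); // placeholder path
    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
    PrintTaxonomyStats.printStats(taxoReader, System.out, true /* printTree */);
    taxoReader.close();
    taxoDir.close();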
@@ -73,12 +73,12 @@ public class ResultSortUtils {

    @Override
    protected boolean lessThan(FacetResultNode arg0, FacetResultNode arg1) {
      double value0 = arg0.getValue();
      double value1 = arg1.getValue();
      double value0 = arg0.value;
      double value1 = arg1.value;

      int valueCompare = Double.compare(value0, value1);
      if (valueCompare == 0) {
        return arg0.getOrdinal() < arg1.getOrdinal();
        return arg0.ordinal < arg1.ordinal;
      }

      return valueCompare < 0;

@@ -93,40 +93,38 @@ public class ResultSortUtils {

    @Override
    protected boolean lessThan(FacetResultNode arg0, FacetResultNode arg1) {
      double value0 = arg0.getValue();
      double value1 = arg1.getValue();
      double value0 = arg0.value;
      double value1 = arg1.value;

      int valueCompare = Double.compare(value0, value1);
      if (valueCompare == 0) {
        return arg0.getOrdinal() > arg1.getOrdinal();
        return arg0.ordinal > arg1.ordinal;
      }

      return valueCompare > 0;
    }
  }

  private static class MinOrdinalHeap extends
      PriorityQueue<FacetResultNode> implements Heap<FacetResultNode> {
  private static class MinOrdinalHeap extends PriorityQueue<FacetResultNode> implements Heap<FacetResultNode> {
    public MinOrdinalHeap(int size) {
      super(size);
    }

    @Override
    protected boolean lessThan(FacetResultNode arg0, FacetResultNode arg1) {
      return arg0.getOrdinal() < arg1.getOrdinal();
      return arg0.ordinal < arg1.ordinal;
    }

  }

  private static class MaxOrdinalHeap extends
      PriorityQueue<FacetResultNode> implements Heap<FacetResultNode> {
  private static class MaxOrdinalHeap extends PriorityQueue<FacetResultNode> implements Heap<FacetResultNode> {
    public MaxOrdinalHeap(int size) {
      super(size);
    }

    @Override
    protected boolean lessThan(FacetResultNode arg0, FacetResultNode arg1) {
      return arg0.getOrdinal() > arg1.getOrdinal();
      return arg0.ordinal > arg1.ordinal;
    }

  }

@@ -156,10 +154,9 @@ public class ResultSortUtils {
    Collections.sort(resultNodes, new Comparator<FacetResultNode>() {
      @Override
      public int compare(FacetResultNode o1, FacetResultNode o2) {
        int value = Double.compare(o1.getValue(), o2
            .getValue());
        int value = Double.compare(o1.value, o2.value);
        if (value == 0) {
          value = o1.getOrdinal() - o2.getOrdinal();
          value = o1.ordinal - o2.ordinal;
        }
        if (accending) {
          value = -value;

@@ -198,4 +195,5 @@ public class ResultSortUtils {
      resultNodes.clear();
    }
  }

}
@@ -196,7 +196,7 @@ public abstract class FacetTestBase extends LuceneTestCase {
   * test with different faceted search params.
   */
  protected FacetSearchParams getFacetSearchParams(FacetIndexingParams iParams, FacetRequest... facetRequests) {
    return new FacetSearchParams(Arrays.asList(facetRequests), iParams);
    return new FacetSearchParams(iParams, facetRequests);
  }

  /**

@@ -246,7 +246,7 @@ public abstract class FacetTestBase extends LuceneTestCase {
  /** convenience method: convert sub results to an array */
  protected static FacetResultNode[] resultNodesAsArray(FacetResultNode parentRes) {
    ArrayList<FacetResultNode> a = new ArrayList<FacetResultNode>();
    for (FacetResultNode frn : parentRes.getSubResults()) {
    for (FacetResultNode frn : parentRes.subResults) {
      a.add(frn);
    }
    return a.toArray(new FacetResultNode[0]);

@@ -305,42 +305,27 @@ public abstract class FacetTestBase extends LuceneTestCase {

  /** Validate counts for returned facets, and that there are not too many results */
  private static void assertCountsAndCardinality(Map<CategoryPath,Integer> facetCountsTruth, FacetResultNode resNode, int reqNumResults) throws Exception {
    int actualNumResults = resNode.getNumSubResults();
    int actualNumResults = resNode.subResults.size();
    if (VERBOSE) {
      System.out.println("NumResults: " + actualNumResults);
    }
    assertTrue("Too many results!", actualNumResults <= reqNumResults);
    for (FacetResultNode subRes : resNode.getSubResults()) {
      assertEquals("wrong count for: "+subRes, facetCountsTruth.get(subRes.getLabel()).intValue(), (int)subRes.getValue());
    for (FacetResultNode subRes : resNode.subResults) {
      assertEquals("wrong count for: "+subRes, facetCountsTruth.get(subRes.label).intValue(), (int)subRes.value);
      assertCountsAndCardinality(facetCountsTruth, subRes, reqNumResults); // recurse into child results
    }
  }


  /** Validate results equality */
  protected static void assertSameResults(List<FacetResult> expected, List<FacetResult> actual) {
    String expectedResults = resStringValueOnly(expected);
    String actualResults = resStringValueOnly(actual);
    if (!expectedResults.equals(actualResults)) {
      System.err.println("Results are not the same!");
      System.err.println("Expected:\n" + expectedResults);
      System.err.println("Actual:\n" + actualResults);
      throw new NotSameResultError();
    }
  }

  /** exclude the residue and numDecendants because it is incorrect in sampling */
  private static final String resStringValueOnly(List<FacetResult> results) {
    StringBuilder sb = new StringBuilder();
    for (FacetResult facetRes : results) {
      sb.append(facetRes.toString()).append('\n');
    }
    return sb.toString().replaceAll("Residue:.*.0", "").replaceAll("Num valid Descendants.*", "");
  }

  /** Special Error class for ability to ignore only this error and retry... */
  public static class NotSameResultError extends Error {
    public NotSameResultError() {
      super("Results are not the same!");
    assertEquals("wrong number of facet results", expected.size(), actual.size());
    int size = expected.size();
    for (int i = 0; i < size; i++) {
      FacetResult expectedResult = expected.get(i);
      FacetResult actualResult = actual.get(i);
      String expectedStr = FacetTestUtils.toSimpleString(expectedResult);
      String actualStr = FacetTestUtils.toSimpleString(actualResult);
      assertEquals("Results not the same!\nExpected:" + expectedStr + "\nActual:\n" + actualStr, expectedStr, actualStr);
    }
  }
@ -4,12 +4,14 @@ import java.io.IOException;
|
|||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.facet.index.params.FacetIndexingParams;
|
||||
import org.apache.lucene.facet.search.FacetsCollector;
|
||||
import org.apache.lucene.facet.search.params.CountFacetRequest;
|
||||
import org.apache.lucene.facet.search.params.FacetRequest;
|
||||
import org.apache.lucene.facet.search.params.FacetSearchParams;
|
||||
import org.apache.lucene.facet.search.results.FacetResult;
|
||||
import org.apache.lucene.facet.search.results.FacetResultNode;
|
||||
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
|
||||
|
@ -45,71 +47,6 @@ import org.apache.lucene.util.LuceneTestCase;
|
|||
|
||||
public class FacetTestUtils {
|
||||
|
||||
public static Directory[][] createIndexTaxonomyDirs(int number) {
|
||||
Directory[][] dirs = new Directory[number][2];
|
||||
for (int i = 0; i < number; i++) {
|
||||
dirs[i][0] = LuceneTestCase.newDirectory();
|
||||
dirs[i][1] = LuceneTestCase.newDirectory();
|
||||
}
|
||||
return dirs;
|
||||
}
|
||||
|
||||
public static IndexTaxonomyReaderPair[] createIndexTaxonomyReaderPair(
|
||||
Directory[][] dirs) throws IOException {
|
||||
IndexTaxonomyReaderPair[] pairs = new IndexTaxonomyReaderPair[dirs.length];
|
||||
for (int i = 0; i < dirs.length; i++) {
|
||||
IndexTaxonomyReaderPair pair = new IndexTaxonomyReaderPair();
|
||||
pair.indexReader = DirectoryReader.open(dirs[i][0]);
|
||||
pair.indexSearcher = new IndexSearcher(pair.indexReader);
|
||||
pair.taxReader = new DirectoryTaxonomyReader(dirs[i][1]);
|
||||
pairs[i] = pair;
|
||||
}
|
||||
return pairs;
|
||||
}
|
||||
|
||||
public static IndexTaxonomyWriterPair[] createIndexTaxonomyWriterPair(
|
||||
Directory[][] dirs) throws IOException {
|
||||
IndexTaxonomyWriterPair[] pairs = new IndexTaxonomyWriterPair[dirs.length];
|
||||
for (int i = 0; i < dirs.length; i++) {
|
||||
IndexTaxonomyWriterPair pair = new IndexTaxonomyWriterPair();
|
||||
pair.indexWriter = new IndexWriter(dirs[i][0], new IndexWriterConfig(
|
||||
LuceneTestCase.TEST_VERSION_CURRENT, new StandardAnalyzer(
|
||||
LuceneTestCase.TEST_VERSION_CURRENT)));
|
||||
pair.taxWriter = new DirectoryTaxonomyWriter(dirs[i][1]);
|
||||
pair.indexWriter.commit();
|
||||
pair.taxWriter.commit();
|
||||
pairs[i] = pair;
|
||||
}
|
||||
return pairs;
|
||||
}
|
||||
|
||||
public static Collector[] search(IndexSearcher searcher,
|
||||
TaxonomyReader taxonomyReader, FacetIndexingParams iParams, int k,
|
||||
String... facetNames) throws IOException {
|
||||
|
||||
Collector[] collectors = new Collector[2];
|
||||
|
||||
List<FacetRequest> fRequests = new ArrayList<FacetRequest>();
|
||||
for (String facetName : facetNames) {
|
||||
CategoryPath cp = new CategoryPath(facetName);
|
||||
FacetRequest fq = new CountFacetRequest(cp, k);
|
||||
fRequests.add(fq);
|
||||
}
|
||||
FacetSearchParams facetSearchParams = new FacetSearchParams(fRequests, iParams);
|
||||
|
||||
TopScoreDocCollector topDocsCollector = TopScoreDocCollector.create(
|
||||
searcher.getIndexReader().maxDoc(), true);
|
||||
FacetsCollector facetsCollector = new FacetsCollector(
|
||||
facetSearchParams, searcher.getIndexReader(), taxonomyReader);
|
||||
Collector mColl = MultiCollector.wrap(topDocsCollector, facetsCollector);
|
||||
|
||||
collectors[0] = topDocsCollector;
|
||||
collectors[1] = facetsCollector;
|
||||
|
||||
searcher.search(new MatchAllDocsQuery(), mColl);
|
||||
return collectors;
|
||||
}
|
||||
|
||||
public static class IndexTaxonomyReaderPair {
|
||||
public DirectoryReader indexReader;
|
||||
public DirectoryTaxonomyReader taxReader;
|
||||
|
@ -137,4 +74,76 @@ public class FacetTestUtils {
|
|||
}
|
||||
}
|
||||
|
||||
public static Directory[][] createIndexTaxonomyDirs(int number) {
|
||||
Directory[][] dirs = new Directory[number][2];
|
||||
for (int i = 0; i < number; i++) {
|
||||
dirs[i][0] = LuceneTestCase.newDirectory();
|
||||
dirs[i][1] = LuceneTestCase.newDirectory();
|
||||
}
|
||||
return dirs;
|
||||
}
|
||||
|
||||
public static IndexTaxonomyReaderPair[] createIndexTaxonomyReaderPair(Directory[][] dirs) throws IOException {
|
||||
IndexTaxonomyReaderPair[] pairs = new IndexTaxonomyReaderPair[dirs.length];
|
||||
for (int i = 0; i < dirs.length; i++) {
|
||||
IndexTaxonomyReaderPair pair = new IndexTaxonomyReaderPair();
|
||||
pair.indexReader = DirectoryReader.open(dirs[i][0]);
|
||||
pair.indexSearcher = new IndexSearcher(pair.indexReader);
|
||||
pair.taxReader = new DirectoryTaxonomyReader(dirs[i][1]);
|
||||
pairs[i] = pair;
|
||||
}
|
||||
return pairs;
|
||||
}
|
||||
|
||||
public static IndexTaxonomyWriterPair[] createIndexTaxonomyWriterPair(Directory[][] dirs) throws IOException {
|
||||
IndexTaxonomyWriterPair[] pairs = new IndexTaxonomyWriterPair[dirs.length];
|
||||
for (int i = 0; i < dirs.length; i++) {
|
||||
IndexTaxonomyWriterPair pair = new IndexTaxonomyWriterPair();
|
||||
pair.indexWriter = new IndexWriter(dirs[i][0], new IndexWriterConfig(
|
||||
LuceneTestCase.TEST_VERSION_CURRENT, new MockAnalyzer(LuceneTestCase.random())));
|
||||
pair.taxWriter = new DirectoryTaxonomyWriter(dirs[i][1]);
|
||||
pair.indexWriter.commit();
|
||||
pair.taxWriter.commit();
|
||||
pairs[i] = pair;
|
||||
}
|
||||
return pairs;
|
||||
}
|
||||
|
||||
public static Collector[] search(IndexSearcher searcher, TaxonomyReader taxonomyReader, FacetIndexingParams iParams,
|
||||
int k, String... facetNames) throws IOException {
|
||||
|
||||
Collector[] collectors = new Collector[2];
|
||||
|
||||
List<FacetRequest> fRequests = new ArrayList<FacetRequest>();
|
||||
for (String facetName : facetNames) {
|
||||
CategoryPath cp = new CategoryPath(facetName);
|
||||
FacetRequest fq = new CountFacetRequest(cp, k);
|
||||
fRequests.add(fq);
|
||||
}
|
||||
FacetSearchParams facetSearchParams = new FacetSearchParams(fRequests, iParams);
|
||||
|
||||
TopScoreDocCollector topDocsCollector = TopScoreDocCollector.create(searcher.getIndexReader().maxDoc(), true);
|
||||
FacetsCollector facetsCollector = FacetsCollector.create(facetSearchParams, searcher.getIndexReader(), taxonomyReader);
|
||||
Collector mColl = MultiCollector.wrap(topDocsCollector, facetsCollector);
|
||||
|
||||
collectors[0] = topDocsCollector;
|
||||
collectors[1] = facetsCollector;
|
||||
|
||||
searcher.search(new MatchAllDocsQuery(), mColl);
|
||||
return collectors;
|
||||
}
|
||||
|
||||
public static String toSimpleString(FacetResult fr) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
toSimpleString(0, sb, fr.getFacetResultNode(), "");
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
private static void toSimpleString(int depth, StringBuilder sb, FacetResultNode node, String indent) {
|
||||
sb.append(indent + node.label.components[depth] + " (" + (int) node.value + ")\n");
|
||||
for (FacetResultNode childNode : node.subResults) {
|
||||
toSimpleString(depth + 1, sb, childNode, indent + " ");
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@@ -45,10 +45,10 @@ public class TestAssociationExample extends LuceneTestCase {
    assertEquals("Wrong number of results!", 1, res.getFacetResults().size());
    assertEquals("Wrong number of facets!", 2, res.getFacetResults().get(0).getNumValidDescendants());

    Iterable<? extends FacetResultNode> it = res.getFacetResults().get(0).getFacetResultNode().getSubResults();
    Iterable<? extends FacetResultNode> it = res.getFacetResults().get(0).getFacetResultNode().subResults;
    int i = 0;
    for (FacetResultNode fResNode : it) {
      assertEquals("Wrong result for facet "+fResNode.getLabel(), expectedResults[i++], fResNode.getValue(), 1E-5);
      assertEquals("Wrong result for facet "+fResNode.label, expectedResults[i++], fResNode.value, 1E-5);
    }
  }
@ -43,45 +43,35 @@ public class TestMultiCLExample extends LuceneTestCase {
|
|||
List<FacetResult> results = exampleResults.getFacetResults();
|
||||
FacetResult result = results.get(0);
|
||||
assertNotNull("Result should not be null", result);
|
||||
assertEquals("Invalid label", "5", result.getFacetResultNode()
|
||||
.getLabel().toString());
|
||||
assertEquals("Invalid value", 2.0, result.getFacetResultNode()
|
||||
.getValue(), 0.0);
|
||||
assertEquals("Invalid # of subresults", 3, result.getFacetResultNode()
|
||||
.getNumSubResults());
|
||||
FacetResultNode node = result.getFacetResultNode();
|
||||
assertEquals("Invalid label", "5", node.label.toString());
|
||||
assertEquals("Invalid value", 2.0, node.value, 0.0);
|
||||
assertEquals("Invalid # of subresults", 3, node.subResults.size());
|
||||
|
||||
Iterator<? extends FacetResultNode> subResults = result
|
||||
.getFacetResultNode().getSubResults().iterator();
|
||||
Iterator<? extends FacetResultNode> subResults = node.subResults.iterator();
|
||||
FacetResultNode sub = subResults.next();
|
||||
assertEquals("Invalid subresult value", 1.0, sub.getValue(), 0.0);
|
||||
assertEquals("Invalid subresult label", "5/2", sub.getLabel()
|
||||
.toString());
|
||||
assertEquals("Invalid subresult value", 1.0, sub.value, 0.0);
|
||||
assertEquals("Invalid subresult label", "5/2", sub.label.toString());
|
||||
sub = subResults.next();
|
||||
assertEquals("Invalid subresult value", 1.0, sub.getValue(), 0.0);
|
||||
assertEquals("Invalid subresult label", "5/7", sub.getLabel()
|
||||
.toString());
|
||||
assertEquals("Invalid subresult value", 1.0, sub.value, 0.0);
|
||||
assertEquals("Invalid subresult label", "5/7", sub.label.toString());
|
||||
sub = subResults.next();
|
||||
assertEquals("Invalid subresult value", 1.0, sub.getValue(), 0.0);
|
||||
assertEquals("Invalid subresult label", "5/5", sub.getLabel()
|
||||
.toString());
|
||||
assertEquals("Invalid subresult value", 1.0, sub.value, 0.0);
|
||||
assertEquals("Invalid subresult label", "5/5", sub.label.toString());
|
||||
|
||||
result = results.get(1);
|
||||
node = result.getFacetResultNode();
|
||||
assertNotNull("Result should not be null", result);
|
||||
assertEquals("Invalid label", "5/5", result.getFacetResultNode()
|
||||
.getLabel().toString());
|
||||
assertEquals("Invalid value", 1,
|
||||
result.getFacetResultNode().getValue(), 0.0);
|
||||
assertEquals("Invalid number of subresults", 0, result
|
||||
.getFacetResultNode().getNumSubResults());
|
||||
assertEquals("Invalid label", "5/5", node.label.toString());
|
||||
assertEquals("Invalid value", 1, node.value, 0.0);
|
||||
assertEquals("Invalid number of subresults", 0, node.subResults.size());
|
||||
|
||||
result = results.get(2);
|
||||
node = result.getFacetResultNode();
|
||||
assertNotNull("Result should not be null", result);
|
||||
assertEquals("Invalid label", "6/2", result.getFacetResultNode()
|
||||
.getLabel().toString());
|
||||
assertEquals("Invalid value", 1,
|
||||
result.getFacetResultNode().getValue(), 0.0);
|
||||
assertEquals("Invalid number of subresults", 0, result
|
||||
.getFacetResultNode().getNumSubResults());
|
||||
assertEquals("Invalid label", "6/2", node.label.toString());
|
||||
assertEquals("Invalid value", 1, node.value, 0.0);
|
||||
assertEquals("Invalid number of subresults", 0, node.subResults.size());
|
||||
|
||||
}
|
||||
|
||||
|
|
|
@@ -57,11 +57,11 @@ public class TestSimpleExample extends LuceneTestCase {
    FacetResult facetResult = res.getFacetResults().get(0);
    assertEquals("Wrong number of facets!",2, facetResult.getNumValidDescendants());

    Iterator<? extends FacetResultNode> resIterator = facetResult.getFacetResultNode().getSubResults().iterator();
    Iterator<? extends FacetResultNode> resIterator = facetResult.getFacetResultNode().subResults.iterator();
    assertTrue("Too few results", resIterator.hasNext());
    assertEquals("wrong count for first result out of 2", 1, (int)resIterator.next().getValue());
    assertEquals("wrong count for first result out of 2", 1, (int)resIterator.next().value);
    assertTrue("Too few results", resIterator.hasNext());
    assertEquals("wrong count for second result out of 2", 1, (int)resIterator.next().getValue());
    assertEquals("wrong count for second result out of 2", 1, (int)resIterator.next().value);
    assertFalse("Too many results!", resIterator.hasNext());
  }
}
@@ -71,13 +71,13 @@ public class OrdinalMappingReaderTest extends LuceneTestCase {
    DirectoryTaxonomyReader taxReader = new DirectoryTaxonomyReader(taxDir);
    IndexSearcher searcher = newSearcher(reader1);
    FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(new CategoryPath("tag"), NUM_DOCS));
    FacetsCollector collector = new FacetsCollector(fsp, reader1, taxReader);
    FacetsCollector collector = FacetsCollector.create(fsp, reader1, taxReader);
    searcher.search(new MatchAllDocsQuery(), collector);
    FacetResult result = collector.getFacetResults().get(0);
    FacetResultNode node = result.getFacetResultNode();
    for (FacetResultNode facet: node.getSubResults()) {
      int weight = (int)facet.getValue();
      int label = Integer.parseInt(facet.getLabel().components[1]);
    for (FacetResultNode facet: node.subResults) {
      int weight = (int)facet.value;
      int label = Integer.parseInt(facet.label.components[1]);
      //System.out.println(label + ": " + weight);
      if (VERBOSE) {
        System.out.println(label + ": " + weight);
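For context, a minimal sketch of the collector flow these tests now exercise after LUCENE-4600: collectors are obtained through the static FacetsCollector.create() factory instead of the removed constructor. The fsp, indexReader, taxoReader and searcher variables are assumed to be set up as elsewhere in these tests.

    // illustrative only
    FacetsCollector fc = FacetsCollector.create(fsp, indexReader, taxoReader);
    searcher.search(new MatchAllDocsQuery(), fc);
    for (FacetResult fr : fc.getFacetResults()) {
      FacetResultNode root = fr.getFacetResultNode();
      System.out.println(root.toString());
    }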
@@ -266,15 +266,15 @@ public class TestFacetsPayloadMigrationReader extends LuceneTestCase {
      requests.add(new CountFacetRequest(new CategoryPath(dim), 5));
    }
    FacetSearchParams fsp = new FacetSearchParams(requests, fip);
    FacetsCollector fc = new FacetsCollector(fsp, indexReader, taxoReader);
    FacetsCollector fc = FacetsCollector.create(fsp, indexReader, taxoReader);
    MatchAllDocsQuery base = new MatchAllDocsQuery();
    searcher.search(base, fc);
    List<FacetResult> facetResults = fc.getFacetResults();
    assertEquals(requests.size(), facetResults.size());
    for (FacetResult res : facetResults) {
      FacetResultNode node = res.getFacetResultNode();
      String dim = node.getLabel().components[0];
      assertEquals("wrong count for " + dim, expectedCounts.get(dim).intValue(), (int) node.getValue());
      String dim = node.label.components[0];
      assertEquals("wrong count for " + dim, expectedCounts.get(dim).intValue(), (int) node.value);
    }
  }

@@ -283,18 +283,16 @@ public class TestFacetsPayloadMigrationReader extends LuceneTestCase {
    // verify drill-down
    for (String dim : expectedCounts.keySet()) {
      CategoryPath drillDownCP = new CategoryPath(dim);
      ArrayList<FacetRequest> request = new ArrayList<FacetRequest>(1);
      request.add(new CountFacetRequest(drillDownCP, 10));
      FacetSearchParams fsp = new FacetSearchParams(request, fip);
      FacetSearchParams fsp = new FacetSearchParams(fip, new CountFacetRequest(drillDownCP, 10));
      Query drillDown = DrillDown.query(fsp, new MatchAllDocsQuery(), drillDownCP);
      TotalHitCountCollector total = new TotalHitCountCollector();
      FacetsCollector fc = new FacetsCollector(fsp, indexReader, taxoReader);
      FacetsCollector fc = FacetsCollector.create(fsp, indexReader, taxoReader);
      searcher.search(drillDown, MultiCollector.wrap(fc, total));
      assertTrue("no results for drill-down query " + drillDown, total.getTotalHits() > 0);
      List<FacetResult> facetResults = fc.getFacetResults();
      assertEquals(1, facetResults.size());
      FacetResultNode rootNode = facetResults.get(0).getFacetResultNode();
      assertEquals("wrong count for " + dim, expectedCounts.get(dim).intValue(), (int) rootNode.getValue());
      assertEquals("wrong count for " + dim, expectedCounts.get(dim).intValue(), (int) rootNode.value);
    }
  }
@ -46,7 +46,6 @@ public class FacetIndexingParamsTest extends LuceneTestCase {
|
|||
assertEquals("3 characters should be written", 3, numchars);
|
||||
assertEquals("wrong drill-down term text", expectedDDText, new String(
|
||||
buf, 0, numchars));
|
||||
CategoryListParams clParams = dfip.getCategoryListParams(null);
|
||||
assertEquals("partition for all ordinals is the first", "",
|
||||
PartitionsUtils.partitionNameByOrdinal(dfip, 250));
|
||||
assertEquals("for partition 0, the same name should be returned",
|
||||
|
@@ -75,7 +74,7 @@ public class FacetIndexingParamsTest extends LuceneTestCase {
|
|||
PathPolicy pathPolicy = PathPolicy.ALL_CATEGORIES;
|
||||
assertEquals("path policy does not match default for root", pathPolicy.shouldAdd(cp), dfip.getPathPolicy().shouldAdd(cp));
|
||||
for (int i = 0; i < 30; i++) {
|
||||
int nComponents = random().nextInt(10);
|
||||
int nComponents = random().nextInt(10) + 1;
|
||||
String[] components = new String[nComponents];
|
||||
for (int j = 0; j < components.length; j++) {
|
||||
components[j] = (Integer.valueOf(random().nextInt(30))).toString();
|
||||
|
|
|
@@ -0,0 +1,515 @@
|
|||
package org.apache.lucene.facet.search;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field.Store;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.facet.index.FacetFields;
|
||||
import org.apache.lucene.facet.index.categorypolicy.OrdinalPolicy;
|
||||
import org.apache.lucene.facet.index.params.CategoryListParams;
|
||||
import org.apache.lucene.facet.index.params.FacetIndexingParams;
|
||||
import org.apache.lucene.facet.search.params.CountFacetRequest;
|
||||
import org.apache.lucene.facet.search.params.FacetRequest;
|
||||
import org.apache.lucene.facet.search.params.FacetRequest.SortBy;
|
||||
import org.apache.lucene.facet.search.params.FacetRequest.SortOrder;
|
||||
import org.apache.lucene.facet.search.params.FacetSearchParams;
|
||||
import org.apache.lucene.facet.search.params.ScoreFacetRequest;
|
||||
import org.apache.lucene.facet.search.results.FacetResult;
|
||||
import org.apache.lucene.facet.search.results.FacetResultNode;
|
||||
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
|
||||
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
|
||||
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.NoMergePolicy;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.collections.ObjectToIntMap;
|
||||
import org.apache.lucene.util.encoding.IntEncoder;
|
||||
import org.apache.lucene.util.encoding.VInt8IntEncoder;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
public class CountingFacetsCollectorTest extends LuceneTestCase {
|
||||
|
||||
private static final Term A = new Term("f", "a");
|
||||
private static final CategoryPath CP_A = new CategoryPath("A"), CP_B = new CategoryPath("B");
|
||||
private static final int NUM_CHILDREN_CP_A = 5, NUM_CHILDREN_CP_B = 3;
|
||||
private static final CategoryPath[] CATEGORIES_A, CATEGORIES_B;
|
||||
static {
|
||||
CATEGORIES_A = new CategoryPath[NUM_CHILDREN_CP_A];
|
||||
for (int i = 0; i < NUM_CHILDREN_CP_A; i++) {
|
||||
CATEGORIES_A[i] = new CategoryPath(CP_A.components[0], Integer.toString(i));
|
||||
}
|
||||
CATEGORIES_B = new CategoryPath[NUM_CHILDREN_CP_B];
|
||||
for (int i = 0; i < NUM_CHILDREN_CP_B; i++) {
|
||||
CATEGORIES_B[i] = new CategoryPath(CP_B.components[0], Integer.toString(i));
|
||||
}
|
||||
}
|
||||
|
||||
protected static Directory indexDir, taxoDir;
|
||||
protected static ObjectToIntMap<CategoryPath> allExpectedCounts, termExpectedCounts;
|
||||
protected static int numChildrenIndexedA, numChildrenIndexedB;
|
||||
|
||||
@AfterClass
|
||||
public static void afterClassCountingFacetsCollectorTest() throws Exception {
|
||||
IOUtils.close(indexDir, taxoDir);
|
||||
}
|
||||
|
||||
private static List<CategoryPath> randomCategories(Random random) {
|
||||
// add random categories from the two dimensions, ensuring that the same
|
||||
// category is not added twice.
|
||||
int numFacetsA = random.nextInt(3) + 1; // 1-3
|
||||
int numFacetsB = random.nextInt(2) + 1; // 1-2
|
||||
ArrayList<CategoryPath> categories_a = new ArrayList<CategoryPath>();
|
||||
categories_a.addAll(Arrays.asList(CATEGORIES_A));
|
||||
ArrayList<CategoryPath> categories_b = new ArrayList<CategoryPath>();
|
||||
categories_b.addAll(Arrays.asList(CATEGORIES_B));
|
||||
Collections.shuffle(categories_a, random);
|
||||
Collections.shuffle(categories_b, random);
|
||||
|
||||
ArrayList<CategoryPath> categories = new ArrayList<CategoryPath>();
|
||||
categories.addAll(categories_a.subList(0, numFacetsA));
|
||||
categories.addAll(categories_b.subList(0, numFacetsB));
|
||||
return categories;
|
||||
}
|
||||
|
||||
private static void addField(Document doc) {
|
||||
doc.add(new StringField(A.field(), A.text(), Store.NO));
|
||||
}
|
||||
|
||||
private static void addFacets(Document doc, FacetFields facetFields, boolean updateTermExpectedCounts)
|
||||
throws IOException {
|
||||
List<CategoryPath> docCategories = randomCategories(random());
|
||||
for (CategoryPath cp : docCategories) {
|
||||
allExpectedCounts.put(cp, allExpectedCounts.get(cp) + 1);
|
||||
if (updateTermExpectedCounts) {
|
||||
termExpectedCounts.put(cp, termExpectedCounts.get(cp) + 1);
|
||||
}
|
||||
}
|
||||
// add 1 to each dimension
|
||||
allExpectedCounts.put(CP_A, allExpectedCounts.get(CP_A) + 1);
|
||||
allExpectedCounts.put(CP_B, allExpectedCounts.get(CP_B) + 1);
|
||||
if (updateTermExpectedCounts) {
|
||||
termExpectedCounts.put(CP_A, termExpectedCounts.get(CP_A) + 1);
|
||||
termExpectedCounts.put(CP_B, termExpectedCounts.get(CP_B) + 1);
|
||||
}
|
||||
|
||||
facetFields.addFields(doc, docCategories);
|
||||
}
|
||||
|
||||
private static void indexDocsNoFacets(IndexWriter indexWriter) throws IOException {
|
||||
int numDocs = atLeast(2);
|
||||
for (int i = 0; i < numDocs; i++) {
|
||||
Document doc = new Document();
|
||||
addField(doc);
|
||||
indexWriter.addDocument(doc);
|
||||
}
|
||||
indexWriter.commit(); // flush a segment
|
||||
}
|
||||
|
||||
private static void indexDocsWithFacetsNoTerms(IndexWriter indexWriter, TaxonomyWriter taxoWriter,
|
||||
ObjectToIntMap<CategoryPath> expectedCounts) throws IOException {
|
||||
Random random = random();
|
||||
int numDocs = atLeast(random, 2);
|
||||
FacetFields facetFields = new FacetFields(taxoWriter);
|
||||
for (int i = 0; i < numDocs; i++) {
|
||||
Document doc = new Document();
|
||||
addFacets(doc, facetFields, false);
|
||||
indexWriter.addDocument(doc);
|
||||
}
|
||||
indexWriter.commit(); // flush a segment
|
||||
}
|
||||
|
||||
private static void indexDocsWithFacetsAndTerms(IndexWriter indexWriter, TaxonomyWriter taxoWriter,
|
||||
ObjectToIntMap<CategoryPath> expectedCounts) throws IOException {
|
||||
Random random = random();
|
||||
int numDocs = atLeast(random, 2);
|
||||
FacetFields facetFields = new FacetFields(taxoWriter);
|
||||
for (int i = 0; i < numDocs; i++) {
|
||||
Document doc = new Document();
|
||||
addFacets(doc, facetFields, true);
|
||||
addField(doc);
|
||||
indexWriter.addDocument(doc);
|
||||
}
|
||||
indexWriter.commit(); // flush a segment
|
||||
}
|
||||
|
||||
private static void indexDocsWithFacetsAndSomeTerms(IndexWriter indexWriter, TaxonomyWriter taxoWriter,
|
||||
ObjectToIntMap<CategoryPath> expectedCounts) throws IOException {
|
||||
Random random = random();
|
||||
int numDocs = atLeast(random, 2);
|
||||
FacetFields facetFields = new FacetFields(taxoWriter);
|
||||
for (int i = 0; i < numDocs; i++) {
|
||||
Document doc = new Document();
|
||||
boolean hasContent = random.nextBoolean();
|
||||
if (hasContent) {
|
||||
addField(doc);
|
||||
}
|
||||
addFacets(doc, facetFields, hasContent);
|
||||
indexWriter.addDocument(doc);
|
||||
}
|
||||
indexWriter.commit(); // flush a segment
|
||||
}
|
||||
|
||||
// initialize expectedCounts w/ 0 for all categories
|
||||
private static ObjectToIntMap<CategoryPath> newCounts() {
|
||||
ObjectToIntMap<CategoryPath> counts = new ObjectToIntMap<CategoryPath>();
|
||||
counts.put(CP_A, 0);
|
||||
counts.put(CP_B, 0);
|
||||
for (CategoryPath cp : CATEGORIES_A) {
|
||||
counts.put(cp, 0);
|
||||
}
|
||||
for (CategoryPath cp : CATEGORIES_B) {
|
||||
counts.put(cp, 0);
|
||||
}
|
||||
return counts;
|
||||
}
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeClassCountingFacetsCollectorTest() throws Exception {
|
||||
indexDir = newDirectory();
|
||||
taxoDir = newDirectory();
|
||||
|
||||
// create an index which has:
|
||||
// 1. Segment with no categories, but matching results
|
||||
// 2. Segment w/ categories, but no results
|
||||
// 3. Segment w/ categories and results
|
||||
// 4. Segment w/ categories, but only some results
|
||||
|
||||
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
|
||||
conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES); // prevent merges, so we can control the index segments
|
||||
IndexWriter indexWriter = new IndexWriter(indexDir, conf);
|
||||
TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
|
||||
|
||||
allExpectedCounts = newCounts();
|
||||
termExpectedCounts = newCounts();
|
||||
|
||||
// segment w/ no categories
|
||||
indexDocsNoFacets(indexWriter);
|
||||
|
||||
// segment w/ categories, no content
|
||||
indexDocsWithFacetsNoTerms(indexWriter, taxoWriter, allExpectedCounts);
|
||||
|
||||
// segment w/ categories and content
|
||||
indexDocsWithFacetsAndTerms(indexWriter, taxoWriter, allExpectedCounts);
|
||||
|
||||
// segment w/ categories and some content
|
||||
indexDocsWithFacetsAndSomeTerms(indexWriter, taxoWriter, allExpectedCounts);
|
||||
|
||||
// set num children indexed from each dimension
|
||||
for (CategoryPath cp : CATEGORIES_A) {
|
||||
if (termExpectedCounts.get(cp) > 0) {
|
||||
++numChildrenIndexedA;
|
||||
}
|
||||
}
|
||||
for (CategoryPath cp : CATEGORIES_B) {
|
||||
if (termExpectedCounts.get(cp) > 0) {
|
||||
++numChildrenIndexedB;
|
||||
}
|
||||
}
|
||||
|
||||
IOUtils.close(indexWriter, taxoWriter);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testInvalidValidParams() throws Exception {
|
||||
final CategoryPath dummyCP = new CategoryPath("a");
|
||||
final FacetRequest dummyFR = new CountFacetRequest(dummyCP, 10);
|
||||
|
||||
// only CountFacetRequests are allowed
|
||||
assertNotNull("only CountFacetRequests should be allowed",
|
||||
CountingFacetsCollector.assertParams(new FacetSearchParams(new ScoreFacetRequest(dummyCP, 10))));
|
||||
|
||||
// only depth=1
|
||||
FacetRequest cfr = new CountFacetRequest(dummyCP, 10);
|
||||
cfr.setDepth(2);
|
||||
assertNotNull("only depth 1 should be allowed", CountingFacetsCollector.assertParams(new FacetSearchParams(cfr)));
|
||||
|
||||
// only SortOrder.DESCENDING
|
||||
cfr = new CountFacetRequest(dummyCP, 10);
|
||||
cfr.setSortOrder(SortOrder.ASCENDING);
|
||||
assertNotNull("only SortOrder.DESCENDING should be allowed", CountingFacetsCollector.assertParams(new FacetSearchParams(cfr)));
|
||||
|
||||
// only SortBy.VALUE
|
||||
cfr = new CountFacetRequest(dummyCP, 10);
|
||||
cfr.setSortBy(SortBy.ORDINAL);
|
||||
assertNotNull("only SortBy.VALUE should be allowed", CountingFacetsCollector.assertParams(new FacetSearchParams(cfr)));
|
||||
|
||||
// no numToLabel
|
||||
cfr = new CountFacetRequest(dummyCP, 10);
|
||||
cfr.setNumLabel(2);
|
||||
assertNotNull("numToLabel should not be allowed", CountingFacetsCollector.assertParams(new FacetSearchParams(cfr)));
|
||||
|
||||
FacetIndexingParams fip = new FacetIndexingParams(new CategoryListParams("moo")) {
|
||||
@Override
|
||||
public List<CategoryListParams> getAllCategoryListParams() {
|
||||
return Arrays.asList(new CategoryListParams[] { clParams, clParams });
|
||||
}
|
||||
};
|
||||
assertNotNull("only one CLP should be allowed", CountingFacetsCollector.assertParams(new FacetSearchParams(fip, dummyFR)));
|
||||
|
||||
fip = new FacetIndexingParams(new CategoryListParams("moo")) {
|
||||
final CategoryListParams clp = new CategoryListParams() {
|
||||
@Override
|
||||
public IntEncoder createEncoder() {
|
||||
return new VInt8IntEncoder();
|
||||
}
|
||||
};
|
||||
@Override
|
||||
public List<CategoryListParams> getAllCategoryListParams() {
|
||||
return Collections.singletonList(clp);
|
||||
}
|
||||
|
||||
@Override
|
||||
public CategoryListParams getCategoryListParams(CategoryPath category) {
|
||||
return clp;
|
||||
}
|
||||
};
|
||||
assertNotNull("only DGapVIntEncoder should be allowed", CountingFacetsCollector.assertParams(new FacetSearchParams(fip, dummyFR)));
|
||||
|
||||
fip = new FacetIndexingParams(new CategoryListParams("moo")) {
|
||||
@Override
|
||||
public int getPartitionSize() {
|
||||
return 2;
|
||||
}
|
||||
};
|
||||
assertNotNull("partitions should be allowed", CountingFacetsCollector.assertParams(new FacetSearchParams(fip, dummyFR)));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDifferentNumResults() throws Exception {
|
||||
// test the collector w/ FacetRequests and different numResults
|
||||
DirectoryReader indexReader = DirectoryReader.open(indexDir);
|
||||
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
|
||||
IndexSearcher searcher = new IndexSearcher(indexReader);
|
||||
|
||||
FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(CP_A, NUM_CHILDREN_CP_A),
|
||||
new CountFacetRequest(CP_B, NUM_CHILDREN_CP_B));
|
||||
FacetsCollector fc = new CountingFacetsCollector(fsp , taxoReader);
|
||||
TermQuery q = new TermQuery(A);
|
||||
searcher.search(q, fc);
|
||||
|
||||
List<FacetResult> facetResults = fc.getFacetResults();
|
||||
assertEquals("invalid number of facet results", 2, facetResults.size());
|
||||
for (FacetResult res : facetResults) {
|
||||
FacetResultNode root = res.getFacetResultNode();
|
||||
assertEquals("wrong count for " + root.label, termExpectedCounts.get(root.label), (int) root.value);
|
||||
assertEquals("invalid residue", 0, (int) root.residue);
|
||||
for (FacetResultNode child : root.subResults) {
|
||||
assertEquals("wrong count for " + child.label, termExpectedCounts.get(child.label), (int) child.value);
|
||||
}
|
||||
}
|
||||
|
||||
IOUtils.close(indexReader, taxoReader);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testResidue() throws Exception {
|
||||
// test the collector's handling of residue
|
||||
DirectoryReader indexReader = DirectoryReader.open(indexDir);
|
||||
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
|
||||
IndexSearcher searcher = new IndexSearcher(indexReader);
|
||||
|
||||
// asking for top 1 is the only way to guarantee there will be a residue
|
||||
// provided that enough children were indexed (see below)
|
||||
FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(CP_A, 1), new CountFacetRequest(CP_B, 1));
|
||||
FacetsCollector fc = new CountingFacetsCollector(fsp , taxoReader);
|
||||
TermQuery q = new TermQuery(A);
|
||||
searcher.search(q, fc);
|
||||
|
||||
List<FacetResult> facetResults = fc.getFacetResults();
|
||||
assertEquals("invalid number of facet results", 2, facetResults.size());
|
||||
for (FacetResult res : facetResults) {
|
||||
FacetResultNode root = res.getFacetResultNode();
|
||||
assertEquals("wrong count for " + root.label, termExpectedCounts.get(root.label), (int) root.value);
|
||||
// make sure randomness didn't pick only one child of root (otherwise there's no residue)
|
||||
int numChildrenIndexed = res.getFacetRequest().categoryPath == CP_A ? numChildrenIndexedA : numChildrenIndexedB;
|
||||
if (numChildrenIndexed > 1) {
|
||||
assertTrue("expected residue", root.residue > 0);
|
||||
}
|
||||
for (FacetResultNode child : root.subResults) {
|
||||
assertEquals("wrong count for " + child.label, termExpectedCounts.get(child.label), (int) child.value);
|
||||
}
|
||||
}
|
||||
|
||||
IOUtils.close(indexReader, taxoReader);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAllCounts() throws Exception {
|
||||
DirectoryReader indexReader = DirectoryReader.open(indexDir);
|
||||
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
|
||||
IndexSearcher searcher = new IndexSearcher(indexReader);
|
||||
|
||||
FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(CP_A, NUM_CHILDREN_CP_A),
|
||||
new CountFacetRequest(CP_B, NUM_CHILDREN_CP_B));
|
||||
FacetsCollector fc = new CountingFacetsCollector(fsp , taxoReader);
|
||||
searcher.search(new MatchAllDocsQuery(), fc);
|
||||
|
||||
List<FacetResult> facetResults = fc.getFacetResults();
|
||||
assertEquals("invalid number of facet results", 2, facetResults.size());
|
||||
for (FacetResult res : facetResults) {
|
||||
FacetResultNode root = res.getFacetResultNode();
|
||||
assertEquals("wrong count for " + root.label, allExpectedCounts.get(root.label), (int) root.value);
|
||||
assertEquals("invalid residue", 0, (int) root.residue);
|
||||
for (FacetResultNode child : root.subResults) {
|
||||
assertEquals("wrong count for " + child.label, allExpectedCounts.get(child.label), (int) child.value);
|
||||
}
|
||||
}
|
||||
|
||||
IOUtils.close(indexReader, taxoReader);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBigNumResults() throws Exception {
|
||||
DirectoryReader indexReader = DirectoryReader.open(indexDir);
|
||||
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
|
||||
IndexSearcher searcher = new IndexSearcher(indexReader);
|
||||
|
||||
FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(CP_A, Integer.MAX_VALUE),
|
||||
new CountFacetRequest(CP_B, Integer.MAX_VALUE));
|
||||
FacetsCollector fc = new CountingFacetsCollector(fsp , taxoReader);
|
||||
searcher.search(new MatchAllDocsQuery(), fc);
|
||||
|
||||
List<FacetResult> facetResults = fc.getFacetResults();
|
||||
assertEquals("invalid number of facet results", 2, facetResults.size());
|
||||
for (FacetResult res : facetResults) {
|
||||
FacetResultNode root = res.getFacetResultNode();
|
||||
assertEquals("wrong count for " + root.label, allExpectedCounts.get(root.label), (int) root.value);
|
||||
assertEquals("invalid residue", 0, (int) root.residue);
|
||||
for (FacetResultNode child : root.subResults) {
|
||||
assertEquals("wrong count for " + child.label, allExpectedCounts.get(child.label), (int) child.value);
|
||||
}
|
||||
}
|
||||
|
||||
IOUtils.close(indexReader, taxoReader);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDirectSource() throws Exception {
|
||||
DirectoryReader indexReader = DirectoryReader.open(indexDir);
|
||||
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
|
||||
IndexSearcher searcher = new IndexSearcher(indexReader);
|
||||
|
||||
FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(CP_A, NUM_CHILDREN_CP_A),
|
||||
new CountFacetRequest(CP_B, NUM_CHILDREN_CP_B));
|
||||
FacetsCollector fc = new CountingFacetsCollector(fsp , taxoReader, new FacetArrays(taxoReader.getSize()), true);
|
||||
searcher.search(new MatchAllDocsQuery(), fc);
|
||||
|
||||
List<FacetResult> facetResults = fc.getFacetResults();
|
||||
assertEquals("invalid number of facet results", 2, facetResults.size());
|
||||
for (FacetResult res : facetResults) {
|
||||
FacetResultNode root = res.getFacetResultNode();
|
||||
assertEquals("wrong count for " + root.label, allExpectedCounts.get(root.label), (int) root.value);
|
||||
assertEquals("invalid residue", 0, (int) root.residue);
|
||||
for (FacetResultNode child : root.subResults) {
|
||||
assertEquals("wrong count for " + child.label, allExpectedCounts.get(child.label), (int) child.value);
|
||||
}
|
||||
}
|
||||
|
||||
IOUtils.close(indexReader, taxoReader);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoParents() throws Exception {
|
||||
// TODO: when OrdinalPolicy is on CLP, index the NO_PARENTS categories into
|
||||
// their own dimension, and avoid this index creation
|
||||
Directory indexDir = newDirectory();
|
||||
Directory taxoDir = newDirectory();
|
||||
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
|
||||
conf.setMaxBufferedDocs(2);
|
||||
conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES);
|
||||
IndexWriter indexWriter = new IndexWriter(indexDir, conf);
|
||||
TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
|
||||
FacetIndexingParams fip = new FacetIndexingParams() {
|
||||
@Override
|
||||
public OrdinalPolicy getOrdinalPolicy() {
|
||||
return OrdinalPolicy.NO_PARENTS;
|
||||
}
|
||||
};
|
||||
FacetFields facetFields = new FacetFields(taxoWriter, fip);
|
||||
ObjectToIntMap<CategoryPath> expCounts = newCounts();
|
||||
|
||||
// index few docs with categories, not sharing parents.
|
||||
int numDocs = atLeast(10);
|
||||
final CategoryPath cpc = new CategoryPath("L1", "L2", "L3");
|
||||
for (int i = 0; i < numDocs; i++) {
|
||||
Document doc = new Document();
|
||||
ArrayList<CategoryPath> categories = new ArrayList<CategoryPath>();
|
||||
CategoryPath cpa = CATEGORIES_A[random().nextInt(NUM_CHILDREN_CP_A)];
|
||||
CategoryPath cpb = CATEGORIES_B[random().nextInt(NUM_CHILDREN_CP_B)];
|
||||
categories.add(cpa);
|
||||
categories.add(cpb);
|
||||
categories.add(cpc);
|
||||
expCounts.put(cpa, expCounts.get(cpa) + 1);
|
||||
expCounts.put(cpb, expCounts.get(cpb) + 1);
|
||||
facetFields.addFields(doc, categories);
|
||||
indexWriter.addDocument(doc);
|
||||
}
|
||||
expCounts.put(CP_A, numDocs);
|
||||
expCounts.put(CP_B, numDocs);
|
||||
for (int i = 0; i < cpc.length; i++) {
|
||||
expCounts.put(cpc.subpath(i+1), numDocs);
|
||||
}
|
||||
|
||||
IOUtils.close(indexWriter, taxoWriter);
|
||||
|
||||
DirectoryReader indexReader = DirectoryReader.open(indexDir);
|
||||
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
|
||||
IndexSearcher searcher = new IndexSearcher(indexReader);
|
||||
FacetSearchParams fsp = new FacetSearchParams(fip, new CountFacetRequest(CP_A, NUM_CHILDREN_CP_A),
|
||||
new CountFacetRequest(CP_B, NUM_CHILDREN_CP_B), new CountFacetRequest(cpc.subpath(1), 10));
|
||||
FacetsCollector fc = new CountingFacetsCollector(fsp , taxoReader);
|
||||
searcher.search(new MatchAllDocsQuery(), fc);
|
||||
|
||||
List<FacetResult> facetResults = fc.getFacetResults();
|
||||
assertEquals("invalid number of facet results", 3, facetResults.size());
|
||||
for (FacetResult res : facetResults) {
|
||||
FacetResultNode root = res.getFacetResultNode();
|
||||
assertEquals("wrong count for " + root.label, expCounts.get(root.label), (int) root.value);
|
||||
assertEquals("invalid residue", 0, (int) root.residue);
|
||||
for (FacetResultNode child : root.subResults) {
|
||||
assertEquals("wrong count for " + child.label, expCounts.get(child.label), (int) child.value);
|
||||
}
|
||||
}
|
||||
|
||||
IOUtils.close(indexReader, taxoReader);
|
||||
|
||||
IOUtils.close(indexDir, taxoDir);
|
||||
}
|
||||
|
||||
}
|
|
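The testInvalidValidParams checks above spell out when CountingFacetsCollector may be used. As a hedged summary only, a request set that passes every assertParams check exercised there would look roughly like the following; taxoReader is assumed to be an already-open DirectoryTaxonomyReader, and nothing here comes from outside this patch:

    // CountFacetRequest only, depth 1, SortOrder.DESCENDING, SortBy.VALUE, default numToLabel,
    // a single CategoryListParams with the default (DGap VInt) encoder, and no partitioning.
    CountFacetRequest request = new CountFacetRequest(new CategoryPath("A"), 10);
    FacetSearchParams fsp = new FacetSearchParams(request);            // default FacetIndexingParams
    FacetsCollector fc = new CountingFacetsCollector(fsp, taxoReader); // as constructed in the tests above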
@@ -29,11 +29,10 @@ import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
|||
public class SamplingWrapperTest extends BaseSampleTestTopK {
|
||||
|
||||
@Override
|
||||
protected FacetsAccumulator getSamplingAccumulator(Sampler sampler,
|
||||
TaxonomyReader taxoReader, IndexReader indexReader,
|
||||
FacetSearchParams searchParams) {
|
||||
FacetsAccumulator fExtrctr = new StandardFacetsAccumulator(searchParams,
|
||||
indexReader, taxoReader);
|
||||
return new SamplingWrapper(fExtrctr, sampler);
|
||||
protected FacetsAccumulator getSamplingAccumulator(Sampler sampler, TaxonomyReader taxoReader,
|
||||
IndexReader indexReader, FacetSearchParams searchParams) {
|
||||
FacetsAccumulator fa = new StandardFacetsAccumulator(searchParams, indexReader, taxoReader);
|
||||
return new SamplingWrapper(fa, sampler);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@@ -17,20 +17,23 @@ package org.apache.lucene.facet.search;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.PrintStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.facet.FacetTestUtils;
|
||||
import org.apache.lucene.facet.index.FacetFields;
|
||||
import org.apache.lucene.facet.search.params.CountFacetRequest;
|
||||
import org.apache.lucene.facet.search.params.FacetSearchParams;
|
||||
import org.apache.lucene.facet.search.results.FacetResult;
|
||||
import org.apache.lucene.facet.search.results.FacetResultNode;
|
||||
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
|
||||
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
|
||||
import org.apache.lucene.facet.util.PrintTaxonomyStats;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
|
@@ -89,7 +92,7 @@ public class TestDemoFacets extends LuceneTestCase {
|
|||
new CountFacetRequest(new CategoryPath("Author"), 10));
|
||||
|
||||
// Aggregates the facet counts:
|
||||
FacetsCollector c = new FacetsCollector(fsp, searcher.getIndexReader(), taxoReader);
|
||||
FacetsCollector c = FacetsCollector.create(fsp, searcher.getIndexReader(), taxoReader);
|
||||
|
||||
// MatchAllDocsQuery is for "browsing" (counts facets
|
||||
// for all non-deleted docs in the index); normally
|
||||
|
@@ -101,20 +104,31 @@ public class TestDemoFacets extends LuceneTestCase {
|
|||
List<FacetResult> results = c.getFacetResults();
|
||||
assertEquals(2, results.size());
|
||||
assertEquals("Publish Date (5)\n 2012 (2)\n 2010 (2)\n 1999 (1)\n",
|
||||
toSimpleString(results.get(0)));
|
||||
FacetTestUtils.toSimpleString(results.get(0)));
|
||||
assertEquals("Author (5)\n Lisa (2)\n Frank (1)\n Susan (1)\n Bob (1)\n",
|
||||
toSimpleString(results.get(1)));
|
||||
FacetTestUtils.toSimpleString(results.get(1)));
|
||||
|
||||
|
||||
// Now user drills down on Publish Date/2010:
|
||||
fsp = new FacetSearchParams(new CountFacetRequest(new CategoryPath("Author"), 10));
|
||||
Query q2 = DrillDown.query(fsp, new MatchAllDocsQuery(), new CategoryPath("Publish Date/2010", '/'));
|
||||
c = new FacetsCollector(fsp, searcher.getIndexReader(), taxoReader);
|
||||
c = FacetsCollector.create(fsp, searcher.getIndexReader(), taxoReader);
|
||||
searcher.search(q2, c);
|
||||
results = c.getFacetResults();
|
||||
assertEquals(1, results.size());
|
||||
assertEquals("Author (2)\n Lisa (1)\n Bob (1)\n",
|
||||
toSimpleString(results.get(0)));
|
||||
FacetTestUtils.toSimpleString(results.get(0)));
|
||||
|
||||
// Smoke test PrintTaxonomyStats:
|
||||
ByteArrayOutputStream bos = new ByteArrayOutputStream();
|
||||
PrintTaxonomyStats.printStats(taxoReader, new PrintStream(bos, false, "UTF-8"), true);
|
||||
String result = bos.toString("UTF-8");
|
||||
assertTrue(result.indexOf("/Author: 4 immediate children; 5 total categories") != -1);
|
||||
assertTrue(result.indexOf("/Publish Date: 3 immediate children; 12 total categories") != -1);
|
||||
// Make sure at least a few nodes of the tree came out:
|
||||
assertTrue(result.indexOf(" /1999") != -1);
|
||||
assertTrue(result.indexOf(" /2012") != -1);
|
||||
assertTrue(result.indexOf(" /20") != -1);
|
||||
|
||||
taxoReader.close();
|
||||
searcher.getIndexReader().close();
|
||||
|
@@ -122,16 +136,4 @@ public class TestDemoFacets extends LuceneTestCase {
|
|||
taxoDir.close();
|
||||
}
|
||||
|
||||
private String toSimpleString(FacetResult fr) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
toSimpleString(0, sb, fr.getFacetResultNode(), "");
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
private void toSimpleString(int depth, StringBuilder sb, FacetResultNode node, String indent) {
|
||||
sb.append(indent + node.getLabel().components[depth] + " (" + (int) node.getValue() + ")\n");
|
||||
for(FacetResultNode childNode : node.getSubResults()) {
|
||||
toSimpleString(depth+1, sb, childNode, indent + " ");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@@ -118,8 +118,8 @@ public class TestFacetsAccumulatorWithComplement extends FacetTestBase {
|
|||
FacetResultNode parentResWithComp = countResWithComplement.get(0).getFacetResultNode();
|
||||
FacetResultNode parentResNoComp = countResWithComplement.get(0).getFacetResultNode();
|
||||
|
||||
assertEquals("Wrong number of top count aggregated categories with complement!",3,parentResWithComp.getNumSubResults());
|
||||
assertEquals("Wrong number of top count aggregated categories no complement!",3,parentResNoComp.getNumSubResults());
|
||||
assertEquals("Wrong number of top count aggregated categories with complement!",3,parentResWithComp.subResults.size());
|
||||
assertEquals("Wrong number of top count aggregated categories no complement!",3,parentResNoComp.subResults.size());
|
||||
|
||||
}
|
||||
|
||||
|
|
|
@@ -3,7 +3,7 @@ package org.apache.lucene.facet.search;
|
|||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.analysis.core.KeywordAnalyzer;
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field.Store;
|
||||
import org.apache.lucene.document.StringField;
|
||||
|
@@ -53,7 +53,7 @@ public class TestFacetsCollector extends LuceneTestCase {
|
|||
|
||||
TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxoDir);
|
||||
IndexWriter iw = new IndexWriter(indexDir, new IndexWriterConfig(
|
||||
TEST_VERSION_CURRENT, new KeywordAnalyzer()));
|
||||
TEST_VERSION_CURRENT, new MockAnalyzer(random())));
|
||||
|
||||
FacetFields facetFields = new FacetFields(taxonomyWriter);
|
||||
for(int i = atLeast(2000); i > 0; --i) {
|
||||
|
@@ -71,12 +71,12 @@ public class TestFacetsCollector extends LuceneTestCase {
|
|||
DirectoryReader r = DirectoryReader.open(indexDir);
|
||||
DirectoryTaxonomyReader taxo = new DirectoryTaxonomyReader(taxoDir);
|
||||
|
||||
FacetsCollector fc = new FacetsCollector(sParams, r, taxo);
|
||||
FacetsCollector fc = FacetsCollector.create(sParams, r, taxo);
|
||||
TopScoreDocCollector topDocs = TopScoreDocCollector.create(10, false);
|
||||
new IndexSearcher(r).search(new MatchAllDocsQuery(), MultiCollector.wrap(fc, topDocs));
|
||||
|
||||
List<FacetResult> res = fc.getFacetResults();
|
||||
double value = res.get(0).getFacetResultNode().getValue();
|
||||
double value = res.get(0).getFacetResultNode().value;
|
||||
double expected = topDocs.topDocs().getMaxScore() * r.numDocs();
|
||||
assertEquals(expected, value, 1E-10);
|
||||
|
||||
|
|
|
@@ -271,7 +271,7 @@ public class TestMultipleCategoryLists extends LuceneTestCase {
|
|||
|
||||
FacetResult results = res.get(0);
|
||||
FacetResultNode resNode = results.getFacetResultNode();
|
||||
Iterable<? extends FacetResultNode> subResults = resNode.getSubResults();
|
||||
Iterable<? extends FacetResultNode> subResults = resNode.subResults;
|
||||
Iterator<? extends FacetResultNode> subIter = subResults.iterator();
|
||||
|
||||
checkResult(resNode, "Band", 5.0);
|
||||
|
@@ -280,7 +280,7 @@ public class TestMultipleCategoryLists extends LuceneTestCase {
|
|||
|
||||
results = res.get(1);
|
||||
resNode = results.getFacetResultNode();
|
||||
subResults = resNode.getSubResults();
|
||||
subResults = resNode.subResults;
|
||||
subIter = subResults.iterator();
|
||||
|
||||
checkResult(resNode, "Band", 5.0);
|
||||
|
@@ -294,7 +294,7 @@ public class TestMultipleCategoryLists extends LuceneTestCase {
|
|||
|
||||
results = res.get(2);
|
||||
resNode = results.getFacetResultNode();
|
||||
subResults = resNode.getSubResults();
|
||||
subResults = resNode.subResults;
|
||||
subIter = subResults.iterator();
|
||||
|
||||
checkResult(resNode, "Author", 3.0);
|
||||
|
@@ -304,7 +304,7 @@ public class TestMultipleCategoryLists extends LuceneTestCase {
|
|||
|
||||
results = res.get(3);
|
||||
resNode = results.getFacetResultNode();
|
||||
subResults = resNode.getSubResults();
|
||||
subResults = resNode.subResults;
|
||||
subIter = subResults.iterator();
|
||||
|
||||
checkResult(resNode, "Band/Rock & Pop", 4.0);
|
||||
|
@@ -334,7 +334,7 @@ public class TestMultipleCategoryLists extends LuceneTestCase {
|
|||
FacetSearchParams facetSearchParams = new FacetSearchParams(facetRequests, iParams);
|
||||
|
||||
// perform documents search and facets accumulation
|
||||
FacetsCollector facetsCollector = new FacetsCollector(facetSearchParams, ir, tr);
|
||||
FacetsCollector facetsCollector = FacetsCollector.create(facetSearchParams, ir, tr);
|
||||
searcher.search(q, MultiCollector.wrap(topDocsCollector, facetsCollector));
|
||||
return facetsCollector;
|
||||
}
|
||||
|
@@ -350,8 +350,8 @@ public class TestMultipleCategoryLists extends LuceneTestCase {
|
|||
}
|
||||
|
||||
private static void checkResult(FacetResultNode sub, String label, double value) {
|
||||
assertEquals("Label of subresult " + sub.getLabel() + " was incorrect", label, sub.getLabel().toString());
|
||||
assertEquals("Value for " + sub.getLabel() + " subresult was incorrect", value, sub.getValue(), 0.0);
|
||||
assertEquals("Label of subresult " + sub.label + " was incorrect", label, sub.label.toString());
|
||||
assertEquals("Value for " + sub.label + " subresult was incorrect", value, sub.value, 0.0);
|
||||
}
|
||||
|
||||
}
|
|
@@ -44,7 +44,7 @@ public class TestSameRequestAccumulation extends FacetTestBase {
|
|||
final CountFacetRequest facetRequest = new CountFacetRequest(new CategoryPath("root"), 10);
|
||||
FacetSearchParams fsp = new FacetSearchParams(facetRequest);
|
||||
|
||||
FacetsCollector fc = new FacetsCollector(fsp, indexReader, taxoReader);
|
||||
FacetsCollector fc = FacetsCollector.create(fsp, indexReader, taxoReader);
|
||||
searcher.search(new MatchAllDocsQuery(), fc);
|
||||
|
||||
final String expected = fc.getFacetResults().get(0).toString();
|
||||
|
@@ -53,9 +53,9 @@ public class TestSameRequestAccumulation extends FacetTestBase {
|
|||
fsp = new FacetSearchParams(facetRequest, facetRequest, new CountFacetRequest(new CategoryPath("root"), 10));
|
||||
|
||||
// make sure the search params holds 3 requests now
|
||||
assertEquals(3, fsp.getFacetRequests().size());
|
||||
assertEquals(3, fsp.facetRequests.size());
|
||||
|
||||
fc = new FacetsCollector(fsp, indexReader, taxoReader);
|
||||
fc = FacetsCollector.create(fsp, indexReader, taxoReader);
|
||||
searcher.search(new MatchAllDocsQuery(), fc);
|
||||
List<FacetResult> actual = fc.getFacetResults();
|
||||
|
||||
|
|
|
@@ -89,10 +89,8 @@ public class TestScoredDocIdCollector extends FacetTestBase {
|
|||
FacetResultNode parentCountRes = countRes.get(0).getFacetResultNode();
|
||||
FacetResultNode parentScoreRes = scoreRes.get(0).getFacetResultNode();
|
||||
|
||||
assertEquals("Wrong number of top count aggregated categories!", 3,
|
||||
parentCountRes.getNumSubResults());
|
||||
assertEquals("Wrong number of top score aggregated categories!", 3,
|
||||
parentScoreRes.getNumSubResults());
|
||||
assertEquals("Wrong number of top count aggregated categories!", 3, parentCountRes.subResults.size());
|
||||
assertEquals("Wrong number of top score aggregated categories!", 3, parentScoreRes.subResults.size());
|
||||
|
||||
// rely on that facet value is computed as doc-score, and
|
||||
// accordingly compare values of the two top-category results.
|
||||
|
@@ -101,12 +99,8 @@ public class TestScoredDocIdCollector extends FacetTestBase {
|
|||
FacetResultNode[] scoreResNodes = resultNodesAsArray(parentScoreRes);
|
||||
|
||||
for (int i = 0; i < scoreResNodes.length; i++) {
|
||||
assertEquals("Ordinals differ!",
|
||||
countResNodes[i].getOrdinal(), scoreResNodes[i].getOrdinal());
|
||||
assertEquals("Wrong scores!",
|
||||
constScore * countResNodes[i].getValue(),
|
||||
scoreResNodes[i].getValue(),
|
||||
Double.MIN_VALUE);
|
||||
assertEquals("Ordinals differ!", countResNodes[i].ordinal, scoreResNodes[i].ordinal);
|
||||
assertEquals("Wrong scores!", constScore * countResNodes[i].value, scoreResNodes[i].value, Double.MIN_VALUE);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@@ -1,7 +1,6 @@
|
|||
package org.apache.lucene.facet.search;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
|
@@ -102,24 +101,22 @@ public class TestStandardFacetsAccumulator extends LuceneTestCase {
|
|||
|
||||
// search for "f:a", only segments 1 and 3 should match results
|
||||
Query q = new TermQuery(new Term("f", "a"));
|
||||
ArrayList<FacetRequest> requests = new ArrayList<FacetRequest>(1);
|
||||
CountFacetRequest countNoComplements = new CountFacetRequest(new CategoryPath("A"), 10) {
|
||||
FacetRequest countNoComplements = new CountFacetRequest(new CategoryPath("A"), 10) {
|
||||
@Override
|
||||
public boolean supportsComplements() {
|
||||
return false; // disable complements
|
||||
}
|
||||
};
|
||||
requests.add(countNoComplements);
|
||||
FacetSearchParams fsp = new FacetSearchParams(requests, fip);
|
||||
FacetsCollector fc = new FacetsCollector(fsp , indexReader, taxoReader);
|
||||
FacetSearchParams fsp = new FacetSearchParams(fip, countNoComplements);
|
||||
FacetsCollector fc = new StandardFacetsCollector(fsp , indexReader, taxoReader);
|
||||
indexSearcher.search(q, fc);
|
||||
List<FacetResult> results = fc.getFacetResults();
|
||||
assertEquals("received too many facet results", 1, results.size());
|
||||
FacetResultNode frn = results.get(0).getFacetResultNode();
|
||||
assertEquals("wrong weight for \"A\"", 4, (int) frn.getValue());
|
||||
assertEquals("wrong number of children", 2, frn.getNumSubResults());
|
||||
for (FacetResultNode node : frn.getSubResults()) {
|
||||
assertEquals("wrong weight for child " + node.getLabel(), 2, (int) node.getValue());
|
||||
assertEquals("wrong weight for \"A\"", 4, (int) frn.value);
|
||||
assertEquals("wrong number of children", 2, frn.subResults.size());
|
||||
for (FacetResultNode node : frn.subResults) {
|
||||
assertEquals("wrong weight for child " + node.label, 2, (int) node.value);
|
||||
}
|
||||
IOUtils.close(indexReader, taxoReader);
|
||||
|
||||
|
|
|
@@ -165,7 +165,7 @@ public class TestTopKInEachNodeResultHandler extends LuceneTestCase {
|
|||
facetRequests.add(cfrb20);
|
||||
FacetSearchParams facetSearchParams = new FacetSearchParams(facetRequests, iParams);
|
||||
|
||||
FacetArrays facetArrays = new FacetArrays(PartitionsUtils.partitionSize(facetSearchParams.getFacetIndexingParams(), tr));
|
||||
FacetArrays facetArrays = new FacetArrays(PartitionsUtils.partitionSize(facetSearchParams.indexingParams, tr));
|
||||
FacetsAccumulator fctExtrctr = new StandardFacetsAccumulator(facetSearchParams, is.getIndexReader(), tr, facetArrays);
|
||||
fctExtrctr.setComplementThreshold(FacetsAccumulator.DISABLE_COMPLEMENT);
|
||||
long start = System.currentTimeMillis();
|
||||
|
@@ -181,40 +181,40 @@ public class TestTopKInEachNodeResultHandler extends LuceneTestCase {
|
|||
boolean hasDoctor = "Doctor".equals(fr.getFacetRequest().categoryPath.components[0]);
|
||||
assertEquals(9, fr.getNumValidDescendants());
|
||||
FacetResultNode parentRes = fr.getFacetResultNode();
|
||||
assertEquals(16.0, parentRes.getValue(), Double.MIN_VALUE);
|
||||
assertEquals(2.0, parentRes.getResidue(), Double.MIN_VALUE);
|
||||
assertEquals(2, parentRes.getNumSubResults());
|
||||
assertEquals(16.0, parentRes.value, Double.MIN_VALUE);
|
||||
assertEquals(2.0, parentRes.residue, Double.MIN_VALUE);
|
||||
assertEquals(2, parentRes.subResults.size());
|
||||
// two nodes sorted by descending values: a/b with 8 and a/c with 6
|
||||
// a/b has residue 2 and two children a/b/2 with value 3, and a/b/1 with value 2.
|
||||
// a/c has residue 0, and one child a/c/1 with value 1.
|
||||
double [] expectedValues0 = { 8.0, 2.0, 3.0, 0.0, 2.0, 0.0, 6.0, 0.0, 1.0, 0.0 };
|
||||
int i = 0;
|
||||
for (FacetResultNode node : parentRes.getSubResults()) {
|
||||
assertEquals(expectedValues0[i++], node.getValue(), Double.MIN_VALUE);
|
||||
assertEquals(expectedValues0[i++], node.getResidue(), Double.MIN_VALUE);
|
||||
for (FacetResultNode node2 : node.getSubResults()) {
|
||||
assertEquals(expectedValues0[i++], node2.getValue(), Double.MIN_VALUE);
|
||||
assertEquals(expectedValues0[i++], node2.getResidue(), Double.MIN_VALUE);
|
||||
for (FacetResultNode node : parentRes.subResults) {
|
||||
assertEquals(expectedValues0[i++], node.value, Double.MIN_VALUE);
|
||||
assertEquals(expectedValues0[i++], node.residue, Double.MIN_VALUE);
|
||||
for (FacetResultNode node2 : node.subResults) {
|
||||
assertEquals(expectedValues0[i++], node2.value, Double.MIN_VALUE);
|
||||
assertEquals(expectedValues0[i++], node2.residue, Double.MIN_VALUE);
|
||||
}
|
||||
}
|
||||
|
||||
// now just change the value of the first child of the root to 5, and then rearrange
|
||||
// expected are: first a/c of value 6 and residue 0, and one child a/c/1 with value 1
|
||||
// then a/b with value 5 and residue 2, and both children: a/b/2 with value 3, and a/b/1 with value 2.
|
||||
for (FacetResultNode node : parentRes.getSubResults()) {
|
||||
node.setValue(5.0);
|
||||
for (FacetResultNode node : parentRes.subResults) {
|
||||
node.value = 5.0;
|
||||
break;
|
||||
}
|
||||
// now rearrange
|
||||
double [] expectedValues00 = { 6.0, 0.0, 1.0, 0.0, 5.0, 2.0, 3.0, 0.0, 2.0, 0.0 };
|
||||
fr = cfra23.createFacetResultsHandler(tr).rearrangeFacetResult(fr);
|
||||
i = 0;
|
||||
for (FacetResultNode node : parentRes.getSubResults()) {
|
||||
assertEquals(expectedValues00[i++], node.getValue(), Double.MIN_VALUE);
|
||||
assertEquals(expectedValues00[i++], node.getResidue(), Double.MIN_VALUE);
|
||||
for (FacetResultNode node2 : node.getSubResults()) {
|
||||
assertEquals(expectedValues00[i++], node2.getValue(), Double.MIN_VALUE);
|
||||
assertEquals(expectedValues00[i++], node2.getResidue(), Double.MIN_VALUE);
|
||||
for (FacetResultNode node : parentRes.subResults) {
|
||||
assertEquals(expectedValues00[i++], node.value, Double.MIN_VALUE);
|
||||
assertEquals(expectedValues00[i++], node.residue, Double.MIN_VALUE);
|
||||
for (FacetResultNode node2 : node.subResults) {
|
||||
assertEquals(expectedValues00[i++], node2.value, Double.MIN_VALUE);
|
||||
assertEquals(expectedValues00[i++], node2.residue, Double.MIN_VALUE);
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -222,19 +222,19 @@ public class TestTopKInEachNodeResultHandler extends LuceneTestCase {
|
|||
hasDoctor |= "Doctor".equals(fr.getFacetRequest().categoryPath.components[0]);
|
||||
assertEquals(9, fr.getNumValidDescendants());
|
||||
parentRes = fr.getFacetResultNode();
|
||||
assertEquals(16.0, parentRes.getValue(), Double.MIN_VALUE);
|
||||
assertEquals(2.0, parentRes.getResidue(), Double.MIN_VALUE);
|
||||
assertEquals(2, parentRes.getNumSubResults());
|
||||
assertEquals(16.0, parentRes.value, Double.MIN_VALUE);
|
||||
assertEquals(2.0, parentRes.residue, Double.MIN_VALUE);
|
||||
assertEquals(2, parentRes.subResults.size());
|
||||
// two nodes sorted by descending values: a/b with 8 and a/c with 6
|
||||
// a/b has residue 2 and two children a/b/2 with value 3, and a/b/1 with value 2.
|
||||
// a/c has residue 0, and one child a/c/1 with value 1.
|
||||
i = 0;
|
||||
for (FacetResultNode node : parentRes.getSubResults()) {
|
||||
assertEquals(expectedValues0[i++], node.getValue(), Double.MIN_VALUE);
|
||||
assertEquals(expectedValues0[i++], node.getResidue(), Double.MIN_VALUE);
|
||||
for (FacetResultNode node2 : node.getSubResults()) {
|
||||
assertEquals(expectedValues0[i++], node2.getValue(), Double.MIN_VALUE);
|
||||
assertEquals(expectedValues0[i++], node2.getResidue(), Double.MIN_VALUE);
|
||||
for (FacetResultNode node : parentRes.subResults) {
|
||||
assertEquals(expectedValues0[i++], node.value, Double.MIN_VALUE);
|
||||
assertEquals(expectedValues0[i++], node.residue, Double.MIN_VALUE);
|
||||
for (FacetResultNode node2 : node.subResults) {
|
||||
assertEquals(expectedValues0[i++], node2.value, Double.MIN_VALUE);
|
||||
assertEquals(expectedValues0[i++], node2.residue, Double.MIN_VALUE);
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -242,70 +242,70 @@ public class TestTopKInEachNodeResultHandler extends LuceneTestCase {
|
|||
hasDoctor |= "Doctor".equals(fr.getFacetRequest().categoryPath.components[0]);
|
||||
assertEquals(4, fr.getNumValidDescendants(), 4);
|
||||
parentRes = fr.getFacetResultNode();
|
||||
assertEquals(16.0, parentRes.getValue(), Double.MIN_VALUE);
|
||||
assertEquals(2.0, parentRes.getResidue(), Double.MIN_VALUE);
|
||||
assertEquals(2, parentRes.getNumSubResults());
|
||||
assertEquals(16.0, parentRes.value, Double.MIN_VALUE);
|
||||
assertEquals(2.0, parentRes.residue, Double.MIN_VALUE);
|
||||
assertEquals(2, parentRes.subResults.size());
|
||||
// two nodes sorted by descending values:
|
||||
// a/b with value 8 and residue 0 (because no children considered),
|
||||
// and a/c with value 6 and residue 0 (because no children considered)
|
||||
double [] expectedValues2 = { 8.0, 0.0, 6.0, 0.0 };
|
||||
i = 0;
|
||||
for (FacetResultNode node : parentRes.getSubResults()) {
|
||||
assertEquals(expectedValues2[i++], node.getValue(), Double.MIN_VALUE);
|
||||
assertEquals(expectedValues2[i++], node.getResidue(), Double.MIN_VALUE);
|
||||
assertEquals(node.getNumSubResults(), 0);
|
||||
for (FacetResultNode node : parentRes.subResults) {
|
||||
assertEquals(expectedValues2[i++], node.value, Double.MIN_VALUE);
|
||||
assertEquals(expectedValues2[i++], node.residue, Double.MIN_VALUE);
|
||||
assertEquals(node.subResults.size(), 0);
|
||||
}
|
||||
|
||||
fr = facetResults.get(3); // a/b, depth=3, K=2
|
||||
hasDoctor |= "Doctor".equals(fr.getFacetRequest().categoryPath.components[0]);
|
||||
assertEquals(4, fr.getNumValidDescendants());
|
||||
parentRes = fr.getFacetResultNode();
|
||||
assertEquals(8.0, parentRes.getValue(), Double.MIN_VALUE);
|
||||
assertEquals(2.0, parentRes.getResidue(), Double.MIN_VALUE);
|
||||
assertEquals(2, parentRes.getNumSubResults());
|
||||
assertEquals(8.0, parentRes.value, Double.MIN_VALUE);
|
||||
assertEquals(2.0, parentRes.residue, Double.MIN_VALUE);
|
||||
assertEquals(2, parentRes.subResults.size());
|
||||
double [] expectedValues3 = { 3.0, 2.0 };
|
||||
i = 0;
|
||||
for (FacetResultNode node : parentRes.getSubResults()) {
|
||||
assertEquals(expectedValues3[i++], node.getValue(), Double.MIN_VALUE);
|
||||
assertEquals(0.0, node.getResidue(), Double.MIN_VALUE);
|
||||
assertEquals(0, node.getNumSubResults());
|
||||
for (FacetResultNode node : parentRes.subResults) {
|
||||
assertEquals(expectedValues3[i++], node.value, Double.MIN_VALUE);
|
||||
assertEquals(0.0, node.residue, Double.MIN_VALUE);
|
||||
assertEquals(0, node.subResults.size());
|
||||
}
|
||||
|
||||
fr = facetResults.get(4); // a/b, depth=2, K=2
|
||||
hasDoctor |= "Doctor".equals(fr.getFacetRequest().categoryPath.components[0]);
|
||||
assertEquals(4, fr.getNumValidDescendants());
|
||||
parentRes = fr.getFacetResultNode();
|
||||
assertEquals(8.0, parentRes.getValue(), Double.MIN_VALUE);
|
||||
assertEquals(2.0, parentRes.getResidue(), Double.MIN_VALUE);
|
||||
assertEquals(2, parentRes.getNumSubResults());
|
||||
assertEquals(8.0, parentRes.value, Double.MIN_VALUE);
|
||||
assertEquals(2.0, parentRes.residue, Double.MIN_VALUE);
|
||||
assertEquals(2, parentRes.subResults.size());
|
||||
i = 0;
|
||||
for (FacetResultNode node : parentRes.getSubResults()) {
|
||||
assertEquals(expectedValues3[i++], node.getValue(), Double.MIN_VALUE);
|
||||
assertEquals(0.0, node.getResidue(), Double.MIN_VALUE);
|
||||
assertEquals(0, node.getNumSubResults());
|
||||
for (FacetResultNode node : parentRes.subResults) {
|
||||
assertEquals(expectedValues3[i++], node.value, Double.MIN_VALUE);
|
||||
assertEquals(0.0, node.residue, Double.MIN_VALUE);
|
||||
assertEquals(0, node.subResults.size());
|
||||
}
|
||||
|
||||
fr = facetResults.get(5); // a/b, depth=1, K=2
|
||||
hasDoctor |= "Doctor".equals(fr.getFacetRequest().categoryPath.components[0]);
|
||||
assertEquals(4, fr.getNumValidDescendants());
|
||||
parentRes = fr.getFacetResultNode();
|
||||
assertEquals(8.0, parentRes.getValue(), Double.MIN_VALUE);
|
||||
assertEquals(2.0, parentRes.getResidue(), Double.MIN_VALUE);
|
||||
assertEquals(2, parentRes.getNumSubResults());
|
||||
assertEquals(8.0, parentRes.value, Double.MIN_VALUE);
|
||||
assertEquals(2.0, parentRes.residue, Double.MIN_VALUE);
|
||||
assertEquals(2, parentRes.subResults.size());
|
||||
i = 0;
|
||||
for (FacetResultNode node : parentRes.getSubResults()) {
|
||||
assertEquals(expectedValues3[i++], node.getValue(), Double.MIN_VALUE);
|
||||
assertEquals(0.0, node.getResidue(), Double.MIN_VALUE);
|
||||
assertEquals(0, node.getNumSubResults());
|
||||
for (FacetResultNode node : parentRes.subResults) {
|
||||
assertEquals(expectedValues3[i++], node.value, Double.MIN_VALUE);
|
||||
assertEquals(0.0, node.residue, Double.MIN_VALUE);
|
||||
assertEquals(0, node.subResults.size());
|
||||
}
|
||||
|
||||
fr = facetResults.get(6); // a/b, depth=0, K=2
|
||||
hasDoctor |= "Doctor".equals(fr.getFacetRequest().categoryPath.components[0]);
|
||||
assertEquals(0, fr.getNumValidDescendants()); // 0 descendants but rootnode
|
||||
parentRes = fr.getFacetResultNode();
|
||||
assertEquals(8.0, parentRes.getValue(), Double.MIN_VALUE);
|
||||
assertEquals(0.0, parentRes.getResidue(), Double.MIN_VALUE);
|
||||
assertEquals(0, parentRes.getNumSubResults());
|
||||
assertEquals(8.0, parentRes.value, Double.MIN_VALUE);
|
||||
assertEquals(0.0, parentRes.residue, Double.MIN_VALUE);
|
||||
assertEquals(0, parentRes.subResults.size());
|
||||
hasDoctor |= "Doctor".equals(fr.getFacetRequest().categoryPath.components[0]);
|
||||
|
||||
// doctor, depth=1, K=2
|
||||
|
|
|
@@ -89,7 +89,7 @@ public class TestTopKResultsHandler extends BaseTestTopK {
|
|||
// do different facet counts and compare to control
|
||||
FacetSearchParams sParams = getFacetSearchParams(facetRequests, getFacetIndexingParams(partitionSize));
|
||||
|
||||
FacetsCollector fc = new FacetsCollector(sParams, indexReader, taxoReader) {
|
||||
FacetsCollector fc = new StandardFacetsCollector(sParams, indexReader, taxoReader) {
|
||||
@Override
|
||||
protected FacetsAccumulator initFacetsAccumulator(FacetSearchParams facetSearchParams, IndexReader indexReader, TaxonomyReader taxonomyReader) {
|
||||
FacetsAccumulator fa = new StandardFacetsAccumulator(facetSearchParams, indexReader, taxonomyReader);
|
||||
|
@@ -99,52 +99,46 @@ public class TestTopKResultsHandler extends BaseTestTopK {
|
|||
};
|
||||
|
||||
searcher.search(new MatchAllDocsQuery(), fc);
|
||||
long start = System.currentTimeMillis();
|
||||
List<FacetResult> facetResults = fc.getFacetResults();
|
||||
long end = System.currentTimeMillis();
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.println("Time: " + (end - start));
|
||||
}
|
||||
|
||||
FacetResult fr = facetResults.get(0);
|
||||
FacetResultNode parentRes = fr.getFacetResultNode();
|
||||
assertEquals(13.0, parentRes.getValue(), Double.MIN_VALUE);
|
||||
assertEquals(13.0, parentRes.value, Double.MIN_VALUE);
|
||||
FacetResultNode[] frn = resultNodesAsArray(parentRes);
|
||||
assertEquals(7.0, frn[0].getValue(), Double.MIN_VALUE);
|
||||
assertEquals(6.0, frn[1].getValue(), Double.MIN_VALUE);
|
||||
assertEquals(7.0, frn[0].value, Double.MIN_VALUE);
|
||||
assertEquals(6.0, frn[1].value, Double.MIN_VALUE);
|
||||
|
||||
fr = facetResults.get(1);
|
||||
parentRes = fr.getFacetResultNode();
|
||||
assertEquals(13.0, parentRes.getValue(), Double.MIN_VALUE);
|
||||
assertEquals(13.0, parentRes.value, Double.MIN_VALUE);
|
||||
frn = resultNodesAsArray(parentRes);
|
||||
assertEquals(7.0, frn[0].getValue(), Double.MIN_VALUE);
|
||||
assertEquals(6.0, frn[1].getValue(), Double.MIN_VALUE);
|
||||
assertEquals(2.0, frn[2].getValue(), Double.MIN_VALUE);
|
||||
assertEquals(2.0, frn[3].getValue(), Double.MIN_VALUE);
|
||||
assertEquals(1.0, frn[4].getValue(), Double.MIN_VALUE);
|
||||
assertEquals(1.0, frn[5].getValue(), Double.MIN_VALUE);
|
||||
assertEquals(7.0, frn[0].value, Double.MIN_VALUE);
|
||||
assertEquals(6.0, frn[1].value, Double.MIN_VALUE);
|
||||
assertEquals(2.0, frn[2].value, Double.MIN_VALUE);
|
||||
assertEquals(2.0, frn[3].value, Double.MIN_VALUE);
|
||||
assertEquals(1.0, frn[4].value, Double.MIN_VALUE);
|
||||
assertEquals(1.0, frn[5].value, Double.MIN_VALUE);
|
||||
|
||||
fr = facetResults.get(2);
|
||||
parentRes = fr.getFacetResultNode();
|
||||
assertEquals(7.0, parentRes.getValue(), Double.MIN_VALUE);
|
||||
assertEquals(7.0, parentRes.value, Double.MIN_VALUE);
|
||||
frn = resultNodesAsArray(parentRes);
|
||||
assertEquals(2.0, frn[0].getValue(), Double.MIN_VALUE);
|
||||
assertEquals(2.0, frn[1].getValue(), Double.MIN_VALUE);
|
||||
assertEquals(1.0, frn[2].getValue(), Double.MIN_VALUE);
|
||||
assertEquals(1.0, frn[3].getValue(), Double.MIN_VALUE);
|
||||
assertEquals(2.0, frn[0].value, Double.MIN_VALUE);
|
||||
assertEquals(2.0, frn[1].value, Double.MIN_VALUE);
|
||||
assertEquals(1.0, frn[2].value, Double.MIN_VALUE);
|
||||
assertEquals(1.0, frn[3].value, Double.MIN_VALUE);
|
||||
|
||||
fr = facetResults.get(3);
|
||||
parentRes = fr.getFacetResultNode();
|
||||
assertEquals(2.0, parentRes.getValue(), Double.MIN_VALUE);
|
||||
assertEquals(2.0, parentRes.value, Double.MIN_VALUE);
|
||||
frn = resultNodesAsArray(parentRes);
|
||||
assertEquals(0, frn.length);
|
||||
|
||||
fr = facetResults.get(4);
|
||||
parentRes = fr.getFacetResultNode();
|
||||
assertEquals(6.0, parentRes.getValue(), Double.MIN_VALUE);
|
||||
assertEquals(6.0, parentRes.value, Double.MIN_VALUE);
|
||||
frn = resultNodesAsArray(parentRes);
|
||||
assertEquals(1.0, frn[0].getValue(), Double.MIN_VALUE);
|
||||
assertEquals(1.0, frn[0].value, Double.MIN_VALUE);
|
||||
closeAll();
|
||||
}
|
||||
}
|
||||
|
@@ -159,10 +153,10 @@ public class TestTopKResultsHandler extends BaseTestTopK {
|
|||
|
||||
// do different facet counts and compare to control
|
||||
CategoryPath path = new CategoryPath("a", "b");
|
||||
FacetSearchParams sParams = getFacetSearchParams(
|
||||
getFacetIndexingParams(partitionSize), new CountFacetRequest(path, Integer.MAX_VALUE));
|
||||
FacetSearchParams sParams = getFacetSearchParams(getFacetIndexingParams(partitionSize),
|
||||
new CountFacetRequest(path, Integer.MAX_VALUE));
|
||||
|
||||
FacetsCollector fc = new FacetsCollector(sParams, indexReader, taxoReader) {
|
||||
FacetsCollector fc = new StandardFacetsCollector(sParams, indexReader, taxoReader) {
|
||||
@Override
|
||||
protected FacetsAccumulator initFacetsAccumulator(FacetSearchParams facetSearchParams, IndexReader indexReader, TaxonomyReader taxonomyReader) {
|
||||
FacetsAccumulator fa = new StandardFacetsAccumulator(facetSearchParams, indexReader, taxonomyReader);
|
||||
|
@@ -172,13 +166,7 @@ public class TestTopKResultsHandler extends BaseTestTopK {
|
|||
};
|
||||
|
||||
searcher.search(new MatchAllDocsQuery(), fc);
|
||||
long start = System.currentTimeMillis();
|
||||
List<FacetResult> results = fc.getFacetResults();
|
||||
long end = System.currentTimeMillis();
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.println("Time: " + (end - start));
|
||||
}
|
||||
|
||||
assertEquals("Should only be one result as there's only one request", 1, results.size());
|
||||
FacetResult res = results.get(0);
|
||||
|
@ -188,7 +176,7 @@ public class TestTopKResultsHandler extends BaseTestTopK {
|
|||
FacetSearchParams sParams2 = getFacetSearchParams(
|
||||
getFacetIndexingParams(partitionSize), new CountFacetRequest(path, Integer.MAX_VALUE));
|
||||
|
||||
FacetsCollector fc2 = new FacetsCollector(sParams2, indexReader, taxoReader) {
|
||||
FacetsCollector fc2 = new StandardFacetsCollector(sParams2, indexReader, taxoReader) {
|
||||
@Override
|
||||
protected FacetsAccumulator initFacetsAccumulator(FacetSearchParams facetSearchParams, IndexReader indexReader, TaxonomyReader taxonomyReader) {
|
||||
FacetsAccumulator fa = new StandardFacetsAccumulator(facetSearchParams, indexReader, taxonomyReader);
|
||||
|
@ -226,18 +214,12 @@ public class TestTopKResultsHandler extends BaseTestTopK {
|
|||
getFacetIndexingParams(partitionSize),
|
||||
new CountFacetRequest(path, 10));
|
||||
|
||||
FacetsCollector fc = new FacetsCollector(sParams, indexReader, taxoReader);
|
||||
FacetsCollector fc = FacetsCollector.create(sParams, indexReader, taxoReader);
|
||||
|
||||
searcher.search(new MatchAllDocsQuery(), fc);
|
||||
|
||||
long start = System.currentTimeMillis();
|
||||
List<FacetResult> facetResults = fc.getFacetResults();
|
||||
long end = System.currentTimeMillis();
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.println("Time: " + (end - start));
|
||||
}
|
||||
|
||||
assertEquals("Shouldn't have found anything for a FacetRequest "
|
||||
+ "of a facet that doesn't exist in the index.", 0, facetResults.size());
|
||||
|
||||
|
|
|
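Note on the collector change running through this patch: tests that only need default counting now obtain their collector from the FacetsCollector.create(...) factory, while tests that plug in a custom accumulator subclass StandardFacetsCollector instead of constructing the old concrete FacetsCollector. A rough sketch of both forms (illustrative only; sParams, indexReader, taxoReader and searcher are assumed to be set up as in the tests above):

    // Default path: let the factory pick the collector implementation.
    FacetsCollector fc = FacetsCollector.create(sParams, indexReader, taxoReader);
    searcher.search(new MatchAllDocsQuery(), fc);
    List<FacetResult> results = fc.getFacetResults();

    // Custom-accumulator path, mirroring the anonymous subclasses above.
    FacetsCollector custom = new StandardFacetsCollector(sParams, indexReader, taxoReader) {
      @Override
      protected FacetsAccumulator initFacetsAccumulator(FacetSearchParams fsp, IndexReader r, TaxonomyReader tr) {
        return new StandardFacetsAccumulator(fsp, r, tr);
      }
    };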
@@ -4,15 +4,14 @@ import java.io.IOException;
import java.util.HashMap;
import java.util.List;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.junit.Test;

import org.apache.lucene.facet.search.params.FacetSearchParams;
import org.apache.lucene.facet.search.results.FacetResult;
import org.apache.lucene.facet.search.results.FacetResultNode;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.junit.Test;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more

@@ -37,7 +36,7 @@ public class TestTopKResultsHandlerRandom extends BaseTestTopK {
throws IOException {
Query q = new MatchAllDocsQuery();
FacetSearchParams facetSearchParams = searchParamsWithRequests(numResults, partitionSize);
FacetsCollector fc = new FacetsCollector(facetSearchParams, indexReader, taxoReader) {
FacetsCollector fc = new StandardFacetsCollector(facetSearchParams, indexReader, taxoReader) {
@Override
protected FacetsAccumulator initFacetsAccumulator(
FacetSearchParams facetSearchParams, IndexReader indexReader,

@@ -88,15 +87,15 @@ public class TestTopKResultsHandlerRandom extends BaseTestTopK {
int k = 0;
for (FacetResult fr : allFacetResults) {
FacetResultNode topResNode = fr.getFacetResultNode();
maxNumNodes = Math.max(maxNumNodes, topResNode.getNumSubResults());
maxNumNodes = Math.max(maxNumNodes, topResNode.subResults.size());
int prevCount = Integer.MAX_VALUE;
int pos = 0;
for (FacetResultNode frn: topResNode.getSubResults()) {
assertTrue("wrong counts order: prev="+prevCount+" curr="+frn.getValue(), prevCount>=frn.getValue());
prevCount = (int) frn.getValue();
String key = k+"--"+frn.getLabel()+"=="+frn.getValue();
for (FacetResultNode frn: topResNode.subResults) {
assertTrue("wrong counts order: prev="+prevCount+" curr="+frn.value, prevCount>=frn.value);
prevCount = (int) frn.value;
String key = k+"--"+frn.label+"=="+frn.value;
if (VERBOSE) {
System.out.println(frn.getLabel() + " - " + frn.getValue() + " "+key+" "+pos);
System.out.println(frn.label + " - " + frn.value + " "+key+" "+pos);
}
all.put(key, pos++); // will use this later to verify order of sub-results
}

@@ -113,12 +112,12 @@ public class TestTopKResultsHandlerRandom extends BaseTestTopK {
k = 0;
for (FacetResult fr : someResults) {
FacetResultNode topResNode = fr.getFacetResultNode();
assertTrue("too many results: n="+n+" but got "+topResNode.getNumSubResults(), n>=topResNode.getNumSubResults());
assertTrue("too many results: n="+n+" but got "+topResNode.subResults.size(), n>=topResNode.subResults.size());
int pos = 0;
for (FacetResultNode frn: topResNode.getSubResults()) {
String key = k+"--"+frn.getLabel()+"=="+frn.getValue();
for (FacetResultNode frn: topResNode.subResults) {
String key = k+"--"+frn.label+"=="+frn.value;
if (VERBOSE) {
System.out.println(frn.getLabel() + " - " + frn.getValue() + " "+key+" "+pos);
System.out.println(frn.label + " - " + frn.value + " "+key+" "+pos);
}
Integer origPos = all.get(key);
assertNotNull("missing in all results: "+frn,origPos);
@@ -226,9 +226,9 @@ public class TestTotalFacetCountsCache extends LuceneTestCase {
FacetResult result = results.get(i);
assertNotNull("Result should not be null", result);
FacetResultNode resNode = result.getFacetResultNode();
assertEquals("Invalid label", expLabels[i], resNode.getLabel().toString());
assertEquals("Invalid value", expValues[i], resNode.getValue(), 0.0);
assertEquals("Invalid number of subresults", 0, resNode.getNumSubResults());
assertEquals("Invalid label", expLabels[i], resNode.label.toString());
assertEquals("Invalid value", expValues[i], resNode.value, 0.0);
assertEquals("Invalid number of subresults", 0, resNode.subResults.size());
}
// we're done, close the index reader and the taxonomy.
slowIndexReader.close();
@@ -110,7 +110,7 @@ public class AssociationsFacetRequestTest extends LuceneTestCase {

Query q = new MatchAllDocsQuery();

FacetsCollector fc = new FacetsCollector(fsp, reader, taxo);
FacetsCollector fc = FacetsCollector.create(fsp, reader, taxo);

IndexSearcher searcher = newSearcher(reader);
searcher.search(q, fc);

@@ -118,8 +118,8 @@ public class AssociationsFacetRequestTest extends LuceneTestCase {

assertNotNull("No results!",res);
assertEquals("Wrong number of results!",2, res.size());
assertEquals("Wrong count for category 'a'!",200, (int) res.get(0).getFacetResultNode().getValue());
assertEquals("Wrong count for category 'b'!",150, (int) res.get(1).getFacetResultNode().getValue());
assertEquals("Wrong count for category 'a'!",200, (int) res.get(0).getFacetResultNode().value);
assertEquals("Wrong count for category 'b'!",150, (int) res.get(1).getFacetResultNode().value);

taxo.close();
}

@@ -135,7 +135,7 @@ public class AssociationsFacetRequestTest extends LuceneTestCase {

Query q = new MatchAllDocsQuery();

FacetsCollector fc = new FacetsCollector(fsp, reader, taxo);
FacetsCollector fc = FacetsCollector.create(fsp, reader, taxo);

IndexSearcher searcher = newSearcher(reader);
searcher.search(q, fc);

@@ -143,8 +143,8 @@ public class AssociationsFacetRequestTest extends LuceneTestCase {

assertNotNull("No results!",res);
assertEquals("Wrong number of results!",2, res.size());
assertEquals("Wrong count for category 'a'!",50f, (float) res.get(0).getFacetResultNode().getValue(), 0.00001);
assertEquals("Wrong count for category 'b'!",10f, (float) res.get(1).getFacetResultNode().getValue(), 0.00001);
assertEquals("Wrong count for category 'a'!",50f, (float) res.get(0).getFacetResultNode().value, 0.00001);
assertEquals("Wrong count for category 'b'!",10f, (float) res.get(1).getFacetResultNode().value, 0.00001);

taxo.close();
}

@@ -165,7 +165,7 @@ public class AssociationsFacetRequestTest extends LuceneTestCase {

Query q = new MatchAllDocsQuery();

FacetsCollector fc = new FacetsCollector(fsp, reader, taxo);
FacetsCollector fc = FacetsCollector.create(fsp, reader, taxo);

IndexSearcher searcher = newSearcher(reader);
searcher.search(q, fc);
@@ -3,22 +3,22 @@ package org.apache.lucene.facet.search.sampling;
import java.util.List;
import java.util.Random;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;

import org.apache.lucene.search.MultiCollector;
import org.apache.lucene.facet.search.BaseTestTopK;
import org.apache.lucene.facet.search.FacetsAccumulator;
import org.apache.lucene.facet.search.FacetsCollector;
import org.apache.lucene.facet.search.ScoredDocIDs;
import org.apache.lucene.facet.search.ScoredDocIdCollector;
import org.apache.lucene.facet.search.StandardFacetsCollector;
import org.apache.lucene.facet.search.params.FacetRequest;
import org.apache.lucene.facet.search.params.FacetRequest.ResultMode;
import org.apache.lucene.facet.search.params.FacetSearchParams;
import org.apache.lucene.facet.search.results.FacetResult;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.MultiCollector;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more

@@ -48,7 +48,7 @@ public abstract class BaseSampleTestTopK extends BaseTestTopK {
@Override
protected FacetSearchParams searchParamsWithRequests(int numResults, int partitionSize) {
FacetSearchParams res = super.searchParamsWithRequests(numResults, partitionSize);
for (FacetRequest req : res.getFacetRequests()) {
for (FacetRequest req : res.facetRequests) {
// randomize the way we aggregate results
if (random().nextBoolean()) {
req.setResultMode(ResultMode.GLOBAL_FLAT);

@@ -78,7 +78,7 @@ public abstract class BaseSampleTestTopK extends BaseTestTopK {
ScoredDocIdCollector docCollector = ScoredDocIdCollector.create(indexReader.maxDoc(), false);

FacetSearchParams expectedSearchParams = searchParamsWithRequests(K, partitionSize);
FacetsCollector fc = new FacetsCollector(expectedSearchParams, indexReader, taxoReader);
FacetsCollector fc = FacetsCollector.create(expectedSearchParams, indexReader, taxoReader);

searcher.search(q, MultiCollector.wrap(docCollector, fc));

@@ -97,7 +97,7 @@ public abstract class BaseSampleTestTopK extends BaseTestTopK {
assertSampling(expectedResults, q, sampler, samplingSearchParams, true);

break; // succeeded
} catch (NotSameResultError e) {
} catch (AssertionError e) {
if (nTrial >= RETRIES - 1) {
throw e; // no more retries allowed, must fail
}

@@ -120,7 +120,7 @@ public abstract class BaseSampleTestTopK extends BaseTestTopK {

private FacetsCollector samplingCollector(final boolean complement, final Sampler sampler,
FacetSearchParams samplingSearchParams) {
FacetsCollector samplingFC = new FacetsCollector(samplingSearchParams, indexReader, taxoReader) {
FacetsCollector samplingFC = new StandardFacetsCollector(samplingSearchParams, indexReader, taxoReader) {
@Override
protected FacetsAccumulator initFacetsAccumulator(FacetSearchParams facetSearchParams, IndexReader indexReader,
TaxonomyReader taxonomyReader) {
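The sampling test above also shows how a facets collector is combined with another collector in a single search pass. A condensed sketch of that usage (illustrative only; query, searcher, indexReader, taxoReader and searchParams are assumed):

    // Collect matching doc ids and facet counts in one pass over the query results.
    ScoredDocIdCollector docCollector = ScoredDocIdCollector.create(indexReader.maxDoc(), false);
    FacetsCollector fc = FacetsCollector.create(searchParams, indexReader, taxoReader);
    searcher.search(query, MultiCollector.wrap(docCollector, fc));
    List<FacetResult> results = fc.getFacetResults();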
@@ -8,6 +8,7 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.facet.index.FacetFields;
import org.apache.lucene.facet.search.FacetsAccumulator;
import org.apache.lucene.facet.search.FacetsCollector;
import org.apache.lucene.facet.search.StandardFacetsCollector;
import org.apache.lucene.facet.search.params.CountFacetRequest;
import org.apache.lucene.facet.search.params.FacetRequest;
import org.apache.lucene.facet.search.params.FacetRequest.ResultMode;

@@ -85,10 +86,8 @@ public class OversampleWithDepthTest extends LuceneTestCase {
FacetResultNode rootNode = res.getFacetResultNode();

// Each node below root should also have sub-results as the requested depth was '2'
for (FacetResultNode node : rootNode.getSubResults()) {
assertTrue("node " + node.getLabel()
+ " should have had children as the requested depth was '2'",
node.getNumSubResults() > 0);
for (FacetResultNode node : rootNode.subResults) {
assertTrue("node " + node.label + " should have had children as the requested depth was '2'", node.subResults.size() > 0);
}

IOUtils.close(r, tr, indexDir, taxoDir);

@@ -111,11 +110,10 @@ public class OversampleWithDepthTest extends LuceneTestCase {
}

/** search reader <code>r</code>*/
private FacetResult searchWithFacets(IndexReader r,
TaxonomyReader tr, FacetSearchParams fsp, final SamplingParams params)
throws IOException {
private FacetResult searchWithFacets(IndexReader r, TaxonomyReader tr, FacetSearchParams fsp,
final SamplingParams params) throws IOException {
// a FacetsCollector with a sampling accumulator
FacetsCollector fcWithSampling = new FacetsCollector(fsp, r, tr) {
FacetsCollector fcWithSampling = new StandardFacetsCollector(fsp, r, tr) {
@Override
protected FacetsAccumulator initFacetsAccumulator(FacetSearchParams facetSearchParams, IndexReader indexReader,
TaxonomyReader taxonomyReader) {
@@ -28,10 +28,8 @@ import org.apache.lucene.facet.taxonomy.TaxonomyReader;
public class SamplingAccumulatorTest extends BaseSampleTestTopK {

@Override
protected FacetsAccumulator getSamplingAccumulator(Sampler sampler,
TaxonomyReader taxoReader, IndexReader indexReader,
FacetSearchParams searchParams) {
return new SamplingAccumulator(sampler, searchParams, indexReader,
taxoReader);
protected FacetsAccumulator getSamplingAccumulator(Sampler sampler, TaxonomyReader taxoReader,
IndexReader indexReader, FacetSearchParams searchParams) {
return new SamplingAccumulator(sampler, searchParams, indexReader, taxoReader);
}
}
@@ -131,9 +131,6 @@ public class TestCategoryPath extends LuceneTestCase {
CategoryPath p = new CategoryPath("hello", "world", "yo");
assertEquals(3, p.length);
assertEquals("hello/world/yo", p.toString('/'));

p = new CategoryPath(new String[0]);
assertEquals(0, p.length);
}

@Test
@@ -353,7 +353,7 @@ public class TestTaxonomyCombined extends LuceneTestCase {
}

// test TaxonomyReader.getCategory():
for (int i=0; i<tr.getSize(); i++) {
for (int i = 1; i < tr.getSize(); i++) {
CategoryPath expectedCategory = new CategoryPath(expectedCategories[i]);
CategoryPath category = tr.getPath(i);
if (!expectedCategory.equals(category)) {

@@ -367,7 +367,7 @@ public class TestTaxonomyCombined extends LuceneTestCase {
assertNull(tr.getPath(TaxonomyReader.INVALID_ORDINAL));

// test TaxonomyReader.getOrdinal():
for (int i=0; i<expectedCategories.length; i++) {
for (int i = 1; i < expectedCategories.length; i++) {
int expectedOrdinal = i;
int ordinal = tr.getOrdinal(new CategoryPath(expectedCategories[i]));
if (expectedOrdinal != ordinal) {
@@ -21,6 +21,7 @@ import java.util.Random;

import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.compressing.dummy.DummyCompressingCodec;
import org.apache.lucene.codecs.lucene42.Lucene42Codec;

@@ -66,6 +67,7 @@ public abstract class CompressingCodec extends FilterCodec {
}

private final CompressingStoredFieldsFormat storedFieldsFormat;
private final CompressingTermVectorsFormat termVectorsFormat;

/**
 * Creates a compressing codec with a given segment suffix

@@ -73,6 +75,7 @@ public abstract class CompressingCodec extends FilterCodec {
public CompressingCodec(String name, String segmentSuffix, CompressionMode compressionMode, int chunkSize) {
super(name, new Lucene42Codec());
this.storedFieldsFormat = new CompressingStoredFieldsFormat(name, segmentSuffix, compressionMode, chunkSize);
this.termVectorsFormat = new CompressingTermVectorsFormat(name, segmentSuffix, compressionMode, chunkSize);
}

/**

@@ -87,8 +90,13 @@ public abstract class CompressingCodec extends FilterCodec {
return storedFieldsFormat;
}

@Override
public TermVectorsFormat termVectorsFormat() {
return termVectorsFormat;
}

@Override
public String toString() {
return getName() + "(storedFieldsFormat=" + storedFieldsFormat + ")";
return getName() + "(storedFieldsFormat=" + storedFieldsFormat + ", termVectorsFormat=" + termVectorsFormat + ")";
}
}
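The CompressingCodec change above wires a CompressingTermVectorsFormat next to the existing CompressingStoredFieldsFormat, both built from the same (name, segmentSuffix, compressionMode, chunkSize) arguments. A minimal sketch of a concrete test codec on top of it (illustrative only; the codec name and chunk size are arbitrary, and CompressionMode.FAST is assumed to be one of the available modes):

    // A concrete codec that compresses both stored fields and term vectors.
    public class MyCompressingCodec extends CompressingCodec {
      public MyCompressingCodec() {
        super("MyCompressingCodec", "", CompressionMode.FAST, 1 << 14);
      }
    }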
@@ -17,6 +17,7 @@
package org.apache.solr.handler.dataimport;

import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;

import java.net.URLEncoder;

@@ -116,6 +117,7 @@ public class TestBuiltInEvaluators extends AbstractDataImportHandlerTestCase {
}

@Test
@Ignore("fails if somewhere on earth is a DST change")
public void testDateFormatEvaluator() {
Evaluator dateFormatEval = new DateFormatEvaluator();
ContextImpl context = new ContextImpl(null, resolver, null,