Merged /lucene/dev/trunk:r1435377-1436565

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1436566 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2013-01-21 19:23:25 +00:00
commit 1801cac63e
67 changed files with 4059 additions and 1073 deletions

View File

@ -29,6 +29,15 @@ Changes in backwards compatibility policy
support in-memory caching, CategoryListCache was removed too.
(Shai Erera, Michael McCandless)
* LUCENE-4697: FacetResultNode is now a concrete class with public members
(instead of getter methods). (Shai Erera)
* LUCENE-4600: FacetsCollector is now an abstract class with two
implementations: StandardFacetsCollector (the old version of
FacetsCollector) and CountingFacetsCollector. FacetsCollector.create()
returns the most optimized collector for the given parameters.
(Shai Erera, Michael McCandless)
Optimizations
* LUCENE-4687: BloomFilterPostingsFormat now lazily initializes delegate
@ -49,6 +58,13 @@ New Features
* LUCENE-4686: New specialized DGapVInt8IntEncoder for facets (now the
default). (Shai Erera)
* LUCENE-4703: Add simple PrintTaxonomyStats tool to see summary
information about the facets taxonomy index. (Mike McCandless)
* LUCENE-4599: New oal.codecs.compressing.CompressingTermVectorsFormat which
compresses term vectors into chunks of documents similarly to
CompressingStoredFieldsFormat. (Adrien Grand)
======================= Lucene 4.1.0 =======================
Changes in backwards compatibility policy

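To illustrate the LUCENE-4600 and LUCENE-4697 entries above, here is a hedged sketch of the new API (not part of this patch; searcher, query, indexReader and taxoReader are assumed to exist, the FacetSearchParams constructor form follows the SimpleSearcher change further below, and the public field names come from the LUCENE-4697 entry):

FacetSearchParams fsp = new FacetSearchParams(FacetIndexingParams.ALL_PARENTS,
    new CountFacetRequest(new CategoryPath("Author"), 10));
FacetsCollector fc = FacetsCollector.create(fsp, indexReader, taxoReader);
searcher.search(query, fc);
for (FacetResult res : fc.getFacetResults()) {
  for (FacetResultNode node : res.getFacetResultNode().subResults) {
    System.out.println(node.label + " (" + node.value + ")");
  }
}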
View File

@ -18,8 +18,9 @@ package org.apache.lucene.benchmark.byTask.feeds;
*/
import java.io.IOException;
import java.util.List;
import org.apache.lucene.facet.associations.CategoryAssociationsContainer;
import org.apache.lucene.facet.taxonomy.CategoryPath;
/**
* Source items for facets.
@ -29,12 +30,11 @@ import org.apache.lucene.facet.associations.CategoryAssociationsContainer;
public abstract class FacetSource extends ContentItemsSource {
/**
* Returns the next {@link CategoryAssociationsContainer facets content item}.
* Implementations must account for multi-threading, as multiple threads can
* call this method simultaneously.
* Fills the next facets content items in the given list. Implementations must
* account for multi-threading, as multiple threads can call this method
* simultaneously.
*/
public abstract CategoryAssociationsContainer getNextFacets(CategoryAssociationsContainer facets)
throws NoMoreDataException, IOException;
public abstract void getNextFacets(List<CategoryPath> facets) throws NoMoreDataException, IOException;
@Override
public void resetInputs() throws IOException {

View File

@ -18,10 +18,10 @@ package org.apache.lucene.benchmark.byTask.feeds;
*/
import java.io.IOException;
import java.util.List;
import java.util.Random;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.facet.associations.CategoryAssociationsContainer;
import org.apache.lucene.facet.taxonomy.CategoryPath;
/**
@ -29,42 +29,38 @@ import org.apache.lucene.facet.taxonomy.CategoryPath;
* <p>
* Supports the following parameters:
* <ul>
* <li><b>rand.seed</b> - defines the seed to initialize Random with (default: <b>13</b>).
* <li><b>rand.seed</b> - defines the seed to initialize {@link Random} with
* (default: <b>13</b>).
* <li><b>max.doc.facets</b> - maximal #facets per doc (default: <b>10</b>).
* Actual number of facets in a certain doc would be anything between 1 and that number.
* <li><b>max.facet.depth</b> - maximal #components in a facet (default: <b>3</b>).
* Actual number of components in a certain facet would be anything between 1 and that number.
* Actual number of facets in a certain doc would be anything between 1 and that
* number.
* <li><b>max.facet.depth</b> - maximal #components in a facet (default:
* <b>3</b>). Actual number of components in a certain facet would be anything
* between 1 and that number.
* </ul>
*/
public class RandomFacetSource extends FacetSource {
Random random;
private int maxDocFacets = 10;
private int maxFacetDepth = 3;
private Random random;
private int maxDocFacets;
private int maxFacetDepth;
private int maxValue = maxDocFacets * maxFacetDepth;
@Override
public CategoryAssociationsContainer getNextFacets(CategoryAssociationsContainer facets)
throws NoMoreDataException, IOException {
if (facets == null) {
facets = new CategoryAssociationsContainer();
} else {
facets.clear();
}
int numFacets = 1 + random.nextInt(maxDocFacets-1); // at least one facet to each doc
public void getNextFacets(List<CategoryPath> facets) throws NoMoreDataException, IOException {
facets.clear();
int numFacets = 1 + random.nextInt(maxDocFacets); // at least one facet to each doc
for (int i = 0; i < numFacets; i++) {
int depth = 1 + random.nextInt(maxFacetDepth - 1); // depth 0 is not useful
int depth = 1 + random.nextInt(maxFacetDepth); // depth 0 is not useful
String[] components = new String[depth];
for (int k = 0; k < depth; k++) {
components[k] = Integer.toString(random.nextInt(maxValue));
addItem();
}
CategoryPath cp = new CategoryPath(components);
facets.setAssociation(cp, null);
facets.add(cp);
addBytes(cp.toString().length()); // very rough approximation
}
return facets;
}
@Override
@ -76,8 +72,8 @@ public class RandomFacetSource extends FacetSource {
public void setConfig(Config config) {
super.setConfig(config);
random = new Random(config.get("rand.seed", 13));
maxDocFacets = config.get("max.doc.facets", 200);
maxFacetDepth = config.get("max.facet.depth", 10);
maxDocFacets = config.get("max.doc.facets", 10);
maxFacetDepth = config.get("max.facet.depth", 3);
maxValue = maxDocFacets * maxFacetDepth;
}
}
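A hedged sketch of driving RandomFacetSource with the properties documented in its javadoc above (the values shown are the defaults; this assumes the benchmark Config class can be constructed from a plain Properties object):

Properties props = new Properties();
props.setProperty("rand.seed", "13");
props.setProperty("max.doc.facets", "10");
props.setProperty("max.facet.depth", "3");
RandomFacetSource source = new RandomFacetSource();
source.setConfig(new Config(props));
List<CategoryPath> facets = new ArrayList<CategoryPath>();
source.getNextFacets(facets); // fills the list with 1..max.doc.facets random paths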

View File

@ -17,49 +17,56 @@ package org.apache.lucene.benchmark.byTask.tasks;
* limitations under the License.
*/
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.feeds.FacetSource;
import org.apache.lucene.facet.associations.CategoryAssociationsContainer;
import org.apache.lucene.facet.index.FacetFields;
import org.apache.lucene.facet.taxonomy.CategoryPath;
/**
* Add a faceted document.
* <p>
* Config properties:
* <ul>
* <li><b>with.facets</b>=&lt;tells whether to actually add any facets to the document| Default: true&gt;
* <br>This config property allows to easily compare the performance of adding docs with and without facets.
* Note that facets are created even when this is false, just that they are not added to the document (nor to the taxonomy).
* </ul>
* <li><b>with.facets</b>=&lt;tells whether to actually add any facets to the
* document| Default: true&gt; <br>
* This config property allows to easily compare the performance of adding docs
* with and without facets. Note that facets are created even when this is
* false, just that they are not added to the document (nor to the taxonomy).
* </ul>
* <p>
* See {@link AddDocTask} for general document parameters and configuration.
* <p>
* Makes use of the {@link FacetSource} in effect - see {@link PerfRunData} for facet source settings.
* Makes use of the {@link FacetSource} in effect - see {@link PerfRunData} for
* facet source settings.
*/
public class AddFacetedDocTask extends AddDocTask {
private final List<CategoryPath> facets = new ArrayList<CategoryPath>();
private FacetFields facetFields;
public AddFacetedDocTask(PerfRunData runData) {
super(runData);
}
private CategoryAssociationsContainer facets = null;
private FacetFields facetFields = null;
private boolean withFacets = true;
@Override
public void setup() throws Exception {
super.setup();
// create the facets even if they should not be added - allows to measure the effect of just adding facets
facets = getRunData().getFacetSource().getNextFacets(facets);
withFacets = getRunData().getConfig().get("with.facets", true);
if (withFacets) {
facetFields = new FacetFields(getRunData().getTaxonomyWriter());
if (facetFields == null) {
boolean withFacets = getRunData().getConfig().get("with.facets", true);
if (withFacets) {
FacetSource facetsSource = getRunData().getFacetSource();
facetFields = withFacets ? new FacetFields(getRunData().getTaxonomyWriter()) : null;
facetsSource.getNextFacets(facets);
}
}
}
@Override
protected String getLogMessage(int recsCount) {
if (!withFacets) {
if (facetFields == null) {
return super.getLogMessage(recsCount);
}
return super.getLogMessage(recsCount)+ " with facets";
@ -67,7 +74,7 @@ public class AddFacetedDocTask extends AddDocTask {
@Override
public int doLogic() throws Exception {
if (withFacets) {
if (facetFields != null) {
facetFields.addFields(doc, facets);
}
return super.doLogic();

View File

@ -205,6 +205,7 @@
<exclude name="queryparser/classes/java/org/apache/lucene/queryparser/classic/QueryParserTokenManager.class"/>
<exclude name="queryparser/classes/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParserTokenManager.class"/>
<exclude name="queryparser/classes/java/org/apache/lucene/queryparser/surround/parser/QueryParserTokenManager.class"/>
<exclude name="facet/classes/java/org/apache/lucene/facet/util/PrintTaxonomyStats.class"/>
</fileset>
</forbidden-apis>
</target>

View File

@ -0,0 +1,102 @@
package org.apache.lucene.codecs.compressing;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.codecs.TermVectorsWriter;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
/**
* A {@link TermVectorsFormat} that compresses chunks of documents together in
* order to improve the compression ratio.
* @lucene.experimental
*/
public final class CompressingTermVectorsFormat extends TermVectorsFormat {
private final String formatName;
private final String segmentSuffix;
private final CompressionMode compressionMode;
private final int chunkSize;
/**
* Create a new {@link CompressingTermVectorsFormat}.
* <p>
* <code>formatName</code> is the name of the format. This name will be used
* in the file formats to perform
* {@link CodecUtil#checkHeader(org.apache.lucene.store.DataInput, String, int, int) codec header checks}.
* <p>
* The <code>compressionMode</code> parameter allows you to choose between
* compression algorithms that have various compression and decompression
* speeds so that you can pick the one that best fits your indexing and
* searching throughput. You should never instantiate two
* {@link CompressingTermVectorsFormat}s that have the same name but
* different {@link CompressionMode}s.
* <p>
* <code>chunkSize</code> is the minimum byte size of a chunk of documents.
* Higher values of <code>chunkSize</code> should improve the compression
* ratio but will require more memory at indexing time and might make document
* loading a little slower (depending on the size of your OS cache compared
* to the size of your index).
*
* @param formatName the name of the {@link StoredFieldsFormat}
* @param segmentSuffix a suffix to append to files created by this format
* @param compressionMode the {@link CompressionMode} to use
* @param chunkSize the minimum number of bytes of a single chunk of stored documents
* @see CompressionMode
*/
public CompressingTermVectorsFormat(String formatName, String segmentSuffix,
CompressionMode compressionMode, int chunkSize) {
this.formatName = formatName;
this.segmentSuffix = segmentSuffix;
this.compressionMode = compressionMode;
if (chunkSize < 1) {
throw new IllegalArgumentException("chunkSize must be >= 1");
}
this.chunkSize = chunkSize;
}
@Override
public TermVectorsReader vectorsReader(Directory directory,
SegmentInfo segmentInfo, FieldInfos fieldInfos, IOContext context)
throws IOException {
return new CompressingTermVectorsReader(directory, segmentInfo, segmentSuffix,
fieldInfos, context, formatName, compressionMode);
}
@Override
public TermVectorsWriter vectorsWriter(Directory directory,
SegmentInfo segmentInfo, IOContext context) throws IOException {
return new CompressingTermVectorsWriter(directory, segmentInfo, segmentSuffix,
context, formatName, compressionMode, chunkSize);
}
@Override
public String toString() {
return getClass().getSimpleName() + "(compressionMode=" + compressionMode
+ ", chunkSize=" + chunkSize + ")";
}
}
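A hedged usage sketch, not part of this patch: the format would typically be exposed through a codec. The codec name, format name and chunk size below are made-up values:

import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.compressing.CompressingTermVectorsFormat;
import org.apache.lucene.codecs.compressing.CompressionMode;
import org.apache.lucene.codecs.lucene41.Lucene41Codec;

public final class MyCompressingCodec extends FilterCodec {
  private final TermVectorsFormat vectorsFormat =
      new CompressingTermVectorsFormat("MyTermVectors", "", CompressionMode.FAST, 1 << 14);

  public MyCompressingCodec() {
    // delegate everything except term vectors to the default codec
    super("MyCompressingCodec", new Lucene41Codec());
  }

  @Override
  public TermVectorsFormat termVectorsFormat() {
    return vectorsFormat;
  }
}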

View File

@ -0,0 +1,818 @@
package org.apache.lucene.codecs.compressing;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayDeque;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Deque;
import java.util.Iterator;
import java.util.SortedSet;
import java.util.TreeSet;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.codecs.TermVectorsWriter;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentReader;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.packed.BlockPackedWriter;
import org.apache.lucene.util.packed.PackedInts;
/**
* {@link TermVectorsWriter} for {@link CompressingTermVectorsFormat}.
* @lucene.experimental
*/
public final class CompressingTermVectorsWriter extends TermVectorsWriter {
static final String VECTORS_EXTENSION = "tvd";
static final String VECTORS_INDEX_EXTENSION = "tvx";
static final String CODEC_SFX_IDX = "Index";
static final String CODEC_SFX_DAT = "Data";
static final int VERSION_START = 0;
static final int VERSION_CURRENT = VERSION_START;
static final int BLOCK_SIZE = 64;
static final int POSITIONS = 0x01;
static final int OFFSETS = 0x02;
static final int PAYLOADS = 0x04;
static final int FLAGS_BITS = PackedInts.bitsRequired(POSITIONS | OFFSETS | PAYLOADS);
private final Directory directory;
private final String segment;
private final String segmentSuffix;
private CompressingStoredFieldsIndexWriter indexWriter;
private IndexOutput vectorsStream;
private final CompressionMode compressionMode;
private final Compressor compressor;
private final int chunkSize;
/** a pending doc */
private class DocData {
final int numFields;
final Deque<FieldData> fields;
final int posStart, offStart, payStart;
DocData(int numFields, int posStart, int offStart, int payStart) {
this.numFields = numFields;
this.fields = new ArrayDeque<FieldData>(numFields);
this.posStart = posStart;
this.offStart = offStart;
this.payStart = payStart;
}
FieldData addField(int fieldNum, int numTerms, boolean positions, boolean offsets, boolean payloads) {
final FieldData field;
if (fields.isEmpty()) {
field = new FieldData(fieldNum, numTerms, positions, offsets, payloads, posStart, offStart, payStart);
} else {
final FieldData last = fields.getLast();
final int posStart = last.posStart + (last.hasPositions ? last.totalPositions : 0);
final int offStart = last.offStart + (last.hasOffsets ? last.totalPositions : 0);
final int payStart = last.payStart + (last.hasPayloads ? last.totalPositions : 0);
field = new FieldData(fieldNum, numTerms, positions, offsets, payloads, posStart, offStart, payStart);
}
fields.add(field);
return field;
}
}
private DocData addDocData(int numVectorFields) {
FieldData last = null;
for (Iterator<DocData> it = pendingDocs.descendingIterator(); it.hasNext(); ) {
final DocData doc = it.next();
if (!doc.fields.isEmpty()) {
last = doc.fields.getLast();
break;
}
}
final DocData doc;
if (last == null) {
doc = new DocData(numVectorFields, 0, 0, 0);
} else {
final int posStart = last.posStart + (last.hasPositions ? last.totalPositions : 0);
final int offStart = last.offStart + (last.hasOffsets ? last.totalPositions : 0);
final int payStart = last.payStart + (last.hasPayloads ? last.totalPositions : 0);
doc = new DocData(numVectorFields, posStart, offStart, payStart);
}
pendingDocs.add(doc);
return doc;
}
/** a pending field */
private class FieldData {
final boolean hasPositions, hasOffsets, hasPayloads;
final int fieldNum, flags, numTerms;
final int[] freqs, prefixLengths, suffixLengths;
final int posStart, offStart, payStart;
int totalPositions;
int ord;
FieldData(int fieldNum, int numTerms, boolean positions, boolean offsets, boolean payloads,
int posStart, int offStart, int payStart) {
this.fieldNum = fieldNum;
this.numTerms = numTerms;
this.hasPositions = positions;
this.hasOffsets = offsets;
this.hasPayloads = payloads;
this.flags = (positions ? POSITIONS : 0) | (offsets ? OFFSETS : 0) | (payloads ? PAYLOADS : 0);
this.freqs = new int[numTerms];
this.prefixLengths = new int[numTerms];
this.suffixLengths = new int[numTerms];
this.posStart = posStart;
this.offStart = offStart;
this.payStart = payStart;
totalPositions = 0;
ord = 0;
}
void addTerm(int freq, int prefixLength, int suffixLength) {
freqs[ord] = freq;
prefixLengths[ord] = prefixLength;
suffixLengths[ord] = suffixLength;
++ord;
}
void addPosition(int position, int startOffset, int length, int payloadLength) {
if (hasPositions) {
if (posStart + totalPositions == positionsBuf.length) {
positionsBuf = ArrayUtil.grow(positionsBuf);
}
positionsBuf[posStart + totalPositions] = position;
}
if (hasOffsets) {
if (offStart + totalPositions == startOffsetsBuf.length) {
final int newLength = ArrayUtil.oversize(offStart + totalPositions, 4);
startOffsetsBuf = Arrays.copyOf(startOffsetsBuf, newLength);
lengthsBuf = Arrays.copyOf(lengthsBuf, newLength);
}
startOffsetsBuf[offStart + totalPositions] = startOffset;
lengthsBuf[offStart + totalPositions] = length;
}
if (hasPayloads) {
if (payStart + totalPositions == payloadLengthsBuf.length) {
payloadLengthsBuf = ArrayUtil.grow(payloadLengthsBuf);
}
payloadLengthsBuf[payStart + totalPositions] = payloadLength;
}
++totalPositions;
}
}
private int numDocs; // total number of docs seen
private final Deque<DocData> pendingDocs; // pending docs
private DocData curDoc; // current document
private FieldData curField; // current field
private final BytesRef lastTerm;
private int[] positionsBuf, startOffsetsBuf, lengthsBuf, payloadLengthsBuf;
private final GrowableByteArrayDataOutput termSuffixes; // buffered term suffixes
private final GrowableByteArrayDataOutput payloadBytes; // buffered term payloads
private final BlockPackedWriter writer;
/** Sole constructor. */
public CompressingTermVectorsWriter(Directory directory, SegmentInfo si, String segmentSuffix, IOContext context,
String formatName, CompressionMode compressionMode, int chunkSize) throws IOException {
assert directory != null;
this.directory = directory;
this.segment = si.name;
this.segmentSuffix = segmentSuffix;
this.compressionMode = compressionMode;
this.compressor = compressionMode.newCompressor();
this.chunkSize = chunkSize;
numDocs = 0;
pendingDocs = new ArrayDeque<DocData>();
termSuffixes = new GrowableByteArrayDataOutput(ArrayUtil.oversize(chunkSize, 1));
payloadBytes = new GrowableByteArrayDataOutput(ArrayUtil.oversize(1, 1));
lastTerm = new BytesRef(ArrayUtil.oversize(30, 1));
boolean success = false;
IndexOutput indexStream = directory.createOutput(IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_INDEX_EXTENSION), context);
try {
vectorsStream = directory.createOutput(IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_EXTENSION), context);
final String codecNameIdx = formatName + CODEC_SFX_IDX;
final String codecNameDat = formatName + CODEC_SFX_DAT;
CodecUtil.writeHeader(indexStream, codecNameIdx, VERSION_CURRENT);
CodecUtil.writeHeader(vectorsStream, codecNameDat, VERSION_CURRENT);
assert CodecUtil.headerLength(codecNameDat) == vectorsStream.getFilePointer();
assert CodecUtil.headerLength(codecNameIdx) == indexStream.getFilePointer();
indexWriter = new CompressingStoredFieldsIndexWriter(indexStream);
indexStream = null;
vectorsStream.writeVInt(PackedInts.VERSION_CURRENT);
vectorsStream.writeVInt(chunkSize);
writer = new BlockPackedWriter(vectorsStream, BLOCK_SIZE);
positionsBuf = new int[1024];
startOffsetsBuf = new int[1024];
lengthsBuf = new int[1024];
payloadLengthsBuf = new int[1024];
success = true;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(indexStream);
abort();
}
}
}
@Override
public void close() throws IOException {
try {
IOUtils.close(vectorsStream, indexWriter);
} finally {
vectorsStream = null;
indexWriter = null;
}
}
@Override
public void abort() {
IOUtils.closeWhileHandlingException(this);
IOUtils.deleteFilesIgnoringExceptions(directory,
IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_EXTENSION),
IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_INDEX_EXTENSION));
}
@Override
public void startDocument(int numVectorFields) throws IOException {
curDoc = addDocData(numVectorFields);
}
@Override
public void finishDocument() throws IOException {
// append the payload bytes of the doc after its terms
termSuffixes.writeBytes(payloadBytes.bytes, payloadBytes.length);
payloadBytes.length = 0;
++numDocs;
if (triggerFlush()) {
flush();
}
curDoc = null;
}
@Override
public void startField(FieldInfo info, int numTerms, boolean positions,
boolean offsets, boolean payloads) throws IOException {
curField = curDoc.addField(info.number, numTerms, positions, offsets, payloads);
lastTerm.length = 0;
}
@Override
public void finishField() throws IOException {
curField = null;
}
@Override
public void startTerm(BytesRef term, int freq) throws IOException {
assert freq >= 1;
final int prefix = StringHelper.bytesDifference(lastTerm, term);
curField.addTerm(freq, prefix, term.length - prefix);
termSuffixes.writeBytes(term.bytes, term.offset + prefix, term.length - prefix);
// copy last term
if (lastTerm.bytes.length < term.length) {
lastTerm.bytes = new byte[ArrayUtil.oversize(term.length, 1)];
}
lastTerm.offset = 0;
lastTerm.length = term.length;
System.arraycopy(term.bytes, term.offset, lastTerm.bytes, 0, term.length);
}
@Override
public void addPosition(int position, int startOffset, int endOffset,
BytesRef payload) throws IOException {
assert curField.flags != 0;
curField.addPosition(position, startOffset, endOffset - startOffset, payload == null ? 0 : payload.length);
if (curField.hasPayloads && payload != null) {
payloadBytes.writeBytes(payload.bytes, payload.offset, payload.length);
}
}
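// flush a chunk once enough term bytes have been buffered, or once chunkSize docs are pending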
private boolean triggerFlush() {
return termSuffixes.length >= chunkSize || pendingDocs.size() >= chunkSize;
}
private void flush() throws IOException {
final int chunkDocs = pendingDocs.size();
assert chunkDocs > 0 : chunkDocs;
// write the index file
indexWriter.writeIndex(chunkDocs, vectorsStream.getFilePointer());
final int docBase = numDocs - chunkDocs;
vectorsStream.writeVInt(docBase);
vectorsStream.writeVInt(chunkDocs);
// total number of fields of the chunk
final int totalFields = flushNumFields(chunkDocs);
if (totalFields > 0) {
// unique field numbers (sorted)
final int[] fieldNums = flushFieldNums();
// offsets in the array of unique field numbers
flushFields(totalFields, fieldNums);
// flags (does the field have positions, offsets, payloads?)
flushFlags(totalFields, fieldNums);
// number of terms of each field
flushNumTerms(totalFields);
// prefix and suffix lengths for each field
flushTermLengths();
// term freqs - 1 (because termFreq is always >=1) for each term
flushTermFreqs();
// positions for all terms, when enabled
flushPositions();
// offsets for all terms, when enabled
flushOffsets(fieldNums);
// payload lengths for all terms, when enabled
flushPayloadLengths();
// compress terms and payloads and write them to the output
compressor.compress(termSuffixes.bytes, 0, termSuffixes.length, vectorsStream);
}
// reset
pendingDocs.clear();
curDoc = null;
curField = null;
termSuffixes.length = 0;
}
private int flushNumFields(int chunkDocs) throws IOException {
if (chunkDocs == 1) {
final int numFields = pendingDocs.getFirst().numFields;
vectorsStream.writeVInt(numFields);
return numFields;
} else {
writer.reset(vectorsStream);
int totalFields = 0;
for (DocData dd : pendingDocs) {
writer.add(dd.numFields);
totalFields += dd.numFields;
}
writer.finish();
return totalFields;
}
}
/** Returns a sorted array containing unique field numbers */
private int[] flushFieldNums() throws IOException {
SortedSet<Integer> fieldNums = new TreeSet<Integer>();
for (DocData dd : pendingDocs) {
for (FieldData fd : dd.fields) {
fieldNums.add(fd.fieldNum);
}
}
final int numDistinctFields = fieldNums.size();
assert numDistinctFields > 0;
final int bitsRequired = PackedInts.bitsRequired(fieldNums.last());
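// token layout: the high 3 bits hold min(numDistinctFields - 1, 7) and the low 5 bits hold
// bitsRequired; with 8 or more distinct fields the remainder follows as a VInt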
final int token = (Math.min(numDistinctFields - 1, 0x07) << 5) | bitsRequired;
vectorsStream.writeByte((byte) token);
if (numDistinctFields - 1 >= 0x07) {
vectorsStream.writeVInt(numDistinctFields - 1 - 0x07);
}
final PackedInts.Writer writer = PackedInts.getWriterNoHeader(vectorsStream, PackedInts.Format.PACKED, fieldNums.size(), bitsRequired, 1);
for (Integer fieldNum : fieldNums) {
writer.add(fieldNum);
}
writer.finish();
int[] fns = new int[fieldNums.size()];
int i = 0;
for (Integer key : fieldNums) {
fns[i++] = key;
}
return fns;
}
private void flushFields(int totalFields, int[] fieldNums) throws IOException {
final PackedInts.Writer writer = PackedInts.getWriterNoHeader(vectorsStream, PackedInts.Format.PACKED, totalFields, PackedInts.bitsRequired(fieldNums.length - 1), 1);
for (DocData dd : pendingDocs) {
for (FieldData fd : dd.fields) {
final int fieldNumIndex = Arrays.binarySearch(fieldNums, fd.fieldNum);
assert fieldNumIndex >= 0;
writer.add(fieldNumIndex);
}
}
writer.finish();
}
private void flushFlags(int totalFields, int[] fieldNums) throws IOException {
// check if fields always have the same flags
boolean nonChangingFlags = true;
int[] fieldFlags = new int[fieldNums.length];
Arrays.fill(fieldFlags, -1);
outer:
for (DocData dd : pendingDocs) {
for (FieldData fd : dd.fields) {
final int fieldNumOff = Arrays.binarySearch(fieldNums, fd.fieldNum);
assert fieldNumOff >= 0;
if (fieldFlags[fieldNumOff] == -1) {
fieldFlags[fieldNumOff] = fd.flags;
} else if (fieldFlags[fieldNumOff] != fd.flags) {
nonChangingFlags = false;
break outer;
}
}
}
if (nonChangingFlags) {
// write one flag per field num
vectorsStream.writeVInt(0);
final PackedInts.Writer writer = PackedInts.getWriterNoHeader(vectorsStream, PackedInts.Format.PACKED, fieldFlags.length, FLAGS_BITS, 1);
for (int flags : fieldFlags) {
assert flags >= 0;
writer.add(flags);
}
assert writer.ord() == fieldFlags.length - 1;
writer.finish();
} else {
// write one flag for every field instance
vectorsStream.writeVInt(1);
final PackedInts.Writer writer = PackedInts.getWriterNoHeader(vectorsStream, PackedInts.Format.PACKED, totalFields, FLAGS_BITS, 1);
for (DocData dd : pendingDocs) {
for (FieldData fd : dd.fields) {
writer.add(fd.flags);
}
}
assert writer.ord() == totalFields - 1;
writer.finish();
}
}
private void flushNumTerms(int totalFields) throws IOException {
int maxNumTerms = 0;
for (DocData dd : pendingDocs) {
for (FieldData fd : dd.fields) {
maxNumTerms |= fd.numTerms;
}
}
final int bitsRequired = PackedInts.bitsRequired(maxNumTerms);
vectorsStream.writeVInt(bitsRequired);
final PackedInts.Writer writer = PackedInts.getWriterNoHeader(
vectorsStream, PackedInts.Format.PACKED, totalFields, bitsRequired, 1);
for (DocData dd : pendingDocs) {
for (FieldData fd : dd.fields) {
writer.add(fd.numTerms);
}
}
assert writer.ord() == totalFields - 1;
writer.finish();
}
private void flushTermLengths() throws IOException {
writer.reset(vectorsStream);
for (DocData dd : pendingDocs) {
for (FieldData fd : dd.fields) {
for (int i = 0; i < fd.numTerms; ++i) {
writer.add(fd.prefixLengths[i]);
}
}
}
writer.finish();
writer.reset(vectorsStream);
for (DocData dd : pendingDocs) {
for (FieldData fd : dd.fields) {
for (int i = 0; i < fd.numTerms; ++i) {
writer.add(fd.suffixLengths[i]);
}
}
}
writer.finish();
}
private void flushTermFreqs() throws IOException {
writer.reset(vectorsStream);
for (DocData dd : pendingDocs) {
for (FieldData fd : dd.fields) {
for (int i = 0; i < fd.numTerms; ++i) {
writer.add(fd.freqs[i] - 1);
}
}
}
writer.finish();
}
private void flushPositions() throws IOException {
writer.reset(vectorsStream);
for (DocData dd : pendingDocs) {
for (FieldData fd : dd.fields) {
if (fd.hasPositions) {
int pos = 0;
for (int i = 0; i < fd.numTerms; ++i) {
int previousPosition = 0;
for (int j = 0; j < fd.freqs[i]; ++j) {
final int position = positionsBuf[fd.posStart + pos++];
writer.add(position - previousPosition);
previousPosition = position;
}
}
assert pos == fd.totalPositions;
}
}
}
writer.finish();
}
private void flushOffsets(int[] fieldNums) throws IOException {
boolean hasOffsets = false;
long[] sumPos = new long[fieldNums.length];
long[] sumOffsets = new long[fieldNums.length];
for (DocData dd : pendingDocs) {
for (FieldData fd : dd.fields) {
hasOffsets |= fd.hasOffsets;
if (fd.hasOffsets && fd.hasPositions) {
final int fieldNumOff = Arrays.binarySearch(fieldNums, fd.fieldNum);
int pos = 0;
for (int i = 0; i < fd.numTerms; ++i) {
int previousPos = 0;
int previousOff = 0;
for (int j = 0; j < fd.freqs[i]; ++j) {
final int position = positionsBuf[fd.posStart + pos];
final int startOffset = startOffsetsBuf[fd.offStart + pos];
sumPos[fieldNumOff] += position - previousPos;
sumOffsets[fieldNumOff] += startOffset - previousOff;
previousPos = position;
previousOff = startOffset;
++pos;
}
}
assert pos == fd.totalPositions;
}
}
}
if (!hasOffsets) {
// nothing to do
return;
}
final float[] charsPerTerm = new float[fieldNums.length];
for (int i = 0; i < fieldNums.length; ++i) {
charsPerTerm[i] = (sumPos[i] <= 0 || sumOffsets[i] <= 0) ? 0 : (float) ((double) sumOffsets[i] / sumPos[i]);
}
// start offsets
for (int i = 0; i < fieldNums.length; ++i) {
vectorsStream.writeInt(Float.floatToRawIntBits(charsPerTerm[i]));
}
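// each start offset is delta-encoded against an expected value, previousOff + cpt * (position - previousPos),
// so offsets that grow roughly linearly with positions become small deltas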
writer.reset(vectorsStream);
for (DocData dd : pendingDocs) {
for (FieldData fd : dd.fields) {
if ((fd.flags & OFFSETS) != 0) {
final int fieldNumOff = Arrays.binarySearch(fieldNums, fd.fieldNum);
final float cpt = charsPerTerm[fieldNumOff];
int pos = 0;
for (int i = 0; i < fd.numTerms; ++i) {
int previousPos = 0;
int previousOff = 0;
for (int j = 0; j < fd.freqs[i]; ++j) {
final int position = fd.hasPositions ? positionsBuf[fd.posStart + pos] : 0;
final int startOffset = startOffsetsBuf[fd.offStart + pos];
writer.add(startOffset - previousOff - (int) (cpt * (position - previousPos)));
previousPos = position;
previousOff = startOffset;
++pos;
}
}
}
}
}
writer.finish();
// lengths
writer.reset(vectorsStream);
for (DocData dd : pendingDocs) {
for (FieldData fd : dd.fields) {
if ((fd.flags & OFFSETS) != 0) {
int pos = 0;
for (int i = 0; i < fd.numTerms; ++i) {
for (int j = 0; j < fd.freqs[i]; ++j) {
writer.add(lengthsBuf[fd.offStart + pos++] - fd.prefixLengths[i] - fd.suffixLengths[i]);
}
}
assert pos == fd.totalPositions;
}
}
}
writer.finish();
}
private void flushPayloadLengths() throws IOException {
writer.reset(vectorsStream);
for (DocData dd : pendingDocs) {
for (FieldData fd : dd.fields) {
if (fd.hasPayloads) {
for (int i = 0; i < fd.totalPositions; ++i) {
writer.add(payloadLengthsBuf[fd.payStart + i]);
}
}
}
}
writer.finish();
}
@Override
public void finish(FieldInfos fis, int numDocs) throws IOException {
if (!pendingDocs.isEmpty()) {
flush();
}
if (numDocs != this.numDocs) {
throw new RuntimeException("Wrote " + this.numDocs + " docs, finish called with numDocs=" + numDocs);
}
indexWriter.finish(numDocs);
}
@Override
public Comparator<BytesRef> getComparator() {
return BytesRef.getUTF8SortedAsUnicodeComparator();
}
@Override
public void addProx(int numProx, DataInput positions, DataInput offsets)
throws IOException {
assert (curField.hasPositions) == (positions != null);
assert (curField.hasOffsets) == (offsets != null);
if (curField.hasPositions) {
final int posStart = curField.posStart + curField.totalPositions;
if (posStart + numProx > positionsBuf.length) {
positionsBuf = ArrayUtil.grow(positionsBuf, posStart + numProx);
}
int position = 0;
if (curField.hasPayloads) {
final int payStart = curField.payStart + curField.totalPositions;
if (payStart + numProx > payloadLengthsBuf.length) {
payloadLengthsBuf = ArrayUtil.grow(payloadLengthsBuf, payStart + numProx);
}
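// each VInt holds (positionDelta << 1) | hasPayload; when the low bit is set,
// the payload length and its bytes follow in the stream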
for (int i = 0; i < numProx; ++i) {
final int code = positions.readVInt();
if ((code & 1) != 0) {
// This position has a payload
final int payloadLength = positions.readVInt();
payloadLengthsBuf[payStart + i] = payloadLength;
payloadBytes.copyBytes(positions, payloadLength);
} else {
payloadLengthsBuf[payStart + i] = 0;
}
position += code >>> 1;
positionsBuf[posStart + i] = position;
}
} else {
for (int i = 0; i < numProx; ++i) {
position += (positions.readVInt() >>> 1);
positionsBuf[posStart + i] = position;
}
}
}
if (curField.hasOffsets) {
final int offStart = curField.offStart + curField.totalPositions;
if (offStart + numProx > startOffsetsBuf.length) {
final int newLength = ArrayUtil.oversize(offStart + numProx, 4);
startOffsetsBuf = Arrays.copyOf(startOffsetsBuf, newLength);
lengthsBuf = Arrays.copyOf(lengthsBuf, newLength);
}
int lastOffset = 0, startOffset, endOffset;
for (int i = 0; i < numProx; ++i) {
startOffset = lastOffset + offsets.readVInt();
endOffset = startOffset + offsets.readVInt();
lastOffset = endOffset;
startOffsetsBuf[offStart + i] = startOffset;
lengthsBuf[offStart + i] = endOffset - startOffset;
}
}
curField.totalPositions += numProx;
}
@Override
public int merge(MergeState mergeState) throws IOException {
int docCount = 0;
int idx = 0;
for (AtomicReader reader : mergeState.readers) {
final SegmentReader matchingSegmentReader = mergeState.matchingSegmentReaders[idx++];
CompressingTermVectorsReader matchingVectorsReader = null;
if (matchingSegmentReader != null) {
final TermVectorsReader vectorsReader = matchingSegmentReader.getTermVectorsReader();
// we can only bulk-copy if the matching reader is also a CompressingTermVectorsReader
if (vectorsReader != null && vectorsReader instanceof CompressingTermVectorsReader) {
matchingVectorsReader = (CompressingTermVectorsReader) vectorsReader;
}
}
final int maxDoc = reader.maxDoc();
final Bits liveDocs = reader.getLiveDocs();
if (matchingVectorsReader == null
|| matchingVectorsReader.getCompressionMode() != compressionMode
|| matchingVectorsReader.getChunkSize() != chunkSize
|| matchingVectorsReader.getPackedIntsVersion() != PackedInts.VERSION_CURRENT) {
// naive merge...
for (int i = nextLiveDoc(0, liveDocs, maxDoc); i < maxDoc; i = nextLiveDoc(i + 1, liveDocs, maxDoc)) {
final Fields vectors = reader.getTermVectors(i);
addAllDocVectors(vectors, mergeState);
++docCount;
mergeState.checkAbort.work(300);
}
} else {
final CompressingStoredFieldsIndexReader index = matchingVectorsReader.getIndex();
final IndexInput vectorsStream = matchingVectorsReader.getVectorsStream();
for (int i = nextLiveDoc(0, liveDocs, maxDoc); i < maxDoc; ) {
if (pendingDocs.isEmpty()
&& (i == 0 || index.getStartPointer(i - 1) < index.getStartPointer(i))) { // start of a chunk
final long startPointer = index.getStartPointer(i);
vectorsStream.seek(startPointer);
final int docBase = vectorsStream.readVInt();
final int chunkDocs = vectorsStream.readVInt();
assert docBase + chunkDocs <= matchingSegmentReader.maxDoc();
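// raw-copy the compressed chunk when its end pointer is known (it is not the last chunk of the
// source segment) and it contains no deleted docs; otherwise decompress and re-add each live doc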
if (docBase + chunkDocs < matchingSegmentReader.maxDoc()
&& nextDeletedDoc(docBase, liveDocs, docBase + chunkDocs) == docBase + chunkDocs) {
final long chunkEnd = index.getStartPointer(docBase + chunkDocs);
final long chunkLength = chunkEnd - vectorsStream.getFilePointer();
indexWriter.writeIndex(chunkDocs, this.vectorsStream.getFilePointer());
this.vectorsStream.writeVInt(docCount);
this.vectorsStream.writeVInt(chunkDocs);
this.vectorsStream.copyBytes(vectorsStream, chunkLength);
docCount += chunkDocs;
this.numDocs += chunkDocs;
mergeState.checkAbort.work(300 * chunkDocs);
i = nextLiveDoc(docBase + chunkDocs, liveDocs, maxDoc);
} else {
for (; i < docBase + chunkDocs; i = nextLiveDoc(i + 1, liveDocs, maxDoc)) {
final Fields vectors = reader.getTermVectors(i);
addAllDocVectors(vectors, mergeState);
++docCount;
mergeState.checkAbort.work(300);
}
}
} else {
final Fields vectors = reader.getTermVectors(i);
addAllDocVectors(vectors, mergeState);
++docCount;
mergeState.checkAbort.work(300);
i = nextLiveDoc(i + 1, liveDocs, maxDoc);
}
}
}
}
finish(mergeState.fieldInfos, docCount);
return docCount;
}
private static int nextLiveDoc(int doc, Bits liveDocs, int maxDoc) {
if (liveDocs == null) {
return doc;
}
while (doc < maxDoc && !liveDocs.get(doc)) {
++doc;
}
return doc;
}
private static int nextDeletedDoc(int doc, Bits liveDocs, int maxDoc) {
if (liveDocs == null) {
return maxDoc;
}
while (doc < maxDoc && liveDocs.get(doc)) {
++doc;
}
return doc;
}
}

View File

@ -71,11 +71,12 @@ public final class BlockPackedReader {
return i;
}
final DataInput in;
DataInput in;
final int packedIntsVersion;
final long valueCount;
long valueCount;
final int blockSize;
final LongsRef values;
final long[] values;
final LongsRef valuesRef;
byte[] blocks;
int off;
long ord;
@ -87,10 +88,17 @@ public final class BlockPackedReader {
*/
public BlockPackedReader(DataInput in, int packedIntsVersion, int blockSize, long valueCount) {
checkBlockSize(blockSize);
this.in = in;
this.packedIntsVersion = packedIntsVersion;
this.blockSize = blockSize;
this.values = new LongsRef(blockSize);
this.values = new long[blockSize];
this.valuesRef = new LongsRef(this.values, 0, 0);
reset(in, valueCount);
}
/** Reset the current reader to wrap a stream of <code>valueCount</code>
* values contained in <code>in</code>. The block size remains unchanged. */
public void reset(DataInput in, long valueCount) {
this.in = in;
assert valueCount >= 0;
this.valueCount = valueCount;
off = blockSize;
@ -159,9 +167,15 @@ public final class BlockPackedReader {
/** Read the next value. */
public long next() throws IOException {
next(1);
assert values.length == 1;
return values.longs[values.offset];
if (ord == valueCount) {
throw new EOFException();
}
if (off == blockSize) {
refill();
}
final long value = values[off++];
++ord;
return value;
}
/** Read between <tt>1</tt> and <code>count</code> values. */
@ -177,11 +191,11 @@ public final class BlockPackedReader {
count = Math.min(count, blockSize - off);
count = (int) Math.min(count, valueCount - ord);
values.offset = off;
values.length = count;
valuesRef.offset = off;
valuesRef.length = count;
off += count;
ord += count;
return values;
return valuesRef;
}
private void refill() throws IOException {
@ -195,7 +209,7 @@ public final class BlockPackedReader {
assert minEquals0 || minValue != 0;
if (bitsPerValue == 0) {
Arrays.fill(values.longs, minValue);
Arrays.fill(values, minValue);
} else {
final PackedInts.Decoder decoder = PackedInts.getDecoder(PackedInts.Format.PACKED, packedIntsVersion, bitsPerValue);
final int iterations = blockSize / decoder.valueCount();
@ -208,11 +222,11 @@ public final class BlockPackedReader {
final int blocksCount = (int) PackedInts.Format.PACKED.byteCount(packedIntsVersion, valueCount, bitsPerValue);
in.readBytes(blocks, 0, blocksCount);
decoder.decode(blocks, 0, values.longs, 0, iterations);
decoder.decode(blocks, 0, values, 0, iterations);
if (minValue != 0) {
for (int i = 0; i < valueCount; ++i) {
values.longs[i] += minValue;
values[i] += minValue;
}
}
}

View File

@ -62,7 +62,7 @@ public final class BlockPackedWriter {
out.writeByte((byte) i);
}
final DataOutput out;
DataOutput out;
final long[] values;
byte[] blocks;
int off;
@ -75,8 +75,14 @@ public final class BlockPackedWriter {
*/
public BlockPackedWriter(DataOutput out, int blockSize) {
checkBlockSize(blockSize);
this.out = out;
reset(out);
values = new long[blockSize];
}
/** Reset this writer to wrap <code>out</code>. The block size remains unchanged. */
public void reset(DataOutput out) {
assert out != null;
this.out = out;
off = 0;
ord = 0L;
finished = false;
@ -99,7 +105,8 @@ public final class BlockPackedWriter {
}
/** Flush all buffered data to disk. This instance is not usable anymore
* after this method has been called. */
* after this method has been called until {@link #reset(DataOutput)} has
* been called. */
public void finish() throws IOException {
checkNotFinished();
if (off > 0) {

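A hedged round-trip sketch based only on the methods shown in the two diffs above: values are written with BlockPackedWriter, read back with BlockPackedReader, and both can now be reused against a new stream via reset():

Directory dir = new RAMDirectory();
IndexOutput out = dir.createOutput("values.bin", IOContext.DEFAULT);
BlockPackedWriter writer = new BlockPackedWriter(out, 64); // 64 values per block
for (long value : new long[] {3, 1, 4, 1, 5, 9}) {
  writer.add(value);
}
writer.finish(); // flushes the last, possibly partial, block
out.close();

IndexInput in = dir.openInput("values.bin", IOContext.DEFAULT);
BlockPackedReader reader = new BlockPackedReader(in, PackedInts.VERSION_CURRENT, 64, 6);
for (int i = 0; i < 6; ++i) {
  long value = reader.next(); // calling next() past the declared valueCount throws EOFException
}
in.close();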
View File

@ -1500,7 +1500,15 @@ public class TestIndexWriter extends LuceneTestCase {
doc.add(newField("c", "val", customType));
writer.addDocument(doc);
// Adding just one document does not call flush yet.
assertEquals("only the stored and term vector files should exist in the directory", 5 + extraFileCount, dir.listAll().length);
int computedExtraFileCount = 0;
for (String file : dir.listAll()) {
if (file.lastIndexOf('.') < 0
// don't count stored fields and term vectors in
|| !Arrays.asList("fdx", "fdt", "tvx", "tvd", "tvf").contains(file.substring(file.lastIndexOf('.') + 1))) {
++computedExtraFileCount;
}
}
assertEquals("only the stored and term vector files should exist in the directory", extraFileCount, computedExtraFileCount);
doc = new Document();
doc.add(newField("c", "val", customType));

View File

@ -17,21 +17,38 @@ package org.apache.lucene.index;
* limitations under the License.
*/
import java.io.IOException;
import java.io.StringReader;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.analysis.CannedTokenStream;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
import com.carrotsearch.randomizedtesting.generators.RandomInts;
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
public class TestPayloadsOnVectors extends LuceneTestCase {
@ -141,4 +158,314 @@ public class TestPayloadsOnVectors extends LuceneTestCase {
writer.close();
dir.close();
}
// custom impl to test cases that are forbidden by the default OffsetAttribute impl
static class PermissiveOffsetAttributeImpl extends AttributeImpl implements OffsetAttribute {
int start, end;
@Override
public int startOffset() {
return start;
}
@Override
public int endOffset() {
return end;
}
@Override
public void setOffset(int startOffset, int endOffset) {
// no check!
start = startOffset;
end = endOffset;
}
@Override
public void clear() {
start = end = 0;
}
@Override
public boolean equals(Object other) {
if (other == this) {
return true;
}
if (other instanceof PermissiveOffsetAttributeImpl) {
PermissiveOffsetAttributeImpl o = (PermissiveOffsetAttributeImpl) other;
return o.start == start && o.end == end;
}
return false;
}
@Override
public int hashCode() {
return start + 31 * end;
}
@Override
public void copyTo(AttributeImpl target) {
OffsetAttribute t = (OffsetAttribute) target;
t.setOffset(start, end);
}
}
static BytesRef randomPayload() {
final int len = random().nextInt(5);
if (len == 0) {
return null;
}
final BytesRef payload = new BytesRef(len);
random().nextBytes(payload.bytes);
payload.length = len;
return payload;
}
class RandomTokenStream extends TokenStream {
final String[] terms;
final int[] positionsIncrements;
final int[] positions;
final int[] startOffsets, endOffsets;
final BytesRef[] payloads;
final Map<Integer, Set<Integer>> positionToTerms;
final Map<Integer, Set<Integer>> startOffsetToTerms;
final CharTermAttribute termAtt;
final PositionIncrementAttribute piAtt;
final OffsetAttribute oAtt;
final PayloadAttribute pAtt;
int i = 0;
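// when "weird" is true, produce arbitrary (possibly negative or backwards) offsets that the
// default OffsetAttribute implementation would reject, hence PermissiveOffsetAttributeImpl below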
RandomTokenStream(int len, String[] sampleTerms, boolean weird) {
terms = new String[len];
positionsIncrements = new int[len];
positions = new int[len];
startOffsets = new int[len];
endOffsets = new int[len];
payloads = new BytesRef[len];
for (int i = 0; i < len; ++i) {
terms[i] = RandomPicks.randomFrom(random(), sampleTerms);
if (weird) {
positionsIncrements[i] = random().nextInt(1 << 18);
startOffsets[i] = random().nextInt();
endOffsets[i] = random().nextInt();
} else if (i == 0) {
positionsIncrements[i] = _TestUtil.nextInt(random(), 1, 1 << 5);
startOffsets[i] = _TestUtil.nextInt(random(), 0, 1 << 16);
endOffsets[i] = startOffsets[i] + _TestUtil.nextInt(random(), 0, rarely() ? 1 << 10 : 20);
} else {
positionsIncrements[i] = _TestUtil.nextInt(random(), 0, 1 << 5);
startOffsets[i] = startOffsets[i-1] + _TestUtil.nextInt(random(), 0, 1 << 16);
endOffsets[i] = startOffsets[i] + _TestUtil.nextInt(random(), 0, rarely() ? 1 << 10 : 20);
}
}
for (int i = 0; i < len; ++i) {
if (i == 0) {
positions[i] = positionsIncrements[i] - 1;
} else {
positions[i] = positions[i - 1] + positionsIncrements[i];
}
}
if (rarely()) {
Arrays.fill(payloads, randomPayload());
} else {
for (int i = 0; i < len; ++i) {
payloads[i] = randomPayload();
}
}
positionToTerms = new HashMap<Integer, Set<Integer>>();
startOffsetToTerms = new HashMap<Integer, Set<Integer>>();
for (int i = 0; i < len; ++i) {
if (!positionToTerms.containsKey(positions[i])) {
positionToTerms.put(positions[i], new HashSet<Integer>(1));
}
positionToTerms.get(positions[i]).add(i);
if (!startOffsetToTerms.containsKey(startOffsets[i])) {
startOffsetToTerms.put(startOffsets[i], new HashSet<Integer>(1));
}
startOffsetToTerms.get(startOffsets[i]).add(i);
}
addAttributeImpl(new PermissiveOffsetAttributeImpl());
termAtt = addAttribute(CharTermAttribute.class);
piAtt = addAttribute(PositionIncrementAttribute.class);
oAtt = addAttribute(OffsetAttribute.class);
pAtt = addAttribute(PayloadAttribute.class);
}
@Override
public final boolean incrementToken() throws IOException {
if (i < terms.length) {
termAtt.setLength(0).append(terms[i]);
piAtt.setPositionIncrement(positionsIncrements[i]);
oAtt.setOffset(startOffsets[i], endOffsets[i]);
pAtt.setPayload(payloads[i]);
++i;
return true;
} else {
return false;
}
}
}
static FieldType randomFieldType() {
FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
ft.setStoreTermVectors(true);
ft.setStoreTermVectorPositions(random().nextBoolean());
ft.setStoreTermVectorOffsets(random().nextBoolean());
if (random().nextBoolean()) {
ft.setStoreTermVectorPositions(true);
ft.setStoreTermVectorPayloads(true);
}
ft.freeze();
return ft;
}
public void testRandomVectors() throws IOException {
Directory dir = newDirectory();
IndexWriterConfig iwConf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
iwConf.setMaxBufferedDocs(RandomInts.randomIntBetween(random(), 2, 30));
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf);
String[] sampleTerms = new String[RandomInts.randomIntBetween(random(), 20, 50)];
for (int i = 0; i < sampleTerms.length; ++i) {
sampleTerms[i] = _TestUtil.randomUnicodeString(random());
}
FieldType ft = randomFieldType();
// generate random documents and index them
final String[] fieldNames = new String[_TestUtil.nextInt(random(), 1, 200)];
for (int i = 0; i < fieldNames.length; ++i) {
String fieldName;
do {
fieldName = _TestUtil.randomSimpleString(random());
} while ("id".equals(fieldName));
fieldNames[i] = fieldName;
}
final int numDocs = _TestUtil.nextInt(random(), 10, 100);
@SuppressWarnings("unchecked")
final Map<String, RandomTokenStream>[] fieldValues = new Map[numDocs];
for (int i = 0; i < numDocs; ++i) {
fieldValues[i] = new HashMap<String, RandomTokenStream>();
final int numFields = _TestUtil.nextInt(random(), 0, rarely() ? fieldNames.length : 5);
for (int j = 0; j < numFields; ++j) {
final String fieldName = fieldNames[(i+j*31) % fieldNames.length];
final int tokenStreamLen = _TestUtil.nextInt(random(), 1, rarely() ? 300 : 5);
fieldValues[i].put(fieldName, new RandomTokenStream(tokenStreamLen, sampleTerms, rarely()));
}
}
// index them
for (int i = 0; i < numDocs; ++i) {
Document doc = new Document();
doc.add(new IntField("id", i, Store.YES));
for (Map.Entry<String, RandomTokenStream> entry : fieldValues[i].entrySet()) {
doc.add(new Field(entry.getKey(), entry.getValue(), ft));
}
iw.addDocument(doc);
}
iw.commit();
// make sure the format can merge
iw.forceMerge(2);
// read term vectors
final DirectoryReader reader = DirectoryReader.open(dir);
for (int i = 0; i < 100; ++i) {
final int docID = random().nextInt(numDocs);
final Map<String, RandomTokenStream> fvs = fieldValues[reader.document(docID).getField("id").numericValue().intValue()];
final Fields fields = reader.getTermVectors(docID);
if (fvs.isEmpty()) {
assertNull(fields);
} else {
Set<String> fns = new HashSet<String>();
for (String field : fields) {
fns.add(field);
}
assertEquals(fields.size(), fns.size());
assertEquals(fvs.keySet(), fns);
for (String field : fields) {
final RandomTokenStream tk = fvs.get(field);
assert tk != null;
final Terms terms = fields.terms(field);
assertEquals(ft.storeTermVectorPositions(), terms.hasPositions());
assertEquals(ft.storeTermVectorOffsets(), terms.hasOffsets());
assertEquals(1, terms.getDocCount());
final TermsEnum termsEnum = terms.iterator(null);
while (termsEnum.next() != null) {
assertEquals(1, termsEnum.docFreq());
final DocsAndPositionsEnum docsAndPositionsEnum = termsEnum.docsAndPositions(null, null);
final DocsEnum docsEnum = docsAndPositionsEnum == null ? termsEnum.docs(null, null) : docsAndPositionsEnum;
if (ft.storeTermVectorOffsets() || ft.storeTermVectorPositions()) {
assertNotNull(docsAndPositionsEnum);
}
assertEquals(0, docsEnum.nextDoc());
if (terms.hasPositions() || terms.hasOffsets()) {
final int freq = docsEnum.freq();
assertTrue(freq >= 1);
if (docsAndPositionsEnum != null) {
for (int k = 0; k < freq; ++k) {
final int position = docsAndPositionsEnum.nextPosition();
final Set<Integer> indexes;
if (terms.hasPositions()) {
indexes = tk.positionToTerms.get(position);
assertNotNull(tk.positionToTerms.keySet().toString() + " does not contain " + position, indexes);
} else {
indexes = tk.startOffsetToTerms.get(docsAndPositionsEnum.startOffset());
assertNotNull(indexes);
}
if (terms.hasPositions()) {
boolean foundPosition = false;
for (int index : indexes) {
if (new BytesRef(tk.terms[index]).equals(termsEnum.term()) && tk.positions[index] == position) {
foundPosition = true;
break;
}
}
assertTrue(foundPosition);
}
if (terms.hasOffsets()) {
boolean foundOffset = false;
for (int index : indexes) {
if (new BytesRef(tk.terms[index]).equals(termsEnum.term()) && tk.startOffsets[index] == docsAndPositionsEnum.startOffset() && tk.endOffsets[index] == docsAndPositionsEnum.endOffset()) {
foundOffset = true;
break;
}
}
assertTrue(foundOffset);
}
if (terms.hasPayloads()) {
boolean foundPayload = false;
for (int index : indexes) {
if (new BytesRef(tk.terms[index]).equals(termsEnum.term()) && equals(tk.payloads[index], docsAndPositionsEnum.getPayload())) {
foundPayload = true;
break;
}
}
assertTrue(foundPayload);
}
}
}
}
assertEquals(DocsEnum.NO_MORE_DOCS, docsEnum.nextDoc());
}
}
}
}
IOUtils.close(reader, iw, dir);
}
private static boolean equals(Object o1, Object o2) {
if (o1 == null) {
return o2 == null;
} else {
return o1.equals(o2);
}
}
}

View File

@ -39,8 +39,6 @@
<path id="test.classpath">
<path refid="test.base.classpath" />
<pathelement location="${build.dir}/classes/examples" />
<!-- TODO, cut over tests to MockAnalyzer etc and nuke this dependency -->
<pathelement path="${analyzers-common.jar}" />
</path>
<path id="classpath">

View File

@ -109,8 +109,7 @@ public class MultiCLSearcher {
// behavior - in those
// situations other, more low-level interfaces are available, as
// demonstrated in other search examples.
FacetsCollector facetsCollector = new FacetsCollector(
facetSearchParams, indexReader, taxo);
FacetsCollector facetsCollector = FacetsCollector.create(facetSearchParams, indexReader, taxo);
// perform documents search and facets accumulation
searcher.search(q, MultiCollector.wrap(topDocsCollector, facetsCollector));

View File

@ -104,9 +104,9 @@ public class SimpleSearcher {
}
// Faceted search parameters indicate which facets are we interested in
FacetSearchParams facetSearchParams = new FacetSearchParams(Arrays.asList(facetRequests), indexingParams);
FacetSearchParams facetSearchParams = new FacetSearchParams(indexingParams, facetRequests);
FacetsCollector facetsCollector = new FacetsCollector(facetSearchParams, indexReader, taxoReader);
FacetsCollector facetsCollector = FacetsCollector.create(facetSearchParams, indexReader, taxoReader);
// perform documents search and facets accumulation
searcher.search(q, MultiCollector.wrap(topDocsCollector, facetsCollector));
@ -148,9 +148,9 @@ public class SimpleSearcher {
// assume the user is interested in the second sub-result
// (just take the second sub-result returned by the iterator - we know there are 3 results!)
Iterator<? extends FacetResultNode> resIterator = fres.getFacetResultNode().getSubResults().iterator();
Iterator<? extends FacetResultNode> resIterator = fres.getFacetResultNode().subResults.iterator();
resIterator.next(); // skip first result
CategoryPath categoryOfInterest = resIterator.next().getLabel();
CategoryPath categoryOfInterest = resIterator.next().label;
// drill-down preparation: turn the base query into a drill-down query for the category of interest
Query q2 = DrillDown.query(indexingParams, baseQuery, categoryOfInterest);

View File

@ -48,7 +48,7 @@ public class FacetIndexingParams {
/**
* A {@link FacetIndexingParams} which fixes {@link OrdinalPolicy} to
* {@link OrdinalPolicy#NO_PARENTS}. This is a singleton equivalent to new
* {@link OrdinalPolicy#ALL_PARENTS}. This is a singleton equivalent to new
* {@link #FacetIndexingParams()}.
*/
public static final FacetIndexingParams ALL_PARENTS = new FacetIndexingParams();

View File

@ -37,8 +37,8 @@ import org.apache.lucene.index.IndexReader;
* <p>
* Note: Sampling accumulation (Accumulation over a sampled-set of the results),
* does not guarantee accurate values for
* {@link FacetResult#getNumValidDescendants()} &
* {@link FacetResultNode#getResidue()}.
* {@link FacetResult#getNumValidDescendants()} and
* {@link FacetResultNode#residue}.
*
* @lucene.experimental
*/

View File

@ -0,0 +1,346 @@
package org.apache.lucene.facet.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map.Entry;
import org.apache.lucene.facet.index.categorypolicy.OrdinalPolicy;
import org.apache.lucene.facet.index.params.CategoryListParams;
import org.apache.lucene.facet.index.params.FacetIndexingParams;
import org.apache.lucene.facet.search.params.CountFacetRequest;
import org.apache.lucene.facet.search.params.FacetRequest;
import org.apache.lucene.facet.search.params.FacetRequest.SortBy;
import org.apache.lucene.facet.search.params.FacetRequest.SortOrder;
import org.apache.lucene.facet.search.params.FacetSearchParams;
import org.apache.lucene.facet.search.results.FacetResult;
import org.apache.lucene.facet.search.results.FacetResultNode;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.ParallelTaxonomyArrays;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValues.Source;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.encoding.DGapVInt8IntDecoder;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* A {@link Collector} which counts facets associated with matching documents.
* This {@link Collector} can be used only in the following conditions:
* <ul>
* <li>All {@link FacetRequest requests} must be {@link CountFacetRequest}, with
* their {@link FacetRequest#getDepth() depth} equals to 1, and
 * {@link FacetRequest#getNumLabel()} must be &ge;
* {@link FacetRequest#getNumResults()}. Also, their sorting options must be
* {@link SortOrder#DESCENDING} and {@link SortBy#VALUE} (although ties are
* broken by ordinals).
* <li>Partitions should be disabled (
* {@link FacetIndexingParams#getPartitionSize()} should return
* Integer.MAX_VALUE).
* <li>There can be only one {@link CategoryListParams} in the
* {@link FacetIndexingParams}, with {@link DGapVInt8IntDecoder}.
* </ul>
*
* <p>
 * <b>NOTE:</b> this collector uses {@link DocValues#getSource()} by default,
* which pre-loads the values into memory. If your application cannot afford the
* RAM, you should use
* {@link #CountingFacetsCollector(FacetSearchParams, TaxonomyReader, FacetArrays, boolean)}
* and specify to use a direct source (corresponds to
* {@link DocValues#getDirectSource()}).
*
* <p>
* <b>NOTE:</b> this collector supports category lists that were indexed with
* {@link OrdinalPolicy#NO_PARENTS}, by counting up the parents too, after
 * resolving the leaf counts. Note though that it is your responsibility to
* guarantee that indeed a document wasn't indexed with two categories that
* share a common parent, or otherwise the parent's count will be wrong.
*
* @lucene.experimental
*/
public class CountingFacetsCollector extends FacetsCollector {
private final FacetSearchParams fsp;
private final TaxonomyReader taxoReader;
private final BytesRef buf = new BytesRef(32);
private final FacetArrays facetArrays;
private final int[] counts;
private final String facetsField;
private final boolean useDirectSource;
private final HashMap<Source,FixedBitSet> matchingDocs = new HashMap<Source,FixedBitSet>();
private DocValues facetsValues;
private FixedBitSet bits;
public CountingFacetsCollector(FacetSearchParams fsp, TaxonomyReader taxoReader) {
this(fsp, taxoReader, new FacetArrays(taxoReader.getSize()), false);
}
public CountingFacetsCollector(FacetSearchParams fsp, TaxonomyReader taxoReader, FacetArrays facetArrays,
boolean useDirectSource) {
assert facetArrays.arrayLength >= taxoReader.getSize() : "too small facet array";
assert assertParams(fsp) == null : assertParams(fsp);
this.fsp = fsp;
this.taxoReader = taxoReader;
this.facetArrays = facetArrays;
this.counts = facetArrays.getIntArray();
this.facetsField = fsp.indexingParams.getCategoryListParams(null).field;
this.useDirectSource = useDirectSource;
}
/**
* Asserts that this {@link FacetsCollector} can handle the given
* {@link FacetSearchParams}. Returns {@code null} if true, otherwise an error
* message.
*/
static String assertParams(FacetSearchParams fsp) {
// verify that all facet requests are CountFacetRequest
for (FacetRequest fr : fsp.facetRequests) {
if (!(fr instanceof CountFacetRequest)) {
return "all FacetRequests must be CountFacetRequest";
}
if (fr.getDepth() != 1) {
return "all requests must be of depth 1";
}
if (fr.getNumLabel() < fr.getNumResults()) {
return "this Collector always labels all requested results";
}
if (fr.getSortOrder() != SortOrder.DESCENDING) {
return "this Collector always sorts results in descending order";
}
if (fr.getSortBy() != SortBy.VALUE) {
return "this Collector always sorts by results' values";
}
}
// verify that there's only one CategoryListParams
List<CategoryListParams> clps = fsp.indexingParams.getAllCategoryListParams();
if (clps.size() != 1) {
return "this Collector supports only one CategoryListParams";
}
// verify DGapVInt decoder
CategoryListParams clp = clps.get(0);
if (clp.createEncoder().createMatchingDecoder().getClass() != DGapVInt8IntDecoder.class) {
return "this Collector supports only DGap + VInt encoding";
}
// verify that partitions are disabled
if (fsp.indexingParams.getPartitionSize() != Integer.MAX_VALUE) {
return "this Collector does not support partitions";
}
return null;
}
@Override
public void setNextReader(AtomicReaderContext context) throws IOException {
facetsValues = context.reader().docValues(facetsField);
if (facetsValues != null) {
Source facetSource = useDirectSource ? facetsValues.getDirectSource() : facetsValues.getSource();
bits = new FixedBitSet(context.reader().maxDoc());
matchingDocs.put(facetSource, bits);
}
}
@Override
public void collect(int doc) throws IOException {
if (facetsValues == null) {
return;
}
bits.set(doc);
}
private void countFacets() {
for (Entry<Source,FixedBitSet> entry : matchingDocs.entrySet()) {
Source facetsSource = entry.getKey();
FixedBitSet bits = entry.getValue();
int doc = 0;
int length = bits.length();
while (doc < length && (doc = bits.nextSetBit(doc)) != -1) {
        facetsSource.getBytes(doc, buf);
if (buf.length > 0) {
// this document has facets
int upto = buf.offset + buf.length;
int ord = 0;
int offset = buf.offset;
int prev = 0;
while (offset < upto) {
byte b = buf.bytes[offset++];
if (b >= 0) {
prev = ord = ((ord << 7) | b) + prev;
counts[ord]++;
ord = 0;
} else {
ord = (ord << 7) | (b & 0x7F);
}
}
}
++doc;
}
}
}
private void countParents(int[] parents) {
// counts[0] is the count of ROOT, which we don't care about and counts[1]
// can only update counts[0], so we don't bother to visit it too. also,
// since parents always have lower ordinals than their children, we traverse
// the array backwards. this also allows us to update just the immediate
// parent's count (actually, otherwise it would be a mistake).
for (int i = counts.length - 1; i > 1; i--) {
int count = counts[i];
if (count > 0) {
int parent = parents[i];
if (parent != 0) {
counts[parent] += count;
}
}
}
}
@Override
public synchronized List<FacetResult> getFacetResults() throws IOException {
try {
// first, count matching documents' facets
countFacets();
ParallelTaxonomyArrays arrays = taxoReader.getParallelTaxonomyArrays();
if (fsp.indexingParams.getOrdinalPolicy() == OrdinalPolicy.NO_PARENTS) {
// need to count parents
countParents(arrays.parents());
}
// compute top-K
final int[] children = arrays.children();
final int[] siblings = arrays.siblings();
List<FacetResult> res = new ArrayList<FacetResult>();
for (FacetRequest fr : fsp.facetRequests) {
int rootOrd = taxoReader.getOrdinal(fr.categoryPath);
if (rootOrd == TaxonomyReader.INVALID_ORDINAL) { // category does not exist
continue;
}
FacetResultNode root = new FacetResultNode();
root.ordinal = rootOrd;
root.label = fr.categoryPath;
root.value = counts[rootOrd];
if (fr.getNumResults() > taxoReader.getSize()) {
// specialize this case, user is interested in all available results
ArrayList<FacetResultNode> nodes = new ArrayList<FacetResultNode>();
int child = children[rootOrd];
while (child != TaxonomyReader.INVALID_ORDINAL) {
int count = counts[child];
if (count > 0) {
FacetResultNode node = new FacetResultNode();
node.label = taxoReader.getPath(child);
node.value = count;
nodes.add(node);
}
child = siblings[child];
}
root.residue = 0;
root.subResults = nodes;
res.add(new FacetResult(fr, root, nodes.size()));
continue;
}
        // since we use sentinel objects, we cannot reuse the PQ, but that's ok because it's not big
FacetResultNodeQueue pq = new FacetResultNodeQueue(fr.getNumResults(), true);
FacetResultNode top = pq.top();
int child = children[rootOrd];
int numResults = 0; // count the number of results
int residue = 0;
while (child != TaxonomyReader.INVALID_ORDINAL) {
int count = counts[child];
if (count > top.value) {
residue += top.value;
top.value = count;
top.ordinal = child;
top = pq.updateTop();
++numResults;
} else {
residue += count;
}
child = siblings[child];
}
// pop() the least (sentinel) elements
int pqsize = pq.size();
int size = numResults < pqsize ? numResults : pqsize;
for (int i = pqsize - size; i > 0; i--) { pq.pop(); }
// create the FacetResultNodes.
FacetResultNode[] subResults = new FacetResultNode[size];
for (int i = size - 1; i >= 0; i--) {
FacetResultNode node = pq.pop();
node.label = taxoReader.getPath(node.ordinal);
subResults[i] = node;
}
root.residue = residue;
root.subResults = Arrays.asList(subResults);
res.add(new FacetResult(fr, root, size));
}
return res;
} finally {
facetArrays.free();
}
}
@Override
public boolean acceptsDocsOutOfOrder() {
// the actual work is done post-collection, so we always support out-of-order.
return true;
}
@Override
public void setScorer(Scorer scorer) throws IOException {
}
// TODO: review ResultSortUtils queues and check if we can reuse any of them here
// and then alleviate the SortOrder/SortBy constraint
private static class FacetResultNodeQueue extends PriorityQueue<FacetResultNode> {
public FacetResultNodeQueue(int maxSize, boolean prepopulate) {
super(maxSize, prepopulate);
}
@Override
protected FacetResultNode getSentinelObject() {
return new FacetResultNode();
}
@Override
protected boolean lessThan(FacetResultNode a, FacetResultNode b) {
if (a.value < b.value) return true;
if (a.value > b.value) return false;
// both have the same value, break tie by ordinal
return a.ordinal < b.ordinal;
}
}
}
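A usage sketch under the conditions listed in the class javadoc (fsp, indexReader and taxoReader are assumed; with a single default CategoryListParams, no partitions and only CountFacetRequests of depth 1 with default sort settings, these conditions typically hold):

// the factory returns a CountingFacetsCollector when the parameters qualify
FacetsCollector fc = FacetsCollector.create(fsp, indexReader, taxoReader);
// or construct it explicitly with a direct DocValues source, trading CPU for lower RAM usage
FacetsCollector direct = new CountingFacetsCollector(fsp, taxoReader, new FacetArrays(taxoReader.getSize()), true);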

View File

@ -47,7 +47,7 @@ public final class DrillDown {
* @see #term(FacetIndexingParams, CategoryPath)
*/
public static final Term term(FacetSearchParams sParams, CategoryPath path) {
return term(sParams.getFacetIndexingParams(), path);
return term(sParams.indexingParams, path);
}
/** Return a drill-down {@link Term} for a category. */
@ -103,7 +103,7 @@ public final class DrillDown {
* @see #query(FacetIndexingParams, Query, CategoryPath...)
*/
public static final Query query(FacetSearchParams sParams, Query baseQuery, CategoryPath... paths) {
return query(sParams.getFacetIndexingParams(), baseQuery, paths);
return query(sParams.indexingParams, baseQuery, paths);
}
}
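A small sketch of these FacetSearchParams-based overloads (fsp and baseQuery assumed; the category path is illustrative):

CategoryPath interest = new CategoryPath("Author", "Mark Twain");
Term drillDownTerm = DrillDown.term(fsp, interest);               // delegates to fsp.indexingParams
Query drillDownQuery = DrillDown.query(fsp, baseQuery, interest); // narrows baseQuery to the category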

View File

@ -120,7 +120,7 @@ public abstract class FacetResultsHandler {
* rendered facet results, fixed their counts, and now it is needed
* to sort the results differently according to the fixed counts.
* @param facetResult result to be rearranged.
* @see FacetResultNode#setValue(double)
* @see FacetResultNode#value
*/
public abstract FacetResult rearrangeFacetResult(FacetResult facetResult);

View File

@ -138,7 +138,7 @@ public abstract class FacetsAccumulator {
/** check if all requests are complementable */
protected boolean mayComplement() {
for (FacetRequest freq:searchParams.getFacetRequests()) {
for (FacetRequest freq:searchParams.facetRequests) {
if (!freq.supportsComplements()) {
return false;
}

View File

@ -3,15 +3,13 @@ package org.apache.lucene.facet.search;
import java.io.IOException;
import java.util.List;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.facet.search.params.FacetRequest;
import org.apache.lucene.facet.search.params.FacetSearchParams;
import org.apache.lucene.facet.search.results.FacetResult;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Collector;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -31,109 +29,35 @@ import org.apache.lucene.facet.taxonomy.TaxonomyReader;
*/
/**
* Collector for facet accumulation. *
* A {@link Collector} which executes faceted search and computes the weight of
* requested facets. To get the facet results you should call
* {@link #getFacetResults()}.
* {@link #create(FacetSearchParams, IndexReader, TaxonomyReader)} returns the
* most optimized {@link FacetsCollector} for the given parameters.
*
* @lucene.experimental
*/
public class FacetsCollector extends Collector {
protected final FacetsAccumulator facetsAccumulator;
private ScoredDocIdCollector scoreDocIdCollector;
private List<FacetResult> results;
private Object resultsGuard;
public abstract class FacetsCollector extends Collector {
/**
* Create a collector for accumulating facets while collecting documents
* during search.
*
* @param facetSearchParams
* faceted search parameters defining which facets are required and
* how.
* @param indexReader
* searched index.
* @param taxonomyReader
* taxonomy containing the facets.
* Returns the most optimized {@link FacetsCollector} for the given search
* parameters. The returned {@link FacetsCollector} is guaranteed to satisfy
* the requested parameters.
*/
public FacetsCollector(FacetSearchParams facetSearchParams,
IndexReader indexReader, TaxonomyReader taxonomyReader) {
facetsAccumulator = initFacetsAccumulator(facetSearchParams, indexReader, taxonomyReader);
scoreDocIdCollector = initScoredDocCollector(facetSearchParams, indexReader, taxonomyReader);
resultsGuard = new Object();
}
/**
* Create a {@link ScoredDocIdCollector} to be used as the first phase of
* the facet collection. If all facetRequests are do not require the
* document score, a ScoredDocIdCollector which does not store the document
* scores would be returned. Otherwise a SDIC which does store the documents
* will be returned, having an initial allocated space for 1000 such
* documents' scores.
*/
protected ScoredDocIdCollector initScoredDocCollector(
FacetSearchParams facetSearchParams, IndexReader indexReader,
TaxonomyReader taxonomyReader) {
boolean scoresNeeded = false;
for (FacetRequest frq : facetSearchParams.getFacetRequests()) {
if (frq.requireDocumentScore()) {
scoresNeeded = true;
break;
}
public static FacetsCollector create(FacetSearchParams fsp, IndexReader indexReader, TaxonomyReader taxoReader) {
if (CountingFacetsCollector.assertParams(fsp) == null) {
return new CountingFacetsCollector(fsp, taxoReader);
}
return ScoredDocIdCollector.create(indexReader.maxDoc(), scoresNeeded);
return new StandardFacetsCollector(fsp, indexReader, taxoReader);
}
/**
* Create the {@link FacetsAccumulator} to be used. Default is
* {@link StandardFacetsAccumulator}. Called once at the constructor of the collector.
*
* @param facetSearchParams
* The search params.
* @param indexReader
* A reader to the index to search in.
* @param taxonomyReader
* A reader to the active taxonomy.
* @return The {@link FacetsAccumulator} to use.
* Returns a {@link FacetResult} per {@link FacetRequest} set in
* {@link FacetSearchParams}. Note that if one of the {@link FacetRequest
* requests} is for a {@link CategoryPath} that does not exist in the taxonomy,
* no matching {@link FacetResult} will be returned.
*/
protected FacetsAccumulator initFacetsAccumulator(FacetSearchParams facetSearchParams,
IndexReader indexReader,
TaxonomyReader taxonomyReader) {
return new StandardFacetsAccumulator(facetSearchParams, indexReader, taxonomyReader);
}
/**
* Return accumulated facets results (according to faceted search parameters)
* for collected documents.
* @throws IOException on error
*/
public List<FacetResult> getFacetResults() throws IOException {
synchronized (resultsGuard) { // over protection
if (results == null) {
// lazy creation but just once
results = facetsAccumulator.accumulate(scoreDocIdCollector.getScoredDocIDs());
scoreDocIdCollector = null;
}
return results;
}
}
@Override
public boolean acceptsDocsOutOfOrder() {
return false;
}
@Override
public void collect(int doc) throws IOException {
scoreDocIdCollector.collect(doc);
}
@Override
public void setNextReader(AtomicReaderContext context) throws IOException {
scoreDocIdCollector.setNextReader(context);
}
@Override
public void setScorer(Scorer scorer) throws IOException {
scoreDocIdCollector.setScorer(scorer);
}
public abstract List<FacetResult> getFacetResults() throws IOException;
}

View File

@ -32,8 +32,8 @@ import org.apache.lucene.facet.search.sampling.Sampler.SampleResult;
* <p>
* Note: Sampling accumulation (Accumulation over a sampled-set of the results),
* does not guarantee accurate values for
* {@link FacetResult#getNumValidDescendants()} &
* {@link FacetResultNode#getResidue()}.
* {@link FacetResult#getNumValidDescendants()} and
* {@link FacetResultNode#residue}.
*
* @lucene.experimental
*/

View File

@ -87,7 +87,7 @@ public class StandardFacetsAccumulator extends FacetsAccumulator {
this.facetArrays = facetArrays;
// can only be computed later when docids size is known
isUsingComplements = false;
partitionSize = PartitionsUtils.partitionSize(searchParams.getFacetIndexingParams(), taxonomyReader);
partitionSize = PartitionsUtils.partitionSize(searchParams.indexingParams, taxonomyReader);
maxPartitions = (int) Math.ceil(this.taxonomyReader.getSize() / (double) partitionSize);
accumulateGuard = new Object();
}
@ -95,7 +95,7 @@ public class StandardFacetsAccumulator extends FacetsAccumulator {
public StandardFacetsAccumulator(FacetSearchParams searchParams,
IndexReader indexReader, TaxonomyReader taxonomyReader) {
this(searchParams, indexReader, taxonomyReader, new FacetArrays(
PartitionsUtils.partitionSize(searchParams.getFacetIndexingParams(), taxonomyReader)));
PartitionsUtils.partitionSize(searchParams.indexingParams, taxonomyReader)));
}
@Override
@ -112,7 +112,7 @@ public class StandardFacetsAccumulator extends FacetsAccumulator {
if (isUsingComplements) {
try {
totalFacetCounts = TotalFacetCountsCache.getSingleton().getTotalCounts(indexReader, taxonomyReader,
searchParams.getFacetIndexingParams());
searchParams.indexingParams);
if (totalFacetCounts != null) {
docids = ScoredDocIdsUtils.getComplementSet(docids, indexReader);
} else {
@ -159,7 +159,7 @@ public class StandardFacetsAccumulator extends FacetsAccumulator {
// In this implementation merges happen after each partition,
// but other impl could merge only at the end.
final HashSet<FacetRequest> handledRequests = new HashSet<FacetRequest>();
for (FacetRequest fr : searchParams.getFacetRequests()) {
for (FacetRequest fr : searchParams.facetRequests) {
// Handle and merge only facet requests which were not already handled.
if (handledRequests.add(fr)) {
FacetResultsHandler frHndlr = fr.createFacetResultsHandler(taxonomyReader);
@ -178,7 +178,7 @@ public class StandardFacetsAccumulator extends FacetsAccumulator {
// gather results from all requests into a list for returning them
List<FacetResult> res = new ArrayList<FacetResult>();
for (FacetRequest fr : searchParams.getFacetRequests()) {
for (FacetRequest fr : searchParams.facetRequests) {
FacetResultsHandler frHndlr = fr.createFacetResultsHandler(taxonomyReader);
IntermediateFacetResult tmpResult = fr2tmpRes.get(fr);
if (tmpResult == null) {
@ -321,8 +321,8 @@ public class StandardFacetsAccumulator extends FacetsAccumulator {
HashMap<CategoryListIterator, Aggregator> categoryLists = new HashMap<CategoryListIterator, Aggregator>();
FacetIndexingParams indexingParams = searchParams.getFacetIndexingParams();
for (FacetRequest facetRequest : searchParams.getFacetRequests()) {
FacetIndexingParams indexingParams = searchParams.indexingParams;
for (FacetRequest facetRequest : searchParams.facetRequests) {
Aggregator categoryAggregator = facetRequest.createAggregator(isUsingComplements, facetArrays, taxonomyReader);
CategoryListIterator cli = indexingParams.getCategoryListParams(facetRequest.categoryPath).createCategoryListIterator(partition);

View File

@ -0,0 +1,139 @@
package org.apache.lucene.facet.search;
import java.io.IOException;
import java.util.List;
import org.apache.lucene.facet.index.categorypolicy.OrdinalPolicy;
import org.apache.lucene.facet.index.params.CategoryListParams;
import org.apache.lucene.facet.search.params.FacetRequest;
import org.apache.lucene.facet.search.params.FacetSearchParams;
import org.apache.lucene.facet.search.results.FacetResult;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Scorer;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * A {@link FacetsCollector} which allows initializing e.g.
* {@link FacetsAccumulator}. Supports facet partitions, generic
* {@link FacetRequest facet requests}, {@link CategoryListParams} etc.
*
* <p>
* <b>NOTE:</b> this collector, with the default {@link FacetsAccumulator} does
* not support category lists which were indexed with
* {@link OrdinalPolicy#NO_PARENTS}.
*
* @lucene.experimental
*/
public class StandardFacetsCollector extends FacetsCollector {
protected final FacetsAccumulator facetsAccumulator;
private ScoredDocIdCollector scoreDocIdCollector;
private List<FacetResult> results;
private Object resultsGuard;
/**
* Create a collector for accumulating facets while collecting documents
* during search.
*
* @param facetSearchParams
* faceted search parameters defining which facets are required and
* how.
* @param indexReader
* searched index.
* @param taxonomyReader
* taxonomy containing the facets.
*/
public StandardFacetsCollector(FacetSearchParams facetSearchParams, IndexReader indexReader, TaxonomyReader taxonomyReader) {
facetsAccumulator = initFacetsAccumulator(facetSearchParams, indexReader, taxonomyReader);
scoreDocIdCollector = initScoredDocCollector(facetSearchParams, indexReader, taxonomyReader);
resultsGuard = new Object();
}
/**
* Create a {@link ScoredDocIdCollector} to be used as the first phase of
   * the facet collection. If none of the facetRequests require the
   * document score, a ScoredDocIdCollector which does not store the document
   * scores is returned. Otherwise, a ScoredDocIdCollector which does store the
   * document scores is returned, with space initially allocated for 1000 such
   * documents' scores.
*/
protected ScoredDocIdCollector initScoredDocCollector(FacetSearchParams facetSearchParams, IndexReader indexReader,
TaxonomyReader taxonomyReader) {
boolean scoresNeeded = false;
for (FacetRequest frq : facetSearchParams.facetRequests) {
if (frq.requireDocumentScore()) {
scoresNeeded = true;
break;
}
}
return ScoredDocIdCollector.create(indexReader.maxDoc(), scoresNeeded);
}
/**
* Create the {@link FacetsAccumulator} to be used. Default is
* {@link StandardFacetsAccumulator}. Called once at the constructor of the collector.
*
* @param facetSearchParams
* The search params.
* @param indexReader
* A reader to the index to search in.
* @param taxonomyReader
* A reader to the active taxonomy.
* @return The {@link FacetsAccumulator} to use.
*/
protected FacetsAccumulator initFacetsAccumulator(FacetSearchParams facetSearchParams, IndexReader indexReader,
TaxonomyReader taxonomyReader) {
return new StandardFacetsAccumulator(facetSearchParams, indexReader, taxonomyReader);
}
@Override
public List<FacetResult> getFacetResults() throws IOException {
synchronized (resultsGuard) { // over protection
if (results == null) {
// lazy creation but just once
results = facetsAccumulator.accumulate(scoreDocIdCollector.getScoredDocIDs());
scoreDocIdCollector = null;
}
return results;
}
}
@Override
public boolean acceptsDocsOutOfOrder() {
return false;
}
@Override
public void collect(int doc) throws IOException {
scoreDocIdCollector.collect(doc);
}
@Override
public void setNextReader(AtomicReaderContext context) throws IOException {
scoreDocIdCollector.setNextReader(context);
}
@Override
public void setScorer(Scorer scorer) throws IOException {
scoreDocIdCollector.setScorer(scorer);
}
}
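When the counting fast path does not apply (partitions, non-count requests, sampling, etc.), this collector can be constructed directly and its accumulator customized; a sketch (fsp, indexReader and taxonomyReader assumed, with the override simply returning the default accumulator):

FacetsCollector fc = new StandardFacetsCollector(fsp, indexReader, taxonomyReader) {
  @Override
  protected FacetsAccumulator initFacetsAccumulator(FacetSearchParams params, IndexReader reader, TaxonomyReader taxo) {
    // plug in a sampling accumulator or another custom FacetsAccumulator here
    return new StandardFacetsAccumulator(params, reader, taxo);
  }
};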

View File

@ -7,7 +7,6 @@ import org.apache.lucene.facet.search.params.FacetRequest;
import org.apache.lucene.facet.search.results.FacetResult;
import org.apache.lucene.facet.search.results.FacetResultNode;
import org.apache.lucene.facet.search.results.IntermediateFacetResult;
import org.apache.lucene.facet.search.results.MutableFacetResultNode;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.ParallelTaxonomyArrays;
import org.apache.lucene.facet.util.ResultSortUtils;
@ -64,8 +63,7 @@ public class TopKFacetResultsHandler extends FacetResultsHandler {
}
// TODO (Facet): should initial value of "residue" depend on aggregator if not sum?
MutableFacetResultNode parentResultNode =
new MutableFacetResultNode(ordinal, value);
FacetResultNode parentResultNode = new FacetResultNode(ordinal, value);
Heap<FacetResultNode> heap = ResultSortUtils.createSuitableHeap(facetRequest);
int totalFacets = heapDescendants(ordinal, heap, parentResultNode, facetArrays, offset);
@ -80,7 +78,7 @@ public class TopKFacetResultsHandler extends FacetResultsHandler {
public IntermediateFacetResult mergeResults(IntermediateFacetResult... tmpResults) throws IOException {
int ordinal = taxonomyReader.getOrdinal(facetRequest.categoryPath);
MutableFacetResultNode resNode = new MutableFacetResultNode(ordinal, 0);
FacetResultNode resNode = new FacetResultNode(ordinal, 0);
int totalFacets = 0;
Heap<FacetResultNode> heap = null;
@ -91,7 +89,7 @@ public class TopKFacetResultsHandler extends FacetResultsHandler {
TopKFacetResult fres = (TopKFacetResult) tmpFres;
totalFacets += fres.getNumValidDescendants();
// set the value for the result node representing the facet request
resNode.increaseValue(fres.getFacetResultNode().getValue());
resNode.value += fres.getFacetResultNode().value;
Heap<FacetResultNode> tmpHeap = fres.getHeap();
if (heap == null) {
heap = tmpHeap;
@ -102,7 +100,7 @@ public class TopKFacetResultsHandler extends FacetResultsHandler {
FacetResultNode a = heap.insertWithOverflow(tmpHeap.pop());
if (a != null) {
resNode.increaseResidue(a.getResidue());
resNode.residue += a.residue;
}
}
}
@ -119,8 +117,8 @@ public class TopKFacetResultsHandler extends FacetResultsHandler {
* they join the overall priority queue pq of size K.
* @return total number of descendants considered here by pq, excluding ordinal itself.
*/
private int heapDescendants(int ordinal, Heap<FacetResultNode> pq,
MutableFacetResultNode parentResultNode, FacetArrays facetArrays, int offset) throws IOException {
private int heapDescendants(int ordinal, Heap<FacetResultNode> pq, FacetResultNode parentResultNode,
FacetArrays facetArrays, int offset) throws IOException {
int partitionSize = facetArrays.arrayLength;
int endOffset = offset + partitionSize;
ParallelTaxonomyArrays childrenArray = taxonomyReader.getParallelTaxonomyArrays();
@ -172,16 +170,20 @@ public class TopKFacetResultsHandler extends FacetResultsHandler {
if (value != 0 && !Double.isNaN(value)) {
// Count current ordinal -- the TOS
if (reusable == null) {
reusable = new MutableFacetResultNode(tosOrdinal, value);
reusable = new FacetResultNode(tosOrdinal, value);
} else {
// it is safe to cast since reusable was created here.
((MutableFacetResultNode)reusable).reset(tosOrdinal, value);
reusable.ordinal = tosOrdinal;
reusable.value = value;
reusable.subResults.clear();
reusable.label = null;
reusable.residue = 0;
}
++childrenCounter;
reusable = pq.insertWithOverflow(reusable);
if (reusable != null) {
// TODO (Facet): is other logic (not add) needed, per aggregator?
parentResultNode.increaseResidue(reusable.getValue());
parentResultNode.residue += reusable.value;
}
}
}
@ -205,9 +207,12 @@ public class TopKFacetResultsHandler extends FacetResultsHandler {
TopKFacetResult res = (TopKFacetResult) tmpResult; // cast is safe by contract of this class
if (res != null) {
Heap<FacetResultNode> heap = res.getHeap();
MutableFacetResultNode resNode = (MutableFacetResultNode)res.getFacetResultNode(); // cast safe too
FacetResultNode resNode = res.getFacetResultNode();
if (resNode.subResults == FacetResultNode.EMPTY_SUB_RESULTS) {
resNode.subResults = new ArrayList<FacetResultNode>();
}
for (int i = heap.size(); i > 0; i--) {
resNode.insertSubResult(heap.pop());
resNode.subResults.add(0, heap.pop());
}
}
return res;
@ -218,8 +223,8 @@ public class TopKFacetResultsHandler extends FacetResultsHandler {
TopKFacetResult res = (TopKFacetResult) facetResult; // cast is safe by contract of this class
Heap<FacetResultNode> heap = res.getHeap();
heap.clear(); // just to be safe
MutableFacetResultNode topFrn = (MutableFacetResultNode) res.getFacetResultNode(); // safe cast
for (FacetResultNode frn : topFrn.getSubResults()) {
FacetResultNode topFrn = res.getFacetResultNode();
for (FacetResultNode frn : topFrn.subResults) {
heap.add(frn);
}
int size = heap.size();
@ -227,23 +232,22 @@ public class TopKFacetResultsHandler extends FacetResultsHandler {
for (int i = heap.size(); i > 0; i--) {
subResults.add(0,heap.pop());
}
topFrn.setSubResults(subResults);
topFrn.subResults = subResults;
return res;
}
@Override
// label top K sub results
public void labelResult(FacetResult facetResult) throws IOException {
if (facetResult != null) { // any result to label?
FacetResultNode facetResultNode = facetResult.getFacetResultNode();
if (facetResultNode != null) { // any result to label?
facetResultNode.getLabel(taxonomyReader);
facetResultNode.label = taxonomyReader.getPath(facetResultNode.ordinal);
int num2label = facetRequest.getNumLabel();
for (FacetResultNode frn : facetResultNode.getSubResults()) {
for (FacetResultNode frn : facetResultNode.subResults) {
if (--num2label < 0) {
break;
}
frn.getLabel(taxonomyReader);
frn.label = taxonomyReader.getPath(frn.ordinal);
}
}
}
@ -267,7 +271,7 @@ public class TopKFacetResultsHandler extends FacetResultsHandler {
* @param facetResultNode top result node for this facet result.
* @param totalFacets - number of children of the targetFacet, up till the requested depth.
*/
TopKFacetResult(FacetRequest facetRequest, MutableFacetResultNode facetResultNode, int totalFacets) {
TopKFacetResult(FacetRequest facetRequest, FacetResultNode facetResultNode, int totalFacets) {
super(facetRequest, facetResultNode, totalFacets);
}

View File

@ -9,7 +9,6 @@ import org.apache.lucene.facet.search.params.FacetRequest.SortOrder;
import org.apache.lucene.facet.search.results.FacetResult;
import org.apache.lucene.facet.search.results.FacetResultNode;
import org.apache.lucene.facet.search.results.IntermediateFacetResult;
import org.apache.lucene.facet.search.results.MutableFacetResultNode;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.ParallelTaxonomyArrays;
import org.apache.lucene.util.PriorityQueue;
@ -39,7 +38,7 @@ import org.apache.lucene.util.collections.IntToObjectMap;
* subtree of the taxonomy tree. Its root node,
* {@link FacetResult#getFacetResultNode()}, is the facet specified by
* {@link FacetRequest#categoryPath}, and the enumerated children,
* {@link FacetResultNode#getSubResults()}, of each node in that
* {@link FacetResultNode#subResults}, of each node in that
* {@link FacetResult} are the top K ( = {@link FacetRequest#getNumResults()})
* among its children in the taxonomy. Top in the sense
* {@link FacetRequest#getSortBy()}, which can be by the values aggregated in
@ -70,8 +69,7 @@ import org.apache.lucene.util.collections.IntToObjectMap;
*/
public class TopKInEachNodeHandler extends FacetResultsHandler {
public TopKInEachNodeHandler(TaxonomyReader taxonomyReader,
FacetRequest facetRequest) {
public TopKInEachNodeHandler(TaxonomyReader taxonomyReader, FacetRequest facetRequest) {
super(taxonomyReader, facetRequest);
}
@ -546,7 +544,7 @@ public class TopKInEachNodeHandler extends FacetResultsHandler {
@Override
protected boolean lessThan(FacetResultNode arg1, FacetResultNode arg2) {
return merger.leftGoesNow(arg2.getOrdinal(), arg2.getValue(), arg1.getOrdinal(), arg1.getValue());
return merger.leftGoesNow(arg2.ordinal, arg2.value, arg1.ordinal, arg1.value);
}
}
@ -718,14 +716,11 @@ public class TopKInEachNodeHandler extends FacetResultsHandler {
if (node == null) {
return;
}
node.getLabel(this.taxonomyReader); // attach a label -- category path -- to the node
if (null == node.getSubResults()) {
return; // if node has no children -- done
}
node.label = taxonomyReader.getPath(node.ordinal);
// otherwise, label the first numToLabel of these children, and recursively -- their children.
// label the first numToLabel of these children, and recursively -- their children.
int numLabeled = 0;
for (FacetResultNode frn : node.getSubResults()) {
for (FacetResultNode frn : node.subResults) {
      // go over the children of node from first to last, no more than numToLabel of them
recursivelyLabel(frn, numToLabel);
if (++numLabeled >= numToLabel) {
@ -743,24 +738,23 @@ public class TopKInEachNodeHandler extends FacetResultsHandler {
public FacetResult rearrangeFacetResult(FacetResult facetResult) {
PriorityQueue<FacetResultNode> nodesHeap =
new ResultNodeHeap(this.facetRequest.getNumResults(), this.getSuitableACComparator());
MutableFacetResultNode topFrn = (MutableFacetResultNode) facetResult.getFacetResultNode(); // safe cast
FacetResultNode topFrn = facetResult.getFacetResultNode();
rearrangeChilrenOfNode(topFrn, nodesHeap);
return facetResult;
}
private void rearrangeChilrenOfNode(FacetResultNode node,
PriorityQueue<FacetResultNode> nodesHeap) {
private void rearrangeChilrenOfNode(FacetResultNode node, PriorityQueue<FacetResultNode> nodesHeap) {
nodesHeap.clear(); // just to be safe
for (FacetResultNode frn : node.getSubResults()) {
for (FacetResultNode frn : node.subResults) {
nodesHeap.add(frn);
}
int size = nodesHeap.size();
ArrayList<FacetResultNode> subResults = new ArrayList<FacetResultNode>(size);
while (nodesHeap.size()>0) {
subResults.add(0,nodesHeap.pop());
while (nodesHeap.size() > 0) {
subResults.add(0, nodesHeap.pop());
}
((MutableFacetResultNode)node).setSubResults(subResults);
for (FacetResultNode frn : node.getSubResults()) {
node.subResults = subResults;
for (FacetResultNode frn : node.subResults) {
rearrangeChilrenOfNode(frn, nodesHeap);
}
@ -777,13 +771,13 @@ public class TopKInEachNodeHandler extends FacetResultsHandler {
if (tmp.isRootNodeIncluded) {
value = tmp.rootNodeValue;
}
MutableFacetResultNode root = generateNode (ordinal, value, tmp.mapToAACOs);
FacetResultNode root = generateNode(ordinal, value, tmp.mapToAACOs);
return new FacetResult (tmp.facetRequest, root, tmp.totalNumOfFacetsConsidered);
}
private MutableFacetResultNode generateNode (int ordinal, double val, IntToObjectMap<AACO> mapToAACOs) {
MutableFacetResultNode node = new MutableFacetResultNode(ordinal, val);
private FacetResultNode generateNode(int ordinal, double val, IntToObjectMap<AACO> mapToAACOs) {
FacetResultNode node = new FacetResultNode(ordinal, val);
AACO aaco = mapToAACOs.get(ordinal);
if (null == aaco) {
return node;
@ -792,8 +786,8 @@ public class TopKInEachNodeHandler extends FacetResultsHandler {
for (int i = 0; i < aaco.ordinals.length; i++) {
list.add(generateNode(aaco.ordinals[i], aaco.values[i], mapToAACOs));
}
node.setSubResults(list);
node.setResidue(aaco.residue);
node.subResults = list;
node.residue = aaco.residue;
return node;
}

View File

@ -8,9 +8,7 @@ import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.facet.index.params.CategoryListParams;
@ -150,14 +148,13 @@ public class TotalFacetCounts {
}
// needed because FacetSearchParams do not allow empty FacetRequests
private static final List<FacetRequest> DUMMY_REQ = Arrays.asList(
new FacetRequest[] { new CountFacetRequest(CategoryPath.EMPTY, 1) });
private static final FacetRequest DUMMY_REQ = new CountFacetRequest(CategoryPath.EMPTY, 1);
static TotalFacetCounts compute(final IndexReader indexReader, final TaxonomyReader taxonomy,
final FacetIndexingParams facetIndexingParams) throws IOException {
int partitionSize = PartitionsUtils.partitionSize(facetIndexingParams, taxonomy);
final int[][] counts = new int[(int) Math.ceil(taxonomy.getSize() /(float) partitionSize)][partitionSize];
FacetSearchParams newSearchParams = new FacetSearchParams(DUMMY_REQ, facetIndexingParams);
FacetSearchParams newSearchParams = new FacetSearchParams(facetIndexingParams, DUMMY_REQ);
//createAllListsSearchParams(facetIndexingParams, this.totalCounts);
FacetsAccumulator fe = new StandardFacetsAccumulator(newSearchParams, indexReader, taxonomy) {
@Override

View File

@ -35,8 +35,8 @@ import org.apache.lucene.facet.index.params.FacetIndexingParams;
*/
public class FacetSearchParams {
protected final FacetIndexingParams indexingParams;
protected final List<FacetRequest> facetRequests;
public final FacetIndexingParams indexingParams;
public final List<FacetRequest> facetRequests;
/**
* Initializes with the given {@link FacetRequest requests} and default
@ -57,6 +57,14 @@ public class FacetSearchParams {
public FacetSearchParams(List<FacetRequest> facetRequests) {
this(facetRequests, FacetIndexingParams.ALL_PARENTS);
}
/**
   * Initializes with the given {@link FacetRequest requests} and
* {@link FacetIndexingParams}.
*/
public FacetSearchParams(FacetIndexingParams indexingParams, FacetRequest... facetRequests) {
this(Arrays.asList(facetRequests), indexingParams);
}
/**
   * Initializes with the given {@link FacetRequest requests} and
@ -66,24 +74,8 @@ public class FacetSearchParams {
if (facetRequests == null || facetRequests.size() == 0) {
throw new IllegalArgumentException("at least one FacetRequest must be defined");
}
this.indexingParams = indexingParams;
this.facetRequests = facetRequests;
}
/**
* Returns the {@link FacetIndexingParams} that were passed to the
* constructor.
*/
public FacetIndexingParams getFacetIndexingParams() {
return indexingParams;
}
/**
* Returns the list of {@link FacetRequest facet requests} that were passed to
* the constructor.
*/
public List<FacetRequest> getFacetRequests() {
return facetRequests;
this.indexingParams = indexingParams;
}
@Override
@ -92,10 +84,10 @@ public class FacetSearchParams {
final char NEWLINE = '\n';
StringBuilder sb = new StringBuilder("IndexingParams: ");
sb.append(NEWLINE).append(TAB).append(getFacetIndexingParams());
sb.append(NEWLINE).append(TAB).append(indexingParams);
sb.append(NEWLINE).append("FacetRequests:");
for (FacetRequest facetRequest : getFacetRequests()) {
for (FacetRequest facetRequest : facetRequests) {
sb.append(NEWLINE).append(TAB).append(facetRequest);
}
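A construction sketch using the overloads above (the requests are illustrative):

FacetSearchParams fsp = new FacetSearchParams(FacetIndexingParams.ALL_PARENTS,
    new CountFacetRequest(new CategoryPath("Author"), 10),
    new CountFacetRequest(new CategoryPath("Date"), 5));
// the former getters are now public final fields
FacetIndexingParams indexingParams = fsp.indexingParams;
List<FacetRequest> requests = fsp.facetRequests;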

View File

@ -1,10 +1,11 @@
package org.apache.lucene.facet.search.results;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import org.apache.lucene.facet.search.FacetResultsHandler;
import org.apache.lucene.facet.search.params.CountFacetRequest;
import org.apache.lucene.facet.search.params.FacetRequest;
import org.apache.lucene.facet.search.sampling.SampleFixer;
import org.apache.lucene.facet.search.params.FacetRequest.ResultMode;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
@ -26,85 +27,86 @@ import org.apache.lucene.facet.taxonomy.TaxonomyReader;
*/
/**
* Result of faceted search for a certain taxonomy node.
* Result of faceted search for a certain taxonomy node. This class serves as a
* bin of different attributes of the result node, such as its {@link #ordinal}
* as well as {@link #label}. You are not expected to modify those values.
*
* @lucene.experimental
*/
public interface FacetResultNode {
public class FacetResultNode {
public static final List<FacetResultNode> EMPTY_SUB_RESULTS = Collections.emptyList();
/** The category ordinal of this node. */
public int ordinal;
/**
* String representation of this facet result node.
* Use with caution: might return a very long string.
* @param prefix prefix for each result line
*/
public String toString(String prefix);
/**
* Ordinal of the category of this result.
*/
public int getOrdinal();
/**
* Category path of the category of this result, or null if not computed,
* because the application did not request to compute it.
* To force computing the label in case not yet computed use
* {@link #getLabel(TaxonomyReader)}.
* @see FacetRequest#getNumLabel()
* @see #getLabel(TaxonomyReader)
*/
public CategoryPath getLabel();
/**
* Category path of the category of this result.
* If not already computed, will be computed now.
* <p>
* Use with <b>caution</b>: loading a label for results is costly, performance wise.
* Therefore force labels loading only when really needed.
* @param taxonomyReader taxonomy reader for forcing (lazy) labeling of this result.
* @throws IOException on error
* @see FacetRequest#getNumLabel()
*/
public CategoryPath getLabel(TaxonomyReader taxonomyReader) throws IOException;
/**
* Value of this result - usually either count or a value derived from some
* computing on the association of it.
*/
public double getValue();
/**
* Value of screened out sub results.
* The {@link CategoryPath label} of this result. May be {@code null} if not
* computed, in which case use {@link TaxonomyReader#getPath(int)} to label
* it.
* <p>
* If only part of valid results are returned, e.g. because top K were requested,
* provide info on "what else is there under this result node".
* <b>NOTE:</b> by default, all nodes are labeled. Only when
* {@link FacetRequest#getNumLabel()} &lt;
   * {@link FacetRequest#getNumResults()} will there be unlabeled nodes.
*/
public double getResidue();
public CategoryPath label;
/**
* The value of this result. Its actual type depends on the
* {@link FacetRequest} used (e.g. in case of {@link CountFacetRequest} it is
* {@code int}).
*/
public double value;
/**
* Contained sub results.
* These are either child facets, if a tree result was requested, or simply descendants, in case
* tree result was not requested. In the first case, all returned are both descendants of
* this node in the taxonomy and siblings of each other in the taxonomy.
* In the latter case they are only guaranteed to be descendants of
* this node in the taxonomy.
* The total value of screened out sub results. If only part of the results
* were returned (usually because only the top-K categories are requested),
* then this provides information on "what else is there under this result
* node".
*/
public Iterable<? extends FacetResultNode> getSubResults();
public double residue;
/**
* Number of sub results
*/
public int getNumSubResults();
/**
* Expert: Set a new value for this result node.
* The sub-results of this result. If {@link FacetRequest#getResultMode()} is
* {@link ResultMode#PER_NODE_IN_TREE}, every sub result denotes an immediate
* child of this node. Otherwise, it is a descendant of any level.
* <p>
* Allows to modify the value of this facet node.
* Used for example to tune a sampled value, e.g. by
* {@link SampleFixer#fixResult(org.apache.lucene.facet.search.ScoredDocIDs, FacetResult)}
* @param value the new value to set
* @see #getValue()
* @see FacetResultsHandler#rearrangeFacetResult(FacetResult)
* <b>NOTE:</b> this member should not be {@code null}. To denote that a
* result does not have sub results, set it to {@link #EMPTY_SUB_RESULTS} (or
* don't modify it).
*/
public void setValue(double value);
public List<FacetResultNode> subResults = EMPTY_SUB_RESULTS;
}
public FacetResultNode() {
// empty constructor
}
public FacetResultNode(int ordinal, double value) {
this.ordinal = ordinal;
this.value = value;
}
@Override
public String toString() {
return toString("");
}
/** Returns a String representation of this facet result node. */
public String toString(String prefix) {
StringBuilder sb = new StringBuilder(prefix);
if (label == null) {
sb.append("not labeled (ordinal=").append(ordinal).append(")");
} else {
sb.append(label.toString());
}
sb.append(" (").append(Double.toString(value)).append(")");
if (residue > 0) {
sb.append(" (residue=").append(residue).append(")");
}
for (FacetResultNode sub : subResults) {
sb.append("\n").append(prefix).append(sub.toString(prefix + " "));
}
return sb.toString();
}
}
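A traversal sketch over the public members (taxoReader assumed; nodes beyond FacetRequest.getNumLabel() may arrive unlabeled and can be labeled on demand):

void print(FacetResultNode node, TaxonomyReader taxoReader, String indent) throws IOException {
  CategoryPath label = node.label != null ? node.label : taxoReader.getPath(node.ordinal);
  System.out.println(indent + label + " (" + node.value + ", residue=" + node.residue + ")");
  for (FacetResultNode child : node.subResults) {
    print(child, taxoReader, indent + "  ");
  }
}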

View File

@ -1,353 +0,0 @@
package org.apache.lucene.facet.search.results;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Mutable implementation for Result of faceted search for a certain taxonomy node.
*
* @lucene.experimental
*/
public class MutableFacetResultNode implements FacetResultNode {
/**
* Empty sub results to be returned when there are no results.
* We never return null, so that code using this can remain simpler.
*/
private static final ArrayList<FacetResultNode> EMPTY_SUB_RESULTS = new ArrayList<FacetResultNode>();
private int ordinal;
private CategoryPath label = null;
private double value;
private double residue;
private List<FacetResultNode> subResults;
/**
* Create a Facet Result Node.
*
* @param ordinal
* ordinal in the taxonomy of the category of this result.
* @param value
* value this result.
*/
public MutableFacetResultNode(int ordinal, double value) {
this(ordinal, value, 0, null, null);
}
/**
* Reset a facet Result Node.
* <p>
* Used at the population of facet results, not intended for regular use by
* applications.
*
* @param ordinal
* ordinal in the taxonomy of the category of this result.
* @param value
* value of this result.
*/
public void reset(int ordinal, double value) {
this.ordinal = ordinal;
this.value = value;
if (subResults != null) {
subResults.clear();
}
label = null;
residue = 0;
}
/**
* Create a Facet Result Node.
*
* @param ordinal
* ordinal in the taxonomy of the category of this result.
* @param value
* value of this result.
* @param residue
* Value of screened out sub results.
* @param label
* label of the category path of this result.
* @param subResults
* - sub results, usually descendants, sometimes child results, of
* this result - depending on the request.
*/
public MutableFacetResultNode(int ordinal, double value, double residue,
CategoryPath label, List<FacetResultNode> subResults) {
this.ordinal = ordinal;
this.value = value;
this.residue = residue;
this.label = label;
this.subResults = subResults;
}
/**
* Create a mutable facet result node from another result node
* @param other other result node to copy from
* @param takeSubResults set to true to take also sub results of other node
*/
public MutableFacetResultNode(FacetResultNode other, boolean takeSubResults) {
this(other.getOrdinal(), other.getValue(), other.getResidue(), other
.getLabel(), takeSubResults ? resultsToList(other.getSubResults())
: null);
}
private static List<FacetResultNode> resultsToList(
Iterable<? extends FacetResultNode> subResults) {
if (subResults == null) {
return null;
}
ArrayList<FacetResultNode> res = new ArrayList<FacetResultNode>();
for (FacetResultNode r : subResults) {
res.add(r);
}
return res;
}
@Override
public String toString() {
return toString("");
}
/**
* Number of sub results.
*/
private int numSubResults() {
if (subResults == null) {
return 0;
}
return subResults.size();
}
/*
* (non-Javadoc)
*
* @see
* org.apache.lucene.facet.search.results2.FacetResultNode#toString(java.lang.
* String)
*/
@Override
public String toString(String prefix) {
StringBuilder sb = new StringBuilder(prefix);
sb.append("Facet Result Node with ").append(numSubResults()).append(
" sub result nodes.\n");
// label
sb.append(prefix).append("Name: ").append(getLabel()).append("\n");
// value
sb.append(prefix).append("Value: ").append(value).append("\n");
// residue
sb.append(prefix).append("Residue: ").append(residue).append("\n");
if (subResults != null) {
int i = 0;
for (FacetResultNode subRes : subResults) {
sb.append("\n").append(prefix).append("Subresult #").append(i++)
.append("\n").append(subRes.toString(prefix + "\t"));
}
}
return sb.toString();
}
@Override
public final int getOrdinal() {
return ordinal;
}
@Override
public final CategoryPath getLabel() {
return label;
}
/**
* Set the label of the category of this result.
* @param label the label to set.
* @see #getLabel()
*/
public void setLabel(CategoryPath label) {
this.label = label;
}
@Override
public final double getValue() {
return value;
}
/**
* Set the value of this result.
*
* @param value
* the value to set
* @see #getValue()
*/
@Override
public void setValue(double value) {
this.value = value;
}
/**
* increase the value for this result.
* @param addedValue the value to add
* @see #getValue()
*/
public void increaseValue(double addedValue) {
this.value += addedValue;
}
@Override
public final double getResidue() {
return residue;
}
/**
* Set the residue.
* @param residue the residue to set
* @see #getResidue()
*/
public void setResidue(double residue) {
this.residue = residue;
}
/**
* increase the residue for this result.
* @param addedResidue the residue to add
* @see #getResidue()
*/
public void increaseResidue(double addedResidue) {
this.residue += addedResidue;
}
@Override
public final Iterable<? extends FacetResultNode> getSubResults() {
return subResults != null ? subResults : EMPTY_SUB_RESULTS;
}
/**
* Trim sub results to a given size.
* <p>
* Note: Although the {@link #getResidue()} is not guaranteed to be
* accurate, it is worth fixing it, as possible, by taking under account the
* trimmed sub-nodes.
*/
public void trimSubResults(int size) {
if (subResults == null || subResults.size() == 0) {
return;
}
ArrayList<FacetResultNode> trimmed = new ArrayList<FacetResultNode>(size);
for (int i = 0; i < subResults.size() && i < size; i++) {
MutableFacetResultNode trimmedNode = toImpl(subResults.get(i));
trimmedNode.trimSubResults(size);
trimmed.add(trimmedNode);
}
/*
* If we are trimming, it means Sampling is in effect and the extra
* (over-sampled) results are being trimmed. Although the residue is not
* guaranteed to be accurate for Sampling, we try our best to fix it.
* The node's residue now will take under account the sub-nodes we're
* trimming.
*/
for (int i = size; i < subResults.size(); i++) {
increaseResidue(subResults.get(i).getValue());
}
subResults = trimmed;
}
/**
* Set the sub results.
* @param subResults the sub-results to set
*/
public void setSubResults(List<FacetResultNode> subResults) {
this.subResults = subResults;
}
/**
* Append a sub result (as last).
* @param subRes sub-result to be appended
*/
public void appendSubResult(FacetResultNode subRes) {
if (subResults == null) {
subResults = new ArrayList<FacetResultNode>();
}
subResults.add(subRes);
}
/**
* Insert sub result (as first).
* @param subRes sub-result to be inserted
*/
public void insertSubResult(FacetResultNode subRes) {
if (subResults == null) {
subResults = new ArrayList<FacetResultNode>();
}
subResults.add(0, subRes);
}
/*
* (non-Javadoc)
*
* @see
* org.apache.lucene.facet.search.results.FacetResultNode#getLabel(org.apache.lucene
* .facet.taxonomy.TaxonomyReader)
*/
@Override
public final CategoryPath getLabel(TaxonomyReader taxonomyReader)
throws IOException {
if (label == null) {
label = taxonomyReader.getPath(ordinal);
}
return label;
}
/*
* (non-Javadoc)
*
* @see org.apache.lucene.facet.search.results.FacetResultNode#getNumSubResults()
*/
@Override
public final int getNumSubResults() {
return subResults == null ? 0 : subResults.size();
}
/**
* Internal utility: turn a result node into an implementation class
* with richer API that allows modifying it.
* <p>
* In case that input result node is already of an implementation
* class only casting is done, but in any case we pay the price
* of checking "instance of".
* @param frn facet result node to be turned into an implementation class object
*/
public static MutableFacetResultNode toImpl(FacetResultNode frn) {
if (frn instanceof MutableFacetResultNode) {
return (MutableFacetResultNode) frn;
}
return new MutableFacetResultNode(frn, true);
}
}

View File

@ -11,7 +11,6 @@ import org.apache.lucene.facet.search.params.FacetRequest;
import org.apache.lucene.facet.search.params.FacetSearchParams;
import org.apache.lucene.facet.search.results.FacetResult;
import org.apache.lucene.facet.search.results.FacetResultNode;
import org.apache.lucene.facet.search.results.MutableFacetResultNode;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.IndexReader;
@ -41,7 +40,7 @@ import org.apache.lucene.index.IndexReader;
* Note: Sampling accumulation (Accumulation over a sampled-set of the results),
* does not guarantee accurate values for
* {@link FacetResult#getNumValidDescendants()} &
* {@link FacetResultNode#getResidue()}.
* {@link FacetResultNode#residue}.
*
* @lucene.experimental
*/
@ -169,12 +168,39 @@ public abstract class Sampler {
FacetRequest origFrq = sampledFreq.orig;
MutableFacetResultNode trimmedRootNode = MutableFacetResultNode.toImpl(facetResult.getFacetResultNode());
trimmedRootNode.trimSubResults(origFrq.getNumResults());
FacetResultNode trimmedRootNode = facetResult.getFacetResultNode();
trimSubResults(trimmedRootNode, origFrq.getNumResults());
return new FacetResult(origFrq, trimmedRootNode, facetResult.getNumValidDescendants());
}
/** Trim sub results to a given size. */
private void trimSubResults(FacetResultNode node, int size) {
if (node.subResults == FacetResultNode.EMPTY_SUB_RESULTS || node.subResults.size() == 0) {
return;
}
ArrayList<FacetResultNode> trimmed = new ArrayList<FacetResultNode>(size);
for (int i = 0; i < node.subResults.size() && i < size; i++) {
FacetResultNode trimmedNode = node.subResults.get(i);
trimSubResults(trimmedNode, size);
trimmed.add(trimmedNode);
}
/*
* If we are trimming, it means Sampling is in effect and the extra
* (over-sampled) results are being trimmed. Although the residue is not
* guaranteed to be accurate for Sampling, we try our best to fix it.
* The node's residue now will take under account the sub-nodes we're
* trimming.
*/
for (int i = size; i < node.subResults.size(); i++) {
node.residue += node.subResults.get(i).value;
}
node.subResults = trimmed;
}
/**
* Over-sampled search params, wrapping each request with an over-sampled one.
*/
@ -184,11 +210,11 @@ public abstract class Sampler {
double overSampleFactor = getSamplingParams().getOversampleFactor();
if (overSampleFactor > 1) { // any factoring to do?
List<FacetRequest> facetRequests = new ArrayList<FacetRequest>();
for (FacetRequest frq : original.getFacetRequests()) {
for (FacetRequest frq : original.facetRequests) {
int overSampledNumResults = (int) Math.ceil(frq.getNumResults() * overSampleFactor);
facetRequests.add(new OverSampledFacetRequest(frq, overSampledNumResults));
}
res = new FacetSearchParams(facetRequests, original.getFacetIndexingParams());
res = new FacetSearchParams(facetRequests, original.indexingParams);
}
return res;
}

View File

@ -49,7 +49,7 @@ import org.apache.lucene.index.IndexReader;
* Note: Sampling accumulation (Accumulation over a sampled-set of the results),
* does not guarantee accurate values for
* {@link FacetResult#getNumValidDescendants()} &
* {@link FacetResultNode#getResidue()}.
* {@link FacetResultNode#residue}.
*
* @see Sampler
* @lucene.experimental

View File

@ -74,10 +74,9 @@ class TakmiSampleFixer implements SampleFixer {
* docids in effect
* @throws IOException If there is a low-level I/O error.
*/
private void fixResultNode(FacetResultNode facetResNode, ScoredDocIDs docIds)
throws IOException {
private void fixResultNode(FacetResultNode facetResNode, ScoredDocIDs docIds) throws IOException {
recount(facetResNode, docIds);
for (FacetResultNode frn : facetResNode.getSubResults()) {
for (FacetResultNode frn : facetResNode.subResults) {
fixResultNode(frn, docIds);
}
}
@ -101,7 +100,10 @@ class TakmiSampleFixer implements SampleFixer {
* facet results was exercised, we need to calculate them anyway, so
* in essence sampling with recounting spends some extra cycles for
* labeling results for which labels are not required. */
CategoryPath catPath = fresNode.getLabel(taxonomyReader); // force labeling
if (fresNode.label == null) {
fresNode.label = taxonomyReader.getPath(fresNode.ordinal);
}
CategoryPath catPath = fresNode.label;
Term drillDownTerm = DrillDown.term(searchParams, catPath);
// TODO (Facet): avoid Multi*?
@ -109,8 +111,7 @@ class TakmiSampleFixer implements SampleFixer {
int updatedCount = countIntersection(MultiFields.getTermDocsEnum(indexReader, liveDocs,
drillDownTerm.field(), drillDownTerm.bytes(),
0), docIds.iterator());
fresNode.setValue(updatedCount);
fresNode.value = updatedCount;
}
/**

View File

@ -48,12 +48,19 @@ public class CategoryPath implements Comparable<CategoryPath> {
// Used by subpath
private CategoryPath(CategoryPath copyFrom, int prefixLen) {
// While the code which calls this method is safe, at some point a test
// tripped on an AIOOBE in toString, but we failed to reproduce it. Adding the
// assert as a safety check.
assert prefixLen > 0 && prefixLen <= copyFrom.components.length :
"prefixLen cannot be negative nor larger than the given components' length: prefixLen=" + prefixLen
+ " components.length=" + copyFrom.components.length;
this.components = copyFrom.components;
length = prefixLen;
}
/** Construct from the given path components. */
public CategoryPath(String... components) {
assert components.length > 0 : "use CategoryPath.EMPTY to create an empty path";
this.components = components;
length = components.length;
}
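// A brief usage sketch (illustrative; the component values are arbitrary):
// components map directly to path segments, and subpath() reuses the same
// components array with a shorter length.
CategoryPath full = new CategoryPath("Author", "Lisa"); // "Author/Lisa"
CategoryPath dim = full.subpath(1); // "Author"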

View File

@ -14,7 +14,6 @@ import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.document.Document;
@ -30,7 +29,7 @@ import org.apache.lucene.facet.taxonomy.writercache.cl2o.Cl2oTaxonomyWriterCache
import org.apache.lucene.facet.taxonomy.writercache.lru.LruTaxonomyWriterCache;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.CorruptIndexException; // javadocs
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
@ -45,7 +44,7 @@ import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException; // javadocs
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.NativeFSLockFactory;
import org.apache.lucene.store.SimpleFSLockFactory;
import org.apache.lucene.util.BytesRef;
@ -303,8 +302,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
// Make sure we use a MergePolicy which always merges adjacent segments and thus
// keeps the doc IDs ordered as well (this is crucial for the taxonomy index).
return new IndexWriterConfig(Version.LUCENE_50,
new KeywordAnalyzer()).setOpenMode(openMode).setMergePolicy(
return new IndexWriterConfig(Version.LUCENE_50, null).setOpenMode(openMode).setMergePolicy(
new LogByteSizeMergePolicy());
}

View File

@ -0,0 +1,98 @@
package org.apache.lucene.facet.util;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.ParallelTaxonomyArrays;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
/** Prints how many ords are under each dimension. */
public class PrintTaxonomyStats {
public static void main(String[] args) throws IOException {
boolean printTree = false;
String path = null;
for(int i=0;i<args.length;i++) {
if (args[i].equals("-printTree")) {
printTree = true;
} else {
path = args[i];
}
}
if (args.length != (printTree ? 2 : 1)) {
System.out.println("\nUsage: java -classpath ... org.apache.lucene.facet.util.PrintTaxonomyStats [-printTree] /path/to/taxononmy/index\n");
System.exit(1);
}
Directory dir = FSDirectory.open(new File(path));
TaxonomyReader r = new DirectoryTaxonomyReader(dir);
printStats(r, System.out, printTree);
r.close();
dir.close();
}
public static void printStats(TaxonomyReader r, PrintStream out, boolean printTree) throws IOException {
ParallelTaxonomyArrays arrays = r.getParallelTaxonomyArrays();
//int[] parents = arrays.parents();
int[] children = arrays.children();
int[] siblings = arrays.siblings();
out.println(r.getSize() + " total categories.");
int childOrd = children[TaxonomyReader.ROOT_ORDINAL];
while(childOrd != -1) {
CategoryPath cp = r.getPath(childOrd);
int childOrd2 = children[childOrd];
int numImmediateChildren = 0;
while(childOrd2 != -1) {
numImmediateChildren++;
childOrd2 = siblings[childOrd2];
}
out.println("/" + cp + ": " + numImmediateChildren + " immediate children; " + (1+countAllChildren(r, childOrd, children, siblings)) + " total categories");
if (printTree) {
printAllChildren(out, r, childOrd, children, siblings, " ", 1);
}
childOrd = siblings[childOrd];
}
}
private static int countAllChildren(TaxonomyReader r, int ord, int[] children, int[] siblings) throws IOException {
int childOrd = children[ord];
int count = 0;
while(childOrd != -1) {
count += 1+countAllChildren(r, childOrd, children, siblings);
childOrd = siblings[childOrd];
}
return count;
}
private static void printAllChildren(PrintStream out, TaxonomyReader r, int ord, int[] children, int[] siblings, String indent, int depth) throws IOException {
int childOrd = children[ord];
while(childOrd != -1) {
out.println(indent + "/" + r.getPath(childOrd).components[depth]);
printAllChildren(out, r, childOrd, children, siblings, indent + " ", depth+1);
childOrd = siblings[childOrd];
}
}
}
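// A programmatic sketch of the same stats dump performed by main() above
// (the index path is a placeholder):
Directory taxo = FSDirectory.open(new File("/path/to/taxonomy/index"));
TaxonomyReader r = new DirectoryTaxonomyReader(taxo);
PrintTaxonomyStats.printStats(r, System.out, true); // true also prints the category tree
r.close();
taxo.close();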

View File

@ -73,12 +73,12 @@ public class ResultSortUtils {
@Override
protected boolean lessThan(FacetResultNode arg0, FacetResultNode arg1) {
double value0 = arg0.getValue();
double value1 = arg1.getValue();
double value0 = arg0.value;
double value1 = arg1.value;
int valueCompare = Double.compare(value0, value1);
if (valueCompare == 0) {
return arg0.getOrdinal() < arg1.getOrdinal();
return arg0.ordinal < arg1.ordinal;
}
return valueCompare < 0;
@ -93,40 +93,38 @@ public class ResultSortUtils {
@Override
protected boolean lessThan(FacetResultNode arg0, FacetResultNode arg1) {
double value0 = arg0.getValue();
double value1 = arg1.getValue();
double value0 = arg0.value;
double value1 = arg1.value;
int valueCompare = Double.compare(value0, value1);
if (valueCompare == 0) {
return arg0.getOrdinal() > arg1.getOrdinal();
return arg0.ordinal > arg1.ordinal;
}
return valueCompare > 0;
}
}
private static class MinOrdinalHeap extends
PriorityQueue<FacetResultNode> implements Heap<FacetResultNode> {
private static class MinOrdinalHeap extends PriorityQueue<FacetResultNode> implements Heap<FacetResultNode> {
public MinOrdinalHeap(int size) {
super(size);
}
@Override
protected boolean lessThan(FacetResultNode arg0, FacetResultNode arg1) {
return arg0.getOrdinal() < arg1.getOrdinal();
return arg0.ordinal < arg1.ordinal;
}
}
private static class MaxOrdinalHeap extends
PriorityQueue<FacetResultNode> implements Heap<FacetResultNode> {
private static class MaxOrdinalHeap extends PriorityQueue<FacetResultNode> implements Heap<FacetResultNode> {
public MaxOrdinalHeap(int size) {
super(size);
}
@Override
protected boolean lessThan(FacetResultNode arg0, FacetResultNode arg1) {
return arg0.getOrdinal() > arg1.getOrdinal();
return arg0.ordinal > arg1.ordinal;
}
}
@ -156,10 +154,9 @@ public class ResultSortUtils {
Collections.sort(resultNodes, new Comparator<FacetResultNode>() {
@Override
public int compare(FacetResultNode o1, FacetResultNode o2) {
int value = Double.compare(o1.getValue(), o2
.getValue());
int value = Double.compare(o1.value, o2.value);
if (value == 0) {
value = o1.getOrdinal() - o2.getOrdinal();
value = o1.ordinal - o2.ordinal;
}
if (accending) {
value = -value;
@ -198,4 +195,5 @@ public class ResultSortUtils {
resultNodes.clear();
}
}
}

View File

@ -196,7 +196,7 @@ public abstract class FacetTestBase extends LuceneTestCase {
* test with different faceted search params.
*/
protected FacetSearchParams getFacetSearchParams(FacetIndexingParams iParams, FacetRequest... facetRequests) {
return new FacetSearchParams(Arrays.asList(facetRequests), iParams);
return new FacetSearchParams(iParams, facetRequests);
}
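// A small sketch of the constructor order used above: indexing params first,
// then the requests as varargs (the dimension names here are illustrative):
FacetSearchParams example = new FacetSearchParams(iParams,
    new CountFacetRequest(new CategoryPath("Author"), 10),
    new CountFacetRequest(new CategoryPath("Publish Date"), 10));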
/**
@ -246,7 +246,7 @@ public abstract class FacetTestBase extends LuceneTestCase {
/** convenience method: convert sub results to an array */
protected static FacetResultNode[] resultNodesAsArray(FacetResultNode parentRes) {
ArrayList<FacetResultNode> a = new ArrayList<FacetResultNode>();
for (FacetResultNode frn : parentRes.getSubResults()) {
for (FacetResultNode frn : parentRes.subResults) {
a.add(frn);
}
return a.toArray(new FacetResultNode[0]);
@ -305,42 +305,27 @@ public abstract class FacetTestBase extends LuceneTestCase {
/** Validate counts for returned facets, and that there are not too many results */
private static void assertCountsAndCardinality(Map<CategoryPath,Integer> facetCountsTruth, FacetResultNode resNode, int reqNumResults) throws Exception {
int actualNumResults = resNode.getNumSubResults();
int actualNumResults = resNode.subResults.size();
if (VERBOSE) {
System.out.println("NumResults: " + actualNumResults);
}
assertTrue("Too many results!", actualNumResults <= reqNumResults);
for (FacetResultNode subRes : resNode.getSubResults()) {
assertEquals("wrong count for: "+subRes, facetCountsTruth.get(subRes.getLabel()).intValue(), (int)subRes.getValue());
for (FacetResultNode subRes : resNode.subResults) {
assertEquals("wrong count for: "+subRes, facetCountsTruth.get(subRes.label).intValue(), (int)subRes.value);
assertCountsAndCardinality(facetCountsTruth, subRes, reqNumResults); // recurse into child results
}
}
/** Validate results equality */
protected static void assertSameResults(List<FacetResult> expected, List<FacetResult> actual) {
String expectedResults = resStringValueOnly(expected);
String actualResults = resStringValueOnly(actual);
if (!expectedResults.equals(actualResults)) {
System.err.println("Results are not the same!");
System.err.println("Expected:\n" + expectedResults);
System.err.println("Actual:\n" + actualResults);
throw new NotSameResultError();
}
}
/** exclude the residue and numDescendants because they are inaccurate under sampling */
private static final String resStringValueOnly(List<FacetResult> results) {
StringBuilder sb = new StringBuilder();
for (FacetResult facetRes : results) {
sb.append(facetRes.toString()).append('\n');
}
return sb.toString().replaceAll("Residue:.*.0", "").replaceAll("Num valid Descendants.*", "");
}
/** Special Error class for ability to ignore only this error and retry... */
public static class NotSameResultError extends Error {
public NotSameResultError() {
super("Results are not the same!");
assertEquals("wrong number of facet results", expected.size(), actual.size());
int size = expected.size();
for (int i = 0; i < size; i++) {
FacetResult expectedResult = expected.get(i);
FacetResult actualResult = actual.get(i);
String expectedStr = FacetTestUtils.toSimpleString(expectedResult);
String actualStr = FacetTestUtils.toSimpleString(actualResult);
assertEquals("Results not the same!\nExpected:" + expectedStr + "\nActual:\n" + actualStr, expectedStr, actualStr);
}
}

View File

@ -4,12 +4,14 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.facet.index.params.FacetIndexingParams;
import org.apache.lucene.facet.search.FacetsCollector;
import org.apache.lucene.facet.search.params.CountFacetRequest;
import org.apache.lucene.facet.search.params.FacetRequest;
import org.apache.lucene.facet.search.params.FacetSearchParams;
import org.apache.lucene.facet.search.results.FacetResult;
import org.apache.lucene.facet.search.results.FacetResultNode;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
@ -45,71 +47,6 @@ import org.apache.lucene.util.LuceneTestCase;
public class FacetTestUtils {
public static Directory[][] createIndexTaxonomyDirs(int number) {
Directory[][] dirs = new Directory[number][2];
for (int i = 0; i < number; i++) {
dirs[i][0] = LuceneTestCase.newDirectory();
dirs[i][1] = LuceneTestCase.newDirectory();
}
return dirs;
}
public static IndexTaxonomyReaderPair[] createIndexTaxonomyReaderPair(
Directory[][] dirs) throws IOException {
IndexTaxonomyReaderPair[] pairs = new IndexTaxonomyReaderPair[dirs.length];
for (int i = 0; i < dirs.length; i++) {
IndexTaxonomyReaderPair pair = new IndexTaxonomyReaderPair();
pair.indexReader = DirectoryReader.open(dirs[i][0]);
pair.indexSearcher = new IndexSearcher(pair.indexReader);
pair.taxReader = new DirectoryTaxonomyReader(dirs[i][1]);
pairs[i] = pair;
}
return pairs;
}
public static IndexTaxonomyWriterPair[] createIndexTaxonomyWriterPair(
Directory[][] dirs) throws IOException {
IndexTaxonomyWriterPair[] pairs = new IndexTaxonomyWriterPair[dirs.length];
for (int i = 0; i < dirs.length; i++) {
IndexTaxonomyWriterPair pair = new IndexTaxonomyWriterPair();
pair.indexWriter = new IndexWriter(dirs[i][0], new IndexWriterConfig(
LuceneTestCase.TEST_VERSION_CURRENT, new StandardAnalyzer(
LuceneTestCase.TEST_VERSION_CURRENT)));
pair.taxWriter = new DirectoryTaxonomyWriter(dirs[i][1]);
pair.indexWriter.commit();
pair.taxWriter.commit();
pairs[i] = pair;
}
return pairs;
}
public static Collector[] search(IndexSearcher searcher,
TaxonomyReader taxonomyReader, FacetIndexingParams iParams, int k,
String... facetNames) throws IOException {
Collector[] collectors = new Collector[2];
List<FacetRequest> fRequests = new ArrayList<FacetRequest>();
for (String facetName : facetNames) {
CategoryPath cp = new CategoryPath(facetName);
FacetRequest fq = new CountFacetRequest(cp, k);
fRequests.add(fq);
}
FacetSearchParams facetSearchParams = new FacetSearchParams(fRequests, iParams);
TopScoreDocCollector topDocsCollector = TopScoreDocCollector.create(
searcher.getIndexReader().maxDoc(), true);
FacetsCollector facetsCollector = new FacetsCollector(
facetSearchParams, searcher.getIndexReader(), taxonomyReader);
Collector mColl = MultiCollector.wrap(topDocsCollector, facetsCollector);
collectors[0] = topDocsCollector;
collectors[1] = facetsCollector;
searcher.search(new MatchAllDocsQuery(), mColl);
return collectors;
}
public static class IndexTaxonomyReaderPair {
public DirectoryReader indexReader;
public DirectoryTaxonomyReader taxReader;
@ -137,4 +74,76 @@ public class FacetTestUtils {
}
}
public static Directory[][] createIndexTaxonomyDirs(int number) {
Directory[][] dirs = new Directory[number][2];
for (int i = 0; i < number; i++) {
dirs[i][0] = LuceneTestCase.newDirectory();
dirs[i][1] = LuceneTestCase.newDirectory();
}
return dirs;
}
public static IndexTaxonomyReaderPair[] createIndexTaxonomyReaderPair(Directory[][] dirs) throws IOException {
IndexTaxonomyReaderPair[] pairs = new IndexTaxonomyReaderPair[dirs.length];
for (int i = 0; i < dirs.length; i++) {
IndexTaxonomyReaderPair pair = new IndexTaxonomyReaderPair();
pair.indexReader = DirectoryReader.open(dirs[i][0]);
pair.indexSearcher = new IndexSearcher(pair.indexReader);
pair.taxReader = new DirectoryTaxonomyReader(dirs[i][1]);
pairs[i] = pair;
}
return pairs;
}
public static IndexTaxonomyWriterPair[] createIndexTaxonomyWriterPair(Directory[][] dirs) throws IOException {
IndexTaxonomyWriterPair[] pairs = new IndexTaxonomyWriterPair[dirs.length];
for (int i = 0; i < dirs.length; i++) {
IndexTaxonomyWriterPair pair = new IndexTaxonomyWriterPair();
pair.indexWriter = new IndexWriter(dirs[i][0], new IndexWriterConfig(
LuceneTestCase.TEST_VERSION_CURRENT, new MockAnalyzer(LuceneTestCase.random())));
pair.taxWriter = new DirectoryTaxonomyWriter(dirs[i][1]);
pair.indexWriter.commit();
pair.taxWriter.commit();
pairs[i] = pair;
}
return pairs;
}
public static Collector[] search(IndexSearcher searcher, TaxonomyReader taxonomyReader, FacetIndexingParams iParams,
int k, String... facetNames) throws IOException {
Collector[] collectors = new Collector[2];
List<FacetRequest> fRequests = new ArrayList<FacetRequest>();
for (String facetName : facetNames) {
CategoryPath cp = new CategoryPath(facetName);
FacetRequest fq = new CountFacetRequest(cp, k);
fRequests.add(fq);
}
FacetSearchParams facetSearchParams = new FacetSearchParams(fRequests, iParams);
TopScoreDocCollector topDocsCollector = TopScoreDocCollector.create(searcher.getIndexReader().maxDoc(), true);
FacetsCollector facetsCollector = FacetsCollector.create(facetSearchParams, searcher.getIndexReader(), taxonomyReader);
Collector mColl = MultiCollector.wrap(topDocsCollector, facetsCollector);
collectors[0] = topDocsCollector;
collectors[1] = facetsCollector;
searcher.search(new MatchAllDocsQuery(), mColl);
return collectors;
}
public static String toSimpleString(FacetResult fr) {
StringBuilder sb = new StringBuilder();
toSimpleString(0, sb, fr.getFacetResultNode(), "");
return sb.toString();
}
private static void toSimpleString(int depth, StringBuilder sb, FacetResultNode node, String indent) {
sb.append(indent + node.label.components[depth] + " (" + (int) node.value + ")\n");
for (FacetResultNode childNode : node.subResults) {
toSimpleString(depth + 1, sb, childNode, indent + " ");
}
}
}
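// For reference, toSimpleString() renders one line per node, indented by depth,
// for example (output shape taken from the demo test below):
//
//   Publish Date (5)
//     2012 (2)
//     2010 (2)
//     1999 (1)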

View File

@ -45,10 +45,10 @@ public class TestAssociationExample extends LuceneTestCase {
assertEquals("Wrong number of results!", 1, res.getFacetResults().size());
assertEquals("Wrong number of facets!", 2, res.getFacetResults().get(0).getNumValidDescendants());
Iterable<? extends FacetResultNode> it = res.getFacetResults().get(0).getFacetResultNode().getSubResults();
Iterable<? extends FacetResultNode> it = res.getFacetResults().get(0).getFacetResultNode().subResults;
int i = 0;
for (FacetResultNode fResNode : it) {
assertEquals("Wrong result for facet "+fResNode.getLabel(), expectedResults[i++], fResNode.getValue(), 1E-5);
assertEquals("Wrong result for facet "+fResNode.label, expectedResults[i++], fResNode.value, 1E-5);
}
}

View File

@ -43,45 +43,35 @@ public class TestMultiCLExample extends LuceneTestCase {
List<FacetResult> results = exampleResults.getFacetResults();
FacetResult result = results.get(0);
assertNotNull("Result should not be null", result);
assertEquals("Invalid label", "5", result.getFacetResultNode()
.getLabel().toString());
assertEquals("Invalid value", 2.0, result.getFacetResultNode()
.getValue(), 0.0);
assertEquals("Invalid # of subresults", 3, result.getFacetResultNode()
.getNumSubResults());
FacetResultNode node = result.getFacetResultNode();
assertEquals("Invalid label", "5", node.label.toString());
assertEquals("Invalid value", 2.0, node.value, 0.0);
assertEquals("Invalid # of subresults", 3, node.subResults.size());
Iterator<? extends FacetResultNode> subResults = result
.getFacetResultNode().getSubResults().iterator();
Iterator<? extends FacetResultNode> subResults = node.subResults.iterator();
FacetResultNode sub = subResults.next();
assertEquals("Invalid subresult value", 1.0, sub.getValue(), 0.0);
assertEquals("Invalid subresult label", "5/2", sub.getLabel()
.toString());
assertEquals("Invalid subresult value", 1.0, sub.value, 0.0);
assertEquals("Invalid subresult label", "5/2", sub.label.toString());
sub = subResults.next();
assertEquals("Invalid subresult value", 1.0, sub.getValue(), 0.0);
assertEquals("Invalid subresult label", "5/7", sub.getLabel()
.toString());
assertEquals("Invalid subresult value", 1.0, sub.value, 0.0);
assertEquals("Invalid subresult label", "5/7", sub.label.toString());
sub = subResults.next();
assertEquals("Invalid subresult value", 1.0, sub.getValue(), 0.0);
assertEquals("Invalid subresult label", "5/5", sub.getLabel()
.toString());
assertEquals("Invalid subresult value", 1.0, sub.value, 0.0);
assertEquals("Invalid subresult label", "5/5", sub.label.toString());
result = results.get(1);
node = result.getFacetResultNode();
assertNotNull("Result should not be null", result);
assertEquals("Invalid label", "5/5", result.getFacetResultNode()
.getLabel().toString());
assertEquals("Invalid value", 1,
result.getFacetResultNode().getValue(), 0.0);
assertEquals("Invalid number of subresults", 0, result
.getFacetResultNode().getNumSubResults());
assertEquals("Invalid label", "5/5", node.label.toString());
assertEquals("Invalid value", 1, node.value, 0.0);
assertEquals("Invalid number of subresults", 0, node.subResults.size());
result = results.get(2);
node = result.getFacetResultNode();
assertNotNull("Result should not be null", result);
assertEquals("Invalid label", "6/2", result.getFacetResultNode()
.getLabel().toString());
assertEquals("Invalid value", 1,
result.getFacetResultNode().getValue(), 0.0);
assertEquals("Invalid number of subresults", 0, result
.getFacetResultNode().getNumSubResults());
assertEquals("Invalid label", "6/2", node.label.toString());
assertEquals("Invalid value", 1, node.value, 0.0);
assertEquals("Invalid number of subresults", 0, node.subResults.size());
}

View File

@ -57,11 +57,11 @@ public class TestSimpleExample extends LuceneTestCase {
FacetResult facetResult = res.getFacetResults().get(0);
assertEquals("Wrong number of facets!",2, facetResult.getNumValidDescendants());
Iterator<? extends FacetResultNode> resIterator = facetResult.getFacetResultNode().getSubResults().iterator();
Iterator<? extends FacetResultNode> resIterator = facetResult.getFacetResultNode().subResults.iterator();
assertTrue("Too few results", resIterator.hasNext());
assertEquals("wrong count for first result out of 2", 1, (int)resIterator.next().getValue());
assertEquals("wrong count for first result out of 2", 1, (int)resIterator.next().value);
assertTrue("Too few results", resIterator.hasNext());
assertEquals("wrong count for second result out of 2", 1, (int)resIterator.next().getValue());
assertEquals("wrong count for second result out of 2", 1, (int)resIterator.next().value);
assertFalse("Too many results!", resIterator.hasNext());
}
}

View File

@ -71,13 +71,13 @@ public class OrdinalMappingReaderTest extends LuceneTestCase {
DirectoryTaxonomyReader taxReader = new DirectoryTaxonomyReader(taxDir);
IndexSearcher searcher = newSearcher(reader1);
FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(new CategoryPath("tag"), NUM_DOCS));
FacetsCollector collector = new FacetsCollector(fsp, reader1, taxReader);
FacetsCollector collector = FacetsCollector.create(fsp, reader1, taxReader);
searcher.search(new MatchAllDocsQuery(), collector);
FacetResult result = collector.getFacetResults().get(0);
FacetResultNode node = result.getFacetResultNode();
for (FacetResultNode facet: node.getSubResults()) {
int weight = (int)facet.getValue();
int label = Integer.parseInt(facet.getLabel().components[1]);
for (FacetResultNode facet: node.subResults) {
int weight = (int)facet.value;
int label = Integer.parseInt(facet.label.components[1]);
//System.out.println(label + ": " + weight);
if (VERBOSE) {
System.out.println(label + ": " + weight);

View File

@ -266,15 +266,15 @@ public class TestFacetsPayloadMigrationReader extends LuceneTestCase {
requests.add(new CountFacetRequest(new CategoryPath(dim), 5));
}
FacetSearchParams fsp = new FacetSearchParams(requests, fip);
FacetsCollector fc = new FacetsCollector(fsp, indexReader, taxoReader);
FacetsCollector fc = FacetsCollector.create(fsp, indexReader, taxoReader);
MatchAllDocsQuery base = new MatchAllDocsQuery();
searcher.search(base, fc);
List<FacetResult> facetResults = fc.getFacetResults();
assertEquals(requests.size(), facetResults.size());
for (FacetResult res : facetResults) {
FacetResultNode node = res.getFacetResultNode();
String dim = node.getLabel().components[0];
assertEquals("wrong count for " + dim, expectedCounts.get(dim).intValue(), (int) node.getValue());
String dim = node.label.components[0];
assertEquals("wrong count for " + dim, expectedCounts.get(dim).intValue(), (int) node.value);
}
}
@ -283,18 +283,16 @@ public class TestFacetsPayloadMigrationReader extends LuceneTestCase {
// verify drill-down
for (String dim : expectedCounts.keySet()) {
CategoryPath drillDownCP = new CategoryPath(dim);
ArrayList<FacetRequest> request = new ArrayList<FacetRequest>(1);
request.add(new CountFacetRequest(drillDownCP, 10));
FacetSearchParams fsp = new FacetSearchParams(request, fip);
FacetSearchParams fsp = new FacetSearchParams(fip, new CountFacetRequest(drillDownCP, 10));
Query drillDown = DrillDown.query(fsp, new MatchAllDocsQuery(), drillDownCP);
TotalHitCountCollector total = new TotalHitCountCollector();
FacetsCollector fc = new FacetsCollector(fsp, indexReader, taxoReader);
FacetsCollector fc = FacetsCollector.create(fsp, indexReader, taxoReader);
searcher.search(drillDown, MultiCollector.wrap(fc, total));
assertTrue("no results for drill-down query " + drillDown, total.getTotalHits() > 0);
List<FacetResult> facetResults = fc.getFacetResults();
assertEquals(1, facetResults.size());
FacetResultNode rootNode = facetResults.get(0).getFacetResultNode();
assertEquals("wrong count for " + dim, expectedCounts.get(dim).intValue(), (int) rootNode.getValue());
assertEquals("wrong count for " + dim, expectedCounts.get(dim).intValue(), (int) rootNode.value);
}
}

View File

@ -46,7 +46,6 @@ public class FacetIndexingParamsTest extends LuceneTestCase {
assertEquals("3 characters should be written", 3, numchars);
assertEquals("wrong drill-down term text", expectedDDText, new String(
buf, 0, numchars));
CategoryListParams clParams = dfip.getCategoryListParams(null);
assertEquals("partition for all ordinals is the first", "",
PartitionsUtils.partitionNameByOrdinal(dfip, 250));
assertEquals("for partition 0, the same name should be returned",
@ -75,7 +74,7 @@ public class FacetIndexingParamsTest extends LuceneTestCase {
PathPolicy pathPolicy = PathPolicy.ALL_CATEGORIES;
assertEquals("path policy does not match default for root", pathPolicy.shouldAdd(cp), dfip.getPathPolicy().shouldAdd(cp));
for (int i = 0; i < 30; i++) {
int nComponents = random().nextInt(10);
int nComponents = random().nextInt(10) + 1;
String[] components = new String[nComponents];
for (int j = 0; j < components.length; j++) {
components[j] = (Integer.valueOf(random().nextInt(30))).toString();

View File

@ -0,0 +1,515 @@
package org.apache.lucene.facet.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Random;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.facet.index.FacetFields;
import org.apache.lucene.facet.index.categorypolicy.OrdinalPolicy;
import org.apache.lucene.facet.index.params.CategoryListParams;
import org.apache.lucene.facet.index.params.FacetIndexingParams;
import org.apache.lucene.facet.search.params.CountFacetRequest;
import org.apache.lucene.facet.search.params.FacetRequest;
import org.apache.lucene.facet.search.params.FacetRequest.SortBy;
import org.apache.lucene.facet.search.params.FacetRequest.SortOrder;
import org.apache.lucene.facet.search.params.FacetSearchParams;
import org.apache.lucene.facet.search.params.ScoreFacetRequest;
import org.apache.lucene.facet.search.results.FacetResult;
import org.apache.lucene.facet.search.results.FacetResultNode;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.collections.ObjectToIntMap;
import org.apache.lucene.util.encoding.IntEncoder;
import org.apache.lucene.util.encoding.VInt8IntEncoder;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
public class CountingFacetsCollectorTest extends LuceneTestCase {
private static final Term A = new Term("f", "a");
private static final CategoryPath CP_A = new CategoryPath("A"), CP_B = new CategoryPath("B");
private static final int NUM_CHILDREN_CP_A = 5, NUM_CHILDREN_CP_B = 3;
private static final CategoryPath[] CATEGORIES_A, CATEGORIES_B;
static {
CATEGORIES_A = new CategoryPath[NUM_CHILDREN_CP_A];
for (int i = 0; i < NUM_CHILDREN_CP_A; i++) {
CATEGORIES_A[i] = new CategoryPath(CP_A.components[0], Integer.toString(i));
}
CATEGORIES_B = new CategoryPath[NUM_CHILDREN_CP_B];
for (int i = 0; i < NUM_CHILDREN_CP_B; i++) {
CATEGORIES_B[i] = new CategoryPath(CP_B.components[0], Integer.toString(i));
}
}
protected static Directory indexDir, taxoDir;
protected static ObjectToIntMap<CategoryPath> allExpectedCounts, termExpectedCounts;
protected static int numChildrenIndexedA, numChildrenIndexedB;
@AfterClass
public static void afterClassCountingFacetsCollectorTest() throws Exception {
IOUtils.close(indexDir, taxoDir);
}
private static List<CategoryPath> randomCategories(Random random) {
// add random categories from the two dimensions, ensuring that the same
// category is not added twice.
int numFacetsA = random.nextInt(3) + 1; // 1-3
int numFacetsB = random.nextInt(2) + 1; // 1-2
ArrayList<CategoryPath> categories_a = new ArrayList<CategoryPath>();
categories_a.addAll(Arrays.asList(CATEGORIES_A));
ArrayList<CategoryPath> categories_b = new ArrayList<CategoryPath>();
categories_b.addAll(Arrays.asList(CATEGORIES_B));
Collections.shuffle(categories_a, random);
Collections.shuffle(categories_b, random);
ArrayList<CategoryPath> categories = new ArrayList<CategoryPath>();
categories.addAll(categories_a.subList(0, numFacetsA));
categories.addAll(categories_b.subList(0, numFacetsB));
return categories;
}
private static void addField(Document doc) {
doc.add(new StringField(A.field(), A.text(), Store.NO));
}
private static void addFacets(Document doc, FacetFields facetFields, boolean updateTermExpectedCounts)
throws IOException {
List<CategoryPath> docCategories = randomCategories(random());
for (CategoryPath cp : docCategories) {
allExpectedCounts.put(cp, allExpectedCounts.get(cp) + 1);
if (updateTermExpectedCounts) {
termExpectedCounts.put(cp, termExpectedCounts.get(cp) + 1);
}
}
// add 1 to each dimension
allExpectedCounts.put(CP_A, allExpectedCounts.get(CP_A) + 1);
allExpectedCounts.put(CP_B, allExpectedCounts.get(CP_B) + 1);
if (updateTermExpectedCounts) {
termExpectedCounts.put(CP_A, termExpectedCounts.get(CP_A) + 1);
termExpectedCounts.put(CP_B, termExpectedCounts.get(CP_B) + 1);
}
facetFields.addFields(doc, docCategories);
}
private static void indexDocsNoFacets(IndexWriter indexWriter) throws IOException {
int numDocs = atLeast(2);
for (int i = 0; i < numDocs; i++) {
Document doc = new Document();
addField(doc);
indexWriter.addDocument(doc);
}
indexWriter.commit(); // flush a segment
}
private static void indexDocsWithFacetsNoTerms(IndexWriter indexWriter, TaxonomyWriter taxoWriter,
ObjectToIntMap<CategoryPath> expectedCounts) throws IOException {
Random random = random();
int numDocs = atLeast(random, 2);
FacetFields facetFields = new FacetFields(taxoWriter);
for (int i = 0; i < numDocs; i++) {
Document doc = new Document();
addFacets(doc, facetFields, false);
indexWriter.addDocument(doc);
}
indexWriter.commit(); // flush a segment
}
private static void indexDocsWithFacetsAndTerms(IndexWriter indexWriter, TaxonomyWriter taxoWriter,
ObjectToIntMap<CategoryPath> expectedCounts) throws IOException {
Random random = random();
int numDocs = atLeast(random, 2);
FacetFields facetFields = new FacetFields(taxoWriter);
for (int i = 0; i < numDocs; i++) {
Document doc = new Document();
addFacets(doc, facetFields, true);
addField(doc);
indexWriter.addDocument(doc);
}
indexWriter.commit(); // flush a segment
}
private static void indexDocsWithFacetsAndSomeTerms(IndexWriter indexWriter, TaxonomyWriter taxoWriter,
ObjectToIntMap<CategoryPath> expectedCounts) throws IOException {
Random random = random();
int numDocs = atLeast(random, 2);
FacetFields facetFields = new FacetFields(taxoWriter);
for (int i = 0; i < numDocs; i++) {
Document doc = new Document();
boolean hasContent = random.nextBoolean();
if (hasContent) {
addField(doc);
}
addFacets(doc, facetFields, hasContent);
indexWriter.addDocument(doc);
}
indexWriter.commit(); // flush a segment
}
// initialize expectedCounts w/ 0 for all categories
private static ObjectToIntMap<CategoryPath> newCounts() {
ObjectToIntMap<CategoryPath> counts = new ObjectToIntMap<CategoryPath>();
counts.put(CP_A, 0);
counts.put(CP_B, 0);
for (CategoryPath cp : CATEGORIES_A) {
counts.put(cp, 0);
}
for (CategoryPath cp : CATEGORIES_B) {
counts.put(cp, 0);
}
return counts;
}
@BeforeClass
public static void beforeClassCountingFacetsCollectorTest() throws Exception {
indexDir = newDirectory();
taxoDir = newDirectory();
// create an index which has:
// 1. Segment with no categories, but matching results
// 2. Segment w/ categories, but no results
// 3. Segment w/ categories and results
// 4. Segment w/ categories, but only some results
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES); // prevent merges, so we can control the index segments
IndexWriter indexWriter = new IndexWriter(indexDir, conf);
TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
allExpectedCounts = newCounts();
termExpectedCounts = newCounts();
// segment w/ no categories
indexDocsNoFacets(indexWriter);
// segment w/ categories, no content
indexDocsWithFacetsNoTerms(indexWriter, taxoWriter, allExpectedCounts);
// segment w/ categories and content
indexDocsWithFacetsAndTerms(indexWriter, taxoWriter, allExpectedCounts);
// segment w/ categories and some content
indexDocsWithFacetsAndSomeTerms(indexWriter, taxoWriter, allExpectedCounts);
// set num children indexed from each dimension
for (CategoryPath cp : CATEGORIES_A) {
if (termExpectedCounts.get(cp) > 0) {
++numChildrenIndexedA;
}
}
for (CategoryPath cp : CATEGORIES_B) {
if (termExpectedCounts.get(cp) > 0) {
++numChildrenIndexedB;
}
}
IOUtils.close(indexWriter, taxoWriter);
}
@Test
public void testInvalidValidParams() throws Exception {
final CategoryPath dummyCP = new CategoryPath("a");
final FacetRequest dummyFR = new CountFacetRequest(dummyCP, 10);
// only CountFacetRequests are allowed
assertNotNull("only CountFacetRequests should be allowed",
CountingFacetsCollector.assertParams(new FacetSearchParams(new ScoreFacetRequest(dummyCP, 10))));
// only depth=1
FacetRequest cfr = new CountFacetRequest(dummyCP, 10);
cfr.setDepth(2);
assertNotNull("only depth 1 should be allowed", CountingFacetsCollector.assertParams(new FacetSearchParams(cfr)));
// only SortOrder.DESCENDING
cfr = new CountFacetRequest(dummyCP, 10);
cfr.setSortOrder(SortOrder.ASCENDING);
assertNotNull("only SortOrder.DESCENDING should be allowed", CountingFacetsCollector.assertParams(new FacetSearchParams(cfr)));
// only SortBy.VALUE
cfr = new CountFacetRequest(dummyCP, 10);
cfr.setSortBy(SortBy.ORDINAL);
assertNotNull("only SortBy.VALUE should be allowed", CountingFacetsCollector.assertParams(new FacetSearchParams(cfr)));
// no numToLabel
cfr = new CountFacetRequest(dummyCP, 10);
cfr.setNumLabel(2);
assertNotNull("numToLabel should not be allowed", CountingFacetsCollector.assertParams(new FacetSearchParams(cfr)));
FacetIndexingParams fip = new FacetIndexingParams(new CategoryListParams("moo")) {
@Override
public List<CategoryListParams> getAllCategoryListParams() {
return Arrays.asList(new CategoryListParams[] { clParams, clParams });
}
};
assertNotNull("only one CLP should be allowed", CountingFacetsCollector.assertParams(new FacetSearchParams(fip, dummyFR)));
fip = new FacetIndexingParams(new CategoryListParams("moo")) {
final CategoryListParams clp = new CategoryListParams() {
@Override
public IntEncoder createEncoder() {
return new VInt8IntEncoder();
}
};
@Override
public List<CategoryListParams> getAllCategoryListParams() {
return Collections.singletonList(clp);
}
@Override
public CategoryListParams getCategoryListParams(CategoryPath category) {
return clp;
}
};
assertNotNull("only DGapVIntEncoder should be allowed", CountingFacetsCollector.assertParams(new FacetSearchParams(fip, dummyFR)));
fip = new FacetIndexingParams(new CategoryListParams("moo")) {
@Override
public int getPartitionSize() {
return 2;
}
};
assertNotNull("partitions should be allowed", CountingFacetsCollector.assertParams(new FacetSearchParams(fip, dummyFR)));
}
@Test
public void testDifferentNumResults() throws Exception {
// test the collector w/ FacetRequests and different numResults
DirectoryReader indexReader = DirectoryReader.open(indexDir);
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
IndexSearcher searcher = new IndexSearcher(indexReader);
FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(CP_A, NUM_CHILDREN_CP_A),
new CountFacetRequest(CP_B, NUM_CHILDREN_CP_B));
FacetsCollector fc = new CountingFacetsCollector(fsp, taxoReader);
TermQuery q = new TermQuery(A);
searcher.search(q, fc);
List<FacetResult> facetResults = fc.getFacetResults();
assertEquals("invalid number of facet results", 2, facetResults.size());
for (FacetResult res : facetResults) {
FacetResultNode root = res.getFacetResultNode();
assertEquals("wrong count for " + root.label, termExpectedCounts.get(root.label), (int) root.value);
assertEquals("invalid residue", 0, (int) root.residue);
for (FacetResultNode child : root.subResults) {
assertEquals("wrong count for " + child.label, termExpectedCounts.get(child.label), (int) child.value);
}
}
IOUtils.close(indexReader, taxoReader);
}
@Test
public void testResidue() throws Exception {
// test the collector's handling of residue
DirectoryReader indexReader = DirectoryReader.open(indexDir);
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
IndexSearcher searcher = new IndexSearcher(indexReader);
// asking for top 1 is the only way to guarantee there will be a residue
// provided that enough children were indexed (see below)
FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(CP_A, 1), new CountFacetRequest(CP_B, 1));
FacetsCollector fc = new CountingFacetsCollector(fsp, taxoReader);
TermQuery q = new TermQuery(A);
searcher.search(q, fc);
List<FacetResult> facetResults = fc.getFacetResults();
assertEquals("invalid number of facet results", 2, facetResults.size());
for (FacetResult res : facetResults) {
FacetResultNode root = res.getFacetResultNode();
assertEquals("wrong count for " + root.label, termExpectedCounts.get(root.label), (int) root.value);
// make sure randomness didn't pick only one child of root (otherwise there's no residue)
int numChildrenIndexed = res.getFacetRequest().categoryPath == CP_A ? numChildrenIndexedA : numChildrenIndexedB;
if (numChildrenIndexed > 1) {
assertTrue("expected residue", root.residue > 0);
}
for (FacetResultNode child : root.subResults) {
assertEquals("wrong count for " + child.label, termExpectedCounts.get(child.label), (int) child.value);
}
}
IOUtils.close(indexReader, taxoReader);
}
@Test
public void testAllCounts() throws Exception {
DirectoryReader indexReader = DirectoryReader.open(indexDir);
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
IndexSearcher searcher = new IndexSearcher(indexReader);
FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(CP_A, NUM_CHILDREN_CP_A),
new CountFacetRequest(CP_B, NUM_CHILDREN_CP_B));
FacetsCollector fc = new CountingFacetsCollector(fsp, taxoReader);
searcher.search(new MatchAllDocsQuery(), fc);
List<FacetResult> facetResults = fc.getFacetResults();
assertEquals("invalid number of facet results", 2, facetResults.size());
for (FacetResult res : facetResults) {
FacetResultNode root = res.getFacetResultNode();
assertEquals("wrong count for " + root.label, allExpectedCounts.get(root.label), (int) root.value);
assertEquals("invalid residue", 0, (int) root.residue);
for (FacetResultNode child : root.subResults) {
assertEquals("wrong count for " + child.label, allExpectedCounts.get(child.label), (int) child.value);
}
}
IOUtils.close(indexReader, taxoReader);
}
@Test
public void testBigNumResults() throws Exception {
DirectoryReader indexReader = DirectoryReader.open(indexDir);
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
IndexSearcher searcher = new IndexSearcher(indexReader);
FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(CP_A, Integer.MAX_VALUE),
new CountFacetRequest(CP_B, Integer.MAX_VALUE));
FacetsCollector fc = new CountingFacetsCollector(fsp, taxoReader);
searcher.search(new MatchAllDocsQuery(), fc);
List<FacetResult> facetResults = fc.getFacetResults();
assertEquals("invalid number of facet results", 2, facetResults.size());
for (FacetResult res : facetResults) {
FacetResultNode root = res.getFacetResultNode();
assertEquals("wrong count for " + root.label, allExpectedCounts.get(root.label), (int) root.value);
assertEquals("invalid residue", 0, (int) root.residue);
for (FacetResultNode child : root.subResults) {
assertEquals("wrong count for " + child.label, allExpectedCounts.get(child.label), (int) child.value);
}
}
IOUtils.close(indexReader, taxoReader);
}
@Test
public void testDirectSource() throws Exception {
DirectoryReader indexReader = DirectoryReader.open(indexDir);
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
IndexSearcher searcher = new IndexSearcher(indexReader);
FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(CP_A, NUM_CHILDREN_CP_A),
new CountFacetRequest(CP_B, NUM_CHILDREN_CP_B));
FacetsCollector fc = new CountingFacetsCollector(fsp, taxoReader, new FacetArrays(taxoReader.getSize()), true);
searcher.search(new MatchAllDocsQuery(), fc);
List<FacetResult> facetResults = fc.getFacetResults();
assertEquals("invalid number of facet results", 2, facetResults.size());
for (FacetResult res : facetResults) {
FacetResultNode root = res.getFacetResultNode();
assertEquals("wrong count for " + root.label, allExpectedCounts.get(root.label), (int) root.value);
assertEquals("invalid residue", 0, (int) root.residue);
for (FacetResultNode child : root.subResults) {
assertEquals("wrong count for " + child.label, allExpectedCounts.get(child.label), (int) child.value);
}
}
IOUtils.close(indexReader, taxoReader);
}
@Test
public void testNoParents() throws Exception {
// TODO: when OrdinalPolicy is on CLP, index the NO_PARENTS categories into
// their own dimension, and avoid this index creation
Directory indexDir = newDirectory();
Directory taxoDir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
conf.setMaxBufferedDocs(2);
conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES);
IndexWriter indexWriter = new IndexWriter(indexDir, conf);
TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
FacetIndexingParams fip = new FacetIndexingParams() {
@Override
public OrdinalPolicy getOrdinalPolicy() {
return OrdinalPolicy.NO_PARENTS;
}
};
FacetFields facetFields = new FacetFields(taxoWriter, fip);
ObjectToIntMap<CategoryPath> expCounts = newCounts();
// index few docs with categories, not sharing parents.
int numDocs = atLeast(10);
final CategoryPath cpc = new CategoryPath("L1", "L2", "L3");
for (int i = 0; i < numDocs; i++) {
Document doc = new Document();
ArrayList<CategoryPath> categories = new ArrayList<CategoryPath>();
CategoryPath cpa = CATEGORIES_A[random().nextInt(NUM_CHILDREN_CP_A)];
CategoryPath cpb = CATEGORIES_B[random().nextInt(NUM_CHILDREN_CP_B)];
categories.add(cpa);
categories.add(cpb);
categories.add(cpc);
expCounts.put(cpa, expCounts.get(cpa) + 1);
expCounts.put(cpb, expCounts.get(cpb) + 1);
facetFields.addFields(doc, categories);
indexWriter.addDocument(doc);
}
expCounts.put(CP_A, numDocs);
expCounts.put(CP_B, numDocs);
for (int i = 0; i < cpc.length; i++) {
expCounts.put(cpc.subpath(i+1), numDocs);
}
IOUtils.close(indexWriter, taxoWriter);
DirectoryReader indexReader = DirectoryReader.open(indexDir);
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
IndexSearcher searcher = new IndexSearcher(indexReader);
FacetSearchParams fsp = new FacetSearchParams(fip, new CountFacetRequest(CP_A, NUM_CHILDREN_CP_A),
new CountFacetRequest(CP_B, NUM_CHILDREN_CP_B), new CountFacetRequest(cpc.subpath(1), 10));
FacetsCollector fc = new CountingFacetsCollector(fsp, taxoReader);
searcher.search(new MatchAllDocsQuery(), fc);
List<FacetResult> facetResults = fc.getFacetResults();
assertEquals("invalid number of facet results", 3, facetResults.size());
for (FacetResult res : facetResults) {
FacetResultNode root = res.getFacetResultNode();
assertEquals("wrong count for " + root.label, expCounts.get(root.label), (int) root.value);
assertEquals("invalid residue", 0, (int) root.residue);
for (FacetResultNode child : root.subResults) {
assertEquals("wrong count for " + child.label, expCounts.get(child.label), (int) child.value);
}
}
IOUtils.close(indexReader, taxoReader);
IOUtils.close(indexDir, taxoDir);
}
}
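// The tests above construct CountingFacetsCollector directly to pin the
// implementation under test; a sketch of the equivalent path through the
// factory used elsewhere in this change (same parameters as the tests):
FacetsCollector fc = FacetsCollector.create(fsp, indexReader, taxoReader);
searcher.search(new MatchAllDocsQuery(), fc);
List<FacetResult> results = fc.getFacetResults();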

View File

@ -29,11 +29,10 @@ import org.apache.lucene.facet.taxonomy.TaxonomyReader;
public class SamplingWrapperTest extends BaseSampleTestTopK {
@Override
protected FacetsAccumulator getSamplingAccumulator(Sampler sampler,
TaxonomyReader taxoReader, IndexReader indexReader,
FacetSearchParams searchParams) {
FacetsAccumulator fExtrctr = new StandardFacetsAccumulator(searchParams,
indexReader, taxoReader);
return new SamplingWrapper(fExtrctr, sampler);
protected FacetsAccumulator getSamplingAccumulator(Sampler sampler, TaxonomyReader taxoReader,
IndexReader indexReader, FacetSearchParams searchParams) {
FacetsAccumulator fa = new StandardFacetsAccumulator(searchParams, indexReader, taxoReader);
return new SamplingWrapper(fa, sampler);
}
}

View File

@ -17,20 +17,23 @@ package org.apache.lucene.facet.search;
* limitations under the License.
*/
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.document.Document;
import org.apache.lucene.facet.FacetTestUtils;
import org.apache.lucene.facet.index.FacetFields;
import org.apache.lucene.facet.search.params.CountFacetRequest;
import org.apache.lucene.facet.search.params.FacetSearchParams;
import org.apache.lucene.facet.search.results.FacetResult;
import org.apache.lucene.facet.search.results.FacetResultNode;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.facet.util.PrintTaxonomyStats;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.IndexSearcher;
@ -89,7 +92,7 @@ public class TestDemoFacets extends LuceneTestCase {
new CountFacetRequest(new CategoryPath("Author"), 10));
// Aggregates the facet counts:
FacetsCollector c = new FacetsCollector(fsp, searcher.getIndexReader(), taxoReader);
FacetsCollector c = FacetsCollector.create(fsp, searcher.getIndexReader(), taxoReader);
// MatchAllDocsQuery is for "browsing" (counts facets
// for all non-deleted docs in the index); normally
@ -101,20 +104,31 @@ public class TestDemoFacets extends LuceneTestCase {
List<FacetResult> results = c.getFacetResults();
assertEquals(2, results.size());
assertEquals("Publish Date (5)\n 2012 (2)\n 2010 (2)\n 1999 (1)\n",
toSimpleString(results.get(0)));
FacetTestUtils.toSimpleString(results.get(0)));
assertEquals("Author (5)\n Lisa (2)\n Frank (1)\n Susan (1)\n Bob (1)\n",
toSimpleString(results.get(1)));
FacetTestUtils.toSimpleString(results.get(1)));
// Now user drills down on Publish Date/2010:
fsp = new FacetSearchParams(new CountFacetRequest(new CategoryPath("Author"), 10));
Query q2 = DrillDown.query(fsp, new MatchAllDocsQuery(), new CategoryPath("Publish Date/2010", '/'));
c = new FacetsCollector(fsp, searcher.getIndexReader(), taxoReader);
c = FacetsCollector.create(fsp, searcher.getIndexReader(), taxoReader);
searcher.search(q2, c);
results = c.getFacetResults();
assertEquals(1, results.size());
assertEquals("Author (2)\n Lisa (1)\n Bob (1)\n",
toSimpleString(results.get(0)));
FacetTestUtils.toSimpleString(results.get(0)));
// Smoke test PrintTaxonomyStats:
ByteArrayOutputStream bos = new ByteArrayOutputStream();
PrintTaxonomyStats.printStats(taxoReader, new PrintStream(bos, false, "UTF-8"), true);
String result = bos.toString("UTF-8");
assertTrue(result.indexOf("/Author: 4 immediate children; 5 total categories") != -1);
assertTrue(result.indexOf("/Publish Date: 3 immediate children; 12 total categories") != -1);
// Make sure at least a few nodes of the tree came out:
assertTrue(result.indexOf(" /1999") != -1);
assertTrue(result.indexOf(" /2012") != -1);
assertTrue(result.indexOf(" /20") != -1);
taxoReader.close();
searcher.getIndexReader().close();
@ -122,16 +136,4 @@ public class TestDemoFacets extends LuceneTestCase {
taxoDir.close();
}
private String toSimpleString(FacetResult fr) {
StringBuilder sb = new StringBuilder();
toSimpleString(0, sb, fr.getFacetResultNode(), "");
return sb.toString();
}
private void toSimpleString(int depth, StringBuilder sb, FacetResultNode node, String indent) {
sb.append(indent + node.getLabel().components[depth] + " (" + (int) node.getValue() + ")\n");
for(FacetResultNode childNode : node.getSubResults()) {
toSimpleString(depth+1, sb, childNode, indent + " ");
}
}
}

View File

@ -118,8 +118,8 @@ public class TestFacetsAccumulatorWithComplement extends FacetTestBase {
FacetResultNode parentResWithComp = countResWithComplement.get(0).getFacetResultNode();
FacetResultNode parentResNoComp = countResWithComplement.get(0).getFacetResultNode();
assertEquals("Wrong number of top count aggregated categories with complement!",3,parentResWithComp.getNumSubResults());
assertEquals("Wrong number of top count aggregated categories no complement!",3,parentResNoComp.getNumSubResults());
assertEquals("Wrong number of top count aggregated categories with complement!",3,parentResWithComp.subResults.size());
assertEquals("Wrong number of top count aggregated categories no complement!",3,parentResNoComp.subResults.size());
}

View File

@ -3,7 +3,7 @@ package org.apache.lucene.facet.search;
import java.util.Collections;
import java.util.List;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
@ -53,7 +53,7 @@ public class TestFacetsCollector extends LuceneTestCase {
TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxoDir);
IndexWriter iw = new IndexWriter(indexDir, new IndexWriterConfig(
TEST_VERSION_CURRENT, new KeywordAnalyzer()));
TEST_VERSION_CURRENT, new MockAnalyzer(random())));
FacetFields facetFields = new FacetFields(taxonomyWriter);
for(int i = atLeast(2000); i > 0; --i) {
@ -71,12 +71,12 @@ public class TestFacetsCollector extends LuceneTestCase {
DirectoryReader r = DirectoryReader.open(indexDir);
DirectoryTaxonomyReader taxo = new DirectoryTaxonomyReader(taxoDir);
FacetsCollector fc = new FacetsCollector(sParams, r, taxo);
FacetsCollector fc = FacetsCollector.create(sParams, r, taxo);
TopScoreDocCollector topDocs = TopScoreDocCollector.create(10, false);
new IndexSearcher(r).search(new MatchAllDocsQuery(), MultiCollector.wrap(fc, topDocs));
List<FacetResult> res = fc.getFacetResults();
double value = res.get(0).getFacetResultNode().getValue();
double value = res.get(0).getFacetResultNode().value;
double expected = topDocs.topDocs().getMaxScore() * r.numDocs();
assertEquals(expected, value, 1E-10);

View File

@ -271,7 +271,7 @@ public class TestMultipleCategoryLists extends LuceneTestCase {
FacetResult results = res.get(0);
FacetResultNode resNode = results.getFacetResultNode();
Iterable<? extends FacetResultNode> subResults = resNode.getSubResults();
Iterable<? extends FacetResultNode> subResults = resNode.subResults;
Iterator<? extends FacetResultNode> subIter = subResults.iterator();
checkResult(resNode, "Band", 5.0);
@ -280,7 +280,7 @@ public class TestMultipleCategoryLists extends LuceneTestCase {
results = res.get(1);
resNode = results.getFacetResultNode();
subResults = resNode.getSubResults();
subResults = resNode.subResults;
subIter = subResults.iterator();
checkResult(resNode, "Band", 5.0);
@ -294,7 +294,7 @@ public class TestMultipleCategoryLists extends LuceneTestCase {
results = res.get(2);
resNode = results.getFacetResultNode();
subResults = resNode.getSubResults();
subResults = resNode.subResults;
subIter = subResults.iterator();
checkResult(resNode, "Author", 3.0);
@ -304,7 +304,7 @@ public class TestMultipleCategoryLists extends LuceneTestCase {
results = res.get(3);
resNode = results.getFacetResultNode();
subResults = resNode.getSubResults();
subResults = resNode.subResults;
subIter = subResults.iterator();
checkResult(resNode, "Band/Rock & Pop", 4.0);
@ -334,7 +334,7 @@ public class TestMultipleCategoryLists extends LuceneTestCase {
FacetSearchParams facetSearchParams = new FacetSearchParams(facetRequests, iParams);
// perform documents search and facets accumulation
FacetsCollector facetsCollector = new FacetsCollector(facetSearchParams, ir, tr);
FacetsCollector facetsCollector = FacetsCollector.create(facetSearchParams, ir, tr);
searcher.search(q, MultiCollector.wrap(topDocsCollector, facetsCollector));
return facetsCollector;
}
@ -350,8 +350,8 @@ public class TestMultipleCategoryLists extends LuceneTestCase {
}
private static void checkResult(FacetResultNode sub, String label, double value) {
assertEquals("Label of subresult " + sub.getLabel() + " was incorrect", label, sub.getLabel().toString());
assertEquals("Value for " + sub.getLabel() + " subresult was incorrect", value, sub.getValue(), 0.0);
assertEquals("Label of subresult " + sub.label + " was incorrect", label, sub.label.toString());
assertEquals("Value for " + sub.label + " subresult was incorrect", value, sub.value, 0.0);
}
}


@ -44,7 +44,7 @@ public class TestSameRequestAccumulation extends FacetTestBase {
final CountFacetRequest facetRequest = new CountFacetRequest(new CategoryPath("root"), 10);
FacetSearchParams fsp = new FacetSearchParams(facetRequest);
FacetsCollector fc = new FacetsCollector(fsp, indexReader, taxoReader);
FacetsCollector fc = FacetsCollector.create(fsp, indexReader, taxoReader);
searcher.search(new MatchAllDocsQuery(), fc);
final String expected = fc.getFacetResults().get(0).toString();
@ -53,9 +53,9 @@ public class TestSameRequestAccumulation extends FacetTestBase {
fsp = new FacetSearchParams(facetRequest, facetRequest, new CountFacetRequest(new CategoryPath("root"), 10));
// make sure the search params holds 3 requests now
assertEquals(3, fsp.getFacetRequests().size());
assertEquals(3, fsp.facetRequests.size());
fc = new FacetsCollector(fsp, indexReader, taxoReader);
fc = FacetsCollector.create(fsp, indexReader, taxoReader);
searcher.search(new MatchAllDocsQuery(), fc);
List<FacetResult> actual = fc.getFacetResults();


@ -89,10 +89,8 @@ public class TestScoredDocIdCollector extends FacetTestBase {
FacetResultNode parentCountRes = countRes.get(0).getFacetResultNode();
FacetResultNode parentScoreRes = scoreRes.get(0).getFacetResultNode();
assertEquals("Wrong number of top count aggregated categories!", 3,
parentCountRes.getNumSubResults());
assertEquals("Wrong number of top score aggregated categories!", 3,
parentScoreRes.getNumSubResults());
assertEquals("Wrong number of top count aggregated categories!", 3, parentCountRes.subResults.size());
assertEquals("Wrong number of top score aggregated categories!", 3, parentScoreRes.subResults.size());
// rely on the fact that facet value is computed as doc-score, and
// accordingly compare values of the two top-category results.
@ -101,12 +99,8 @@ public class TestScoredDocIdCollector extends FacetTestBase {
FacetResultNode[] scoreResNodes = resultNodesAsArray(parentScoreRes);
for (int i = 0; i < scoreResNodes.length; i++) {
assertEquals("Ordinals differ!",
countResNodes[i].getOrdinal(), scoreResNodes[i].getOrdinal());
assertEquals("Wrong scores!",
constScore * countResNodes[i].getValue(),
scoreResNodes[i].getValue(),
Double.MIN_VALUE);
assertEquals("Ordinals differ!", countResNodes[i].ordinal, scoreResNodes[i].ordinal);
assertEquals("Wrong scores!", constScore * countResNodes[i].value, scoreResNodes[i].value, Double.MIN_VALUE);
}
}


@ -1,7 +1,6 @@
package org.apache.lucene.facet.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
@ -102,24 +101,22 @@ public class TestStandardFacetsAccumulator extends LuceneTestCase {
// search for "f:a", only segments 1 and 3 should match results
Query q = new TermQuery(new Term("f", "a"));
ArrayList<FacetRequest> requests = new ArrayList<FacetRequest>(1);
CountFacetRequest countNoComplements = new CountFacetRequest(new CategoryPath("A"), 10) {
FacetRequest countNoComplements = new CountFacetRequest(new CategoryPath("A"), 10) {
@Override
public boolean supportsComplements() {
return false; // disable complements
}
};
requests.add(countNoComplements);
FacetSearchParams fsp = new FacetSearchParams(requests, fip);
FacetsCollector fc = new FacetsCollector(fsp , indexReader, taxoReader);
FacetSearchParams fsp = new FacetSearchParams(fip, countNoComplements);
FacetsCollector fc = new StandardFacetsCollector(fsp , indexReader, taxoReader);
indexSearcher.search(q, fc);
List<FacetResult> results = fc.getFacetResults();
assertEquals("received too many facet results", 1, results.size());
FacetResultNode frn = results.get(0).getFacetResultNode();
assertEquals("wrong weight for \"A\"", 4, (int) frn.getValue());
assertEquals("wrong number of children", 2, frn.getNumSubResults());
for (FacetResultNode node : frn.getSubResults()) {
assertEquals("wrong weight for child " + node.getLabel(), 2, (int) node.getValue());
assertEquals("wrong weight for \"A\"", 4, (int) frn.value);
assertEquals("wrong number of children", 2, frn.subResults.size());
for (FacetResultNode node : frn.subResults) {
assertEquals("wrong weight for child " + node.label, 2, (int) node.value);
}
IOUtils.close(indexReader, taxoReader);


@ -165,7 +165,7 @@ public class TestTopKInEachNodeResultHandler extends LuceneTestCase {
facetRequests.add(cfrb20);
FacetSearchParams facetSearchParams = new FacetSearchParams(facetRequests, iParams);
FacetArrays facetArrays = new FacetArrays(PartitionsUtils.partitionSize(facetSearchParams.getFacetIndexingParams(), tr));
FacetArrays facetArrays = new FacetArrays(PartitionsUtils.partitionSize(facetSearchParams.indexingParams, tr));
FacetsAccumulator fctExtrctr = new StandardFacetsAccumulator(facetSearchParams, is.getIndexReader(), tr, facetArrays);
fctExtrctr.setComplementThreshold(FacetsAccumulator.DISABLE_COMPLEMENT);
long start = System.currentTimeMillis();
@ -181,40 +181,40 @@ public class TestTopKInEachNodeResultHandler extends LuceneTestCase {
boolean hasDoctor = "Doctor".equals(fr.getFacetRequest().categoryPath.components[0]);
assertEquals(9, fr.getNumValidDescendants());
FacetResultNode parentRes = fr.getFacetResultNode();
assertEquals(16.0, parentRes.getValue(), Double.MIN_VALUE);
assertEquals(2.0, parentRes.getResidue(), Double.MIN_VALUE);
assertEquals(2, parentRes.getNumSubResults());
assertEquals(16.0, parentRes.value, Double.MIN_VALUE);
assertEquals(2.0, parentRes.residue, Double.MIN_VALUE);
assertEquals(2, parentRes.subResults.size());
// two nodes sorted by descending values: a/b with 8 and a/c with 6
// a/b has residue 2 and two children a/b/2 with value 3, and a/b/1 with value 2.
// a/c has residue 0, and one child a/c/1 with value 1.
double [] expectedValues0 = { 8.0, 2.0, 3.0, 0.0, 2.0, 0.0, 6.0, 0.0, 1.0, 0.0 };
int i = 0;
for (FacetResultNode node : parentRes.getSubResults()) {
assertEquals(expectedValues0[i++], node.getValue(), Double.MIN_VALUE);
assertEquals(expectedValues0[i++], node.getResidue(), Double.MIN_VALUE);
for (FacetResultNode node2 : node.getSubResults()) {
assertEquals(expectedValues0[i++], node2.getValue(), Double.MIN_VALUE);
assertEquals(expectedValues0[i++], node2.getResidue(), Double.MIN_VALUE);
for (FacetResultNode node : parentRes.subResults) {
assertEquals(expectedValues0[i++], node.value, Double.MIN_VALUE);
assertEquals(expectedValues0[i++], node.residue, Double.MIN_VALUE);
for (FacetResultNode node2 : node.subResults) {
assertEquals(expectedValues0[i++], node2.value, Double.MIN_VALUE);
assertEquals(expectedValues0[i++], node2.residue, Double.MIN_VALUE);
}
}
// now just change the value of the first child of the root to 5, and then rearrange
// expected are: first a/c of value 6 and residue 0, and one child a/c/1 with value 1
// then a/b with value 5 and residue 2, and both children: a/b/2 with value 3, and a/b/1 with value 2.
for (FacetResultNode node : parentRes.getSubResults()) {
node.setValue(5.0);
for (FacetResultNode node : parentRes.subResults) {
node.value = 5.0;
break;
}
// now rearrange
double [] expectedValues00 = { 6.0, 0.0, 1.0, 0.0, 5.0, 2.0, 3.0, 0.0, 2.0, 0.0 };
fr = cfra23.createFacetResultsHandler(tr).rearrangeFacetResult(fr);
i = 0;
for (FacetResultNode node : parentRes.getSubResults()) {
assertEquals(expectedValues00[i++], node.getValue(), Double.MIN_VALUE);
assertEquals(expectedValues00[i++], node.getResidue(), Double.MIN_VALUE);
for (FacetResultNode node2 : node.getSubResults()) {
assertEquals(expectedValues00[i++], node2.getValue(), Double.MIN_VALUE);
assertEquals(expectedValues00[i++], node2.getResidue(), Double.MIN_VALUE);
for (FacetResultNode node : parentRes.subResults) {
assertEquals(expectedValues00[i++], node.value, Double.MIN_VALUE);
assertEquals(expectedValues00[i++], node.residue, Double.MIN_VALUE);
for (FacetResultNode node2 : node.subResults) {
assertEquals(expectedValues00[i++], node2.value, Double.MIN_VALUE);
assertEquals(expectedValues00[i++], node2.residue, Double.MIN_VALUE);
}
}
@ -222,19 +222,19 @@ public class TestTopKInEachNodeResultHandler extends LuceneTestCase {
hasDoctor |= "Doctor".equals(fr.getFacetRequest().categoryPath.components[0]);
assertEquals(9, fr.getNumValidDescendants());
parentRes = fr.getFacetResultNode();
assertEquals(16.0, parentRes.getValue(), Double.MIN_VALUE);
assertEquals(2.0, parentRes.getResidue(), Double.MIN_VALUE);
assertEquals(2, parentRes.getNumSubResults());
assertEquals(16.0, parentRes.value, Double.MIN_VALUE);
assertEquals(2.0, parentRes.residue, Double.MIN_VALUE);
assertEquals(2, parentRes.subResults.size());
// two nodes sorted by descending values: a/b with 8 and a/c with 6
// a/b has residue 2 and two children a/b/2 with value 3, and a/b/1 with value 2.
// a/c has residue 0, and one child a/c/1 with value 1.
i = 0;
for (FacetResultNode node : parentRes.getSubResults()) {
assertEquals(expectedValues0[i++], node.getValue(), Double.MIN_VALUE);
assertEquals(expectedValues0[i++], node.getResidue(), Double.MIN_VALUE);
for (FacetResultNode node2 : node.getSubResults()) {
assertEquals(expectedValues0[i++], node2.getValue(), Double.MIN_VALUE);
assertEquals(expectedValues0[i++], node2.getResidue(), Double.MIN_VALUE);
for (FacetResultNode node : parentRes.subResults) {
assertEquals(expectedValues0[i++], node.value, Double.MIN_VALUE);
assertEquals(expectedValues0[i++], node.residue, Double.MIN_VALUE);
for (FacetResultNode node2 : node.subResults) {
assertEquals(expectedValues0[i++], node2.value, Double.MIN_VALUE);
assertEquals(expectedValues0[i++], node2.residue, Double.MIN_VALUE);
}
}
@ -242,70 +242,70 @@ public class TestTopKInEachNodeResultHandler extends LuceneTestCase {
hasDoctor |= "Doctor".equals(fr.getFacetRequest().categoryPath.components[0]);
assertEquals(4, fr.getNumValidDescendants(), 4);
parentRes = fr.getFacetResultNode();
assertEquals(16.0, parentRes.getValue(), Double.MIN_VALUE);
assertEquals(2.0, parentRes.getResidue(), Double.MIN_VALUE);
assertEquals(2, parentRes.getNumSubResults());
assertEquals(16.0, parentRes.value, Double.MIN_VALUE);
assertEquals(2.0, parentRes.residue, Double.MIN_VALUE);
assertEquals(2, parentRes.subResults.size());
// two nodes sorted by descending values:
// a/b with value 8 and residue 0 (because no children considered),
// and a/c with value 6 and residue 0 (because no children considered)
double [] expectedValues2 = { 8.0, 0.0, 6.0, 0.0 };
i = 0;
for (FacetResultNode node : parentRes.getSubResults()) {
assertEquals(expectedValues2[i++], node.getValue(), Double.MIN_VALUE);
assertEquals(expectedValues2[i++], node.getResidue(), Double.MIN_VALUE);
assertEquals(node.getNumSubResults(), 0);
for (FacetResultNode node : parentRes.subResults) {
assertEquals(expectedValues2[i++], node.value, Double.MIN_VALUE);
assertEquals(expectedValues2[i++], node.residue, Double.MIN_VALUE);
assertEquals(node.subResults.size(), 0);
}
fr = facetResults.get(3); // a/b, depth=3, K=2
hasDoctor |= "Doctor".equals(fr.getFacetRequest().categoryPath.components[0]);
assertEquals(4, fr.getNumValidDescendants());
parentRes = fr.getFacetResultNode();
assertEquals(8.0, parentRes.getValue(), Double.MIN_VALUE);
assertEquals(2.0, parentRes.getResidue(), Double.MIN_VALUE);
assertEquals(2, parentRes.getNumSubResults());
assertEquals(8.0, parentRes.value, Double.MIN_VALUE);
assertEquals(2.0, parentRes.residue, Double.MIN_VALUE);
assertEquals(2, parentRes.subResults.size());
double [] expectedValues3 = { 3.0, 2.0 };
i = 0;
for (FacetResultNode node : parentRes.getSubResults()) {
assertEquals(expectedValues3[i++], node.getValue(), Double.MIN_VALUE);
assertEquals(0.0, node.getResidue(), Double.MIN_VALUE);
assertEquals(0, node.getNumSubResults());
for (FacetResultNode node : parentRes.subResults) {
assertEquals(expectedValues3[i++], node.value, Double.MIN_VALUE);
assertEquals(0.0, node.residue, Double.MIN_VALUE);
assertEquals(0, node.subResults.size());
}
fr = facetResults.get(4); // a/b, depth=2, K=2
hasDoctor |= "Doctor".equals(fr.getFacetRequest().categoryPath.components[0]);
assertEquals(4, fr.getNumValidDescendants());
parentRes = fr.getFacetResultNode();
assertEquals(8.0, parentRes.getValue(), Double.MIN_VALUE);
assertEquals(2.0, parentRes.getResidue(), Double.MIN_VALUE);
assertEquals(2, parentRes.getNumSubResults());
assertEquals(8.0, parentRes.value, Double.MIN_VALUE);
assertEquals(2.0, parentRes.residue, Double.MIN_VALUE);
assertEquals(2, parentRes.subResults.size());
i = 0;
for (FacetResultNode node : parentRes.getSubResults()) {
assertEquals(expectedValues3[i++], node.getValue(), Double.MIN_VALUE);
assertEquals(0.0, node.getResidue(), Double.MIN_VALUE);
assertEquals(0, node.getNumSubResults());
for (FacetResultNode node : parentRes.subResults) {
assertEquals(expectedValues3[i++], node.value, Double.MIN_VALUE);
assertEquals(0.0, node.residue, Double.MIN_VALUE);
assertEquals(0, node.subResults.size());
}
fr = facetResults.get(5); // a/b, depth=1, K=2
hasDoctor |= "Doctor".equals(fr.getFacetRequest().categoryPath.components[0]);
assertEquals(4, fr.getNumValidDescendants());
parentRes = fr.getFacetResultNode();
assertEquals(8.0, parentRes.getValue(), Double.MIN_VALUE);
assertEquals(2.0, parentRes.getResidue(), Double.MIN_VALUE);
assertEquals(2, parentRes.getNumSubResults());
assertEquals(8.0, parentRes.value, Double.MIN_VALUE);
assertEquals(2.0, parentRes.residue, Double.MIN_VALUE);
assertEquals(2, parentRes.subResults.size());
i = 0;
for (FacetResultNode node : parentRes.getSubResults()) {
assertEquals(expectedValues3[i++], node.getValue(), Double.MIN_VALUE);
assertEquals(0.0, node.getResidue(), Double.MIN_VALUE);
assertEquals(0, node.getNumSubResults());
for (FacetResultNode node : parentRes.subResults) {
assertEquals(expectedValues3[i++], node.value, Double.MIN_VALUE);
assertEquals(0.0, node.residue, Double.MIN_VALUE);
assertEquals(0, node.subResults.size());
}
fr = facetResults.get(6); // a/b, depth=0, K=2
hasDoctor |= "Doctor".equals(fr.getFacetRequest().categoryPath.components[0]);
assertEquals(0, fr.getNumValidDescendants()); // 0 descendants but rootnode
parentRes = fr.getFacetResultNode();
assertEquals(8.0, parentRes.getValue(), Double.MIN_VALUE);
assertEquals(0.0, parentRes.getResidue(), Double.MIN_VALUE);
assertEquals(0, parentRes.getNumSubResults());
assertEquals(8.0, parentRes.value, Double.MIN_VALUE);
assertEquals(0.0, parentRes.residue, Double.MIN_VALUE);
assertEquals(0, parentRes.subResults.size());
hasDoctor |= "Doctor".equals(fr.getFacetRequest().categoryPath.components[0]);
// doctor, depth=1, K=2


@ -89,7 +89,7 @@ public class TestTopKResultsHandler extends BaseTestTopK {
// do different facet counts and compare to control
FacetSearchParams sParams = getFacetSearchParams(facetRequests, getFacetIndexingParams(partitionSize));
FacetsCollector fc = new FacetsCollector(sParams, indexReader, taxoReader) {
FacetsCollector fc = new StandardFacetsCollector(sParams, indexReader, taxoReader) {
@Override
protected FacetsAccumulator initFacetsAccumulator(FacetSearchParams facetSearchParams, IndexReader indexReader, TaxonomyReader taxonomyReader) {
FacetsAccumulator fa = new StandardFacetsAccumulator(facetSearchParams, indexReader, taxonomyReader);
@ -99,52 +99,46 @@ public class TestTopKResultsHandler extends BaseTestTopK {
};
searcher.search(new MatchAllDocsQuery(), fc);
long start = System.currentTimeMillis();
List<FacetResult> facetResults = fc.getFacetResults();
long end = System.currentTimeMillis();
if (VERBOSE) {
System.out.println("Time: " + (end - start));
}
FacetResult fr = facetResults.get(0);
FacetResultNode parentRes = fr.getFacetResultNode();
assertEquals(13.0, parentRes.getValue(), Double.MIN_VALUE);
assertEquals(13.0, parentRes.value, Double.MIN_VALUE);
FacetResultNode[] frn = resultNodesAsArray(parentRes);
assertEquals(7.0, frn[0].getValue(), Double.MIN_VALUE);
assertEquals(6.0, frn[1].getValue(), Double.MIN_VALUE);
assertEquals(7.0, frn[0].value, Double.MIN_VALUE);
assertEquals(6.0, frn[1].value, Double.MIN_VALUE);
fr = facetResults.get(1);
parentRes = fr.getFacetResultNode();
assertEquals(13.0, parentRes.getValue(), Double.MIN_VALUE);
assertEquals(13.0, parentRes.value, Double.MIN_VALUE);
frn = resultNodesAsArray(parentRes);
assertEquals(7.0, frn[0].getValue(), Double.MIN_VALUE);
assertEquals(6.0, frn[1].getValue(), Double.MIN_VALUE);
assertEquals(2.0, frn[2].getValue(), Double.MIN_VALUE);
assertEquals(2.0, frn[3].getValue(), Double.MIN_VALUE);
assertEquals(1.0, frn[4].getValue(), Double.MIN_VALUE);
assertEquals(1.0, frn[5].getValue(), Double.MIN_VALUE);
assertEquals(7.0, frn[0].value, Double.MIN_VALUE);
assertEquals(6.0, frn[1].value, Double.MIN_VALUE);
assertEquals(2.0, frn[2].value, Double.MIN_VALUE);
assertEquals(2.0, frn[3].value, Double.MIN_VALUE);
assertEquals(1.0, frn[4].value, Double.MIN_VALUE);
assertEquals(1.0, frn[5].value, Double.MIN_VALUE);
fr = facetResults.get(2);
parentRes = fr.getFacetResultNode();
assertEquals(7.0, parentRes.getValue(), Double.MIN_VALUE);
assertEquals(7.0, parentRes.value, Double.MIN_VALUE);
frn = resultNodesAsArray(parentRes);
assertEquals(2.0, frn[0].getValue(), Double.MIN_VALUE);
assertEquals(2.0, frn[1].getValue(), Double.MIN_VALUE);
assertEquals(1.0, frn[2].getValue(), Double.MIN_VALUE);
assertEquals(1.0, frn[3].getValue(), Double.MIN_VALUE);
assertEquals(2.0, frn[0].value, Double.MIN_VALUE);
assertEquals(2.0, frn[1].value, Double.MIN_VALUE);
assertEquals(1.0, frn[2].value, Double.MIN_VALUE);
assertEquals(1.0, frn[3].value, Double.MIN_VALUE);
fr = facetResults.get(3);
parentRes = fr.getFacetResultNode();
assertEquals(2.0, parentRes.getValue(), Double.MIN_VALUE);
assertEquals(2.0, parentRes.value, Double.MIN_VALUE);
frn = resultNodesAsArray(parentRes);
assertEquals(0, frn.length);
fr = facetResults.get(4);
parentRes = fr.getFacetResultNode();
assertEquals(6.0, parentRes.getValue(), Double.MIN_VALUE);
assertEquals(6.0, parentRes.value, Double.MIN_VALUE);
frn = resultNodesAsArray(parentRes);
assertEquals(1.0, frn[0].getValue(), Double.MIN_VALUE);
assertEquals(1.0, frn[0].value, Double.MIN_VALUE);
closeAll();
}
}
@ -159,10 +153,10 @@ public class TestTopKResultsHandler extends BaseTestTopK {
// do different facet counts and compare to control
CategoryPath path = new CategoryPath("a", "b");
FacetSearchParams sParams = getFacetSearchParams(
getFacetIndexingParams(partitionSize), new CountFacetRequest(path, Integer.MAX_VALUE));
FacetSearchParams sParams = getFacetSearchParams(getFacetIndexingParams(partitionSize),
new CountFacetRequest(path, Integer.MAX_VALUE));
FacetsCollector fc = new FacetsCollector(sParams, indexReader, taxoReader) {
FacetsCollector fc = new StandardFacetsCollector(sParams, indexReader, taxoReader) {
@Override
protected FacetsAccumulator initFacetsAccumulator(FacetSearchParams facetSearchParams, IndexReader indexReader, TaxonomyReader taxonomyReader) {
FacetsAccumulator fa = new StandardFacetsAccumulator(facetSearchParams, indexReader, taxonomyReader);
@ -172,13 +166,7 @@ public class TestTopKResultsHandler extends BaseTestTopK {
};
searcher.search(new MatchAllDocsQuery(), fc);
long start = System.currentTimeMillis();
List<FacetResult> results = fc.getFacetResults();
long end = System.currentTimeMillis();
if (VERBOSE) {
System.out.println("Time: " + (end - start));
}
assertEquals("Should only be one result as there's only one request", 1, results.size());
FacetResult res = results.get(0);
@ -188,7 +176,7 @@ public class TestTopKResultsHandler extends BaseTestTopK {
FacetSearchParams sParams2 = getFacetSearchParams(
getFacetIndexingParams(partitionSize), new CountFacetRequest(path, Integer.MAX_VALUE));
FacetsCollector fc2 = new FacetsCollector(sParams2, indexReader, taxoReader) {
FacetsCollector fc2 = new StandardFacetsCollector(sParams2, indexReader, taxoReader) {
@Override
protected FacetsAccumulator initFacetsAccumulator(FacetSearchParams facetSearchParams, IndexReader indexReader, TaxonomyReader taxonomyReader) {
FacetsAccumulator fa = new StandardFacetsAccumulator(facetSearchParams, indexReader, taxonomyReader);
@ -226,18 +214,12 @@ public class TestTopKResultsHandler extends BaseTestTopK {
getFacetIndexingParams(partitionSize),
new CountFacetRequest(path, 10));
FacetsCollector fc = new FacetsCollector(sParams, indexReader, taxoReader);
FacetsCollector fc = FacetsCollector.create(sParams, indexReader, taxoReader);
searcher.search(new MatchAllDocsQuery(), fc);
long start = System.currentTimeMillis();
List<FacetResult> facetResults = fc.getFacetResults();
long end = System.currentTimeMillis();
if (VERBOSE) {
System.out.println("Time: " + (end - start));
}
assertEquals("Shouldn't have found anything for a FacetRequest "
+ "of a facet that doesn't exist in the index.", 0, facetResults.size());


@ -4,15 +4,14 @@ import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.junit.Test;
import org.apache.lucene.facet.search.params.FacetSearchParams;
import org.apache.lucene.facet.search.results.FacetResult;
import org.apache.lucene.facet.search.results.FacetResultNode;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.junit.Test;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -37,7 +36,7 @@ public class TestTopKResultsHandlerRandom extends BaseTestTopK {
throws IOException {
Query q = new MatchAllDocsQuery();
FacetSearchParams facetSearchParams = searchParamsWithRequests(numResults, partitionSize);
FacetsCollector fc = new FacetsCollector(facetSearchParams, indexReader, taxoReader) {
FacetsCollector fc = new StandardFacetsCollector(facetSearchParams, indexReader, taxoReader) {
@Override
protected FacetsAccumulator initFacetsAccumulator(
FacetSearchParams facetSearchParams, IndexReader indexReader,
@ -88,15 +87,15 @@ public class TestTopKResultsHandlerRandom extends BaseTestTopK {
int k = 0;
for (FacetResult fr : allFacetResults) {
FacetResultNode topResNode = fr.getFacetResultNode();
maxNumNodes = Math.max(maxNumNodes, topResNode.getNumSubResults());
maxNumNodes = Math.max(maxNumNodes, topResNode.subResults.size());
int prevCount = Integer.MAX_VALUE;
int pos = 0;
for (FacetResultNode frn: topResNode.getSubResults()) {
assertTrue("wrong counts order: prev="+prevCount+" curr="+frn.getValue(), prevCount>=frn.getValue());
prevCount = (int) frn.getValue();
String key = k+"--"+frn.getLabel()+"=="+frn.getValue();
for (FacetResultNode frn: topResNode.subResults) {
assertTrue("wrong counts order: prev="+prevCount+" curr="+frn.value, prevCount>=frn.value);
prevCount = (int) frn.value;
String key = k+"--"+frn.label+"=="+frn.value;
if (VERBOSE) {
System.out.println(frn.getLabel() + " - " + frn.getValue() + " "+key+" "+pos);
System.out.println(frn.label + " - " + frn.value + " "+key+" "+pos);
}
all.put(key, pos++); // will use this later to verify order of sub-results
}
@ -113,12 +112,12 @@ public class TestTopKResultsHandlerRandom extends BaseTestTopK {
k = 0;
for (FacetResult fr : someResults) {
FacetResultNode topResNode = fr.getFacetResultNode();
assertTrue("too many results: n="+n+" but got "+topResNode.getNumSubResults(), n>=topResNode.getNumSubResults());
assertTrue("too many results: n="+n+" but got "+topResNode.subResults.size(), n>=topResNode.subResults.size());
int pos = 0;
for (FacetResultNode frn: topResNode.getSubResults()) {
String key = k+"--"+frn.getLabel()+"=="+frn.getValue();
for (FacetResultNode frn: topResNode.subResults) {
String key = k+"--"+frn.label+"=="+frn.value;
if (VERBOSE) {
System.out.println(frn.getLabel() + " - " + frn.getValue() + " "+key+" "+pos);
System.out.println(frn.label + " - " + frn.value + " "+key+" "+pos);
}
Integer origPos = all.get(key);
assertNotNull("missing in all results: "+frn,origPos);


@ -226,9 +226,9 @@ public class TestTotalFacetCountsCache extends LuceneTestCase {
FacetResult result = results.get(i);
assertNotNull("Result should not be null", result);
FacetResultNode resNode = result.getFacetResultNode();
assertEquals("Invalid label", expLabels[i], resNode.getLabel().toString());
assertEquals("Invalid value", expValues[i], resNode.getValue(), 0.0);
assertEquals("Invalid number of subresults", 0, resNode.getNumSubResults());
assertEquals("Invalid label", expLabels[i], resNode.label.toString());
assertEquals("Invalid value", expValues[i], resNode.value, 0.0);
assertEquals("Invalid number of subresults", 0, resNode.subResults.size());
}
// we're done, close the index reader and the taxonomy.
slowIndexReader.close();


@ -110,7 +110,7 @@ public class AssociationsFacetRequestTest extends LuceneTestCase {
Query q = new MatchAllDocsQuery();
FacetsCollector fc = new FacetsCollector(fsp, reader, taxo);
FacetsCollector fc = FacetsCollector.create(fsp, reader, taxo);
IndexSearcher searcher = newSearcher(reader);
searcher.search(q, fc);
@ -118,8 +118,8 @@ public class AssociationsFacetRequestTest extends LuceneTestCase {
assertNotNull("No results!",res);
assertEquals("Wrong number of results!",2, res.size());
assertEquals("Wrong count for category 'a'!",200, (int) res.get(0).getFacetResultNode().getValue());
assertEquals("Wrong count for category 'b'!",150, (int) res.get(1).getFacetResultNode().getValue());
assertEquals("Wrong count for category 'a'!",200, (int) res.get(0).getFacetResultNode().value);
assertEquals("Wrong count for category 'b'!",150, (int) res.get(1).getFacetResultNode().value);
taxo.close();
}
@ -135,7 +135,7 @@ public class AssociationsFacetRequestTest extends LuceneTestCase {
Query q = new MatchAllDocsQuery();
FacetsCollector fc = new FacetsCollector(fsp, reader, taxo);
FacetsCollector fc = FacetsCollector.create(fsp, reader, taxo);
IndexSearcher searcher = newSearcher(reader);
searcher.search(q, fc);
@ -143,8 +143,8 @@ public class AssociationsFacetRequestTest extends LuceneTestCase {
assertNotNull("No results!",res);
assertEquals("Wrong number of results!",2, res.size());
assertEquals("Wrong count for category 'a'!",50f, (float) res.get(0).getFacetResultNode().getValue(), 0.00001);
assertEquals("Wrong count for category 'b'!",10f, (float) res.get(1).getFacetResultNode().getValue(), 0.00001);
assertEquals("Wrong count for category 'a'!",50f, (float) res.get(0).getFacetResultNode().value, 0.00001);
assertEquals("Wrong count for category 'b'!",10f, (float) res.get(1).getFacetResultNode().value, 0.00001);
taxo.close();
}
@ -165,7 +165,7 @@ public class AssociationsFacetRequestTest extends LuceneTestCase {
Query q = new MatchAllDocsQuery();
FacetsCollector fc = new FacetsCollector(fsp, reader, taxo);
FacetsCollector fc = FacetsCollector.create(fsp, reader, taxo);
IndexSearcher searcher = newSearcher(reader);
searcher.search(q, fc);


@ -3,22 +3,22 @@ package org.apache.lucene.facet.search.sampling;
import java.util.List;
import java.util.Random;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.MultiCollector;
import org.apache.lucene.facet.search.BaseTestTopK;
import org.apache.lucene.facet.search.FacetsAccumulator;
import org.apache.lucene.facet.search.FacetsCollector;
import org.apache.lucene.facet.search.ScoredDocIDs;
import org.apache.lucene.facet.search.ScoredDocIdCollector;
import org.apache.lucene.facet.search.StandardFacetsCollector;
import org.apache.lucene.facet.search.params.FacetRequest;
import org.apache.lucene.facet.search.params.FacetRequest.ResultMode;
import org.apache.lucene.facet.search.params.FacetSearchParams;
import org.apache.lucene.facet.search.results.FacetResult;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.MultiCollector;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -48,7 +48,7 @@ public abstract class BaseSampleTestTopK extends BaseTestTopK {
@Override
protected FacetSearchParams searchParamsWithRequests(int numResults, int partitionSize) {
FacetSearchParams res = super.searchParamsWithRequests(numResults, partitionSize);
for (FacetRequest req : res.getFacetRequests()) {
for (FacetRequest req : res.facetRequests) {
// randomize the way we aggregate results
if (random().nextBoolean()) {
req.setResultMode(ResultMode.GLOBAL_FLAT);
@ -78,7 +78,7 @@ public abstract class BaseSampleTestTopK extends BaseTestTopK {
ScoredDocIdCollector docCollector = ScoredDocIdCollector.create(indexReader.maxDoc(), false);
FacetSearchParams expectedSearchParams = searchParamsWithRequests(K, partitionSize);
FacetsCollector fc = new FacetsCollector(expectedSearchParams, indexReader, taxoReader);
FacetsCollector fc = FacetsCollector.create(expectedSearchParams, indexReader, taxoReader);
searcher.search(q, MultiCollector.wrap(docCollector, fc));
@ -97,7 +97,7 @@ public abstract class BaseSampleTestTopK extends BaseTestTopK {
assertSampling(expectedResults, q, sampler, samplingSearchParams, true);
break; // succeeded
} catch (NotSameResultError e) {
} catch (AssertionError e) {
if (nTrial >= RETRIES - 1) {
throw e; // no more retries allowed, must fail
}
@ -120,7 +120,7 @@ public abstract class BaseSampleTestTopK extends BaseTestTopK {
private FacetsCollector samplingCollector(final boolean complement, final Sampler sampler,
FacetSearchParams samplingSearchParams) {
FacetsCollector samplingFC = new FacetsCollector(samplingSearchParams, indexReader, taxoReader) {
FacetsCollector samplingFC = new StandardFacetsCollector(samplingSearchParams, indexReader, taxoReader) {
@Override
protected FacetsAccumulator initFacetsAccumulator(FacetSearchParams facetSearchParams, IndexReader indexReader,
TaxonomyReader taxonomyReader) {


@ -8,6 +8,7 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.facet.index.FacetFields;
import org.apache.lucene.facet.search.FacetsAccumulator;
import org.apache.lucene.facet.search.FacetsCollector;
import org.apache.lucene.facet.search.StandardFacetsCollector;
import org.apache.lucene.facet.search.params.CountFacetRequest;
import org.apache.lucene.facet.search.params.FacetRequest;
import org.apache.lucene.facet.search.params.FacetRequest.ResultMode;
@ -85,10 +86,8 @@ public class OversampleWithDepthTest extends LuceneTestCase {
FacetResultNode rootNode = res.getFacetResultNode();
// Each node below root should also have sub-results as the requested depth was '2'
for (FacetResultNode node : rootNode.getSubResults()) {
assertTrue("node " + node.getLabel()
+ " should have had children as the requested depth was '2'",
node.getNumSubResults() > 0);
for (FacetResultNode node : rootNode.subResults) {
assertTrue("node " + node.label + " should have had children as the requested depth was '2'", node.subResults.size() > 0);
}
IOUtils.close(r, tr, indexDir, taxoDir);
@ -111,11 +110,10 @@ public class OversampleWithDepthTest extends LuceneTestCase {
}
/** search reader <code>r</code>*/
private FacetResult searchWithFacets(IndexReader r,
TaxonomyReader tr, FacetSearchParams fsp, final SamplingParams params)
throws IOException {
private FacetResult searchWithFacets(IndexReader r, TaxonomyReader tr, FacetSearchParams fsp,
final SamplingParams params) throws IOException {
// a FacetsCollector with a sampling accumulator
FacetsCollector fcWithSampling = new FacetsCollector(fsp, r, tr) {
FacetsCollector fcWithSampling = new StandardFacetsCollector(fsp, r, tr) {
@Override
protected FacetsAccumulator initFacetsAccumulator(FacetSearchParams facetSearchParams, IndexReader indexReader,
TaxonomyReader taxonomyReader) {


@ -28,10 +28,8 @@ import org.apache.lucene.facet.taxonomy.TaxonomyReader;
public class SamplingAccumulatorTest extends BaseSampleTestTopK {
@Override
protected FacetsAccumulator getSamplingAccumulator(Sampler sampler,
TaxonomyReader taxoReader, IndexReader indexReader,
FacetSearchParams searchParams) {
return new SamplingAccumulator(sampler, searchParams, indexReader,
taxoReader);
protected FacetsAccumulator getSamplingAccumulator(Sampler sampler, TaxonomyReader taxoReader,
IndexReader indexReader, FacetSearchParams searchParams) {
return new SamplingAccumulator(sampler, searchParams, indexReader, taxoReader);
}
}


@ -131,9 +131,6 @@ public class TestCategoryPath extends LuceneTestCase {
CategoryPath p = new CategoryPath("hello", "world", "yo");
assertEquals(3, p.length);
assertEquals("hello/world/yo", p.toString('/'));
p = new CategoryPath(new String[0]);
assertEquals(0, p.length);
}
@Test


@ -353,7 +353,7 @@ public class TestTaxonomyCombined extends LuceneTestCase {
}
// test TaxonomyReader.getCategory():
for (int i=0; i<tr.getSize(); i++) {
for (int i = 1; i < tr.getSize(); i++) {
CategoryPath expectedCategory = new CategoryPath(expectedCategories[i]);
CategoryPath category = tr.getPath(i);
if (!expectedCategory.equals(category)) {
@ -367,7 +367,7 @@ public class TestTaxonomyCombined extends LuceneTestCase {
assertNull(tr.getPath(TaxonomyReader.INVALID_ORDINAL));
// test TaxonomyReader.getOrdinal():
for (int i=0; i<expectedCategories.length; i++) {
for (int i = 1; i < expectedCategories.length; i++) {
int expectedOrdinal = i;
int ordinal = tr.getOrdinal(new CategoryPath(expectedCategories[i]));
if (expectedOrdinal != ordinal) {


@ -21,6 +21,7 @@ import java.util.Random;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.compressing.dummy.DummyCompressingCodec;
import org.apache.lucene.codecs.lucene42.Lucene42Codec;
@ -66,6 +67,7 @@ public abstract class CompressingCodec extends FilterCodec {
}
private final CompressingStoredFieldsFormat storedFieldsFormat;
private final CompressingTermVectorsFormat termVectorsFormat;
/**
* Creates a compressing codec with a given segment suffix
@ -73,6 +75,7 @@ public abstract class CompressingCodec extends FilterCodec {
public CompressingCodec(String name, String segmentSuffix, CompressionMode compressionMode, int chunkSize) {
super(name, new Lucene42Codec());
this.storedFieldsFormat = new CompressingStoredFieldsFormat(name, segmentSuffix, compressionMode, chunkSize);
this.termVectorsFormat = new CompressingTermVectorsFormat(name, segmentSuffix, compressionMode, chunkSize);
}
/**
@ -87,8 +90,13 @@ public abstract class CompressingCodec extends FilterCodec {
return storedFieldsFormat;
}
@Override
public TermVectorsFormat termVectorsFormat() {
return termVectorsFormat;
}
@Override
public String toString() {
return getName() + "(storedFieldsFormat=" + storedFieldsFormat + ")";
return getName() + "(storedFieldsFormat=" + storedFieldsFormat + ", termVectorsFormat=" + termVectorsFormat + ")";
}
}


@ -17,6 +17,7 @@
package org.apache.solr.handler.dataimport;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;
import java.net.URLEncoder;
@ -116,6 +117,7 @@ public class TestBuiltInEvaluators extends AbstractDataImportHandlerTestCase {
}
@Test
@Ignore("fails if somewhere on earth is a DST change")
public void testDateFormatEvaluator() {
Evaluator dateFormatEval = new DateFormatEvaluator();
ContextImpl context = new ContextImpl(null, resolver, null,