LUCENE-5339: finish cutover

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5339@1545466 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2013-11-26 00:12:57 +00:00
parent 92b1e23071
commit 0255424864
187 changed files with 92 additions and 25664 deletions

View File

@ -1,74 +0,0 @@
package org.apache.lucene.facet.associations;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.facet.index.DrillDownStream;
import org.apache.lucene.facet.params.FacetIndexingParams;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.util.BytesRef;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* A {@link DrillDownStream} which adds to each drill-down token a
* payload according to the {@link CategoryAssociation} defined in the
* {@link CategoryAssociationsContainer}.
*
* @lucene.experimental
*/
public class AssociationsDrillDownStream extends DrillDownStream {
private final PayloadAttribute payloadAttribute;
private final BytesRef payload;
private final ByteArrayDataOutput output = new ByteArrayDataOutput();
private final CategoryAssociationsContainer associations;
public AssociationsDrillDownStream(CategoryAssociationsContainer associations, FacetIndexingParams indexingParams) {
super(associations, indexingParams);
this.associations = associations;
payloadAttribute = addAttribute(PayloadAttribute.class);
BytesRef bytes = payloadAttribute.getPayload();
if (bytes == null) {
bytes = new BytesRef(new byte[4]);
payloadAttribute.setPayload(bytes);
}
bytes.offset = 0;
this.payload = bytes;
}
@Override
protected void addAdditionalAttributes(FacetLabel cp, boolean isParent) {
if (isParent) {
return; // associations are not added to parent categories
}
CategoryAssociation association = associations.getAssociation(cp);
if (association == null) {
// it is ok to set a null association for a category - it's treated as a
// regular category in that case.
return;
}
if (payload.bytes.length < association.maxBytesNeeded()) {
payload.grow(association.maxBytesNeeded());
}
output.reset(payload.bytes);
association.serialize(output);
payload.length = output.getPosition();
}
}

View File

@ -1,124 +0,0 @@
package org.apache.lucene.facet.associations;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.facet.index.DrillDownStream;
import org.apache.lucene.facet.index.FacetFields;
import org.apache.lucene.facet.params.CategoryListParams;
import org.apache.lucene.facet.params.FacetIndexingParams;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* A utility class for adding facet fields to a document. Usually one field is
* added for all facets; however, depending on
* {@link FacetIndexingParams#getCategoryListParams(FacetLabel)}, a separate field
* may be added for each group of facets.
*
* @lucene.experimental
*/
public class AssociationsFacetFields extends FacetFields {
// The drill-down field is added with a TokenStream, which is why it is based on
// the TextField type. However, for associations we also store a payload with the
// association value, so we set IndexOptions to include positions.
private static final FieldType DRILL_DOWN_TYPE = new FieldType(TextField.TYPE_NOT_STORED);
static {
DRILL_DOWN_TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
DRILL_DOWN_TYPE.freeze();
}
/**
* Constructs a new instance with the {@link FacetIndexingParams#DEFAULT
* default} facet indexing params.
*
* @param taxonomyWriter
* used to resolve given categories to ordinals
*/
public AssociationsFacetFields(TaxonomyWriter taxonomyWriter) {
super(taxonomyWriter);
}
/**
* Constructs a new instance with the given facet indexing params.
*
* @param taxonomyWriter
* used to resolve given categories to ordinals
* @param params
* determines under which fields the categories should be indexed
*/
public AssociationsFacetFields(TaxonomyWriter taxonomyWriter, FacetIndexingParams params) {
super(taxonomyWriter, params);
}
@Override
protected Map<CategoryListParams,Iterable<FacetLabel>> createCategoryListMapping(
Iterable<FacetLabel> categories) {
CategoryAssociationsContainer categoryAssociations = (CategoryAssociationsContainer) categories;
HashMap<CategoryListParams,Iterable<FacetLabel>> categoryLists =
new HashMap<CategoryListParams,Iterable<FacetLabel>>();
for (FacetLabel cp : categories) {
// each category may be indexed under a different field, so add it to the right list.
CategoryListParams clp = indexingParams.getCategoryListParams(cp);
CategoryAssociationsContainer clpContainer = (CategoryAssociationsContainer) categoryLists.get(clp);
if (clpContainer == null) {
clpContainer = new CategoryAssociationsContainer();
categoryLists.put(clp, clpContainer);
}
clpContainer.setAssociation(cp, categoryAssociations.getAssociation(cp));
}
return categoryLists;
}
@Override
protected Map<String,BytesRef> getCategoryListData(CategoryListParams categoryListParams, IntsRef ordinals,
Iterable<FacetLabel> categories) throws IOException {
AssociationsListBuilder associations = new AssociationsListBuilder((CategoryAssociationsContainer) categories);
return associations.build(ordinals, categories);
}
@Override
protected DrillDownStream getDrillDownStream(Iterable<FacetLabel> categories) {
return new AssociationsDrillDownStream((CategoryAssociationsContainer) categories, indexingParams);
}
@Override
protected FieldType drillDownFieldType() {
return DRILL_DOWN_TYPE;
}
@Override
public void addFields(Document doc, Iterable<FacetLabel> categories) throws IOException {
if (!(categories instanceof CategoryAssociationsContainer)) {
throw new IllegalArgumentException("categories must be of type " +
CategoryAssociationsContainer.class.getSimpleName());
}
super.addFields(doc, categories);
}
}
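For illustration, a minimal indexing sketch (not part of this commit) showing how the classes above were typically used together; it assumes an IndexWriter named writer and a taxonomy writer named taxoWriter are already open, and uses FacetLabel's varargs constructor:
CategoryAssociationsContainer associations = new CategoryAssociationsContainer();
associations.setAssociation(new FacetLabel("genre", "comedy"), new CategoryIntAssociation(3));
associations.setAssociation(new FacetLabel("rating"), new CategoryFloatAssociation(4.5f));
AssociationsFacetFields facetFields = new AssociationsFacetFields(taxoWriter);
Document doc = new Document();
doc.add(new TextField("content", "some text", Field.Store.NO));
facetFields.addFields(doc, associations); // adds drill-down + association fields
writer.addDocument(doc);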

View File

@ -1,82 +0,0 @@
package org.apache.lucene.facet.associations;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.facet.index.CategoryListBuilder;
import org.apache.lucene.facet.index.CountingListBuilder;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* An {@link AssociationsListBuilder} encodes category-association value pairs.
* Every category-association pair is written under the respective association's
* {@link CategoryAssociation#getCategoryListID()}.
* <p>
* <b>NOTE:</b> association lists do not encode the counting list data. You
* should use {@link CountingListBuilder} to build that information and then
* merge the results of both {@link #build(IntsRef, Iterable)} calls.
*/
public class AssociationsListBuilder implements CategoryListBuilder {
private final CategoryAssociationsContainer associations;
private final ByteArrayDataOutput output = new ByteArrayDataOutput();
public AssociationsListBuilder(CategoryAssociationsContainer associations) {
this.associations = associations;
}
@Override
public Map<String,BytesRef> build(IntsRef ordinals, Iterable<FacetLabel> categories) throws IOException {
final HashMap<String,BytesRef> res = new HashMap<String,BytesRef>();
int idx = 0;
for (FacetLabel cp : categories) {
// build per-association key BytesRef
CategoryAssociation association = associations.getAssociation(cp);
BytesRef bytes = res.get(association.getCategoryListID());
if (bytes == null) {
bytes = new BytesRef(32);
res.put(association.getCategoryListID(), bytes);
}
int maxBytesNeeded = 4 /* int */ + association.maxBytesNeeded() + bytes.length;
if (bytes.bytes.length < maxBytesNeeded) {
bytes.grow(maxBytesNeeded);
}
// reset the output to write from bytes.length (current position) until the end
output.reset(bytes.bytes, bytes.length, bytes.bytes.length - bytes.length);
output.writeInt(ordinals.ints[idx++]);
// encode the association bytes
association.serialize(output);
// update BytesRef
bytes.length = output.getPosition();
}
return res;
}
}
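For illustration (not in the original file): with a CategoryIntAssociation, every entry appended above is 8 bytes, a 4-byte big-endian ordinal followed by the 4-byte association value, concatenated into one BytesRef per association list ID. A minimal decode sketch over such a BytesRef, mirroring the aggregators later in this diff, where bytes is assumed to hold one document's association data:
int pos = bytes.offset;
final int end = bytes.offset + bytes.length;
while (pos < end) {
  // 4-byte big-endian ordinal
  int ordinal = ((bytes.bytes[pos++] & 0xFF) << 24) | ((bytes.bytes[pos++] & 0xFF) << 16)
      | ((bytes.bytes[pos++] & 0xFF) << 8) | (bytes.bytes[pos++] & 0xFF);
  // 4-byte association value (raw int; use Float.intBitsToFloat for float associations)
  int value = ((bytes.bytes[pos++] & 0xFF) << 24) | ((bytes.bytes[pos++] & 0xFF) << 16)
      | ((bytes.bytes[pos++] & 0xFF) << 8) | (bytes.bytes[pos++] & 0xFF);
  // ... use (ordinal, value) ...
}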

View File

@ -1,48 +0,0 @@
package org.apache.lucene.facet.associations;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Allows associating an arbitrary value with a {@link FacetLabel}.
*
* @lucene.experimental
*/
public interface CategoryAssociation {
/** Serializes the associated value into the given {@link DataOutput}. */
public void serialize(ByteArrayDataOutput output);
/** Deserializes the association value from the given {@link DataInput}. */
public void deserialize(ByteArrayDataInput input);
/** Returns the maximum bytes needed to encode the association value. */
public int maxBytesNeeded();
/**
* Returns the ID of the category association. The ID is used as e.g. the
* term's text under which to encode the association values.
*/
public String getCategoryListID();
}
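As a hypothetical illustration of this contract (not part of the codebase), a long-valued association could be implemented roughly as follows, mirroring CategoryIntAssociation below; the "$assoc_long$" list ID and the class itself are invented here:
import java.io.IOException;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;
public class CategoryLongAssociation implements CategoryAssociation {
  public static final String ASSOCIATION_LIST_ID = "$assoc_long$"; // hypothetical list ID
  private long value;
  public CategoryLongAssociation() {} // used for deserialization
  public CategoryLongAssociation(long value) { this.value = value; }
  @Override
  public void serialize(ByteArrayDataOutput output) {
    try {
      output.writeLong(value);
    } catch (IOException e) {
      throw new RuntimeException("unexpected exception writing to a byte[]", e);
    }
  }
  @Override
  public void deserialize(ByteArrayDataInput input) { value = input.readLong(); }
  @Override
  public int maxBytesNeeded() { return 8; } // a long occupies 8 bytes
  @Override
  public String getCategoryListID() { return ASSOCIATION_LIST_ID; }
}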

View File

@ -1,65 +0,0 @@
package org.apache.lucene.facet.associations;
import java.util.HashMap;
import java.util.Iterator;
import org.apache.lucene.facet.taxonomy.FacetLabel;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/** Holds {@link CategoryAssociation} per {@link FacetLabel}. */
public class CategoryAssociationsContainer implements Iterable<FacetLabel> {
private final HashMap<FacetLabel,CategoryAssociation> categoryAssociations =
new HashMap<FacetLabel,CategoryAssociation>();
/**
* Adds the {@link CategoryAssociation} for the given {@link FacetLabel
* category}. Overrides any association that was previously set.
*/
public void setAssociation(FacetLabel category, CategoryAssociation association) {
if (association == null) {
throw new IllegalArgumentException("cannot set a null association to a category");
}
categoryAssociations.put(category, association);
}
/**
* Returns the {@link CategoryAssociation} that was set for the
* {@link FacetLabel category}, or {@code null} if none was defined.
*/
public CategoryAssociation getAssociation(FacetLabel category) {
return categoryAssociations.get(category);
}
@Override
public Iterator<FacetLabel> iterator() {
return categoryAssociations.keySet().iterator();
}
/** Clears all category associations. */
public void clear() {
categoryAssociations.clear();
}
@Override
public String toString() {
return categoryAssociations.toString();
}
}
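A short usage sketch (illustrative only): associations are set per category and can be looked up or iterated; setting a null association throws IllegalArgumentException, while getAssociation simply returns null for categories that have none:
CategoryAssociationsContainer container = new CategoryAssociationsContainer();
container.setAssociation(new FacetLabel("genre", "comedy"), new CategoryIntAssociation(3));
for (FacetLabel label : container) {
  CategoryAssociation assoc = container.getAssociation(label); // null if none was set
}
container.clear(); // removes all associations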

View File

@ -1,80 +0,0 @@
package org.apache.lucene.facet.associations;
import java.io.IOException;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/** A {@link CategoryAssociation} that associates a float with a category. */
public class CategoryFloatAssociation implements CategoryAssociation {
public static final String ASSOCIATION_LIST_ID = "$assoc_float$";
private float value;
public CategoryFloatAssociation() {
// used for deserialization
}
public CategoryFloatAssociation(float value) {
this.value = value;
}
@Override
public void serialize(ByteArrayDataOutput output) {
try {
output.writeInt(Float.floatToIntBits(value));
} catch (IOException e) {
throw new RuntimeException("unexpected exception writing to a byte[]", e);
}
}
@Override
public void deserialize(ByteArrayDataInput input) {
value = Float.intBitsToFloat(input.readInt());
}
@Override
public int maxBytesNeeded() {
// plain integer
return 4;
}
@Override
public String getCategoryListID() {
return ASSOCIATION_LIST_ID;
}
/**
* Returns the value associated with a category. If you used
* {@link #CategoryFloatAssociation()}, you should call
* {@link #deserialize(ByteArrayDataInput)} before calling this method;
* otherwise the value returned is undefined.
*/
public float getValue() {
return value;
}
@Override
public String toString() {
return getClass().getSimpleName() + "(" + value + ")";
}
}
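An illustrative round trip (not part of the original file), using the no-arg constructor plus deserialize as the javadoc above suggests; the 4-byte buffer size matches maxBytesNeeded():
byte[] buf = new byte[4];
ByteArrayDataOutput out = new ByteArrayDataOutput(buf);
new CategoryFloatAssociation(2.5f).serialize(out);
CategoryFloatAssociation assoc = new CategoryFloatAssociation();
assoc.deserialize(new ByteArrayDataInput(buf));
float v = assoc.getValue(); // 2.5f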

View File

@ -1,80 +0,0 @@
package org.apache.lucene.facet.associations;
import java.io.IOException;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/** A {@link CategoryAssociation} that associates an integer with a category. */
public class CategoryIntAssociation implements CategoryAssociation {
public static final String ASSOCIATION_LIST_ID = "$assoc_int$";
private int value;
public CategoryIntAssociation() {
// used for deserialization
}
public CategoryIntAssociation(int value) {
this.value = value;
}
@Override
public void serialize(ByteArrayDataOutput output) {
try {
output.writeInt(value);
} catch (IOException e) {
throw new RuntimeException("unexpected exception writing to a byte[]", e);
}
}
@Override
public void deserialize(ByteArrayDataInput input) {
value = input.readInt();
}
@Override
public int maxBytesNeeded() {
// plain integer
return 4;
}
@Override
public String getCategoryListID() {
return ASSOCIATION_LIST_ID;
}
/**
* Returns the value associated with a category. If you used
* {@link #CategoryIntAssociation()}, you should call
* {@link #deserialize(ByteArrayDataInput)} before calling this method;
* otherwise the value returned is undefined.
*/
public int getValue() {
return value;
}
@Override
public String toString() {
return getClass().getSimpleName() + "(" + value + ")";
}
}

View File

@ -1,46 +0,0 @@
package org.apache.lucene.facet.associations;
import org.apache.lucene.facet.params.FacetIndexingParams;
import org.apache.lucene.facet.search.FacetRequest;
import org.apache.lucene.facet.search.FacetsAggregator;
import org.apache.lucene.facet.taxonomy.FacetLabel;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* A {@link FacetRequest} for weighting facets according to their float
* association by summing the association values.
*
* @lucene.experimental
*/
public class SumFloatAssociationFacetRequest extends FacetRequest {
/**
* Create a float association facet request for a given node in the
* taxonomy.
*/
public SumFloatAssociationFacetRequest(FacetLabel path, int num) {
super(path, num);
}
@Override
public FacetsAggregator createFacetsAggregator(FacetIndexingParams fip) {
return new SumFloatAssociationFacetsAggregator();
}
}
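For context, a hedged search-time sketch under the pre-cutover (Lucene 4.x style) facet API that these classes belonged to; FacetSearchParams, FacetsCollector.create and getFacetResults are assumed from that API, and searcher/taxoReader are assumed to be open:
FacetSearchParams fsp = new FacetSearchParams(
    new SumFloatAssociationFacetRequest(new FacetLabel("rating"), 10));
FacetsCollector fc = FacetsCollector.create(fsp, searcher.getIndexReader(), taxoReader);
searcher.search(new MatchAllDocsQuery(), fc);
List<FacetResult> results = fc.getFacetResults(); // weights are sums of the float associations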

View File

@ -1,92 +0,0 @@
package org.apache.lucene.facet.associations;
import java.io.IOException;
import org.apache.lucene.facet.params.CategoryListParams;
import org.apache.lucene.facet.search.FacetArrays;
import org.apache.lucene.facet.search.FacetRequest;
import org.apache.lucene.facet.search.FacetsAggregator;
import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs;
import org.apache.lucene.facet.search.OrdinalValueResolver;
import org.apache.lucene.facet.search.OrdinalValueResolver.FloatValueResolver;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.util.BytesRef;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* A {@link FacetsAggregator} which computes the weight of a category as the sum
* of the float values associated with it in the result documents. Assumes that
* the association encoded for each ordinal is {@link CategoryFloatAssociation}.
* <p>
* <b>NOTE:</b> this aggregator does not support
* {@link #rollupValues(FacetRequest, int, int[], int[], FacetArrays)}. It only
* aggregates the categories for which you added a {@link CategoryAssociation}.
*
* @lucene.experimental
*/
public class SumFloatAssociationFacetsAggregator implements FacetsAggregator {
private final BytesRef bytes = new BytesRef(32);
@Override
public void aggregate(MatchingDocs matchingDocs, CategoryListParams clp, FacetArrays facetArrays) throws IOException {
BinaryDocValues dv = matchingDocs.context.reader().getBinaryDocValues(clp.field + CategoryFloatAssociation.ASSOCIATION_LIST_ID);
if (dv == null) {
return; // no float associations in this reader
}
final int length = matchingDocs.bits.length();
final float[] values = facetArrays.getFloatArray();
int doc = 0;
while (doc < length && (doc = matchingDocs.bits.nextSetBit(doc)) != -1) {
dv.get(doc, bytes);
if (bytes.length > 0) {
// aggregate float association values for ordinals
int bytesUpto = bytes.offset + bytes.length;
int pos = bytes.offset;
while (pos < bytesUpto) {
int ordinal = ((bytes.bytes[pos++] & 0xFF) << 24) | ((bytes.bytes[pos++] & 0xFF) << 16)
| ((bytes.bytes[pos++] & 0xFF) << 8) | (bytes.bytes[pos++] & 0xFF);
int value = ((bytes.bytes[pos++] & 0xFF) << 24) | ((bytes.bytes[pos++] & 0xFF) << 16)
| ((bytes.bytes[pos++] & 0xFF) << 8) | (bytes.bytes[pos++] & 0xFF);
values[ordinal] += Float.intBitsToFloat(value);
}
}
++doc;
}
}
@Override
public boolean requiresDocScores() {
return false;
}
@Override
public void rollupValues(FacetRequest fr, int ordinal, int[] children, int[] siblings, FacetArrays facetArrays) {
// NO-OP: this aggregator does not roll up values to the parents.
}
@Override
public OrdinalValueResolver createOrdinalValueResolver(FacetRequest facetRequest, FacetArrays arrays) {
return new FloatValueResolver(arrays);
}
}

View File

@ -1,46 +0,0 @@
package org.apache.lucene.facet.associations;
import org.apache.lucene.facet.params.FacetIndexingParams;
import org.apache.lucene.facet.search.FacetRequest;
import org.apache.lucene.facet.search.FacetsAggregator;
import org.apache.lucene.facet.taxonomy.FacetLabel;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* A {@link FacetRequest} for weighting facets according to their integer
* association by summing the association values.
*
* @lucene.experimental
*/
public class SumIntAssociationFacetRequest extends FacetRequest {
/**
* Create an integer association facet request for a given node in the
* taxonomy.
*/
public SumIntAssociationFacetRequest(FacetLabel path, int num) {
super(path, num);
}
@Override
public FacetsAggregator createFacetsAggregator(FacetIndexingParams fip) {
return new SumIntAssociationFacetsAggregator();
}
}

View File

@ -1,91 +0,0 @@
package org.apache.lucene.facet.associations;
import java.io.IOException;
import org.apache.lucene.facet.params.CategoryListParams;
import org.apache.lucene.facet.search.FacetArrays;
import org.apache.lucene.facet.search.FacetRequest;
import org.apache.lucene.facet.search.FacetsAggregator;
import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs;
import org.apache.lucene.facet.search.OrdinalValueResolver;
import org.apache.lucene.facet.search.OrdinalValueResolver.IntValueResolver;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.util.BytesRef;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* A {@link FacetsAggregator} which computes the weight of a category as the sum
* of the integer values associated with it in the result documents. Assumes
* that the association encoded for each ordinal is
* {@link CategoryIntAssociation}.
* <p>
* <b>NOTE:</b> this aggregator does not support
* {@link #rollupValues(FacetRequest, int, int[], int[], FacetArrays)}. It only
* aggregates the categories for which you added a {@link CategoryAssociation}.
*/
public class SumIntAssociationFacetsAggregator implements FacetsAggregator {
private final BytesRef bytes = new BytesRef(32);
@Override
public void aggregate(MatchingDocs matchingDocs, CategoryListParams clp, FacetArrays facetArrays) throws IOException {
BinaryDocValues dv = matchingDocs.context.reader().getBinaryDocValues(clp.field + CategoryIntAssociation.ASSOCIATION_LIST_ID);
if (dv == null) {
return; // no int associations in this reader
}
final int length = matchingDocs.bits.length();
final int[] values = facetArrays.getIntArray();
int doc = 0;
while (doc < length && (doc = matchingDocs.bits.nextSetBit(doc)) != -1) {
dv.get(doc, bytes);
if (bytes.length > 0) {
// aggregate association values for ordinals
int bytesUpto = bytes.offset + bytes.length;
int pos = bytes.offset;
while (pos < bytesUpto) {
int ordinal = ((bytes.bytes[pos++] & 0xFF) << 24) | ((bytes.bytes[pos++] & 0xFF) << 16)
| ((bytes.bytes[pos++] & 0xFF) << 8) | (bytes.bytes[pos++] & 0xFF);
int value = ((bytes.bytes[pos++] & 0xFF) << 24) | ((bytes.bytes[pos++] & 0xFF) << 16)
| ((bytes.bytes[pos++] & 0xFF) << 8) | (bytes.bytes[pos++] & 0xFF);
values[ordinal] += value;
}
}
++doc;
}
}
@Override
public boolean requiresDocScores() {
return false;
}
@Override
public void rollupValues(FacetRequest fr, int ordinal, int[] children, int[] siblings, FacetArrays facetArrays) {
// NO-OP: this aggregator does not roll up values to the parents.
}
@Override
public OrdinalValueResolver createOrdinalValueResolver(FacetRequest facetRequest, FacetArrays arrays) {
return new IntValueResolver(arrays);
}
}

View File

@ -1,25 +0,0 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html>
<head>
<title>Category Association</title>
</head>
<body>
Allows associating arbitrary values with a category. The value can be used e.g. to compute
the category's weight during faceted search.
</body>
</html>

View File

@ -1,554 +0,0 @@
package org.apache.lucene.facet.collections;
import java.util.Arrays;
import java.util.Iterator;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* An array-based hashtable which maps keys to values, similar to Java's HashMap,
* except that performance tests showed it performs better.
* <p>
* The hashtable is constructed with a given capacity, or 16 as a default. In
* case there's not enough room for new pairs, the hashtable grows. Capacity is
* adjusted to a power of 2, and there are 2 * capacity entries for the hash.
* The pre-allocated arrays (for keys, values) have a length of capacity + 1,
* where index 0 is used as 'Ground' or 'NULL'.
* <p>
* The arrays are allocated ahead of hash operations, and form an 'empty space'
* list from which new &lt;key,value&gt; pairs are allocated.
*
* @lucene.experimental
*/
public class ArrayHashMap<K,V> implements Iterable<V> {
/** Implements an IntIterator which iterates over all the allocated indexes. */
private final class IndexIterator implements IntIterator {
/**
* The last used baseHashIndex. Needed for "jumping" from one hash entry
* to another.
*/
private int baseHashIndex = 0;
/** The next not-yet-visited index. */
private int index = 0;
/** Index of the last visited pair. Used in {@link #remove()}. */
private int lastIndex = 0;
/**
* Create the Iterator, make <code>index</code> point to the "first"
* index which is not empty. If no such index exists (e.g. the map is
* empty) it will be zero.
*/
public IndexIterator() {
for (baseHashIndex = 0; baseHashIndex < baseHash.length; ++baseHashIndex) {
index = baseHash[baseHashIndex];
if (index != 0) {
break;
}
}
}
@Override
public boolean hasNext() {
return index != 0;
}
@Override
public int next() {
// Save the last index visited
lastIndex = index;
// next the index
index = next[index];
// if the next index points to the 'Ground' it means we're done with
// the current hash entry and we need to jump to the next one. This
// is done until all the hash entries had been visited.
while (index == 0 && ++baseHashIndex < baseHash.length) {
index = baseHash[baseHashIndex];
}
return lastIndex;
}
@Override
@SuppressWarnings("unchecked")
public void remove() {
ArrayHashMap.this.remove((K) keys[lastIndex]);
}
}
/** Implements an Iterator, used for iteration over the map's keys. */
private final class KeyIterator implements Iterator<K> {
private IntIterator iterator = new IndexIterator();
KeyIterator() { }
@Override
public boolean hasNext() {
return iterator.hasNext();
}
@Override
@SuppressWarnings("unchecked")
public K next() {
return (K) keys[iterator.next()];
}
@Override
public void remove() {
iterator.remove();
}
}
/** Implements an Iterator, used for iteration over the map's values. */
private final class ValueIterator implements Iterator<V> {
private IntIterator iterator = new IndexIterator();
ValueIterator() { }
@Override
public boolean hasNext() {
return iterator.hasNext();
}
@Override
@SuppressWarnings("unchecked")
public V next() {
return (V) values[iterator.next()];
}
@Override
public void remove() {
iterator.remove();
}
}
/** Default capacity - in case no capacity was specified in the constructor */
private static final int DEFAULT_CAPACITY = 16;
/**
* Holds the base hash entries. If the capacity is 2^N, then the base hash
* holds 2^(N+1).
*/
int[] baseHash;
/**
* The current capacity of the map. Always 2^N and never less than 16. We
* never use the zero index. It is needed to improve performance and is also
* used as "ground".
*/
private int capacity;
/**
* All entries are allocated at map creation and are initially "free" or
* empty. Whenever a new pair comes along, an entry is "allocated", i.e.
* taken from the head of the free list.
*/
private int firstEmpty;
/** hashFactor is always (2^(N+1)) - 1. Used for faster hashing. */
private int hashFactor;
/** Holds the unique keys. */
Object[] keys;
/**
* In case of collisions, we implement a double linked list of the colliding
* hash's with the following next[] and prev[]. Those are also used to store
* the "empty" list.
*/
int[] next;
private int prev;
/** Number of currently stored objects in the map. */
private int size;
/** Holds the values. */
Object[] values;
/** Constructs a map with default capacity. */
public ArrayHashMap() {
this(DEFAULT_CAPACITY);
}
/**
* Constructs a map with the given capacity. Capacity is adjusted to the next
* power of 2, with a minimum of 16.
*
* @param capacity minimum capacity for the map.
*/
public ArrayHashMap(int capacity) {
this.capacity = 16;
while (this.capacity < capacity) {
// Multiply by 2 as long as we're still under the requested capacity
this.capacity <<= 1;
}
// As mentioned, we use the first index (0) as 'Ground', so we need the
// length of the arrays to be one more than the capacity
int arrayLength = this.capacity + 1;
values = new Object[arrayLength];
keys = new Object[arrayLength];
next = new int[arrayLength];
// Hash entries are twice as big as the capacity.
int baseHashSize = this.capacity << 1;
baseHash = new int[baseHashSize];
// The hash factor is 2^M - 1, which is used as an "AND" hashing operator.
// {@link #calcBaseHash()}
hashFactor = baseHashSize - 1;
size = 0;
clear();
}
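// Worked example (illustrative, not in the original source): requesting a
// capacity of 100 rounds this.capacity up to 128, allocates keys/values/next
// arrays of length 129 (index 0 is 'Ground'), a baseHash of length 256, and
// sets hashFactor = 255, so calcBaseHashIndex(key) is key.hashCode() & 255.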
/**
* Adds a pair to the map. Takes the first empty position from the
* empty-linked-list's head - {@link #firstEmpty}. New pairs are always
* inserted to baseHash, and are followed by the old colliding pair.
*/
private void prvt_put(K key, V value) {
// Hash entry to which the new pair would be inserted
int hashIndex = calcBaseHashIndex(key);
// 'Allocating' a pair from the "Empty" list.
int objectIndex = firstEmpty;
// Setting data
firstEmpty = next[firstEmpty];
values[objectIndex] = value;
keys[objectIndex] = key;
// Inserting the new pair as the first node in the specific hash entry
next[objectIndex] = baseHash[hashIndex];
baseHash[hashIndex] = objectIndex;
// Announcing a new pair was added!
++size;
}
/** Calculates the baseHash index using the internal <code>hashFactor</code>. */
protected int calcBaseHashIndex(K key) {
return key.hashCode() & hashFactor;
}
/** Empties the map. Generates the "Empty" space list for later allocation. */
public void clear() {
// Clears the hash entries
Arrays.fill(baseHash, 0);
// Set size to zero
size = 0;
// Mark all array entries as empty. This is done with
// <code>firstEmpty</code> pointing to the first valid index (1 as 0 is
// used as 'Ground').
firstEmpty = 1;
// And setting all the <code>next[i]</code> to point at
// <code>i+1</code>.
for (int i = 1; i < capacity;) {
next[i] = ++i;
}
// Surely, the last one should point to the 'Ground'.
next[capacity] = 0;
}
/** Returns true iff the key exists in the map. */
public boolean containsKey(K key) {
return find(key) != 0;
}
/** Returns true iff the object exists in the map. */
public boolean containsValue(Object o) {
for (Iterator<V> iterator = iterator(); iterator.hasNext();) {
V object = iterator.next();
if (object.equals(o)) {
return true;
}
}
return false;
}
/** Returns the index of the given key, or zero if the key wasn't found. */
protected int find(K key) {
// Calculate the hash entry.
int baseHashIndex = calcBaseHashIndex(key);
// Start from the hash entry.
int localIndex = baseHash[baseHashIndex];
// while the index does not point to the 'Ground'
while (localIndex != 0) {
// returns the index found in case of a matching key.
if (keys[localIndex].equals(key)) {
return localIndex;
}
// next the local index
localIndex = next[localIndex];
}
// If we got this far, it could only mean we did not find the key we
// were asked for. return 'Ground' index.
return 0;
}
/**
* Finds the actual index of a given key with its baseHashIndex. Some methods
* use the baseHashIndex. If those call {@link #find} there's no need to
* re-calculate that hash.
*
* @return the index of the given key, or 0 if the key wasn't found.
*/
private int findForRemove(K key, int baseHashIndex) {
// Start from the hash entry.
prev = 0;
int index = baseHash[baseHashIndex];
// while the index does not point to the 'Ground'
while (index != 0) {
// returns the index found in case of a matching key.
if (keys[index].equals(key)) {
return index;
}
// next the local index
prev = index;
index = next[index];
}
// If we got thus far, it could only mean we did not find the key we
// were asked for. return 'Ground' index.
return prev = 0;
}
/** Returns the object mapped with the given key, or null if the key wasn't found. */
@SuppressWarnings("unchecked")
public V get(K key) {
return (V) values[find(key)];
}
/**
* Allocates a new map of double the capacity, and fast-insert the old
* key-value pairs.
*/
@SuppressWarnings("unchecked")
protected void grow() {
ArrayHashMap<K,V> newmap = new ArrayHashMap<K,V>(capacity * 2);
// Iterates fast over the collection. Any valid pair is put into the new
// map without checking for duplicates or if there's enough space for
// it.
for (IndexIterator iterator = new IndexIterator(); iterator.hasNext();) {
int index = iterator.next();
newmap.prvt_put((K) keys[index], (V) values[index]);
}
// Copy newmap's data into this map.
capacity = newmap.capacity;
size = newmap.size;
firstEmpty = newmap.firstEmpty;
values = newmap.values;
keys = newmap.keys;
next = newmap.next;
baseHash = newmap.baseHash;
hashFactor = newmap.hashFactor;
}
/** Returns true iff the map is empty. */
public boolean isEmpty() {
return size == 0;
}
/** Returns an iterator on the mapped objects. */
@Override
public Iterator<V> iterator() {
return new ValueIterator();
}
/** Returns an iterator on the map keys. */
public Iterator<K> keyIterator() {
return new KeyIterator();
}
/** Prints the baseHash array, used for debugging purposes. */
@SuppressWarnings("unused")
private String getBaseHashAsString() {
return Arrays.toString(this.baseHash);
}
/**
* Inserts the &lt;key,value&gt; pair into the map. If the key already exists,
* this method updates the mapped value to the given one, returning the old
* mapped value.
*
* @return the old mapped value, or null if the key didn't exist.
*/
@SuppressWarnings("unchecked")
public V put(K key, V e) {
// Does the key exist?
int index = find(key);
// Yes!
if (index != 0) {
// Set new data and exit.
V old = (V) values[index];
values[index] = e;
return old;
}
// Is there enough room for a new pair?
if (size == capacity) {
// No? Then grow!
grow();
}
// Now that everything is set, the pair can be just put inside with no
// worries.
prvt_put(key, e);
return null;
}
/**
* Removes a &lt;key,value&gt; pair from the map and returns the mapped value,
* or null if none existed.
*
* @param key used to find the value to remove
* @return the removed value or null if none existed.
*/
@SuppressWarnings("unchecked")
public V remove(K key) {
int baseHashIndex = calcBaseHashIndex(key);
int index = findForRemove(key, baseHashIndex);
if (index != 0) {
// If it is the first in the collision list, we should promote its
// next colliding element.
if (prev == 0) {
baseHash[baseHashIndex] = next[index];
}
next[prev] = next[index];
next[index] = firstEmpty;
firstEmpty = index;
--size;
return (V) values[index];
}
return null;
}
/** Returns number of pairs currently in the map. */
public int size() {
return this.size;
}
/**
* Translates the mapped pairs' values into an array of Objects
*
* @return an object array of all the values currently in the map.
*/
public Object[] toArray() {
int j = -1;
Object[] array = new Object[size];
// Iterates over the values, adding them to the array.
for (Iterator<V> iterator = iterator(); iterator.hasNext();) {
array[++j] = iterator.next();
}
return array;
}
/**
* Translates the mapped pairs' values into an array of V
*
* @param a the array into which the elements of the list are to be stored, if
* it is big enough; otherwise, only as many elements as fit are stored.
* @return an array containing the elements of the list
*/
public V[] toArray(V[] a) {
int j = 0;
// Iterates over the values, adding them to the array.
for (Iterator<V> iterator = iterator(); j < a.length
&& iterator.hasNext(); ++j) {
a[j] = iterator.next();
}
if (j < a.length) {
a[j] = null;
}
return a;
}
@Override
public String toString() {
StringBuffer sb = new StringBuffer();
sb.append('{');
Iterator<K> keyIterator = keyIterator();
while (keyIterator.hasNext()) {
K key = keyIterator.next();
sb.append(key);
sb.append('=');
sb.append(get(key));
if (keyIterator.hasNext()) {
sb.append(',');
sb.append(' ');
}
}
sb.append('}');
return sb.toString();
}
@Override
public int hashCode() {
return getClass().hashCode() ^ size();
}
@SuppressWarnings("unchecked")
@Override
public boolean equals(Object o) {
ArrayHashMap<K, V> that = (ArrayHashMap<K,V>)o;
if (that.size() != this.size()) {
return false;
}
Iterator<K> it = keyIterator();
while (it.hasNext()) {
K key = it.next();
V v1 = this.get(key);
V v2 = that.get(key);
if ((v1 == null && v2 != null) ||
(v1 != null && v2 == null) ||
(v1 != null && !v1.equals(v2))) { // avoid NPE when both values are null
return false;
}
}
return true;
}
}
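A brief usage sketch of ArrayHashMap (illustrative only), exercising put, get, remove and key iteration:
ArrayHashMap<String,Integer> map = new ArrayHashMap<String,Integer>();
map.put("a", 1);
map.put("b", 2);
Integer old = map.put("a", 3);     // returns 1, the previously mapped value
Integer removed = map.remove("b"); // returns 2
for (Iterator<String> it = map.keyIterator(); it.hasNext();) {
  String key = it.next();
  Integer value = map.get(key);
}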

View File

@ -1,31 +0,0 @@
package org.apache.lucene.facet.collections;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Iterator interface for primitive double iteration.
*
* @lucene.experimental
*/
public interface DoubleIterator {
boolean hasNext();
double next();
void remove();
}

View File

@ -1,31 +0,0 @@
package org.apache.lucene.facet.collections;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Iterator interface for primitive float iteration.
*
* @lucene.experimental
*/
public interface FloatIterator {
boolean hasNext();
float next();
void remove();
}

View File

@ -1,634 +0,0 @@
package org.apache.lucene.facet.collections;
import java.util.Arrays;
import java.util.Iterator;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* An Array-based hashtable which maps primitive float keys to Objects of generic
* type T.<br>
* The hashtable is constructed with a given capacity, or 16 as a default. In
* case there's not enough room for new pairs, the hashtable grows. <br>
* Capacity is adjusted to a power of 2, and there are 2 * capacity entries for
* the hash.
*
* The pre-allocated arrays (for keys, values) have a length of capacity + 1,
* where index 0 is used as 'Ground' or 'NULL'.<br>
*
* The arrays are allocated ahead of hash operations, and form an 'empty space'
* list from which new key,value pairs are allocated.
*
* @lucene.experimental
*/
public class FloatToObjectMap<T> implements Iterable<T> {
/**
* Implements an IntIterator which iterates over all the allocated indexes.
*/
private final class IndexIterator implements IntIterator {
/**
* The last used baseHashIndex. Needed for "jumping" from one hash entry
* to another.
*/
private int baseHashIndex = 0;
/**
* The next not-yet-visited index.
*/
private int index = 0;
/**
* Index of the last visited pair. Used in {@link #remove()}.
*/
private int lastIndex = 0;
/**
* Create the Iterator, make <code>index</code> point to the "first"
* index which is not empty. If no such index exists (e.g. the map is
* empty) it will be zero.
*/
public IndexIterator() {
for (baseHashIndex = 0; baseHashIndex < baseHash.length; ++baseHashIndex) {
index = baseHash[baseHashIndex];
if (index != 0) {
break;
}
}
}
@Override
public boolean hasNext() {
return (index != 0);
}
@Override
public int next() {
// Save the last index visited
lastIndex = index;
// next the index
index = next[index];
// if the next index points to the 'Ground' it means we're done with
// the current hash entry and we need to jump to the next one. This
// is done until all the hash entries had been visited.
while (index == 0 && ++baseHashIndex < baseHash.length) {
index = baseHash[baseHashIndex];
}
return lastIndex;
}
@Override
public void remove() {
FloatToObjectMap.this.remove(keys[lastIndex]);
}
}
/**
* Implements a FloatIterator, used for iteration over the map's keys.
*/
private final class KeyIterator implements FloatIterator {
private IntIterator iterator = new IndexIterator();
KeyIterator() { }
@Override
public boolean hasNext() {
return iterator.hasNext();
}
@Override
public float next() {
return keys[iterator.next()];
}
@Override
public void remove() {
iterator.remove();
}
}
/**
* Implements an Iterator of a generic type T used for iteration over the
* map's values.
*/
private final class ValueIterator implements Iterator<T> {
private IntIterator iterator = new IndexIterator();
ValueIterator() { }
@Override
public boolean hasNext() {
return iterator.hasNext();
}
@Override
@SuppressWarnings("unchecked")
public T next() {
return (T) values[iterator.next()];
}
@Override
public void remove() {
iterator.remove();
}
}
/**
* Default capacity - in case no capacity was specified in the constructor
*/
private static int defaultCapacity = 16;
/**
* Holds the base hash entries. If the capacity is 2^N, then the base hash
* holds 2^(N+1) entries.
*/
int[] baseHash;
/**
* The current capacity of the map. Always 2^N and never less than 16. We
* never use the zero index. It is needed to improve performance and is also
* used as "ground".
*/
private int capacity;
/**
* All entries are allocated at map creation and are initially "free" or
* empty. Whenever a new pair comes along, an entry is "allocated", i.e.
* taken from the head of the free list.
*/
private int firstEmpty;
/**
* hashFactor is always (2^(N+1)) - 1. Used for faster hashing.
*/
private int hashFactor;
/**
* This array holds the unique keys
*/
float[] keys;
/**
* In case of collisions, we implement a double linked list of the colliding
* hash's with the following next[] and prev[]. Those are also used to store
* the "empty" list.
*/
int[] next;
private int prev;
/**
* Number of objects currently in the map.
*/
private int size;
/**
* This array holds the values
*/
Object[] values;
/**
* Constructs a map with default capacity.
*/
public FloatToObjectMap() {
this(defaultCapacity);
}
/**
* Constructs a map with the given capacity. Capacity is adjusted to the next
* power of 2, with a minimum of 16.
*
* @param capacity
* minimum capacity for the map.
*/
public FloatToObjectMap(int capacity) {
this.capacity = 16;
// Minimum capacity is 16.
while (this.capacity < capacity) {
// Multiply by 2 as long as we're still under the requested capacity
this.capacity <<= 1;
}
// As mentioned, we use the first index (0) as 'Ground', so we need the
// length of the arrays to be one more than the capacity
int arrayLength = this.capacity + 1;
this.values = new Object[arrayLength];
this.keys = new float[arrayLength];
this.next = new int[arrayLength];
// Hash entries are twice as big as the capacity.
int baseHashSize = this.capacity << 1;
this.baseHash = new int[baseHashSize];
// The hash factor is 2^M - 1, which is used as an "AND" hashing operator.
// {@link #calcBaseHash()}
this.hashFactor = baseHashSize - 1;
this.size = 0;
clear();
}
/**
* Adds a pair to the map. Takes the first empty position from the
* empty-linked-list's head - {@link #firstEmpty}.
*
* New pairs are always inserted to baseHash, and are followed by the old
* colliding pair.
*
* @param key
* float which maps the given Object
* @param e
* element which is being mapped using the given key
*/
private void prvt_put(float key, T e) {
// Hash entry to which the new pair would be inserted
int hashIndex = calcBaseHashIndex(key);
// 'Allocating' a pair from the "Empty" list.
int objectIndex = firstEmpty;
// Setting data
firstEmpty = next[firstEmpty];
values[objectIndex] = e;
keys[objectIndex] = key;
// Inserting the new pair as the first node in the specific hash entry
next[objectIndex] = baseHash[hashIndex];
baseHash[hashIndex] = objectIndex;
// Announcing a new pair was added!
++size;
}
/**
* Calculates the baseHash index using the internal <code>hashFactor</code>.
*/
protected int calcBaseHashIndex(float key) {
return Float.floatToIntBits(key) & hashFactor;
}
/**
* Empties the map. Generates the "Empty" space list for later allocation.
*/
public void clear() {
// Clears the hash entries
Arrays.fill(this.baseHash, 0);
// Set size to zero
size = 0;
// Mark all array entries as empty. This is done with
// <code>firstEmpty</code> pointing to the first valid index (1 as 0 is
// used as 'Ground').
firstEmpty = 1;
// And setting all the <code>next[i]</code> to point at
// <code>i+1</code>.
for (int i = 1; i < this.capacity;) {
next[i] = ++i;
}
// Surely, the last one should point to the 'Ground'.
next[this.capacity] = 0;
}
/**
* Checks if a given key exists in the map.
*
* @param key
* that is checked against the map data.
* @return true if the key exists in the map. false otherwise.
*/
public boolean containsKey(float key) {
return find(key) != 0;
}
/**
* Checks if the given object exists in the map.<br>
* This method iterates over the collection, trying to find an equal object.
*
* @param o
* object that is checked against the map data.
* @return true if the object exists in the map (in .equals() meaning).
* false otherwise.
*/
public boolean containsValue(Object o) {
for (Iterator<T> iterator = iterator(); iterator.hasNext();) {
T object = iterator.next();
if (object.equals(o)) {
return true;
}
}
return false;
}
/**
* Find the actual index of a given key.
*
* @return index of the key. zero if the key wasn't found.
*/
protected int find(float key) {
// Calculate the hash entry.
int baseHashIndex = calcBaseHashIndex(key);
// Start from the hash entry.
int localIndex = baseHash[baseHashIndex];
// while the index does not point to the 'Ground'
while (localIndex != 0) {
// returns the index found in case of a matching key.
if (keys[localIndex] == key) {
return localIndex;
}
// next the local index
localIndex = next[localIndex];
}
// If we got this far, it could only mean we did not find the key we
// were asked for. return 'Ground' index.
return 0;
}
/**
* Find the actual index of a given key with its baseHashIndex.<br>
* Some methods use the baseHashIndex. If those call {@link #find} there's
* no need to re-calculate that hash.
*
* @return the index of the given key, or 0 as 'Ground' if the key wasn't
* found.
*/
private int findForRemove(float key, int baseHashIndex) {
// Start from the hash entry.
this.prev = 0;
int index = baseHash[baseHashIndex];
// while the index does not point to the 'Ground'
while (index != 0) {
// returns the index found in case of a matching key.
if (keys[index] == key) {
return index;
}
// next the local index
prev = index;
index = next[index];
}
// If we got this far, it could only mean we did not find the key we
// were asked for. return 'Ground' index.
this.prev = 0;
return 0;
}
/**
* Returns the object mapped with the given key.
*
* @param key
* float whose mapped object we're interested in.
* @return an object mapped by the given key. null if the key wasn't found.
*/
@SuppressWarnings("unchecked")
public T get(float key) {
return (T) values[find(key)];
}
/**
* Grows the map. Allocates a new map of double the capacity, and
* fast-insert the old key-value pairs.
*/
@SuppressWarnings("unchecked")
protected void grow() {
FloatToObjectMap<T> that = new FloatToObjectMap<T>(
this.capacity * 2);
// Iterates fast over the collection. Any valid pair is put into the new
// map without checking for duplicates or if there's enough space for
// it.
for (IndexIterator iterator = new IndexIterator(); iterator.hasNext();) {
int index = iterator.next();
that.prvt_put(this.keys[index], (T) this.values[index]);
}
// Copy that map's data into this one.
this.capacity = that.capacity;
this.size = that.size;
this.firstEmpty = that.firstEmpty;
this.values = that.values;
this.keys = that.keys;
this.next = that.next;
this.baseHash = that.baseHash;
this.hashFactor = that.hashFactor;
}
/**
*
* @return true if the map is empty. false otherwise.
*/
public boolean isEmpty() {
return size == 0;
}
/**
* Returns a new iterator for the mapped objects.
*/
@Override
public Iterator<T> iterator() {
return new ValueIterator();
}
/** Returns an iterator on the map keys. */
public FloatIterator keyIterator() {
return new KeyIterator();
}
/**
* Prints the baseHash array, used for DEBUG purposes.
*/
@SuppressWarnings("unused")
private String getBaseHashAsString() {
return Arrays.toString(this.baseHash);
}
/**
* Inserts the &lt;key,value&gt; pair into the map. If the key already exists,
* this method updates the mapped value to the given one, returning the old
* mapped value.
*
* @return the old mapped value, or null if the key didn't exist.
*/
@SuppressWarnings("unchecked")
public T put(float key, T e) {
// Does the key exist?
int index = find(key);
// Yes!
if (index != 0) {
// Set new data and exit.
T old = (T) values[index];
values[index] = e;
return old;
}
// Is there enough room for a new pair?
if (size == capacity) {
// No? Then grow up!
grow();
}
// Now that everything is set, the pair can be just put inside with no
// worries.
prvt_put(key, e);
return null;
}
/**
* Removes a &lt;key,value&gt; pair from the map and returns the mapped value,
* or null if none existed.
*
* @param key used to find the value to remove
* @return the removed value or null if none existed.
*/
@SuppressWarnings("unchecked")
public T remove(float key) {
int baseHashIndex = calcBaseHashIndex(key);
int index = findForRemove(key, baseHashIndex);
if (index != 0) {
// If it is the first in the collision list, we should promote its
// next colliding element.
if (prev == 0) {
baseHash[baseHashIndex] = next[index];
}
next[prev] = next[index];
next[index] = firstEmpty;
firstEmpty = index;
--size;
return (T) values[index];
}
return null;
}
/**
* @return number of pairs currently in the map
*/
public int size() {
return this.size;
}
/**
* Translates the mapped pairs' values into an array of Objects
*
* @return an object array of all the values currently in the map.
*/
public Object[] toArray() {
int j = -1;
Object[] array = new Object[size];
// Iterates over the values, adding them to the array.
for (Iterator<T> iterator = iterator(); iterator.hasNext();) {
array[++j] = iterator.next();
}
return array;
}
/**
* Translates the mapped pairs' values into an array of T
*
* @param a
* the array into which the values are to be stored, if it is big
* enough; otherwise only the first a.length values are copied. If there
* is room to spare, the entry following the last value is set to null.
*
* @return an array containing the elements of the list
*
*/
public T[] toArray(T[] a) {
int j = 0;
// Iterates over the values, adding them to the array.
for (Iterator<T> iterator = iterator(); j < a.length
&& iterator.hasNext(); ++j) {
a[j] = iterator.next();
}
if (j < a.length) {
a[j] = null;
}
return a;
}
@Override
public String toString() {
StringBuffer sb = new StringBuffer();
sb.append('{');
FloatIterator keyIterator = keyIterator();
while (keyIterator.hasNext()) {
float key = keyIterator.next();
sb.append(key);
sb.append('=');
sb.append(get(key));
if (keyIterator.hasNext()) {
sb.append(',');
sb.append(' ');
}
}
sb.append('}');
return sb.toString();
}
@Override
public int hashCode() {
return getClass().hashCode() ^ size();
}
@SuppressWarnings("unchecked")
@Override
public boolean equals(Object o) {
if (!(o instanceof FloatToObjectMap)) {
return false;
}
FloatToObjectMap<T> that = (FloatToObjectMap<T>) o;
if (that.size() != this.size()) {
return false;
}
FloatIterator it = keyIterator();
while (it.hasNext()) {
float key = it.next();
if (!that.containsKey(key)) {
return false;
}
T v1 = this.get(key);
T v2 = that.get(key);
if (v1 != v2 && (v1 == null || !v1.equals(v2))) {
return false;
}
}
return true;
}
}
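// A minimal usage sketch for the map above (illustrative only, not part of the
// original file), relying solely on the constructor and methods defined in it:
FloatToObjectMap<String> map = new FloatToObjectMap<String>(16);
map.put(1.5f, "a");
map.put(2.5f, "b");
String old = map.put(1.5f, "c");         // returns "a", the previously mapped value
System.out.println(map.get(2.5f));       // prints "b"
System.out.println(map.containsKey(9f)); // prints "false"
for (FloatIterator it = map.keyIterator(); it.hasNext();) {
float key = it.next();
System.out.println(key + " -> " + map.get(key));
}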

View File

@ -1,252 +0,0 @@
package org.apache.lucene.facet.collections;
import java.util.Arrays;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* A Class wrapper for a grow-able int[] which can be sorted and intersect with
* other IntArrays.
*
* @lucene.experimental
*/
public class IntArray {
/**
* The int[] which holds the data
*/
private int[] data;
/**
* Holds the number of items in the array.
*/
private int size;
/**
* A flag which indicates whether a sort should occur or the array is
* already sorted.
*/
private boolean shouldSort;
/**
* Construct a default IntArray, size 0, and surely a sort should not occur.
*/
public IntArray() {
init(true);
}
private void init(boolean realloc) {
size = 0;
if (realloc) {
data = new int[0];
}
shouldSort = false;
}
/**
* Intersects the data with a given {@link IntHashSet}.
*
* @param set
* A given IntHashSet which holds the data to be intersected
* against
*/
public void intersect(IntHashSet set) {
int newSize = 0;
for (int i = 0; i < size; ++i) {
if (set.contains(data[i])) {
data[newSize] = data[i];
++newSize;
}
}
this.size = newSize;
}
/**
* Intersects the data with a given IntArray
*
* @param other
* A given IntArray which holds the data to be intersected against
*/
public void intersect(IntArray other) {
sort();
other.sort();
int myIndex = 0;
int otherIndex = 0;
int newSize = 0;
if (this.size > other.size) {
while (otherIndex < other.size && myIndex < size) {
while (otherIndex < other.size
&& other.data[otherIndex] < data[myIndex]) {
++otherIndex;
}
if (otherIndex == other.size) {
break;
}
while (myIndex < size && other.data[otherIndex] > data[myIndex]) {
++myIndex;
}
if (other.data[otherIndex] == data[myIndex]) {
data[newSize++] = data[myIndex];
++otherIndex;
++myIndex;
}
}
} else {
while (otherIndex < other.size && myIndex < size) {
while (myIndex < size && other.data[otherIndex] > data[myIndex]) {
++myIndex;
}
if (myIndex == size) {
break;
}
while (otherIndex < other.size
&& other.data[otherIndex] < data[myIndex]) {
++otherIndex;
}
if (other.data[otherIndex] == data[myIndex]) {
data[newSize++] = data[myIndex];
++otherIndex;
++myIndex;
}
}
}
this.size = newSize;
}
/**
* Return the size of the Array. Not the allocated size, but the number of
* values actually set.
*
* @return the (filled) size of the array
*/
public int size() {
return size;
}
/**
* Adds a value to the array.
*
* @param value
* value to be added
*/
public void addToArray(int value) {
if (size == data.length) {
int[] newArray = new int[2 * size + 1];
System.arraycopy(data, 0, newArray, 0, size);
data = newArray;
}
data[size] = value;
++size;
shouldSort = true;
}
/**
* Equals method. Checks the sizes first, then the values from the last index
* to the first (statistically, for random data the order would not matter,
* but for our specific use it finds differences faster).
*/
@Override
public boolean equals(Object o) {
if (!(o instanceof IntArray)) {
return false;
}
IntArray array = (IntArray) o;
if (array.size != size) {
return false;
}
sort();
array.sort();
boolean equal = true;
for (int i = size; i > 0 && equal;) {
--i;
equal = (array.data[i] == this.data[i]);
}
return equal;
}
/**
* Sorts the data. If it is needed.
*/
public void sort() {
if (shouldSort) {
shouldSort = false;
Arrays.sort(data, 0, size);
}
}
/**
* Calculates a hash-code for HashTables
*/
@Override
public int hashCode() {
int hash = 0;
for (int i = 0; i < size; ++i) {
hash = data[i] ^ (hash * 31);
}
return hash;
}
/**
* Get an element from a specific index.
*
* @param i
* index of which element should be retrieved.
*/
public int get(int i) {
if (i >= size) {
throw new ArrayIndexOutOfBoundsException(i);
}
return this.data[i];
}
public void set(int idx, int value) {
if (idx >= size) {
throw new ArrayIndexOutOfBoundsException(idx);
}
this.data[idx] = value;
}
/**
* toString or not toString. That is the question!
*/
@Override
public String toString() {
String s = "(" + size + ") ";
for (int i = 0; i < size; ++i) {
s += "" + data[i] + ", ";
}
return s;
}
/**
* Clear the IntArray (set all elements to zero).
* @param resize - if resize is true, then clear actually allocates
* a new array of size 0, essentially 'clearing' the array and freeing
* memory.
*/
public void clear(boolean resize) {
init(resize);
}
}
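// A brief usage sketch (illustrative only, not part of the original file) showing
// the sort-then-intersect behaviour documented above:
IntArray a = new IntArray();
a.addToArray(7);
a.addToArray(3);
a.addToArray(5);
IntArray b = new IntArray();
b.addToArray(5);
b.addToArray(9);
b.addToArray(3);
a.intersect(b);               // sorts both arrays and keeps only the common values
System.out.println(a.size()); // prints "2"
System.out.println(a);        // prints "(2) 3, 5, " in the toString() format above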

View File

@ -1,548 +0,0 @@
package org.apache.lucene.facet.collections;
import java.util.Arrays;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* A Set of primitive ints, implemented as a hash map of int->int.
*
* @lucene.experimental
*/
public class IntHashSet {
// TODO (Facet): This is wasteful as the "values" are actually the "keys" and
// we could spare this amount of space (capacity * sizeof(int)). Perhaps even
// though it is not OOP, we should re-implement the hash for just that cause.
/**
* Implements an IntIterator which iterates over all the allocated indexes.
*/
private final class IndexIterator implements IntIterator {
/**
* The last used baseHashIndex. Needed for "jumping" from one hash entry
* to another.
*/
private int baseHashIndex = 0;
/**
* The next not-yet-visited index.
*/
private int index = 0;
/**
* Index of the last visited pair. Used in {@link #remove()}.
*/
private int lastIndex = 0;
/**
* Create the Iterator, make <code>index</code> point to the "first"
* index which is not empty. If no such index exists (e.g. the map is
* empty) it will be zero.
*/
public IndexIterator() {
for (baseHashIndex = 0; baseHashIndex < baseHash.length; ++baseHashIndex) {
index = baseHash[baseHashIndex];
if (index != 0) {
break;
}
}
}
@Override
public boolean hasNext() {
return (index != 0);
}
@Override
public int next() {
// Save the last index visited
lastIndex = index;
// next the index
index = next[index];
// if the next index points to the 'Ground' it means we're done with
// the current hash entry and we need to jump to the next one. This
// is done until all the hash entries had been visited.
while (index == 0 && ++baseHashIndex < baseHash.length) {
index = baseHash[baseHashIndex];
}
return lastIndex;
}
@Override
public void remove() {
IntHashSet.this.remove(keys[lastIndex]);
}
}
/**
* Implements an IntIterator, used for iteration over the map's keys.
*/
private final class KeyIterator implements IntIterator {
private IntIterator iterator = new IndexIterator();
KeyIterator() { }
@Override
public boolean hasNext() {
return iterator.hasNext();
}
@Override
public int next() {
return keys[iterator.next()];
}
@Override
public void remove() {
iterator.remove();
}
}
/**
* Default capacity - in case no capacity was specified in the constructor
*/
private static int defaultCapacity = 16;
/**
* Holds the base hash entries. If the capacity is 2^N, then the base hash
* holds 2^(N+1) entries.
*/
int[] baseHash;
/**
* The current capacity of the map. Always 2^N and never less than 16. We
* never use the zero index. It is needed to improve performance and is also
* used as "ground".
*/
private int capacity;
/**
* All entries are allocated at creation time and start out "free" or empty.
* Whenever a new element comes along, an entry is "allocated", i.e. taken
* from the head of the free list.
*/
private int firstEmpty;
/**
* hashFactor is always (2^(N+1)) - 1. Used for faster hashing.
*/
private int hashFactor;
/**
* This array holds the unique keys
*/
int[] keys;
/**
* In case of collisions, we implement a double linked list of the colliding
* hashes with the following next[] and prev[]. Those are also used to store
* the "empty" list.
*/
int[] next;
private int prev;
/**
* Number of values currently in the set.
*/
private int size;
/**
* Constructs a set with default capacity.
*/
public IntHashSet() {
this(defaultCapacity);
}
/**
* Constructs a set with the given capacity. Capacity is adjusted to a
* power of 2, with a minimum of 16.
*
* @param capacity
* minimum capacity for the map.
*/
public IntHashSet(int capacity) {
this.capacity = 16;
// Minimum capacity is 16.
while (this.capacity < capacity) {
// Multiply by 2 as long as we're still under the requested capacity
this.capacity <<= 1;
}
// As mentioned, we use the first index (0) as 'Ground', so we need the
// length of the arrays to be one more than the capacity
int arrayLength = this.capacity + 1;
this.keys = new int[arrayLength];
this.next = new int[arrayLength];
// Hash entries are twice as big as the capacity.
int baseHashSize = this.capacity << 1;
this.baseHash = new int[baseHashSize];
// The hash factor is 2^M - 1 which is used as an "AND" hashing operator.
// {@link #calcBaseHash()}
this.hashFactor = baseHashSize - 1;
this.size = 0;
clear();
}
/**
* Adds a pair to the map. Takes the first empty position from the
* empty-linked-list's head - {@link #firstEmpty}.
*
* New pairs are always inserted to baseHash, and are followed by the old
* colliding pair.
*
* @param key
* integer which maps the given value
*/
private void prvt_add(int key) {
// Hash entry to which the new pair would be inserted
int hashIndex = calcBaseHashIndex(key);
// 'Allocating' a pair from the "Empty" list.
int objectIndex = firstEmpty;
// Setting data
firstEmpty = next[firstEmpty];
keys[objectIndex] = key;
// Inserting the new pair as the first node in the specific hash entry
next[objectIndex] = baseHash[hashIndex];
baseHash[hashIndex] = objectIndex;
// Announcing a new pair was added!
++size;
}
/**
* Calculating the baseHash index using the internal <code>hashFactor</code>
* .
*/
protected int calcBaseHashIndex(int key) {
return key & hashFactor;
}
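// Worked example (illustrative): with the default capacity of 16, baseHash has
// 32 entries and hashFactor == 31 (binary 11111), so a key such as 100
// (binary 1100100) is masked to 100 & 31 == 4 and lands in base hash entry 4,
// colliding there with any other key that is congruent to 4 modulo 32.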
/**
* Empties the map. Generates the "Empty" space list for later allocation.
*/
public void clear() {
// Clears the hash entries
Arrays.fill(this.baseHash, 0);
// Set size to zero
size = 0;
// Mark all array entries as empty. This is done with
// <code>firstEmpty</code> pointing to the first valid index (1 as 0 is
// used as 'Ground').
firstEmpty = 1;
// And setting all the <code>next[i]</code> to point at
// <code>i+1</code>.
for (int i = 1; i < this.capacity;) {
next[i] = ++i;
}
// Surely, the last one should point to the 'Ground'.
next[this.capacity] = 0;
}
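// Illustrative state after clear() with capacity 16: firstEmpty == 1,
// next[1..15] hold 2..16 and next[16] == 0, so the free slots form the chain
// 1 -> 2 -> ... -> 16 -> 'Ground', from which prvt_add() takes new entries.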
/**
* Checks if a given value exists in the set.
*
* @param value
* that is checked against the map data.
* @return true if the value exists in the set. false otherwise.
*/
public boolean contains(int value) {
return find(value) != 0;
}
/**
* Find the actual index of a given key.
*
* @return index of the key. zero if the key wasn't found.
*/
protected int find(int key) {
// Calculate the hash entry.
int baseHashIndex = calcBaseHashIndex(key);
// Start from the hash entry.
int localIndex = baseHash[baseHashIndex];
// while the index does not point to the 'Ground'
while (localIndex != 0) {
// returns the index found in case of a matching key.
if (keys[localIndex] == key) {
return localIndex;
}
// next the local index
localIndex = next[localIndex];
}
// If we got this far, it could only mean we did not find the key we
// were asked for. return 'Ground' index.
return 0;
}
/**
* Find the actual index of a given key with its baseHashIndex.<br>
* Some methods use the baseHashIndex. If those call {@link #find} there's
* no need to re-calculate that hash.
*
* @return the index of the given key, or 0 as 'Ground' if the key wasn't
* found.
*/
private int findForRemove(int key, int baseHashIndex) {
// Start from the hash entry.
this.prev = 0;
int index = baseHash[baseHashIndex];
// while the index does not point to the 'Ground'
while (index != 0) {
// returns the index found in case of a matching key.
if (keys[index] == key) {
return index;
}
// next the local index
prev = index;
index = next[index];
}
// If we got this far, it could only mean we did not find the key we
// were asked for. return 'Ground' index.
this.prev = 0;
return 0;
}
/**
* Grows the map. Allocates a new map of double the capacity, and
* fast-inserts the old key-value pairs.
*/
protected void grow() {
IntHashSet that = new IntHashSet(this.capacity * 2);
// Iterates fast over the collection. Any valid pair is put into the new
// map without checking for duplicates or if there's enough space for
// it.
for (IndexIterator iterator = new IndexIterator(); iterator.hasNext();) {
int index = iterator.next();
that.prvt_add(this.keys[index]);
}
// for (int i = capacity; i > 0; --i) {
//
// that._add(this.keys[i]);
//
// }
// Copy that's data into this.
this.capacity = that.capacity;
this.size = that.size;
this.firstEmpty = that.firstEmpty;
this.keys = that.keys;
this.next = that.next;
this.baseHash = that.baseHash;
this.hashFactor = that.hashFactor;
}
/**
*
* @return true if the map is empty. false otherwise.
*/
public boolean isEmpty() {
return size == 0;
}
/**
* Returns a new iterator for the mapped objects.
*/
public IntIterator iterator() {
return new KeyIterator();
}
/**
* Prints the baseHash array, used for debug purposes.
*/
@SuppressWarnings("unused")
private String getBaseHashAsString() {
return Arrays.toString(this.baseHash);
}
/**
* Adds a value to the set.
* <p>
* If the value is already present, the set is left unchanged.
* <p>
* Otherwise, if the set is full, it is first grown via {@link #grow()}.
*
* @param value
* the value to add
* @return true always.
*/
public boolean add(int value) {
// Does the value already exist?
int index = find(value);
// Yes!
if (index != 0) {
return true;
}
// Is there enough room for a new pair?
if (size == capacity) {
// No? Then grow up!
grow();
}
// Now that everything is set, the pair can be just put inside with no
// worries.
prvt_add(value);
return true;
}
/**
* Removes a value from the set.
*
* @param value
* the value to be removed
*
* @return true if the set was changed (the value was found and removed).
* false otherwise.
*/
public boolean remove(int value) {
int baseHashIndex = calcBaseHashIndex(value);
int index = findForRemove(value, baseHashIndex);
if (index != 0) {
// If it is the first in the collision list, we should promote its
// next colliding element.
if (prev == 0) {
baseHash[baseHashIndex] = next[index];
}
next[prev] = next[index];
next[index] = firstEmpty;
firstEmpty = index;
--size;
return true;
}
return false;
}
/**
* @return number of pairs currently in the map
*/
public int size() {
return this.size;
}
/**
* Translates the set's values into an array of ints
*
* @return an int array of all the values currently in the set.
*/
public int[] toArray() {
int j = -1;
int[] array = new int[size];
// Iterates over the values, adding them to the array.
for (IntIterator iterator = iterator(); iterator.hasNext();) {
array[++j] = iterator.next();
}
return array;
}
/**
* Translates the mapped pairs' values into an array of ints
*
* @param a
* the array into which the elements of the map are to be stored,
* if it is big enough; otherwise, a new array of the same
* runtime type is allocated for this purpose.
*
* @return an array containing the values stored in the map
*
*/
public int[] toArray(int[] a) {
int j = 0;
if (a.length < size) {
a = new int[size];
}
// Iterates over the values, adding them to the array.
for (IntIterator iterator = iterator(); j < a.length
&& iterator.hasNext(); ++j) {
a[j] = iterator.next();
}
return a;
}
/**
* I have no idea why anyone would call it - but for debug purposes.<br>
* Prints the entire map, including the index, key, object, next and prev.
*/
@Override
public String toString() {
StringBuffer sb = new StringBuffer();
sb.append('{');
IntIterator iterator = iterator();
while (iterator.hasNext()) {
sb.append(iterator.next());
if (iterator.hasNext()) {
sb.append(',');
sb.append(' ');
}
}
sb.append('}');
return sb.toString();
}
public String toHashString() {
String string = "\n";
StringBuffer sb = new StringBuffer();
for (int i = 0; i < this.baseHash.length; i++) {
StringBuffer sb2 = new StringBuffer();
boolean shouldAppend = false;
sb2.append(i + ".\t");
for (int index = baseHash[i]; index != 0; index = next[index]) {
sb2.append(" -> " + keys[index] + "@" + index);
shouldAppend = true;
}
if (shouldAppend) {
sb.append(sb2);
sb.append(string);
}
}
return sb.toString();
}
}
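// A minimal usage sketch for the set (illustrative only, not part of the original
// file), using just the methods defined above:
IntHashSet set = new IntHashSet();
set.add(42);
set.add(7);
set.add(42);                          // already present; the set is unchanged, add() still returns true
System.out.println(set.size());       // prints "2"
System.out.println(set.contains(7));  // prints "true"
set.remove(7);
for (IntIterator it = set.iterator(); it.hasNext();) {
System.out.println(it.next());        // prints the single remaining value, 42
}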

View File

@ -1,31 +0,0 @@
package org.apache.lucene.facet.collections;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Iterator interface for primitive int iteration.
*
* @lucene.experimental
*/
public interface IntIterator {
boolean hasNext();
int next();
void remove();
}

View File

@ -1,631 +0,0 @@
package org.apache.lucene.facet.collections;
import java.util.Arrays;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* An Array-based hashtable which maps primitive int to a primitive double.<br>
* The hashtable is constructed with a given capacity, or 16 as a default. In
* case there's not enough room for new pairs, the hashtable grows. <br>
* Capacity is adjusted to a power of 2, and there are 2 * capacity entries for
* the hash.
*
* The pre-allocated arrays (for keys, values) have a length of capacity + 1,
* where index 0 is used as 'Ground' or 'NULL'.<br>
*
* The arrays are allocated ahead of hash operations, and form an 'empty space'
* list, from which key,value pairs are allocated.
*
* @lucene.experimental
*/
public class IntToDoubleMap {
public static final double GROUND = Double.NaN;
/**
* Implements an IntIterator which iterates over all the allocated indexes.
*/
private final class IndexIterator implements IntIterator {
/**
* The last used baseHashIndex. Needed for "jumping" from one hash entry
* to another.
*/
private int baseHashIndex = 0;
/**
* The next not-yet-visited index.
*/
private int index = 0;
/**
* Index of the last visited pair. Used in {@link #remove()}.
*/
private int lastIndex = 0;
/**
* Create the Iterator, make <code>index</code> point to the "first"
* index which is not empty. If no such index exists (e.g. the map is
* empty) it will be zero.
*/
public IndexIterator() {
for (baseHashIndex = 0; baseHashIndex < baseHash.length; ++baseHashIndex) {
index = baseHash[baseHashIndex];
if (index != 0) {
break;
}
}
}
@Override
public boolean hasNext() {
return (index != 0);
}
@Override
public int next() {
// Save the last index visited
lastIndex = index;
// next the index
index = next[index];
// if the next index points to the 'Ground' it means we're done with
// the current hash entry and we need to jump to the next one. This
// is done until all the hash entries had been visited.
while (index == 0 && ++baseHashIndex < baseHash.length) {
index = baseHash[baseHashIndex];
}
return lastIndex;
}
@Override
public void remove() {
IntToDoubleMap.this.remove(keys[lastIndex]);
}
}
/**
* Implements an IntIterator, used for iteration over the map's keys.
*/
private final class KeyIterator implements IntIterator {
private IntIterator iterator = new IndexIterator();
KeyIterator() { }
@Override
public boolean hasNext() {
return iterator.hasNext();
}
@Override
public int next() {
return keys[iterator.next()];
}
@Override
public void remove() {
iterator.remove();
}
}
/**
* Implements a DoubleIterator used for iteration over the map's values.
*/
private final class ValueIterator implements DoubleIterator {
private IntIterator iterator = new IndexIterator();
ValueIterator() { }
@Override
public boolean hasNext() {
return iterator.hasNext();
}
@Override
public double next() {
return values[iterator.next()];
}
@Override
public void remove() {
iterator.remove();
}
}
/**
* Default capacity - in case no capacity was specified in the constructor
*/
private static int defaultCapacity = 16;
/**
* Holds the base hash entries. If the capacity is 2^N, then the base hash
* holds 2^(N+1) entries.
*/
int[] baseHash;
/**
* The current capacity of the map. Always 2^N and never less than 16. We
* never use the zero index. It is needed to improve performance and is also
* used as "ground".
*/
private int capacity;
/**
* All entries are allocated at map creation and start out "free" or empty.
* Whenever a new pair comes along, an entry is "allocated", i.e. taken from
* the head of the free list.
*/
private int firstEmpty;
/**
* hashFactor is always (2^(N+1)) - 1. Used for faster hashing.
*/
private int hashFactor;
/**
* This array holds the unique keys
*/
int[] keys;
/**
* In case of collisions, we implement a double linked list of the colliding
* hashes with the following next[] and prev[]. Those are also used to store
* the "empty" list.
*/
int[] next;
private int prev;
/**
* Number of objects currently in the map.
*/
private int size;
/**
* This array holds the values
*/
double[] values;
/**
* Constructs a map with default capacity.
*/
public IntToDoubleMap() {
this(defaultCapacity);
}
/**
* Constructs a map with the given capacity. Capacity is adjusted to a
* power of 2, with a minimum of 16.
*
* @param capacity
* minimum capacity for the map.
*/
public IntToDoubleMap(int capacity) {
this.capacity = 16;
// Minimum capacity is 16.
while (this.capacity < capacity) {
// Multiply by 2 as long as we're still under the requested capacity
this.capacity <<= 1;
}
// As mentioned, we use the first index (0) as 'Ground', so we need the
// length of the arrays to be one more than the capacity
int arrayLength = this.capacity + 1;
this.values = new double[arrayLength];
this.keys = new int[arrayLength];
this.next = new int[arrayLength];
// Hash entries are twice as big as the capacity.
int baseHashSize = this.capacity << 1;
this.baseHash = new int[baseHashSize];
this.values[0] = GROUND;
// The hash factor is 2^M - 1 which is used as an "AND" hashing operator.
// {@link #calcBaseHash()}
this.hashFactor = baseHashSize - 1;
this.size = 0;
clear();
}
/**
* Adds a pair to the map. Takes the first empty position from the
* empty-linked-list's head - {@link #firstEmpty}.
*
* New pairs are always inserted to baseHash, and are followed by the old
* colliding pair.
*
* @param key
* integer which maps the given Object
* @param v
* double value which is being mapped using the given key
*/
private void prvt_put(int key, double v) {
// Hash entry to which the new pair would be inserted
int hashIndex = calcBaseHashIndex(key);
// 'Allocating' a pair from the "Empty" list.
int objectIndex = firstEmpty;
// Setting data
firstEmpty = next[firstEmpty];
values[objectIndex] = v;
keys[objectIndex] = key;
// Inserting the new pair as the first node in the specific hash entry
next[objectIndex] = baseHash[hashIndex];
baseHash[hashIndex] = objectIndex;
// Announcing a new pair was added!
++size;
}
/**
* Calculating the baseHash index using the internal <code>hashFactor</code>
* .
*/
protected int calcBaseHashIndex(int key) {
return key & hashFactor;
}
/**
* Empties the map. Generates the "Empty" space list for later allocation.
*/
public void clear() {
// Clears the hash entries
Arrays.fill(this.baseHash, 0);
// Set size to zero
size = 0;
// Mark all array entries as empty. This is done with
// <code>firstEmpty</code> pointing to the first valid index (1 as 0 is
// used as 'Ground').
firstEmpty = 1;
// And setting all the <code>next[i]</code> to point at
// <code>i+1</code>.
for (int i = 1; i < this.capacity;) {
next[i] = ++i;
}
// Surely, the last one should point to the 'Ground'.
next[this.capacity] = 0;
}
/**
* Checks if a given key exists in the map.
*
* @param key
* that is checked against the map data.
* @return true if the key exists in the map. false otherwise.
*/
public boolean containsKey(int key) {
return find(key) != 0;
}
/**
* Checks if the given value exists in the map.<br>
* This method iterates over the collection, trying to find an equal object.
*
* @param value
* double value that is checked against the map data.
* @return true if the value exists in the map, false otherwise.
*/
public boolean containsValue(double value) {
for (DoubleIterator iterator = iterator(); iterator.hasNext();) {
double d = iterator.next();
if (d == value) {
return true;
}
}
return false;
}
/**
* Find the actual index of a given key.
*
* @return index of the key. zero if the key wasn't found.
*/
protected int find(int key) {
// Calculate the hash entry.
int baseHashIndex = calcBaseHashIndex(key);
// Start from the hash entry.
int localIndex = baseHash[baseHashIndex];
// while the index does not point to the 'Ground'
while (localIndex != 0) {
// returns the index found in case of a matching key.
if (keys[localIndex] == key) {
return localIndex;
}
// next the local index
localIndex = next[localIndex];
}
// If we got this far, it could only mean we did not find the key we
// were asked for. return 'Ground' index.
return 0;
}
/**
* Find the actual index of a given key with its baseHashIndex.<br>
* Some methods use the baseHashIndex. If those call {@link #find} there's
* no need to re-calculate that hash.
*
* @return the index of the given key, or 0 as 'Ground' if the key wasn't
* found.
*/
private int findForRemove(int key, int baseHashIndex) {
// Start from the hash entry.
this.prev = 0;
int index = baseHash[baseHashIndex];
// while the index does not point to the 'Ground'
while (index != 0) {
// returns the index found in case of a matching key.
if (keys[index] == key) {
return index;
}
// next the local index
prev = index;
index = next[index];
}
// If we got this far, it could only mean we did not find the key we
// were asked for. return 'Ground' index.
this.prev = 0;
return 0;
}
/**
* Returns the value mapped with the given key.
*
* @param key
* int whose mapped value we're interested in.
* @return a double value mapped by the given key. Double.NaN if the key wasn't found.
*/
public double get(int key) {
return values[find(key)];
}
/**
* Grows the map. Allocates a new map of double the capacity, and
* fast-inserts the old key-value pairs.
*/
protected void grow() {
IntToDoubleMap that = new IntToDoubleMap(
this.capacity * 2);
// Iterates fast over the collection. Any valid pair is put into the new
// map without checking for duplicates or if there's enough space for
// it.
for (IndexIterator iterator = new IndexIterator(); iterator.hasNext();) {
int index = iterator.next();
that.prvt_put(this.keys[index], this.values[index]);
}
// Copy that's data into this.
this.capacity = that.capacity;
this.size = that.size;
this.firstEmpty = that.firstEmpty;
this.values = that.values;
this.keys = that.keys;
this.next = that.next;
this.baseHash = that.baseHash;
this.hashFactor = that.hashFactor;
}
/**
*
* @return true if the map is empty. false otherwise.
*/
public boolean isEmpty() {
return size == 0;
}
/**
* Returns a new iterator for the mapped double values.
*/
public DoubleIterator iterator() {
return new ValueIterator();
}
/** Returns an iterator on the map keys. */
public IntIterator keyIterator() {
return new KeyIterator();
}
/**
* Prints the baseHash array, used for debug purposes.
*/
@SuppressWarnings("unused")
private String getBaseHashAsString() {
return Arrays.toString(this.baseHash);
}
/**
* Inserts the &lt;key,value&gt; pair into the map. If the key already exists,
* this method updates the mapped value to the given one, returning the old
* mapped value.
*
* @return the old mapped value, or {@link Double#NaN} if the key didn't exist.
*/
public double put(int key, double v) {
// Does the key exist?
int index = find(key);
// Yes!
if (index != 0) {
// Set new data and exit.
double old = values[index];
values[index] = v;
return old;
}
// Is there enough room for a new pair?
if (size == capacity) {
// No? Then grow up!
grow();
}
// Now that everything is set, the pair can be just put inside with no
// worries.
prvt_put(key, v);
return Double.NaN;
}
/**
* Removes a &lt;key,value&gt; pair from the map and returns the mapped value,
* or {@link Double#NaN} if none existed.
*
* @param key used to find the value to remove
* @return the removed value or {@link Double#NaN} if none existed.
*/
public double remove(int key) {
int baseHashIndex = calcBaseHashIndex(key);
int index = findForRemove(key, baseHashIndex);
if (index != 0) {
// If it is the first in the collision list, we should promote its
// next colliding element.
if (prev == 0) {
baseHash[baseHashIndex] = next[index];
}
next[prev] = next[index];
next[index] = firstEmpty;
firstEmpty = index;
--size;
return values[index];
}
return Double.NaN;
}
/**
* @return number of pairs currently in the map
*/
public int size() {
return this.size;
}
/**
* Translates the mapped pairs' values into an array of doubles
*
* @return a double array of all the values currently in the map.
*/
public double[] toArray() {
int j = -1;
double[] array = new double[size];
// Iterates over the values, adding them to the array.
for (DoubleIterator iterator = iterator(); iterator.hasNext();) {
array[++j] = iterator.next();
}
return array;
}
/**
* Translates the mapped pairs' values into an array of doubles
*
* @param a
* the array into which the values are to be stored, if it is big
* enough; otherwise a new array is allocated. If there is room to spare,
* the entry following the last value is set to {@link Double#NaN}.
*
* @return an array containing the elements of the list, using the given
* parameter if big enough, otherwise allocate an appropriate array
* and return it.
*
*/
public double[] toArray(double[] a) {
int j = 0;
if (a.length < this.size()) {
a = new double[this.size()];
}
// Iterates over the values, adding them to the array.
for (DoubleIterator iterator = iterator(); iterator.hasNext(); ++j) {
a[j] = iterator.next();
}
if (j < a.length) {
a[j] = Double.NaN;
}
return a;
}
@Override
public String toString() {
StringBuffer sb = new StringBuffer();
sb.append('{');
IntIterator keyIterator = keyIterator();
while (keyIterator.hasNext()) {
int key = keyIterator.next();
sb.append(key);
sb.append('=');
sb.append(get(key));
if (keyIterator.hasNext()) {
sb.append(',');
sb.append(' ');
}
}
sb.append('}');
return sb.toString();
}
@Override
public int hashCode() {
return getClass().hashCode() ^ size();
}
@Override
public boolean equals(Object o) {
if (!(o instanceof IntToDoubleMap)) {
return false;
}
IntToDoubleMap that = (IntToDoubleMap) o;
if (that.size() != this.size()) {
return false;
}
IntIterator it = keyIterator();
while (it.hasNext()) {
int key = it.next();
if (!that.containsKey(key)) {
return false;
}
double v1 = this.get(key);
double v2 = that.get(key);
if (Double.compare(v1, v2) != 0) {
return false;
}
}
return true;
}
}
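// A small usage sketch (illustrative only, not part of the original file)
// demonstrating the NaN 'Ground' convention for missing keys:
IntToDoubleMap weights = new IntToDoubleMap();
weights.put(1, 0.5);
weights.put(2, 1.25);
double previous = weights.put(1, 0.75);  // returns 0.5, the old mapping for key 1
System.out.println(weights.get(2));      // prints "1.25"
System.out.println(weights.get(3));      // prints "NaN", the 'Ground' value for a missing key
for (IntIterator it = weights.keyIterator(); it.hasNext();) {
int key = it.next();
System.out.println(key + "=" + weights.get(key));
}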

View File

@ -1,631 +0,0 @@
package org.apache.lucene.facet.collections;
import java.util.Arrays;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* An Array-based hashtable which maps primitive int to a primitive float.<br>
* The hashtable is constructed with a given capacity, or 16 as a default. In
* case there's not enough room for new pairs, the hashtable grows. <br>
* Capacity is adjusted to a power of 2, and there are 2 * capacity entries for
* the hash.
*
* The pre-allocated arrays (for keys, values) have a length of capacity + 1,
* where index 0 is used as 'Ground' or 'NULL'.<br>
*
* The arrays are allocated ahead of hash operations, and form an 'empty space'
* list, from which key,value pairs are allocated.
*
* @lucene.experimental
*/
public class IntToFloatMap {
public static final float GROUND = Float.NaN;
/**
* Implements an IntIterator which iterates over all the allocated indexes.
*/
private final class IndexIterator implements IntIterator {
/**
* The last used baseHashIndex. Needed for "jumping" from one hash entry
* to another.
*/
private int baseHashIndex = 0;
/**
* The next not-yet-visited index.
*/
private int index = 0;
/**
* Index of the last visited pair. Used in {@link #remove()}.
*/
private int lastIndex = 0;
/**
* Create the Iterator, make <code>index</code> point to the "first"
* index which is not empty. If no such index exists (e.g. the map is
* empty) it will be zero.
*/
public IndexIterator() {
for (baseHashIndex = 0; baseHashIndex < baseHash.length; ++baseHashIndex) {
index = baseHash[baseHashIndex];
if (index != 0) {
break;
}
}
}
@Override
public boolean hasNext() {
return (index != 0);
}
@Override
public int next() {
// Save the last index visited
lastIndex = index;
// next the index
index = next[index];
// if the next index points to the 'Ground' it means we're done with
// the current hash entry and we need to jump to the next one. This
// is done until all the hash entries had been visited.
while (index == 0 && ++baseHashIndex < baseHash.length) {
index = baseHash[baseHashIndex];
}
return lastIndex;
}
@Override
public void remove() {
IntToFloatMap.this.remove(keys[lastIndex]);
}
}
/**
* Implements an IntIterator, used for iteration over the map's keys.
*/
private final class KeyIterator implements IntIterator {
private IntIterator iterator = new IndexIterator();
KeyIterator() { }
@Override
public boolean hasNext() {
return iterator.hasNext();
}
@Override
public int next() {
return keys[iterator.next()];
}
@Override
public void remove() {
iterator.remove();
}
}
/**
* Implements a FloatIterator used for iteration over the map's values.
*/
private final class ValueIterator implements FloatIterator {
private IntIterator iterator = new IndexIterator();
ValueIterator() { }
@Override
public boolean hasNext() {
return iterator.hasNext();
}
@Override
public float next() {
return values[iterator.next()];
}
@Override
public void remove() {
iterator.remove();
}
}
/**
* Default capacity - in case no capacity was specified in the constructor
*/
private static int defaultCapacity = 16;
/**
* Holds the base hash entries. If the capacity is 2^N, then the base hash
* holds 2^(N+1) entries.
*/
int[] baseHash;
/**
* The current capacity of the map. Always 2^N and never less than 16. We
* never use the zero index. It is needed to improve performance and is also
* used as "ground".
*/
private int capacity;
/**
* All entries are allocated at map creation and start out "free" or empty.
* Whenever a new pair comes along, an entry is "allocated", i.e. taken from
* the head of the free list.
*/
private int firstEmpty;
/**
* hashFactor is always (2^(N+1)) - 1. Used for faster hashing.
*/
private int hashFactor;
/**
* This array holds the unique keys
*/
int[] keys;
/**
* In case of collisions, we implement a double linked list of the colliding
* hashes with the following next[] and prev[]. Those are also used to store
* the "empty" list.
*/
int[] next;
private int prev;
/**
* Number of objects currently in the map.
*/
private int size;
/**
* This array holds the values
*/
float[] values;
/**
* Constructs a map with default capacity.
*/
public IntToFloatMap() {
this(defaultCapacity);
}
/**
* Constructs a map with the given capacity. Capacity is adjusted to a
* power of 2, with a minimum of 16.
*
* @param capacity
* minimum capacity for the map.
*/
public IntToFloatMap(int capacity) {
this.capacity = 16;
// Minimum capacity is 16.
while (this.capacity < capacity) {
// Multiply by 2 as long as we're still under the requested capacity
this.capacity <<= 1;
}
// As mentioned, we use the first index (0) as 'Ground', so we need the
// length of the arrays to be one more than the capacity
int arrayLength = this.capacity + 1;
this.values = new float[arrayLength];
this.keys = new int[arrayLength];
this.next = new int[arrayLength];
// Hash entries are twice as big as the capacity.
int baseHashSize = this.capacity << 1;
this.baseHash = new int[baseHashSize];
this.values[0] = GROUND;
// The hash factor is 2^M - 1 which is used as an "AND" hashing operator.
// {@link #calcBaseHash()}
this.hashFactor = baseHashSize - 1;
this.size = 0;
clear();
}
/**
* Adds a pair to the map. Takes the first empty position from the
* empty-linked-list's head - {@link #firstEmpty}.
*
* New pairs are always inserted to baseHash, and are followed by the old
* colliding pair.
*
* @param key
* integer which maps the given Object
* @param v
* float value which is being mapped using the given key
*/
private void prvt_put(int key, float v) {
// Hash entry to which the new pair would be inserted
int hashIndex = calcBaseHashIndex(key);
// 'Allocating' a pair from the "Empty" list.
int objectIndex = firstEmpty;
// Setting data
firstEmpty = next[firstEmpty];
values[objectIndex] = v;
keys[objectIndex] = key;
// Inserting the new pair as the first node in the specific hash entry
next[objectIndex] = baseHash[hashIndex];
baseHash[hashIndex] = objectIndex;
// Announcing a new pair was added!
++size;
}
/**
* Calculating the baseHash index using the internal <code>hashFactor</code>
* .
*/
protected int calcBaseHashIndex(int key) {
return key & hashFactor;
}
/**
* Empties the map. Generates the "Empty" space list for later allocation.
*/
public void clear() {
// Clears the hash entries
Arrays.fill(this.baseHash, 0);
// Set size to zero
size = 0;
// Mark all array entries as empty. This is done with
// <code>firstEmpty</code> pointing to the first valid index (1 as 0 is
// used as 'Ground').
firstEmpty = 1;
// And setting all the <code>next[i]</code> to point at
// <code>i+1</code>.
for (int i = 1; i < this.capacity;) {
next[i] = ++i;
}
// Surely, the last one should point to the 'Ground'.
next[this.capacity] = 0;
}
/**
* Checks if a given key exists in the map.
*
* @param key
* that is checked against the map data.
* @return true if the key exists in the map. false otherwise.
*/
public boolean containsKey(int key) {
return find(key) != 0;
}
/**
* Checks if the given value exists in the map.<br>
* This method iterates over the collection, trying to find an equal object.
*
* @param value
* float value that is checked against the map data.
* @return true if the value exists in the map, false otherwise.
*/
public boolean containsValue(float value) {
for (FloatIterator iterator = iterator(); iterator.hasNext();) {
float d = iterator.next();
if (d == value) {
return true;
}
}
return false;
}
/**
* Find the actual index of a given key.
*
* @return index of the key. zero if the key wasn't found.
*/
protected int find(int key) {
// Calculate the hash entry.
int baseHashIndex = calcBaseHashIndex(key);
// Start from the hash entry.
int localIndex = baseHash[baseHashIndex];
// while the index does not point to the 'Ground'
while (localIndex != 0) {
// returns the index found in case of a matching key.
if (keys[localIndex] == key) {
return localIndex;
}
// next the local index
localIndex = next[localIndex];
}
// If we got this far, it could only mean we did not find the key we
// were asked for. return 'Ground' index.
return 0;
}
/**
* Find the actual index of a given key with its baseHashIndex.<br>
* Some methods use the baseHashIndex. If those call {@link #find} there's
* no need to re-calculate that hash.
*
* @return the index of the given key, or 0 as 'Ground' if the key wasn't
* found.
*/
private int findForRemove(int key, int baseHashIndex) {
// Start from the hash entry.
this.prev = 0;
int index = baseHash[baseHashIndex];
// while the index does not point to the 'Ground'
while (index != 0) {
// returns the index found in case of a matching key.
if (keys[index] == key) {
return index;
}
// next the local index
prev = index;
index = next[index];
}
// If we got this far, it could only mean we did not find the key we
// were asked for. return 'Ground' index.
this.prev = 0;
return 0;
}
/**
* Returns the value mapped with the given key.
*
* @param key
* int whose mapped value we're interested in.
* @return a float value mapped by the given key. Float.NaN if the key wasn't found.
*/
public float get(int key) {
return values[find(key)];
}
/**
* Grows the map. Allocates a new map of double the capacity, and
* fast-inserts the old key-value pairs.
*/
protected void grow() {
IntToFloatMap that = new IntToFloatMap(
this.capacity * 2);
// Iterates fast over the collection. Any valid pair is put into the new
// map without checking for duplicates or if there's enough space for
// it.
for (IndexIterator iterator = new IndexIterator(); iterator.hasNext();) {
int index = iterator.next();
that.prvt_put(this.keys[index], this.values[index]);
}
// Copy that's data into this.
this.capacity = that.capacity;
this.size = that.size;
this.firstEmpty = that.firstEmpty;
this.values = that.values;
this.keys = that.keys;
this.next = that.next;
this.baseHash = that.baseHash;
this.hashFactor = that.hashFactor;
}
/**
*
* @return true if the map is empty. false otherwise.
*/
public boolean isEmpty() {
return size == 0;
}
/**
* Returns a new iterator for the mapped float values.
*/
public FloatIterator iterator() {
return new ValueIterator();
}
/** Returns an iterator on the map keys. */
public IntIterator keyIterator() {
return new KeyIterator();
}
/**
* Prints the baseHash array, used for debug purposes.
*/
@SuppressWarnings("unused")
private String getBaseHashAsString() {
return Arrays.toString(this.baseHash);
}
/**
* Inserts the &lt;key,value&gt; pair into the map. If the key already exists,
* this method updates the mapped value to the given one, returning the old
* mapped value.
*
* @return the old mapped value, or {@link Float#NaN} if the key didn't exist.
*/
public float put(int key, float v) {
// Does the key exist?
int index = find(key);
// Yes!
if (index != 0) {
// Set new data and exit.
float old = values[index];
values[index] = v;
return old;
}
// Is there enough room for a new pair?
if (size == capacity) {
// No? Then grow up!
grow();
}
// Now that everything is set, the pair can be just put inside with no
// worries.
prvt_put(key, v);
return Float.NaN;
}
/**
* Removes a &lt;key,value&gt; pair from the map and returns the mapped value,
* or {@link Float#NaN} if none existed.
*
* @param key used to find the value to remove
* @return the removed value or {@link Float#NaN} if none existed.
*/
public float remove(int key) {
int baseHashIndex = calcBaseHashIndex(key);
int index = findForRemove(key, baseHashIndex);
if (index != 0) {
// If it is the first in the collision list, we should promote its
// next colliding element.
if (prev == 0) {
baseHash[baseHashIndex] = next[index];
}
next[prev] = next[index];
next[index] = firstEmpty;
firstEmpty = index;
--size;
return values[index];
}
return Float.NaN;
}
/**
* @return number of pairs currently in the map
*/
public int size() {
return this.size;
}
/**
* Translates the mapped pairs' values into an array of floats
*
* @return a float array of all the values currently in the map.
*/
public float[] toArray() {
int j = -1;
float[] array = new float[size];
// Iterates over the values, adding them to the array.
for (FloatIterator iterator = iterator(); iterator.hasNext();) {
array[++j] = iterator.next();
}
return array;
}
/**
* Translates the mapped pairs' values into an array of floats
*
* @param a
* the array into which the values are to be stored, if it is big
* enough; otherwise a new array is allocated. If there is room to spare,
* the entry following the last value is set to {@link Float#NaN}.
*
* @return an array containing the elements of the list, using the given
* parameter if big enough, otherwise allocate an appropriate array
* and return it.
*
*/
public float[] toArray(float[] a) {
int j = 0;
if (a.length < this.size()) {
a = new float[this.size()];
}
// Iterates over the values, adding them to the array.
for (FloatIterator iterator = iterator(); iterator.hasNext(); ++j) {
a[j] = iterator.next();
}
if (j < a.length) {
a[j] = Float.NaN;
}
return a;
}
@Override
public String toString() {
StringBuffer sb = new StringBuffer();
sb.append('{');
IntIterator keyIterator = keyIterator();
while (keyIterator.hasNext()) {
int key = keyIterator.next();
sb.append(key);
sb.append('=');
sb.append(get(key));
if (keyIterator.hasNext()) {
sb.append(',');
sb.append(' ');
}
}
sb.append('}');
return sb.toString();
}
@Override
public int hashCode() {
return getClass().hashCode() ^ size();
}
@Override
public boolean equals(Object o) {
if (!(o instanceof IntToFloatMap)) {
return false;
}
IntToFloatMap that = (IntToFloatMap) o;
if (that.size() != this.size()) {
return false;
}
IntIterator it = keyIterator();
while (it.hasNext()) {
int key = it.next();
if (!that.containsKey(key)) {
return false;
}
float v1 = this.get(key);
float v2 = that.get(key);
if (Float.compare(v1, v2) != 0) {
return false;
}
}
return true;
}
}
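// An equivalent sketch for the float variant (illustrative only, not part of the
// original file); because Float.NaN doubles as the 'no value' marker, containsKey()
// is the reliable way to test membership:
IntToFloatMap scores = new IntToFloatMap(4);   // capacity is rounded up to the minimum of 16
scores.put(10, 3.5f);
scores.put(20, 7.0f);
System.out.println(scores.get(10));            // prints "3.5"
System.out.println(scores.containsKey(30));    // prints "false"; get(30) would return NaN
scores.remove(20);
System.out.println(scores.size());             // prints "1"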

View File

@ -1,622 +0,0 @@
package org.apache.lucene.facet.collections;
import java.util.Arrays;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* An Array-based hashtable which maps primitive int to primitive int.<br>
* The hashtable is constructed with a given capacity, or 16 as a default. In
* case there's not enough room for new pairs, the hashtable grows. <br>
* Capacity is adjusted to a power of 2, and there are 2 * capacity entries for
* the hash.
*
* The pre-allocated arrays (for keys, values) have a length of capacity + 1,
* where index 0 is used as 'Ground' or 'NULL'.<br>
*
* The arrays are allocated ahead of hash operations, and form an 'empty space'
* list, from which key,value pairs are allocated.
*
* @lucene.experimental
*/
public class IntToIntMap {
public static final int GROUD = -1;
/**
* Implements an IntIterator which iterates over all the allocated indexes.
*/
private final class IndexIterator implements IntIterator {
/**
* The last used baseHashIndex. Needed for "jumping" from one hash entry
* to another.
*/
private int baseHashIndex = 0;
/**
* The next not-yet-visited index.
*/
private int index = 0;
/**
* Index of the last visited pair. Used in {@link #remove()}.
*/
private int lastIndex = 0;
/**
* Create the Iterator, make <code>index</code> point to the "first"
* index which is not empty. If no such index exists (e.g. the map is
* empty) it will be zero.
*/
public IndexIterator() {
for (baseHashIndex = 0; baseHashIndex < baseHash.length; ++baseHashIndex) {
index = baseHash[baseHashIndex];
if (index != 0) {
break;
}
}
}
@Override
public boolean hasNext() {
return (index != 0);
}
@Override
public int next() {
// Save the last index visited
lastIndex = index;
// next the index
index = next[index];
// if the next index points to the 'Ground' it means we're done with
// the current hash entry and we need to jump to the next one. This
// is done until all the hash entries had been visited.
while (index == 0 && ++baseHashIndex < baseHash.length) {
index = baseHash[baseHashIndex];
}
return lastIndex;
}
@Override
public void remove() {
IntToIntMap.this.remove(keys[lastIndex]);
}
}
/**
* Implements an IntIterator, used for iteration over the map's keys.
*/
private final class KeyIterator implements IntIterator {
private IntIterator iterator = new IndexIterator();
KeyIterator() { }
@Override
public boolean hasNext() {
return iterator.hasNext();
}
@Override
public int next() {
return keys[iterator.next()];
}
@Override
public void remove() {
iterator.remove();
}
}
/**
* Implements an IntIterator used for iteration over the map's values.
*/
private final class ValueIterator implements IntIterator {
private IntIterator iterator = new IndexIterator();
ValueIterator() { }
@Override
public boolean hasNext() {
return iterator.hasNext();
}
@Override
public int next() {
return values[iterator.next()];
}
@Override
public void remove() {
iterator.remove();
}
}
/**
* Default capacity - in case no capacity was specified in the constructor
*/
private static int defaultCapacity = 16;
/**
* Holds the base hash entries. If the capacity is 2^N, then the base hash
* holds 2^(N+1) entries.
*/
int[] baseHash;
/**
* The current capacity of the map. Always 2^N and never less than 16. We
* never use the zero index. It is needed to improve performance and is also
* used as "ground".
*/
private int capacity;
/**
* All entries are allocated at map creation and start out "free" or empty.
* Whenever a new pair comes along, an entry is "allocated", i.e. taken from
* the head of the free list.
*/
private int firstEmpty;
/**
* hashFactor is always (2^(N+1)) - 1. Used for faster hashing.
*/
private int hashFactor;
/**
* This array holds the unique keys
*/
int[] keys;
/**
* In case of collisions, we implement a double linked list of the colliding
* hashes with the following next[] and prev[]. Those are also used to store
* the "empty" list.
*/
int[] next;
private int prev;
/**
* Number of objects currently in the map.
*/
private int size;
/**
* This array holds the values
*/
int[] values;
/**
* Constructs a map with default capacity.
*/
public IntToIntMap() {
this(defaultCapacity);
}
/**
* Constructs a map with the given capacity. Capacity is adjusted to a
* power of 2, with a minimum of 16.
*
* @param capacity
* minimum capacity for the map.
*/
public IntToIntMap(int capacity) {
this.capacity = 16;
// Minimum capacity is 16.
while (this.capacity < capacity) {
// Multiply by 2 as long as we're still under the requested capacity
this.capacity <<= 1;
}
// As mentioned, we use the first index (0) as 'Ground', so we need the
// length of the arrays to be one more than the capacity
int arrayLength = this.capacity + 1;
this.values = new int[arrayLength];
this.keys = new int[arrayLength];
this.next = new int[arrayLength];
this.values[0] = GROUD;
// Hash entries are twice as big as the capacity.
int baseHashSize = this.capacity << 1;
this.baseHash = new int[baseHashSize];
// The hash factor is 2^M - 1 which is used as an "AND" hashing operator.
// {@link #calcBaseHash()}
this.hashFactor = baseHashSize - 1;
this.size = 0;
clear();
}
/**
* Adds a pair to the map. Takes the first empty position from the
* empty-linked-list's head - {@link #firstEmpty}.
*
* New pairs are always inserted to baseHash, and are followed by the old
* colliding pair.
*
* @param key
* integer which maps the given value
* @param e
* value which is being mapped using the given key
*/
private void prvt_put(int key, int e) {
// Hash entry to which the new pair would be inserted
int hashIndex = calcBaseHashIndex(key);
// 'Allocating' a pair from the "Empty" list.
int objectIndex = firstEmpty;
// Setting data
firstEmpty = next[firstEmpty];
values[objectIndex] = e;
keys[objectIndex] = key;
// Inserting the new pair as the first node in the specific hash entry
next[objectIndex] = baseHash[hashIndex];
baseHash[hashIndex] = objectIndex;
// Announcing a new pair was added!
++size;
}
/**
* Calculating the baseHash index using the internal <code>hashFactor</code>.
*/
protected int calcBaseHashIndex(int key) {
return key & hashFactor;
}
/**
* Empties the map. Generates the "Empty" space list for later allocation.
*/
public void clear() {
// Clears the hash entries
Arrays.fill(this.baseHash, 0);
// Set size to zero
size = 0;
// Mark all array entries as empty. This is done with
// <code>firstEmpty</code> pointing to the first valid index (1 as 0 is
// used as 'Ground').
firstEmpty = 1;
// And setting all the <code>next[i]</code> to point at
// <code>i+1</code>.
for (int i = 1; i < this.capacity;) {
next[i] = ++i;
}
    // Surely, the last one should point to the 'Ground'.
next[this.capacity] = 0;
}
/**
* Checks if a given key exists in the map.
*
* @param key
* that is checked against the map data.
* @return true if the key exists in the map. false otherwise.
*/
public boolean containsKey(int key) {
return find(key) != 0;
}
/**
   * Checks if the given value exists in the map.<br>
   * This method iterates over the collection, trying to find an equal value.
   *
   * @param v
   *            value that is checked against the map data.
   * @return true if the value exists in the map, false otherwise.
*/
public boolean containsValue(int v) {
for (IntIterator iterator = iterator(); iterator.hasNext();) {
if (v == iterator.next()) {
return true;
}
}
return false;
}
/**
* Find the actual index of a given key.
*
* @return index of the key. zero if the key wasn't found.
*/
protected int find(int key) {
// Calculate the hash entry.
int baseHashIndex = calcBaseHashIndex(key);
// Start from the hash entry.
int localIndex = baseHash[baseHashIndex];
// while the index does not point to the 'Ground'
while (localIndex != 0) {
      // return the index found in case of a matching key.
if (keys[localIndex] == key) {
return localIndex;
}
// next the local index
localIndex = next[localIndex];
}
// If we got this far, it could only mean we did not find the key we
// were asked for. return 'Ground' index.
return 0;
}
/**
   * Find the actual index of a given key, given its baseHashIndex.<br>
* Some methods use the baseHashIndex. If those call {@link #find} there's
* no need to re-calculate that hash.
*
* @return the index of the given key, or 0 as 'Ground' if the key wasn't
* found.
*/
private int findForRemove(int key, int baseHashIndex) {
// Start from the hash entry.
this.prev = 0;
int index = baseHash[baseHashIndex];
// while the index does not point to the 'Ground'
while (index != 0) {
      // return the index found in case of a matching key.
if (keys[index] == key) {
return index;
}
// next the local index
prev = index;
index = next[index];
}
// If we got this far, it could only mean we did not find the key we
// were asked for. return 'Ground' index.
this.prev = 0;
return 0;
}
/**
* Returns the object mapped with the given key.
*
   * @param key
   *            int whose mapped value we're interested in.
   * @return the int mapped by the given key, or the 'Ground' value if the key
   *         wasn't found.
*/
public int get(int key) {
return values[find(key)];
}
/**
* Grows the map. Allocates a new map of double the capacity, and
   * fast-inserts the old key-value pairs.
*/
protected void grow() {
IntToIntMap that = new IntToIntMap(
this.capacity * 2);
// Iterates fast over the collection. Any valid pair is put into the new
// map without checking for duplicates or if there's enough space for
// it.
for (IndexIterator iterator = new IndexIterator(); iterator.hasNext();) {
int index = iterator.next();
that.prvt_put(this.keys[index], this.values[index]);
}
// Copy that's data into this.
this.capacity = that.capacity;
this.size = that.size;
this.firstEmpty = that.firstEmpty;
this.values = that.values;
this.keys = that.keys;
this.next = that.next;
this.baseHash = that.baseHash;
this.hashFactor = that.hashFactor;
}
/**
*
* @return true if the map is empty. false otherwise.
*/
public boolean isEmpty() {
return size == 0;
}
/**
* Returns a new iterator for the mapped objects.
*/
public IntIterator iterator() {
return new ValueIterator();
}
/** Returns an iterator on the map keys. */
public IntIterator keyIterator() {
return new KeyIterator();
}
/**
* Prints the baseHash array, used for debug purposes.
*/
@SuppressWarnings("unused")
private String getBaseHashAsString() {
return Arrays.toString(this.baseHash);
}
/**
* Inserts the &lt;key,value&gt; pair into the map. If the key already exists,
* this method updates the mapped value to the given one, returning the old
* mapped value.
*
* @return the old mapped value, or 0 if the key didn't exist.
*/
public int put(int key, int e) {
    // Does the key exist?
int index = find(key);
// Yes!
if (index != 0) {
// Set new data and exit.
int old = values[index];
values[index] = e;
return old;
}
// Is there enough room for a new pair?
if (size == capacity) {
      // No? Then grow.
grow();
}
// Now that everything is set, the pair can be just put inside with no
// worries.
prvt_put(key, e);
return 0;
}
/**
* Removes a &lt;key,value&gt; pair from the map and returns the mapped value,
   * or 0 if none existed.
*
* @param key used to find the value to remove
* @return the removed value or 0 if none existed.
*/
public int remove(int key) {
int baseHashIndex = calcBaseHashIndex(key);
int index = findForRemove(key, baseHashIndex);
if (index != 0) {
// If it is the first in the collision list, we should promote its
// next colliding element.
if (prev == 0) {
baseHash[baseHashIndex] = next[index];
}
next[prev] = next[index];
next[index] = firstEmpty;
firstEmpty = index;
--size;
return values[index];
}
return 0;
}
/**
* @return number of pairs currently in the map
*/
public int size() {
return this.size;
}
/**
   * Translates the mapped pairs' values into an array of ints
   *
   * @return an int array of all the values currently in the map.
*/
public int[] toArray() {
int j = -1;
int[] array = new int[size];
// Iterates over the values, adding them to the array.
for (IntIterator iterator = iterator(); iterator.hasNext();) {
array[++j] = iterator.next();
}
return array;
}
/**
* Translates the mapped pairs' values into an array of ints
*
* @param a
* the array into which the elements of the map are to be
* stored, if it is big enough; otherwise, a new array of the
* same runtime type is allocated for this purpose.
*
* @return an array containing the values stored in the map
*
*/
public int[] toArray(int[] a) {
int j = 0;
if (a.length < size) {
a = new int[size];
}
// Iterates over the values, adding them to the array.
for (IntIterator iterator = iterator(); j < a.length
&& iterator.hasNext(); ++j) {
a[j] = iterator.next();
}
return a;
}
@Override
public String toString() {
StringBuffer sb = new StringBuffer();
sb.append('{');
IntIterator keyIterator = keyIterator();
while (keyIterator.hasNext()) {
int key = keyIterator.next();
sb.append(key);
sb.append('=');
sb.append(get(key));
if (keyIterator.hasNext()) {
sb.append(',');
sb.append(' ');
}
}
sb.append('}');
return sb.toString();
}
@Override
public int hashCode() {
return getClass().hashCode() ^ size();
}
@Override
public boolean equals(Object o) {
IntToIntMap that = (IntToIntMap)o;
if (that.size() != this.size()) {
return false;
}
IntIterator it = keyIterator();
while (it.hasNext()) {
int key = it.next();
if (!that.containsKey(key)) {
return false;
}
int v1 = this.get(key);
int v2 = that.get(key);
if (v1 != v2) {
return false;
}
}
return true;
}
}
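For reference, a minimal usage sketch of the IntToIntMap API shown above (put/get/containsKey/remove plus key iteration). This snippet is illustrative only and was not part of the deleted file; it assumes IntToIntMap and IntIterator from the same org.apache.lucene.facet.collections package are on the classpath.

IntToIntMap map = new IntToIntMap();       // default capacity of 16
map.put(7, 70);
map.put(13, 130);
int v = map.get(7);                        // 70
boolean present = map.containsKey(13);     // true
for (IntIterator it = map.keyIterator(); it.hasNext();) {
  int key = it.next();                     // visit each key; map.get(key) gives its value
}
int removed = map.remove(7);               // 70; returns 0 if the key was absent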

View File

@ -1,634 +0,0 @@
package org.apache.lucene.facet.collections;
import java.util.Arrays;
import java.util.Iterator;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* An Array-based hashtable which maps primitive int to Objects of generic type
* T.<br>
 * The hashtable is constructed with a given capacity, or 16 as a default. In
* case there's not enough room for new pairs, the hashtable grows. <br>
* Capacity is adjusted to a power of 2, and there are 2 * capacity entries for
* the hash.
*
 * The preallocated arrays (for keys, values) have length capacity + 1, where
 * index 0 is used as 'Ground' or 'NULL'.<br>
 *
 * The arrays are allocated ahead of hash operations, and form an 'empty space'
 * list from which key/value pairs are allocated.
*
* @lucene.experimental
*/
public class IntToObjectMap<T> implements Iterable<T> {
/**
* Implements an IntIterator which iterates over all the allocated indexes.
*/
private final class IndexIterator implements IntIterator {
/**
* The last used baseHashIndex. Needed for "jumping" from one hash entry
* to another.
*/
private int baseHashIndex = 0;
/**
* The next not-yet-visited index.
*/
private int index = 0;
/**
* Index of the last visited pair. Used in {@link #remove()}.
*/
private int lastIndex = 0;
/**
* Create the Iterator, make <code>index</code> point to the "first"
* index which is not empty. If such does not exist (eg. the map is
* empty) it would be zero.
*/
public IndexIterator() {
for (baseHashIndex = 0; baseHashIndex < baseHash.length; ++baseHashIndex) {
index = baseHash[baseHashIndex];
if (index != 0) {
break;
}
}
}
@Override
public boolean hasNext() {
return (index != 0);
}
@Override
public int next() {
// Save the last index visited
lastIndex = index;
// next the index
index = next[index];
// if the next index points to the 'Ground' it means we're done with
// the current hash entry and we need to jump to the next one. This
// is done until all the hash entries had been visited.
while (index == 0 && ++baseHashIndex < baseHash.length) {
index = baseHash[baseHashIndex];
}
return lastIndex;
}
@Override
public void remove() {
IntToObjectMap.this.remove(keys[lastIndex]);
}
}
/**
* Implements an IntIterator, used for iteration over the map's keys.
*/
private final class KeyIterator implements IntIterator {
private IntIterator iterator = new IndexIterator();
KeyIterator() { }
@Override
public boolean hasNext() {
return iterator.hasNext();
}
@Override
public int next() {
return keys[iterator.next()];
}
@Override
public void remove() {
iterator.remove();
}
}
/**
* Implements an Iterator of a generic type T used for iteration over the
* map's values.
*/
private final class ValueIterator implements Iterator<T> {
private IntIterator iterator = new IndexIterator();
ValueIterator() { }
@Override
public boolean hasNext() {
return iterator.hasNext();
}
@Override
@SuppressWarnings("unchecked")
public T next() {
return (T) values[iterator.next()];
}
@Override
public void remove() {
iterator.remove();
}
}
/**
* Default capacity - in case no capacity was specified in the constructor
*/
private static int defaultCapacity = 16;
/**
   * Holds the base hash entries. If the capacity is 2^N, then the base hash
   * holds 2^(N+1) entries.
*/
int[] baseHash;
/**
* The current capacity of the map. Always 2^N and never less than 16. We
* never use the zero index. It is needed to improve performance and is also
* used as "ground".
*/
private int capacity;
/**
   * All entries are allocated at map creation and start out "free" (empty).
   * Whenever a new pair comes along, an entry is "allocated" by taking it from
   * the head of this free list.
*/
private int firstEmpty;
/**
* hashFactor is always (2^(N+1)) - 1. Used for faster hashing.
*/
private int hashFactor;
/**
* This array holds the unique keys
*/
int[] keys;
/**
   * In case of collisions, colliding entries are chained in a linked list via
   * next[], while prev tracks the previous entry during removal. The next[]
   * array is also used to store the "empty" list.
*/
int[] next;
private int prev;
/**
   * Number of objects currently in the map.
*/
private int size;
/**
* This array holds the values
*/
Object[] values;
/**
* Constructs a map with default capacity.
*/
public IntToObjectMap() {
this(defaultCapacity);
}
/**
   * Constructs a map with the given capacity. Capacity is adjusted up to a
   * power of 2, with a minimum of 16.
*
* @param capacity
* minimum capacity for the map.
*/
public IntToObjectMap(int capacity) {
this.capacity = 16;
    // Minimum capacity is 16.
while (this.capacity < capacity) {
// Multiply by 2 as long as we're still under the requested capacity
this.capacity <<= 1;
}
// As mentioned, we use the first index (0) as 'Ground', so we need the
// length of the arrays to be one more than the capacity
int arrayLength = this.capacity + 1;
this.values = new Object[arrayLength];
this.keys = new int[arrayLength];
this.next = new int[arrayLength];
// Hash entries are twice as big as the capacity.
int baseHashSize = this.capacity << 1;
this.baseHash = new int[baseHashSize];
    // The hash factor is 2^M - 1, used as an "AND" mask when hashing.
    // See calcBaseHashIndex().
this.hashFactor = baseHashSize - 1;
this.size = 0;
clear();
}
/**
* Adds a pair to the map. Takes the first empty position from the
* empty-linked-list's head - {@link #firstEmpty}.
*
* New pairs are always inserted to baseHash, and are followed by the old
* colliding pair.
*
* @param key
* integer which maps the given Object
* @param e
* element which is being mapped using the given key
*/
private void prvt_put(int key, T e) {
// Hash entry to which the new pair would be inserted
int hashIndex = calcBaseHashIndex(key);
// 'Allocating' a pair from the "Empty" list.
int objectIndex = firstEmpty;
// Setting data
firstEmpty = next[firstEmpty];
values[objectIndex] = e;
keys[objectIndex] = key;
// Inserting the new pair as the first node in the specific hash entry
next[objectIndex] = baseHash[hashIndex];
baseHash[hashIndex] = objectIndex;
// Announcing a new pair was added!
++size;
}
/**
* Calculating the baseHash index using the internal <code>hashFactor</code>.
*
*/
protected int calcBaseHashIndex(int key) {
return key & hashFactor;
}
/**
* Empties the map. Generates the "Empty" space list for later allocation.
*/
public void clear() {
// Clears the hash entries
Arrays.fill(this.baseHash, 0);
// Set size to zero
size = 0;
// Mark all array entries as empty. This is done with
// <code>firstEmpty</code> pointing to the first valid index (1 as 0 is
// used as 'Ground').
firstEmpty = 1;
// And setting all the <code>next[i]</code> to point at
// <code>i+1</code>.
for (int i = 1; i < this.capacity;) {
next[i] = ++i;
}
    // Surely, the last one should point to the 'Ground'.
next[this.capacity] = 0;
}
/**
* Checks if a given key exists in the map.
*
* @param key
* that is checked against the map data.
* @return true if the key exists in the map. false otherwise.
*/
public boolean containsKey(int key) {
return find(key) != 0;
}
/**
* Checks if the given object exists in the map.<br>
* This method iterates over the collection, trying to find an equal object.
*
* @param o
* object that is checked against the map data.
* @return true if the object exists in the map (in .equals() meaning).
* false otherwise.
*/
public boolean containsValue(Object o) {
for (Iterator<T> iterator = iterator(); iterator.hasNext();) {
T object = iterator.next();
if (object.equals(o)) {
return true;
}
}
return false;
}
/**
* Find the actual index of a given key.
*
* @return index of the key. zero if the key wasn't found.
*/
protected int find(int key) {
// Calculate the hash entry.
int baseHashIndex = calcBaseHashIndex(key);
// Start from the hash entry.
int localIndex = baseHash[baseHashIndex];
// while the index does not point to the 'Ground'
while (localIndex != 0) {
      // return the index found in case of a matching key.
if (keys[localIndex] == key) {
return localIndex;
}
// next the local index
localIndex = next[localIndex];
}
// If we got this far, it could only mean we did not find the key we
// were asked for. return 'Ground' index.
return 0;
}
/**
   * Find the actual index of a given key, given its baseHashIndex.<br>
* Some methods use the baseHashIndex. If those call {@link #find} there's
* no need to re-calculate that hash.
*
* @return the index of the given key, or 0 as 'Ground' if the key wasn't
* found.
*/
private int findForRemove(int key, int baseHashIndex) {
// Start from the hash entry.
this.prev = 0;
int index = baseHash[baseHashIndex];
// while the index does not point to the 'Ground'
while (index != 0) {
      // return the index found in case of a matching key.
if (keys[index] == key) {
return index;
}
// next the local index
prev = index;
index = next[index];
}
// If we got this far, it could only mean we did not find the key we
// were asked for. return 'Ground' index.
this.prev = 0;
return 0;
}
/**
* Returns the object mapped with the given key.
*
* @param key
   *            int whose mapped object we're interested in.
* @return an object mapped by the given key. null if the key wasn't found.
*/
@SuppressWarnings("unchecked")
public T get(int key) {
return (T) values[find(key)];
}
/**
* Grows the map. Allocates a new map of double the capacity, and
   * fast-inserts the old key-value pairs.
*/
@SuppressWarnings("unchecked")
protected void grow() {
IntToObjectMap<T> that = new IntToObjectMap<T>(
this.capacity * 2);
// Iterates fast over the collection. Any valid pair is put into the new
// map without checking for duplicates or if there's enough space for
// it.
for (IndexIterator iterator = new IndexIterator(); iterator.hasNext();) {
int index = iterator.next();
that.prvt_put(this.keys[index], (T) this.values[index]);
}
// Copy that's data into this.
this.capacity = that.capacity;
this.size = that.size;
this.firstEmpty = that.firstEmpty;
this.values = that.values;
this.keys = that.keys;
this.next = that.next;
this.baseHash = that.baseHash;
this.hashFactor = that.hashFactor;
}
/**
*
* @return true if the map is empty. false otherwise.
*/
public boolean isEmpty() {
return size == 0;
}
/**
* Returns a new iterator for the mapped objects.
*/
@Override
public Iterator<T> iterator() {
return new ValueIterator();
}
/** Returns an iterator on the map keys. */
public IntIterator keyIterator() {
return new KeyIterator();
}
/**
* Prints the baseHash array, used for debug purposes.
*/
@SuppressWarnings("unused")
private String getBaseHashAsString() {
return Arrays.toString(baseHash);
}
/**
* Inserts the &lt;key,value&gt; pair into the map. If the key already exists,
* this method updates the mapped value to the given one, returning the old
* mapped value.
*
* @return the old mapped value, or null if the key didn't exist.
*/
@SuppressWarnings("unchecked")
public T put(int key, T e) {
    // Does the key exist?
int index = find(key);
// Yes!
if (index != 0) {
// Set new data and exit.
T old = (T) values[index];
values[index] = e;
return old;
}
// Is there enough room for a new pair?
if (size == capacity) {
      // No? Then grow.
grow();
}
// Now that everything is set, the pair can be just put inside with no
// worries.
prvt_put(key, e);
return null;
}
/**
* Removes a &lt;key,value&gt; pair from the map and returns the mapped value,
   * or null if none existed.
*
* @param key used to find the value to remove
* @return the removed value or null if none existed.
*/
@SuppressWarnings("unchecked")
public T remove(int key) {
int baseHashIndex = calcBaseHashIndex(key);
int index = findForRemove(key, baseHashIndex);
if (index != 0) {
// If it is the first in the collision list, we should promote its
// next colliding element.
if (prev == 0) {
baseHash[baseHashIndex] = next[index];
}
next[prev] = next[index];
next[index] = firstEmpty;
firstEmpty = index;
--size;
return (T) values[index];
}
return null;
}
/**
* @return number of pairs currently in the map
*/
public int size() {
return this.size;
}
/**
* Translates the mapped pairs' values into an array of Objects
*
* @return an object array of all the values currently in the map.
*/
public Object[] toArray() {
int j = -1;
Object[] array = new Object[size];
// Iterates over the values, adding them to the array.
for (Iterator<T> iterator = iterator(); iterator.hasNext();) {
array[++j] = iterator.next();
}
return array;
}
/**
* Translates the mapped pairs' values into an array of T
*
* @param a
* the array into which the elements of the list are to be
* stored, if it is big enough; otherwise, use whatever space we
* have, setting the one after the true data as null.
*
* @return an array containing the elements of the list
*
*/
public T[] toArray(T[] a) {
int j = 0;
// Iterates over the values, adding them to the array.
for (Iterator<T> iterator = iterator(); j < a.length
&& iterator.hasNext(); ++j) {
a[j] = iterator.next();
}
if (j < a.length) {
a[j] = null;
}
return a;
}
@Override
public String toString() {
StringBuffer sb = new StringBuffer();
sb.append('{');
IntIterator keyIterator = keyIterator();
while (keyIterator.hasNext()) {
int key = keyIterator.next();
sb.append(key);
sb.append('=');
sb.append(get(key));
if (keyIterator.hasNext()) {
sb.append(',');
sb.append(' ');
}
}
sb.append('}');
return sb.toString();
}
@Override
public int hashCode() {
return getClass().hashCode() ^ size();
}
@SuppressWarnings("unchecked")
@Override
public boolean equals(Object o) {
IntToObjectMap<T> that = (IntToObjectMap<T>)o;
if (that.size() != this.size()) {
return false;
}
IntIterator it = keyIterator();
while (it.hasNext()) {
int key = it.next();
if (!that.containsKey(key)) {
return false;
}
T v1 = this.get(key);
T v2 = that.get(key);
      // null-safe comparison; the original chain could NPE when both values are null
      if (v1 == null ? v2 != null : !v1.equals(v2)) {
return false;
}
}
return true;
}
}
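A minimal usage sketch of IntToObjectMap (illustrative only, not part of the deleted file); it shows put/get, value iteration via the Iterable interface, and remove, assuming the class is available from the same package.

IntToObjectMap<String> names = new IntToObjectMap<String>();
names.put(1, "one");
names.put(2, "two");
String s = names.get(2);                   // "two"; null for a missing key
for (String value : names) {               // Iterable<T>: iterates the mapped values
  // process each value
}
String old = names.remove(1);              // returns the removed value, or null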

View File

@ -1,623 +0,0 @@
package org.apache.lucene.facet.collections;
import java.util.Arrays;
import java.util.Iterator;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* An Array-based hashtable which maps Objects of generic type
 * K to primitive float values.<br>
* The hashtable is constructed with a given capacity, or 16 as a default. In
* case there's not enough room for new pairs, the hashtable grows. <br>
* Capacity is adjusted to a power of 2, and there are 2 * capacity entries for
* the hash.
*
 * The preallocated arrays (for keys, values) have length capacity + 1, where
 * index 0 is used as 'Ground' or 'NULL'.<br>
 *
 * The arrays are allocated ahead of hash operations, and form an 'empty space'
 * list from which key/value pairs are allocated.
*
* @lucene.experimental
*/
public class ObjectToFloatMap<K> {
/**
* Implements an IntIterator which iterates over all the allocated indexes.
*/
private final class IndexIterator implements IntIterator {
/**
* The last used baseHashIndex. Needed for "jumping" from one hash entry
* to another.
*/
private int baseHashIndex = 0;
/**
* The next not-yet-visited index.
*/
private int index = 0;
/**
* Index of the last visited pair. Used in {@link #remove()}.
*/
private int lastIndex = 0;
/**
* Create the Iterator, make <code>index</code> point to the "first"
* index which is not empty. If such does not exist (eg. the map is
* empty) it would be zero.
*/
public IndexIterator() {
for (baseHashIndex = 0; baseHashIndex < baseHash.length; ++baseHashIndex) {
index = baseHash[baseHashIndex];
if (index != 0) {
break;
}
}
}
@Override
public boolean hasNext() {
return (index != 0);
}
@Override
public int next() {
// Save the last index visited
lastIndex = index;
// next the index
index = next[index];
// if the next index points to the 'Ground' it means we're done with
// the current hash entry and we need to jump to the next one. This
// is done until all the hash entries had been visited.
while (index == 0 && ++baseHashIndex < baseHash.length) {
index = baseHash[baseHashIndex];
}
return lastIndex;
}
@Override
@SuppressWarnings("unchecked")
public void remove() {
ObjectToFloatMap.this.remove((K) keys[lastIndex]);
}
}
/**
   * Implements an Iterator over the map's keys.
*/
private final class KeyIterator implements Iterator<K> {
private IntIterator iterator = new IndexIterator();
KeyIterator() { }
@Override
public boolean hasNext() {
return iterator.hasNext();
}
@Override
@SuppressWarnings("unchecked")
public K next() {
return (K) keys[iterator.next()];
}
@Override
public void remove() {
iterator.remove();
}
}
/**
   * Implements a FloatIterator used for iteration over the map's values.
*/
private final class ValueIterator implements FloatIterator {
private IntIterator iterator = new IndexIterator();
ValueIterator() { }
@Override
public boolean hasNext() {
return iterator.hasNext();
}
@Override
public float next() {
return values[iterator.next()];
}
@Override
public void remove() {
iterator.remove();
}
}
/**
* Default capacity - in case no capacity was specified in the constructor
*/
private static int defaultCapacity = 16;
/**
   * Holds the base hash entries. If the capacity is 2^N, then the base hash
   * holds 2^(N+1) entries.
*/
int[] baseHash;
/**
* The current capacity of the map. Always 2^N and never less than 16. We
* never use the zero index. It is needed to improve performance and is also
* used as "ground".
*/
private int capacity;
/**
   * All entries are allocated at map creation and start out "free" (empty).
   * Whenever a new pair comes along, an entry is "allocated" by taking it from
   * the head of this free list.
*/
private int firstEmpty;
/**
* hashFactor is always (2^(N+1)) - 1. Used for faster hashing.
*/
private int hashFactor;
/**
* This array holds the unique keys
*/
Object[] keys;
/**
   * In case of collisions, colliding entries are chained in a linked list via
   * next[], while prev tracks the previous entry during removal. The next[]
   * array is also used to store the "empty" list.
*/
int[] next;
private int prev;
/**
   * Number of objects currently in the map.
*/
private int size;
/**
* This array holds the values
*/
float[] values;
/**
* Constructs a map with default capacity.
*/
public ObjectToFloatMap() {
this(defaultCapacity);
}
/**
   * Constructs a map with the given capacity. Capacity is adjusted up to a
   * power of 2, with a minimum of 16.
*
* @param capacity
* minimum capacity for the map.
*/
public ObjectToFloatMap(int capacity) {
this.capacity = 16;
    // Minimum capacity is 16.
while (this.capacity < capacity) {
// Multiply by 2 as long as we're still under the requested capacity
this.capacity <<= 1;
}
// As mentioned, we use the first index (0) as 'Ground', so we need the
// length of the arrays to be one more than the capacity
int arrayLength = this.capacity + 1;
this.values = new float[arrayLength];
this.keys = new Object[arrayLength];
this.next = new int[arrayLength];
// Hash entries are twice as big as the capacity.
int baseHashSize = this.capacity << 1;
this.baseHash = new int[baseHashSize];
    // The hash factor is 2^M - 1, used as an "AND" mask when hashing.
    // See calcBaseHashIndex().
this.hashFactor = baseHashSize - 1;
this.size = 0;
clear();
}
/**
* Adds a pair to the map. Takes the first empty position from the
* empty-linked-list's head - {@link #firstEmpty}.
*
* New pairs are always inserted to baseHash, and are followed by the old
* colliding pair.
*
* @param key
* integer which maps the given Object
* @param e
* element which is being mapped using the given key
*/
private void prvt_put(K key, float e) {
// Hash entry to which the new pair would be inserted
int hashIndex = calcBaseHashIndex(key);
// 'Allocating' a pair from the "Empty" list.
int objectIndex = firstEmpty;
// Setting data
firstEmpty = next[firstEmpty];
values[objectIndex] = e;
keys[objectIndex] = key;
// Inserting the new pair as the first node in the specific hash entry
next[objectIndex] = baseHash[hashIndex];
baseHash[hashIndex] = objectIndex;
// Announcing a new pair was added!
++size;
}
/**
* Calculating the baseHash index using the internal <code>hashFactor</code>.
*/
protected int calcBaseHashIndex(K key) {
return key.hashCode() & hashFactor;
}
/**
* Empties the map. Generates the "Empty" space list for later allocation.
*/
public void clear() {
// Clears the hash entries
Arrays.fill(this.baseHash, 0);
// Set size to zero
size = 0;
values[0] = Float.NaN;
// Mark all array entries as empty. This is done with
// <code>firstEmpty</code> pointing to the first valid index (1 as 0 is
// used as 'Ground').
firstEmpty = 1;
// And setting all the <code>next[i]</code> to point at
// <code>i+1</code>.
for (int i = 1; i < this.capacity;) {
next[i] = ++i;
}
    // Surely, the last one should point to the 'Ground'.
next[this.capacity] = 0;
}
/**
* Checks if a given key exists in the map.
*
* @param key
* that is checked against the map data.
* @return true if the key exists in the map. false otherwise.
*/
public boolean containsKey(K key) {
return find(key) != 0;
}
/**
   * Checks if the given value exists in the map.<br>
   * This method iterates over the collection, trying to find an equal value.
   *
   * @param o
   *            value that is checked against the map data.
   * @return true if the value exists in the map, false otherwise.
*/
public boolean containsValue(float o) {
for (FloatIterator iterator = iterator(); iterator.hasNext();) {
if (o == iterator.next()) {
return true;
}
}
return false;
}
/**
* Find the actual index of a given key.
*
* @return index of the key. zero if the key wasn't found.
*/
protected int find(K key) {
// Calculate the hash entry.
int baseHashIndex = calcBaseHashIndex(key);
// Start from the hash entry.
int localIndex = baseHash[baseHashIndex];
// while the index does not point to the 'Ground'
while (localIndex != 0) {
      // return the index found in case of a matching key.
if (keys[localIndex].equals(key)) {
return localIndex;
}
// next the local index
localIndex = next[localIndex];
}
// If we got this far, it could only mean we did not find the key we
// were asked for. return 'Ground' index.
return 0;
}
/**
   * Find the actual index of a given key, given its baseHashIndex.<br>
* Some methods use the baseHashIndex. If those call {@link #find} there's
* no need to re-calculate that hash.
*
* @return the index of the given key, or 0 as 'Ground' if the key wasn't
* found.
*/
private int findForRemove(K key, int baseHashIndex) {
// Start from the hash entry.
this.prev = 0;
int index = baseHash[baseHashIndex];
// while the index does not point to the 'Ground'
while (index != 0) {
      // return the index found in case of a matching key.
if (keys[index].equals(key)) {
return index;
}
// next the local index
prev = index;
index = next[index];
}
// If we got this far, it could only mean we did not find the key we
// were asked for. return 'Ground' index.
this.prev = 0;
return 0;
}
/**
* Returns the float mapped with the given key.
*
* @param key
   *            object whose mapped float we're interested in.
* @return a float mapped by the given key. Float.NaN if the key wasn't found.
*/
public float get(K key) {
return values[find(key)];
}
/**
* Grows the map. Allocates a new map of double the capacity, and
   * fast-inserts the old key-value pairs.
*/
@SuppressWarnings("unchecked")
protected void grow() {
ObjectToFloatMap<K> that = new ObjectToFloatMap<K>(
this.capacity * 2);
// Iterates fast over the collection. Any valid pair is put into the new
// map without checking for duplicates or if there's enough space for
// it.
for (IndexIterator iterator = new IndexIterator(); iterator.hasNext();) {
int index = iterator.next();
that.prvt_put((K) this.keys[index], this.values[index]);
}
// Copy that's data into this.
this.capacity = that.capacity;
this.size = that.size;
this.firstEmpty = that.firstEmpty;
this.values = that.values;
this.keys = that.keys;
this.next = that.next;
this.baseHash = that.baseHash;
this.hashFactor = that.hashFactor;
}
/**
*
* @return true if the map is empty. false otherwise.
*/
public boolean isEmpty() {
return size == 0;
}
/**
* Returns a new iterator for the mapped floats.
*/
public FloatIterator iterator() {
return new ValueIterator();
}
/** Returns an iterator on the map keys. */
public Iterator<K> keyIterator() {
return new KeyIterator();
}
/**
* Prints the baseHash array, used for debug purposes.
*/
@SuppressWarnings("unused")
private String getBaseHashAsString() {
return Arrays.toString(baseHash);
}
/**
* Inserts the &lt;key,value&gt; pair into the map. If the key already exists,
* this method updates the mapped value to the given one, returning the old
* mapped value.
*
* @return the old mapped value, or {@link Float#NaN} if the key didn't exist.
*/
public float put(K key, float e) {
    // Does the key exist?
int index = find(key);
// Yes!
if (index != 0) {
// Set new data and exit.
float old = values[index];
values[index] = e;
return old;
}
// Is there enough room for a new pair?
if (size == capacity) {
      // No? Then grow.
grow();
}
// Now that everything is set, the pair can be just put inside with no
// worries.
prvt_put(key, e);
return Float.NaN;
}
/**
* Removes a &lt;key,value&gt; pair from the map and returns the mapped value,
   * or {@link Float#NaN} if none existed.
*
* @param key used to find the value to remove
* @return the removed value or {@link Float#NaN} if none existed.
*/
public float remove(K key) {
int baseHashIndex = calcBaseHashIndex(key);
int index = findForRemove(key, baseHashIndex);
if (index != 0) {
// If it is the first in the collision list, we should promote its
// next colliding element.
if (prev == 0) {
baseHash[baseHashIndex] = next[index];
}
next[prev] = next[index];
next[index] = firstEmpty;
firstEmpty = index;
--size;
return values[index];
}
return Float.NaN;
}
/**
* @return number of pairs currently in the map
*/
public int size() {
return this.size;
}
/**
   * Translates the mapped pairs' values into an array of floats
   *
   * @return a float array of all the values currently in the map.
*/
public float[] toArray() {
int j = -1;
float[] array = new float[size];
// Iterates over the values, adding them to the array.
for (FloatIterator iterator = iterator(); iterator.hasNext();) {
array[++j] = iterator.next();
}
return array;
}
/**
   * Translates the mapped pairs' values into an array of floats
*
* @param a
* the array into which the elements of the list are to be
* stored, if it is big enough; otherwise, use as much space as it can.
*
* @return an array containing the elements of the list
*
*/
public float[] toArray(float[] a) {
int j = 0;
// Iterates over the values, adding them to the array.
for (FloatIterator iterator = iterator(); j < a.length
&& iterator.hasNext(); ++j) {
a[j] = iterator.next();
}
if (j < a.length) {
a[j] = Float.NaN;
}
return a;
}
@Override
public String toString() {
StringBuffer sb = new StringBuffer();
sb.append('{');
Iterator<K> keyIterator = keyIterator();
while (keyIterator.hasNext()) {
K key = keyIterator.next();
sb.append(key);
sb.append('=');
sb.append(get(key));
if (keyIterator.hasNext()) {
sb.append(',');
sb.append(' ');
}
}
sb.append('}');
return sb.toString();
}
@Override
public int hashCode() {
return getClass().hashCode() ^ size();
}
@SuppressWarnings("unchecked")
@Override
public boolean equals(Object o) {
ObjectToFloatMap<K> that = (ObjectToFloatMap<K>)o;
if (that.size() != this.size()) {
return false;
}
Iterator<K> it = keyIterator();
while (it.hasNext()) {
K key = it.next();
float v1 = this.get(key);
float v2 = that.get(key);
if (Float.compare(v1, v2) != 0) {
return false;
}
}
return true;
}
}
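A minimal usage sketch of ObjectToFloatMap (illustrative only, not part of the deleted file); it assumes the class is available and highlights that a missing key maps to Float.NaN, the 'Ground' value set in clear().

ObjectToFloatMap<String> weights = new ObjectToFloatMap<String>();
weights.put("alpha", 0.5f);
weights.put("beta", 2.0f);
float w = weights.get("alpha");            // 0.5f
float missing = weights.get("gamma");      // Float.NaN for an absent key
for (FloatIterator it = weights.iterator(); it.hasNext();) {
  float value = it.next();                 // iterate the mapped float values
}
float old = weights.remove("beta");        // returns the removed value, or Float.NaN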

View File

@ -1,622 +0,0 @@
package org.apache.lucene.facet.collections;
import java.util.Arrays;
import java.util.Iterator;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* An Array-based hashtable which maps Objects of generic type
 * K to primitive int values.<br>
* The hashtable is constructed with a given capacity, or 16 as a default. In
* case there's not enough room for new pairs, the hashtable grows. <br>
* Capacity is adjusted to a power of 2, and there are 2 * capacity entries for
* the hash.
*
 * The preallocated arrays (for keys, values) have length capacity + 1, where
 * index 0 is used as 'Ground' or 'NULL'.<br>
 *
 * The arrays are allocated ahead of hash operations, and form an 'empty space'
 * list from which key/value pairs are allocated.
*
* @lucene.experimental
*/
public class ObjectToIntMap<K> {
/**
* Implements an IntIterator which iterates over all the allocated indexes.
*/
private final class IndexIterator implements IntIterator {
/**
* The last used baseHashIndex. Needed for "jumping" from one hash entry
* to another.
*/
private int baseHashIndex = 0;
/**
* The next not-yet-visited index.
*/
private int index = 0;
/**
* Index of the last visited pair. Used in {@link #remove()}.
*/
private int lastIndex = 0;
/**
* Create the Iterator, make <code>index</code> point to the "first"
* index which is not empty. If such does not exist (eg. the map is
* empty) it would be zero.
*/
public IndexIterator() {
for (baseHashIndex = 0; baseHashIndex < baseHash.length; ++baseHashIndex) {
index = baseHash[baseHashIndex];
if (index != 0) {
break;
}
}
}
@Override
public boolean hasNext() {
return (index != 0);
}
@Override
public int next() {
// Save the last index visited
lastIndex = index;
// next the index
index = next[index];
// if the next index points to the 'Ground' it means we're done with
// the current hash entry and we need to jump to the next one. This
// is done until all the hash entries had been visited.
while (index == 0 && ++baseHashIndex < baseHash.length) {
index = baseHash[baseHashIndex];
}
return lastIndex;
}
@Override
@SuppressWarnings("unchecked")
public void remove() {
ObjectToIntMap.this.remove((K) keys[lastIndex]);
}
}
/**
   * Implements an Iterator over the map's keys.
*/
private final class KeyIterator implements Iterator<K> {
private IntIterator iterator = new IndexIterator();
KeyIterator() { }
@Override
public boolean hasNext() {
return iterator.hasNext();
}
@Override
@SuppressWarnings("unchecked")
public K next() {
return (K) keys[iterator.next()];
}
@Override
public void remove() {
iterator.remove();
}
}
/**
   * Implements an IntIterator used for iteration over the map's values.
*/
private final class ValueIterator implements IntIterator {
private IntIterator iterator = new IndexIterator();
ValueIterator() {}
@Override
public boolean hasNext() {
return iterator.hasNext();
}
@Override
public int next() {
return values[iterator.next()];
}
@Override
public void remove() {
iterator.remove();
}
}
/**
* Default capacity - in case no capacity was specified in the constructor
*/
private static int defaultCapacity = 16;
/**
   * Holds the base hash entries. If the capacity is 2^N, then the base hash
   * holds 2^(N+1) entries.
*/
int[] baseHash;
/**
* The current capacity of the map. Always 2^N and never less than 16. We
* never use the zero index. It is needed to improve performance and is also
* used as "ground".
*/
private int capacity;
/**
   * All entries are allocated at map creation and start out "free" (empty).
   * Whenever a new pair comes along, an entry is "allocated" by taking it from
   * the head of this free list.
*/
private int firstEmpty;
/**
* hashFactor is always (2^(N+1)) - 1. Used for faster hashing.
*/
private int hashFactor;
/**
* This array holds the unique keys
*/
Object[] keys;
/**
   * In case of collisions, colliding entries are chained in a linked list via
   * next[], while prev tracks the previous entry during removal. The next[]
   * array is also used to store the "empty" list.
*/
int[] next;
private int prev;
/**
   * Number of objects currently in the map.
*/
private int size;
/**
* This array holds the values
*/
int[] values;
/**
* Constructs a map with default capacity.
*/
public ObjectToIntMap() {
this(defaultCapacity);
}
/**
   * Constructs a map with the given capacity. Capacity is adjusted up to a
   * power of 2, with a minimum of 16.
*
* @param capacity
* minimum capacity for the map.
*/
public ObjectToIntMap(int capacity) {
this.capacity = 16;
    // Minimum capacity is 16.
while (this.capacity < capacity) {
// Multiply by 2 as long as we're still under the requested capacity
this.capacity <<= 1;
}
// As mentioned, we use the first index (0) as 'Ground', so we need the
// length of the arrays to be one more than the capacity
int arrayLength = this.capacity + 1;
this.values = new int[arrayLength];
this.keys = new Object[arrayLength];
this.next = new int[arrayLength];
// Hash entries are twice as big as the capacity.
int baseHashSize = this.capacity << 1;
this.baseHash = new int[baseHashSize];
    // The hash factor is 2^M - 1, used as an "AND" mask when hashing.
    // See calcBaseHashIndex().
this.hashFactor = baseHashSize - 1;
this.size = 0;
clear();
}
/**
* Adds a pair to the map. Takes the first empty position from the
* empty-linked-list's head - {@link #firstEmpty}.
*
* New pairs are always inserted to baseHash, and are followed by the old
* colliding pair.
*
* @param key
* integer which maps the given Object
* @param e
* element which is being mapped using the given key
*/
private void prvt_put(K key, int e) {
// Hash entry to which the new pair would be inserted
int hashIndex = calcBaseHashIndex(key);
// 'Allocating' a pair from the "Empty" list.
int objectIndex = firstEmpty;
// Setting data
firstEmpty = next[firstEmpty];
values[objectIndex] = e;
keys[objectIndex] = key;
// Inserting the new pair as the first node in the specific hash entry
next[objectIndex] = baseHash[hashIndex];
baseHash[hashIndex] = objectIndex;
// Announcing a new pair was added!
++size;
}
/**
* Calculating the baseHash index using the internal <code>hashFactor</code>.
*/
protected int calcBaseHashIndex(K key) {
return key.hashCode() & hashFactor;
}
/**
* Empties the map. Generates the "Empty" space list for later allocation.
*/
public void clear() {
// Clears the hash entries
Arrays.fill(this.baseHash, 0);
// Set size to zero
size = 0;
values[0] = Integer.MAX_VALUE;
// Mark all array entries as empty. This is done with
// <code>firstEmpty</code> pointing to the first valid index (1 as 0 is
// used as 'Ground').
firstEmpty = 1;
// And setting all the <code>next[i]</code> to point at
// <code>i+1</code>.
for (int i = 1; i < this.capacity;) {
next[i] = ++i;
}
    // Surely, the last one should point to the 'Ground'.
next[this.capacity] = 0;
}
/**
* Checks if a given key exists in the map.
*
* @param key
* that is checked against the map data.
* @return true if the key exists in the map. false otherwise.
*/
public boolean containsKey(K key) {
return find(key) != 0;
}
/**
   * Checks if the given value exists in the map.<br>
   * This method iterates over the collection, trying to find an equal value.
   *
   * @param o
   *            value that is checked against the map data.
   * @return true if the value exists in the map, false otherwise.
*/
public boolean containsValue(int o) {
for (IntIterator iterator = iterator(); iterator.hasNext();) {
if (o == iterator.next()) {
return true;
}
}
return false;
}
/**
* Find the actual index of a given key.
*
* @return index of the key. zero if the key wasn't found.
*/
protected int find(K key) {
// Calculate the hash entry.
int baseHashIndex = calcBaseHashIndex(key);
// Start from the hash entry.
int localIndex = baseHash[baseHashIndex];
// while the index does not point to the 'Ground'
while (localIndex != 0) {
      // return the index found in case of a matching key.
if (keys[localIndex].equals(key)) {
return localIndex;
}
// next the local index
localIndex = next[localIndex];
}
// If we got this far, it could only mean we did not find the key we
// were asked for. return 'Ground' index.
return 0;
}
/**
   * Find the actual index of a given key, given its baseHashIndex.<br>
* Some methods use the baseHashIndex. If those call {@link #find} there's
* no need to re-calculate that hash.
*
* @return the index of the given key, or 0 as 'Ground' if the key wasn't
* found.
*/
private int findForRemove(K key, int baseHashIndex) {
// Start from the hash entry.
this.prev = 0;
int index = baseHash[baseHashIndex];
// while the index does not point to the 'Ground'
while (index != 0) {
      // return the index found in case of a matching key.
if (keys[index].equals(key)) {
return index;
}
// next the local index
prev = index;
index = next[index];
}
// If we got this far, it could only mean we did not find the key we
// were asked for. return 'Ground' index.
this.prev = 0;
return 0;
}
/**
* Returns the int mapped with the given key.
*
   * @param key
   *            object whose mapped value we're interested in.
   * @return the int mapped by the given key, or Integer.MAX_VALUE if the key
   *         wasn't found.
*/
public int get(K key) {
return values[find(key)];
}
/**
* Grows the map. Allocates a new map of double the capacity, and
   * fast-inserts the old key-value pairs.
*/
@SuppressWarnings("unchecked")
protected void grow() {
ObjectToIntMap<K> that = new ObjectToIntMap<K>(
this.capacity * 2);
// Iterates fast over the collection. Any valid pair is put into the new
// map without checking for duplicates or if there's enough space for
// it.
for (IndexIterator iterator = new IndexIterator(); iterator.hasNext();) {
int index = iterator.next();
that.prvt_put((K) this.keys[index], this.values[index]);
}
// Copy that's data into this.
this.capacity = that.capacity;
this.size = that.size;
this.firstEmpty = that.firstEmpty;
this.values = that.values;
this.keys = that.keys;
this.next = that.next;
this.baseHash = that.baseHash;
this.hashFactor = that.hashFactor;
}
/**
*
* @return true if the map is empty. false otherwise.
*/
public boolean isEmpty() {
return size == 0;
}
/**
* Returns a new iterator for the mapped objects.
*/
public IntIterator iterator() {
return new ValueIterator();
}
public Iterator<K> keyIterator() {
return new KeyIterator();
}
/**
* Prints the baseHash array, used for debug purposes.
*/
@SuppressWarnings("unused")
private String getBaseHashAsString() {
return Arrays.toString(baseHash);
}
/**
* Inserts the &lt;key,value&gt; pair into the map. If the key already exists,
* this method updates the mapped value to the given one, returning the old
* mapped value.
*
* @return the old mapped value, or 0 if the key didn't exist.
*/
public int put(K key, int e) {
    // Does the key exist?
int index = find(key);
// Yes!
if (index != 0) {
// Set new data and exit.
int old = values[index];
values[index] = e;
return old;
}
// Is there enough room for a new pair?
if (size == capacity) {
      // No? Then grow.
grow();
}
// Now that everything is set, the pair can be just put inside with no
// worries.
prvt_put(key, e);
return 0;
}
/**
* Removes a &lt;key,value&gt; pair from the map and returns the mapped value,
   * or 0 if none existed.
*
* @param key used to find the value to remove
* @return the removed value or 0 if none existed.
*/
public int remove(K key) {
int baseHashIndex = calcBaseHashIndex(key);
int index = findForRemove(key, baseHashIndex);
if (index != 0) {
// If it is the first in the collision list, we should promote its
// next colliding element.
if (prev == 0) {
baseHash[baseHashIndex] = next[index];
}
next[prev] = next[index];
next[index] = firstEmpty;
firstEmpty = index;
--size;
return values[index];
}
return 0;
}
/**
* @return number of pairs currently in the map
*/
public int size() {
return this.size;
}
/**
   * Translates the mapped pairs' values into an array of ints
   *
   * @return an int array of all the values currently in the map.
*/
public int[] toArray() {
int j = -1;
int[] array = new int[size];
// Iterates over the values, adding them to the array.
for (IntIterator iterator = iterator(); iterator.hasNext();) {
array[++j] = iterator.next();
}
return array;
}
/**
   * Translates the mapped pairs' values into an array of ints
*
* @param a
* the array into which the elements of the list are to be
* stored, if it is big enough; otherwise, use as much space as it can.
*
* @return an array containing the elements of the list
*
*/
public int[] toArray(int[] a) {
int j = 0;
// Iterates over the values, adding them to the array.
for (IntIterator iterator = iterator(); j < a.length
&& iterator.hasNext(); ++j) {
a[j] = iterator.next();
}
if (j < a.length) {
a[j] = Integer.MAX_VALUE;
}
return a;
}
@Override
public String toString() {
StringBuffer sb = new StringBuffer();
sb.append('{');
Iterator<K> keyIterator = keyIterator();
while (keyIterator.hasNext()) {
K key = keyIterator.next();
sb.append(key);
sb.append('=');
sb.append(get(key));
if (keyIterator.hasNext()) {
sb.append(',');
sb.append(' ');
}
}
sb.append('}');
return sb.toString();
}
@Override
public int hashCode() {
return getClass().hashCode() ^ size();
}
@SuppressWarnings("unchecked")
@Override
public boolean equals(Object o) {
ObjectToIntMap<K> that = (ObjectToIntMap<K>)o;
if (that.size() != this.size()) {
return false;
}
Iterator<K> it = keyIterator();
while (it.hasNext()) {
K key = it.next();
int v1 = this.get(key);
int v2 = that.get(key);
      // compare the int values directly; going through Float.compare can lose precision
      if (v1 != v2) {
return false;
}
}
return true;
}
}
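A minimal usage sketch of ObjectToIntMap (illustrative only, not part of the deleted file); it shows put/get/remove and key iteration, and that get returns Integer.MAX_VALUE (the 'Ground' value set in clear()) for a key that is not present.

ObjectToIntMap<String> ordinals = new ObjectToIntMap<String>();
ordinals.put("root", 0);
ordinals.put("child", 1);
int ord = ordinals.get("child");           // 1
int missing = ordinals.get("other");       // Integer.MAX_VALUE for an absent key
for (Iterator<String> it = ordinals.keyIterator(); it.hasNext();) {
  String key = it.next();                  // visit each key; ordinals.get(key) gives its value
}
int old = ordinals.remove("root");         // returns the removed value, or 0 if absent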

View File

@ -1,24 +0,0 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html>
<head>
<title>Facets Collections</title>
</head>
<body>
Various optimized Collections implementations.
</body>
</html>

View File

@ -1,180 +0,0 @@
package org.apache.lucene.facet.complements;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.HashMap;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.facet.old.Aggregator;
import org.apache.lucene.facet.old.CountingAggregator;
import org.apache.lucene.facet.old.OldFacetsAccumulator;
import org.apache.lucene.facet.old.ScoredDocIdsUtils;
import org.apache.lucene.facet.params.CategoryListParams;
import org.apache.lucene.facet.params.FacetIndexingParams;
import org.apache.lucene.facet.params.FacetSearchParams;
import org.apache.lucene.facet.search.CategoryListIterator;
import org.apache.lucene.facet.search.CountFacetRequest;
import org.apache.lucene.facet.search.FacetArrays;
import org.apache.lucene.facet.search.FacetRequest;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.util.PartitionsUtils;
import org.apache.lucene.index.IndexReader;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * Maintains total facet counts per partition, for given parameters:
 * <ul>
 * <li>Index reader of an index</li>
 * <li>Taxonomy index reader</li>
 * <li>Facet indexing params (and particularly the category list params)</li>
* </ul>
* The total facet counts are maintained as an array of arrays of integers,
* where a separate array is kept for each partition.
*
* @lucene.experimental
*/
public class TotalFacetCounts {
/** total facet counts per partition: totalCounts[partition][ordinal%partitionLength] */
private int[][] totalCounts = null;
private final TaxonomyReader taxonomy;
private final FacetIndexingParams facetIndexingParams;
private final static AtomicInteger atomicGen4Test = new AtomicInteger(1);
/** Creation type for test purposes */
enum CreationType { Computed, Loaded } // for testing
final int gen4test;
final CreationType createType4test;
/**
* Construct by key - from index Directory or by recomputing.
*/
private TotalFacetCounts (TaxonomyReader taxonomy, FacetIndexingParams facetIndexingParams,
int[][] counts, CreationType createType4Test) {
this.taxonomy = taxonomy;
this.facetIndexingParams = facetIndexingParams;
this.totalCounts = counts;
this.createType4test = createType4Test;
this.gen4test = atomicGen4Test.incrementAndGet();
}
/**
* Fill a partition's array with the TotalCountsArray values.
* @param partitionArray array to fill
* @param partition number of required partition
*/
public void fillTotalCountsForPartition(int[] partitionArray, int partition) {
int partitionSize = partitionArray.length;
int[] countArray = totalCounts[partition];
if (countArray == null) {
countArray = new int[partitionSize];
totalCounts[partition] = countArray;
}
int length = Math.min(partitionSize, countArray.length);
System.arraycopy(countArray, 0, partitionArray, 0, length);
}
/**
* Return the total count of an input category
* @param ordinal ordinal of category whose total count is required
*/
public int getTotalCount(int ordinal) {
int partition = PartitionsUtils.partitionNumber(facetIndexingParams,ordinal);
int offset = ordinal % PartitionsUtils.partitionSize(facetIndexingParams, taxonomy);
return totalCounts[partition][offset];
}
static TotalFacetCounts loadFromFile(File inputFile, TaxonomyReader taxonomy,
FacetIndexingParams facetIndexingParams) throws IOException {
DataInputStream dis = new DataInputStream(new BufferedInputStream(new FileInputStream(inputFile)));
try {
int[][] counts = new int[dis.readInt()][];
for (int i=0; i<counts.length; i++) {
int size = dis.readInt();
if (size<0) {
counts[i] = null;
} else {
counts[i] = new int[size];
for (int j=0; j<size; j++) {
counts[i][j] = dis.readInt();
}
}
}
return new TotalFacetCounts(taxonomy, facetIndexingParams, counts, CreationType.Loaded);
} finally {
dis.close();
}
}
static void storeToFile(File outputFile, TotalFacetCounts tfc) throws IOException {
DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(outputFile)));
try {
dos.writeInt(tfc.totalCounts.length);
for (int[] counts : tfc.totalCounts) {
if (counts == null) {
dos.writeInt(-1);
} else {
dos.writeInt(counts.length);
for (int i : counts) {
dos.writeInt(i);
}
}
}
} finally {
dos.close();
}
}
// needed because FacetSearchParams do not allow empty FacetRequests
private static final FacetRequest DUMMY_REQ = new CountFacetRequest(FacetLabel.EMPTY, 1);
static TotalFacetCounts compute(final IndexReader indexReader, final TaxonomyReader taxonomy,
final FacetIndexingParams facetIndexingParams) throws IOException {
int partitionSize = PartitionsUtils.partitionSize(facetIndexingParams, taxonomy);
final int[][] counts = new int[(int) Math.ceil(taxonomy.getSize() /(float) partitionSize)][partitionSize];
FacetSearchParams newSearchParams = new FacetSearchParams(facetIndexingParams, DUMMY_REQ);
//createAllListsSearchParams(facetIndexingParams, this.totalCounts);
OldFacetsAccumulator sfa = new OldFacetsAccumulator(newSearchParams, indexReader, taxonomy) {
@Override
protected HashMap<CategoryListIterator, Aggregator> getCategoryListMap(
FacetArrays facetArrays, int partition) throws IOException {
Aggregator aggregator = new CountingAggregator(counts[partition]);
HashMap<CategoryListIterator, Aggregator> map = new HashMap<CategoryListIterator, Aggregator>();
for (CategoryListParams clp: facetIndexingParams.getAllCategoryListParams()) {
map.put(clp.createCategoryListIterator(partition), aggregator);
}
return map;
}
};
sfa.setComplementThreshold(OldFacetsAccumulator.DISABLE_COMPLEMENT);
sfa.accumulate(ScoredDocIdsUtils.createAllDocsScoredDocIDs(indexReader));
return new TotalFacetCounts(taxonomy, facetIndexingParams, counts, CreationType.Computed);
}
}

View File

@ -1,299 +0,0 @@
package org.apache.lucene.facet.complements;
import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentLinkedQueue;
import org.apache.lucene.facet.params.CategoryListParams;
import org.apache.lucene.facet.params.FacetIndexingParams;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.IndexReader;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Manage an LRU cache for {@link TotalFacetCounts} per index, taxonomy, and
* facet indexing params.
*
* @lucene.experimental
*/
public final class TotalFacetCountsCache {
/**
* Default size of the in-memory cache for computed total facet counts.
* Set to 2 for the case when an application reopens a reader while the
* original one is still in use (otherwise the cache would keep switching
* back and forth between the two).
*/
public static final int DEFAULT_CACHE_SIZE = 2;
private static final TotalFacetCountsCache singleton = new TotalFacetCountsCache();
/**
* Get the single instance of this cache
*/
public static TotalFacetCountsCache getSingleton() {
return singleton;
}
/**
* In-memory cache of TFCs.
* <ul>
* <li>Its size is kept within limits through {@link #trimCache()}.
* <li>An LRU eviction policy is applied, by maintaining active keys in {@link #lruKeys}.
* <li>After each addition to the cache, trimCache is called, to remove entries least recently used.
* </ul>
* @see #markRecentlyUsed(TFCKey)
*/
private ConcurrentHashMap<TFCKey,TotalFacetCounts> cache = new ConcurrentHashMap<TFCKey,TotalFacetCounts>();
/**
* A queue of active keys for applying LRU policy on eviction from the {@link #cache}.
* @see #markRecentlyUsed(TFCKey)
*/
private ConcurrentLinkedQueue<TFCKey> lruKeys = new ConcurrentLinkedQueue<TFCKey>();
private int maxCacheSize = DEFAULT_CACHE_SIZE;
/** private constructor for singleton pattern */
private TotalFacetCountsCache() {
}
/**
* Get the total facet counts for a reader/taxonomy pair and facet indexing
* parameters. If not in cache, computed here and added to the cache for later
* use.
*
* @param indexReader
* the documents index
* @param taxonomy
* the taxonomy index
* @param facetIndexingParams
* facet indexing parameters
* @return the total facet counts.
*/
public TotalFacetCounts getTotalCounts(IndexReader indexReader, TaxonomyReader taxonomy,
FacetIndexingParams facetIndexingParams) throws IOException {
// create the key
TFCKey key = new TFCKey(indexReader, taxonomy, facetIndexingParams);
// it is important that this call is not synchronized, so that available TFC
// would not wait for one that needs to be computed.
TotalFacetCounts tfc = cache.get(key);
if (tfc != null) {
markRecentlyUsed(key);
return tfc;
}
return computeAndCache(key);
}
/**
* Mark the key as recently used.
* <p>
* <b>Implementation notes: Synchronization considerations and the interaction between lruKeys and cache:</b>
* <ol>
* <li>A concurrent {@link LinkedHashMap} would have made this class much simpler.
* But unfortunately, Java does not provide one.
* Instead, we combine two concurrent objects:
* <ul>
* <li>{@link ConcurrentHashMap} for the cached TFCs.
* <li>{@link ConcurrentLinkedQueue} for active keys
* </ul>
* <li>Both {@link #lruKeys} and {@link #cache} are concurrently safe.
* <li>Checks for a cached item through getTotalCounts() are not synchronized.
* Therefore, the case that a needed TFC is in the cache is very fast:
* it does not wait for the computation of other TFCs.
* <li>computeAndCache() is synchronized, and has a (double) check of the required
* TFC, to avoid computing the same TFC twice.
* <li>A race condition in this method (markRecentlyUsed) might result in two copies
* of the same 'key' in lruKeys, but this is handled by the loop in trimCache(),
* where an attempt to remove the same key twice is a no-op.
* </ol>
*/
private void markRecentlyUsed(TFCKey key) {
lruKeys.remove(key);
lruKeys.add(key);
}
private synchronized void trimCache() {
// loop until cache is of desired size.
while (cache.size()>maxCacheSize ) {
TFCKey key = lruKeys.poll();
if (key==null) { //defensive
// it is defensive since lruKeys presumably covers the cache keys
key = cache.keys().nextElement();
}
// remove this element. Note that an attempt to remove with the same key again is a no-op,
// which gracefully handles the possible race in markRecentlyUsed().
cache.remove(key);
}
}
/**
* Compute the TFC and cache it, after verifying it was not just added. For
* that reason this method is synchronized, which is not too bad because the
* computation itself is where most of the work is done.
*/
private synchronized TotalFacetCounts computeAndCache(TFCKey key) throws IOException {
TotalFacetCounts tfc = cache.get(key);
if (tfc == null) {
tfc = TotalFacetCounts.compute(key.indexReader, key.taxonomy, key.facetIndexingParams);
lruKeys.add(key);
cache.put(key,tfc);
trimCache();
}
return tfc;
}
/**
* Load {@link TotalFacetCounts} matching input parameters from the provided
* inputFile and add them into the cache for the provided indexReader,
* taxonomy, and facetIndexingParams. If a {@link TotalFacetCounts} for these
* parameters already exists in the cache, it will be replaced by the loaded
* one.
*
* @param inputFile
* file from which to read the data
* @param indexReader
* the documents index
* @param taxonomy
* the taxonomy index
* @param facetIndexingParams
* the facet indexing parameters
* @throws IOException
* on error
*/
public synchronized void load(File inputFile, IndexReader indexReader, TaxonomyReader taxonomy,
FacetIndexingParams facetIndexingParams) throws IOException {
if (!inputFile.isFile() || !inputFile.exists() || !inputFile.canRead()) {
throw new IllegalArgumentException("Exepecting an existing readable file: "+inputFile);
}
TFCKey key = new TFCKey(indexReader, taxonomy, facetIndexingParams);
TotalFacetCounts tfc = TotalFacetCounts.loadFromFile(inputFile, taxonomy, facetIndexingParams);
cache.put(key,tfc);
trimCache();
markRecentlyUsed(key);
}
/**
* Store the {@link TotalFacetCounts} matching input parameters into the
* provided outputFile, making them available for a later call to
* {@link #load(File, IndexReader, TaxonomyReader, FacetIndexingParams)}. If
* these {@link TotalFacetCounts} are available in the cache, they are used.
* But if they are not in the cache, this call will first compute them (which
* will also add them to the cache).
*
* @param outputFile
* file to store in.
* @param indexReader
* the documents index
* @param taxonomy
* the taxonomy index
* @param facetIndexingParams
* the facet indexing parameters
* @throws IOException
* on error
* @see #load(File, IndexReader, TaxonomyReader, FacetIndexingParams)
*/
public void store(File outputFile, IndexReader indexReader, TaxonomyReader taxonomy,
FacetIndexingParams facetIndexingParams) throws IOException {
File parentFile = outputFile.getParentFile();
if (
( outputFile.exists() && (!outputFile.isFile() || !outputFile.canWrite())) ||
(!outputFile.exists() && (!parentFile.isDirectory() || !parentFile.canWrite()))
) {
throw new IllegalArgumentException("Exepecting a writable file: "+outputFile);
}
TotalFacetCounts tfc = getTotalCounts(indexReader, taxonomy, facetIndexingParams);
TotalFacetCounts.storeToFile(outputFile, tfc);
}
private static class TFCKey {
final IndexReader indexReader;
final TaxonomyReader taxonomy;
private final Iterable<CategoryListParams> clps;
private final int hashCode;
private final int nDels; // needed when a reader used for faceted search was just used for deletion.
final FacetIndexingParams facetIndexingParams;
public TFCKey(IndexReader indexReader, TaxonomyReader taxonomy,
FacetIndexingParams facetIndexingParams) {
this.indexReader = indexReader;
this.taxonomy = taxonomy;
this.facetIndexingParams = facetIndexingParams;
this.clps = facetIndexingParams.getAllCategoryListParams();
this.nDels = indexReader.numDeletedDocs();
hashCode = indexReader.hashCode() ^ taxonomy.hashCode();
}
@Override
public int hashCode() {
return hashCode;
}
@Override
public boolean equals(Object other) {
TFCKey o = (TFCKey) other;
if (indexReader != o.indexReader || taxonomy != o.taxonomy || nDels != o.nDels) {
return false;
}
Iterator<CategoryListParams> it1 = clps.iterator();
Iterator<CategoryListParams> it2 = o.clps.iterator();
while (it1.hasNext() && it2.hasNext()) {
if (!it1.next().equals(it2.next())) {
return false;
}
}
return it1.hasNext() == it2.hasNext();
}
}
/**
* Clear the cache.
*/
public synchronized void clear() {
cache.clear();
lruKeys.clear();
}
/**
* @return the maximal cache size
*/
public int getCacheSize() {
return maxCacheSize;
}
/**
* Set the number of TotalFacetCounts arrays that will remain in the in-memory cache.
* <p>
* If new size is smaller than current size, the cache is appropriately trimmed.
* <p>
* Minimal size is 1, so passing zero or negative size would result in size of 1.
* @param size new size to set
*/
public void setCacheSize(int size) {
if (size < 1) size = 1;
int origSize = maxCacheSize;
maxCacheSize = size;
if (maxCacheSize < origSize) { // need to trim only if the cache was reduced
trimCache();
}
}
}
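A rough usage sketch of the cache (illustrative demo code, not part of the library): it assumes an IndexReader and a TaxonomyReader opened elsewhere, uses the default FacetIndexingParams, and a made-up file name for the store/load roundtrip. Both TotalFacetCounts and TotalFacetCountsCache are assumed to live in org.apache.lucene.facet.complements, as in the surrounding files.
import java.io.File;
import java.io.IOException;
import org.apache.lucene.facet.complements.TotalFacetCounts;
import org.apache.lucene.facet.complements.TotalFacetCountsCache;
import org.apache.lucene.facet.params.FacetIndexingParams;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.IndexReader;
public class TotalCountsCacheDemo {
  // reader and taxoReader are assumed to be opened (and closed) by the caller
  static void useCache(IndexReader reader, TaxonomyReader taxoReader) throws IOException {
    TotalFacetCountsCache cache = TotalFacetCountsCache.getSingleton();
    cache.setCacheSize(2); // keep at most two TFC arrays in memory
    // computed on first use, then served from the LRU cache
    TotalFacetCounts tfc = cache.getTotalCounts(reader, taxoReader, FacetIndexingParams.DEFAULT);
    System.out.println(tfc.getTotalCount(1)); // total count of the category with ordinal 1
    // optionally persist the counts so a later run can load them instead of recomputing
    File f = new File("total-counts.bin"); // illustrative path
    cache.store(f, reader, taxoReader, FacetIndexingParams.DEFAULT);
    cache.load(f, reader, taxoReader, FacetIndexingParams.DEFAULT);
  }
}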

View File

@ -1,27 +0,0 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html>
<head>
<title>Facets Complements counting</title>
</head>
<body>
Allows caching the total counts of categories, so that for a search which
returns a large number of results (&gt;60% of the segment size), the complement
set of the matching documents is counted instead. Useful for queries that visit a large
number of documents, e.g. overview queries.
</body>
</html>

View File

@ -1,115 +0,0 @@
package org.apache.lucene.facet.encoding;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* An {@link IntEncoder} which encodes values in chunks. Implementations of this
* class assume the data which needs encoding consists of small, consecutive
* values, and therefore the encoder is able to compress them better. You can
* read more on the two implementations {@link FourFlagsIntEncoder} and
* {@link EightFlagsIntEncoder}.
* <p>
* Extensions of this class need to implement {@link #encode(IntsRef, BytesRef)}
* in order to build the proper indicator (flags). When enough values have been
* accumulated (typically the batch size), extensions can call
* {@link #encodeChunk(BytesRef)} to flush the indicator and the rest of the
* values.
* <p>
* <b>NOTE:</b> flags encoders do not accept values &le; 0 (zero) in their
* {@link #encode(IntsRef, BytesRef)}. For performance reasons they do not check
* that condition; however, if such a value is passed, the resulting stream may be
* corrupt or an exception may be thrown. Also, these encoders perform best
* when there are many consecutive small values (depends on the encoder
* implementation). If that is not the case, the encoder will occupy 1 more byte
* for every <i>batch</i> number of integers, over whatever
* {@link VInt8IntEncoder} would have occupied. Therefore make sure to check
* whether your data fits into the conditions of the specific encoder.
* <p>
* For the reasons mentioned above, these encoders are usually chained with
* {@link UniqueValuesIntEncoder} and {@link DGapIntEncoder}.
*
* @lucene.experimental
*/
public abstract class ChunksIntEncoder extends IntEncoder {
/** Holds the values which must be encoded, outside the indicator. */
protected final IntsRef encodeQueue;
/** Represents bits flag byte. */
protected int indicator = 0;
/** Counts the current ordinal of the encoded value. */
protected byte ordinal = 0;
protected ChunksIntEncoder(int chunkSize) {
encodeQueue = new IntsRef(chunkSize);
}
/**
* Encodes the values of the current chunk. First it writes the indicator, and
* then it encodes the values outside the indicator.
*/
protected void encodeChunk(BytesRef buf) {
// ensure there's enough room in the buffer
int maxBytesRequired = buf.length + 1 + encodeQueue.length * 4; /* indicator + at most 4 bytes per positive VInt */
if (buf.bytes.length < maxBytesRequired) {
buf.grow(maxBytesRequired);
}
buf.bytes[buf.length++] = ((byte) indicator);
for (int i = 0; i < encodeQueue.length; i++) {
// it is better if the encoding is inlined like so, and not e.g.
// in a utility method
int value = encodeQueue.ints[i];
if ((value & ~0x7F) == 0) {
buf.bytes[buf.length] = (byte) value;
buf.length++;
} else if ((value & ~0x3FFF) == 0) {
buf.bytes[buf.length] = (byte) (0x80 | ((value & 0x3F80) >> 7));
buf.bytes[buf.length + 1] = (byte) (value & 0x7F);
buf.length += 2;
} else if ((value & ~0x1FFFFF) == 0) {
buf.bytes[buf.length] = (byte) (0x80 | ((value & 0x1FC000) >> 14));
buf.bytes[buf.length + 1] = (byte) (0x80 | ((value & 0x3F80) >> 7));
buf.bytes[buf.length + 2] = (byte) (value & 0x7F);
buf.length += 3;
} else if ((value & ~0xFFFFFFF) == 0) {
buf.bytes[buf.length] = (byte) (0x80 | ((value & 0xFE00000) >> 21));
buf.bytes[buf.length + 1] = (byte) (0x80 | ((value & 0x1FC000) >> 14));
buf.bytes[buf.length + 2] = (byte) (0x80 | ((value & 0x3F80) >> 7));
buf.bytes[buf.length + 3] = (byte) (value & 0x7F);
buf.length += 4;
} else {
buf.bytes[buf.length] = (byte) (0x80 | ((value & 0xF0000000) >> 28));
buf.bytes[buf.length + 1] = (byte) (0x80 | ((value & 0xFE00000) >> 21));
buf.bytes[buf.length + 2] = (byte) (0x80 | ((value & 0x1FC000) >> 14));
buf.bytes[buf.length + 3] = (byte) (0x80 | ((value & 0x3F80) >> 7));
buf.bytes[buf.length + 4] = (byte) (value & 0x7F);
buf.length += 5;
}
}
ordinal = 0;
indicator = 0;
encodeQueue.length = 0;
}
}
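To make the chaining mentioned above concrete, here is a minimal roundtrip sketch (hypothetical demo code, not part of the library): it wires SortingIntEncoder, UniqueValuesIntEncoder and DGapIntEncoder around an EightFlagsIntEncoder, encodes a handful of ordinals and decodes them back with the matching decoder.
import org.apache.lucene.facet.encoding.DGapIntEncoder;
import org.apache.lucene.facet.encoding.EightFlagsIntEncoder;
import org.apache.lucene.facet.encoding.IntEncoder;
import org.apache.lucene.facet.encoding.SortingIntEncoder;
import org.apache.lucene.facet.encoding.UniqueValuesIntEncoder;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
public class EncoderChainDemo {
  public static void main(String[] args) {
    // typical chain: sort, drop duplicates, d-gap, then flag-encode the gaps
    IntEncoder encoder = new SortingIntEncoder(
        new UniqueValuesIntEncoder(new DGapIntEncoder(new EightFlagsIntEncoder())));
    IntsRef values = new IntsRef(new int[] { 6, 16, 5, 9, 7, 1 }, 0, 6); // category ordinals
    BytesRef buf = new BytesRef(); // the encoders grow the buffer as needed
    encoder.encode(values, buf);   // NOTE: the chain rewrites 'values' in place
    IntsRef decoded = new IntsRef(16);
    encoder.createMatchingDecoder().decode(buf, decoded);
    for (int i = 0; i < decoded.length; i++) {
      System.out.print(decoded.ints[i] + " "); // 1 5 6 7 9 16 (sorted, unique)
    }
  }
}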

View File

@ -1,52 +0,0 @@
package org.apache.lucene.facet.encoding;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* An {@link IntDecoder} which wraps another decoder and reverts the d-gap that
* was encoded by {@link DGapIntEncoder}.
*
* @lucene.experimental
*/
public final class DGapIntDecoder extends IntDecoder {
private final IntDecoder decoder;
public DGapIntDecoder(IntDecoder decoder) {
this.decoder = decoder;
}
@Override
public void decode(BytesRef buf, IntsRef values) {
decoder.decode(buf, values);
int prev = 0;
for (int i = 0; i < values.length; i++) {
values.ints[i] += prev;
prev = values.ints[i];
}
}
@Override
public String toString() {
return "DGap(" + decoder.toString() + ")";
}
}

View File

@ -1,67 +0,0 @@
package org.apache.lucene.facet.encoding;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* An {@link IntEncoderFilter} which encodes the gap between the given values,
* rather than the values themselves. This encoder usually yields better
* encoding performance space-wise (i.e., the final encoded values consume less
* space) if the values are 'close' to each other.
* <p>
* <b>NOTE:</b> this encoder assumes the values are given to
* {@link #encode(IntsRef, BytesRef)} in an ascending sorted manner, which ensures only
* positive values are encoded and thus yields better performance. If you are
* not sure whether the values are sorted or not, it is possible to chain this
* encoder with {@link SortingIntEncoder} to ensure the values will be
* sorted before encoding.
*
* @lucene.experimental
*/
public final class DGapIntEncoder extends IntEncoderFilter {
/** Initializes with the given encoder. */
public DGapIntEncoder(IntEncoder encoder) {
super(encoder);
}
@Override
public void encode(IntsRef values, BytesRef buf) {
int prev = 0;
int upto = values.offset + values.length;
for (int i = values.offset; i < upto; i++) {
int tmp = values.ints[i];
values.ints[i] -= prev;
prev = tmp;
}
encoder.encode(values, buf);
}
@Override
public IntDecoder createMatchingDecoder() {
return new DGapIntDecoder(encoder.createMatchingDecoder());
}
@Override
public String toString() {
return "DGap(" + encoder.toString() + ")";
}
}
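A minimal sketch (hypothetical demo) of the d-gap transform over a VInt8 payload; the input must already be sorted, and the encoder rewrites it in place:
import org.apache.lucene.facet.encoding.DGapIntEncoder;
import org.apache.lucene.facet.encoding.VInt8IntEncoder;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
public class DGapDemo {
  public static void main(String[] args) {
    IntsRef values = new IntsRef(new int[] { 1, 5, 6, 7, 9, 16 }, 0, 6); // already sorted
    BytesRef buf = new BytesRef();
    new DGapIntEncoder(new VInt8IntEncoder()).encode(values, buf);
    // 'values' now holds the gaps 1, 4, 1, 1, 2, 7; each fits in a single VInt8 byte
    System.out.println(buf.length); // 6
  }
}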

View File

@ -1,67 +0,0 @@
package org.apache.lucene.facet.encoding;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.RamUsageEstimator;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Decodes values encoded by {@link DGapVInt8IntEncoder}.
*
* @lucene.experimental
*/
public final class DGapVInt8IntDecoder extends IntDecoder {
@Override
public void decode(BytesRef buf, IntsRef values) {
values.offset = values.length = 0;
// grow the buffer up front, even if by a large number of values (buf.length)
// that saves the need to check inside the loop for every decoded value if
// the buffer needs to grow.
if (values.ints.length < buf.length) {
values.ints = new int[ArrayUtil.oversize(buf.length, RamUsageEstimator.NUM_BYTES_INT)];
}
// it is better if the decoding is inlined like so, and not e.g.
// in a utility method
int upto = buf.offset + buf.length;
int value = 0;
int offset = buf.offset;
int prev = 0;
while (offset < upto) {
byte b = buf.bytes[offset++];
if (b >= 0) {
values.ints[values.length] = ((value << 7) | b) + prev;
value = 0;
prev = values.ints[values.length];
values.length++;
} else {
value = (value << 7) | (b & 0x7F);
}
}
}
@Override
public String toString() {
return "DGapVInt8";
}
}

View File

@ -1,89 +0,0 @@
package org.apache.lucene.facet.encoding;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* An {@link IntEncoder} which implements variable length encoding for the gap
* between values. It's a specialized form of the combination of
* {@link DGapIntEncoder} and {@link VInt8IntEncoder}.
*
* @see VInt8IntEncoder
* @see DGapIntEncoder
*
* @lucene.experimental
*/
public final class DGapVInt8IntEncoder extends IntEncoder {
@Override
public void encode(IntsRef values, BytesRef buf) {
buf.offset = buf.length = 0;
int maxBytesNeeded = 5 * values.length; // at most 5 bytes per VInt
if (buf.bytes.length < maxBytesNeeded) {
buf.grow(maxBytesNeeded);
}
int upto = values.offset + values.length;
int prev = 0;
for (int i = values.offset; i < upto; i++) {
// it is better if the encoding is inlined like so, and not e.g.
// in a utility method
int value = values.ints[i] - prev;
if ((value & ~0x7F) == 0) {
buf.bytes[buf.length] = (byte) value;
buf.length++;
} else if ((value & ~0x3FFF) == 0) {
buf.bytes[buf.length] = (byte) (0x80 | ((value & 0x3F80) >> 7));
buf.bytes[buf.length + 1] = (byte) (value & 0x7F);
buf.length += 2;
} else if ((value & ~0x1FFFFF) == 0) {
buf.bytes[buf.length] = (byte) (0x80 | ((value & 0x1FC000) >> 14));
buf.bytes[buf.length + 1] = (byte) (0x80 | ((value & 0x3F80) >> 7));
buf.bytes[buf.length + 2] = (byte) (value & 0x7F);
buf.length += 3;
} else if ((value & ~0xFFFFFFF) == 0) {
buf.bytes[buf.length] = (byte) (0x80 | ((value & 0xFE00000) >> 21));
buf.bytes[buf.length + 1] = (byte) (0x80 | ((value & 0x1FC000) >> 14));
buf.bytes[buf.length + 2] = (byte) (0x80 | ((value & 0x3F80) >> 7));
buf.bytes[buf.length + 3] = (byte) (value & 0x7F);
buf.length += 4;
} else {
buf.bytes[buf.length] = (byte) (0x80 | ((value & 0xF0000000) >> 28));
buf.bytes[buf.length + 1] = (byte) (0x80 | ((value & 0xFE00000) >> 21));
buf.bytes[buf.length + 2] = (byte) (0x80 | ((value & 0x1FC000) >> 14));
buf.bytes[buf.length + 3] = (byte) (0x80 | ((value & 0x3F80) >> 7));
buf.bytes[buf.length + 4] = (byte) (value & 0x7F);
buf.length += 5;
}
prev = values.ints[i];
}
}
@Override
public IntDecoder createMatchingDecoder() {
return new DGapVInt8IntDecoder();
}
@Override
public String toString() {
return "DGapVInt8";
}
}
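As a small sanity sketch (hypothetical demo, not from the Lucene tests), the specialized encoder should produce exactly the same bytes as chaining DGapIntEncoder over VInt8IntEncoder; separate copies of the input are used because DGapIntEncoder rewrites its input in place.
import org.apache.lucene.facet.encoding.DGapIntEncoder;
import org.apache.lucene.facet.encoding.DGapVInt8IntEncoder;
import org.apache.lucene.facet.encoding.VInt8IntEncoder;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
public class DGapVInt8Demo {
  public static void main(String[] args) {
    int[] sorted = { 1, 5, 6, 7, 9, 16 }; // must be sorted and positive
    IntsRef a = new IntsRef(sorted.clone(), 0, sorted.length);
    IntsRef b = new IntsRef(sorted.clone(), 0, sorted.length);
    BytesRef specialized = new BytesRef();
    BytesRef chained = new BytesRef();
    new DGapVInt8IntEncoder().encode(a, specialized);
    new DGapIntEncoder(new VInt8IntEncoder()).encode(b, chained); // turns b into gaps
    System.out.println(specialized.bytesEquals(chained)); // expected: true
  }
}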

View File

@ -1,92 +0,0 @@
package org.apache.lucene.facet.encoding;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Decodes values encoded with {@link EightFlagsIntEncoder}.
*
* @lucene.experimental
*/
public class EightFlagsIntDecoder extends IntDecoder {
/*
* Holds all combinations of <i>indicator</i> for fast decoding (saves time
* on real-time bit manipulation)
*/
private static final byte[][] DECODE_TABLE = new byte[256][8];
/** Generating all combinations of <i>indicator</i> into separate flags. */
static {
for (int i = 256; i != 0;) {
--i;
for (int j = 8; j != 0;) {
--j;
DECODE_TABLE[i][j] = (byte) ((i >>> j) & 0x1);
}
}
}
@Override
public void decode(BytesRef buf, IntsRef values) {
values.offset = values.length = 0;
int upto = buf.offset + buf.length;
int offset = buf.offset;
while (offset < upto) {
// read indicator
int indicator = buf.bytes[offset++] & 0xFF;
int ordinal = 0;
int capacityNeeded = values.length + 8;
if (values.ints.length < capacityNeeded) {
values.grow(capacityNeeded);
}
// process indicator, until we read 8 values, or end-of-buffer
while (ordinal != 8) {
if (DECODE_TABLE[indicator][ordinal++] == 0) {
if (offset == upto) { // end of buffer
return;
}
// it is better if the decoding is inlined like so, and not e.g.
// in a utility method
int value = 0;
while (true) {
byte b = buf.bytes[offset++];
if (b >= 0) {
values.ints[values.length++] = ((value << 7) | b) + 2;
break;
} else {
value = (value << 7) | (b & 0x7F);
}
}
} else {
values.ints[values.length++] = 1;
}
}
}
}
@Override
public String toString() {
return "EightFlags(VInt8)";
}
}

View File

@ -1,96 +0,0 @@
package org.apache.lucene.facet.encoding;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* A {@link ChunksIntEncoder} which encodes data in chunks of 8. Every group
* starts with a single byte (called indicator) which represents eight 1-bit
* flags, where the value:
* <ul>
* <li>1 means the encoded value is '1'
* <li>0 means the value is encoded using {@link VInt8IntEncoder}, and the
* encoded bytes follow the indicator.<br>
* Since value 0 is illegal, and 1 is encoded in the indicator, the actual value
* that is encoded is <code>value-2</code>, which saves some more bits.
* </ul>
* Encoding example:
* <ul>
* <li>Original values: 6, 16, 5, 9, 7, 1
* <li>After sorting: 1, 5, 6, 7, 9, 16
* <li>D-Gap computing: 1, 4, 1, 1, 2, 7 (so far - done by
* {@link DGapIntEncoder})
* <li>Encoding: 1,0,1,1,0,0,0,0 as the indicator, followed by 2 (4-2), 0 (2-2), 5 (7-2).
* <li>Binary encode: <u>0 | 0 | 0 | 0 | 1 | 1 | 0 | 1</u> 00000010 00000000
* 00000101 (indicator is <u>underlined</u>).<br>
* <b>NOTE:</b> the order of the values in the indicator is lsb &rArr; msb,
* which allows for more efficient decoding.
* </ul>
*
* @lucene.experimental
*/
public class EightFlagsIntEncoder extends ChunksIntEncoder {
/*
* Holds all combinations of <i>indicator</i> flags for fast encoding (saves
* time on bit manipulation at encode time)
*/
private static final byte[] ENCODE_TABLE = new byte[] { 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, (byte) 0x80 };
public EightFlagsIntEncoder() {
super(8);
}
@Override
public void encode(IntsRef values, BytesRef buf) {
buf.offset = buf.length = 0;
int upto = values.offset + values.length;
for (int i = values.offset; i < upto; i++) {
int value = values.ints[i];
if (value == 1) {
indicator |= ENCODE_TABLE[ordinal];
} else {
encodeQueue.ints[encodeQueue.length++] = value - 2;
}
++ordinal;
// encode the chunk and the indicator
if (ordinal == 8) {
encodeChunk(buf);
}
}
// encode remaining values
if (ordinal != 0) {
encodeChunk(buf);
}
}
@Override
public IntDecoder createMatchingDecoder() {
return new EightFlagsIntDecoder();
}
@Override
public String toString() {
return "EightFlags(VInt)";
}
}
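A tiny sketch (hypothetical demo) that feeds the d-gapped values from the example above directly into the encoder and prints the resulting bytes: one indicator byte followed by the VInt8-encoded non-'1' values, each reduced by 2.
import org.apache.lucene.facet.encoding.EightFlagsIntEncoder;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
public class EightFlagsDemo {
  public static void main(String[] args) {
    // the d-gap values from the javadoc example: 1, 4, 1, 1, 2, 7
    IntsRef gaps = new IntsRef(new int[] { 1, 4, 1, 1, 2, 7 }, 0, 6);
    BytesRef buf = new BytesRef();
    new EightFlagsIntEncoder().encode(gaps, buf);
    for (int i = buf.offset; i < buf.offset + buf.length; i++) {
      System.out.printf("%02x ", buf.bytes[i] & 0xFF); // expected: 0d 02 00 05
    }
  }
}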

View File

@ -1,92 +0,0 @@
package org.apache.lucene.facet.encoding;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Decodes values encoded with {@link FourFlagsIntEncoder}.
*
* @lucene.experimental
*/
public class FourFlagsIntDecoder extends IntDecoder {
/**
* Holds all combinations of <i>indicator</i> for fast decoding (saves time
* on real-time bit manipulation)
*/
private final static byte[][] DECODE_TABLE = new byte[256][4];
/** Generating all combinations of <i>indicator</i> into separate flags. */
static {
for (int i = 256; i != 0;) {
--i;
for (int j = 4; j != 0;) {
--j;
DECODE_TABLE[i][j] = (byte) ((i >>> (j << 1)) & 0x3);
}
}
}
@Override
public void decode(BytesRef buf, IntsRef values) {
values.offset = values.length = 0;
int upto = buf.offset + buf.length;
int offset = buf.offset;
while (offset < upto) {
// read indicator
int indicator = buf.bytes[offset++] & 0xFF;
int ordinal = 0;
int capacityNeeded = values.length + 4;
if (values.ints.length < capacityNeeded) {
values.grow(capacityNeeded);
}
while (ordinal != 4) {
byte decodeVal = DECODE_TABLE[indicator][ordinal++];
if (decodeVal == 0) {
if (offset == upto) { // end of buffer
return;
}
// it is better if the decoding is inlined like so, and not e.g.
// in a utility method
int value = 0;
while (true) {
byte b = buf.bytes[offset++];
if (b >= 0) {
values.ints[values.length++] = ((value << 7) | b) + 4;
break;
} else {
value = (value << 7) | (b & 0x7F);
}
}
} else {
values.ints[values.length++] = decodeVal;
}
}
}
}
@Override
public String toString() {
return "FourFlags(VInt)";
}
}

View File

@ -1,102 +0,0 @@
package org.apache.lucene.facet.encoding;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* A {@link ChunksIntEncoder} which encodes values in chunks of 4. Every group
* starts with a single byte (called indicator) which represents four 2-bit
* flags, where the values:
* <ul>
* <li>1, 2 or 3 mean the encoded value is '1', '2' or '3' respectively.
* <li>0 means the value is encoded using {@link VInt8IntEncoder}, and the
* encoded bytes follow the indicator.<br>
* Since value 0 is illegal, and 1-3 are encoded in the indicator, the actual
* value that is encoded is <code>value-4</code>, which saves some more bits.
* </ul>
* Encoding example:
* <ul>
* <li>Original values: 6, 16, 5, 9, 7, 1, 11
* <li>After sorting: 1, 5, 6, 7, 9, 11, 16
* <li>D-Gap computing: 1, 4, 1, 1, 2, 2, 5 (so far - done by
* {@link DGapIntEncoder})
* <li>Encoding: 1,0,1,1 as the first indicator, followed by 0 (4-4), then
* 2,2,0,0 as the second indicator, followed by 1 (5-4) encoded with
* {@link VInt8IntEncoder}.
* <li>Binary encode: <u>01 | 01 | 00 | 01</u> 00000000 <u>00 | 00 | 10 | 10</u>
* 00000001 (indicators are <u>underlined</u>).<br>
* <b>NOTE:</b> the order of the values in the indicator is lsb &rArr; msb,
* which allows for more efficient decoding.
* </ul>
*
* @lucene.experimental
*/
public class FourFlagsIntEncoder extends ChunksIntEncoder {
/*
* Holds all combinations of <i>indicator</i> flags for fast encoding (saves
* time on bit manipulation @ encode time)
*/
private static final byte[][] ENCODE_TABLE = new byte[][] {
new byte[] { 0x00, 0x00, 0x00, 0x00 },
new byte[] { 0x01, 0x04, 0x10, 0x40 },
new byte[] { 0x02, 0x08, 0x20, (byte) 0x80 },
new byte[] { 0x03, 0x0C, 0x30, (byte) 0xC0 },
};
public FourFlagsIntEncoder() {
super(4);
}
@Override
public void encode(IntsRef values, BytesRef buf) {
buf.offset = buf.length = 0;
int upto = values.offset + values.length;
for (int i = values.offset; i < upto; i++) {
int value = values.ints[i];
if (value <= 3) {
indicator |= ENCODE_TABLE[value][ordinal];
} else {
encodeQueue.ints[encodeQueue.length++] = value - 4;
}
++ordinal;
// encode the chunk and the indicator
if (ordinal == 4) {
encodeChunk(buf);
}
}
// encode remaining values
if (ordinal != 0) {
encodeChunk(buf);
}
}
@Override
public IntDecoder createMatchingDecoder() {
return new FourFlagsIntDecoder();
}
@Override
public String toString() {
return "FourFlags(VInt)";
}
}

View File

@ -1,37 +0,0 @@
package org.apache.lucene.facet.encoding;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Decodes integers from a set {@link BytesRef}.
*
* @lucene.experimental
*/
public abstract class IntDecoder {
/**
* Decodes the values from the buffer into the given {@link IntsRef}. Note
* that {@code values.offset} is set to 0, and {@code values.length} is
* updated to denote the number of decoded values.
*/
public abstract void decode(BytesRef buf, IntsRef values);
}

View File

@ -1,46 +0,0 @@
package org.apache.lucene.facet.encoding;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Encodes integers to a set {@link BytesRef}. For convenience, each encoder
* implements {@link #createMatchingDecoder()} for easy access to the matching
* decoder.
*
* @lucene.experimental
*/
public abstract class IntEncoder {
public IntEncoder() {}
/**
* Encodes the values to the given buffer. Note that the buffer's offset and
* length are set to 0.
*/
public abstract void encode(IntsRef values, BytesRef buf);
/**
* Returns an {@link IntDecoder} which can decode the values that were encoded
* with this encoder.
*/
public abstract IntDecoder createMatchingDecoder();
}

View File

@ -1,34 +0,0 @@
package org.apache.lucene.facet.encoding;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* An abstract implementation of {@link IntEncoder} which wraps another encoder.
*
* @lucene.experimental
*/
public abstract class IntEncoderFilter extends IntEncoder {
protected final IntEncoder encoder;
protected IntEncoderFilter(IntEncoder encoder) {
this.encoder = encoder;
}
}

View File

@ -1,86 +0,0 @@
package org.apache.lucene.facet.encoding;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Decodes values encoded with {@link NOnesIntEncoder}.
*
* @lucene.experimental
*/
public class NOnesIntDecoder extends FourFlagsIntDecoder {
// Number of consecutive '1's to generate upon decoding a '2'
private final int n;
private final IntsRef internalBuffer;
/**
* Constructs a decoder with a given N (Number of consecutive '1's which are
* translated into a single target value '2').
*/
public NOnesIntDecoder(int n) {
this.n = n;
// initial size (room for 100 integers)
internalBuffer = new IntsRef(100);
}
@Override
public void decode(BytesRef buf, IntsRef values) {
values.offset = values.length = 0;
internalBuffer.length = 0;
super.decode(buf, internalBuffer);
if (values.ints.length < internalBuffer.length) {
// need space for internalBuffer.length to internalBuffer.length*N,
// grow mildly at first
values.grow(internalBuffer.length * n/2);
}
for (int i = 0; i < internalBuffer.length; i++) {
int decode = internalBuffer.ints[i];
if (decode == 1) {
if (values.length == values.ints.length) {
values.grow(values.length + 10); // grow by few items, however not too many
}
// 1 is 1
values.ints[values.length++] = 1;
} else if (decode == 2) {
if (values.length + n >= values.ints.length) {
values.grow(values.length + n); // grow by few items, however not too many
}
// '2' means N 1's
for (int j = 0; j < n; j++) {
values.ints[values.length++] = 1;
}
} else {
if (values.length == values.ints.length) {
values.grow(values.length + 10); // grow by few items, however not too many
}
// any other value is val-1
values.ints[values.length++] = decode - 1;
}
}
}
@Override
public String toString() {
return "NOnes(" + n + ") (" + super.toString() + ")";
}
}

View File

@ -1,114 +0,0 @@
package org.apache.lucene.facet.encoding;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* A variation of {@link FourFlagsIntEncoder} which translates the data as
* follows:
* <ul>
* <li>Values &ge; 2 are translated to <code>value+1</code> (2 &rArr; 3, 3
* &rArr; 4 and so forth).
* <li>Any <code>N</code> occurrences of 1 are encoded as a single 2.
* <li>Otherwise, each 1 is encoded as 1.
* </ul>
* <p>
* Encoding examples:
* <ul>
* <li>N = 4: the data 1,1,1,1,1 is translated to: 2, 1
* <li>N = 3: the data 1,2,3,4,1,1,1,1,5 is translated to 1,3,4,5,2,1,6
* </ul>
* <b>NOTE:</b> this encoder does not support values &le; 0 and
* {@link Integer#MAX_VALUE}. 0 is not supported because it's not supported by
* {@link FourFlagsIntEncoder} and {@link Integer#MAX_VALUE} because this
* encoder translates N to N+1, which will cause an overflow and
* {@link Integer#MAX_VALUE} will become a negative number, which is not
* supported either.<br>
* This does not mean you cannot encode {@link Integer#MAX_VALUE}. If it is not
* the first value to encode, and you wrap this encoder with
* {@link DGapIntEncoder}, then the value that will be sent to this encoder will
* be <code>MAX_VAL - prev</code>.
*
* @lucene.experimental
*/
public class NOnesIntEncoder extends FourFlagsIntEncoder {
private final IntsRef internalBuffer;
/** Number of consecutive '1's to be translated into single target value '2'. */
private final int n;
/**
* Constructs an encoder with a given value of N (N: Number of consecutive
* '1's to be translated into single target value '2').
*/
public NOnesIntEncoder(int n) {
this.n = n;
internalBuffer = new IntsRef(n);
}
@Override
public void encode(IntsRef values, BytesRef buf) {
internalBuffer.length = 0;
// make sure the internal buffer is large enough
if (values.length > internalBuffer.ints.length) {
internalBuffer.grow(values.length);
}
int onesCounter = 0;
int upto = values.offset + values.length;
for (int i = values.offset; i < upto; i++) {
int value = values.ints[i];
if (value == 1) {
// every N 1's should be encoded as '2'
if (++onesCounter == n) {
internalBuffer.ints[internalBuffer.length++] = 2;
onesCounter = 0;
}
} else {
// there might have been 1's that we need to encode
while (onesCounter > 0) {
--onesCounter;
internalBuffer.ints[internalBuffer.length++] = 1;
}
// encode value as value+1
internalBuffer.ints[internalBuffer.length++] = value + 1;
}
}
// there might have been 1's that we need to encode
while (onesCounter > 0) {
--onesCounter;
internalBuffer.ints[internalBuffer.length++] = 1;
}
super.encode(internalBuffer, buf);
}
@Override
public IntDecoder createMatchingDecoder() {
return new NOnesIntDecoder(n);
}
@Override
public String toString() {
return "NOnes(" + n + ") (" + super.toString() + ")";
}
}
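A short roundtrip sketch (hypothetical demo) using the second example above with N = 3; the matching decoder restores the original run of ones.
import org.apache.lucene.facet.encoding.IntDecoder;
import org.apache.lucene.facet.encoding.IntEncoder;
import org.apache.lucene.facet.encoding.NOnesIntEncoder;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
public class NOnesDemo {
  public static void main(String[] args) {
    int[] data = { 1, 2, 3, 4, 1, 1, 1, 1, 5 }; // internally becomes 1,3,4,5,2,1,6
    IntEncoder encoder = new NOnesIntEncoder(3);
    BytesRef buf = new BytesRef();
    encoder.encode(new IntsRef(data, 0, data.length), buf);
    IntDecoder decoder = encoder.createMatchingDecoder(); // a NOnesIntDecoder with N = 3
    IntsRef decoded = new IntsRef(16);
    decoder.decode(buf, decoded);
    for (int i = 0; i < decoded.length; i++) {
      System.out.print(decoded.ints[i] + " "); // 1 2 3 4 1 1 1 1 5
    }
  }
}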

View File

@ -1,56 +0,0 @@
package org.apache.lucene.facet.encoding;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.RamUsageEstimator;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Decodes values encoded with {@link SimpleIntEncoder}.
*
* @lucene.experimental
*/
public final class SimpleIntDecoder extends IntDecoder {
@Override
public void decode(BytesRef buf, IntsRef values) {
values.offset = values.length = 0;
int numValues = buf.length / 4; // every value is 4 bytes
if (values.ints.length < numValues) { // offset and length are 0
values.ints = new int[ArrayUtil.oversize(numValues, RamUsageEstimator.NUM_BYTES_INT)];
}
int offset = buf.offset;
int upto = buf.offset + buf.length;
while (offset < upto) {
values.ints[values.length++] =
((buf.bytes[offset++] & 0xFF) << 24) |
((buf.bytes[offset++] & 0xFF) << 16) |
((buf.bytes[offset++] & 0xFF) << 8) |
(buf.bytes[offset++] & 0xFF);
}
}
@Override
public String toString() {
return "Simple";
}
}

View File

@ -1,59 +0,0 @@
package org.apache.lucene.facet.encoding;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* A simple {@link IntEncoder}, writing an integer as 4 raw bytes.
*
* @lucene.experimental
*/
public final class SimpleIntEncoder extends IntEncoder {
@Override
public void encode(IntsRef values, BytesRef buf) {
buf.offset = buf.length = 0;
// ensure there's enough room in the buffer
int bytesNeeded = values.length * 4;
if (buf.bytes.length < bytesNeeded) {
buf.grow(bytesNeeded);
}
int upto = values.offset + values.length;
for (int i = values.offset; i < upto; i++) {
int value = values.ints[i];
buf.bytes[buf.length++] = (byte) (value >>> 24);
buf.bytes[buf.length++] = (byte) ((value >> 16) & 0xFF);
buf.bytes[buf.length++] = (byte) ((value >> 8) & 0xFF);
buf.bytes[buf.length++] = (byte) (value & 0xFF);
}
}
@Override
public IntDecoder createMatchingDecoder() {
return new SimpleIntDecoder();
}
@Override
public String toString() {
return "Simple";
}
}
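A minimal roundtrip sketch (hypothetical demo): every value occupies exactly four bytes, most significant byte first.
import org.apache.lucene.facet.encoding.SimpleIntEncoder;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
public class SimpleIntDemo {
  public static void main(String[] args) {
    IntsRef values = new IntsRef(new int[] { 1, 255, 65536 }, 0, 3);
    BytesRef buf = new BytesRef();
    new SimpleIntEncoder().encode(values, buf); // 12 bytes: 4 per value, big-endian
    IntsRef decoded = new IntsRef(3);
    new SimpleIntEncoder().createMatchingDecoder().decode(buf, decoded);
    System.out.println(decoded.length + " values from " + buf.length + " bytes"); // 3 values from 12 bytes
  }
}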

View File

@ -1,54 +0,0 @@
package org.apache.lucene.facet.encoding;
import java.util.Arrays;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* An {@link IntEncoderFilter} which sorts the values to encode in ascending
* order before encoding them.
*
* @lucene.experimental
*/
public final class SortingIntEncoder extends IntEncoderFilter {
/** Initializes with the given encoder. */
public SortingIntEncoder(IntEncoder encoder) {
super(encoder);
}
@Override
public void encode(IntsRef values, BytesRef buf) {
Arrays.sort(values.ints, values.offset, values.offset + values.length);
encoder.encode(values, buf);
}
@Override
public IntDecoder createMatchingDecoder() {
return encoder.createMatchingDecoder();
}
@Override
public String toString() {
return "Sorting(" + encoder.toString() + ")";
}
}

View File

@ -1,63 +0,0 @@
package org.apache.lucene.facet.encoding;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* An {@link IntEncoderFilter} which ensures only unique values are encoded. The
* implementation assumes the values given to {@link #encode(IntsRef, BytesRef)} are sorted.
* If this is not the case, you can chain this encoder with
* {@link SortingIntEncoder}.
*
* @lucene.experimental
*/
public final class UniqueValuesIntEncoder extends IntEncoderFilter {
/** Constructs a new instance with the given encoder. */
public UniqueValuesIntEncoder(IntEncoder encoder) {
super(encoder);
}
@Override
public void encode(IntsRef values, BytesRef buf) {
int prev = values.ints[values.offset];
int idx = values.offset + 1;
int upto = values.offset + values.length;
for (int i = idx; i < upto; i++) {
if (values.ints[i] != prev) {
values.ints[idx++] = values.ints[i];
prev = values.ints[i];
}
}
values.length = idx - values.offset;
encoder.encode(values, buf);
}
@Override
public IntDecoder createMatchingDecoder() {
return encoder.createMatchingDecoder();
}
@Override
public String toString() {
return "Unique(" + encoder.toString() + ")";
}
}

View File

@ -1,64 +0,0 @@
package org.apache.lucene.facet.encoding;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.RamUsageEstimator;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Decodes values encoded by {@link VInt8IntEncoder}.
*
* @lucene.experimental
*/
public final class VInt8IntDecoder extends IntDecoder {
@Override
public void decode(BytesRef buf, IntsRef values) {
values.offset = values.length = 0;
// grow the buffer up front, even if by a large number of values (buf.length)
// that saves the need to check inside the loop for every decoded value if
// the buffer needs to grow.
if (values.ints.length < buf.length) {
values.ints = new int[ArrayUtil.oversize(buf.length, RamUsageEstimator.NUM_BYTES_INT)];
}
// it is better if the decoding is inlined like so, and not e.g.
// in a utility method
int upto = buf.offset + buf.length;
int value = 0;
int offset = buf.offset;
while (offset < upto) {
byte b = buf.bytes[offset++];
if (b >= 0) {
values.ints[values.length++] = (value << 7) | b;
value = 0;
} else {
value = (value << 7) | (b & 0x7F);
}
}
}
@Override
public String toString() {
return "VInt8";
}
}
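A tiny sketch (hypothetical demo) decoding the three bytes from the n = 100000 worked example in the VInt8IntEncoder javadoc below:
import org.apache.lucene.facet.encoding.VInt8IntDecoder;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
public class VInt8DecodeDemo {
  public static void main(String[] args) {
    BytesRef buf = new BytesRef(new byte[] { (byte) 0x86, (byte) 0x8D, 0x20 });
    IntsRef values = new IntsRef(4);
    new VInt8IntDecoder().decode(buf, values);
    System.out.println(values.ints[0]); // 100000
  }
}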

View File

@ -1,104 +0,0 @@
package org.apache.lucene.facet.encoding;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* An {@link IntEncoder} which implements variable length encoding. A number is
* encoded as follows:
* <ul>
* <li>If it is at most 127 and non-negative, i.e. uses only 7 bits, it is
* encoded as a single byte: 0bbbbbbb.
* <li>If it occupies more than 7 bits, it is represented as a series of bytes,
* each byte carrying 7 bits. All but the last byte have the MSB set, the last
* one has it unset.
* </ul>
* Example:
* <ol>
* <li>n = 117 = 01110101: This has less than 8 significant bits, therefore is
* encoded as 01110101 = 0x75.
* <li>n = 100000 = (binary) 11000011010100000. This has 17 significant bits,
* thus needs three Vint8 bytes. Pad it to a multiple of 7 bits, then split it
* into chunks of 7 and add an MSB, 0 for the last byte, 1 for the others:
* 1|0000110 1|0001101 0|0100000 = 0x86 0x8D 0x20.
* </ol>
* <b>NOTE:</b> although this encoder is not limited to values &ge; 0, it is not
* recommended for use with negative values, as their encoding will result in 5
* bytes written to the output stream, rather than 4. For such values, either
* use {@link SimpleIntEncoder} or write your own version of variable length
* encoding, which can better handle negative values.
*
* @lucene.experimental
*/
public final class VInt8IntEncoder extends IntEncoder {
@Override
public void encode(IntsRef values, BytesRef buf) {
buf.offset = buf.length = 0;
int maxBytesNeeded = 5 * values.length; // at most 5 bytes per VInt
if (buf.bytes.length < maxBytesNeeded) {
buf.grow(maxBytesNeeded);
}
int upto = values.offset + values.length;
for (int i = values.offset; i < upto; i++) {
// it is better if the encoding is inlined like so, and not e.g.
// in a utility method
int value = values.ints[i];
if ((value & ~0x7F) == 0) {
buf.bytes[buf.length] = (byte) value;
buf.length++;
} else if ((value & ~0x3FFF) == 0) {
buf.bytes[buf.length] = (byte) (0x80 | ((value & 0x3F80) >> 7));
buf.bytes[buf.length + 1] = (byte) (value & 0x7F);
buf.length += 2;
} else if ((value & ~0x1FFFFF) == 0) {
buf.bytes[buf.length] = (byte) (0x80 | ((value & 0x1FC000) >> 14));
buf.bytes[buf.length + 1] = (byte) (0x80 | ((value & 0x3F80) >> 7));
buf.bytes[buf.length + 2] = (byte) (value & 0x7F);
buf.length += 3;
} else if ((value & ~0xFFFFFFF) == 0) {
buf.bytes[buf.length] = (byte) (0x80 | ((value & 0xFE00000) >> 21));
buf.bytes[buf.length + 1] = (byte) (0x80 | ((value & 0x1FC000) >> 14));
buf.bytes[buf.length + 2] = (byte) (0x80 | ((value & 0x3F80) >> 7));
buf.bytes[buf.length + 3] = (byte) (value & 0x7F);
buf.length += 4;
} else {
buf.bytes[buf.length] = (byte) (0x80 | ((value & 0xF0000000) >> 28));
buf.bytes[buf.length + 1] = (byte) (0x80 | ((value & 0xFE00000) >> 21));
buf.bytes[buf.length + 2] = (byte) (0x80 | ((value & 0x1FC000) >> 14));
buf.bytes[buf.length + 3] = (byte) (0x80 | ((value & 0x3F80) >> 7));
buf.bytes[buf.length + 4] = (byte) (value & 0x7F);
buf.length += 5;
}
}
}
@Override
public IntDecoder createMatchingDecoder() {
return new VInt8IntDecoder();
}
@Override
public String toString() {
return "VInt8";
}
}
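A minimal round-trip sketch using the two values from the javadoc example above (117 and 100000); the buffer sizes are illustrative:

IntsRef values = new IntsRef(new int[] {117, 100000}, 0, 2);
BytesRef buf = new BytesRef(16);
VInt8IntEncoder encoder = new VInt8IntEncoder();
encoder.encode(values, buf);                           // buf.bytes now holds 0x75, 0x86, 0x8D, 0x20
IntsRef decoded = new IntsRef(2);
encoder.createMatchingDecoder().decode(buf, decoded);  // decoded.ints = {117, 100000}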

View File

@ -1,24 +0,0 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html>
<head>
<title>Facets Encoding</title>
</head>
<body>
Offers various encoders and decoders for category ordinals.
</body>
</html>

View File

@ -1,38 +0,0 @@
package org.apache.lucene.facet.index;
import java.io.IOException;
import java.util.Map;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Builds category list data by encoding the appropriate information for every
* category and ordinal given to {@link #build(IntsRef, Iterable)}.
*
* @lucene.experimental
*/
public interface CategoryListBuilder {
/** Returns the encoded ordinals data. */
public Map<String,BytesRef> build(IntsRef ordinals, Iterable<FacetLabel> categories) throws IOException;
}

View File

@ -1,170 +0,0 @@
package org.apache.lucene.facet.index;
import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.lucene.facet.encoding.IntEncoder;
import org.apache.lucene.facet.params.CategoryListParams;
import org.apache.lucene.facet.params.FacetIndexingParams;
import org.apache.lucene.facet.params.CategoryListParams.OrdinalPolicy;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.facet.util.PartitionsUtils;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* A {@link CategoryListBuilder} which builds counting list data by encoding
* the category ordinals into one or more {@link BytesRef}. Each
* {@link BytesRef} corresponds to a set of ordinals that belong to the same
* partition. When partitions are not enabled (i.e.
* {@link FacetIndexingParams#getPartitionSize()} returns
* {@link Integer#MAX_VALUE}), only one {@link BytesRef} is returned by this
* class.
* <p>
* Counting lists are usually used for computing the weight of categories by
* summing their number of occurrences (hence counting) in a result set.
*/
public class CountingListBuilder implements CategoryListBuilder {
/** Specializes encoding ordinals when partitions are enabled/disabled. */
private static abstract class OrdinalsEncoder {
OrdinalsEncoder() {}
public abstract Map<String,BytesRef> encode(IntsRef ordinals);
}
private static final class NoPartitionsOrdinalsEncoder extends OrdinalsEncoder {
private final IntEncoder encoder;
private final String name = "";
NoPartitionsOrdinalsEncoder(CategoryListParams categoryListParams) {
encoder = categoryListParams.createEncoder();
}
@Override
public Map<String,BytesRef> encode(IntsRef ordinals) {
final BytesRef bytes = new BytesRef(128); // should be enough for most common applications
encoder.encode(ordinals, bytes);
return Collections.singletonMap(name, bytes);
}
}
private static final class PerPartitionOrdinalsEncoder extends OrdinalsEncoder {
private final FacetIndexingParams indexingParams;
private final CategoryListParams categoryListParams;
private final int partitionSize;
private final HashMap<String,IntEncoder> partitionEncoder = new HashMap<String,IntEncoder>();
PerPartitionOrdinalsEncoder(FacetIndexingParams indexingParams, CategoryListParams categoryListParams) {
this.indexingParams = indexingParams;
this.categoryListParams = categoryListParams;
this.partitionSize = indexingParams.getPartitionSize();
}
@Override
public HashMap<String,BytesRef> encode(IntsRef ordinals) {
// build the partitionOrdinals map
final HashMap<String,IntsRef> partitionOrdinals = new HashMap<String,IntsRef>();
for (int i = 0; i < ordinals.length; i++) {
int ordinal = ordinals.ints[i];
final String name = PartitionsUtils.partitionNameByOrdinal(indexingParams, ordinal);
IntsRef partitionOrds = partitionOrdinals.get(name);
if (partitionOrds == null) {
partitionOrds = new IntsRef(32);
partitionOrdinals.put(name, partitionOrds);
partitionEncoder.put(name, categoryListParams.createEncoder());
}
partitionOrds.ints[partitionOrds.length++] = ordinal % partitionSize;
}
HashMap<String,BytesRef> partitionBytes = new HashMap<String,BytesRef>();
for (Entry<String,IntsRef> e : partitionOrdinals.entrySet()) {
String name = e.getKey();
final IntEncoder encoder = partitionEncoder.get(name);
final BytesRef bytes = new BytesRef(128); // should be enough for most common applications
encoder.encode(e.getValue(), bytes);
partitionBytes.put(name, bytes);
}
return partitionBytes;
}
}
private final OrdinalsEncoder ordinalsEncoder;
private final TaxonomyWriter taxoWriter;
private final CategoryListParams clp;
public CountingListBuilder(CategoryListParams categoryListParams, FacetIndexingParams indexingParams,
TaxonomyWriter taxoWriter) {
this.taxoWriter = taxoWriter;
this.clp = categoryListParams;
if (indexingParams.getPartitionSize() == Integer.MAX_VALUE) {
ordinalsEncoder = new NoPartitionsOrdinalsEncoder(categoryListParams);
} else {
ordinalsEncoder = new PerPartitionOrdinalsEncoder(indexingParams, categoryListParams);
}
}
/**
* Every returned {@link BytesRef} corresponds to a single partition (as
* defined by {@link FacetIndexingParams#getPartitionSize()}) and the key
* denotes the partition ID. When no partitions are defined, the returned map
* contains only one value.
* <p>
* <b>NOTE:</b> the {@code ordinals} array is modified by adding parent
* ordinals to it. Also, some encoders may sort the array and remove duplicate
* ordinals. Therefore you may want to invoke this method after you have finished
* processing the array for other purposes.
*/
@Override
public Map<String,BytesRef> build(IntsRef ordinals, Iterable<FacetLabel> categories) throws IOException {
int upto = ordinals.length; // since we may add ordinals to IntsRef, iterate upto original length
Iterator<FacetLabel> iter = categories.iterator();
for (int i = 0; i < upto; i++) {
int ordinal = ordinals.ints[i];
FacetLabel cp = iter.next();
OrdinalPolicy op = clp.getOrdinalPolicy(cp.components[0]);
if (op != OrdinalPolicy.NO_PARENTS) {
// need to add parents too
int parent = taxoWriter.getParent(ordinal);
if (parent > 0) {
// only do this if the category is not a dimension itself, otherwise, it was just discarded by the 'if' below
while (parent > 0) {
ordinals.ints[ordinals.length++] = parent;
parent = taxoWriter.getParent(parent);
}
if (op == OrdinalPolicy.ALL_BUT_DIMENSION) { // discard the last added parent, which is the dimension
ordinals.length--;
}
}
}
}
return ordinalsEncoder.encode(ordinals);
}
}
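A hedged sketch of building counting-list data for one document's categories; DirectoryTaxonomyWriter, RAMDirectory and the FacetLabel varargs constructor are assumptions not shown in this change:

Directory taxoDir = new RAMDirectory();                           // assumption: in-memory taxonomy index
TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir); // assumption: directory-based taxonomy writer
FacetIndexingParams iParams = FacetIndexingParams.DEFAULT;
CategoryListParams clp = iParams.getCategoryListParams(null);
List<FacetLabel> categories = Arrays.asList(new FacetLabel("Author", "Mark Twain")); // assumed constructor
IntsRef ordinals = new IntsRef(32);
for (FacetLabel cp : categories) {
  ordinals.ints[ordinals.length++] = taxoWriter.addCategory(cp);
}
CountingListBuilder builder = new CountingListBuilder(clp, iParams, taxoWriter);
Map<String,BytesRef> data = builder.build(ordinals, categories);  // a single entry when partitions are disabled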

View File

@ -1,83 +0,0 @@
package org.apache.lucene.facet.index;
import java.io.IOException;
import java.util.Iterator;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.facet.params.FacetIndexingParams;
import org.apache.lucene.facet.taxonomy.FacetLabel;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* A {@link TokenStream} which creates category drill-down terms.
*
* @lucene.experimental
*/
public class DrillDownStream extends TokenStream {
private final FacetIndexingParams indexingParams;
private final Iterator<FacetLabel> categories;
private final CharTermAttribute termAttribute;
private FacetLabel current;
private boolean isParent;
public DrillDownStream(Iterable<FacetLabel> categories, FacetIndexingParams indexingParams) {
termAttribute = addAttribute(CharTermAttribute.class);
this.categories = categories.iterator();
this.indexingParams = indexingParams;
}
protected void addAdditionalAttributes(FacetLabel category, boolean isParent) {
// a hook for AssociationsDrillDownStream to add the associations payload to
// the drill-down terms
}
@Override
public final boolean incrementToken() throws IOException {
if (current.length == 0) {
if (!categories.hasNext()) {
return false; // no more categories
}
current = categories.next();
termAttribute.resizeBuffer(current.fullPathLength());
isParent = false;
}
// copy current as the drill-down term (either the leaf category or one of its
// parent prefixes)
int nChars = indexingParams.drillDownTermText(current, termAttribute.buffer());
termAttribute.setLength(nChars);
addAdditionalAttributes(current, isParent);
// prepare current for next call by trimming the last component (parents)
current = current.subpath(current.length - 1);
isParent = true;
return true;
}
@Override
public void reset() throws IOException {
current = categories.next();
termAttribute.resizeBuffer(current.fullPathLength());
isParent = false;
}
}
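A small consumption sketch; the FacetLabel varargs constructor is an assumption, and the printed terms use whatever drill-down delimiter the indexing params define:

List<FacetLabel> categories = Arrays.asList(new FacetLabel("Date", "2010", "March")); // assumed constructor
DrillDownStream stream = new DrillDownStream(categories, FacetIndexingParams.DEFAULT);
CharTermAttribute term = stream.getAttribute(CharTermAttribute.class);
stream.reset();
while (stream.incrementToken()) {
  System.out.println(term.toString()); // full path term first, then each parent prefix term
}
stream.close();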

View File

@ -1,194 +0,0 @@
package org.apache.lucene.facet.index;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map.Entry;
import java.util.Map;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.facet.params.CategoryListParams;
import org.apache.lucene.facet.params.FacetIndexingParams;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* A utility class for adding facet fields to a document. Usually one field will
* be added for all facets; however, per
* {@link FacetIndexingParams#getCategoryListParams(FacetLabel)}, one field
* may be added for every group of facets.
*
* @lucene.experimental
*/
public class FacetFields {
// The drill-down field is added with a TokenStream, hence why it's based on
// TextField type. However in practice, it is added just like StringField.
// Therefore we set its IndexOptions to DOCS_ONLY.
private static final FieldType DRILL_DOWN_TYPE = new FieldType(TextField.TYPE_NOT_STORED);
static {
DRILL_DOWN_TYPE.setIndexOptions(IndexOptions.DOCS_ONLY);
DRILL_DOWN_TYPE.setOmitNorms(true);
DRILL_DOWN_TYPE.freeze();
}
protected final TaxonomyWriter taxonomyWriter;
protected final FacetIndexingParams indexingParams;
/**
* Constructs a new instance with the {@link FacetIndexingParams#DEFAULT
* default} facet indexing params.
*
* @param taxonomyWriter
* used to resolve given categories to ordinals
*/
public FacetFields(TaxonomyWriter taxonomyWriter) {
this(taxonomyWriter, FacetIndexingParams.DEFAULT);
}
/**
* Constructs a new instance with the given facet indexing params.
*
* @param taxonomyWriter
* used to resolve given categories to ordinals
* @param params
* determines under which fields the categories should be indexed
*/
public FacetFields(TaxonomyWriter taxonomyWriter, FacetIndexingParams params) {
this.taxonomyWriter = taxonomyWriter;
this.indexingParams = params;
}
/**
* Creates a mapping between a {@link CategoryListParams} and all
* {@link FacetLabel categories} that are associated with it.
*/
protected Map<CategoryListParams,Iterable<FacetLabel>> createCategoryListMapping(
Iterable<FacetLabel> categories) {
if (indexingParams.getAllCategoryListParams().size() == 1) {
return Collections.singletonMap(indexingParams.getCategoryListParams(null), categories);
}
HashMap<CategoryListParams,Iterable<FacetLabel>> categoryLists =
new HashMap<CategoryListParams,Iterable<FacetLabel>>();
for (FacetLabel cp : categories) {
// each category may be indexed under a different field, so add it to the right list.
CategoryListParams clp = indexingParams.getCategoryListParams(cp);
List<FacetLabel> list = (List<FacetLabel>) categoryLists.get(clp);
if (list == null) {
list = new ArrayList<FacetLabel>();
categoryLists.put(clp, list);
}
list.add(cp);
}
return categoryLists;
}
/**
* Returns the category list data, as a mapping from key to {@link BytesRef}
* which includes the encoded data. Every ordinal in {@code ordinals}
* corresponds to a {@link FacetLabel} returned from {@code categories}.
*/
protected Map<String,BytesRef> getCategoryListData(CategoryListParams categoryListParams,
IntsRef ordinals, Iterable<FacetLabel> categories /* needed for AssociationsFacetFields */)
throws IOException {
return new CountingListBuilder(categoryListParams, indexingParams, taxonomyWriter).build(ordinals, categories);
}
/**
* Returns a {@link DrillDownStream} for writing the categories drill-down
* terms.
*/
protected DrillDownStream getDrillDownStream(Iterable<FacetLabel> categories) {
return new DrillDownStream(categories, indexingParams);
}
/**
* Returns the {@link FieldType} with which the drill-down terms should be
* indexed. The default is {@link IndexOptions#DOCS_ONLY}.
*/
protected FieldType drillDownFieldType() {
return DRILL_DOWN_TYPE;
}
/**
* Add the counting list data to the document under the given field. Note that
* the field is determined by the {@link CategoryListParams}.
*/
protected void addCountingListData(Document doc, Map<String,BytesRef> categoriesData, String field) {
for (Entry<String,BytesRef> entry : categoriesData.entrySet()) {
doc.add(new BinaryDocValuesField(field + entry.getKey(), entry.getValue()));
}
}
/** Adds the needed facet fields to the document. */
public void addFields(Document doc, Iterable<FacetLabel> categories) throws IOException {
if (categories == null) {
throw new IllegalArgumentException("categories should not be null");
}
// TODO: add reuse capabilities to this class, per CLP objects:
// - drill-down field
// - counting list field
// - DrillDownStream
// - CountingListStream
final Map<CategoryListParams,Iterable<FacetLabel>> categoryLists = createCategoryListMapping(categories);
// for each CLP we add a different field for drill-down terms as well as for
// counting list data.
IntsRef ordinals = new IntsRef(32); // should be enough for most common applications
for (Entry<CategoryListParams, Iterable<FacetLabel>> e : categoryLists.entrySet()) {
final CategoryListParams clp = e.getKey();
final String field = clp.field;
// build category list data
ordinals.length = 0; // reset
int maxNumOrds = 0;
for (FacetLabel cp : e.getValue()) {
int ordinal = taxonomyWriter.addCategory(cp);
maxNumOrds += cp.length; // ordinal and potentially all parents
if (ordinals.ints.length < maxNumOrds) {
ordinals.grow(maxNumOrds);
}
ordinals.ints[ordinals.length++] = ordinal;
}
Map<String,BytesRef> categoriesData = getCategoryListData(clp, ordinals, e.getValue());
// add the counting list data
addCountingListData(doc, categoriesData, field);
// add the drill-down field
DrillDownStream drillDownStream = getDrillDownStream(e.getValue());
Field drillDown = new Field(field, drillDownStream, drillDownFieldType());
doc.add(drillDown);
}
}
}
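A hedged indexing sketch; it assumes an open IndexWriter (indexWriter) and an open DirectoryTaxonomyWriter (taxoWriter), and the FacetLabel varargs constructor is also an assumption:

FacetFields facetFields = new FacetFields(taxoWriter);
Document doc = new Document();
doc.add(new TextField("content", "the adventures of tom sawyer", Field.Store.NO));
facetFields.addFields(doc, Arrays.asList(
    new FacetLabel("Author", "Mark Twain"),                 // assumed constructor
    new FacetLabel("Publish Date", "2010", "March")));
indexWriter.addDocument(doc); // doc now carries drill-down terms and BinaryDocValues counting-list data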

View File

@ -1,24 +0,0 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html>
<head>
<title>Facets indexing code</title>
</head>
<body>
Facets indexing code.
</body>
</html>

View File

@ -1,116 +0,0 @@
package org.apache.lucene.facet.old;
import java.io.IOException;
import java.util.List;
import org.apache.lucene.facet.params.FacetSearchParams;
import org.apache.lucene.facet.sampling.RandomSampler;
import org.apache.lucene.facet.sampling.Sampler;
import org.apache.lucene.facet.sampling.SamplingAccumulator;
import org.apache.lucene.facet.search.FacetArrays;
import org.apache.lucene.facet.search.FacetResult;
import org.apache.lucene.facet.search.FacetsAccumulator;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.IndexReader;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* {@link FacetsAccumulator} whose behavior regarding complements, sampling,
* etc. is not set up front but rather is determined at accumulation time
* according to the statistics of the accumulated set of documents and the
* index.
* <p>
* Note: sampling accumulation (accumulation over a sampled set of the results)
* does not guarantee accurate values for
* {@link FacetResult#getNumValidDescendants()}.
*
* @lucene.experimental
*/
public final class AdaptiveFacetsAccumulator extends OldFacetsAccumulator {
private Sampler sampler = new RandomSampler();
/**
* Create an {@link AdaptiveFacetsAccumulator}
* @see OldFacetsAccumulator#OldFacetsAccumulator(FacetSearchParams, IndexReader, TaxonomyReader)
*/
public AdaptiveFacetsAccumulator(FacetSearchParams searchParams, IndexReader indexReader,
TaxonomyReader taxonomyReader) {
super(searchParams, indexReader, taxonomyReader);
}
/**
* Create an {@link AdaptiveFacetsAccumulator}
*
* @see OldFacetsAccumulator#OldFacetsAccumulator(FacetSearchParams,
* IndexReader, TaxonomyReader, FacetArrays)
*/
public AdaptiveFacetsAccumulator(FacetSearchParams searchParams, IndexReader indexReader,
TaxonomyReader taxonomyReader, FacetArrays facetArrays) {
super(searchParams, indexReader, taxonomyReader, facetArrays);
}
/**
* Set the sampler.
* @param sampler sampler to set
*/
public void setSampler(Sampler sampler) {
this.sampler = sampler;
}
@Override
public List<FacetResult> accumulate(ScoredDocIDs docids) throws IOException {
OldFacetsAccumulator delegee = appropriateFacetCountingAccumulator(docids);
if (delegee == this) {
return super.accumulate(docids);
}
return delegee.accumulate(docids);
}
/**
* Compute the appropriate facet accumulator to use.
* If no special/clever adaptation is possible/needed return this (self).
*/
private OldFacetsAccumulator appropriateFacetCountingAccumulator(ScoredDocIDs docids) {
// Verify that searchParams permit sampling/complement/etc... otherwise do default
if (!mayComplement()) {
return this;
}
// Now we're sure we can use the sampling methods as we're in a counting only mode
// Verify that sampling is enabled and required ... otherwise do default
if (sampler == null || !sampler.shouldSample(docids)) {
return this;
}
SamplingAccumulator samplingAccumulator = new SamplingAccumulator(sampler, searchParams, indexReader, taxonomyReader);
samplingAccumulator.setComplementThreshold(getComplementThreshold());
return samplingAccumulator;
}
/**
* @return the sampler in effect
*/
public final Sampler getSampler() {
return sampler;
}
}
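A hedged sketch of letting the adaptive accumulator choose between plain, complement and sampled counting; it assumes an open IndexReader (indexReader), TaxonomyReader (taxoReader) and a ScoredDocIDs (docids) for the matching documents, and the request constructors follow the old facet API:

FacetSearchParams fsp = new FacetSearchParams(
    new CountFacetRequest(new FacetLabel("Author"), 10));   // top-10 Author counts (assumed constructors)
AdaptiveFacetsAccumulator accumulator = new AdaptiveFacetsAccumulator(fsp, indexReader, taxoReader);
accumulator.setSampler(new RandomSampler());                // optional; a RandomSampler is already the default
List<FacetResult> results = accumulator.accumulate(docids);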

View File

@ -1,48 +0,0 @@
package org.apache.lucene.facet.old;
import java.io.IOException;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.IntsRef;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Aggregates the categories of documents given to
* {@link #aggregate(int, float, IntsRef)}. Note that the document IDs are local
* to the reader given to {@link #setNextReader(AtomicReaderContext)}.
*
* @lucene.experimental
*/
public interface Aggregator {
/**
* Sets the {@link AtomicReaderContext} for which
* {@link #aggregate(int, float, IntsRef)} calls will be made. If this method
* returns false, {@link #aggregate(int, float, IntsRef)} should not be called
* for this reader.
*/
public boolean setNextReader(AtomicReaderContext context) throws IOException;
/**
* Aggregate the ordinals of the given document ID (and its score). The given
* ordinals offset is always zero.
*/
public void aggregate(int docID, float score, IntsRef ordinals) throws IOException;
}
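A minimal, purely illustrative implementation that records the highest ordinal seen, just to show the contract of the two methods above:

class MaxOrdinalAggregator implements Aggregator {
  int maxOrdinal = -1;
  @Override
  public boolean setNextReader(AtomicReaderContext context) throws IOException {
    return true; // every segment is relevant for this aggregator
  }
  @Override
  public void aggregate(int docID, float score, IntsRef ordinals) throws IOException {
    for (int i = 0; i < ordinals.length; i++) {
      maxOrdinal = Math.max(maxOrdinal, ordinals.ints[i]);
    }
  }
}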

View File

@ -1,44 +0,0 @@
package org.apache.lucene.facet.old;
import java.io.IOException;
import org.apache.lucene.util.IntsRef;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* A {@link CountingAggregator} used during complement counting.
*
* @lucene.experimental
*/
public class ComplementCountingAggregator extends CountingAggregator {
public ComplementCountingAggregator(int[] counterArray) {
super(counterArray);
}
@Override
public void aggregate(int docID, float score, IntsRef ordinals) throws IOException {
for (int i = 0; i < ordinals.length; i++) {
int ord = ordinals.ints[i];
assert counterArray[ord] != 0 : "complement aggregation: count is about to become negative for ordinal " + ord;
--counterArray[ord];
}
}
}

View File

@ -1,66 +0,0 @@
package org.apache.lucene.facet.old;
import java.io.IOException;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.IntsRef;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* An {@link Aggregator} which updates a counter array, sized to the whole
* taxonomy, counting the number of times each category appears in the
* given set of documents.
*
* @lucene.experimental
*/
public class CountingAggregator implements Aggregator {
protected int[] counterArray;
public CountingAggregator(int[] counterArray) {
this.counterArray = counterArray;
}
@Override
public void aggregate(int docID, float score, IntsRef ordinals) throws IOException {
for (int i = 0; i < ordinals.length; i++) {
counterArray[ordinals.ints[i]]++;
}
}
@Override
public boolean equals(Object obj) {
if (obj == null || obj.getClass() != this.getClass()) {
return false;
}
CountingAggregator that = (CountingAggregator) obj;
return that.counterArray == this.counterArray;
}
@Override
public int hashCode() {
return counterArray == null ? 0 : counterArray.hashCode();
}
@Override
public boolean setNextReader(AtomicReaderContext context) throws IOException {
return true;
}
}
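Driving the aggregator by hand, with an illustrative array size and ordinals (ordinal 2 appears in both documents):

int[] counts = new int[16];                                   // in practice sized to the taxonomy
CountingAggregator agg = new CountingAggregator(counts);
agg.aggregate(0, 1.0f, new IntsRef(new int[] {2, 5}, 0, 2));
agg.aggregate(1, 1.0f, new IntsRef(new int[] {2}, 0, 1));
// counts[2] == 2, counts[5] == 1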

View File

@ -1,174 +0,0 @@
package org.apache.lucene.facet.old;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Represents {@link MatchingDocs} as {@link ScoredDocIDs}.
*
* @lucene.experimental
*/
public class MatchingDocsAsScoredDocIDs implements ScoredDocIDs {
// TODO remove this class once we get rid of ScoredDocIDs
final List<MatchingDocs> matchingDocs;
final int size;
public MatchingDocsAsScoredDocIDs(List<MatchingDocs> matchingDocs) {
this.matchingDocs = matchingDocs;
int totalSize = 0;
for (MatchingDocs md : matchingDocs) {
totalSize += md.totalHits;
}
this.size = totalSize;
}
@Override
public ScoredDocIDsIterator iterator() throws IOException {
return new ScoredDocIDsIterator() {
final Iterator<MatchingDocs> mdIter = matchingDocs.iterator();
int scoresIdx = 0;
int doc = 0;
MatchingDocs current;
int currentLength;
boolean done = false;
@Override
public boolean next() {
if (done) {
return false;
}
while (current == null) {
if (!mdIter.hasNext()) {
done = true;
return false;
}
current = mdIter.next();
currentLength = current.bits.length();
doc = 0;
scoresIdx = 0;
if (doc >= currentLength || (doc = current.bits.nextSetBit(doc)) == -1) {
current = null;
} else {
doc = -1; // we're calling nextSetBit later on
}
}
++doc;
if (doc >= currentLength || (doc = current.bits.nextSetBit(doc)) == -1) {
current = null;
return next();
}
return true;
}
@Override
public float getScore() {
return current.scores == null ? ScoredDocIDsIterator.DEFAULT_SCORE : current.scores[scoresIdx++];
}
@Override
public int getDocID() {
return done ? DocIdSetIterator.NO_MORE_DOCS : doc + current.context.docBase;
}
};
}
@Override
public DocIdSet getDocIDs() {
return new DocIdSet() {
final Iterator<MatchingDocs> mdIter = matchingDocs.iterator();
int doc = 0;
MatchingDocs current;
int currentLength;
boolean done = false;
@Override
public DocIdSetIterator iterator() throws IOException {
return new DocIdSetIterator() {
@Override
public int nextDoc() throws IOException {
if (done) {
return DocIdSetIterator.NO_MORE_DOCS;
}
while (current == null) {
if (!mdIter.hasNext()) {
done = true;
return DocIdSetIterator.NO_MORE_DOCS;
}
current = mdIter.next();
currentLength = current.bits.length();
doc = 0;
if (doc >= currentLength || (doc = current.bits.nextSetBit(doc)) == -1) {
current = null;
} else {
doc = -1; // we're calling nextSetBit later on
}
}
++doc;
if (doc >= currentLength || (doc = current.bits.nextSetBit(doc)) == -1) {
current = null;
return nextDoc();
}
return doc + current.context.docBase;
}
@Override
public int docID() {
return doc + current.context.docBase;
}
@Override
public long cost() {
return size;
}
@Override
public int advance(int target) throws IOException {
throw new UnsupportedOperationException("not supported");
}
};
}
};
}
@Override
public int size() {
return size;
}
}
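A hedged bridging sketch; it assumes a FacetsCollector (fc) that was already passed to a search and that exposes its matching docs via getMatchingDocs():

List<MatchingDocs> matchingDocs = fc.getMatchingDocs();       // assumption: accessor on FacetsCollector
ScoredDocIDs docids = new MatchingDocsAsScoredDocIDs(matchingDocs);
ScoredDocIDsIterator it = docids.iterator();
while (it.next()) {
  int docID = it.getDocID();                                  // global doc ID (docBase already added)
  float score = it.getScore();                                // DEFAULT_SCORE when scores were not kept
}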

View File

@ -1,457 +0,0 @@
package org.apache.lucene.facet.old;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map.Entry;
import org.apache.lucene.facet.complements.TotalFacetCounts;
import org.apache.lucene.facet.complements.TotalFacetCountsCache;
import org.apache.lucene.facet.params.FacetIndexingParams;
import org.apache.lucene.facet.params.FacetSearchParams;
import org.apache.lucene.facet.partitions.IntermediateFacetResult;
import org.apache.lucene.facet.partitions.PartitionsFacetResultsHandler;
import org.apache.lucene.facet.sampling.Sampler.OverSampledFacetRequest;
import org.apache.lucene.facet.search.CategoryListIterator;
import org.apache.lucene.facet.search.CountFacetRequest;
import org.apache.lucene.facet.search.FacetArrays;
import org.apache.lucene.facet.search.FacetRequest;
import org.apache.lucene.facet.search.FacetRequest.ResultMode;
import org.apache.lucene.facet.search.FacetResult;
import org.apache.lucene.facet.search.FacetsAccumulator;
import org.apache.lucene.facet.search.FacetsAggregator;
import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs;
import org.apache.lucene.facet.search.OrdinalValueResolver;
import org.apache.lucene.facet.search.OrdinalValueResolver.FloatValueResolver;
import org.apache.lucene.facet.search.OrdinalValueResolver.IntValueResolver;
import org.apache.lucene.facet.search.SumScoreFacetRequest;
import org.apache.lucene.facet.search.TaxonomyFacetsAccumulator;
import org.apache.lucene.facet.search.TopKFacetResultsHandler;
import org.apache.lucene.facet.search.TopKInEachNodeHandler;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.util.PartitionsUtils;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.IntsRef;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* A {@link FacetsAccumulator} which supports partitions, sampling and
* complement counting.
* <p>
* <b>NOTE:</b> this accumulator still uses the old API and will be removed
* eventually in favor of dedicated accumulators which support the above
* features over the new {@link FacetsAggregator} API. It provides
* {@link Aggregator} implementations for {@link CountFacetRequest},
* {@link SumScoreFacetRequest} and {@link OverSampledFacetRequest}. If you need
* to use it in conjunction with other facet requests, you should override
* {@link #createAggregator(FacetRequest, FacetArrays)}.
*
* @lucene.experimental
*/
public class OldFacetsAccumulator extends TaxonomyFacetsAccumulator {
/**
* Default threshold for using the complements optimization.
* If facets are accumulated for a document set larger than this ratio of the index size,
* then the complement optimization is applied.
* @see #setComplementThreshold(double) for more info on the complements optimization.
*/
public static final double DEFAULT_COMPLEMENT_THRESHOLD = 0.6;
/**
* Passing this to {@link #setComplementThreshold(double)} will disable using complement optimization.
*/
public static final double DISABLE_COMPLEMENT = Double.POSITIVE_INFINITY; // > 1 actually
/**
* Passing this to {@link #setComplementThreshold(double)} will force using complement optimization.
*/
public static final double FORCE_COMPLEMENT = 0; // <=0
protected int partitionSize;
protected int maxPartitions;
protected boolean isUsingComplements;
private TotalFacetCounts totalFacetCounts;
private Object accumulateGuard;
private double complementThreshold = DEFAULT_COMPLEMENT_THRESHOLD;
private static FacetArrays createFacetArrays(FacetSearchParams searchParams, TaxonomyReader taxoReader) {
return new FacetArrays(PartitionsUtils.partitionSize(searchParams.indexingParams, taxoReader));
}
public OldFacetsAccumulator(FacetSearchParams searchParams, IndexReader indexReader,
TaxonomyReader taxonomyReader) {
this(searchParams, indexReader, taxonomyReader, null);
}
public OldFacetsAccumulator(FacetSearchParams searchParams, IndexReader indexReader,
TaxonomyReader taxonomyReader, FacetArrays facetArrays) {
super(searchParams, indexReader, taxonomyReader, facetArrays == null ? createFacetArrays(searchParams, taxonomyReader) : facetArrays);
// can only be computed later when docids size is known
isUsingComplements = false;
partitionSize = PartitionsUtils.partitionSize(searchParams.indexingParams, taxonomyReader);
maxPartitions = (int) Math.ceil(this.taxonomyReader.getSize() / (double) partitionSize);
accumulateGuard = new Object();
}
// TODO: this should be removed once we clean the API
public List<FacetResult> accumulate(ScoredDocIDs docids) throws IOException {
// synchronize to prevent two accumulate() calls from running at the same time.
// We decided not to synchronize the method itself because that might mislead
// users into thinking concurrent calls are encouraged.
synchronized (accumulateGuard) {
// only now we can compute this
isUsingComplements = shouldComplement(docids);
if (isUsingComplements) {
try {
totalFacetCounts = TotalFacetCountsCache.getSingleton().getTotalCounts(indexReader, taxonomyReader, searchParams.indexingParams);
if (totalFacetCounts != null) {
docids = ScoredDocIdsUtils.getComplementSet(docids, indexReader);
} else {
isUsingComplements = false;
}
} catch (UnsupportedOperationException e) {
// TODO (Facet): this exception is thrown from TotalCountsKey if the
// IndexReader used does not support getVersion(). We should re-think
// this: is this tiny detail worth disabling total counts completely
// for such readers? Currently, it's not supported by Parallel and
// MultiReader, which might be problematic for several applications.
// We could, for example, base our "isCurrent" logic on something else
// than the reader's version. Need to think more deeply about it.
isUsingComplements = false;
} catch (IOException e) {
// silently fail if for some reason failed to load/save from/to dir
isUsingComplements = false;
} catch (Exception e) {
// give up: this should not happen!
throw new IOException("PANIC: Got unexpected exception while trying to get/calculate total counts", e);
}
}
docids = actualDocsToAccumulate(docids);
HashMap<FacetRequest, IntermediateFacetResult> fr2tmpRes = new HashMap<FacetRequest, IntermediateFacetResult>();
try {
for (int part = 0; part < maxPartitions; part++) {
// fill arrays from category lists
fillArraysForPartition(docids, facetArrays, part);
int offset = part * partitionSize;
// for each partition we go over all requests and handle
// each, where the request maintains the merged result.
// In this implementation merges happen after each partition,
// but other impl could merge only at the end.
final HashSet<FacetRequest> handledRequests = new HashSet<FacetRequest>();
for (FacetRequest fr : searchParams.facetRequests) {
// Handle and merge only facet requests which were not already handled.
if (handledRequests.add(fr)) {
PartitionsFacetResultsHandler frHndlr = createFacetResultsHandler(fr, createOrdinalValueResolver(fr));
IntermediateFacetResult res4fr = frHndlr.fetchPartitionResult(offset);
IntermediateFacetResult oldRes = fr2tmpRes.get(fr);
if (oldRes != null) {
res4fr = frHndlr.mergeResults(oldRes, res4fr);
}
fr2tmpRes.put(fr, res4fr);
}
}
}
} finally {
facetArrays.free();
}
// gather results from all requests into a list for returning them
List<FacetResult> res = new ArrayList<FacetResult>();
for (FacetRequest fr : searchParams.facetRequests) {
PartitionsFacetResultsHandler frHndlr = createFacetResultsHandler(fr, createOrdinalValueResolver(fr));
IntermediateFacetResult tmpResult = fr2tmpRes.get(fr);
if (tmpResult == null) {
// Add empty FacetResult:
res.add(emptyResult(taxonomyReader.getOrdinal(fr.categoryPath), fr));
continue;
}
FacetResult facetRes = frHndlr.renderFacetResult(tmpResult);
// final labeling if allowed (because labeling is a costly operation)
frHndlr.labelResult(facetRes);
res.add(facetRes);
}
return res;
}
}
/** check if all requests are complementable */
protected boolean mayComplement() {
for (FacetRequest freq : searchParams.facetRequests) {
if (!(freq instanceof CountFacetRequest)) {
return false;
}
}
return true;
}
@Override
public PartitionsFacetResultsHandler createFacetResultsHandler(FacetRequest fr, OrdinalValueResolver resolver) {
if (fr.getResultMode() == ResultMode.PER_NODE_IN_TREE) {
return new TopKInEachNodeHandler(taxonomyReader, fr, resolver, facetArrays);
} else {
return new TopKFacetResultsHandler(taxonomyReader, fr, resolver, facetArrays);
}
}
/**
* Set the actual set of documents over which accumulation should take place.
* <p>
* Allows overriding the set of documents to accumulate for. Invoked just
* before actual accumulating starts. From this point that set of documents
* remains unmodified. Default implementation just returns the input
* unchanged.
*
* @param docids
* candidate documents to accumulate for
* @return actual documents to accumulate for
*/
protected ScoredDocIDs actualDocsToAccumulate(ScoredDocIDs docids) throws IOException {
return docids;
}
/** Check if it is worth using complements */
protected boolean shouldComplement(ScoredDocIDs docids) {
return mayComplement() && (docids.size() > indexReader.numDocs() * getComplementThreshold()) ;
}
/**
* Creates an {@link OrdinalValueResolver} for the given {@link FacetRequest}.
* By default this method supports {@link CountFacetRequest} and
* {@link SumScoreFacetRequest}. You should override if you are using other
* requests with this accumulator.
*/
public OrdinalValueResolver createOrdinalValueResolver(FacetRequest fr) {
if (fr instanceof CountFacetRequest) {
return new IntValueResolver(facetArrays);
} else if (fr instanceof SumScoreFacetRequest) {
return new FloatValueResolver(facetArrays);
} else if (fr instanceof OverSampledFacetRequest) {
return createOrdinalValueResolver(((OverSampledFacetRequest) fr).orig);
} else {
throw new IllegalArgumentException("unrecognized FacetRequest " + fr.getClass());
}
}
/**
* Iterate over the documents for this partition and fill the facet arrays with the correct
* count/complement count/value.
*/
private final void fillArraysForPartition(ScoredDocIDs docids, FacetArrays facetArrays, int partition)
throws IOException {
if (isUsingComplements) {
initArraysByTotalCounts(facetArrays, partition, docids.size());
} else {
facetArrays.free(); // to get a cleared array for this partition
}
HashMap<CategoryListIterator, Aggregator> categoryLists = getCategoryListMap(facetArrays, partition);
IntsRef ordinals = new IntsRef(32); // a reasonable start capacity for most common apps
for (Entry<CategoryListIterator, Aggregator> entry : categoryLists.entrySet()) {
final ScoredDocIDsIterator iterator = docids.iterator();
final CategoryListIterator categoryListIter = entry.getKey();
final Aggregator aggregator = entry.getValue();
Iterator<AtomicReaderContext> contexts = indexReader.leaves().iterator();
AtomicReaderContext current = null;
int maxDoc = -1;
while (iterator.next()) {
int docID = iterator.getDocID();
if (docID >= maxDoc) {
boolean iteratorDone = false;
do { // find the segment which contains this document
if (!contexts.hasNext()) {
throw new RuntimeException("ScoredDocIDs contains documents outside this reader's segments !?");
}
current = contexts.next();
maxDoc = current.docBase + current.reader().maxDoc();
if (docID < maxDoc) { // segment has docs, check if it has categories
boolean validSegment = categoryListIter.setNextReader(current);
validSegment &= aggregator.setNextReader(current);
if (!validSegment) { // if categoryList or aggregator says it's an invalid segment, skip all docs
while (docID < maxDoc && iterator.next()) {
docID = iterator.getDocID();
}
if (docID < maxDoc) {
iteratorDone = true;
}
}
}
} while (docID >= maxDoc);
if (iteratorDone) { // iterator finished, terminate the loop
break;
}
}
docID -= current.docBase;
categoryListIter.getOrdinals(docID, ordinals);
if (ordinals.length == 0) {
continue; // document does not have category ordinals
}
aggregator.aggregate(docID, iterator.getScore(), ordinals);
}
}
}
/** Init arrays for partition by total counts, optionally applying a factor */
private final void initArraysByTotalCounts(FacetArrays facetArrays, int partition, int nAccumulatedDocs) {
int[] intArray = facetArrays.getIntArray();
totalFacetCounts.fillTotalCountsForPartition(intArray, partition);
double totalCountsFactor = getTotalCountsFactor();
// fix total counts, but only if the effect of this would be meaningful.
if (totalCountsFactor < 0.99999) {
int delta = nAccumulatedDocs + 1;
for (int i = 0; i < intArray.length; i++) {
intArray[i] *= totalCountsFactor;
// also translate to prevent loss of non-positive values
// due to complement sampling (ie if sampled docs all decremented a certain category).
intArray[i] += delta;
}
}
}
/**
* Expert: factor by which counts should be multiplied when initializing
* the count arrays from total counts.
* Default implementation for this returns 1, which is a no op.
* @return a factor by which total counts should be multiplied
*/
protected double getTotalCountsFactor() {
return 1;
}
protected Aggregator createAggregator(FacetRequest fr, FacetArrays facetArrays) {
if (fr instanceof CountFacetRequest) {
// we rely on that, if needed, result is cleared by arrays!
int[] a = facetArrays.getIntArray();
if (isUsingComplements) {
return new ComplementCountingAggregator(a);
} else {
return new CountingAggregator(a);
}
} else if (fr instanceof SumScoreFacetRequest) {
if (isUsingComplements) {
throw new IllegalArgumentException("complements are not supported by SumScoreFacetRequest");
} else {
return new ScoringAggregator(facetArrays.getFloatArray());
}
} else if (fr instanceof OverSampledFacetRequest) {
return createAggregator(((OverSampledFacetRequest) fr).orig, facetArrays);
} else {
throw new IllegalArgumentException("unknown Aggregator implementation for request " + fr.getClass());
}
}
/**
* Creates an {@link Aggregator} and a {@link CategoryListIterator} for each
* and every {@link FacetRequest}, generating a map that matches each
* category list iterator to its aggregator.
* <p>
* If two CategoryListIterators are served by the same aggregator, a single
* aggregator is returned for both.
*
* <b>NOTE: </b>If a given category list iterator is needed with two different
* aggregators (e.g. counting and association), an exception is thrown as this
* functionality is not supported at this time.
*/
protected HashMap<CategoryListIterator, Aggregator> getCategoryListMap(FacetArrays facetArrays,
int partition) throws IOException {
HashMap<CategoryListIterator, Aggregator> categoryLists = new HashMap<CategoryListIterator, Aggregator>();
FacetIndexingParams indexingParams = searchParams.indexingParams;
for (FacetRequest facetRequest : searchParams.facetRequests) {
Aggregator categoryAggregator = createAggregator(facetRequest, facetArrays);
CategoryListIterator cli = indexingParams.getCategoryListParams(facetRequest.categoryPath).createCategoryListIterator(partition);
// get the aggregator
Aggregator old = categoryLists.put(cli, categoryAggregator);
if (old != null && !old.equals(categoryAggregator)) {
throw new RuntimeException("Overriding existing category list with different aggregator");
}
// if the aggregator is the same we're covered
}
return categoryLists;
}
@Override
public List<FacetResult> accumulate(List<MatchingDocs> matchingDocs) throws IOException {
return accumulate(new MatchingDocsAsScoredDocIDs(matchingDocs));
}
/**
* Returns the complement threshold.
* @see #setComplementThreshold(double)
*/
public double getComplementThreshold() {
return complementThreshold;
}
/**
* Set the complement threshold.
* This threshold will dictate whether the complements optimization is applied.
* The optimization is to count over fewer documents. It is useful when the same
* FacetSearchParams are used for varying sets of documents. The first time
* complements are used, the "total counts" are computed by counting over all the
* documents in the collection. Then only the complementing set of documents
* is considered and used to decrement the overall counts, thereby
* walking through fewer documents, which is faster.
* <p>
* For the default settings see {@link #DEFAULT_COMPLEMENT_THRESHOLD}.
* <p>
* To force complements in all cases, pass {@link #FORCE_COMPLEMENT}.
* This is mostly useful for testing purposes, as forcing complements when only a
* tiny fraction of the available documents match the query does not make sense and
* would degrade performance.
* <p>
* To disable complements pass {@link #DISABLE_COMPLEMENT}.
* @param complementThreshold the complement threshold to set
* @see #getComplementThreshold()
*/
public void setComplementThreshold(double complementThreshold) {
this.complementThreshold = complementThreshold;
}
/** Returns true if complements are enabled. */
public boolean isUsingComplements() {
return isUsingComplements;
}
}
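A hedged sketch of tuning the complements optimization described above; it assumes an open IndexReader (indexReader), TaxonomyReader (taxoReader) and a ScoredDocIDs (docids), and the request constructors follow the old facet API:

FacetSearchParams fsp = new FacetSearchParams(
    new CountFacetRequest(new FacetLabel("Author"), 10));    // assumed constructors
OldFacetsAccumulator accumulator = new OldFacetsAccumulator(fsp, indexReader, taxoReader);
accumulator.setComplementThreshold(OldFacetsAccumulator.FORCE_COMPLEMENT); // or DISABLE_COMPLEMENT to turn it off
List<FacetResult> results = accumulator.accumulate(docids);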

View File

@ -1,42 +0,0 @@
package org.apache.lucene.facet.old;
import java.io.IOException;
import org.apache.lucene.search.DocIdSet;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Document IDs with scores for each, driving facets accumulation. Document
* scores are optionally used in the process of facets scoring.
*
* @see OldFacetsAccumulator#accumulate(ScoredDocIDs)
* @lucene.experimental
*/
public interface ScoredDocIDs {
/** Returns an iterator over the document IDs and their scores. */
public ScoredDocIDsIterator iterator() throws IOException;
/** Returns the set of doc IDs. */
public DocIdSet getDocIDs();
/** Returns the number of scored documents. */
public int size();
}

View File

@ -1,43 +0,0 @@
package org.apache.lucene.facet.old;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Iterator over document IDs and their scores. Each {@link #next()} retrieves
* the next docID and its score, which can later be retrieved by
* {@link #getDocID()} and {@link #getScore()}. <b>NOTE:</b> you must call
* {@link #next()} before {@link #getDocID()} and/or {@link #getScore()},
* otherwise the returned values are undefined.
*
* @lucene.experimental
*/
public interface ScoredDocIDsIterator {
/** Default score used in case scoring is disabled. */
public static final float DEFAULT_SCORE = 1.0f;
/** Iterate to the next document/score pair. Returns true iff there is such a pair. */
public abstract boolean next();
/** Returns the ID of the current document. */
public abstract int getDocID();
/** Returns the score of the current document. */
public abstract float getScore();
}

View File

@ -1,446 +0,0 @@
package org.apache.lucene.facet.old;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.facet.old.ScoredDocIDs;
import org.apache.lucene.facet.old.ScoredDocIDsIterator;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.OpenBitSetDISI;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Utility methods for Scored Doc IDs.
*
* @lucene.experimental
*/
public class ScoredDocIdsUtils {
/**
* Create a complement of the input set. The returned {@link ScoredDocIDs}
* does not contain any scores, which makes sense given that the complementing
* documents were not scored.
*
* Note: the complement set does NOT contain doc ids which are noted as deleted by the given reader
*
* @param docids to be complemented.
* @param reader holding the number of documents & information about deletions.
*/
public final static ScoredDocIDs getComplementSet(final ScoredDocIDs docids, final IndexReader reader)
throws IOException {
final int maxDoc = reader.maxDoc();
DocIdSet docIdSet = docids.getDocIDs();
final FixedBitSet complement;
if (docIdSet instanceof FixedBitSet) {
// That is the most common case, if ScoredDocIdsCollector was used.
complement = ((FixedBitSet) docIdSet).clone();
} else {
complement = new FixedBitSet(maxDoc);
DocIdSetIterator iter = docIdSet.iterator();
int doc;
while ((doc = iter.nextDoc()) < maxDoc) {
complement.set(doc);
}
}
complement.flip(0, maxDoc);
clearDeleted(reader, complement);
return createScoredDocIds(complement, maxDoc);
}
/** Clears all deleted documents from the given bit set according to the given reader. */
private static void clearDeleted(final IndexReader reader, final FixedBitSet set) throws IOException {
// TODO use BitsFilteredDocIdSet?
// If there are no deleted docs
if (!reader.hasDeletions()) {
return; // return immediately
}
DocIdSetIterator it = set.iterator();
int doc = it.nextDoc();
for (AtomicReaderContext context : reader.leaves()) {
AtomicReader r = context.reader();
final int maxDoc = r.maxDoc() + context.docBase;
if (doc >= maxDoc) { // skip this segment
continue;
}
if (!r.hasDeletions()) { // skip all docs that belong to this reader as it has no deletions
while ((doc = it.nextDoc()) < maxDoc) {}
continue;
}
Bits liveDocs = r.getLiveDocs();
do {
if (!liveDocs.get(doc - context.docBase)) {
set.clear(doc);
}
} while ((doc = it.nextDoc()) < maxDoc);
}
}
/**
* Create a subset of an existing ScoredDocIDs object.
*
* @param allDocIds original set
* @param sampleSet Doc Ids of the subset.
*/
public static final ScoredDocIDs createScoredDocIDsSubset(final ScoredDocIDs allDocIds,
final int[] sampleSet) throws IOException {
// sort so that we can scan docs in order
final int[] docids = sampleSet;
Arrays.sort(docids);
final float[] scores = new float[docids.length];
// fetch scores and compute size
ScoredDocIDsIterator it = allDocIds.iterator();
int n = 0;
while (it.next() && n < docids.length) {
int doc = it.getDocID();
if (doc == docids[n]) {
scores[n] = it.getScore();
++n;
}
}
final int size = n;
return new ScoredDocIDs() {
@Override
public DocIdSet getDocIDs() {
return new DocIdSet() {
@Override
public boolean isCacheable() { return true; }
@Override
public DocIdSetIterator iterator() {
return new DocIdSetIterator() {
private int next = -1;
@Override
public int advance(int target) {
while (next < size && docids[next++] < target) {
}
return next == size ? NO_MORE_DOCS : docids[next];
}
@Override
public int docID() {
return docids[next];
}
@Override
public int nextDoc() {
if (++next >= size) {
return NO_MORE_DOCS;
}
return docids[next];
}
@Override
public long cost() {
return size;
}
};
}
};
}
@Override
public ScoredDocIDsIterator iterator() {
return new ScoredDocIDsIterator() {
int next = -1;
@Override
public boolean next() { return ++next < size; }
@Override
public float getScore() { return scores[next]; }
@Override
public int getDocID() { return docids[next]; }
};
}
@Override
public int size() { return size; }
};
}
/**
* Creates a {@link ScoredDocIDs} which returns all non-deleted doc IDs
* according to the given reader.
* The returned set contains doc IDs in the range [0 .. reader.maxDoc).
*/
public static final ScoredDocIDs createAllDocsScoredDocIDs (final IndexReader reader) {
if (reader.hasDeletions()) {
return new AllLiveDocsScoredDocIDs(reader);
}
return new AllDocsScoredDocIDs(reader);
}
/**
* Create a ScoredDocIDs out of a given docIdSet and the total number of documents in an index
*/
public static final ScoredDocIDs createScoredDocIds(final DocIdSet docIdSet, final int maxDoc) {
return new ScoredDocIDs() {
private int size = -1;
@Override
public DocIdSet getDocIDs() { return docIdSet; }
@Override
public ScoredDocIDsIterator iterator() throws IOException {
final DocIdSetIterator docIterator = docIdSet.iterator();
return new ScoredDocIDsIterator() {
@Override
public boolean next() {
try {
return docIterator.nextDoc() != DocIdSetIterator.NO_MORE_DOCS;
} catch (IOException e) {
throw new RuntimeException(e);
}
}
@Override
public float getScore() { return DEFAULT_SCORE; }
@Override
public int getDocID() { return docIterator.docID(); }
};
}
@Override
public int size() {
// lazy size computation
if (size < 0) {
OpenBitSetDISI openBitSetDISI;
try {
openBitSetDISI = new OpenBitSetDISI(docIdSet.iterator(), maxDoc);
} catch (IOException e) {
throw new RuntimeException(e);
}
size = (int) openBitSetDISI.cardinality();
}
return size;
}
};
}
/**
* All docs ScoredDocIDs - this one is simply an 'all 1' bitset. Used when
* there are no deletions in the index and we wish to go through each and
* every document.
*/
private static class AllDocsScoredDocIDs implements ScoredDocIDs {
final int maxDoc;
public AllDocsScoredDocIDs(IndexReader reader) {
this.maxDoc = reader.maxDoc();
}
@Override
public int size() {
return maxDoc;
}
@Override
public DocIdSet getDocIDs() {
return new DocIdSet() {
@Override
public boolean isCacheable() {
return true;
}
@Override
public DocIdSetIterator iterator() {
return new DocIdSetIterator() {
private int next = -1;
@Override
public int advance(int target) {
if (target <= next) {
target = next + 1;
}
return next = target >= maxDoc ? NO_MORE_DOCS : target;
}
@Override
public int docID() {
return next;
}
@Override
public int nextDoc() {
return ++next < maxDoc ? next : NO_MORE_DOCS;
}
@Override
public long cost() {
return maxDoc;
}
};
}
};
}
@Override
public ScoredDocIDsIterator iterator() {
try {
final DocIdSetIterator iter = getDocIDs().iterator();
return new ScoredDocIDsIterator() {
@Override
public boolean next() {
try {
return iter.nextDoc() != DocIdSetIterator.NO_MORE_DOCS;
} catch (IOException e) {
// cannot happen
return false;
}
}
@Override
public float getScore() {
return DEFAULT_SCORE;
}
@Override
public int getDocID() {
return iter.docID();
}
};
} catch (IOException e) {
// cannot happen
throw new RuntimeException(e);
}
}
}
/**
* An All-docs bitset which has '0' for deleted documents and '1' for the
* rest. Useful for iterating over all 'live' documents in a given index.
* <p>
* NOTE: this class would work for indexes with no deletions at all,
* although it is recommended to use {@link AllDocsScoredDocIDs} to avoid
* the performance cost of validating isDeleted() on each and every docId.
*/
private static final class AllLiveDocsScoredDocIDs implements ScoredDocIDs {
final int maxDoc;
final IndexReader reader;
AllLiveDocsScoredDocIDs(IndexReader reader) {
this.maxDoc = reader.maxDoc();
this.reader = reader;
}
@Override
public int size() {
return reader.numDocs();
}
@Override
public DocIdSet getDocIDs() {
return new DocIdSet() {
@Override
public boolean isCacheable() {
return true;
}
@Override
public DocIdSetIterator iterator() {
return new DocIdSetIterator() {
final Bits liveDocs = MultiFields.getLiveDocs(reader);
private int next = -1;
@Override
public int advance(int target) {
if (target > next) {
next = target - 1;
}
return nextDoc();
}
@Override
public int docID() {
return next;
}
@Override
public int nextDoc() {
do {
++next;
} while (next < maxDoc && liveDocs != null && !liveDocs.get(next));
return next < maxDoc ? next : NO_MORE_DOCS;
}
@Override
public long cost() {
return maxDoc;
}
};
}
};
}
@Override
public ScoredDocIDsIterator iterator() {
try {
final DocIdSetIterator iter = getDocIDs().iterator();
return new ScoredDocIDsIterator() {
@Override
public boolean next() {
try {
return iter.nextDoc() != DocIdSetIterator.NO_MORE_DOCS;
} catch (IOException e) {
// cannot happen
return false;
}
}
@Override
public float getScore() {
return DEFAULT_SCORE;
}
@Override
public int getDocID() {
return iter.docID();
}
};
} catch (IOException e) {
// cannot happen
throw new RuntimeException(e);
}
}
}
}
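A short usage sketch (illustrative; assumes an already-open IndexReader). Because getComplementSet excludes deleted documents, the complement of the all-live-docs set is empty:
import java.io.IOException;

import org.apache.lucene.facet.old.ScoredDocIDs;
import org.apache.lucene.facet.old.ScoredDocIdsUtils;
import org.apache.lucene.index.IndexReader;

class ComplementExample {
  /** Returns the complement of all live docs, which contains no documents at all. */
  static ScoredDocIDs complementOfAllLive(IndexReader reader) throws IOException {
    ScoredDocIDs allLive = ScoredDocIdsUtils.createAllDocsScoredDocIDs(reader);
    return ScoredDocIdsUtils.getComplementSet(allLive, reader); // size() == 0
  }
}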

View File

@ -1,67 +0,0 @@
package org.apache.lucene.facet.old;
import java.io.IOException;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.IntsRef;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* An {@link Aggregator} which updates the weight of a category according to the
* scores of the documents it was found in.
*
* @lucene.experimental
*/
public class ScoringAggregator implements Aggregator {
private final float[] scoreArray;
private final int hashCode;
public ScoringAggregator(float[] counterArray) {
this.scoreArray = counterArray;
this.hashCode = scoreArray == null ? 0 : scoreArray.hashCode();
}
@Override
public void aggregate(int docID, float score, IntsRef ordinals) throws IOException {
for (int i = 0; i < ordinals.length; i++) {
scoreArray[ordinals.ints[i]] += score;
}
}
@Override
public boolean equals(Object obj) {
if (obj == null || obj.getClass() != this.getClass()) {
return false;
}
ScoringAggregator that = (ScoringAggregator) obj;
return that.scoreArray == this.scoreArray;
}
@Override
public int hashCode() {
return hashCode;
}
@Override
public boolean setNextReader(AtomicReaderContext context) throws IOException {
return true;
}
}
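A small sketch (illustrative only) of the aggregation step above: the document's score is added to the weight slot of every category ordinal it carries.
import java.io.IOException;

import org.apache.lucene.facet.old.ScoringAggregator;
import org.apache.lucene.util.IntsRef;

class ScoringAggregatorExample {
  public static void main(String[] args) throws IOException {
    float[] weights = new float[5];                          // one slot per category ordinal
    ScoringAggregator aggregator = new ScoringAggregator(weights);
    IntsRef ordinals = new IntsRef(new int[] {1, 3}, 0, 2);  // ordinals found in the document
    aggregator.aggregate(0, 2.0f, ordinals);                 // doc 0 matched with score 2.0
    System.out.println(weights[1] + " " + weights[3]);       // both are now 2.0
  }
}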

View File

@ -1,24 +0,0 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html>
<head>
<title>Old Faceted Search API</title>
</head>
<body>
Old faceted search API, kept until complements, sampling and partitions are migrated to the new API.
</body>
</html>

View File

@ -1,187 +0,0 @@
package org.apache.lucene.facet.params;
import java.io.IOException;
import org.apache.lucene.facet.encoding.DGapVInt8IntEncoder;
import org.apache.lucene.facet.encoding.IntDecoder;
import org.apache.lucene.facet.encoding.IntEncoder;
import org.apache.lucene.facet.encoding.SortingIntEncoder;
import org.apache.lucene.facet.encoding.UniqueValuesIntEncoder;
import org.apache.lucene.facet.search.CategoryListIterator;
import org.apache.lucene.facet.search.DocValuesCategoryListIterator;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.facet.util.PartitionsUtils;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Contains parameters for a category list.
*
* @lucene.experimental
*/
public class CategoryListParams {
/**
* Defines which category ordinals are encoded for every document. This also
* affects how category ordinals are aggregated; check the different policies
* for more details.
*/
public static enum OrdinalPolicy {
/**
* Encodes only the ordinals of leaf nodes. That is, for the category A/B/C,
* the ordinals of A and A/B will not be encoded. This policy is efficient
* for hierarchical dimensions, as it reduces the number of ordinals that
* are visited per document. During faceted search, this policy behaves
* exactly like {@link #ALL_PARENTS}, and the counts of all path components
* will be computed as well.
*
* <p>
* <b>NOTE:</b> this {@link OrdinalPolicy} requires a special collector or
* accumulator, which will fix the parents' counts.
*
* <p>
* <b>NOTE:</b> since only leaf nodes are encoded for the document, you
* should use this policy only when a document cannot contain two
* categories that share a mutual parent, otherwise the counts will be
* wrong (the mutual parent will be over-counted). For example, if a
* document has the categories A/B/C and A/B/D, then with this policy the
* counts of "A" and "B" will be 2, which is wrong. If you intend to index
* hierarchical dimensions, with more than one category per document, you
* should use either {@link #ALL_PARENTS} or {@link #ALL_BUT_DIMENSION}.
*/
NO_PARENTS,
/**
* Encodes the ordinals of all path components. That is, the category A/B/C
* will encode the ordinals of A and A/B as well. If you don't require the
* dimension's count during search, consider using
* {@link #ALL_BUT_DIMENSION}.
*/
ALL_PARENTS,
/**
* Encodes the ordinals of all path components except the dimension. The
* dimension of a category is defined to be the first component in
* {@link FacetLabel#components}. For the category A/B/C, the ordinal of
* A/B will be encoded as well, but not the ordinal of A.
*
* <p>
* <b>NOTE:</b> when facets are aggregated, this policy behaves exactly like
* {@link #ALL_PARENTS}, except that the dimension is never counted. I.e. if
* you ask to count the facet "A", then while in {@link #ALL_PARENTS} you
* will get counts for "A" <u>and its children</u>, with this policy you
* will get counts for <u>only its children</u>. This policy is the default
* one, and makes sense for use with flat dimensions, whenever your
* application does not require the dimension's count. Otherwise, use
* {@link #ALL_PARENTS}.
*/
ALL_BUT_DIMENSION
}
/** The default field used to store the facets information. */
public static final String DEFAULT_FIELD = "$facets";
/**
* The default {@link OrdinalPolicy} that's used when encoding a document's
* category ordinals.
*/
public static final OrdinalPolicy DEFAULT_ORDINAL_POLICY = OrdinalPolicy.ALL_BUT_DIMENSION;
public final String field;
private final int hashCode;
/** Constructs a default category list parameters object, using {@link #DEFAULT_FIELD}. */
public CategoryListParams() {
this(DEFAULT_FIELD);
}
/** Constructs a category list parameters object, using the given field. */
public CategoryListParams(String field) {
this.field = field;
// Pre-compute the hashCode because these objects are immutable. Saves
// some time on the comparisons later.
this.hashCode = field.hashCode();
}
/**
* Allows overriding how categories are encoded and decoded. A matching
* {@link IntDecoder} is provided by the {@link IntEncoder}.
* <p>
* Default implementation creates a new Sorting(<b>Unique</b>(DGap)) encoder.
* Uniqueness in this regard means that when the same category appears twice in
* a document, only one appearance is encoded. This affects facet counting
* results.
* <p>
* Some possible considerations when overriding may be:
* <ul>
* <li>an application "knows" that all categories are unique. So no need to
* pass through the unique filter.</li>
* <li>Another application might wish to count multiple occurrences of the
* same category, or, use a faster encoding which will consume more space.</li>
* </ul>
* In any event, when changing this value make sure you know what you are
* doing, and test the results - e.g. verify the counts, if the application is
* about counting facets.
*/
public IntEncoder createEncoder() {
return new SortingIntEncoder(new UniqueValuesIntEncoder(new DGapVInt8IntEncoder()));
}
@Override
public boolean equals(Object o) {
if (o == this) {
return true;
}
if (!(o instanceof CategoryListParams)) {
return false;
}
CategoryListParams other = (CategoryListParams) o;
if (hashCode != other.hashCode) {
return false;
}
return field.equals(other.field);
}
@Override
public int hashCode() {
return hashCode;
}
/** Create the {@link CategoryListIterator} for the specified partition. */
public CategoryListIterator createCategoryListIterator(int partition) throws IOException {
String categoryListTermStr = PartitionsUtils.partitionName(partition);
String docValuesField = field + categoryListTermStr;
return new DocValuesCategoryListIterator(docValuesField, createEncoder().createMatchingDecoder());
}
/**
* Returns the {@link OrdinalPolicy} to use for the given dimension. This
* {@link CategoryListParams} always returns {@link #DEFAULT_ORDINAL_POLICY}
* for all dimensions.
*/
public OrdinalPolicy getOrdinalPolicy(String dimension) {
return DEFAULT_ORDINAL_POLICY;
}
@Override
public String toString() {
return "field=" + field + " encoder=" + createEncoder() + " ordinalPolicy=" + getOrdinalPolicy(null);
}
}
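A hedged sketch of the override pattern described in createEncoder() (the subclass name is illustrative, not from the original sources): an application that never indexes the same category twice per document can drop the uniqueness filter, and may also opt into counting dimensions.
import org.apache.lucene.facet.encoding.DGapVInt8IntEncoder;
import org.apache.lucene.facet.encoding.IntEncoder;
import org.apache.lucene.facet.encoding.SortingIntEncoder;
import org.apache.lucene.facet.params.CategoryListParams;

class NoDupsCategoryListParams extends CategoryListParams {
  @Override
  public IntEncoder createEncoder() {
    return new SortingIntEncoder(new DGapVInt8IntEncoder()); // Sorting(DGap): the Unique step is skipped
  }
  @Override
  public OrdinalPolicy getOrdinalPolicy(String dimension) {
    return OrdinalPolicy.ALL_PARENTS;                        // also count the dimension itself
  }
}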

View File

@ -1,180 +0,0 @@
package org.apache.lucene.facet.params;
import java.util.Collections;
import java.util.List;
import org.apache.lucene.facet.search.FacetArrays;
import org.apache.lucene.facet.taxonomy.FacetLabel;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Defines parameters that are needed for facets indexing. Note that this class
* does not have any setters. That's because overriding the default parameters
* is considered expert. If you wish to override them, simply extend this class
* and override the relevant getter.
*
* <p>
* <b>NOTE:</b> This class is also used during faceted search in order to e.g.
* know which field holds the drill-down terms or the fulltree posting.
* Therefore this class should be initialized once and you should refrain from
* changing it. Also note that if you make any changes to it (e.g. suddenly
* deciding that drill-down terms should be read from a different field) and use
* it on an existing index, things may not work as expected.
*
* @lucene.experimental
*/
public class FacetIndexingParams {
// the default CLP, can be a singleton
protected static final CategoryListParams DEFAULT_CATEGORY_LIST_PARAMS = new CategoryListParams();
/**
* A {@link FacetIndexingParams} which fixes a single
* {@link CategoryListParams} with
* {@link CategoryListParams#DEFAULT_ORDINAL_POLICY}.
*/
public static final FacetIndexingParams DEFAULT = new FacetIndexingParams();
/**
* The default delimiter with which {@link FacetLabel#components} are
* concatenated when written to the index, e.g. as drill-down terms. If you
* choose to override it by overriding {@link #getFacetDelimChar()}, you should
* make sure that you return a character that's not found in any path
* component.
*/
public static final char DEFAULT_FACET_DELIM_CHAR = '\u001F';
private final int partitionSize = Integer.MAX_VALUE;
protected final CategoryListParams clParams;
/**
* Initializes new default params. You should use this constructor only if you
* intend to override any of the getters, otherwise you can use
* {@link #DEFAULT} to save unnecessary object allocations.
*/
public FacetIndexingParams() {
this(DEFAULT_CATEGORY_LIST_PARAMS);
}
/** Initializes new params with the given {@link CategoryListParams}. */
public FacetIndexingParams(CategoryListParams categoryListParams) {
clParams = categoryListParams;
}
/**
* Returns the {@link CategoryListParams} for this {@link FacetLabel}. The
* default implementation returns the same {@link CategoryListParams} for all
* categories (even if {@code category} is {@code null}).
*
* @see PerDimensionIndexingParams
*/
public CategoryListParams getCategoryListParams(FacetLabel category) {
return clParams;
}
/**
* Copies the text required to execute a drill-down query on the given
* category to the given {@code char[]}, and returns the number of characters
* that were written.
* <p>
* <b>NOTE:</b> You should make sure that the {@code char[]} is large enough,
* by e.g. calling {@link FacetLabel#fullPathLength()}.
*/
public int drillDownTermText(FacetLabel path, char[] buffer) {
return path.copyFullPath(buffer, 0, getFacetDelimChar());
}
/**
* Returns the size of a partition. <i>Partitions</i> allow you to divide
* (hence, partition) the categories space into small sets to e.g. improve RAM
* consumption during faceted search. For instance, {@code partitionSize=100K}
* would mean that if your taxonomy index contains 420K categories, they will
* be divided into 5 groups and at search time a {@link FacetArrays} will be
* allocated at the size of the partition.
*
* <p>
* This is a truly advanced setting and should be changed with care. By default,
* all categories are put in one partition. You should modify this setting if
* you have really large taxonomies (e.g. 1M+ nodes).
*/
public int getPartitionSize() {
return partitionSize;
}
/**
* Returns a list of all {@link CategoryListParams categoryListParams} that
* are used for facets indexing.
*/
public List<CategoryListParams> getAllCategoryListParams() {
return Collections.singletonList(clParams);
}
@Override
public int hashCode() {
final int prime = 31;
int result = 1;
result = prime * result + ((clParams == null) ? 0 : clParams.hashCode());
result = prime * result + partitionSize;
for (CategoryListParams clp : getAllCategoryListParams()) {
result ^= clp.hashCode();
}
return result;
}
@Override
public boolean equals(Object obj) {
if (this == obj) {
return true;
}
if (obj == null) {
return false;
}
if (!(obj instanceof FacetIndexingParams)) {
return false;
}
FacetIndexingParams other = (FacetIndexingParams) obj;
if (clParams == null) {
if (other.clParams != null) {
return false;
}
} else if (!clParams.equals(other.clParams)) {
return false;
}
if (partitionSize != other.partitionSize) {
return false;
}
Iterable<CategoryListParams> cLs = getAllCategoryListParams();
Iterable<CategoryListParams> otherCLs = other.getAllCategoryListParams();
return cLs.equals(otherCLs);
}
/**
* Returns the delimiter character used internally for concatenating category
* path components, e.g. for drill-down terms.
*/
public char getFacetDelimChar() {
return DEFAULT_FACET_DELIM_CHAR;
}
}
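For illustration (a sketch, not part of the original sources): rendering the drill-down term text of a category with the default params, sized via fullPathLength() as the javadoc above suggests.
import org.apache.lucene.facet.params.FacetIndexingParams;
import org.apache.lucene.facet.taxonomy.FacetLabel;

class DrillDownTermExample {
  /** Returns the drill-down term text for the given category, using the default delimiter. */
  static String drillDownTerm(FacetLabel label) {
    FacetIndexingParams params = FacetIndexingParams.DEFAULT;
    char[] buffer = new char[label.fullPathLength()];  // large enough for the full path
    int len = params.drillDownTermText(label, buffer);
    return new String(buffer, 0, len);
  }
}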

View File

@ -1,93 +0,0 @@
package org.apache.lucene.facet.params;
import java.util.Arrays;
import java.util.List;
import org.apache.lucene.facet.search.FacetRequest;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Defines parameters that are needed for faceted search: the list of
* {@link FacetRequest facet requests} which should be aggregated, as well as the
* {@link FacetIndexingParams indexing params} that were used to index them.
*
* @lucene.experimental
*/
public class FacetSearchParams {
public final FacetIndexingParams indexingParams;
public final List<FacetRequest> facetRequests;
/**
* Initializes with the given {@link FacetRequest requests} and default
* {@link FacetIndexingParams#DEFAULT}. If you used a different
* {@link FacetIndexingParams}, you should use
* {@link #FacetSearchParams(FacetIndexingParams, List)}.
*/
public FacetSearchParams(FacetRequest... facetRequests) {
this(FacetIndexingParams.DEFAULT, Arrays.asList(facetRequests));
}
/**
* Initializes with the given {@link FacetRequest requests} and default
* {@link FacetIndexingParams#DEFAULT}. If you used a different
* {@link FacetIndexingParams}, you should use
* {@link #FacetSearchParams(FacetIndexingParams, List)}.
*/
public FacetSearchParams(List<FacetRequest> facetRequests) {
this(FacetIndexingParams.DEFAULT, facetRequests);
}
/**
* Initializes with the given {@link FacetRequest requests} and
* {@link FacetIndexingParams}.
*/
public FacetSearchParams(FacetIndexingParams indexingParams, FacetRequest... facetRequests) {
this(indexingParams, Arrays.asList(facetRequests));
}
/**
* Initializes with the given {@link FacetRequest requests} and
* {@link FacetIndexingParams}.
*/
public FacetSearchParams(FacetIndexingParams indexingParams, List<FacetRequest> facetRequests) {
if (facetRequests == null || facetRequests.size() == 0) {
throw new IllegalArgumentException("at least one FacetRequest must be defined");
}
this.facetRequests = facetRequests;
this.indexingParams = indexingParams;
}
@Override
public String toString() {
final String INDENT = " ";
final char NEWLINE = '\n';
StringBuilder sb = new StringBuilder("IndexingParams: ");
sb.append(NEWLINE).append(INDENT).append(indexingParams);
sb.append(NEWLINE).append("FacetRequests:");
for (FacetRequest facetRequest : facetRequests) {
sb.append(NEWLINE).append(INDENT).append(facetRequest);
}
return sb.toString();
}
}
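A minimal sketch (illustrative only): the search params simply pair the facet requests with the indexing params that were used at indexing time.
import java.util.List;

import org.apache.lucene.facet.params.FacetIndexingParams;
import org.apache.lucene.facet.params.FacetSearchParams;
import org.apache.lucene.facet.search.FacetRequest;

class SearchParamsExample {
  /** Builds search params; throws IllegalArgumentException if no request is given. */
  static FacetSearchParams newSearchParams(FacetIndexingParams indexingParams, List<FacetRequest> requests) {
    return new FacetSearchParams(indexingParams, requests);
  }
}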

View File

@ -1,96 +0,0 @@
package org.apache.lucene.facet.params;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.lucene.facet.taxonomy.FacetLabel;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* A {@link FacetIndexingParams} that utilizes a different category list per
* dimension, where the dimension is specified by a {@link FacetLabel category} (see
* {@link #PerDimensionIndexingParams(Map, CategoryListParams)}).
* <p>
* A 'dimension' is defined as the first or "zero-th" component in a
* {@link FacetLabel}. For example, if a category is defined as
* "Author/American/Mark Twain", then the dimension would be "Author".
*
* @lucene.experimental
*/
public class PerDimensionIndexingParams extends FacetIndexingParams {
private final Map<String, CategoryListParams> clParamsMap;
/**
* Initializes a new instance with the given dimension-to-params mapping. The
* dimension is taken to be
* {@link FacetLabel#components cp.components[0]}.
*
* <p>
* <b>NOTE:</b> for any dimension whose {@link CategoryListParams} is not
* defined in the mapping, a default {@link CategoryListParams} will be used.
*
* @see #PerDimensionIndexingParams(Map, CategoryListParams)
*/
public PerDimensionIndexingParams(Map<FacetLabel, CategoryListParams> paramsMap) {
this(paramsMap, DEFAULT_CATEGORY_LIST_PARAMS);
}
/**
* Same as {@link #PerDimensionIndexingParams(Map)}, except that the given
* {@link CategoryListParams} will be used for any dimension that is not
* specified in the given mapping.
*/
public PerDimensionIndexingParams(Map<FacetLabel, CategoryListParams> paramsMap,
CategoryListParams categoryListParams) {
super(categoryListParams);
clParamsMap = new HashMap<String,CategoryListParams>();
for (Entry<FacetLabel, CategoryListParams> e : paramsMap.entrySet()) {
clParamsMap.put(e.getKey().components[0], e.getValue());
}
}
@Override
public List<CategoryListParams> getAllCategoryListParams() {
ArrayList<CategoryListParams> vals = new ArrayList<CategoryListParams>(clParamsMap.values());
vals.add(clParams); // add the default too
return vals;
}
/**
* Returns the {@link CategoryListParams} for the corresponding dimension
* which is returned by {@code category.getComponent(0)}. If {@code category}
* is {@code null}, or was not specified in the map given to the constructor,
* returns the default {@link CategoryListParams}.
*/
@Override
public CategoryListParams getCategoryListParams(FacetLabel category) {
if (category != null) {
CategoryListParams clParams = clParamsMap.get(category.components[0]);
if (clParams != null) {
return clParams;
}
}
return clParams;
}
}
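A hedged sketch (the "$author" field name and the single-component FacetLabel constructor are assumptions, not taken from the original sources): store the "Author" dimension in its own category list field while all other dimensions keep the default.
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.facet.params.CategoryListParams;
import org.apache.lucene.facet.params.PerDimensionIndexingParams;
import org.apache.lucene.facet.taxonomy.FacetLabel;

class PerDimensionExample {
  static PerDimensionIndexingParams authorInItsOwnField() {
    Map<FacetLabel, CategoryListParams> map = new HashMap<FacetLabel, CategoryListParams>();
    map.put(new FacetLabel("Author"),                  // assumption: FacetLabel(String... components)
            new CategoryListParams("$author"));        // dedicated category list field for "Author"
    return new PerDimensionIndexingParams(map);        // other dimensions fall back to the default
  }
}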

View File

@ -1,55 +0,0 @@
package org.apache.lucene.facet.params;
import java.util.Map;
import org.apache.lucene.facet.taxonomy.FacetLabel;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* A {@link CategoryListParams} which allows controlling the
* {@link CategoryListParams.OrdinalPolicy} used for each dimension. The
* dimension is specified as the first component in
* {@link FacetLabel#components}.
*/
public class PerDimensionOrdinalPolicy extends CategoryListParams {
private final Map<String,OrdinalPolicy> policies;
private final OrdinalPolicy defaultOP;
public PerDimensionOrdinalPolicy(Map<String,OrdinalPolicy> policies) {
this(policies, DEFAULT_ORDINAL_POLICY);
}
public PerDimensionOrdinalPolicy(Map<String,OrdinalPolicy> policies, OrdinalPolicy defaultOP) {
this.defaultOP = defaultOP;
this.policies = policies;
}
@Override
public OrdinalPolicy getOrdinalPolicy(String dimension) {
OrdinalPolicy op = policies.get(dimension);
return op == null ? defaultOP : op;
}
@Override
public String toString() {
return super.toString() + " policies=" + policies;
}
}
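A short sketch (illustrative only) combining the classes above: count the "Author" dimension itself while all other dimensions keep the default ALL_BUT_DIMENSION policy.
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.facet.params.CategoryListParams.OrdinalPolicy;
import org.apache.lucene.facet.params.FacetIndexingParams;
import org.apache.lucene.facet.params.PerDimensionOrdinalPolicy;

class OrdinalPolicyExample {
  static FacetIndexingParams countAuthorDimension() {
    Map<String, OrdinalPolicy> policies = new HashMap<String, OrdinalPolicy>();
    policies.put("Author", OrdinalPolicy.ALL_PARENTS);                    // encode the dimension ordinal too
    return new FacetIndexingParams(new PerDimensionOrdinalPolicy(policies));
  }
}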

View File

@ -1,25 +0,0 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html>
<head>
<title>Facets indexing and search parameters</title>
</head>
<body>
Facets indexing and search parameters. Define how facets are indexed
as well as which categories need to be aggregated.
</body>
</html>

View File

@ -1,42 +0,0 @@
package org.apache.lucene.facet.partitions;
import org.apache.lucene.facet.search.FacetRequest;
import org.apache.lucene.facet.search.FacetResult;
import org.apache.lucene.facet.search.FacetResultsHandler;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Intermediate {@link FacetResult} of faceted search.
* <p>
* This is an empty interface on purpose.
* <p>
* It allows {@link FacetResultsHandler} to return intermediate result objects
* that only it knows how to interpret, and so the handler has maximal freedom
* in defining what an intermediate result is, depending on its specific logic.
*
* @lucene.experimental
*/
public interface IntermediateFacetResult {
/**
* Facet request for which this temporary result was created.
*/
FacetRequest getFacetRequest();
}

View File

@ -1,137 +0,0 @@
package org.apache.lucene.facet.partitions;
import java.io.IOException;
import org.apache.lucene.facet.old.OldFacetsAccumulator;
import org.apache.lucene.facet.old.ScoredDocIDs;
import org.apache.lucene.facet.search.FacetArrays;
import org.apache.lucene.facet.search.FacetRequest;
import org.apache.lucene.facet.search.FacetResult;
import org.apache.lucene.facet.search.FacetResultNode;
import org.apache.lucene.facet.search.FacetResultsHandler;
import org.apache.lucene.facet.search.OrdinalValueResolver;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* A {@link FacetResultsHandler} designed to work with facet partitions.
*
* @lucene.experimental
*/
public abstract class PartitionsFacetResultsHandler extends FacetResultsHandler {
public PartitionsFacetResultsHandler(TaxonomyReader taxonomyReader, FacetRequest facetRequest,
OrdinalValueResolver resolver, FacetArrays facetArrays) {
super(taxonomyReader, facetRequest, resolver, facetArrays);
}
/**
* Fetch results of a single partition, given facet arrays for that partition,
* and based on the matching documents and faceted search parameters.
* @param offset
* offset in input arrays where partition starts
*
* @return temporary facet result, potentially to be passed back to
* <b>this</b> result handler for merging, or <b>null</b> in case the
* constructor parameter <code>facetRequest</code> requests an
* illegal FacetResult, e.g. a root node category path that
* does not exist in the constructor parameter <code>taxonomyReader</code>.
* @throws IOException
* on error
*/
public abstract IntermediateFacetResult fetchPartitionResult(int offset) throws IOException;
/**
* Merge results of several facet partitions. The merge logic is undefined
* and open to interpretation. For example, a merge implementation could
* keep the top K results. The passed {@link IntermediateFacetResult}s must be ones
* that were created by this handler, otherwise a {@link ClassCastException} is
* thrown. In addition, all passed {@link IntermediateFacetResult}s must have
* the same {@link FacetRequest} otherwise an {@link IllegalArgumentException}
* is thrown.
*
* @param tmpResults one or more temporary results created by <b>this</b>
* handler.
* @return temporary facet result that represents the union, as specified by
* <b>this</b> handler, of the input temporary facet results.
* @throws IOException on error.
* @throws ClassCastException if the temporary result passed was not created
* by this handler
* @throws IllegalArgumentException if passed <code>facetResults</code> do not
* have the same {@link FacetRequest}
* @see IntermediateFacetResult#getFacetRequest()
*/
public abstract IntermediateFacetResult mergeResults(IntermediateFacetResult... tmpResults) throws IOException;
/**
* Create a facet result from the temporary result.
* @param tmpResult temporary result to be rendered as a {@link FacetResult}
* @throws IOException on error.
*/
public abstract FacetResult renderFacetResult(IntermediateFacetResult tmpResult) throws IOException ;
/**
* Perform any rearrangement as required on a facet result that has changed after
* it was rendered.
* <P>
* Possible use case: a sampling facets accumulator invoked another
* facets accumulator on a sample set of documents, obtained the
* rendered facet results, fixed their counts, and now the results need
* to be sorted differently according to the fixed counts.
* @param facetResult result to be rearranged.
* @see FacetResultNode#value
*/
public abstract FacetResult rearrangeFacetResult(FacetResult facetResult);
/**
* Label results according to settings in {@link FacetRequest}, such as
* {@link FacetRequest#getNumLabel()}. Usually invoked by
* {@link OldFacetsAccumulator#accumulate(ScoredDocIDs)}
*
* @param facetResult
* facet result to be labeled.
* @throws IOException
* on error
*/
public abstract void labelResult(FacetResult facetResult) throws IOException;
/**
* Check if an array contains the partition which contains the given ordinal.
*
* @param ordinal
* the facet ordinal being checked
* @param facetArrays
* facet arrays for the given partition
* @param offset
* offset in input arrays where partition starts
*/
protected boolean isSelfPartition(int ordinal, FacetArrays facetArrays, int offset) {
int partitionSize = facetArrays.arrayLength;
return ordinal / partitionSize == offset / partitionSize;
}
@Override
public final FacetResult compute() throws IOException {
FacetResult res = renderFacetResult(fetchPartitionResult(0));
labelResult(res);
return res;
}
}
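A hedged flow sketch over two partitions (illustrative; in a real accumulator the facet arrays are re-filled per partition before each fetch): fetch both partitions, merge, render, then label.
import java.io.IOException;

import org.apache.lucene.facet.partitions.IntermediateFacetResult;
import org.apache.lucene.facet.partitions.PartitionsFacetResultsHandler;
import org.apache.lucene.facet.search.FacetArrays;
import org.apache.lucene.facet.search.FacetResult;

class TwoPartitionFlow {
  static FacetResult computeOverTwoPartitions(PartitionsFacetResultsHandler handler, FacetArrays arrays) throws IOException {
    int partitionSize = arrays.arrayLength;                            // offset where the second partition starts
    IntermediateFacetResult first = handler.fetchPartitionResult(0);
    IntermediateFacetResult second = handler.fetchPartitionResult(partitionSize);
    FacetResult result = handler.renderFacetResult(handler.mergeResults(first, second));
    handler.labelResult(result);                                       // resolve ordinals to category labels
    return result;
  }
}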

View File

@ -1,27 +0,0 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html>
<head>
<title>Category Partitions</title>
</head>
<body>
<h1>Category Partitions</h1>
Allows partitioning the category ordinals space, so that less RAM is consumed during search.
Only meaningful for very large taxonomies (tens of millions of categories).
</body>
</html>

View File

@ -1,33 +0,0 @@
package org.apache.lucene.facet.range;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.facet.search.FacetResultNode;
import org.apache.lucene.facet.taxonomy.FacetLabel;
/** Holds the facet results for a {@link
* RangeFacetRequest}. */
public class RangeFacetResultNode extends FacetResultNode {
public final Range range;
RangeFacetResultNode(String field, Range range, int count) {
super(-1, count);
this.range = range;
this.label = new FacetLabel(field, range.label);
}
}

View File

@ -1,24 +0,0 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html>
<head>
<title>Facets numeric range code</title>
</head>
<body>
Code to compute facets for numeric ranges.
</body>
</html>

View File

@ -1,71 +0,0 @@
package org.apache.lucene.facet.sampling;
import java.io.IOException;
import java.util.Random;
import org.apache.lucene.facet.old.ScoredDocIDs;
import org.apache.lucene.facet.old.ScoredDocIDsIterator;
import org.apache.lucene.facet.old.ScoredDocIdsUtils;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Simple random sampler
*/
public class RandomSampler extends Sampler {
private final Random random;
public RandomSampler() {
super();
this.random = new Random();
}
public RandomSampler(SamplingParams params, Random random) throws IllegalArgumentException {
super(params);
this.random = random;
}
@Override
protected SampleResult createSample(ScoredDocIDs docids, int actualSize, int sampleSetSize) throws IOException {
final int[] sample = new int[sampleSetSize];
final int maxStep = (actualSize * 2 ) / sampleSetSize; //floor
int remaining = actualSize;
ScoredDocIDsIterator it = docids.iterator();
int i = 0;
// select sample docs with random skipStep, make sure to leave sufficient #docs for selection after last skip
while (i<sample.length && remaining>(sampleSetSize-maxStep-i)) {
int skipStep = 1 + random.nextInt(maxStep);
// Skip over 'skipStep' documents
for (int j=0; j<skipStep; j++) {
it.next();
-- remaining;
}
sample[i++] = it.getDocID();
}
// Add leftover documents to the sample set
while (i<sample.length) {
it.next();
sample[i++] = it.getDocID();
}
ScoredDocIDs sampleRes = ScoredDocIdsUtils.createScoredDocIDsSubset(docids, sample);
SampleResult res = new SampleResult(sampleRes, sampleSetSize/(double)actualSize);
return res;
}
}
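A construction sketch (the no-arg SamplingParams defaults are an assumption, not shown in this diff): a fixed Random seed makes the skip steps, and therefore the sample, reproducible across runs.
import java.util.Random;

import org.apache.lucene.facet.sampling.RandomSampler;
import org.apache.lucene.facet.sampling.SamplingParams;

class SeededSamplerExample {
  static RandomSampler newSeededSampler() {
    SamplingParams params = new SamplingParams();      // assumption: default sampling thresholds/ratios
    return new RandomSampler(params, new Random(42L)); // fixed seed -> repeatable skip steps
  }
}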

View File

@ -1,405 +0,0 @@
package org.apache.lucene.facet.sampling;
import java.io.IOException;
import java.util.Arrays;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.lucene.facet.old.ScoredDocIDs;
import org.apache.lucene.facet.old.ScoredDocIDsIterator;
import org.apache.lucene.facet.old.ScoredDocIdsUtils;
import org.apache.lucene.util.PriorityQueue;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Take random samples of large collections.
* @lucene.experimental
*/
public class RepeatableSampler extends Sampler {
private static final Logger logger = Logger.getLogger(RepeatableSampler.class.getName());
public RepeatableSampler(SamplingParams params) {
super(params);
}
@Override
protected SampleResult createSample(ScoredDocIDs docids, int actualSize,
int sampleSetSize) throws IOException {
int[] sampleSet = null;
try {
sampleSet = repeatableSample(docids, actualSize,
sampleSetSize);
} catch (IOException e) {
if (logger.isLoggable(Level.WARNING)) {
logger.log(Level.WARNING, "sampling failed: "+e.getMessage()+" - falling back to no sampling!", e);
}
return new SampleResult(docids, 1d);
}
ScoredDocIDs sampled = ScoredDocIdsUtils.createScoredDocIDsSubset(docids,
sampleSet);
if (logger.isLoggable(Level.FINEST)) {
logger.finest("******************** " + sampled.size());
}
return new SampleResult(sampled, sampled.size()/(double)docids.size());
}
/**
* Returns <code>sampleSize</code> values from the first <code>collectionSize</code>
* locations of <code>collection</code>, chosen using
* the <code>HASHING</code> algorithm. The sample values are not sorted.
* @param collection The values from which a sample is wanted.
* @param collectionSize The number of values (from the first) from which to draw the sample.
* @param sampleSize The number of values to return.
* @return An array of values chosen from the collection.
* @see Algorithm#HASHING
*/
private static int[] repeatableSample(ScoredDocIDs collection,
int collectionSize, int sampleSize)
throws IOException {
return repeatableSample(collection, collectionSize,
sampleSize, Algorithm.HASHING, Sorted.NO);
}
/**
* Returns <code>sampleSize</code> values from the first <code>collectionSize</code>
* locations of <code>collection</code>, chosen using <code>algorithm</code>.
* @param collection The values from which a sample is wanted.
* @param collectionSize The number of values (from the first) from which to draw the sample.
* @param sampleSize The number of values to return.
* @param algorithm Which algorithm to use.
* @param sorted Sorted.YES to sort the sample values in ascending order before returning;
* Sorted.NO to return them in essentially random order.
* @return An array of values chosen from the collection.
*/
private static int[] repeatableSample(ScoredDocIDs collection,
int collectionSize, int sampleSize,
Algorithm algorithm, Sorted sorted)
throws IOException {
if (collection == null) {
throw new IOException("docIdSet is null");
}
if (sampleSize < 1) {
throw new IOException("sampleSize < 1 (" + sampleSize + ")");
}
if (collectionSize < sampleSize) {
throw new IOException("collectionSize (" + collectionSize + ") less than sampleSize (" + sampleSize + ")");
}
int[] sample = new int[sampleSize];
long[] times = new long[4];
if (algorithm == Algorithm.TRAVERSAL) {
sample1(collection, collectionSize, sample, times);
} else if (algorithm == Algorithm.HASHING) {
sample2(collection, collectionSize, sample, times);
} else {
throw new IllegalArgumentException("Invalid algorithm selection");
}
if (sorted == Sorted.YES) {
Arrays.sort(sample);
}
if (returnTimings) {
times[3] = System.currentTimeMillis();
if (logger.isLoggable(Level.FINEST)) {
logger.finest("Times: " + (times[1] - times[0]) + "ms, "
+ (times[2] - times[1]) + "ms, " + (times[3] - times[2])+"ms");
}
}
return sample;
}
/**
* Returns <code>sample</code>.length values chosen from the first <code>collectionSize</code>
* locations of <code>collection</code>, using the TRAVERSAL algorithm. The sample is
* pseudorandom: no subset of the original collection
* is in principle more likely to occur than any other, but for a given collection
* and sample size, the same sample will always be returned. This algorithm walks the
* original collection in a methodical way that is guaranteed not to visit any location
* more than once, which makes sampling without replacement faster because removals don't
* have to be tracked, and the number of operations is proportional to the sample size,
* not the collection size.
* Times for performance measurement
* are returned in <code>times</code>, which must be an array of at least three longs, containing
* millisecond event times. The first
* is set when the algorithm starts; the second, when the step size has been calculated;
* and the third when the sample has been taken.
* @param collection The set to be sampled.
* @param collectionSize The number of values to use (starting from first).
* @param sample The array in which to return the sample.
* @param times The times of three events, for measuring performance.
*/
private static void sample1(ScoredDocIDs collection, int collectionSize, int[] sample, long[] times)
throws IOException {
ScoredDocIDsIterator it = collection.iterator();
if (returnTimings) {
times[0] = System.currentTimeMillis();
}
int sampleSize = sample.length;
int prime = findGoodStepSize(collectionSize, sampleSize);
int mod = prime % collectionSize;
if (returnTimings) {
times[1] = System.currentTimeMillis();
}
int sampleCount = 0;
int index = 0;
for (; sampleCount < sampleSize;) {
if (index + mod < collectionSize) {
for (int i = 0; i < mod; i++, index++) {
it.next();
}
} else {
index = index + mod - collectionSize;
it = collection.iterator();
for (int i = 0; i < index; i++) {
it.next();
}
}
sample[sampleCount++] = it.getDocID();
}
if (returnTimings) {
times[2] = System.currentTimeMillis();
}
}
/**
* Returns a value which will allow the caller to walk
* a collection of <code>collectionSize</code> values, without repeating or missing
* any, and spanning the collection from beginning to end at least once with
* <code>sampleSize</code> visited locations. Choosing a value
* that is relatively prime to the collection size ensures that stepping by that size (modulo
* the collection size) will hit all locations without repeating, eliminating the need to
* track previously visited locations for a "without replacement" sample. Starting with the
* square root of the collection size ensures that either the first or second prime tried will
* work (they can't both divide the collection size). It also has the property that N steps of
* size N will span a collection of N**2 elements once. If the sample is bigger than N, it will
* wrap multiple times (without repeating). If the sample is smaller, a step size is chosen
* that will result in at least one spanning of the collection.
*
* @param collectionSize The number of values in the collection to be sampled.
* @param sampleSize The number of values wanted in the sample.
* @return A good increment value for walking the collection.
*/
private static int findGoodStepSize(int collectionSize, int sampleSize) {
int i = (int) Math.sqrt(collectionSize);
if (sampleSize < i) {
i = collectionSize / sampleSize;
}
do {
i = findNextPrimeAfter(i);
} while (collectionSize % i == 0);
return i;
}
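// Worked example (illustrative, not in the original source):
// findGoodStepSize(1000, 10): sqrt(1000) = 31, but sampleSize (10) < 31, so i = 1000 / 10 = 100;
// the next prime after 100 is 101 and 1000 % 101 != 0, so the step size is 101.
// Since gcd(101, 1000) == 1, stepping by 101 (mod 1000) never revisits a slot,
// and 10 steps of 101 wrap past 1000 once, spanning the whole collection.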
/**
* Returns the first prime number that is larger than <code>n</code>.
* @param n A number less than the prime to be returned.
* @return The smallest prime larger than <code>n</code>.
*/
private static int findNextPrimeAfter(int n) {
n += (n % 2 == 0) ? 1 : 2; // next odd
foundFactor: for (;; n += 2) { //TODO labels??!!
int sri = (int) (Math.sqrt(n));
for (int primeIndex = 0; primeIndex < N_PRIMES; primeIndex++) {
int p = primes[primeIndex];
if (p > sri) {
return n;
}
if (n % p == 0) {
continue foundFactor;
}
}
for (int p = primes[N_PRIMES - 1] + 2;; p += 2) {
if (p > sri) {
return n;
}
if (n % p == 0) {
continue foundFactor;
}
}
}
}
/**
* The first N_PRIMES primes, after 2.
*/
private static final int N_PRIMES = 4000;
private static int[] primes = new int[N_PRIMES];
static {
primes[0] = 3;
for (int count = 1; count < N_PRIMES; count++) {
primes[count] = findNextPrimeAfter(primes[count - 1]);
}
}
/**
* Returns <code>sample</code>.length values chosen from the first <code>collectionSize</code>
* locations of <code>collection</code>, using the HASHING algorithm. Performance measurements
* are returned in <code>times</code>, which must be an array of at least three longs. The first
* will be set when the algorithm starts; the second, when a hash key has been calculated and
* inserted into the priority queue for every element in the collection; and the third when the
* original elements associated with the keys remaining in the PQ have been stored in the sample
* array for return.
* <P>
* This algorithm slows as the sample size becomes a significant fraction of the collection
* size, because the PQ is as large as the sample set, and will not do early rejection of values
* below the minimum until it fills up, and a larger PQ contains more small values to be purged,
* resulting in less early rejection and more logN insertions.
*
* @param collection The set to be sampled.
* @param collectionSize The number of values to use (starting from first).
* @param sample The array in which to return the sample.
* @param times The times of three events, for measuring performance.
*/
private static void sample2(ScoredDocIDs collection, int collectionSize, int[] sample, long[] times)
throws IOException {
if (returnTimings) {
times[0] = System.currentTimeMillis();
}
int sampleSize = sample.length;
IntPriorityQueue pq = new IntPriorityQueue(sampleSize);
/*
* Convert every value in the collection to a hashed "weight" value, and insert
* into a bounded PQ (retains only sampleSize highest weights).
*/
ScoredDocIDsIterator it = collection.iterator();
MI mi = null;
while (it.next()) {
if (mi == null) {
mi = new MI();
}
mi.value = (int) (it.getDocID() * PHI_32) & 0x7FFFFFFF;
mi = pq.insertWithOverflow(mi);
}
if (returnTimings) {
times[1] = System.currentTimeMillis();
}
/*
* Extract heap, convert weights back to original values, and return as integers.
*/
Object[] heap = pq.getHeap();
for (int si = 0; si < sampleSize; si++) {
sample[si] = (int)(((MI) heap[si+1]).value * PHI_32I) & 0x7FFFFFFF;
}
if (returnTimings) {
times[2] = System.currentTimeMillis();
}
}
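  // Illustrative sketch (not part of the original file): PHI_32 and PHI_32I are
  // multiplicative inverses modulo 2**32, so the hashed "weight" computed for a
  // doc ID in the loop above can be mapped back to that doc ID when the heap is
  // extracted. Doc IDs are non-negative ints, so the 0x7FFFFFFF mask loses no
  // information on the way back.
  private static void fibonacciHashRoundTripSketch(int docID) {
    int weight = (int) (docID * PHI_32) & 0x7FFFFFFF;       // as in sample2()
    int recovered = (int) (weight * PHI_32I) & 0x7FFFFFFF;  // as in the heap-extraction loop
    assert recovered == docID : recovered + " != " + docID;
  }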
/**
* A mutable integer that lets queue objects be reused once they start overflowing.
*/
private static class MI {
MI() { }
public int value;
}
/**
* A bounded priority queue for Integers, to retain a specified number of
* the highest-weighted values for return as a random sample.
*/
private static class IntPriorityQueue extends PriorityQueue<MI> {
/**
* Creates a bounded PQ of size <code>size</code>.
* @param size The number of elements to retain.
*/
public IntPriorityQueue(int size) {
super(size);
}
/**
* Returns the underlying data structure for faster access. Extracting elements
* one at a time would require N logN time, and since we want the elements sorted
* in ascending order by value (not weight), the array is useful as-is.
* @return The underlying heap array.
*/
public Object[] getHeap() {
return getHeapArray();
}
/**
 * Returns true if <code>o1</code>'s weight is less than that of <code>o2</code>, for
* ordering in the PQ.
* @return True if <code>o1</code> weighs less than <code>o2</code>.
*/
@Override
public boolean lessThan(MI o1, MI o2) {
return o1.value < o2.value;
}
}
/**
* For specifying which sampling algorithm to use.
*/
private enum Algorithm {
/**
* Specifies a methodical traversal algorithm, which is guaranteed to span the collection
* at least once, and never to return duplicates. Faster than the hashing algorithm and
* uses much less space, but the randomness of the sample may be affected by systematic
* variations in the collection. Requires only an array for the sample, and visits only
* the number of elements in the sample set, not the full set.
*/
// TODO (Facet): This one produces a bimodal distribution (very flat around
// each peak!) for collection size 10M and sample sizes 10k and 10544.
// Figure out why.
TRAVERSAL,
/**
* Specifies a Fibonacci-style hash algorithm (see Knuth, S&S), which generates a less
* systematically distributed subset of the sampled collection than the traversal method,
* but requires a bounded priority queue the size of the sample, and creates an object
* containing a sampled value and its hash, for every element in the full set.
*/
HASHING
}
/**
* For specifying whether to sort the sample.
*/
private enum Sorted {
/**
* Sort resulting sample before returning.
*/
YES,
/**
 * Do not sort the resulting sample.
*/
NO
}
/**
* Magic number 1: prime closest to phi, in 32 bits.
*/
private static final long PHI_32 = 2654435769L;
/**
* Magic number 2: multiplicative inverse of PHI_32, modulo 2**32.
*/
private static final long PHI_32I = 340573321L;
/**
* Switch to cause methods to return timings.
*/
private static boolean returnTimings = false;
}

View File

@ -1,73 +0,0 @@
package org.apache.lucene.facet.sampling;
import java.io.IOException;
import org.apache.lucene.facet.old.ScoredDocIDs;
import org.apache.lucene.facet.search.FacetResult;
import org.apache.lucene.facet.search.FacetResultNode;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Fixer of sample facet accumulation results.
*
* @lucene.experimental
*/
public abstract class SampleFixer {
/**
 * Alter the input result, fixing it to account for the sampling. An
 * implementation may compute accurate or estimated counts for the sampled
* facets. For example, a faster correction could just multiply by a
* compensating factor.
*
* @param origDocIds
* full set of matching documents.
* @param fres
* sample result to be fixed.
* @throws IOException
* If there is a low-level I/O error.
*/
public void fixResult(ScoredDocIDs origDocIds, FacetResult fres, double samplingRatio) throws IOException {
FacetResultNode topRes = fres.getFacetResultNode();
fixResultNode(topRes, origDocIds, samplingRatio);
}
/**
* Fix result node count, and, recursively, fix all its children
*
* @param facetResNode
* result node to be fixed
* @param docIds
* docids in effect
* @throws IOException
* If there is a low-level I/O error.
*/
protected void fixResultNode(FacetResultNode facetResNode, ScoredDocIDs docIds, double samplingRatio)
throws IOException {
singleNodeFix(facetResNode, docIds, samplingRatio);
for (FacetResultNode frn : facetResNode.subResults) {
fixResultNode(frn, docIds, samplingRatio);
}
}
/** Fix the given node's value. */
protected abstract void singleNodeFix(FacetResultNode facetResNode, ScoredDocIDs docIds, double samplingRatio)
throws IOException;
}
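// Illustrative sketch (not part of the original sources): the simplest possible
// fixer just scales every sampled count by the inverse of the sampling ratio,
// as the fixResult() javadoc above suggests ("multiply by a compensating
// factor"). The class name is hypothetical; Lucene's accurate alternative is
// TakmiSampleFixer, shown later in this diff.
class ScalingSampleFixerSketch extends SampleFixer {
  @Override
  protected void singleNodeFix(FacetResultNode facetResNode, ScoredDocIDs docIds, double samplingRatio) {
    facetResNode.value /= samplingRatio;  // estimate the full-set count from the sampled count
  }
}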

View File

@ -1,216 +0,0 @@
package org.apache.lucene.facet.sampling;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.facet.old.ScoredDocIDs;
import org.apache.lucene.facet.params.FacetIndexingParams;
import org.apache.lucene.facet.params.FacetSearchParams;
import org.apache.lucene.facet.search.FacetRequest;
import org.apache.lucene.facet.search.FacetResult;
import org.apache.lucene.facet.search.FacetResultNode;
import org.apache.lucene.facet.search.FacetsAggregator;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Sampling definition for facets accumulation
* <p>
* The Sampler uses TAKMI style counting to provide a 'best guess' top-K result
* set of the facets accumulated.
* <p>
* Note: Sampling accumulation (Accumulation over a sampled-set of the results),
* does not guarantee accurate values for
* {@link FacetResult#getNumValidDescendants()}.
*
* @lucene.experimental
*/
public abstract class Sampler {
protected final SamplingParams samplingParams;
/**
* Construct with {@link SamplingParams}
*/
public Sampler() {
this(new SamplingParams());
}
/**
* Construct with certain {@link SamplingParams}
*
* @param params sampling params in effect
* @throws IllegalArgumentException if the provided SamplingParams are not valid
*/
public Sampler(SamplingParams params) throws IllegalArgumentException {
if (!params.validate()) {
throw new IllegalArgumentException("The provided SamplingParams are not valid!!");
}
this.samplingParams = params;
}
/**
* Check if this sampler would complement for the input docIds
*/
public boolean shouldSample(ScoredDocIDs docIds) {
return docIds.size() > samplingParams.getSamplingThreshold();
}
/**
* Compute a sample set out of the input set, based on the {@link SamplingParams#getSampleRatio()}
 * in effect. Subclasses can override this to alter how the sample set is
* computed.
* <p>
* If the input set is of size smaller than {@link SamplingParams#getMinSampleSize()},
* the input set is returned (no sampling takes place).
* <p>
* Other than that, the returned set size will not be larger than {@link SamplingParams#getMaxSampleSize()}
* nor smaller than {@link SamplingParams#getMinSampleSize()}.
* @param docids
* full set of matching documents out of which a sample is needed.
*/
public SampleResult getSampleSet(ScoredDocIDs docids) throws IOException {
if (!shouldSample(docids)) {
return new SampleResult(docids, 1d);
}
int actualSize = docids.size();
int sampleSetSize = (int) (actualSize * samplingParams.getSampleRatio());
sampleSetSize = Math.max(sampleSetSize, samplingParams.getMinSampleSize());
sampleSetSize = Math.min(sampleSetSize, samplingParams.getMaxSampleSize());
return createSample(docids, actualSize, sampleSetSize);
}
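  // Worked example (illustrative, not part of the original file), using the
  // default SamplingParams: a result set of 1000000 documents exceeds the
  // default sampling threshold of 75000, so sampling kicks in. The raw sample
  // size is 1000000 * 0.01 = 10000, which the clamping to
  // [minSampleSize=100, maxSampleSize=10000] leaves unchanged, so
  // createSample() is asked for 10000 documents.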
/**
* Create and return a sample of the input set
* @param docids input set out of which a sample is to be created
* @param actualSize original size of set, prior to sampling
* @param sampleSetSize required size of sample set
* @return sample of the input set in the required size
*/
protected abstract SampleResult createSample(ScoredDocIDs docids, int actualSize, int sampleSetSize)
throws IOException;
/**
* Result of sample computation
*/
public final static class SampleResult {
public final ScoredDocIDs docids;
public final double actualSampleRatio;
protected SampleResult(ScoredDocIDs docids, double actualSampleRatio) {
this.docids = docids;
this.actualSampleRatio = actualSampleRatio;
}
}
/**
* Return the sampling params in effect
*/
public final SamplingParams getSamplingParams() {
return samplingParams;
}
/**
* Trim the input facet result.<br>
* Note: It is only valid to call this method with result obtained for a
* facet request created through {@link #overSampledSearchParams(FacetSearchParams)}.
*
* @throws IllegalArgumentException
* if called with results not obtained for requests created
* through {@link #overSampledSearchParams(FacetSearchParams)}
*/
public FacetResult trimResult(FacetResult facetResult) throws IllegalArgumentException {
double overSampleFactor = getSamplingParams().getOversampleFactor();
if (overSampleFactor <= 1) { // no factoring done?
return facetResult;
}
OverSampledFacetRequest sampledFreq = null;
try {
sampledFreq = (OverSampledFacetRequest) facetResult.getFacetRequest();
} catch (ClassCastException e) {
throw new IllegalArgumentException(
"It is only valid to call this method with result obtained for a " +
"facet request created through sampler.overSamlpingSearchParams()",
e);
}
FacetRequest origFrq = sampledFreq.orig;
FacetResultNode trimmedRootNode = facetResult.getFacetResultNode();
trimSubResults(trimmedRootNode, origFrq.numResults);
return new FacetResult(origFrq, trimmedRootNode, facetResult.getNumValidDescendants());
}
/** Trim sub results to a given size. */
private void trimSubResults(FacetResultNode node, int size) {
if (node.subResults == FacetResultNode.EMPTY_SUB_RESULTS || node.subResults.size() == 0) {
return;
}
ArrayList<FacetResultNode> trimmed = new ArrayList<FacetResultNode>(size);
for (int i = 0; i < node.subResults.size() && i < size; i++) {
FacetResultNode trimmedNode = node.subResults.get(i);
trimSubResults(trimmedNode, size);
trimmed.add(trimmedNode);
}
node.subResults = trimmed;
}
/**
* Over-sampled search params, wrapping each request with an over-sampled one.
*/
public FacetSearchParams overSampledSearchParams(FacetSearchParams original) {
FacetSearchParams res = original;
// So now we can sample -> altering the searchParams to account for the statistical error of the sampling
double overSampleFactor = getSamplingParams().getOversampleFactor();
if (overSampleFactor > 1) { // any factoring to do?
List<FacetRequest> facetRequests = new ArrayList<FacetRequest>();
for (FacetRequest frq : original.facetRequests) {
int overSampledNumResults = (int) Math.ceil(frq.numResults * overSampleFactor);
facetRequests.add(new OverSampledFacetRequest(frq, overSampledNumResults));
}
res = new FacetSearchParams(original.indexingParams, facetRequests);
}
return res;
}
/** Wrapping a facet request for over sampling. */
public static class OverSampledFacetRequest extends FacetRequest {
public final FacetRequest orig;
public OverSampledFacetRequest(FacetRequest orig, int num) {
super(orig.categoryPath, num);
this.orig = orig;
setDepth(orig.getDepth());
setNumLabel(0); // don't label anything as we're over-sampling
setResultMode(orig.getResultMode());
setSortOrder(orig.getSortOrder());
}
@Override
public FacetsAggregator createFacetsAggregator(FacetIndexingParams fip) {
return orig.createFacetsAggregator(fip);
}
}
}

View File

@ -1,137 +0,0 @@
package org.apache.lucene.facet.sampling;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.facet.old.OldFacetsAccumulator;
import org.apache.lucene.facet.old.ScoredDocIDs;
import org.apache.lucene.facet.params.FacetSearchParams;
import org.apache.lucene.facet.partitions.PartitionsFacetResultsHandler;
import org.apache.lucene.facet.sampling.Sampler.SampleResult;
import org.apache.lucene.facet.search.FacetArrays;
import org.apache.lucene.facet.search.FacetRequest;
import org.apache.lucene.facet.search.FacetResult;
import org.apache.lucene.facet.search.FacetsAccumulator;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.IndexReader;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Facets accumulation with sampling.<br>
* <p>
* Note two major differences between this class and {@link SamplingWrapper}:
* <ol>
* <li>Latter can wrap any other {@link FacetsAccumulator} while this class
* directly extends {@link OldFacetsAccumulator}.</li>
* <li>This class can effectively apply sampling on the complement set of
 * matching documents, thereby working efficiently with the complement
* optimization - see {@link OldFacetsAccumulator#getComplementThreshold()}
* .</li>
* </ol>
* <p>
* Note: Sampling accumulation (Accumulation over a sampled-set of the results),
* does not guarantee accurate values for
* {@link FacetResult#getNumValidDescendants()}.
*
* @see Sampler
* @lucene.experimental
*/
public class SamplingAccumulator extends OldFacetsAccumulator {
private double samplingRatio = -1d;
private final Sampler sampler;
public SamplingAccumulator(Sampler sampler, FacetSearchParams searchParams,
IndexReader indexReader, TaxonomyReader taxonomyReader,
FacetArrays facetArrays) {
super(searchParams, indexReader, taxonomyReader, facetArrays);
this.sampler = sampler;
}
/**
 * Construct with the given {@link Sampler} and {@link FacetSearchParams}.
*/
public SamplingAccumulator(
Sampler sampler,
FacetSearchParams searchParams,
IndexReader indexReader, TaxonomyReader taxonomyReader) {
super(searchParams, indexReader, taxonomyReader);
this.sampler = sampler;
}
@Override
public List<FacetResult> accumulate(ScoredDocIDs docids) throws IOException {
// Replacing the original searchParams with the over-sampled
FacetSearchParams original = searchParams;
SampleFixer samplerFixer = sampler.samplingParams.getSampleFixer();
final boolean shouldOversample = sampler.samplingParams.shouldOverSample();
if (shouldOversample) {
searchParams = sampler.overSampledSearchParams(original);
}
List<FacetResult> sampleRes = super.accumulate(docids);
List<FacetResult> results = new ArrayList<FacetResult>();
for (FacetResult fres : sampleRes) {
// fres is guaranteed to be non-null by the base accumulator.
FacetRequest fr = fres.getFacetRequest();
PartitionsFacetResultsHandler frh = createFacetResultsHandler(fr, createOrdinalValueResolver(fr));
if (samplerFixer != null) {
// fix the result of current request
samplerFixer.fixResult(docids, fres, samplingRatio);
fres = frh.rearrangeFacetResult(fres); // let delegee's handler do any arranging it needs to
if (shouldOversample) {
// Using the sampler to trim the extra (over-sampled) results
fres = sampler.trimResult(fres);
}
}
// final labeling if allowed (because labeling is a costly operation)
if (fres.getFacetResultNode().ordinal == TaxonomyReader.INVALID_ORDINAL) {
// category does not exist, add an empty result
results.add(emptyResult(fres.getFacetResultNode().ordinal, fr));
} else {
frh.labelResult(fres);
results.add(fres);
}
}
searchParams = original; // Back to original params
return results;
}
@Override
protected ScoredDocIDs actualDocsToAccumulate(ScoredDocIDs docids) throws IOException {
SampleResult sampleRes = sampler.getSampleSet(docids);
samplingRatio = sampleRes.actualSampleRatio;
return sampleRes.docids;
}
@Override
protected double getTotalCountsFactor() {
if (samplingRatio<0) {
throw new IllegalStateException("Total counts ratio unavailable because actualDocsToAccumulate() was not invoked");
}
return samplingRatio;
}
}
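// Illustrative sketch (not part of the original sources): wiring the pieces
// together. SamplingParams, TakmiSampleFixer and SamplingAccumulator appear in
// this diff; RandomSampler (assumed here to take a (SamplingParams, Random)
// constructor) is an assumption about the surrounding API and may need
// adjusting. The returned accumulator would typically be handed to a
// FacetsCollector.
class SamplingSetupSketch {
  static FacetsAccumulator createSamplingAccumulator(FacetSearchParams searchParams,
      IndexReader indexReader, TaxonomyReader taxoReader) {
    SamplingParams params = new SamplingParams();
    params.setSampleFixer(new TakmiSampleFixer(indexReader, taxoReader, searchParams));
    params.setOversampleFactor(2d);  // recommended by TakmiSampleFixer's javadoc
    Sampler sampler = new RandomSampler(params, new java.util.Random(42)); // assumed constructor
    return new SamplingAccumulator(sampler, searchParams, indexReader, taxoReader);
  }
}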

View File

@ -1,196 +0,0 @@
package org.apache.lucene.facet.sampling;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Parameters for sampling, dictating whether sampling is to take place and how.
*
* @lucene.experimental
*/
public class SamplingParams {
/**
* Default factor by which more results are requested over the sample set.
* @see SamplingParams#getOversampleFactor()
*/
public static final double DEFAULT_OVERSAMPLE_FACTOR = 1d;
/**
* Default ratio between size of sample to original size of document set.
* @see Sampler#getSampleSet(org.apache.lucene.facet.old.ScoredDocIDs)
*/
public static final double DEFAULT_SAMPLE_RATIO = 0.01;
/**
* Default maximum size of sample.
* @see Sampler#getSampleSet(org.apache.lucene.facet.old.ScoredDocIDs)
*/
public static final int DEFAULT_MAX_SAMPLE_SIZE = 10000;
/**
* Default minimum size of sample.
* @see Sampler#getSampleSet(org.apache.lucene.facet.old.ScoredDocIDs)
*/
public static final int DEFAULT_MIN_SAMPLE_SIZE = 100;
/**
 * Default sampling threshold: if the number of results is less than this number, no sampling will take place
* @see SamplingParams#getSampleRatio()
*/
public static final int DEFAULT_SAMPLING_THRESHOLD = 75000;
private int maxSampleSize = DEFAULT_MAX_SAMPLE_SIZE;
private int minSampleSize = DEFAULT_MIN_SAMPLE_SIZE;
private double sampleRatio = DEFAULT_SAMPLE_RATIO;
private int samplingThreshold = DEFAULT_SAMPLING_THRESHOLD;
private double oversampleFactor = DEFAULT_OVERSAMPLE_FACTOR;
private SampleFixer sampleFixer = null;
/**
* Return the maxSampleSize.
* In no case should the resulting sample size exceed this value.
* @see Sampler#getSampleSet(org.apache.lucene.facet.old.ScoredDocIDs)
*/
public final int getMaxSampleSize() {
return maxSampleSize;
}
/**
* Return the minSampleSize.
* In no case should the resulting sample size be smaller than this value.
* @see Sampler#getSampleSet(org.apache.lucene.facet.old.ScoredDocIDs)
*/
public final int getMinSampleSize() {
return minSampleSize;
}
/**
* @return the sampleRatio
* @see Sampler#getSampleSet(org.apache.lucene.facet.old.ScoredDocIDs)
*/
public final double getSampleRatio() {
return sampleRatio;
}
/**
* Return the samplingThreshold.
* Sampling would be performed only for document sets larger than this.
*/
public final int getSamplingThreshold() {
return samplingThreshold;
}
/**
* @param maxSampleSize
* the maxSampleSize to set
* @see #getMaxSampleSize()
*/
public void setMaxSampleSize(int maxSampleSize) {
this.maxSampleSize = maxSampleSize;
}
/**
* @param minSampleSize
* the minSampleSize to set
* @see #getMinSampleSize()
*/
public void setMinSampleSize(int minSampleSize) {
this.minSampleSize = minSampleSize;
}
/**
* @param sampleRatio
* the sampleRatio to set
* @see #getSampleRatio()
*/
public void setSampleRatio(double sampleRatio) {
this.sampleRatio = sampleRatio;
}
/**
* Set a sampling-threshold
* @see #getSamplingThreshold()
*/
public void setSamplingThreshold(int samplingThreshold) {
this.samplingThreshold = samplingThreshold;
}
/**
* Check validity of sampling settings, making sure that
* <ul>
* <li> <code>minSampleSize <= maxSampleSize <= samplingThreshold </code></li>
 * <li> <code>0 < sampleRatio < 1 </code></li>
* </ul>
*
* @return true if valid, false otherwise
*/
public boolean validate() {
return
samplingThreshold >= maxSampleSize &&
maxSampleSize >= minSampleSize &&
sampleRatio > 0 &&
sampleRatio < 1;
}
/**
 * Return the oversampleFactor. When sampling, we collect that many more
 * results, so that later, when selecting the top out of these, the chances are
 * higher of getting the actual best results. Note that having this value larger than 1 only
* makes sense when using a SampleFixer which finds accurate results, such as
* <code>TakmiSampleFixer</code>. When this value is smaller than 1, it is
* ignored and no oversampling takes place.
*/
public final double getOversampleFactor() {
return oversampleFactor;
}
/**
* @param oversampleFactor the oversampleFactor to set
* @see #getOversampleFactor()
*/
public void setOversampleFactor(double oversampleFactor) {
this.oversampleFactor = oversampleFactor;
}
/**
* @return {@link SampleFixer} to be used while fixing the sampled results, if
* <code>null</code> no fixing will be performed
*/
public SampleFixer getSampleFixer() {
return sampleFixer;
}
/**
* Set a {@link SampleFixer} to be used while fixing the sampled results.
* {@code null} means no fixing will be performed
*/
public void setSampleFixer(SampleFixer sampleFixer) {
this.sampleFixer = sampleFixer;
}
/**
* Returns whether over-sampling should be done. By default returns
* {@code true} when {@link #getSampleFixer()} is not {@code null} and
* {@link #getOversampleFactor()} &gt; 1, {@code false} otherwise.
*/
public boolean shouldOverSample() {
return sampleFixer != null && oversampleFactor > 1d;
}
}
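// Illustrative sketch (not part of the original class): parameters that violate
// the constraints documented in validate() are rejected up front by Sampler's
// constructor, so it is cheap to check them yourself first.
class SamplingParamsValidationSketch {
  static void demo() {
    SamplingParams params = new SamplingParams();
    params.setMinSampleSize(200);
    params.setMaxSampleSize(100);   // smaller than minSampleSize
    assert !params.validate();      // minSampleSize <= maxSampleSize is violated
  }
}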

View File

@ -1,112 +0,0 @@
package org.apache.lucene.facet.sampling;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.facet.old.OldFacetsAccumulator;
import org.apache.lucene.facet.old.ScoredDocIDs;
import org.apache.lucene.facet.params.FacetSearchParams;
import org.apache.lucene.facet.partitions.PartitionsFacetResultsHandler;
import org.apache.lucene.facet.sampling.Sampler.SampleResult;
import org.apache.lucene.facet.search.FacetRequest;
import org.apache.lucene.facet.search.FacetResult;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Wrap any Facets Accumulator with sampling.
* <p>
* Note: Sampling accumulation (Accumulation over a sampled-set of the results),
* does not guarantee accurate values for
* {@link FacetResult#getNumValidDescendants()}.
*
* @lucene.experimental
*/
public class SamplingWrapper extends OldFacetsAccumulator {
private OldFacetsAccumulator delegee;
private Sampler sampler;
public SamplingWrapper(OldFacetsAccumulator delegee, Sampler sampler) {
super(delegee.searchParams, delegee.indexReader, delegee.taxonomyReader);
this.delegee = delegee;
this.sampler = sampler;
}
@Override
public List<FacetResult> accumulate(ScoredDocIDs docids) throws IOException {
// Replacing the original searchParams with the over-sampled (and without statistics-compute)
FacetSearchParams original = delegee.searchParams;
boolean shouldOversample = sampler.samplingParams.shouldOverSample();
if (shouldOversample) {
delegee.searchParams = sampler.overSampledSearchParams(original);
}
SampleResult sampleSet = sampler.getSampleSet(docids);
List<FacetResult> sampleRes = delegee.accumulate(sampleSet.docids);
List<FacetResult> results = new ArrayList<FacetResult>();
SampleFixer sampleFixer = sampler.samplingParams.getSampleFixer();
for (FacetResult fres : sampleRes) {
// for sure fres is not null because this is guaranteed by the delegee.
FacetRequest fr = fres.getFacetRequest();
PartitionsFacetResultsHandler frh = createFacetResultsHandler(fr, createOrdinalValueResolver(fr));
if (sampleFixer != null) {
// fix the result of current request
sampleFixer.fixResult(docids, fres, sampleSet.actualSampleRatio);
fres = frh.rearrangeFacetResult(fres); // let delegee's handler do any arranging it needs to
}
if (shouldOversample) {
// Using the sampler to trim the extra (over-sampled) results
fres = sampler.trimResult(fres);
}
// final labeling if allowed (because labeling is a costly operation)
if (fres.getFacetResultNode().ordinal == TaxonomyReader.INVALID_ORDINAL) {
// category does not exist, add an empty result
results.add(emptyResult(fres.getFacetResultNode().ordinal, fr));
} else {
frh.labelResult(fres);
results.add(fres);
}
}
if (shouldOversample) {
delegee.searchParams = original; // Back to original params
}
return results;
}
@Override
public double getComplementThreshold() {
return delegee.getComplementThreshold();
}
@Override
public void setComplementThreshold(double complementThreshold) {
delegee.setComplementThreshold(complementThreshold);
}
}

View File

@ -1,168 +0,0 @@
package org.apache.lucene.facet.sampling;
import java.io.IOException;
import org.apache.lucene.facet.old.ScoredDocIDs;
import org.apache.lucene.facet.old.ScoredDocIDsIterator;
import org.apache.lucene.facet.params.FacetSearchParams;
import org.apache.lucene.facet.search.DrillDownQuery;
import org.apache.lucene.facet.search.FacetResultNode;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.Bits;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * Fix sampling results by recounting them exactly: count the intersection between
 * two lists: a TermDocs (the list of documents in a certain category) and a
 * DocIdSetIterator (the list of documents matching the query).
* <p>
* This fixer is suitable for scenarios which prioritize accuracy over
* performance.
* <p>
* <b>Note:</b> for statistically more accurate top-k selection, set
* {@link SamplingParams#setOversampleFactor(double) oversampleFactor} to at
* least 2, so that the top-k categories would have better chance of showing up
 * in the sampled top-cK results (see {@link SamplingParams#getOversampleFactor}).
*
* @lucene.experimental
*/
public class TakmiSampleFixer extends SampleFixer {
private TaxonomyReader taxonomyReader;
private IndexReader indexReader;
private FacetSearchParams searchParams;
public TakmiSampleFixer(IndexReader indexReader,
TaxonomyReader taxonomyReader, FacetSearchParams searchParams) {
this.indexReader = indexReader;
this.taxonomyReader = taxonomyReader;
this.searchParams = searchParams;
}
@Override
public void singleNodeFix(FacetResultNode facetResNode, ScoredDocIDs docIds, double samplingRatio) throws IOException {
recount(facetResNode, docIds);
}
/**
* Internal utility: recount for a facet result node
*
* @param fresNode
* result node to be recounted
* @param docIds
* full set of matching documents.
* @throws IOException If there is a low-level I/O error.
*/
private void recount(FacetResultNode fresNode, ScoredDocIDs docIds) throws IOException {
// TODO (Facet): change from void to return the new, smaller docSet, and use
// that for the children, as this will make their intersection ops faster.
// can do this only when the new set is "sufficiently" smaller.
/* We need the category's path name in order to do its recounting.
* If it is missing, because the option to label only part of the
 * facet results was exercised, we need to calculate it anyway, so
* in essence sampling with recounting spends some extra cycles for
* labeling results for which labels are not required. */
if (fresNode.label == null) {
fresNode.label = taxonomyReader.getPath(fresNode.ordinal);
}
FacetLabel catPath = fresNode.label;
Term drillDownTerm = DrillDownQuery.term(searchParams.indexingParams, catPath);
// TODO (Facet): avoid Multi*?
Bits liveDocs = MultiFields.getLiveDocs(indexReader);
int updatedCount = countIntersection(MultiFields.getTermDocsEnum(indexReader, liveDocs,
drillDownTerm.field(), drillDownTerm.bytes(),
0), docIds.iterator());
fresNode.value = updatedCount;
}
/**
* Count the size of the intersection between two lists: a TermDocs (list of
* documents in a certain category) and a DocIdSetIterator (list of documents
* matching a query).
*/
private static int countIntersection(DocsEnum p1, ScoredDocIDsIterator p2)
throws IOException {
// The documentation of both TermDocs and DocIdSetIterator claims
// that we must call next() before doc(). So we do, and if one of the
// lists is empty, we obviously return 0.
if (p1 == null || p1.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
return 0;
}
if (!p2.next()) {
return 0;
}
int d1 = p1.docID();
int d2 = p2.getDocID();
int count = 0;
for (;;) {
if (d1 == d2) {
++count;
if (p1.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
break; // end of list 1, nothing more in intersection
}
d1 = p1.docID();
if (!advance(p2, d1)) {
break; // end of list 2, nothing more in intersection
}
d2 = p2.getDocID();
} else if (d1 < d2) {
if (p1.advance(d2) == DocIdSetIterator.NO_MORE_DOCS) {
break; // end of list 1, nothing more in intersection
}
d1 = p1.docID();
} else /* d1>d2 */ {
if (!advance(p2, d1)) {
break; // end of list 2, nothing more in intersection
}
d2 = p2.getDocID();
}
}
return count;
}
/**
 * Utility: advance the iterator until finding (or exceeding) a specific
 * document.
*
* @param iterator
* iterator being advanced
* @param targetDoc
* target of advancing
* @return false if iterator exhausted, true otherwise.
*/
private static boolean advance(ScoredDocIDsIterator iterator, int targetDoc) {
while (iterator.next()) {
if (iterator.getDocID() >= targetDoc) {
return true; // target reached
}
}
return false; // exhausted
}
}

View File

@ -1,24 +0,0 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html>
<head>
<title>Facets sampling</title>
</head>
<body>
Facets sampling.
</body>
</html>

View File

@ -1,110 +0,0 @@
package org.apache.lucene.facet.search;
import java.util.Arrays;
import java.util.concurrent.ArrayBlockingQueue;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* A thread-safe pool of {@code int[]} and {@code float[]} arrays. One specifies
* the maximum number of arrays in the constructor. Calls to
* {@link #allocateFloatArray()} or {@link #allocateIntArray()} take an array
* from the pool, and if one is not available, allocate a new one. When you are
* done using the array, you should {@link #free(int[]) free} it.
* <p>
 * This class is used by {@link ReusingFacetArrays} for temporary facet
* aggregation arrays, which can be reused across searches instead of being
* allocated afresh on every search.
*
* @lucene.experimental
*/
public final class ArraysPool {
private final ArrayBlockingQueue<int[]> intsPool;
private final ArrayBlockingQueue<float[]> floatsPool;
public final int arrayLength;
/**
* Specifies the max number of arrays to pool, as well as the length of each
* array to allocate.
*
* @param arrayLength the size of the arrays to allocate
* @param maxArrays the maximum number of arrays to pool, from each type
*
* @throws IllegalArgumentException if maxArrays is set to 0.
*/
public ArraysPool(int arrayLength, int maxArrays) {
if (maxArrays == 0) {
throw new IllegalArgumentException(
"maxArrays cannot be 0 - don't use this class if you don't intend to pool arrays");
}
this.arrayLength = arrayLength;
this.intsPool = new ArrayBlockingQueue<int[]>(maxArrays);
this.floatsPool = new ArrayBlockingQueue<float[]>(maxArrays);
}
/**
* Allocates a new {@code int[]}. If there's an available array in the pool,
* it is used, otherwise a new array is allocated.
*/
public final int[] allocateIntArray() {
int[] arr = intsPool.poll();
if (arr == null) {
return new int[arrayLength];
}
Arrays.fill(arr, 0); // reset array
return arr;
}
/**
* Allocates a new {@code float[]}. If there's an available array in the pool,
* it is used, otherwise a new array is allocated.
*/
public final float[] allocateFloatArray() {
float[] arr = floatsPool.poll();
if (arr == null) {
return new float[arrayLength];
}
Arrays.fill(arr, 0f); // reset array
return arr;
}
/**
* Frees a no-longer-needed array. If there's room in the pool, the array is
* added to it, otherwise discarded.
*/
public final void free(int[] arr) {
if (arr != null) {
// use offer - if there isn't room, we don't want to wait
intsPool.offer(arr);
}
}
/**
* Frees a no-longer-needed array. If there's room in the pool, the array is
* added to it, otherwise discarded.
*/
public final void free(float[] arr) {
if (arr != null) {
// use offer - if there isn't room, we don't want to wait
floatsPool.offer(arr);
}
}
}
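// Illustrative sketch (not part of the original class): a typical
// allocate/use/free cycle. allocateIntArray() hands back a zeroed array when
// one is pooled, and free() silently discards an array when the pool is full.
class ArraysPoolUsageSketch {
  static void demo() {
    ArraysPool pool = new ArraysPool(1024, 4); // arrays of length 1024, at most 4 pooled per type
    int[] counts = pool.allocateIntArray();
    try {
      counts[0]++;                             // ... aggregate into the array ...
    } finally {
      pool.free(counts);                       // return it so a later search can reuse it
    }
  }
}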

View File

@ -1,54 +0,0 @@
package org.apache.lucene.facet.search;
import java.io.IOException;
import org.apache.lucene.facet.params.CategoryListParams;
import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs;
import org.apache.lucene.facet.search.OrdinalsCache.CachedOrds;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* A {@link FacetsAggregator} which updates categories values by counting their
* occurrences in matching documents. Uses {@link OrdinalsCache} to obtain the
* category ordinals of each segment.
*
* @lucene.experimental
*/
public class CachedOrdsCountingFacetsAggregator extends IntRollupFacetsAggregator {
@Override
public void aggregate(MatchingDocs matchingDocs, CategoryListParams clp, FacetArrays facetArrays) throws IOException {
final CachedOrds ords = OrdinalsCache.getCachedOrds(matchingDocs.context, clp);
if (ords == null) {
return; // this segment has no ordinals for the given category list
}
final int[] counts = facetArrays.getIntArray();
int doc = 0;
int length = matchingDocs.bits.length();
while (doc < length && (doc = matchingDocs.bits.nextSetBit(doc)) != -1) {
int start = ords.offsets[doc];
int end = ords.offsets[doc + 1];
for (int i = start; i < end; i++) {
++counts[ords.ordinals[i]];
}
++doc;
}
}
}

View File

@ -1,56 +0,0 @@
package org.apache.lucene.facet.search;
import java.io.IOException;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.IntsRef;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* An interface for obtaining the category ordinals of documents.
* {@link #getOrdinals(int, IntsRef)} calls are done with document IDs that are
* local to the reader given to {@link #setNextReader(AtomicReaderContext)}.
* <p>
* <b>NOTE:</b> this class operates as a key to a map, and therefore you should
* implement {@code equals()} and {@code hashCode()} for proper behavior.
*
* @lucene.experimental
*/
public interface CategoryListIterator {
/**
* Sets the {@link AtomicReaderContext} for which
* {@link #getOrdinals(int, IntsRef)} calls will be made. Returns true iff any
* of the documents in this reader have category ordinals. This method must be
* called before any calls to {@link #getOrdinals(int, IntsRef)}.
*/
public boolean setNextReader(AtomicReaderContext context) throws IOException;
/**
* Stores the category ordinals of the given document ID in the given
 * {@link IntsRef}, starting at position 0 up to {@link IntsRef#length}. Grows
* the {@link IntsRef} if it is not large enough.
*
* <p>
* <b>NOTE:</b> if the requested document does not have category ordinals
* associated with it, {@link IntsRef#length} is set to zero.
*/
public void getOrdinals(int docID, IntsRef ints) throws IOException;
}
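// Illustrative sketch (not part of the original interface): the expected
// calling protocol for a consumer. The iterator and segment context are passed
// in; the IntsRef is reused across documents because getOrdinals() overwrites it.
class CategoryListIteratorUsageSketch {
  static void countOrdinals(CategoryListIterator cli, AtomicReaderContext context, int[] counts)
      throws IOException {
    IntsRef ords = new IntsRef(32);
    if (!cli.setNextReader(context)) {
      return;                                   // no documents in this segment have ordinals
    }
    int maxDoc = context.reader().maxDoc();
    for (int doc = 0; doc < maxDoc; doc++) {
      cli.getOrdinals(doc, ords);               // length is 0 if the doc has no ordinals
      for (int i = 0; i < ords.length; i++) {
        counts[ords.ints[ords.offset + i]]++;
      }
    }
  }
}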

View File

@ -1,39 +0,0 @@
package org.apache.lucene.facet.search;
import org.apache.lucene.facet.params.FacetIndexingParams;
import org.apache.lucene.facet.taxonomy.FacetLabel;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Facet request for counting facets.
*
* @lucene.experimental
*/
public class CountFacetRequest extends FacetRequest {
public CountFacetRequest(FacetLabel path, int num) {
super(path, num);
}
@Override
public FacetsAggregator createFacetsAggregator(FacetIndexingParams fip) {
return CountingFacetsAggregator.create(fip.getCategoryListParams(categoryPath));
}
}

View File

@ -1,72 +0,0 @@
package org.apache.lucene.facet.search;
import java.io.IOException;
import org.apache.lucene.facet.encoding.DGapVInt8IntDecoder;
import org.apache.lucene.facet.params.CategoryListParams;
import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs;
import org.apache.lucene.util.IntsRef;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* A {@link FacetsAggregator} which counts the number of times each category
* appears in the given set of documents. This aggregator uses the
* {@link CategoryListIterator} to read the encoded categories. If you used the
 * default settings while indexing, you can use
* {@link FastCountingFacetsAggregator} for better performance.
*
* @lucene.experimental
*/
public class CountingFacetsAggregator extends IntRollupFacetsAggregator {
/**
* Returns a {@link FacetsAggregator} suitable for counting categories given
* the {@link CategoryListParams}.
*/
public static FacetsAggregator create(CategoryListParams clp) {
if (clp.createEncoder().createMatchingDecoder().getClass() == DGapVInt8IntDecoder.class) {
return new FastCountingFacetsAggregator();
} else {
return new CountingFacetsAggregator();
}
}
private final IntsRef ordinals = new IntsRef(32);
@Override
public void aggregate(MatchingDocs matchingDocs, CategoryListParams clp, FacetArrays facetArrays) throws IOException {
final CategoryListIterator cli = clp.createCategoryListIterator(0);
if (!cli.setNextReader(matchingDocs.context)) {
return;
}
final int length = matchingDocs.bits.length();
final int[] counts = facetArrays.getIntArray();
int doc = 0;
while (doc < length && (doc = matchingDocs.bits.nextSetBit(doc)) != -1) {
cli.getOrdinals(doc, ordinals);
final int upto = ordinals.offset + ordinals.length;
for (int i = ordinals.offset; i < upto; i++) {
++counts[ordinals.ints[i]];
}
++doc;
}
}
}

View File

@ -1,136 +0,0 @@
package org.apache.lucene.facet.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import org.apache.lucene.facet.search.FacetRequest.SortOrder;
import org.apache.lucene.facet.taxonomy.ParallelTaxonomyArrays;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.util.CollectionUtil;
import org.apache.lucene.util.PriorityQueue;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* A {@link FacetResultsHandler} which counts the top-K facets at depth 1 only
* and always labels all result categories. The results are always sorted by
* value, in descending order.
*
* @lucene.experimental
*/
public class DepthOneFacetResultsHandler extends FacetResultsHandler {
private static class FacetResultNodeQueue extends PriorityQueue<FacetResultNode> {
public FacetResultNodeQueue(int maxSize, boolean prepopulate) {
super(maxSize, prepopulate);
}
@Override
protected FacetResultNode getSentinelObject() {
return new FacetResultNode(TaxonomyReader.INVALID_ORDINAL, 0);
}
@Override
protected boolean lessThan(FacetResultNode a, FacetResultNode b) {
return a.compareTo(b) < 0;
}
}
public DepthOneFacetResultsHandler(TaxonomyReader taxonomyReader, FacetRequest facetRequest, FacetArrays facetArrays,
OrdinalValueResolver resolver) {
super(taxonomyReader, facetRequest, resolver, facetArrays);
assert facetRequest.getDepth() == 1 : "this handler only computes the top-K facets at depth 1";
assert facetRequest.numResults == facetRequest.getNumLabel() : "this handler always labels all top-K results";
assert facetRequest.getSortOrder() == SortOrder.DESCENDING : "this handler always sorts results in descending order";
}
@Override
public final FacetResult compute() throws IOException {
ParallelTaxonomyArrays arrays = taxonomyReader.getParallelTaxonomyArrays();
final int[] children = arrays.children();
final int[] siblings = arrays.siblings();
int rootOrd = taxonomyReader.getOrdinal(facetRequest.categoryPath);
FacetResultNode root = new FacetResultNode(rootOrd, resolver.valueOf(rootOrd));
root.label = facetRequest.categoryPath;
if (facetRequest.numResults > taxonomyReader.getSize()) {
// specialize this case, user is interested in all available results
ArrayList<FacetResultNode> nodes = new ArrayList<FacetResultNode>();
int ordinal = children[rootOrd];
while (ordinal != TaxonomyReader.INVALID_ORDINAL) {
double value = resolver.valueOf(ordinal);
if (value > 0) {
FacetResultNode node = new FacetResultNode(ordinal, value);
node.label = taxonomyReader.getPath(ordinal);
nodes.add(node);
}
ordinal = siblings[ordinal];
}
CollectionUtil.introSort(nodes, Collections.reverseOrder(new Comparator<FacetResultNode>() {
@Override
public int compare(FacetResultNode o1, FacetResultNode o2) {
return o1.compareTo(o2);
}
}));
root.subResults = nodes;
return new FacetResult(facetRequest, root, nodes.size());
}
// since we use sentinel objects, we cannot reuse the PQ, but that's OK because it's not big
PriorityQueue<FacetResultNode> pq = new FacetResultNodeQueue(facetRequest.numResults, true);
int ordinal = children[rootOrd];
FacetResultNode top = pq.top();
int numSiblings = 0;
while (ordinal != TaxonomyReader.INVALID_ORDINAL) {
double value = resolver.valueOf(ordinal);
if (value > 0) {
++numSiblings;
if (value > top.value) {
top.value = value;
top.ordinal = ordinal;
top = pq.updateTop();
}
}
ordinal = siblings[ordinal];
}
// pop() the least (sentinel) elements
int pqsize = pq.size();
int size = numSiblings < pqsize ? numSiblings : pqsize;
for (int i = pqsize - size; i > 0; i--) { pq.pop(); }
// create the FacetResultNodes.
FacetResultNode[] subResults = new FacetResultNode[size];
for (int i = size - 1; i >= 0; i--) {
FacetResultNode node = pq.pop();
node.label = taxonomyReader.getPath(node.ordinal);
subResults[i] = node;
}
root.subResults = Arrays.asList(subResults);
return new FacetResult(facetRequest, root, numSiblings);
}
}

View File

@ -1,87 +0,0 @@
package org.apache.lucene.facet.search;
import java.io.IOException;
import org.apache.lucene.facet.encoding.IntDecoder;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/** A {@link CategoryListIterator} which reads the ordinals from a {@link BinaryDocValues}. */
public class DocValuesCategoryListIterator implements CategoryListIterator {
private final IntDecoder decoder;
private final String field;
private final int hashCode;
private final BytesRef bytes = new BytesRef(32);
private BinaryDocValues current;
/**
* Constructs a new {@link DocValuesCategoryListIterator}.
*/
public DocValuesCategoryListIterator(String field, IntDecoder decoder) {
this.field = field;
this.decoder = decoder;
this.hashCode = field.hashCode();
}
@Override
public int hashCode() {
return hashCode;
}
@Override
public boolean equals(Object o) {
if (!(o instanceof DocValuesCategoryListIterator)) {
return false;
}
DocValuesCategoryListIterator other = (DocValuesCategoryListIterator) o;
if (hashCode != other.hashCode) {
return false;
}
// Hash codes are the same, check equals() to avoid cases of hash-collisions.
return field.equals(other.field);
}
@Override
public boolean setNextReader(AtomicReaderContext context) throws IOException {
current = context.reader().getBinaryDocValues(field);
return current != null;
}
@Override
public void getOrdinals(int docID, IntsRef ints) throws IOException {
assert current != null : "don't call this if setNextReader returned false";
current.get(docID, bytes);
ints.length = 0;
if (bytes.length > 0) {
decoder.decode(bytes, ints);
}
}
@Override
public String toString() {
return field;
}
}

View File

@ -1,222 +0,0 @@
package org.apache.lucene.facet.search;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import org.apache.lucene.facet.params.CategoryListParams;
import org.apache.lucene.facet.params.FacetIndexingParams;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.FilteredQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
/**
* A {@link Query} for drill-down over {@link FacetLabel categories}. You
* should call {@link #add(FacetLabel...)} for every group of categories you
* want to drill-down over. Each category in the group is {@code OR'ed} with
* the others, and groups are {@code AND'ed}.
* <p>
* <b>NOTE:</b> if you choose to create your own {@link Query} by calling
* {@link #term}, it is recommended to wrap it with {@link ConstantScoreQuery}
* and set the {@link ConstantScoreQuery#setBoost(float) boost} to {@code 0.0f},
* so that it does not affect the scores of the documents.
*
* @lucene.experimental
*/
public final class DrillDownQuery extends Query {
/** Return a drill-down {@link Term} for a category. */
public static Term term(FacetIndexingParams iParams, FacetLabel path) {
CategoryListParams clp = iParams.getCategoryListParams(path);
char[] buffer = new char[path.fullPathLength()];
iParams.drillDownTermText(path, buffer);
return new Term(clp.field, String.valueOf(buffer));
}
private final BooleanQuery query;
private final Map<String,Integer> drillDownDims = new LinkedHashMap<String,Integer>();
final FacetIndexingParams fip;
/** Used by clone() */
DrillDownQuery(FacetIndexingParams fip, BooleanQuery query, Map<String,Integer> drillDownDims) {
this.fip = fip;
this.query = query.clone();
this.drillDownDims.putAll(drillDownDims);
}
/** Used by DrillSideways */
DrillDownQuery(Filter filter, DrillDownQuery other) {
query = new BooleanQuery(true); // disable coord
BooleanClause[] clauses = other.query.getClauses();
if (clauses.length == other.drillDownDims.size()) {
throw new IllegalArgumentException("cannot apply filter unless baseQuery isn't null; pass ConstantScoreQuery instead");
}
assert clauses.length == 1+other.drillDownDims.size(): clauses.length + " vs " + (1+other.drillDownDims.size());
drillDownDims.putAll(other.drillDownDims);
query.add(new FilteredQuery(clauses[0].getQuery(), filter), Occur.MUST);
for(int i=1;i<clauses.length;i++) {
query.add(clauses[i].getQuery(), Occur.MUST);
}
fip = other.fip;
}
/** Used by DrillSideways */
DrillDownQuery(FacetIndexingParams fip, Query baseQuery, List<Query> clauses, Map<String,Integer> drillDownDims) {
this.fip = fip;
this.query = new BooleanQuery(true);
if (baseQuery != null) {
query.add(baseQuery, Occur.MUST);
}
for(Query clause : clauses) {
query.add(clause, Occur.MUST);
}
this.drillDownDims.putAll(drillDownDims);
}
/**
* Creates a new {@link DrillDownQuery} without a base query,
* to perform a pure browsing query (equivalent to using
* {@link MatchAllDocsQuery} as base).
*/
public DrillDownQuery(FacetIndexingParams fip) {
this(fip, null);
}
/**
* Creates a new {@link DrillDownQuery} over the given base query. The base
* query can be {@code null}, in which case the resulting {@link Query} from
* {@link #rewrite(IndexReader)} will be a pure browsing query, filtering on
* the added categories only.
*/
public DrillDownQuery(FacetIndexingParams fip, Query baseQuery) {
query = new BooleanQuery(true); // disable coord
if (baseQuery != null) {
query.add(baseQuery, Occur.MUST);
}
this.fip = fip;
}
/**
* Adds one dimension of drill downs; if you pass multiple values they are
* OR'd, and then the entire dimension is AND'd against the base query.
*/
public void add(FacetLabel... paths) {
Query q;
if (paths[0].length == 0) {
throw new IllegalArgumentException("all CategoryPaths must have length > 0");
}
String dim = paths[0].components[0];
if (drillDownDims.containsKey(dim)) {
throw new IllegalArgumentException("dimension '" + dim + "' was already added");
}
if (paths.length == 1) {
q = new TermQuery(term(fip, paths[0]));
} else {
BooleanQuery bq = new BooleanQuery(true); // disable coord
for (FacetLabel cp : paths) {
if (cp.length == 0) {
throw new IllegalArgumentException("all CategoryPaths must have length > 0");
}
if (!cp.components[0].equals(dim)) {
throw new IllegalArgumentException("multiple (OR'd) drill-down paths must be under same dimension; got '"
+ dim + "' and '" + cp.components[0] + "'");
}
bq.add(new TermQuery(term(fip, cp)), Occur.SHOULD);
}
q = bq;
}
add(dim, q);
}
/** Expert: add a custom drill-down subQuery. Use this
* when you have a separate way to drill-down on the
* dimension than the indexed facet ordinals. */
public void add(String dim, Query subQuery) {
// TODO: we should use FilteredQuery?
// So scores of the drill-down query don't have an
// effect:
final ConstantScoreQuery drillDownQuery = new ConstantScoreQuery(subQuery);
drillDownQuery.setBoost(0.0f);
query.add(drillDownQuery, Occur.MUST);
drillDownDims.put(dim, drillDownDims.size());
}
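// Hedged sketch of the expert variant above: drilling down on a dimension with a
// custom query instead of the indexed facet ordinals (the "price" field and its
// range are hypothetical):
//
//   DrillDownQuery ddq = new DrillDownQuery(fip, baseQuery);
//   ddq.add("Price", NumericRangeQuery.newLongRange("price", 0L, 100L, true, true));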
@Override
public DrillDownQuery clone() {
return new DrillDownQuery(fip, query, drillDownDims);
}
@Override
public int hashCode() {
final int prime = 31;
int result = super.hashCode();
return prime * result + query.hashCode();
}
@Override
public boolean equals(Object obj) {
if (!(obj instanceof DrillDownQuery)) {
return false;
}
DrillDownQuery other = (DrillDownQuery) obj;
return query.equals(other.query) && super.equals(other);
}
@Override
public Query rewrite(IndexReader r) throws IOException {
if (query.clauses().size() == 0) {
// The baseQuery given to the ctor was null and no drill-downs were added.
// Note that passing only a baseQuery, without any drill-down terms, is fine:
// the rewritten query is then simply the original base query.
throw new IllegalStateException("no base query or drill-down categories given");
}
return query;
}
@Override
public String toString(String field) {
return query.toString(field);
}
BooleanQuery getBooleanQuery() {
return query;
}
Map<String,Integer> getDims() {
return drillDownDims;
}
}

View File

@ -1,188 +0,0 @@
package org.apache.lucene.facet.search;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Arrays;
import java.util.IdentityHashMap;
import java.util.Map;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.Scorer.ChildScorer;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
/** Collector that scrutinizes each hit to determine if it
* passed all constraints (a true hit) or if it missed
* exactly one dimension (a near-miss, counted in the
* drill-sideways counts for that dimension). For example,
* with drill-downs on dimensions A and B, a hit of the base
* query that matches A but not B is collected only by B's
* drill-sideways collector. */
class DrillSidewaysCollector extends Collector {
private final Collector hitCollector;
private final Collector drillDownCollector;
private final Collector[] drillSidewaysCollectors;
private final Scorer[] subScorers;
private final int exactCount;
// Maps Weight to either -1 (mainQuery) or to the integer
// index of the dim's drill-down. We need this when
// visiting the child scorers to correlate back to the
// right scorers:
private final Map<Weight,Integer> weightToIndex = new IdentityHashMap<Weight,Integer>();
private Scorer mainScorer;
public DrillSidewaysCollector(Collector hitCollector, Collector drillDownCollector, Collector[] drillSidewaysCollectors,
Map<String,Integer> dims) {
this.hitCollector = hitCollector;
this.drillDownCollector = drillDownCollector;
this.drillSidewaysCollectors = drillSidewaysCollectors;
subScorers = new Scorer[dims.size()];
if (dims.size() == 1) {
// When we have only one dim, we insert the
// MatchAllDocsQuery, bringing the clause count to
// 2:
exactCount = 2;
} else {
exactCount = dims.size();
}
}
@Override
public void collect(int doc) throws IOException {
//System.out.println("collect doc=" + doc + " main.freq=" + mainScorer.freq() + " main.doc=" + mainScorer.docID() + " exactCount=" + exactCount);
if (mainScorer == null) {
// This segment did not have any docs with any
// drill-down field & value:
return;
}
if (mainScorer.freq() == exactCount) {
// All sub-clauses from the drill-down filters
// matched, so this is a "real" hit, so we first
// collect in both the hitCollector and the
// drillDown collector:
//System.out.println(" hit " + drillDownCollector);
hitCollector.collect(doc);
if (drillDownCollector != null) {
drillDownCollector.collect(doc);
}
// Also collect across all drill-sideways counts so
// we "merge in" drill-down counts for this
// dimension.
for(int i=0;i<subScorers.length;i++) {
// This cannot be null, because it was a hit,
// meaning all drill-down dims matched, so all
// dims must have non-null scorers:
assert subScorers[i] != null;
int subDoc = subScorers[i].docID();
assert subDoc == doc;
drillSidewaysCollectors[i].collect(doc);
}
} else {
boolean found = false;
for(int i=0;i<subScorers.length;i++) {
if (subScorers[i] == null) {
// This segment did not have any docs with this
// drill-down field & value:
drillSidewaysCollectors[i].collect(doc);
assert allMatchesFrom(i+1, doc);
found = true;
break;
}
int subDoc = subScorers[i].docID();
//System.out.println(" i=" + i + " sub: " + subDoc);
if (subDoc != doc) {
//System.out.println(" +ds[" + i + "]");
assert subDoc > doc: "subDoc=" + subDoc + " doc=" + doc;
drillSidewaysCollectors[i].collect(doc);
assert allMatchesFrom(i+1, doc);
found = true;
break;
}
}
assert found;
}
}
// Only used by assert:
private boolean allMatchesFrom(int startFrom, int doc) {
for(int i=startFrom;i<subScorers.length;i++) {
assert subScorers[i].docID() == doc;
}
return true;
}
@Override
public boolean acceptsDocsOutOfOrder() {
// We actually could accept docs out of order, but we
// need to force BooleanScorer2 so that the
// sub-scorers are "on" each docID we are collecting:
return false;
}
@Override
public void setNextReader(AtomicReaderContext leaf) throws IOException {
//System.out.println("DS.setNextReader reader=" + leaf.reader());
hitCollector.setNextReader(leaf);
if (drillDownCollector != null) {
drillDownCollector.setNextReader(leaf);
}
for(Collector dsc : drillSidewaysCollectors) {
dsc.setNextReader(leaf);
}
}
void setWeight(Weight weight, int index) {
assert !weightToIndex.containsKey(weight);
weightToIndex.put(weight, index);
}
private void findScorers(Scorer scorer) {
Integer index = weightToIndex.get(scorer.getWeight());
if (index != null) {
if (index.intValue() == -1) {
mainScorer = scorer;
} else {
subScorers[index] = scorer;
}
}
for(ChildScorer child : scorer.getChildren()) {
findScorers(child.child);
}
}
@Override
public void setScorer(Scorer scorer) throws IOException {
mainScorer = null;
Arrays.fill(subScorers, null);
findScorers(scorer);
hitCollector.setScorer(scorer);
if (drillDownCollector != null) {
drillDownCollector.setScorer(scorer);
}
for(Collector dsc : drillSidewaysCollectors) {
dsc.setScorer(scorer);
}
}
}

View File

@ -1,195 +0,0 @@
package org.apache.lucene.facet.search;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.Bits;
class DrillSidewaysQuery extends Query {
final Query baseQuery;
final Collector drillDownCollector;
final Collector[] drillSidewaysCollectors;
final Term[][] drillDownTerms;
DrillSidewaysQuery(Query baseQuery, Collector drillDownCollector, Collector[] drillSidewaysCollectors, Term[][] drillDownTerms) {
this.baseQuery = baseQuery;
this.drillDownCollector = drillDownCollector;
this.drillSidewaysCollectors = drillSidewaysCollectors;
this.drillDownTerms = drillDownTerms;
}
@Override
public String toString(String field) {
return "DrillSidewaysQuery";
}
@Override
public Query rewrite(IndexReader reader) throws IOException {
Query newQuery = baseQuery;
while(true) {
Query rewrittenQuery = newQuery.rewrite(reader);
if (rewrittenQuery == newQuery) {
break;
}
newQuery = rewrittenQuery;
}
if (newQuery == baseQuery) {
return this;
} else {
return new DrillSidewaysQuery(newQuery, drillDownCollector, drillSidewaysCollectors, drillDownTerms);
}
}
@Override
public Weight createWeight(IndexSearcher searcher) throws IOException {
final Weight baseWeight = baseQuery.createWeight(searcher);
return new Weight() {
@Override
public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
return baseWeight.explain(context, doc);
}
@Override
public Query getQuery() {
return baseQuery;
}
@Override
public float getValueForNormalization() throws IOException {
return baseWeight.getValueForNormalization();
}
@Override
public void normalize(float norm, float topLevelBoost) {
baseWeight.normalize(norm, topLevelBoost);
}
@Override
public boolean scoresDocsOutOfOrder() {
// TODO: would be nice if AssertingIndexSearcher
// confirmed this for us
return false;
}
@Override
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
boolean topScorer, Bits acceptDocs) throws IOException {
DrillSidewaysScorer.DocsEnumsAndFreq[] dims = new DrillSidewaysScorer.DocsEnumsAndFreq[drillDownTerms.length];
TermsEnum termsEnum = null;
String lastField = null;
int nullCount = 0;
for(int dim=0;dim<dims.length;dim++) {
dims[dim] = new DrillSidewaysScorer.DocsEnumsAndFreq();
dims[dim].sidewaysCollector = drillSidewaysCollectors[dim];
String field = drillDownTerms[dim][0].field();
dims[dim].dim = drillDownTerms[dim][0].text();
if (lastField == null || !lastField.equals(field)) {
AtomicReader reader = context.reader();
Terms terms = reader.terms(field);
if (terms != null) {
termsEnum = terms.iterator(null);
} else {
termsEnum = null;
}
lastField = field;
}
dims[dim].docsEnums = new DocsEnum[drillDownTerms[dim].length];
if (termsEnum == null) {
nullCount++;
continue;
}
for(int i=0;i<drillDownTerms[dim].length;i++) {
if (termsEnum.seekExact(drillDownTerms[dim][i].bytes())) {
DocsEnum docsEnum = termsEnum.docs(null, null, 0);
if (docsEnum != null) {
dims[dim].docsEnums[i] = docsEnum;
dims[dim].maxCost = Math.max(dims[dim].maxCost, docsEnum.cost());
}
}
}
}
if (nullCount > 1 || (nullCount == 1 && dims.length == 1)) {
return null;
}
// Sort drill-downs by most restrictive first:
Arrays.sort(dims);
// TODO: it could be better if we take acceptDocs
// into account instead of baseScorer?
Scorer baseScorer = baseWeight.scorer(context, scoreDocsInOrder, false, acceptDocs);
if (baseScorer == null) {
return null;
}
return new DrillSidewaysScorer(this, context,
baseScorer,
drillDownCollector, dims);
}
};
}
// TODO: these should do "deeper" equals/hash on the 2-D drillDownTerms array
@Override
public int hashCode() {
final int prime = 31;
int result = super.hashCode();
result = prime * result + ((baseQuery == null) ? 0 : baseQuery.hashCode());
result = prime * result
+ ((drillDownCollector == null) ? 0 : drillDownCollector.hashCode());
result = prime * result + Arrays.hashCode(drillDownTerms);
result = prime * result + Arrays.hashCode(drillSidewaysCollectors);
return result;
}
@Override
public boolean equals(Object obj) {
if (this == obj) return true;
if (!super.equals(obj)) return false;
if (getClass() != obj.getClass()) return false;
DrillSidewaysQuery other = (DrillSidewaysQuery) obj;
if (baseQuery == null) {
if (other.baseQuery != null) return false;
} else if (!baseQuery.equals(other.baseQuery)) return false;
if (drillDownCollector == null) {
if (other.drillDownCollector != null) return false;
} else if (!drillDownCollector.equals(other.drillDownCollector)) return false;
if (!Arrays.equals(drillDownTerms, other.drillDownTerms)) return false;
if (!Arrays.equals(drillSidewaysCollectors, other.drillSidewaysCollectors)) return false;
return true;
}
}

View File

@ -1,654 +0,0 @@
package org.apache.lucene.facet.search;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.FixedBitSet;
class DrillSidewaysScorer extends Scorer {
//private static boolean DEBUG = false;
private final Collector drillDownCollector;
private final DocsEnumsAndFreq[] dims;
// DrillDown DocsEnums:
private final Scorer baseScorer;
private final AtomicReaderContext context;
private static final int CHUNK = 2048;
private static final int MASK = CHUNK-1;
private int collectDocID = -1;
private float collectScore;
DrillSidewaysScorer(Weight w, AtomicReaderContext context, Scorer baseScorer, Collector drillDownCollector,
DocsEnumsAndFreq[] dims) {
super(w);
this.dims = dims;
this.context = context;
this.baseScorer = baseScorer;
this.drillDownCollector = drillDownCollector;
}
@Override
public void score(Collector collector) throws IOException {
//if (DEBUG) {
// System.out.println("\nscore: reader=" + context.reader());
//}
//System.out.println("score r=" + context.reader());
collector.setScorer(this);
if (drillDownCollector != null) {
drillDownCollector.setScorer(this);
drillDownCollector.setNextReader(context);
}
for(DocsEnumsAndFreq dim : dims) {
dim.sidewaysCollector.setScorer(this);
dim.sidewaysCollector.setNextReader(context);
}
// TODO: if we ever allow null baseScorer ... it will
// mean we DO score docs out of order ... hmm, or if we
// change up the order of the conjunctions below
assert baseScorer != null;
// Position all scorers to their first matching doc:
baseScorer.nextDoc();
for(DocsEnumsAndFreq dim : dims) {
for (DocsEnum docsEnum : dim.docsEnums) {
if (docsEnum != null) {
docsEnum.nextDoc();
}
}
}
final int numDims = dims.length;
DocsEnum[][] docsEnums = new DocsEnum[numDims][];
Collector[] sidewaysCollectors = new Collector[numDims];
long drillDownCost = 0;
for(int dim=0;dim<numDims;dim++) {
docsEnums[dim] = dims[dim].docsEnums;
sidewaysCollectors[dim] = dims[dim].sidewaysCollector;
for (DocsEnum de : dims[dim].docsEnums) {
if (de != null) {
drillDownCost += de.cost();
}
}
}
long baseQueryCost = baseScorer.cost();
/*
System.out.println("\nbaseDocID=" + baseScorer.docID() + " est=" + estBaseHitCount);
System.out.println(" maxDoc=" + context.reader().maxDoc());
System.out.println(" maxCost=" + maxCost);
System.out.println(" dims[0].freq=" + dims[0].freq);
if (numDims > 1) {
System.out.println(" dims[1].freq=" + dims[1].freq);
}
*/
if (baseQueryCost < drillDownCost/10) {
// Base query is much more restrictive than the drill-downs:
// iterate its hits and advance() the drill-down enums to them:
//System.out.println("baseAdvance");
doBaseAdvanceScoring(collector, docsEnums, sidewaysCollectors);
} else if (numDims > 1 && (dims[1].maxCost < baseQueryCost/10)) {
// The drill-downs are much more restrictive than the base query
// (dims are sorted by cost, so if dims[1] is cheap then dims[0]
// is too): drive by the drill-downs and advance() the base scorer:
//System.out.println("drillDownAdvance");
doDrillDownAdvanceScoring(collector, docsEnums, sidewaysCollectors);
} else {
// Costs are comparable: BooleanScorer-like chunked union scoring:
//System.out.println("union");
doUnionScoring(collector, docsEnums, sidewaysCollectors);
}
}
/** Used when drill downs are highly constraining vs
* baseQuery. */
private void doDrillDownAdvanceScoring(Collector collector, DocsEnum[][] docsEnums, Collector[] sidewaysCollectors) throws IOException {
final int maxDoc = context.reader().maxDoc();
final int numDims = dims.length;
//if (DEBUG) {
// System.out.println(" doDrillDownAdvanceScoring");
//}
// TODO: maybe a class like BS, instead of parallel arrays
int[] filledSlots = new int[CHUNK];
int[] docIDs = new int[CHUNK];
float[] scores = new float[CHUNK];
int[] missingDims = new int[CHUNK];
int[] counts = new int[CHUNK];
docIDs[0] = -1;
int nextChunkStart = CHUNK;
final FixedBitSet seen = new FixedBitSet(CHUNK);
while (true) {
//if (DEBUG) {
// System.out.println("\ncycle nextChunkStart=" + nextChunkStart + " docIds[0]=" + docIDs[0]);
//}
// First dim:
//if (DEBUG) {
// System.out.println(" dim0");
//}
for(DocsEnum docsEnum : docsEnums[0]) {
if (docsEnum == null) {
continue;
}
int docID = docsEnum.docID();
while (docID < nextChunkStart) {
int slot = docID & MASK;
if (docIDs[slot] != docID) {
seen.set(slot);
// Mark slot as valid:
//if (DEBUG) {
// System.out.println(" set docID=" + docID + " id=" + context.reader().document(docID).get("id"));
//}
docIDs[slot] = docID;
missingDims[slot] = 1;
counts[slot] = 1;
}
docID = docsEnum.nextDoc();
}
}
// Second dim:
//if (DEBUG) {
// System.out.println(" dim1");
//}
for(DocsEnum docsEnum : docsEnums[1]) {
if (docsEnum == null) {
continue;
}
int docID = docsEnum.docID();
while (docID < nextChunkStart) {
int slot = docID & MASK;
if (docIDs[slot] != docID) {
// Mark slot as valid:
seen.set(slot);
//if (DEBUG) {
// System.out.println(" set docID=" + docID + " missingDim=0 id=" + context.reader().document(docID).get("id"));
//}
docIDs[slot] = docID;
missingDims[slot] = 0;
counts[slot] = 1;
} else {
// TODO: single-valued dims will always be true
// below; we could somehow specialize
if (missingDims[slot] >= 1) {
missingDims[slot] = 2;
counts[slot] = 2;
//if (DEBUG) {
// System.out.println(" set docID=" + docID + " missingDim=2 id=" + context.reader().document(docID).get("id"));
//}
} else {
counts[slot] = 1;
//if (DEBUG) {
// System.out.println(" set docID=" + docID + " missingDim=" + missingDims[slot] + " id=" + context.reader().document(docID).get("id"));
//}
}
}
docID = docsEnum.nextDoc();
}
}
// After this we can "upgrade" to conjunction, because
// any doc not seen by either dim 0 or dim 1 cannot be
// a hit or a near miss:
//if (DEBUG) {
// System.out.println(" baseScorer");
//}
// Fold in baseScorer, using advance:
int filledCount = 0;
int slot0 = 0;
while (slot0 < CHUNK && (slot0 = seen.nextSetBit(slot0)) != -1) {
int ddDocID = docIDs[slot0];
assert ddDocID != -1;
int baseDocID = baseScorer.docID();
if (baseDocID < ddDocID) {
baseDocID = baseScorer.advance(ddDocID);
}
if (baseDocID == ddDocID) {
//if (DEBUG) {
// System.out.println(" keep docID=" + ddDocID + " id=" + context.reader().document(ddDocID).get("id"));
//}
scores[slot0] = baseScorer.score();
filledSlots[filledCount++] = slot0;
counts[slot0]++;
} else {
//if (DEBUG) {
// System.out.println(" no docID=" + ddDocID + " id=" + context.reader().document(ddDocID).get("id"));
//}
docIDs[slot0] = -1;
// TODO: we could jump slot0 forward to the
// baseDocID ... but we'd need to set docIDs for
// intervening slots to -1
}
slot0++;
}
seen.clear(0, CHUNK);
if (filledCount == 0) {
if (nextChunkStart >= maxDoc) {
break;
}
nextChunkStart += CHUNK;
continue;
}
// TODO: factor this out & share w/ union scorer,
// except we start from dim=2 instead:
for(int dim=2;dim<numDims;dim++) {
//if (DEBUG) {
// System.out.println(" dim=" + dim + " [" + dims[dim].dim + "]");
//}
for(DocsEnum docsEnum : docsEnums[dim]) {
if (docsEnum == null) {
continue;
}
int docID = docsEnum.docID();
while (docID < nextChunkStart) {
int slot = docID & MASK;
if (docIDs[slot] == docID && counts[slot] >= dim) {
// TODO: single-valued dims will always be true
// below; we could somehow specialize
if (missingDims[slot] >= dim) {
//if (DEBUG) {
// System.out.println(" set docID=" + docID + " count=" + (dim+2));
//}
missingDims[slot] = dim+1;
counts[slot] = dim+2;
} else {
//if (DEBUG) {
// System.out.println(" set docID=" + docID + " missing count=" + (dim+1));
//}
counts[slot] = dim+1;
}
}
// TODO: sometimes use advance?
docID = docsEnum.nextDoc();
}
}
}
// Collect:
//if (DEBUG) {
// System.out.println(" now collect: " + filledCount + " hits");
//}
for(int i=0;i<filledCount;i++) {
int slot = filledSlots[i];
collectDocID = docIDs[slot];
collectScore = scores[slot];
//if (DEBUG) {
// System.out.println(" docID=" + docIDs[slot] + " count=" + counts[slot]);
//}
if (counts[slot] == 1+numDims) {
collectHit(collector, sidewaysCollectors);
} else if (counts[slot] == numDims) {
collectNearMiss(sidewaysCollectors, missingDims[slot]);
}
}
if (nextChunkStart >= maxDoc) {
break;
}
nextChunkStart += CHUNK;
}
}
/** Used when base query is highly constraining vs the
* drilldowns; in this case we just .next() on base and
* .advance() on the dims. */
private void doBaseAdvanceScoring(Collector collector, DocsEnum[][] docsEnums, Collector[] sidewaysCollectors) throws IOException {
//if (DEBUG) {
// System.out.println(" doBaseAdvanceScoring");
//}
int docID = baseScorer.docID();
final int numDims = dims.length;
nextDoc: while (docID != NO_MORE_DOCS) {
int failedDim = -1;
for(int dim=0;dim<numDims;dim++) {
// TODO: should we sort this 2nd dimension of
// docsEnums from most frequent to least?
boolean found = false;
for(DocsEnum docsEnum : docsEnums[dim]) {
if (docsEnum == null) {
continue;
}
if (docsEnum.docID() < docID) {
docsEnum.advance(docID);
}
if (docsEnum.docID() == docID) {
found = true;
break;
}
}
if (!found) {
if (failedDim != -1) {
// More than one dim fails on this document, so
// it's neither a hit nor a near-miss; move to
// next doc:
docID = baseScorer.nextDoc();
continue nextDoc;
} else {
failedDim = dim;
}
}
}
collectDocID = docID;
// TODO: we could score on demand instead since we are
// daat here:
collectScore = baseScorer.score();
if (failedDim == -1) {
collectHit(collector, sidewaysCollectors);
} else {
collectNearMiss(sidewaysCollectors, failedDim);
}
docID = baseScorer.nextDoc();
}
}
private void collectHit(Collector collector, Collector[] sidewaysCollectors) throws IOException {
//if (DEBUG) {
// System.out.println(" hit");
//}
collector.collect(collectDocID);
if (drillDownCollector != null) {
drillDownCollector.collect(collectDocID);
}
// TODO: we could "fix" faceting of the sideways counts
// to do this "union" (of the drill down hits) in the
// end instead:
// Tally sideways counts:
for(int dim=0;dim<sidewaysCollectors.length;dim++) {
sidewaysCollectors[dim].collect(collectDocID);
}
}
private void collectNearMiss(Collector[] sidewaysCollectors, int dim) throws IOException {
//if (DEBUG) {
// System.out.println(" missingDim=" + dim);
//}
sidewaysCollectors[dim].collect(collectDocID);
}
private void doUnionScoring(Collector collector, DocsEnum[][] docsEnums, Collector[] sidewaysCollectors) throws IOException {
//if (DEBUG) {
// System.out.println(" doUnionScoring");
//}
final int maxDoc = context.reader().maxDoc();
final int numDims = dims.length;
// TODO: maybe a class like BS, instead of parallel arrays
int[] filledSlots = new int[CHUNK];
int[] docIDs = new int[CHUNK];
float[] scores = new float[CHUNK];
int[] missingDims = new int[CHUNK];
int[] counts = new int[CHUNK];
docIDs[0] = -1;
// NOTE: this is basically a specialized version of
// BooleanScorer for the minShouldMatch=N-1 case,
// carefully tracking which dimension failed to match
int nextChunkStart = CHUNK;
while (true) {
//if (DEBUG) {
// System.out.println("\ncycle nextChunkStart=" + nextChunkStart + " docIds[0]=" + docIDs[0]);
//}
int filledCount = 0;
int docID = baseScorer.docID();
//if (DEBUG) {
// System.out.println(" base docID=" + docID);
//}
while (docID < nextChunkStart) {
int slot = docID & MASK;
//if (DEBUG) {
// System.out.println(" docIDs[slot=" + slot + "]=" + docID + " id=" + context.reader().document(docID).get("id"));
//}
// Mark slot as valid:
assert docIDs[slot] != docID: "slot=" + slot + " docID=" + docID;
docIDs[slot] = docID;
scores[slot] = baseScorer.score();
filledSlots[filledCount++] = slot;
missingDims[slot] = 0;
counts[slot] = 1;
docID = baseScorer.nextDoc();
}
if (filledCount == 0) {
if (nextChunkStart >= maxDoc) {
break;
}
nextChunkStart += CHUNK;
continue;
}
// First drill-down dim, basically adds SHOULD onto
// the baseQuery:
//if (DEBUG) {
// System.out.println(" dim=0 [" + dims[0].dim + "]");
//}
for(DocsEnum docsEnum : docsEnums[0]) {
if (docsEnum == null) {
continue;
}
docID = docsEnum.docID();
//if (DEBUG) {
// System.out.println(" start docID=" + docID);
//}
while (docID < nextChunkStart) {
int slot = docID & MASK;
if (docIDs[slot] == docID) {
//if (DEBUG) {
// System.out.println(" set docID=" + docID + " count=2");
//}
missingDims[slot] = 1;
counts[slot] = 2;
}
docID = docsEnum.nextDoc();
}
}
for(int dim=1;dim<numDims;dim++) {
//if (DEBUG) {
// System.out.println(" dim=" + dim + " [" + dims[dim].dim + "]");
//}
for(DocsEnum docsEnum : docsEnums[dim]) {
if (docsEnum == null) {
continue;
}
docID = docsEnum.docID();
//if (DEBUG) {
// System.out.println(" start docID=" + docID);
//}
while (docID < nextChunkStart) {
int slot = docID & MASK;
if (docIDs[slot] == docID && counts[slot] >= dim) {
// This doc is still in the running...
// TODO: single-valued dims will always be true
// below; we could somehow specialize
if (missingDims[slot] >= dim) {
//if (DEBUG) {
// System.out.println(" set docID=" + docID + " count=" + (dim+2));
//}
missingDims[slot] = dim+1;
counts[slot] = dim+2;
} else {
//if (DEBUG) {
// System.out.println(" set docID=" + docID + " missing count=" + (dim+1));
//}
counts[slot] = dim+1;
}
}
docID = docsEnum.nextDoc();
}
// TODO: sometimes use advance?
/*
int docBase = nextChunkStart - CHUNK;
for(int i=0;i<filledCount;i++) {
int slot = filledSlots[i];
docID = docBase + filledSlots[i];
if (docIDs[slot] == docID && counts[slot] >= dim) {
// This doc is still in the running...
int ddDocID = docsEnum.docID();
if (ddDocID < docID) {
ddDocID = docsEnum.advance(docID);
}
if (ddDocID == docID) {
if (missingDims[slot] >= dim && counts[slot] == allMatchCount) {
//if (DEBUG) {
// System.out.println(" set docID=" + docID + " count=" + (dim+2));
// }
missingDims[slot] = dim+1;
counts[slot] = dim+2;
} else {
//if (DEBUG) {
// System.out.println(" set docID=" + docID + " missing count=" + (dim+1));
// }
counts[slot] = dim+1;
}
}
}
}
*/
}
}
// Collect:
//if (DEBUG) {
// System.out.println(" now collect: " + filledCount + " hits");
//}
for(int i=0;i<filledCount;i++) {
// NOTE: This is actually in-order collection,
// because we only accept docs originally returned by
// the baseScorer (ie that Scorer is AND'd)
int slot = filledSlots[i];
collectDocID = docIDs[slot];
collectScore = scores[slot];
//if (DEBUG) {
// System.out.println(" docID=" + docIDs[slot] + " count=" + counts[slot]);
//}
//System.out.println(" collect doc=" + collectDocID + " main.freq=" + (counts[slot]-1) + " main.doc=" + collectDocID + " exactCount=" + numDims);
if (counts[slot] == 1+numDims) {
//System.out.println(" hit");
collectHit(collector, sidewaysCollectors);
} else if (counts[slot] == numDims) {
//System.out.println(" sw");
collectNearMiss(sidewaysCollectors, missingDims[slot]);
}
}
if (nextChunkStart >= maxDoc) {
break;
}
nextChunkStart += CHUNK;
}
}
@Override
public int docID() {
return collectDocID;
}
@Override
public float score() {
return collectScore;
}
@Override
public int freq() {
return 1+dims.length;
}
@Override
public int nextDoc() {
throw new UnsupportedOperationException();
}
@Override
public int advance(int target) {
throw new UnsupportedOperationException();
}
@Override
public long cost() {
return baseScorer.cost();
}
@Override
public Collection<ChildScorer> getChildren() {
return Collections.singletonList(new ChildScorer(baseScorer, "MUST"));
}
static class DocsEnumsAndFreq implements Comparable<DocsEnumsAndFreq> {
DocsEnum[] docsEnums;
// Max cost for all docsEnums for this dim:
long maxCost;
Collector sidewaysCollector;
String dim;
@Override
public int compareTo(DocsEnumsAndFreq other) {
if (maxCost < other.maxCost) {
return -1;
} else if (maxCost > other.maxCost) {
return 1;
} else {
return 0;
}
}
}
}

View File

@ -1,83 +0,0 @@
package org.apache.lucene.facet.search;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Provider of arrays used for facets aggregation. Returns either an
* {@code int[]} or {@code float[]} of the specified array length. When the
* arrays are no longer needed, you should call {@link #free()}, so that they
* can be reclaimed (or recycled, by implementations that pool them).
*
* <p>
* <b>NOTE:</b> if you need to reuse the allocated arrays between search
* requests, use {@link ReusingFacetArrays}.
*
* <p>
* <b>NOTE:</b> this class is not thread safe. You typically allocate it per
* search.
*
* @lucene.experimental
*/
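// Typical per-search usage (hedged sketch; the taxonomy reader and the aggregation
// loop are assumptions, not part of this class):
//
//   FacetArrays arrays = new FacetArrays(taxoReader.getSize());
//   int[] counts = arrays.getIntArray();
//   // ... aggregate per-ordinal counts into "counts" ...
//   arrays.free(); // release (or, with a reusing subclass, recycle) the buffers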
public class FacetArrays {
private int[] ints;
private float[] floats;
public final int arrayLength;
/** Arrays will be allocated at the specified length. */
public FacetArrays(int arrayLength) {
this.arrayLength = arrayLength;
}
protected float[] newFloatArray() {
return new float[arrayLength];
}
protected int[] newIntArray() {
return new int[arrayLength];
}
protected void doFree(float[] floats, int[] ints) {
}
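// The three protected hooks above are the extension points for reuse: a subclass
// (e.g. ReusingFacetArrays) can override newIntArray()/newFloatArray() to hand out
// pooled buffers and doFree() to return them to the pool instead of dropping them.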
/**
* Notifies that the arrays obtained from {@link #getIntArray()}
* or {@link #getFloatArray()} are no longer needed and can be freed.
*/
public final void free() {
doFree(floats, ints);
ints = null;
floats = null;
}
public final int[] getIntArray() {
if (ints == null) {
ints = newIntArray();
}
return ints;
}
public final float[] getFloatArray() {
if (floats == null) {
floats = newFloatArray();
}
return floats;
}
}

View File

@ -1,212 +0,0 @@
package org.apache.lucene.facet.search;
import org.apache.lucene.facet.params.CategoryListParams.OrdinalPolicy;
import org.apache.lucene.facet.params.FacetIndexingParams;
import org.apache.lucene.facet.taxonomy.FacetLabel;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Defines an aggregation request for a category. Allows specifying the
* {@link #numResults number of child categories} to return as well as
* {@link #getSortOrder() which} categories to consider the "top" (highest or
* lowest ranking ones).
* <p>
* If the category being aggregated is hierarchical, you can also specify the
* {@link #setDepth(int) depth} up to which to aggregate child categories as well
* as how the result should be {@link #setResultMode(ResultMode) constructed}.
*
* @lucene.experimental
*/
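// Hedged configuration example (CountFacetRequest is assumed to be the counting
// subclass shipped alongside this class; the dimension name is made up):
//
//   FacetRequest fr = new CountFacetRequest(new FacetLabel("Author"), 10);
//   fr.setDepth(2);                       // also aggregate grandchildren
//   fr.setSortOrder(SortOrder.ASCENDING); // return the lowest-weighted categories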
public abstract class FacetRequest {
/**
* When {@link FacetRequest#getDepth()} is greater than 1, defines the
* structure of the result as well as how constraints such as
* {@link FacetRequest#numResults} and {@link FacetRequest#getNumLabel()} are
* applied.
*/
public enum ResultMode {
/**
* Constraints are applied per node, and the result has a full tree
* structure. Default result mode.
*/
PER_NODE_IN_TREE,
/**
* Constraints are applied globally, on total number of results, and the
* result has a flat structure.
*/
GLOBAL_FLAT
}
/**
* Defines which categories to return. If {@link #DESCENDING} (the default),
* the highest {@link FacetRequest#numResults} weighted categories will be
* returned, otherwise the lowest ones.
*/
public enum SortOrder { ASCENDING, DESCENDING }
/** The category being aggregated in this facet request. */
public final FacetLabel categoryPath;
/** The number of child categories to return for {@link #categoryPath}. */
public final int numResults;
private int numLabel;
private int depth = 1;
private SortOrder sortOrder = SortOrder.DESCENDING;
private ResultMode resultMode = ResultMode.PER_NODE_IN_TREE;
// Computed at construction; based on categoryPath and numResults.
private final int hashCode;
/**
* Constructor with the given category to aggregate and the number of child
* categories to return.
*
* @param path
* the category to aggregate. Cannot be {@code null}.
* @param numResults
* the number of child categories to return. If set to
* {@code Integer.MAX_VALUE}, all immediate child categories will be
* returned. Must be greater than 0.
*/
public FacetRequest(FacetLabel path, int numResults) {
if (numResults <= 0) {
throw new IllegalArgumentException("num results must be a positive (>0) number: " + numResults);
}
if (path == null) {
throw new IllegalArgumentException("category path cannot be null!");
}
categoryPath = path;
this.numResults = numResults;
numLabel = numResults;
hashCode = categoryPath.hashCode() ^ this.numResults;
}
/**
* Returns the {@link FacetsAggregator} which can aggregate the categories of
* this facet request. The aggregator is expected to aggregate category values
* into {@link FacetArrays}. If the facet request does not support that, e.g.
* {@link RangeFacetRequest}, it can return {@code null}. Note though that
* such requests require a dedicated {@link FacetsAccumulator}.
*/
public abstract FacetsAggregator createFacetsAggregator(FacetIndexingParams fip);
@Override
public boolean equals(Object o) {
if (o instanceof FacetRequest) {
FacetRequest that = (FacetRequest) o;
return that.hashCode == this.hashCode &&
that.categoryPath.equals(this.categoryPath) &&
that.numResults == this.numResults &&
that.depth == this.depth &&
that.resultMode == this.resultMode &&
that.numLabel == this.numLabel &&
that.sortOrder == this.sortOrder;
}
return false;
}
/**
* How deeply to look under {@link #categoryPath}. By default, only its
* immediate children are aggregated (depth=1). If set to
* {@code Integer.MAX_VALUE}, the entire sub-tree of the category will be
* aggregated.
* <p>
* <b>NOTE:</b> setting depth to 0 means that only the category itself should
* be aggregated. In that case, make sure to index the category with
* {@link OrdinalPolicy#ALL_PARENTS}; if the category is not the root category
* (the dimension), {@link OrdinalPolicy#ALL_BUT_DIMENSION} is fine too.
*/
public final int getDepth() {
// TODO an AUTO_EXPAND option could be useful
return depth;
}
/**
* Returns the number of categories to label. By default all
* returned categories are labeled.
* <p>
* This allows an app to request a large number of results while
* labeling them on demand (e.g. when the UI requests to show more
* categories).
*/
public final int getNumLabel() {
return numLabel;
}
/** Return the requested result mode (defaults to {@link ResultMode#PER_NODE_IN_TREE}). */
public final ResultMode getResultMode() {
return resultMode;
}
/** Return the requested order of results (defaults to {@link SortOrder#DESCENDING}). */
public final SortOrder getSortOrder() {
return sortOrder;
}
@Override
public int hashCode() {
return hashCode;
}
/**
* Sets the depth up to which to aggregate facets.
*
* @see #getDepth()
*/
public void setDepth(int depth) {
this.depth = depth;
}
/**
* Sets the number of categories to label.
*
* @see #getNumLabel()
*/
public void setNumLabel(int numLabel) {
this.numLabel = numLabel;
}
/**
* Sets the {@link ResultMode} for this request.
*
* @see #getResultMode()
*/
public void setResultMode(ResultMode resultMode) {
this.resultMode = resultMode;
}
/**
* Sets the {@link SortOrder} for this request.
*
* @see #getSortOrder()
*/
public void setSortOrder(SortOrder sortOrder) {
this.sortOrder = sortOrder;
}
@Override
public String toString() {
return categoryPath.toString() + " nRes=" + numResults + " nLbl=" + numLabel;
}
}

View File

@ -1,117 +0,0 @@
package org.apache.lucene.facet.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.lucene.facet.params.FacetIndexingParams;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.util.CollectionUtil;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Result of faceted search.
*
* @lucene.experimental
*/
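// Hedged sketch of consuming a result (assumes FacetResultNode exposes a label, a
// value and its children via a "subResults" collection; none of that is defined in
// this file):
//
//   FacetResultNode root = facetResult.getFacetResultNode();
//   for (FacetResultNode child : root.subResults) {
//     System.out.println(child.label + " = " + child.value);
//   }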
public class FacetResult {
private static FacetResultNode addIfNotExist(Map<FacetLabel, FacetResultNode> nodes, FacetResultNode node) {
FacetResultNode n = nodes.get(node.label);
if (n == null) {
nodes.put(node.label, node);
n = node;
}
return n;
}
private final FacetRequest facetRequest;
private final FacetResultNode rootNode;
private final int numValidDescendants;
public FacetResult(FacetRequest facetRequest, FacetResultNode rootNode, int numValidDescendants) {
this.facetRequest = facetRequest;
this.rootNode = rootNode;
this.numValidDescendants = numValidDescendants;
}
/**
* Facet result node matching the root of the {@link #getFacetRequest() facet request}.
* @see #getFacetRequest()
* @see FacetRequest#categoryPath
*/
public final FacetResultNode getFacetResultNode() {
return rootNode;
}
/**
* Number of descendants of {@link #getFacetResultNode() root facet result
* node}, up till the requested depth.
*/
public final int getNumValidDescendants() {
return numValidDescendants;
}
/**
* Request for which this result was obtained.
*/
public final FacetRequest getFacetRequest() {
return this.facetRequest;
}
/**
* String representation of this facet result.
* Use with caution: might return a very long string.
* @param prefix prefix for each result line
* @see #toString()
*/
public String toString(String prefix) {
StringBuilder sb = new StringBuilder();
String nl = "";
// request
if (this.facetRequest != null) {
sb.append(nl).append(prefix).append("Request: ").append(
this.facetRequest.toString());
nl = "\n";
}
// total facets
sb.append(nl).append(prefix).append("Num valid Descendants (up to specified depth): ").append(
this.numValidDescendants);
nl = "\n";
// result node
if (this.rootNode != null) {
sb.append(nl).append(this.rootNode.toString(prefix + "\t"));
}
return sb.toString();
}
@Override
public String toString() {
return toString("");
}
}

Some files were not shown because too many files have changed in this diff.