mirror of https://github.com/apache/lucene.git
LUCENE-5339: finish cutover
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5339@1545466 13f79535-47bb-0310-9956-ffa450edef68
parent 92b1e23071
commit 0255424864
@@ -1,74 +0,0 @@
package org.apache.lucene.facet.associations;

import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.facet.index.DrillDownStream;
import org.apache.lucene.facet.params.FacetIndexingParams;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.util.BytesRef;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * A {@link DrillDownStream} which adds to each drill-down token a
 * payload according to the {@link CategoryAssociation} defined in the
 * {@link CategoryAssociationsContainer}.
 * 
 * @lucene.experimental
 */
public class AssociationsDrillDownStream extends DrillDownStream {

  private final PayloadAttribute payloadAttribute;
  private final BytesRef payload;
  private final ByteArrayDataOutput output = new ByteArrayDataOutput();
  private final CategoryAssociationsContainer associations;

  public AssociationsDrillDownStream(CategoryAssociationsContainer associations, FacetIndexingParams indexingParams) {
    super(associations, indexingParams);
    this.associations = associations;
    payloadAttribute = addAttribute(PayloadAttribute.class);
    BytesRef bytes = payloadAttribute.getPayload();
    if (bytes == null) {
      bytes = new BytesRef(new byte[4]);
      payloadAttribute.setPayload(bytes);
    }
    bytes.offset = 0;
    this.payload = bytes;
  }

  @Override
  protected void addAdditionalAttributes(FacetLabel cp, boolean isParent) {
    if (isParent) {
      return; // associations are not added to parent categories
    }

    CategoryAssociation association = associations.getAssociation(cp);
    if (association == null) {
      // it is ok to set a null association for a category - it's treated as a
      // regular category in that case.
      return;
    }
    if (payload.bytes.length < association.maxBytesNeeded()) {
      payload.grow(association.maxBytesNeeded());
    }
    output.reset(payload.bytes);
    association.serialize(output);
    payload.length = output.getPosition();
  }

}
@@ -1,124 +0,0 @@
package org.apache.lucene.facet.associations;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.facet.index.DrillDownStream;
import org.apache.lucene.facet.index.FacetFields;
import org.apache.lucene.facet.params.CategoryListParams;
import org.apache.lucene.facet.params.FacetIndexingParams;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * A utility class for adding facet fields to a document. Usually one field will
 * be added for all facets, however per the
 * {@link FacetIndexingParams#getCategoryListParams(FacetLabel)}, one field
 * may be added for every group of facets.
 * 
 * @lucene.experimental
 */
public class AssociationsFacetFields extends FacetFields {

  // The drill-down field is added with a TokenStream, hence why it's based on
  // TextField type. However for associations, we store a payload with the
  // association value, therefore we set IndexOptions to include positions.
  private static final FieldType DRILL_DOWN_TYPE = new FieldType(TextField.TYPE_NOT_STORED);
  static {
    DRILL_DOWN_TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
    DRILL_DOWN_TYPE.freeze();
  }

  /**
   * Constructs a new instance with the {@link FacetIndexingParams#DEFAULT
   * default} facet indexing params.
   * 
   * @param taxonomyWriter
   *          used to resolve given categories to ordinals
   */
  public AssociationsFacetFields(TaxonomyWriter taxonomyWriter) {
    super(taxonomyWriter);
  }

  /**
   * Constructs a new instance with the given facet indexing params.
   * 
   * @param taxonomyWriter
   *          used to resolve given categories to ordinals
   * @param params
   *          determines under which fields the categories should be indexed
   */
  public AssociationsFacetFields(TaxonomyWriter taxonomyWriter, FacetIndexingParams params) {
    super(taxonomyWriter, params);
  }

  @Override
  protected Map<CategoryListParams,Iterable<FacetLabel>> createCategoryListMapping(
      Iterable<FacetLabel> categories) {
    CategoryAssociationsContainer categoryAssociations = (CategoryAssociationsContainer) categories;
    HashMap<CategoryListParams,Iterable<FacetLabel>> categoryLists =
        new HashMap<CategoryListParams,Iterable<FacetLabel>>();
    for (FacetLabel cp : categories) {
      // each category may be indexed under a different field, so add it to the right list.
      CategoryListParams clp = indexingParams.getCategoryListParams(cp);
      CategoryAssociationsContainer clpContainer = (CategoryAssociationsContainer) categoryLists.get(clp);
      if (clpContainer == null) {
        clpContainer = new CategoryAssociationsContainer();
        categoryLists.put(clp, clpContainer);
      }
      clpContainer.setAssociation(cp, categoryAssociations.getAssociation(cp));
    }
    return categoryLists;
  }

  @Override
  protected Map<String,BytesRef> getCategoryListData(CategoryListParams categoryListParams, IntsRef ordinals,
      Iterable<FacetLabel> categories) throws IOException {
    AssociationsListBuilder associations = new AssociationsListBuilder((CategoryAssociationsContainer) categories);
    return associations.build(ordinals, categories);
  }

  @Override
  protected DrillDownStream getDrillDownStream(Iterable<FacetLabel> categories) {
    return new AssociationsDrillDownStream((CategoryAssociationsContainer) categories, indexingParams);
  }

  @Override
  protected FieldType drillDownFieldType() {
    return DRILL_DOWN_TYPE;
  }

  @Override
  public void addFields(Document doc, Iterable<FacetLabel> categories) throws IOException {
    if (!(categories instanceof CategoryAssociationsContainer)) {
      throw new IllegalArgumentException("categories must be of type " +
          CategoryAssociationsContainer.class.getSimpleName());
    }
    super.addFields(doc, categories);
  }

}
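For context, a minimal indexing sketch (not part of this commit) showing how the removed classes fit together. It assumes the pre-cutover associations API exactly as declared above; taxoWriter and indexWriter are hypothetical, already-opened TaxonomyWriter and IndexWriter instances.

    // Hypothetical usage sketch of the removed indexing API:
    CategoryAssociationsContainer associations = new CategoryAssociationsContainer();
    associations.setAssociation(new FacetLabel("tags", "lucene"), new CategoryIntAssociation(3));
    associations.setAssociation(new FacetLabel("genre", "computing"), new CategoryFloatAssociation(0.87f));

    AssociationsFacetFields facetFields = new AssociationsFacetFields(taxoWriter);
    Document doc = new Document();
    facetFields.addFields(doc, associations); // adds drill-down terms plus the association category lists
    indexWriter.addDocument(doc);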
@@ -1,82 +0,0 @@
package org.apache.lucene.facet.associations;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.facet.index.CategoryListBuilder;
import org.apache.lucene.facet.index.CountingListBuilder;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * An {@link AssociationsListBuilder} encodes category-association value pairs.
 * Every category-association pair is written under the respective association's
 * {@link CategoryAssociation#getCategoryListID()}.
 * <p>
 * <b>NOTE:</b> association lists do not encode the counting list data. You
 * should use {@link CountingListBuilder} to build that information and then
 * merge the results of both {@link #build(IntsRef, Iterable)} calls.
 */
public class AssociationsListBuilder implements CategoryListBuilder {

  private final CategoryAssociationsContainer associations;
  private final ByteArrayDataOutput output = new ByteArrayDataOutput();

  public AssociationsListBuilder(CategoryAssociationsContainer associations) {
    this.associations = associations;
  }

  @Override
  public Map<String,BytesRef> build(IntsRef ordinals, Iterable<FacetLabel> categories) throws IOException {
    final HashMap<String,BytesRef> res = new HashMap<String,BytesRef>();
    int idx = 0;
    for (FacetLabel cp : categories) {
      // build per-association key BytesRef
      CategoryAssociation association = associations.getAssociation(cp);

      BytesRef bytes = res.get(association.getCategoryListID());
      if (bytes == null) {
        bytes = new BytesRef(32);
        res.put(association.getCategoryListID(), bytes);
      }

      int maxBytesNeeded = 4 /* int */ + association.maxBytesNeeded() + bytes.length;
      if (bytes.bytes.length < maxBytesNeeded) {
        bytes.grow(maxBytesNeeded);
      }

      // reset the output to write from bytes.length (current position) until the end
      output.reset(bytes.bytes, bytes.length, bytes.bytes.length - bytes.length);
      output.writeInt(ordinals.ints[idx++]);

      // encode the association bytes
      association.serialize(output);

      // update BytesRef
      bytes.length = output.getPosition();
    }

    return res;
  }

}
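The builder above writes, per category, a 4-byte big-endian ordinal followed by the serialized association value, concatenated under the association's category-list ID. A hedged decode sketch of one such list; `bytes` is assumed to be one BytesRef value out of the map returned by build():

    // Reading back one association list (sketch):
    ByteArrayDataInput in = new ByteArrayDataInput(bytes.bytes, bytes.offset, bytes.length);
    while (in.getPosition() < bytes.offset + bytes.length) {
      int ordinal = in.readInt();            // written by output.writeInt(ordinals.ints[idx++])
      CategoryIntAssociation assoc = new CategoryIntAssociation();
      assoc.deserialize(in);                 // consumes the association's bytes
      System.out.println(ordinal + " -> " + assoc.getValue());
    }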
@@ -1,48 +0,0 @@
package org.apache.lucene.facet.associations;

import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Allows associating an arbitrary value with a {@link FacetLabel}.
 * 
 * @lucene.experimental
 */
public interface CategoryAssociation {

  /** Serializes the associated value into the given {@link DataOutput}. */
  public void serialize(ByteArrayDataOutput output);

  /** Deserializes the association value from the given {@link DataInput}. */
  public void deserialize(ByteArrayDataInput input);

  /** Returns the maximum bytes needed to encode the association value. */
  public int maxBytesNeeded();

  /**
   * Returns the ID of the category association. The ID is used as e.g. the
   * term's text under which to encode the association values.
   */
  public String getCategoryListID();

}
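Implementing the interface for a new value type only requires the four methods above. A hypothetical 8-byte variant, not part of this commit (the $assoc_long$ list ID is made up), analogous to the int/float implementations elsewhere in this diff:

    /** Hypothetical long association, analogous to CategoryIntAssociation below. */
    public class CategoryLongAssociation implements CategoryAssociation {

      public static final String ASSOCIATION_LIST_ID = "$assoc_long$"; // made-up ID

      private long value;

      public CategoryLongAssociation() {
        // used for deserialization
      }

      public CategoryLongAssociation(long value) {
        this.value = value;
      }

      @Override
      public void serialize(ByteArrayDataOutput output) {
        try {
          output.writeLong(value);
        } catch (IOException e) {
          throw new RuntimeException("unexpected exception writing to a byte[]", e);
        }
      }

      @Override
      public void deserialize(ByteArrayDataInput input) {
        value = input.readLong();
      }

      @Override
      public int maxBytesNeeded() {
        return 8; // plain long
      }

      @Override
      public String getCategoryListID() {
        return ASSOCIATION_LIST_ID;
      }
    }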
@@ -1,65 +0,0 @@
package org.apache.lucene.facet.associations;

import java.util.HashMap;
import java.util.Iterator;

import org.apache.lucene.facet.taxonomy.FacetLabel;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/** Holds {@link CategoryAssociation} per {@link FacetLabel}. */
public class CategoryAssociationsContainer implements Iterable<FacetLabel> {

  private final HashMap<FacetLabel,CategoryAssociation> categoryAssociations =
      new HashMap<FacetLabel,CategoryAssociation>();

  /**
   * Adds the {@link CategoryAssociation} for the given {@link FacetLabel
   * category}. Overrides any association that was previously set.
   */
  public void setAssociation(FacetLabel category, CategoryAssociation association) {
    if (association == null) {
      throw new IllegalArgumentException("cannot set a null association to a category");
    }
    categoryAssociations.put(category, association);
  }

  /**
   * Returns the {@link CategoryAssociation} that was set for the
   * {@link FacetLabel category}, or {@code null} if none was defined.
   */
  public CategoryAssociation getAssociation(FacetLabel category) {
    return categoryAssociations.get(category);
  }

  @Override
  public Iterator<FacetLabel> iterator() {
    return categoryAssociations.keySet().iterator();
  }

  /** Clears all category associations. */
  public void clear() {
    categoryAssociations.clear();
  }

  @Override
  public String toString() {
    return categoryAssociations.toString();
  }

}
@@ -1,80 +0,0 @@
package org.apache.lucene.facet.associations;

import java.io.IOException;

import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/** A {@link CategoryAssociation} that associates a float with a category. */
public class CategoryFloatAssociation implements CategoryAssociation {

  public static final String ASSOCIATION_LIST_ID = "$assoc_float$";

  private float value;

  public CategoryFloatAssociation() {
    // used for deserialization
  }

  public CategoryFloatAssociation(float value) {
    this.value = value;
  }

  @Override
  public void serialize(ByteArrayDataOutput output) {
    try {
      output.writeInt(Float.floatToIntBits(value));
    } catch (IOException e) {
      throw new RuntimeException("unexpected exception writing to a byte[]", e);
    }
  }

  @Override
  public void deserialize(ByteArrayDataInput input) {
    value = Float.intBitsToFloat(input.readInt());
  }

  @Override
  public int maxBytesNeeded() {
    // plain integer
    return 4;
  }

  @Override
  public String getCategoryListID() {
    return ASSOCIATION_LIST_ID;
  }

  /**
   * Returns the value associated with a category. If you used
   * {@link #CategoryFloatAssociation()}, you should call
   * {@link #deserialize(ByteArrayDataInput)} before calling this method,
   * otherwise the value returned is undefined.
   */
  public float getValue() {
    return value;
  }

  @Override
  public String toString() {
    return getClass().getSimpleName() + "(" + value + ")";
  }

}
@@ -1,80 +0,0 @@
package org.apache.lucene.facet.associations;

import java.io.IOException;

import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/** A {@link CategoryAssociation} that associates an integer with a category. */
public class CategoryIntAssociation implements CategoryAssociation {

  public static final String ASSOCIATION_LIST_ID = "$assoc_int$";

  private int value;

  public CategoryIntAssociation() {
    // used for deserialization
  }

  public CategoryIntAssociation(int value) {
    this.value = value;
  }

  @Override
  public void serialize(ByteArrayDataOutput output) {
    try {
      output.writeInt(value);
    } catch (IOException e) {
      throw new RuntimeException("unexpected exception writing to a byte[]", e);
    }
  }

  @Override
  public void deserialize(ByteArrayDataInput input) {
    value = input.readInt();
  }

  @Override
  public int maxBytesNeeded() {
    // plain integer
    return 4;
  }

  @Override
  public String getCategoryListID() {
    return ASSOCIATION_LIST_ID;
  }

  /**
   * Returns the value associated with a category. If you used
   * {@link #CategoryIntAssociation()}, you should call
   * {@link #deserialize(ByteArrayDataInput)} before calling this method,
   * otherwise the value returned is undefined.
   */
  public int getValue() {
    return value;
  }

  @Override
  public String toString() {
    return getClass().getSimpleName() + "(" + value + ")";
  }

}
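A quick round-trip sketch of the 4-byte contract above, using only the classes this file already imports:

    // Serialize into a buffer sized by maxBytesNeeded(), then read it back:
    CategoryIntAssociation written = new CategoryIntAssociation(42);
    byte[] buf = new byte[written.maxBytesNeeded()];
    written.serialize(new ByteArrayDataOutput(buf));

    CategoryIntAssociation read = new CategoryIntAssociation(); // deserialization constructor
    read.deserialize(new ByteArrayDataInput(buf));
    assert read.getValue() == 42;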
@@ -1,46 +0,0 @@
package org.apache.lucene.facet.associations;

import org.apache.lucene.facet.params.FacetIndexingParams;
import org.apache.lucene.facet.search.FacetRequest;
import org.apache.lucene.facet.search.FacetsAggregator;
import org.apache.lucene.facet.taxonomy.FacetLabel;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * A {@link FacetRequest} for weighting facets according to their float
 * association by summing the association values.
 * 
 * @lucene.experimental
 */
public class SumFloatAssociationFacetRequest extends FacetRequest {

  /**
   * Create a float association facet request for a given node in the
   * taxonomy.
   */
  public SumFloatAssociationFacetRequest(FacetLabel path, int num) {
    super(path, num);
  }

  @Override
  public FacetsAggregator createFacetsAggregator(FacetIndexingParams fip) {
    return new SumFloatAssociationFacetsAggregator();
  }

}
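For context, a hedged search-side sketch (not from this commit) of how such a request was typically wired into the 4.x-era facet search API; searcher and taxoReader are assumed open, and the exact FacetSearchParams/FacetsCollector signatures are an assumption based on that API generation:

    // Hypothetical search-side usage, assuming the 4.x FacetSearchParams/FacetsCollector API:
    FacetSearchParams fsp = new FacetSearchParams(
        new SumFloatAssociationFacetRequest(new FacetLabel("genre"), 10));
    FacetsCollector fc = FacetsCollector.create(fsp, searcher.getIndexReader(), taxoReader);
    searcher.search(new MatchAllDocsQuery(), fc);
    List<FacetResult> results = fc.getFacetResults(); // weights are the summed float associations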
@@ -1,92 +0,0 @@
package org.apache.lucene.facet.associations;

import java.io.IOException;

import org.apache.lucene.facet.params.CategoryListParams;
import org.apache.lucene.facet.search.FacetArrays;
import org.apache.lucene.facet.search.FacetRequest;
import org.apache.lucene.facet.search.FacetsAggregator;
import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs;
import org.apache.lucene.facet.search.OrdinalValueResolver;
import org.apache.lucene.facet.search.OrdinalValueResolver.FloatValueResolver;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.util.BytesRef;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * A {@link FacetsAggregator} which computes the weight of a category as the sum
 * of the float values associated with it in the result documents. Assumes that
 * the association encoded for each ordinal is {@link CategoryFloatAssociation}.
 * <p>
 * <b>NOTE:</b> this aggregator does not support
 * {@link #rollupValues(FacetRequest, int, int[], int[], FacetArrays)}. It only
 * aggregates the categories for which you added a {@link CategoryAssociation}.
 * 
 * @lucene.experimental
 */
public class SumFloatAssociationFacetsAggregator implements FacetsAggregator {

  private final BytesRef bytes = new BytesRef(32);

  @Override
  public void aggregate(MatchingDocs matchingDocs, CategoryListParams clp, FacetArrays facetArrays) throws IOException {
    BinaryDocValues dv = matchingDocs.context.reader().getBinaryDocValues(clp.field + CategoryFloatAssociation.ASSOCIATION_LIST_ID);
    if (dv == null) {
      return; // no float associations in this reader
    }

    final int length = matchingDocs.bits.length();
    final float[] values = facetArrays.getFloatArray();
    int doc = 0;
    while (doc < length && (doc = matchingDocs.bits.nextSetBit(doc)) != -1) {
      dv.get(doc, bytes);
      if (bytes.length > 0) {
        // aggregate float association values for ordinals
        int bytesUpto = bytes.offset + bytes.length;
        int pos = bytes.offset;
        while (pos < bytesUpto) {
          int ordinal = ((bytes.bytes[pos++] & 0xFF) << 24) | ((bytes.bytes[pos++] & 0xFF) << 16)
              | ((bytes.bytes[pos++] & 0xFF) << 8) | (bytes.bytes[pos++] & 0xFF);

          int value = ((bytes.bytes[pos++] & 0xFF) << 24) | ((bytes.bytes[pos++] & 0xFF) << 16)
              | ((bytes.bytes[pos++] & 0xFF) << 8) | (bytes.bytes[pos++] & 0xFF);

          values[ordinal] += Float.intBitsToFloat(value);
        }
      }
      ++doc;
    }
  }

  @Override
  public boolean requiresDocScores() {
    return false;
  }

  @Override
  public void rollupValues(FacetRequest fr, int ordinal, int[] children, int[] siblings, FacetArrays facetArrays) {
    // NO-OP: this aggregator does not roll up values to the parents.
  }

  @Override
  public OrdinalValueResolver createOrdinalValueResolver(FacetRequest facetRequest, FacetArrays arrays) {
    return new FloatValueResolver(arrays);
  }

}
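The hand-rolled shifts in the inner loop above are just an inlined big-endian int decode, matching the layout written by AssociationsListBuilder. The same loop could be written with ByteArrayDataInput (a sketch, trading a small per-document allocation for clarity):

    // Equivalent decode using ByteArrayDataInput (sketch):
    ByteArrayDataInput in = new ByteArrayDataInput(bytes.bytes, bytes.offset, bytes.length);
    while (in.getPosition() < bytes.offset + bytes.length) {
      int ordinal = in.readInt();
      values[ordinal] += Float.intBitsToFloat(in.readInt());
    }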
@@ -1,46 +0,0 @@
package org.apache.lucene.facet.associations;

import org.apache.lucene.facet.params.FacetIndexingParams;
import org.apache.lucene.facet.search.FacetRequest;
import org.apache.lucene.facet.search.FacetsAggregator;
import org.apache.lucene.facet.taxonomy.FacetLabel;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * A {@link FacetRequest} for weighting facets according to their integer
 * association by summing the association values.
 * 
 * @lucene.experimental
 */
public class SumIntAssociationFacetRequest extends FacetRequest {

  /**
   * Create an integer association facet request for a given node in the
   * taxonomy.
   */
  public SumIntAssociationFacetRequest(FacetLabel path, int num) {
    super(path, num);
  }

  @Override
  public FacetsAggregator createFacetsAggregator(FacetIndexingParams fip) {
    return new SumIntAssociationFacetsAggregator();
  }

}
@@ -1,91 +0,0 @@
package org.apache.lucene.facet.associations;

import java.io.IOException;

import org.apache.lucene.facet.params.CategoryListParams;
import org.apache.lucene.facet.search.FacetArrays;
import org.apache.lucene.facet.search.FacetRequest;
import org.apache.lucene.facet.search.FacetsAggregator;
import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs;
import org.apache.lucene.facet.search.OrdinalValueResolver;
import org.apache.lucene.facet.search.OrdinalValueResolver.IntValueResolver;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.util.BytesRef;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * A {@link FacetsAggregator} which computes the weight of a category as the sum
 * of the integer values associated with it in the result documents. Assumes
 * that the association encoded for each ordinal is
 * {@link CategoryIntAssociation}.
 * <p>
 * <b>NOTE:</b> this aggregator does not support
 * {@link #rollupValues(FacetRequest, int, int[], int[], FacetArrays)}. It only
 * aggregates the categories for which you added a {@link CategoryAssociation}.
 */
public class SumIntAssociationFacetsAggregator implements FacetsAggregator {

  private final BytesRef bytes = new BytesRef(32);

  @Override
  public void aggregate(MatchingDocs matchingDocs, CategoryListParams clp, FacetArrays facetArrays) throws IOException {
    BinaryDocValues dv = matchingDocs.context.reader().getBinaryDocValues(clp.field + CategoryIntAssociation.ASSOCIATION_LIST_ID);
    if (dv == null) {
      return; // no int associations in this reader
    }

    final int length = matchingDocs.bits.length();
    final int[] values = facetArrays.getIntArray();
    int doc = 0;
    while (doc < length && (doc = matchingDocs.bits.nextSetBit(doc)) != -1) {
      dv.get(doc, bytes);
      if (bytes.length > 0) {
        // aggregate association values for ordinals
        int bytesUpto = bytes.offset + bytes.length;
        int pos = bytes.offset;
        while (pos < bytesUpto) {
          int ordinal = ((bytes.bytes[pos++] & 0xFF) << 24) | ((bytes.bytes[pos++] & 0xFF) << 16)
              | ((bytes.bytes[pos++] & 0xFF) << 8) | (bytes.bytes[pos++] & 0xFF);

          int value = ((bytes.bytes[pos++] & 0xFF) << 24) | ((bytes.bytes[pos++] & 0xFF) << 16)
              | ((bytes.bytes[pos++] & 0xFF) << 8) | (bytes.bytes[pos++] & 0xFF);

          values[ordinal] += value;
        }
      }
      ++doc;
    }
  }

  @Override
  public boolean requiresDocScores() {
    return false;
  }

  @Override
  public void rollupValues(FacetRequest fr, int ordinal, int[] children, int[] siblings, FacetArrays facetArrays) {
    // NO-OP: this aggregator does not roll up values to the parents.
  }

  @Override
  public OrdinalValueResolver createOrdinalValueResolver(FacetRequest facetRequest, FacetArrays arrays) {
    return new IntValueResolver(arrays);
  }

}
@@ -1,25 +0,0 @@
<!--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements. See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-->
<html>
<head>
<title>Category Association</title>
</head>
<body>
Allows associating arbitrary values with a category. The value can be used e.g. to compute
the category's weight during faceted search.
</body>
</html>
@ -1,554 +0,0 @@
|
||||||
package org.apache.lucene.facet.collections;
|
|
||||||
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.Iterator;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* An Array-based hashtable which maps, similar to Java's HashMap, only
|
|
||||||
* performance tests showed it performs better.
|
|
||||||
* <p>
|
|
||||||
* The hashtable is constructed with a given capacity, or 16 as a default. In
|
|
||||||
* case there's not enough room for new pairs, the hashtable grows. Capacity is
|
|
||||||
* adjusted to a power of 2, and there are 2 * capacity entries for the hash.
|
|
||||||
* The pre allocated arrays (for keys, values) are at length of capacity + 1,
|
|
||||||
* where index 0 is used as 'Ground' or 'NULL'.
|
|
||||||
* <p>
|
|
||||||
* The arrays are allocated ahead of hash operations, and form an 'empty space'
|
|
||||||
* list, to which the <key,value> pair is allocated.
|
|
||||||
*
|
|
||||||
* @lucene.experimental
|
|
||||||
*/
|
|
||||||
public class ArrayHashMap<K,V> implements Iterable<V> {
|
|
||||||
|
|
||||||
/** Implements an IntIterator which iterates over all the allocated indexes. */
|
|
||||||
private final class IndexIterator implements IntIterator {
|
|
||||||
/**
|
|
||||||
* The last used baseHashIndex. Needed for "jumping" from one hash entry
|
|
||||||
* to another.
|
|
||||||
*/
|
|
||||||
private int baseHashIndex = 0;
|
|
||||||
|
|
||||||
/** The next not-yet-visited index. */
|
|
||||||
private int index = 0;
|
|
||||||
|
|
||||||
/** Index of the last visited pair. Used in {@link #remove()}. */
|
|
||||||
private int lastIndex = 0;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Create the Iterator, make <code>index</code> point to the "first"
|
|
||||||
* index which is not empty. If such does not exist (eg. the map is
|
|
||||||
* empty) it would be zero.
|
|
||||||
*/
|
|
||||||
public IndexIterator() {
|
|
||||||
for (baseHashIndex = 0; baseHashIndex < baseHash.length; ++baseHashIndex) {
|
|
||||||
index = baseHash[baseHashIndex];
|
|
||||||
if (index != 0) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean hasNext() {
|
|
||||||
return index != 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int next() {
|
|
||||||
// Save the last index visited
|
|
||||||
lastIndex = index;
|
|
||||||
|
|
||||||
// next the index
|
|
||||||
index = next[index];
|
|
||||||
|
|
||||||
// if the next index points to the 'Ground' it means we're done with
|
|
||||||
// the current hash entry and we need to jump to the next one. This
|
|
||||||
// is done until all the hash entries had been visited.
|
|
||||||
while (index == 0 && ++baseHashIndex < baseHash.length) {
|
|
||||||
index = baseHash[baseHashIndex];
|
|
||||||
}
|
|
||||||
|
|
||||||
return lastIndex;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
@SuppressWarnings("unchecked")
|
|
||||||
public void remove() {
|
|
||||||
ArrayHashMap.this.remove((K) keys[lastIndex]);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Implements an Iterator, used for iteration over the map's keys. */
|
|
||||||
private final class KeyIterator implements Iterator<K> {
|
|
||||||
private IntIterator iterator = new IndexIterator();
|
|
||||||
|
|
||||||
KeyIterator() { }
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean hasNext() {
|
|
||||||
return iterator.hasNext();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
@SuppressWarnings("unchecked")
|
|
||||||
public K next() {
|
|
||||||
return (K) keys[iterator.next()];
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void remove() {
|
|
||||||
iterator.remove();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Implements an Iterator, used for iteration over the map's values. */
|
|
||||||
private final class ValueIterator implements Iterator<V> {
|
|
||||||
private IntIterator iterator = new IndexIterator();
|
|
||||||
|
|
||||||
ValueIterator() { }
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean hasNext() {
|
|
||||||
return iterator.hasNext();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
@SuppressWarnings("unchecked")
|
|
||||||
public V next() {
|
|
||||||
return (V) values[iterator.next()];
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void remove() {
|
|
||||||
iterator.remove();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Default capacity - in case no capacity was specified in the constructor */
|
|
||||||
private static final int DEFAULT_CAPACITY = 16;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Holds the base hash entries. if the capacity is 2^N, than the base hash
|
|
||||||
* holds 2^(N+1).
|
|
||||||
*/
|
|
||||||
int[] baseHash;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* The current capacity of the map. Always 2^N and never less than 16. We
|
|
||||||
* never use the zero index. It is needed to improve performance and is also
|
|
||||||
* used as "ground".
|
|
||||||
*/
|
|
||||||
private int capacity;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* All objects are being allocated at map creation. Those objects are "free"
|
|
||||||
* or empty. Whenever a new pair comes along, a pair is being "allocated" or
|
|
||||||
* taken from the free-linked list. as this is just a free list.
|
|
||||||
*/
|
|
||||||
private int firstEmpty;
|
|
||||||
|
|
||||||
/** hashFactor is always (2^(N+1)) - 1. Used for faster hashing. */
|
|
||||||
private int hashFactor;
|
|
||||||
|
|
||||||
/** Holds the unique keys. */
|
|
||||||
Object[] keys;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* In case of collisions, we implement a double linked list of the colliding
|
|
||||||
* hash's with the following next[] and prev[]. Those are also used to store
|
|
||||||
* the "empty" list.
|
|
||||||
*/
|
|
||||||
int[] next;
|
|
||||||
|
|
||||||
private int prev;
|
|
||||||
|
|
||||||
/** Number of currently stored objects in the map. */
|
|
||||||
private int size;
|
|
||||||
|
|
||||||
/** Holds the values. */
|
|
||||||
Object[] values;
|
|
||||||
|
|
||||||
/** Constructs a map with default capacity. */
|
|
||||||
public ArrayHashMap() {
|
|
||||||
this(DEFAULT_CAPACITY);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Constructs a map with given capacity. Capacity is adjusted to a native
|
|
||||||
* power of 2, with minimum of 16.
|
|
||||||
*
|
|
||||||
* @param capacity minimum capacity for the map.
|
|
||||||
*/
|
|
||||||
public ArrayHashMap(int capacity) {
|
|
||||||
this.capacity = 16;
|
|
||||||
while (this.capacity < capacity) {
|
|
||||||
// Multiply by 2 as long as we're still under the requested capacity
|
|
||||||
this.capacity <<= 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// As mentioned, we use the first index (0) as 'Ground', so we need the
|
|
||||||
// length of the arrays to be one more than the capacity
|
|
||||||
int arrayLength = this.capacity + 1;
|
|
||||||
|
|
||||||
values = new Object[arrayLength];
|
|
||||||
keys = new Object[arrayLength];
|
|
||||||
next = new int[arrayLength];
|
|
||||||
|
|
||||||
// Hash entries are twice as big as the capacity.
|
|
||||||
int baseHashSize = this.capacity << 1;
|
|
||||||
|
|
||||||
baseHash = new int[baseHashSize];
|
|
||||||
|
|
||||||
// The has factor is 2^M - 1 which is used as an "AND" hashing operator.
|
|
||||||
// {@link #calcBaseHash()}
|
|
||||||
hashFactor = baseHashSize - 1;
|
|
||||||
|
|
||||||
size = 0;
|
|
||||||
|
|
||||||
clear();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Adds a pair to the map. Takes the first empty position from the
|
|
||||||
* empty-linked-list's head - {@link #firstEmpty}. New pairs are always
|
|
||||||
* inserted to baseHash, and are followed by the old colliding pair.
|
|
||||||
*/
|
|
||||||
private void prvt_put(K key, V value) {
|
|
||||||
// Hash entry to which the new pair would be inserted
|
|
||||||
int hashIndex = calcBaseHashIndex(key);
|
|
||||||
|
|
||||||
// 'Allocating' a pair from the "Empty" list.
|
|
||||||
int objectIndex = firstEmpty;
|
|
||||||
|
|
||||||
// Setting data
|
|
||||||
firstEmpty = next[firstEmpty];
|
|
||||||
values[objectIndex] = value;
|
|
||||||
keys[objectIndex] = key;
|
|
||||||
|
|
||||||
// Inserting the new pair as the first node in the specific hash entry
|
|
||||||
next[objectIndex] = baseHash[hashIndex];
|
|
||||||
baseHash[hashIndex] = objectIndex;
|
|
||||||
|
|
||||||
// Announcing a new pair was added!
|
|
||||||
++size;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Calculating the baseHash index using the internal internal <code>hashFactor</code>. */
|
|
||||||
protected int calcBaseHashIndex(K key) {
|
|
||||||
return key.hashCode() & hashFactor;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Empties the map. Generates the "Empty" space list for later allocation. */
|
|
||||||
public void clear() {
|
|
||||||
// Clears the hash entries
|
|
||||||
Arrays.fill(baseHash, 0);
|
|
||||||
|
|
||||||
// Set size to zero
|
|
||||||
size = 0;
|
|
||||||
|
|
||||||
// Mark all array entries as empty. This is done with
|
|
||||||
// <code>firstEmpty</code> pointing to the first valid index (1 as 0 is
|
|
||||||
// used as 'Ground').
|
|
||||||
firstEmpty = 1;
|
|
||||||
|
|
||||||
// And setting all the <code>next[i]</code> to point at
|
|
||||||
// <code>i+1</code>.
|
|
||||||
for (int i = 1; i < capacity;) {
|
|
||||||
next[i] = ++i;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Surly, the last one should point to the 'Ground'.
|
|
||||||
next[capacity] = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Returns true iff the key exists in the map. */
|
|
||||||
public boolean containsKey(K key) {
|
|
||||||
return find(key) != 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Returns true iff the object exists in the map. */
|
|
||||||
public boolean containsValue(Object o) {
|
|
||||||
for (Iterator<V> iterator = iterator(); iterator.hasNext();) {
|
|
||||||
V object = iterator.next();
|
|
||||||
if (object.equals(o)) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Returns the index of the given key, or zero if the key wasn't found. */
|
|
||||||
protected int find(K key) {
|
|
||||||
// Calculate the hash entry.
|
|
||||||
int baseHashIndex = calcBaseHashIndex(key);
|
|
||||||
|
|
||||||
// Start from the hash entry.
|
|
||||||
int localIndex = baseHash[baseHashIndex];
|
|
||||||
|
|
||||||
// while the index does not point to the 'Ground'
|
|
||||||
while (localIndex != 0) {
|
|
||||||
// returns the index found in case of of a matching key.
|
|
||||||
if (keys[localIndex].equals(key)) {
|
|
||||||
return localIndex;
|
|
||||||
}
|
|
||||||
|
|
||||||
// next the local index
|
|
||||||
localIndex = next[localIndex];
|
|
||||||
}
|
|
||||||
|
|
||||||
// If we got this far, it could only mean we did not find the key we
|
|
||||||
// were asked for. return 'Ground' index.
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Finds the actual index of a given key with it's baseHashIndex. Some methods
|
|
||||||
* use the baseHashIndex. If those call {@link #find} there's no need to
|
|
||||||
* re-calculate that hash.
|
|
||||||
*
|
|
||||||
* @return the index of the given key, or 0 if the key wasn't found.
|
|
||||||
*/
|
|
||||||
private int findForRemove(K key, int baseHashIndex) {
|
|
||||||
// Start from the hash entry.
|
|
||||||
prev = 0;
|
|
||||||
int index = baseHash[baseHashIndex];
|
|
||||||
|
|
||||||
// while the index does not point to the 'Ground'
|
|
||||||
while (index != 0) {
|
|
||||||
// returns the index found in case of of a matching key.
|
|
||||||
if (keys[index].equals(key)) {
|
|
||||||
return index;
|
|
||||||
}
|
|
||||||
|
|
||||||
// next the local index
|
|
||||||
prev = index;
|
|
||||||
index = next[index];
|
|
||||||
}
|
|
||||||
|
|
||||||
// If we got thus far, it could only mean we did not find the key we
|
|
||||||
// were asked for. return 'Ground' index.
|
|
||||||
return prev = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Returns the object mapped with the given key, or null if the key wasn't found. */
|
|
||||||
@SuppressWarnings("unchecked")
|
|
||||||
public V get(K key) {
|
|
||||||
return (V) values[find(key)];
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Allocates a new map of double the capacity, and fast-insert the old
|
|
||||||
* key-value pairs.
|
|
||||||
*/
|
|
||||||
@SuppressWarnings("unchecked")
|
|
||||||
protected void grow() {
|
|
||||||
ArrayHashMap<K,V> newmap = new ArrayHashMap<K,V>(capacity * 2);
|
|
||||||
|
|
||||||
// Iterates fast over the collection. Any valid pair is put into the new
|
|
||||||
// map without checking for duplicates or if there's enough space for
|
|
||||||
// it.
|
|
||||||
for (IndexIterator iterator = new IndexIterator(); iterator.hasNext();) {
|
|
||||||
int index = iterator.next();
|
|
||||||
newmap.prvt_put((K) keys[index], (V) values[index]);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Copy that's data into this.
|
|
||||||
capacity = newmap.capacity;
|
|
||||||
size = newmap.size;
|
|
||||||
firstEmpty = newmap.firstEmpty;
|
|
||||||
values = newmap.values;
|
|
    keys = newmap.keys;
    next = newmap.next;
    baseHash = newmap.baseHash;
    hashFactor = newmap.hashFactor;
  }

  /** Returns true iff the map is empty. */
  public boolean isEmpty() {
    return size == 0;
  }

  /** Returns an iterator on the mapped objects. */
  @Override
  public Iterator<V> iterator() {
    return new ValueIterator();
  }

  /** Returns an iterator on the map keys. */
  public Iterator<K> keyIterator() {
    return new KeyIterator();
  }

  /** Prints the baseHash array, used for debugging purposes. */
  @SuppressWarnings("unused")
  private String getBaseHashAsString() {
    return Arrays.toString(this.baseHash);
  }

  /**
   * Inserts the <key,value> pair into the map. If the key already exists,
   * this method updates the mapped value to the given one, returning the old
   * mapped value.
   * 
   * @return the old mapped value, or null if the key didn't exist.
   */
  @SuppressWarnings("unchecked")
  public V put(K key, V e) {
    // Does the key already exist?
    int index = find(key);

    // Yes - update the mapped value and return the old one.
    if (index != 0) {
      V old = (V) values[index];
      values[index] = e;
      return old;
    }

    // Is there enough room for a new pair?
    if (size == capacity) {
      // No? Then grow.
      grow();
    }

    // Now that everything is set, the pair can simply be inserted.
    prvt_put(key, e);

    return null;
  }

  /**
   * Removes a <key,value> pair from the map and returns the mapped value,
   * or null if none existed.
   * 
   * @param key used to find the value to remove
   * @return the removed value, or null if none existed.
   */
  @SuppressWarnings("unchecked")
  public V remove(K key) {
    int baseHashIndex = calcBaseHashIndex(key);
    int index = findForRemove(key, baseHashIndex);
    if (index != 0) {
      // If it is the first in the collision list, we should promote its
      // next colliding element.
      if (prev == 0) {
        baseHash[baseHashIndex] = next[index];
      }

      next[prev] = next[index];
      next[index] = firstEmpty;
      firstEmpty = index;
      --size;
      return (V) values[index];
    }

    return null;
  }

  /** Returns the number of pairs currently in the map. */
  public int size() {
    return this.size;
  }

  /**
   * Translates the mapped pairs' values into an array of Objects.
   * 
   * @return an Object array of all the values currently in the map.
   */
  public Object[] toArray() {
    int j = -1;
    Object[] array = new Object[size];

    // Iterate over the values, adding them to the array.
    for (Iterator<V> iterator = iterator(); iterator.hasNext();) {
      array[++j] = iterator.next();
    }
    return array;
  }

  /**
   * Translates the mapped pairs' values into an array of V.
   * 
   * @param a the array into which the elements of the list are to be stored,
   *        if it is big enough; otherwise, only as much data as fits is
   *        stored, and the entry after the last value (if any) is set to null.
   * @return an array containing the elements of the list
   */
  public V[] toArray(V[] a) {
    int j = 0;
    // Iterate over the values, adding them to the array.
    for (Iterator<V> iterator = iterator(); j < a.length
        && iterator.hasNext(); ++j) {
      a[j] = iterator.next();
    }
    if (j < a.length) {
      a[j] = null;
    }

    return a;
  }

  @Override
  public String toString() {
    StringBuffer sb = new StringBuffer();
    sb.append('{');
    Iterator<K> keyIterator = keyIterator();
    while (keyIterator.hasNext()) {
      K key = keyIterator.next();
      sb.append(key);
      sb.append('=');
      sb.append(get(key));
      if (keyIterator.hasNext()) {
        sb.append(',');
        sb.append(' ');
      }
    }
    sb.append('}');
    return sb.toString();
  }

  @Override
  public int hashCode() {
    return getClass().hashCode() ^ size();
  }

  @SuppressWarnings("unchecked")
  @Override
  public boolean equals(Object o) {
    // Guard against a foreign type instead of failing with a
    // ClassCastException.
    if (!(o instanceof ArrayHashMap)) {
      return false;
    }
    ArrayHashMap<K, V> that = (ArrayHashMap<K, V>) o;
    if (that.size() != this.size()) {
      return false;
    }

    Iterator<K> it = keyIterator();
    while (it.hasNext()) {
      K key = it.next();
      V v1 = this.get(key);
      V v2 = that.get(key);
      // Null-safe comparison: two null values are considered equal (the
      // original expression threw an NPE when both were null).
      if (v1 == null ? v2 != null : !v1.equals(v2)) {
        return false;
      }
    }
    return true;
  }
}
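
For reference, the removed class was driven along these lines; a minimal sketch, assuming ArrayHashMap lives in org.apache.lucene.facet.collections alongside the other classes removed in this commit and keeps the no-argument constructor its sibling classes have (the demo class itself is hypothetical):

import org.apache.lucene.facet.collections.ArrayHashMap;

public class ArrayHashMapDemo { // hypothetical demo, not part of the patch
  public static void main(String[] args) {
    ArrayHashMap<String, Integer> map = new ArrayHashMap<String, Integer>();
    System.out.println(map.put("a", 1)); // null - "a" was not mapped before
    System.out.println(map.put("a", 2)); // 1 - the previous mapping is returned
    System.out.println(map.remove("a")); // 2 - the pair's slot is recycled
    System.out.println(map.isEmpty());   // true
  }
}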
@ -1,31 +0,0 @@
package org.apache.lucene.facet.collections;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Iterator interface for primitive double iteration.
 * 
 * @lucene.experimental
 */
public interface DoubleIterator {

  boolean hasNext();
  double next();
  void remove();

}
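
These primitive iterator interfaces exist so callers can iterate without auto-boxing every element. A minimal sketch of a conforming implementation over a plain double[], assuming the interface is still on the classpath (the ArrayDoubleIterator class is made up for illustration):

import org.apache.lucene.facet.collections.DoubleIterator;

// Hypothetical helper, not part of the patch: iterates a double[] without boxing.
public class ArrayDoubleIterator implements DoubleIterator {
  private final double[] data;
  private int upto = 0;

  public ArrayDoubleIterator(double[] data) {
    this.data = data;
  }

  @Override
  public boolean hasNext() {
    return upto < data.length;
  }

  @Override
  public double next() {
    return data[upto++]; // primitive return - no Double allocation
  }

  @Override
  public void remove() {
    throw new UnsupportedOperationException(); // a plain array is fixed-size
  }
}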
@ -1,31 +0,0 @@
package org.apache.lucene.facet.collections;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Iterator interface for primitive float iteration.
 * 
 * @lucene.experimental
 */
public interface FloatIterator {

  boolean hasNext();
  float next();
  void remove();

}
@ -1,634 +0,0 @@
package org.apache.lucene.facet.collections;

import java.util.Arrays;
import java.util.Iterator;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * An array-based hashtable which maps a primitive float to an Object of
 * generic type T.<br>
 * The hashtable is constructed with a given capacity, or 16 as a default. In
 * case there's not enough room for new pairs, the hashtable grows.<br>
 * Capacity is adjusted to a power of 2, and there are 2 * capacity entries
 * for the hash.
 * 
 * The pre-allocated arrays (for keys, values) are of length capacity + 1,
 * where index 0 is used as 'Ground' or 'NULL'.<br>
 * 
 * The arrays are allocated ahead of hash operations, and form an 'empty
 * space' list, from which each key,value pair is allocated.
 * 
 * @lucene.experimental
 */
public class FloatToObjectMap<T> implements Iterable<T> {

  /**
   * Implements an IntIterator which iterates over all the allocated indexes.
   */
  private final class IndexIterator implements IntIterator {
    /**
     * The last used baseHashIndex. Needed for "jumping" from one hash entry
     * to another.
     */
    private int baseHashIndex = 0;

    /**
     * The next not-yet-visited index.
     */
    private int index = 0;

    /**
     * Index of the last visited pair. Used in {@link #remove()}.
     */
    private int lastIndex = 0;

    /**
     * Create the iterator, making <code>index</code> point to the "first"
     * non-empty index. If no such index exists (e.g. the map is empty), it
     * remains zero.
     */
    public IndexIterator() {
      for (baseHashIndex = 0; baseHashIndex < baseHash.length; ++baseHashIndex) {
        index = baseHash[baseHashIndex];
        if (index != 0) {
          break;
        }
      }
    }

    @Override
    public boolean hasNext() {
      return (index != 0);
    }

    @Override
    public int next() {
      // Save the last index visited
      lastIndex = index;

      // Advance the index
      index = next[index];

      // If the next index points to the 'Ground' it means we're done with
      // the current hash entry and we need to jump to the next one. This
      // is done until all the hash entries have been visited.
      while (index == 0 && ++baseHashIndex < baseHash.length) {
        index = baseHash[baseHashIndex];
      }

      return lastIndex;
    }

    @Override
    public void remove() {
      FloatToObjectMap.this.remove(keys[lastIndex]);
    }

  }

  /**
   * Implements a FloatIterator, used for iteration over the map's keys.
   */
  private final class KeyIterator implements FloatIterator {
    private IntIterator iterator = new IndexIterator();

    KeyIterator() { }

    @Override
    public boolean hasNext() {
      return iterator.hasNext();
    }

    @Override
    public float next() {
      return keys[iterator.next()];
    }

    @Override
    public void remove() {
      iterator.remove();
    }
  }

  /**
   * Implements an Iterator of a generic type T, used for iteration over the
   * map's values.
   */
  private final class ValueIterator implements Iterator<T> {
    private IntIterator iterator = new IndexIterator();

    ValueIterator() { }

    @Override
    public boolean hasNext() {
      return iterator.hasNext();
    }

    @Override
    @SuppressWarnings("unchecked")
    public T next() {
      return (T) values[iterator.next()];
    }

    @Override
    public void remove() {
      iterator.remove();
    }
  }

  /**
   * Default capacity - used in case no capacity was specified in the
   * constructor.
   */
  private static int defaultCapacity = 16;

  /**
   * Holds the base hash entries: if the capacity is 2^N, the base hash
   * holds 2^(N+1) entries.
   */
  int[] baseHash;

  /**
   * The current capacity of the map. Always 2^N and never less than 16. We
   * never use the zero index. It is needed to improve performance and is also
   * used as "ground".
   */
  private int capacity;

  /**
   * All entries are allocated at map creation and start out "free" or empty.
   * Whenever a new pair comes along, a slot is "allocated", i.e. taken from
   * the head of the free list.
   */
  private int firstEmpty;

  /**
   * hashFactor is always (2^(N+1)) - 1. Used for faster hashing.
   */
  private int hashFactor;

  /**
   * This array holds the unique keys.
   */
  float[] keys;

  /**
   * In case of collisions, the colliding entries are chained through the
   * following next[] array, which is also used to store the "empty" list.
   */
  int[] next;

  private int prev;

  /**
   * Number of objects currently in the map.
   */
  private int size;

  /**
   * This array holds the values.
   */
  Object[] values;

  /**
   * Constructs a map with default capacity.
   */
  public FloatToObjectMap() {
    this(defaultCapacity);
  }

  /**
   * Constructs a map with the given capacity. Capacity is adjusted to a
   * power of 2, with a minimum of 16.
   * 
   * @param capacity
   *          minimum capacity for the map.
   */
  public FloatToObjectMap(int capacity) {
    this.capacity = 16;
    // Minimum capacity is 16.
    while (this.capacity < capacity) {
      // Multiply by 2 as long as we're still under the requested capacity
      this.capacity <<= 1;
    }

    // As mentioned, we use the first index (0) as 'Ground', so we need the
    // length of the arrays to be one more than the capacity
    int arrayLength = this.capacity + 1;

    this.values = new Object[arrayLength];
    this.keys = new float[arrayLength];
    this.next = new int[arrayLength];

    // Hash entries are twice as big as the capacity.
    int baseHashSize = this.capacity << 1;

    this.baseHash = new int[baseHashSize];

    // The hash factor is 2^M - 1, used as an "AND" mask in
    // {@link #calcBaseHashIndex(float)}.
    this.hashFactor = baseHashSize - 1;

    this.size = 0;

    clear();
  }

  /**
   * Adds a pair to the map. Takes the first empty slot from the
   * empty-linked-list's head - {@link #firstEmpty}.
   * 
   * New pairs are always inserted at the head of their baseHash chain,
   * followed by the previously colliding pairs.
   * 
   * @param key
   *          float key which maps the given Object
   * @param e
   *          element which is being mapped using the given key
   */
  private void prvt_put(float key, T e) {
    // Hash entry to which the new pair will be inserted
    int hashIndex = calcBaseHashIndex(key);

    // 'Allocate' a slot from the "Empty" list.
    int objectIndex = firstEmpty;

    // Set the data
    firstEmpty = next[firstEmpty];
    values[objectIndex] = e;
    keys[objectIndex] = key;

    // Insert the new pair as the first node in the specific hash entry
    next[objectIndex] = baseHash[hashIndex];
    baseHash[hashIndex] = objectIndex;

    // Announce that a new pair was added!
    ++size;
  }
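
  // Illustrative aside (not part of the original class): a worked example of
  // the layout prvt_put maintains. Assuming a fresh map of capacity 16 and
  // two keys k1, k2 that both hash to baseHash slot 5, after put(k1, v1) and
  // put(k2, v2):
  //
  //   baseHash[5] = 2   // k2 was inserted last, so it heads the chain
  //   next[2]     = 1   // ...and links to k1's slot
  //   next[1]     = 0   // 0 is 'Ground', the end of the chain
  //   firstEmpty  = 3   // slots 3..16 remain on the free list
  //
  // find() walks the chain from baseHash[slot] through next[] until it hits 0.
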
  /**
   * Calculates the baseHash index using the internal <code>hashFactor</code>.
   */
  protected int calcBaseHashIndex(float key) {
    return Float.floatToIntBits(key) & hashFactor;
  }

  /**
   * Empties the map. Generates the "Empty" space list for later allocation.
   */
  public void clear() {
    // Clear the hash entries
    Arrays.fill(this.baseHash, 0);

    // Set size to zero
    size = 0;

    // Mark all array entries as empty, with <code>firstEmpty</code>
    // pointing to the first valid index (1, as 0 is used as 'Ground').
    firstEmpty = 1;

    // And set all the <code>next[i]</code> to point at <code>i+1</code>.
    for (int i = 1; i < this.capacity;) {
      next[i] = ++i;
    }

    // Surely, the last one should point to the 'Ground'.
    next[this.capacity] = 0;
  }

  /**
   * Checks if a given key exists in the map.
   * 
   * @param key
   *          the key that is checked against the map data.
   * @return true if the key exists in the map, false otherwise.
   */
  public boolean containsKey(float key) {
    return find(key) != 0;
  }

  /**
   * Checks if the given object exists in the map.<br>
   * This method iterates over the collection, trying to find an equal object.
   * 
   * @param o
   *          object that is checked against the map data.
   * @return true if the object exists in the map (in the .equals() sense),
   *         false otherwise.
   */
  public boolean containsValue(Object o) {
    for (Iterator<T> iterator = iterator(); iterator.hasNext();) {
      T object = iterator.next();
      // Null-safe comparison, in case a null value was mapped (the original
      // expression threw an NPE for a null value).
      if (object == null ? o == null : object.equals(o)) {
        return true;
      }
    }
    return false;
  }

  /**
   * Finds the actual index of a given key.
   * 
   * @return index of the key, or zero if the key wasn't found.
   */
  protected int find(float key) {
    // Calculate the hash entry.
    int baseHashIndex = calcBaseHashIndex(key);

    // Start from the hash entry.
    int localIndex = baseHash[baseHashIndex];

    // While the index does not point to the 'Ground'
    while (localIndex != 0) {
      // Return the index found in case of a matching key.
      if (keys[localIndex] == key) {
        return localIndex;
      }

      // Advance the local index
      localIndex = next[localIndex];
    }

    // If we got this far, it could only mean we did not find the key we
    // were asked for. Return the 'Ground' index.
    return 0;
  }

  /**
   * Finds the actual index of a given key with its baseHashIndex.<br>
   * Some methods already have the baseHashIndex at hand; if those called
   * {@link #find}, that hash would be re-calculated needlessly.
   * 
   * @return the index of the given key, or 0 as 'Ground' if the key wasn't
   *         found.
   */
  private int findForRemove(float key, int baseHashIndex) {
    // Start from the hash entry.
    this.prev = 0;
    int index = baseHash[baseHashIndex];

    // While the index does not point to the 'Ground'
    while (index != 0) {
      // Return the index found in case of a matching key.
      if (keys[index] == key) {
        return index;
      }

      // Advance the local index
      prev = index;
      index = next[index];
    }

    // If we got this far, it could only mean we did not find the key we
    // were asked for. Return the 'Ground' index.
    this.prev = 0;
    return 0;
  }

  /**
   * Returns the object mapped to the given key.
   * 
   * @param key
   *          float whose mapped object we're interested in.
   * @return the object mapped to the given key, or null if the key wasn't
   *         found.
   */
  @SuppressWarnings("unchecked")
  public T get(float key) {
    return (T) values[find(key)];
  }

  /**
   * Grows the map. Allocates a new map of double the capacity, and
   * fast-inserts the old key-value pairs.
   */
  @SuppressWarnings("unchecked")
  protected void grow() {
    FloatToObjectMap<T> that = new FloatToObjectMap<T>(this.capacity * 2);

    // Iterates fast over the collection. Any valid pair is put into the new
    // map without checking for duplicates or whether there's enough space
    // for it.
    for (IndexIterator iterator = new IndexIterator(); iterator.hasNext();) {
      int index = iterator.next();
      that.prvt_put(this.keys[index], (T) this.values[index]);
    }

    // Copy that's data into this.
    this.capacity = that.capacity;
    this.size = that.size;
    this.firstEmpty = that.firstEmpty;
    this.values = that.values;
    this.keys = that.keys;
    this.next = that.next;
    this.baseHash = that.baseHash;
    this.hashFactor = that.hashFactor;
  }

  /** Returns true if the map is empty, false otherwise. */
  public boolean isEmpty() {
    return size == 0;
  }

  /**
   * Returns a new iterator for the mapped objects.
   */
  @Override
  public Iterator<T> iterator() {
    return new ValueIterator();
  }

  /** Returns an iterator on the map keys. */
  public FloatIterator keyIterator() {
    return new KeyIterator();
  }

  /**
   * Prints the baseHash array, used for debugging purposes.
   */
  @SuppressWarnings("unused")
  private String getBaseHashAsString() {
    return Arrays.toString(this.baseHash);
  }

  /**
   * Inserts the <key,value> pair into the map. If the key already exists,
   * this method updates the mapped value to the given one, returning the old
   * mapped value.
   * 
   * @return the old mapped value, or null if the key didn't exist.
   */
  @SuppressWarnings("unchecked")
  public T put(float key, T e) {
    // Does the key already exist?
    int index = find(key);

    // Yes - update the mapped value and return the old one.
    if (index != 0) {
      T old = (T) values[index];
      values[index] = e;
      return old;
    }

    // Is there enough room for a new pair?
    if (size == capacity) {
      // No? Then grow.
      grow();
    }

    // Now that everything is set, the pair can simply be inserted.
    prvt_put(key, e);

    return null;
  }

  /**
   * Removes a <key,value> pair from the map and returns the mapped value,
   * or null if none existed.
   * 
   * @param key used to find the value to remove
   * @return the removed value, or null if none existed.
   */
  @SuppressWarnings("unchecked")
  public T remove(float key) {
    int baseHashIndex = calcBaseHashIndex(key);
    int index = findForRemove(key, baseHashIndex);
    if (index != 0) {
      // If it is the first in the collision list, we should promote its
      // next colliding element.
      if (prev == 0) {
        baseHash[baseHashIndex] = next[index];
      }

      next[prev] = next[index];
      next[index] = firstEmpty;
      firstEmpty = index;
      --size;
      return (T) values[index];
    }

    return null;
  }

  /**
   * @return number of pairs currently in the map
   */
  public int size() {
    return this.size;
  }

  /**
   * Translates the mapped pairs' values into an array of Objects.
   * 
   * @return an Object array of all the values currently in the map.
   */
  public Object[] toArray() {
    int j = -1;
    Object[] array = new Object[size];

    // Iterate over the values, adding them to the array.
    for (Iterator<T> iterator = iterator(); iterator.hasNext();) {
      array[++j] = iterator.next();
    }
    return array;
  }

  /**
   * Translates the mapped pairs' values into an array of T.
   * 
   * @param a
   *          the array into which the elements of the list are to be
   *          stored, if it is big enough; otherwise, only as much data as
   *          fits is stored, and the entry after the last value (if any) is
   *          set to null.
   * 
   * @return an array containing the elements of the list
   */
  public T[] toArray(T[] a) {
    int j = 0;
    // Iterate over the values, adding them to the array.
    for (Iterator<T> iterator = iterator(); j < a.length
        && iterator.hasNext(); ++j) {
      a[j] = iterator.next();
    }

    if (j < a.length) {
      a[j] = null;
    }

    return a;
  }

  @Override
  public String toString() {
    StringBuffer sb = new StringBuffer();
    sb.append('{');
    FloatIterator keyIterator = keyIterator();
    while (keyIterator.hasNext()) {
      float key = keyIterator.next();
      sb.append(key);
      sb.append('=');
      sb.append(get(key));
      if (keyIterator.hasNext()) {
        sb.append(',');
        sb.append(' ');
      }
    }
    sb.append('}');
    return sb.toString();
  }

  @Override
  public int hashCode() {
    return getClass().hashCode() ^ size();
  }

  @SuppressWarnings("unchecked")
  @Override
  public boolean equals(Object o) {
    // Guard against a foreign type instead of failing with a
    // ClassCastException.
    if (!(o instanceof FloatToObjectMap)) {
      return false;
    }
    FloatToObjectMap<T> that = (FloatToObjectMap<T>) o;
    if (that.size() != this.size()) {
      return false;
    }

    FloatIterator it = keyIterator();
    while (it.hasNext()) {
      float key = it.next();
      if (!that.containsKey(key)) {
        return false;
      }

      T v1 = this.get(key);
      T v2 = that.get(key);
      // Null-safe comparison: two null values are considered equal (the
      // original expression threw an NPE when both were null).
      if (v1 == null ? v2 != null : !v1.equals(v2)) {
        return false;
      }
    }
    return true;
  }
}
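
A minimal usage sketch for the class above: put/get plus primitive key iteration (the demo class is hypothetical; everything it calls is declared in the file above):

import org.apache.lucene.facet.collections.FloatIterator;
import org.apache.lucene.facet.collections.FloatToObjectMap;

public class FloatToObjectMapDemo { // hypothetical demo, not part of the patch
  public static void main(String[] args) {
    FloatToObjectMap<String> map = new FloatToObjectMap<String>();
    map.put(1.5f, "one and a half");
    map.put(2.5f, "two and a half");

    // Keys come back as primitives - no Float boxing on the way out.
    FloatIterator keys = map.keyIterator();
    while (keys.hasNext()) {
      float key = keys.next();
      System.out.println(key + " -> " + map.get(key));
    }
  }
}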
@ -1,252 +0,0 @@
package org.apache.lucene.facet.collections;

import java.util.Arrays;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * A class wrapper for a grow-able int[] which can be sorted and intersected
 * with other IntArrays.
 * 
 * @lucene.experimental
 */
public class IntArray {

  /**
   * The int[] which holds the data.
   */
  private int[] data;

  /**
   * Holds the number of items in the array.
   */
  private int size;

  /**
   * A flag which indicates whether a sort should occur, or the array is
   * already sorted.
   */
  private boolean shouldSort;

  /**
   * Constructs a default IntArray of size 0, for which surely no sort
   * needs to occur.
   */
  public IntArray() {
    init(true);
  }

  private void init(boolean realloc) {
    size = 0;
    if (realloc) {
      data = new int[0];
    }
    shouldSort = false;
  }

  /**
   * Intersects the data with a given {@link IntHashSet}.
   * 
   * @param set
   *          a given IntHashSet which holds the data to be intersected
   *          against
   */
  public void intersect(IntHashSet set) {
    int newSize = 0;
    for (int i = 0; i < size; ++i) {
      if (set.contains(data[i])) {
        data[newSize] = data[i];
        ++newSize;
      }
    }
    this.size = newSize;
  }

  /**
   * Intersects the data with a given IntArray.
   * 
   * @param other
   *          a given IntArray which holds the data to be intersected against
   */
  public void intersect(IntArray other) {
    sort();
    other.sort();

    int myIndex = 0;
    int otherIndex = 0;
    int newSize = 0;
    if (this.size > other.size) {
      while (otherIndex < other.size && myIndex < size) {
        while (otherIndex < other.size
            && other.data[otherIndex] < data[myIndex]) {
          ++otherIndex;
        }
        if (otherIndex == other.size) {
          break;
        }
        while (myIndex < size && other.data[otherIndex] > data[myIndex]) {
          ++myIndex;
        }
        // Guard against falling off the end before comparing (the original
        // code could read data[size] here).
        if (myIndex == size) {
          break;
        }
        if (other.data[otherIndex] == data[myIndex]) {
          data[newSize++] = data[myIndex];
          ++otherIndex;
          ++myIndex;
        }
      }
    } else {
      while (otherIndex < other.size && myIndex < size) {
        while (myIndex < size && other.data[otherIndex] > data[myIndex]) {
          ++myIndex;
        }
        if (myIndex == size) {
          break;
        }
        while (otherIndex < other.size
            && other.data[otherIndex] < data[myIndex]) {
          ++otherIndex;
        }
        // Same out-of-bounds guard for the other side of the merge.
        if (otherIndex == other.size) {
          break;
        }
        if (other.data[otherIndex] == data[myIndex]) {
          data[newSize++] = data[myIndex];
          ++otherIndex;
          ++myIndex;
        }
      }
    }
    this.size = newSize;
  }
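
  // Illustrative aside (not part of the original class): intersect above is a
  // classic sorted-merge intersection. For example, intersecting
  // this = [1, 3, 5, 7] with other = [3, 4, 7]:
  //
  //   skip other up to 3, skip this up to 3 -> match, keep 3
  //   skip other up to 7 (past 4), skip this up to 7 -> match, keep 7
  //
  // leaving this = [3, 7]. After the two sorts, the merge itself runs in
  // O(size + other.size).
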
  /**
   * Returns the size of the array: not the allocated size, but the number of
   * values actually set.
   * 
   * @return the (filled) size of the array
   */
  public int size() {
    return size;
  }

  /**
   * Adds a value to the array.
   * 
   * @param value
   *          value to be added
   */
  public void addToArray(int value) {
    if (size == data.length) {
      int[] newArray = new int[2 * size + 1];
      System.arraycopy(data, 0, newArray, 0, size);
      data = newArray;
    }
    data[size] = value;
    ++size;
    shouldSort = true;
  }

  /**
   * Equals method. Checks the sizes, then the values from the last index to
   * the first (statistically, for random data either direction is the same,
   * but for our specific use scanning backwards finds differences faster).
   */
  @Override
  public boolean equals(Object o) {
    if (!(o instanceof IntArray)) {
      return false;
    }

    IntArray array = (IntArray) o;
    if (array.size != size) {
      return false;
    }

    sort();
    array.sort();

    boolean equal = true;

    for (int i = size; i > 0 && equal;) {
      --i;
      equal = (array.data[i] == this.data[i]);
    }

    return equal;
  }

  /**
   * Sorts the data, if needed.
   */
  public void sort() {
    if (shouldSort) {
      shouldSort = false;
      Arrays.sort(data, 0, size);
    }
  }

  /**
   * Calculates a hash code, so the array can be used as a hashtable key.
   */
  @Override
  public int hashCode() {
    int hash = 0;
    for (int i = 0; i < size; ++i) {
      hash = data[i] ^ (hash * 31);
    }
    return hash;
  }

  /**
   * Gets the element at a specific index.
   * 
   * @param i
   *          index of the element to be retrieved.
   */
  public int get(int i) {
    if (i >= size) {
      throw new ArrayIndexOutOfBoundsException(i);
    }
    return this.data[i];
  }

  public void set(int idx, int value) {
    if (idx >= size) {
      throw new ArrayIndexOutOfBoundsException(idx);
    }
    this.data[idx] = value;
  }

  /**
   * toString or not toString. That is the question!
   */
  @Override
  public String toString() {
    String s = "(" + size + ") ";
    for (int i = 0; i < size; ++i) {
      s += "" + data[i] + ", ";
    }
    return s;
  }

  /**
   * Clears the IntArray (sets the filled size to zero).
   * 
   * @param resize
   *          if true, clear also allocates a new array of size 0,
   *          essentially 'clearing' the array and freeing memory.
   */
  public void clear(boolean resize) {
    init(resize);
  }

}
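
A minimal usage sketch of IntArray's add/intersect cycle (the demo class is hypothetical; the calls and the printed format follow the code above):

import org.apache.lucene.facet.collections.IntArray;

public class IntArrayDemo { // hypothetical demo, not part of the patch
  public static void main(String[] args) {
    IntArray a = new IntArray();
    IntArray b = new IntArray();
    for (int v : new int[] {7, 3, 5, 1}) a.addToArray(v);
    for (int v : new int[] {3, 4, 7})    b.addToArray(v);

    a.intersect(b);        // sorts both sides, then merge-intersects
    System.out.println(a); // prints: (2) 3, 7,
  }
}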
@ -1,548 +0,0 @@
package org.apache.lucene.facet.collections;

import java.util.Arrays;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * A set of primitive ints, implemented as a hashtable of int->int.
 * 
 * @lucene.experimental
 */
public class IntHashSet {

  // TODO (Facet): This is wasteful as the "values" are actually the "keys" and
  // we could spare this amount of space (capacity * sizeof(int)). Perhaps even
  // though it is not OOP, we should re-implement the hash for just that cause.

  /**
   * Implements an IntIterator which iterates over all the allocated indexes.
   */
  private final class IndexIterator implements IntIterator {
    /**
     * The last used baseHashIndex. Needed for "jumping" from one hash entry
     * to another.
     */
    private int baseHashIndex = 0;

    /**
     * The next not-yet-visited index.
     */
    private int index = 0;

    /**
     * Index of the last visited pair. Used in {@link #remove()}.
     */
    private int lastIndex = 0;

    /**
     * Create the iterator, making <code>index</code> point to the "first"
     * non-empty index. If no such index exists (e.g. the set is empty), it
     * remains zero.
     */
    public IndexIterator() {
      for (baseHashIndex = 0; baseHashIndex < baseHash.length; ++baseHashIndex) {
        index = baseHash[baseHashIndex];
        if (index != 0) {
          break;
        }
      }
    }

    @Override
    public boolean hasNext() {
      return (index != 0);
    }

    @Override
    public int next() {
      // Save the last index visited
      lastIndex = index;

      // Advance the index
      index = next[index];

      // If the next index points to the 'Ground' it means we're done with
      // the current hash entry and we need to jump to the next one. This
      // is done until all the hash entries have been visited.
      while (index == 0 && ++baseHashIndex < baseHash.length) {
        index = baseHash[baseHashIndex];
      }

      return lastIndex;
    }

    @Override
    public void remove() {
      IntHashSet.this.remove(keys[lastIndex]);
    }

  }

  /**
   * Implements an IntIterator, used for iteration over the set's keys.
   */
  private final class KeyIterator implements IntIterator {
    private IntIterator iterator = new IndexIterator();

    KeyIterator() { }

    @Override
    public boolean hasNext() {
      return iterator.hasNext();
    }

    @Override
    public int next() {
      return keys[iterator.next()];
    }

    @Override
    public void remove() {
      iterator.remove();
    }
  }

  /**
   * Default capacity - used in case no capacity was specified in the
   * constructor.
   */
  private static int defaultCapacity = 16;

  /**
   * Holds the base hash entries: if the capacity is 2^N, the base hash
   * holds 2^(N+1) entries.
   */
  int[] baseHash;

  /**
   * The current capacity of the set. Always 2^N and never less than 16. We
   * never use the zero index. It is needed to improve performance and is also
   * used as "ground".
   */
  private int capacity;

  /**
   * All entries are allocated at set creation and start out "free" or empty.
   * Whenever a new key comes along, a slot is "allocated", i.e. taken from
   * the head of the free list.
   */
  private int firstEmpty;

  /**
   * hashFactor is always (2^(N+1)) - 1. Used for faster hashing.
   */
  private int hashFactor;

  /**
   * This array holds the unique keys.
   */
  int[] keys;

  /**
   * In case of collisions, the colliding entries are chained through the
   * following next[] array, which is also used to store the "empty" list.
   */
  int[] next;

  private int prev;

  /**
   * Number of keys currently in the set.
   */
  private int size;

  /**
   * Constructs a set with default capacity.
   */
  public IntHashSet() {
    this(defaultCapacity);
  }

  /**
   * Constructs a set with the given capacity. Capacity is adjusted to a
   * power of 2, with a minimum of 16.
   * 
   * @param capacity
   *          minimum capacity for the set.
   */
  public IntHashSet(int capacity) {
    this.capacity = 16;
    // Minimum capacity is 16.
    while (this.capacity < capacity) {
      // Multiply by 2 as long as we're still under the requested capacity
      this.capacity <<= 1;
    }

    // As mentioned, we use the first index (0) as 'Ground', so we need the
    // length of the arrays to be one more than the capacity
    int arrayLength = this.capacity + 1;

    this.keys = new int[arrayLength];
    this.next = new int[arrayLength];

    // Hash entries are twice as big as the capacity.
    int baseHashSize = this.capacity << 1;

    this.baseHash = new int[baseHashSize];

    // The hash factor is 2^M - 1, used as an "AND" mask in
    // {@link #calcBaseHashIndex(int)}.
    this.hashFactor = baseHashSize - 1;

    this.size = 0;

    clear();
  }

  /**
   * Adds a key to the set. Takes the first empty slot from the
   * empty-linked-list's head - {@link #firstEmpty}.
   * 
   * New keys are always inserted at the head of their baseHash chain,
   * followed by the previously colliding entries.
   * 
   * @param key
   *          the key to add to the set
   */
  private void prvt_add(int key) {
    // Hash entry to which the new key will be inserted
    int hashIndex = calcBaseHashIndex(key);

    // 'Allocate' a slot from the "Empty" list.
    int objectIndex = firstEmpty;

    // Set the data
    firstEmpty = next[firstEmpty];
    keys[objectIndex] = key;

    // Insert the new key as the first node in the specific hash entry
    next[objectIndex] = baseHash[hashIndex];
    baseHash[hashIndex] = objectIndex;

    // Announce that a new key was added!
    ++size;
  }

  /**
   * Calculates the baseHash index using the internal <code>hashFactor</code>.
   */
  protected int calcBaseHashIndex(int key) {
    return key & hashFactor;
  }
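
  // Illustrative aside (not part of the original class): since
  // baseHash.length is a power of two, "key & hashFactor" picks the same
  // slot as "key % baseHash.length" would for non-negative keys, without a
  // costly division. E.g. with capacity 16, baseHash has 32 entries,
  // hashFactor is 31 (binary 11111), and calcBaseHashIndex(37) == 37 & 31
  // == 5. Masking also keeps negative keys in range, where '%' would go
  // negative.
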
  /**
   * Empties the set. Generates the "Empty" space list for later allocation.
   */
  public void clear() {
    // Clear the hash entries
    Arrays.fill(this.baseHash, 0);

    // Set size to zero
    size = 0;

    // Mark all array entries as empty, with <code>firstEmpty</code>
    // pointing to the first valid index (1, as 0 is used as 'Ground').
    firstEmpty = 1;

    // And set all the <code>next[i]</code> to point at <code>i+1</code>.
    for (int i = 1; i < this.capacity;) {
      next[i] = ++i;
    }

    // Surely, the last one should point to the 'Ground'.
    next[this.capacity] = 0;
  }

  /**
   * Checks if a given value exists in the set.
   * 
   * @param value
   *          the value that is checked against the set's data.
   * @return true if the value exists in the set, false otherwise.
   */
  public boolean contains(int value) {
    return find(value) != 0;
  }

  /**
   * Finds the actual index of a given key.
   * 
   * @return index of the key, or zero if the key wasn't found.
   */
  protected int find(int key) {
    // Calculate the hash entry.
    int baseHashIndex = calcBaseHashIndex(key);

    // Start from the hash entry.
    int localIndex = baseHash[baseHashIndex];

    // While the index does not point to the 'Ground'
    while (localIndex != 0) {
      // Return the index found in case of a matching key.
      if (keys[localIndex] == key) {
        return localIndex;
      }

      // Advance the local index
      localIndex = next[localIndex];
    }

    // If we got this far, it could only mean we did not find the key we
    // were asked for. Return the 'Ground' index.
    return 0;
  }

  /**
   * Finds the actual index of a given key with its baseHashIndex.<br>
   * Some methods already have the baseHashIndex at hand; if those called
   * {@link #find}, that hash would be re-calculated needlessly.
   * 
   * @return the index of the given key, or 0 as 'Ground' if the key wasn't
   *         found.
   */
  private int findForRemove(int key, int baseHashIndex) {
    // Start from the hash entry.
    this.prev = 0;
    int index = baseHash[baseHashIndex];

    // While the index does not point to the 'Ground'
    while (index != 0) {
      // Return the index found in case of a matching key.
      if (keys[index] == key) {
        return index;
      }

      // Advance the local index
      prev = index;
      index = next[index];
    }

    // If we got this far, it could only mean we did not find the key we
    // were asked for. Return the 'Ground' index.
    this.prev = 0;
    return 0;
  }

  /**
   * Grows the set. Allocates a new set of double the capacity, and
   * fast-inserts the old keys.
   */
  protected void grow() {
    IntHashSet that = new IntHashSet(this.capacity * 2);

    // Iterates fast over the collection. Any valid key is put into the new
    // set without checking for duplicates or whether there's enough space
    // for it.
    for (IndexIterator iterator = new IndexIterator(); iterator.hasNext();) {
      int index = iterator.next();
      that.prvt_add(this.keys[index]);
    }

    // Copy that's data into this.
    this.capacity = that.capacity;
    this.size = that.size;
    this.firstEmpty = that.firstEmpty;
    this.keys = that.keys;
    this.next = that.next;
    this.baseHash = that.baseHash;
    this.hashFactor = that.hashFactor;
  }

  /** Returns true if the set is empty, false otherwise. */
  public boolean isEmpty() {
    return size == 0;
  }

  /**
   * Returns a new iterator for the set's keys.
   */
  public IntIterator iterator() {
    return new KeyIterator();
  }

  /**
   * Prints the baseHash array, used for debugging purposes.
   */
  @SuppressWarnings("unused")
  private String getBaseHashAsString() {
    return Arrays.toString(this.baseHash);
  }

  /**
   * Adds a value to the set.
   * <p>
   * If the value is already in the set, nothing changes.
   * <p>
   * Otherwise, if the set is full, it is first grown via {@link #grow()}.
   * 
   * @param value
   *          the value to add to the set
   * @return true always.
   */
  public boolean add(int value) {
    // Does the value already exist?
    int index = find(value);

    // Yes - nothing to do.
    if (index != 0) {
      return true;
    }

    // Is there enough room for a new entry?
    if (size == capacity) {
      // No? Then grow.
      grow();
    }

    // Now that everything is set, the value can simply be inserted.
    prvt_add(value);

    return true;
  }

  /**
   * Removes a value from the set.
   * 
   * @param value
   *          the value to be removed
   * 
   * @return true if the set was changed (the value was found and removed),
   *         false otherwise.
   */
  public boolean remove(int value) {
    int baseHashIndex = calcBaseHashIndex(value);
    int index = findForRemove(value, baseHashIndex);
    if (index != 0) {
      // If it is the first in the collision list, we should promote its
      // next colliding element.
      if (prev == 0) {
        baseHash[baseHashIndex] = next[index];
      }

      next[prev] = next[index];
      next[index] = firstEmpty;
      firstEmpty = index;
      --size;
      return true;
    }

    return false;
  }

  /**
   * @return number of values currently in the set
   */
  public int size() {
    return this.size;
  }

  /**
   * Translates the set's values into an int array.
   * 
   * @return an int array of all the values currently in the set.
   */
  public int[] toArray() {
    int j = -1;
    int[] array = new int[size];

    // Iterate over the values, adding them to the array.
    for (IntIterator iterator = iterator(); iterator.hasNext();) {
      array[++j] = iterator.next();
    }
    return array;
  }

  /**
   * Translates the set's values into an array of ints.
   * 
   * @param a
   *          the array into which the values of the set are to be stored,
   *          if it is big enough; otherwise, a new array of the same
   *          runtime type is allocated for this purpose.
   * 
   * @return an array containing the values stored in the set
   */
  public int[] toArray(int[] a) {
    int j = 0;
    if (a.length < size) {
      a = new int[size];
    }
    // Iterate over the values, adding them to the array.
    for (IntIterator iterator = iterator(); j < a.length
        && iterator.hasNext(); ++j) {
      a[j] = iterator.next();
    }
    return a;
  }

  /**
   * Prints the set's values, mainly useful for debugging.
   */
  @Override
  public String toString() {
    StringBuffer sb = new StringBuffer();
    sb.append('{');
    IntIterator iterator = iterator();
    while (iterator.hasNext()) {
      sb.append(iterator.next());
      if (iterator.hasNext()) {
        sb.append(',');
        sb.append(' ');
      }
    }
    sb.append('}');
    return sb.toString();
  }

  /**
   * Prints the internal hash chains - each baseHash slot followed by the
   * key@index entries linked to it - mainly useful for debugging.
   */
  public String toHashString() {
    String newline = "\n";
    StringBuffer sb = new StringBuffer();

    for (int i = 0; i < this.baseHash.length; i++) {
      StringBuffer sb2 = new StringBuffer();
      boolean shouldAppend = false;
      sb2.append(i + ".\t");
      for (int index = baseHash[i]; index != 0; index = next[index]) {
        sb2.append(" -> " + keys[index] + "@" + index);
        shouldAppend = true;
      }
      if (shouldAppend) {
        sb.append(sb2);
        sb.append(newline);
      }
    }

    return sb.toString();
  }
}
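
A minimal usage sketch for the set above (the demo class is hypothetical; every call it makes is declared in the file above):

import org.apache.lucene.facet.collections.IntHashSet;
import org.apache.lucene.facet.collections.IntIterator;

public class IntHashSetDemo { // hypothetical demo, not part of the patch
  public static void main(String[] args) {
    IntHashSet set = new IntHashSet();
    set.add(42);
    set.add(42);                          // duplicate - the set is unchanged
    System.out.println(set.contains(42)); // true
    System.out.println(set.size());       // 1

    IntIterator it = set.iterator();      // primitive iteration, no boxing
    while (it.hasNext()) {
      System.out.println(it.next());      // 42
    }
  }
}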
@ -1,31 +0,0 @@
package org.apache.lucene.facet.collections;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Iterator interface for primitive int iteration.
 * 
 * @lucene.experimental
 */
public interface IntIterator {

  boolean hasNext();
  int next();
  void remove();

}
@ -1,631 +0,0 @@
|
||||||
package org.apache.lucene.facet.collections;

import java.util.Arrays;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * An array-based hashtable which maps a primitive int to a primitive double.<br>
 * The hashtable is constructed with a given capacity, or 16 as a default. In
 * case there's not enough room for new pairs, the hashtable grows.<br>
 * Capacity is adjusted to a power of 2, and there are 2 * capacity entries for
 * the hash.
 *
 * The pre-allocated arrays (for keys, values) are of length capacity + 1,
 * where index 0 is used as 'Ground' or 'NULL'.<br>
 *
 * The arrays are allocated ahead of hash operations, and form an 'empty space'
 * list, from which key,value pairs are allocated.
 *
 * @lucene.experimental
 */
public class IntToDoubleMap {

  public static final double GROUND = Double.NaN;

  /**
   * Implements an IntIterator which iterates over all the allocated indexes.
   */
  private final class IndexIterator implements IntIterator {
    /**
     * The last used baseHashIndex. Needed for "jumping" from one hash entry
     * to another.
     */
    private int baseHashIndex = 0;

    /**
     * The next not-yet-visited index.
     */
    private int index = 0;

    /**
     * Index of the last visited pair. Used in {@link #remove()}.
     */
    private int lastIndex = 0;

    /**
     * Creates the iterator, making <code>index</code> point to the "first"
     * index which is not empty. If no such index exists (e.g. the map is
     * empty), it will be zero.
     */
    public IndexIterator() {
      for (baseHashIndex = 0; baseHashIndex < baseHash.length; ++baseHashIndex) {
        index = baseHash[baseHashIndex];
        if (index != 0) {
          break;
        }
      }
    }

    @Override
    public boolean hasNext() {
      return (index != 0);
    }

    @Override
    public int next() {
      // Save the last index visited
      lastIndex = index;

      // Advance the index
      index = next[index];

      // If the next index points to the 'Ground' it means we're done with
      // the current hash entry and we need to jump to the next one. This
      // is done until all the hash entries have been visited.
      while (index == 0 && ++baseHashIndex < baseHash.length) {
        index = baseHash[baseHashIndex];
      }

      return lastIndex;
    }

    @Override
    public void remove() {
      IntToDoubleMap.this.remove(keys[lastIndex]);
    }

  }

  /**
   * Implements an IntIterator, used for iteration over the map's keys.
   */
  private final class KeyIterator implements IntIterator {
    private IntIterator iterator = new IndexIterator();

    KeyIterator() { }

    @Override
    public boolean hasNext() {
      return iterator.hasNext();
    }

    @Override
    public int next() {
      return keys[iterator.next()];
    }

    @Override
    public void remove() {
      iterator.remove();
    }
  }

  /**
   * Implements a DoubleIterator, used for iteration over the map's values.
   */
  private final class ValueIterator implements DoubleIterator {
    private IntIterator iterator = new IndexIterator();

    ValueIterator() { }

    @Override
    public boolean hasNext() {
      return iterator.hasNext();
    }

    @Override
    public double next() {
      return values[iterator.next()];
    }

    @Override
    public void remove() {
      iterator.remove();
    }
  }

  /**
   * Default capacity - in case no capacity was specified in the constructor
   */
  private static int defaultCapacity = 16;

  /**
   * Holds the base hash entries. If the capacity is 2^N, then the base hash
   * holds 2^(N+1) entries.
   */
  int[] baseHash;

  /**
   * The current capacity of the map. Always 2^N and never less than 16. We
   * never use the zero index; it is needed to improve performance and is also
   * used as "ground".
   */
  private int capacity;

  /**
   * All entries are allocated at map creation. Those entries are "free" or
   * empty. Whenever a new pair comes along, an entry is "allocated", i.e.
   * taken from the head of the free list.
   */
  private int firstEmpty;

  /**
   * hashFactor is always (2^(N+1)) - 1. Used for faster hashing.
   */
  private int hashFactor;

  /**
   * This array holds the unique keys
   */
  int[] keys;

  /**
   * In case of collisions, we implement a doubly-linked list of the colliding
   * hashes with the following next[] and prev. Those are also used to store
   * the "empty" list.
   */
  int[] next;

  private int prev;

  /**
   * Number of objects currently in the map.
   */
  private int size;

  /**
   * This array holds the values
   */
  double[] values;

  /**
   * Constructs a map with default capacity.
   */
  public IntToDoubleMap() {
    this(defaultCapacity);
  }

  /**
   * Constructs a map with the given capacity. Capacity is adjusted to the
   * next power of 2, with a minimum of 16.
   *
   * @param capacity
   *          minimum capacity for the map.
   */
  public IntToDoubleMap(int capacity) {
    this.capacity = 16;
    // Minimum capacity is 16.
    while (this.capacity < capacity) {
      // Multiply by 2 as long as we're still under the requested capacity
      this.capacity <<= 1;
    }

    // As mentioned, we use the first index (0) as 'Ground', so we need the
    // length of the arrays to be one more than the capacity
    int arrayLength = this.capacity + 1;

    this.values = new double[arrayLength];
    this.keys = new int[arrayLength];
    this.next = new int[arrayLength];

    // Hash entries are twice as big as the capacity.
    int baseHashSize = this.capacity << 1;

    this.baseHash = new int[baseHashSize];

    this.values[0] = GROUND;

    // The hash factor is 2^M - 1 which is used as an "AND" hashing operator.
    // {@link #calcBaseHashIndex()}
    this.hashFactor = baseHashSize - 1;

    this.size = 0;

    clear();
  }

  /**
   * Adds a pair to the map. Takes the first empty position from the
   * empty-linked-list's head - {@link #firstEmpty}.
   *
   * New pairs are always inserted to baseHash, and are followed by the old
   * colliding pair.
   *
   * @param key
   *          integer which maps the given value
   * @param v
   *          double value which is being mapped using the given key
   */
  private void prvt_put(int key, double v) {
    // Hash entry to which the new pair would be inserted
    int hashIndex = calcBaseHashIndex(key);

    // 'Allocating' a pair from the "Empty" list.
    int objectIndex = firstEmpty;

    // Setting data
    firstEmpty = next[firstEmpty];
    values[objectIndex] = v;
    keys[objectIndex] = key;

    // Inserting the new pair as the first node in the specific hash entry
    next[objectIndex] = baseHash[hashIndex];
    baseHash[hashIndex] = objectIndex;

    // Announcing a new pair was added!
    ++size;
  }

  /**
   * Calculates the baseHash index using the internal <code>hashFactor</code>.
   */
  protected int calcBaseHashIndex(int key) {
    return key & hashFactor;
  }

  /**
   * Empties the map. Generates the "Empty" space list for later allocation.
   */
  public void clear() {
    // Clears the hash entries
    Arrays.fill(this.baseHash, 0);

    // Set size to zero
    size = 0;

    // Mark all array entries as empty. This is done with
    // <code>firstEmpty</code> pointing to the first valid index (1, as 0 is
    // used as 'Ground').
    firstEmpty = 1;

    // And setting all the <code>next[i]</code> to point at
    // <code>i+1</code>.
    for (int i = 1; i < this.capacity;) {
      next[i] = ++i;
    }

    // Surely, the last one should point to the 'Ground'.
    next[this.capacity] = 0;
  }

  /**
   * Checks if a given key exists in the map.
   *
   * @param key
   *          that is checked against the map data.
   * @return true if the key exists in the map. false otherwise.
   */
  public boolean containsKey(int key) {
    return find(key) != 0;
  }

  /**
   * Checks if the given value exists in the map.<br>
   * This method iterates over the collection, trying to find an equal value.
   *
   * @param value
   *          double value that is checked against the map data.
   * @return true if the value exists in the map, false otherwise.
   */
  public boolean containsValue(double value) {
    for (DoubleIterator iterator = iterator(); iterator.hasNext();) {
      double d = iterator.next();
      if (d == value) {
        return true;
      }
    }
    return false;
  }

  /**
   * Finds the actual index of a given key.
   *
   * @return index of the key, or zero if the key wasn't found.
   */
  protected int find(int key) {
    // Calculate the hash entry.
    int baseHashIndex = calcBaseHashIndex(key);

    // Start from the hash entry.
    int localIndex = baseHash[baseHashIndex];

    // While the index does not point to the 'Ground'
    while (localIndex != 0) {
      // Return the index found in case of a matching key.
      if (keys[localIndex] == key) {
        return localIndex;
      }

      // Advance the local index
      localIndex = next[localIndex];
    }

    // If we got this far, it could only mean we did not find the key we
    // were asked for. Return the 'Ground' index.
    return 0;
  }

  /**
   * Finds the actual index of a given key given its baseHashIndex.<br>
   * Some methods already have the baseHashIndex; if those called {@link #find}
   * there would be no need to re-calculate that hash.
   *
   * @return the index of the given key, or 0 as 'Ground' if the key wasn't
   *         found.
   */
  private int findForRemove(int key, int baseHashIndex) {
    // Start from the hash entry.
    this.prev = 0;
    int index = baseHash[baseHashIndex];

    // While the index does not point to the 'Ground'
    while (index != 0) {
      // Return the index found in case of a matching key.
      if (keys[index] == key) {
        return index;
      }

      // Advance the local index
      prev = index;
      index = next[index];
    }

    // If we got this far, it could only mean we did not find the key we
    // were asked for. Return the 'Ground' index.
    this.prev = 0;
    return 0;
  }

  /**
   * Returns the value mapped with the given key.
   *
   * @param key
   *          int whose mapped value we're interested in.
   * @return the double value mapped by the given key, or Double.NaN if the
   *         key wasn't found.
   */
  public double get(int key) {
    return values[find(key)];
  }

  /**
   * Grows the map. Allocates a new map of double the capacity, and
   * fast-inserts the old key-value pairs.
   */
  protected void grow() {
    IntToDoubleMap that = new IntToDoubleMap(this.capacity * 2);

    // Iterates fast over the collection. Any valid pair is put into the new
    // map without checking for duplicates or if there's enough space for
    // it.
    for (IndexIterator iterator = new IndexIterator(); iterator.hasNext();) {
      int index = iterator.next();
      that.prvt_put(this.keys[index], this.values[index]);
    }

    // Copy that's data into this.
    this.capacity = that.capacity;
    this.size = that.size;
    this.firstEmpty = that.firstEmpty;
    this.values = that.values;
    this.keys = that.keys;
    this.next = that.next;
    this.baseHash = that.baseHash;
    this.hashFactor = that.hashFactor;
  }

  /**
   * @return true if the map is empty. false otherwise.
   */
  public boolean isEmpty() {
    return size == 0;
  }

  /**
   * Returns a new iterator for the mapped double values.
   */
  public DoubleIterator iterator() {
    return new ValueIterator();
  }

  /** Returns an iterator on the map keys. */
  public IntIterator keyIterator() {
    return new KeyIterator();
  }

  /**
   * Prints the baseHash array, used for debug purposes.
   */
  @SuppressWarnings("unused")
  private String getBaseHashAsString() {
    return Arrays.toString(this.baseHash);
  }

  /**
   * Inserts the <key,value> pair into the map. If the key already exists,
   * this method updates the mapped value to the given one, returning the old
   * mapped value.
   *
   * @return the old mapped value, or {@link Double#NaN} if the key didn't exist.
   */
  public double put(int key, double v) {
    // Does the key exist?
    int index = find(key);

    // Yes!
    if (index != 0) {
      // Set new data and exit.
      double old = values[index];
      values[index] = v;
      return old;
    }

    // Is there enough room for a new pair?
    if (size == capacity) {
      // No? Then grow!
      grow();
    }

    // Now that everything is set, the pair can be just put inside with no
    // worries.
    prvt_put(key, v);

    return Double.NaN;
  }

  /**
   * Removes a <key,value> pair from the map and returns the mapped value,
   * or {@link Double#NaN} if none existed.
   *
   * @param key used to find the value to remove
   * @return the removed value, or {@link Double#NaN} if none existed.
   */
  public double remove(int key) {
    int baseHashIndex = calcBaseHashIndex(key);
    int index = findForRemove(key, baseHashIndex);
    if (index != 0) {
      // If it is the first in the collision list, we should promote its
      // next colliding element.
      if (prev == 0) {
        baseHash[baseHashIndex] = next[index];
      }

      next[prev] = next[index];
      next[index] = firstEmpty;
      firstEmpty = index;
      --size;
      return values[index];
    }

    return Double.NaN;
  }

  /**
   * @return number of pairs currently in the map
   */
  public int size() {
    return this.size;
  }

  /**
   * Translates the mapped pairs' values into an array of doubles.
   *
   * @return a double array of all the values currently in the map.
   */
  public double[] toArray() {
    int j = -1;
    double[] array = new double[size];

    // Iterates over the values, adding them to the array.
    for (DoubleIterator iterator = iterator(); iterator.hasNext();) {
      array[++j] = iterator.next();
    }
    return array;
  }

  /**
   * Translates the mapped pairs' values into a given array of doubles.
   *
   * @param a
   *          the array into which the elements of the map are to be stored.
   *          If it is big enough it is used as-is, and the slot after the
   *          true data is set to {@link Double#NaN}.
   *
   * @return an array containing the values of the map, using the given
   *         parameter if big enough, otherwise allocating an appropriate
   *         array and returning it.
   */
  public double[] toArray(double[] a) {
    int j = 0;
    if (a.length < this.size()) {
      a = new double[this.size()];
    }

    // Iterates over the values, adding them to the array.
    for (DoubleIterator iterator = iterator(); iterator.hasNext(); ++j) {
      a[j] = iterator.next();
    }

    if (j < a.length) {
      a[j] = Double.NaN;
    }

    return a;
  }

  @Override
  public String toString() {
    StringBuffer sb = new StringBuffer();
    sb.append('{');
    IntIterator keyIterator = keyIterator();
    while (keyIterator.hasNext()) {
      int key = keyIterator.next();
      sb.append(key);
      sb.append('=');
      sb.append(get(key));
      if (keyIterator.hasNext()) {
        sb.append(',');
        sb.append(' ');
      }
    }
    sb.append('}');
    return sb.toString();
  }

  @Override
  public int hashCode() {
    return getClass().hashCode() ^ size();
  }

  @Override
  public boolean equals(Object o) {
    IntToDoubleMap that = (IntToDoubleMap)o;
    if (that.size() != this.size()) {
      return false;
    }

    IntIterator it = keyIterator();
    while (it.hasNext()) {
      int key = it.next();
      if (!that.containsKey(key)) {
        return false;
      }

      double v1 = this.get(key);
      double v2 = that.get(key);
      if (Double.compare(v1, v2) != 0) {
        return false;
      }
    }
    return true;
  }
}
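A brief usage sketch of the map above (illustrative only, not part of the patch). Note that GROUND is Double.NaN, so get() on a missing key returns NaN and containsKey() is the reliable membership test:

IntToDoubleMap weights = new IntToDoubleMap(); // capacity defaults to 16
weights.put(7, 0.5);
double old = weights.put(7, 0.75);             // returns the old value, 0.5

double w = weights.get(42);                    // Double.NaN: key 42 is absent
boolean present = weights.containsKey(42);     // false - the reliable test

// Iterate keys without boxing:
for (IntIterator it = weights.keyIterator(); it.hasNext();) {
  int key = it.next();
  System.out.println(key + " -> " + weights.get(key));
}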
@ -1,631 +0,0 @@
package org.apache.lucene.facet.collections;

import java.util.Arrays;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * An array-based hashtable which maps a primitive int to a primitive float.<br>
 * The hashtable is constructed with a given capacity, or 16 as a default. In
 * case there's not enough room for new pairs, the hashtable grows.<br>
 * Capacity is adjusted to a power of 2, and there are 2 * capacity entries for
 * the hash.
 *
 * The pre-allocated arrays (for keys, values) are of length capacity + 1,
 * where index 0 is used as 'Ground' or 'NULL'.<br>
 *
 * The arrays are allocated ahead of hash operations, and form an 'empty space'
 * list, from which key,value pairs are allocated.
 *
 * @lucene.experimental
 */
public class IntToFloatMap {

  public static final float GROUND = Float.NaN;

  /**
   * Implements an IntIterator which iterates over all the allocated indexes.
   */
  private final class IndexIterator implements IntIterator {
    /**
     * The last used baseHashIndex. Needed for "jumping" from one hash entry
     * to another.
     */
    private int baseHashIndex = 0;

    /**
     * The next not-yet-visited index.
     */
    private int index = 0;

    /**
     * Index of the last visited pair. Used in {@link #remove()}.
     */
    private int lastIndex = 0;

    /**
     * Creates the iterator, making <code>index</code> point to the "first"
     * index which is not empty. If no such index exists (e.g. the map is
     * empty), it will be zero.
     */
    public IndexIterator() {
      for (baseHashIndex = 0; baseHashIndex < baseHash.length; ++baseHashIndex) {
        index = baseHash[baseHashIndex];
        if (index != 0) {
          break;
        }
      }
    }

    @Override
    public boolean hasNext() {
      return (index != 0);
    }

    @Override
    public int next() {
      // Save the last index visited
      lastIndex = index;

      // Advance the index
      index = next[index];

      // If the next index points to the 'Ground' it means we're done with
      // the current hash entry and we need to jump to the next one. This
      // is done until all the hash entries have been visited.
      while (index == 0 && ++baseHashIndex < baseHash.length) {
        index = baseHash[baseHashIndex];
      }

      return lastIndex;
    }

    @Override
    public void remove() {
      IntToFloatMap.this.remove(keys[lastIndex]);
    }

  }

  /**
   * Implements an IntIterator, used for iteration over the map's keys.
   */
  private final class KeyIterator implements IntIterator {
    private IntIterator iterator = new IndexIterator();

    KeyIterator() { }

    @Override
    public boolean hasNext() {
      return iterator.hasNext();
    }

    @Override
    public int next() {
      return keys[iterator.next()];
    }

    @Override
    public void remove() {
      iterator.remove();
    }
  }

  /**
   * Implements a FloatIterator, used for iteration over the map's values.
   */
  private final class ValueIterator implements FloatIterator {
    private IntIterator iterator = new IndexIterator();

    ValueIterator() { }

    @Override
    public boolean hasNext() {
      return iterator.hasNext();
    }

    @Override
    public float next() {
      return values[iterator.next()];
    }

    @Override
    public void remove() {
      iterator.remove();
    }
  }

  /**
   * Default capacity - in case no capacity was specified in the constructor
   */
  private static int defaultCapacity = 16;

  /**
   * Holds the base hash entries. If the capacity is 2^N, then the base hash
   * holds 2^(N+1) entries.
   */
  int[] baseHash;

  /**
   * The current capacity of the map. Always 2^N and never less than 16. We
   * never use the zero index; it is needed to improve performance and is also
   * used as "ground".
   */
  private int capacity;

  /**
   * All entries are allocated at map creation. Those entries are "free" or
   * empty. Whenever a new pair comes along, an entry is "allocated", i.e.
   * taken from the head of the free list.
   */
  private int firstEmpty;

  /**
   * hashFactor is always (2^(N+1)) - 1. Used for faster hashing.
   */
  private int hashFactor;

  /**
   * This array holds the unique keys
   */
  int[] keys;

  /**
   * In case of collisions, we implement a doubly-linked list of the colliding
   * hashes with the following next[] and prev. Those are also used to store
   * the "empty" list.
   */
  int[] next;

  private int prev;

  /**
   * Number of objects currently in the map.
   */
  private int size;

  /**
   * This array holds the values
   */
  float[] values;

  /**
   * Constructs a map with default capacity.
   */
  public IntToFloatMap() {
    this(defaultCapacity);
  }

  /**
   * Constructs a map with the given capacity. Capacity is adjusted to the
   * next power of 2, with a minimum of 16.
   *
   * @param capacity
   *          minimum capacity for the map.
   */
  public IntToFloatMap(int capacity) {
    this.capacity = 16;
    // Minimum capacity is 16.
    while (this.capacity < capacity) {
      // Multiply by 2 as long as we're still under the requested capacity
      this.capacity <<= 1;
    }

    // As mentioned, we use the first index (0) as 'Ground', so we need the
    // length of the arrays to be one more than the capacity
    int arrayLength = this.capacity + 1;

    this.values = new float[arrayLength];
    this.keys = new int[arrayLength];
    this.next = new int[arrayLength];

    // Hash entries are twice as big as the capacity.
    int baseHashSize = this.capacity << 1;

    this.baseHash = new int[baseHashSize];

    this.values[0] = GROUND;

    // The hash factor is 2^M - 1 which is used as an "AND" hashing operator.
    // {@link #calcBaseHashIndex()}
    this.hashFactor = baseHashSize - 1;

    this.size = 0;

    clear();
  }

  /**
   * Adds a pair to the map. Takes the first empty position from the
   * empty-linked-list's head - {@link #firstEmpty}.
   *
   * New pairs are always inserted to baseHash, and are followed by the old
   * colliding pair.
   *
   * @param key
   *          integer which maps the given value
   * @param v
   *          float value which is being mapped using the given key
   */
  private void prvt_put(int key, float v) {
    // Hash entry to which the new pair would be inserted
    int hashIndex = calcBaseHashIndex(key);

    // 'Allocating' a pair from the "Empty" list.
    int objectIndex = firstEmpty;

    // Setting data
    firstEmpty = next[firstEmpty];
    values[objectIndex] = v;
    keys[objectIndex] = key;

    // Inserting the new pair as the first node in the specific hash entry
    next[objectIndex] = baseHash[hashIndex];
    baseHash[hashIndex] = objectIndex;

    // Announcing a new pair was added!
    ++size;
  }

  /**
   * Calculates the baseHash index using the internal <code>hashFactor</code>.
   */
  protected int calcBaseHashIndex(int key) {
    return key & hashFactor;
  }

  /**
   * Empties the map. Generates the "Empty" space list for later allocation.
   */
  public void clear() {
    // Clears the hash entries
    Arrays.fill(this.baseHash, 0);

    // Set size to zero
    size = 0;

    // Mark all array entries as empty. This is done with
    // <code>firstEmpty</code> pointing to the first valid index (1, as 0 is
    // used as 'Ground').
    firstEmpty = 1;

    // And setting all the <code>next[i]</code> to point at
    // <code>i+1</code>.
    for (int i = 1; i < this.capacity;) {
      next[i] = ++i;
    }

    // Surely, the last one should point to the 'Ground'.
    next[this.capacity] = 0;
  }

  /**
   * Checks if a given key exists in the map.
   *
   * @param key
   *          that is checked against the map data.
   * @return true if the key exists in the map. false otherwise.
   */
  public boolean containsKey(int key) {
    return find(key) != 0;
  }

  /**
   * Checks if the given value exists in the map.<br>
   * This method iterates over the collection, trying to find an equal value.
   *
   * @param value
   *          float value that is checked against the map data.
   * @return true if the value exists in the map, false otherwise.
   */
  public boolean containsValue(float value) {
    for (FloatIterator iterator = iterator(); iterator.hasNext();) {
      float d = iterator.next();
      if (d == value) {
        return true;
      }
    }
    return false;
  }

  /**
   * Finds the actual index of a given key.
   *
   * @return index of the key, or zero if the key wasn't found.
   */
  protected int find(int key) {
    // Calculate the hash entry.
    int baseHashIndex = calcBaseHashIndex(key);

    // Start from the hash entry.
    int localIndex = baseHash[baseHashIndex];

    // While the index does not point to the 'Ground'
    while (localIndex != 0) {
      // Return the index found in case of a matching key.
      if (keys[localIndex] == key) {
        return localIndex;
      }

      // Advance the local index
      localIndex = next[localIndex];
    }

    // If we got this far, it could only mean we did not find the key we
    // were asked for. Return the 'Ground' index.
    return 0;
  }

  /**
   * Finds the actual index of a given key given its baseHashIndex.<br>
   * Some methods already have the baseHashIndex; if those called {@link #find}
   * there would be no need to re-calculate that hash.
   *
   * @return the index of the given key, or 0 as 'Ground' if the key wasn't
   *         found.
   */
  private int findForRemove(int key, int baseHashIndex) {
    // Start from the hash entry.
    this.prev = 0;
    int index = baseHash[baseHashIndex];

    // While the index does not point to the 'Ground'
    while (index != 0) {
      // Return the index found in case of a matching key.
      if (keys[index] == key) {
        return index;
      }

      // Advance the local index
      prev = index;
      index = next[index];
    }

    // If we got this far, it could only mean we did not find the key we
    // were asked for. Return the 'Ground' index.
    this.prev = 0;
    return 0;
  }

  /**
   * Returns the value mapped with the given key.
   *
   * @param key
   *          int whose mapped value we're interested in.
   * @return the float value mapped by the given key, or Float.NaN if the key
   *         wasn't found.
   */
  public float get(int key) {
    return values[find(key)];
  }

  /**
   * Grows the map. Allocates a new map of double the capacity, and
   * fast-inserts the old key-value pairs.
   */
  protected void grow() {
    IntToFloatMap that = new IntToFloatMap(this.capacity * 2);

    // Iterates fast over the collection. Any valid pair is put into the new
    // map without checking for duplicates or if there's enough space for
    // it.
    for (IndexIterator iterator = new IndexIterator(); iterator.hasNext();) {
      int index = iterator.next();
      that.prvt_put(this.keys[index], this.values[index]);
    }

    // Copy that's data into this.
    this.capacity = that.capacity;
    this.size = that.size;
    this.firstEmpty = that.firstEmpty;
    this.values = that.values;
    this.keys = that.keys;
    this.next = that.next;
    this.baseHash = that.baseHash;
    this.hashFactor = that.hashFactor;
  }

  /**
   * @return true if the map is empty. false otherwise.
   */
  public boolean isEmpty() {
    return size == 0;
  }

  /**
   * Returns a new iterator for the mapped float values.
   */
  public FloatIterator iterator() {
    return new ValueIterator();
  }

  /** Returns an iterator on the map keys. */
  public IntIterator keyIterator() {
    return new KeyIterator();
  }

  /**
   * Prints the baseHash array, used for debug purposes.
   */
  @SuppressWarnings("unused")
  private String getBaseHashAsString() {
    return Arrays.toString(this.baseHash);
  }

  /**
   * Inserts the <key,value> pair into the map. If the key already exists,
   * this method updates the mapped value to the given one, returning the old
   * mapped value.
   *
   * @return the old mapped value, or {@link Float#NaN} if the key didn't exist.
   */
  public float put(int key, float v) {
    // Does the key exist?
    int index = find(key);

    // Yes!
    if (index != 0) {
      // Set new data and exit.
      float old = values[index];
      values[index] = v;
      return old;
    }

    // Is there enough room for a new pair?
    if (size == capacity) {
      // No? Then grow!
      grow();
    }

    // Now that everything is set, the pair can be just put inside with no
    // worries.
    prvt_put(key, v);

    return Float.NaN;
  }

  /**
   * Removes a <key,value> pair from the map and returns the mapped value,
   * or {@link Float#NaN} if none existed.
   *
   * @param key used to find the value to remove
   * @return the removed value, or {@link Float#NaN} if none existed.
   */
  public float remove(int key) {
    int baseHashIndex = calcBaseHashIndex(key);
    int index = findForRemove(key, baseHashIndex);
    if (index != 0) {
      // If it is the first in the collision list, we should promote its
      // next colliding element.
      if (prev == 0) {
        baseHash[baseHashIndex] = next[index];
      }

      next[prev] = next[index];
      next[index] = firstEmpty;
      firstEmpty = index;
      --size;
      return values[index];
    }

    return Float.NaN;
  }

  /**
   * @return number of pairs currently in the map
   */
  public int size() {
    return this.size;
  }

  /**
   * Translates the mapped pairs' values into an array of floats.
   *
   * @return a float array of all the values currently in the map.
   */
  public float[] toArray() {
    int j = -1;
    float[] array = new float[size];

    // Iterates over the values, adding them to the array.
    for (FloatIterator iterator = iterator(); iterator.hasNext();) {
      array[++j] = iterator.next();
    }
    return array;
  }

  /**
   * Translates the mapped pairs' values into a given array of floats.
   *
   * @param a
   *          the array into which the elements of the map are to be stored.
   *          If it is big enough it is used as-is, and the slot after the
   *          true data is set to {@link Float#NaN}.
   *
   * @return an array containing the values of the map, using the given
   *         parameter if big enough, otherwise allocating an appropriate
   *         array and returning it.
   */
  public float[] toArray(float[] a) {
    int j = 0;
    if (a.length < this.size()) {
      a = new float[this.size()];
    }

    // Iterates over the values, adding them to the array.
    for (FloatIterator iterator = iterator(); iterator.hasNext(); ++j) {
      a[j] = iterator.next();
    }

    if (j < a.length) {
      a[j] = Float.NaN;
    }

    return a;
  }

  @Override
  public String toString() {
    StringBuffer sb = new StringBuffer();
    sb.append('{');
    IntIterator keyIterator = keyIterator();
    while (keyIterator.hasNext()) {
      int key = keyIterator.next();
      sb.append(key);
      sb.append('=');
      sb.append(get(key));
      if (keyIterator.hasNext()) {
        sb.append(',');
        sb.append(' ');
      }
    }
    sb.append('}');
    return sb.toString();
  }

  @Override
  public int hashCode() {
    return getClass().hashCode() ^ size();
  }

  @Override
  public boolean equals(Object o) {
    IntToFloatMap that = (IntToFloatMap)o;
    if (that.size() != this.size()) {
      return false;
    }

    IntIterator it = keyIterator();
    while (it.hasNext()) {
      int key = it.next();
      if (!that.containsKey(key)) {
        return false;
      }

      float v1 = this.get(key);
      float v2 = that.get(key);
      if (Float.compare(v1, v2) != 0) {
        return false;
      }
    }
    return true;
  }
}
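As with the other two maps, the constructor rounds the requested capacity up to a power of two with a floor of 16. The standalone helper below (hypothetical, for illustration only) reproduces that arithmetic:

// Mirrors the constructor's capacity adjustment: round the requested
// capacity up to a power of two, never below 16.
static int adjustCapacity(int requested) {
  int capacity = 16;
  while (capacity < requested) {
    capacity <<= 1;
  }
  return capacity;
}

// adjustCapacity(10)  -> 16
// adjustCapacity(100) -> 128 (the map then allocates 129-slot key/value
//                             arrays and a 256-entry baseHash)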
@ -1,622 +0,0 @@
package org.apache.lucene.facet.collections;

import java.util.Arrays;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * An array-based hashtable which maps a primitive int to a primitive int.<br>
 * The hashtable is constructed with a given capacity, or 16 as a default. In
 * case there's not enough room for new pairs, the hashtable grows.<br>
 * Capacity is adjusted to a power of 2, and there are 2 * capacity entries for
 * the hash.
 *
 * The pre-allocated arrays (for keys, values) are of length capacity + 1,
 * where index 0 is used as 'Ground' or 'NULL'.<br>
 *
 * The arrays are allocated ahead of hash operations, and form an 'empty space'
 * list, from which key,value pairs are allocated.
 *
 * @lucene.experimental
 */
public class IntToIntMap {

  public static final int GROUD = -1;

  /**
   * Implements an IntIterator which iterates over all the allocated indexes.
   */
  private final class IndexIterator implements IntIterator {
    /**
     * The last used baseHashIndex. Needed for "jumping" from one hash entry
     * to another.
     */
    private int baseHashIndex = 0;

    /**
     * The next not-yet-visited index.
     */
    private int index = 0;

    /**
     * Index of the last visited pair. Used in {@link #remove()}.
     */
    private int lastIndex = 0;

    /**
     * Creates the iterator, making <code>index</code> point to the "first"
     * index which is not empty. If no such index exists (e.g. the map is
     * empty), it will be zero.
     */
    public IndexIterator() {
      for (baseHashIndex = 0; baseHashIndex < baseHash.length; ++baseHashIndex) {
        index = baseHash[baseHashIndex];
        if (index != 0) {
          break;
        }
      }
    }

    @Override
    public boolean hasNext() {
      return (index != 0);
    }

    @Override
    public int next() {
      // Save the last index visited
      lastIndex = index;

      // Advance the index
      index = next[index];

      // If the next index points to the 'Ground' it means we're done with
      // the current hash entry and we need to jump to the next one. This
      // is done until all the hash entries have been visited.
      while (index == 0 && ++baseHashIndex < baseHash.length) {
        index = baseHash[baseHashIndex];
      }

      return lastIndex;
    }

    @Override
    public void remove() {
      IntToIntMap.this.remove(keys[lastIndex]);
    }

  }

  /**
   * Implements an IntIterator, used for iteration over the map's keys.
   */
  private final class KeyIterator implements IntIterator {
    private IntIterator iterator = new IndexIterator();

    KeyIterator() { }

    @Override
    public boolean hasNext() {
      return iterator.hasNext();
    }

    @Override
    public int next() {
      return keys[iterator.next()];
    }

    @Override
    public void remove() {
      iterator.remove();
    }
  }

  /**
   * Implements an IntIterator, used for iteration over the map's values.
   */
  private final class ValueIterator implements IntIterator {
    private IntIterator iterator = new IndexIterator();

    ValueIterator() { }

    @Override
    public boolean hasNext() {
      return iterator.hasNext();
    }

    @Override
    public int next() {
      return values[iterator.next()];
    }

    @Override
    public void remove() {
      iterator.remove();
    }
  }

  /**
   * Default capacity - in case no capacity was specified in the constructor
   */
  private static int defaultCapacity = 16;

  /**
   * Holds the base hash entries. If the capacity is 2^N, then the base hash
   * holds 2^(N+1) entries.
   */
  int[] baseHash;

  /**
   * The current capacity of the map. Always 2^N and never less than 16. We
   * never use the zero index; it is needed to improve performance and is also
   * used as "ground".
   */
  private int capacity;

  /**
   * All entries are allocated at map creation. Those entries are "free" or
   * empty. Whenever a new pair comes along, an entry is "allocated", i.e.
   * taken from the head of the free list.
   */
  private int firstEmpty;

  /**
   * hashFactor is always (2^(N+1)) - 1. Used for faster hashing.
   */
  private int hashFactor;

  /**
   * This array holds the unique keys
   */
  int[] keys;

  /**
   * In case of collisions, we implement a doubly-linked list of the colliding
   * hashes with the following next[] and prev. Those are also used to store
   * the "empty" list.
   */
  int[] next;

  private int prev;

  /**
   * Number of objects currently in the map.
   */
  private int size;

  /**
   * This array holds the values
   */
  int[] values;

  /**
   * Constructs a map with default capacity.
   */
  public IntToIntMap() {
    this(defaultCapacity);
  }

  /**
   * Constructs a map with the given capacity. Capacity is adjusted to the
   * next power of 2, with a minimum of 16.
   *
   * @param capacity
   *          minimum capacity for the map.
   */
  public IntToIntMap(int capacity) {
    this.capacity = 16;
    // Minimum capacity is 16.
    while (this.capacity < capacity) {
      // Multiply by 2 as long as we're still under the requested capacity
      this.capacity <<= 1;
    }

    // As mentioned, we use the first index (0) as 'Ground', so we need the
    // length of the arrays to be one more than the capacity
    int arrayLength = this.capacity + 1;

    this.values = new int[arrayLength];
    this.keys = new int[arrayLength];
    this.next = new int[arrayLength];

    this.values[0] = GROUD;

    // Hash entries are twice as big as the capacity.
    int baseHashSize = this.capacity << 1;

    this.baseHash = new int[baseHashSize];

    // The hash factor is 2^M - 1 which is used as an "AND" hashing operator.
    // {@link #calcBaseHashIndex()}
    this.hashFactor = baseHashSize - 1;

    this.size = 0;

    clear();
  }

  /**
   * Adds a pair to the map. Takes the first empty position from the
   * empty-linked-list's head - {@link #firstEmpty}.
   *
   * New pairs are always inserted to baseHash, and are followed by the old
   * colliding pair.
   *
   * @param key
   *          integer which maps the given value
   * @param e
   *          value which is being mapped using the given key
   */
  private void prvt_put(int key, int e) {
    // Hash entry to which the new pair would be inserted
    int hashIndex = calcBaseHashIndex(key);

    // 'Allocating' a pair from the "Empty" list.
    int objectIndex = firstEmpty;

    // Setting data
    firstEmpty = next[firstEmpty];
    values[objectIndex] = e;
    keys[objectIndex] = key;

    // Inserting the new pair as the first node in the specific hash entry
    next[objectIndex] = baseHash[hashIndex];
    baseHash[hashIndex] = objectIndex;

    // Announcing a new pair was added!
    ++size;
  }

  /**
   * Calculates the baseHash index using the internal <code>hashFactor</code>.
   */
  protected int calcBaseHashIndex(int key) {
    return key & hashFactor;
  }

  /**
   * Empties the map. Generates the "Empty" space list for later allocation.
   */
  public void clear() {
    // Clears the hash entries
    Arrays.fill(this.baseHash, 0);

    // Set size to zero
    size = 0;

    // Mark all array entries as empty. This is done with
    // <code>firstEmpty</code> pointing to the first valid index (1, as 0 is
    // used as 'Ground').
    firstEmpty = 1;

    // And setting all the <code>next[i]</code> to point at
    // <code>i+1</code>.
    for (int i = 1; i < this.capacity;) {
      next[i] = ++i;
    }

    // Surely, the last one should point to the 'Ground'.
    next[this.capacity] = 0;
  }

  /**
   * Checks if a given key exists in the map.
   *
   * @param key
   *          that is checked against the map data.
   * @return true if the key exists in the map. false otherwise.
   */
  public boolean containsKey(int key) {
    return find(key) != 0;
  }

  /**
   * Checks if the given value exists in the map.<br>
   * This method iterates over the collection, trying to find an equal value.
   *
   * @param v
   *          value that is checked against the map data.
   * @return true if the value exists in the map, false otherwise.
   */
  public boolean containsValue(int v) {
    for (IntIterator iterator = iterator(); iterator.hasNext();) {
      if (v == iterator.next()) {
        return true;
      }
    }
    return false;
  }

  /**
   * Finds the actual index of a given key.
   *
   * @return index of the key, or zero if the key wasn't found.
   */
  protected int find(int key) {
    // Calculate the hash entry.
    int baseHashIndex = calcBaseHashIndex(key);

    // Start from the hash entry.
    int localIndex = baseHash[baseHashIndex];

    // While the index does not point to the 'Ground'
    while (localIndex != 0) {
      // Return the index found in case of a matching key.
      if (keys[localIndex] == key) {
        return localIndex;
      }

      // Advance the local index
      localIndex = next[localIndex];
    }

    // If we got this far, it could only mean we did not find the key we
    // were asked for. Return the 'Ground' index.
    return 0;
  }

  /**
   * Finds the actual index of a given key given its baseHashIndex.<br>
   * Some methods already have the baseHashIndex; if those called {@link #find}
   * there would be no need to re-calculate that hash.
   *
   * @return the index of the given key, or 0 as 'Ground' if the key wasn't
   *         found.
   */
  private int findForRemove(int key, int baseHashIndex) {
    // Start from the hash entry.
    this.prev = 0;
    int index = baseHash[baseHashIndex];

    // While the index does not point to the 'Ground'
    while (index != 0) {
      // Return the index found in case of a matching key.
      if (keys[index] == key) {
        return index;
      }

      // Advance the local index
      prev = index;
      index = next[index];
    }

    // If we got this far, it could only mean we did not find the key we
    // were asked for. Return the 'Ground' index.
    this.prev = 0;
    return 0;
  }

  /**
   * Returns the value mapped with the given key.
   *
   * @param key
   *          int whose mapped value we're interested in.
   * @return the value mapped by the given key, or GROUD if the key wasn't
   *         found.
   */
  public int get(int key) {
    return values[find(key)];
  }

  /**
   * Grows the map. Allocates a new map of double the capacity, and
   * fast-inserts the old key-value pairs.
   */
  protected void grow() {
    IntToIntMap that = new IntToIntMap(this.capacity * 2);

    // Iterates fast over the collection. Any valid pair is put into the new
    // map without checking for duplicates or if there's enough space for
    // it.
    for (IndexIterator iterator = new IndexIterator(); iterator.hasNext();) {
      int index = iterator.next();
      that.prvt_put(this.keys[index], this.values[index]);
    }

    // Copy that's data into this.
    this.capacity = that.capacity;
    this.size = that.size;
    this.firstEmpty = that.firstEmpty;
    this.values = that.values;
    this.keys = that.keys;
    this.next = that.next;
    this.baseHash = that.baseHash;
    this.hashFactor = that.hashFactor;
  }

  /**
   * @return true if the map is empty. false otherwise.
   */
  public boolean isEmpty() {
    return size == 0;
  }

  /**
   * Returns a new iterator for the mapped values.
   */
  public IntIterator iterator() {
    return new ValueIterator();
  }

  /** Returns an iterator on the map keys. */
  public IntIterator keyIterator() {
    return new KeyIterator();
  }

  /**
   * Prints the baseHash array, used for debug purposes.
   */
  @SuppressWarnings("unused")
  private String getBaseHashAsString() {
    return Arrays.toString(this.baseHash);
  }

  /**
   * Inserts the <key,value> pair into the map. If the key already exists,
   * this method updates the mapped value to the given one, returning the old
   * mapped value.
   *
   * @return the old mapped value, or 0 if the key didn't exist.
   */
  public int put(int key, int e) {
    // Does the key exist?
    int index = find(key);

    // Yes!
    if (index != 0) {
      // Set new data and exit.
      int old = values[index];
      values[index] = e;
      return old;
    }

    // Is there enough room for a new pair?
    if (size == capacity) {
      // No? Then grow!
      grow();
    }

    // Now that everything is set, the pair can be just put inside with no
    // worries.
    prvt_put(key, e);

    return 0;
  }

  /**
   * Removes a <key,value> pair from the map and returns the mapped value,
   * or 0 if none existed.
   *
   * @param key used to find the value to remove
   * @return the removed value, or 0 if none existed.
   */
  public int remove(int key) {
    int baseHashIndex = calcBaseHashIndex(key);
    int index = findForRemove(key, baseHashIndex);
    if (index != 0) {
      // If it is the first in the collision list, we should promote its
      // next colliding element.
      if (prev == 0) {
        baseHash[baseHashIndex] = next[index];
      }

      next[prev] = next[index];
      next[index] = firstEmpty;
      firstEmpty = index;
      --size;
      return values[index];
    }

    return 0;
  }

  /**
   * @return number of pairs currently in the map
   */
  public int size() {
    return this.size;
  }

  /**
   * Translates the mapped pairs' values into an array of ints.
   *
   * @return an int array of all the values currently in the map.
   */
  public int[] toArray() {
    int j = -1;
    int[] array = new int[size];

    // Iterates over the values, adding them to the array.
    for (IntIterator iterator = iterator(); iterator.hasNext();) {
      array[++j] = iterator.next();
    }
    return array;
  }

  /**
   * Translates the mapped pairs' values into an array of ints.
   *
   * @param a
   *          the array into which the elements of the map are to be
   *          stored, if it is big enough; otherwise, a new array of the
   *          same runtime type is allocated for this purpose.
   *
   * @return an array containing the values stored in the map
   */
  public int[] toArray(int[] a) {
    int j = 0;
    if (a.length < size) {
      a = new int[size];
    }
    // Iterates over the values, adding them to the array.
    for (IntIterator iterator = iterator(); j < a.length && iterator.hasNext(); ++j) {
      a[j] = iterator.next();
    }
    return a;
  }

  @Override
  public String toString() {
    StringBuffer sb = new StringBuffer();
    sb.append('{');
    IntIterator keyIterator = keyIterator();
    while (keyIterator.hasNext()) {
      int key = keyIterator.next();
      sb.append(key);
      sb.append('=');
      sb.append(get(key));
      if (keyIterator.hasNext()) {
        sb.append(',');
        sb.append(' ');
      }
    }
    sb.append('}');
    return sb.toString();
  }

  @Override
  public int hashCode() {
    return getClass().hashCode() ^ size();
  }

  @Override
  public boolean equals(Object o) {
    IntToIntMap that = (IntToIntMap)o;
    if (that.size() != this.size()) {
      return false;
    }

    IntIterator it = keyIterator();
    while (it.hasNext()) {
      int key = it.next();

      if (!that.containsKey(key)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
int v1 = this.get(key);
|
|
||||||
int v2 = that.get(key);
|
|
||||||
if (v1 != v2) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
|
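Since slot 0 doubles as the 'Ground' entry, IntToIntMap reuses 0 as its not-found sentinel for get(), put() and remove(). A minimal usage sketch follows (the IntToIntMapDemo class name is hypothetical; it assumes the facet collections classes above are on the classpath, and that the 'Ground' value slot keeps its default of 0, as the javadoc implies):

import org.apache.lucene.facet.collections.IntToIntMap;

public class IntToIntMapDemo {
  public static void main(String[] args) {
    IntToIntMap map = new IntToIntMap();     // capacity rounds up to 16
    map.put(7, 42);                          // returns 0: the key was new
    int old = map.put(7, 43);                // returns 42: the previous value
    System.out.println(map.get(7));          // 43
    System.out.println(map.get(99));         // 0 -- the 'Ground' sentinel, not a stored value
    // A stored 0 is indistinguishable from "absent" via get(); use containsKey:
    System.out.println(map.containsKey(99)); // false
    System.out.println(map.remove(7));       // 43; the slot returns to the free list
  }
}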
@ -1,634 +0,0 @@
package org.apache.lucene.facet.collections;

import java.util.Arrays;
import java.util.Iterator;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * An array-based hashtable which maps primitive int to Objects of generic type
 * T.<br>
 * The hashtable is constructed with a given capacity, or 16 as a default. In
 * case there's not enough room for new pairs, the hashtable grows. <br>
 * Capacity is adjusted to a power of 2, and there are 2 * capacity entries for
 * the hash.
 *
 * The pre-allocated arrays (for keys, values) are of length capacity + 1,
 * where index 0 is used as 'Ground' or 'NULL'.<br>
 *
 * The arrays are allocated ahead of hash operations, and form an 'empty space'
 * list, from which each key,value pair is allocated.
 *
 * @lucene.experimental
 */
public class IntToObjectMap<T> implements Iterable<T> {

  /**
   * Implements an IntIterator which iterates over all the allocated indexes.
   */
  private final class IndexIterator implements IntIterator {
    /**
     * The last used baseHashIndex. Needed for "jumping" from one hash entry
     * to another.
     */
    private int baseHashIndex = 0;

    /**
     * The next not-yet-visited index.
     */
    private int index = 0;

    /**
     * Index of the last visited pair. Used in {@link #remove()}.
     */
    private int lastIndex = 0;

    /**
     * Creates the iterator, making <code>index</code> point to the first
     * index which is not empty. If no such index exists (e.g. the map is
     * empty), it is zero.
     */
    public IndexIterator() {
      for (baseHashIndex = 0; baseHashIndex < baseHash.length; ++baseHashIndex) {
        index = baseHash[baseHashIndex];
        if (index != 0) {
          break;
        }
      }
    }

    @Override
    public boolean hasNext() {
      return (index != 0);
    }

    @Override
    public int next() {
      // Save the last index visited
      lastIndex = index;

      // Advance the index
      index = next[index];

      // If the next index points to the 'Ground' it means we're done with
      // the current hash entry and we need to jump to the next one. This
      // is done until all the hash entries have been visited.
      while (index == 0 && ++baseHashIndex < baseHash.length) {
        index = baseHash[baseHashIndex];
      }

      return lastIndex;
    }

    @Override
    public void remove() {
      IntToObjectMap.this.remove(keys[lastIndex]);
    }

  }

  /**
   * Implements an IntIterator, used for iteration over the map's keys.
   */
  private final class KeyIterator implements IntIterator {
    private IntIterator iterator = new IndexIterator();

    KeyIterator() { }

    @Override
    public boolean hasNext() {
      return iterator.hasNext();
    }

    @Override
    public int next() {
      return keys[iterator.next()];
    }

    @Override
    public void remove() {
      iterator.remove();
    }
  }

  /**
   * Implements an Iterator of a generic type T, used for iteration over the
   * map's values.
   */
  private final class ValueIterator implements Iterator<T> {
    private IntIterator iterator = new IndexIterator();

    ValueIterator() { }

    @Override
    public boolean hasNext() {
      return iterator.hasNext();
    }

    @Override
    @SuppressWarnings("unchecked")
    public T next() {
      return (T) values[iterator.next()];
    }

    @Override
    public void remove() {
      iterator.remove();
    }
  }

  /**
   * Default capacity - in case no capacity was specified in the constructor
   */
  private static int defaultCapacity = 16;

  /**
   * Holds the base hash entries. If the capacity is 2^N, the base hash
   * holds 2^(N+1) entries.
   */
  int[] baseHash;

  /**
   * The current capacity of the map. Always 2^N and never less than 16. We
   * never use the zero index; it is reserved as 'Ground', which also keeps
   * the code fast.
   */
  private int capacity;

  /**
   * All entries are allocated at map creation and start out "free" or
   * empty. Whenever a new pair comes along, an entry is taken from the head
   * of the free list.
   */
  private int firstEmpty;

  /**
   * hashFactor is always (2^(N+1)) - 1. Used for faster hashing.
   */
  private int hashFactor;

  /**
   * This array holds the unique keys
   */
  int[] keys;

  /**
   * In case of collisions, the colliding entries are chained in a linked
   * list via next[] (prev tracks the previous node during removal). next[]
   * is also used to store the "empty" list.
   */
  int[] next;

  private int prev;

  /**
   * Number of objects currently in the map.
   */
  private int size;

  /**
   * This array holds the values
   */
  Object[] values;

  /**
   * Constructs a map with default capacity.
   */
  public IntToObjectMap() {
    this(defaultCapacity);
  }

  /**
   * Constructs a map with given capacity. Capacity is adjusted to the
   * nearest power of 2, with a minimum of 16.
   *
   * @param capacity
   *            minimum capacity for the map.
   */
  public IntToObjectMap(int capacity) {
    this.capacity = 16;
    // Minimum capacity is 16.
    while (this.capacity < capacity) {
      // Multiply by 2 as long as we're still under the requested capacity
      this.capacity <<= 1;
    }

    // As mentioned, we use the first index (0) as 'Ground', so we need the
    // length of the arrays to be one more than the capacity
    int arrayLength = this.capacity + 1;

    this.values = new Object[arrayLength];
    this.keys = new int[arrayLength];
    this.next = new int[arrayLength];

    // Hash entries are twice as big as the capacity.
    int baseHashSize = this.capacity << 1;

    this.baseHash = new int[baseHashSize];

    // The hash factor is 2^M - 1, used as an "AND" hashing operator.
    // {@link #calcBaseHashIndex(int)}
    this.hashFactor = baseHashSize - 1;

    this.size = 0;

    clear();
  }

  /**
   * Adds a pair to the map. Takes the first empty position from the
   * empty-linked-list's head - {@link #firstEmpty}.
   *
   * New pairs are always inserted at the head of their baseHash entry,
   * followed by the old colliding pair.
   *
   * @param key
   *            integer which maps the given object
   * @param e
   *            element which is being mapped using the given key
   */
  private void prvt_put(int key, T e) {
    // Hash entry to which the new pair would be inserted
    int hashIndex = calcBaseHashIndex(key);

    // 'Allocating' a pair from the "Empty" list.
    int objectIndex = firstEmpty;

    // Setting data
    firstEmpty = next[firstEmpty];
    values[objectIndex] = e;
    keys[objectIndex] = key;

    // Inserting the new pair as the first node in the specific hash entry
    next[objectIndex] = baseHash[hashIndex];
    baseHash[hashIndex] = objectIndex;

    // Announcing a new pair was added!
    ++size;
  }

  /**
   * Calculates the baseHash index using the internal <code>hashFactor</code>.
   */
  protected int calcBaseHashIndex(int key) {
    return key & hashFactor;
  }

  /**
   * Empties the map. Generates the "Empty" space list for later allocation.
   */
  public void clear() {
    // Clears the hash entries
    Arrays.fill(this.baseHash, 0);

    // Set size to zero
    size = 0;

    // Mark all array entries as empty. This is done with
    // <code>firstEmpty</code> pointing to the first valid index (1, as 0 is
    // used as 'Ground').
    firstEmpty = 1;

    // And setting all the <code>next[i]</code> to point at
    // <code>i+1</code>.
    for (int i = 1; i < this.capacity;) {
      next[i] = ++i;
    }

    // Surely, the last one should point to the 'Ground'.
    next[this.capacity] = 0;
  }

  /**
   * Checks if a given key exists in the map.
   *
   * @param key
   *            that is checked against the map data.
   * @return true if the key exists in the map, false otherwise.
   */
  public boolean containsKey(int key) {
    return find(key) != 0;
  }

  /**
   * Checks if the given object exists in the map.<br>
   * This method iterates over the collection, trying to find an equal object.
   *
   * @param o
   *            object that is checked against the map data.
   * @return true if the object exists in the map (in .equals() meaning),
   *         false otherwise.
   */
  public boolean containsValue(Object o) {
    for (Iterator<T> iterator = iterator(); iterator.hasNext();) {
      T object = iterator.next();
      if (object.equals(o)) {
        return true;
      }
    }
    return false;
  }

  /**
   * Finds the actual index of a given key.
   *
   * @return index of the key, or zero if the key wasn't found.
   */
  protected int find(int key) {
    // Calculate the hash entry.
    int baseHashIndex = calcBaseHashIndex(key);

    // Start from the hash entry.
    int localIndex = baseHash[baseHashIndex];

    // while the index does not point to the 'Ground'
    while (localIndex != 0) {
      // Return the index found in case of a matching key.
      if (keys[localIndex] == key) {
        return localIndex;
      }

      // Advance the local index
      localIndex = next[localIndex];
    }

    // If we got this far, it could only mean we did not find the key we
    // were asked for. Return the 'Ground' index.
    return 0;
  }

  /**
   * Finds the actual index of a given key with its baseHashIndex.<br>
   * Some methods already have the baseHashIndex; if those called
   * {@link #find} the hash would be re-calculated needlessly.
   *
   * @return the index of the given key, or 0 as 'Ground' if the key wasn't
   *         found.
   */
  private int findForRemove(int key, int baseHashIndex) {
    // Start from the hash entry.
    this.prev = 0;
    int index = baseHash[baseHashIndex];

    // while the index does not point to the 'Ground'
    while (index != 0) {
      // Return the index found in case of a matching key.
      if (keys[index] == key) {
        return index;
      }

      // Advance the local index
      prev = index;
      index = next[index];
    }

    // If we got this far, it could only mean we did not find the key we
    // were asked for. Return the 'Ground' index.
    this.prev = 0;
    return 0;
  }

  /**
   * Returns the object mapped with the given key.
   *
   * @param key
   *            int whose mapped object we're interested in.
   * @return an object mapped by the given key, or null if the key wasn't
   *         found.
   */
  @SuppressWarnings("unchecked")
  public T get(int key) {
    return (T) values[find(key)];
  }

  /**
   * Grows the map. Allocates a new map of double the capacity, and
   * fast-inserts the old key-value pairs.
   */
  @SuppressWarnings("unchecked")
  protected void grow() {
    IntToObjectMap<T> that = new IntToObjectMap<T>(this.capacity * 2);

    // Iterates fast over the collection. Any valid pair is put into the new
    // map without checking for duplicates or if there's enough space for
    // it.
    for (IndexIterator iterator = new IndexIterator(); iterator.hasNext();) {
      int index = iterator.next();
      that.prvt_put(this.keys[index], (T) this.values[index]);
    }

    // Copy that map's data into this one.
    this.capacity = that.capacity;
    this.size = that.size;
    this.firstEmpty = that.firstEmpty;
    this.values = that.values;
    this.keys = that.keys;
    this.next = that.next;
    this.baseHash = that.baseHash;
    this.hashFactor = that.hashFactor;
  }

  /**
   * @return true if the map is empty, false otherwise.
   */
  public boolean isEmpty() {
    return size == 0;
  }

  /**
   * Returns a new iterator for the mapped objects.
   */
  @Override
  public Iterator<T> iterator() {
    return new ValueIterator();
  }

  /** Returns an iterator on the map keys. */
  public IntIterator keyIterator() {
    return new KeyIterator();
  }

  /**
   * Returns the baseHash array as a string, used for debugging.
   */
  @SuppressWarnings("unused")
  private String getBaseHashAsString() {
    return Arrays.toString(baseHash);
  }

  /**
   * Inserts the <key,value> pair into the map. If the key already exists,
   * this method updates the mapped value to the given one, returning the old
   * mapped value.
   *
   * @return the old mapped value, or null if the key didn't exist.
   */
  @SuppressWarnings("unchecked")
  public T put(int key, T e) {
    // Does the key exist?
    int index = find(key);

    // Yes!
    if (index != 0) {
      // Set new data and exit.
      T old = (T) values[index];
      values[index] = e;
      return old;
    }

    // Is there enough room for a new pair?
    if (size == capacity) {
      // No? Then grow.
      grow();
    }

    // Now that everything is set, the pair can be put inside with no
    // worries.
    prvt_put(key, e);

    return null;
  }

  /**
   * Removes a <key,value> pair from the map and returns the mapped value,
   * or null if none existed.
   *
   * @param key used to find the value to remove
   * @return the removed value, or null if none existed.
   */
  @SuppressWarnings("unchecked")
  public T remove(int key) {
    int baseHashIndex = calcBaseHashIndex(key);
    int index = findForRemove(key, baseHashIndex);
    if (index != 0) {
      // If it is the first in the collision list, we should promote its
      // next colliding element.
      if (prev == 0) {
        baseHash[baseHashIndex] = next[index];
      }

      next[prev] = next[index];
      next[index] = firstEmpty;
      firstEmpty = index;
      --size;
      return (T) values[index];
    }

    return null;
  }

  /**
   * @return number of pairs currently in the map
   */
  public int size() {
    return this.size;
  }

  /**
   * Translates the mapped pairs' values into an array of Objects.
   *
   * @return an object array of all the values currently in the map.
   */
  public Object[] toArray() {
    int j = -1;
    Object[] array = new Object[size];

    // Iterates over the values, adding them to the array.
    for (Iterator<T> iterator = iterator(); iterator.hasNext();) {
      array[++j] = iterator.next();
    }
    return array;
  }

  /**
   * Translates the mapped pairs' values into an array of T.
   *
   * @param a
   *            the array into which the elements of the map are to be
   *            stored, if it is big enough; otherwise, use whatever space it
   *            has, setting the entry after the true data to null.
   *
   * @return an array containing the elements of the map
   */
  public T[] toArray(T[] a) {
    int j = 0;
    // Iterates over the values, adding them to the array.
    for (Iterator<T> iterator = iterator(); j < a.length
        && iterator.hasNext(); ++j) {
      a[j] = iterator.next();
    }

    if (j < a.length) {
      a[j] = null;
    }

    return a;
  }

  @Override
  public String toString() {
    StringBuffer sb = new StringBuffer();
    sb.append('{');
    IntIterator keyIterator = keyIterator();
    while (keyIterator.hasNext()) {
      int key = keyIterator.next();
      sb.append(key);
      sb.append('=');
      sb.append(get(key));
      if (keyIterator.hasNext()) {
        sb.append(',');
        sb.append(' ');
      }
    }
    sb.append('}');
    return sb.toString();
  }

  @Override
  public int hashCode() {
    return getClass().hashCode() ^ size();
  }

  @SuppressWarnings("unchecked")
  @Override
  public boolean equals(Object o) {
    IntToObjectMap<T> that = (IntToObjectMap<T>) o;
    if (that.size() != this.size()) {
      return false;
    }

    IntIterator it = keyIterator();
    while (it.hasNext()) {
      int key = it.next();
      if (!that.containsKey(key)) {
        return false;
      }

      T v1 = this.get(key);
      T v2 = that.get(key);
      // Null-safe comparison; the original chain of checks would NPE when
      // both values were null.
      if (v1 == null ? v2 != null : !v1.equals(v2)) {
        return false;
      }
    }
    return true;
  }
}
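A similar sketch for IntToObjectMap (IntToObjectMapDemo is a hypothetical name; same classpath assumption as above). Here the 'Ground' slot of the Object[] values array is never written, so null is the not-found sentinel, and the class implements Iterable<T> over its values:

import org.apache.lucene.facet.collections.IntToObjectMap;

public class IntToObjectMapDemo {
  public static void main(String[] args) {
    IntToObjectMap<String> labels = new IntToObjectMap<String>(100); // rounds up to 128
    labels.put(1, "books");
    labels.put(2, "music");
    System.out.println(labels.get(1)); // books
    System.out.println(labels.get(3)); // null -- the 'Ground' slot is never written
    for (String label : labels) {      // Iterable<T> iterates the values
      System.out.println(label);
    }
  }
}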
@ -1,623 +0,0 @@
package org.apache.lucene.facet.collections;

import java.util.Arrays;
import java.util.Iterator;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * An array-based hashtable which maps Objects of generic type
 * K to primitive float values.<br>
 * The hashtable is constructed with a given capacity, or 16 as a default. In
 * case there's not enough room for new pairs, the hashtable grows. <br>
 * Capacity is adjusted to a power of 2, and there are 2 * capacity entries for
 * the hash.
 *
 * The pre-allocated arrays (for keys, values) are of length capacity + 1,
 * where index 0 is used as 'Ground' or 'NULL'.<br>
 *
 * The arrays are allocated ahead of hash operations, and form an 'empty space'
 * list, from which each key,value pair is allocated.
 *
 * @lucene.experimental
 */
public class ObjectToFloatMap<K> {

  /**
   * Implements an IntIterator which iterates over all the allocated indexes.
   */
  private final class IndexIterator implements IntIterator {
    /**
     * The last used baseHashIndex. Needed for "jumping" from one hash entry
     * to another.
     */
    private int baseHashIndex = 0;

    /**
     * The next not-yet-visited index.
     */
    private int index = 0;

    /**
     * Index of the last visited pair. Used in {@link #remove()}.
     */
    private int lastIndex = 0;

    /**
     * Creates the iterator, making <code>index</code> point to the first
     * index which is not empty. If no such index exists (e.g. the map is
     * empty), it is zero.
     */
    public IndexIterator() {
      for (baseHashIndex = 0; baseHashIndex < baseHash.length; ++baseHashIndex) {
        index = baseHash[baseHashIndex];
        if (index != 0) {
          break;
        }
      }
    }

    @Override
    public boolean hasNext() {
      return (index != 0);
    }

    @Override
    public int next() {
      // Save the last index visited
      lastIndex = index;

      // Advance the index
      index = next[index];

      // If the next index points to the 'Ground' it means we're done with
      // the current hash entry and we need to jump to the next one. This
      // is done until all the hash entries have been visited.
      while (index == 0 && ++baseHashIndex < baseHash.length) {
        index = baseHash[baseHashIndex];
      }

      return lastIndex;
    }

    @Override
    @SuppressWarnings("unchecked")
    public void remove() {
      ObjectToFloatMap.this.remove((K) keys[lastIndex]);
    }

  }

  /**
   * Implements an Iterator over the map's keys.
   */
  private final class KeyIterator implements Iterator<K> {
    private IntIterator iterator = new IndexIterator();

    KeyIterator() { }

    @Override
    public boolean hasNext() {
      return iterator.hasNext();
    }

    @Override
    @SuppressWarnings("unchecked")
    public K next() {
      return (K) keys[iterator.next()];
    }

    @Override
    public void remove() {
      iterator.remove();
    }
  }

  /**
   * Implements a FloatIterator, used for iteration over the map's values.
   */
  private final class ValueIterator implements FloatIterator {
    private IntIterator iterator = new IndexIterator();

    ValueIterator() { }

    @Override
    public boolean hasNext() {
      return iterator.hasNext();
    }

    @Override
    public float next() {
      return values[iterator.next()];
    }

    @Override
    public void remove() {
      iterator.remove();
    }
  }

  /**
   * Default capacity - in case no capacity was specified in the constructor
   */
  private static int defaultCapacity = 16;

  /**
   * Holds the base hash entries. If the capacity is 2^N, the base hash
   * holds 2^(N+1) entries.
   */
  int[] baseHash;

  /**
   * The current capacity of the map. Always 2^N and never less than 16. We
   * never use the zero index; it is reserved as 'Ground', which also keeps
   * the code fast.
   */
  private int capacity;

  /**
   * All entries are allocated at map creation and start out "free" or
   * empty. Whenever a new pair comes along, an entry is taken from the head
   * of the free list.
   */
  private int firstEmpty;

  /**
   * hashFactor is always (2^(N+1)) - 1. Used for faster hashing.
   */
  private int hashFactor;

  /**
   * This array holds the unique keys
   */
  Object[] keys;

  /**
   * In case of collisions, the colliding entries are chained in a linked
   * list via next[] (prev tracks the previous node during removal). next[]
   * is also used to store the "empty" list.
   */
  int[] next;

  private int prev;

  /**
   * Number of objects currently in the map.
   */
  private int size;

  /**
   * This array holds the values
   */
  float[] values;

  /**
   * Constructs a map with default capacity.
   */
  public ObjectToFloatMap() {
    this(defaultCapacity);
  }

  /**
   * Constructs a map with given capacity. Capacity is adjusted to the
   * nearest power of 2, with a minimum of 16.
   *
   * @param capacity
   *            minimum capacity for the map.
   */
  public ObjectToFloatMap(int capacity) {
    this.capacity = 16;
    // Minimum capacity is 16.
    while (this.capacity < capacity) {
      // Multiply by 2 as long as we're still under the requested capacity
      this.capacity <<= 1;
    }

    // As mentioned, we use the first index (0) as 'Ground', so we need the
    // length of the arrays to be one more than the capacity
    int arrayLength = this.capacity + 1;

    this.values = new float[arrayLength];
    this.keys = new Object[arrayLength];
    this.next = new int[arrayLength];

    // Hash entries are twice as big as the capacity.
    int baseHashSize = this.capacity << 1;

    this.baseHash = new int[baseHashSize];

    // The hash factor is 2^M - 1, used as an "AND" hashing operator.
    // {@link #calcBaseHashIndex(Object)}
    this.hashFactor = baseHashSize - 1;

    this.size = 0;

    clear();
  }

  /**
   * Adds a pair to the map. Takes the first empty position from the
   * empty-linked-list's head - {@link #firstEmpty}.
   *
   * New pairs are always inserted at the head of their baseHash entry,
   * followed by the old colliding pair.
   *
   * @param key
   *            key with which the given value is associated
   * @param e
   *            value which is being mapped using the given key
   */
  private void prvt_put(K key, float e) {
    // Hash entry to which the new pair would be inserted
    int hashIndex = calcBaseHashIndex(key);

    // 'Allocating' a pair from the "Empty" list.
    int objectIndex = firstEmpty;

    // Setting data
    firstEmpty = next[firstEmpty];
    values[objectIndex] = e;
    keys[objectIndex] = key;

    // Inserting the new pair as the first node in the specific hash entry
    next[objectIndex] = baseHash[hashIndex];
    baseHash[hashIndex] = objectIndex;

    // Announcing a new pair was added!
    ++size;
  }

  /**
   * Calculates the baseHash index using the internal <code>hashFactor</code>.
   */
  protected int calcBaseHashIndex(K key) {
    return key.hashCode() & hashFactor;
  }

  /**
   * Empties the map. Generates the "Empty" space list for later allocation.
   */
  public void clear() {
    // Clears the hash entries
    Arrays.fill(this.baseHash, 0);

    // Set size to zero
    size = 0;

    // Plant the not-found sentinel in the 'Ground' slot.
    values[0] = Float.NaN;

    // Mark all array entries as empty. This is done with
    // <code>firstEmpty</code> pointing to the first valid index (1, as 0 is
    // used as 'Ground').
    firstEmpty = 1;

    // And setting all the <code>next[i]</code> to point at
    // <code>i+1</code>.
    for (int i = 1; i < this.capacity;) {
      next[i] = ++i;
    }

    // Surely, the last one should point to the 'Ground'.
    next[this.capacity] = 0;
  }

  /**
   * Checks if a given key exists in the map.
   *
   * @param key
   *            that is checked against the map data.
   * @return true if the key exists in the map, false otherwise.
   */
  public boolean containsKey(K key) {
    return find(key) != 0;
  }

  /**
   * Checks if the given value exists in the map.<br>
   * This method iterates over the collection, trying to find an equal value.
   *
   * @param o
   *            value that is checked against the map data.
   * @return true if the value exists in the map, false otherwise.
   */
  public boolean containsValue(float o) {
    for (FloatIterator iterator = iterator(); iterator.hasNext();) {
      if (o == iterator.next()) {
        return true;
      }
    }
    return false;
  }

  /**
   * Finds the actual index of a given key.
   *
   * @return index of the key, or zero if the key wasn't found.
   */
  protected int find(K key) {
    // Calculate the hash entry.
    int baseHashIndex = calcBaseHashIndex(key);

    // Start from the hash entry.
    int localIndex = baseHash[baseHashIndex];

    // while the index does not point to the 'Ground'
    while (localIndex != 0) {
      // Return the index found in case of a matching key.
      if (keys[localIndex].equals(key)) {
        return localIndex;
      }

      // Advance the local index
      localIndex = next[localIndex];
    }

    // If we got this far, it could only mean we did not find the key we
    // were asked for. Return the 'Ground' index.
    return 0;
  }

  /**
   * Finds the actual index of a given key with its baseHashIndex.<br>
   * Some methods already have the baseHashIndex; if those called
   * {@link #find} the hash would be re-calculated needlessly.
   *
   * @return the index of the given key, or 0 as 'Ground' if the key wasn't
   *         found.
   */
  private int findForRemove(K key, int baseHashIndex) {
    // Start from the hash entry.
    this.prev = 0;
    int index = baseHash[baseHashIndex];

    // while the index does not point to the 'Ground'
    while (index != 0) {
      // Return the index found in case of a matching key.
      if (keys[index].equals(key)) {
        return index;
      }

      // Advance the local index
      prev = index;
      index = next[index];
    }

    // If we got this far, it could only mean we did not find the key we
    // were asked for. Return the 'Ground' index.
    this.prev = 0;
    return 0;
  }

  /**
   * Returns the float mapped with the given key.
   *
   * @param key
   *            object whose mapped float we're interested in.
   * @return the float mapped by the given key, or Float.NaN if the key
   *         wasn't found.
   */
  public float get(K key) {
    return values[find(key)];
  }

  /**
   * Grows the map. Allocates a new map of double the capacity, and
   * fast-inserts the old key-value pairs.
   */
  @SuppressWarnings("unchecked")
  protected void grow() {
    ObjectToFloatMap<K> that = new ObjectToFloatMap<K>(this.capacity * 2);

    // Iterates fast over the collection. Any valid pair is put into the new
    // map without checking for duplicates or if there's enough space for
    // it.
    for (IndexIterator iterator = new IndexIterator(); iterator.hasNext();) {
      int index = iterator.next();
      that.prvt_put((K) this.keys[index], this.values[index]);
    }

    // Copy that map's data into this one.
    this.capacity = that.capacity;
    this.size = that.size;
    this.firstEmpty = that.firstEmpty;
    this.values = that.values;
    this.keys = that.keys;
    this.next = that.next;
    this.baseHash = that.baseHash;
    this.hashFactor = that.hashFactor;
  }

  /**
   * @return true if the map is empty, false otherwise.
   */
  public boolean isEmpty() {
    return size == 0;
  }

  /**
   * Returns a new iterator for the mapped floats.
   */
  public FloatIterator iterator() {
    return new ValueIterator();
  }

  /** Returns an iterator on the map keys. */
  public Iterator<K> keyIterator() {
    return new KeyIterator();
  }

  /**
   * Returns the baseHash array as a string, used for debugging.
   */
  @SuppressWarnings("unused")
  private String getBaseHashAsString() {
    return Arrays.toString(baseHash);
  }

  /**
   * Inserts the <key,value> pair into the map. If the key already exists,
   * this method updates the mapped value to the given one, returning the old
   * mapped value.
   *
   * @return the old mapped value, or {@link Float#NaN} if the key didn't
   *         exist.
   */
  public float put(K key, float e) {
    // Does the key exist?
    int index = find(key);

    // Yes!
    if (index != 0) {
      // Set new data and exit.
      float old = values[index];
      values[index] = e;
      return old;
    }

    // Is there enough room for a new pair?
    if (size == capacity) {
      // No? Then grow.
      grow();
    }

    // Now that everything is set, the pair can be put inside with no
    // worries.
    prvt_put(key, e);

    return Float.NaN;
  }

  /**
   * Removes a <key,value> pair from the map and returns the mapped value,
   * or {@link Float#NaN} if none existed.
   *
   * @param key used to find the value to remove
   * @return the removed value, or {@link Float#NaN} if none existed.
   */
  public float remove(K key) {
    int baseHashIndex = calcBaseHashIndex(key);
    int index = findForRemove(key, baseHashIndex);
    if (index != 0) {
      // If it is the first in the collision list, we should promote its
      // next colliding element.
      if (prev == 0) {
        baseHash[baseHashIndex] = next[index];
      }

      next[prev] = next[index];
      next[index] = firstEmpty;
      firstEmpty = index;
      --size;
      return values[index];
    }

    return Float.NaN;
  }

  /**
   * @return number of pairs currently in the map
   */
  public int size() {
    return this.size;
  }

  /**
   * Translates the mapped pairs' values into an array of floats.
   *
   * @return a float array of all the values currently in the map.
   */
  public float[] toArray() {
    int j = -1;
    float[] array = new float[size];

    // Iterates over the values, adding them to the array.
    for (FloatIterator iterator = iterator(); iterator.hasNext();) {
      array[++j] = iterator.next();
    }
    return array;
  }

  /**
   * Translates the mapped pairs' values into an array of floats.
   *
   * @param a
   *            the array into which the elements of the map are to be
   *            stored, if it is big enough; otherwise, as much space as it
   *            has is used, and any entry after the true data is marked
   *            with Float.NaN.
   *
   * @return an array containing the elements of the map
   */
  public float[] toArray(float[] a) {
    int j = 0;
    // Iterates over the values, adding them to the array.
    for (FloatIterator iterator = iterator(); j < a.length
        && iterator.hasNext(); ++j) {
      a[j] = iterator.next();
    }
    if (j < a.length) {
      a[j] = Float.NaN;
    }

    return a;
  }

  @Override
  public String toString() {
    StringBuffer sb = new StringBuffer();
    sb.append('{');
    Iterator<K> keyIterator = keyIterator();
    while (keyIterator.hasNext()) {
      K key = keyIterator.next();
      sb.append(key);
      sb.append('=');
      sb.append(get(key));
      if (keyIterator.hasNext()) {
        sb.append(',');
        sb.append(' ');
      }
    }
    sb.append('}');
    return sb.toString();
  }

  @Override
  public int hashCode() {
    return getClass().hashCode() ^ size();
  }

  @SuppressWarnings("unchecked")
  @Override
  public boolean equals(Object o) {
    ObjectToFloatMap<K> that = (ObjectToFloatMap<K>) o;
    if (that.size() != this.size()) {
      return false;
    }

    Iterator<K> it = keyIterator();
    while (it.hasNext()) {
      K key = it.next();
      float v1 = this.get(key);
      float v2 = that.get(key);
      // A key missing from 'that' surfaces as Float.NaN, which fails the
      // comparison against any stored non-NaN value.
      if (Float.compare(v1, v2) != 0) {
        return false;
      }
    }
    return true;
  }
}
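ObjectToFloatMap cannot use 0 or null as a sentinel for its primitive float values, so clear() plants Float.NaN in the 'Ground' slot and every not-found path returns it. A short sketch (ObjectToFloatMapDemo is a hypothetical name; same classpath assumption as above); note that NaN must be tested with Float.isNaN(), because NaN != NaN:

import org.apache.lucene.facet.collections.ObjectToFloatMap;

public class ObjectToFloatMapDemo {
  public static void main(String[] args) {
    ObjectToFloatMap<String> boosts = new ObjectToFloatMap<String>();
    boosts.put("title", 2.0f);
    float missing = boosts.get("body");             // Float.NaN: key not present
    System.out.println(Float.isNaN(missing));       // true -- NaN == NaN is always false
    System.out.println(boosts.containsKey("body")); // false, the unambiguous check
  }
}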
@ -1,622 +0,0 @@
package org.apache.lucene.facet.collections;
|
|
||||||
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.Iterator;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* An Array-based hashtable which maps Objects of generic type
|
|
||||||
* T to primitive int values.<br>
|
|
||||||
* The hashtable is constructed with a given capacity, or 16 as a default. In
|
|
||||||
* case there's not enough room for new pairs, the hashtable grows. <br>
|
|
||||||
* Capacity is adjusted to a power of 2, and there are 2 * capacity entries for
|
|
||||||
* the hash.
|
|
||||||
*
|
|
||||||
* The pre allocated arrays (for keys, values) are at length of capacity + 1,
|
|
||||||
* when index 0 is used as 'Ground' or 'NULL'.<br>
|
|
||||||
*
|
|
||||||
* The arrays are allocated ahead of hash operations, and form an 'empty space'
|
|
||||||
* list, to which the key,value pair is allocated.
|
|
||||||
*
|
|
||||||
* @lucene.experimental
|
|
||||||
*/
|
|
||||||
public class ObjectToIntMap<K> {
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Implements an IntIterator which iterates over all the allocated indexes.
|
|
||||||
*/
|
|
||||||
private final class IndexIterator implements IntIterator {
|
|
||||||
/**
|
|
||||||
* The last used baseHashIndex. Needed for "jumping" from one hash entry
|
|
||||||
* to another.
|
|
||||||
*/
|
|
||||||
private int baseHashIndex = 0;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* The next not-yet-visited index.
|
|
||||||
*/
|
|
||||||
private int index = 0;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Index of the last visited pair. Used in {@link #remove()}.
|
|
||||||
*/
|
|
||||||
private int lastIndex = 0;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Create the Iterator, make <code>index</code> point to the "first"
|
|
||||||
* index which is not empty. If such does not exist (eg. the map is
|
|
||||||
* empty) it would be zero.
|
|
||||||
*/
|
|
||||||
public IndexIterator() {
|
|
||||||
for (baseHashIndex = 0; baseHashIndex < baseHash.length; ++baseHashIndex) {
|
|
||||||
index = baseHash[baseHashIndex];
|
|
||||||
if (index != 0) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean hasNext() {
|
|
||||||
return (index != 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int next() {
|
|
||||||
// Save the last index visited
|
|
||||||
lastIndex = index;
|
|
||||||
|
|
||||||
// next the index
|
|
||||||
index = next[index];
|
|
||||||
|
|
||||||
// if the next index points to the 'Ground' it means we're done with
|
|
||||||
// the current hash entry and we need to jump to the next one. This
|
|
||||||
// is done until all the hash entries had been visited.
|
|
||||||
while (index == 0 && ++baseHashIndex < baseHash.length) {
|
|
||||||
index = baseHash[baseHashIndex];
|
|
||||||
}
|
|
||||||
|
|
||||||
return lastIndex;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
@SuppressWarnings("unchecked")
|
|
||||||
public void remove() {
|
|
||||||
ObjectToIntMap.this.remove((K) keys[lastIndex]);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Implements an IntIterator, used for iteration over the map's keys.
|
|
||||||
*/
|
|
||||||
private final class KeyIterator implements Iterator<K> {
|
|
||||||
private IntIterator iterator = new IndexIterator();
|
|
||||||
|
|
||||||
KeyIterator() { }
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean hasNext() {
|
|
||||||
return iterator.hasNext();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
@SuppressWarnings("unchecked")
|
|
||||||
public K next() {
|
|
||||||
return (K) keys[iterator.next()];
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void remove() {
|
|
||||||
iterator.remove();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Implements an Iterator of a generic type T used for iteration over the
|
|
||||||
* map's values.
|
|
||||||
*/
|
|
||||||
private final class ValueIterator implements IntIterator {
|
|
||||||
private IntIterator iterator = new IndexIterator();
|
|
||||||
|
|
||||||
ValueIterator() {}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean hasNext() {
|
|
||||||
return iterator.hasNext();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int next() {
|
|
||||||
return values[iterator.next()];
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void remove() {
|
|
||||||
iterator.remove();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Default capacity - in case no capacity was specified in the constructor
|
|
||||||
*/
|
|
||||||
private static int defaultCapacity = 16;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Holds the base hash entries. if the capacity is 2^N, than the base hash
|
|
||||||
* holds 2^(N+1). It can hold
|
|
||||||
*/
|
|
||||||
int[] baseHash;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* The current capacity of the map. Always 2^N and never less than 16. We
|
|
||||||
* never use the zero index. It is needed to improve performance and is also
|
|
||||||
* used as "ground".
|
|
||||||
*/
|
|
||||||
private int capacity;
|
|
||||||
/**
|
|
||||||
* All objects are being allocated at map creation. Those objects are "free"
|
|
||||||
* or empty. Whenever a new pair comes along, a pair is being "allocated" or
|
|
||||||
* taken from the free-linked list. as this is just a free list.
|
|
||||||
*/
|
|
||||||
private int firstEmpty;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* hashFactor is always (2^(N+1)) - 1. Used for faster hashing.
|
|
||||||
*/
|
|
||||||
private int hashFactor;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* This array holds the unique keys
|
|
||||||
*/
|
|
||||||
Object[] keys;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* In case of collisions, we implement a double linked list of the colliding
|
|
||||||
* hash's with the following next[] and prev[]. Those are also used to store
|
|
||||||
* the "empty" list.
|
|
||||||
*/
|
|
||||||
int[] next;
|
|
||||||
|
|
||||||
private int prev;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Number of currently objects in the map.
|
|
||||||
*/
|
|
||||||
private int size;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* This array holds the values
|
|
||||||
*/
|
|
||||||
int[] values;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Constructs a map with default capacity.
|
|
||||||
*/
|
|
||||||
public ObjectToIntMap() {
|
|
||||||
this(defaultCapacity);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Constructs a map with given capacity. Capacity is adjusted to a native
|
|
||||||
* power of 2, with minimum of 16.
|
|
||||||
*
|
|
||||||
* @param capacity
|
|
||||||
* minimum capacity for the map.
|
|
||||||
*/
|
|
||||||
public ObjectToIntMap(int capacity) {
|
|
||||||
this.capacity = 16;
|
|
||||||
// Minimum capacity is 16..
|
|
||||||
while (this.capacity < capacity) {
|
|
||||||
// Multiply by 2 as long as we're still under the requested capacity
|
|
||||||
this.capacity <<= 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// As mentioned, we use the first index (0) as 'Ground', so we need the
|
|
||||||
// length of the arrays to be one more than the capacity
|
|
||||||
int arrayLength = this.capacity + 1;
|
|
||||||
|
|
||||||
this.values = new int[arrayLength];
|
|
||||||
this.keys = new Object[arrayLength];
|
|
||||||
this.next = new int[arrayLength];
|
|
||||||
|
|
||||||
// Hash entries are twice as big as the capacity.
|
|
||||||
int baseHashSize = this.capacity << 1;
|
|
||||||
|
|
||||||
this.baseHash = new int[baseHashSize];
|
|
||||||
|
|
||||||
// The has factor is 2^M - 1 which is used as an "AND" hashing operator.
|
|
||||||
// {@link #calcBaseHash()}
|
|
||||||
this.hashFactor = baseHashSize - 1;
|
|
||||||
|
|
||||||
this.size = 0;
|
|
||||||
|
|
||||||
clear();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Adds a pair to the map. Takes the first empty position from the
|
|
||||||
* empty-linked-list's head - {@link #firstEmpty}.
|
|
||||||
*
|
|
||||||
* New pairs are always inserted to baseHash, and are followed by the old
|
|
||||||
* colliding pair.
|
|
||||||
*
|
|
||||||
* @param key
|
|
||||||
* integer which maps the given Object
|
|
||||||
* @param e
|
|
||||||
* element which is being mapped using the given key
|
|
||||||
*/
|
|
||||||
private void prvt_put(K key, int e) {
|
|
||||||
// Hash entry to which the new pair would be inserted
|
|
||||||
int hashIndex = calcBaseHashIndex(key);
|
|
||||||
|
|
||||||
// 'Allocating' a pair from the "Empty" list.
|
|
||||||
int objectIndex = firstEmpty;
|
|
||||||
|
|
||||||
// Setting data
|
|
||||||
firstEmpty = next[firstEmpty];
|
|
||||||
values[objectIndex] = e;
|
|
||||||
keys[objectIndex] = key;
|
|
||||||
|
|
||||||
// Inserting the new pair as the first node in the specific hash entry
|
|
||||||
next[objectIndex] = baseHash[hashIndex];
|
|
||||||
baseHash[hashIndex] = objectIndex;
|
|
||||||
|
|
||||||
// Announcing a new pair was added!
|
|
||||||
++size;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Calculating the baseHash index using the internal <code>hashFactor</code>.
|
|
||||||
*/
|
|
||||||
protected int calcBaseHashIndex(K key) {
|
|
||||||
return key.hashCode() & hashFactor;
|
|
||||||
}
|
|
||||||
|
|
||||||
  /**
   * Empties the map. Generates the "Empty" space list for later allocation.
   */
  public void clear() {
    // Clears the hash entries
    Arrays.fill(this.baseHash, 0);

    // Set size to zero
    size = 0;

    // The value of the 'Ground' slot, returned by get() for missing keys.
    values[0] = Integer.MAX_VALUE;

    // Mark all array entries as empty. This is done with
    // <code>firstEmpty</code> pointing to the first valid index (1, as 0 is
    // used as 'Ground').
    firstEmpty = 1;

    // And setting all the <code>next[i]</code> to point at
    // <code>i+1</code>.
    for (int i = 1; i < this.capacity;) {
      next[i] = ++i;
    }

    // Finally, the last entry points back to the 'Ground'.
    next[this.capacity] = 0;
  }
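  // Illustrative comment (not in the original source): after clear() on a map
  // with capacity 4, the free list is firstEmpty = 1 and
  // next[1..4] = {2, 3, 4, 0}, i.e. slots 1-4 chained, terminated at 'Ground'.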
  /**
   * Checks if a given key exists in the map.
   * 
   * @param key
   *          key that is checked against the map data.
   * @return true if the key exists in the map, false otherwise.
   */
  public boolean containsKey(K key) {
    return find(key) != 0;
  }
  /**
   * Checks if the given value exists in the map.<br>
   * This method iterates over the map's values, looking for an equal value.
   * 
   * @param o
   *          value that is checked against the map data.
   * @return true if the value exists in the map, false otherwise.
   */
  public boolean containsValue(int o) {
    for (IntIterator iterator = iterator(); iterator.hasNext();) {
      if (o == iterator.next()) {
        return true;
      }
    }
    return false;
  }
  /**
   * Finds the actual index of a given key.
   * 
   * @return index of the key, or 0 ('Ground') if the key wasn't found.
   */
  protected int find(K key) {
    // Calculate the hash entry.
    int baseHashIndex = calcBaseHashIndex(key);

    // Start from the hash entry.
    int localIndex = baseHash[baseHashIndex];

    // while the index does not point to the 'Ground'
    while (localIndex != 0) {
      // return the index found in case of a matching key.
      if (keys[localIndex].equals(key)) {
        return localIndex;
      }

      // advance to the next index in the collision list
      localIndex = next[localIndex];
    }

    // If we got this far, it could only mean we did not find the key we
    // were asked for. Return the 'Ground' index.
    return 0;
  }
  /**
   * Finds the actual index of a given key with its baseHashIndex.<br>
   * Some methods already hold the baseHashIndex; if those called {@link #find}
   * the hash would be re-calculated needlessly. As a side effect, {@link #prev}
   * is set to the index preceding the found one in the collision list.
   * 
   * @return the index of the given key, or 0 ('Ground') if the key wasn't
   *         found.
   */
  private int findForRemove(K key, int baseHashIndex) {
    // Start from the hash entry.
    this.prev = 0;
    int index = baseHash[baseHashIndex];

    // while the index does not point to the 'Ground'
    while (index != 0) {
      // return the index found in case of a matching key.
      if (keys[index].equals(key)) {
        return index;
      }

      // advance to the next index in the collision list
      prev = index;
      index = next[index];
    }

    // If we got this far, it could only mean we did not find the key we
    // were asked for. Return the 'Ground' index.
    this.prev = 0;
    return 0;
  }
  /**
   * Returns the value mapped to the given key.
   * 
   * @param key
   *          key whose mapped value we're interested in.
   * @return the value mapped to the given key, or the 'Ground' value
   *         (Integer.MAX_VALUE after {@link #clear()}) if the key wasn't found.
   */
  public int get(K key) {
    return values[find(key)];
  }
  /**
   * Grows the map. Allocates a new map of double the capacity, and
   * fast-inserts the old key-value pairs.
   */
  @SuppressWarnings("unchecked")
  protected void grow() {
    ObjectToIntMap<K> that = new ObjectToIntMap<K>(this.capacity * 2);

    // Iterates fast over the collection. Any valid pair is put into the new
    // map without checking for duplicates or whether there's enough space for
    // it.
    for (IndexIterator iterator = new IndexIterator(); iterator.hasNext();) {
      int index = iterator.next();
      that.prvt_put((K) this.keys[index], this.values[index]);
    }

    // Copy the new map's data into this one.
    this.capacity = that.capacity;
    this.size = that.size;
    this.firstEmpty = that.firstEmpty;
    this.values = that.values;
    this.keys = that.keys;
    this.next = that.next;
    this.baseHash = that.baseHash;
    this.hashFactor = that.hashFactor;
  }
  /**
   * @return true if the map is empty, false otherwise.
   */
  public boolean isEmpty() {
    return size == 0;
  }
  /**
   * Returns a new iterator for the mapped values.
   */
  public IntIterator iterator() {
    return new ValueIterator();
  }

  /** Returns a new iterator for the keys. */
  public Iterator<K> keyIterator() {
    return new KeyIterator();
  }
  /**
   * Returns the baseHash array as a string; used for debugging purposes.
   */
  @SuppressWarnings("unused")
  private String getBaseHashAsString() {
    return Arrays.toString(baseHash);
  }
  /**
   * Inserts the &lt;key,value&gt; pair into the map. If the key already
   * exists, this method updates the mapped value to the given one, returning
   * the old mapped value.
   * 
   * @return the old mapped value, or 0 if the key didn't exist.
   */
  public int put(K key, int e) {
    // Does the key exist?
    int index = find(key);

    // Yes!
    if (index != 0) {
      // Set the new data and exit.
      int old = values[index];
      values[index] = e;
      return old;
    }

    // Is there enough room for a new pair?
    if (size == capacity) {
      // No? Then grow the map.
      grow();
    }

    // Now that everything is set, the pair can simply be put inside with no
    // worries.
    prvt_put(key, e);

    return 0;
  }
  /**
   * Removes a &lt;key,value&gt; pair from the map and returns the mapped
   * value, or 0 if none existed.
   * 
   * @param key used to find the value to remove
   * @return the removed value, or 0 if none existed.
   */
  public int remove(K key) {
    int baseHashIndex = calcBaseHashIndex(key);
    int index = findForRemove(key, baseHashIndex);
    if (index != 0) {
      // If it is the first in the collision list, we should promote its
      // next colliding element.
      if (prev == 0) {
        baseHash[baseHashIndex] = next[index];
      }

      // Unlink the pair and return it to the head of the "Empty" list.
      // (When prev == 0 this writes to next[0], the unused 'Ground' slot,
      // which is harmless.)
      next[prev] = next[index];
      next[index] = firstEmpty;
      firstEmpty = index;
      --size;
      return values[index];
    }

    return 0;
  }
  /**
   * @return number of pairs currently in the map
   */
  public int size() {
    return this.size;
  }
  /**
   * Translates the mapped pairs' values into an array of ints.
   * 
   * @return an int array of all the values currently in the map.
   */
  public int[] toArray() {
    int j = -1;
    int[] array = new int[size];

    // Iterates over the values, adding them to the array.
    for (IntIterator iterator = iterator(); iterator.hasNext();) {
      array[++j] = iterator.next();
    }
    return array;
  }
  /**
   * Translates the mapped pairs' values into the given array of ints.
   * 
   * @param a
   *          the array into which the values are to be stored, if it is big
   *          enough; otherwise, as many values as fit are stored.
   * 
   * @return the given array, with Integer.MAX_VALUE written after the last
   *         copied value when there is room for it.
   */
  public int[] toArray(int[] a) {
    int j = 0;
    // Iterates over the values, adding them to the array.
    for (IntIterator iterator = iterator(); j < a.length
        && iterator.hasNext(); ++j) {
      a[j] = iterator.next();
    }
    // Mark the end of the copied values with the 'Ground' value.
    if (j < a.length) {
      a[j] = Integer.MAX_VALUE;
    }

    return a;
  }
  @Override
  public String toString() {
    StringBuffer sb = new StringBuffer();
    sb.append('{');
    Iterator<K> keyIterator = keyIterator();
    while (keyIterator.hasNext()) {
      K key = keyIterator.next();
      sb.append(key);
      sb.append('=');
      sb.append(get(key));
      if (keyIterator.hasNext()) {
        sb.append(',');
        sb.append(' ');
      }
    }
    sb.append('}');
    return sb.toString();
  }
  @Override
  public int hashCode() {
    return getClass().hashCode() ^ size();
  }
@SuppressWarnings("unchecked")
|
|
||||||
@Override
|
|
||||||
public boolean equals(Object o) {
|
|
||||||
ObjectToIntMap<K> that = (ObjectToIntMap<K>)o;
|
|
||||||
if (that.size() != this.size()) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
Iterator<K> it = keyIterator();
|
|
||||||
while (it.hasNext()) {
|
|
||||||
K key = it.next();
|
|
||||||
int v1 = this.get(key);
|
|
||||||
int v2 = that.get(key);
|
|
||||||
if (Float.compare(v1, v2) != 0) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
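A minimal usage sketch for the map above (hypothetical calling code, not part of this commit; the capacity, keys and values are illustrative):

// Capacity 100 is rounded up to 128; get() returns the 'Ground' value
// (Integer.MAX_VALUE) for missing keys, so guard lookups with containsKey().
ObjectToIntMap<String> ordinals = new ObjectToIntMap<String>(100);
ordinals.put("a/b", 17);
ordinals.put("a/c", 42);
int old = ordinals.put("a/b", 18);        // returns the previous value, 17
int ord = ordinals.containsKey("a/b") ? ordinals.get("a/b") : -1; // 18
int removed = ordinals.remove("a/c");     // returns 42, or 0 if absent
System.out.println(ordinals);             // prints {a/b=18}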
@ -1,24 +0,0 @@
<!--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements.  See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License.  You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-->
<html>
<head>
<title>Facets Collections</title>
</head>
<body>
Various optimized Collections implementations.
</body>
</html>
@ -1,180 +0,0 @@
package org.apache.lucene.facet.complements;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.HashMap;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.lucene.facet.old.Aggregator;
import org.apache.lucene.facet.old.CountingAggregator;
import org.apache.lucene.facet.old.OldFacetsAccumulator;
import org.apache.lucene.facet.old.ScoredDocIdsUtils;
import org.apache.lucene.facet.params.CategoryListParams;
import org.apache.lucene.facet.params.FacetIndexingParams;
import org.apache.lucene.facet.params.FacetSearchParams;
import org.apache.lucene.facet.search.CategoryListIterator;
import org.apache.lucene.facet.search.CountFacetRequest;
import org.apache.lucene.facet.search.FacetArrays;
import org.apache.lucene.facet.search.FacetRequest;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.util.PartitionsUtils;
import org.apache.lucene.index.IndexReader;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * Maintains total facet counts per partition, for given parameters:
 * <ul>
 * <li>Index reader of an index</li>
 * <li>Taxonomy index reader</li>
 * <li>Facet indexing params (and particularly the category list params)</li>
 * </ul>
 * The total facet counts are maintained as an array of arrays of integers,
 * where a separate array is kept for each partition.
 * 
 * @lucene.experimental
 */
public class TotalFacetCounts {
  
  /** total facet counts per partition: totalCounts[partition][ordinal%partitionLength] */
  private int[][] totalCounts = null;
  
  private final TaxonomyReader taxonomy;
  private final FacetIndexingParams facetIndexingParams;

  private final static AtomicInteger atomicGen4Test = new AtomicInteger(1);
  /** Creation type, for test purposes */
  enum CreationType { Computed, Loaded } // for testing
  final int gen4test;
  final CreationType createType4test;
  
  /**
   * Constructs with the given counts - either loaded from an index Directory
   * or freshly recomputed.
   */
  private TotalFacetCounts (TaxonomyReader taxonomy, FacetIndexingParams facetIndexingParams, 
      int[][] counts, CreationType createType4Test) {
    this.taxonomy = taxonomy;
    this.facetIndexingParams = facetIndexingParams;
    this.totalCounts = counts;
    this.createType4test = createType4Test;
    this.gen4test = atomicGen4Test.incrementAndGet();
  }
  /**
   * Fills a partition's array with the total counts of that partition.
   * @param partitionArray array to fill
   * @param partition number of the required partition
   */
  public void fillTotalCountsForPartition(int[] partitionArray, int partition) {
    int partitionSize = partitionArray.length;
    int[] countArray = totalCounts[partition];
    if (countArray == null) {
      countArray = new int[partitionSize];
      totalCounts[partition] = countArray;
    }
    int length = Math.min(partitionSize, countArray.length);
    System.arraycopy(countArray, 0, partitionArray, 0, length);
  }
  
  /**
   * Returns the total count of an input category.
   * @param ordinal ordinal of the category whose total count is required
   */
  public int getTotalCount(int ordinal) {
    int partition = PartitionsUtils.partitionNumber(facetIndexingParams, ordinal);
    int offset = ordinal % PartitionsUtils.partitionSize(facetIndexingParams, taxonomy);
    return totalCounts[partition][offset];
  }
  static TotalFacetCounts loadFromFile(File inputFile, TaxonomyReader taxonomy,
      FacetIndexingParams facetIndexingParams) throws IOException {
    DataInputStream dis = new DataInputStream(new BufferedInputStream(new FileInputStream(inputFile)));
    try {
      int[][] counts = new int[dis.readInt()][];
      for (int i = 0; i < counts.length; i++) {
        int size = dis.readInt();
        if (size < 0) {
          counts[i] = null;
        } else {
          counts[i] = new int[size];
          for (int j = 0; j < size; j++) {
            counts[i][j] = dis.readInt();
          }
        }
      }
      return new TotalFacetCounts(taxonomy, facetIndexingParams, counts, CreationType.Loaded);
    } finally {
      dis.close();
    }
  }
  
  static void storeToFile(File outputFile, TotalFacetCounts tfc) throws IOException {
    DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(outputFile)));
    try {
      dos.writeInt(tfc.totalCounts.length);
      for (int[] counts : tfc.totalCounts) {
        if (counts == null) {
          dos.writeInt(-1);
        } else {
          dos.writeInt(counts.length);
          for (int i : counts) {
            dos.writeInt(i);
          }
        }
      }
    } finally {
      dos.close();
    }
  }
  // needed because FacetSearchParams do not allow empty FacetRequests
  private static final FacetRequest DUMMY_REQ = new CountFacetRequest(FacetLabel.EMPTY, 1);
  
  static TotalFacetCounts compute(final IndexReader indexReader, final TaxonomyReader taxonomy,
      final FacetIndexingParams facetIndexingParams) throws IOException {
    int partitionSize = PartitionsUtils.partitionSize(facetIndexingParams, taxonomy);
    final int[][] counts = new int[(int) Math.ceil(taxonomy.getSize() / (float) partitionSize)][partitionSize];
    FacetSearchParams newSearchParams = new FacetSearchParams(facetIndexingParams, DUMMY_REQ);
    OldFacetsAccumulator sfa = new OldFacetsAccumulator(newSearchParams, indexReader, taxonomy) {
      @Override
      protected HashMap<CategoryListIterator, Aggregator> getCategoryListMap(
          FacetArrays facetArrays, int partition) throws IOException {
        
        // count every category list of this partition into the shared counts array
        Aggregator aggregator = new CountingAggregator(counts[partition]);
        HashMap<CategoryListIterator, Aggregator> map = new HashMap<CategoryListIterator, Aggregator>();
        for (CategoryListParams clp : facetIndexingParams.getAllCategoryListParams()) {
          map.put(clp.createCategoryListIterator(partition), aggregator);
        }
        return map;
      }
    };
    sfa.setComplementThreshold(OldFacetsAccumulator.DISABLE_COMPLEMENT);
    sfa.accumulate(ScoredDocIdsUtils.createAllDocsScoredDocIDs(indexReader));
    return new TotalFacetCounts(taxonomy, facetIndexingParams, counts, CreationType.Computed);
  }

}
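A round-trip sketch of the compute/store/load API above (hypothetical same-package test code, not part of this commit: compute() and loadFromFile() are package-private, indexReader, taxoReader and ordinal are assumed to be in scope, the file name is illustrative, and FacetIndexingParams.DEFAULT is assumed to be this module's default-params constant):

TotalFacetCounts computed = TotalFacetCounts.compute(indexReader, taxoReader, FacetIndexingParams.DEFAULT);
File tmp = new File("tfc.bin");
TotalFacetCounts.storeToFile(tmp, computed);
TotalFacetCounts loaded = TotalFacetCounts.loadFromFile(tmp, taxoReader, FacetIndexingParams.DEFAULT);
// the loaded instance answers the same queries as the freshly computed one:
int total = loaded.getTotalCount(ordinal);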
@ -1,299 +0,0 @@
package org.apache.lucene.facet.complements;

import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentLinkedQueue;

import org.apache.lucene.facet.params.CategoryListParams;
import org.apache.lucene.facet.params.FacetIndexingParams;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.IndexReader;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * Manages an LRU cache for {@link TotalFacetCounts} per index, taxonomy, and
 * facet indexing params.
 * 
 * @lucene.experimental
 */
public final class TotalFacetCountsCache {
  
  /**
   * Default size of the in-memory cache of computed total facet counts.
   * Set to 2 for the case when an application reopens a reader while the
   * original one is still in use; otherwise the cache would thrash, switching
   * back and forth between the two.
   */
  public static final int DEFAULT_CACHE_SIZE = 2;

  private static final TotalFacetCountsCache singleton = new TotalFacetCountsCache();
  
  /**
   * Returns the single instance of this cache.
   */
  public static TotalFacetCountsCache getSingleton() {
    return singleton;
  }
  
  /**
   * In-memory cache of TFCs.
   * <ul>
   * <li>Its size is kept within limits through {@link #trimCache()}.
   * <li>An LRU eviction policy is applied, by maintaining active keys in {@link #lruKeys}.
   * <li>After each addition to the cache, trimCache is called, to remove the least recently used entries.
   * </ul>
   * @see #markRecentlyUsed(TFCKey)
   */
  private ConcurrentHashMap<TFCKey,TotalFacetCounts> cache = new ConcurrentHashMap<TFCKey,TotalFacetCounts>();
  
  /**
   * A queue of active keys for applying the LRU policy on eviction from the {@link #cache}.
   * @see #markRecentlyUsed(TFCKey)
   */
  private ConcurrentLinkedQueue<TFCKey> lruKeys = new ConcurrentLinkedQueue<TFCKey>();
  
  private int maxCacheSize = DEFAULT_CACHE_SIZE;
  
  /** private constructor for singleton pattern */
  private TotalFacetCountsCache() {
  }
  /**
   * Gets the total facet counts for a reader/taxonomy pair and facet indexing
   * parameters. If not in the cache, they are computed here and added to the
   * cache for later use.
   * 
   * @param indexReader
   *          the documents index
   * @param taxonomy
   *          the taxonomy index
   * @param facetIndexingParams
   *          facet indexing parameters
   * @return the total facet counts.
   */
  public TotalFacetCounts getTotalCounts(IndexReader indexReader, TaxonomyReader taxonomy, 
      FacetIndexingParams facetIndexingParams) throws IOException {
    // create the key
    TFCKey key = new TFCKey(indexReader, taxonomy, facetIndexingParams);
    // it is important that this call is not synchronized, so that an already
    // available TFC would not wait for one that needs to be computed.
    TotalFacetCounts tfc = cache.get(key);
    if (tfc != null) {
      markRecentlyUsed(key);
      return tfc;
    }
    return computeAndCache(key);
  }
  /**
   * Marks the key as recently used.
   * <p>
   * <b>Implementation notes: Synchronization considerations and the interaction between lruKeys and cache:</b>
   * <ol>
   * <li>A concurrent {@link LinkedHashMap} would have made this class much simpler.
   *     But unfortunately, Java does not provide one.
   *     Instead, we combine two concurrent objects:
   * <ul>
   *  <li>{@link ConcurrentHashMap} for the cached TFCs.
   *  <li>{@link ConcurrentLinkedQueue} for active keys
   * </ul>
   * <li>Both {@link #lruKeys} and {@link #cache} are concurrently safe.
   * <li>Checks for a cached item through getTotalCounts() are not synchronized.
   *     Therefore, the case that a needed TFC is in the cache is very fast:
   *     it does not wait for the computation of other TFCs.
   * <li>computeAndCache() is synchronized, and has a (double) check of the required
   *     TFC, to avoid computing the same TFC twice.
   * <li>A race condition in this method (markRecentlyUsed) might result in two copies
   *     of the same 'key' in lruKeys, but this is handled by the loop in trimCache(),
   *     where an attempt to remove the same key twice is a no-op.
   * </ol>
   */
  private void markRecentlyUsed(TFCKey key) {
    lruKeys.remove(key);
    lruKeys.add(key);
  }
  private synchronized void trimCache() {
    // loop until the cache is of the desired size.
    while (cache.size() > maxCacheSize) {
      TFCKey key = lruKeys.poll();
      if (key == null) {
        // defensive only, since lruKeys should cover all the cache keys
        key = cache.keys().nextElement();
      }
      // remove this element. Note that an attempt to remove the same key again is a no-op,
      // which gracefully handles the possible race in markRecentlyUsed().
      cache.remove(key);
    }
  }
  /**
   * Computes a TFC and caches it, after verifying it was not just added. For
   * this purpose the method is synchronized, which is not too costly because
   * the computation itself dominates the work done here.
   */
  private synchronized TotalFacetCounts computeAndCache(TFCKey key) throws IOException {
    TotalFacetCounts tfc = cache.get(key);
    if (tfc == null) {
      tfc = TotalFacetCounts.compute(key.indexReader, key.taxonomy, key.facetIndexingParams);
      lruKeys.add(key);
      cache.put(key, tfc);
      trimCache();
    }
    return tfc;
  }
  /**
   * Loads {@link TotalFacetCounts} matching the input parameters from the
   * provided inputFile and adds them to the cache for the provided
   * indexReader, taxonomy, and facetIndexingParams. If a
   * {@link TotalFacetCounts} for these parameters already exists in the
   * cache, it will be replaced by the loaded one.
   * 
   * @param inputFile
   *          file from which to read the data
   * @param indexReader
   *          the documents index
   * @param taxonomy
   *          the taxonomy index
   * @param facetIndexingParams
   *          the facet indexing parameters
   * @throws IOException
   *           on error
   */
  public synchronized void load(File inputFile, IndexReader indexReader, TaxonomyReader taxonomy,
      FacetIndexingParams facetIndexingParams) throws IOException {
    if (!inputFile.isFile() || !inputFile.exists() || !inputFile.canRead()) {
      throw new IllegalArgumentException("Expecting an existing readable file: " + inputFile);
    }
    TFCKey key = new TFCKey(indexReader, taxonomy, facetIndexingParams);
    TotalFacetCounts tfc = TotalFacetCounts.loadFromFile(inputFile, taxonomy, facetIndexingParams);
    cache.put(key, tfc);
    trimCache();
    markRecentlyUsed(key);
  }
  /**
   * Stores the {@link TotalFacetCounts} matching the input parameters into the
   * provided outputFile, making them available for a later call to
   * {@link #load(File, IndexReader, TaxonomyReader, FacetIndexingParams)}. If
   * these {@link TotalFacetCounts} are available in the cache, they are used.
   * But if they are not in the cache, this call will first compute them (which
   * will also add them to the cache).
   * 
   * @param outputFile
   *          file to store in.
   * @param indexReader
   *          the documents index
   * @param taxonomy
   *          the taxonomy index
   * @param facetIndexingParams
   *          the facet indexing parameters
   * @throws IOException
   *           on error
   * @see #load(File, IndexReader, TaxonomyReader, FacetIndexingParams)
   */
  public void store(File outputFile, IndexReader indexReader, TaxonomyReader taxonomy,
      FacetIndexingParams facetIndexingParams) throws IOException {
    File parentFile = outputFile.getParentFile();
    if (( outputFile.exists() && (!outputFile.isFile() || !outputFile.canWrite())) ||
        (!outputFile.exists() && (!parentFile.isDirectory() || !parentFile.canWrite()))) {
      throw new IllegalArgumentException("Expecting a writable file: " + outputFile);
    }
    TotalFacetCounts tfc = getTotalCounts(indexReader, taxonomy, facetIndexingParams);
    TotalFacetCounts.storeToFile(outputFile, tfc);
  }
  private static class TFCKey {
    final IndexReader indexReader;
    final TaxonomyReader taxonomy;
    private final Iterable<CategoryListParams> clps;
    private final int hashCode;
    private final int nDels; // needed when a reader used for faceted search was just used for deletion.
    final FacetIndexingParams facetIndexingParams;

    public TFCKey(IndexReader indexReader, TaxonomyReader taxonomy,
        FacetIndexingParams facetIndexingParams) {
      this.indexReader = indexReader;
      this.taxonomy = taxonomy;
      this.facetIndexingParams = facetIndexingParams;
      this.clps = facetIndexingParams.getAllCategoryListParams();
      this.nDels = indexReader.numDeletedDocs();
      hashCode = indexReader.hashCode() ^ taxonomy.hashCode();
    }
    
    @Override
    public int hashCode() {
      return hashCode;
    }
    
    @Override
    public boolean equals(Object other) {
      TFCKey o = (TFCKey) other;
      if (indexReader != o.indexReader || taxonomy != o.taxonomy || nDels != o.nDels) {
        return false;
      }
      // also compare the category list params, pair-wise
      Iterator<CategoryListParams> it1 = clps.iterator();
      Iterator<CategoryListParams> it2 = o.clps.iterator();
      while (it1.hasNext() && it2.hasNext()) {
        if (!it1.next().equals(it2.next())) {
          return false;
        }
      }
      return it1.hasNext() == it2.hasNext();
    }
  }
  /**
   * Clears the cache.
   */
  public synchronized void clear() {
    cache.clear();
    lruKeys.clear();
  }
  
  /**
   * @return the maximal cache size
   */
  public int getCacheSize() {
    return maxCacheSize;
  }
  
  /**
   * Sets the number of TotalFacetCounts arrays that will remain in the memory
   * cache.
   * <p>
   * If the new size is smaller than the current size, the cache is trimmed
   * accordingly.
   * <p>
   * The minimal size is 1, so passing a zero or negative size results in a
   * size of 1.
   * @param size new size to set
   */
  public void setCacheSize(int size) {
    if (size < 1) size = 1;
    int origSize = maxCacheSize;
    maxCacheSize = size;
    if (maxCacheSize < origSize) { // need to trim only if the cache was reduced
      trimCache();
    }
  }

}
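A minimal usage sketch of the cache above (hypothetical calling code, not part of this commit; reader, taxo and iParams are assumed to be an open IndexReader, TaxonomyReader and FacetIndexingParams, and the file name is illustrative):

TotalFacetCountsCache tfcCache = TotalFacetCountsCache.getSingleton();
tfcCache.setCacheSize(4);                        // keep up to 4 TFC arrays in memory
TotalFacetCounts tfc = tfcCache.getTotalCounts(reader, taxo, iParams); // computed on first call
tfcCache.store(new File("tfc.bin"), reader, taxo, iParams);            // persist for a later load()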
@ -1,27 +0,0 @@
<!--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements.  See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License.  You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-->
<html>
<head>
<title>Facets Complements counting</title>
</head>
<body>
Allows caching the total counts of categories, so that when a search matches
a large fraction of a segment (more than about 60% of its documents), the
complement set of matching documents is counted instead. Useful for queries
that visit a large number of documents, e.g. overview queries.
</body>
</html>
@ -1,115 +0,0 @@
package org.apache.lucene.facet.encoding;

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * An {@link IntEncoder} which encodes values in chunks. Implementations of this
 * class assume the data which needs encoding consists of small, consecutive
 * values, and therefore the encoder is able to compress them better. You can
 * read more on the two implementations {@link FourFlagsIntEncoder} and
 * {@link EightFlagsIntEncoder}.
 * <p>
 * Extensions of this class need to implement {@link #encode(IntsRef, BytesRef)}
 * in order to build the proper indicator (flags). When enough values were
 * accumulated (typically the batch size), extensions can call
 * {@link #encodeChunk(BytesRef)} to flush the indicator and the rest of the
 * values.
 * <p>
 * <b>NOTE:</b> flags encoders do not accept values &lt;= 0 (zero) in their
 * {@link #encode(IntsRef, BytesRef)}. For performance reasons they do not check
 * that condition, however if such a value is passed the result stream may be
 * corrupt or an exception will be thrown. Also, these encoders perform best
 * when there are many consecutive small values (depending on the encoder
 * implementation). If that is not the case, the encoder will occupy 1 more byte
 * for every <i>batch</i> number of integers, over whatever
 * {@link VInt8IntEncoder} would have occupied. Therefore make sure to check
 * whether your data fits the conditions of the specific encoder.
 * <p>
 * For the reasons mentioned above, these encoders are usually chained with
 * {@link UniqueValuesIntEncoder} and {@link DGapIntEncoder}.
 * 
 * @lucene.experimental
 */
public abstract class ChunksIntEncoder extends IntEncoder {
  
  /** Holds the values which must be encoded, outside the indicator. */
  protected final IntsRef encodeQueue;
  
  /** The flags byte (indicator) of the current chunk. */
  protected int indicator = 0;
  
  /** Counts the current ordinal of the encoded value. */
  protected byte ordinal = 0;
  protected ChunksIntEncoder(int chunkSize) {
    encodeQueue = new IntsRef(chunkSize);
  }
  
  /**
   * Encodes the values of the current chunk. First it writes the indicator, and
   * then it encodes the values outside the indicator.
   */
  protected void encodeChunk(BytesRef buf) {
    // ensure there's enough room in the buffer: the indicator plus at most
    // 5 bytes per positive VInt (the original reserved only 4 bytes per value,
    // which is too little for values needing the fifth VInt8 byte)
    int maxBytesRequired = buf.length + 1 + encodeQueue.length * 5;
    if (buf.bytes.length < maxBytesRequired) {
      buf.grow(maxBytesRequired);
    }

    buf.bytes[buf.length++] = ((byte) indicator);
    for (int i = 0; i < encodeQueue.length; i++) {
      // it is better if the encoding is inlined like so, and not e.g.
      // in a utility method
      int value = encodeQueue.ints[i];
      if ((value & ~0x7F) == 0) {
        buf.bytes[buf.length] = (byte) value;
        buf.length++;
      } else if ((value & ~0x3FFF) == 0) {
        buf.bytes[buf.length] = (byte) (0x80 | ((value & 0x3F80) >> 7));
        buf.bytes[buf.length + 1] = (byte) (value & 0x7F);
        buf.length += 2;
      } else if ((value & ~0x1FFFFF) == 0) {
        buf.bytes[buf.length] = (byte) (0x80 | ((value & 0x1FC000) >> 14));
        buf.bytes[buf.length + 1] = (byte) (0x80 | ((value & 0x3F80) >> 7));
        buf.bytes[buf.length + 2] = (byte) (value & 0x7F);
        buf.length += 3;
      } else if ((value & ~0xFFFFFFF) == 0) {
        buf.bytes[buf.length] = (byte) (0x80 | ((value & 0xFE00000) >> 21));
        buf.bytes[buf.length + 1] = (byte) (0x80 | ((value & 0x1FC000) >> 14));
        buf.bytes[buf.length + 2] = (byte) (0x80 | ((value & 0x3F80) >> 7));
        buf.bytes[buf.length + 3] = (byte) (value & 0x7F);
        buf.length += 4;
      } else {
        buf.bytes[buf.length] = (byte) (0x80 | ((value & 0xF0000000) >> 28));
        buf.bytes[buf.length + 1] = (byte) (0x80 | ((value & 0xFE00000) >> 21));
        buf.bytes[buf.length + 2] = (byte) (0x80 | ((value & 0x1FC000) >> 14));
        buf.bytes[buf.length + 3] = (byte) (0x80 | ((value & 0x3F80) >> 7));
        buf.bytes[buf.length + 4] = (byte) (value & 0x7F);
        buf.length += 5;
      }
    }
    
    // reset the chunk state
    ordinal = 0;
    indicator = 0;
    encodeQueue.length = 0;
  }
  
}
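To make the inlined branches above concrete, here is an equivalent loop formulation of the same VInt8 scheme for a single positive value (an illustrative standalone sketch, not the class's actual code):

// 7 payload bits per byte; the high bit (0x80) marks "more bytes follow".
static int writeVInt8(byte[] dest, int pos, int value) {
  // emit the high-order 7-bit groups first, skipping leading zero groups
  for (int shift = 28; shift > 0; shift -= 7) {
    if ((value >>> shift) != 0) {
      dest[pos++] = (byte) (0x80 | ((value >>> shift) & 0x7F));
    }
  }
  dest[pos++] = (byte) (value & 0x7F); // last byte has the high bit clear
  return pos;
}
// e.g. writeVInt8(buf, 0, 300) writes 0x82 0x2C, since 300 = 2*128 + 44.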
@ -1,52 +0,0 @@
package org.apache.lucene.facet.encoding;

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * An {@link IntDecoder} which wraps another decoder and reverts the d-gap that
 * was encoded by {@link DGapIntEncoder}.
 * 
 * @lucene.experimental
 */
public final class DGapIntDecoder extends IntDecoder {

  private final IntDecoder decoder;
  
  public DGapIntDecoder(IntDecoder decoder) {
    this.decoder = decoder;
  }
  
  @Override
  public void decode(BytesRef buf, IntsRef values) {
    decoder.decode(buf, values);
    // turn the decoded gaps back into absolute values with a running sum
    int prev = 0;
    for (int i = 0; i < values.length; i++) {
      values.ints[i] += prev;
      prev = values.ints[i];
    }
  }

  @Override
  public String toString() {
    return "DGap(" + decoder.toString() + ")";
  }
  
}
@ -1,67 +0,0 @@
package org.apache.lucene.facet.encoding;

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * An {@link IntEncoderFilter} which encodes the gaps between the given values,
 * rather than the values themselves. This encoder usually yields better
 * encoding performance space-wise (i.e., the final encoded values consume less
 * space) if the values are 'close' to each other.
 * <p>
 * <b>NOTE:</b> this encoder assumes the values are given to
 * {@link #encode(IntsRef, BytesRef)} in ascending sorted order, which ensures
 * that only positive values are encoded and thus yields better performance. If
 * you are not sure whether the values are sorted or not, it is possible to
 * chain this encoder with {@link SortingIntEncoder} to ensure the values will
 * be sorted before encoding.
 * 
 * @lucene.experimental
 */
public final class DGapIntEncoder extends IntEncoderFilter {

  /** Initializes with the given encoder. */
  public DGapIntEncoder(IntEncoder encoder) {
    super(encoder);
  }

  @Override
  public void encode(IntsRef values, BytesRef buf) {
    // rewrite each value, in place, as the gap from its predecessor
    int prev = 0;
    int upto = values.offset + values.length;
    for (int i = values.offset; i < upto; i++) {
      int tmp = values.ints[i];
      values.ints[i] -= prev;
      prev = tmp;
    }
    encoder.encode(values, buf);
  }

  @Override
  public IntDecoder createMatchingDecoder() {
    return new DGapIntDecoder(encoder.createMatchingDecoder());
  }
  
  @Override
  public String toString() {
    return "DGap(" + encoder.toString() + ")";
  }
  
}
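A round-trip sketch of the filter above, chained over the VInt8IntEncoder from this package (hypothetical calling code, not part of this commit; note that encode() rewrites the input values in place as gaps, and the buffer is assumed large enough for this input):

IntEncoder enc = new DGapIntEncoder(new VInt8IntEncoder());
IntsRef values = new IntsRef(new int[] { 3, 7, 10, 40 }, 0, 4); // must be ascending
BytesRef buf = new BytesRef(32);
enc.encode(values, buf);                          // gaps actually written: 3, 4, 3, 30
IntsRef decoded = new IntsRef(4);
enc.createMatchingDecoder().decode(buf, decoded); // restores 3, 7, 10, 40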
@ -1,67 +0,0 @@
package org.apache.lucene.facet.encoding;

import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.RamUsageEstimator;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * Decodes values encoded by {@link DGapVInt8IntEncoder}.
 * 
 * @lucene.experimental
 */
public final class DGapVInt8IntDecoder extends IntDecoder {

  @Override
  public void decode(BytesRef buf, IntsRef values) {
    values.offset = values.length = 0;

    // grow the buffer up front, even if by a large number of values (buf.length)
    // that saves the need to check inside the loop for every decoded value if
    // the buffer needs to grow.
    if (values.ints.length < buf.length) {
      values.ints = new int[ArrayUtil.oversize(buf.length, RamUsageEstimator.NUM_BYTES_INT)];
    }

    // it is better if the decoding is inlined like so, and not e.g.
    // in a utility method
    int upto = buf.offset + buf.length;
    int value = 0;
    int offset = buf.offset;
    int prev = 0;
    while (offset < upto) {
      byte b = buf.bytes[offset++];
      if (b >= 0) {
        // last byte of the VInt8: finish the gap and add the previous value
        values.ints[values.length] = ((value << 7) | b) + prev;
        value = 0;
        prev = values.ints[values.length];
        values.length++;
      } else {
        value = (value << 7) | (b & 0x7F);
      }
    }
  }

  @Override
  public String toString() {
    return "DGapVInt8";
  }
  
}
@ -1,89 +0,0 @@
package org.apache.lucene.facet.encoding;

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * An {@link IntEncoder} which implements variable length encoding for the gaps
 * between values. It is a specialized form of the combination of
 * {@link DGapIntEncoder} and {@link VInt8IntEncoder}.
 * 
 * @see VInt8IntEncoder
 * @see DGapIntEncoder
 * 
 * @lucene.experimental
 */
public final class DGapVInt8IntEncoder extends IntEncoder {

  @Override
  public void encode(IntsRef values, BytesRef buf) {
    buf.offset = buf.length = 0;
    int maxBytesNeeded = 5 * values.length; // at most 5 bytes per VInt
    if (buf.bytes.length < maxBytesNeeded) {
      buf.grow(maxBytesNeeded);
    }

    int upto = values.offset + values.length;
    int prev = 0;
    for (int i = values.offset; i < upto; i++) {
      // it is better if the encoding is inlined like so, and not e.g.
      // in a utility method
      int value = values.ints[i] - prev;
      if ((value & ~0x7F) == 0) {
        buf.bytes[buf.length] = (byte) value;
        buf.length++;
      } else if ((value & ~0x3FFF) == 0) {
        buf.bytes[buf.length] = (byte) (0x80 | ((value & 0x3F80) >> 7));
        buf.bytes[buf.length + 1] = (byte) (value & 0x7F);
        buf.length += 2;
      } else if ((value & ~0x1FFFFF) == 0) {
        buf.bytes[buf.length] = (byte) (0x80 | ((value & 0x1FC000) >> 14));
        buf.bytes[buf.length + 1] = (byte) (0x80 | ((value & 0x3F80) >> 7));
        buf.bytes[buf.length + 2] = (byte) (value & 0x7F);
        buf.length += 3;
      } else if ((value & ~0xFFFFFFF) == 0) {
        buf.bytes[buf.length] = (byte) (0x80 | ((value & 0xFE00000) >> 21));
        buf.bytes[buf.length + 1] = (byte) (0x80 | ((value & 0x1FC000) >> 14));
        buf.bytes[buf.length + 2] = (byte) (0x80 | ((value & 0x3F80) >> 7));
        buf.bytes[buf.length + 3] = (byte) (value & 0x7F);
        buf.length += 4;
      } else {
        buf.bytes[buf.length] = (byte) (0x80 | ((value & 0xF0000000) >> 28));
        buf.bytes[buf.length + 1] = (byte) (0x80 | ((value & 0xFE00000) >> 21));
        buf.bytes[buf.length + 2] = (byte) (0x80 | ((value & 0x1FC000) >> 14));
        buf.bytes[buf.length + 3] = (byte) (0x80 | ((value & 0x3F80) >> 7));
        buf.bytes[buf.length + 4] = (byte) (value & 0x7F);
        buf.length += 5;
      }
      prev = values.ints[i];
    }
  }

  @Override
  public IntDecoder createMatchingDecoder() {
    return new DGapVInt8IntDecoder();
  }
  
  @Override
  public String toString() {
    return "DGapVInt8";
  }
  
}
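A round-trip sketch of the specialized encoder above (hypothetical calling code, not part of this commit):

DGapVInt8IntEncoder enc = new DGapVInt8IntEncoder();
IntsRef values = new IntsRef(new int[] { 100, 101, 4200 }, 0, 3); // ascending
BytesRef buf = new BytesRef(1);               // encode() grows the buffer as needed
enc.encode(values, buf);                      // writes the VInt8 gaps 100, 1, 4099
IntsRef out = new IntsRef(3);
enc.createMatchingDecoder().decode(buf, out); // restores 100, 101, 4200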
@ -1,92 +0,0 @@
package org.apache.lucene.facet.encoding;

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * Decodes values encoded with {@link EightFlagsIntEncoder}.
 * 
 * @lucene.experimental
 */
public class EightFlagsIntDecoder extends IntDecoder {

  /*
   * Holds all combinations of <i>indicator</i> for fast decoding (saves time
   * on real-time bit manipulation)
   */
  private static final byte[][] DECODE_TABLE = new byte[256][8];

  /** Generates all combinations of <i>indicator</i> into separate flags. */
  static {
    for (int i = 256; i != 0;) {
      --i;
      for (int j = 8; j != 0;) {
        --j;
        DECODE_TABLE[i][j] = (byte) ((i >>> j) & 0x1);
      }
    }
  }

  @Override
  public void decode(BytesRef buf, IntsRef values) {
    values.offset = values.length = 0;
    int upto = buf.offset + buf.length;
    int offset = buf.offset;
    while (offset < upto) {
      // read indicator
      int indicator = buf.bytes[offset++] & 0xFF;
      int ordinal = 0;

      int capacityNeeded = values.length + 8;
      if (values.ints.length < capacityNeeded) {
        values.grow(capacityNeeded);
      }

      // process the indicator, until we read 8 values, or end-of-buffer
      while (ordinal != 8) {
        if (DECODE_TABLE[indicator][ordinal++] == 0) {
          if (offset == upto) { // end of buffer
            return;
          }
          // it is better if the decoding is inlined like so, and not e.g.
          // in a utility method
          int value = 0;
          while (true) {
            byte b = buf.bytes[offset++];
            if (b >= 0) {
              values.ints[values.length++] = ((value << 7) | b) + 2;
              break;
            } else {
              value = (value << 7) | (b & 0x7F);
            }
          }
        } else {
          values.ints[values.length++] = 1;
        }
      }
    }
  }

  @Override
  public String toString() {
    return "EightFlags(VInt8)";
  }
  
}
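A worked decode of one chunk, matching the example in EightFlagsIntEncoder's javadoc below (hypothetical calling code, not part of this commit):

// Indicator 0x0D has bits 1,0,1,1,0,0,0,0 (lsb first): flagged slots decode
// to 1, unflagged slots read a VInt8 and add 2.
byte[] chunk = new byte[] { 0x0D, 0x02, 0x00, 0x03 };
IntsRef gaps = new IntsRef(8);
new EightFlagsIntDecoder().decode(new BytesRef(chunk), gaps);
// gaps.ints[0..5] = 1, 4, 1, 1, 2, 5 (decoding stops at end-of-buffer)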
@ -1,96 +0,0 @@
package org.apache.lucene.facet.encoding;

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
|
|
||||||
* A {@link ChunksIntEncoder} which encodes data in chunks of 8. Every group
|
|
||||||
* starts with a single byte (called indicator) which represents 8 - 1 bit
|
|
||||||
* flags, where the value:
|
|
||||||
* <ul>
|
|
||||||
* <li>1 means the encoded value is '1'
|
|
||||||
* <li>0 means the value is encoded using {@link VInt8IntEncoder}, and the
|
|
||||||
* encoded bytes follow the indicator.<br>
|
|
||||||
* Since value 0 is illegal, and 1 is encoded in the indicator, the actual value
|
|
||||||
* that is encoded is <code>value-2</code>, which saves some more bits.
|
|
||||||
* </ul>
|
|
||||||
* Encoding example:
|
|
||||||
* <ul>
|
|
||||||
* <li>Original values: 6, 16, 5, 9, 7, 1
|
|
||||||
* <li>After sorting: 1, 5, 6, 7, 9, 16
|
|
||||||
* <li>D-Gap computing: 1, 4, 1, 1, 2, 5 (so far - done by
|
|
||||||
* {@link DGapIntEncoder})
|
|
||||||
* <li>Encoding: 1,0,1,1,0,0,0,0 as the indicator, by 2 (4-2), 0 (2-2), 3 (5-2).
|
|
||||||
* <li>Binary encode: <u>0 | 0 | 0 | 0 | 1 | 1 | 0 | 1</u> 00000010 00000000
|
|
||||||
* 00000011 (indicator is <u>underlined</u>).<br>
|
|
||||||
* <b>NOTE:</b> the order of the values in the indicator is lsb ⇒ msb,
|
|
||||||
* which allows for more efficient decoding.
|
|
||||||
* </ul>
|
|
||||||
*
|
|
||||||
* @lucene.experimental
|
|
||||||
*/
|
|
||||||
public class EightFlagsIntEncoder extends ChunksIntEncoder {
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Holds all combinations of <i>indicator</i> flags for fast encoding (saves
|
|
||||||
* time on bit manipulation at encode time)
|
|
||||||
*/
|
|
||||||
private static final byte[] ENCODE_TABLE = new byte[] { 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, (byte) 0x80 };
|
|
||||||
|
|
||||||
public EightFlagsIntEncoder() {
|
|
||||||
super(8);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void encode(IntsRef values, BytesRef buf) {
|
|
||||||
buf.offset = buf.length = 0;
|
|
||||||
int upto = values.offset + values.length;
|
|
||||||
for (int i = values.offset; i < upto; i++) {
|
|
||||||
int value = values.ints[i];
|
|
||||||
if (value == 1) {
|
|
||||||
indicator |= ENCODE_TABLE[ordinal];
|
|
||||||
} else {
|
|
||||||
encodeQueue.ints[encodeQueue.length++] = value - 2;
|
|
||||||
}
|
|
||||||
++ordinal;
|
|
||||||
|
|
||||||
// encode the chunk and the indicator
|
|
||||||
if (ordinal == 8) {
|
|
||||||
encodeChunk(buf);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// encode remaining values
|
|
||||||
if (ordinal != 0) {
|
|
||||||
encodeChunk(buf);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public IntDecoder createMatchingDecoder() {
|
|
||||||
return new EightFlagsIntDecoder();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String toString() {
|
|
||||||
return "EightFlags(VInt)";
|
|
||||||
}
|
|
||||||
|
|
||||||
}
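
Taken together with the decoder above, the chunk format is easiest to verify end to end. The following is a minimal round-trip sketch, not part of the patch: it assumes the SortingIntEncoder and DGapIntEncoder filters referenced by the javadoc are available in this package, and feeds in the example values from the class comment.

import org.apache.lucene.facet.encoding.*;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;

public class EightFlagsRoundTrip {
  public static void main(String[] args) {
    // the javadoc example: 6, 16, 5, 9, 7, 1 -- sorted and d-gapped before
    // EightFlagsIntEncoder sees them, exactly as the example walks through
    IntEncoder encoder = new SortingIntEncoder(new DGapIntEncoder(new EightFlagsIntEncoder()));
    IntsRef values = new IntsRef(8);
    for (int v : new int[] { 6, 16, 5, 9, 7, 1 }) {
      values.ints[values.length++] = v;
    }
    BytesRef buf = new BytesRef(32);
    encoder.encode(values, buf); // NOTE: sorting and d-gapping happen in place on 'values'

    IntDecoder decoder = encoder.createMatchingDecoder();
    IntsRef decoded = new IntsRef(8);
    decoder.decode(buf, decoded);
    for (int i = 0; i < decoded.length; i++) {
      System.out.print(decoded.ints[i] + " "); // expected: 1 5 6 7 9 16
    }
  }
}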
@ -1,92 +0,0 @@
package org.apache.lucene.facet.encoding;

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Decodes values encoded with {@link FourFlagsIntEncoder}.
 *
 * @lucene.experimental
 */
public class FourFlagsIntDecoder extends IntDecoder {

  /**
   * Holds all combinations of <i>indicator</i> for fast decoding (saves time
   * on real-time bit manipulation)
   */
  private final static byte[][] DECODE_TABLE = new byte[256][4];

  /** Generating all combinations of <i>indicator</i> into separate flags. */
  static {
    for (int i = 256; i != 0;) {
      --i;
      for (int j = 4; j != 0;) {
        --j;
        DECODE_TABLE[i][j] = (byte) ((i >>> (j << 1)) & 0x3);
      }
    }
  }

  @Override
  public void decode(BytesRef buf, IntsRef values) {
    values.offset = values.length = 0;
    int upto = buf.offset + buf.length;
    int offset = buf.offset;
    while (offset < upto) {
      // read indicator
      int indicator = buf.bytes[offset++] & 0xFF;
      int ordinal = 0;

      int capacityNeeded = values.length + 4;
      if (values.ints.length < capacityNeeded) {
        values.grow(capacityNeeded);
      }

      while (ordinal != 4) {
        byte decodeVal = DECODE_TABLE[indicator][ordinal++];
        if (decodeVal == 0) {
          if (offset == upto) { // end of buffer
            return;
          }
          // it is better if the decoding is inlined like so, and not e.g.
          // in a utility method
          int value = 0;
          while (true) {
            byte b = buf.bytes[offset++];
            if (b >= 0) {
              values.ints[values.length++] = ((value << 7) | b) + 4;
              break;
            } else {
              value = (value << 7) | (b & 0x7F);
            }
          }
        } else {
          values.ints[values.length++] = decodeVal;
        }
      }
    }
  }

  @Override
  public String toString() {
    return "FourFlags(VInt)";
  }

}
@ -1,102 +0,0 @@
package org.apache.lucene.facet.encoding;

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * A {@link ChunksIntEncoder} which encodes values in chunks of 4. Every group
 * starts with a single byte (called indicator) which represents 4 two-bit
 * flags, where the values:
 * <ul>
 * <li>1, 2 or 3 mean the encoded value is '1', '2' or '3' respectively.
 * <li>0 means the value is encoded using {@link VInt8IntEncoder}, and the
 * encoded bytes follow the indicator.<br>
 * Since value 0 is illegal, and 1-3 are encoded in the indicator, the actual
 * value that is encoded is <code>value-4</code>, which saves some more bits.
 * </ul>
 * Encoding example:
 * <ul>
 * <li>Original values: 6, 16, 5, 9, 7, 1, 11
 * <li>After sorting: 1, 5, 6, 7, 9, 11, 16
 * <li>D-Gap computing: 1, 4, 1, 1, 2, 2, 5 (so far - done by
 * {@link DGapIntEncoder})
 * <li>Encoding: 1,0,1,1 as the first indicator, followed by 0 (4-4), then
 * 2,2,0,0 as the second indicator, followed by 1 (5-4) encoded with
 * {@link VInt8IntEncoder}.
 * <li>Binary encode: <u>01 | 01 | 00 | 01</u> 00000000 <u>00 | 00 | 10 | 10</u>
 * 00000001 (indicators are <u>underlined</u>).<br>
 * <b>NOTE:</b> the order of the values in the indicator is lsb ⇒ msb,
 * which allows for more efficient decoding.
 * </ul>
 *
 * @lucene.experimental
 */
public class FourFlagsIntEncoder extends ChunksIntEncoder {

  /*
   * Holds all combinations of <i>indicator</i> flags for fast encoding (saves
   * time on bit manipulation at encode time)
   */
  private static final byte[][] ENCODE_TABLE = new byte[][] {
    new byte[] { 0x00, 0x00, 0x00, 0x00 },
    new byte[] { 0x01, 0x04, 0x10, 0x40 },
    new byte[] { 0x02, 0x08, 0x20, (byte) 0x80 },
    new byte[] { 0x03, 0x0C, 0x30, (byte) 0xC0 },
  };

  public FourFlagsIntEncoder() {
    super(4);
  }

  @Override
  public void encode(IntsRef values, BytesRef buf) {
    buf.offset = buf.length = 0;
    int upto = values.offset + values.length;
    for (int i = values.offset; i < upto; i++) {
      int value = values.ints[i];
      if (value <= 3) {
        indicator |= ENCODE_TABLE[value][ordinal];
      } else {
        encodeQueue.ints[encodeQueue.length++] = value - 4;
      }
      ++ordinal;

      // encode the chunk and the indicator
      if (ordinal == 4) {
        encodeChunk(buf);
      }
    }

    // encode remaining values
    if (ordinal != 0) {
      encodeChunk(buf);
    }
  }

  @Override
  public IntDecoder createMatchingDecoder() {
    return new FourFlagsIntDecoder();
  }

  @Override
  public String toString() {
    return "FourFlags(VInt)";
  }

}
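
To make the indicator packing concrete, here is a small dependency-free sketch (class and method names are ours) that rebuilds the two indicator bytes of the javadoc example; the first flag occupies the lowest two bits, per the lsb-to-msb note above.

public class FourFlagsIndicatorDemo {
  public static void main(String[] args) {
    // chunk values 1,4,1,1 -> flags 1,0,1,1 (4 overflows into a VInt8 byte)
    // chunk values 2,2,5   -> flags 2,2,0,0 (5 overflows; fourth slot is padding)
    int[] flags1 = { 1, 0, 1, 1 };
    int[] flags2 = { 2, 2, 0, 0 };
    System.out.println(Integer.toBinaryString(pack(flags1))); // 1010001  -> 01 01 00 01
    System.out.println(Integer.toBinaryString(pack(flags2))); // 1010     -> 00 00 10 10
  }

  // first flag goes into the lowest two bits (lsb -> msb, as the javadoc notes)
  static int pack(int[] flags) {
    int indicator = 0;
    for (int i = 0; i < 4; i++) {
      indicator |= flags[i] << (i * 2);
    }
    return indicator;
  }
}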
@ -1,37 +0,0 @@
package org.apache.lucene.facet.encoding;

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Decodes integers from a given {@link BytesRef}.
 *
 * @lucene.experimental
 */
public abstract class IntDecoder {

  /**
   * Decodes the values from the buffer into the given {@link IntsRef}. Note
   * that {@code values.offset} is set to 0, and {@code values.length} is
   * updated to denote the number of decoded values.
   */
  public abstract void decode(BytesRef buf, IntsRef values);

}
@ -1,46 +0,0 @@
package org.apache.lucene.facet.encoding;

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Encodes integers into a given {@link BytesRef}. For convenience, each encoder
 * implements {@link #createMatchingDecoder()} for easy access to the matching
 * decoder.
 *
 * @lucene.experimental
 */
public abstract class IntEncoder {

  public IntEncoder() {}

  /**
   * Encodes the values to the given buffer. Note that the buffer's offset and
   * length are set to 0.
   */
  public abstract void encode(IntsRef values, BytesRef buf);

  /**
   * Returns an {@link IntDecoder} which can decode the values that were encoded
   * with this encoder.
   */
  public abstract IntDecoder createMatchingDecoder();

}
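
The encode/createMatchingDecoder/decode contract suggests a generic round-trip helper. A hedged sketch, assuming the concrete encoders in this package (roundTrip is our name, not an API method):

import java.util.Arrays;

import org.apache.lucene.facet.encoding.IntDecoder;
import org.apache.lucene.facet.encoding.IntEncoder;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;

public class EncoderContract {
  /**
   * Encodes data with the given encoder, then decodes with the matching
   * decoder. Beware: filters such as SortingIntEncoder or
   * UniqueValuesIntEncoder modify the input in place, so the result may be a
   * sorted/deduplicated view of 'data' rather than 'data' itself.
   */
  static int[] roundTrip(IntEncoder encoder, int... data) {
    IntsRef values = new IntsRef(data.length);
    for (int v : data) {
      values.ints[values.length++] = v;
    }
    BytesRef buf = new BytesRef(data.length * 4 + 16);
    encoder.encode(values, buf);

    IntDecoder decoder = encoder.createMatchingDecoder();
    IntsRef out = new IntsRef(data.length);
    decoder.decode(buf, out);
    return Arrays.copyOfRange(out.ints, out.offset, out.offset + out.length);
  }
}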
@ -1,34 +0,0 @@
package org.apache.lucene.facet.encoding;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * An abstract implementation of {@link IntEncoder} which wraps another encoder.
 *
 * @lucene.experimental
 */
public abstract class IntEncoderFilter extends IntEncoder {

  protected final IntEncoder encoder;

  protected IntEncoderFilter(IntEncoder encoder) {
    this.encoder = encoder;
  }

}
@ -1,86 +0,0 @@
package org.apache.lucene.facet.encoding;

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Decodes values encoded with {@link NOnesIntEncoder}.
 *
 * @lucene.experimental
 */
public class NOnesIntDecoder extends FourFlagsIntDecoder {

  // Number of consecutive '1's to generate upon decoding a '2'
  private final int n;
  private final IntsRef internalBuffer;

  /**
   * Constructs a decoder with a given N (number of consecutive '1's which are
   * translated into a single target value '2').
   */
  public NOnesIntDecoder(int n) {
    this.n = n;
    // initial size (room for 100 integers)
    internalBuffer = new IntsRef(100);
  }

  @Override
  public void decode(BytesRef buf, IntsRef values) {
    values.offset = values.length = 0;
    internalBuffer.length = 0;
    super.decode(buf, internalBuffer);
    if (values.ints.length < internalBuffer.length) {
      // need space for internalBuffer.length to internalBuffer.length*N,
      // grow mildly at first
      values.grow(internalBuffer.length * n/2);
    }

    for (int i = 0; i < internalBuffer.length; i++) {
      int decode = internalBuffer.ints[i];
      if (decode == 1) {
        if (values.length == values.ints.length) {
          values.grow(values.length + 10); // grow by few items, however not too many
        }
        // 1 is 1
        values.ints[values.length++] = 1;
      } else if (decode == 2) {
        if (values.length + n >= values.ints.length) {
          values.grow(values.length + n); // grow by few items, however not too many
        }
        // '2' means N 1's
        for (int j = 0; j < n; j++) {
          values.ints[values.length++] = 1;
        }
      } else {
        if (values.length == values.ints.length) {
          values.grow(values.length + 10); // grow by few items, however not too many
        }
        // any other value is val-1
        values.ints[values.length++] = decode - 1;
      }
    }
  }

  @Override
  public String toString() {
    return "NOnes(" + n + ") (" + super.toString() + ")";
  }

}
@ -1,114 +0,0 @@
package org.apache.lucene.facet.encoding;

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * A variation of {@link FourFlagsIntEncoder} which translates the data as
 * follows:
 * <ul>
 * <li>Values ≥ 2 are translated to <code>value+1</code> (2 ⇒ 3, 3
 * ⇒ 4 and so forth).
 * <li>Every <code>N</code> occurrences of 1 are encoded as a single 2.
 * <li>Otherwise, each 1 is encoded as 1.
 * </ul>
 * <p>
 * Encoding examples:
 * <ul>
 * <li>N = 4: the data 1,1,1,1,1 is translated to: 2, 1
 * <li>N = 3: the data 1,2,3,4,1,1,1,1,5 is translated to 1,3,4,5,2,1,6
 * </ul>
 * <b>NOTE:</b> this encoder does not support values ≤ 0 and
 * {@link Integer#MAX_VALUE}. 0 is not supported because it's not supported by
 * {@link FourFlagsIntEncoder} and {@link Integer#MAX_VALUE} because this
 * encoder translates N to N+1, which will cause an overflow and
 * {@link Integer#MAX_VALUE} will become a negative number, which is also not
 * supported.<br>
 * This does not mean you cannot encode {@link Integer#MAX_VALUE}. If it is not
 * the first value to encode, and you wrap this encoder with
 * {@link DGapIntEncoder}, then the value that will be sent to this encoder will
 * be <code>MAX_VAL - prev</code>.
 *
 * @lucene.experimental
 */
public class NOnesIntEncoder extends FourFlagsIntEncoder {

  private final IntsRef internalBuffer;

  /** Number of consecutive '1's to be translated into single target value '2'. */
  private final int n;

  /**
   * Constructs an encoder with a given value of N (N: Number of consecutive
   * '1's to be translated into single target value '2').
   */
  public NOnesIntEncoder(int n) {
    this.n = n;
    internalBuffer = new IntsRef(n);
  }

  @Override
  public void encode(IntsRef values, BytesRef buf) {
    internalBuffer.length = 0;
    // make sure the internal buffer is large enough
    if (values.length > internalBuffer.ints.length) {
      internalBuffer.grow(values.length);
    }

    int onesCounter = 0;
    int upto = values.offset + values.length;
    for (int i = values.offset; i < upto; i++) {
      int value = values.ints[i];
      if (value == 1) {
        // every N 1's should be encoded as '2'
        if (++onesCounter == n) {
          internalBuffer.ints[internalBuffer.length++] = 2;
          onesCounter = 0;
        }
      } else {
        // there might have been 1's that we need to encode
        while (onesCounter > 0) {
          --onesCounter;
          internalBuffer.ints[internalBuffer.length++] = 1;
        }

        // encode value as value+1
        internalBuffer.ints[internalBuffer.length++] = value + 1;
      }
    }
    // there might have been 1's that we need to encode
    while (onesCounter > 0) {
      --onesCounter;
      internalBuffer.ints[internalBuffer.length++] = 1;
    }
    super.encode(internalBuffer, buf);
  }

  @Override
  public IntDecoder createMatchingDecoder() {
    return new NOnesIntDecoder(n);
  }

  @Override
  public String toString() {
    return "NOnes(" + n + ") (" + super.toString() + ")";
  }

}
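
A hedged round-trip sketch of the N = 3 example from the class comment (the demo class name is ours):

import org.apache.lucene.facet.encoding.IntDecoder;
import org.apache.lucene.facet.encoding.NOnesIntEncoder;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;

public class NOnesDemo {
  public static void main(String[] args) {
    NOnesIntEncoder encoder = new NOnesIntEncoder(3);
    IntsRef values = new IntsRef(16);
    for (int v : new int[] { 1, 2, 3, 4, 1, 1, 1, 1, 5 }) {
      values.ints[values.length++] = v;
    }
    BytesRef buf = new BytesRef(32);
    // internally translated to 1,3,4,5,2,1,6 (see the javadoc example above),
    // then handed to FourFlagsIntEncoder
    encoder.encode(values, buf);

    IntDecoder decoder = encoder.createMatchingDecoder(); // a NOnesIntDecoder(3)
    IntsRef decoded = new IntsRef(16);
    decoder.decode(buf, decoded);
    for (int i = 0; i < decoded.length; i++) {
      System.out.print(decoded.ints[i] + " "); // expected: 1 2 3 4 1 1 1 1 5
    }
  }
}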
@ -1,56 +0,0 @@
package org.apache.lucene.facet.encoding;

import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.RamUsageEstimator;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Decodes values encoded with {@link SimpleIntEncoder}.
 *
 * @lucene.experimental
 */
public final class SimpleIntDecoder extends IntDecoder {

  @Override
  public void decode(BytesRef buf, IntsRef values) {
    values.offset = values.length = 0;
    int numValues = buf.length / 4; // every value is 4 bytes
    if (values.ints.length < numValues) { // offset and length are 0
      values.ints = new int[ArrayUtil.oversize(numValues, RamUsageEstimator.NUM_BYTES_INT)];
    }

    int offset = buf.offset;
    int upto = buf.offset + buf.length;
    while (offset < upto) {
      values.ints[values.length++] =
          ((buf.bytes[offset++] & 0xFF) << 24) |
          ((buf.bytes[offset++] & 0xFF) << 16) |
          ((buf.bytes[offset++] & 0xFF) << 8) |
          (buf.bytes[offset++] & 0xFF);
    }
  }

  @Override
  public String toString() {
    return "Simple";
  }

}
@ -1,59 +0,0 @@
package org.apache.lucene.facet.encoding;

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * A simple {@link IntEncoder}, writing an integer as 4 raw bytes.
 *
 * @lucene.experimental
 */
public final class SimpleIntEncoder extends IntEncoder {

  @Override
  public void encode(IntsRef values, BytesRef buf) {
    buf.offset = buf.length = 0;
    // ensure there's enough room in the buffer
    int bytesNeeded = values.length * 4;
    if (buf.bytes.length < bytesNeeded) {
      buf.grow(bytesNeeded);
    }

    int upto = values.offset + values.length;
    for (int i = values.offset; i < upto; i++) {
      int value = values.ints[i];
      buf.bytes[buf.length++] = (byte) (value >>> 24);
      buf.bytes[buf.length++] = (byte) ((value >> 16) & 0xFF);
      buf.bytes[buf.length++] = (byte) ((value >> 8) & 0xFF);
      buf.bytes[buf.length++] = (byte) (value & 0xFF);
    }
  }

  @Override
  public IntDecoder createMatchingDecoder() {
    return new SimpleIntDecoder();
  }

  @Override
  public String toString() {
    return "Simple";
  }

}
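
This layout is plain big-endian int32, which can be cross-checked against java.nio in a short sketch (names ours):

import java.nio.ByteBuffer;
import java.util.Arrays;

import org.apache.lucene.facet.encoding.SimpleIntEncoder;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;

public class SimpleEncoderCheck {
  public static void main(String[] args) {
    IntsRef values = new IntsRef(2);
    values.ints[values.length++] = 1;
    values.ints[values.length++] = 256;
    BytesRef buf = new BytesRef(8);
    new SimpleIntEncoder().encode(values, buf);

    // ByteBuffer's default order is big-endian, matching the encoder's shifts
    byte[] expected = ByteBuffer.allocate(8).putInt(1).putInt(256).array();
    byte[] actual = Arrays.copyOfRange(buf.bytes, buf.offset, buf.offset + buf.length);
    System.out.println(Arrays.equals(expected, actual)); // true
  }
}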
@ -1,54 +0,0 @@
package org.apache.lucene.facet.encoding;

import java.util.Arrays;

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * An {@link IntEncoderFilter} which sorts the values to encode in ascending
 * order before encoding them.
 *
 * @lucene.experimental
 */
public final class SortingIntEncoder extends IntEncoderFilter {

  /** Initializes with the given encoder. */
  public SortingIntEncoder(IntEncoder encoder) {
    super(encoder);
  }

  @Override
  public void encode(IntsRef values, BytesRef buf) {
    Arrays.sort(values.ints, values.offset, values.offset + values.length);
    encoder.encode(values, buf);
  }

  @Override
  public IntDecoder createMatchingDecoder() {
    return encoder.createMatchingDecoder();
  }

  @Override
  public String toString() {
    return "Sorting(" + encoder.toString() + ")";
  }

}
@ -1,63 +0,0 @@
package org.apache.lucene.facet.encoding;

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * An {@link IntEncoderFilter} which ensures only unique values are encoded. The
 * implementation assumes the values given to {@link #encode(IntsRef, BytesRef)} are sorted.
 * If this is not the case, you can chain this encoder with
 * {@link SortingIntEncoder}.
 *
 * @lucene.experimental
 */
public final class UniqueValuesIntEncoder extends IntEncoderFilter {

  /** Constructs a new instance with the given encoder. */
  public UniqueValuesIntEncoder(IntEncoder encoder) {
    super(encoder);
  }

  @Override
  public void encode(IntsRef values, BytesRef buf) {
    int prev = values.ints[values.offset];
    int idx = values.offset + 1;
    int upto = values.offset + values.length;
    for (int i = idx; i < upto; i++) {
      if (values.ints[i] != prev) {
        values.ints[idx++] = values.ints[i];
        prev = values.ints[i];
      }
    }
    values.length = idx - values.offset;
    encoder.encode(values, buf);
  }

  @Override
  public IntDecoder createMatchingDecoder() {
    return encoder.createMatchingDecoder();
  }

  @Override
  public String toString() {
    return "Unique(" + encoder.toString() + ")";
  }

}
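
These filters compose. Below is a hedged sketch of one plausible chain for ordinal data -- sort, deduplicate, d-gap, then VInt8 -- assuming the DGapIntEncoder referenced earlier; no claim is made that this exact chain is a configured default anywhere:

import org.apache.lucene.facet.encoding.*;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;

public class OrdinalChainDemo {
  public static void main(String[] args) {
    IntEncoder encoder = new SortingIntEncoder(
        new UniqueValuesIntEncoder(new DGapIntEncoder(new VInt8IntEncoder())));
    IntsRef ordinals = new IntsRef(8);
    for (int v : new int[] { 9, 7, 9, 3, 7, 12 }) { // unsorted, with duplicates
      ordinals.ints[ordinals.length++] = v;
    }
    BytesRef buf = new BytesRef(32);
    encoder.encode(ordinals, buf); // sorts and dedups 'ordinals' in place

    IntsRef decoded = new IntsRef(8);
    encoder.createMatchingDecoder().decode(buf, decoded);
    for (int i = 0; i < decoded.length; i++) {
      System.out.print(decoded.ints[i] + " "); // expected: 3 7 9 12
    }
  }
}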
@ -1,64 +0,0 @@
package org.apache.lucene.facet.encoding;

import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.RamUsageEstimator;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Decodes values encoded by {@link VInt8IntEncoder}.
 *
 * @lucene.experimental
 */
public final class VInt8IntDecoder extends IntDecoder {

  @Override
  public void decode(BytesRef buf, IntsRef values) {
    values.offset = values.length = 0;

    // grow the buffer up front, even if by a large number of values (buf.length)
    // that saves the need to check inside the loop for every decoded value if
    // the buffer needs to grow.
    if (values.ints.length < buf.length) {
      values.ints = new int[ArrayUtil.oversize(buf.length, RamUsageEstimator.NUM_BYTES_INT)];
    }

    // it is better if the decoding is inlined like so, and not e.g.
    // in a utility method
    int upto = buf.offset + buf.length;
    int value = 0;
    int offset = buf.offset;
    while (offset < upto) {
      byte b = buf.bytes[offset++];
      if (b >= 0) {
        values.ints[values.length++] = (value << 7) | b;
        value = 0;
      } else {
        value = (value << 7) | (b & 0x7F);
      }
    }
  }

  @Override
  public String toString() {
    return "VInt8";
  }

}
@ -1,104 +0,0 @@
package org.apache.lucene.facet.encoding;

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * An {@link IntEncoder} which implements variable length encoding. A number is
 * encoded as follows:
 * <ul>
 * <li>If it is at most 127 (i.e. uses only 7 bits) and non-negative, it is
 * encoded as a single byte: 0bbbbbbb.
 * <li>If it occupies more than 7 bits, it is represented as a series of bytes,
 * each byte carrying 7 bits. All but the last byte have the MSB set, the last
 * one has it unset.
 * </ul>
 * Example:
 * <ol>
 * <li>n = 117 = 01110101: This has less than 8 significant bits, therefore is
 * encoded as 01110101 = 0x75.
 * <li>n = 100000 = (binary) 11000011010100000. This has 17 significant bits,
 * thus needs three VInt8 bytes. Pad it to a multiple of 7 bits, then split it
 * into chunks of 7 and add an MSB, 0 for the last byte, 1 for the others:
 * 1|0000110 1|0001101 0|0100000 = 0x86 0x8D 0x20.
 * </ol>
 * <b>NOTE:</b> although this encoder is not limited to values ≥ 0, it is not
 * recommended for use with negative values, as their encoding will result in 5
 * bytes written to the output stream, rather than 4. For such values, either
 * use {@link SimpleIntEncoder} or write your own version of variable length
 * encoding, which can better handle negative values.
 *
 * @lucene.experimental
 */
public final class VInt8IntEncoder extends IntEncoder {

  @Override
  public void encode(IntsRef values, BytesRef buf) {
    buf.offset = buf.length = 0;
    int maxBytesNeeded = 5 * values.length; // at most 5 bytes per VInt
    if (buf.bytes.length < maxBytesNeeded) {
      buf.grow(maxBytesNeeded);
    }

    int upto = values.offset + values.length;
    for (int i = values.offset; i < upto; i++) {
      // it is better if the encoding is inlined like so, and not e.g.
      // in a utility method
      int value = values.ints[i];
      if ((value & ~0x7F) == 0) {
        buf.bytes[buf.length] = (byte) value;
        buf.length++;
      } else if ((value & ~0x3FFF) == 0) {
        buf.bytes[buf.length] = (byte) (0x80 | ((value & 0x3F80) >> 7));
        buf.bytes[buf.length + 1] = (byte) (value & 0x7F);
        buf.length += 2;
      } else if ((value & ~0x1FFFFF) == 0) {
        buf.bytes[buf.length] = (byte) (0x80 | ((value & 0x1FC000) >> 14));
        buf.bytes[buf.length + 1] = (byte) (0x80 | ((value & 0x3F80) >> 7));
        buf.bytes[buf.length + 2] = (byte) (value & 0x7F);
        buf.length += 3;
      } else if ((value & ~0xFFFFFFF) == 0) {
        buf.bytes[buf.length] = (byte) (0x80 | ((value & 0xFE00000) >> 21));
        buf.bytes[buf.length + 1] = (byte) (0x80 | ((value & 0x1FC000) >> 14));
        buf.bytes[buf.length + 2] = (byte) (0x80 | ((value & 0x3F80) >> 7));
        buf.bytes[buf.length + 3] = (byte) (value & 0x7F);
        buf.length += 4;
      } else {
        buf.bytes[buf.length] = (byte) (0x80 | ((value & 0xF0000000) >> 28));
        buf.bytes[buf.length + 1] = (byte) (0x80 | ((value & 0xFE00000) >> 21));
        buf.bytes[buf.length + 2] = (byte) (0x80 | ((value & 0x1FC000) >> 14));
        buf.bytes[buf.length + 3] = (byte) (0x80 | ((value & 0x3F80) >> 7));
        buf.bytes[buf.length + 4] = (byte) (value & 0x7F);
        buf.length += 5;
      }
    }
  }

  @Override
  public IntDecoder createMatchingDecoder() {
    return new VInt8IntDecoder();
  }

  @Override
  public String toString() {
    return "VInt8";
  }

}
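
A quick sketch confirming the worked example above (100000 encodes to 0x86 0x8D 0x20); the demo class name is ours:

import org.apache.lucene.facet.encoding.VInt8IntEncoder;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;

public class VInt8Demo {
  public static void main(String[] args) {
    IntsRef values = new IntsRef(1);
    values.ints[values.length++] = 100000; // the javadoc's second example
    BytesRef buf = new BytesRef(8);
    new VInt8IntEncoder().encode(values, buf);
    for (int i = buf.offset; i < buf.offset + buf.length; i++) {
      System.out.printf("0x%02X ", buf.bytes[i] & 0xFF); // prints 0x86 0x8D 0x20
    }
  }
}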
@ -1,24 +0,0 @@
<!--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements. See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-->
<html>
<head>
<title>Facets Encoding</title>
</head>
<body>
Offers various encoders and decoders for category ordinals.
</body>
</html>
@ -1,38 +0,0 @@
package org.apache.lucene.facet.index;

import java.io.IOException;
import java.util.Map;

import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Builds category list data by encoding the appropriate information for every
 * category and ordinal given to {@link #build(IntsRef, Iterable)}.
 *
 * @lucene.experimental
 */
public interface CategoryListBuilder {

  /** Returns the encoded ordinals data. */
  public Map<String,BytesRef> build(IntsRef ordinals, Iterable<FacetLabel> categories) throws IOException;

}
@ -1,170 +0,0 @@
package org.apache.lucene.facet.index;

import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.lucene.facet.encoding.IntEncoder;
import org.apache.lucene.facet.params.CategoryListParams;
import org.apache.lucene.facet.params.FacetIndexingParams;
import org.apache.lucene.facet.params.CategoryListParams.OrdinalPolicy;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.facet.util.PartitionsUtils;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * A {@link CategoryListBuilder} which builds counting list data by encoding
 * the category ordinals into one or more {@link BytesRef}. Each
 * {@link BytesRef} corresponds to a set of ordinals that belong to the same
 * partition. When partitions are not enabled (i.e.
 * {@link FacetIndexingParams#getPartitionSize()} returns
 * {@link Integer#MAX_VALUE}), only one {@link BytesRef} is returned by this
 * class.
 * <p>
 * Counting lists are usually used for computing the weight of categories by
 * summing their number of occurrences (hence counting) in a result set.
 */
public class CountingListBuilder implements CategoryListBuilder {

  /** Specializes encoding ordinals when partitions are enabled/disabled. */
  private static abstract class OrdinalsEncoder {
    OrdinalsEncoder() {}
    public abstract Map<String,BytesRef> encode(IntsRef ordinals);
  }

  private static final class NoPartitionsOrdinalsEncoder extends OrdinalsEncoder {

    private final IntEncoder encoder;
    private final String name = "";

    NoPartitionsOrdinalsEncoder(CategoryListParams categoryListParams) {
      encoder = categoryListParams.createEncoder();
    }

    @Override
    public Map<String,BytesRef> encode(IntsRef ordinals) {
      final BytesRef bytes = new BytesRef(128); // should be enough for most common applications
      encoder.encode(ordinals, bytes);
      return Collections.singletonMap(name, bytes);
    }

  }

  private static final class PerPartitionOrdinalsEncoder extends OrdinalsEncoder {

    private final FacetIndexingParams indexingParams;
    private final CategoryListParams categoryListParams;
    private final int partitionSize;
    private final HashMap<String,IntEncoder> partitionEncoder = new HashMap<String,IntEncoder>();

    PerPartitionOrdinalsEncoder(FacetIndexingParams indexingParams, CategoryListParams categoryListParams) {
      this.indexingParams = indexingParams;
      this.categoryListParams = categoryListParams;
      this.partitionSize = indexingParams.getPartitionSize();
    }

    @Override
    public HashMap<String,BytesRef> encode(IntsRef ordinals) {
      // build the partitionOrdinals map
      final HashMap<String,IntsRef> partitionOrdinals = new HashMap<String,IntsRef>();
      for (int i = 0; i < ordinals.length; i++) {
        int ordinal = ordinals.ints[i];
        final String name = PartitionsUtils.partitionNameByOrdinal(indexingParams, ordinal);
        IntsRef partitionOrds = partitionOrdinals.get(name);
        if (partitionOrds == null) {
          partitionOrds = new IntsRef(32);
          partitionOrdinals.put(name, partitionOrds);
          partitionEncoder.put(name, categoryListParams.createEncoder());
        }
        partitionOrds.ints[partitionOrds.length++] = ordinal % partitionSize;
      }

      HashMap<String,BytesRef> partitionBytes = new HashMap<String,BytesRef>();
      for (Entry<String,IntsRef> e : partitionOrdinals.entrySet()) {
        String name = e.getKey();
        final IntEncoder encoder = partitionEncoder.get(name);
        final BytesRef bytes = new BytesRef(128); // should be enough for most common applications
        encoder.encode(e.getValue(), bytes);
        partitionBytes.put(name, bytes);
      }
      return partitionBytes;
    }

  }

  private final OrdinalsEncoder ordinalsEncoder;
  private final TaxonomyWriter taxoWriter;
  private final CategoryListParams clp;

  public CountingListBuilder(CategoryListParams categoryListParams, FacetIndexingParams indexingParams,
      TaxonomyWriter taxoWriter) {
    this.taxoWriter = taxoWriter;
    this.clp = categoryListParams;
    if (indexingParams.getPartitionSize() == Integer.MAX_VALUE) {
      ordinalsEncoder = new NoPartitionsOrdinalsEncoder(categoryListParams);
    } else {
      ordinalsEncoder = new PerPartitionOrdinalsEncoder(indexingParams, categoryListParams);
    }
  }

  /**
   * Every returned {@link BytesRef} corresponds to a single partition (as
   * defined by {@link FacetIndexingParams#getPartitionSize()}) and the key
   * denotes the partition ID. When no partitions are defined, the returned map
   * contains only one value.
   * <p>
   * <b>NOTE:</b> the {@code ordinals} array is modified by adding parent
   * ordinals to it. Also, some encoders may sort the array and remove duplicate
   * ordinals. Therefore you may want to invoke this method after you finished
   * processing the array for other purposes.
   */
  @Override
  public Map<String,BytesRef> build(IntsRef ordinals, Iterable<FacetLabel> categories) throws IOException {
    int upto = ordinals.length; // since we may add ordinals to IntsRef, iterate upto original length

    Iterator<FacetLabel> iter = categories.iterator();
    for (int i = 0; i < upto; i++) {
      int ordinal = ordinals.ints[i];
      FacetLabel cp = iter.next();
      OrdinalPolicy op = clp.getOrdinalPolicy(cp.components[0]);
      if (op != OrdinalPolicy.NO_PARENTS) {
        // need to add parents too
        int parent = taxoWriter.getParent(ordinal);
        if (parent > 0) {
          // only do this if the category is not a dimension itself, otherwise, it was just discarded by the 'if' below
          while (parent > 0) {
            ordinals.ints[ordinals.length++] = parent;
            parent = taxoWriter.getParent(parent);
          }
          if (op == OrdinalPolicy.ALL_BUT_DIMENSION) { // discard the last added parent, which is the dimension
            ordinals.length--;
          }
        }
      }
    }
    return ordinalsEncoder.encode(ordinals);
  }

}
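
The per-partition encoder above stores ordinal % partitionSize under a per-partition name. Below is a dependency-free sketch of that arithmetic; it assumes partitionNameByOrdinal keys partitions by ordinal / partitionSize (the actual name derivation lives in PartitionsUtils and is not shown in this patch):

public class PartitionMathDemo {
  public static void main(String[] args) {
    int partitionSize = 4; // FacetIndexingParams#getPartitionSize() in the real code
    int[] ordinals = { 1, 3, 5, 9, 11 };
    for (int ordinal : ordinals) {
      int partition = ordinal / partitionSize;   // selects the partition (and its field name)
      int inPartition = ordinal % partitionSize; // the value actually handed to the IntEncoder
      System.out.println("ordinal " + ordinal + " -> partition " + partition + ", value " + inPartition);
    }
  }
}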
@ -1,83 +0,0 @@
package org.apache.lucene.facet.index;

import java.io.IOException;
import java.util.Iterator;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.facet.params.FacetIndexingParams;
import org.apache.lucene.facet.taxonomy.FacetLabel;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * A {@link TokenStream} which creates category drill-down terms.
 *
 * @lucene.experimental
 */
public class DrillDownStream extends TokenStream {

  private final FacetIndexingParams indexingParams;
  private final Iterator<FacetLabel> categories;
  private final CharTermAttribute termAttribute;

  private FacetLabel current;
  private boolean isParent;

  public DrillDownStream(Iterable<FacetLabel> categories, FacetIndexingParams indexingParams) {
    termAttribute = addAttribute(CharTermAttribute.class);
    this.categories = categories.iterator();
    this.indexingParams = indexingParams;
  }

  protected void addAdditionalAttributes(FacetLabel category, boolean isParent) {
    // a hook for AssociationsDrillDownStream to add the associations payload to
    // the drill-down terms
  }

  @Override
  public final boolean incrementToken() throws IOException {
    if (current.length == 0) {
      if (!categories.hasNext()) {
        return false; // no more categories
      }
      current = categories.next();
      termAttribute.resizeBuffer(current.fullPathLength());
      isParent = false;
    }

    // copy current as drill-down term (it's either the leaf node or PathPolicy
    // accepted it)
    int nChars = indexingParams.drillDownTermText(current, termAttribute.buffer());
    termAttribute.setLength(nChars);
    addAdditionalAttributes(current, isParent);

    // prepare current for next call by trimming the last component (parents)
    current = current.subpath(current.length - 1);
    isParent = true;
    return true;
  }

  @Override
  public void reset() throws IOException {
    current = categories.next();
    termAttribute.resizeBuffer(current.fullPathLength());
    isParent = false;
  }

}
|
|
|
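For orientation, here is a minimal usage sketch of the stream removed above, not part of the file itself. It assumes this branch's FacetLabel varargs constructor and the default indexing params; the category values are illustrative only.

// One category, Author/Lisa: the stream emits the drill-down term for the full
// path first, then one term per parent prefix (here just "Author"); components
// are joined by the delimiter configured in the indexing params.
List<FacetLabel> categories = Collections.singletonList(new FacetLabel("Author", "Lisa"));
DrillDownStream stream = new DrillDownStream(categories, FacetIndexingParams.DEFAULT);
CharTermAttribute term = stream.getAttribute(CharTermAttribute.class);
stream.reset();
while (stream.incrementToken()) {
  System.out.println(term.toString());
}
stream.close();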
@@ -1,194 +0,0 @@
package org.apache.lucene.facet.index;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map.Entry;
import java.util.Map;

import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.facet.params.CategoryListParams;
import org.apache.lucene.facet.params.FacetIndexingParams;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * A utility class for adding facet fields to a document. Usually one field is
 * added for all facets; however, per
 * {@link FacetIndexingParams#getCategoryListParams(FacetLabel)}, one field
 * may be added for every group of facets.
 *
 * @lucene.experimental
 */
public class FacetFields {

  // The drill-down field is added with a TokenStream, hence it's based on the
  // TextField type. However in practice it is added just like StringField,
  // therefore we set its IndexOptions to DOCS_ONLY.
  private static final FieldType DRILL_DOWN_TYPE = new FieldType(TextField.TYPE_NOT_STORED);
  static {
    DRILL_DOWN_TYPE.setIndexOptions(IndexOptions.DOCS_ONLY);
    DRILL_DOWN_TYPE.setOmitNorms(true);
    DRILL_DOWN_TYPE.freeze();
  }

  protected final TaxonomyWriter taxonomyWriter;

  protected final FacetIndexingParams indexingParams;

  /**
   * Constructs a new instance with the {@link FacetIndexingParams#DEFAULT
   * default} facet indexing params.
   *
   * @param taxonomyWriter
   *          used to resolve given categories to ordinals
   */
  public FacetFields(TaxonomyWriter taxonomyWriter) {
    this(taxonomyWriter, FacetIndexingParams.DEFAULT);
  }

  /**
   * Constructs a new instance with the given facet indexing params.
   *
   * @param taxonomyWriter
   *          used to resolve given categories to ordinals
   * @param params
   *          determines under which fields the categories should be indexed
   */
  public FacetFields(TaxonomyWriter taxonomyWriter, FacetIndexingParams params) {
    this.taxonomyWriter = taxonomyWriter;
    this.indexingParams = params;
  }

  /**
   * Creates a mapping between a {@link CategoryListParams} and all
   * {@link FacetLabel categories} that are associated with it.
   */
  protected Map<CategoryListParams,Iterable<FacetLabel>> createCategoryListMapping(
      Iterable<FacetLabel> categories) {
    if (indexingParams.getAllCategoryListParams().size() == 1) {
      return Collections.singletonMap(indexingParams.getCategoryListParams(null), categories);
    }
    HashMap<CategoryListParams,Iterable<FacetLabel>> categoryLists =
        new HashMap<CategoryListParams,Iterable<FacetLabel>>();
    for (FacetLabel cp : categories) {
      // each category may be indexed under a different field, so add it to the right list.
      CategoryListParams clp = indexingParams.getCategoryListParams(cp);
      List<FacetLabel> list = (List<FacetLabel>) categoryLists.get(clp);
      if (list == null) {
        list = new ArrayList<FacetLabel>();
        categoryLists.put(clp, list);
      }
      list.add(cp);
    }
    return categoryLists;
  }

  /**
   * Returns the category list data, as a mapping from key to {@link BytesRef}
   * which includes the encoded data. Every ordinal in {@code ordinals}
   * corresponds to a {@link FacetLabel} returned from {@code categories}.
   */
  protected Map<String,BytesRef> getCategoryListData(CategoryListParams categoryListParams,
      IntsRef ordinals, Iterable<FacetLabel> categories /* needed for AssociationsFacetFields */)
      throws IOException {
    return new CountingListBuilder(categoryListParams, indexingParams, taxonomyWriter).build(ordinals, categories);
  }

  /**
   * Returns a {@link DrillDownStream} for writing the categories drill-down
   * terms.
   */
  protected DrillDownStream getDrillDownStream(Iterable<FacetLabel> categories) {
    return new DrillDownStream(categories, indexingParams);
  }

  /**
   * Returns the {@link FieldType} with which the drill-down terms should be
   * indexed. The default is {@link IndexOptions#DOCS_ONLY}.
   */
  protected FieldType drillDownFieldType() {
    return DRILL_DOWN_TYPE;
  }

  /**
   * Add the counting list data to the document under the given field. Note that
   * the field is determined by the {@link CategoryListParams}.
   */
  protected void addCountingListData(Document doc, Map<String,BytesRef> categoriesData, String field) {
    for (Entry<String,BytesRef> entry : categoriesData.entrySet()) {
      doc.add(new BinaryDocValuesField(field + entry.getKey(), entry.getValue()));
    }
  }

  /** Adds the needed facet fields to the document. */
  public void addFields(Document doc, Iterable<FacetLabel> categories) throws IOException {
    if (categories == null) {
      throw new IllegalArgumentException("categories should not be null");
    }

    // TODO: add reuse capabilities to this class, per CLP objects:
    // - drill-down field
    // - counting list field
    // - DrillDownStream
    // - CountingListStream

    final Map<CategoryListParams,Iterable<FacetLabel>> categoryLists = createCategoryListMapping(categories);

    // for each CLP we add a different field for drill-down terms as well as for
    // counting list data.
    IntsRef ordinals = new IntsRef(32); // should be enough for most common applications
    for (Entry<CategoryListParams, Iterable<FacetLabel>> e : categoryLists.entrySet()) {
      final CategoryListParams clp = e.getKey();
      final String field = clp.field;

      // build category list data
      ordinals.length = 0; // reset
      int maxNumOrds = 0;
      for (FacetLabel cp : e.getValue()) {
        int ordinal = taxonomyWriter.addCategory(cp);
        maxNumOrds += cp.length; // ordinal and potentially all parents
        if (ordinals.ints.length < maxNumOrds) {
          ordinals.grow(maxNumOrds);
        }
        ordinals.ints[ordinals.length++] = ordinal;
      }
      Map<String,BytesRef> categoriesData = getCategoryListData(clp, ordinals, e.getValue());

      // add the counting list data
      addCountingListData(doc, categoriesData, field);

      // add the drill-down field
      DrillDownStream drillDownStream = getDrillDownStream(e.getValue());
      Field drillDown = new Field(field, drillDownStream, drillDownFieldType());
      doc.add(drillDown);
    }
  }

}
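A hedged indexing sketch of how FacetFields was typically used, per its javadoc above. It is not part of the removed file; indexWriter and taxoDir are assumed to exist in the surrounding code, and the category values are illustrative.

// add one document with a single facet category
TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
FacetFields facetFields = new FacetFields(taxoWriter); // default FacetIndexingParams

Document doc = new Document();
doc.add(new TextField("content", "lucene in action", Field.Store.NO));
// resolves Author/Lisa to a taxonomy ordinal, then adds the drill-down terms
// and the binary counting-list field to the document
facetFields.addFields(doc, Collections.singletonList(new FacetLabel("Author", "Lisa")));
indexWriter.addDocument(doc);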
@@ -1,24 +0,0 @@
<!--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements. See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-->
<html>
<head>
<title>Facets indexing code</title>
</head>
<body>
Facets indexing code.
</body>
</html>
@@ -1,116 +0,0 @@
package org.apache.lucene.facet.old;

import java.io.IOException;
import java.util.List;

import org.apache.lucene.facet.params.FacetSearchParams;
import org.apache.lucene.facet.sampling.RandomSampler;
import org.apache.lucene.facet.sampling.Sampler;
import org.apache.lucene.facet.sampling.SamplingAccumulator;
import org.apache.lucene.facet.search.FacetArrays;
import org.apache.lucene.facet.search.FacetResult;
import org.apache.lucene.facet.search.FacetsAccumulator;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.IndexReader;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * {@link FacetsAccumulator} whose behavior regarding complements, sampling,
 * etc. is not set up front but rather is determined at accumulation time
 * according to the statistics of the accumulated set of documents and the
 * index.
 * <p>
 * Note: sampling accumulation (accumulation over a sampled set of the results)
 * does not guarantee accurate values for
 * {@link FacetResult#getNumValidDescendants()}.
 *
 * @lucene.experimental
 */
public final class AdaptiveFacetsAccumulator extends OldFacetsAccumulator {

  private Sampler sampler = new RandomSampler();

  /**
   * Create an {@link AdaptiveFacetsAccumulator}
   * @see OldFacetsAccumulator#OldFacetsAccumulator(FacetSearchParams, IndexReader, TaxonomyReader)
   */
  public AdaptiveFacetsAccumulator(FacetSearchParams searchParams, IndexReader indexReader,
      TaxonomyReader taxonomyReader) {
    super(searchParams, indexReader, taxonomyReader);
  }

  /**
   * Create an {@link AdaptiveFacetsAccumulator}
   *
   * @see OldFacetsAccumulator#OldFacetsAccumulator(FacetSearchParams,
   *      IndexReader, TaxonomyReader, FacetArrays)
   */
  public AdaptiveFacetsAccumulator(FacetSearchParams searchParams, IndexReader indexReader,
      TaxonomyReader taxonomyReader, FacetArrays facetArrays) {
    super(searchParams, indexReader, taxonomyReader, facetArrays);
  }

  /**
   * Set the sampler.
   * @param sampler sampler to set
   */
  public void setSampler(Sampler sampler) {
    this.sampler = sampler;
  }

  @Override
  public List<FacetResult> accumulate(ScoredDocIDs docids) throws IOException {
    OldFacetsAccumulator delegee = appropriateFacetCountingAccumulator(docids);

    if (delegee == this) {
      return super.accumulate(docids);
    }

    return delegee.accumulate(docids);
  }

  /**
   * Compute the appropriate facet accumulator to use.
   * If no special/clever adaptation is possible or needed, return this (self).
   */
  private OldFacetsAccumulator appropriateFacetCountingAccumulator(ScoredDocIDs docids) {
    // Verify that searchParams permit sampling/complement/etc... otherwise do default
    if (!mayComplement()) {
      return this;
    }

    // Now we're sure we can use the sampling methods, as we're in a counting-only mode

    // Verify that sampling is enabled and required ... otherwise do default
    if (sampler == null || !sampler.shouldSample(docids)) {
      return this;
    }

    SamplingAccumulator samplingAccumulator = new SamplingAccumulator(sampler, searchParams, indexReader, taxonomyReader);
    samplingAccumulator.setComplementThreshold(getComplementThreshold());
    return samplingAccumulator;
  }

  /**
   * @return the sampler in effect
   */
  public final Sampler getSampler() {
    return sampler;
  }
}
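A hedged usage sketch of the adaptive accumulator above, not part of the removed file. searchParams, indexReader, taxoReader and docids (a ScoredDocIDs) are assumed to exist in the surrounding code.

// The accumulator decides per query whether to sample and/or count complements.
AdaptiveFacetsAccumulator accumulator =
    new AdaptiveFacetsAccumulator(searchParams, indexReader, taxoReader);
accumulator.setSampler(new RandomSampler()); // optional; a RandomSampler is the default
List<FacetResult> results = accumulator.accumulate(docids);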
@@ -1,48 +0,0 @@
package org.apache.lucene.facet.old;

import java.io.IOException;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.IntsRef;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Aggregates the categories of documents given to
 * {@link #aggregate(int, float, IntsRef)}. Note that the document IDs are local
 * to the reader given to {@link #setNextReader(AtomicReaderContext)}.
 *
 * @lucene.experimental
 */
public interface Aggregator {

  /**
   * Sets the {@link AtomicReaderContext} for which
   * {@link #aggregate(int, float, IntsRef)} calls will be made. If this method
   * returns false, {@link #aggregate(int, float, IntsRef)} should not be called
   * for this reader.
   */
  public boolean setNextReader(AtomicReaderContext context) throws IOException;

  /**
   * Aggregate the ordinals of the given document ID (and its score). The given
   * ordinals offset is always zero.
   */
  public void aggregate(int docID, float score, IntsRef ordinals) throws IOException;

}
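A hedged sketch of the per-segment protocol this interface implies, not part of the removed file. The docsInSegment() helper is hypothetical; a real caller (see OldFacetsAccumulator later in this commit) drives segment-local doc IDs from a ScoredDocIDsIterator instead.

IntsRef ordinals = new IntsRef(32);
for (AtomicReaderContext ctx : indexReader.leaves()) {
  // either party returning false means this segment has no data for it
  if (!categoryListIter.setNextReader(ctx) || !aggregator.setNextReader(ctx)) {
    continue;
  }
  for (int docID : docsInSegment(ctx)) { // hypothetical helper; segment-local IDs
    categoryListIter.getOrdinals(docID, ordinals);
    aggregator.aggregate(docID, 1.0f, ordinals); // 1.0f = score when scoring is disabled
  }
}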
@@ -1,44 +0,0 @@
package org.apache.lucene.facet.old;

import java.io.IOException;

import org.apache.lucene.util.IntsRef;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * A {@link CountingAggregator} used during complement counting.
 *
 * @lucene.experimental
 */
public class ComplementCountingAggregator extends CountingAggregator {

  public ComplementCountingAggregator(int[] counterArray) {
    super(counterArray);
  }

  @Override
  public void aggregate(int docID, float score, IntsRef ordinals) throws IOException {
    for (int i = 0; i < ordinals.length; i++) {
      int ord = ordinals.ints[i];
      assert counterArray[ord] != 0 : "complement aggregation: count is about to become negative for ordinal " + ord;
      --counterArray[ord];
    }
  }

}
@@ -1,66 +0,0 @@
package org.apache.lucene.facet.old;

import java.io.IOException;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.IntsRef;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * An {@link Aggregator} which updates a counter array, sized to the whole
 * taxonomy, counting the number of times each category appears in the given
 * set of documents.
 *
 * @lucene.experimental
 */
public class CountingAggregator implements Aggregator {

  protected int[] counterArray;

  public CountingAggregator(int[] counterArray) {
    this.counterArray = counterArray;
  }

  @Override
  public void aggregate(int docID, float score, IntsRef ordinals) throws IOException {
    for (int i = 0; i < ordinals.length; i++) {
      counterArray[ordinals.ints[i]]++;
    }
  }

  @Override
  public boolean equals(Object obj) {
    if (obj == null || obj.getClass() != this.getClass()) {
      return false;
    }
    CountingAggregator that = (CountingAggregator) obj;
    return that.counterArray == this.counterArray;
  }

  @Override
  public int hashCode() {
    return counterArray == null ? 0 : counterArray.hashCode();
  }

  @Override
  public boolean setNextReader(AtomicReaderContext context) throws IOException {
    return true;
  }

}
@@ -1,174 +0,0 @@
package org.apache.lucene.facet.old;

import java.io.IOException;
import java.util.Iterator;
import java.util.List;

import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Represents {@link MatchingDocs} as {@link ScoredDocIDs}.
 *
 * @lucene.experimental
 */
public class MatchingDocsAsScoredDocIDs implements ScoredDocIDs {

  // TODO remove this class once we get rid of ScoredDocIDs

  final List<MatchingDocs> matchingDocs;
  final int size;

  public MatchingDocsAsScoredDocIDs(List<MatchingDocs> matchingDocs) {
    this.matchingDocs = matchingDocs;
    int totalSize = 0;
    for (MatchingDocs md : matchingDocs) {
      totalSize += md.totalHits;
    }
    this.size = totalSize;
  }

  @Override
  public ScoredDocIDsIterator iterator() throws IOException {
    return new ScoredDocIDsIterator() {

      final Iterator<MatchingDocs> mdIter = matchingDocs.iterator();

      int scoresIdx = 0;
      int doc = 0;
      MatchingDocs current;
      int currentLength;
      boolean done = false;

      @Override
      public boolean next() {
        if (done) {
          return false;
        }

        while (current == null) {
          if (!mdIter.hasNext()) {
            done = true;
            return false;
          }
          current = mdIter.next();
          currentLength = current.bits.length();
          doc = 0;
          scoresIdx = 0;

          if (doc >= currentLength || (doc = current.bits.nextSetBit(doc)) == -1) {
            current = null;
          } else {
            doc = -1; // we're calling nextSetBit later on
          }
        }

        ++doc;
        if (doc >= currentLength || (doc = current.bits.nextSetBit(doc)) == -1) {
          current = null;
          return next();
        }

        return true;
      }

      @Override
      public float getScore() {
        return current.scores == null ? ScoredDocIDsIterator.DEFAULT_SCORE : current.scores[scoresIdx++];
      }

      @Override
      public int getDocID() {
        return done ? DocIdSetIterator.NO_MORE_DOCS : doc + current.context.docBase;
      }
    };
  }

  @Override
  public DocIdSet getDocIDs() {
    return new DocIdSet() {

      final Iterator<MatchingDocs> mdIter = matchingDocs.iterator();
      int doc = 0;
      MatchingDocs current;
      int currentLength;
      boolean done = false;

      @Override
      public DocIdSetIterator iterator() throws IOException {
        return new DocIdSetIterator() {

          @Override
          public int nextDoc() throws IOException {
            if (done) {
              return DocIdSetIterator.NO_MORE_DOCS;
            }

            while (current == null) {
              if (!mdIter.hasNext()) {
                done = true;
                return DocIdSetIterator.NO_MORE_DOCS;
              }
              current = mdIter.next();
              currentLength = current.bits.length();
              doc = 0;

              if (doc >= currentLength || (doc = current.bits.nextSetBit(doc)) == -1) {
                current = null;
              } else {
                doc = -1; // we're calling nextSetBit later on
              }
            }

            ++doc;
            if (doc >= currentLength || (doc = current.bits.nextSetBit(doc)) == -1) {
              current = null;
              return nextDoc();
            }

            return doc + current.context.docBase;
          }

          @Override
          public int docID() {
            return doc + current.context.docBase;
          }

          @Override
          public long cost() {
            return size;
          }

          @Override
          public int advance(int target) throws IOException {
            throw new UnsupportedOperationException("not supported");
          }
        };
      }
    };
  }

  @Override
  public int size() {
    return size;
  }

}
@@ -1,457 +0,0 @@
package org.apache.lucene.facet.old;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map.Entry;

import org.apache.lucene.facet.complements.TotalFacetCounts;
import org.apache.lucene.facet.complements.TotalFacetCountsCache;
import org.apache.lucene.facet.params.FacetIndexingParams;
import org.apache.lucene.facet.params.FacetSearchParams;
import org.apache.lucene.facet.partitions.IntermediateFacetResult;
import org.apache.lucene.facet.partitions.PartitionsFacetResultsHandler;
import org.apache.lucene.facet.sampling.Sampler.OverSampledFacetRequest;
import org.apache.lucene.facet.search.CategoryListIterator;
import org.apache.lucene.facet.search.CountFacetRequest;
import org.apache.lucene.facet.search.FacetArrays;
import org.apache.lucene.facet.search.FacetRequest;
import org.apache.lucene.facet.search.FacetRequest.ResultMode;
import org.apache.lucene.facet.search.FacetResult;
import org.apache.lucene.facet.search.FacetsAccumulator;
import org.apache.lucene.facet.search.FacetsAggregator;
import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs;
import org.apache.lucene.facet.search.OrdinalValueResolver;
import org.apache.lucene.facet.search.OrdinalValueResolver.FloatValueResolver;
import org.apache.lucene.facet.search.OrdinalValueResolver.IntValueResolver;
import org.apache.lucene.facet.search.SumScoreFacetRequest;
import org.apache.lucene.facet.search.TaxonomyFacetsAccumulator;
import org.apache.lucene.facet.search.TopKFacetResultsHandler;
import org.apache.lucene.facet.search.TopKInEachNodeHandler;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.util.PartitionsUtils;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.IntsRef;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * A {@link FacetsAccumulator} which supports partitions, sampling and
 * complement counting.
 * <p>
 * <b>NOTE:</b> this accumulator still uses the old API and will eventually be
 * removed in favor of dedicated accumulators which support the above features
 * over the new {@link FacetsAggregator} API. It provides {@link Aggregator}
 * implementations for {@link CountFacetRequest},
 * {@link SumScoreFacetRequest} and {@link OverSampledFacetRequest}. If you need
 * to use it in conjunction with other facet requests, you should override
 * {@link #createAggregator(FacetRequest, FacetArrays)}.
 *
 * @lucene.experimental
 */
public class OldFacetsAccumulator extends TaxonomyFacetsAccumulator {

  /**
   * Default threshold for using the complements optimization.
   * If accumulating facets for a document set larger than this ratio of the
   * index size, the complement optimization is performed.
   * @see #setComplementThreshold(double) for more info on the complements optimization.
   */
  public static final double DEFAULT_COMPLEMENT_THRESHOLD = 0.6;

  /**
   * Passing this to {@link #setComplementThreshold(double)} will disable using complement optimization.
   */
  public static final double DISABLE_COMPLEMENT = Double.POSITIVE_INFINITY; // > 1 actually

  /**
   * Passing this to {@link #setComplementThreshold(double)} will force using complement optimization.
   */
  public static final double FORCE_COMPLEMENT = 0; // <= 0

  protected int partitionSize;
  protected int maxPartitions;
  protected boolean isUsingComplements;

  private TotalFacetCounts totalFacetCounts;

  private Object accumulateGuard;

  private double complementThreshold = DEFAULT_COMPLEMENT_THRESHOLD;

  private static FacetArrays createFacetArrays(FacetSearchParams searchParams, TaxonomyReader taxoReader) {
    return new FacetArrays(PartitionsUtils.partitionSize(searchParams.indexingParams, taxoReader));
  }

  public OldFacetsAccumulator(FacetSearchParams searchParams, IndexReader indexReader,
      TaxonomyReader taxonomyReader) {
    this(searchParams, indexReader, taxonomyReader, null);
  }

  public OldFacetsAccumulator(FacetSearchParams searchParams, IndexReader indexReader,
      TaxonomyReader taxonomyReader, FacetArrays facetArrays) {
    super(searchParams, indexReader, taxonomyReader, facetArrays == null ? createFacetArrays(searchParams, taxonomyReader) : facetArrays);

    // can only be computed later when docids size is known
    isUsingComplements = false;
    partitionSize = PartitionsUtils.partitionSize(searchParams.indexingParams, taxonomyReader);
    maxPartitions = (int) Math.ceil(this.taxonomyReader.getSize() / (double) partitionSize);
    accumulateGuard = new Object();
  }

  // TODO: this should be removed once we clean the API
  public List<FacetResult> accumulate(ScoredDocIDs docids) throws IOException {

    // synchronize to prevent calling two accumulate()'s at the same time.
    // We decided not to synchronize the method because that might mislead
    // users to feel encouraged to call this method simultaneously.
    synchronized (accumulateGuard) {

      // only now we can compute this
      isUsingComplements = shouldComplement(docids);

      if (isUsingComplements) {
        try {
          totalFacetCounts = TotalFacetCountsCache.getSingleton().getTotalCounts(indexReader, taxonomyReader, searchParams.indexingParams);
          if (totalFacetCounts != null) {
            docids = ScoredDocIdsUtils.getComplementSet(docids, indexReader);
          } else {
            isUsingComplements = false;
          }
        } catch (UnsupportedOperationException e) {
          // TODO (Facet): this exception is thrown from TotalCountsKey if the
          // IndexReader used does not support getVersion(). We should re-think
          // this: is this tiny detail worth disabling total counts completely
          // for such readers? Currently, it's not supported by Parallel and
          // MultiReader, which might be problematic for several applications.
          // We could, for example, base our "isCurrent" logic on something else
          // than the reader's version. Need to think more deeply about it.
          isUsingComplements = false;
        } catch (IOException e) {
          // silently fail if for some reason failed to load/save from/to dir
          isUsingComplements = false;
        } catch (Exception e) {
          // give up: this should not happen!
          throw new IOException("PANIC: Got unexpected exception while trying to get/calculate total counts", e);
        }
      }

      docids = actualDocsToAccumulate(docids);

      HashMap<FacetRequest, IntermediateFacetResult> fr2tmpRes = new HashMap<FacetRequest, IntermediateFacetResult>();

      try {
        for (int part = 0; part < maxPartitions; part++) {

          // fill arrays from category lists
          fillArraysForPartition(docids, facetArrays, part);

          int offset = part * partitionSize;

          // for each partition we go over all requests and handle
          // each, where the request maintains the merged result.
          // In this implementation merges happen after each partition,
          // but other impls could merge only at the end.
          final HashSet<FacetRequest> handledRequests = new HashSet<FacetRequest>();
          for (FacetRequest fr : searchParams.facetRequests) {
            // Handle and merge only facet requests which were not already handled.
            if (handledRequests.add(fr)) {
              PartitionsFacetResultsHandler frHndlr = createFacetResultsHandler(fr, createOrdinalValueResolver(fr));
              IntermediateFacetResult res4fr = frHndlr.fetchPartitionResult(offset);
              IntermediateFacetResult oldRes = fr2tmpRes.get(fr);
              if (oldRes != null) {
                res4fr = frHndlr.mergeResults(oldRes, res4fr);
              }
              fr2tmpRes.put(fr, res4fr);
            }
          }
        }
      } finally {
        facetArrays.free();
      }

      // gather results from all requests into a list for returning them
      List<FacetResult> res = new ArrayList<FacetResult>();
      for (FacetRequest fr : searchParams.facetRequests) {
        PartitionsFacetResultsHandler frHndlr = createFacetResultsHandler(fr, createOrdinalValueResolver(fr));
        IntermediateFacetResult tmpResult = fr2tmpRes.get(fr);
        if (tmpResult == null) {
          // Add empty FacetResult:
          res.add(emptyResult(taxonomyReader.getOrdinal(fr.categoryPath), fr));
          continue;
        }
        FacetResult facetRes = frHndlr.renderFacetResult(tmpResult);
        // final labeling if allowed (because labeling is a costly operation)
        frHndlr.labelResult(facetRes);
        res.add(facetRes);
      }

      return res;
    }
  }

  /** Check if all requests are complementable. */
  protected boolean mayComplement() {
    for (FacetRequest freq : searchParams.facetRequests) {
      if (!(freq instanceof CountFacetRequest)) {
        return false;
      }
    }
    return true;
  }

  @Override
  public PartitionsFacetResultsHandler createFacetResultsHandler(FacetRequest fr, OrdinalValueResolver resolver) {
    if (fr.getResultMode() == ResultMode.PER_NODE_IN_TREE) {
      return new TopKInEachNodeHandler(taxonomyReader, fr, resolver, facetArrays);
    } else {
      return new TopKFacetResultsHandler(taxonomyReader, fr, resolver, facetArrays);
    }
  }

  /**
   * Set the actual set of documents over which accumulation should take place.
   * <p>
   * Allows overriding the set of documents to accumulate for. Invoked just
   * before actual accumulating starts. From this point that set of documents
   * remains unmodified. The default implementation just returns the input
   * unchanged.
   *
   * @param docids
   *          candidate documents to accumulate for
   * @return actual documents to accumulate for
   */
  protected ScoredDocIDs actualDocsToAccumulate(ScoredDocIDs docids) throws IOException {
    return docids;
  }

  /** Check if it is worthwhile to use complements. */
  protected boolean shouldComplement(ScoredDocIDs docids) {
    return mayComplement() && (docids.size() > indexReader.numDocs() * getComplementThreshold());
  }

  /**
   * Creates an {@link OrdinalValueResolver} for the given {@link FacetRequest}.
   * By default this method supports {@link CountFacetRequest} and
   * {@link SumScoreFacetRequest}. You should override it if you are using other
   * requests with this accumulator.
   */
  public OrdinalValueResolver createOrdinalValueResolver(FacetRequest fr) {
    if (fr instanceof CountFacetRequest) {
      return new IntValueResolver(facetArrays);
    } else if (fr instanceof SumScoreFacetRequest) {
      return new FloatValueResolver(facetArrays);
    } else if (fr instanceof OverSampledFacetRequest) {
      return createOrdinalValueResolver(((OverSampledFacetRequest) fr).orig);
    } else {
      throw new IllegalArgumentException("unrecognized FacetRequest " + fr.getClass());
    }
  }

  /**
   * Iterate over the documents for this partition and fill the facet arrays with the correct
   * count/complement count/value.
   */
  private final void fillArraysForPartition(ScoredDocIDs docids, FacetArrays facetArrays, int partition)
      throws IOException {

    if (isUsingComplements) {
      initArraysByTotalCounts(facetArrays, partition, docids.size());
    } else {
      facetArrays.free(); // to get a cleared array for this partition
    }

    HashMap<CategoryListIterator, Aggregator> categoryLists = getCategoryListMap(facetArrays, partition);

    IntsRef ordinals = new IntsRef(32); // a reasonable start capacity for most common apps
    for (Entry<CategoryListIterator, Aggregator> entry : categoryLists.entrySet()) {
      final ScoredDocIDsIterator iterator = docids.iterator();
      final CategoryListIterator categoryListIter = entry.getKey();
      final Aggregator aggregator = entry.getValue();
      Iterator<AtomicReaderContext> contexts = indexReader.leaves().iterator();
      AtomicReaderContext current = null;
      int maxDoc = -1;
      while (iterator.next()) {
        int docID = iterator.getDocID();
        if (docID >= maxDoc) {
          boolean iteratorDone = false;
          do { // find the segment which contains this document
            if (!contexts.hasNext()) {
              throw new RuntimeException("ScoredDocIDs contains documents outside this reader's segments !?");
            }
            current = contexts.next();
            maxDoc = current.docBase + current.reader().maxDoc();
            if (docID < maxDoc) { // segment has docs, check if it has categories
              boolean validSegment = categoryListIter.setNextReader(current);
              validSegment &= aggregator.setNextReader(current);
              if (!validSegment) { // if categoryList or aggregator say it's an invalid segment, skip all docs
                while (docID < maxDoc && iterator.next()) {
                  docID = iterator.getDocID();
                }
                if (docID < maxDoc) {
                  iteratorDone = true;
                }
              }
            }
          } while (docID >= maxDoc);
          if (iteratorDone) { // iterator finished, terminate the loop
            break;
          }
        }
        docID -= current.docBase;
        categoryListIter.getOrdinals(docID, ordinals);
        if (ordinals.length == 0) {
          continue; // document does not have category ordinals
        }
        aggregator.aggregate(docID, iterator.getScore(), ordinals);
      }
    }
  }

  /** Init arrays for partition by total counts, optionally applying a factor. */
  private final void initArraysByTotalCounts(FacetArrays facetArrays, int partition, int nAccumulatedDocs) {
    int[] intArray = facetArrays.getIntArray();
    totalFacetCounts.fillTotalCountsForPartition(intArray, partition);
    double totalCountsFactor = getTotalCountsFactor();
    // fix total counts, but only if the effect of this would be meaningful.
    if (totalCountsFactor < 0.99999) {
      int delta = nAccumulatedDocs + 1;
      for (int i = 0; i < intArray.length; i++) {
        intArray[i] *= totalCountsFactor;
        // also translate to prevent loss of non-positive values
        // due to complement sampling (i.e. if sampled docs all decremented a certain category).
        intArray[i] += delta;
      }
    }
  }

  /**
   * Expert: the factor by which counts should be multiplied when initializing
   * the count arrays from total counts.
   * The default implementation returns 1, which is a no-op.
   * @return a factor by which total counts should be multiplied
   */
  protected double getTotalCountsFactor() {
    return 1;
  }

  protected Aggregator createAggregator(FacetRequest fr, FacetArrays facetArrays) {
    if (fr instanceof CountFacetRequest) {
      // we rely on that, if needed, result is cleared by arrays!
      int[] a = facetArrays.getIntArray();
      if (isUsingComplements) {
        return new ComplementCountingAggregator(a);
      } else {
        return new CountingAggregator(a);
      }
    } else if (fr instanceof SumScoreFacetRequest) {
      if (isUsingComplements) {
        throw new IllegalArgumentException("complements are not supported by SumScoreFacetRequest");
      } else {
        return new ScoringAggregator(facetArrays.getFloatArray());
      }
    } else if (fr instanceof OverSampledFacetRequest) {
      return createAggregator(((OverSampledFacetRequest) fr).orig, facetArrays);
    } else {
      throw new IllegalArgumentException("unknown Aggregator implementation for request " + fr.getClass());
    }
  }

  /**
   * Create an {@link Aggregator} and a {@link CategoryListIterator} for each
   * {@link FacetRequest}, generating a map that matches each category list
   * iterator to its aggregator.
   * <p>
   * If two CategoryListIterators are served by the same aggregator, a single
   * aggregator is returned for both.
   * <p>
   * <b>NOTE:</b> If a given category list iterator is needed with two different
   * aggregators (e.g. counting and association), an exception is thrown as this
   * functionality is not supported at this time.
   */
  protected HashMap<CategoryListIterator, Aggregator> getCategoryListMap(FacetArrays facetArrays,
      int partition) throws IOException {

    HashMap<CategoryListIterator, Aggregator> categoryLists = new HashMap<CategoryListIterator, Aggregator>();

    FacetIndexingParams indexingParams = searchParams.indexingParams;
    for (FacetRequest facetRequest : searchParams.facetRequests) {
      Aggregator categoryAggregator = createAggregator(facetRequest, facetArrays);

      CategoryListIterator cli = indexingParams.getCategoryListParams(facetRequest.categoryPath).createCategoryListIterator(partition);

      // get the aggregator
      Aggregator old = categoryLists.put(cli, categoryAggregator);

      if (old != null && !old.equals(categoryAggregator)) {
        throw new RuntimeException("Overriding existing category list with different aggregator");
      }
      // if the aggregator is the same we're covered
    }

    return categoryLists;
  }

  @Override
  public List<FacetResult> accumulate(List<MatchingDocs> matchingDocs) throws IOException {
    return accumulate(new MatchingDocsAsScoredDocIDs(matchingDocs));
  }

  /**
   * Returns the complement threshold.
   * @see #setComplementThreshold(double)
   */
  public double getComplementThreshold() {
    return complementThreshold;
  }

  /**
   * Set the complement threshold.
   * This threshold dictates whether the complements optimization is applied.
   * The optimization is to count fewer documents. It is useful when the same
   * FacetSearchParams are used for varying sets of documents. The first time
   * complements is used the "total counts" are computed, counting for all the
   * documents in the collection. Then, only the complementing set of documents
   * is considered, and used to decrement from the overall counts, thereby
   * walking through fewer documents, which is faster.
   * <p>
   * For the default settings see {@link #DEFAULT_COMPLEMENT_THRESHOLD}.
   * <p>
   * To force complements in all cases, pass {@link #FORCE_COMPLEMENT}.
   * This is mostly useful for testing purposes, as forcing complements when only
   * a tiny fraction of available documents match the query does not make sense and
   * would incur performance degradations.
   * <p>
   * To disable complements, pass {@link #DISABLE_COMPLEMENT}.
   * @param complementThreshold the complement threshold to set
   * @see #getComplementThreshold()
   */
  public void setComplementThreshold(double complementThreshold) {
    this.complementThreshold = complementThreshold;
  }

  /** Returns true if complements are enabled. */
  public boolean isUsingComplements() {
    return isUsingComplements;
  }

}
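To make the threshold arithmetic concrete, here is a hedged sketch, not part of the removed file. searchParams, indexReader, taxoReader and docids are assumed to exist in the surrounding code.

// shouldComplement() effectively tests
//   docids.size() > indexReader.numDocs() * complementThreshold
// so with 1,000,000 live docs and the default threshold of 0.6, a result set
// of 700,000 docs is counted by decrementing the cached total counts for the
// 300,000 non-matching docs, while a 100,000-doc result set is counted directly.
OldFacetsAccumulator accumulator = new OldFacetsAccumulator(searchParams, indexReader, taxoReader);
accumulator.setComplementThreshold(OldFacetsAccumulator.DISABLE_COMPLEMENT); // or FORCE_COMPLEMENT for tests
List<FacetResult> results = accumulator.accumulate(docids);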
@@ -1,42 +0,0 @@
package org.apache.lucene.facet.old;

import java.io.IOException;

import org.apache.lucene.search.DocIdSet;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Document IDs with scores for each, driving facets accumulation. Document
 * scores are optionally used in the process of facets scoring.
 *
 * @see OldFacetsAccumulator#accumulate(ScoredDocIDs)
 * @lucene.experimental
 */
public interface ScoredDocIDs {

  /** Returns an iterator over the document IDs and their scores. */
  public ScoredDocIDsIterator iterator() throws IOException;

  /** Returns the set of doc IDs. */
  public DocIdSet getDocIDs();

  /** Returns the number of scored documents. */
  public int size();

}
@@ -1,43 +0,0 @@
package org.apache.lucene.facet.old;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Iterator over document IDs and their scores. Each {@link #next()} retrieves
 * the next docID and its score, which can later be retrieved by
 * {@link #getDocID()} and {@link #getScore()}. <b>NOTE:</b> you must call
 * {@link #next()} before {@link #getDocID()} and/or {@link #getScore()},
 * otherwise the returned values are undefined.
 *
 * @lucene.experimental
 */
public interface ScoredDocIDsIterator {

  /** Default score used in case scoring is disabled. */
  public static final float DEFAULT_SCORE = 1.0f;

  /** Iterate to the next document/score pair. Returns true iff there is such a pair. */
  public abstract boolean next();

  /** Returns the ID of the current document. */
  public abstract int getDocID();

  /** Returns the score of the current document. */
  public abstract float getScore();

}
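The iteration idiom implied by the javadoc above, as a short sketch; docids is an assumed ScoredDocIDs instance.

// The contract documented above: call next() before the getters.
ScoredDocIDsIterator it = docids.iterator();
while (it.next()) {
  int docID = it.getDocID();
  float score = it.getScore(); // DEFAULT_SCORE (1.0f) when scoring is disabled
  // ... consume docID and score ...
}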
@@ -1,446 +0,0 @@
package org.apache.lucene.facet.old;

import java.io.IOException;
import java.util.Arrays;

import org.apache.lucene.facet.old.ScoredDocIDs;
import org.apache.lucene.facet.old.ScoredDocIDsIterator;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.OpenBitSetDISI;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Utility methods for Scored Doc IDs.
 *
 * @lucene.experimental
 */
public class ScoredDocIdsUtils {

  /**
   * Create a complement of the input set. The returned {@link ScoredDocIDs}
   * does not contain any scores, which makes sense given that the complementing
   * documents were not scored.
   *
   * Note: the complement set does NOT contain doc IDs which are noted as deleted by the given reader.
   *
   * @param docids to be complemented.
   * @param reader holding the number of documents & information about deletions.
   */
  public final static ScoredDocIDs getComplementSet(final ScoredDocIDs docids, final IndexReader reader)
      throws IOException {
    final int maxDoc = reader.maxDoc();

    DocIdSet docIdSet = docids.getDocIDs();
    final FixedBitSet complement;
    if (docIdSet instanceof FixedBitSet) {
      // That is the most common case, if ScoredDocIdsCollector was used.
      complement = ((FixedBitSet) docIdSet).clone();
    } else {
      complement = new FixedBitSet(maxDoc);
      DocIdSetIterator iter = docIdSet.iterator();
      int doc;
      while ((doc = iter.nextDoc()) < maxDoc) {
        complement.set(doc);
      }
    }
    complement.flip(0, maxDoc);
    clearDeleted(reader, complement);

    return createScoredDocIds(complement, maxDoc);
  }

  /** Clears all deleted documents from a given bit set according to a given reader. */
  private static void clearDeleted(final IndexReader reader, final FixedBitSet set) throws IOException {
    // TODO use BitsFilteredDocIdSet?

    // If there are no deleted docs
    if (!reader.hasDeletions()) {
      return; // return immediately
    }

    DocIdSetIterator it = set.iterator();
    int doc = it.nextDoc();
    for (AtomicReaderContext context : reader.leaves()) {
      AtomicReader r = context.reader();
      final int maxDoc = r.maxDoc() + context.docBase;
      if (doc >= maxDoc) { // skip this segment
        continue;
      }
      if (!r.hasDeletions()) { // skip all docs that belong to this reader as it has no deletions
        while ((doc = it.nextDoc()) < maxDoc) {}
        continue;
      }
      Bits liveDocs = r.getLiveDocs();
      do {
        if (!liveDocs.get(doc - context.docBase)) {
          set.clear(doc);
        }
      } while ((doc = it.nextDoc()) < maxDoc);
    }
  }

  /**
   * Create a subset of an existing ScoredDocIDs object.
   *
   * @param allDocIds original set
   * @param sampleSet Doc IDs of the subset.
   */
  public static final ScoredDocIDs createScoredDocIDsSubset(final ScoredDocIDs allDocIds,
      final int[] sampleSet) throws IOException {

    // sort so that we can scan docs in order
    final int[] docids = sampleSet;
    Arrays.sort(docids);
    final float[] scores = new float[docids.length];
    // fetch scores and compute size
    ScoredDocIDsIterator it = allDocIds.iterator();
    int n = 0;
    while (it.next() && n < docids.length) {
      int doc = it.getDocID();
      if (doc == docids[n]) {
        scores[n] = it.getScore();
        ++n;
      }
    }
    final int size = n;

    return new ScoredDocIDs() {

      @Override
      public DocIdSet getDocIDs() {
        return new DocIdSet() {

          @Override
          public boolean isCacheable() { return true; }

          @Override
          public DocIdSetIterator iterator() {
            return new DocIdSetIterator() {

              private int next = -1;

              @Override
              public int advance(int target) {
                while (next < size && docids[next++] < target) {
                }
                return next == size ? NO_MORE_DOCS : docids[next];
              }

              @Override
              public int docID() {
                return docids[next];
              }

              @Override
              public int nextDoc() {
                if (++next >= size) {
                  return NO_MORE_DOCS;
                }
                return docids[next];
              }

              @Override
              public long cost() {
                return size;
              }
            };
          }
        };
      }

      @Override
      public ScoredDocIDsIterator iterator() {
        return new ScoredDocIDsIterator() {

          int next = -1;

          @Override
          public boolean next() { return ++next < size; }

          @Override
          public float getScore() { return scores[next]; }

          @Override
          public int getDocID() { return docids[next]; }
        };
      }

      @Override
      public int size() { return size; }

    };
  }

  /**
   * Creates a {@link ScoredDocIDs} which returns all non-deleted doc IDs
   * according to the given reader.
   * The returned set contains the doc IDs in the range [0 .. reader.maxDoc()).
   */
  public static final ScoredDocIDs createAllDocsScoredDocIDs(final IndexReader reader) {
    if (reader.hasDeletions()) {
      return new AllLiveDocsScoredDocIDs(reader);
    }
    return new AllDocsScoredDocIDs(reader);
  }

  /**
   * Create a ScoredDocIDs out of a given docIdSet and the total number of documents in an index.
   */
  public static final ScoredDocIDs createScoredDocIds(final DocIdSet docIdSet, final int maxDoc) {
    return new ScoredDocIDs() {
      private int size = -1;
      @Override
      public DocIdSet getDocIDs() { return docIdSet; }

      @Override
      public ScoredDocIDsIterator iterator() throws IOException {
        final DocIdSetIterator docIterator = docIdSet.iterator();
        return new ScoredDocIDsIterator() {
          @Override
          public boolean next() {
            try {
              return docIterator.nextDoc() != DocIdSetIterator.NO_MORE_DOCS;
            } catch (IOException e) {
              throw new RuntimeException(e);
            }
          }

          @Override
          public float getScore() { return DEFAULT_SCORE; }

          @Override
          public int getDocID() { return docIterator.docID(); }
        };
      }

      @Override
      public int size() {
        // lazy size computation
        if (size < 0) {
          OpenBitSetDISI openBitSetDISI;
          try {
            openBitSetDISI = new OpenBitSetDISI(docIdSet.iterator(), maxDoc);
          } catch (IOException e) {
            throw new RuntimeException(e);
          }
          size = (int) openBitSetDISI.cardinality();
        }
        return size;
      }
    };
  }

  /**
   * An all-docs {@link ScoredDocIDs} - simply an 'all 1' bitset. Used when
   * there are no deletions in the index and we wish to go through each and
   * every document.
   */
  private static class AllDocsScoredDocIDs implements ScoredDocIDs {
    final int maxDoc;

    public AllDocsScoredDocIDs(IndexReader reader) {
      this.maxDoc = reader.maxDoc();
    }

    @Override
    public int size() {
      return maxDoc;
    }

    @Override
    public DocIdSet getDocIDs() {
      return new DocIdSet() {

        @Override
        public boolean isCacheable() {
          return true;
        }

        @Override
        public DocIdSetIterator iterator() {
          return new DocIdSetIterator() {
            private int next = -1;

            @Override
            public int advance(int target) {
              if (target <= next) {
                target = next + 1;
              }
              return next = target >= maxDoc ? NO_MORE_DOCS : target;
            }

            @Override
            public int docID() {
              return next;
            }

            @Override
            public int nextDoc() {
              return ++next < maxDoc ? next : NO_MORE_DOCS;
            }

            @Override
            public long cost() {
              return maxDoc;
            }
          };
        }
      };
    }

    @Override
    public ScoredDocIDsIterator iterator() {
      try {
        final DocIdSetIterator iter = getDocIDs().iterator();
        return new ScoredDocIDsIterator() {
          @Override
          public boolean next() {
            try {
              return iter.nextDoc() != DocIdSetIterator.NO_MORE_DOCS;
            } catch (IOException e) {
              // cannot happen
              return false;
            }
          }

          @Override
          public float getScore() {
            return DEFAULT_SCORE;
          }

          @Override
          public int getDocID() {
            return iter.docID();
          }
        };
      } catch (IOException e) {
        // cannot happen
        throw new RuntimeException(e);
      }
    }
  }

  /**
   * An all-docs bitset which has '0' for deleted documents and '1' for the
   * rest. Useful for iterating over all 'live' documents in a given index.
   * <p>
   * NOTE: this class would also work for indexes with no deletions at all,
   * although it is recommended to use {@link AllDocsScoredDocIDs} there, to
   * avoid the cost of checking liveness for each and every docID.
   */
  private static final class AllLiveDocsScoredDocIDs implements ScoredDocIDs {
    final int maxDoc;
    final IndexReader reader;

    AllLiveDocsScoredDocIDs(IndexReader reader) {
      this.maxDoc = reader.maxDoc();
      this.reader = reader;
    }

    @Override
    public int size() {
      return reader.numDocs();
    }

    @Override
    public DocIdSet getDocIDs() {
      return new DocIdSet() {

        @Override
        public boolean isCacheable() {
          return true;
        }

        @Override
        public DocIdSetIterator iterator() {
          return new DocIdSetIterator() {
            final Bits liveDocs = MultiFields.getLiveDocs(reader);
            private int next = -1;

            @Override
            public int advance(int target) {
              if (target > next) {
                next = target - 1;
              }
              return nextDoc();
            }

            @Override
            public int docID() {
              return next;
            }

            @Override
            public int nextDoc() {
              do {
                ++next;
              } while (next < maxDoc && liveDocs != null && !liveDocs.get(next));

              return next < maxDoc ? next : NO_MORE_DOCS;
            }

            @Override
            public long cost() {
              return maxDoc;
            }
          };
        }
      };
    }

    @Override
    public ScoredDocIDsIterator iterator() {
      try {
        final DocIdSetIterator iter = getDocIDs().iterator();
        return new ScoredDocIDsIterator() {
          @Override
          public boolean next() {
            try {
              return iter.nextDoc() != DocIdSetIterator.NO_MORE_DOCS;
            } catch (IOException e) {
              // cannot happen
              return false;
            }
          }

          @Override
          public float getScore() {
            return DEFAULT_SCORE;
          }

          @Override
          public int getDocID() {
            return iter.docID();
          }
        };
      } catch (IOException e) {
        // cannot happen
        throw new RuntimeException(e);
      }
    }
  }

}
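For reference, a hedged sketch of how the complement utility above is typically driven; matches (a ScoredDocIDs of matching documents) and reader (an open IndexReader) are assumed to exist in the caller:

// Complement of the matching set: every live doc NOT in matches, unscored.
ScoredDocIDs complement = ScoredDocIdsUtils.getComplementSet(matches, reader);
int numNonMatching = complement.size(); // deleted docs were cleared by clearDeleted()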
@@ -1,67 +0,0 @@
package org.apache.lucene.facet.old;

import java.io.IOException;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.IntsRef;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * An {@link Aggregator} which updates the weight of a category according to the
 * scores of the documents it was found in.
 *
 * @lucene.experimental
 */
public class ScoringAggregator implements Aggregator {

  private final float[] scoreArray;
  private final int hashCode;

  public ScoringAggregator(float[] counterArray) {
    this.scoreArray = counterArray;
    this.hashCode = scoreArray == null ? 0 : scoreArray.hashCode();
  }

  @Override
  public void aggregate(int docID, float score, IntsRef ordinals) throws IOException {
    for (int i = 0; i < ordinals.length; i++) {
      scoreArray[ordinals.ints[i]] += score;
    }
  }

  @Override
  public boolean equals(Object obj) {
    if (obj == null || obj.getClass() != this.getClass()) {
      return false;
    }
    ScoringAggregator that = (ScoringAggregator) obj;
    return that.scoreArray == this.scoreArray;
  }

  @Override
  public int hashCode() {
    return hashCode;
  }

  @Override
  public boolean setNextReader(AtomicReaderContext context) throws IOException {
    return true;
  }

}
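The aggregate() method above is a plain scatter-add. A standalone sketch of the same logic with hypothetical names, shown here only to make the indexing scheme explicit:

// Each category ordinal of the matching document receives the document's score;
// scoreArray is indexed by category ordinal, ordinals[0..length) are the
// ordinals the document carries.
static void addScore(float[] scoreArray, int[] ordinals, int length, float score) {
  for (int i = 0; i < length; i++) {
    scoreArray[ordinals[i]] += score;
  }
}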
@@ -1,24 +0,0 @@
<!--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements. See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-->
<html>
<head>
<title>Old Faceted Search API</title>
</head>
<body>
Old faceted search API, kept until complements, sampling and partitions are migrated to the new API.
</body>
</html>
@@ -1,187 +0,0 @@
package org.apache.lucene.facet.params;

import java.io.IOException;

import org.apache.lucene.facet.encoding.DGapVInt8IntEncoder;
import org.apache.lucene.facet.encoding.IntDecoder;
import org.apache.lucene.facet.encoding.IntEncoder;
import org.apache.lucene.facet.encoding.SortingIntEncoder;
import org.apache.lucene.facet.encoding.UniqueValuesIntEncoder;
import org.apache.lucene.facet.search.CategoryListIterator;
import org.apache.lucene.facet.search.DocValuesCategoryListIterator;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.facet.util.PartitionsUtils;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Contains parameters for a category list.
 *
 * @lucene.experimental
 */
public class CategoryListParams {

  /**
   * Defines which category ordinals are encoded for every document. This also
   * affects how category ordinals are aggregated; check the different policies
   * for more details.
   */
  public static enum OrdinalPolicy {
    /**
     * Encodes only the ordinals of leaf nodes. That is, for the category A/B/C,
     * the ordinals of A and A/B will not be encoded. This policy is efficient
     * for hierarchical dimensions, as it reduces the number of ordinals that
     * are visited per document. During faceted search, this policy behaves
     * exactly like {@link #ALL_PARENTS}, and the counts of all path components
     * will be computed as well.
     *
     * <p>
     * <b>NOTE:</b> this {@link OrdinalPolicy} requires a special collector or
     * accumulator, which will fix the parents' counts.
     *
     * <p>
     * <b>NOTE:</b> since only leaf nodes are encoded for the document, you
     * should use this policy when the same document doesn't share two
     * categories that have a mutual parent, or otherwise the counts will be
     * wrong (the mutual parent will be over-counted). For example, if a
     * document has the categories A/B/C and A/B/D, then with this policy the
     * counts of "A" and "B" will be 2, which is wrong. If you intend to index
     * hierarchical dimensions, with more than one category per document, you
     * should use either {@link #ALL_PARENTS} or {@link #ALL_BUT_DIMENSION}.
     */
    NO_PARENTS,

    /**
     * Encodes the ordinals of all path components. That is, the category A/B/C
     * will encode the ordinals of A and A/B as well. If you don't require the
     * dimension's count during search, consider using
     * {@link #ALL_BUT_DIMENSION}.
     */
    ALL_PARENTS,

    /**
     * Encodes the ordinals of all path components except the dimension. The
     * dimension of a category is defined to be the first component in
     * {@link FacetLabel#components}. For the category A/B/C, the ordinal of
     * A/B will be encoded as well, however not the ordinal of A.
     *
     * <p>
     * <b>NOTE:</b> when facets are aggregated, this policy behaves exactly like
     * {@link #ALL_PARENTS}, except that the dimension is never counted. I.e. if
     * you ask to count the facet "A", then while in {@link #ALL_PARENTS} you
     * will get counts for "A" <u>and its children</u>, with this policy you
     * will get counts for <u>only its children</u>. This policy is the default
     * one, and makes sense for use with flat dimensions, whenever your
     * application does not require the dimension's count. Otherwise, use
     * {@link #ALL_PARENTS}.
     */
    ALL_BUT_DIMENSION
  }

  /** The default field used to store the facets information. */
  public static final String DEFAULT_FIELD = "$facets";

  /**
   * The default {@link OrdinalPolicy} that's used when encoding a document's
   * category ordinals.
   */
  public static final OrdinalPolicy DEFAULT_ORDINAL_POLICY = OrdinalPolicy.ALL_BUT_DIMENSION;

  public final String field;

  private final int hashCode;

  /** Constructs a default category list parameters object, using {@link #DEFAULT_FIELD}. */
  public CategoryListParams() {
    this(DEFAULT_FIELD);
  }

  /** Constructs a category list parameters object, using the given field. */
  public CategoryListParams(String field) {
    this.field = field;
    // Pre-compute the hashCode because these objects are immutable. Saves
    // some time on the comparisons later.
    this.hashCode = field.hashCode();
  }

  /**
   * Allows overriding how categories are encoded and decoded. A matching
   * {@link IntDecoder} is provided by the {@link IntEncoder}.
   * <p>
   * The default implementation creates a new Sorting(<b>Unique</b>(DGap))
   * encoder. Uniqueness in this regard means that when the same category
   * appears twice in a document, only one appearance is encoded. This affects
   * facet counting results.
   * <p>
   * Some possible considerations when overriding may be:
   * <ul>
   * <li>an application "knows" that all categories are unique, so there is no
   * need to pass through the unique filter.</li>
   * <li>another application might wish to count multiple occurrences of the
   * same category, or use a faster encoding which will consume more space.</li>
   * </ul>
   * In any event, when changing this value make sure you know what you are
   * doing, and test the results - e.g. counts, if the application is counting
   * facets.
   */
  public IntEncoder createEncoder() {
    return new SortingIntEncoder(new UniqueValuesIntEncoder(new DGapVInt8IntEncoder()));
  }

  @Override
  public boolean equals(Object o) {
    if (o == this) {
      return true;
    }
    if (!(o instanceof CategoryListParams)) {
      return false;
    }
    CategoryListParams other = (CategoryListParams) o;
    if (hashCode != other.hashCode) {
      return false;
    }
    return field.equals(other.field);
  }

  @Override
  public int hashCode() {
    return hashCode;
  }

  /** Create the {@link CategoryListIterator} for the specified partition. */
  public CategoryListIterator createCategoryListIterator(int partition) throws IOException {
    String categoryListTermStr = PartitionsUtils.partitionName(partition);
    String docValuesField = field + categoryListTermStr;
    return new DocValuesCategoryListIterator(docValuesField, createEncoder().createMatchingDecoder());
  }

  /**
   * Returns the {@link OrdinalPolicy} to use for the given dimension. This
   * {@link CategoryListParams} always returns {@link #DEFAULT_ORDINAL_POLICY}
   * for all dimensions.
   */
  public OrdinalPolicy getOrdinalPolicy(String dimension) {
    return DEFAULT_ORDINAL_POLICY;
  }

  @Override
  public String toString() {
    return "field=" + field + " encoder=" + createEncoder() + " ordinalPolicy=" + getOrdinalPolicy(null);
  }

}
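As the createEncoder() javadoc notes, the encoding chain is an override point. A hedged sketch of a subclass that keeps duplicate category occurrences by dropping the uniqueness filter (the class name is hypothetical; whether this is appropriate depends on how the application counts facets):

import org.apache.lucene.facet.encoding.DGapVInt8IntEncoder;
import org.apache.lucene.facet.encoding.IntEncoder;
import org.apache.lucene.facet.encoding.SortingIntEncoder;

public class CountingCategoryListParams extends CategoryListParams {
  @Override
  public IntEncoder createEncoder() {
    // Sorting(DGap) without the UniqueValuesIntEncoder wrapper, so repeated
    // categories in a document are encoded (and later counted) multiple times.
    return new SortingIntEncoder(new DGapVInt8IntEncoder());
  }
}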
@@ -1,180 +0,0 @@
package org.apache.lucene.facet.params;

import java.util.Collections;
import java.util.List;

import org.apache.lucene.facet.search.FacetArrays;
import org.apache.lucene.facet.taxonomy.FacetLabel;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Defines parameters that are needed for facets indexing. Note that this class
 * does not have any setters. That's because overriding the default parameters
 * is considered expert. If you wish to override them, simply extend this class
 * and override the relevant getter.
 *
 * <p>
 * <b>NOTE:</b> This class is also used during faceted search in order to e.g.
 * know which field holds the drill-down terms or the fulltree posting.
 * Therefore this class should be initialized once and you should refrain from
 * changing it. Also note that if you make any changes to it (e.g. suddenly
 * deciding that drill-down terms should be read from a different field) and use
 * it on an existing index, things may not work as expected.
 *
 * @lucene.experimental
 */
public class FacetIndexingParams {

  // the default CLP, can be a singleton
  protected static final CategoryListParams DEFAULT_CATEGORY_LIST_PARAMS = new CategoryListParams();

  /**
   * A {@link FacetIndexingParams} which fixes a single
   * {@link CategoryListParams} with
   * {@link CategoryListParams#DEFAULT_ORDINAL_POLICY}.
   */
  public static final FacetIndexingParams DEFAULT = new FacetIndexingParams();

  /**
   * The default delimiter with which {@link FacetLabel#components} are
   * concatenated when written to the index, e.g. as drill-down terms. If you
   * choose to override it by overriding {@link #getFacetDelimChar()}, you should
   * make sure that you return a character that's not found in any path
   * component.
   */
  public static final char DEFAULT_FACET_DELIM_CHAR = '\u001F';

  private final int partitionSize = Integer.MAX_VALUE;

  protected final CategoryListParams clParams;

  /**
   * Initializes new default params. You should use this constructor only if you
   * intend to override any of the getters, otherwise you can use
   * {@link #DEFAULT} to save unnecessary object allocations.
   */
  public FacetIndexingParams() {
    this(DEFAULT_CATEGORY_LIST_PARAMS);
  }

  /** Initializes new params with the given {@link CategoryListParams}. */
  public FacetIndexingParams(CategoryListParams categoryListParams) {
    clParams = categoryListParams;
  }

  /**
   * Returns the {@link CategoryListParams} for this {@link FacetLabel}. The
   * default implementation returns the same {@link CategoryListParams} for all
   * categories (even if {@code category} is {@code null}).
   *
   * @see PerDimensionIndexingParams
   */
  public CategoryListParams getCategoryListParams(FacetLabel category) {
    return clParams;
  }

  /**
   * Copies the text required to execute a drill-down query on the given
   * category to the given {@code char[]}, and returns the number of characters
   * that were written.
   * <p>
   * <b>NOTE:</b> You should make sure that the {@code char[]} is large enough,
   * by e.g. calling {@link FacetLabel#fullPathLength()}.
   */
  public int drillDownTermText(FacetLabel path, char[] buffer) {
    return path.copyFullPath(buffer, 0, getFacetDelimChar());
  }

  /**
   * Returns the size of a partition. <i>Partitions</i> allow you to divide
   * (hence, partition) the categories space into small sets to e.g. reduce RAM
   * consumption during faceted search. For instance, {@code partitionSize=100K}
   * would mean that if your taxonomy index contains 420K categories, they will
   * be divided into 5 groups and at search time a {@link FacetArrays} will be
   * allocated at the size of the partition.
   *
   * <p>
   * This is a really advanced setting and should be changed with care. By
   * default, all categories are put in one partition. You should modify this
   * setting if you have really large taxonomies (e.g. 1M+ nodes).
   */
  public int getPartitionSize() {
    return partitionSize;
  }

  /**
   * Returns a list of all {@link CategoryListParams categoryListParams} that
   * are used for facets indexing.
   */
  public List<CategoryListParams> getAllCategoryListParams() {
    return Collections.singletonList(clParams);
  }

  @Override
  public int hashCode() {
    final int prime = 31;
    int result = 1;
    result = prime * result + ((clParams == null) ? 0 : clParams.hashCode());
    result = prime * result + partitionSize;

    for (CategoryListParams clp : getAllCategoryListParams()) {
      result ^= clp.hashCode();
    }

    return result;
  }

  @Override
  public boolean equals(Object obj) {
    if (this == obj) {
      return true;
    }
    if (obj == null) {
      return false;
    }
    if (!(obj instanceof FacetIndexingParams)) {
      return false;
    }
    FacetIndexingParams other = (FacetIndexingParams) obj;
    if (clParams == null) {
      if (other.clParams != null) {
        return false;
      }
    } else if (!clParams.equals(other.clParams)) {
      return false;
    }
    if (partitionSize != other.partitionSize) {
      return false;
    }

    Iterable<CategoryListParams> cLs = getAllCategoryListParams();
    Iterable<CategoryListParams> otherCLs = other.getAllCategoryListParams();

    return cLs.equals(otherCLs);
  }

  /**
   * Returns the delimiter character used internally for concatenating category
   * path components, e.g. for drill-down terms.
   */
  public char getFacetDelimChar() {
    return DEFAULT_FACET_DELIM_CHAR;
  }

}
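Per the class comment, customization is done by subclassing and overriding the relevant getter. A hedged sketch overriding the drill-down delimiter; the class name and chosen character are illustrative only:

public class CustomDelimIndexingParams extends FacetIndexingParams {
  @Override
  public char getFacetDelimChar() {
    return '\u001E'; // example only; must never appear inside any path component
  }
}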
@@ -1,93 +0,0 @@
package org.apache.lucene.facet.params;

import java.util.Arrays;
import java.util.List;

import org.apache.lucene.facet.search.FacetRequest;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Defines parameters that are needed for faceted search: the list of facet
 * {@link FacetRequest facet requests} which should be aggregated as well as the
 * {@link FacetIndexingParams indexing params} that were used to index them.
 *
 * @lucene.experimental
 */
public class FacetSearchParams {

  public final FacetIndexingParams indexingParams;
  public final List<FacetRequest> facetRequests;

  /**
   * Initializes with the given {@link FacetRequest requests} and default
   * {@link FacetIndexingParams#DEFAULT}. If you used a different
   * {@link FacetIndexingParams}, you should use
   * {@link #FacetSearchParams(FacetIndexingParams, List)}.
   */
  public FacetSearchParams(FacetRequest... facetRequests) {
    this(FacetIndexingParams.DEFAULT, Arrays.asList(facetRequests));
  }

  /**
   * Initializes with the given {@link FacetRequest requests} and default
   * {@link FacetIndexingParams#DEFAULT}. If you used a different
   * {@link FacetIndexingParams}, you should use
   * {@link #FacetSearchParams(FacetIndexingParams, List)}.
   */
  public FacetSearchParams(List<FacetRequest> facetRequests) {
    this(FacetIndexingParams.DEFAULT, facetRequests);
  }

  /**
   * Initializes with the given {@link FacetRequest requests} and
   * {@link FacetIndexingParams}.
   */
  public FacetSearchParams(FacetIndexingParams indexingParams, FacetRequest... facetRequests) {
    this(indexingParams, Arrays.asList(facetRequests));
  }

  /**
   * Initializes with the given {@link FacetRequest requests} and
   * {@link FacetIndexingParams}.
   */
  public FacetSearchParams(FacetIndexingParams indexingParams, List<FacetRequest> facetRequests) {
    if (facetRequests == null || facetRequests.size() == 0) {
      throw new IllegalArgumentException("at least one FacetRequest must be defined");
    }
    this.facetRequests = facetRequests;
    this.indexingParams = indexingParams;
  }

  @Override
  public String toString() {
    final String INDENT = " ";
    final char NEWLINE = '\n';

    StringBuilder sb = new StringBuilder("IndexingParams: ");
    sb.append(NEWLINE).append(INDENT).append(indexingParams);

    sb.append(NEWLINE).append("FacetRequests:");
    for (FacetRequest facetRequest : facetRequests) {
      sb.append(NEWLINE).append(INDENT).append(facetRequest);
    }

    return sb.toString();
  }

}
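Construction pairs one or more requests with the indexing params used at index time. A hedged fragment, where request and customIndexingParams are hypothetical instances created by the caller:

// Default indexing params with a single request ...
FacetSearchParams fsp = new FacetSearchParams(request);
// ... or explicit params, matching what the index was built with.
FacetSearchParams fsp2 = new FacetSearchParams(customIndexingParams, request);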
@@ -1,96 +0,0 @@
package org.apache.lucene.facet.params;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.lucene.facet.taxonomy.FacetLabel;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * A {@link FacetIndexingParams} that utilizes different category lists, defined
 * by the dimension specified by a {@link FacetLabel category} (see
 * {@link #PerDimensionIndexingParams(Map, CategoryListParams)}).
 * <p>
 * A 'dimension' is defined as the first or "zero-th" component in a
 * {@link FacetLabel}. For example, if a category is defined as
 * "Author/American/Mark Twain", then the dimension would be "Author".
 *
 * @lucene.experimental
 */
public class PerDimensionIndexingParams extends FacetIndexingParams {

  private final Map<String, CategoryListParams> clParamsMap;

  /**
   * Initializes a new instance with the given dimension-to-params mapping. The
   * dimension is considered as what's returned by
   * {@link FacetLabel#components cp.components[0]}.
   *
   * <p>
   * <b>NOTE:</b> for any dimension whose {@link CategoryListParams} is not
   * defined in the mapping, a default {@link CategoryListParams} will be used.
   *
   * @see #PerDimensionIndexingParams(Map, CategoryListParams)
   */
  public PerDimensionIndexingParams(Map<FacetLabel, CategoryListParams> paramsMap) {
    this(paramsMap, DEFAULT_CATEGORY_LIST_PARAMS);
  }

  /**
   * Same as {@link #PerDimensionIndexingParams(Map)}, only the given
   * {@link CategoryListParams} will be used for any dimension that is not
   * specified in the given mapping.
   */
  public PerDimensionIndexingParams(Map<FacetLabel, CategoryListParams> paramsMap,
      CategoryListParams categoryListParams) {
    super(categoryListParams);
    clParamsMap = new HashMap<String,CategoryListParams>();
    for (Entry<FacetLabel, CategoryListParams> e : paramsMap.entrySet()) {
      clParamsMap.put(e.getKey().components[0], e.getValue());
    }
  }

  @Override
  public List<CategoryListParams> getAllCategoryListParams() {
    ArrayList<CategoryListParams> vals = new ArrayList<CategoryListParams>(clParamsMap.values());
    vals.add(clParams); // add the default too
    return vals;
  }

  /**
   * Returns the {@link CategoryListParams} for the corresponding dimension
   * which is returned by {@code category.getComponent(0)}. If {@code category}
   * is {@code null}, or was not specified in the map given to the constructor,
   * returns the default {@link CategoryListParams}.
   */
  @Override
  public CategoryListParams getCategoryListParams(FacetLabel category) {
    if (category != null) {
      CategoryListParams clParams = clParamsMap.get(category.components[0]);
      if (clParams != null) {
        return clParams;
      }
    }
    return clParams;
  }

}
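A hedged construction sketch: the "Author" dimension gets its own category list field while every other dimension falls back to the default params. The field name is illustrative:

Map<FacetLabel, CategoryListParams> paramsMap = new HashMap<FacetLabel, CategoryListParams>();
paramsMap.put(new FacetLabel("Author"), new CategoryListParams("$author_facets"));
FacetIndexingParams fip = new PerDimensionIndexingParams(paramsMap);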
@@ -1,55 +0,0 @@
package org.apache.lucene.facet.params;

import java.util.Map;

import org.apache.lucene.facet.taxonomy.FacetLabel;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * A {@link CategoryListParams} which allows controlling the
 * {@link CategoryListParams.OrdinalPolicy} used for each dimension. The
 * dimension is specified as the first component in
 * {@link FacetLabel#components}.
 */
public class PerDimensionOrdinalPolicy extends CategoryListParams {

  private final Map<String,OrdinalPolicy> policies;
  private final OrdinalPolicy defaultOP;

  public PerDimensionOrdinalPolicy(Map<String,OrdinalPolicy> policies) {
    this(policies, DEFAULT_ORDINAL_POLICY);
  }

  public PerDimensionOrdinalPolicy(Map<String,OrdinalPolicy> policies, OrdinalPolicy defaultOP) {
    this.defaultOP = defaultOP;
    this.policies = policies;
  }

  @Override
  public OrdinalPolicy getOrdinalPolicy(String dimension) {
    OrdinalPolicy op = policies.get(dimension);
    return op == null ? defaultOP : op;
  }

  @Override
  public String toString() {
    return super.toString() + " policies=" + policies;
  }

}
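A hedged usage fragment: the "Author" dimension is encoded with NO_PARENTS while all other dimensions keep the default policy (the dimension name is illustrative):

Map<String, CategoryListParams.OrdinalPolicy> policies =
    new HashMap<String, CategoryListParams.OrdinalPolicy>();
policies.put("Author", CategoryListParams.OrdinalPolicy.NO_PARENTS);
CategoryListParams clp = new PerDimensionOrdinalPolicy(policies);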
@@ -1,25 +0,0 @@
<!--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements. See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-->
<html>
<head>
<title>Facets indexing and search parameters</title>
</head>
<body>
Facets indexing and search parameters. Define how facets are indexed
as well as which categories need to be aggregated.
</body>
</html>
@@ -1,42 +0,0 @@
package org.apache.lucene.facet.partitions;

import org.apache.lucene.facet.search.FacetRequest;
import org.apache.lucene.facet.search.FacetResult;
import org.apache.lucene.facet.search.FacetResultsHandler;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Intermediate {@link FacetResult} of faceted search.
 * <p>
 * This is an empty interface on purpose.
 * <p>
 * It allows {@link FacetResultsHandler} to return intermediate result objects
 * that only it knows how to interpret, and so the handler has maximal freedom
 * in defining what an intermediate result is, depending on its specific logic.
 *
 * @lucene.experimental
 */
public interface IntermediateFacetResult {

  /**
   * Facet request for which this temporary result was created.
   */
  FacetRequest getFacetRequest();

}
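Because the interface is deliberately empty, each handler defines its own carrier type. A minimal hedged sketch; the class name and payload are hypothetical:

class MyIntermediateResult implements IntermediateFacetResult {
  final FacetRequest request;  // request this partial result belongs to
  final int[] partialCounts;   // handler-specific payload

  MyIntermediateResult(FacetRequest request, int[] partialCounts) {
    this.request = request;
    this.partialCounts = partialCounts;
  }

  @Override
  public FacetRequest getFacetRequest() {
    return request;
  }
}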
@@ -1,137 +0,0 @@
package org.apache.lucene.facet.partitions;

import java.io.IOException;

import org.apache.lucene.facet.old.OldFacetsAccumulator;
import org.apache.lucene.facet.old.ScoredDocIDs;
import org.apache.lucene.facet.search.FacetArrays;
import org.apache.lucene.facet.search.FacetRequest;
import org.apache.lucene.facet.search.FacetResult;
import org.apache.lucene.facet.search.FacetResultNode;
import org.apache.lucene.facet.search.FacetResultsHandler;
import org.apache.lucene.facet.search.OrdinalValueResolver;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * A {@link FacetResultsHandler} designed to work with facet partitions.
 *
 * @lucene.experimental
 */
public abstract class PartitionsFacetResultsHandler extends FacetResultsHandler {

  public PartitionsFacetResultsHandler(TaxonomyReader taxonomyReader, FacetRequest facetRequest,
      OrdinalValueResolver resolver, FacetArrays facetArrays) {
    super(taxonomyReader, facetRequest, resolver, facetArrays);
  }

  /**
   * Fetch results of a single partition, given facet arrays for that partition,
   * and based on the matching documents and faceted search parameters.
   *
   * @param offset
   *          offset in input arrays where partition starts
   * @return temporary facet result, potentially to be passed back to
   *         <b>this</b> result handler for merging, or <b>null</b> in case the
   *         constructor parameter <code>facetRequest</code> requests an
   *         illegal FacetResult, e.g. a root node category path that does not
   *         exist in the constructor parameter <code>taxonomyReader</code>.
   * @throws IOException
   *           on error
   */
  public abstract IntermediateFacetResult fetchPartitionResult(int offset) throws IOException;

  /**
   * Merge results of several facet partitions. Logic of the merge is undefined
   * and open for interpretation. For example, a merge implementation could
   * keep top K results. Passed {@link IntermediateFacetResult} objects must be
   * ones that were created by this handler, otherwise a
   * {@link ClassCastException} is thrown. In addition, all passed
   * {@link IntermediateFacetResult} objects must have the same
   * {@link FacetRequest}, otherwise an {@link IllegalArgumentException} is
   * thrown.
   *
   * @param tmpResults one or more temporary results created by <b>this</b>
   *          handler.
   * @return temporary facet result that represents the union, as specified by
   *         <b>this</b> handler, of the input temporary facet results.
   * @throws IOException on error.
   * @throws ClassCastException if a temporary result passed was not created
   *           by this handler
   * @throws IllegalArgumentException if the passed <code>tmpResults</code> do
   *           not have the same {@link FacetRequest}
   * @see IntermediateFacetResult#getFacetRequest()
   */
  public abstract IntermediateFacetResult mergeResults(IntermediateFacetResult... tmpResults) throws IOException;

  /**
   * Create a facet result from the temporary result.
   * @param tmpResult temporary result to be rendered as a {@link FacetResult}
   * @throws IOException on error.
   */
  public abstract FacetResult renderFacetResult(IntermediateFacetResult tmpResult) throws IOException;

  /**
   * Perform any rearrangement as required on a facet result that has changed
   * after it was rendered.
   * <P>
   * Possible use case: a sampling facets accumulator invoked another facets
   * accumulator on a sample set of documents, obtained rendered facet results,
   * fixed their counts, and now the results need to be re-sorted according to
   * the fixed counts.
   * @param facetResult result to be rearranged.
   * @see FacetResultNode#value
   */
  public abstract FacetResult rearrangeFacetResult(FacetResult facetResult);

  /**
   * Label results according to settings in {@link FacetRequest}, such as
   * {@link FacetRequest#getNumLabel()}. Usually invoked by
   * {@link OldFacetsAccumulator#accumulate(ScoredDocIDs)}.
   *
   * @param facetResult
   *          facet result to be labeled.
   * @throws IOException
   *           on error
   */
  public abstract void labelResult(FacetResult facetResult) throws IOException;

  /**
   * Check if the given facet arrays hold the partition which contains the
   * given ordinal.
   *
   * @param ordinal
   *          checked facet
   * @param facetArrays
   *          facet arrays for the certain partition
   * @param offset
   *          offset in input arrays where partition starts
   */
  protected boolean isSelfPartition(int ordinal, FacetArrays facetArrays, int offset) {
    int partitionSize = facetArrays.arrayLength;
    return ordinal / partitionSize == offset / partitionSize;
  }

  @Override
  public final FacetResult compute() throws IOException {
    FacetResult res = renderFacetResult(fetchPartitionResult(0));
    labelResult(res);
    return res;
  }

}
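compute() above covers the single-partition case (offset 0). With several partitions, the intended fetch/merge/render/label flow would look roughly like this hedged fragment; numPartitions and partitionSize are assumed to come from the indexing params, and the exact offset semantics are an assumption:

IntermediateFacetResult merged = handler.fetchPartitionResult(0);
for (int p = 1; p < numPartitions; p++) {
  // offset of the p-th partition in the ordinal space (assumed)
  merged = handler.mergeResults(merged, handler.fetchPartitionResult(p * partitionSize));
}
FacetResult result = handler.renderFacetResult(merged);
handler.labelResult(result);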
@@ -1,27 +0,0 @@
<!--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements. See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-->
<html>
<head>
<title>Category Partitions</title>
</head>
<body>
<h1>Category Partitions</h1>

Allows partitioning the category ordinals space, so that less RAM is consumed during search.
Only meaningful for very large taxonomies (tens of millions of categories).
</body>
</html>
@@ -1,33 +0,0 @@
package org.apache.lucene.facet.range;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.facet.search.FacetResultNode;
import org.apache.lucene.facet.taxonomy.FacetLabel;

/** Holds the facet results for a {@link
 *  RangeFacetRequest}. */
public class RangeFacetResultNode extends FacetResultNode {
  public final Range range;

  RangeFacetResultNode(String field, Range range, int count) {
    super(-1, count);
    this.range = range;
    this.label = new FacetLabel(field, range.label);
  }
}
@ -1,24 +0,0 @@
<!--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements.  See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License.  You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-->
<html>
<head>
<title>Facets numeric range code</title>
</head>
<body>
Code to compute facets for numeric ranges.
</body>
</html>
@ -1,71 +0,0 @@
package org.apache.lucene.facet.sampling;

import java.io.IOException;
import java.util.Random;

import org.apache.lucene.facet.old.ScoredDocIDs;
import org.apache.lucene.facet.old.ScoredDocIDsIterator;
import org.apache.lucene.facet.old.ScoredDocIdsUtils;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Simple random sampler
 */
public class RandomSampler extends Sampler {
  
  private final Random random;

  public RandomSampler() {
    super();
    this.random = new Random();
  }
  
  public RandomSampler(SamplingParams params, Random random) throws IllegalArgumentException {
    super(params);
    this.random = random;
  }

  @Override
  protected SampleResult createSample(ScoredDocIDs docids, int actualSize, int sampleSetSize) throws IOException {
    final int[] sample = new int[sampleSetSize];
    final int maxStep = (actualSize * 2) / sampleSetSize; // floor
    int remaining = actualSize;
    ScoredDocIDsIterator it = docids.iterator();
    int i = 0;
    // Select sample docs with a random skip step, making sure to leave
    // sufficient docs for selection after the last skip.
    while (i < sample.length && remaining > (sampleSetSize - maxStep - i)) {
      int skipStep = 1 + random.nextInt(maxStep);
      // Skip over 'skipStep' documents
      for (int j = 0; j < skipStep; j++) {
        it.next();
        --remaining;
      }
      sample[i++] = it.getDocID();
    }
    // Add leftover documents to the sample set
    while (i < sample.length) {
      it.next();
      sample[i++] = it.getDocID();
    }
    ScoredDocIDs sampleRes = ScoredDocIdsUtils.createScoredDocIDsSubset(docids, sample);
    SampleResult res = new SampleResult(sampleRes, sampleSetSize / (double) actualSize);
    return res;
  }
  
}
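The skip-step loop in createSample above is easier to follow outside the ScoredDocIDs machinery. Below is a minimal, self-contained sketch of the same idea over a plain int array (sizes and seed are illustrative; where the real code relies on the iterator simply stopping at the last document, this sketch clamps the index instead). Random skips of 1..maxStep average actualSize/sampleSetSize, so the sample tends to span the whole collection.

import java.util.Arrays;
import java.util.Random;

public class SkipStepSamplingDemo {
  public static void main(String[] args) {
    final Random random = new Random(42); // fixed seed: repeatable demo
    final int actualSize = 100;
    final int sampleSetSize = 10;
    final int[] docs = new int[actualSize]; // stand-in for the doc-id iterator
    for (int d = 0; d < actualSize; d++) docs[d] = d;

    final int[] sample = new int[sampleSetSize];
    final int maxStep = (actualSize * 2) / sampleSetSize; // 20, same floor as above
    int remaining = actualSize;
    int pos = -1; // current iterator position
    int i = 0;
    while (i < sample.length && remaining > (sampleSetSize - maxStep - i)) {
      int skipStep = 1 + random.nextInt(maxStep);
      pos = Math.min(pos + skipStep, docs.length - 1); // clamp; real code stops at last doc
      remaining -= skipStep;
      sample[i++] = docs[pos];
    }
    // Leftovers are taken contiguously, as in the second loop above.
    while (i < sample.length) {
      pos = Math.min(pos + 1, docs.length - 1);
      sample[i++] = docs[pos];
    }
    System.out.println(Arrays.toString(sample));
  }
}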
@ -1,405 +0,0 @@
package org.apache.lucene.facet.sampling;

import java.io.IOException;
import java.util.Arrays;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.apache.lucene.facet.old.ScoredDocIDs;
import org.apache.lucene.facet.old.ScoredDocIDsIterator;
import org.apache.lucene.facet.old.ScoredDocIdsUtils;
import org.apache.lucene.util.PriorityQueue;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Take random samples of large collections.
 * @lucene.experimental
 */
public class RepeatableSampler extends Sampler {

  private static final Logger logger = Logger.getLogger(RepeatableSampler.class.getName());

  public RepeatableSampler(SamplingParams params) {
    super(params);
  }

  @Override
  protected SampleResult createSample(ScoredDocIDs docids, int actualSize,
      int sampleSetSize) throws IOException {
    int[] sampleSet = null;
    try {
      sampleSet = repeatableSample(docids, actualSize, sampleSetSize);
    } catch (IOException e) {
      if (logger.isLoggable(Level.WARNING)) {
        logger.log(Level.WARNING, "sampling failed: " + e.getMessage() + " - falling back to no sampling!", e);
      }
      return new SampleResult(docids, 1d);
    }

    ScoredDocIDs sampled = ScoredDocIdsUtils.createScoredDocIDsSubset(docids, sampleSet);
    if (logger.isLoggable(Level.FINEST)) {
      logger.finest("******************** " + sampled.size());
    }
    return new SampleResult(sampled, sampled.size() / (double) docids.size());
  }

  /**
   * Returns <code>sampleSize</code> values from the first <code>collectionSize</code>
   * locations of <code>collection</code>, chosen using
   * the <code>HASHING</code> algorithm. The sample values are not sorted.
   * @param collection The values from which a sample is wanted.
   * @param collectionSize The number of values (from the first) from which to draw the sample.
   * @param sampleSize The number of values to return.
   * @return An array of values chosen from the collection.
   * @see Algorithm#HASHING
   */
  private static int[] repeatableSample(ScoredDocIDs collection,
      int collectionSize, int sampleSize)
      throws IOException {
    return repeatableSample(collection, collectionSize,
        sampleSize, Algorithm.HASHING, Sorted.NO);
  }

  /**
   * Returns <code>sampleSize</code> values from the first <code>collectionSize</code>
   * locations of <code>collection</code>, chosen using <code>algorithm</code>.
   * @param collection The values from which a sample is wanted.
   * @param collectionSize The number of values (from the first) from which to draw the sample.
   * @param sampleSize The number of values to return.
   * @param algorithm Which algorithm to use.
   * @param sorted Sorted.YES to sort the sample values in ascending order before returning;
   *        Sorted.NO to return them in essentially random order.
   * @return An array of values chosen from the collection.
   */
  private static int[] repeatableSample(ScoredDocIDs collection,
      int collectionSize, int sampleSize,
      Algorithm algorithm, Sorted sorted)
      throws IOException {
    if (collection == null) {
      throw new IOException("docIdSet is null");
    }
    if (sampleSize < 1) {
      throw new IOException("sampleSize < 1 (" + sampleSize + ")");
    }
    if (collectionSize < sampleSize) {
      throw new IOException("collectionSize (" + collectionSize + ") less than sampleSize (" + sampleSize + ")");
    }
    int[] sample = new int[sampleSize];
    long[] times = new long[4];
    if (algorithm == Algorithm.TRAVERSAL) {
      sample1(collection, collectionSize, sample, times);
    } else if (algorithm == Algorithm.HASHING) {
      sample2(collection, collectionSize, sample, times);
    } else {
      throw new IllegalArgumentException("Invalid algorithm selection");
    }
    if (sorted == Sorted.YES) {
      Arrays.sort(sample);
    }
    if (returnTimings) {
      times[3] = System.currentTimeMillis();
      if (logger.isLoggable(Level.FINEST)) {
        logger.finest("Times: " + (times[1] - times[0]) + "ms, "
            + (times[2] - times[1]) + "ms, " + (times[3] - times[2]) + "ms");
      }
    }
    return sample;
  }

  /**
   * Returns <code>sample</code>.length values chosen from the first <code>collectionSize</code>
   * locations of <code>collection</code>, using the TRAVERSAL algorithm. The sample is
   * pseudorandom: no subset of the original collection
   * is in principle more likely to occur than any other, but for a given collection
   * and sample size, the same sample will always be returned. This algorithm walks the
   * original collection in a methodical way that is guaranteed not to visit any location
   * more than once, which makes sampling without replacement faster because removals don't
   * have to be tracked, and the number of operations is proportional to the sample size,
   * not the collection size.
   * Times for performance measurement
   * are returned in <code>times</code>, which must be an array of at least three longs, containing
   * millisecond event times. The first
   * is set when the algorithm starts; the second, when the step size has been calculated;
   * and the third when the sample has been taken.
   * @param collection The set to be sampled.
   * @param collectionSize The number of values to use (starting from first).
   * @param sample The array in which to return the sample.
   * @param times The times of three events, for measuring performance.
   */
  private static void sample1(ScoredDocIDs collection, int collectionSize, int[] sample, long[] times)
      throws IOException {
    ScoredDocIDsIterator it = collection.iterator();
    if (returnTimings) {
      times[0] = System.currentTimeMillis();
    }
    int sampleSize = sample.length;
    int prime = findGoodStepSize(collectionSize, sampleSize);
    int mod = prime % collectionSize;
    if (returnTimings) {
      times[1] = System.currentTimeMillis();
    }
    int sampleCount = 0;
    int index = 0;
    while (sampleCount < sampleSize) {
      if (index + mod < collectionSize) {
        for (int i = 0; i < mod; i++, index++) {
          it.next();
        }
      } else {
        index = index + mod - collectionSize;
        it = collection.iterator();
        for (int i = 0; i < index; i++) {
          it.next();
        }
      }
      sample[sampleCount++] = it.getDocID();
    }
    if (returnTimings) {
      times[2] = System.currentTimeMillis();
    }
  }

  /**
   * Returns a value which will allow the caller to walk
   * a collection of <code>collectionSize</code> values, without repeating or missing
   * any, and spanning the collection from beginning to end at least once with
   * <code>sampleSize</code> visited locations. Choosing a value
   * that is relatively prime to the collection size ensures that stepping by that size (modulo
   * the collection size) will hit all locations without repeating, eliminating the need to
   * track previously visited locations for a "without replacement" sample. Starting with the
   * square root of the collection size ensures that either the first or second prime tried will
   * work (they can't both divide the collection size). It also has the property that N steps of
   * size N will span a collection of N**2 elements once. If the sample is bigger than N, it will
   * wrap multiple times (without repeating). If the sample is smaller, a step size is chosen
   * that will result in at least one spanning of the collection.
   *
   * @param collectionSize The number of values in the collection to be sampled.
   * @param sampleSize The number of values wanted in the sample.
   * @return A good increment value for walking the collection.
   */
  private static int findGoodStepSize(int collectionSize, int sampleSize) {
    int i = (int) Math.sqrt(collectionSize);
    if (sampleSize < i) {
      i = collectionSize / sampleSize;
    }
    do {
      i = findNextPrimeAfter(i);
    } while (collectionSize % i == 0);
    return i;
  }

  /**
   * Returns the first prime number that is larger than <code>n</code>.
   * @param n A number less than the prime to be returned.
   * @return The smallest prime larger than <code>n</code>.
   */
  private static int findNextPrimeAfter(int n) {
    n += (n % 2 == 0) ? 1 : 2; // next odd
    foundFactor: for (;; n += 2) { // TODO labels??!!
      int sri = (int) (Math.sqrt(n));
      for (int primeIndex = 0; primeIndex < N_PRIMES; primeIndex++) {
        int p = primes[primeIndex];
        if (p > sri) {
          return n;
        }
        if (n % p == 0) {
          continue foundFactor;
        }
      }
      for (int p = primes[N_PRIMES - 1] + 2;; p += 2) {
        if (p > sri) {
          return n;
        }
        if (n % p == 0) {
          continue foundFactor;
        }
      }
    }
  }

  /**
   * The first N_PRIMES primes, after 2.
   */
  private static final int N_PRIMES = 4000;
  private static int[] primes = new int[N_PRIMES];
  static {
    primes[0] = 3;
    for (int count = 1; count < N_PRIMES; count++) {
      primes[count] = findNextPrimeAfter(primes[count - 1]);
    }
  }

  /**
   * Returns <code>sample</code>.length values chosen from the first <code>collectionSize</code>
   * locations of <code>collection</code>, using the HASHING algorithm. Performance measurements
   * are returned in <code>times</code>, which must be an array of at least three longs. The first
   * will be set when the algorithm starts; the second, when a hash key has been calculated and
   * inserted into the priority queue for every element in the collection; and the third when the
   * original elements associated with the keys remaining in the PQ have been stored in the sample
   * array for return.
   * <P>
   * This algorithm slows as the sample size becomes a significant fraction of the collection
   * size, because the PQ is as large as the sample set, and will not do early rejection of values
   * below the minimum until it fills up, and a larger PQ contains more small values to be purged,
   * resulting in less early rejection and more logN insertions.
   *
   * @param collection The set to be sampled.
   * @param collectionSize The number of values to use (starting from first).
   * @param sample The array in which to return the sample.
   * @param times The times of three events, for measuring performance.
   */
  private static void sample2(ScoredDocIDs collection, int collectionSize, int[] sample, long[] times)
      throws IOException {
    if (returnTimings) {
      times[0] = System.currentTimeMillis();
    }
    int sampleSize = sample.length;
    IntPriorityQueue pq = new IntPriorityQueue(sampleSize);
    /*
     * Convert every value in the collection to a hashed "weight" value, and insert
     * into a bounded PQ (retains only sampleSize highest weights).
     */
    ScoredDocIDsIterator it = collection.iterator();
    MI mi = null;
    while (it.next()) {
      if (mi == null) {
        mi = new MI();
      }
      mi.value = (int) (it.getDocID() * PHI_32) & 0x7FFFFFFF;
      mi = pq.insertWithOverflow(mi);
    }
    if (returnTimings) {
      times[1] = System.currentTimeMillis();
    }
    /*
     * Extract heap, convert weights back to original values, and return as integers.
     */
    Object[] heap = pq.getHeap();
    for (int si = 0; si < sampleSize; si++) {
      sample[si] = (int) (((MI) heap[si + 1]).value * PHI_32I) & 0x7FFFFFFF;
    }
    if (returnTimings) {
      times[2] = System.currentTimeMillis();
    }
  }

  /**
   * A mutable integer that lets queue objects be reused once they start overflowing.
   */
  private static class MI {
    MI() { }
    public int value;
  }

  /**
   * A bounded priority queue for Integers, to retain a specified number of
   * the highest-weighted values for return as a random sample.
   */
  private static class IntPriorityQueue extends PriorityQueue<MI> {

    /**
     * Creates a bounded PQ of size <code>size</code>.
     * @param size The number of elements to retain.
     */
    public IntPriorityQueue(int size) {
      super(size);
    }

    /**
     * Returns the underlying data structure for faster access. Extracting elements
     * one at a time would require N logN time, and since we want the elements sorted
     * in ascending order by value (not weight), the array is useful as-is.
     * @return The underlying heap array.
     */
    public Object[] getHeap() {
      return getHeapArray();
    }

    /**
     * Returns true if <code>o1</code>'s weight is less than that of <code>o2</code>, for
     * ordering in the PQ.
     * @return True if <code>o1</code> weighs less than <code>o2</code>.
     */
    @Override
    public boolean lessThan(MI o1, MI o2) {
      return o1.value < o2.value;
    }

  }

  /**
   * For specifying which sampling algorithm to use.
   */
  private enum Algorithm {

    /**
     * Specifies a methodical traversal algorithm, which is guaranteed to span the collection
     * at least once, and never to return duplicates. Faster than the hashing algorithm and
     * uses much less space, but the randomness of the sample may be affected by systematic
     * variations in the collection. Requires only an array for the sample, and visits only
     * the number of elements in the sample set, not the full set.
     */
    // TODO (Facet): This one produces a bimodal distribution (very flat around
    // each peak!) for collection size 10M and sample sizes 10k and 10544.
    // Figure out why.
    TRAVERSAL,

    /**
     * Specifies a Fibonacci-style hash algorithm (see Knuth, S&S), which generates a less
     * systematically distributed subset of the sampled collection than the traversal method,
     * but requires a bounded priority queue the size of the sample, and creates an object
     * containing a sampled value and its hash, for every element in the full set.
     */
    HASHING
  }

  /**
   * For specifying whether to sort the sample.
   */
  private enum Sorted {

    /**
     * Sort resulting sample before returning.
     */
    YES,

    /**
     * Do not sort the resulting sample.
     */
    NO
  }

  /**
   * Magic number 1: 2**32 divided by the golden ratio, used as a Fibonacci-hashing multiplier.
   */
  private static final long PHI_32 = 2654435769L;

  /**
   * Magic number 2: multiplicative inverse of PHI_32, modulo 2**32.
   */
  private static final long PHI_32I = 340573321L;

  /**
   * Switch to cause methods to return timings.
   */
  private static boolean returnTimings = false;

}
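Two pieces of number theory above are worth checking concretely. First, stepping through a collection of size N by a step that is relatively prime to N visits every slot exactly once before repeating, which is what lets findGoodStepSize avoid tracking visited positions. Second, PHI_32I is the multiplicative inverse of PHI_32 modulo 2**32, which is what lets sample2 map hashed weights back to the original doc ids. A small self-contained check (sizes illustrative):

public class RepeatableSamplerMathDemo {
  public static void main(String[] args) {
    // 1) A step relatively prime to the collection size spans it without repeats.
    int collectionSize = 10;
    int step = 7; // gcd(7, 10) == 1
    boolean[] visited = new boolean[collectionSize];
    int index = 0;
    for (int i = 0; i < collectionSize; i++) {
      visited[index] = true;
      index = (index + step) % collectionSize;
    }
    boolean allVisited = true;
    for (boolean v : visited) allVisited &= v;
    System.out.println("all slots visited: " + allVisited); // true

    // 2) PHI_32 * PHI_32I == 1 (mod 2^32), so the hashing step is invertible.
    long PHI_32 = 2654435769L;
    long PHI_32I = 340573321L;
    System.out.println(((PHI_32 * PHI_32I) & 0xFFFFFFFFL) == 1L); // true
  }
}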
@ -1,73 +0,0 @@
package org.apache.lucene.facet.sampling;

import java.io.IOException;

import org.apache.lucene.facet.old.ScoredDocIDs;
import org.apache.lucene.facet.search.FacetResult;
import org.apache.lucene.facet.search.FacetResultNode;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Fixer of sample facet accumulation results.
 * 
 * @lucene.experimental
 */
public abstract class SampleFixer {
  
  /**
   * Alter the input result, fixing it to account for the sampling. An
   * implementation can compute accurate or estimated counts for the sampled
   * facets. For example, a faster correction could just multiply by a
   * compensating factor.
   * 
   * @param origDocIds
   *          full set of matching documents.
   * @param fres
   *          sample result to be fixed.
   * @param samplingRatio
   *          ratio between the size of the sampled set and the original set.
   * @throws IOException
   *           If there is a low-level I/O error.
   */
  public void fixResult(ScoredDocIDs origDocIds, FacetResult fres, double samplingRatio) throws IOException {
    FacetResultNode topRes = fres.getFacetResultNode();
    fixResultNode(topRes, origDocIds, samplingRatio);
  }
  
  /**
   * Fix result node count, and, recursively, fix all its children
   * 
   * @param facetResNode
   *          result node to be fixed
   * @param docIds
   *          docids in effect
   * @param samplingRatio
   *          ratio between the size of the sampled set and the original set.
   * @throws IOException
   *           If there is a low-level I/O error.
   */
  protected void fixResultNode(FacetResultNode facetResNode, ScoredDocIDs docIds, double samplingRatio)
      throws IOException {
    singleNodeFix(facetResNode, docIds, samplingRatio);
    for (FacetResultNode frn : facetResNode.subResults) {
      fixResultNode(frn, docIds, samplingRatio);
    }
  }

  /** Fix the given node's value. */
  protected abstract void singleNodeFix(FacetResultNode facetResNode, ScoredDocIDs docIds, double samplingRatio)
      throws IOException;
  
}
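The class comment mentions that a faster correction could simply multiply by a compensating factor. A minimal sketch of such a subclass (hypothetical, not part of this change; contrast the exact-recounting TakmiSampleFixer below):

package org.apache.lucene.facet.sampling;

import org.apache.lucene.facet.old.ScoredDocIDs;
import org.apache.lucene.facet.search.FacetResultNode;

/**
 * Hypothetical fixer: instead of exact recounting, scale each sampled count
 * up by 1/samplingRatio, trading accuracy for speed.
 */
public class ScalingSampleFixer extends SampleFixer {

  @Override
  protected void singleNodeFix(FacetResultNode facetResNode, ScoredDocIDs docIds,
      double samplingRatio) {
    // A count of c over a sample of ratio r estimates c/r over the full set.
    facetResNode.value /= samplingRatio;
  }
}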
@ -1,216 +0,0 @@
package org.apache.lucene.facet.sampling;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.facet.old.ScoredDocIDs;
import org.apache.lucene.facet.params.FacetIndexingParams;
import org.apache.lucene.facet.params.FacetSearchParams;
import org.apache.lucene.facet.search.FacetRequest;
import org.apache.lucene.facet.search.FacetResult;
import org.apache.lucene.facet.search.FacetResultNode;
import org.apache.lucene.facet.search.FacetsAggregator;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Sampling definition for facets accumulation.
 * <p>
 * The Sampler uses TAKMI style counting to provide a 'best guess' top-K result
 * set of the facets accumulated.
 * <p>
 * Note: Sampling accumulation (accumulation over a sampled set of the results)
 * does not guarantee accurate values for
 * {@link FacetResult#getNumValidDescendants()}.
 * 
 * @lucene.experimental
 */
public abstract class Sampler {

  protected final SamplingParams samplingParams;
  
  /**
   * Construct with default {@link SamplingParams}
   */
  public Sampler() {
    this(new SamplingParams());
  }
  
  /**
   * Construct with certain {@link SamplingParams}
   * 
   * @param params sampling params in effect
   * @throws IllegalArgumentException if the provided SamplingParams are not valid
   */
  public Sampler(SamplingParams params) throws IllegalArgumentException {
    if (!params.validate()) {
      throw new IllegalArgumentException("The provided SamplingParams are not valid!");
    }
    this.samplingParams = params;
  }

  /**
   * Check whether sampling should be applied to the input docIds, i.e.
   * whether the set is larger than the sampling threshold.
   */
  public boolean shouldSample(ScoredDocIDs docIds) {
    return docIds.size() > samplingParams.getSamplingThreshold();
  }
  
  /**
   * Compute a sample set out of the input set, based on the {@link SamplingParams#getSampleRatio()}
   * in effect. Sub classes can override to alter how the sample set is
   * computed.
   * <p>
   * If the input set is not larger than {@link SamplingParams#getSamplingThreshold()},
   * the input set is returned (no sampling takes place).
   * <p>
   * Other than that, the returned set size will not be larger than {@link SamplingParams#getMaxSampleSize()}
   * nor smaller than {@link SamplingParams#getMinSampleSize()}.
   * @param docids
   *          full set of matching documents out of which a sample is needed.
   */
  public SampleResult getSampleSet(ScoredDocIDs docids) throws IOException {
    if (!shouldSample(docids)) {
      return new SampleResult(docids, 1d);
    }
    
    int actualSize = docids.size();
    int sampleSetSize = (int) (actualSize * samplingParams.getSampleRatio());
    sampleSetSize = Math.max(sampleSetSize, samplingParams.getMinSampleSize());
    sampleSetSize = Math.min(sampleSetSize, samplingParams.getMaxSampleSize());

    return createSample(docids, actualSize, sampleSetSize);
  }

  /**
   * Create and return a sample of the input set
   * @param docids input set out of which a sample is to be created
   * @param actualSize original size of set, prior to sampling
   * @param sampleSetSize required size of sample set
   * @return sample of the input set in the required size
   */
  protected abstract SampleResult createSample(ScoredDocIDs docids, int actualSize, int sampleSetSize)
      throws IOException;

  /**
   * Result of sample computation
   */
  public static final class SampleResult {
    public final ScoredDocIDs docids;
    public final double actualSampleRatio;
    protected SampleResult(ScoredDocIDs docids, double actualSampleRatio) {
      this.docids = docids;
      this.actualSampleRatio = actualSampleRatio;
    }
  }
  
  /**
   * Return the sampling params in effect
   */
  public final SamplingParams getSamplingParams() {
    return samplingParams;
  }

  /**
   * Trim the input facet result.<br>
   * Note: It is only valid to call this method with result obtained for a
   * facet request created through {@link #overSampledSearchParams(FacetSearchParams)}.
   * 
   * @throws IllegalArgumentException
   *           if called with results not obtained for requests created
   *           through {@link #overSampledSearchParams(FacetSearchParams)}
   */
  public FacetResult trimResult(FacetResult facetResult) throws IllegalArgumentException {
    double overSampleFactor = getSamplingParams().getOversampleFactor();
    if (overSampleFactor <= 1) { // no factoring done?
      return facetResult;
    }
    
    OverSampledFacetRequest sampledFreq = null;
    
    try {
      sampledFreq = (OverSampledFacetRequest) facetResult.getFacetRequest();
    } catch (ClassCastException e) {
      throw new IllegalArgumentException(
          "It is only valid to call this method with result obtained for a " +
          "facet request created through sampler.overSampledSearchParams()",
          e);
    }
    
    FacetRequest origFrq = sampledFreq.orig;

    FacetResultNode trimmedRootNode = facetResult.getFacetResultNode();
    trimSubResults(trimmedRootNode, origFrq.numResults);
    
    return new FacetResult(origFrq, trimmedRootNode, facetResult.getNumValidDescendants());
  }
  
  /** Trim sub results to a given size. */
  private void trimSubResults(FacetResultNode node, int size) {
    if (node.subResults == FacetResultNode.EMPTY_SUB_RESULTS || node.subResults.size() == 0) {
      return;
    }

    ArrayList<FacetResultNode> trimmed = new ArrayList<FacetResultNode>(size);
    for (int i = 0; i < node.subResults.size() && i < size; i++) {
      FacetResultNode trimmedNode = node.subResults.get(i);
      trimSubResults(trimmedNode, size);
      trimmed.add(trimmedNode);
    }
    
    node.subResults = trimmed;
  }

  /**
   * Over-sampled search params, wrapping each request with an over-sampled one.
   */
  public FacetSearchParams overSampledSearchParams(FacetSearchParams original) {
    FacetSearchParams res = original;
    // So now we can sample -> altering the searchParams to accommodate for the statistical error for the sampling
    double overSampleFactor = getSamplingParams().getOversampleFactor();
    if (overSampleFactor > 1) { // any factoring to do?
      List<FacetRequest> facetRequests = new ArrayList<FacetRequest>();
      for (FacetRequest frq : original.facetRequests) {
        int overSampledNumResults = (int) Math.ceil(frq.numResults * overSampleFactor);
        facetRequests.add(new OverSampledFacetRequest(frq, overSampledNumResults));
      }
      res = new FacetSearchParams(original.indexingParams, facetRequests);
    }
    return res;
  }
  
  /** Wrapping a facet request for over sampling. */
  public static class OverSampledFacetRequest extends FacetRequest {
    public final FacetRequest orig;
    public OverSampledFacetRequest(FacetRequest orig, int num) {
      super(orig.categoryPath, num);
      this.orig = orig;
      setDepth(orig.getDepth());
      setNumLabel(0); // don't label anything as we're over-sampling
      setResultMode(orig.getResultMode());
      setSortOrder(orig.getSortOrder());
    }

    @Override
    public FacetsAggregator createFacetsAggregator(FacetIndexingParams fip) {
      return orig.createFacetsAggregator(fip);
    }
  }

}
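The clamping in getSampleSet is worth a concrete walk-through. With the default SamplingParams defined further down (ratio 0.01, min 100, max 10000, threshold 75000) and 200,000 matching documents: the set exceeds the threshold, so sampling happens; 200,000 * 0.01 = 2,000 already lies between min and max, so that is the sample size. For 2,000,000 documents the raw size 20,000 is clamped down to 10,000. A sketch of just that arithmetic:

public class SampleSizeDemo {
  // Same clamping as Sampler.getSampleSet, with the parameters inlined.
  static int sampleSetSize(int actualSize, double ratio, int min, int max) {
    int size = (int) (actualSize * ratio);
    size = Math.max(size, min);
    size = Math.min(size, max);
    return size;
  }

  public static void main(String[] args) {
    // Default params: ratio 0.01, min 100, max 10000.
    System.out.println(sampleSetSize(200000, 0.01, 100, 10000));   // 2000
    System.out.println(sampleSetSize(2000000, 0.01, 100, 10000));  // 10000 (clamped)
  }
}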
@ -1,137 +0,0 @@
package org.apache.lucene.facet.sampling;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.facet.old.OldFacetsAccumulator;
import org.apache.lucene.facet.old.ScoredDocIDs;
import org.apache.lucene.facet.params.FacetSearchParams;
import org.apache.lucene.facet.partitions.PartitionsFacetResultsHandler;
import org.apache.lucene.facet.sampling.Sampler.SampleResult;
import org.apache.lucene.facet.search.FacetArrays;
import org.apache.lucene.facet.search.FacetRequest;
import org.apache.lucene.facet.search.FacetResult;
import org.apache.lucene.facet.search.FacetsAccumulator;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.IndexReader;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Facets accumulation with sampling.<br>
 * <p>
 * Note two major differences between this class and {@link SamplingWrapper}:
 * <ol>
 * <li>The latter can wrap any other {@link FacetsAccumulator} while this class
 * directly extends {@link OldFacetsAccumulator}.</li>
 * <li>This class can effectively apply sampling on the complement set of
 * matching documents, thereby working efficiently with the complement
 * optimization - see {@link OldFacetsAccumulator#getComplementThreshold()}.</li>
 * </ol>
 * <p>
 * Note: Sampling accumulation (accumulation over a sampled set of the results)
 * does not guarantee accurate values for
 * {@link FacetResult#getNumValidDescendants()}.
 * 
 * @see Sampler
 * @lucene.experimental
 */
public class SamplingAccumulator extends OldFacetsAccumulator {
  
  private double samplingRatio = -1d;
  private final Sampler sampler;
  
  public SamplingAccumulator(Sampler sampler, FacetSearchParams searchParams,
      IndexReader indexReader, TaxonomyReader taxonomyReader,
      FacetArrays facetArrays) {
    super(searchParams, indexReader, taxonomyReader, facetArrays);
    this.sampler = sampler;
  }

  /**
   * Constructor variant that lets the accumulator allocate its own facet arrays.
   */
  public SamplingAccumulator(
      Sampler sampler, 
      FacetSearchParams searchParams, 
      IndexReader indexReader, TaxonomyReader taxonomyReader) {
    super(searchParams, indexReader, taxonomyReader);
    this.sampler = sampler;
  }

  @Override
  public List<FacetResult> accumulate(ScoredDocIDs docids) throws IOException {
    // Replacing the original searchParams with the over-sampled
    FacetSearchParams original = searchParams;
    SampleFixer samplerFixer = sampler.samplingParams.getSampleFixer();
    final boolean shouldOversample = sampler.samplingParams.shouldOverSample();
    if (shouldOversample) {
      searchParams = sampler.overSampledSearchParams(original);
    }
    
    List<FacetResult> sampleRes = super.accumulate(docids);
    
    List<FacetResult> results = new ArrayList<FacetResult>();
    for (FacetResult fres : sampleRes) {
      // fres is guaranteed to be non-null by super.accumulate().
      FacetRequest fr = fres.getFacetRequest();
      PartitionsFacetResultsHandler frh = createFacetResultsHandler(fr, createOrdinalValueResolver(fr));
      if (samplerFixer != null) {
        // fix the result of current request
        samplerFixer.fixResult(docids, fres, samplingRatio);
        
        fres = frh.rearrangeFacetResult(fres); // let the handler do any arranging it needs to

        if (shouldOversample) {
          // Using the sampler to trim the extra (over-sampled) results
          fres = sampler.trimResult(fres);
        }
      }
      
      // final labeling if allowed (because labeling is a costly operation)
      if (fres.getFacetResultNode().ordinal == TaxonomyReader.INVALID_ORDINAL) {
        // category does not exist, add an empty result
        results.add(emptyResult(fres.getFacetResultNode().ordinal, fr));
      } else {
        frh.labelResult(fres);
        results.add(fres);
      }
    }
    
    searchParams = original; // Back to original params
    
    return results;
  }

  @Override
  protected ScoredDocIDs actualDocsToAccumulate(ScoredDocIDs docids) throws IOException {
    SampleResult sampleRes = sampler.getSampleSet(docids);
    samplingRatio = sampleRes.actualSampleRatio;
    return sampleRes.docids;
  }
  
  @Override
  protected double getTotalCountsFactor() {
    if (samplingRatio < 0) {
      throw new IllegalStateException("Total counts ratio unavailable because actualDocsToAccumulate() was not invoked");
    }
    return samplingRatio;
  }
}
@ -1,196 +0,0 @@
package org.apache.lucene.facet.sampling;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Parameters for sampling, dictating whether sampling is to take place and how.
 * 
 * @lucene.experimental
 */
public class SamplingParams {

  /**
   * Default factor by which more results are requested over the sample set.
   * @see SamplingParams#getOversampleFactor()
   */
  public static final double DEFAULT_OVERSAMPLE_FACTOR = 1d;
  
  /**
   * Default ratio between size of sample to original size of document set.
   * @see Sampler#getSampleSet(org.apache.lucene.facet.old.ScoredDocIDs)
   */
  public static final double DEFAULT_SAMPLE_RATIO = 0.01;
  
  /**
   * Default maximum size of sample.
   * @see Sampler#getSampleSet(org.apache.lucene.facet.old.ScoredDocIDs)
   */
  public static final int DEFAULT_MAX_SAMPLE_SIZE = 10000;
  
  /**
   * Default minimum size of sample.
   * @see Sampler#getSampleSet(org.apache.lucene.facet.old.ScoredDocIDs)
   */
  public static final int DEFAULT_MIN_SAMPLE_SIZE = 100;
  
  /**
   * Default sampling threshold: if the number of results is smaller than this
   * number, no sampling takes place.
   * @see SamplingParams#getSamplingThreshold()
   */
  public static final int DEFAULT_SAMPLING_THRESHOLD = 75000;

  private int maxSampleSize = DEFAULT_MAX_SAMPLE_SIZE;
  private int minSampleSize = DEFAULT_MIN_SAMPLE_SIZE;
  private double sampleRatio = DEFAULT_SAMPLE_RATIO;
  private int samplingThreshold = DEFAULT_SAMPLING_THRESHOLD;
  private double oversampleFactor = DEFAULT_OVERSAMPLE_FACTOR;
  
  private SampleFixer sampleFixer = null;
  
  /**
   * Return the maxSampleSize.
   * In no case should the resulting sample size exceed this value.
   * @see Sampler#getSampleSet(org.apache.lucene.facet.old.ScoredDocIDs)
   */
  public final int getMaxSampleSize() {
    return maxSampleSize;
  }

  /**
   * Return the minSampleSize.
   * In no case should the resulting sample size be smaller than this value.
   * @see Sampler#getSampleSet(org.apache.lucene.facet.old.ScoredDocIDs)
   */
  public final int getMinSampleSize() {
    return minSampleSize;
  }

  /**
   * @return the sampleRatio
   * @see Sampler#getSampleSet(org.apache.lucene.facet.old.ScoredDocIDs)
   */
  public final double getSampleRatio() {
    return sampleRatio;
  }
  
  /**
   * Return the samplingThreshold.
   * Sampling would be performed only for document sets larger than this.
   */
  public final int getSamplingThreshold() {
    return samplingThreshold;
  }

  /**
   * @param maxSampleSize
   *          the maxSampleSize to set
   * @see #getMaxSampleSize()
   */
  public void setMaxSampleSize(int maxSampleSize) {
    this.maxSampleSize = maxSampleSize;
  }

  /**
   * @param minSampleSize
   *          the minSampleSize to set
   * @see #getMinSampleSize()
   */
  public void setMinSampleSize(int minSampleSize) {
    this.minSampleSize = minSampleSize;
  }

  /**
   * @param sampleRatio
   *          the sampleRatio to set
   * @see #getSampleRatio()
   */
  public void setSampleRatio(double sampleRatio) {
    this.sampleRatio = sampleRatio;
  }

  /**
   * Set a sampling-threshold
   * @see #getSamplingThreshold()
   */
  public void setSamplingThreshold(int samplingThreshold) {
    this.samplingThreshold = samplingThreshold;
  }

  /**
   * Check validity of sampling settings, making sure that
   * <ul>
   * <li> <code>minSampleSize <= maxSampleSize <= samplingThreshold</code></li>
   * <li> <code>0 < sampleRatio < 1</code></li>
   * </ul>
   * 
   * @return true if valid, false otherwise
   */
  public boolean validate() {
    return 
      samplingThreshold >= maxSampleSize && 
      maxSampleSize >= minSampleSize && 
      sampleRatio > 0 && 
      sampleRatio < 1;
  }

  /**
   * Return the oversampleFactor. When sampling, we would collect that much more
   * results, so that later, when selecting top out of these, chances are higher
   * to get actual best results. Note that having this value larger than 1 only
   * makes sense when using a SampleFixer which finds accurate results, such as
   * <code>TakmiSampleFixer</code>. When this value is smaller than 1, it is
   * ignored and no oversampling takes place.
   */
  public final double getOversampleFactor() {
    return oversampleFactor;
  }

  /**
   * @param oversampleFactor the oversampleFactor to set
   * @see #getOversampleFactor()
   */
  public void setOversampleFactor(double oversampleFactor) {
    this.oversampleFactor = oversampleFactor;
  }

  /**
   * @return {@link SampleFixer} to be used while fixing the sampled results, if
   *         <code>null</code> no fixing will be performed
   */
  public SampleFixer getSampleFixer() {
    return sampleFixer;
  }

  /**
   * Set a {@link SampleFixer} to be used while fixing the sampled results.
   * {@code null} means no fixing will be performed
   */
  public void setSampleFixer(SampleFixer sampleFixer) {
    this.sampleFixer = sampleFixer;
  }

  /**
   * Returns whether over-sampling should be done. By default returns
   * {@code true} when {@link #getSampleFixer()} is not {@code null} and
   * {@link #getOversampleFactor()} > 1, {@code false} otherwise.
   */
  public boolean shouldOverSample() {
    return sampleFixer != null && oversampleFactor > 1d;
  }
  
}
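As a sanity check, the defaults satisfy validate(): minSampleSize 100 <= maxSampleSize 10000 <= samplingThreshold 75000, and 0 < sampleRatio 0.01 < 1. A short sketch showing one valid and one invalid configuration:

package org.apache.lucene.facet.sampling; // illustrative demo, same package

public class SamplingParamsDemo {
  public static void main(String[] args) {
    SamplingParams params = new SamplingParams();
    System.out.println(params.validate()); // true: the defaults are consistent

    // Raising maxSampleSize above the sampling threshold breaks the invariant
    // samplingThreshold >= maxSampleSize, so validation fails.
    params.setMaxSampleSize(100000);
    System.out.println(params.validate()); // false
  }
}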
@ -1,112 +0,0 @@
package org.apache.lucene.facet.sampling;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.facet.old.OldFacetsAccumulator;
import org.apache.lucene.facet.old.ScoredDocIDs;
import org.apache.lucene.facet.params.FacetSearchParams;
import org.apache.lucene.facet.partitions.PartitionsFacetResultsHandler;
import org.apache.lucene.facet.sampling.Sampler.SampleResult;
import org.apache.lucene.facet.search.FacetRequest;
import org.apache.lucene.facet.search.FacetResult;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Wrap any Facets Accumulator with sampling.
 * <p>
 * Note: Sampling accumulation (accumulation over a sampled set of the results)
 * does not guarantee accurate values for
 * {@link FacetResult#getNumValidDescendants()}.
 * 
 * @lucene.experimental
 */
public class SamplingWrapper extends OldFacetsAccumulator {

  private OldFacetsAccumulator delegee;
  private Sampler sampler;

  public SamplingWrapper(OldFacetsAccumulator delegee, Sampler sampler) {
    super(delegee.searchParams, delegee.indexReader, delegee.taxonomyReader);
    this.delegee = delegee;
    this.sampler = sampler;
  }

  @Override
  public List<FacetResult> accumulate(ScoredDocIDs docids) throws IOException {
    // Replacing the original searchParams with the over-sampled (and without statistics-compute)
    FacetSearchParams original = delegee.searchParams;
    boolean shouldOversample = sampler.samplingParams.shouldOverSample();
    
    if (shouldOversample) {
      delegee.searchParams = sampler.overSampledSearchParams(original);
    }
    
    SampleResult sampleSet = sampler.getSampleSet(docids);

    List<FacetResult> sampleRes = delegee.accumulate(sampleSet.docids);

    List<FacetResult> results = new ArrayList<FacetResult>();
    SampleFixer sampleFixer = sampler.samplingParams.getSampleFixer();
    
    for (FacetResult fres : sampleRes) {
      // fres is guaranteed to be non-null by the delegee.
      FacetRequest fr = fres.getFacetRequest();
      PartitionsFacetResultsHandler frh = createFacetResultsHandler(fr, createOrdinalValueResolver(fr));
      if (sampleFixer != null) {
        // fix the result of current request
        sampleFixer.fixResult(docids, fres, sampleSet.actualSampleRatio);
        fres = frh.rearrangeFacetResult(fres); // let delegee's handler do any arranging it needs to
      }
      
      if (shouldOversample) {
        // Using the sampler to trim the extra (over-sampled) results
        fres = sampler.trimResult(fres);
      }
      
      // final labeling if allowed (because labeling is a costly operation)
      if (fres.getFacetResultNode().ordinal == TaxonomyReader.INVALID_ORDINAL) {
        // category does not exist, add an empty result
        results.add(emptyResult(fres.getFacetResultNode().ordinal, fr));
      } else {
        frh.labelResult(fres);
        results.add(fres);
      }
    }
    
    if (shouldOversample) {
      delegee.searchParams = original; // Back to original params
    }
    
    return results;
  }

  @Override
  public double getComplementThreshold() {
    return delegee.getComplementThreshold();
  }

  @Override
  public void setComplementThreshold(double complementThreshold) {
    delegee.setComplementThreshold(complementThreshold);
  }

}
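Putting the pieces together, a hedged usage sketch (schematic, not part of this change; the caller-supplied accumulator, readers, params, and doc-id set are assumed to exist): wrap an existing OldFacetsAccumulator with a sampler whose params carry a SampleFixer, so results are fixed, trimmed, and labeled as in accumulate() above.

package org.apache.lucene.facet.sampling;

import java.io.IOException;
import java.util.List;
import java.util.Random;

import org.apache.lucene.facet.old.OldFacetsAccumulator;
import org.apache.lucene.facet.old.ScoredDocIDs;
import org.apache.lucene.facet.params.FacetSearchParams;
import org.apache.lucene.facet.search.FacetResult;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.IndexReader;

public class SamplingWrapperUsage {
  // Schematic: the caller supplies an already-built accumulator and readers.
  static List<FacetResult> sampledAccumulate(OldFacetsAccumulator base,
      IndexReader indexReader, TaxonomyReader taxonomyReader,
      FacetSearchParams searchParams, ScoredDocIDs docIds) throws IOException {
    SamplingParams params = new SamplingParams();
    // Exact recounting of the sampled top-k; oversample so the true top-k
    // has a better chance of appearing in the sampled top-2k.
    params.setSampleFixer(new TakmiSampleFixer(indexReader, taxonomyReader, searchParams));
    params.setOversampleFactor(2d);
    Sampler sampler = new RandomSampler(params, new Random(42));
    return new SamplingWrapper(base, sampler).accumulate(docIds);
  }
}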
@ -1,168 +0,0 @@
package org.apache.lucene.facet.sampling;

import java.io.IOException;

import org.apache.lucene.facet.old.ScoredDocIDs;
import org.apache.lucene.facet.old.ScoredDocIDsIterator;
import org.apache.lucene.facet.params.FacetSearchParams;
import org.apache.lucene.facet.search.DrillDownQuery;
import org.apache.lucene.facet.search.FacetResultNode;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.Bits;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Fixes sampling results by counting the intersection between two lists: a
 * TermDocs (list of documents in a certain category) and a
 * DocIdSetIterator (list of documents matching the query).
 * <p>
 * This fixer is suitable for scenarios which prioritize accuracy over
 * performance.
 * <p>
 * <b>Note:</b> for statistically more accurate top-k selection, set
 * {@link SamplingParams#setOversampleFactor(double) oversampleFactor} to at
 * least 2, so that the top-k categories have a better chance of showing up
 * in the sampled top-cK results (see {@link SamplingParams#getOversampleFactor}).
 *
 * @lucene.experimental
 */
public class TakmiSampleFixer extends SampleFixer {

  private TaxonomyReader taxonomyReader;
  private IndexReader indexReader;
  private FacetSearchParams searchParams;

  public TakmiSampleFixer(IndexReader indexReader,
      TaxonomyReader taxonomyReader, FacetSearchParams searchParams) {
    this.indexReader = indexReader;
    this.taxonomyReader = taxonomyReader;
    this.searchParams = searchParams;
  }

  @Override
  public void singleNodeFix(FacetResultNode facetResNode, ScoredDocIDs docIds, double samplingRatio) throws IOException {
    recount(facetResNode, docIds);
  }

  /**
   * Internal utility: recount for a facet result node
   *
   * @param fresNode
   *          result node to be recounted
   * @param docIds
   *          full set of matching documents.
   * @throws IOException If there is a low-level I/O error.
   */
  private void recount(FacetResultNode fresNode, ScoredDocIDs docIds) throws IOException {
    // TODO (Facet): change from void to return the new, smaller docSet, and use
    // that for the children, as this will make their intersection ops faster.
    // can do this only when the new set is "sufficiently" smaller.

    /* We need the category's path name in order to do its recounting.
     * If it is missing, because the option to label only part of the
     * facet results was exercised, we need to calculate them anyway, so
     * in essence sampling with recounting spends some extra cycles for
     * labeling results for which labels are not required. */
    if (fresNode.label == null) {
      fresNode.label = taxonomyReader.getPath(fresNode.ordinal);
    }
    FacetLabel catPath = fresNode.label;

    Term drillDownTerm = DrillDownQuery.term(searchParams.indexingParams, catPath);
    // TODO (Facet): avoid Multi*?
    Bits liveDocs = MultiFields.getLiveDocs(indexReader);
    int updatedCount = countIntersection(MultiFields.getTermDocsEnum(indexReader, liveDocs,
        drillDownTerm.field(), drillDownTerm.bytes(), 0), docIds.iterator());
    fresNode.value = updatedCount;
  }

  /**
   * Count the size of the intersection between two lists: a TermDocs (list of
   * documents in a certain category) and a DocIdSetIterator (list of documents
   * matching a query).
   */
  private static int countIntersection(DocsEnum p1, ScoredDocIDsIterator p2)
      throws IOException {
    // The documentation of both TermDocs and DocIdSetIterator claims
    // that we must do next() before doc(). So we do, and if one of the
    // lists is empty, obviously return 0.
    if (p1 == null || p1.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
      return 0;
    }
    if (!p2.next()) {
      return 0;
    }

    int d1 = p1.docID();
    int d2 = p2.getDocID();

    int count = 0;
    for (;;) {
      if (d1 == d2) {
        ++count;
        if (p1.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
          break; // end of list 1, nothing more in intersection
        }
        d1 = p1.docID();
        if (!advance(p2, d1)) {
          break; // end of list 2, nothing more in intersection
        }
        d2 = p2.getDocID();
      } else if (d1 < d2) {
        if (p1.advance(d2) == DocIdSetIterator.NO_MORE_DOCS) {
          break; // end of list 1, nothing more in intersection
        }
        d1 = p1.docID();
      } else /* d1>d2 */ {
        if (!advance(p2, d1)) {
          break; // end of list 2, nothing more in intersection
        }
        d2 = p2.getDocID();
      }
    }
    return count;
  }

  /**
   * Utility: advance the iterator until finding (or exceeding) a specific
   * document
   *
   * @param iterator
   *          iterator being advanced
   * @param targetDoc
   *          target of advancing
   * @return false if iterator exhausted, true otherwise.
   */
  private static boolean advance(ScoredDocIDsIterator iterator, int targetDoc) {
    while (iterator.next()) {
      if (iterator.getDocID() >= targetDoc) {
        return true; // target reached
      }
    }
    return false; // exhausted
  }

}
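The countIntersection/advance pair above is a classic leapfrog intersection of two sorted doc-ID streams. As a minimal, self-contained sketch of the same idea over plain sorted arrays (a hypothetical helper, not part of the Lucene API):

static int countSortedIntersection(int[] a, int[] b) {
  // Both arrays must be sorted ascending, like postings lists.
  int i = 0, j = 0, count = 0;
  while (i < a.length && j < b.length) {
    if (a[i] == b[j]) {
      count++; i++; j++;      // common doc: count it, advance both
    } else if (a[i] < b[j]) {
      i++;                    // advance whichever list is behind
    } else {
      j++;
    }
  }
  return count;
}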
@ -1,24 +0,0 @@
<!--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements.  See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License.  You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-->
<html>
<head>
<title>Facets sampling</title>
</head>
<body>
Facets sampling.
</body>
</html>
@ -1,110 +0,0 @@
package org.apache.lucene.facet.search;

import java.util.Arrays;
import java.util.concurrent.ArrayBlockingQueue;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * A thread-safe pool of {@code int[]} and {@code float[]} arrays. One specifies
 * the maximum number of arrays in the constructor. Calls to
 * {@link #allocateFloatArray()} or {@link #allocateIntArray()} take an array
 * from the pool, and if one is not available, allocate a new one. When you are
 * done using the array, you should {@link #free(int[]) free} it.
 * <p>
 * This class is used by {@link ReusingFacetArrays} for temporary facet
 * aggregation arrays, which can be reused across searches instead of being
 * allocated afresh on every search.
 *
 * @lucene.experimental
 */
public final class ArraysPool {

  private final ArrayBlockingQueue<int[]> intsPool;
  private final ArrayBlockingQueue<float[]> floatsPool;

  public final int arrayLength;

  /**
   * Specifies the max number of arrays to pool, as well as the length of each
   * array to allocate.
   *
   * @param arrayLength the size of the arrays to allocate
   * @param maxArrays the maximum number of arrays to pool, of each type
   *
   * @throws IllegalArgumentException if maxArrays is set to 0.
   */
  public ArraysPool(int arrayLength, int maxArrays) {
    if (maxArrays == 0) {
      throw new IllegalArgumentException(
          "maxArrays cannot be 0 - don't use this class if you don't intend to pool arrays");
    }
    this.arrayLength = arrayLength;
    this.intsPool = new ArrayBlockingQueue<int[]>(maxArrays);
    this.floatsPool = new ArrayBlockingQueue<float[]>(maxArrays);
  }

  /**
   * Allocates a new {@code int[]}. If there's an available array in the pool,
   * it is used, otherwise a new array is allocated.
   */
  public final int[] allocateIntArray() {
    int[] arr = intsPool.poll();
    if (arr == null) {
      return new int[arrayLength];
    }
    Arrays.fill(arr, 0); // reset array
    return arr;
  }

  /**
   * Allocates a new {@code float[]}. If there's an available array in the pool,
   * it is used, otherwise a new array is allocated.
   */
  public final float[] allocateFloatArray() {
    float[] arr = floatsPool.poll();
    if (arr == null) {
      return new float[arrayLength];
    }
    Arrays.fill(arr, 0f); // reset array
    return arr;
  }

  /**
   * Frees a no-longer-needed array. If there's room in the pool, the array is
   * added to it, otherwise discarded.
   */
  public final void free(int[] arr) {
    if (arr != null) {
      // use offer - if there isn't room, we don't want to wait
      intsPool.offer(arr);
    }
  }

  /**
   * Frees a no-longer-needed array. If there's room in the pool, the array is
   * added to it, otherwise discarded.
   */
  public final void free(float[] arr) {
    if (arr != null) {
      // use offer - if there isn't room, we don't want to wait
      floatsPool.offer(arr);
    }
  }

}
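A usage sketch for the pool above; the sizes are illustrative assumptions, and ReusingFacetArrays is the intended real caller:

int taxonomySize = 1024;                           // assumed taxonomy size
ArraysPool pool = new ArraysPool(taxonomySize, 4); // pool at most 4 arrays of each type
int[] counts = pool.allocateIntArray();            // pooled (zeroed) or freshly allocated
try {
  counts[0]++; // ... aggregate into the array during the search ...
} finally {
  pool.free(counts); // returned to the pool, or discarded if the pool is full
}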
@ -1,54 +0,0 @@
package org.apache.lucene.facet.search;

import java.io.IOException;

import org.apache.lucene.facet.params.CategoryListParams;
import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs;
import org.apache.lucene.facet.search.OrdinalsCache.CachedOrds;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * A {@link FacetsAggregator} which updates category values by counting their
 * occurrences in matching documents. Uses {@link OrdinalsCache} to obtain the
 * category ordinals of each segment.
 *
 * @lucene.experimental
 */
public class CachedOrdsCountingFacetsAggregator extends IntRollupFacetsAggregator {

  @Override
  public void aggregate(MatchingDocs matchingDocs, CategoryListParams clp, FacetArrays facetArrays) throws IOException {
    final CachedOrds ords = OrdinalsCache.getCachedOrds(matchingDocs.context, clp);
    if (ords == null) {
      return; // this segment has no ordinals for the given category list
    }
    final int[] counts = facetArrays.getIntArray();
    int doc = 0;
    int length = matchingDocs.bits.length();
    while (doc < length && (doc = matchingDocs.bits.nextSetBit(doc)) != -1) {
      int start = ords.offsets[doc];
      int end = ords.offsets[doc + 1];
      for (int i = start; i < end; i++) {
        ++counts[ords.ordinals[i]];
      }
      ++doc;
    }
  }

}
@ -1,56 +0,0 @@
package org.apache.lucene.facet.search;

import java.io.IOException;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.IntsRef;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * An interface for obtaining the category ordinals of documents.
 * {@link #getOrdinals(int, IntsRef)} calls are done with document IDs that are
 * local to the reader given to {@link #setNextReader(AtomicReaderContext)}.
 * <p>
 * <b>NOTE:</b> this class operates as a key to a map, and therefore you should
 * implement {@code equals()} and {@code hashCode()} for proper behavior.
 *
 * @lucene.experimental
 */
public interface CategoryListIterator {

  /**
   * Sets the {@link AtomicReaderContext} for which
   * {@link #getOrdinals(int, IntsRef)} calls will be made. Returns true iff any
   * of the documents in this reader have category ordinals. This method must be
   * called before any calls to {@link #getOrdinals(int, IntsRef)}.
   */
  public boolean setNextReader(AtomicReaderContext context) throws IOException;

  /**
   * Stores the category ordinals of the given document ID in the given
   * {@link IntsRef}, starting at position 0 up to {@link IntsRef#length}. Grows
   * the {@link IntsRef} if it is not large enough.
   *
   * <p>
   * <b>NOTE:</b> if the requested document does not have category ordinals
   * associated with it, {@link IntsRef#length} is set to zero.
   */
  public void getOrdinals(int docID, IntsRef ints) throws IOException;

}
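A sketch of the per-segment consumption pattern this interface implies; the helper method and its arguments are assumptions for illustration:

static void countOrdinals(CategoryListIterator cli, AtomicReaderContext context,
    int docID, int[] counts) throws IOException {
  if (!cli.setNextReader(context)) {
    return; // no documents in this segment have category ordinals
  }
  IntsRef ordinals = new IntsRef(32);
  cli.getOrdinals(docID, ordinals); // docID must be segment-local
  for (int i = ordinals.offset; i < ordinals.offset + ordinals.length; i++) {
    ++counts[ordinals.ints[i]]; // aggregate per category ordinal
  }
}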
@ -1,39 +0,0 @@
package org.apache.lucene.facet.search;

import org.apache.lucene.facet.params.FacetIndexingParams;
import org.apache.lucene.facet.taxonomy.FacetLabel;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Facet request for counting facets.
 *
 * @lucene.experimental
 */
public class CountFacetRequest extends FacetRequest {

  public CountFacetRequest(FacetLabel path, int num) {
    super(path, num);
  }

  @Override
  public FacetsAggregator createFacetsAggregator(FacetIndexingParams fip) {
    return CountingFacetsAggregator.create(fip.getCategoryListParams(categoryPath));
  }

}
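A hypothetical usage sketch: request the top-10 counts under an "Author" dimension (the label and the default indexing params are assumptions):

FacetIndexingParams fip = FacetIndexingParams.DEFAULT;
CountFacetRequest request = new CountFacetRequest(new FacetLabel("Author"), 10);
FacetsAggregator aggregator = request.createFacetsAggregator(fip);
// The aggregator counts occurrences of "Author/..." categories in matching docs.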
@ -1,72 +0,0 @@
package org.apache.lucene.facet.search;

import java.io.IOException;

import org.apache.lucene.facet.encoding.DGapVInt8IntDecoder;
import org.apache.lucene.facet.params.CategoryListParams;
import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs;
import org.apache.lucene.util.IntsRef;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * A {@link FacetsAggregator} which counts the number of times each category
 * appears in the given set of documents. This aggregator uses the
 * {@link CategoryListIterator} to read the encoded categories. If you used the
 * default settings while indexing, you can use
 * {@link FastCountingFacetsAggregator} for better performance.
 *
 * @lucene.experimental
 */
public class CountingFacetsAggregator extends IntRollupFacetsAggregator {

  /**
   * Returns a {@link FacetsAggregator} suitable for counting categories given
   * the {@link CategoryListParams}.
   */
  public static FacetsAggregator create(CategoryListParams clp) {
    if (clp.createEncoder().createMatchingDecoder().getClass() == DGapVInt8IntDecoder.class) {
      return new FastCountingFacetsAggregator();
    } else {
      return new CountingFacetsAggregator();
    }
  }

  private final IntsRef ordinals = new IntsRef(32);

  @Override
  public void aggregate(MatchingDocs matchingDocs, CategoryListParams clp, FacetArrays facetArrays) throws IOException {
    final CategoryListIterator cli = clp.createCategoryListIterator(0);
    if (!cli.setNextReader(matchingDocs.context)) {
      return;
    }

    final int length = matchingDocs.bits.length();
    final int[] counts = facetArrays.getIntArray();
    int doc = 0;
    while (doc < length && (doc = matchingDocs.bits.nextSetBit(doc)) != -1) {
      cli.getOrdinals(doc, ordinals);
      final int upto = ordinals.offset + ordinals.length;
      for (int i = ordinals.offset; i < upto; i++) {
        ++counts[ordinals.ints[i]];
      }
      ++doc;
    }
  }

}
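The create() dispatch above can be exercised directly; assuming the default CategoryListParams constructor (whose encoder's matching decoder is the DGap+VInt8 one), the specialized aggregator is selected:

CategoryListParams clp = new CategoryListParams();
FacetsAggregator aggregator = CountingFacetsAggregator.create(clp);
// With the default encoder this returns a FastCountingFacetsAggregator;
// a custom encoder falls back to the decoding CountingFacetsAggregator.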
@ -1,136 +0,0 @@
package org.apache.lucene.facet.search;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;

import org.apache.lucene.facet.search.FacetRequest.SortOrder;
import org.apache.lucene.facet.taxonomy.ParallelTaxonomyArrays;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.util.CollectionUtil;
import org.apache.lucene.util.PriorityQueue;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * A {@link FacetResultsHandler} which counts the top-K facets at depth 1 only
 * and always labels all result categories. The results are always sorted by
 * value, in descending order.
 *
 * @lucene.experimental
 */
public class DepthOneFacetResultsHandler extends FacetResultsHandler {

  private static class FacetResultNodeQueue extends PriorityQueue<FacetResultNode> {

    public FacetResultNodeQueue(int maxSize, boolean prepopulate) {
      super(maxSize, prepopulate);
    }

    @Override
    protected FacetResultNode getSentinelObject() {
      return new FacetResultNode(TaxonomyReader.INVALID_ORDINAL, 0);
    }

    @Override
    protected boolean lessThan(FacetResultNode a, FacetResultNode b) {
      return a.compareTo(b) < 0;
    }

  }

  public DepthOneFacetResultsHandler(TaxonomyReader taxonomyReader, FacetRequest facetRequest, FacetArrays facetArrays,
      OrdinalValueResolver resolver) {
    super(taxonomyReader, facetRequest, resolver, facetArrays);
    assert facetRequest.getDepth() == 1 : "this handler only computes the top-K facets at depth 1";
    assert facetRequest.numResults == facetRequest.getNumLabel() : "this handler always labels all top-K results";
    assert facetRequest.getSortOrder() == SortOrder.DESCENDING : "this handler always sorts results in descending order";
  }

  @Override
  public final FacetResult compute() throws IOException {
    ParallelTaxonomyArrays arrays = taxonomyReader.getParallelTaxonomyArrays();
    final int[] children = arrays.children();
    final int[] siblings = arrays.siblings();

    int rootOrd = taxonomyReader.getOrdinal(facetRequest.categoryPath);

    FacetResultNode root = new FacetResultNode(rootOrd, resolver.valueOf(rootOrd));
    root.label = facetRequest.categoryPath;
    if (facetRequest.numResults > taxonomyReader.getSize()) {
      // specialize this case, user is interested in all available results
      ArrayList<FacetResultNode> nodes = new ArrayList<FacetResultNode>();
      int ordinal = children[rootOrd];
      while (ordinal != TaxonomyReader.INVALID_ORDINAL) {
        double value = resolver.valueOf(ordinal);
        if (value > 0) {
          FacetResultNode node = new FacetResultNode(ordinal, value);
          node.label = taxonomyReader.getPath(ordinal);
          nodes.add(node);
        }
        ordinal = siblings[ordinal];
      }

      CollectionUtil.introSort(nodes, Collections.reverseOrder(new Comparator<FacetResultNode>() {
        @Override
        public int compare(FacetResultNode o1, FacetResultNode o2) {
          return o1.compareTo(o2);
        }
      }));

      root.subResults = nodes;
      return new FacetResult(facetRequest, root, nodes.size());
    }

    // since we use sentinel objects, we cannot reuse PQ. but that's ok because it's not big
    PriorityQueue<FacetResultNode> pq = new FacetResultNodeQueue(facetRequest.numResults, true);
    int ordinal = children[rootOrd];
    FacetResultNode top = pq.top();
    int numSiblings = 0;
    while (ordinal != TaxonomyReader.INVALID_ORDINAL) {
      double value = resolver.valueOf(ordinal);
      if (value > 0) {
        ++numSiblings;
        if (value > top.value) {
          top.value = value;
          top.ordinal = ordinal;
          top = pq.updateTop();
        }
      }
      ordinal = siblings[ordinal];
    }

    // pop() the least (sentinel) elements
    int pqsize = pq.size();
    int size = numSiblings < pqsize ? numSiblings : pqsize;
    for (int i = pqsize - size; i > 0; i--) { pq.pop(); }

    // create the FacetResultNodes.
    FacetResultNode[] subResults = new FacetResultNode[size];
    for (int i = size - 1; i >= 0; i--) {
      FacetResultNode node = pq.pop();
      node.label = taxonomyReader.getPath(node.ordinal);
      subResults[i] = node;
    }
    root.subResults = Arrays.asList(subResults);
    return new FacetResult(facetRequest, root, numSiblings);
  }

}
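The sentinel-object trick above keeps the hot loop to one comparison plus updateTop(), with no add()/pop() calls. The same top-K pattern in isolation, sketched over plain doubles with java.util.PriorityQueue rather than the Lucene class:

static double[] topK(double[] values, int k) {
  java.util.PriorityQueue<Double> pq = new java.util.PriorityQueue<Double>(k);
  for (int i = 0; i < k; i++) {
    pq.add(Double.NEGATIVE_INFINITY); // sentinels, worse than any real value
  }
  for (double v : values) {
    if (v > pq.peek()) { // beats the current worst of the top-K
      pq.poll();
      pq.add(v);
    }
  }
  double[] result = new double[k]; // may retain sentinels if values.length < k
  for (int i = k - 1; i >= 0; i--) {
    result[i] = pq.poll(); // pops ascending, so fill from the back
  }
  return result;
}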
@ -1,87 +0,0 @@
package org.apache.lucene.facet.search;

import java.io.IOException;

import org.apache.lucene.facet.encoding.IntDecoder;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/** A {@link CategoryListIterator} which reads the ordinals from a {@link BinaryDocValues}. */
public class DocValuesCategoryListIterator implements CategoryListIterator {

  private final IntDecoder decoder;
  private final String field;
  private final int hashCode;
  private final BytesRef bytes = new BytesRef(32);

  private BinaryDocValues current;

  /**
   * Constructs a new {@link DocValuesCategoryListIterator}.
   */
  public DocValuesCategoryListIterator(String field, IntDecoder decoder) {
    this.field = field;
    this.decoder = decoder;
    this.hashCode = field.hashCode();
  }

  @Override
  public int hashCode() {
    return hashCode;
  }

  @Override
  public boolean equals(Object o) {
    if (!(o instanceof DocValuesCategoryListIterator)) {
      return false;
    }
    DocValuesCategoryListIterator other = (DocValuesCategoryListIterator) o;
    if (hashCode != other.hashCode) {
      return false;
    }

    // Hash codes are the same, check equals() to avoid cases of hash-collisions.
    return field.equals(other.field);
  }

  @Override
  public boolean setNextReader(AtomicReaderContext context) throws IOException {
    current = context.reader().getBinaryDocValues(field);
    return current != null;
  }

  @Override
  public void getOrdinals(int docID, IntsRef ints) throws IOException {
    assert current != null : "don't call this if setNextReader returned false";
    current.get(docID, bytes);
    ints.length = 0;
    if (bytes.length > 0) {
      decoder.decode(bytes, ints);
    }
  }

  @Override
  public String toString() {
    return field;
  }

}
@ -1,222 +0,0 @@
package org.apache.lucene.facet.search;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import org.apache.lucene.facet.params.CategoryListParams;
import org.apache.lucene.facet.params.FacetIndexingParams;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.FilteredQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;

/**
 * A {@link Query} for drill-down over {@link FacetLabel categories}. You
 * should call {@link #add(FacetLabel...)} for every group of categories you
 * want to drill-down over. Each category in the group is {@code OR'ed} with
 * the others, and groups are {@code AND'ed}.
 * <p>
 * <b>NOTE:</b> if you choose to create your own {@link Query} by calling
 * {@link #term}, it is recommended to wrap it with {@link ConstantScoreQuery}
 * and set the {@link ConstantScoreQuery#setBoost(float) boost} to {@code 0.0f},
 * so that it does not affect the scores of the documents.
 *
 * @lucene.experimental
 */
public final class DrillDownQuery extends Query {

  /** Return a drill-down {@link Term} for a category. */
  public static Term term(FacetIndexingParams iParams, FacetLabel path) {
    CategoryListParams clp = iParams.getCategoryListParams(path);
    char[] buffer = new char[path.fullPathLength()];
    iParams.drillDownTermText(path, buffer);
    return new Term(clp.field, String.valueOf(buffer));
  }

  private final BooleanQuery query;
  private final Map<String,Integer> drillDownDims = new LinkedHashMap<String,Integer>();
  final FacetIndexingParams fip;

  /** Used by clone() */
  DrillDownQuery(FacetIndexingParams fip, BooleanQuery query, Map<String,Integer> drillDownDims) {
    this.fip = fip;
    this.query = query.clone();
    this.drillDownDims.putAll(drillDownDims);
  }

  /** Used by DrillSideways */
  DrillDownQuery(Filter filter, DrillDownQuery other) {
    query = new BooleanQuery(true); // disable coord

    BooleanClause[] clauses = other.query.getClauses();
    if (clauses.length == other.drillDownDims.size()) {
      throw new IllegalArgumentException("cannot apply filter unless baseQuery isn't null; pass ConstantScoreQuery instead");
    }
    assert clauses.length == 1+other.drillDownDims.size(): clauses.length + " vs " + (1+other.drillDownDims.size());
    drillDownDims.putAll(other.drillDownDims);
    query.add(new FilteredQuery(clauses[0].getQuery(), filter), Occur.MUST);
    for(int i=1;i<clauses.length;i++) {
      query.add(clauses[i].getQuery(), Occur.MUST);
    }
    fip = other.fip;
  }

  /** Used by DrillSideways */
  DrillDownQuery(FacetIndexingParams fip, Query baseQuery, List<Query> clauses, Map<String,Integer> drillDownDims) {
    this.fip = fip;
    this.query = new BooleanQuery(true);
    if (baseQuery != null) {
      query.add(baseQuery, Occur.MUST);
    }
    for(Query clause : clauses) {
      query.add(clause, Occur.MUST);
    }
    this.drillDownDims.putAll(drillDownDims);
  }

  /**
   * Creates a new {@link DrillDownQuery} without a base query,
   * to perform a pure browsing query (equivalent to using
   * {@link MatchAllDocsQuery} as base).
   */
  public DrillDownQuery(FacetIndexingParams fip) {
    this(fip, null);
  }

  /**
   * Creates a new {@link DrillDownQuery} over the given base query. Can be
   * {@code null}, in which case the result {@link Query} from
   * {@link #rewrite(IndexReader)} will be a pure browsing query, filtering on
   * the added categories only.
   */
  public DrillDownQuery(FacetIndexingParams fip, Query baseQuery) {
    query = new BooleanQuery(true); // disable coord
    if (baseQuery != null) {
      query.add(baseQuery, Occur.MUST);
    }
    this.fip = fip;
  }

  /**
   * Adds one dimension of drill downs; if you pass multiple values they are
   * OR'd, and then the entire dimension is AND'd against the base query.
   */
  public void add(FacetLabel... paths) {
    Query q;
    if (paths[0].length == 0) {
      throw new IllegalArgumentException("all CategoryPaths must have length > 0");
    }
    String dim = paths[0].components[0];
    if (drillDownDims.containsKey(dim)) {
      throw new IllegalArgumentException("dimension '" + dim + "' was already added");
    }
    if (paths.length == 1) {
      q = new TermQuery(term(fip, paths[0]));
    } else {
      BooleanQuery bq = new BooleanQuery(true); // disable coord
      for (FacetLabel cp : paths) {
        if (cp.length == 0) {
          throw new IllegalArgumentException("all CategoryPaths must have length > 0");
        }
        if (!cp.components[0].equals(dim)) {
          throw new IllegalArgumentException("multiple (OR'd) drill-down paths must be under same dimension; got '"
              + dim + "' and '" + cp.components[0] + "'");
        }
        bq.add(new TermQuery(term(fip, cp)), Occur.SHOULD);
      }
      q = bq;
    }

    add(dim, q);
  }

  /** Expert: add a custom drill-down subQuery. Use this
   *  when you have a separate way to drill-down on the
   *  dimension other than the indexed facet ordinals. */
  public void add(String dim, Query subQuery) {

    // TODO: we should use FilteredQuery?

    // So scores of the drill-down query don't have an
    // effect:
    final ConstantScoreQuery drillDownQuery = new ConstantScoreQuery(subQuery);
    drillDownQuery.setBoost(0.0f);

    query.add(drillDownQuery, Occur.MUST);

    drillDownDims.put(dim, drillDownDims.size());
  }

  @Override
  public DrillDownQuery clone() {
    return new DrillDownQuery(fip, query, drillDownDims);
  }

  @Override
  public int hashCode() {
    final int prime = 31;
    int result = super.hashCode();
    return prime * result + query.hashCode();
  }

  @Override
  public boolean equals(Object obj) {
    if (!(obj instanceof DrillDownQuery)) {
      return false;
    }

    DrillDownQuery other = (DrillDownQuery) obj;
    return query.equals(other.query) && super.equals(other);
  }

  @Override
  public Query rewrite(IndexReader r) throws IOException {
    if (query.clauses().size() == 0) {
      // baseQuery given to the ctor was null and no drill-downs were added.
      // Note that a non-null baseQuery with no drill-down terms is fine, since
      // the rewritten query will then be the original base query.
      throw new IllegalStateException("no base query or drill-down categories given");
    }
    return query;
  }

  @Override
  public String toString(String field) {
    return query.toString(field);
  }

  BooleanQuery getBooleanQuery() {
    return query;
  }

  Map<String,Integer> getDims() {
    return drillDownDims;
  }
}
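A hypothetical usage sketch of the drill-down API above; the dimensions, labels, and base query are assumptions for illustration:

FacetIndexingParams fip = FacetIndexingParams.DEFAULT;
DrillDownQuery ddq = new DrillDownQuery(fip, new MatchAllDocsQuery());
ddq.add(new FacetLabel("Author", "Mark Twain"));  // single-value dimension
ddq.add(new FacetLabel("Publish Year", "2010"),   // multi-value dimension:
        new FacetLabel("Publish Year", "2011"));  // values are OR'd together
// searcher.search(ddq, 10) then AND's both dimensions against the base query.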
@ -1,188 +0,0 @@
package org.apache.lucene.facet.search;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Arrays;
import java.util.IdentityHashMap;
import java.util.Map;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.Scorer.ChildScorer;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;

/** Collector that scrutinizes each hit to determine if it
 *  passed all constraints (a true hit) or if it missed
 *  exactly one dimension (a near-miss, to count for
 *  drill-sideways counts on that dimension). */
class DrillSidewaysCollector extends Collector {

  private final Collector hitCollector;
  private final Collector drillDownCollector;
  private final Collector[] drillSidewaysCollectors;
  private final Scorer[] subScorers;
  private final int exactCount;

  // Maps Weight to either -1 (mainQuery) or to integer
  // index of the dims drillDown. We need this when
  // visiting the child scorers to correlate back to the
  // right scorers:
  private final Map<Weight,Integer> weightToIndex = new IdentityHashMap<Weight,Integer>();

  private Scorer mainScorer;

  public DrillSidewaysCollector(Collector hitCollector, Collector drillDownCollector, Collector[] drillSidewaysCollectors,
      Map<String,Integer> dims) {
    this.hitCollector = hitCollector;
    this.drillDownCollector = drillDownCollector;
    this.drillSidewaysCollectors = drillSidewaysCollectors;
    subScorers = new Scorer[dims.size()];

    if (dims.size() == 1) {
      // When we have only one dim, we insert the
      // MatchAllDocsQuery, bringing the clause count to
      // 2:
      exactCount = 2;
    } else {
      exactCount = dims.size();
    }
  }

  @Override
  public void collect(int doc) throws IOException {
    //System.out.println("collect doc=" + doc + " main.freq=" + mainScorer.freq() + " main.doc=" + mainScorer.docID() + " exactCount=" + exactCount);

    if (mainScorer == null) {
      // This segment did not have any docs with any
      // drill-down field & value:
      return;
    }

    if (mainScorer.freq() == exactCount) {
      // All sub-clauses from the drill-down filters
      // matched, so this is a "real" hit, so we first
      // collect in both the hitCollector and the
      // drillDown collector:
      //System.out.println("  hit " + drillDownCollector);
      hitCollector.collect(doc);
      if (drillDownCollector != null) {
        drillDownCollector.collect(doc);
      }

      // Also collect across all drill-sideways counts so
      // we "merge in" drill-down counts for this
      // dimension.
      for(int i=0;i<subScorers.length;i++) {
        // This cannot be null, because it was a hit,
        // meaning all drill-down dims matched, so all
        // dims must have non-null scorers:
        assert subScorers[i] != null;
        int subDoc = subScorers[i].docID();
        assert subDoc == doc;
        drillSidewaysCollectors[i].collect(doc);
      }

    } else {
      boolean found = false;
      for(int i=0;i<subScorers.length;i++) {
        if (subScorers[i] == null) {
          // This segment did not have any docs with this
          // drill-down field & value:
          drillSidewaysCollectors[i].collect(doc);
          assert allMatchesFrom(i+1, doc);
          found = true;
          break;
        }
        int subDoc = subScorers[i].docID();
        //System.out.println("  i=" + i + " sub: " + subDoc);
        if (subDoc != doc) {
          //System.out.println("  +ds[" + i + "]");
          assert subDoc > doc: "subDoc=" + subDoc + " doc=" + doc;
          drillSidewaysCollectors[i].collect(doc);
          assert allMatchesFrom(i+1, doc);
          found = true;
          break;
        }
      }
      assert found;
    }
  }

  // Only used by assert:
  private boolean allMatchesFrom(int startFrom, int doc) {
    for(int i=startFrom;i<subScorers.length;i++) {
      assert subScorers[i].docID() == doc;
    }
    return true;
  }

  @Override
  public boolean acceptsDocsOutOfOrder() {
    // We actually could accept docs out of order, but, we
    // need to force BooleanScorer2 so that the
    // sub-scorers are "on" each docID we are collecting:
    return false;
  }

  @Override
  public void setNextReader(AtomicReaderContext leaf) throws IOException {
    //System.out.println("DS.setNextReader reader=" + leaf.reader());
    hitCollector.setNextReader(leaf);
    if (drillDownCollector != null) {
      drillDownCollector.setNextReader(leaf);
    }
    for(Collector dsc : drillSidewaysCollectors) {
      dsc.setNextReader(leaf);
    }
  }

  void setWeight(Weight weight, int index) {
    assert !weightToIndex.containsKey(weight);
    weightToIndex.put(weight, index);
  }

  private void findScorers(Scorer scorer) {
    Integer index = weightToIndex.get(scorer.getWeight());
    if (index != null) {
      if (index.intValue() == -1) {
        mainScorer = scorer;
      } else {
        subScorers[index] = scorer;
      }
    }
    for(ChildScorer child : scorer.getChildren()) {
      findScorers(child.child);
    }
  }

  @Override
  public void setScorer(Scorer scorer) throws IOException {
    mainScorer = null;
    Arrays.fill(subScorers, null);
    findScorers(scorer);
    hitCollector.setScorer(scorer);
    if (drillDownCollector != null) {
      drillDownCollector.setScorer(scorer);
    }
    for(Collector dsc : drillSidewaysCollectors) {
      dsc.setScorer(scorer);
    }
  }
}
@ -1,195 +0,0 @@
package org.apache.lucene.facet.search;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Arrays;

import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.Bits;

class DrillSidewaysQuery extends Query {
  final Query baseQuery;
  final Collector drillDownCollector;
  final Collector[] drillSidewaysCollectors;
  final Term[][] drillDownTerms;

  DrillSidewaysQuery(Query baseQuery, Collector drillDownCollector, Collector[] drillSidewaysCollectors, Term[][] drillDownTerms) {
    this.baseQuery = baseQuery;
    this.drillDownCollector = drillDownCollector;
    this.drillSidewaysCollectors = drillSidewaysCollectors;
    this.drillDownTerms = drillDownTerms;
  }

  @Override
  public String toString(String field) {
    return "DrillSidewaysQuery";
  }

  @Override
  public Query rewrite(IndexReader reader) throws IOException {
    Query newQuery = baseQuery;
    while(true) {
      Query rewrittenQuery = newQuery.rewrite(reader);
      if (rewrittenQuery == newQuery) {
        break;
      }
      newQuery = rewrittenQuery;
    }
    if (newQuery == baseQuery) {
      return this;
    } else {
      return new DrillSidewaysQuery(newQuery, drillDownCollector, drillSidewaysCollectors, drillDownTerms);
    }
  }

  @Override
  public Weight createWeight(IndexSearcher searcher) throws IOException {
    final Weight baseWeight = baseQuery.createWeight(searcher);

    return new Weight() {
      @Override
      public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
        return baseWeight.explain(context, doc);
      }

      @Override
      public Query getQuery() {
        return baseQuery;
      }

      @Override
      public float getValueForNormalization() throws IOException {
        return baseWeight.getValueForNormalization();
      }

      @Override
      public void normalize(float norm, float topLevelBoost) {
        baseWeight.normalize(norm, topLevelBoost);
      }

      @Override
      public boolean scoresDocsOutOfOrder() {
        // TODO: would be nice if AssertingIndexSearcher
        // confirmed this for us
        return false;
      }

      @Override
      public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
          boolean topScorer, Bits acceptDocs) throws IOException {

        DrillSidewaysScorer.DocsEnumsAndFreq[] dims = new DrillSidewaysScorer.DocsEnumsAndFreq[drillDownTerms.length];
        TermsEnum termsEnum = null;
        String lastField = null;
        int nullCount = 0;
        for(int dim=0;dim<dims.length;dim++) {
          dims[dim] = new DrillSidewaysScorer.DocsEnumsAndFreq();
          dims[dim].sidewaysCollector = drillSidewaysCollectors[dim];
          String field = drillDownTerms[dim][0].field();
          dims[dim].dim = drillDownTerms[dim][0].text();
          if (lastField == null || !lastField.equals(field)) {
            AtomicReader reader = context.reader();
            Terms terms = reader.terms(field);
            if (terms != null) {
              termsEnum = terms.iterator(null);
            } else {
              termsEnum = null;
            }
            lastField = field;
          }
          dims[dim].docsEnums = new DocsEnum[drillDownTerms[dim].length];
          if (termsEnum == null) {
            nullCount++;
            continue;
          }
          for(int i=0;i<drillDownTerms[dim].length;i++) {
            if (termsEnum.seekExact(drillDownTerms[dim][i].bytes())) {
              DocsEnum docsEnum = termsEnum.docs(null, null, 0);
              if (docsEnum != null) {
                dims[dim].docsEnums[i] = docsEnum;
                dims[dim].maxCost = Math.max(dims[dim].maxCost, docsEnum.cost());
              }
            }
          }
        }

        if (nullCount > 1 || (nullCount == 1 && dims.length == 1)) {
          return null;
        }

        // Sort drill-downs by most restrictive first:
        Arrays.sort(dims);

        // TODO: it could be better if we take acceptDocs
        // into account instead of baseScorer?
        Scorer baseScorer = baseWeight.scorer(context, scoreDocsInOrder, false, acceptDocs);

        if (baseScorer == null) {
          return null;
        }

        return new DrillSidewaysScorer(this, context,
                                       baseScorer,
                                       drillDownCollector, dims);
      }
    };
  }

  // TODO: these should do "deeper" equals/hash on the 2-D drillDownTerms array

  @Override
  public int hashCode() {
    final int prime = 31;
    int result = super.hashCode();
    result = prime * result + ((baseQuery == null) ? 0 : baseQuery.hashCode());
    result = prime * result
        + ((drillDownCollector == null) ? 0 : drillDownCollector.hashCode());
    result = prime * result + Arrays.hashCode(drillDownTerms);
    result = prime * result + Arrays.hashCode(drillSidewaysCollectors);
    return result;
  }

  @Override
  public boolean equals(Object obj) {
    if (this == obj) return true;
    if (!super.equals(obj)) return false;
    if (getClass() != obj.getClass()) return false;
    DrillSidewaysQuery other = (DrillSidewaysQuery) obj;
    if (baseQuery == null) {
      if (other.baseQuery != null) return false;
    } else if (!baseQuery.equals(other.baseQuery)) return false;
    if (drillDownCollector == null) {
      if (other.drillDownCollector != null) return false;
    } else if (!drillDownCollector.equals(other.drillDownCollector)) return false;
    if (!Arrays.equals(drillDownTerms, other.drillDownTerms)) return false;
    if (!Arrays.equals(drillSidewaysCollectors, other.drillSidewaysCollectors)) return false;
    return true;
  }
}
@ -1,654 +0,0 @@
|
||||||
package org.apache.lucene.facet.search;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Collection;
import java.util.Collections;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.FixedBitSet;

/**
 * Scorer for {@link DrillSidewaysQuery}: collects hits that match the base
 * query plus all drill-down dimensions, and routes "near misses" (docs that
 * fail exactly one dimension) to that dimension's sideways collector.
 */
class DrillSidewaysScorer extends Scorer {

  //private static boolean DEBUG = false;

  private final Collector drillDownCollector;

  private final DocsEnumsAndFreq[] dims;

  // DrillDown DocsEnums:
  private final Scorer baseScorer;

  private final AtomicReaderContext context;

  private static final int CHUNK = 2048;
  private static final int MASK = CHUNK-1;

  private int collectDocID = -1;
  private float collectScore;

  DrillSidewaysScorer(Weight w, AtomicReaderContext context, Scorer baseScorer, Collector drillDownCollector,
                      DocsEnumsAndFreq[] dims) {
    super(w);
    this.dims = dims;
    this.context = context;
    this.baseScorer = baseScorer;
    this.drillDownCollector = drillDownCollector;
  }

  @Override
  public void score(Collector collector) throws IOException {
    //if (DEBUG) {
    //  System.out.println("\nscore: reader=" + context.reader());
    //}
    //System.out.println("score r=" + context.reader());
    collector.setScorer(this);
    if (drillDownCollector != null) {
      drillDownCollector.setScorer(this);
      drillDownCollector.setNextReader(context);
    }
    for(DocsEnumsAndFreq dim : dims) {
      dim.sidewaysCollector.setScorer(this);
      dim.sidewaysCollector.setNextReader(context);
    }

    // TODO: if we ever allow null baseScorer ... it will
    // mean we DO score docs out of order ... hmm, or if we
    // change up the order of the conjunctions below
    assert baseScorer != null;

    // Position all scorers to their first matching doc:
    baseScorer.nextDoc();
    for(DocsEnumsAndFreq dim : dims) {
      for (DocsEnum docsEnum : dim.docsEnums) {
        if (docsEnum != null) {
          docsEnum.nextDoc();
        }
      }
    }

    final int numDims = dims.length;

    DocsEnum[][] docsEnums = new DocsEnum[numDims][];
    Collector[] sidewaysCollectors = new Collector[numDims];
    long drillDownCost = 0;
    for(int dim=0;dim<numDims;dim++) {
      docsEnums[dim] = dims[dim].docsEnums;
      sidewaysCollectors[dim] = dims[dim].sidewaysCollector;
      for (DocsEnum de : dims[dim].docsEnums) {
        if (de != null) {
          drillDownCost += de.cost();
        }
      }
    }

    long baseQueryCost = baseScorer.cost();

    /*
    System.out.println("\nbaseDocID=" + baseScorer.docID() + " est=" + estBaseHitCount);
    System.out.println("  maxDoc=" + context.reader().maxDoc());
    System.out.println("  maxCost=" + maxCost);
    System.out.println("  dims[0].freq=" + dims[0].freq);
    if (numDims > 1) {
      System.out.println("  dims[1].freq=" + dims[1].freq);
    }
    */

    // Pick a strategy by relative cost: if the base query is much more
    // restrictive than the drill-downs, drive from the base query; if the
    // drill-downs are much more restrictive, drive from them; otherwise
    // score chunked unions:
    if (baseQueryCost < drillDownCost/10) {
      //System.out.println("baseAdvance");
      doBaseAdvanceScoring(collector, docsEnums, sidewaysCollectors);
    } else if (numDims > 1 && (dims[1].maxCost < baseQueryCost/10)) {
      //System.out.println("drillDownAdvance");
      doDrillDownAdvanceScoring(collector, docsEnums, sidewaysCollectors);
    } else {
      //System.out.println("union");
      doUnionScoring(collector, docsEnums, sidewaysCollectors);
    }
  }

  /** Used when drill downs are highly constraining vs
   *  baseQuery. */
  private void doDrillDownAdvanceScoring(Collector collector, DocsEnum[][] docsEnums, Collector[] sidewaysCollectors) throws IOException {
    final int maxDoc = context.reader().maxDoc();
    final int numDims = dims.length;

    //if (DEBUG) {
    //  System.out.println("  doDrillDownAdvanceScoring");
    //}

    // TODO: maybe a class like BS, instead of parallel arrays
    int[] filledSlots = new int[CHUNK];
    int[] docIDs = new int[CHUNK];
    float[] scores = new float[CHUNK];
    int[] missingDims = new int[CHUNK];
    int[] counts = new int[CHUNK];

    docIDs[0] = -1;
    int nextChunkStart = CHUNK;

    final FixedBitSet seen = new FixedBitSet(CHUNK);

    while (true) {
      //if (DEBUG) {
      //  System.out.println("\ncycle nextChunkStart=" + nextChunkStart + " docIds[0]=" + docIDs[0]);
      //}

      // First dim:
      //if (DEBUG) {
      //  System.out.println("  dim0");
      //}
      for(DocsEnum docsEnum : docsEnums[0]) {
        if (docsEnum == null) {
          continue;
        }
        int docID = docsEnum.docID();
        while (docID < nextChunkStart) {
          int slot = docID & MASK;

          if (docIDs[slot] != docID) {
            seen.set(slot);
            // Mark slot as valid:
            //if (DEBUG) {
            //  System.out.println("    set docID=" + docID + " id=" + context.reader().document(docID).get("id"));
            //}
            docIDs[slot] = docID;
            missingDims[slot] = 1;
            counts[slot] = 1;
          }

          docID = docsEnum.nextDoc();
        }
      }

      // Second dim:
      //if (DEBUG) {
      //  System.out.println("  dim1");
      //}
      for(DocsEnum docsEnum : docsEnums[1]) {
        if (docsEnum == null) {
          continue;
        }
        int docID = docsEnum.docID();
        while (docID < nextChunkStart) {
          int slot = docID & MASK;

          if (docIDs[slot] != docID) {
            // Mark slot as valid:
            seen.set(slot);
            //if (DEBUG) {
            //  System.out.println("    set docID=" + docID + " missingDim=0 id=" + context.reader().document(docID).get("id"));
            //}
            docIDs[slot] = docID;
            missingDims[slot] = 0;
            counts[slot] = 1;
          } else {
            // TODO: single-valued dims will always be true
            // below; we could somehow specialize
            if (missingDims[slot] >= 1) {
              missingDims[slot] = 2;
              counts[slot] = 2;
              //if (DEBUG) {
              //  System.out.println("    set docID=" + docID + " missingDim=2 id=" + context.reader().document(docID).get("id"));
              //}
            } else {
              counts[slot] = 1;
              //if (DEBUG) {
              //  System.out.println("    set docID=" + docID + " missingDim=" + missingDims[slot] + " id=" + context.reader().document(docID).get("id"));
              //}
            }
          }

          docID = docsEnum.nextDoc();
        }
      }

      // After this we can "upgrade" to conjunction, because
      // any doc not seen by either dim 0 or dim 1 cannot be
      // a hit or a near miss:

      //if (DEBUG) {
      //  System.out.println("  baseScorer");
      //}

      // Fold in baseScorer, using advance:
      int filledCount = 0;
      int slot0 = 0;
      while (slot0 < CHUNK && (slot0 = seen.nextSetBit(slot0)) != -1) {
        int ddDocID = docIDs[slot0];
        assert ddDocID != -1;

        int baseDocID = baseScorer.docID();
        if (baseDocID < ddDocID) {
          baseDocID = baseScorer.advance(ddDocID);
        }
        if (baseDocID == ddDocID) {
          //if (DEBUG) {
          //  System.out.println("    keep docID=" + ddDocID + " id=" + context.reader().document(ddDocID).get("id"));
          //}
          scores[slot0] = baseScorer.score();
          filledSlots[filledCount++] = slot0;
          counts[slot0]++;
        } else {
          //if (DEBUG) {
          //  System.out.println("    no docID=" + ddDocID + " id=" + context.reader().document(ddDocID).get("id"));
          //}
          docIDs[slot0] = -1;

          // TODO: we could jump slot0 forward to the
          // baseDocID ... but we'd need to set docIDs for
          // intervening slots to -1
        }
        slot0++;
      }
      seen.clear(0, CHUNK);

      if (filledCount == 0) {
        if (nextChunkStart >= maxDoc) {
          break;
        }
        nextChunkStart += CHUNK;
        continue;
      }

      // TODO: factor this out & share w/ union scorer,
      // except we start from dim=2 instead:
      for(int dim=2;dim<numDims;dim++) {
        //if (DEBUG) {
        //  System.out.println("  dim=" + dim + " [" + dims[dim].dim + "]");
        //}
        for(DocsEnum docsEnum : docsEnums[dim]) {
          if (docsEnum == null) {
            continue;
          }
          int docID = docsEnum.docID();
          while (docID < nextChunkStart) {
            int slot = docID & MASK;
            if (docIDs[slot] == docID && counts[slot] >= dim) {
              // TODO: single-valued dims will always be true
              // below; we could somehow specialize
              if (missingDims[slot] >= dim) {
                //if (DEBUG) {
                //  System.out.println("    set docID=" + docID + " count=" + (dim+2));
                //}
                missingDims[slot] = dim+1;
                counts[slot] = dim+2;
              } else {
                //if (DEBUG) {
                //  System.out.println("    set docID=" + docID + " missing count=" + (dim+1));
                //}
                counts[slot] = dim+1;
              }
            }
            // TODO: sometimes use advance?
            docID = docsEnum.nextDoc();
          }
        }
      }

      // Collect:
      //if (DEBUG) {
      //  System.out.println("  now collect: " + filledCount + " hits");
      //}
      for(int i=0;i<filledCount;i++) {
        int slot = filledSlots[i];
        collectDocID = docIDs[slot];
        collectScore = scores[slot];
        //if (DEBUG) {
        //  System.out.println("    docID=" + docIDs[slot] + " count=" + counts[slot]);
        //}
        if (counts[slot] == 1+numDims) {
          collectHit(collector, sidewaysCollectors);
        } else if (counts[slot] == numDims) {
          collectNearMiss(sidewaysCollectors, missingDims[slot]);
        }
      }

      if (nextChunkStart >= maxDoc) {
        break;
      }

      nextChunkStart += CHUNK;
    }
  }

  /** Used when base query is highly constraining vs the
   *  drilldowns; in this case we just .next() on base and
   *  .advance() on the dims. */
  private void doBaseAdvanceScoring(Collector collector, DocsEnum[][] docsEnums, Collector[] sidewaysCollectors) throws IOException {
    //if (DEBUG) {
    //  System.out.println("  doBaseAdvanceScoring");
    //}
    int docID = baseScorer.docID();

    final int numDims = dims.length;

    nextDoc: while (docID != NO_MORE_DOCS) {
      int failedDim = -1;
      for(int dim=0;dim<numDims;dim++) {
        // TODO: should we sort this 2nd dimension of
        // docsEnums from most frequent to least?
        boolean found = false;
        for(DocsEnum docsEnum : docsEnums[dim]) {
          if (docsEnum == null) {
            continue;
          }
          if (docsEnum.docID() < docID) {
            docsEnum.advance(docID);
          }
          if (docsEnum.docID() == docID) {
            found = true;
            break;
          }
        }
        if (!found) {
          if (failedDim != -1) {
            // More than one dim fails on this document, so
            // it's neither a hit nor a near-miss; move to
            // next doc:
            docID = baseScorer.nextDoc();
            continue nextDoc;
          } else {
            failedDim = dim;
          }
        }
      }

      collectDocID = docID;

      // TODO: we could score on demand instead since we are
      // DAAT (doc-at-a-time) here:
      collectScore = baseScorer.score();

      if (failedDim == -1) {
        collectHit(collector, sidewaysCollectors);
      } else {
        collectNearMiss(sidewaysCollectors, failedDim);
      }

      docID = baseScorer.nextDoc();
    }
  }

  private void collectHit(Collector collector, Collector[] sidewaysCollectors) throws IOException {
    //if (DEBUG) {
    //  System.out.println("      hit");
    //}

    collector.collect(collectDocID);
    if (drillDownCollector != null) {
      drillDownCollector.collect(collectDocID);
    }

    // TODO: we could "fix" faceting of the sideways counts
    // to do this "union" (of the drill down hits) in the
    // end instead:

    // Tally sideways counts:
    for(int dim=0;dim<sidewaysCollectors.length;dim++) {
      sidewaysCollectors[dim].collect(collectDocID);
    }
  }

  private void collectNearMiss(Collector[] sidewaysCollectors, int dim) throws IOException {
    //if (DEBUG) {
    //  System.out.println("      missingDim=" + dim);
    //}
    sidewaysCollectors[dim].collect(collectDocID);
  }

  private void doUnionScoring(Collector collector, DocsEnum[][] docsEnums, Collector[] sidewaysCollectors) throws IOException {
    //if (DEBUG) {
    //  System.out.println("  doUnionScoring");
    //}

    final int maxDoc = context.reader().maxDoc();
    final int numDims = dims.length;

    // TODO: maybe a class like BS, instead of parallel arrays
    int[] filledSlots = new int[CHUNK];
    int[] docIDs = new int[CHUNK];
    float[] scores = new float[CHUNK];
    int[] missingDims = new int[CHUNK];
    int[] counts = new int[CHUNK];

    docIDs[0] = -1;

    // NOTE: this is basically a specialized version of
    // BooleanScorer, to the minShouldMatch=N-1 case, but
    // carefully tracking which dimension failed to match

    int nextChunkStart = CHUNK;

    while (true) {
      //if (DEBUG) {
      //  System.out.println("\ncycle nextChunkStart=" + nextChunkStart + " docIds[0]=" + docIDs[0]);
      //}
      int filledCount = 0;
      int docID = baseScorer.docID();
      //if (DEBUG) {
      //  System.out.println("  base docID=" + docID);
      //}
      while (docID < nextChunkStart) {
        int slot = docID & MASK;
        //if (DEBUG) {
        //  System.out.println("    docIDs[slot=" + slot + "]=" + docID + " id=" + context.reader().document(docID).get("id"));
        //}

        // Mark slot as valid:
        assert docIDs[slot] != docID: "slot=" + slot + " docID=" + docID;
        docIDs[slot] = docID;
        scores[slot] = baseScorer.score();
        filledSlots[filledCount++] = slot;
        missingDims[slot] = 0;
        counts[slot] = 1;

        docID = baseScorer.nextDoc();
      }

      if (filledCount == 0) {
        if (nextChunkStart >= maxDoc) {
          break;
        }
        nextChunkStart += CHUNK;
        continue;
      }

      // First drill-down dim, basically adds SHOULD onto
      // the baseQuery:
      //if (DEBUG) {
      //  System.out.println("  dim=0 [" + dims[0].dim + "]");
      //}
      for(DocsEnum docsEnum : docsEnums[0]) {
        if (docsEnum == null) {
          continue;
        }
        docID = docsEnum.docID();
        //if (DEBUG) {
        //  System.out.println("    start docID=" + docID);
        //}
        while (docID < nextChunkStart) {
          int slot = docID & MASK;
          if (docIDs[slot] == docID) {
            //if (DEBUG) {
            //  System.out.println("      set docID=" + docID + " count=2");
            //}
            missingDims[slot] = 1;
            counts[slot] = 2;
          }
          docID = docsEnum.nextDoc();
        }
      }

      for(int dim=1;dim<numDims;dim++) {
        //if (DEBUG) {
        //  System.out.println("  dim=" + dim + " [" + dims[dim].dim + "]");
        //}
        for(DocsEnum docsEnum : docsEnums[dim]) {
          if (docsEnum == null) {
            continue;
          }
          docID = docsEnum.docID();
          //if (DEBUG) {
          //  System.out.println("    start docID=" + docID);
          //}
          while (docID < nextChunkStart) {
            int slot = docID & MASK;
            if (docIDs[slot] == docID && counts[slot] >= dim) {
              // This doc is still in the running...
              // TODO: single-valued dims will always be true
              // below; we could somehow specialize
              if (missingDims[slot] >= dim) {
                //if (DEBUG) {
                //  System.out.println("      set docID=" + docID + " count=" + (dim+2));
                //}
                missingDims[slot] = dim+1;
                counts[slot] = dim+2;
              } else {
                //if (DEBUG) {
                //  System.out.println("      set docID=" + docID + " missing count=" + (dim+1));
                //}
                counts[slot] = dim+1;
              }
            }
            docID = docsEnum.nextDoc();
          }

          // TODO: sometimes use advance?

          /*
          int docBase = nextChunkStart - CHUNK;
          for(int i=0;i<filledCount;i++) {
            int slot = filledSlots[i];
            docID = docBase + filledSlots[i];
            if (docIDs[slot] == docID && counts[slot] >= dim) {
              // This doc is still in the running...
              int ddDocID = docsEnum.docID();
              if (ddDocID < docID) {
                ddDocID = docsEnum.advance(docID);
              }
              if (ddDocID == docID) {
                if (missingDims[slot] >= dim && counts[slot] == allMatchCount) {
                  //if (DEBUG) {
                  //  System.out.println("      set docID=" + docID + " count=" + (dim+2));
                  // }
                  missingDims[slot] = dim+1;
                  counts[slot] = dim+2;
                } else {
                  //if (DEBUG) {
                  //  System.out.println("      set docID=" + docID + " missing count=" + (dim+1));
                  // }
                  counts[slot] = dim+1;
                }
              }
            }
          }
          */
        }
      }

      // Collect:
      //if (DEBUG) {
      //  System.out.println("  now collect: " + filledCount + " hits");
      //}
      for(int i=0;i<filledCount;i++) {
        // NOTE: This is actually in-order collection,
        // because we only accept docs originally returned by
        // the baseScorer (i.e. that Scorer is AND'd)
        int slot = filledSlots[i];
        collectDocID = docIDs[slot];
        collectScore = scores[slot];
        //if (DEBUG) {
        //  System.out.println("    docID=" + docIDs[slot] + " count=" + counts[slot]);
        //}
        //System.out.println("  collect doc=" + collectDocID + " main.freq=" + (counts[slot]-1) + " main.doc=" + collectDocID + " exactCount=" + numDims);
        if (counts[slot] == 1+numDims) {
          //System.out.println("    hit");
          collectHit(collector, sidewaysCollectors);
        } else if (counts[slot] == numDims) {
          //System.out.println("    sw");
          collectNearMiss(sidewaysCollectors, missingDims[slot]);
        }
      }

      if (nextChunkStart >= maxDoc) {
        break;
      }

      nextChunkStart += CHUNK;
    }
  }

  @Override
  public int docID() {
    return collectDocID;
  }

  @Override
  public float score() {
    return collectScore;
  }

  @Override
  public int freq() {
    return 1+dims.length;
  }

  @Override
  public int nextDoc() {
    throw new UnsupportedOperationException();
  }

  @Override
  public int advance(int target) {
    throw new UnsupportedOperationException();
  }

  @Override
  public long cost() {
    return baseScorer.cost();
  }

  @Override
  public Collection<ChildScorer> getChildren() {
    return Collections.singletonList(new ChildScorer(baseScorer, "MUST"));
  }

  static class DocsEnumsAndFreq implements Comparable<DocsEnumsAndFreq> {
    DocsEnum[] docsEnums;
    // Max cost for all docsEnums for this dim:
    long maxCost;
    Collector sidewaysCollector;
    String dim;

    @Override
    public int compareTo(DocsEnumsAndFreq other) {
      if (maxCost < other.maxCost) {
        return -1;
      } else if (maxCost > other.maxCost) {
        return 1;
      } else {
        return 0;
      }
    }
  }
}
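All three scoring strategies above (doDrillDownAdvanceScoring, doBaseAdvanceScoring via near-miss tracking, doUnionScoring) share the same chunked bookkeeping: docIDs are processed in windows of CHUNK=2048 and mapped to a slot with docID & MASK. Because CHUNK is a power of two, the mask equals docID % CHUNK without the division, and within one window of 2048 consecutive docIDs every docID maps to a distinct slot, which is why docIDs[slot] can double as an occupancy check. A small standalone demonstration (illustrative only, not from the source):

public class ChunkMaskDemo {
  static final int CHUNK = 2048;      // must be a power of two
  static final int MASK = CHUNK - 1;  // low 11 bits set

  public static void main(String[] args) {
    int[] someDocIDs = { 0, 5, 2047, 2048, 4095, 123456 };
    for (int docID : someDocIDs) {
      int slot = docID & MASK;
      // The bitmask and the modulo agree for any non-negative docID:
      assert slot == docID % CHUNK;
      System.out.println("docID=" + docID + " -> slot=" + slot);
    }
  }
}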
@@ -1,83 +0,0 @@
package org.apache.lucene.facet.search;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Provider of arrays used for facets aggregation. Returns either an
 * {@code int[]} or {@code float[]} of the specified array length. When the
 * arrays are no longer needed, you should call {@link #free()}, so that they
 * can e.g. be reclaimed.
 *
 * <p>
 * <b>NOTE:</b> if you need to reuse the allocated arrays between search
 * requests, use {@link ReusingFacetArrays}.
 *
 * <p>
 * <b>NOTE:</b> this class is not thread safe. You typically allocate it per
 * search.
 *
 * @lucene.experimental
 */
public class FacetArrays {

  private int[] ints;
  private float[] floats;

  public final int arrayLength;

  /** Arrays will be allocated at the specified length. */
  public FacetArrays(int arrayLength) {
    this.arrayLength = arrayLength;
  }

  protected float[] newFloatArray() {
    return new float[arrayLength];
  }

  protected int[] newIntArray() {
    return new int[arrayLength];
  }

  protected void doFree(float[] floats, int[] ints) {
    // Default implementation does nothing; subclasses such as
    // ReusingFacetArrays override this to reclaim the arrays for reuse.
  }

  /**
   * Notifies that the arrays obtained from {@link #getIntArray()}
   * or {@link #getFloatArray()} are no longer needed and can be freed.
   */
  public final void free() {
    doFree(floats, ints);
    ints = null;
    floats = null;
  }

  public final int[] getIntArray() {
    if (ints == null) {
      ints = newIntArray();
    }
    return ints;
  }

  public final float[] getFloatArray() {
    if (floats == null) {
      floats = newFloatArray();
    }
    return floats;
  }

}
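A short usage sketch for the class above; the array length and the tallied ordinal are arbitrary illustration, not taken from the source:

public class FacetArraysDemo {
  public static void main(String[] args) {
    FacetArrays arrays = new FacetArrays(8); // e.g. one counter per taxonomy ordinal
    int[] counts = arrays.getIntArray();     // lazily allocated on first call
    counts[3]++;                             // e.g. tally ordinal 3 during aggregation
    float[] weights = arrays.getFloatArray();
    weights[3] += 2.5f;
    arrays.free(); // releases the references; doFree() is the subclass hook
  }
}

Since getIntArray() and getFloatArray() hand out the same instance until free() is called, allocating one FacetArrays per search, as the javadoc advises, keeps the class safe without synchronization.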
@@ -1,212 +0,0 @@
package org.apache.lucene.facet.search;

import org.apache.lucene.facet.params.CategoryListParams.OrdinalPolicy;
import org.apache.lucene.facet.params.FacetIndexingParams;
import org.apache.lucene.facet.taxonomy.FacetLabel;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Defines an aggregation request for a category. Allows specifying the
 * {@link #numResults number of child categories} to return as well as
 * {@link #getSortOrder() which} categories to consider the "top" (highest or
 * lowest ranking ones).
 * <p>
 * If the category being aggregated is hierarchical, you can also specify the
 * {@link #setDepth(int) depth} up to which to aggregate child categories, as
 * well as how the result should be {@link #setResultMode(ResultMode) constructed}.
 *
 * @lucene.experimental
 */
public abstract class FacetRequest {

  /**
   * When {@link FacetRequest#getDepth()} is greater than 1, defines the
   * structure of the result as well as how constraints such as
   * {@link FacetRequest#numResults} and {@link FacetRequest#getNumLabel()} are
   * applied.
   */
  public enum ResultMode {
    /**
     * Constraints are applied per node, and the result has a full tree
     * structure. Default result mode.
     */
    PER_NODE_IN_TREE,

    /**
     * Constraints are applied globally, on the total number of results, and the
     * result has a flat structure.
     */
    GLOBAL_FLAT
  }

  /**
   * Defines which categories to return. If {@link #DESCENDING} (the default),
   * the highest {@link FacetRequest#numResults} weighted categories will be
   * returned, otherwise the lowest ones.
   */
  public enum SortOrder { ASCENDING, DESCENDING }

  /** The category being aggregated in this facet request. */
  public final FacetLabel categoryPath;

  /** The number of child categories to return for {@link #categoryPath}. */
  public final int numResults;

  private int numLabel;
  private int depth = 1;
  private SortOrder sortOrder = SortOrder.DESCENDING;
  private ResultMode resultMode = ResultMode.PER_NODE_IN_TREE;

  // Computed at construction; based on categoryPath and numResults.
  private final int hashCode;

  /**
   * Constructor with the given category to aggregate and the number of child
   * categories to return.
   *
   * @param path
   *          the category to aggregate. Cannot be {@code null}.
   * @param numResults
   *          the number of child categories to return. If set to
   *          {@code Integer.MAX_VALUE}, all immediate child categories will be
   *          returned. Must be greater than 0.
   */
  public FacetRequest(FacetLabel path, int numResults) {
    if (numResults <= 0) {
      throw new IllegalArgumentException("num results must be a positive (>0) number: " + numResults);
    }
    if (path == null) {
      throw new IllegalArgumentException("category path cannot be null!");
    }
    categoryPath = path;
    this.numResults = numResults;
    numLabel = numResults;
    hashCode = categoryPath.hashCode() ^ this.numResults;
  }

  /**
   * Returns the {@link FacetsAggregator} which can aggregate the categories of
   * this facet request. The aggregator is expected to aggregate category values
   * into {@link FacetArrays}. If the facet request does not support that, e.g.
   * {@link RangeFacetRequest}, it can return {@code null}. Note though that
   * such requests require a dedicated {@link FacetsAccumulator}.
   */
  public abstract FacetsAggregator createFacetsAggregator(FacetIndexingParams fip);

  @Override
  public boolean equals(Object o) {
    if (o instanceof FacetRequest) {
      FacetRequest that = (FacetRequest) o;
      return that.hashCode == this.hashCode &&
          that.categoryPath.equals(this.categoryPath) &&
          that.numResults == this.numResults &&
          that.depth == this.depth &&
          that.resultMode == this.resultMode &&
          that.numLabel == this.numLabel &&
          that.sortOrder == this.sortOrder;
    }
    return false;
  }

  /**
   * How deeply to look under {@link #categoryPath}. By default, only its
   * immediate children are aggregated (depth=1). If set to
   * {@code Integer.MAX_VALUE}, the entire sub-tree of the category will be
   * aggregated.
   * <p>
   * <b>NOTE:</b> setting depth to 0 means that only the category itself should
   * be aggregated. In that case, make sure to index the category with
   * {@link OrdinalPolicy#ALL_PARENTS}, unless it is not the root category (the
   * dimension), in which case {@link OrdinalPolicy#ALL_BUT_DIMENSION} is fine
   * too.
   */
  public final int getDepth() {
    // TODO an AUTO_EXPAND option could be useful
    return depth;
  }

  /**
   * Allows specifying the number of categories to label. By default all
   * returned categories are labeled.
   * <p>
   * This allows an app to request a large number of results to return, while
   * labeling them on demand (e.g. when the UI requests to show more
   * categories).
   */
  public final int getNumLabel() {
    return numLabel;
  }

  /** Returns the requested result mode (defaults to {@link ResultMode#PER_NODE_IN_TREE}). */
  public final ResultMode getResultMode() {
    return resultMode;
  }

  /** Returns the requested order of results (defaults to {@link SortOrder#DESCENDING}). */
  public final SortOrder getSortOrder() {
    return sortOrder;
  }

  @Override
  public int hashCode() {
    return hashCode;
  }

  /**
   * Sets the depth up to which to aggregate facets.
   *
   * @see #getDepth()
   */
  public void setDepth(int depth) {
    this.depth = depth;
  }

  /**
   * Sets the number of categories to label.
   *
   * @see #getNumLabel()
   */
  public void setNumLabel(int numLabel) {
    this.numLabel = numLabel;
  }

  /**
   * Sets the {@link ResultMode} for this request.
   *
   * @see #getResultMode()
   */
  public void setResultMode(ResultMode resultMode) {
    this.resultMode = resultMode;
  }

  /**
   * Sets the {@link SortOrder} for this request.
   *
   * @see #getSortOrder()
   */
  public void setSortOrder(SortOrder sortOrder) {
    this.sortOrder = sortOrder;
  }

  @Override
  public String toString() {
    return categoryPath.toString() + " nRes=" + numResults + " nLbl=" + numLabel;
  }

}
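FacetRequest is abstract, so the only thing a concrete request must supply is createFacetsAggregator. A minimal hypothetical subclass, leaning on the javadoc above, which permits returning null when a dedicated FacetsAccumulator will handle the request; the class name is invented for illustration and the class is assumed to live in the same org.apache.lucene.facet.search package:

import org.apache.lucene.facet.params.FacetIndexingParams;
import org.apache.lucene.facet.taxonomy.FacetLabel;

public class AccumulatorOnlyFacetRequest extends FacetRequest {

  public AccumulatorOnlyFacetRequest(FacetLabel path, int numResults) {
    super(path, numResults); // base class validates numResults > 0 and path != null
  }

  @Override
  public FacetsAggregator createFacetsAggregator(FacetIndexingParams fip) {
    // Per the base-class javadoc, null is legal here, but such a request
    // then requires a dedicated FacetsAccumulator.
    return null;
  }
}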
@@ -1,117 +0,0 @@
package org.apache.lucene.facet.search;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.lucene.facet.params.FacetIndexingParams;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.util.CollectionUtil;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Result of faceted search.
 *
 * @lucene.experimental
 */
public class FacetResult {

  private static FacetResultNode addIfNotExist(Map<FacetLabel, FacetResultNode> nodes, FacetResultNode node) {
    FacetResultNode n = nodes.get(node.label);
    if (n == null) {
      nodes.put(node.label, node);
      n = node;
    }
    return n;
  }

  private final FacetRequest facetRequest;
  private final FacetResultNode rootNode;
  private final int numValidDescendants;

  public FacetResult(FacetRequest facetRequest, FacetResultNode rootNode, int numValidDescendants) {
    this.facetRequest = facetRequest;
    this.rootNode = rootNode;
    this.numValidDescendants = numValidDescendants;
  }

  /**
   * Facet result node matching the root of the {@link #getFacetRequest() facet request}.
   *
   * @see #getFacetRequest()
   * @see FacetRequest#categoryPath
   */
  public final FacetResultNode getFacetResultNode() {
    return rootNode;
  }

  /**
   * Number of descendants of the {@link #getFacetResultNode() root facet result
   * node}, up to the requested depth.
   */
  public final int getNumValidDescendants() {
    return numValidDescendants;
  }

  /**
   * Request for which this result was obtained.
   */
  public final FacetRequest getFacetRequest() {
    return this.facetRequest;
  }

  /**
   * String representation of this facet result.
   * Use with caution: might return a very long string.
   *
   * @param prefix prefix for each result line
   * @see #toString()
   */
  public String toString(String prefix) {
    StringBuilder sb = new StringBuilder();
    String nl = "";

    // request
    if (this.facetRequest != null) {
      sb.append(nl).append(prefix).append("Request: ").append(
          this.facetRequest.toString());
      nl = "\n";
    }

    // total facets
    sb.append(nl).append(prefix).append("Num valid Descendants (up to specified depth): ").append(
        this.numValidDescendants);
    nl = "\n";

    // result node
    if (this.rootNode != null) {
      sb.append(nl).append(this.rootNode.toString(prefix + "\t"));
    }

    return sb.toString();
  }

  @Override
  public String toString() {
    return toString("");
  }

}
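A sketch of consuming FacetResult objects returned by some accumulator; the facetResults list is assumed to come from a search elsewhere, and the printer class is invented for illustration (assumed to sit in the same package so FacetResultNode is visible):

import java.util.List;

class FacetResultPrinter {
  static void print(List<FacetResult> facetResults) {
    for (FacetResult result : facetResults) {
      // The root node matches the request's categoryPath:
      FacetResultNode root = result.getFacetResultNode();
      System.out.println("request=" + result.getFacetRequest()
          + " root=" + root.label
          + " descendants=" + result.getNumValidDescendants());
      System.out.println(result.toString("  ")); // may be very long, per the javadoc
    }
  }
}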
Some files were not shown because too many files have changed in this diff.