LUCENE-4340: Move bloom PF utilities to lucene/codecs.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1381504 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Adrien Grand 2012-09-06 08:02:36 +00:00
parent db6badeb11
commit 3f9aa4dcbb
11 changed files with 64 additions and 158 deletions

View File

@ -17,7 +17,6 @@ package org.apache.lucene.codecs.bloom;
*/
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.FuzzySet;
/**

View File

@ -33,6 +33,7 @@ import org.apache.lucene.codecs.PostingsConsumer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.TermStats;
import org.apache.lucene.codecs.TermsConsumer;
import org.apache.lucene.codecs.bloom.FuzzySet.ContainsResult;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
@ -46,11 +47,8 @@ import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FuzzySet;
import org.apache.lucene.util.FuzzySet.ContainsResult;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.lucene.util.hash.MurmurHash2;
/**
* <p>

View File

@ -17,9 +17,6 @@ package org.apache.lucene.codecs.bloom;
*/
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.FuzzySet;
import org.apache.lucene.util.hash.HashFunction;
import org.apache.lucene.util.hash.MurmurHash2;
/**
* Default policy is to allocate a bitset with 10% saturation given a unique term per document.

View File

@ -1,4 +1,4 @@
package org.apache.lucene.util;
package org.apache.lucene.codecs.bloom;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -20,7 +20,8 @@ import java.io.IOException;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.hash.HashFunction;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
/**
* <p>
@ -45,9 +46,20 @@ import org.apache.lucene.util.hash.HashFunction;
* @lucene.experimental
*/
public class FuzzySet {
public static final int FUZZY_SERIALIZATION_VERSION=1;
public static final int VERSION_SPI = 1; // HashFunction used to be loaded through a SPI
public static final int VERSION_START = VERSION_SPI;
public static final int VERSION_CURRENT = 2;
/**
 * Returns the {@link HashFunction} associated with a serialization format version.
 * Every version between {@link #VERSION_START} and {@link #VERSION_CURRENT} (inclusive)
 * maps to {@link MurmurHash2#INSTANCE}.
 *
 * @param version the serialization format version to look up
 * @return the hash function for that version
 * @throws IllegalArgumentException if {@code version} is lower than {@link #VERSION_START}
 *         or higher than {@link #VERSION_CURRENT}
 */
public static HashFunction hashFunctionForVersion(int version) {
  // Reject formats that predate the oldest supported version.
  final boolean tooOld = version < VERSION_START;
  if (tooOld) {
    final String message = "Version " + version + " is too old, expected at least " + VERSION_START;
    throw new IllegalArgumentException(message);
  }
  // Reject formats written by a newer release than this code knows about.
  final boolean tooNew = version > VERSION_CURRENT;
  if (tooNew) {
    final String message = "Version " + version + " is too new, expected at most " + VERSION_CURRENT;
    throw new IllegalArgumentException(message);
  }
  // All supported versions share the same hash implementation.
  return MurmurHash2.INSTANCE;
}
/**
* Result from {@link FuzzySet#contains(BytesRef)}:
* can never return definitively YES (always MAYBE),
@ -174,8 +186,7 @@ public class FuzzySet {
*/
public void serialize(DataOutput out) throws IOException
{
out.writeInt(FUZZY_SERIALIZATION_VERSION);
out.writeString(hashFunction.getName());
out.writeInt(VERSION_CURRENT);
out.writeInt(bloomSize);
long[] bits = filter.getBits();
out.writeInt(bits.length);
@ -188,11 +199,10 @@ public class FuzzySet {
public static FuzzySet deserialize(DataInput in) throws IOException
{
int version=in.readInt();
if(version!=FUZZY_SERIALIZATION_VERSION)
{
throw new IOException("Error deserializing: set version is not "+FUZZY_SERIALIZATION_VERSION);
if (version == VERSION_SPI) {
in.readString();
}
HashFunction hashFunction=HashFunction.forName(in.readString());
final HashFunction hashFunction = hashFunctionForVersion(version);
int bloomSize=in.readInt();
int numLongs=in.readInt();
long[]longs=new long[numLongs];

View File

@ -0,0 +1,36 @@
package org.apache.lucene.codecs.bloom;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.util.BytesRef;
/**
* Base class for hashing functions that compute an int hash over a range of bytes.
* Subclasses are expected to provide threadsafe implementations of the hash function
* on the range of bytes referenced in the provided {@link BytesRef}
* @lucene.experimental
*/
public abstract class HashFunction {
/**
* Hashes the contents of the referenced bytes
* @param bytes the data to be hashed
* @return the hash of the bytes referenced by bytes.offset and length bytes.length
*/
public abstract int hash(BytesRef bytes);
}

View File

@ -1,4 +1,4 @@
package org.apache.lucene.util.hash;
package org.apache.lucene.codecs.bloom;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@ -32,14 +32,11 @@ import org.apache.lucene.util.BytesRef;
* </p>
* @lucene.experimental
*/
public class MurmurHash2 extends HashFunction{
public static final String HASH_NAME="MurmurHash2";
public MurmurHash2() {
super(HASH_NAME);
}
public final class MurmurHash2 extends HashFunction{
public static final MurmurHash2 INSTANCE = new MurmurHash2();
private MurmurHash2() {}
public static int hash(byte[] data, int seed, int offset, int len) {
int m = 0x5bd1e995;

View File

@ -1,84 +0,0 @@
package org.apache.lucene.util.hash;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.Set;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NamedSPILoader;
/**
 * Abstract base for named hash functions that are looked up through a
 * {@link NamedSPILoader}. Implementations are expected to hash, in a threadsafe
 * manner, the range of bytes referenced by the supplied {@link BytesRef}.
 * @lucene.experimental
 */
public abstract class HashFunction implements NamedSPILoader.NamedSPI {

  /** Registry used for name-based lookup of hash function implementations. */
  private static final NamedSPILoader<HashFunction> loader =
      new NamedSPILoader<HashFunction>(HashFunction.class);

  /** Name supplied at construction time; returned by {@link #getName()}. */
  private final String name;

  /**
   * Creates a hash function with the given name.
   * The name is first passed to {@link NamedSPILoader#checkServiceName(String)}.
   * @param name the name of this hash function
   */
  public HashFunction(String name) {
    NamedSPILoader.checkServiceName(name);
    this.name = name;
  }

  /**
   * Computes a hash over the referenced bytes.
   * @param bytes the data to be hashed
   * @return the hash of the bytes referenced by bytes.offset and length bytes.length
   */
  public abstract int hash(BytesRef bytes);

  /** Returns the name this hash function was constructed with. */
  @Override
  public final String getName() {
    return name;
  }

  /** Returns the name of this hash function. */
  @Override
  public String toString() {
    return getName();
  }

  /**
   * Looks up a hash function by name.
   * @param name the name to look up
   * @return the result of the loader's lookup for that name
   */
  public static HashFunction forName(String name) {
    return loader.lookup(name);
  }

  /**
   * Returns the names of all available hash functions.
   * @return the service names currently known to the loader
   */
  public static Set<String> availableHashFunctionNames() {
    return loader.availableServices();
  }

  /**
   * Reloads the hash function list from the given {@link ClassLoader}.
   * Changes to the function list are visible after the method ends, all
   * iterators ({@link #availableHashFunctionNames()},...) stay consistent.
   *
   * <p><b>NOTE:</b> Only new functions are added, existing ones are
   * never removed or replaced.
   *
   * <p><em>This method is expensive and should only be called for discovery
   * of new functions on the given classpath/classloader!</em>
   * @param classloader the class loader to scan for hash function implementations
   */
  public static void reloadHashFunctions(ClassLoader classloader) {
    loader.reload(classloader);
  }
}

View File

@ -1,25 +0,0 @@
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
</head>
<body>
Hashing functions loadable via SPI service
</body>
</html>

View File

@ -1,16 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
org.apache.lucene.util.hash.MurmurHash2

View File

@ -22,13 +22,10 @@ import java.io.IOException;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.bloom.BloomFilteringPostingsFormat;
import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.FuzzySet;
import org.apache.lucene.util.hash.MurmurHash2;
/**
* A class used for testing {@link BloomFilteringPostingsFormat} with a concrete

View File

@ -36,7 +36,6 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.util.hash.HashFunction;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.solr.common.ResourceLoader;
import org.apache.solr.handler.admin.CoreAdminHandler;
@ -176,8 +175,6 @@ public class SolrResourceLoader implements ResourceLoader
* this ResourceLoader.
*/
void reloadLuceneSPI() {
// Hash functions:
HashFunction.reloadHashFunctions(this.classLoader);
// Codecs:
PostingsFormat.reloadPostingsFormats(this.classLoader);
Codec.reloadCodecs(this.classLoader);