mirror of https://github.com/apache/lucene.git
LUCENE-2649: Objects in the FieldCache can optionally store valid Bits
Apologies for 'CTR' rather than 'RTC' -- we can always revert if I jumped the gun! git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1001303 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
ffe3bb6578
commit
aeab95d3af
|
@ -232,6 +232,9 @@ New features
|
||||||
* LUCENE-2648: PackedInts.Iterator now supports to advance by more than a
|
* LUCENE-2648: PackedInts.Iterator now supports to advance by more than a
|
||||||
single ordinal. (Simon Willnauer)
|
single ordinal. (Simon Willnauer)
|
||||||
|
|
||||||
|
* LUCENE-2649: Objects in the FieldCache can optionally store Bits
|
||||||
|
that mark which docs have real values in the native[] (ryan)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
|
|
||||||
* LUCENE-2410: ~20% speedup on exact (slop=0) PhraseQuery matching.
|
* LUCENE-2410: ~20% speedup on exact (slop=0) PhraseQuery matching.
|
||||||
|
|
|
@ -19,6 +19,8 @@ package org.apache.lucene.search;
|
||||||
|
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.TermsEnum;
|
import org.apache.lucene.index.TermsEnum;
|
||||||
|
import org.apache.lucene.search.cache.EntryCreator;
|
||||||
|
import org.apache.lucene.search.cache.CachedArray.*;
|
||||||
import org.apache.lucene.util.NumericUtils;
|
import org.apache.lucene.util.NumericUtils;
|
||||||
import org.apache.lucene.util.RamUsageEstimator;
|
import org.apache.lucene.util.RamUsageEstimator;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
@ -46,6 +48,14 @@ public interface FieldCache {
|
||||||
Object value;
|
Object value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Hack: When thrown from a Parser (NUMERIC_UTILS_* ones), this stops
|
||||||
|
* processing terms and returns the current FieldCache
|
||||||
|
* array.
|
||||||
|
*/
|
||||||
|
public static final class StopFillCacheException extends RuntimeException {
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Marker interface as super-interface to all parsers. It
|
* Marker interface as super-interface to all parsers. It
|
||||||
* is used to specify a custom parser to {@link
|
* is used to specify a custom parser to {@link
|
||||||
|
@ -314,6 +324,19 @@ public interface FieldCache {
|
||||||
public byte[] getBytes (IndexReader reader, String field, ByteParser parser)
|
public byte[] getBytes (IndexReader reader, String field, ByteParser parser)
|
||||||
throws IOException;
|
throws IOException;
|
||||||
|
|
||||||
|
/** Checks the internal cache for an appropriate entry, and if none is found,
|
||||||
|
* reads the terms in <code>field</code> as bytes and returns an array of
|
||||||
|
* size <code>reader.maxDoc()</code> of the value each document has in the
|
||||||
|
* given field.
|
||||||
|
* @param reader Used to get field values.
|
||||||
|
* @param field Which field contains the bytes.
|
||||||
|
* @param creator Used to make the ByteValues
|
||||||
|
* @return The values in the given field for each document.
|
||||||
|
* @throws IOException If any error occurs.
|
||||||
|
*/
|
||||||
|
public ByteValues getBytes(IndexReader reader, String field, EntryCreator<ByteValues> creator ) throws IOException;
|
||||||
|
|
||||||
|
|
||||||
/** Checks the internal cache for an appropriate entry, and if none is
|
/** Checks the internal cache for an appropriate entry, and if none is
|
||||||
* found, reads the terms in <code>field</code> as shorts and returns an array
|
* found, reads the terms in <code>field</code> as shorts and returns an array
|
||||||
* of size <code>reader.maxDoc()</code> of the value each document
|
* of size <code>reader.maxDoc()</code> of the value each document
|
||||||
|
@ -339,6 +362,20 @@ public interface FieldCache {
|
||||||
public short[] getShorts (IndexReader reader, String field, ShortParser parser)
|
public short[] getShorts (IndexReader reader, String field, ShortParser parser)
|
||||||
throws IOException;
|
throws IOException;
|
||||||
|
|
||||||
|
|
||||||
|
/** Checks the internal cache for an appropriate entry, and if none is found,
|
||||||
|
* reads the terms in <code>field</code> as shorts and returns an array of
|
||||||
|
* size <code>reader.maxDoc()</code> of the value each document has in the
|
||||||
|
* given field.
|
||||||
|
* @param reader Used to get field values.
|
||||||
|
* @param field Which field contains the shorts.
|
||||||
|
* @param creator Computes short for string values.
|
||||||
|
* @return The values in the given field for each document.
|
||||||
|
* @throws IOException If any error occurs.
|
||||||
|
*/
|
||||||
|
public ShortValues getShorts(IndexReader reader, String field, EntryCreator<ShortValues> creator ) throws IOException;
|
||||||
|
|
||||||
|
|
||||||
/** Checks the internal cache for an appropriate entry, and if none is
|
/** Checks the internal cache for an appropriate entry, and if none is
|
||||||
* found, reads the terms in <code>field</code> as integers and returns an array
|
* found, reads the terms in <code>field</code> as integers and returns an array
|
||||||
* of size <code>reader.maxDoc()</code> of the value each document
|
* of size <code>reader.maxDoc()</code> of the value each document
|
||||||
|
@ -364,6 +401,19 @@ public interface FieldCache {
|
||||||
public int[] getInts (IndexReader reader, String field, IntParser parser)
|
public int[] getInts (IndexReader reader, String field, IntParser parser)
|
||||||
throws IOException;
|
throws IOException;
|
||||||
|
|
||||||
|
/** Checks the internal cache for an appropriate entry, and if none is found,
|
||||||
|
* reads the terms in <code>field</code> as integers and returns an array of
|
||||||
|
* size <code>reader.maxDoc()</code> of the value each document has in the
|
||||||
|
* given field.
|
||||||
|
* @param reader Used to get field values.
|
||||||
|
* @param field Which field contains the integers.
|
||||||
|
* @param creator Computes integer for string values.
|
||||||
|
* @return The values in the given field for each document.
|
||||||
|
* @throws IOException If any error occurs.
|
||||||
|
*/
|
||||||
|
public IntValues getInts(IndexReader reader, String field, EntryCreator<IntValues> creator ) throws IOException;
|
||||||
|
|
||||||
|
|
||||||
/** Checks the internal cache for an appropriate entry, and if
|
/** Checks the internal cache for an appropriate entry, and if
|
||||||
* none is found, reads the terms in <code>field</code> as floats and returns an array
|
* none is found, reads the terms in <code>field</code> as floats and returns an array
|
||||||
* of size <code>reader.maxDoc()</code> of the value each document
|
* of size <code>reader.maxDoc()</code> of the value each document
|
||||||
|
@ -389,6 +439,19 @@ public interface FieldCache {
|
||||||
public float[] getFloats (IndexReader reader, String field,
|
public float[] getFloats (IndexReader reader, String field,
|
||||||
FloatParser parser) throws IOException;
|
FloatParser parser) throws IOException;
|
||||||
|
|
||||||
|
/** Checks the internal cache for an appropriate entry, and if
|
||||||
|
* none is found, reads the terms in <code>field</code> as floats and returns an array
|
||||||
|
* of size <code>reader.maxDoc()</code> of the value each document
|
||||||
|
* has in the given field.
|
||||||
|
* @param reader Used to get field values.
|
||||||
|
* @param field Which field contains the floats.
|
||||||
|
* @param creator Computes float for string values.
|
||||||
|
* @return The values in the given field for each document.
|
||||||
|
* @throws IOException If any error occurs.
|
||||||
|
*/
|
||||||
|
public FloatValues getFloats(IndexReader reader, String field, EntryCreator<FloatValues> creator ) throws IOException;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Checks the internal cache for an appropriate entry, and if none is
|
* Checks the internal cache for an appropriate entry, and if none is
|
||||||
* found, reads the terms in <code>field</code> as longs and returns an array
|
* found, reads the terms in <code>field</code> as longs and returns an array
|
||||||
|
@ -418,6 +481,20 @@ public interface FieldCache {
|
||||||
public long[] getLongs(IndexReader reader, String field, LongParser parser)
|
public long[] getLongs(IndexReader reader, String field, LongParser parser)
|
||||||
throws IOException;
|
throws IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks the internal cache for an appropriate entry, and if none is found,
|
||||||
|
* reads the terms in <code>field</code> as longs and returns an array of
|
||||||
|
* size <code>reader.maxDoc()</code> of the value each document has in the
|
||||||
|
* given field.
|
||||||
|
*
|
||||||
|
* @param reader Used to get field values.
|
||||||
|
* @param field Which field contains the longs.
|
||||||
|
* @param creator Computes long for string values.
|
||||||
|
* @return The values in the given field for each document.
|
||||||
|
* @throws IOException If any error occurs.
|
||||||
|
*/
|
||||||
|
public LongValues getLongs(IndexReader reader, String field, EntryCreator<LongValues> creator ) throws IOException;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Checks the internal cache for an appropriate entry, and if none is
|
* Checks the internal cache for an appropriate entry, and if none is
|
||||||
|
@ -448,6 +525,21 @@ public interface FieldCache {
|
||||||
public double[] getDoubles(IndexReader reader, String field, DoubleParser parser)
|
public double[] getDoubles(IndexReader reader, String field, DoubleParser parser)
|
||||||
throws IOException;
|
throws IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks the internal cache for an appropriate entry, and if none is found,
|
||||||
|
* reads the terms in <code>field</code> as doubles and returns an array of
|
||||||
|
* size <code>reader.maxDoc()</code> of the value each document has in the
|
||||||
|
* given field.
|
||||||
|
*
|
||||||
|
* @param reader Used to get field values.
|
||||||
|
* @param field Which field contains the doubles.
|
||||||
|
* @param creator Computes double for string values.
|
||||||
|
* @return The values in the given field for each document.
|
||||||
|
* @throws IOException If any error occurs.
|
||||||
|
*/
|
||||||
|
public DoubleValues getDoubles(IndexReader reader, String field, EntryCreator<DoubleValues> creator ) throws IOException;
|
||||||
|
|
||||||
|
|
||||||
/** Returned by {@link #getTerms} */
|
/** Returned by {@link #getTerms} */
|
||||||
public abstract static class DocTerms {
|
public abstract static class DocTerms {
|
||||||
/** The BytesRef argument must not be null; the method
|
/** The BytesRef argument must not be null; the method
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,131 @@
|
||||||
|
package org.apache.lucene.search.cache;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.DocsEnum;
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.MultiFields;
|
||||||
|
import org.apache.lucene.index.Terms;
|
||||||
|
import org.apache.lucene.index.TermsEnum;
|
||||||
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
|
import org.apache.lucene.search.FieldCache;
|
||||||
|
import org.apache.lucene.search.FieldCache.ByteParser;
|
||||||
|
import org.apache.lucene.search.cache.CachedArray.ByteValues;
|
||||||
|
import org.apache.lucene.util.Bits;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.OpenBitSet;
|
||||||
|
|
||||||
|
public class ByteValuesCreator extends CachedArrayCreator<ByteValues>
|
||||||
|
{
|
||||||
|
protected ByteParser parser;
|
||||||
|
|
||||||
|
public ByteValuesCreator( String field, ByteParser parser, int options )
|
||||||
|
{
|
||||||
|
super( field, options );
|
||||||
|
this.parser = parser;
|
||||||
|
}
|
||||||
|
|
||||||
|
public ByteValuesCreator( String field, ByteParser parser )
|
||||||
|
{
|
||||||
|
super( field );
|
||||||
|
this.parser = parser;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Class getArrayType() {
|
||||||
|
return Byte.class;
|
||||||
|
}
|
||||||
|
|
||||||
|
//--------------------------------------------------------------------------------
|
||||||
|
//--------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ByteValues create(IndexReader reader) throws IOException {
|
||||||
|
return validate( new ByteValues(), reader );
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ByteValues validate(ByteValues entry, IndexReader reader) throws IOException {
|
||||||
|
boolean ok = false;
|
||||||
|
if( hasOption(OPTION_CACHE_VALUES) ) {
|
||||||
|
ok = true;
|
||||||
|
if( entry.values == null ) {
|
||||||
|
fillByteValues(entry, reader, field);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if( hasOption(OPTION_CACHE_BITS) ) {
|
||||||
|
ok = true;
|
||||||
|
if( entry.valid == null ) {
|
||||||
|
fillValidBits(entry, reader, field);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if( !ok ) {
|
||||||
|
throw new RuntimeException( "the config must cache values and/or bits" );
|
||||||
|
}
|
||||||
|
return entry;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void fillByteValues( ByteValues vals, IndexReader reader, String field ) throws IOException
|
||||||
|
{
|
||||||
|
if( parser == null ) {
|
||||||
|
parser = FieldCache.DEFAULT_BYTE_PARSER;
|
||||||
|
}
|
||||||
|
assertSameParserAndResetCounts(vals, parser);
|
||||||
|
|
||||||
|
Terms terms = MultiFields.getTerms(reader, field);
|
||||||
|
int maxDoc = reader.maxDoc();
|
||||||
|
vals.values = new byte[maxDoc];
|
||||||
|
if (terms != null) {
|
||||||
|
final TermsEnum termsEnum = terms.iterator();
|
||||||
|
final Bits delDocs = MultiFields.getDeletedDocs(reader);
|
||||||
|
OpenBitSet validBits = (hasOption(OPTION_CACHE_BITS)) ? new OpenBitSet( maxDoc ) : null;
|
||||||
|
DocsEnum docs = null;
|
||||||
|
try {
|
||||||
|
while(true) {
|
||||||
|
final BytesRef term = termsEnum.next();
|
||||||
|
if (term == null) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
final byte termval = parser.parseByte(term);
|
||||||
|
docs = termsEnum.docs(delDocs, docs);
|
||||||
|
while (true) {
|
||||||
|
final int docID = docs.nextDoc();
|
||||||
|
if (docID == DocIdSetIterator.NO_MORE_DOCS) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
vals.values[docID] = termval;
|
||||||
|
vals.numDocs++;
|
||||||
|
if( validBits != null ) {
|
||||||
|
validBits.set( docID );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
vals.numTerms++;
|
||||||
|
}
|
||||||
|
} catch (FieldCache.StopFillCacheException stop) {}
|
||||||
|
|
||||||
|
if( vals.valid == null ) {
|
||||||
|
vals.valid = checkMatchAllBits( delDocs, validBits, vals.numDocs, maxDoc );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if( vals.valid == null && vals.numDocs < 1 ) {
|
||||||
|
vals.valid = new Bits.MatchNoBits( maxDoc );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,78 @@
|
||||||
|
package org.apache.lucene.search.cache;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.util.Bits;
|
||||||
|
|
||||||
|
public abstract class CachedArray
|
||||||
|
{
|
||||||
|
public Integer parserHashCode; // a flag to make sure you don't change what you are asking for in subsequent requests
|
||||||
|
public int numDocs;
|
||||||
|
public int numTerms;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* NOTE: these Bits may have false positives for deleted documents. That is,
|
||||||
|
* Documents that are deleted may be marked as valid but the array value is not.
|
||||||
|
*/
|
||||||
|
public Bits valid;
|
||||||
|
|
||||||
|
public CachedArray() {
|
||||||
|
this.parserHashCode = null;
|
||||||
|
this.numDocs = 0;
|
||||||
|
this.numTerms = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return the native array
|
||||||
|
*/
|
||||||
|
public abstract Object getRawArray();
|
||||||
|
|
||||||
|
//-------------------------------------------------------------
|
||||||
|
// Concrete Values
|
||||||
|
//-------------------------------------------------------------
|
||||||
|
|
||||||
|
public static class ByteValues extends CachedArray {
|
||||||
|
public byte[] values = null;
|
||||||
|
@Override public byte[] getRawArray() { return values; }
|
||||||
|
};
|
||||||
|
|
||||||
|
public static class ShortValues extends CachedArray {
|
||||||
|
public short[] values = null;
|
||||||
|
@Override public short[] getRawArray() { return values; }
|
||||||
|
};
|
||||||
|
|
||||||
|
public static class IntValues extends CachedArray {
|
||||||
|
public int[] values = null;
|
||||||
|
@Override public int[] getRawArray() { return values; }
|
||||||
|
};
|
||||||
|
|
||||||
|
public static class FloatValues extends CachedArray {
|
||||||
|
public float[] values = null;
|
||||||
|
@Override public float[] getRawArray() { return values; }
|
||||||
|
};
|
||||||
|
|
||||||
|
public static class LongValues extends CachedArray {
|
||||||
|
public long[] values = null;
|
||||||
|
@Override public long[] getRawArray() { return values; }
|
||||||
|
};
|
||||||
|
|
||||||
|
public static class DoubleValues extends CachedArray {
|
||||||
|
public double[] values = null;
|
||||||
|
@Override public double[] getRawArray() { return values; }
|
||||||
|
};
|
||||||
|
}
|
|
@ -0,0 +1,148 @@
|
||||||
|
package org.apache.lucene.search.cache;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.DocsEnum;
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.MultiFields;
|
||||||
|
import org.apache.lucene.index.Terms;
|
||||||
|
import org.apache.lucene.index.TermsEnum;
|
||||||
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
|
import org.apache.lucene.search.FieldCache.Parser;
|
||||||
|
import org.apache.lucene.util.Bits;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.OpenBitSet;
|
||||||
|
|
||||||
|
public abstract class CachedArrayCreator<T extends CachedArray> extends EntryCreatorWithOptions<T>
|
||||||
|
{
|
||||||
|
public static final int OPTION_VALIDATE = 1;
|
||||||
|
public static final int OPTION_CACHE_VALUES = 2;
|
||||||
|
public static final int OPTION_CACHE_BITS = 4;
|
||||||
|
|
||||||
|
// Composite Options Fields
|
||||||
|
public static final int CACHE_VALUES_AND_BITS = OPTION_CACHE_VALUES ^ OPTION_CACHE_BITS;
|
||||||
|
public static final int CACHE_VALUES_AND_BITS_VALIDATE = OPTION_CACHE_VALUES ^ OPTION_CACHE_BITS ^ OPTION_VALIDATE;
|
||||||
|
|
||||||
|
public String field;
|
||||||
|
|
||||||
|
public CachedArrayCreator( String field )
|
||||||
|
{
|
||||||
|
super( OPTION_CACHE_VALUES ^ OPTION_VALIDATE );
|
||||||
|
if( field == null ) {
|
||||||
|
throw new IllegalArgumentException( "field can not be null" );
|
||||||
|
}
|
||||||
|
this.field = field;
|
||||||
|
}
|
||||||
|
|
||||||
|
public CachedArrayCreator( String field, int flags )
|
||||||
|
{
|
||||||
|
super( flags );
|
||||||
|
if( field == null ) {
|
||||||
|
throw new IllegalArgumentException( "field can not be null" );
|
||||||
|
}
|
||||||
|
this.field = field;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Note that the 'flags' are not part of the key -- subsequent calls to the cache
|
||||||
|
* with different options will use the same cache entry.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public EntryKey getCacheKey() {
|
||||||
|
return new SimpleEntryKey( CachedArray.class, getArrayType(), field );
|
||||||
|
//return new Integer( CachedArrayCreator.class.hashCode() ^ getArrayType().hashCode() ^ field.hashCode() );
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Return the type that the array will hold */
|
||||||
|
public abstract Class getArrayType();
|
||||||
|
|
||||||
|
protected void assertSameParserAndResetCounts(T value, Parser parser)
|
||||||
|
{
|
||||||
|
int parserHashCode = parser.hashCode();
|
||||||
|
if( value.parserHashCode != null && value.parserHashCode != parserHashCode ) {
|
||||||
|
throw new RuntimeException( "Parser changed in subsequet call. "
|
||||||
|
+value.parserHashCode+" != "+parserHashCode + " :: " + parser );
|
||||||
|
}
|
||||||
|
value.parserHashCode = parserHashCode;
|
||||||
|
value.numDocs = value.numTerms = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Utility function to help check what bits are valid
|
||||||
|
*/
|
||||||
|
protected Bits checkMatchAllBits( Bits deleted, OpenBitSet valid, int maxDocs, int numDocs )
|
||||||
|
{
|
||||||
|
if( numDocs != maxDocs ) {
|
||||||
|
if( hasOption( OPTION_CACHE_BITS ) ) {
|
||||||
|
if( deleted == null ) {
|
||||||
|
for( int i=0; i<maxDocs; i++ ) {
|
||||||
|
if( !valid.get(i) ) {
|
||||||
|
return valid;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
for( int i=0; i<maxDocs; i++ ) {
|
||||||
|
if( !deleted.get(i) && !valid.get(i) ) {
|
||||||
|
return valid;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return new Bits.MatchAllBits( maxDocs );
|
||||||
|
}
|
||||||
|
|
||||||
|
public void fillValidBits( T vals, IndexReader reader, String field ) throws IOException
|
||||||
|
{
|
||||||
|
vals.numDocs = vals.numTerms = 0;
|
||||||
|
Terms terms = MultiFields.getTerms(reader, field);
|
||||||
|
if (terms != null) {
|
||||||
|
final TermsEnum termsEnum = terms.iterator();
|
||||||
|
final Bits delDocs = MultiFields.getDeletedDocs(reader);
|
||||||
|
OpenBitSet validBits = new OpenBitSet( reader.maxDoc() );
|
||||||
|
DocsEnum docs = null;
|
||||||
|
while(true) {
|
||||||
|
final BytesRef term = termsEnum.next();
|
||||||
|
if (term == null) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
docs = termsEnum.docs(delDocs, docs);
|
||||||
|
while (true) {
|
||||||
|
final int docID = docs.nextDoc();
|
||||||
|
if (docID == DocIdSetIterator.NO_MORE_DOCS) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
validBits.set( docID );
|
||||||
|
vals.numDocs++;
|
||||||
|
}
|
||||||
|
vals.numTerms++;
|
||||||
|
}
|
||||||
|
|
||||||
|
vals.valid = checkMatchAllBits( delDocs, validBits, vals.numDocs, reader.maxDoc() );
|
||||||
|
}
|
||||||
|
if( vals.numDocs < 1 ) {
|
||||||
|
vals.valid = new Bits.MatchNoBits( reader.maxDoc() );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,171 @@
|
||||||
|
package org.apache.lucene.search.cache;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.DocsEnum;
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.MultiFields;
|
||||||
|
import org.apache.lucene.index.Terms;
|
||||||
|
import org.apache.lucene.index.TermsEnum;
|
||||||
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
|
import org.apache.lucene.search.FieldCache.DocTerms;
|
||||||
|
import org.apache.lucene.util.Bits;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.PagedBytes;
|
||||||
|
import org.apache.lucene.util.StringHelper;
|
||||||
|
import org.apache.lucene.util.packed.GrowableWriter;
|
||||||
|
import org.apache.lucene.util.packed.PackedInts;
|
||||||
|
|
||||||
|
// TODO: if DocTermsIndex was already created, we should share it...
|
||||||
|
public class DocTermsCreator<T extends DocTerms> extends EntryCreatorWithOptions<T>
|
||||||
|
{
|
||||||
|
public static final int FASTER_BUT_MORE_RAM = 2;
|
||||||
|
|
||||||
|
public String field;
|
||||||
|
|
||||||
|
public DocTermsCreator( String field )
|
||||||
|
{
|
||||||
|
super( FASTER_BUT_MORE_RAM ); // By default turn on FASTER_BUT_MORE_RAM
|
||||||
|
if( field == null ) {
|
||||||
|
throw new IllegalArgumentException( "field can not be null" );
|
||||||
|
}
|
||||||
|
this.field = field;
|
||||||
|
}
|
||||||
|
|
||||||
|
public DocTermsCreator( String field, int flags )
|
||||||
|
{
|
||||||
|
super( flags );
|
||||||
|
if( field == null ) {
|
||||||
|
throw new IllegalArgumentException( "field can not be null" );
|
||||||
|
}
|
||||||
|
this.field = field;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public SimpleEntryKey getCacheKey() {
|
||||||
|
return new SimpleEntryKey( DocTermsCreator.class, field );
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public T create(IndexReader reader) throws IOException {
|
||||||
|
|
||||||
|
String field = StringHelper.intern(this.field); // TODO?? necessary?
|
||||||
|
Terms terms = MultiFields.getTerms(reader, field);
|
||||||
|
|
||||||
|
final boolean fasterButMoreRAM = hasOption( FASTER_BUT_MORE_RAM );
|
||||||
|
final int termCountHardLimit = reader.maxDoc();
|
||||||
|
|
||||||
|
// Holds the actual term data, expanded.
|
||||||
|
final PagedBytes bytes = new PagedBytes(15);
|
||||||
|
|
||||||
|
int startBPV;
|
||||||
|
|
||||||
|
if (terms != null) {
|
||||||
|
// Try for coarse estimate for number of bits; this
|
||||||
|
// should be an underestimate most of the time, which
|
||||||
|
// is fine -- GrowableWriter will reallocate as needed
|
||||||
|
long numUniqueTerms = 0;
|
||||||
|
try {
|
||||||
|
numUniqueTerms = terms.getUniqueTermCount();
|
||||||
|
} catch (UnsupportedOperationException uoe) {
|
||||||
|
numUniqueTerms = -1;
|
||||||
|
}
|
||||||
|
if (numUniqueTerms != -1) {
|
||||||
|
if (numUniqueTerms > termCountHardLimit) {
|
||||||
|
numUniqueTerms = termCountHardLimit;
|
||||||
|
}
|
||||||
|
startBPV = PackedInts.bitsRequired(numUniqueTerms*4);
|
||||||
|
} else {
|
||||||
|
startBPV = 1;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
startBPV = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
final GrowableWriter docToOffset = new GrowableWriter(startBPV, reader.maxDoc(), fasterButMoreRAM);
|
||||||
|
|
||||||
|
// pointer==0 means not set
|
||||||
|
bytes.copyUsingLengthPrefix(new BytesRef());
|
||||||
|
|
||||||
|
if (terms != null) {
|
||||||
|
int termCount = 0;
|
||||||
|
final TermsEnum termsEnum = terms.iterator();
|
||||||
|
final Bits delDocs = MultiFields.getDeletedDocs(reader);
|
||||||
|
DocsEnum docs = null;
|
||||||
|
while(true) {
|
||||||
|
if (termCount++ == termCountHardLimit) {
|
||||||
|
// app is misusing the API (there is more than
|
||||||
|
// one term per doc); in this case we make best
|
||||||
|
// effort to load what we can (see LUCENE-2142)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
final BytesRef term = termsEnum.next();
|
||||||
|
if (term == null) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
final long pointer = bytes.copyUsingLengthPrefix(term);
|
||||||
|
docs = termsEnum.docs(delDocs, docs);
|
||||||
|
while (true) {
|
||||||
|
final int docID = docs.nextDoc();
|
||||||
|
if (docID == DocIdSetIterator.NO_MORE_DOCS) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
docToOffset.set(docID, pointer);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// maybe an int-only impl?
|
||||||
|
return (T)new DocTermsImpl(bytes.freeze(true), docToOffset.getMutable());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public T validate(T entry, IndexReader reader) throws IOException {
|
||||||
|
// TODO? nothing? perhaps subsequent call with FASTER_BUT_MORE_RAM?
|
||||||
|
return entry;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class DocTermsImpl extends DocTerms {
|
||||||
|
private final PagedBytes.Reader bytes;
|
||||||
|
private final PackedInts.Reader docToOffset;
|
||||||
|
|
||||||
|
public DocTermsImpl(PagedBytes.Reader bytes, PackedInts.Reader docToOffset) {
|
||||||
|
this.bytes = bytes;
|
||||||
|
this.docToOffset = docToOffset;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int size() {
|
||||||
|
return docToOffset.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean exists(int docID) {
|
||||||
|
return docToOffset.get(docID) == 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public BytesRef getTerm(int docID, BytesRef ret) {
|
||||||
|
final int pointer = (int) docToOffset.get(docID);
|
||||||
|
return bytes.fillUsingLengthPrefix(ret, pointer);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,318 @@
|
||||||
|
package org.apache.lucene.search.cache;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Comparator;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.DocsAndPositionsEnum;
|
||||||
|
import org.apache.lucene.index.DocsEnum;
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.MultiFields;
|
||||||
|
import org.apache.lucene.index.Terms;
|
||||||
|
import org.apache.lucene.index.TermsEnum;
|
||||||
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
|
import org.apache.lucene.search.FieldCache.DocTermsIndex;
|
||||||
|
import org.apache.lucene.util.ArrayUtil;
|
||||||
|
import org.apache.lucene.util.Bits;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.PagedBytes;
|
||||||
|
import org.apache.lucene.util.StringHelper;
|
||||||
|
import org.apache.lucene.util.packed.GrowableWriter;
|
||||||
|
import org.apache.lucene.util.packed.PackedInts;
|
||||||
|
|
||||||
|
public class DocTermsIndexCreator<T extends DocTermsIndex> extends EntryCreatorWithOptions<T>
|
||||||
|
{
|
||||||
|
public static final int FASTER_BUT_MORE_RAM = 2;
|
||||||
|
|
||||||
|
public String field;
|
||||||
|
|
||||||
|
public DocTermsIndexCreator( String field )
|
||||||
|
{
|
||||||
|
super( FASTER_BUT_MORE_RAM ); // By default turn on FASTER_BUT_MORE_RAM
|
||||||
|
if( field == null ) {
|
||||||
|
throw new IllegalArgumentException( "field can not be null" );
|
||||||
|
}
|
||||||
|
this.field = field;
|
||||||
|
}
|
||||||
|
|
||||||
|
public DocTermsIndexCreator( String field, int flags )
|
||||||
|
{
|
||||||
|
super( flags );
|
||||||
|
if( field == null ) {
|
||||||
|
throw new IllegalArgumentException( "field can not be null" );
|
||||||
|
}
|
||||||
|
this.field = field;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public EntryKey getCacheKey() {
|
||||||
|
return new SimpleEntryKey( DocTermsIndexCreator.class, field );
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public T create(IndexReader reader) throws IOException
|
||||||
|
{
|
||||||
|
String field = StringHelper.intern(this.field); // TODO?? necessary?
|
||||||
|
Terms terms = MultiFields.getTerms(reader, field);
|
||||||
|
|
||||||
|
final boolean fasterButMoreRAM = hasOption(FASTER_BUT_MORE_RAM);
|
||||||
|
|
||||||
|
final PagedBytes bytes = new PagedBytes(15);
|
||||||
|
|
||||||
|
int startBytesBPV;
|
||||||
|
int startTermsBPV;
|
||||||
|
int startNumUniqueTerms;
|
||||||
|
|
||||||
|
int maxDoc = reader.maxDoc();
|
||||||
|
final int termCountHardLimit;
|
||||||
|
if (maxDoc == Integer.MAX_VALUE) {
|
||||||
|
termCountHardLimit = Integer.MAX_VALUE;
|
||||||
|
} else {
|
||||||
|
termCountHardLimit = maxDoc+1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (terms != null) {
|
||||||
|
// Try for coarse estimate for number of bits; this
|
||||||
|
// should be an underestimate most of the time, which
|
||||||
|
// is fine -- GrowableWriter will reallocate as needed
|
||||||
|
long numUniqueTerms = 0;
|
||||||
|
try {
|
||||||
|
numUniqueTerms = terms.getUniqueTermCount();
|
||||||
|
} catch (UnsupportedOperationException uoe) {
|
||||||
|
numUniqueTerms = -1;
|
||||||
|
}
|
||||||
|
if (numUniqueTerms != -1) {
|
||||||
|
|
||||||
|
if (numUniqueTerms > termCountHardLimit) {
|
||||||
|
// app is misusing the API (there is more than
|
||||||
|
// one term per doc); in this case we make best
|
||||||
|
// effort to load what we can (see LUCENE-2142)
|
||||||
|
numUniqueTerms = termCountHardLimit;
|
||||||
|
}
|
||||||
|
|
||||||
|
startBytesBPV = PackedInts.bitsRequired(numUniqueTerms*4);
|
||||||
|
startTermsBPV = PackedInts.bitsRequired(numUniqueTerms);
|
||||||
|
|
||||||
|
startNumUniqueTerms = (int) numUniqueTerms;
|
||||||
|
} else {
|
||||||
|
startBytesBPV = 1;
|
||||||
|
startTermsBPV = 1;
|
||||||
|
startNumUniqueTerms = 1;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
startBytesBPV = 1;
|
||||||
|
startTermsBPV = 1;
|
||||||
|
startNumUniqueTerms = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
GrowableWriter termOrdToBytesOffset = new GrowableWriter(startBytesBPV, 1+startNumUniqueTerms, fasterButMoreRAM);
|
||||||
|
final GrowableWriter docToTermOrd = new GrowableWriter(startTermsBPV, reader.maxDoc(), fasterButMoreRAM);
|
||||||
|
|
||||||
|
// 0 is reserved for "unset"
|
||||||
|
bytes.copyUsingLengthPrefix(new BytesRef());
|
||||||
|
int termOrd = 1;
|
||||||
|
|
||||||
|
if (terms != null) {
|
||||||
|
final TermsEnum termsEnum = terms.iterator();
|
||||||
|
final Bits delDocs = MultiFields.getDeletedDocs(reader);
|
||||||
|
DocsEnum docs = null;
|
||||||
|
|
||||||
|
while(true) {
|
||||||
|
final BytesRef term = termsEnum.next();
|
||||||
|
if (term == null) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (termOrd >= termCountHardLimit) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (termOrd == termOrdToBytesOffset.size()) {
|
||||||
|
// NOTE: this code only runs if the incoming
|
||||||
|
// reader impl doesn't implement
|
||||||
|
// getUniqueTermCount (which should be uncommon)
|
||||||
|
termOrdToBytesOffset = termOrdToBytesOffset.resize(ArrayUtil.oversize(1+termOrd, 1));
|
||||||
|
}
|
||||||
|
termOrdToBytesOffset.set(termOrd, bytes.copyUsingLengthPrefix(term));
|
||||||
|
docs = termsEnum.docs(delDocs, docs);
|
||||||
|
while (true) {
|
||||||
|
final int docID = docs.nextDoc();
|
||||||
|
if (docID == DocIdSetIterator.NO_MORE_DOCS) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
docToTermOrd.set(docID, termOrd);
|
||||||
|
}
|
||||||
|
termOrd++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (termOrdToBytesOffset.size() > termOrd) {
|
||||||
|
termOrdToBytesOffset = termOrdToBytesOffset.resize(termOrd);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// maybe an int-only impl?
|
||||||
|
return (T)new DocTermsIndexImpl(bytes.freeze(true), termOrdToBytesOffset.getMutable(), docToTermOrd.getMutable(), termOrd);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public T validate(T entry, IndexReader reader) throws IOException {
|
||||||
|
// TODO? nothing? perhaps subsequent call with FASTER_BUT_MORE_RAM?
|
||||||
|
return entry;
|
||||||
|
}
|
||||||
|
|
||||||
|
//-----------------------------------------------------------------------------
|
||||||
|
//-----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
public static class DocTermsIndexImpl extends DocTermsIndex {
|
||||||
|
private final PagedBytes.Reader bytes;
|
||||||
|
private final PackedInts.Reader termOrdToBytesOffset;
|
||||||
|
private final PackedInts.Reader docToTermOrd;
|
||||||
|
private final int numOrd;
|
||||||
|
|
||||||
|
public DocTermsIndexImpl(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, PackedInts.Reader docToTermOrd, int numOrd) {
|
||||||
|
this.bytes = bytes;
|
||||||
|
this.docToTermOrd = docToTermOrd;
|
||||||
|
this.termOrdToBytesOffset = termOrdToBytesOffset;
|
||||||
|
this.numOrd = numOrd;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public PackedInts.Reader getDocToOrd() {
|
||||||
|
return docToTermOrd;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int numOrd() {
|
||||||
|
return numOrd;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int getOrd(int docID) {
|
||||||
|
return (int) docToTermOrd.get(docID);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int size() {
|
||||||
|
return docToTermOrd.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public BytesRef lookup(int ord, BytesRef ret) {
|
||||||
|
return bytes.fillUsingLengthPrefix(ret, termOrdToBytesOffset.get(ord));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public TermsEnum getTermsEnum() {
|
||||||
|
return this.new DocTermsIndexEnum();
|
||||||
|
}
|
||||||
|
|
||||||
|
class DocTermsIndexEnum extends TermsEnum {
|
||||||
|
int currentOrd;
|
||||||
|
int currentBlockNumber;
|
||||||
|
int end; // end position in the current block
|
||||||
|
final byte[][] blocks;
|
||||||
|
final int[] blockEnds;
|
||||||
|
|
||||||
|
final BytesRef term = new BytesRef();
|
||||||
|
|
||||||
|
public DocTermsIndexEnum() {
|
||||||
|
currentOrd = 0;
|
||||||
|
currentBlockNumber = 0;
|
||||||
|
blocks = bytes.getBlocks();
|
||||||
|
blockEnds = bytes.getBlockEnds();
|
||||||
|
currentBlockNumber = bytes.fillUsingLengthPrefix2(term, termOrdToBytesOffset.get(0));
|
||||||
|
end = blockEnds[currentBlockNumber];
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public SeekStatus seek(BytesRef text, boolean useCache) throws IOException {
|
||||||
|
// TODO - we can support with binary search
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public SeekStatus seek(long ord) throws IOException {
|
||||||
|
assert(ord >= 0 && ord <= numOrd);
|
||||||
|
// TODO: if gap is small, could iterate from current position? Or let user decide that?
|
||||||
|
currentBlockNumber = bytes.fillUsingLengthPrefix2(term, termOrdToBytesOffset.get((int)ord));
|
||||||
|
end = blockEnds[currentBlockNumber];
|
||||||
|
currentOrd = (int)ord;
|
||||||
|
return SeekStatus.FOUND;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public BytesRef next() throws IOException {
|
||||||
|
int start = term.offset + term.length;
|
||||||
|
if (start >= end) {
|
||||||
|
// switch byte blocks
|
||||||
|
if (currentBlockNumber +1 >= blocks.length) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
currentBlockNumber++;
|
||||||
|
term.bytes = blocks[currentBlockNumber];
|
||||||
|
end = blockEnds[currentBlockNumber];
|
||||||
|
start = 0;
|
||||||
|
if (end<=0) return null; // special case of empty last array
|
||||||
|
}
|
||||||
|
|
||||||
|
currentOrd++;
|
||||||
|
|
||||||
|
byte[] block = term.bytes;
|
||||||
|
if ((block[start] & 128) == 0) {
|
||||||
|
term.length = block[start];
|
||||||
|
term.offset = start+1;
|
||||||
|
} else {
|
||||||
|
term.length = (((block[start] & 0x7f)) << 8) | (block[1+start] & 0xff);
|
||||||
|
term.offset = start+2;
|
||||||
|
}
|
||||||
|
|
||||||
|
return term;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public BytesRef term() throws IOException {
|
||||||
|
return term;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long ord() throws IOException {
|
||||||
|
return currentOrd;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int docFreq() {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DocsEnum docs(Bits skipDocs, DocsEnum reuse) throws IOException {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Comparator<BytesRef> getComparator() throws IOException {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,150 @@
|
||||||
|
package org.apache.lucene.search.cache;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.DocsEnum;
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.MultiFields;
|
||||||
|
import org.apache.lucene.index.Terms;
|
||||||
|
import org.apache.lucene.index.TermsEnum;
|
||||||
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
|
import org.apache.lucene.search.FieldCache;
|
||||||
|
import org.apache.lucene.search.FieldCache.DoubleParser;
|
||||||
|
import org.apache.lucene.search.cache.CachedArray.DoubleValues;
|
||||||
|
import org.apache.lucene.util.Bits;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.OpenBitSet;
|
||||||
|
|
||||||
|
public class DoubleValuesCreator extends CachedArrayCreator<DoubleValues>
|
||||||
|
{
|
||||||
|
protected DoubleParser parser;
|
||||||
|
|
||||||
|
public DoubleValuesCreator( String field, DoubleParser parser, int options )
|
||||||
|
{
|
||||||
|
super( field, options );
|
||||||
|
this.parser = parser;
|
||||||
|
}
|
||||||
|
|
||||||
|
public DoubleValuesCreator( String field, DoubleParser parser )
|
||||||
|
{
|
||||||
|
super( field );
|
||||||
|
this.parser = parser;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Class getArrayType() {
|
||||||
|
return Double.class;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//--------------------------------------------------------------------------------
|
||||||
|
//--------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DoubleValues create(IndexReader reader) throws IOException {
|
||||||
|
return validate( new DoubleValues(), reader );
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DoubleValues validate(DoubleValues entry, IndexReader reader) throws IOException {
|
||||||
|
boolean ok = false;
|
||||||
|
if( hasOption(OPTION_CACHE_VALUES) ) {
|
||||||
|
ok = true;
|
||||||
|
if( entry.values == null ) {
|
||||||
|
fillDoubleValues(entry, reader, field);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if( hasOption(OPTION_CACHE_BITS) ) {
|
||||||
|
ok = true;
|
||||||
|
if( entry.valid == null ) {
|
||||||
|
fillValidBits(entry, reader, field);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if( !ok ) {
|
||||||
|
throw new RuntimeException( "the config must cache values and/or bits" );
|
||||||
|
}
|
||||||
|
return entry;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void fillDoubleValues( DoubleValues vals, IndexReader reader, String field ) throws IOException
|
||||||
|
{
|
||||||
|
if( parser == null ) {
|
||||||
|
try {
|
||||||
|
parser = FieldCache.DEFAULT_DOUBLE_PARSER;
|
||||||
|
fillDoubleValues( vals, reader, field );
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
catch (NumberFormatException ne) {
|
||||||
|
vals.parserHashCode = null; // wipe the previous one
|
||||||
|
parser = FieldCache.NUMERIC_UTILS_DOUBLE_PARSER;
|
||||||
|
fillDoubleValues( vals, reader, field );
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assertSameParserAndResetCounts(vals, parser);
|
||||||
|
|
||||||
|
Terms terms = MultiFields.getTerms(reader, field);
|
||||||
|
int maxDoc = reader.maxDoc();
|
||||||
|
vals.values = null;
|
||||||
|
if (terms != null) {
|
||||||
|
final TermsEnum termsEnum = terms.iterator();
|
||||||
|
final Bits delDocs = MultiFields.getDeletedDocs(reader);
|
||||||
|
OpenBitSet validBits = (hasOption(OPTION_CACHE_BITS)) ? new OpenBitSet( maxDoc ) : null;
|
||||||
|
DocsEnum docs = null;
|
||||||
|
try {
|
||||||
|
while(true) {
|
||||||
|
final BytesRef term = termsEnum.next();
|
||||||
|
if (term == null) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
final double termval = parser.parseDouble(term);
|
||||||
|
docs = termsEnum.docs(delDocs, docs);
|
||||||
|
while (true) {
|
||||||
|
final int docID = docs.nextDoc();
|
||||||
|
if (docID == DocIdSetIterator.NO_MORE_DOCS) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if(vals.values == null) {
|
||||||
|
vals.values = new double[maxDoc];
|
||||||
|
}
|
||||||
|
vals.values[docID] = termval;
|
||||||
|
vals.numDocs++;
|
||||||
|
if( validBits != null ) {
|
||||||
|
validBits.set( docID );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
vals.numTerms++;
|
||||||
|
}
|
||||||
|
} catch (FieldCache.StopFillCacheException stop) {}
|
||||||
|
|
||||||
|
if( vals.valid == null ) {
|
||||||
|
vals.valid = checkMatchAllBits( delDocs, validBits, vals.numDocs, maxDoc );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if(vals.values == null) {
|
||||||
|
vals.values = new double[maxDoc];
|
||||||
|
}
|
||||||
|
|
||||||
|
if( vals.valid == null && vals.numDocs < 1 ) {
|
||||||
|
vals.valid = new Bits.MatchNoBits( maxDoc );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,72 @@
|
||||||
|
package org.apache.lucene.search.cache;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create Cached Values for a given key
|
||||||
|
*
|
||||||
|
* @lucene.experimental
|
||||||
|
*/
|
||||||
|
public abstract class EntryCreator<T> implements Serializable
|
||||||
|
{
|
||||||
|
public abstract T create( IndexReader reader ) throws IOException;
|
||||||
|
public abstract T validate( T entry, IndexReader reader ) throws IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Indicate if a cached cached value should be checked before usage.
|
||||||
|
* This is useful if an application wants to support subsequent calls
|
||||||
|
* to the same cached object that may alter the cached object. If
|
||||||
|
* an application wants to avoid this (synchronized) check, it should
|
||||||
|
* return 'false'
|
||||||
|
*
|
||||||
|
* @return 'true' if the Cache should call 'validate' before returning a cached object
|
||||||
|
*/
|
||||||
|
public boolean shouldValidate() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return A key to identify valid cache entries for subsequent requests
|
||||||
|
*/
|
||||||
|
public abstract EntryKey getCacheKey();
|
||||||
|
|
||||||
|
|
||||||
|
//------------------------------------------------------------------------
|
||||||
|
// The Following code is a hack to make things work while the
|
||||||
|
// EntryCreator is stored in in the FieldCache.
|
||||||
|
// When the FieldCache is replaced with a simpler map LUCENE-2665
|
||||||
|
// This can be removed
|
||||||
|
//------------------------------------------------------------------------
|
||||||
|
|
||||||
|
public boolean equals(Object obj) {
|
||||||
|
if( obj instanceof EntryCreator ) {
|
||||||
|
return getCacheKey().equals( ((EntryCreator)obj).getCacheKey() );
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
return getCacheKey().hashCode();
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,41 @@
|
||||||
|
package org.apache.lucene.search.cache;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
public abstract class EntryCreatorWithOptions<T> extends EntryCreator<T>
|
||||||
|
{
|
||||||
|
public static final int OPTION_VALIDATE = 1;
|
||||||
|
|
||||||
|
public int flags;
|
||||||
|
|
||||||
|
public EntryCreatorWithOptions( int flag ) {
|
||||||
|
this.flags = flag;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean shouldValidate() {
|
||||||
|
return hasOption( OPTION_VALIDATE );
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean hasOption( int key )
|
||||||
|
{
|
||||||
|
return (flags & key) == key;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,9 @@
|
||||||
|
package org.apache.lucene.search.cache;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A Simple marker class -- Perhaps it could/should just be an Object
|
||||||
|
*/
|
||||||
|
public abstract class EntryKey {
  // Intentionally empty: this type declares no members of its own.
}
|
|
@ -0,0 +1,150 @@
|
||||||
|
package org.apache.lucene.search.cache;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.DocsEnum;
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.MultiFields;
|
||||||
|
import org.apache.lucene.index.Terms;
|
||||||
|
import org.apache.lucene.index.TermsEnum;
|
||||||
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
|
import org.apache.lucene.search.FieldCache;
|
||||||
|
import org.apache.lucene.search.FieldCache.FloatParser;
|
||||||
|
import org.apache.lucene.search.cache.CachedArray.FloatValues;
|
||||||
|
import org.apache.lucene.util.Bits;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.OpenBitSet;
|
||||||
|
|
||||||
|
public class FloatValuesCreator extends CachedArrayCreator<FloatValues>
|
||||||
|
{
|
||||||
|
protected FloatParser parser;
|
||||||
|
|
||||||
|
public FloatValuesCreator( String field, FloatParser parser, int options )
|
||||||
|
{
|
||||||
|
super( field, options );
|
||||||
|
this.parser = parser;
|
||||||
|
}
|
||||||
|
|
||||||
|
public FloatValuesCreator( String field, FloatParser parser )
|
||||||
|
{
|
||||||
|
super( field );
|
||||||
|
this.parser = parser;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Class getArrayType() {
|
||||||
|
return Float.class;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//--------------------------------------------------------------------------------
|
||||||
|
//--------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public FloatValues create(IndexReader reader) throws IOException {
|
||||||
|
return validate( new FloatValues(), reader );
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public FloatValues validate(FloatValues entry, IndexReader reader) throws IOException {
|
||||||
|
boolean ok = false;
|
||||||
|
if( hasOption(OPTION_CACHE_VALUES) ) {
|
||||||
|
ok = true;
|
||||||
|
if( entry.values == null ) {
|
||||||
|
fillFloatValues(entry, reader, field);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if( hasOption(OPTION_CACHE_BITS) ) {
|
||||||
|
ok = true;
|
||||||
|
if( entry.valid == null ) {
|
||||||
|
fillValidBits(entry, reader, field);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if( !ok ) {
|
||||||
|
throw new RuntimeException( "the config must cache values and/or bits" );
|
||||||
|
}
|
||||||
|
return entry;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void fillFloatValues( FloatValues vals, IndexReader reader, String field ) throws IOException
|
||||||
|
{
|
||||||
|
if( parser == null ) {
|
||||||
|
try {
|
||||||
|
parser = FieldCache.DEFAULT_FLOAT_PARSER;
|
||||||
|
fillFloatValues( vals, reader, field );
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
catch (NumberFormatException ne) {
|
||||||
|
vals.parserHashCode = null; // wipe the previous one
|
||||||
|
parser = FieldCache.NUMERIC_UTILS_FLOAT_PARSER;
|
||||||
|
fillFloatValues( vals, reader, field );
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assertSameParserAndResetCounts(vals, parser);
|
||||||
|
|
||||||
|
Terms terms = MultiFields.getTerms(reader, field);
|
||||||
|
int maxDoc = reader.maxDoc();
|
||||||
|
vals.values = null;
|
||||||
|
if (terms != null) {
|
||||||
|
final TermsEnum termsEnum = terms.iterator();
|
||||||
|
final Bits delDocs = MultiFields.getDeletedDocs(reader);
|
||||||
|
OpenBitSet validBits = (hasOption(OPTION_CACHE_BITS)) ? new OpenBitSet( maxDoc ) : null;
|
||||||
|
DocsEnum docs = null;
|
||||||
|
try {
|
||||||
|
while(true) {
|
||||||
|
final BytesRef term = termsEnum.next();
|
||||||
|
if (term == null) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
final float termval = parser.parseFloat(term);
|
||||||
|
docs = termsEnum.docs(delDocs, docs);
|
||||||
|
while (true) {
|
||||||
|
final int docID = docs.nextDoc();
|
||||||
|
if (docID == DocIdSetIterator.NO_MORE_DOCS) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if(vals.values == null) {
|
||||||
|
vals.values = new float[maxDoc];
|
||||||
|
}
|
||||||
|
vals.values[docID] = termval;
|
||||||
|
vals.numDocs++;
|
||||||
|
if( validBits != null ) {
|
||||||
|
validBits.set( docID );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
vals.numTerms++;
|
||||||
|
}
|
||||||
|
} catch (FieldCache.StopFillCacheException stop) {}
|
||||||
|
|
||||||
|
if( vals.valid == null ) {
|
||||||
|
vals.valid = checkMatchAllBits( delDocs, validBits, vals.numDocs, maxDoc );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if(vals.values == null) {
|
||||||
|
vals.values = new float[maxDoc];
|
||||||
|
}
|
||||||
|
|
||||||
|
if( vals.valid == null && vals.numDocs < 1 ) {
|
||||||
|
vals.valid = new Bits.MatchNoBits( maxDoc );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,150 @@
|
||||||
|
package org.apache.lucene.search.cache;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.DocsEnum;
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.MultiFields;
|
||||||
|
import org.apache.lucene.index.Terms;
|
||||||
|
import org.apache.lucene.index.TermsEnum;
|
||||||
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
|
import org.apache.lucene.search.FieldCache;
|
||||||
|
import org.apache.lucene.search.FieldCache.IntParser;
|
||||||
|
import org.apache.lucene.search.cache.CachedArray.IntValues;
|
||||||
|
import org.apache.lucene.util.Bits;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.OpenBitSet;
|
||||||
|
|
||||||
|
public class IntValuesCreator extends CachedArrayCreator<IntValues>
|
||||||
|
{
|
||||||
|
protected IntParser parser;
|
||||||
|
|
||||||
|
/**
 * Creates a creator with explicit option flags.
 *
 * @param field field to load
 * @param parser term parser; may be null, in which case fillIntValues picks one
 * @param options option flags forwarded to the superclass constructor
 */
public IntValuesCreator( String field, IntParser parser, int options )
{
  super( field, options );
  this.parser = parser;
}
|
||||||
|
|
||||||
|
/**
 * Creates a creator that relies on the superclass's single-argument
 * constructor for its options.
 *
 * @param field field to load
 * @param parser term parser; may be null, in which case fillIntValues picks one
 */
public IntValuesCreator( String field, IntParser parser )
{
  super( field );
  this.parser = parser;
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Class getArrayType() {
|
||||||
|
return Integer.class;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//--------------------------------------------------------------------------------
|
||||||
|
//--------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IntValues create(IndexReader reader) throws IOException {
|
||||||
|
return validate( new IntValues(), reader );
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IntValues validate(IntValues entry, IndexReader reader) throws IOException {
|
||||||
|
boolean ok = false;
|
||||||
|
if( hasOption(OPTION_CACHE_VALUES) ) {
|
||||||
|
ok = true;
|
||||||
|
if( entry.values == null ) {
|
||||||
|
fillIntValues(entry, reader, field);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if( hasOption(OPTION_CACHE_BITS) ) {
|
||||||
|
ok = true;
|
||||||
|
if( entry.valid == null ) {
|
||||||
|
fillValidBits(entry, reader, field);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if( !ok ) {
|
||||||
|
throw new RuntimeException( "the config must cache values and/or bits" );
|
||||||
|
}
|
||||||
|
return entry;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void fillIntValues( IntValues vals, IndexReader reader, String field ) throws IOException
|
||||||
|
{
|
||||||
|
if( parser == null ) {
|
||||||
|
try {
|
||||||
|
parser = FieldCache.DEFAULT_INT_PARSER;
|
||||||
|
fillIntValues( vals, reader, field );
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
catch (NumberFormatException ne) {
|
||||||
|
vals.parserHashCode = null;
|
||||||
|
parser = FieldCache.NUMERIC_UTILS_INT_PARSER;
|
||||||
|
fillIntValues( vals, reader, field );
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assertSameParserAndResetCounts(vals, parser);
|
||||||
|
|
||||||
|
Terms terms = MultiFields.getTerms(reader, field);
|
||||||
|
int maxDoc = reader.maxDoc();
|
||||||
|
vals.values = null;
|
||||||
|
if (terms != null) {
|
||||||
|
final TermsEnum termsEnum = terms.iterator();
|
||||||
|
final Bits delDocs = MultiFields.getDeletedDocs(reader);
|
||||||
|
OpenBitSet validBits = (hasOption(OPTION_CACHE_BITS)) ? new OpenBitSet( maxDoc ) : null;
|
||||||
|
DocsEnum docs = null;
|
||||||
|
try {
|
||||||
|
while(true) {
|
||||||
|
final BytesRef term = termsEnum.next();
|
||||||
|
if (term == null) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
final int termval = parser.parseInt(term);
|
||||||
|
docs = termsEnum.docs(delDocs, docs);
|
||||||
|
while (true) {
|
||||||
|
final int docID = docs.nextDoc();
|
||||||
|
if (docID == DocIdSetIterator.NO_MORE_DOCS) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if(vals.values == null) {
|
||||||
|
vals.values = new int[maxDoc];
|
||||||
|
}
|
||||||
|
vals.values[docID] = termval;
|
||||||
|
vals.numDocs++;
|
||||||
|
if( validBits != null ) {
|
||||||
|
validBits.set( docID );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
vals.numTerms++;
|
||||||
|
}
|
||||||
|
} catch (FieldCache.StopFillCacheException stop) {}
|
||||||
|
|
||||||
|
if( vals.valid == null ) {
|
||||||
|
vals.valid = checkMatchAllBits( delDocs, validBits, vals.numDocs, maxDoc );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if(vals.values == null) {
|
||||||
|
vals.values = new int[maxDoc];
|
||||||
|
}
|
||||||
|
|
||||||
|
if( vals.valid == null && vals.numDocs < 1 ) {
|
||||||
|
vals.valid = new Bits.MatchNoBits( maxDoc );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,150 @@
|
||||||
|
package org.apache.lucene.search.cache;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.DocsEnum;
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.MultiFields;
|
||||||
|
import org.apache.lucene.index.Terms;
|
||||||
|
import org.apache.lucene.index.TermsEnum;
|
||||||
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
|
import org.apache.lucene.search.FieldCache;
|
||||||
|
import org.apache.lucene.search.FieldCache.LongParser;
|
||||||
|
import org.apache.lucene.search.cache.CachedArray.LongValues;
|
||||||
|
import org.apache.lucene.util.Bits;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.OpenBitSet;
|
||||||
|
|
||||||
|
public class LongValuesCreator extends CachedArrayCreator<LongValues>
|
||||||
|
{
|
||||||
|
protected LongParser parser;
|
||||||
|
|
||||||
|
public LongValuesCreator( String field, LongParser parser, int options )
|
||||||
|
{
|
||||||
|
super( field, options );
|
||||||
|
this.parser = parser;
|
||||||
|
}
|
||||||
|
|
||||||
|
public LongValuesCreator( String field, LongParser parser )
|
||||||
|
{
|
||||||
|
super( field );
|
||||||
|
this.parser = parser;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Class getArrayType() {
|
||||||
|
return Long.class;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//--------------------------------------------------------------------------------
|
||||||
|
//--------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public LongValues create(IndexReader reader) throws IOException {
|
||||||
|
return validate( new LongValues(), reader );
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public LongValues validate(LongValues entry, IndexReader reader) throws IOException {
|
||||||
|
boolean ok = false;
|
||||||
|
if( hasOption(OPTION_CACHE_VALUES) ) {
|
||||||
|
ok = true;
|
||||||
|
if( entry.values == null ) {
|
||||||
|
fillLongValues(entry, reader, field);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if( hasOption(OPTION_CACHE_BITS) ) {
|
||||||
|
ok = true;
|
||||||
|
if( entry.valid == null ) {
|
||||||
|
fillValidBits(entry, reader, field);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if( !ok ) {
|
||||||
|
throw new RuntimeException( "the config must cache values and/or bits" );
|
||||||
|
}
|
||||||
|
return entry;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void fillLongValues( LongValues vals, IndexReader reader, String field ) throws IOException
|
||||||
|
{
|
||||||
|
if( parser == null ) {
|
||||||
|
try {
|
||||||
|
parser = FieldCache.DEFAULT_LONG_PARSER;
|
||||||
|
fillLongValues( vals, reader, field );
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
catch (NumberFormatException ne) {
|
||||||
|
vals.parserHashCode = null; // wipe the previous one
|
||||||
|
parser = FieldCache.NUMERIC_UTILS_LONG_PARSER;
|
||||||
|
fillLongValues( vals, reader, field );
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assertSameParserAndResetCounts(vals, parser);
|
||||||
|
|
||||||
|
Terms terms = MultiFields.getTerms(reader, field);
|
||||||
|
int maxDoc = reader.maxDoc();
|
||||||
|
vals.values = null;
|
||||||
|
if (terms != null) {
|
||||||
|
final TermsEnum termsEnum = terms.iterator();
|
||||||
|
final Bits delDocs = MultiFields.getDeletedDocs(reader);
|
||||||
|
OpenBitSet validBits = (hasOption(OPTION_CACHE_BITS)) ? new OpenBitSet( maxDoc ) : null;
|
||||||
|
DocsEnum docs = null;
|
||||||
|
try {
|
||||||
|
while(true) {
|
||||||
|
final BytesRef term = termsEnum.next();
|
||||||
|
if (term == null) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
final long termval = parser.parseLong(term);
|
||||||
|
docs = termsEnum.docs(delDocs, docs);
|
||||||
|
while (true) {
|
||||||
|
final int docID = docs.nextDoc();
|
||||||
|
if (docID == DocIdSetIterator.NO_MORE_DOCS) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if(vals.values == null) {
|
||||||
|
vals.values = new long[maxDoc];
|
||||||
|
}
|
||||||
|
vals.values[docID] = termval;
|
||||||
|
vals.numDocs++;
|
||||||
|
if( validBits != null ) {
|
||||||
|
validBits.set( docID );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
vals.numTerms++;
|
||||||
|
}
|
||||||
|
} catch (FieldCache.StopFillCacheException stop) {}
|
||||||
|
|
||||||
|
if( vals.valid == null ) {
|
||||||
|
vals.valid = checkMatchAllBits( delDocs, validBits, vals.numDocs, maxDoc );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if(vals.values == null) {
|
||||||
|
vals.values = new long[maxDoc];
|
||||||
|
}
|
||||||
|
|
||||||
|
if( vals.valid == null && vals.numDocs < 1 ) {
|
||||||
|
vals.valid = new Bits.MatchNoBits( maxDoc );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,132 @@
|
||||||
|
package org.apache.lucene.search.cache;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.DocsEnum;
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.MultiFields;
|
||||||
|
import org.apache.lucene.index.Terms;
|
||||||
|
import org.apache.lucene.index.TermsEnum;
|
||||||
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
|
import org.apache.lucene.search.FieldCache;
|
||||||
|
import org.apache.lucene.search.FieldCache.ShortParser;
|
||||||
|
import org.apache.lucene.search.cache.CachedArray.ShortValues;
|
||||||
|
import org.apache.lucene.util.Bits;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.OpenBitSet;
|
||||||
|
|
||||||
|
public class ShortValuesCreator extends CachedArrayCreator<ShortValues>
|
||||||
|
{
|
||||||
|
protected ShortParser parser;
|
||||||
|
|
||||||
|
public ShortValuesCreator( String field, ShortParser parser, int options )
|
||||||
|
{
|
||||||
|
super( field, options );
|
||||||
|
this.parser = parser;
|
||||||
|
}
|
||||||
|
|
||||||
|
public ShortValuesCreator( String field, ShortParser parser )
|
||||||
|
{
|
||||||
|
super( field );
|
||||||
|
this.parser = parser;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Class getArrayType() {
|
||||||
|
return Short.class;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//--------------------------------------------------------------------------------
|
||||||
|
//--------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ShortValues create(IndexReader reader) throws IOException {
|
||||||
|
return validate( new ShortValues(), reader );
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ShortValues validate(ShortValues entry, IndexReader reader) throws IOException {
|
||||||
|
boolean ok = false;
|
||||||
|
if( hasOption(OPTION_CACHE_VALUES) ) {
|
||||||
|
ok = true;
|
||||||
|
if( entry.values == null ) {
|
||||||
|
fillShortValues(entry, reader, field);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if( hasOption(OPTION_CACHE_BITS) ) {
|
||||||
|
ok = true;
|
||||||
|
if( entry.valid == null ) {
|
||||||
|
fillValidBits(entry, reader, field);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if( !ok ) {
|
||||||
|
throw new RuntimeException( "the config must cache values and/or bits" );
|
||||||
|
}
|
||||||
|
return entry;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void fillShortValues( ShortValues vals, IndexReader reader, String field ) throws IOException
|
||||||
|
{
|
||||||
|
if( parser == null ) {
|
||||||
|
parser = FieldCache.DEFAULT_SHORT_PARSER;
|
||||||
|
}
|
||||||
|
assertSameParserAndResetCounts(vals, parser);
|
||||||
|
|
||||||
|
Terms terms = MultiFields.getTerms(reader, field);
|
||||||
|
int maxDoc = reader.maxDoc();
|
||||||
|
vals.values = new short[maxDoc];
|
||||||
|
if (terms != null) {
|
||||||
|
final TermsEnum termsEnum = terms.iterator();
|
||||||
|
final Bits delDocs = MultiFields.getDeletedDocs(reader);
|
||||||
|
OpenBitSet validBits = (hasOption(OPTION_CACHE_BITS)) ? new OpenBitSet( maxDoc ) : null;
|
||||||
|
DocsEnum docs = null;
|
||||||
|
try {
|
||||||
|
while(true) {
|
||||||
|
final BytesRef term = termsEnum.next();
|
||||||
|
if (term == null) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
final Short termval = parser.parseShort(term);
|
||||||
|
docs = termsEnum.docs(delDocs, docs);
|
||||||
|
while (true) {
|
||||||
|
final int docID = docs.nextDoc();
|
||||||
|
if (docID == DocIdSetIterator.NO_MORE_DOCS) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
vals.values[docID] = termval;
|
||||||
|
vals.numDocs++;
|
||||||
|
if( validBits != null ) {
|
||||||
|
validBits.set( docID );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
vals.numTerms++;
|
||||||
|
}
|
||||||
|
} catch (FieldCache.StopFillCacheException stop) {}
|
||||||
|
|
||||||
|
if( vals.valid == null ) {
|
||||||
|
vals.valid = checkMatchAllBits( delDocs, validBits, vals.numDocs, maxDoc );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if( vals.valid == null && vals.numDocs < 1 ) {
|
||||||
|
vals.valid = new Bits.MatchNoBits( maxDoc );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,59 @@
|
||||||
|
package org.apache.lucene.search.cache;
|
||||||
|
|
||||||
|
public class SimpleEntryKey extends EntryKey
|
||||||
|
{
|
||||||
|
public final Class clazz;
|
||||||
|
public final Object[] args;
|
||||||
|
public final int hash;
|
||||||
|
|
||||||
|
public SimpleEntryKey( Class clazz, Object ... args ) {
|
||||||
|
this.clazz = clazz;
|
||||||
|
this.args = args;
|
||||||
|
|
||||||
|
int hash = clazz.hashCode();
|
||||||
|
if( args != null ) {
|
||||||
|
for( Object obj : args ) {
|
||||||
|
hash ^= obj.hashCode();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
this.hash = hash;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object obj) {
|
||||||
|
if( obj instanceof SimpleEntryKey ) {
|
||||||
|
SimpleEntryKey key = (SimpleEntryKey)obj;
|
||||||
|
if( key.hash != hash ||
|
||||||
|
key.clazz != clazz ||
|
||||||
|
key.args.length != args.length ) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// In the off chance that the hash etc is all the same
|
||||||
|
// we should actually check the values
|
||||||
|
for( int i=0; i<args.length; i++ ) {
|
||||||
|
if( !args[i].equals( key.args[i] ) ) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
return hash;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
StringBuilder str = new StringBuilder();
|
||||||
|
str.append( '[' ).append( clazz.getName() ).append( ':' );
|
||||||
|
for( Object v : args ) {
|
||||||
|
str.append( v ).append( ':' );
|
||||||
|
}
|
||||||
|
str.append( hash ).append( ']' );
|
||||||
|
return str.toString();
|
||||||
|
}
|
||||||
|
}
|
|
@ -26,4 +26,36 @@ public interface Bits {
|
||||||
public int length();
|
public int length();
|
||||||
|
|
||||||
public static final Bits[] EMPTY_ARRAY = new Bits[0];
|
public static final Bits[] EMPTY_ARRAY = new Bits[0];
|
||||||
|
|
||||||
|
public static class MatchAllBits implements Bits {
|
||||||
|
final int len;
|
||||||
|
|
||||||
|
public MatchAllBits( int len ) {
|
||||||
|
this.len = len;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean get(int index) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int length() {
|
||||||
|
return len;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static class MatchNoBits implements Bits {
|
||||||
|
final int len;
|
||||||
|
|
||||||
|
public MatchNoBits( int len ) {
|
||||||
|
this.len = len;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean get(int index) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int length() {
|
||||||
|
return len;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,190 @@
|
||||||
|
package org.apache.lucene.search.cache;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copyright 2004 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.lang.reflect.Constructor;
|
||||||
|
import java.lang.reflect.Method;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.document.Field;
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.RandomIndexWriter;
|
||||||
|
import org.apache.lucene.search.FieldCache;
|
||||||
|
import org.apache.lucene.search.FieldCache.*;
|
||||||
|
import org.apache.lucene.search.FieldCache.Parser;
|
||||||
|
import org.apache.lucene.search.FieldCache.ShortParser;
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
import org.apache.lucene.util.OpenBitSet;
|
||||||
|
|
||||||
|
import static org.hamcrest.CoreMatchers.*;
|
||||||
|
|
||||||
|
public class TestEntryCreators extends LuceneTestCase {
|
||||||
|
protected IndexReader reader;
|
||||||
|
private static final int NUM_DOCS = 500 * RANDOM_MULTIPLIER;
|
||||||
|
private Directory directory;
|
||||||
|
|
||||||
|
static class NumberTypeTester {
|
||||||
|
String funcName;
|
||||||
|
Class<? extends CachedArrayCreator> creator;
|
||||||
|
Class<? extends Parser> parser;
|
||||||
|
String field;
|
||||||
|
Number[] values;
|
||||||
|
|
||||||
|
public NumberTypeTester( String f, String func, Class<? extends CachedArrayCreator> creator, Class<? extends Parser> parser ) {
|
||||||
|
field = f;
|
||||||
|
funcName = func;
|
||||||
|
this.creator = creator;
|
||||||
|
this.parser = parser;
|
||||||
|
values = new Number[NUM_DOCS];
|
||||||
|
}
|
||||||
|
public String toString()
|
||||||
|
{
|
||||||
|
return field;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
private NumberTypeTester[] typeTests;
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setUp() throws Exception {
|
||||||
|
super.setUp();
|
||||||
|
directory = newDirectory();
|
||||||
|
RandomIndexWriter writer= new RandomIndexWriter(random, directory);
|
||||||
|
|
||||||
|
typeTests = new NumberTypeTester[] {
|
||||||
|
new NumberTypeTester( "theRandomByte", "getBytes", ByteValuesCreator.class, ByteParser.class ),
|
||||||
|
new NumberTypeTester( "theRandomShort", "getShorts", ShortValuesCreator.class, ShortParser.class ),
|
||||||
|
new NumberTypeTester( "theRandomInt", "getInts", IntValuesCreator.class, IntParser.class ),
|
||||||
|
new NumberTypeTester( "theRandomLong", "getLongs", LongValuesCreator.class, LongParser.class ),
|
||||||
|
new NumberTypeTester( "theRandomFloat", "getFloats", FloatValuesCreator.class, FloatParser.class ),
|
||||||
|
new NumberTypeTester( "theRandomDouble", "getDoubles", DoubleValuesCreator.class, DoubleParser.class ),
|
||||||
|
};
|
||||||
|
|
||||||
|
for (int i = 0; i < NUM_DOCS; i++){
|
||||||
|
Document doc = new Document();
|
||||||
|
|
||||||
|
// Test the valid bits
|
||||||
|
for( NumberTypeTester tester : typeTests ) {
|
||||||
|
if (random.nextInt(20) != 17 && i > 1) {
|
||||||
|
tester.values[i] = 10 + random.nextInt( 20 ); // get some field overlap
|
||||||
|
doc.add(newField(tester.field, String.valueOf(tester.values[i]),
|
||||||
|
Field.Store.NO, Field.Index.NOT_ANALYZED ));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
writer.addDocument(doc);
|
||||||
|
}
|
||||||
|
|
||||||
|
reader = writer.getReader();
|
||||||
|
writer.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void tearDown() throws Exception {
|
||||||
|
reader.close();
|
||||||
|
directory.close();
|
||||||
|
super.tearDown();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testKeys() throws IOException {
|
||||||
|
// Check that the keys are unique for different fields
|
||||||
|
|
||||||
|
EntryKey key_1 = new ByteValuesCreator( "field1", null ).getCacheKey();
|
||||||
|
EntryKey key_2 = new ByteValuesCreator( "field2", null ).getCacheKey();
|
||||||
|
assertThat("different fields should have a different key", key_1, not(key_2) );
|
||||||
|
|
||||||
|
key_1 = new ByteValuesCreator( "field1", null ).getCacheKey();
|
||||||
|
key_2 = new ShortValuesCreator( "field1", null ).getCacheKey();
|
||||||
|
assertThat( "same field different type should have different key", key_1, not( key_2 ) );
|
||||||
|
|
||||||
|
key_1 = new ByteValuesCreator( "ff", null ).getCacheKey();
|
||||||
|
key_2 = new ByteValuesCreator( "ff", null ).getCacheKey();
|
||||||
|
assertThat( "same args should have same key", key_1, is( key_2 ) );
|
||||||
|
|
||||||
|
key_1 = new ByteValuesCreator( "ff", null, ByteValuesCreator.OPTION_CACHE_BITS ^ ByteValuesCreator.OPTION_CACHE_VALUES ).getCacheKey();
|
||||||
|
key_2 = new ByteValuesCreator( "ff", null ).getCacheKey();
|
||||||
|
assertThat( "different options should share same key", key_1, is( key_2 ) );
|
||||||
|
|
||||||
|
key_1 = new IntValuesCreator( "ff", FieldCache.DEFAULT_INT_PARSER ).getCacheKey();
|
||||||
|
key_2 = new IntValuesCreator( "ff", FieldCache.NUMERIC_UTILS_INT_PARSER ).getCacheKey();
|
||||||
|
assertThat( "diferent parser should have same key", key_1, is( key_2 ) );
|
||||||
|
}
|
||||||
|
|
||||||
|
private CachedArray getWithReflection( FieldCache cache, NumberTypeTester tester, int flags ) throws IOException
|
||||||
|
{
|
||||||
|
try {
|
||||||
|
Method getXXX = cache.getClass().getMethod( tester.funcName, IndexReader.class, String.class, EntryCreator.class );
|
||||||
|
Constructor constructor = tester.creator.getConstructor( String.class, tester.parser, Integer.TYPE );
|
||||||
|
CachedArrayCreator creator = (CachedArrayCreator)constructor.newInstance( tester.field, null, flags );
|
||||||
|
return (CachedArray) getXXX.invoke(cache, reader, tester.field, creator );
|
||||||
|
}
|
||||||
|
catch( Exception ex ) {
|
||||||
|
throw new RuntimeException( "Reflection failed", ex );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testCachedArrays() throws IOException
|
||||||
|
{
|
||||||
|
FieldCache cache = FieldCache.DEFAULT;
|
||||||
|
|
||||||
|
// Check the Different CachedArray Types
|
||||||
|
CachedArray last = null;
|
||||||
|
CachedArray justbits = null;
|
||||||
|
|
||||||
|
for( NumberTypeTester tester : typeTests ) {
|
||||||
|
justbits = getWithReflection( cache, tester, CachedArrayCreator.OPTION_CACHE_BITS );
|
||||||
|
assertNull( "should not get values : "+tester, justbits.getRawArray() );
|
||||||
|
assertNotNull( "should get bits : "+tester, justbits.valid );
|
||||||
|
last = getWithReflection( cache, tester, CachedArrayCreator.CACHE_VALUES_AND_BITS );
|
||||||
|
assertEquals( "should use same cached object : "+tester, justbits, last );
|
||||||
|
assertNull( "Validate=false shoudl not regenerate : "+tester, justbits.getRawArray() );
|
||||||
|
last = getWithReflection( cache, tester, CachedArrayCreator.CACHE_VALUES_AND_BITS_VALIDATE );
|
||||||
|
assertEquals( "should use same cached object : "+tester, justbits, last );
|
||||||
|
assertNotNull( "Validate=true should add the Array : "+tester, justbits.getRawArray() );
|
||||||
|
checkCachedArrayValuesAndBits( tester, last );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void checkCachedArrayValuesAndBits( NumberTypeTester tester, CachedArray cachedVals )
|
||||||
|
{
|
||||||
|
// for( int i=0; i<NUM_DOCS; i++ ) {
|
||||||
|
// System.out.println( i + "] "+ tester.values[i] + " :: " + cachedVals.valid.get(i) );
|
||||||
|
// }
|
||||||
|
|
||||||
|
int numDocs =0;
|
||||||
|
Set<Number> distinctTerms = new HashSet<Number>();
|
||||||
|
for( int i=0; i<NUM_DOCS; i++ ) {
|
||||||
|
Number v = tester.values[i];
|
||||||
|
boolean isValid = cachedVals.valid.get(i);
|
||||||
|
if( v != null ) {
|
||||||
|
numDocs++;
|
||||||
|
distinctTerms.add( v );
|
||||||
|
assertTrue( "Valid bit should be true ("+i+"="+tester.values[i]+") "+tester, isValid );
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
assertFalse( "Valid bit should be false ("+i+") "+tester, isValid );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assertEquals( "Cached numTerms does not match : "+tester, distinctTerms.size(), cachedVals.numTerms );
|
||||||
|
assertEquals( "Cached numDocs does not match : "+tester, numDocs, cachedVals.numDocs );
|
||||||
|
assertEquals( "Ordinal should match numDocs : "+tester, numDocs, ((OpenBitSet)cachedVals.valid).cardinality() );
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
Loading…
Reference in New Issue