LUCENE-2649: Objects in the FieldCache can optionally store valid Bits

Apologies for 'CTR' rather than 'RTC' -- we can always revert if I jumped the gun!

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1001303 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Ryan McKinley 2010-09-25 19:32:37 +00:00
parent ffe3bb6578
commit aeab95d3af
19 changed files with 2200 additions and 752 deletions

View File

@ -232,6 +232,9 @@ New features
* LUCENE-2648: PackedInts.Iterator now supports to advance by more than a * LUCENE-2648: PackedInts.Iterator now supports to advance by more than a
single ordinal. (Simon Willnauer) single ordinal. (Simon Willnauer)
* LUCENE-2649: Objects in the FieldCache can optionally store Bits
that mark which docs have real values in the native[] (ryan)
Optimizations Optimizations
* LUCENE-2410: ~20% speedup on exact (slop=0) PhraseQuery matching. * LUCENE-2410: ~20% speedup on exact (slop=0) PhraseQuery matching.

View File

@ -19,6 +19,8 @@ package org.apache.lucene.search;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.cache.EntryCreator;
import org.apache.lucene.search.cache.CachedArray.*;
import org.apache.lucene.util.NumericUtils; import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
@ -46,6 +48,14 @@ public interface FieldCache {
Object value; Object value;
} }
/**
* Hack: When thrown from a Parser (NUMERIC_UTILS_* ones), this stops
* processing terms and returns the current FieldCache
* array.
*/
public static final class StopFillCacheException extends RuntimeException {
}
/** /**
* Marker interface as super-interface to all parsers. It * Marker interface as super-interface to all parsers. It
* is used to specify a custom parser to {@link * is used to specify a custom parser to {@link
@ -314,6 +324,19 @@ public interface FieldCache {
public byte[] getBytes (IndexReader reader, String field, ByteParser parser) public byte[] getBytes (IndexReader reader, String field, ByteParser parser)
throws IOException; throws IOException;
/** Checks the internal cache for an appropriate entry, and if none is found,
* reads the terms in <code>field</code> as bytes and returns an array of
* size <code>reader.maxDoc()</code> of the value each document has in the
* given field.
* @param reader Used to get field values.
* @param field Which field contains the bytes.
* @param creator Used to make the ByteValues
* @return The values in the given field for each document.
* @throws IOException If any error occurs.
*/
public ByteValues getBytes(IndexReader reader, String field, EntryCreator<ByteValues> creator ) throws IOException;
/** Checks the internal cache for an appropriate entry, and if none is /** Checks the internal cache for an appropriate entry, and if none is
* found, reads the terms in <code>field</code> as shorts and returns an array * found, reads the terms in <code>field</code> as shorts and returns an array
* of size <code>reader.maxDoc()</code> of the value each document * of size <code>reader.maxDoc()</code> of the value each document
@ -339,6 +362,20 @@ public interface FieldCache {
public short[] getShorts (IndexReader reader, String field, ShortParser parser) public short[] getShorts (IndexReader reader, String field, ShortParser parser)
throws IOException; throws IOException;
/** Checks the internal cache for an appropriate entry, and if none is found,
* reads the terms in <code>field</code> as shorts and returns an array of
* size <code>reader.maxDoc()</code> of the value each document has in the
* given field.
* @param reader Used to get field values.
* @param field Which field contains the shorts.
* @param creator Computes short for string values.
* @return The values in the given field for each document.
* @throws IOException If any error occurs.
*/
public ShortValues getShorts(IndexReader reader, String field, EntryCreator<ShortValues> creator ) throws IOException;
/** Checks the internal cache for an appropriate entry, and if none is /** Checks the internal cache for an appropriate entry, and if none is
* found, reads the terms in <code>field</code> as integers and returns an array * found, reads the terms in <code>field</code> as integers and returns an array
* of size <code>reader.maxDoc()</code> of the value each document * of size <code>reader.maxDoc()</code> of the value each document
@ -364,6 +401,19 @@ public interface FieldCache {
public int[] getInts (IndexReader reader, String field, IntParser parser) public int[] getInts (IndexReader reader, String field, IntParser parser)
throws IOException; throws IOException;
/** Checks the internal cache for an appropriate entry, and if none is found,
* reads the terms in <code>field</code> as integers and returns an array of
* size <code>reader.maxDoc()</code> of the value each document has in the
* given field.
* @param reader Used to get field values.
* @param field Which field contains the integers.
* @param creator Computes integer for string values.
* @return The values in the given field for each document.
* @throws IOException If any error occurs.
*/
public IntValues getInts(IndexReader reader, String field, EntryCreator<IntValues> creator ) throws IOException;
/** Checks the internal cache for an appropriate entry, and if /** Checks the internal cache for an appropriate entry, and if
* none is found, reads the terms in <code>field</code> as floats and returns an array * none is found, reads the terms in <code>field</code> as floats and returns an array
* of size <code>reader.maxDoc()</code> of the value each document * of size <code>reader.maxDoc()</code> of the value each document
@ -389,6 +439,19 @@ public interface FieldCache {
public float[] getFloats (IndexReader reader, String field, public float[] getFloats (IndexReader reader, String field,
FloatParser parser) throws IOException; FloatParser parser) throws IOException;
/** Checks the internal cache for an appropriate entry, and if
* none is found, reads the terms in <code>field</code> as floats and returns an array
* of size <code>reader.maxDoc()</code> of the value each document
* has in the given field.
* @param reader Used to get field values.
* @param field Which field contains the floats.
* @param creator Computes float for string values.
* @return The values in the given field for each document.
* @throws IOException If any error occurs.
*/
public FloatValues getFloats(IndexReader reader, String field, EntryCreator<FloatValues> creator ) throws IOException;
/** /**
* Checks the internal cache for an appropriate entry, and if none is * Checks the internal cache for an appropriate entry, and if none is
* found, reads the terms in <code>field</code> as longs and returns an array * found, reads the terms in <code>field</code> as longs and returns an array
@ -418,6 +481,20 @@ public interface FieldCache {
public long[] getLongs(IndexReader reader, String field, LongParser parser) public long[] getLongs(IndexReader reader, String field, LongParser parser)
throws IOException; throws IOException;
/**
* Checks the internal cache for an appropriate entry, and if none is found,
* reads the terms in <code>field</code> as longs and returns an array of
* size <code>reader.maxDoc()</code> of the value each document has in the
* given field.
*
* @param reader Used to get field values.
* @param field Which field contains the longs.
* @param creator Used to make the LongValues
* @return The values in the given field for each document.
* @throws IOException If any error occurs.
*/
public LongValues getLongs(IndexReader reader, String field, EntryCreator<LongValues> creator ) throws IOException;
/** /**
* Checks the internal cache for an appropriate entry, and if none is * Checks the internal cache for an appropriate entry, and if none is
@ -448,6 +525,21 @@ public interface FieldCache {
public double[] getDoubles(IndexReader reader, String field, DoubleParser parser) public double[] getDoubles(IndexReader reader, String field, DoubleParser parser)
throws IOException; throws IOException;
/**
* Checks the internal cache for an appropriate entry, and if none is found,
* reads the terms in <code>field</code> as doubles and returns an array of
* size <code>reader.maxDoc()</code> of the value each document has in the
* given field.
*
* @param reader Used to get field values.
* @param field Which field contains the doubles.
* @param creator Used to make the DoubleValues
* @return The values in the given field for each document.
* @throws IOException If any error occurs.
*/
public DoubleValues getDoubles(IndexReader reader, String field, EntryCreator<DoubleValues> creator ) throws IOException;
/** Returned by {@link #getTerms} */ /** Returned by {@link #getTerms} */
public abstract static class DocTerms { public abstract static class DocTerms {
/** The BytesRef argument must not be null; the method /** The BytesRef argument must not be null; the method

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,131 @@
package org.apache.lucene.search.cache;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.FieldCache.ByteParser;
import org.apache.lucene.search.cache.CachedArray.ByteValues;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.OpenBitSet;
/**
 * Builds {@link ByteValues} cache entries for one field. Depending on the
 * configured options the entry receives the per-document byte array, the
 * "valid" bits, or both.
 */
public class ByteValuesCreator extends CachedArrayCreator<ByteValues>
{
  /** Converts a term to a byte; swapped for the default byte parser on first fill when null. */
  protected ByteParser parser;

  /**
   * @param field   field whose terms are loaded
   * @param parser  term-to-byte parser, may be null
   * @param options option flags handed to the superclass
   */
  public ByteValuesCreator( String field, ByteParser parser, int options )
  {
    super( field, options );
    this.parser = parser;
  }

  /**
   * Same as the three-argument constructor but with the superclass default options.
   */
  public ByteValuesCreator( String field, ByteParser parser )
  {
    super( field );
    this.parser = parser;
  }

  /** @return {@code Byte.class}, the element type of the cached array */
  @Override
  public Class getArrayType() {
    return Byte.class;
  }

  //--------------------------------------------------------------------------------
  //--------------------------------------------------------------------------------

  /** Creates an empty entry and lets {@link #validate} populate it. */
  @Override
  public ByteValues create(IndexReader reader) throws IOException {
    final ByteValues fresh = new ByteValues();
    return validate( fresh, reader );
  }

  /**
   * Fills in whatever the options ask for and the entry is still missing.
   *
   * @throws RuntimeException if neither values nor bits are requested
   */
  @Override
  public ByteValues validate(ByteValues entry, IndexReader reader) throws IOException {
    final boolean wantValues = hasOption(OPTION_CACHE_VALUES);
    if( wantValues && entry.values == null ) {
      fillByteValues(entry, reader, field);
    }

    final boolean wantBits = hasOption(OPTION_CACHE_BITS);
    if( wantBits && entry.valid == null ) {
      fillValidBits(entry, reader, field);
    }

    if( !(wantValues || wantBits) ) {
      throw new RuntimeException( "the config must cache values and/or bits" );
    }
    return entry;
  }

  /**
   * Walks every term of {@code field}, parses it to a byte and stores that byte
   * for each document the term occurs in. Also updates the doc/term counters
   * and, when not already present, the entry's valid bits.
   */
  protected void fillByteValues( ByteValues vals, IndexReader reader, String field ) throws IOException
  {
    if( parser == null ) {
      parser = FieldCache.DEFAULT_BYTE_PARSER;
    }
    assertSameParserAndResetCounts(vals, parser);

    final Terms fieldTerms = MultiFields.getTerms(reader, field);
    final int maxDoc = reader.maxDoc();
    vals.values = new byte[maxDoc];

    if (fieldTerms != null) {
      final TermsEnum termsEnum = fieldTerms.iterator();
      final Bits delDocs = MultiFields.getDeletedDocs(reader);
      // only track per-document bits when the caller asked for them
      final OpenBitSet seen = hasOption(OPTION_CACHE_BITS) ? new OpenBitSet( maxDoc ) : null;
      DocsEnum postings = null;
      try {
        for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
          final byte parsed = parser.parseByte(term);
          postings = termsEnum.docs(delDocs, postings);
          int docID;
          while ((docID = postings.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            vals.values[docID] = parsed;
            vals.numDocs++;
            if( seen != null ) {
              seen.set( docID );
            }
          }
          vals.numTerms++;
        }
      } catch (FieldCache.StopFillCacheException stop) {
        // the parser asked to stop loading terms; keep what was filled so far
      }

      if( vals.valid == null ) {
        vals.valid = checkMatchAllBits( delDocs, seen, vals.numDocs, maxDoc );
      }
    }

    if( vals.valid == null && vals.numDocs < 1 ) {
      vals.valid = new Bits.MatchNoBits( maxDoc );
    }
  }
}

View File

@ -0,0 +1,78 @@
package org.apache.lucene.search.cache;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.util.Bits;
/**
 * Base class for FieldCache entries that wrap a primitive array together with
 * fill statistics and an optional set of "valid" bits.
 */
public abstract class CachedArray
{
  /** Hash code of the parser used to fill this entry; guards against a different parser on later requests. */
  public Integer parserHashCode;

  /** Counter updated by the creator while filling. */
  public int numDocs;

  /** Counter updated by the creator while filling. */
  public int numTerms;

  /**
   * NOTE: these Bits may have false positives for deleted documents. That is,
   * a deleted document may be reported as valid even though its array slot
   * holds no real value.
   */
  public Bits valid;

  /** Starts with no parser recorded and both counters at zero. */
  public CachedArray() {
    parserHashCode = null;
    numDocs = 0;
    numTerms = 0;
  }

  /**
   * @return the native array
   */
  public abstract Object getRawArray();

  //-------------------------------------------------------------
  // Concrete Values
  //-------------------------------------------------------------

  /** Entry backed by a {@code byte[]}. */
  public static class ByteValues extends CachedArray {
    public byte[] values;

    @Override
    public byte[] getRawArray() {
      return values;
    }
  }

  /** Entry backed by a {@code short[]}. */
  public static class ShortValues extends CachedArray {
    public short[] values;

    @Override
    public short[] getRawArray() {
      return values;
    }
  }

  /** Entry backed by an {@code int[]}. */
  public static class IntValues extends CachedArray {
    public int[] values;

    @Override
    public int[] getRawArray() {
      return values;
    }
  }

  /** Entry backed by a {@code float[]}. */
  public static class FloatValues extends CachedArray {
    public float[] values;

    @Override
    public float[] getRawArray() {
      return values;
    }
  }

  /** Entry backed by a {@code long[]}. */
  public static class LongValues extends CachedArray {
    public long[] values;

    @Override
    public long[] getRawArray() {
      return values;
    }
  }

  /** Entry backed by a {@code double[]}. */
  public static class DoubleValues extends CachedArray {
    public double[] values;

    @Override
    public double[] getRawArray() {
      return values;
    }
  }
}

View File

@ -0,0 +1,148 @@
package org.apache.lucene.search.cache;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FieldCache.Parser;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.OpenBitSet;
/**
 * Common base for creators that fill a {@link CachedArray} for one field.
 * Holds the option flags, the cache key, and the shared logic for computing
 * the "valid" bits of an entry.
 */
public abstract class CachedArrayCreator<T extends CachedArray> extends EntryCreatorWithOptions<T>
{
  public static final int OPTION_VALIDATE = 1;
  public static final int OPTION_CACHE_VALUES = 2;
  public static final int OPTION_CACHE_BITS = 4;

  // Composite Options Fields (distinct single-bit flags combined with OR)
  public static final int CACHE_VALUES_AND_BITS = OPTION_CACHE_VALUES | OPTION_CACHE_BITS;
  public static final int CACHE_VALUES_AND_BITS_VALIDATE = OPTION_CACHE_VALUES | OPTION_CACHE_BITS | OPTION_VALIDATE;

  /** Name of the field this creator loads. Never null. */
  public String field;

  /**
   * Creates a creator that caches values and validates, without caching bits.
   *
   * @param field field to load
   * @throws IllegalArgumentException if {@code field} is null
   */
  public CachedArrayCreator( String field )
  {
    super( OPTION_CACHE_VALUES | OPTION_VALIDATE );
    if( field == null ) {
      throw new IllegalArgumentException( "field can not be null" );
    }
    this.field = field;
  }

  /**
   * @param field field to load
   * @param flags option flags (see the {@code OPTION_*} constants)
   * @throws IllegalArgumentException if {@code field} is null
   */
  public CachedArrayCreator( String field, int flags )
  {
    super( flags );
    if( field == null ) {
      throw new IllegalArgumentException( "field can not be null" );
    }
    this.field = field;
  }

  /**
   * Note that the 'flags' are not part of the key -- subsequent calls to the cache
   * with different options will use the same cache entry.
   */
  @Override
  public EntryKey getCacheKey() {
    return new SimpleEntryKey( CachedArray.class, getArrayType(), field );
  }

  /** Return the type that the array will hold */
  public abstract Class getArrayType();

  /**
   * Records the parser's hash code on the entry and resets its counters.
   *
   * @throws RuntimeException if the entry was previously filled with a parser
   *         that has a different hash code
   */
  protected void assertSameParserAndResetCounts(T value, Parser parser)
  {
    int parserHashCode = parser.hashCode();
    if( value.parserHashCode != null && value.parserHashCode.intValue() != parserHashCode ) {
      throw new RuntimeException( "Parser changed in subsequet call.  "
          +value.parserHashCode+" != "+parserHashCode + " :: " + parser );
    }
    value.parserHashCode = parserHashCode;
    value.numDocs = value.numTerms = 0;
  }

  /**
   * Utility function to help check what bits are valid.
   * <p>
   * The parameter order matches how the fill methods call this helper:
   * the number of documents visited first, then the reader's maxDoc. The
   * scan and the returned {@code MatchAllBits} are sized by {@code maxDocs}.
   * <p>
   * Note that callers increment their doc counter once per (term, document)
   * posting, so for a field with several terms per document the counter can
   * differ from the number of distinct documents.
   *
   * @param deleted deleted-docs bits, or null when there are no deletions
   * @param valid   bits set for every document that received a value; only
   *                read when {@link #OPTION_CACHE_BITS} is enabled
   * @param numDocs number of documents visited while filling
   * @param maxDocs the reader's maxDoc
   * @return {@code MatchAllBits} when every non-deleted document is valid,
   *         {@code valid} when some document is missing a value, or null when
   *         documents are missing and bits are not being cached
   */
  protected Bits checkMatchAllBits( Bits deleted, OpenBitSet valid, int numDocs, int maxDocs )
  {
    if( numDocs != maxDocs ) {
      if( !hasOption( OPTION_CACHE_BITS ) ) {
        return null;
      }
      for( int i=0; i<maxDocs; i++ ) {
        // a deleted document never counts as "missing"
        final boolean live = (deleted == null) || !deleted.get(i);
        if( live && !valid.get(i) ) {
          return valid;
        }
      }
    }
    return new Bits.MatchAllBits( maxDocs );
  }

  /**
   * Computes only the valid bits for {@code field}, without parsing values:
   * every document that appears in any term's postings is marked valid.
   * Resets and recomputes the entry's doc and term counters as a side effect.
   */
  public void fillValidBits( T vals, IndexReader reader, String field ) throws IOException
  {
    vals.numDocs = vals.numTerms = 0;

    final int maxDoc = reader.maxDoc();
    Terms terms = MultiFields.getTerms(reader, field);
    if (terms != null) {
      final TermsEnum termsEnum = terms.iterator();
      final Bits delDocs = MultiFields.getDeletedDocs(reader);
      OpenBitSet validBits = new OpenBitSet( maxDoc );
      DocsEnum docs = null;
      while(true) {
        final BytesRef term = termsEnum.next();
        if (term == null) {
          break;
        }
        docs = termsEnum.docs(delDocs, docs);
        while (true) {
          final int docID = docs.nextDoc();
          if (docID == DocIdSetIterator.NO_MORE_DOCS) {
            break;
          }
          validBits.set( docID );
          vals.numDocs++;
        }
        vals.numTerms++;
      }

      vals.valid = checkMatchAllBits( delDocs, validBits, vals.numDocs, maxDoc );
    }
    if( vals.numDocs < 1 ) {
      vals.valid = new Bits.MatchNoBits( maxDoc );
    }
  }
}

View File

@ -0,0 +1,171 @@
package org.apache.lucene.search.cache;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FieldCache.DocTerms;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.packed.GrowableWriter;
import org.apache.lucene.util.packed.PackedInts;
// TODO: if a DocTermsIndex was already created for this field, we should share it...
/**
 * Builds a {@link DocTerms} entry for one field: every term's bytes are
 * appended to a {@link PagedBytes} store and each document is mapped to the
 * offset of its term. Offset 0 is reserved to mean "no term for this document".
 */
public class DocTermsCreator<T extends DocTerms> extends EntryCreatorWithOptions<T>
{
  /** Option flag forwarded to the {@link GrowableWriter} used for the doc-to-offset map. */
  public static final int FASTER_BUT_MORE_RAM = 2;

  /** Name of the field this creator loads. Never null. */
  public String field;

  /**
   * @param field field to load
   * @throws IllegalArgumentException if {@code field} is null
   */
  public DocTermsCreator( String field )
  {
    super( FASTER_BUT_MORE_RAM ); // By default turn on FASTER_BUT_MORE_RAM
    if( field == null ) {
      throw new IllegalArgumentException( "field can not be null" );
    }
    this.field = field;
  }

  /**
   * @param field field to load
   * @param flags option flags, e.g. {@link #FASTER_BUT_MORE_RAM}
   * @throws IllegalArgumentException if {@code field} is null
   */
  public DocTermsCreator( String field, int flags )
  {
    super( flags );
    if( field == null ) {
      throw new IllegalArgumentException( "field can not be null" );
    }
    this.field = field;
  }

  /** The key depends only on this creator class and the field, not on the flags. */
  @Override
  public SimpleEntryKey getCacheKey() {
    return new SimpleEntryKey( DocTermsCreator.class, field );
  }

  /**
   * Reads all terms of the field and records, for every document, the offset
   * of the term it contains.
   */
  @Override
  public T create(IndexReader reader) throws IOException {

    String field = StringHelper.intern(this.field); // TODO?? necessary?
    Terms terms = MultiFields.getTerms(reader, field);

    final boolean fasterButMoreRAM = hasOption( FASTER_BUT_MORE_RAM );
    final int termCountHardLimit = reader.maxDoc();

    // Holds the actual term data, expanded.
    final PagedBytes bytes = new PagedBytes(15);

    int startBPV;

    if (terms != null) {
      // Try for coarse estimate for number of bits; this
      // should be an underestimate most of the time, which
      // is fine -- GrowableWriter will reallocate as needed
      long numUniqueTerms = 0;
      try {
        numUniqueTerms = terms.getUniqueTermCount();
      } catch (UnsupportedOperationException uoe) {
        numUniqueTerms = -1;
      }
      if (numUniqueTerms != -1) {
        if (numUniqueTerms > termCountHardLimit) {
          numUniqueTerms = termCountHardLimit;
        }
        startBPV = PackedInts.bitsRequired(numUniqueTerms*4);
      } else {
        startBPV = 1;
      }
    } else {
      startBPV = 1;
    }

    final GrowableWriter docToOffset = new GrowableWriter(startBPV, reader.maxDoc(), fasterButMoreRAM);

    // pointer==0 means not set
    bytes.copyUsingLengthPrefix(new BytesRef());

    if (terms != null) {
      int termCount = 0;
      final TermsEnum termsEnum = terms.iterator();
      final Bits delDocs = MultiFields.getDeletedDocs(reader);
      DocsEnum docs = null;
      while(true) {
        if (termCount++ == termCountHardLimit) {
          // app is misusing the API (there is more than
          // one term per doc); in this case we make best
          // effort to load what we can (see LUCENE-2142)
          break;
        }

        final BytesRef term = termsEnum.next();
        if (term == null) {
          break;
        }
        final long pointer = bytes.copyUsingLengthPrefix(term);
        docs = termsEnum.docs(delDocs, docs);
        while (true) {
          final int docID = docs.nextDoc();
          if (docID == DocIdSetIterator.NO_MORE_DOCS) {
            break;
          }
          docToOffset.set(docID, pointer);
        }
      }
    }

    // maybe an int-only impl?
    return (T)new DocTermsImpl(bytes.freeze(true), docToOffset.getMutable());
  }

  /** Entries are returned unchanged; there is currently nothing to re-validate. */
  @Override
  public T validate(T entry, IndexReader reader) throws IOException {
    // TODO? nothing? perhaps subsequent call with FASTER_BUT_MORE_RAM?
    return entry;
  }

  /** {@link DocTerms} backed by frozen paged bytes and a packed doc-to-offset map. */
  private static class DocTermsImpl extends DocTerms {
    private final PagedBytes.Reader bytes;
    private final PackedInts.Reader docToOffset;

    public DocTermsImpl(PagedBytes.Reader bytes, PackedInts.Reader docToOffset) {
      this.bytes = bytes;
      this.docToOffset = docToOffset;
    }

    /** @return the number of entries in the doc-to-offset map */
    @Override
    public int size() {
      return docToOffset.size();
    }

    /**
     * @return true when a term was recorded for {@code docID}. Offset 0 is the
     *         "not set" sentinel written before any real term, so a document
     *         exists only when its offset is non-zero.
     */
    @Override
    public boolean exists(int docID) {
      return docToOffset.get(docID) != 0;
    }

    /** Fills {@code ret} with the term bytes stored at the document's offset. */
    @Override
    public BytesRef getTerm(int docID, BytesRef ret) {
      final int pointer = (int) docToOffset.get(docID);
      return bytes.fillUsingLengthPrefix(ret, pointer);
    }
  }
}

View File

@ -0,0 +1,318 @@
package org.apache.lucene.search.cache;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Comparator;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FieldCache.DocTermsIndex;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.packed.GrowableWriter;
import org.apache.lucene.util.packed.PackedInts;
/**
 * Builds a {@link DocTermsIndex} entry for one field. Each term gets an
 * ordinal (starting at 1; ordinal 0 is reserved for "unset"), its bytes are
 * appended to a {@link PagedBytes} store, and two packed maps are filled:
 * term ordinal to byte offset, and document to term ordinal.
 */
public class DocTermsIndexCreator<T extends DocTermsIndex> extends EntryCreatorWithOptions<T>
{
/** Option flag forwarded to the GrowableWriter instances created in {@link #create}. */
public static final int FASTER_BUT_MORE_RAM = 2;
/** Name of the field this creator loads. Never null. */
public String field;
/**
 * @param field field to load
 * @throws IllegalArgumentException if {@code field} is null
 */
public DocTermsIndexCreator( String field )
{
super( FASTER_BUT_MORE_RAM ); // By default turn on FASTER_BUT_MORE_RAM
if( field == null ) {
throw new IllegalArgumentException( "field can not be null" );
}
this.field = field;
}
/**
 * @param field field to load
 * @param flags option flags, e.g. {@link #FASTER_BUT_MORE_RAM}
 * @throws IllegalArgumentException if {@code field} is null
 */
public DocTermsIndexCreator( String field, int flags )
{
super( flags );
if( field == null ) {
throw new IllegalArgumentException( "field can not be null" );
}
this.field = field;
}
/** The key depends only on this creator class and the field, not on the flags. */
@Override
public EntryKey getCacheKey() {
return new SimpleEntryKey( DocTermsIndexCreator.class, field );
}
/**
 * Enumerates the field's terms in order, assigning ordinals from 1 upward,
 * and records the ordinal of the term seen for each document.
 */
@Override
public T create(IndexReader reader) throws IOException
{
String field = StringHelper.intern(this.field); // TODO?? necessary?
Terms terms = MultiFields.getTerms(reader, field);
final boolean fasterButMoreRAM = hasOption(FASTER_BUT_MORE_RAM);
final PagedBytes bytes = new PagedBytes(15);
int startBytesBPV;
int startTermsBPV;
int startNumUniqueTerms;
int maxDoc = reader.maxDoc();
// Upper bound on ordinals: maxDoc+1, guarded so the addition cannot overflow.
final int termCountHardLimit;
if (maxDoc == Integer.MAX_VALUE) {
termCountHardLimit = Integer.MAX_VALUE;
} else {
termCountHardLimit = maxDoc+1;
}
if (terms != null) {
// Try for coarse estimate for number of bits; this
// should be an underestimate most of the time, which
// is fine -- GrowableWriter will reallocate as needed
long numUniqueTerms = 0;
try {
numUniqueTerms = terms.getUniqueTermCount();
} catch (UnsupportedOperationException uoe) {
numUniqueTerms = -1;
}
if (numUniqueTerms != -1) {
if (numUniqueTerms > termCountHardLimit) {
// app is misusing the API (there is more than
// one term per doc); in this case we make best
// effort to load what we can (see LUCENE-2142)
numUniqueTerms = termCountHardLimit;
}
startBytesBPV = PackedInts.bitsRequired(numUniqueTerms*4);
startTermsBPV = PackedInts.bitsRequired(numUniqueTerms);
startNumUniqueTerms = (int) numUniqueTerms;
} else {
startBytesBPV = 1;
startTermsBPV = 1;
startNumUniqueTerms = 1;
}
} else {
startBytesBPV = 1;
startTermsBPV = 1;
startNumUniqueTerms = 1;
}
// One extra slot because ordinal 0 is reserved.
GrowableWriter termOrdToBytesOffset = new GrowableWriter(startBytesBPV, 1+startNumUniqueTerms, fasterButMoreRAM);
final GrowableWriter docToTermOrd = new GrowableWriter(startTermsBPV, reader.maxDoc(), fasterButMoreRAM);
// 0 is reserved for "unset"
bytes.copyUsingLengthPrefix(new BytesRef());
int termOrd = 1;
if (terms != null) {
final TermsEnum termsEnum = terms.iterator();
final Bits delDocs = MultiFields.getDeletedDocs(reader);
DocsEnum docs = null;
while(true) {
final BytesRef term = termsEnum.next();
if (term == null) {
break;
}
// Stop once the ordinal limit is reached; remaining terms are not loaded.
if (termOrd >= termCountHardLimit) {
break;
}
if (termOrd == termOrdToBytesOffset.size()) {
// NOTE: this code only runs if the incoming
// reader impl doesn't implement
// getUniqueTermCount (which should be uncommon)
termOrdToBytesOffset = termOrdToBytesOffset.resize(ArrayUtil.oversize(1+termOrd, 1));
}
termOrdToBytesOffset.set(termOrd, bytes.copyUsingLengthPrefix(term));
docs = termsEnum.docs(delDocs, docs);
while (true) {
final int docID = docs.nextDoc();
if (docID == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
docToTermOrd.set(docID, termOrd);
}
termOrd++;
}
// Trim the ord-to-offset map down to the ordinals actually assigned.
if (termOrdToBytesOffset.size() > termOrd) {
termOrdToBytesOffset = termOrdToBytesOffset.resize(termOrd);
}
}
// maybe an int-only impl?
// termOrd is now one past the last assigned ordinal and is passed as numOrd.
return (T)new DocTermsIndexImpl(bytes.freeze(true), termOrdToBytesOffset.getMutable(), docToTermOrd.getMutable(), termOrd);
}
/** Entries are returned unchanged; there is currently nothing to re-validate. */
@Override
public T validate(T entry, IndexReader reader) throws IOException {
// TODO? nothing? perhaps subsequent call with FASTER_BUT_MORE_RAM?
return entry;
}
//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
/** {@link DocTermsIndex} backed by frozen paged bytes and two packed maps. */
public static class DocTermsIndexImpl extends DocTermsIndex {
private final PagedBytes.Reader bytes;
private final PackedInts.Reader termOrdToBytesOffset;
private final PackedInts.Reader docToTermOrd;
private final int numOrd;
public DocTermsIndexImpl(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, PackedInts.Reader docToTermOrd, int numOrd) {
this.bytes = bytes;
this.docToTermOrd = docToTermOrd;
this.termOrdToBytesOffset = termOrdToBytesOffset;
this.numOrd = numOrd;
}
/** @return the packed document-to-ordinal map itself */
@Override
public PackedInts.Reader getDocToOrd() {
return docToTermOrd;
}
/** @return the ordinal count given at construction */
@Override
public int numOrd() {
return numOrd;
}
/** @return the ordinal stored for {@code docID} */
@Override
public int getOrd(int docID) {
return (int) docToTermOrd.get(docID);
}
/** @return the number of entries in the document-to-ordinal map */
@Override
public int size() {
return docToTermOrd.size();
}
/** Fills {@code ret} with the term bytes stored for ordinal {@code ord}. */
@Override
public BytesRef lookup(int ord, BytesRef ret) {
return bytes.fillUsingLengthPrefix(ret, termOrdToBytesOffset.get(ord));
}
/** @return a new enumerator positioned on ordinal 0 */
@Override
public TermsEnum getTermsEnum() {
return this.new DocTermsIndexEnum();
}
/**
 * Sequential enumerator over the stored terms. It reads length-prefixed
 * terms directly from the byte blocks; seeking by text and all postings
 * access are unsupported.
 */
class DocTermsIndexEnum extends TermsEnum {
int currentOrd;
int currentBlockNumber;
int end; // end position in the current block
final byte[][] blocks;
final int[] blockEnds;
final BytesRef term = new BytesRef();
public DocTermsIndexEnum() {
currentOrd = 0;
currentBlockNumber = 0;
blocks = bytes.getBlocks();
blockEnds = bytes.getBlockEnds();
// Position on ordinal 0 and remember which block it lives in.
currentBlockNumber = bytes.fillUsingLengthPrefix2(term, termOrdToBytesOffset.get(0));
end = blockEnds[currentBlockNumber];
}
@Override
public SeekStatus seek(BytesRef text, boolean useCache) throws IOException {
// TODO - we can support with binary search
throw new UnsupportedOperationException();
}
/** Positions the enumerator on {@code ord}; always reports FOUND. */
@Override
public SeekStatus seek(long ord) throws IOException {
// NOTE(review): the assert admits ord == numOrd, but create() passes numOrd as one
// past the last assigned ordinal -- confirm whether "< numOrd" was intended.
assert(ord >= 0 && ord <= numOrd);
// TODO: if gap is small, could iterate from current position? Or let user decide that?
currentBlockNumber = bytes.fillUsingLengthPrefix2(term, termOrdToBytesOffset.get((int)ord));
end = blockEnds[currentBlockNumber];
currentOrd = (int)ord;
return SeekStatus.FOUND;
}
/**
 * Advances to the term stored immediately after the current one.
 *
 * @return the shared term instance, or null when no further term exists
 */
@Override
public BytesRef next() throws IOException {
// The next length prefix starts right after the current term's bytes.
int start = term.offset + term.length;
if (start >= end) {
// switch byte blocks
if (currentBlockNumber +1 >= blocks.length) {
return null;
}
currentBlockNumber++;
term.bytes = blocks[currentBlockNumber];
end = blockEnds[currentBlockNumber];
start = 0;
if (end<=0) return null; // special case of empty last array
}
currentOrd++;
byte[] block = term.bytes;
// High bit clear: one-byte length. High bit set: two-byte length (15 bits).
if ((block[start] & 128) == 0) {
term.length = block[start];
term.offset = start+1;
} else {
term.length = (((block[start] & 0x7f)) << 8) | (block[1+start] & 0xff);
term.offset = start+2;
}
return term;
}
@Override
public BytesRef term() throws IOException {
return term;
}
@Override
public long ord() throws IOException {
return currentOrd;
}
@Override
public int docFreq() {
throw new UnsupportedOperationException();
}
@Override
public DocsEnum docs(Bits skipDocs, DocsEnum reuse) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public Comparator<BytesRef> getComparator() throws IOException {
throw new UnsupportedOperationException();
}
}
}
}

View File

@ -0,0 +1,150 @@
package org.apache.lucene.search.cache;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.FieldCache.DoubleParser;
import org.apache.lucene.search.cache.CachedArray.DoubleValues;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.OpenBitSet;
/**
 * Builds {@link DoubleValues} cache entries for one field: each indexed term is
 * converted with a {@link DoubleParser} and stored at the position of every
 * document that contains it. Depending on the configured options the entry is
 * filled with the value array, the bits marking documents that have a value,
 * or both.
 */
public class DoubleValuesCreator extends CachedArrayCreator<DoubleValues> {

  /** Parser applied to each term; when {@code null} one is chosen on the first fill. */
  protected DoubleParser parser;

  public DoubleValuesCreator(String field, DoubleParser parser, int options) {
    super(field, options);
    this.parser = parser;
  }

  public DoubleValuesCreator(String field, DoubleParser parser) {
    super(field);
    this.parser = parser;
  }

  @Override
  public Class getArrayType() {
    return Double.class;
  }

  //--------------------------------------------------------------------------------
  //--------------------------------------------------------------------------------

  /** Creates an empty entry and populates it through {@code validate}. */
  @Override
  public DoubleValues create(IndexReader reader) throws IOException {
    final DoubleValues fresh = new DoubleValues();
    return validate(fresh, reader);
  }

  /**
   * Fills in whichever parts of {@code entry} the options ask for and that are
   * still missing, then returns the same entry.
   *
   * @throws RuntimeException if neither values nor bits are enabled in the options
   */
  @Override
  public DoubleValues validate(DoubleValues entry, IndexReader reader) throws IOException {
    final boolean wantValues = hasOption(OPTION_CACHE_VALUES);
    if (wantValues && entry.values == null) {
      fillDoubleValues(entry, reader, field);
    }
    final boolean wantBits = hasOption(OPTION_CACHE_BITS);
    if (wantBits && entry.valid == null) {
      fillValidBits(entry, reader, field);
    }
    if (!(wantValues || wantBits)) {
      throw new RuntimeException( "the config must cache values and/or bits" );
    }
    return entry;
  }

  /**
   * Populates {@code vals.values} (and {@code vals.valid} when it is not already
   * set) from the terms of {@code field}.
   */
  protected void fillDoubleValues(DoubleValues vals, IndexReader reader, String field) throws IOException {
    if (parser == null) {
      // No parser was supplied: start with the default one and switch to the
      // NUMERIC_UTILS parser if a NumberFormatException is thrown.
      parser = FieldCache.DEFAULT_DOUBLE_PARSER;
      try {
        fillDoubleValues(vals, reader, field);
      } catch (NumberFormatException ne) {
        vals.parserHashCode = null; // wipe the previous one
        parser = FieldCache.NUMERIC_UTILS_DOUBLE_PARSER;
        fillDoubleValues(vals, reader, field);
      }
      return;
    }

    assertSameParserAndResetCounts(vals, parser);

    final Terms terms = MultiFields.getTerms(reader, field);
    final int maxDoc = reader.maxDoc();
    vals.values = null;  // allocated lazily, on the first document that has a value
    if (terms != null) {
      final TermsEnum termsEnum = terms.iterator();
      final Bits delDocs = MultiFields.getDeletedDocs(reader);
      final OpenBitSet validBits = hasOption(OPTION_CACHE_BITS) ? new OpenBitSet(maxDoc) : null;
      DocsEnum docs = null;
      try {
        for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
          final double parsed = parser.parseDouble(term);
          docs = termsEnum.docs(delDocs, docs);
          for (int docID = docs.nextDoc(); docID != DocIdSetIterator.NO_MORE_DOCS; docID = docs.nextDoc()) {
            if (vals.values == null) {
              vals.values = new double[maxDoc];
            }
            vals.values[docID] = parsed;
            vals.numDocs++;
            if (validBits != null) {
              validBits.set(docID);
            }
          }
          vals.numTerms++;
        }
      } catch (FieldCache.StopFillCacheException stop) {
        // Deliberately ignored: a parser throws this to stop reading further
        // terms while keeping what has been filled so far.
      }

      if (vals.valid == null) {
        vals.valid = checkMatchAllBits(delDocs, validBits, vals.numDocs, maxDoc);
      }
    }

    if (vals.values == null) {
      vals.values = new double[maxDoc];
    }
    if (vals.valid == null && vals.numDocs < 1) {
      vals.valid = new Bits.MatchNoBits(maxDoc);
    }
  }
}

View File

@ -0,0 +1,72 @@
package org.apache.lucene.search.cache;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.io.Serializable;
import org.apache.lucene.index.IndexReader;
/**
 * Creates cached values for a given key.
 *
 * @lucene.experimental
 */
public abstract class EntryCreator<T> implements Serializable
{
  /**
   * Builds a new cache entry for {@code reader}.
   *
   * @throws IOException if reading from {@code reader} fails
   */
  public abstract T create( IndexReader reader ) throws IOException;

  /**
   * Checks an entry that is already cached and returns the entry to use.
   * Called before a cached object is handed out when {@link #shouldValidate()}
   * returns {@code true}.
   *
   * @throws IOException if reading from {@code reader} fails
   */
  public abstract T validate( T entry, IndexReader reader ) throws IOException;

  /**
   * Indicate if a cached value should be checked before usage.
   * This is useful if an application wants to support subsequent calls
   * to the same cached object that may alter the cached object.  If
   * an application wants to avoid this (synchronized) check, it should
   * return 'false'
   *
   * @return 'true' if the Cache should call 'validate' before returning a cached object
   */
  public boolean shouldValidate() {
    return true;
  }

  /**
   * @return A key to identify valid cache entries for subsequent requests
   */
  public abstract EntryKey getCacheKey();

  //------------------------------------------------------------------------
  // The following code is a hack to make things work while the
  // EntryCreator is stored in the FieldCache.
  // When the FieldCache is replaced with a simpler map (LUCENE-2665)
  // this can be removed.
  //------------------------------------------------------------------------

  /** Two creators are equal when their cache keys are equal. */
  @Override
  public boolean equals(Object obj) {
    if( obj instanceof EntryCreator ) {
      return getCacheKey().equals( ((EntryCreator<?>)obj).getCacheKey() );
    }
    return false;
  }

  /** Hash of the cache key, consistent with {@link #equals(Object)}. */
  @Override
  public int hashCode() {
    return getCacheKey().hashCode();
  }
}

View File

@ -0,0 +1,41 @@
package org.apache.lucene.search.cache;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * {@link EntryCreator} whose behavior is configured by an int bit mask of
 * option flags.
 */
public abstract class EntryCreatorWithOptions<T> extends EntryCreator<T> {

  /** Option bit: when set, {@link #shouldValidate()} returns {@code true}. */
  public static final int OPTION_VALIDATE = 1;

  /** Bit mask of the enabled options. */
  public int flags;

  public EntryCreatorWithOptions(int flag) {
    flags = flag;
  }

  @Override
  public boolean shouldValidate() {
    return hasOption(OPTION_VALIDATE);
  }

  /**
   * @return {@code true} if every bit of {@code key} is set in {@link #flags}
   */
  public boolean hasOption(int key) {
    final int matched = flags & key;
    return matched == key;
  }
}

View File

@ -0,0 +1,9 @@
package org.apache.lucene.search.cache;
/**
 * A simple marker base class for the keys that identify cache entries; it is
 * the type returned by {@code EntryCreator.getCacheKey()}.
 * <p>
 * It declares no members of its own -- perhaps it could/should just be an Object.
 */
public abstract class EntryKey {
}

View File

@ -0,0 +1,150 @@
package org.apache.lucene.search.cache;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.FieldCache.FloatParser;
import org.apache.lucene.search.cache.CachedArray.FloatValues;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.OpenBitSet;
/**
 * Builds {@link FloatValues} cache entries for one field: each indexed term is
 * converted with a {@link FloatParser} and stored at the position of every
 * document that contains it. Depending on the configured options the entry is
 * filled with the value array, the bits marking documents that have a value,
 * or both.
 */
public class FloatValuesCreator extends CachedArrayCreator<FloatValues> {

  /** Parser applied to each term; when {@code null} one is chosen on the first fill. */
  protected FloatParser parser;

  public FloatValuesCreator(String field, FloatParser parser, int options) {
    super(field, options);
    this.parser = parser;
  }

  public FloatValuesCreator(String field, FloatParser parser) {
    super(field);
    this.parser = parser;
  }

  @Override
  public Class getArrayType() {
    return Float.class;
  }

  //--------------------------------------------------------------------------------
  //--------------------------------------------------------------------------------

  /** Creates an empty entry and populates it through {@code validate}. */
  @Override
  public FloatValues create(IndexReader reader) throws IOException {
    final FloatValues fresh = new FloatValues();
    return validate(fresh, reader);
  }

  /**
   * Fills in whichever parts of {@code entry} the options ask for and that are
   * still missing, then returns the same entry.
   *
   * @throws RuntimeException if neither values nor bits are enabled in the options
   */
  @Override
  public FloatValues validate(FloatValues entry, IndexReader reader) throws IOException {
    final boolean wantValues = hasOption(OPTION_CACHE_VALUES);
    if (wantValues && entry.values == null) {
      fillFloatValues(entry, reader, field);
    }
    final boolean wantBits = hasOption(OPTION_CACHE_BITS);
    if (wantBits && entry.valid == null) {
      fillValidBits(entry, reader, field);
    }
    if (!(wantValues || wantBits)) {
      throw new RuntimeException( "the config must cache values and/or bits" );
    }
    return entry;
  }

  /**
   * Populates {@code vals.values} (and {@code vals.valid} when it is not already
   * set) from the terms of {@code field}.
   */
  protected void fillFloatValues(FloatValues vals, IndexReader reader, String field) throws IOException {
    if (parser == null) {
      // No parser was supplied: start with the default one and switch to the
      // NUMERIC_UTILS parser if a NumberFormatException is thrown.
      parser = FieldCache.DEFAULT_FLOAT_PARSER;
      try {
        fillFloatValues(vals, reader, field);
      } catch (NumberFormatException ne) {
        vals.parserHashCode = null; // wipe the previous one
        parser = FieldCache.NUMERIC_UTILS_FLOAT_PARSER;
        fillFloatValues(vals, reader, field);
      }
      return;
    }

    assertSameParserAndResetCounts(vals, parser);

    final Terms terms = MultiFields.getTerms(reader, field);
    final int maxDoc = reader.maxDoc();
    vals.values = null;  // allocated lazily, on the first document that has a value
    if (terms != null) {
      final TermsEnum termsEnum = terms.iterator();
      final Bits delDocs = MultiFields.getDeletedDocs(reader);
      final OpenBitSet validBits = hasOption(OPTION_CACHE_BITS) ? new OpenBitSet(maxDoc) : null;
      DocsEnum docs = null;
      try {
        for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
          final float parsed = parser.parseFloat(term);
          docs = termsEnum.docs(delDocs, docs);
          for (int docID = docs.nextDoc(); docID != DocIdSetIterator.NO_MORE_DOCS; docID = docs.nextDoc()) {
            if (vals.values == null) {
              vals.values = new float[maxDoc];
            }
            vals.values[docID] = parsed;
            vals.numDocs++;
            if (validBits != null) {
              validBits.set(docID);
            }
          }
          vals.numTerms++;
        }
      } catch (FieldCache.StopFillCacheException stop) {
        // Deliberately ignored: a parser throws this to stop reading further
        // terms while keeping what has been filled so far.
      }

      if (vals.valid == null) {
        vals.valid = checkMatchAllBits(delDocs, validBits, vals.numDocs, maxDoc);
      }
    }

    if (vals.values == null) {
      vals.values = new float[maxDoc];
    }
    if (vals.valid == null && vals.numDocs < 1) {
      vals.valid = new Bits.MatchNoBits(maxDoc);
    }
  }
}

View File

@ -0,0 +1,150 @@
package org.apache.lucene.search.cache;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.FieldCache.IntParser;
import org.apache.lucene.search.cache.CachedArray.IntValues;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.OpenBitSet;
/**
 * Builds {@link IntValues} cache entries for one field: each indexed term is
 * converted with an {@link IntParser} and stored at the position of every
 * document that contains it. Depending on the configured options the entry is
 * filled with the value array, the bits marking documents that have a value,
 * or both.
 */
public class IntValuesCreator extends CachedArrayCreator<IntValues> {

  /** Parser applied to each term; when {@code null} one is chosen on the first fill. */
  protected IntParser parser;

  public IntValuesCreator(String field, IntParser parser, int options) {
    super(field, options);
    this.parser = parser;
  }

  public IntValuesCreator(String field, IntParser parser) {
    super(field);
    this.parser = parser;
  }

  @Override
  public Class getArrayType() {
    return Integer.class;
  }

  //--------------------------------------------------------------------------------
  //--------------------------------------------------------------------------------

  /** Creates an empty entry and populates it through {@code validate}. */
  @Override
  public IntValues create(IndexReader reader) throws IOException {
    final IntValues fresh = new IntValues();
    return validate(fresh, reader);
  }

  /**
   * Fills in whichever parts of {@code entry} the options ask for and that are
   * still missing, then returns the same entry.
   *
   * @throws RuntimeException if neither values nor bits are enabled in the options
   */
  @Override
  public IntValues validate(IntValues entry, IndexReader reader) throws IOException {
    final boolean wantValues = hasOption(OPTION_CACHE_VALUES);
    if (wantValues && entry.values == null) {
      fillIntValues(entry, reader, field);
    }
    final boolean wantBits = hasOption(OPTION_CACHE_BITS);
    if (wantBits && entry.valid == null) {
      fillValidBits(entry, reader, field);
    }
    if (!(wantValues || wantBits)) {
      throw new RuntimeException( "the config must cache values and/or bits" );
    }
    return entry;
  }

  /**
   * Populates {@code vals.values} (and {@code vals.valid} when it is not already
   * set) from the terms of {@code field}.
   */
  protected void fillIntValues(IntValues vals, IndexReader reader, String field) throws IOException {
    if (parser == null) {
      // No parser was supplied: start with the default one and switch to the
      // NUMERIC_UTILS parser if a NumberFormatException is thrown.
      parser = FieldCache.DEFAULT_INT_PARSER;
      try {
        fillIntValues(vals, reader, field);
      } catch (NumberFormatException ne) {
        vals.parserHashCode = null; // wipe the previous one
        parser = FieldCache.NUMERIC_UTILS_INT_PARSER;
        fillIntValues(vals, reader, field);
      }
      return;
    }

    assertSameParserAndResetCounts(vals, parser);

    final Terms terms = MultiFields.getTerms(reader, field);
    final int maxDoc = reader.maxDoc();
    vals.values = null;  // allocated lazily, on the first document that has a value
    if (terms != null) {
      final TermsEnum termsEnum = terms.iterator();
      final Bits delDocs = MultiFields.getDeletedDocs(reader);
      final OpenBitSet validBits = hasOption(OPTION_CACHE_BITS) ? new OpenBitSet(maxDoc) : null;
      DocsEnum docs = null;
      try {
        for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
          final int parsed = parser.parseInt(term);
          docs = termsEnum.docs(delDocs, docs);
          for (int docID = docs.nextDoc(); docID != DocIdSetIterator.NO_MORE_DOCS; docID = docs.nextDoc()) {
            if (vals.values == null) {
              vals.values = new int[maxDoc];
            }
            vals.values[docID] = parsed;
            vals.numDocs++;
            if (validBits != null) {
              validBits.set(docID);
            }
          }
          vals.numTerms++;
        }
      } catch (FieldCache.StopFillCacheException stop) {
        // Deliberately ignored: a parser throws this to stop reading further
        // terms while keeping what has been filled so far.
      }

      if (vals.valid == null) {
        vals.valid = checkMatchAllBits(delDocs, validBits, vals.numDocs, maxDoc);
      }
    }

    if (vals.values == null) {
      vals.values = new int[maxDoc];
    }
    if (vals.valid == null && vals.numDocs < 1) {
      vals.valid = new Bits.MatchNoBits(maxDoc);
    }
  }
}

View File

@ -0,0 +1,150 @@
package org.apache.lucene.search.cache;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.FieldCache.LongParser;
import org.apache.lucene.search.cache.CachedArray.LongValues;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.OpenBitSet;
/**
 * Builds {@link LongValues} cache entries for one field: each indexed term is
 * converted with a {@link LongParser} and stored at the position of every
 * document that contains it. Depending on the configured options the entry is
 * filled with the value array, the bits marking documents that have a value,
 * or both.
 */
public class LongValuesCreator extends CachedArrayCreator<LongValues> {

  /** Parser applied to each term; when {@code null} one is chosen on the first fill. */
  protected LongParser parser;

  public LongValuesCreator(String field, LongParser parser, int options) {
    super(field, options);
    this.parser = parser;
  }

  public LongValuesCreator(String field, LongParser parser) {
    super(field);
    this.parser = parser;
  }

  @Override
  public Class getArrayType() {
    return Long.class;
  }

  //--------------------------------------------------------------------------------
  //--------------------------------------------------------------------------------

  /** Creates an empty entry and populates it through {@code validate}. */
  @Override
  public LongValues create(IndexReader reader) throws IOException {
    final LongValues fresh = new LongValues();
    return validate(fresh, reader);
  }

  /**
   * Fills in whichever parts of {@code entry} the options ask for and that are
   * still missing, then returns the same entry.
   *
   * @throws RuntimeException if neither values nor bits are enabled in the options
   */
  @Override
  public LongValues validate(LongValues entry, IndexReader reader) throws IOException {
    final boolean wantValues = hasOption(OPTION_CACHE_VALUES);
    if (wantValues && entry.values == null) {
      fillLongValues(entry, reader, field);
    }
    final boolean wantBits = hasOption(OPTION_CACHE_BITS);
    if (wantBits && entry.valid == null) {
      fillValidBits(entry, reader, field);
    }
    if (!(wantValues || wantBits)) {
      throw new RuntimeException( "the config must cache values and/or bits" );
    }
    return entry;
  }

  /**
   * Populates {@code vals.values} (and {@code vals.valid} when it is not already
   * set) from the terms of {@code field}.
   */
  protected void fillLongValues(LongValues vals, IndexReader reader, String field) throws IOException {
    if (parser == null) {
      // No parser was supplied: start with the default one and switch to the
      // NUMERIC_UTILS parser if a NumberFormatException is thrown.
      parser = FieldCache.DEFAULT_LONG_PARSER;
      try {
        fillLongValues(vals, reader, field);
      } catch (NumberFormatException ne) {
        vals.parserHashCode = null; // wipe the previous one
        parser = FieldCache.NUMERIC_UTILS_LONG_PARSER;
        fillLongValues(vals, reader, field);
      }
      return;
    }

    assertSameParserAndResetCounts(vals, parser);

    final Terms terms = MultiFields.getTerms(reader, field);
    final int maxDoc = reader.maxDoc();
    vals.values = null;  // allocated lazily, on the first document that has a value
    if (terms != null) {
      final TermsEnum termsEnum = terms.iterator();
      final Bits delDocs = MultiFields.getDeletedDocs(reader);
      final OpenBitSet validBits = hasOption(OPTION_CACHE_BITS) ? new OpenBitSet(maxDoc) : null;
      DocsEnum docs = null;
      try {
        for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
          final long parsed = parser.parseLong(term);
          docs = termsEnum.docs(delDocs, docs);
          for (int docID = docs.nextDoc(); docID != DocIdSetIterator.NO_MORE_DOCS; docID = docs.nextDoc()) {
            if (vals.values == null) {
              vals.values = new long[maxDoc];
            }
            vals.values[docID] = parsed;
            vals.numDocs++;
            if (validBits != null) {
              validBits.set(docID);
            }
          }
          vals.numTerms++;
        }
      } catch (FieldCache.StopFillCacheException stop) {
        // Deliberately ignored: a parser throws this to stop reading further
        // terms while keeping what has been filled so far.
      }

      if (vals.valid == null) {
        vals.valid = checkMatchAllBits(delDocs, validBits, vals.numDocs, maxDoc);
      }
    }

    if (vals.values == null) {
      vals.values = new long[maxDoc];
    }
    if (vals.valid == null && vals.numDocs < 1) {
      vals.valid = new Bits.MatchNoBits(maxDoc);
    }
  }
}

View File

@ -0,0 +1,132 @@
package org.apache.lucene.search.cache;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.FieldCache.ShortParser;
import org.apache.lucene.search.cache.CachedArray.ShortValues;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.OpenBitSet;
/**
 * Builds {@link ShortValues} cache entries for one field: each indexed term is
 * converted with a {@link ShortParser} and stored at the position of every
 * document that contains it. Depending on the configured options the entry is
 * filled with the value array, the bits marking documents that have a value,
 * or both.
 */
public class ShortValuesCreator extends CachedArrayCreator<ShortValues>
{
  /** Parser applied to each term; when {@code null} the default short parser is used. */
  protected ShortParser parser;

  public ShortValuesCreator( String field, ShortParser parser, int options )
  {
    super( field, options );
    this.parser = parser;
  }

  public ShortValuesCreator( String field, ShortParser parser )
  {
    super( field );
    this.parser = parser;
  }

  @Override
  public Class getArrayType() {
    return Short.class;
  }

  //--------------------------------------------------------------------------------
  //--------------------------------------------------------------------------------

  /** Creates an empty entry and populates it through {@code validate}. */
  @Override
  public ShortValues create(IndexReader reader) throws IOException {
    return validate( new ShortValues(), reader );
  }

  /**
   * Fills in whichever parts of {@code entry} the options ask for and that are
   * still missing, then returns the same entry.
   *
   * @throws RuntimeException if neither values nor bits are enabled in the options
   */
  @Override
  public ShortValues validate(ShortValues entry, IndexReader reader) throws IOException {
    boolean ok = false;
    if( hasOption(OPTION_CACHE_VALUES) ) {
      ok = true;
      if( entry.values == null ) {
        fillShortValues(entry, reader, field);
      }
    }
    if( hasOption(OPTION_CACHE_BITS) ) {
      ok = true;
      if( entry.valid == null ) {
        fillValidBits(entry, reader, field);
      }
    }
    if( !ok ) {
      throw new RuntimeException( "the config must cache values and/or bits" );
    }
    return entry;
  }

  /**
   * Populates {@code vals.values} (and {@code vals.valid} when it is not already
   * set) from the terms of {@code field}. The value array is always allocated,
   * even when the field has no terms.
   */
  protected void fillShortValues( ShortValues vals, IndexReader reader, String field ) throws IOException
  {
    if( parser == null ) {
      parser = FieldCache.DEFAULT_SHORT_PARSER;
    }
    assertSameParserAndResetCounts(vals, parser);

    Terms terms = MultiFields.getTerms(reader, field);
    int maxDoc = reader.maxDoc();
    vals.values = new short[maxDoc];
    if (terms != null) {
      final TermsEnum termsEnum = terms.iterator();
      final Bits delDocs = MultiFields.getDeletedDocs(reader);
      OpenBitSet validBits = (hasOption(OPTION_CACHE_BITS)) ? new OpenBitSet( maxDoc ) : null;
      DocsEnum docs = null;
      try {
        while(true) {
          final BytesRef term = termsEnum.next();
          if (term == null) {
            break;
          }
          // primitive on purpose: a boxed Short would be unboxed again on
          // every per-document store below
          final short termval = parser.parseShort(term);
          docs = termsEnum.docs(delDocs, docs);
          while (true) {
            final int docID = docs.nextDoc();
            if (docID == DocIdSetIterator.NO_MORE_DOCS) {
              break;
            }
            vals.values[docID] = termval;
            vals.numDocs++;
            if( validBits != null ) {
              validBits.set( docID );
            }
          }
          vals.numTerms++;
        }
      } catch (FieldCache.StopFillCacheException stop) {
        // Deliberately ignored: a parser throws this to stop reading further
        // terms while keeping what has been filled so far.
      }

      if( vals.valid == null ) {
        vals.valid = checkMatchAllBits( delDocs, validBits, vals.numDocs, maxDoc );
      }
    }
    if( vals.valid == null && vals.numDocs < 1 ) {
      vals.valid = new Bits.MatchNoBits( maxDoc );
    }
  }
}

View File

@ -0,0 +1,59 @@
package org.apache.lucene.search.cache;
/**
 * {@link EntryKey} made of a class plus an ordered list of argument objects.
 * Two keys are equal when they have the identical class and pairwise equal
 * arguments. The hash is computed once, in the constructor.
 * <p>
 * A {@code null} argument array and {@code null} elements are tolerated
 * consistently by the constructor, {@link #equals(Object)} and
 * {@link #toString()}.
 */
public class SimpleEntryKey extends EntryKey
{
  public final Class clazz;
  public final Object[] args;
  public final int hash;

  /**
   * @param clazz class part of the key; must not be {@code null}
   * @param args  remaining parts of the key; compared in order
   */
  public SimpleEntryKey( Class clazz, Object ... args ) {
    this.clazz = clazz;
    this.args = args;

    int hash = clazz.hashCode();
    if( args != null ) {
      for( Object obj : args ) {
        // a null element contributes nothing to the hash instead of throwing
        if( obj != null ) {
          hash ^= obj.hashCode();
        }
      }
    }
    this.hash = hash;
  }

  @Override
  public boolean equals(Object obj) {
    if( !(obj instanceof SimpleEntryKey) ) {
      return false;
    }
    SimpleEntryKey key = (SimpleEntryKey)obj;
    if( key.hash != hash || key.clazz != clazz ) {
      return false;
    }
    // In the off chance that the hash etc is all the same
    // we should actually check the values. Arrays.equals also copes with
    // a null array or null elements on either side.
    return java.util.Arrays.equals( args, key.args );
  }

  @Override
  public int hashCode() {
    return hash;
  }

  @Override
  public String toString() {
    StringBuilder str = new StringBuilder();
    str.append( '[' ).append( clazz.getName() ).append( ':' );
    if( args != null ) {
      for( Object v : args ) {
        str.append( v ).append( ':' );
      }
    }
    str.append( hash ).append( ']' );
    return str.toString();
  }
}

View File

@ -26,4 +26,36 @@ public interface Bits {
public int length(); public int length();
public static final Bits[] EMPTY_ARRAY = new Bits[0]; public static final Bits[] EMPTY_ARRAY = new Bits[0];
/** {@link Bits} of a fixed length in which every index reads as set. */
public static class MatchAllBits implements Bits {
  final int len;

  /** @param length value reported by {@link #length()} */
  public MatchAllBits(int length) {
    len = length;
  }

  /** Always {@code true}; {@code index} is not inspected. */
  public boolean get(int index) {
    return true;
  }

  public int length() {
    return this.len;
  }
}
/** {@link Bits} of a fixed length in which every index reads as unset. */
public static class MatchNoBits implements Bits {
  final int len;

  /** @param length value reported by {@link #length()} */
  public MatchNoBits(int length) {
    len = length;
  }

  /** Always {@code false}; {@code index} is not inspected. */
  public boolean get(int index) {
    return false;
  }

  public int length() {
    return this.len;
  }
}
} }

View File

@ -0,0 +1,190 @@
package org.apache.lucene.search.cache;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.lang.reflect.Method;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.FieldCache.*;
import org.apache.lucene.search.FieldCache.Parser;
import org.apache.lucene.search.FieldCache.ShortParser;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.OpenBitSet;
import static org.hamcrest.CoreMatchers.*;
public class TestEntryCreators extends LuceneTestCase {
protected IndexReader reader;
private static final int NUM_DOCS = 500 * RANDOM_MULTIPLIER;
private Directory directory;
/**
 * Describes one numeric field under test: the field name, the name of the
 * FieldCache getter used to load it, the creator and parser classes passed to
 * that getter, and the value indexed for each document ({@code null} where the
 * document has no value).
 */
static class NumberTypeTester {
  String funcName;
  Class<? extends CachedArrayCreator> creator;
  Class<? extends Parser> parser;
  String field;
  Number[] values;

  public NumberTypeTester( String field, String funcName, Class<? extends CachedArrayCreator> creator, Class<? extends Parser> parser ) {
    this.field = field;
    this.funcName = funcName;
    this.creator = creator;
    this.parser = parser;
    this.values = new Number[NUM_DOCS];
  }

  /** The field name; appended to assertion messages. */
  @Override
  public String toString()
  {
    return field;
  }
}
private NumberTypeTester[] typeTests;
/**
 * Indexes {@code NUM_DOCS} documents. For every tested type a document usually
 * gets a random value in [10, 30); the first two documents, and randomly about
 * one in twenty others, get no value for that field, so there is something for
 * the valid bits to report.
 */
@Override
public void setUp() throws Exception {
  super.setUp();
  directory = newDirectory();
  final RandomIndexWriter indexWriter = new RandomIndexWriter(random, directory);

  typeTests = new NumberTypeTester[] {
    new NumberTypeTester( "theRandomByte",   "getBytes",   ByteValuesCreator.class,   ByteParser.class ),
    new NumberTypeTester( "theRandomShort",  "getShorts",  ShortValuesCreator.class,  ShortParser.class ),
    new NumberTypeTester( "theRandomInt",    "getInts",    IntValuesCreator.class,    IntParser.class ),
    new NumberTypeTester( "theRandomLong",   "getLongs",   LongValuesCreator.class,   LongParser.class ),
    new NumberTypeTester( "theRandomFloat",  "getFloats",  FloatValuesCreator.class,  FloatParser.class ),
    new NumberTypeTester( "theRandomDouble", "getDoubles", DoubleValuesCreator.class, DoubleParser.class ),
  };

  for (int docNum = 0; docNum < NUM_DOCS; docNum++) {
    final Document doc = new Document();

    // Test the valid bits
    for (NumberTypeTester tester : typeTests) {
      // the random draw happens for every document, before docNum is looked at
      final boolean drawn = random.nextInt(20) != 17;
      if (!drawn || docNum <= 1) {
        continue;
      }
      final int value = 10 + random.nextInt( 20 );  // get some field overlap
      tester.values[docNum] = value;
      doc.add(newField(tester.field, String.valueOf(value),
                       Field.Store.NO, Field.Index.NOT_ANALYZED ));
    }
    indexWriter.addDocument(doc);
  }

  reader = indexWriter.getReader();
  indexWriter.close();
}
/**
 * Closes the reader and the directory that {@code setUp()} opened, then runs
 * the superclass tear-down.
 */
@Override
public void tearDown() throws Exception {
reader.close();
directory.close();
super.tearDown();
}
/**
 * Checks which creator settings take part in the cache key: the field name and
 * the creator type do, the option flags and the parser do not.
 */
public void testKeys() throws IOException {
  // Check that the keys are unique for different fields
  EntryKey first = new ByteValuesCreator( "field1", null ).getCacheKey();
  EntryKey second = new ByteValuesCreator( "field2", null ).getCacheKey();
  assertThat("different fields should have a different key", first, not(second) );

  first = new ByteValuesCreator( "field1", null ).getCacheKey();
  second = new ShortValuesCreator( "field1", null ).getCacheKey();
  assertThat( "same field different type should have different key", first, not( second ) );

  first = new ByteValuesCreator( "ff", null ).getCacheKey();
  second = new ByteValuesCreator( "ff", null ).getCacheKey();
  assertThat( "same args should have same key", first, is( second ) );

  // combine the option flags with '|', the usual operator for bit masks
  first = new ByteValuesCreator( "ff", null, ByteValuesCreator.OPTION_CACHE_BITS | ByteValuesCreator.OPTION_CACHE_VALUES ).getCacheKey();
  second = new ByteValuesCreator( "ff", null ).getCacheKey();
  assertThat( "different options should share same key", first, is( second ) );

  first = new IntValuesCreator( "ff", FieldCache.DEFAULT_INT_PARSER ).getCacheKey();
  second = new IntValuesCreator( "ff", FieldCache.NUMERIC_UTILS_INT_PARSER ).getCacheKey();
  assertThat( "diferent parser should have same key", first, is( second ) );
}
/**
 * Fetches a {@link CachedArray} from the cache through reflection, so the
 * same test code can drive every numeric type described by a
 * {@code NumberTypeTester}.
 *
 * The cache method named {@code tester.funcName} is looked up with the
 * signature (IndexReader, String, EntryCreator). The creator is built via
 * the (String, parser, int) constructor of {@code tester.creator}, passing
 * a null parser and the given flags.
 *
 * @param cache  the cache instance the lookup method is invoked on
 * @param tester describes the field, cache method name, creator class and parser class
 * @param flags  option flags handed to the creator constructor
 * @return the value returned by the reflective cache call
 * @throws IOException if the invoked cache method itself failed with an IOException
 * @throws RuntimeException if the reflective lookup or invocation fails for any other reason
 */
private CachedArray getWithReflection( FieldCache cache, NumberTypeTester tester, int flags ) throws IOException
{
  try {
    Method getXXX = cache.getClass().getMethod( tester.funcName, IndexReader.class, String.class, EntryCreator.class );
    Constructor<?> constructor = tester.creator.getConstructor( String.class, tester.parser, Integer.TYPE );
    CachedArrayCreator creator = (CachedArrayCreator)constructor.newInstance( tester.field, null, flags );
    return (CachedArray) getXXX.invoke(cache, reader, tester.field, creator );
  }
  catch( java.lang.reflect.InvocationTargetException ex ) {
    // The reflective call wraps whatever the target threw; surface a real
    // IOException as-is so the declared 'throws IOException' is meaningful.
    Throwable cause = ex.getCause();
    if( cause instanceof IOException ) {
      throw (IOException)cause;
    }
    throw new RuntimeException( "Reflection failed", ex );
  }
  catch( Exception ex ) {
    throw new RuntimeException( "Reflection failed", ex );
  }
}
/**
 * For every numeric type tester, requests the cached array three times with
 * increasing options and checks what each request returns:
 * bits only, then values and bits without validation, then values and bits
 * with validation. The final result is verified against the indexed values.
 *
 * @throws IOException if a cache lookup fails with an I/O error
 */
public void testCachedArrays() throws IOException
{
  final FieldCache fieldCache = FieldCache.DEFAULT;

  // Check the different CachedArray types
  for( NumberTypeTester tester : typeTests ) {
    // 1) bits only: no raw array expected yet
    final CachedArray bitsOnly = getWithReflection( fieldCache, tester, CachedArrayCreator.OPTION_CACHE_BITS );
    assertNull( "should not get values : "+tester, bitsOnly.getRawArray() );
    assertNotNull( "should get bits : "+tester, bitsOnly.valid );

    // 2) values and bits, no validation: the raw array is still expected to be absent
    CachedArray fetched = getWithReflection( fieldCache, tester, CachedArrayCreator.CACHE_VALUES_AND_BITS );
    assertEquals( "should use same cached object : "+tester, bitsOnly, fetched );
    assertNull( "Validate=false shoudl not regenerate : "+tester, bitsOnly.getRawArray() );

    // 3) values and bits with validation: the raw array is now expected
    fetched = getWithReflection( fieldCache, tester, CachedArrayCreator.CACHE_VALUES_AND_BITS_VALIDATE );
    assertEquals( "should use same cached object : "+tester, bitsOnly, fetched );
    assertNotNull( "Validate=true should add the Array : "+tester, bitsOnly.getRawArray() );

    checkCachedArrayValuesAndBits( tester, fetched );
  }
}
/**
 * Compares a cached array against the values recorded in the tester.
 *
 * For each document index below NUM_DOCS, the valid bit must be set exactly
 * when the tester holds a non-null value. Afterwards the cached numTerms must
 * equal the number of distinct recorded values, and both the cached numDocs
 * and the cardinality of the valid bit set must equal the number of documents
 * that have a value.
 *
 * @param tester     holds the expected per-document values (null means no value)
 * @param cachedVals the cached array under inspection
 */
private void checkCachedArrayValuesAndBits( NumberTypeTester tester, CachedArray cachedVals )
{
  final Set<Number> uniqueValues = new HashSet<Number>();
  int docsWithValue = 0;

  for( int doc=0; doc<NUM_DOCS; doc++ ) {
    final Number expected = tester.values[doc];
    final boolean bitSet = cachedVals.valid.get(doc);

    if( expected == null ) {
      // no value was indexed for this document
      assertFalse( "Valid bit should be false ("+doc+") "+tester, bitSet );
      continue;
    }

    docsWithValue++;
    uniqueValues.add( expected );
    assertTrue( "Valid bit should be true ("+doc+"="+expected+") "+tester, bitSet );
  }

  assertEquals( "Cached numTerms does not match : "+tester, uniqueValues.size(), cachedVals.numTerms );
  assertEquals( "Cached numDocs does not match : "+tester, docsWithValue, cachedVals.numDocs );

  final OpenBitSet validBits = (OpenBitSet)cachedVals.valid;
  assertEquals( "Ordinal should match numDocs : "+tester, docsWithValue, validBits.cardinality() );
}
}