LUCENE-1789: add MultiValueSource wrapper to protect an existing ValueSource from using 2X RAM in FieldCache if composite reader is passed to getValues

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@804006 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2009-08-13 19:39:29 +00:00
parent 3cab215c2c
commit 1eb8149fb3
11 changed files with 263 additions and 3 deletions

View File

@ -686,6 +686,14 @@ New features
the same field in both a reader, and one of it's (descendant) sub
readers.
(Chris Hostetter, Mark Miller)
36. LUCENE-1789: Added utility class
oal.search.function.MultiValueSource to ease the transition to
segment based searching for any apps that directly call
oal.search.function.* APIs. This class wraps any other
ValueSource, but takes care when composite (multi-segment) are
passed to not double RAM usage in the FieldCache. (Chris
Hostetter, Mark Miller, Mike McCandless)
Optimizations

View File

@ -36,6 +36,16 @@ import java.io.IOException;
*
* @see org.apache.lucene.search.function.FieldCacheSource for requirements
* on the field.
*
* <p><b>NOTE</b>: with the switch in 2.9 to segment-based
* searching, if {@link #getValues} is invoked with a
* composite (multi-segment) reader, this can easily cause
* double RAM usage for the values in the FieldCache. It's
* best to switch your application to pass only atomic
* (single segment) readers to this API. Alternatively, for
* a short-term fix, you could wrap your ValueSource using
* {@link MultiValueSource}, which costs more CPU per lookup
* but will not consume double the FieldCache RAM.</p>
*/
public class ByteFieldSource extends FieldCacheSource {
private FieldCache.ByteParser parser;

View File

@ -39,6 +39,15 @@ import org.apache.lucene.search.FieldCache;
* The APIs introduced here might change in the future and will not be
* supported anymore in such a case.</font>
*
* <p><b>NOTE</b>: with the switch in 2.9 to segment-based
* searching, if {@link #getValues} is invoked with a
* composite (multi-segment) reader, this can easily cause
* double RAM usage for the values in the FieldCache. It's
* best to switch your application to pass only atomic
* (single segment) readers to this API. Alternatively, for
* a short-term fix, you could wrap your ValueSource using
* {@link MultiValueSource}, which costs more CPU per lookup
* but will not consume double the FieldCache RAM.</p>
*/
public abstract class FieldCacheSource extends ValueSource {
private String field;

View File

@ -37,6 +37,15 @@ import java.io.IOException;
* @see org.apache.lucene.search.function.FieldCacheSource for requirements
* on the field.
*
* <p><b>NOTE</b>: with the switch in 2.9 to segment-based
* searching, if {@link #getValues} is invoked with a
* composite (multi-segment) reader, this can easily cause
* double RAM usage for the values in the FieldCache. It's
* best to switch your application to pass only atomic
* (single segment) readers to this API. Alternatively, for
* a short-term fix, you could wrap your ValueSource using
* {@link MultiValueSource}, which costs more CPU per lookup
* but will not consume double the FieldCache RAM.</p>
*/
public class FloatFieldSource extends FieldCacheSource {
private FieldCache.FloatParser parser;

View File

@ -37,7 +37,15 @@ import java.io.IOException;
* @see org.apache.lucene.search.function.FieldCacheSource for requirements
* on the field.
*
*
* <p><b>NOTE</b>: with the switch in 2.9 to segment-based
* searching, if {@link #getValues} is invoked with a
* composite (multi-segment) reader, this can easily cause
* double RAM usage for the values in the FieldCache. It's
* best to switch your application to pass only atomic
* (single segment) readers to this API. Alternatively, for
* a short-term fix, you could wrap your ValueSource using
* {@link MultiValueSource}, which costs more CPU per lookup
* but will not consume double the FieldCache RAM.</p>
*/
public class IntFieldSource extends FieldCacheSource {
private FieldCache.IntParser parser;

View File

@ -0,0 +1,124 @@
package org.apache.lucene.search.function;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.util.ReaderUtil;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Explanation;
/** This class wraps another ValueSource, but protects
* against accidental double RAM usage in FieldCache when
* a composite reader is passed to {@link #getValues}.
*
* <p><b>NOTE</b>: this class adds a CPU penalty to every
* lookup, as it must resolve the incoming document to the
* right sub-reader using a binary search.</p>
*
* @deprecated This class is temporary, to ease the
* migration to segment-based searching. Please change your
* code to not pass composite readers to these APIs. */
public final class MultiValueSource extends ValueSource {
final ValueSource other;
public MultiValueSource(ValueSource other) {
this.other = other;
}
public DocValues getValues(IndexReader reader) throws IOException {
IndexReader[] subReaders = reader.getSequentialSubReaders();
if (subReaders != null) {
// This is a composite reader
return new MultiDocValues(subReaders);
} else {
// Already an atomic reader -- just delegate
return other.getValues(reader);
}
}
public String description() {
return other.description();
}
public boolean equals(Object o) {
if (o instanceof MultiValueSource) {
return ((MultiValueSource) o).other.equals(other);
} else {
return false;
}
}
public int hashCode() {
return 31 * other.hashCode();
}
private final class MultiDocValues extends DocValues {
final DocValues[] docValues;
final int[] docStarts;
MultiDocValues(IndexReader[] subReaders) throws IOException {
docValues = new DocValues[subReaders.length];
docStarts = new int[subReaders.length];
int base = 0;
for(int i=0;i<subReaders.length;i++) {
docValues[i] = other.getValues(subReaders[i]);
docStarts[i] = base;
base += subReaders[i].maxDoc();
}
}
public float floatVal(int doc) {
final int n = ReaderUtil.subIndex(doc, docStarts);
return docValues[n].floatVal(doc-docStarts[n]);
}
public int intVal(int doc) {
final int n = ReaderUtil.subIndex(doc, docStarts);
return docValues[n].intVal(doc-docStarts[n]);
}
public long longVal(int doc) {
final int n = ReaderUtil.subIndex(doc, docStarts);
return docValues[n].longVal(doc-docStarts[n]);
}
public double doubleVal(int doc) {
final int n = ReaderUtil.subIndex(doc, docStarts);
return docValues[n].doubleVal(doc-docStarts[n]);
}
public String strVal(int doc) {
final int n = ReaderUtil.subIndex(doc, docStarts);
return docValues[n].strVal(doc-docStarts[n]);
}
public String toString(int doc) {
final int n = ReaderUtil.subIndex(doc, docStarts);
return docValues[n].toString(doc-docStarts[n]);
}
public Explanation explain(int doc) {
final int n = ReaderUtil.subIndex(doc, docStarts);
return docValues[n].explain(doc-docStarts[n]);
}
}
}

View File

@ -44,6 +44,15 @@ import java.io.IOException;
* The APIs introduced here might change in the future and will not be
* supported anymore in such a case.</font>
*
* <p><b>NOTE</b>: with the switch in 2.9 to segment-based
* searching, if {@link #getValues} is invoked with a
* composite (multi-segment) reader, this can easily cause
* double RAM usage for the values in the FieldCache. It's
* best to switch your application to pass only atomic
* (single segment) readers to this API. Alternatively, for
* a short-term fix, you could wrap your ValueSource using
* {@link MultiValueSource}, which costs more CPU per lookup
* but will not consume double the FieldCache RAM.</p>
*/
public class OrdFieldSource extends ValueSource {

View File

@ -45,6 +45,15 @@ import java.io.IOException;
* The APIs introduced here might change in the future and will not be
* supported anymore in such a case.</font>
*
* <p><b>NOTE</b>: with the switch in 2.9 to segment-based
* searching, if {@link #getValues} is invoked with a
* composite (multi-segment) reader, this can easily cause
* double RAM usage for the values in the FieldCache. It's
* best to switch your application to pass only atomic
* (single segment) readers to this API. Alternatively, for
* a short-term fix, you could wrap your ValueSource using
* {@link MultiValueSource}, which costs more CPU per lookup
* but will not consume double the FieldCache RAM.</p>
*/
public class ReverseOrdFieldSource extends ValueSource {

View File

@ -36,6 +36,16 @@ import java.io.IOException;
*
* @see org.apache.lucene.search.function.FieldCacheSource for requirements
* on the field.
*
* <p><b>NOTE</b>: with the switch in 2.9 to segment-based
* searching, if {@link #getValues} is invoked with a
* composite (multi-segment) reader, this can easily cause
* double RAM usage for the values in the FieldCache. It's
* best to switch your application to pass only atomic
* (single segment) readers to this API. Alternatively, for
* a short-term fix, you could wrap your ValueSource using
* {@link MultiValueSource}, which costs more CPU per lookup
* but will not consume double the FieldCache RAM.</p>
*/
public class ShortFieldSource extends FieldCacheSource {
private FieldCache.ShortParser parser;

View File

@ -58,9 +58,9 @@ public class TestOrdValues extends FunctionTestSetup {
IndexSearcher s = new IndexSearcher(dir);
ValueSource vs;
if (inOrder) {
vs = new OrdFieldSource(field);
vs = new MultiValueSource(new OrdFieldSource(field));
} else {
vs = new ReverseOrdFieldSource(field);
vs = new MultiValueSource(new ReverseOrdFieldSource(field));
}
Query q = new ValueSourceQuery(vs);

View File

@ -0,0 +1,64 @@
package org.apache.lucene.search.function;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.util.*;
import org.apache.lucene.store.*;
import org.apache.lucene.search.*;
import org.apache.lucene.search.function.*;
import org.apache.lucene.analysis.*;
import org.apache.lucene.index.*;
import org.apache.lucene.document.*;
public class TestValueSource extends LuceneTestCase {
public void testMultiValueSource() throws Exception {
Directory dir = new MockRAMDirectory();
IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
Document doc = new Document();
Field f = new Field("field", "", Field.Store.NO, Field.Index.NOT_ANALYZED);
doc.add(f);
for(int i=0;i<17;i++) {
f.setValue(""+i);
w.addDocument(doc);
w.commit();
}
IndexReader r = w.getReader();
w.close();
assertTrue(r.getSequentialSubReaders().length > 1);
ValueSource s1 = new IntFieldSource("field");
DocValues v1 = s1.getValues(r);
DocValues v2 = new MultiValueSource(s1).getValues(r);
for(int i=0;i<r.maxDoc();i++) {
assertEquals(v1.intVal(i), i);
assertEquals(v2.intVal(i), i);
}
FieldCache.DEFAULT.purgeAllCaches();
r.close();
dir.close();
}
}