LUCENE-3677: remove old byte[] norms api from IndexReader

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1227755 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2012-01-05 18:55:20 +00:00
parent 1f2f29ce48
commit bbfc831d52
16 changed files with 75 additions and 155 deletions

View File

@ -20,6 +20,7 @@ package org.apache.lucene.index;
import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.lang.reflect.Array;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Collection;
@ -639,20 +640,24 @@ public class CheckIndex {
infoStream.print(" test: field norms.........");
}
FieldInfos infos = reader.fieldInfos();
byte[] b;
DocValues dv;
for (final String fieldName : fieldNames) {
FieldInfo info = infos.fieldInfo(fieldName);
if (reader.hasNorms(fieldName)) {
b = reader.norms(fieldName);
if (b.length != reader.maxDoc()) {
throw new RuntimeException("norms for field: " + fieldName + " are of the wrong size");
dv = reader.normValues(fieldName);
assert dv != null;
if (dv.getSource().hasArray()) {
Object array = dv.getSource().getArray();
if (Array.getLength(array) != reader.maxDoc()) {
throw new RuntimeException("norms for field: " + fieldName + " are of the wrong size");
}
}
if (!info.isIndexed || info.omitNorms) {
throw new RuntimeException("field: " + fieldName + " should omit norms but has them!");
}
++status.totFields;
} else {
if (reader.norms(fieldName) != null) {
if (reader.normValues(fieldName) != null) {
throw new RuntimeException("field: " + fieldName + " should omit norms but has them!");
}
if (info.isIndexed && !info.omitNorms) {

View File

@ -727,27 +727,9 @@ public abstract class IndexReader implements Closeable {
// backward compatible implementation.
// SegmentReader has an efficient implementation.
ensureOpen();
return norms(field) != null;
return normValues(field) != null;
}
/** Returns the byte-encoded normalization factor for the named field of
* every document. This is used by the search code to score documents.
* Returns null if norms were not indexed for this field.
*
* @see org.apache.lucene.document.Field#setBoost(float)
*/
// TODO: cut over to source once we support other formats like float
public final byte[] norms(String field) throws IOException {
DocValues docValues = normValues(field);
if (docValues != null) {
Source source = docValues.getSource();
assert source.hasArray(); // TODO cut over to source
return (byte[])source.getArray();
}
return null;
}
/**
* Returns {@link Fields} for this reader.
* This method may return null if the reader has no

View File

@ -19,6 +19,7 @@ package org.apache.lucene.search.similarities;
import java.io.IOException;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.CollectionStatistics;
@ -165,7 +166,7 @@ public class BM25Similarity extends Similarity {
@Override
public final ExactDocScorer exactDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException {
final byte[] norms = context.reader.norms(fieldName);
final DocValues norms = context.reader.normValues(fieldName);
return norms == null
? new ExactBM25DocScorerNoNorms((BM25Stats)stats)
: new ExactBM25DocScorer((BM25Stats)stats, norms);
@ -173,7 +174,7 @@ public class BM25Similarity extends Similarity {
@Override
public final SloppyDocScorer sloppyDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException {
return new SloppyBM25DocScorer((BM25Stats) stats, context.reader.norms(fieldName));
return new SloppyBM25DocScorer((BM25Stats) stats, context.reader.normValues(fieldName));
}
private class ExactBM25DocScorer extends ExactDocScorer {
@ -182,12 +183,12 @@ public class BM25Similarity extends Similarity {
private final byte[] norms;
private final float[] cache;
ExactBM25DocScorer(BM25Stats stats, byte norms[]) {
ExactBM25DocScorer(BM25Stats stats, DocValues norms) throws IOException {
assert norms != null;
this.stats = stats;
this.weightValue = stats.weight * (k1 + 1); // boost * idf * (k1 + 1)
this.cache = stats.cache;
this.norms = norms;
this.norms = (byte[])norms.getSource().getArray();
}
@Override
@ -235,11 +236,11 @@ public class BM25Similarity extends Similarity {
private final byte[] norms;
private final float[] cache;
SloppyBM25DocScorer(BM25Stats stats, byte norms[]) {
SloppyBM25DocScorer(BM25Stats stats, DocValues norms) throws IOException {
this.stats = stats;
this.weightValue = stats.weight * (k1 + 1);
this.cache = stats.cache;
this.norms = norms;
this.norms = norms == null ? null : (byte[])norms.getSource().getArray();
}
@Override

View File

@ -57,7 +57,7 @@ import org.apache.lucene.util.TermContext;
* <a name="indextime"/>
* At indexing time, the indexer calls {@link #computeNorm(FieldInvertState)}, allowing
* the Similarity implementation to return a per-document byte for the field that will
* be later accessible via {@link IndexReader#norms(String)}. Lucene makes no assumption
* be later accessible via {@link IndexReader#normValues(String)}. Lucene makes no assumption
* about what is in this byte, but it is most useful for encoding length normalization
* information.
* <p>

View File

@ -19,6 +19,7 @@ package org.apache.lucene.search.similarities;
import java.io.IOException;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.CollectionStatistics;
@ -177,7 +178,7 @@ public abstract class SimilarityBase extends Similarity {
@Override
public ExactDocScorer exactDocScorer(Stats stats, String fieldName,
AtomicReaderContext context) throws IOException {
byte norms[] = context.reader.norms(fieldName);
DocValues norms = context.reader.normValues(fieldName);
if (stats instanceof MultiSimilarity.MultiStats) {
// a multi term query (e.g. phrase). return the summation,
@ -196,7 +197,7 @@ public abstract class SimilarityBase extends Similarity {
@Override
public SloppyDocScorer sloppyDocScorer(Stats stats, String fieldName,
AtomicReaderContext context) throws IOException {
byte norms[] = context.reader.norms(fieldName);
DocValues norms = context.reader.normValues(fieldName);
if (stats instanceof MultiSimilarity.MultiStats) {
// a multi term query (e.g. phrase). return the summation,
@ -274,9 +275,9 @@ public abstract class SimilarityBase extends Similarity {
private final BasicStats stats;
private final byte[] norms;
BasicExactDocScorer(BasicStats stats, byte norms[]) {
BasicExactDocScorer(BasicStats stats, DocValues norms) throws IOException {
this.stats = stats;
this.norms = norms;
this.norms = norms == null ? null : (byte[])norms.getSource().getArray();
}
@Override
@ -303,9 +304,9 @@ public abstract class SimilarityBase extends Similarity {
private final BasicStats stats;
private final byte[] norms;
BasicSloppyDocScorer(BasicStats stats, byte norms[]) {
BasicSloppyDocScorer(BasicStats stats, DocValues norms) throws IOException {
this.stats = stats;
this.norms = norms;
this.norms = norms == null ? null : (byte[])norms.getSource().getArray();
}
@Override

View File

@ -20,6 +20,7 @@ package org.apache.lucene.search.similarities;
import java.io.IOException;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.CollectionStatistics;
@ -704,12 +705,12 @@ public abstract class TFIDFSimilarity extends Similarity {
@Override
public final ExactDocScorer exactDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException {
return new ExactTFIDFDocScorer((IDFStats)stats, context.reader.norms(fieldName));
return new ExactTFIDFDocScorer((IDFStats)stats, context.reader.normValues(fieldName));
}
@Override
public final SloppyDocScorer sloppyDocScorer(Stats stats, String fieldName, AtomicReaderContext context) throws IOException {
return new SloppyTFIDFDocScorer((IDFStats)stats, context.reader.norms(fieldName));
return new SloppyTFIDFDocScorer((IDFStats)stats, context.reader.normValues(fieldName));
}
// TODO: we can specialize these for omitNorms up front, but we should test that it doesn't confuse stupid hotspot.
@ -721,10 +722,10 @@ public abstract class TFIDFSimilarity extends Similarity {
private static final int SCORE_CACHE_SIZE = 32;
private float[] scoreCache = new float[SCORE_CACHE_SIZE];
ExactTFIDFDocScorer(IDFStats stats, byte norms[]) {
ExactTFIDFDocScorer(IDFStats stats, DocValues norms) throws IOException {
this.stats = stats;
this.weightValue = stats.value;
this.norms = norms;
this.norms = norms == null ? null : (byte[])norms.getSource().getArray();
for (int i = 0; i < SCORE_CACHE_SIZE; i++)
scoreCache[i] = tf(i) * weightValue;
}
@ -750,10 +751,10 @@ public abstract class TFIDFSimilarity extends Similarity {
private final float weightValue;
private final byte[] norms;
SloppyTFIDFDocScorer(IDFStats stats, byte norms[]) {
SloppyTFIDFDocScorer(IDFStats stats, DocValues norms) throws IOException {
this.stats = stats;
this.weightValue = stats.value;
this.norms = norms;
this.norms = norms == null ? null : (byte[])norms.getSource().getArray();
}
@Override

View File

@ -1,86 +0,0 @@
package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.lucene.util.ReaderUtil;
/**
* Exposes flex API, merged from flex API of sub-segments.
* This is useful when you're interacting with an {@link
* IndexReader} implementation that consists of sequential
* sub-readers (eg DirectoryReader or {@link
* MultiReader}).
*
* <p><b>NOTE</b>: for multi readers, you'll get better
* performance by gathering the sub readers using {@link
* ReaderUtil#gatherSubReaders} and then operate per-reader,
* instead of using this class.
*
* @lucene.experimental
*/
public class MultiNorms {
// no need to instantiate this
private MultiNorms() { }
/**
* Warning: this is heavy! Do not use in a loop, or implement norms()
* in your own reader with this (you should likely cache the result).
*/
public static byte[] norms(IndexReader r, String field) throws IOException {
final IndexReader[] subs = r.getSequentialSubReaders();
if (subs == null) {
// already an atomic reader
return r.norms(field);
} else if (subs.length == 0 || !r.hasNorms(field)) {
// no norms
return null;
} else if (subs.length == 1) {
return norms(subs[0], field);
} else {
// TODO: optimize more maybe
byte norms[] = new byte[r.maxDoc()];
final List<IndexReader> leaves = new ArrayList<IndexReader>();
ReaderUtil.gatherSubReaders(leaves, r);
int end = 0;
for (IndexReader leaf : leaves) {
Fields fields = leaf.fields();
boolean hasField = (fields != null && fields.terms(field) != null);
int start = end;
byte leafNorms[] = leaf.norms(field);
if (leafNorms == null) {
if (hasField) { // omitted norms
return null;
}
// doesn't have field, fill bytes
leafNorms = new byte[leaf.maxDoc()];
Arrays.fill(leafNorms, (byte) 0);
}
System.arraycopy(leafNorms, 0, norms, start, leafNorms.length);
end += leaf.maxDoc();
}
return norms;
}
}
}

View File

@ -462,9 +462,9 @@ public class TestDuelingCodecs extends LuceneTestCase {
while ((field = fieldsEnum.next()) != null) {
assertEquals(info, leftReader.hasNorms(field), rightReader.hasNorms(field));
if (leftReader.hasNorms(field)) {
byte leftNorms[] = MultiNorms.norms(leftReader, field);
byte rightNorms[] = MultiNorms.norms(rightReader, field);
assertArrayEquals(info, leftNorms, rightNorms);
DocValues leftNorms = MultiDocValues.getNormDocValues(leftReader, field);
DocValues rightNorms = MultiDocValues.getNormDocValues(rightReader, field);
assertDocValues(leftNorms, rightNorms);
}
}
}
@ -528,15 +528,19 @@ public class TestDuelingCodecs extends LuceneTestCase {
for (String field : leftValues) {
DocValues leftDocValues = MultiDocValues.getDocValues(leftReader, field);
DocValues rightDocValues = MultiDocValues.getDocValues(rightReader, field);
assertNotNull(info, leftDocValues);
assertNotNull(info, rightDocValues);
assertEquals(info, leftDocValues.type(), rightDocValues.type());
assertEquals(info, leftDocValues.getValueSize(), rightDocValues.getValueSize());
assertDocValuesSource(leftDocValues.getDirectSource(), rightDocValues.getDirectSource());
assertDocValuesSource(leftDocValues.getSource(), rightDocValues.getSource());
assertDocValues(leftDocValues, rightDocValues);
}
}
public void assertDocValues(DocValues leftDocValues, DocValues rightDocValues) throws Exception {
assertNotNull(info, leftDocValues);
assertNotNull(info, rightDocValues);
assertEquals(info, leftDocValues.type(), rightDocValues.type());
assertEquals(info, leftDocValues.getValueSize(), rightDocValues.getValueSize());
assertDocValuesSource(leftDocValues.getDirectSource(), rightDocValues.getDirectSource());
assertDocValuesSource(leftDocValues.getSource(), rightDocValues.getSource());
}
/**
* checks source API
*/

View File

@ -509,6 +509,7 @@ public class TestIndexReader extends LuceneTestCase {
writer.addDocument(doc);
}
// TODO: maybe this can reuse the logic of test dueling codecs?
public static void assertIndexEquals(IndexReader index1, IndexReader index2) throws IOException {
assertEquals("IndexReaders have different values for numDocs.", index1.numDocs(), index2.numDocs());
assertEquals("IndexReaders have different values for maxDoc.", index1.maxDoc(), index2.maxDoc());
@ -531,13 +532,16 @@ public class TestIndexReader extends LuceneTestCase {
it1 = fields1.iterator();
while (it1.hasNext()) {
String curField = it1.next();
byte[] norms1 = MultiNorms.norms(index1, curField);
byte[] norms2 = MultiNorms.norms(index2, curField);
DocValues norms1 = MultiDocValues.getNormDocValues(index1, curField);
DocValues norms2 = MultiDocValues.getNormDocValues(index2, curField);
if (norms1 != null && norms2 != null)
{
assertEquals(norms1.length, norms2.length);
for (int i = 0; i < norms1.length; i++) {
assertEquals("Norm different for doc " + i + " and field '" + curField + "'.", norms1[i], norms2[i]);
// todo: generalize this (like TestDuelingCodecs assert)
byte[] b1 = (byte[]) norms1.getSource().getArray();
byte[] b2 = (byte[]) norms2.getSource().getArray();
assertEquals(b1.length, b2.length);
for (int i = 0; i < b1.length; i++) {
assertEquals("Norm different for doc " + i + " and field '" + curField + "'.", b1[i], b2[i]);
}
}
else

View File

@ -75,7 +75,7 @@ public class TestMaxTermFrequency extends LuceneTestCase {
}
public void test() throws Exception {
byte fooNorms[] = MultiNorms.norms(reader, "foo");
byte fooNorms[] = (byte[])MultiDocValues.getNormDocValues(reader, "foo").getSource().getArray();
for (int i = 0; i < reader.maxDoc(); i++)
assertEquals(expected.get(i).intValue(), fooNorms[i] & 0xff);
}

View File

@ -82,11 +82,11 @@ public class TestNorms extends LuceneTestCase {
IndexReader reader = writer.getReader();
writer.close();
byte fooNorms[] = MultiNorms.norms(reader, "foo");
byte fooNorms[] = (byte[]) MultiDocValues.getNormDocValues(reader, "foo").getSource().getArray();
for (int i = 0; i < reader.maxDoc(); i++)
assertEquals(0, fooNorms[i]);
byte barNorms[] = MultiNorms.norms(reader, "bar");
byte barNorms[] = (byte[]) MultiDocValues.getNormDocValues(reader, "bar").getSource().getArray();
for (int i = 0; i < reader.maxDoc(); i++)
assertEquals(1, barNorms[i]);

View File

@ -289,12 +289,15 @@ public class TestOmitNorms extends LuceneTestCase {
}
IndexReader ir1 = riw.getReader();
byte[] norms1 = MultiNorms.norms(ir1, field);
// todo: generalize
DocValues dv1 = MultiDocValues.getNormDocValues(ir1, field);
byte[] norms1 = dv1 == null ? null : (byte[]) dv1.getSource().getArray();
// fully merge and validate MultiNorms against single segment.
riw.forceMerge(1);
IndexReader ir2 = riw.getReader();
byte[] norms2 = ir2.getSequentialSubReaders()[0].norms(field);
DocValues dv2 = ir2.getSequentialSubReaders()[0].normValues(field);
byte[] norms2 = dv2 == null ? null : (byte[]) dv2.getSource().getArray();
assertArrayEquals(norms1, norms2);
ir1.close();

View File

@ -172,7 +172,7 @@ public class TestSegmentReader extends LuceneTestCase {
assertEquals(reader.hasNorms(f.name()), !DocHelper.noNorms.containsKey(f.name()));
if (!reader.hasNorms(f.name())) {
// test for norms of null
byte [] norms = MultiNorms.norms(reader, f.name());
DocValues norms = MultiDocValues.getNormDocValues(reader, f.name());
assertNull(norms);
}
}

View File

@ -73,7 +73,7 @@ public class TestUniqueTermCount extends LuceneTestCase {
}
public void test() throws Exception {
byte fooNorms[] = MultiNorms.norms(reader, "foo");
byte fooNorms[] = (byte[])MultiDocValues.getNormDocValues(reader, "foo").getSource().getArray();
for (int i = 0; i < reader.maxDoc(); i++)
assertEquals(expected.get(i).intValue(), fooNorms[i] & 0xff);
}

View File

@ -24,7 +24,7 @@ import org.apache.lucene.document.TextField;
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MultiNorms;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.similarities.Similarity;
@ -74,8 +74,9 @@ public class TestSimilarityProvider extends LuceneTestCase {
public void testBasics() throws Exception {
// sanity check of norms writer
byte fooNorms[] = MultiNorms.norms(reader, "foo");
byte barNorms[] = MultiNorms.norms(reader, "bar");
// TODO: generalize
byte fooNorms[] = (byte[]) MultiDocValues.getNormDocValues(reader, "foo").getSource().getArray();
byte barNorms[] = (byte[]) MultiDocValues.getNormDocValues(reader, "bar").getSource().getArray();
for (int i = 0; i < fooNorms.length; i++) {
assertFalse(fooNorms[i] == barNorms[i]);
}

View File

@ -17,6 +17,7 @@
package org.apache.lucene.queries.function.valuesource;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
@ -56,10 +57,13 @@ public class NormValueSource extends ValueSource {
throw new UnsupportedOperationException("requires a TFIDFSimilarity (such as DefaultSimilarity)");
}
final TFIDFSimilarity similarity = (TFIDFSimilarity) sim;
final byte[] norms = readerContext.reader.norms(field);
if (norms == null) {
DocValues dv = readerContext.reader.normValues(field);
if (dv == null) {
return new ConstDoubleDocValues(0.0, this);
}
final byte[] norms = (byte[]) dv.getSource().getArray();
return new FloatDocValues(this) {
@Override