clear some nocommits

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1436509 13f79535-47bb-0310-9956-ffa450edef68
Robert Muir 2013-01-21 17:39:30 +00:00
parent c67ef8657f
commit 02fe1c42c9
11 changed files with 53 additions and 244 deletions

DiskDocValuesConsumer.java View File

@@ -31,7 +31,6 @@ import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.packed.PackedInts;
 import org.apache.lucene.util.packed.PackedInts.FormatAndBits;
-// nocommit fix exception handling (make sure tests find problems first)
 class DiskDocValuesConsumer extends DocValuesConsumer {
   final IndexOutput data, meta;
   final int maxDoc;
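
The removed nocommit asked for hardened exception handling around the two outputs. A minimal sketch of the standard Lucene idiom it alludes to, using IOUtils (already imported by this file); the writeAll method and its body are illustrative, not the actual consumer code:

    // On success, close normally; on failure, close without masking the
    // original exception. Both IOUtils methods exist in Lucene 4.x.
    void writeAll(IndexOutput data, IndexOutput meta) throws IOException {
      boolean success = false;
      try {
        // ... write the data and metadata streams ...
        success = true;
      } finally {
        if (success) {
          IOUtils.close(data, meta);
        } else {
          IOUtils.closeWhileHandlingException(data, meta);
        }
      }
    }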

MemoryDocValuesFormat.java (deleted) View File

@@ -1,160 +0,0 @@
-package org.apache.lucene.codecs.memory;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-
-import org.apache.lucene.codecs.DocValuesConsumer;
-import org.apache.lucene.codecs.DocValuesProducer;
-import org.apache.lucene.codecs.DocValuesFormat;
-import org.apache.lucene.codecs.simpletext.SimpleTextDocValuesFormat.SimpleTextDocValuesReader;
-import org.apache.lucene.codecs.simpletext.SimpleTextDocValuesFormat.SimpleTextDocValuesWriter;
-import org.apache.lucene.index.BinaryDocValues;
-import org.apache.lucene.index.FieldInfo;
-import org.apache.lucene.index.NumericDocValues;
-import org.apache.lucene.index.SegmentReadState;
-import org.apache.lucene.index.SegmentWriteState;
-import org.apache.lucene.index.SortedDocValues;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.packed.PackedInts;
-
-/** Indexes doc values to disk and loads them in RAM at
- * search time. */
-// nocommit: nuke this wrapper and just make a nice impl for 4.1 (e.g. FST for sortedbytes)
-public class MemoryDocValuesFormat extends DocValuesFormat {
-
-  public MemoryDocValuesFormat() {
-    super("Memory");
-  }
-
-  @Override
-  public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
-    // nocommit use a more efficient format ;):
-    return new SimpleTextDocValuesWriter(state, "dat");
-  }
-
-  // nocommit the get's of this thing need to use a map. its returning new ram instances
-  // per-thread!
-  @Override
-  public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException {
-    final int maxDoc = state.segmentInfo.getDocCount();
-    final DocValuesProducer producer = new SimpleTextDocValuesReader(state, "dat");
-    return new DocValuesProducer() {
-
-      @Override
-      public NumericDocValues getNumeric(FieldInfo field) throws IOException {
-        NumericDocValues valuesIn = producer.getNumeric(field);
-
-        long minValue = Long.MAX_VALUE;
-        long maxValue = Long.MIN_VALUE;
-        for(int docID=0;docID<maxDoc;docID++) {
-          long v = valuesIn.get(docID);
-          minValue = Math.min(minValue, v);
-          maxValue = Math.max(maxValue, v);
-        }
-
-        final long delta = maxValue - minValue;
-        final int bitsRequired = delta < 0 ? 64 : PackedInts.bitsRequired(delta);
-        final PackedInts.Mutable values = PackedInts.getMutable(maxDoc, bitsRequired, PackedInts.COMPACT);
-        for(int docID=0;docID<maxDoc;docID++) {
-          values.set(docID, valuesIn.get(docID) - minValue);
-        }
-
-        final long finalMinValue = minValue;
-
-        return new NumericDocValues() {
-          @Override
-          public long get(int docID) {
-            return finalMinValue + values.get(docID);
-          }
-        };
-      }
-
-      @Override
-      public BinaryDocValues getBinary(FieldInfo field) throws IOException {
-        BinaryDocValues valuesIn = producer.getBinary(field);
-
-        // nocommit more ram efficient
-        final byte[][] values = new byte[maxDoc][];
-        BytesRef scratch = new BytesRef();
-        for(int docID=0;docID<maxDoc;docID++) {
-          valuesIn.get(docID, scratch);
-          values[docID] = new byte[scratch.length];
-          System.arraycopy(scratch.bytes, scratch.offset, values[docID], 0, scratch.length);
-        }
-
-        return new BinaryDocValues() {
-          @Override
-          public void get(int docID, BytesRef result) {
-            result.bytes = values[docID];
-            result.offset = 0;
-            result.length = result.bytes.length;
-          }
-        };
-      }
-
-      @Override
-      public SortedDocValues getSorted(FieldInfo field) throws IOException {
-        SortedDocValues valuesIn = producer.getSorted(field);
-        final int valueCount = valuesIn.getValueCount();
-
-        // nocommit used packed ints and so on
-        final byte[][] values = new byte[valueCount][];
-        BytesRef scratch = new BytesRef();
-        for(int ord=0;ord<values.length;ord++) {
-          valuesIn.lookupOrd(ord, scratch);
-          values[ord] = new byte[scratch.length];
-          System.arraycopy(scratch.bytes, scratch.offset, values[ord], 0, scratch.length);
-        }
-
-        final int[] docToOrd = new int[maxDoc];
-        for(int docID=0;docID<maxDoc;docID++) {
-          docToOrd[docID] = valuesIn.getOrd(docID);
-        }
-
-        return new SortedDocValues() {
-          @Override
-          public int getOrd(int docID) {
-            return docToOrd[docID];
-          }
-
-          @Override
-          public void lookupOrd(int ord, BytesRef result) {
-            result.bytes = values[ord];
-            result.offset = 0;
-            result.length = result.bytes.length;
-          }
-
-          @Override
-          public int getValueCount() {
-            return valueCount;
-          }
-        };
-      }
-
-      @Override
-      public void close() throws IOException {
-        producer.close();
-      }
-    };
-  }
-}
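
The heart of the deleted getNumeric is a delta + packed-ints trick: store value - minValue in the fewest bits that fit the range, and add the base back at read time. A self-contained sketch of just that trick, reusing the exact PackedInts calls from the code above (the DeltaPackedDemo class name is made up for illustration; lucene-core 4.x is assumed on the classpath):

    import org.apache.lucene.util.packed.PackedInts;

    public class DeltaPackedDemo {
      public static void main(String[] args) {
        long[] raw = {1000L, 1003L, 1001L, 1007L};

        long min = Long.MAX_VALUE, max = Long.MIN_VALUE;
        for (long v : raw) {
          min = Math.min(min, v);
          max = Math.max(max, v);
        }

        // A negative delta means the range overflowed a signed long:
        // fall back to a full 64 bits per value.
        long delta = max - min;
        int bits = delta < 0 ? 64 : PackedInts.bitsRequired(delta);

        // COMPACT favors the smallest footprint over lookup speed.
        PackedInts.Mutable packed = PackedInts.getMutable(raw.length, bits, PackedInts.COMPACT);
        for (int i = 0; i < raw.length; i++) {
          packed.set(i, raw[i] - min);
        }

        // Here the range is 7, so only 3 bits are stored per value.
        for (int i = 0; i < raw.length; i++) {
          System.out.println(min + packed.get(i)); // 1000, 1003, 1001, 1007
        }
      }
    }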

SimpleTextCodec.java View File

@@ -39,11 +39,9 @@ public final class SimpleTextCodec extends Codec {
   private final SegmentInfoFormat segmentInfos = new SimpleTextSegmentInfoFormat();
   private final FieldInfosFormat fieldInfosFormat = new SimpleTextFieldInfosFormat();
   private final TermVectorsFormat vectorsFormat = new SimpleTextTermVectorsFormat();
-  private final NormsFormat simpleNormsFormat = new SimpleTextNormsFormat();
+  private final NormsFormat normsFormat = new SimpleTextNormsFormat();
   private final LiveDocsFormat liveDocs = new SimpleTextLiveDocsFormat();
-  // nocommit rename
-  private final DocValuesFormat simpleDVFormat = new SimpleTextDocValuesFormat();
+  private final DocValuesFormat dvFormat = new SimpleTextDocValuesFormat();
 
   public SimpleTextCodec() {
     super("SimpleText");
@@ -76,7 +74,7 @@ public final class SimpleTextCodec extends Codec {
   @Override
   public NormsFormat normsFormat() {
-    return simpleNormsFormat;
+    return normsFormat;
   }
 
   @Override
@@ -86,6 +84,6 @@ public final class SimpleTextCodec extends Codec {
   @Override
   public DocValuesFormat docValuesFormat() {
-    return simpleDVFormat;
+    return dvFormat;
   }
 }

SimpleTextDocValuesFormat.java View File

@@ -148,7 +148,7 @@ public class SimpleTextDocValuesFormat extends DocValuesFormat {
       //System.out.println("WRITE: " + IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, ext) + " " + state.segmentInfo.getDocCount() + " docs");
       data = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, ext), state.context);
       numDocs = state.segmentInfo.getDocCount();
-      isNorms = ext.equals("slen");
+      isNorms = ext.equals("len");
     }
 
     // for asserting
@@ -422,7 +422,7 @@ public class SimpleTextDocValuesFormat extends DocValuesFormat {
       //System.out.println("  field=" + fieldName);
       // nocommit hack hack hack!!:
-      DocValuesType dvType = ext.equals("slen") ? DocValuesType.NUMERIC : fieldInfo.getDocValuesType();
+      DocValuesType dvType = ext.equals("len") ? DocValuesType.NUMERIC : fieldInfo.getDocValuesType();
       assert dvType != null;
       if (dvType == DocValuesType.NUMERIC) {
         readLine();

SimpleTextNormsFormat.java View File

@@ -18,19 +18,14 @@ package org.apache.lucene.codecs.simpletext;
  */
 
 import java.io.IOException;
-import java.util.Comparator;
 
 import org.apache.lucene.codecs.DocValuesConsumer;
 import org.apache.lucene.codecs.DocValuesProducer;
 import org.apache.lucene.codecs.NormsFormat;
 import org.apache.lucene.codecs.simpletext.SimpleTextDocValuesFormat.SimpleTextDocValuesReader;
 import org.apache.lucene.codecs.simpletext.SimpleTextDocValuesFormat.SimpleTextDocValuesWriter;
-import org.apache.lucene.index.AtomicReader;
-import org.apache.lucene.index.FieldInfo;
-import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SegmentWriteState;
-import org.apache.lucene.util.BytesRef;
 
 /**
  * plain-text norms format.
@@ -40,17 +35,16 @@ import org.apache.lucene.util.BytesRef;
  * @lucene.experimental
  */
 public class SimpleTextNormsFormat extends NormsFormat {
-  // nocommit put back to len once we replace current norms format:
-  private static final String NORMS_SEG_EXTENSION = "slen";
+  private static final String NORMS_SEG_EXTENSION = "len";
 
   @Override
   public DocValuesConsumer normsConsumer(SegmentWriteState state) throws IOException {
-    return new SimpleTextSimpleNormsConsumer(state);
+    return new SimpleTextNormsConsumer(state);
   }
 
   @Override
   public DocValuesProducer normsProducer(SegmentReadState state) throws IOException {
-    return new SimpleTextSimpleNormsProducer(state);
+    return new SimpleTextNormsProducer(state);
   }
 
   /**
@@ -60,8 +54,8 @@ public class SimpleTextNormsFormat extends NormsFormat {
    *
    * @lucene.experimental
    */
-  public static class SimpleTextSimpleNormsProducer extends SimpleTextDocValuesReader {
-    public SimpleTextSimpleNormsProducer(SegmentReadState state) throws IOException {
+  public static class SimpleTextNormsProducer extends SimpleTextDocValuesReader {
+    public SimpleTextNormsProducer(SegmentReadState state) throws IOException {
       // All we do is change the extension from .dat -> .len;
      // otherwise this is a normal simple doc values file:
       super(state, NORMS_SEG_EXTENSION);
@@ -75,8 +69,8 @@ public class SimpleTextNormsFormat extends NormsFormat {
    *
    * @lucene.experimental
    */
-  public static class SimpleTextSimpleNormsConsumer extends SimpleTextDocValuesWriter {
-    public SimpleTextSimpleNormsConsumer(SegmentWriteState state) throws IOException {
+  public static class SimpleTextNormsConsumer extends SimpleTextDocValuesWriter {
+    public SimpleTextNormsConsumer(SegmentWriteState state) throws IOException {
       // All we do is change the extension from .dat -> .len;
       // otherwise this is a normal simple doc values file:
       super(state, NORMS_SEG_EXTENSION);

META-INF/services/org.apache.lucene.codecs.DocValuesFormat View File

@@ -14,5 +14,4 @@
 # limitations under the License.
 
 org.apache.lucene.codecs.diskdv.DiskDocValuesFormat
-org.apache.lucene.codecs.memory.MemoryDocValuesFormat
 org.apache.lucene.codecs.simpletext.SimpleTextDocValuesFormat
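
This file is the JDK service-provider registration that DocValuesFormat.forName("Disk") and friends resolve against, so deleting the Memory line is what actually unregisters the removed MemoryDocValuesFormat. A hedged sketch of the lookup using plain java.util.ServiceLoader (Lucene itself uses its own SPI loader over the same META-INF/services entries; the ListDocValuesFormats class is illustrative):

    import java.util.ServiceLoader;
    import org.apache.lucene.codecs.DocValuesFormat;

    public class ListDocValuesFormats {
      public static void main(String[] args) {
        // Iterates every format registered in META-INF/services files on
        // the classpath, e.g. "Disk" and "SimpleText" after this commit.
        for (DocValuesFormat format : ServiceLoader.load(DocValuesFormat.class)) {
          System.out.println(format.getName());
        }
      }
    }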

Lucene40Codec.java View File

@@ -87,7 +87,7 @@ public final class Lucene40Codec extends Codec {
     return infosFormat;
   }
 
-  // nocommit need a read-only Lucene40SimpleDVFormat
+  // nocommit need a read-only Lucene40DVFormat / read-write in the impersonator
   private final DocValuesFormat defaultDVFormat = DocValuesFormat.forName("Disk");
 
   @Override
@@ -96,7 +96,7 @@ public final class Lucene40Codec extends Codec {
     return defaultDVFormat;
   }
 
-  // nocommit need a read-only Lucene40SimpleNormsFormat:
+  // nocommit need a read-only Lucene40NormsFormat / read-write in the impersonator
   private final NormsFormat simpleNormsFormat = new Lucene41NormsFormat();
 
   @Override

PerFieldDocValuesFormat.java View File

@@ -78,7 +78,7 @@ public abstract class PerFieldDocValuesFormat extends DocValuesFormat {
     return new FieldsWriter(state);
   }
 
-  static class SimpleDVConsumerAndSuffix implements Closeable {
+  static class ConsumerAndSuffix implements Closeable {
     DocValuesConsumer consumer;
     int suffix;
 
@@ -90,7 +90,7 @@ public abstract class PerFieldDocValuesFormat extends DocValuesFormat {
   private class FieldsWriter extends DocValuesConsumer {
 
-    private final Map<DocValuesFormat,SimpleDVConsumerAndSuffix> formats = new HashMap<DocValuesFormat,SimpleDVConsumerAndSuffix>();
+    private final Map<DocValuesFormat,ConsumerAndSuffix> formats = new HashMap<DocValuesFormat,ConsumerAndSuffix>();
     private final Map<String,Integer> suffixes = new HashMap<String,Integer>();
 
     private final SegmentWriteState segmentWriteState;
@@ -126,7 +126,7 @@ public abstract class PerFieldDocValuesFormat extends DocValuesFormat {
       Integer suffix;
 
-      SimpleDVConsumerAndSuffix consumer = formats.get(format);
+      ConsumerAndSuffix consumer = formats.get(format);
       if (consumer == null) {
         // First time we are seeing this format; create a new instance
 
@@ -142,7 +142,7 @@ public abstract class PerFieldDocValuesFormat extends DocValuesFormat {
       final String segmentSuffix = getFullSegmentSuffix(field.name,
                                                         segmentWriteState.segmentSuffix,
                                                         getSuffix(formatName, Integer.toString(suffix)));
-      consumer = new SimpleDVConsumerAndSuffix();
+      consumer = new ConsumerAndSuffix();
       consumer.consumer = format.fieldsConsumer(new SegmentWriteState(segmentWriteState, segmentSuffix));
       consumer.suffix = suffix;
       formats.put(format, consumer);
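
The SimpleDVConsumerAndSuffix -> ConsumerAndSuffix rename is cosmetic, but the surrounding pattern is worth noting: the first field that selects a format lazily creates one shared consumer for it, tagged with a fresh numeric suffix so its output files cannot collide with another format's. A generic sketch of that lazy-create-and-cache pattern; PerFormatRouter and Factory are stand-in names, not Lucene classes:

    import java.util.HashMap;
    import java.util.Map;

    class PerFormatRouter<F, C> {
      interface Factory<X, Y> {
        Y create(X format, int suffix);
      }

      private final Map<F, C> consumers = new HashMap<F, C>();
      private int nextSuffix = 0;

      C consumerFor(F format, Factory<F, C> factory) {
        C consumer = consumers.get(format);
        if (consumer == null) {
          // First time we see this format: create it once and remember it,
          // along with a unique suffix for its files.
          consumer = factory.create(format, nextSuffix++);
          consumers.put(format, consumer);
        }
        return consumer;
      }
    }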

SegmentCoreReaders.java View File

@@ -54,8 +54,8 @@ final class SegmentCoreReaders {
   final FieldInfos fieldInfos;
 
   final FieldsProducer fields;
-  final DocValuesProducer simpleDVProducer;
-  final DocValuesProducer simpleNormsProducer;
+  final DocValuesProducer dvProducer;
+  final DocValuesProducer normsProducer;
 
   final int termsIndexDivisor;
@@ -67,7 +67,7 @@ final class SegmentCoreReaders {
 
   // TODO: make a single thread local w/ a
   // Thingy class holding fieldsReader, termVectorsReader,
-  // simpleNormsProducer, simpleDVProducer
+  // normsProducer, dvProducer
 
   final CloseableThreadLocal<StoredFieldsReader> fieldsReaderLocal = new CloseableThreadLocal<StoredFieldsReader>() {
     @Override
@@ -83,14 +83,14 @@ final class SegmentCoreReaders {
     }
   };
 
-  final CloseableThreadLocal<Map<String,Object>> simpleDocValuesLocal = new CloseableThreadLocal<Map<String,Object>>() {
+  final CloseableThreadLocal<Map<String,Object>> docValuesLocal = new CloseableThreadLocal<Map<String,Object>>() {
     @Override
     protected Map<String,Object> initialValue() {
       return new HashMap<String,Object>();
     }
   };
 
-  final CloseableThreadLocal<Map<String,Object>> simpleNormsLocal = new CloseableThreadLocal<Map<String,Object>>() {
+  final CloseableThreadLocal<Map<String,Object>> normsLocal = new CloseableThreadLocal<Map<String,Object>>() {
     @Override
     protected Map<String,Object> initialValue() {
       return new HashMap<String,Object>();
@@ -129,26 +129,19 @@ final class SegmentCoreReaders {
       // ask codec for its Norms:
       // TODO: since we don't write any norms file if there are no norms,
       // kinda jaky to assume the codec handles the case of no norms file at all gracefully?!
 
-      // nocommit shouldn't need null check:
-      assert codec.docValuesFormat() != null;
-      if (codec.docValuesFormat() != null) {
-        if (fieldInfos.hasDocValues()) {
-          simpleDVProducer = codec.docValuesFormat().fieldsProducer(segmentReadState);
-        } else {
-          simpleDVProducer = null;
-        }
+      if (fieldInfos.hasDocValues()) {
+        dvProducer = codec.docValuesFormat().fieldsProducer(segmentReadState);
+        assert dvProducer != null;
       } else {
-        simpleDVProducer = null;
+        dvProducer = null;
       }
 
-      // nocommit shouldn't need null check:
-      if (codec.normsFormat() != null) {
-        if (fieldInfos.hasNorms()) {
-          simpleNormsProducer = codec.normsFormat().normsProducer(segmentReadState);
-        } else {
-          simpleNormsProducer = null;
-        }
+      if (fieldInfos.hasNorms()) {
+        normsProducer = codec.normsFormat().normsProducer(segmentReadState);
+        assert normsProducer != null;
       } else {
-        simpleNormsProducer = null;
+        normsProducer = null;
      }
 
       fieldsReaderOrig = si.info.getCodec().storedFieldsFormat().fieldsReader(cfsDir, si.info, fieldInfos, context);
@@ -192,16 +185,13 @@ final class SegmentCoreReaders {
       return null;
     }
 
-    // nocommit change to assert != null!!
-    if (simpleDVProducer == null) {
-      return null;
-    }
+    assert dvProducer != null;
 
-    Map<String,Object> dvFields = simpleDocValuesLocal.get();
+    Map<String,Object> dvFields = docValuesLocal.get();
 
     NumericDocValues dvs = (NumericDocValues) dvFields.get(field);
     if (dvs == null) {
-      dvs = simpleDVProducer.getNumeric(fi);
+      dvs = dvProducer.getNumeric(fi);
       dvFields.put(field, dvs);
     }
@@ -223,16 +213,13 @@ final class SegmentCoreReaders {
       return null;
     }
 
-    // nocommit change to assert != null!!
-    if (simpleDVProducer == null) {
-      return null;
-    }
+    assert dvProducer != null;
 
-    Map<String,Object> dvFields = simpleDocValuesLocal.get();
+    Map<String,Object> dvFields = docValuesLocal.get();
 
     BinaryDocValues dvs = (BinaryDocValues) dvFields.get(field);
     if (dvs == null) {
-      dvs = simpleDVProducer.getBinary(fi);
+      dvs = dvProducer.getBinary(fi);
       dvFields.put(field, dvs);
     }
@@ -254,18 +241,13 @@ final class SegmentCoreReaders {
       return null;
     }
 
-    assert simpleDVProducer != null;
+    assert dvProducer != null;
 
-    // nocommit change to assert != null!!
-    if (simpleDVProducer == null) {
-      return null;
-    }
-
-    Map<String,Object> dvFields = simpleDocValuesLocal.get();
+    Map<String,Object> dvFields = docValuesLocal.get();
 
     SortedDocValues dvs = (SortedDocValues) dvFields.get(field);
     if (dvs == null) {
-      dvs = simpleDVProducer.getSorted(fi);
+      dvs = dvProducer.getSorted(fi);
       dvFields.put(field, dvs);
     }
@@ -281,16 +263,14 @@ final class SegmentCoreReaders {
     if (!fi.hasNorms()) {
       return null;
     }
 
-    // nocommit change to assert != null!!
-    if (simpleNormsProducer == null) {
-      return null;
-    }
+    assert normsProducer != null;
 
-    Map<String,Object> normFields = simpleNormsLocal.get();
+    Map<String,Object> normFields = normsLocal.get();
 
     NumericDocValues norms = (NumericDocValues) normFields.get(field);
     if (norms == null) {
-      norms = simpleNormsProducer.getNumeric(fi);
+      norms = normsProducer.getNumeric(fi);
       normFields.put(field, norms);
     }
@@ -299,8 +279,8 @@ final class SegmentCoreReaders {
   void decRef() throws IOException {
     if (ref.decrementAndGet() == 0) {
-      IOUtils.close(termVectorsLocal, fieldsReaderLocal, simpleDocValuesLocal, simpleNormsLocal, fields, simpleDVProducer,
-                    termVectorsReaderOrig, fieldsReaderOrig, cfsReader, simpleNormsProducer);
+      IOUtils.close(termVectorsLocal, fieldsReaderLocal, docValuesLocal, normsLocal, fields, dvProducer,
+                    termVectorsReaderOrig, fieldsReaderOrig, cfsReader, normsProducer);
       notifyCoreClosedListeners();
     }
   }
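
Both thread-locals above implement the same caching idiom: each thread keeps a private field -> instance map, so stateful DocValues readers are never shared across threads. A minimal sketch of the idiom with a plain ThreadLocal standing in for Lucene's CloseableThreadLocal; PerThreadFieldCache and its Callable-based loader are illustrative, not the actual SegmentCoreReaders API:

    import java.util.HashMap;
    import java.util.Map;
    import java.util.concurrent.Callable;

    class PerThreadFieldCache {
      private final ThreadLocal<Map<String, Object>> docValuesLocal =
          new ThreadLocal<Map<String, Object>>() {
            @Override
            protected Map<String, Object> initialValue() {
              return new HashMap<String, Object>();
            }
          };

      Object getOrLoad(String field, Callable<Object> loader) throws Exception {
        Map<String, Object> cache = docValuesLocal.get();
        Object instance = cache.get(field);
        if (instance == null) {
          instance = loader.call(); // e.g. dvProducer.getNumeric(fi)
          cache.put(field, instance);
        }
        return instance;
      }
    }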

TestDemoDocValue.java View File

@@ -705,8 +705,7 @@ public class TestDemoDocValue extends LuceneTestCase {
     Directory directory = newDirectory();
     // we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
     IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
-    // TODO: Fix the CFS/suffixing of Lucene41DocValues so it actually works with this
-    final DocValuesFormat fast = DocValuesFormat.forName("Memory");
+    final DocValuesFormat fast = DocValuesFormat.forName("Lucene41");
     final DocValuesFormat slow = DocValuesFormat.forName("SimpleText");
     iwc.setCodec(new Lucene41Codec() {
       @Override
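
The anonymous Lucene41Codec subclass is how the test pins specific fields to specific DocValuesFormats. A hedged sketch of what the truncated override presumably looks like; the hook name getDocValuesFormatForField matches the per-field codec API as later released, and the "dv" field name is purely illustrative:

    iwc.setCodec(new Lucene41Codec() {
      @Override
      public DocValuesFormat getDocValuesFormatForField(String field) {
        // route one field to the fast format, everything else to the slow one
        return field.equals("dv") ? fast : slow;
      }
    });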

RandomCodec.java View File

@@ -31,12 +31,12 @@ import org.apache.lucene.codecs.PostingsFormat;
 import org.apache.lucene.codecs.DocValuesFormat;
 import org.apache.lucene.codecs.asserting.AssertingPostingsFormat;
 import org.apache.lucene.codecs.lucene41.Lucene41Codec;
+import org.apache.lucene.codecs.lucene41.Lucene41DocValuesFormat;
 import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat;
 import org.apache.lucene.codecs.lucene41ords.Lucene41WithOrds;
 import org.apache.lucene.codecs.bloom.TestBloomFilteredLucene41Postings;
 import org.apache.lucene.codecs.diskdv.DiskDocValuesFormat;
 import org.apache.lucene.codecs.memory.DirectPostingsFormat;
-import org.apache.lucene.codecs.memory.MemoryDocValuesFormat;
 import org.apache.lucene.codecs.memory.MemoryPostingsFormat;
 import org.apache.lucene.codecs.mockintblock.MockFixedIntBlockPostingsFormat;
 import org.apache.lucene.codecs.mockintblock.MockVariableIntBlockPostingsFormat;
@@ -142,9 +142,9 @@ public class RandomCodec extends Lucene41Codec {
                   new MemoryPostingsFormat(false, random.nextFloat()));
 
     addDocValues(avoidCodecs,
+        new Lucene41DocValuesFormat(),
         new DiskDocValuesFormat(),
-        new SimpleTextDocValuesFormat(),
-        new MemoryDocValuesFormat());
+        new SimpleTextDocValuesFormat());
 
     Collections.shuffle(formats, random);
     Collections.shuffle(dvFormats, random);