LUCENE-5761: Remove DiskDocValuesFormat

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1602862 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2014-06-16 13:14:01 +00:00
parent 0a70e721ce
commit ff3582d5a8
17 changed files with 31 additions and 288 deletions

View File

@ -226,6 +226,9 @@ API Changes
* LUCENE-5757: Moved RamUsageEstimator's reflection-based processing to RamUsageTester
in the test-framework module. (Robert Muir)
* LUCENE-5761: Removed DiskDocValuesFormat, it was very inefficient and saved very little
RAM over the default codec. (Robert Muir)
Optimizations
* LUCENE-5603: hunspell stemmer more efficiently strips prefixes

View File

@ -1,63 +0,0 @@
package org.apache.lucene.codecs.diskdv;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.lucene49.Lucene49DocValuesConsumer;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.BytesRef;
/**
* DocValues format that keeps most things on disk.
* <p>
* Only things like disk offsets are loaded into ram.
* <p>
* @lucene.experimental
*/
public final class DiskDocValuesFormat extends DocValuesFormat {
public DiskDocValuesFormat() {
super("Disk");
}
@Override
public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
return new Lucene49DocValuesConsumer(state, DATA_CODEC, DATA_EXTENSION, META_CODEC, META_EXTENSION) {
@Override
protected void addTermsDict(FieldInfo field, Iterable<BytesRef> values) throws IOException {
addBinaryField(field, values);
}
};
}
@Override
public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException {
return new DiskDocValuesProducer(state, DATA_CODEC, DATA_EXTENSION, META_CODEC, META_EXTENSION);
}
public static final String DATA_CODEC = "DiskDocValuesData";
public static final String DATA_EXTENSION = "dvdd";
public static final String META_CODEC = "DiskDocValuesMetadata";
public static final String META_EXTENSION = "dvdm";
}

View File

@ -1,50 +0,0 @@
package org.apache.lucene.codecs.diskdv;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.codecs.lucene49.Lucene49DocValuesProducer;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.packed.MonotonicBlockPackedReader;
class DiskDocValuesProducer extends Lucene49DocValuesProducer {
DiskDocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
super(state, dataCodec, dataExtension, metaCodec, metaExtension);
}
@Override
protected MonotonicBlockPackedReader getAddressInstance(IndexInput data, FieldInfo field, BinaryEntry bytes) throws IOException {
data.seek(bytes.addressesOffset);
return MonotonicBlockPackedReader.of(data.clone(), bytes.packedIntsVersion, bytes.blockSize, bytes.count+1, true);
}
@Override
protected MonotonicBlockPackedReader getIntervalInstance(IndexInput data, FieldInfo field, BinaryEntry bytes) throws IOException {
throw new AssertionError();
}
@Override
protected MonotonicBlockPackedReader getOrdIndexInstance(IndexInput data, FieldInfo field, NumericEntry entry) throws IOException {
data.seek(entry.offset);
return MonotonicBlockPackedReader.of(data.clone(), entry.packedIntsVersion, entry.blockSize, entry.count+1, true);
}
}

View File

@ -1,46 +0,0 @@
package org.apache.lucene.codecs.diskdv;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.codecs.NormsFormat;
import org.apache.lucene.codecs.lucene49.Lucene49DocValuesConsumer;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
/** Norms format that keeps all norms on disk */
public final class DiskNormsFormat extends NormsFormat {
@Override
public DocValuesConsumer normsConsumer(SegmentWriteState state) throws IOException {
return new Lucene49DocValuesConsumer(state, DATA_CODEC, DATA_EXTENSION, META_CODEC, META_EXTENSION);
}
@Override
public DocValuesProducer normsProducer(SegmentReadState state) throws IOException {
return new DiskDocValuesProducer(state, DATA_CODEC, DATA_EXTENSION, META_CODEC, META_EXTENSION);
}
static final String DATA_CODEC = "DiskNormsData";
static final String DATA_EXTENSION = "dnvd";
static final String META_CODEC = "DiskNormsMetadata";
static final String META_EXTENSION = "dnvm";
}

View File

@ -1,25 +0,0 @@
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
</head>
<body>
DocValuesFormat that accesses values directly from disk.
</body>
</html>

View File

@ -13,7 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
org.apache.lucene.codecs.diskdv.DiskDocValuesFormat
org.apache.lucene.codecs.memory.MemoryDocValuesFormat
org.apache.lucene.codecs.memory.DirectDocValuesFormat
org.apache.lucene.codecs.simpletext.SimpleTextDocValuesFormat

View File

@ -1,34 +0,0 @@
package org.apache.lucene.codecs.diskdv;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.BaseCompressingDocValuesFormatTestCase;
import org.apache.lucene.util.TestUtil;
/**
* Tests DiskDocValuesFormat
*/
public class TestDiskDocValuesFormat extends BaseCompressingDocValuesFormatTestCase {
private final Codec codec = TestUtil.alwaysDocValuesFormat(new DiskDocValuesFormat());
@Override
protected Codec getCodec() {
return codec;
}
}

View File

@ -1,32 +0,0 @@
package org.apache.lucene.codecs.diskdv;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.cheapbastard.CheapBastardCodec;
import org.apache.lucene.index.BaseNormsFormatTestCase;
/** Tests DiskNormsFormat */
public class TestDiskNormsFormat extends BaseNormsFormatTestCase {
private final Codec codec = new CheapBastardCodec();
@Override
protected Codec getCodec() {
return codec;
}
}

View File

@ -64,7 +64,7 @@ import org.apache.lucene.util.packed.MonotonicBlockPackedReader;
import org.apache.lucene.util.packed.PackedInts;
/** reader for {@link Lucene45DocValuesFormat} */
public class Lucene45DocValuesProducer extends DocValuesProducer implements Closeable {
class Lucene45DocValuesProducer extends DocValuesProducer implements Closeable {
private final Map<Integer,NumericEntry> numerics;
private final Map<Integer,BinaryEntry> binaries;
private final Map<Integer,SortedSetEntry> sortedSets;

View File

@ -39,7 +39,7 @@ import org.apache.lucene.util.packed.MonotonicBlockPackedWriter;
import org.apache.lucene.util.packed.PackedInts;
/** writer for {@link Lucene49DocValuesFormat} */
public class Lucene49DocValuesConsumer extends DocValuesConsumer implements Closeable {
class Lucene49DocValuesConsumer extends DocValuesConsumer implements Closeable {
static final int BLOCK_SIZE = 16384;
static final int ADDRESS_INTERVAL = 16;
@ -299,7 +299,7 @@ public class Lucene49DocValuesConsumer extends DocValuesConsumer implements Clos
}
/** expert: writes a value dictionary for a sorted/sortedset field */
protected void addTermsDict(FieldInfo field, final Iterable<BytesRef> values) throws IOException {
private void addTermsDict(FieldInfo field, final Iterable<BytesRef> values) throws IOException {
// first check if its a "fixed-length" terms dict
int minLength = Integer.MAX_VALUE;
int maxLength = Integer.MIN_VALUE;

View File

@ -63,7 +63,7 @@ import org.apache.lucene.util.packed.DirectReader;
import org.apache.lucene.util.packed.MonotonicBlockPackedReader;
/** reader for {@link Lucene49DocValuesFormat} */
public class Lucene49DocValuesProducer extends DocValuesProducer implements Closeable {
class Lucene49DocValuesProducer extends DocValuesProducer implements Closeable {
private final Map<Integer,NumericEntry> numerics;
private final Map<Integer,BinaryEntry> binaries;
private final Map<Integer,SortedSetEntry> sortedSets;
@ -80,7 +80,7 @@ public class Lucene49DocValuesProducer extends DocValuesProducer implements Clos
private final Map<Integer,MonotonicBlockPackedReader> ordIndexInstances = new HashMap<>();
/** expert: instantiates a new reader */
protected Lucene49DocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
Lucene49DocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
// read in the entries from the metadata file.
ChecksumIndexInput in = state.directory.openChecksumInput(metaName, state.context);
@ -408,9 +408,8 @@ public class Lucene49DocValuesProducer extends DocValuesProducer implements Clos
};
}
/** returns an address instance for variable-length binary values.
* @lucene.internal */
protected MonotonicBlockPackedReader getAddressInstance(IndexInput data, FieldInfo field, BinaryEntry bytes) throws IOException {
/** returns an address instance for variable-length binary values. */
private MonotonicBlockPackedReader getAddressInstance(IndexInput data, FieldInfo field, BinaryEntry bytes) throws IOException {
final MonotonicBlockPackedReader addresses;
synchronized (addressInstances) {
MonotonicBlockPackedReader addrInstance = addressInstances.get(field.number);
@ -450,9 +449,8 @@ public class Lucene49DocValuesProducer extends DocValuesProducer implements Clos
};
}
/** returns an address instance for prefix-compressed binary values.
* @lucene.internal */
protected MonotonicBlockPackedReader getIntervalInstance(IndexInput data, FieldInfo field, BinaryEntry bytes) throws IOException {
/** returns an address instance for prefix-compressed binary values. */
private MonotonicBlockPackedReader getIntervalInstance(IndexInput data, FieldInfo field, BinaryEntry bytes) throws IOException {
final MonotonicBlockPackedReader addresses;
final long interval = bytes.addressInterval;
synchronized (addressInstances) {
@ -527,9 +525,8 @@ public class Lucene49DocValuesProducer extends DocValuesProducer implements Clos
};
}
/** returns an address instance for sortedset ordinal lists
* @lucene.internal */
protected MonotonicBlockPackedReader getOrdIndexInstance(IndexInput data, FieldInfo field, NumericEntry entry) throws IOException {
/** returns an address instance for sortedset ordinal lists */
private MonotonicBlockPackedReader getOrdIndexInstance(IndexInput data, FieldInfo field, NumericEntry entry) throws IOException {
final MonotonicBlockPackedReader ordIndex;
synchronized (ordIndexInstances) {
MonotonicBlockPackedReader ordIndexInstance = ordIndexInstances.get(field.number);
@ -711,7 +708,7 @@ public class Lucene49DocValuesProducer extends DocValuesProducer implements Clos
}
/** metadata entry for a numeric docvalues field */
protected static class NumericEntry {
static class NumericEntry {
private NumericEntry() {}
/** offset to the bitset representing docsWithField, or -1 if no documents have missing values */
long missingOffset;
@ -736,7 +733,7 @@ public class Lucene49DocValuesProducer extends DocValuesProducer implements Clos
}
/** metadata entry for a binary docvalues field */
protected static class BinaryEntry {
static class BinaryEntry {
private BinaryEntry() {}
/** offset to the bitset representing docsWithField, or -1 if no documents have missing values */
long missingOffset;
@ -759,7 +756,7 @@ public class Lucene49DocValuesProducer extends DocValuesProducer implements Clos
}
/** metadata entry for a sorted-set docvalues field */
protected static class SortedSetEntry {
static class SortedSetEntry {
private SortedSetEntry() {}
int format;
}

View File

@ -97,7 +97,7 @@ import org.apache.lucene.util.packed.PackedInts;
* </ol>
* @lucene.experimental
*/
public class Lucene49NormsFormat extends NormsFormat {
public final class Lucene49NormsFormat extends NormsFormat {
/** Sole Constructor */
public Lucene49NormsFormat() {}

View File

@ -18,8 +18,6 @@ package org.apache.lucene.search;
*/
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.diskdv.DiskDocValuesFormat;
import org.apache.lucene.codecs.lucene45.Lucene45DocValuesFormat;
import org.apache.lucene.codecs.lucene49.Lucene49DocValuesFormat;
import org.apache.lucene.codecs.memory.DirectDocValuesFormat;
import org.apache.lucene.document.Document;
@ -48,10 +46,9 @@ public class TestSortedSetSelector extends LuceneTestCase {
public static void beforeClass() throws Exception {
savedCodec = Codec.getDefault();
// currently only these codecs that support random access ordinals
int victim = random().nextInt(3);
int victim = random().nextInt(2);
switch(victim) {
case 0: Codec.setDefault(TestUtil.alwaysDocValuesFormat(new DirectDocValuesFormat())); break;
case 1: Codec.setDefault(TestUtil.alwaysDocValuesFormat(new DiskDocValuesFormat())); break;
default: Codec.setDefault(TestUtil.alwaysDocValuesFormat(new Lucene49DocValuesFormat()));
}
}

View File

@ -23,12 +23,12 @@ import org.apache.lucene.codecs.NormsFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.diskdv.DiskDocValuesFormat;
import org.apache.lucene.codecs.diskdv.DiskNormsFormat;
import org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsFormat;
import org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat;
import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat;
import org.apache.lucene.codecs.lucene49.Lucene49Codec;
import org.apache.lucene.codecs.lucene49.Lucene49DocValuesFormat;
import org.apache.lucene.codecs.lucene49.Lucene49NormsFormat;
/** Codec that tries to use as little ram as possible because he spent all his money on beer */
// TODO: better name :)
@ -40,9 +40,8 @@ public class CheapBastardCodec extends FilterCodec {
// uncompressing versions, waste lots of disk but no ram
private final StoredFieldsFormat storedFields = new Lucene40StoredFieldsFormat();
private final TermVectorsFormat termVectors = new Lucene40TermVectorsFormat();
// these go to disk for all docvalues/norms datastructures
private final DocValuesFormat docValues = new DiskDocValuesFormat();
private final NormsFormat norms = new DiskNormsFormat();
private final DocValuesFormat docValues = new Lucene49DocValuesFormat();
private final NormsFormat norms = new Lucene49NormsFormat();
public CheapBastardCodec() {
super("CheapBastard", new Lucene49Codec());

View File

@ -32,7 +32,6 @@ import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.asserting.AssertingDocValuesFormat;
import org.apache.lucene.codecs.asserting.AssertingPostingsFormat;
import org.apache.lucene.codecs.bloom.TestBloomFilteredLucene41Postings;
import org.apache.lucene.codecs.diskdv.DiskDocValuesFormat;
import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat;
import org.apache.lucene.codecs.lucene41ords.Lucene41WithOrds;
import org.apache.lucene.codecs.lucene41vargap.Lucene41VarGapDocFreqInterval;
@ -151,7 +150,6 @@ public class RandomCodec extends Lucene49Codec {
addDocValues(avoidCodecs,
new Lucene49DocValuesFormat(),
new DiskDocValuesFormat(),
new MemoryDocValuesFormat(),
new SimpleTextDocValuesFormat(),
new AssertingDocValuesFormat());

View File

@ -21,8 +21,8 @@
<fieldType name="string_simpletext" class="solr.StrField" postingsFormat="SimpleText"/>
<fieldType name="string_standard" class="solr.StrField" postingsFormat="Lucene41"/>
<fieldType name="string_disk" class="solr.StrField" docValuesFormat="Disk" />
<fieldType name="string_memory" class="solr.StrField" docValuesFormat="Lucene49" />
<fieldType name="string_disk" class="solr.StrField" docValuesFormat="Lucene49" />
<fieldType name="string_memory" class="solr.StrField" docValuesFormat="Memory" />
<fieldType name="string" class="solr.StrField" />

View File

@ -53,9 +53,9 @@ public class TestCodecSupport extends SolrTestCaseJ4 {
Map<String, SchemaField> fields = h.getCore().getLatestSchema().getFields();
SchemaField schemaField = fields.get("string_disk_f");
PerFieldDocValuesFormat format = (PerFieldDocValuesFormat) codec.docValuesFormat();
assertEquals("Disk", format.getDocValuesFormatForField(schemaField.getName()).getName());
assertEquals("Lucene49", format.getDocValuesFormatForField(schemaField.getName()).getName());
schemaField = fields.get("string_memory_f");
assertEquals("Lucene49",
assertEquals("Memory",
format.getDocValuesFormatForField(schemaField.getName()).getName());
schemaField = fields.get("string_f");
assertEquals("Lucene49",
@ -78,9 +78,9 @@ public class TestCodecSupport extends SolrTestCaseJ4 {
Codec codec = h.getCore().getCodec();
PerFieldDocValuesFormat format = (PerFieldDocValuesFormat) codec.docValuesFormat();
assertEquals("Disk", format.getDocValuesFormatForField("foo_disk").getName());
assertEquals("Disk", format.getDocValuesFormatForField("bar_disk").getName());
assertEquals("Lucene49", format.getDocValuesFormatForField("foo_memory").getName());
assertEquals("Lucene49", format.getDocValuesFormatForField("bar_memory").getName());
assertEquals("Lucene49", format.getDocValuesFormatForField("foo_disk").getName());
assertEquals("Lucene49", format.getDocValuesFormatForField("bar_disk").getName());
assertEquals("Memory", format.getDocValuesFormatForField("foo_memory").getName());
assertEquals("Memory", format.getDocValuesFormatForField("bar_memory").getName());
}
}