LUCENE-8267: removed references to memory codecs.

Dawid Weiss 2018-05-08 10:32:11 +02:00
parent 08ee037ff8
commit 85c00e77ef
58 changed files with 62 additions and 2799 deletions

View File

@@ -41,6 +41,9 @@ API Changes
 * LUCENE-8242: Deprecated method IndexSearcher#createNormalizedWeight() has
   been removed (Alan Woodward)
 
+* LUCENE-8267: Memory codecs removed from the codebase (MemoryPostings,
+  MemoryDocValues). (Dawid Weiss)
+
 Changes in Runtime Behavior
 
 * LUCENE-7837: Indices that were created before the previous major version

View File

@@ -43,3 +43,7 @@ the same index options for that field.
 Instead use IndexSearcher.createWeight(), rewriting the query first, and using
 a boost of 1f.
 
+## Memory codecs removed (LUCENE-8267) ##
+
+Memory codecs have been removed from the codebase (MemoryPostings, MemoryDocValues).
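
The migration note above only states the removal. A minimal, illustrative sketch of a migration path (not part of this commit) is shown below; it mirrors what the test changes later in this commit do by substituting the "Direct" formats for the "Memory" ones. The Lucene70Codec base class, the class name MigrateOffMemoryCodec, and the choice of "Direct" are assumptions here, any remaining format (or simply the default codec) works as well.

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene70.Lucene70Codec;
import org.apache.lucene.index.IndexWriterConfig;

public class MigrateOffMemoryCodec {

  /** Returns an IndexWriterConfig whose per-field formats no longer reference "Memory". */
  public static IndexWriterConfig withoutMemoryFormats(Analyzer analyzer) {
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    iwc.setCodec(new Lucene70Codec() {
      @Override
      public PostingsFormat getPostingsFormatForField(String field) {
        // previously: PostingsFormat.forName("Memory")
        // "Direct" ships in the lucene-codecs module
        return PostingsFormat.forName("Direct");
      }

      @Override
      public DocValuesFormat getDocValuesFormatForField(String field) {
        // previously: DocValuesFormat.forName("Memory")
        return DocValuesFormat.forName("Direct");
      }
    });
    return iwc;
  }
}

Segments already written with the removed formats cannot be opened by a build that no longer ships them, so affected indexes need to be re-indexed (or rewritten with a non-memory codec while still running a release that contains the old classes).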

View File

@@ -1,539 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.memory;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.NoSuchElementException;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.codecs.LegacyDocValuesIterables;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.MathUtil;
import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.FST.INPUT_TYPE;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util;
import org.apache.lucene.util.packed.BlockPackedWriter;
import org.apache.lucene.util.packed.MonotonicBlockPackedWriter;
import org.apache.lucene.util.packed.PackedInts.FormatAndBits;
import org.apache.lucene.util.packed.PackedInts;
import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.BLOCK_COMPRESSED;
import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.BLOCK_SIZE;
import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.BYTES;
import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.DELTA_COMPRESSED;
import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.FST;
import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.GCD_COMPRESSED;
import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.NUMBER;
import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.SORTED_NUMERIC;
import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.SORTED_NUMERIC_SINGLETON;
import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.SORTED_SET;
import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.SORTED_SET_SINGLETON;
import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.TABLE_COMPRESSED;
import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.VERSION_CURRENT;
/**
* Writer for {@link MemoryDocValuesFormat}
*/
class MemoryDocValuesConsumer extends DocValuesConsumer {
IndexOutput data, meta;
final int maxDoc;
final float acceptableOverheadRatio;
MemoryDocValuesConsumer(SegmentWriteState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension, float acceptableOverheadRatio) throws IOException {
this.acceptableOverheadRatio = acceptableOverheadRatio;
maxDoc = state.segmentInfo.maxDoc();
boolean success = false;
try {
String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
data = state.directory.createOutput(dataName, state.context);
CodecUtil.writeIndexHeader(data, dataCodec, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
meta = state.directory.createOutput(metaName, state.context);
CodecUtil.writeIndexHeader(meta, metaCodec, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
success = true;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(this);
}
}
}
@Override
public void addNumericField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
addNumericField(field, LegacyDocValuesIterables.numericIterable(field, valuesProducer, maxDoc), true);
}
void addNumericField(FieldInfo field, Iterable<Number> values, boolean optimizeStorage) throws IOException {
meta.writeVInt(field.number);
meta.writeByte(NUMBER);
meta.writeLong(data.getFilePointer());
long minValue = Long.MAX_VALUE;
long maxValue = Long.MIN_VALUE;
long blockSum = 0;
long gcd = 0;
boolean missing = false;
// TODO: more efficient?
HashSet<Long> uniqueValues = null;
long count = 0;
if (optimizeStorage) {
uniqueValues = new HashSet<>();
long currentBlockMin = Long.MAX_VALUE;
long currentBlockMax = Long.MIN_VALUE;
for (Number nv : values) {
final long v;
if (nv == null) {
v = 0;
missing = true;
} else {
v = nv.longValue();
}
if (gcd != 1) {
if (v < Long.MIN_VALUE / 2 || v > Long.MAX_VALUE / 2) {
// in that case v - minValue might overflow and make the GCD computation return
// wrong results. Since these extreme values are unlikely, we just discard
// GCD computation for them
gcd = 1;
} else if (count != 0) { // minValue needs to be set first
gcd = MathUtil.gcd(gcd, v - minValue);
}
}
currentBlockMin = Math.min(minValue, v);
currentBlockMax = Math.max(maxValue, v);
minValue = Math.min(minValue, v);
maxValue = Math.max(maxValue, v);
if (uniqueValues != null) {
if (uniqueValues.add(v)) {
if (uniqueValues.size() > 256) {
uniqueValues = null;
}
}
}
++count;
if (count % BLOCK_SIZE == 0) {
final long blockDelta = currentBlockMax - currentBlockMin;
final int blockDeltaRequired = PackedInts.unsignedBitsRequired(blockDelta);
final int blockBPV = PackedInts.fastestFormatAndBits(BLOCK_SIZE, blockDeltaRequired, acceptableOverheadRatio).bitsPerValue;
blockSum += blockBPV;
currentBlockMax = Long.MIN_VALUE;
currentBlockMin = Long.MAX_VALUE;
}
}
} else {
for (Number nv : values) {
long v = nv.longValue();
maxValue = Math.max(v, maxValue);
minValue = Math.min(v, minValue);
count++;
}
}
if (missing) {
long start = data.getFilePointer();
writeMissingBitset(values);
meta.writeLong(start);
meta.writeLong(data.getFilePointer() - start);
} else {
meta.writeLong(-1L);
}
final long delta = maxValue - minValue;
final int deltaRequired = delta < 0 ? 64 : PackedInts.bitsRequired(delta);
final FormatAndBits deltaBPV = PackedInts.fastestFormatAndBits(maxDoc, deltaRequired, acceptableOverheadRatio);
final FormatAndBits tableBPV;
if (count < Integer.MAX_VALUE && uniqueValues != null) {
tableBPV = PackedInts.fastestFormatAndBits(maxDoc, PackedInts.bitsRequired(uniqueValues.size()-1), acceptableOverheadRatio);
} else {
tableBPV = null;
}
final FormatAndBits gcdBPV;
if (count < Integer.MAX_VALUE && gcd != 0 && gcd != 1) {
final long gcdDelta = (maxValue - minValue) / gcd;
final int gcdRequired = gcdDelta < 0 ? 64 : PackedInts.bitsRequired(gcdDelta);
gcdBPV = PackedInts.fastestFormatAndBits(maxDoc, gcdRequired, acceptableOverheadRatio);
} else {
gcdBPV = null;
}
boolean doBlock = false;
if (blockSum != 0) {
int numBlocks = maxDoc / BLOCK_SIZE;
float avgBPV = blockSum / (float)numBlocks;
// just a heuristic, with tiny amounts of blocks our estimate is skewed as we ignore the final "incomplete" block.
// with at least 4 blocks it's pretty accurate. The difference must also be significant (according to acceptable overhead).
if (numBlocks >= 4 && (avgBPV+avgBPV*acceptableOverheadRatio) < deltaBPV.bitsPerValue) {
doBlock = true;
}
}
// blockpackedreader allows us to read in huge streams of ints
if (count >= Integer.MAX_VALUE) {
doBlock = true;
}
if (tableBPV != null && (tableBPV.bitsPerValue+tableBPV.bitsPerValue*acceptableOverheadRatio) < deltaBPV.bitsPerValue) {
// small number of unique values
meta.writeByte(TABLE_COMPRESSED); // table-compressed
Long[] decode = uniqueValues.toArray(new Long[uniqueValues.size()]);
final HashMap<Long,Integer> encode = new HashMap<>();
int length = 1 << tableBPV.bitsPerValue;
data.writeVInt(length);
for (int i = 0; i < decode.length; i++) {
data.writeLong(decode[i]);
encode.put(decode[i], i);
}
for (int i = decode.length; i < length; i++) {
data.writeLong(0);
}
meta.writeVInt(PackedInts.VERSION_CURRENT);
meta.writeLong(count);
data.writeVInt(tableBPV.format.getId());
data.writeVInt(tableBPV.bitsPerValue);
final PackedInts.Writer writer = PackedInts.getWriterNoHeader(data, tableBPV.format, (int)count, tableBPV.bitsPerValue, PackedInts.DEFAULT_BUFFER_SIZE);
for(Number nv : values) {
writer.add(encode.get(nv == null ? 0 : nv.longValue()));
}
writer.finish();
} else if (gcdBPV != null && (gcdBPV.bitsPerValue+gcdBPV.bitsPerValue*acceptableOverheadRatio) < deltaBPV.bitsPerValue) {
meta.writeByte(GCD_COMPRESSED);
meta.writeVInt(PackedInts.VERSION_CURRENT);
meta.writeLong(count);
data.writeLong(minValue);
data.writeLong(gcd);
data.writeVInt(gcdBPV.format.getId());
data.writeVInt(gcdBPV.bitsPerValue);
final PackedInts.Writer writer = PackedInts.getWriterNoHeader(data, gcdBPV.format, (int)count, gcdBPV.bitsPerValue, PackedInts.DEFAULT_BUFFER_SIZE);
for (Number nv : values) {
long value = nv == null ? 0 : nv.longValue();
writer.add((value - minValue) / gcd);
}
writer.finish();
} else if (doBlock) {
meta.writeByte(BLOCK_COMPRESSED); // block delta-compressed
meta.writeVInt(PackedInts.VERSION_CURRENT);
meta.writeLong(count);
data.writeVInt(BLOCK_SIZE);
final BlockPackedWriter writer = new BlockPackedWriter(data, BLOCK_SIZE);
for (Number nv : values) {
writer.add(nv == null ? 0 : nv.longValue());
}
writer.finish();
} else {
meta.writeByte(DELTA_COMPRESSED); // delta-compressed
meta.writeVInt(PackedInts.VERSION_CURRENT);
meta.writeLong(count);
final long minDelta = deltaBPV.bitsPerValue == 64 ? 0 : minValue;
data.writeLong(minDelta);
data.writeVInt(deltaBPV.format.getId());
data.writeVInt(deltaBPV.bitsPerValue);
final PackedInts.Writer writer = PackedInts.getWriterNoHeader(data, deltaBPV.format, (int)count, deltaBPV.bitsPerValue, PackedInts.DEFAULT_BUFFER_SIZE);
for (Number nv : values) {
long v = nv == null ? 0 : nv.longValue();
writer.add(v - minDelta);
}
writer.finish();
}
}
@Override
public void close() throws IOException {
boolean success = false;
try {
if (meta != null) {
meta.writeVInt(-1); // write EOF marker
CodecUtil.writeFooter(meta); // write checksum
}
if (data != null) {
CodecUtil.writeFooter(data);
}
success = true;
} finally {
if (success) {
IOUtils.close(data, meta);
} else {
IOUtils.closeWhileHandlingException(data, meta);
}
data = meta = null;
}
}
@Override
public void addBinaryField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
addBinaryField(field, LegacyDocValuesIterables.binaryIterable(field, valuesProducer, maxDoc));
}
private void addBinaryField(FieldInfo field, final Iterable<BytesRef> values) throws IOException {
// write the byte[] data
meta.writeVInt(field.number);
meta.writeByte(BYTES);
int minLength = Integer.MAX_VALUE;
int maxLength = Integer.MIN_VALUE;
final long startFP = data.getFilePointer();
boolean missing = false;
int upto = 0;
for(BytesRef v : values) {
final int length;
if (v == null) {
length = 0;
missing = true;
} else {
length = v.length;
}
if (length > MemoryDocValuesFormat.MAX_BINARY_FIELD_LENGTH) {
throw new IllegalArgumentException("DocValuesField \"" + field.name + "\" is too large, must be <= " + MemoryDocValuesFormat.MAX_BINARY_FIELD_LENGTH + " but got length=" + length + " v=" + v + "; upto=" + upto + " values=" + values);
}
upto++;
minLength = Math.min(minLength, length);
maxLength = Math.max(maxLength, length);
if (v != null) {
data.writeBytes(v.bytes, v.offset, v.length);
}
}
meta.writeLong(startFP);
meta.writeLong(data.getFilePointer() - startFP);
if (missing) {
long start = data.getFilePointer();
writeMissingBitset(values);
meta.writeLong(start);
meta.writeLong(data.getFilePointer() - start);
} else {
meta.writeLong(-1L);
}
meta.writeVInt(minLength);
meta.writeVInt(maxLength);
// if minLength == maxLength, it's a fixed-length byte[], we are done (the addresses are implicit)
// otherwise, we need to record the length fields...
if (minLength != maxLength) {
meta.writeVInt(PackedInts.VERSION_CURRENT);
meta.writeVInt(BLOCK_SIZE);
final MonotonicBlockPackedWriter writer = new MonotonicBlockPackedWriter(data, BLOCK_SIZE);
long addr = 0;
for (BytesRef v : values) {
if (v != null) {
addr += v.length;
}
writer.add(addr);
}
writer.finish();
}
}
private void writeFST(FieldInfo field, Iterable<BytesRef> values) throws IOException {
meta.writeVInt(field.number);
meta.writeByte(FST);
meta.writeLong(data.getFilePointer());
PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
Builder<Long> builder = new Builder<>(INPUT_TYPE.BYTE1, outputs);
IntsRefBuilder scratch = new IntsRefBuilder();
long ord = 0;
for (BytesRef v : values) {
builder.add(Util.toIntsRef(v, scratch), ord);
ord++;
}
FST<Long> fst = builder.finish();
if (fst != null) {
fst.save(data);
}
meta.writeVLong(ord);
}
// TODO: in some cases representing missing with minValue-1 wouldn't take up additional space and so on,
// but this is very simple, and algorithms only check this for values of 0 anyway (doesnt slow down normal decode)
void writeMissingBitset(Iterable<?> values) throws IOException {
long bits = 0;
int count = 0;
for (Object v : values) {
if (count == 64) {
data.writeLong(bits);
count = 0;
bits = 0;
}
if (v != null) {
bits |= 1L << (count & 0x3f);
}
count++;
}
if (count > 0) {
data.writeLong(bits);
}
}
@Override
public void addSortedField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
addSortedField(field,
LegacyDocValuesIterables.valuesIterable(valuesProducer.getSorted(field)),
LegacyDocValuesIterables.sortedOrdIterable(valuesProducer, field, maxDoc));
}
private void addSortedField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrd) throws IOException {
// write the ordinals as numerics
addNumericField(field, docToOrd, false);
// write the values as FST
writeFST(field, values);
}
@Override
public void addSortedNumericField(FieldInfo field, final DocValuesProducer valuesProducer) throws IOException {
final Iterable<Number> docToValueCount = LegacyDocValuesIterables.sortedNumericToDocCount(valuesProducer, field, maxDoc);
final Iterable<Number> values = LegacyDocValuesIterables.sortedNumericToValues(valuesProducer, field);
meta.writeVInt(field.number);
if (isSingleValued(docToValueCount)) {
meta.writeByte(SORTED_NUMERIC_SINGLETON);
addNumericField(field, singletonView(docToValueCount, values, null), true);
} else {
meta.writeByte(SORTED_NUMERIC);
// write the addresses:
meta.writeVInt(PackedInts.VERSION_CURRENT);
meta.writeVInt(BLOCK_SIZE);
meta.writeLong(data.getFilePointer());
final MonotonicBlockPackedWriter writer = new MonotonicBlockPackedWriter(data, BLOCK_SIZE);
long addr = 0;
writer.add(addr);
for (Number v : docToValueCount) {
addr += v.longValue();
writer.add(addr);
}
writer.finish();
long valueCount = writer.ord();
meta.writeLong(valueCount);
// write the values
addNumericField(field, values, true);
}
}
// note: this might not be the most efficient... but it's fairly simple
@Override
public void addSortedSetField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
Iterable<BytesRef> values = LegacyDocValuesIterables.valuesIterable(valuesProducer.getSortedSet(field));
Iterable<Number> docToOrdCount = LegacyDocValuesIterables.sortedSetOrdCountIterable(valuesProducer, field, maxDoc);
Iterable<Number> ords = LegacyDocValuesIterables.sortedSetOrdsIterable(valuesProducer, field);
meta.writeVInt(field.number);
if (isSingleValued(docToOrdCount)) {
meta.writeByte(SORTED_SET_SINGLETON);
addSortedField(field, values, singletonView(docToOrdCount, ords, -1L));
} else {
meta.writeByte(SORTED_SET);
// write the ordinals as a binary field
addBinaryField(field, new Iterable<BytesRef>() {
@Override
public Iterator<BytesRef> iterator() {
return new SortedSetIterator(docToOrdCount.iterator(), ords.iterator());
}
});
// write the values as FST
writeFST(field, values);
}
}
// per-document vint-encoded byte[]
static class SortedSetIterator implements Iterator<BytesRef> {
byte[] buffer = new byte[10];
ByteArrayDataOutput out = new ByteArrayDataOutput();
BytesRef ref = new BytesRef();
final Iterator<Number> counts;
final Iterator<Number> ords;
SortedSetIterator(Iterator<Number> counts, Iterator<Number> ords) {
this.counts = counts;
this.ords = ords;
}
@Override
public boolean hasNext() {
return counts.hasNext();
}
@Override
public BytesRef next() {
if (!hasNext()) {
throw new NoSuchElementException();
}
int count = counts.next().intValue();
int maxSize = count*9; // worst case
if (maxSize > buffer.length) {
buffer = ArrayUtil.grow(buffer, maxSize);
}
try {
encodeValues(count);
} catch (IOException bogus) {
throw new RuntimeException(bogus);
}
ref.bytes = buffer;
ref.offset = 0;
ref.length = out.getPosition();
return ref;
}
// encodes count values to buffer
private void encodeValues(int count) throws IOException {
out.reset(buffer);
long lastOrd = 0;
for (int i = 0; i < count; i++) {
long ord = ords.next().longValue();
out.writeVLong(ord - lastOrd);
lastOrd = ord;
}
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
}
}

View File

@@ -1,72 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.memory;
import java.io.IOException;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.packed.PackedInts;
/** In-memory docvalues format */
public class MemoryDocValuesFormat extends DocValuesFormat {
/** Maximum length for each binary doc values field. */
public static final int MAX_BINARY_FIELD_LENGTH = (1 << 15) - 2;
final float acceptableOverheadRatio;
/**
* Calls {@link #MemoryDocValuesFormat(float)
* MemoryDocValuesFormat(PackedInts.DEFAULT)}
*/
public MemoryDocValuesFormat() {
this(PackedInts.DEFAULT);
}
/**
* Creates a new MemoryDocValuesFormat with the specified
* <code>acceptableOverheadRatio</code> for NumericDocValues.
* @param acceptableOverheadRatio compression parameter for numerics.
* Currently this is only used when the number of unique values is small.
*
* @lucene.experimental
*/
public MemoryDocValuesFormat(float acceptableOverheadRatio) {
super("Memory");
this.acceptableOverheadRatio = acceptableOverheadRatio;
}
@Override
public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
return new MemoryDocValuesConsumer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION, acceptableOverheadRatio);
}
@Override
public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException {
return new MemoryDocValuesProducer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION);
}
static final String DATA_CODEC = "MemoryDocValuesData";
static final String DATA_EXTENSION = "mdvd";
static final String METADATA_CODEC = "MemoryDocValuesMetadata";
static final String METADATA_EXTENSION = "mdvm";
}

View File

@@ -1,877 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.memory;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.*;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.fst.BytesRefFSTEnum;
import org.apache.lucene.util.fst.BytesRefFSTEnum.InputOutput;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FST.Arc;
import org.apache.lucene.util.fst.FST.BytesReader;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util;
import org.apache.lucene.util.packed.BlockPackedReader;
import org.apache.lucene.util.packed.MonotonicBlockPackedReader;
import org.apache.lucene.util.packed.PackedInts;
/**
* Reader for {@link MemoryDocValuesFormat}
*/
class MemoryDocValuesProducer extends DocValuesProducer {
// metadata maps (just file pointers and minimal stuff)
private final Map<String,NumericEntry> numerics = new HashMap<>();
private final Map<String,BinaryEntry> binaries = new HashMap<>();
private final Map<String,FSTEntry> fsts = new HashMap<>();
private final Map<String,SortedSetEntry> sortedSets = new HashMap<>();
private final Map<String,SortedNumericEntry> sortedNumerics = new HashMap<>();
private final IndexInput data;
// ram instances we have already loaded
private final Map<String,LegacyNumericDocValues> numericInstances = new HashMap<>();
private final Map<String,BytesAndAddresses> pagedBytesInstances = new HashMap<>();
private final Map<String,FST<Long>> fstInstances = new HashMap<>();
private final Map<String,FixedBitSet> docsWithFieldInstances = new HashMap<>();
private final Map<String,MonotonicBlockPackedReader> addresses = new HashMap<>();
private final Map<String,Accountable> numericInfo = new HashMap<>();
private final int numEntries;
private final int maxDoc;
private final AtomicLong ramBytesUsed;
private final int version;
private final boolean merging;
static final byte NUMBER = 0;
static final byte BYTES = 1;
static final byte FST = 2;
static final byte SORTED_SET = 4;
static final byte SORTED_SET_SINGLETON = 5;
static final byte SORTED_NUMERIC = 6;
static final byte SORTED_NUMERIC_SINGLETON = 7;
static final int BLOCK_SIZE = 4096;
static final byte DELTA_COMPRESSED = 0;
static final byte TABLE_COMPRESSED = 1;
static final byte BLOCK_COMPRESSED = 2;
static final byte GCD_COMPRESSED = 3;
static final int VERSION_START = 4;
static final int VERSION_CURRENT = VERSION_START;
// clone for merge: when merging we don't do any instances.put()s
MemoryDocValuesProducer(MemoryDocValuesProducer original) throws IOException {
assert Thread.holdsLock(original);
numerics.putAll(original.numerics);
binaries.putAll(original.binaries);
fsts.putAll(original.fsts);
sortedSets.putAll(original.sortedSets);
sortedNumerics.putAll(original.sortedNumerics);
data = original.data.clone();
numericInstances.putAll(original.numericInstances);
pagedBytesInstances.putAll(original.pagedBytesInstances);
fstInstances.putAll(original.fstInstances);
docsWithFieldInstances.putAll(original.docsWithFieldInstances);
addresses.putAll(original.addresses);
numericInfo.putAll(original.numericInfo);
numEntries = original.numEntries;
maxDoc = original.maxDoc;
ramBytesUsed = new AtomicLong(original.ramBytesUsed.get());
version = original.version;
merging = true;
}
MemoryDocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
maxDoc = state.segmentInfo.maxDoc();
merging = false;
String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
// read in the entries from the metadata file.
ChecksumIndexInput in = state.directory.openChecksumInput(metaName, state.context);
boolean success = false;
try {
version = CodecUtil.checkIndexHeader(in, metaCodec, VERSION_START, VERSION_CURRENT,
state.segmentInfo.getId(), state.segmentSuffix);
numEntries = readFields(in, state.fieldInfos);
CodecUtil.checkFooter(in);
ramBytesUsed = new AtomicLong(RamUsageEstimator.shallowSizeOfInstance(getClass()));
success = true;
} finally {
if (success) {
IOUtils.close(in);
} else {
IOUtils.closeWhileHandlingException(in);
}
}
String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
this.data = state.directory.openInput(dataName, state.context);
success = false;
try {
final int version2 = CodecUtil.checkIndexHeader(data, dataCodec, VERSION_START, VERSION_CURRENT,
state.segmentInfo.getId(), state.segmentSuffix);
if (version != version2) {
throw new CorruptIndexException("Format versions mismatch: meta=" + version + ", data=" + version2, data);
}
// NOTE: data file is too costly to verify checksum against all the bytes on open,
// but for now we at least verify proper structure of the checksum footer: which looks
// for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
// such as file truncation.
CodecUtil.retrieveChecksum(data);
success = true;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(this.data);
}
}
}
private NumericEntry readNumericEntry(IndexInput meta) throws IOException {
NumericEntry entry = new NumericEntry();
entry.offset = meta.readLong();
entry.missingOffset = meta.readLong();
if (entry.missingOffset != -1) {
entry.missingBytes = meta.readLong();
} else {
entry.missingBytes = 0;
}
entry.format = meta.readByte();
switch(entry.format) {
case DELTA_COMPRESSED:
case TABLE_COMPRESSED:
case BLOCK_COMPRESSED:
case GCD_COMPRESSED:
break;
default:
throw new CorruptIndexException("Unknown format: " + entry.format, meta);
}
entry.packedIntsVersion = meta.readVInt();
entry.count = meta.readLong();
return entry;
}
private BinaryEntry readBinaryEntry(IndexInput meta) throws IOException {
BinaryEntry entry = new BinaryEntry();
entry.offset = meta.readLong();
entry.numBytes = meta.readLong();
entry.missingOffset = meta.readLong();
if (entry.missingOffset != -1) {
entry.missingBytes = meta.readLong();
} else {
entry.missingBytes = 0;
}
entry.minLength = meta.readVInt();
entry.maxLength = meta.readVInt();
if (entry.minLength != entry.maxLength) {
entry.packedIntsVersion = meta.readVInt();
entry.blockSize = meta.readVInt();
}
return entry;
}
private FSTEntry readFSTEntry(IndexInput meta) throws IOException {
FSTEntry entry = new FSTEntry();
entry.offset = meta.readLong();
entry.numOrds = meta.readVLong();
return entry;
}
private int readFields(IndexInput meta, FieldInfos infos) throws IOException {
int numEntries = 0;
int fieldNumber = meta.readVInt();
while (fieldNumber != -1) {
numEntries++;
FieldInfo info = infos.fieldInfo(fieldNumber);
if (info == null) {
throw new CorruptIndexException("invalid field number: " + fieldNumber, meta);
}
int fieldType = meta.readByte();
if (fieldType == NUMBER) {
numerics.put(info.name, readNumericEntry(meta));
} else if (fieldType == BYTES) {
binaries.put(info.name, readBinaryEntry(meta));
} else if (fieldType == FST) {
fsts.put(info.name,readFSTEntry(meta));
} else if (fieldType == SORTED_SET) {
SortedSetEntry entry = new SortedSetEntry();
entry.singleton = false;
sortedSets.put(info.name, entry);
} else if (fieldType == SORTED_SET_SINGLETON) {
SortedSetEntry entry = new SortedSetEntry();
entry.singleton = true;
sortedSets.put(info.name, entry);
} else if (fieldType == SORTED_NUMERIC) {
SortedNumericEntry entry = new SortedNumericEntry();
entry.singleton = false;
entry.packedIntsVersion = meta.readVInt();
entry.blockSize = meta.readVInt();
entry.addressOffset = meta.readLong();
entry.valueCount = meta.readLong();
sortedNumerics.put(info.name, entry);
} else if (fieldType == SORTED_NUMERIC_SINGLETON) {
SortedNumericEntry entry = new SortedNumericEntry();
entry.singleton = true;
sortedNumerics.put(info.name, entry);
} else {
throw new CorruptIndexException("invalid entry type: " + fieldType + ", fieldName=" + info.name, meta);
}
fieldNumber = meta.readVInt();
}
return numEntries;
}
@Override
public synchronized NumericDocValues getNumeric(FieldInfo field) throws IOException {
LegacyNumericDocValues instance = numericInstances.get(field.name);
if (instance == null) {
instance = loadNumeric(field);
if (!merging) {
numericInstances.put(field.name, instance);
}
}
NumericEntry ne = numerics.get(field.name);
return new LegacyNumericDocValuesWrapper(getMissingBits(field, ne.missingOffset, ne.missingBytes), instance);
}
private synchronized LegacyNumericDocValues getNumericNonIterator(FieldInfo field) throws IOException {
LegacyNumericDocValues instance = numericInstances.get(field.name);
if (instance == null) {
instance = loadNumeric(field);
if (!merging) {
numericInstances.put(field.name, instance);
}
}
return instance;
}
@Override
public long ramBytesUsed() {
return ramBytesUsed.get();
}
@Override
public synchronized Collection<Accountable> getChildResources() {
List<Accountable> resources = new ArrayList<>();
resources.addAll(Accountables.namedAccountables("numeric field", numericInfo));
resources.addAll(Accountables.namedAccountables("pagedbytes field", pagedBytesInstances));
resources.addAll(Accountables.namedAccountables("term dict field", fstInstances));
resources.addAll(Accountables.namedAccountables("missing bitset field", docsWithFieldInstances));
resources.addAll(Accountables.namedAccountables("addresses field", addresses));
return Collections.unmodifiableList(resources);
}
@Override
public void checkIntegrity() throws IOException {
CodecUtil.checksumEntireFile(data.clone());
}
@Override
public synchronized DocValuesProducer getMergeInstance() throws IOException {
return new MemoryDocValuesProducer(this);
}
@Override
public String toString() {
return getClass().getSimpleName() + "(entries=" + numEntries + ")";
}
private LegacyNumericDocValues loadNumeric(FieldInfo field) throws IOException {
NumericEntry entry = numerics.get(field.name);
IndexInput data = this.data.clone();
data.seek(entry.offset + entry.missingBytes);
switch (entry.format) {
case TABLE_COMPRESSED:
int size = data.readVInt();
if (size > 256) {
throw new CorruptIndexException("TABLE_COMPRESSED cannot have more than 256 distinct values, got=" + size, data);
}
final long decode[] = new long[size];
for (int i = 0; i < decode.length; i++) {
decode[i] = data.readLong();
}
final int formatID = data.readVInt();
final int bitsPerValue = data.readVInt();
final PackedInts.Reader ordsReader = PackedInts.getReaderNoHeader(data, PackedInts.Format.byId(formatID), entry.packedIntsVersion, (int)entry.count, bitsPerValue);
if (!merging) {
ramBytesUsed.addAndGet(RamUsageEstimator.sizeOf(decode) + ordsReader.ramBytesUsed());
numericInfo.put(field.name, Accountables.namedAccountable("table compressed", ordsReader));
}
return new LegacyNumericDocValues() {
@Override
public long get(int docID) {
return decode[(int)ordsReader.get(docID)];
}
};
case DELTA_COMPRESSED:
final long minDelta = data.readLong();
final int formatIDDelta = data.readVInt();
final int bitsPerValueDelta = data.readVInt();
final PackedInts.Reader deltaReader = PackedInts.getReaderNoHeader(data, PackedInts.Format.byId(formatIDDelta), entry.packedIntsVersion, (int)entry.count, bitsPerValueDelta);
if (!merging) {
ramBytesUsed.addAndGet(deltaReader.ramBytesUsed());
numericInfo.put(field.name, Accountables.namedAccountable("delta compressed", deltaReader));
}
return new LegacyNumericDocValues() {
@Override
public long get(int docID) {
return minDelta + deltaReader.get(docID);
}
};
case BLOCK_COMPRESSED:
final int blockSize = data.readVInt();
final BlockPackedReader reader = new BlockPackedReader(data, entry.packedIntsVersion, blockSize, entry.count, false);
if (!merging) {
ramBytesUsed.addAndGet(reader.ramBytesUsed());
numericInfo.put(field.name, Accountables.namedAccountable("block compressed", reader));
}
return new LegacyNumericDocValues() {
@Override
public long get(int docID) {
return reader.get(docID);
}
};
case GCD_COMPRESSED:
final long min = data.readLong();
final long mult = data.readLong();
final int formatIDGCD = data.readVInt();
final int bitsPerValueGCD = data.readVInt();
final PackedInts.Reader quotientReader = PackedInts.getReaderNoHeader(data, PackedInts.Format.byId(formatIDGCD), entry.packedIntsVersion, (int)entry.count, bitsPerValueGCD);
if (!merging) {
ramBytesUsed.addAndGet(quotientReader.ramBytesUsed());
numericInfo.put(field.name, Accountables.namedAccountable("gcd compressed", quotientReader));
}
return new LegacyNumericDocValues() {
@Override
public long get(int docID) {
return min + mult * quotientReader.get(docID);
}
};
default:
throw new AssertionError();
}
}
private LegacyBinaryDocValues getLegacyBinary(FieldInfo field) throws IOException {
BinaryEntry entry = binaries.get(field.name);
BytesAndAddresses instance;
synchronized (this) {
instance = pagedBytesInstances.get(field.name);
if (instance == null) {
instance = loadBinary(field);
if (!merging) {
pagedBytesInstances.put(field.name, instance);
}
}
}
final PagedBytes.Reader bytesReader = instance.reader;
final MonotonicBlockPackedReader addresses = instance.addresses;
if (addresses == null) {
assert entry.minLength == entry.maxLength;
final int fixedLength = entry.minLength;
return new LegacyBinaryDocValues() {
final BytesRef term = new BytesRef();
@Override
public BytesRef get(int docID) {
bytesReader.fillSlice(term, fixedLength * (long)docID, fixedLength);
return term;
}
};
} else {
return new LegacyBinaryDocValues() {
final BytesRef term = new BytesRef();
@Override
public BytesRef get(int docID) {
long startAddress = docID == 0 ? 0 : addresses.get(docID-1);
long endAddress = addresses.get(docID);
bytesReader.fillSlice(term, startAddress, (int) (endAddress - startAddress));
return term;
}
};
}
}
@Override
public synchronized BinaryDocValues getBinary(FieldInfo field) throws IOException {
BinaryEntry be = binaries.get(field.name);
return new LegacyBinaryDocValuesWrapper(getMissingBits(field, be.missingOffset, be.missingBytes), getLegacyBinary(field));
}
private BytesAndAddresses loadBinary(FieldInfo field) throws IOException {
BytesAndAddresses bytesAndAddresses = new BytesAndAddresses();
BinaryEntry entry = binaries.get(field.name);
IndexInput data = this.data.clone();
data.seek(entry.offset);
PagedBytes bytes = new PagedBytes(16);
bytes.copy(data, entry.numBytes);
bytesAndAddresses.reader = bytes.freeze(true);
if (!merging) {
ramBytesUsed.addAndGet(bytesAndAddresses.reader.ramBytesUsed());
}
if (entry.minLength != entry.maxLength) {
data.seek(data.getFilePointer() + entry.missingBytes);
bytesAndAddresses.addresses = MonotonicBlockPackedReader.of(data, entry.packedIntsVersion, entry.blockSize, maxDoc, false);
if (!merging) {
ramBytesUsed.addAndGet(bytesAndAddresses.addresses.ramBytesUsed());
}
}
return bytesAndAddresses;
}
@Override
public SortedDocValues getSorted(FieldInfo field) throws IOException {
return new LegacySortedDocValuesWrapper(getSortedNonIterator(field), maxDoc);
}
private LegacySortedDocValues getSortedNonIterator(FieldInfo field) throws IOException {
final FSTEntry entry = fsts.get(field.name);
if (entry.numOrds == 0) {
return DocValues.emptyLegacySorted();
}
FST<Long> instance;
synchronized(this) {
instance = fstInstances.get(field.name);
if (instance == null) {
IndexInput data = this.data.clone();
data.seek(entry.offset);
instance = new FST<>(data, PositiveIntOutputs.getSingleton());
if (!merging) {
ramBytesUsed.addAndGet(instance.ramBytesUsed());
fstInstances.put(field.name, instance);
}
}
}
final LegacyNumericDocValues docToOrd = getNumericNonIterator(field);
final FST<Long> fst = instance;
// per-thread resources
final BytesReader in = fst.getBytesReader();
final Arc<Long> firstArc = new Arc<>();
final Arc<Long> scratchArc = new Arc<>();
final IntsRefBuilder scratchInts = new IntsRefBuilder();
final BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<>(fst);
return new LegacySortedDocValues() {
final BytesRefBuilder term = new BytesRefBuilder();
@Override
public int getOrd(int docID) {
return (int) docToOrd.get(docID);
}
@Override
public BytesRef lookupOrd(int ord) {
try {
in.setPosition(0);
fst.getFirstArc(firstArc);
IntsRef output = Util.getByOutput(fst, ord, in, firstArc, scratchArc, scratchInts);
return Util.toBytesRef(output, term);
} catch (IOException bogus) {
throw new RuntimeException(bogus);
}
}
@Override
public int lookupTerm(BytesRef key) {
try {
InputOutput<Long> o = fstEnum.seekCeil(key);
if (o == null) {
return -getValueCount()-1;
} else if (o.input.equals(key)) {
return o.output.intValue();
} else {
return (int) -o.output-1;
}
} catch (IOException bogus) {
throw new RuntimeException(bogus);
}
}
@Override
public int getValueCount() {
return (int)entry.numOrds;
}
@Override
public TermsEnum termsEnum() {
return new FSTTermsEnum(fst);
}
};
}
@Override
public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
SortedNumericEntry entry = sortedNumerics.get(field.name);
if (entry.singleton) {
LegacyNumericDocValues values = getNumericNonIterator(field);
NumericEntry ne = numerics.get(field.name);
Bits docsWithField = getMissingBits(field, ne.missingOffset, ne.missingBytes);
return DocValues.singleton(new LegacyNumericDocValuesWrapper(docsWithField, values));
} else {
final LegacyNumericDocValues values = getNumericNonIterator(field);
final MonotonicBlockPackedReader addr;
synchronized (this) {
MonotonicBlockPackedReader res = addresses.get(field.name);
if (res == null) {
IndexInput data = this.data.clone();
data.seek(entry.addressOffset);
res = MonotonicBlockPackedReader.of(data, entry.packedIntsVersion, entry.blockSize, entry.valueCount, false);
if (!merging) {
addresses.put(field.name, res);
ramBytesUsed.addAndGet(res.ramBytesUsed());
}
}
addr = res;
}
return new LegacySortedNumericDocValuesWrapper(new LegacySortedNumericDocValues() {
int startOffset;
int endOffset;
@Override
public void setDocument(int doc) {
startOffset = (int) addr.get(doc);
endOffset = (int) addr.get(doc+1);
}
@Override
public long valueAt(int index) {
return values.get(startOffset + index);
}
@Override
public int count() {
return (endOffset - startOffset);
}
}, maxDoc);
}
}
@Override
public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
SortedSetEntry sortedSetEntry = sortedSets.get(field.name);
if (sortedSetEntry.singleton) {
return DocValues.singleton(getSorted(field));
}
final FSTEntry entry = fsts.get(field.name);
if (entry.numOrds == 0) {
return DocValues.emptySortedSet(); // empty FST!
}
FST<Long> instance;
synchronized(this) {
instance = fstInstances.get(field.name);
if (instance == null) {
IndexInput data = this.data.clone();
data.seek(entry.offset);
instance = new FST<>(data, PositiveIntOutputs.getSingleton());
if (!merging) {
ramBytesUsed.addAndGet(instance.ramBytesUsed());
fstInstances.put(field.name, instance);
}
}
}
final LegacyBinaryDocValues docToOrds = getLegacyBinary(field);
final FST<Long> fst = instance;
// per-thread resources
final BytesReader in = fst.getBytesReader();
final Arc<Long> firstArc = new Arc<>();
final Arc<Long> scratchArc = new Arc<>();
final IntsRefBuilder scratchInts = new IntsRefBuilder();
final BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<>(fst);
final ByteArrayDataInput input = new ByteArrayDataInput();
return new LegacySortedSetDocValuesWrapper(new LegacySortedSetDocValues() {
final BytesRefBuilder term = new BytesRefBuilder();
BytesRef ref;
long currentOrd;
@Override
public long nextOrd() {
if (input.eof()) {
return NO_MORE_ORDS;
} else {
currentOrd += input.readVLong();
return currentOrd;
}
}
@Override
public void setDocument(int docID) {
ref = docToOrds.get(docID);
input.reset(ref.bytes, ref.offset, ref.length);
currentOrd = 0;
}
@Override
public BytesRef lookupOrd(long ord) {
try {
in.setPosition(0);
fst.getFirstArc(firstArc);
IntsRef output = Util.getByOutput(fst, ord, in, firstArc, scratchArc, scratchInts);
return Util.toBytesRef(output, term);
} catch (IOException bogus) {
throw new RuntimeException(bogus);
}
}
@Override
public long lookupTerm(BytesRef key) {
try {
InputOutput<Long> o = fstEnum.seekCeil(key);
if (o == null) {
return -getValueCount()-1;
} else if (o.input.equals(key)) {
return o.output.intValue();
} else {
return -o.output-1;
}
} catch (IOException bogus) {
throw new RuntimeException(bogus);
}
}
@Override
public long getValueCount() {
return entry.numOrds;
}
@Override
public TermsEnum termsEnum() {
return new FSTTermsEnum(fst);
}
}, maxDoc);
}
private Bits getMissingBits(FieldInfo field, final long offset, final long length) throws IOException {
if (offset == -1) {
return new Bits.MatchAllBits(maxDoc);
} else {
FixedBitSet instance;
synchronized(this) {
instance = docsWithFieldInstances.get(field.name);
if (instance == null) {
IndexInput data = this.data.clone();
data.seek(offset);
assert length % 8 == 0;
long bits[] = new long[(int) length >> 3];
for (int i = 0; i < bits.length; i++) {
bits[i] = data.readLong();
}
instance = new FixedBitSet(bits, maxDoc);
if (!merging) {
docsWithFieldInstances.put(field.name, instance);
ramBytesUsed.addAndGet(instance.ramBytesUsed());
}
}
}
return instance;
}
}
@Override
public void close() throws IOException {
data.close();
}
static class NumericEntry {
long offset;
long count;
long missingOffset;
long missingBytes;
byte format;
int packedIntsVersion;
}
static class BinaryEntry {
long offset;
long missingOffset;
long missingBytes;
long numBytes;
int minLength;
int maxLength;
int packedIntsVersion;
int blockSize;
}
static class FSTEntry {
long offset;
long numOrds;
}
static class SortedSetEntry {
boolean singleton;
}
static class SortedNumericEntry {
boolean singleton;
long addressOffset;
int packedIntsVersion;
int blockSize;
long valueCount;
}
static class BytesAndAddresses implements Accountable {
PagedBytes.Reader reader;
MonotonicBlockPackedReader addresses;
@Override
public long ramBytesUsed() {
long bytesUsed = reader.ramBytesUsed();
if (addresses != null) {
bytesUsed += addresses.ramBytesUsed();
}
return bytesUsed;
}
@Override
public Collection<Accountable> getChildResources() {
List<Accountable> resources = new ArrayList<>();
if (addresses != null) {
resources.add(Accountables.namedAccountable("addresses", addresses));
}
resources.add(Accountables.namedAccountable("term bytes", reader));
return Collections.unmodifiableList(resources);
}
}
// exposes FSTEnum directly as a TermsEnum: avoids binary-search next()
static class FSTTermsEnum extends TermsEnum {
final BytesRefFSTEnum<Long> in;
// this is all for the complicated seek(ord)...
// maybe we should add a FSTEnum that supports this operation?
final FST<Long> fst;
final FST.BytesReader bytesReader;
final Arc<Long> firstArc = new Arc<>();
final Arc<Long> scratchArc = new Arc<>();
final IntsRefBuilder scratchInts = new IntsRefBuilder();
final BytesRefBuilder scratchBytes = new BytesRefBuilder();
FSTTermsEnum(FST<Long> fst) {
this.fst = fst;
in = new BytesRefFSTEnum<>(fst);
bytesReader = fst.getBytesReader();
}
@Override
public BytesRef next() throws IOException {
InputOutput<Long> io = in.next();
if (io == null) {
return null;
} else {
return io.input;
}
}
@Override
public SeekStatus seekCeil(BytesRef text) throws IOException {
if (in.seekCeil(text) == null) {
return SeekStatus.END;
} else if (term().equals(text)) {
// TODO: add SeekStatus to FSTEnum like in https://issues.apache.org/jira/browse/LUCENE-3729
// to remove this comparision?
return SeekStatus.FOUND;
} else {
return SeekStatus.NOT_FOUND;
}
}
@Override
public boolean seekExact(BytesRef text) throws IOException {
if (in.seekExact(text) == null) {
return false;
} else {
return true;
}
}
@Override
public void seekExact(long ord) throws IOException {
// TODO: would be better to make this simpler and faster.
// but we dont want to introduce a bug that corrupts our enum state!
bytesReader.setPosition(0);
fst.getFirstArc(firstArc);
IntsRef output = Util.getByOutput(fst, ord, bytesReader, firstArc, scratchArc, scratchInts);
// TODO: we could do this lazily, better to try to push into FSTEnum though?
in.seekExact(Util.toBytesRef(output, new BytesRefBuilder()));
}
@Override
public BytesRef term() throws IOException {
return in.current().input;
}
@Override
public long ord() throws IOException {
return in.current().output;
}
@Override
public int docFreq() throws IOException {
throw new UnsupportedOperationException();
}
@Override
public long totalTermFreq() throws IOException {
throw new UnsupportedOperationException();
}
@Override
public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public ImpactsEnum impacts(int flags) throws IOException {
throw new UnsupportedOperationException();
}
}
}

View File

@@ -13,5 +13,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-org.apache.lucene.codecs.memory.MemoryDocValuesFormat
 org.apache.lucene.codecs.memory.DirectDocValuesFormat

View File

@@ -18,4 +18,4 @@ org.apache.lucene.codecs.bloom.BloomFilteringPostingsFormat
 org.apache.lucene.codecs.memory.DirectPostingsFormat
 org.apache.lucene.codecs.memory.FSTOrdPostingsFormat
 org.apache.lucene.codecs.memory.FSTPostingsFormat
-org.apache.lucene.codecs.memory.MemoryPostingsFormat

View File

@@ -1,39 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.memory;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.BaseCompressingDocValuesFormatTestCase;
import org.apache.lucene.util.TestUtil;
/**
* Tests MemoryDocValuesFormat
*/
public class TestMemoryDocValuesFormat extends BaseCompressingDocValuesFormatTestCase {
private final Codec codec = TestUtil.alwaysDocValuesFormat(new MemoryDocValuesFormat());
@Override
protected Codec getCodec() {
return codec;
}
@Override
protected boolean codecAcceptsHugeBinaryValues(String field) {
return false;
}
}

View File

@@ -1,35 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.memory;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.BasePostingsFormatTestCase;
import org.apache.lucene.util.TestUtil;
/**
* Tests MemoryPostingsFormat
*/
public class TestMemoryPostingsFormat extends BasePostingsFormatTestCase {
// TODO: randomize doPack
private final Codec codec = TestUtil.alwaysPostingsFormat(new MemoryPostingsFormat());
@Override
protected Codec getCodec() {
return codec;
}
}

View File

@@ -42,13 +42,10 @@ public class TestExternalCodecs extends LuceneTestCase {
 private final PostingsFormat ramFormat = PostingsFormat.forName("RAMOnly");
 private final PostingsFormat defaultFormat = TestUtil.getDefaultPostingsFormat();
-private final PostingsFormat memoryFormat = PostingsFormat.forName("Memory");
 
 @Override
 public PostingsFormat getPostingsFormatForField(String field) {
-if (field.equals("field2") || field.equals("id")) {
-return memoryFormat;
-} else if (field.equals("field1")) {
+if (field.equals("field2") || field.equals("field1") || field.equals("id")) {
 return defaultFormat;
 } else {
 return ramFormat;

View File

@@ -89,7 +89,7 @@ public class TestPerFieldDocValuesFormat extends BaseDocValuesFormatTestCase {
 // we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
 IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
 final DocValuesFormat fast = TestUtil.getDefaultDocValuesFormat();
-final DocValuesFormat slow = DocValuesFormat.forName("Memory");
+final DocValuesFormat slow = DocValuesFormat.forName("Direct");
 iwc.setCodec(new AssertingCodec() {
 @Override
 public DocValuesFormat getDocValuesFormatForField(String field) {

View File

@@ -33,7 +33,6 @@ import org.apache.lucene.codecs.PostingsFormat;
 import org.apache.lucene.codecs.asserting.AssertingCodec;
 import org.apache.lucene.codecs.blockterms.LuceneVarGapFixedInterval;
 import org.apache.lucene.codecs.memory.DirectPostingsFormat;
-import org.apache.lucene.codecs.memory.MemoryPostingsFormat;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
@@ -165,7 +164,7 @@ public class TestPerFieldPostingsFormat2 extends LuceneTestCase {
 //((LogMergePolicy) iwconf.getMergePolicy()).setMergeFactor(10);
 iwconf.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
-iwconf.setCodec(new MockCodec2()); // uses standard for field content
+iwconf.setCodec(new MockCodec()); // uses standard for field content
 writer = newWriter(dir, iwconf);
 // swap in new codec for currently written segments
 if (VERBOSE) {
@@ -218,23 +217,6 @@
 public static class MockCodec extends AssertingCodec {
 final PostingsFormat luceneDefault = TestUtil.getDefaultPostingsFormat();
 final PostingsFormat direct = new DirectPostingsFormat();
-final PostingsFormat memory = new MemoryPostingsFormat();
-@Override
-public PostingsFormat getPostingsFormatForField(String field) {
-if (field.equals("id")) {
-return direct;
-} else if (field.equals("content")) {
-return memory;
-} else {
-return luceneDefault;
-}
-}
-}
-public static class MockCodec2 extends AssertingCodec {
-final PostingsFormat luceneDefault = TestUtil.getDefaultPostingsFormat();
-final PostingsFormat direct = new DirectPostingsFormat();
 @Override
 public PostingsFormat getPostingsFormatForField(String field) {
@@ -287,9 +269,9 @@
 @Override
 public PostingsFormat getPostingsFormatForField(String field) {
 if ("id".equals(field)) {
-return new MemoryPostingsFormat();
+return new DirectPostingsFormat();
 } else if ("date".equals(field)) {
-return new MemoryPostingsFormat();
+return new DirectPostingsFormat();
 } else {
 return super.getPostingsFormatForField(field);
 }

View File

@@ -33,7 +33,7 @@ import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.TimeUnits;
 import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;
-@SuppressCodecs({"SimpleText", "Memory", "Direct"})
+@SuppressCodecs({"SimpleText", "Direct"})
 @TimeoutSuite(millis = 80 * TimeUnits.HOUR) // effectively no limit
 // The six hour time was achieved on a Linux 3.13 system with these specs:
 // 3-core AMD at 2.5Ghz, 12 GB RAM, 5GB test heap, 2 test JVMs, 2TB SATA.

View File

@@ -35,7 +35,7 @@ import org.apache.lucene.util.LuceneTestCase.SuppressSysoutChecks;
 import org.apache.lucene.util.TestUtil;
 import org.apache.lucene.util.TimeUnits;
-@SuppressCodecs({"SimpleText", "Memory", "Direct"})
+@SuppressCodecs({"SimpleText", "Direct"})
 @TimeoutSuite(millis = 80 * TimeUnits.HOUR) // effectively no limit
 @Monster("Takes ~30min")
 @SuppressSysoutChecks(bugUrl = "Stuff gets printed")

View File

@@ -31,7 +31,7 @@ import org.apache.lucene.util.LuceneTestCase.SuppressSysoutChecks;
import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;
-@SuppressCodecs({"SimpleText", "Memory", "Direct"})
+@SuppressCodecs({"SimpleText", "Direct"})
@TimeoutSuite(millis = 8 * TimeUnits.HOUR)
// The two hour time was achieved on a Linux 3.13 system with these specs:
// 3-core AMD at 2.5Ghz, 12 GB RAM, 5GB test heap, 2 test JVMs, 2TB SATA.

View File

@@ -35,7 +35,7 @@ import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;
//
// or: python -u /l/util/src/python/repeatLuceneTest.py -heap 6g -once -nolog -tmpDir /b/tmp -logDir /l/logs Test2BPoints.test2D -verbose
-@SuppressCodecs({ "SimpleText", "Memory", "Direct", "Compressing" })
+@SuppressCodecs({ "SimpleText", "Direct", "Compressing" })
@TimeoutSuite(millis = 365 * 24 * TimeUnits.HOUR) // hopefully ~1 year is long enough ;)
@Monster("takes at least 4 hours and consumes many GB of temp disk space")
public class Test2BPoints extends LuceneTestCase {

View File

@@ -36,7 +36,7 @@ import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
* Test indexes ~82M docs with 52 positions each, so you get &gt; Integer.MAX_VALUE positions
* @lucene.experimental
*/
-@SuppressCodecs({ "SimpleText", "Memory", "Direct" })
+@SuppressCodecs({ "SimpleText", "Direct" })
@Monster("uses lots of space and takes a few minutes")
public class Test2BPositions extends LuceneTestCase {

View File

@@ -35,7 +35,7 @@ import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;
* Test indexes ~82M docs with 26 terms each, so you get &gt; Integer.MAX_VALUE terms/docs pairs
* @lucene.experimental
*/
-@SuppressCodecs({ "SimpleText", "Memory", "Direct", "Compressing" })
+@SuppressCodecs({ "SimpleText", "Direct", "Compressing" })
@TimeoutSuite(millis = 4 * TimeUnits.HOUR)
public class Test2BPostings extends LuceneTestCase {

View File

@@ -42,7 +42,7 @@ import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
* so you get &gt; Integer.MAX_VALUE postings data for the term
* @lucene.experimental
*/
-@SuppressCodecs({ "SimpleText", "Memory", "Direct" })
+@SuppressCodecs({ "SimpleText", "Direct" })
@Monster("takes ~20GB-30GB of space and 10 minutes")
public class Test2BPostingsBytes extends LuceneTestCase {

View File

@@ -32,7 +32,7 @@ import org.apache.lucene.util.LuceneTestCase.SuppressSysoutChecks;
import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;
-@SuppressCodecs({"SimpleText", "Memory", "Direct"})
+@SuppressCodecs({"SimpleText", "Direct"})
@TimeoutSuite(millis = 80 * TimeUnits.HOUR) // effectively no limit
// The six hour time was achieved on a Linux 3.13 system with these specs:
// 3-core AMD at 2.5Ghz, 12 GB RAM, 5GB test heap, 2 test JVMs, 2TB SATA.

View File

@@ -32,7 +32,7 @@ import org.apache.lucene.util.LuceneTestCase.SuppressSysoutChecks;
import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;
-@SuppressCodecs({"SimpleText", "Memory", "Direct"})
+@SuppressCodecs({"SimpleText", "Direct"})
@TimeoutSuite(millis = 80 * TimeUnits.HOUR) // effectively no limit
// The six hour time was achieved on a Linux 3.13 system with these specs:
// 3-core AMD at 2.5Ghz, 12 GB RAM, 5GB test heap, 2 test JVMs, 2TB SATA.

View File

@@ -55,7 +55,7 @@ import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;
//
// ant test -Dtests.monster=true -Dtests.heapsize=8g -Dtests.codec=Lucene62 -Dtestcase=Test2BTerms
//
-@SuppressCodecs({ "SimpleText", "Memory", "Direct" })
+@SuppressCodecs({ "SimpleText", "Direct" })
@Monster("very slow, use 5g minimum heap")
@TimeoutSuite(millis = 80 * TimeUnits.HOUR) // effectively no limit
@SuppressSysoutChecks(bugUrl = "Stuff gets printed")

View File

@@ -28,7 +28,7 @@ import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.asserting.AssertingCodec;
-import org.apache.lucene.codecs.memory.MemoryPostingsFormat;
+import org.apache.lucene.codecs.memory.DirectPostingsFormat;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
@@ -1088,14 +1088,11 @@ public class TestAddIndexes extends LuceneTestCase {
private static final class CustomPerFieldCodec extends AssertingCodec {
private final PostingsFormat directFormat = PostingsFormat.forName("Direct");
private final PostingsFormat defaultFormat = TestUtil.getDefaultPostingsFormat();
-private final PostingsFormat memoryFormat = PostingsFormat.forName("Memory");
@Override
public PostingsFormat getPostingsFormatForField(String field) {
if (field.equals("id")) {
return directFormat;
-} else if (field.equals("content")) {
-return memoryFormat;
} else {
return defaultFormat;
}
@@ -1164,7 +1161,7 @@ public class TestAddIndexes extends LuceneTestCase {
{
Directory dir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
-conf.setCodec(TestUtil.alwaysPostingsFormat(new MemoryPostingsFormat()));
+conf.setCodec(TestUtil.alwaysPostingsFormat(new DirectPostingsFormat()));
IndexWriter w = new IndexWriter(dir, conf);
expectThrows(IllegalArgumentException.class, () -> {
w.addIndexes(toAdd);

View File

@@ -40,7 +40,7 @@ import org.apache.lucene.util.TestUtil;
* totalTermFreq of its integer value, and checks that the totalTermFreq is correct.
*/
// TODO: somehow factor this with BagOfPostings? it's almost the same
-@SuppressCodecs({"Direct", "Memory"}) // at night this makes like 200k/300k docs and will make Direct's heart beat!
+@SuppressCodecs({"Direct"}) // at night this makes like 200k/300k docs and will make Direct's heart beat!
public class TestBagOfPositions extends LuceneTestCase {
public void test() throws Exception {
List<String> postingsList = new ArrayList<>();

View File

@@ -38,7 +38,7 @@ import org.apache.lucene.util.TestUtil;
* Simple test that adds numeric terms, where each term has the
* docFreq of its integer value, and checks that the docFreq is correct.
*/
-@SuppressCodecs({"Direct", "Memory"}) // at night this makes like 200k/300k docs and will make Direct's heart beat!
+@SuppressCodecs({"Direct"}) // at night this makes like 200k/300k docs and will make Direct's heart beat!
public class TestBagOfPostings extends LuceneTestCase {
public void test() throws Exception {
List<String> postingsList = new ArrayList<>();

View File

@@ -24,7 +24,7 @@ import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
* Just like TestDuelingCodecs, only with a lot more documents.
*/
@Nightly
-@SuppressCodecs({"Memory", "Direct"}) // it can be too much for these codecs
+@SuppressCodecs({"Direct"}) // it can be too much for these codecs
public class TestDuelingCodecsAtNight extends TestDuelingCodecs {
// use a big number of documents

View File

@@ -177,7 +176,6 @@ public class TestIndexWriterCommit extends LuceneTestCase {
final String idFormat = TestUtil.getPostingsFormat("id");
final String contentFormat = TestUtil.getPostingsFormat("content");
-assumeFalse("This test cannot run with Memory codec", idFormat.equals("Memory") || contentFormat.equals("Memory"));
MockDirectoryWrapper dir = newMockDirectory();
Analyzer analyzer;
if (random().nextBoolean()) {

View File

@@ -994,7 +994,6 @@ public class TestIndexWriterDelete extends LuceneTestCase {
// TODO: move this test to its own class and just @SuppressCodecs?
// TODO: is it enough to just use newFSDirectory?
final String fieldFormat = TestUtil.getPostingsFormat("field");
-assumeFalse("This test cannot run with Memory codec", fieldFormat.equals("Memory"));
assumeFalse("This test cannot run with SimpleText codec", fieldFormat.equals("SimpleText"));
assumeFalse("This test cannot run with Direct codec", fieldFormat.equals("Direct"));
final Random r = random();

View File

@@ -39,7 +39,7 @@ import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.TimeUnits;
import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;
-@SuppressCodecs({ "SimpleText", "Memory", "Direct" })
+@SuppressCodecs({ "SimpleText", "Direct" })
@TimeoutSuite(millis = 8 * TimeUnits.HOUR)
public class TestIndexWriterMaxDocs extends LuceneTestCase {

View File

@@ -162,7 +162,6 @@ public class TestIndexWriterOnDiskFull extends LuceneTestCase {
final String idFormat = TestUtil.getPostingsFormat("id");
final String contentFormat = TestUtil.getPostingsFormat("content");
-assumeFalse("This test cannot run with Memory codec", idFormat.equals("Memory") || contentFormat.equals("Memory"));
int START_COUNT = 57;
int NUM_DIR = TEST_NIGHTLY ? 50 : 5;

View File

@@ -130,7 +130,6 @@ public class TestLazyProxSkipping extends LuceneTestCase {
public void testLazySkipping() throws IOException {
final String fieldFormat = TestUtil.getPostingsFormat(this.field);
-assumeFalse("This test cannot run with Memory postings format", fieldFormat.equals("Memory"));
assumeFalse("This test cannot run with Direct postings format", fieldFormat.equals("Direct"));
assumeFalse("This test cannot run with SimpleText postings format", fieldFormat.equals("SimpleText"));

View File

@@ -35,7 +35,7 @@ import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
-@SuppressCodecs({ "SimpleText", "Memory", "Direct" })
+@SuppressCodecs({ "SimpleText", "Direct" })
public class TestLongPostings extends LuceneTestCase {
// Produces a realistic unicode random string that

View File

@@ -30,7 +30,7 @@ import org.junit.Before;
// - mix in forceMerge, addIndexes
// - randomoly mix in non-congruent docs
-@SuppressCodecs({ "SimpleText", "Memory", "Direct" })
+@SuppressCodecs({ "SimpleText", "Direct" })
public class TestNRTThreads extends ThreadedIndexingAndSearchingTestCase {
private boolean useNonNrtReaders = true;

View File

@@ -43,7 +43,7 @@ import org.apache.lucene.util.TestUtil;
* Test that norms info is preserved during index life - including
* separate norms, addDocument, addIndexes, forceMerge.
*/
-@SuppressCodecs({ "Memory", "Direct", "SimpleText" })
+@SuppressCodecs({ "Direct", "SimpleText" })
@Slow
public class TestNorms extends LuceneTestCase {
static final String BYTE_TEST_FIELD = "normsTestByte";

View File

@@ -21,7 +21,7 @@ import java.util.Random;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
-import org.apache.lucene.codecs.memory.MemoryPostingsFormat;
+import org.apache.lucene.codecs.memory.DirectPostingsFormat;
import org.apache.lucene.document.*;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
@@ -42,9 +42,9 @@ public class TestRollingUpdates extends LuceneTestCase {
final LineFileDocs docs = new LineFileDocs(random);
-//provider.register(new MemoryCodec());
if (random().nextBoolean()) {
-Codec.setDefault(TestUtil.alwaysPostingsFormat(new MemoryPostingsFormat(random().nextBoolean(), random.nextFloat())));
+Codec.setDefault(TestUtil.alwaysPostingsFormat(
+new DirectPostingsFormat()));
}
MockAnalyzer analyzer = new MockAnalyzer(random());

View File

@@ -46,7 +46,7 @@ import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.lucene.util.automaton.RegExp;
-@SuppressCodecs({ "SimpleText", "Memory", "Direct" })
+@SuppressCodecs({ "SimpleText", "Direct" })
public class TestTermsEnum extends LuceneTestCase {
public void test() throws Exception {

View File

@@ -47,7 +47,7 @@ import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.ThreadInterruptedException;
-@SuppressCodecs({ "SimpleText", "Memory", "Direct" })
+@SuppressCodecs({ "SimpleText", "Direct" })
public class TestControlledRealTimeReopenThread extends ThreadedIndexingAndSearchingTestCase {
// Not guaranteed to reflect deletes:

View File

@@ -29,7 +29,7 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
-@SuppressCodecs({ "SimpleText", "Memory", "Direct" })
+@SuppressCodecs({ "SimpleText", "Direct" })
public class TestSearchWithThreads extends LuceneTestCase {
int NUM_DOCS;
static final int NUM_SEARCH_THREADS = 5;

View File

@@ -50,7 +50,7 @@ import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.NamedThreadFactory;
import org.apache.lucene.util.TestUtil;
-@SuppressCodecs({ "SimpleText", "Memory", "Direct" })
+@SuppressCodecs({ "SimpleText", "Direct" })
public class TestSearcherManager extends ThreadedIndexingAndSearchingTestCase {
boolean warmCalled;

View File

@@ -41,7 +41,7 @@ import org.apache.lucene.util.TestUtil;
// - test pulling docs in 2nd round trip...
// - filter too
-@SuppressCodecs({ "SimpleText", "Memory", "Direct" })
+@SuppressCodecs({ "SimpleText", "Direct" })
public class TestShardSearching extends ShardSearchingTestBase {
private static class PreviousSearchState {

View File

@@ -29,7 +29,7 @@ import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
/** Tests for SortedSetSortField selectors other than MIN,
* these require optional codec support (random access to ordinals) */
-@SuppressCodecs({"Memory", "SimpleText"}) // TODO: fix MemoryDV to support random access to ordinals
+@SuppressCodecs({"SimpleText"})
public class TestSortedSetSelector extends LuceneTestCase {
public void testMax() throws Exception {

View File

@@ -81,7 +81,7 @@ import static org.apache.lucene.util.fst.FSTTester.getRandomString;
import static org.apache.lucene.util.fst.FSTTester.simpleRandomString;
import static org.apache.lucene.util.fst.FSTTester.toIntsRef;
-@SuppressCodecs({ "SimpleText", "Memory", "Direct" })
+@SuppressCodecs({ "SimpleText", "Direct" })
@Slow
public class TestFSTs extends LuceneTestCase {

View File

@@ -28,7 +28,7 @@ import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
/**
* Random testing for {@link InetAddressRange}
*/
-@SuppressCodecs({"Direct", "Memory"})
+@SuppressCodecs({"Direct"})
public class TestInetAddressRangeQueries extends BaseRangeFieldQueryTestCase {
private static final String FIELD_NAME = "ipRangeField";

View File

@@ -51,7 +51,7 @@ import org.apache.lucene.util.TestUtil;
/** Child process with silly naive TCP socket server to handle
* between-node commands, launched for each node by TestNRTReplication. */
-@SuppressCodecs({"MockRandom", "Memory", "Direct", "SimpleText"})
+@SuppressCodecs({"MockRandom", "Direct", "SimpleText"})
@SuppressSysoutChecks(bugUrl = "Stuff gets printed, important stuff for debugging a failure")
@SuppressForbidden(reason = "We need Unsafe to actually crush :-)")
public class SimpleServer extends LuceneTestCase {

View File

@@ -44,7 +44,7 @@ import org.apache.lucene.util.TestUtil;
import com.carrotsearch.randomizedtesting.SeedUtils;
// MockRandom's .sd file has no index header/footer:
-@SuppressCodecs({"MockRandom", "Memory", "Direct", "SimpleText"})
+@SuppressCodecs({"MockRandom", "Direct", "SimpleText"})
@SuppressSysoutChecks(bugUrl = "Stuff gets printed, important stuff for debugging a failure")
public class TestNRTReplication extends LuceneTestCase {

View File

@@ -112,7 +112,7 @@ import com.carrotsearch.randomizedtesting.SeedUtils;
*/
// MockRandom's .sd file has no index header/footer:
-@SuppressCodecs({"MockRandom", "Memory", "Direct", "SimpleText"})
+@SuppressCodecs({"MockRandom", "Direct", "SimpleText"})
@SuppressSysoutChecks(bugUrl = "Stuff gets printed, important stuff for debugging a failure")
public class TestStressNRTReplication extends LuceneTestCase {

View File

@@ -549,9 +549,7 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
Directory dir = null;
RandomIndexWriter iw = null;
final String postingsFormat = TestUtil.getPostingsFormat("dummy");
-boolean codecOk = iterations * maxWordLength < 100000 ||
-!(postingsFormat.equals("Memory") ||
-postingsFormat.equals("SimpleText"));
+boolean codecOk = iterations * maxWordLength < 100000 || !(postingsFormat.equals("SimpleText"));
if (rarely(random) && codecOk) {
dir = newFSDirectory(createTempDir("bttc"));
iw = new RandomIndexWriter(new Random(seed), dir, a);

View File

@@ -627,7 +627,6 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase {
@Nightly
public void testRandomBig() throws Exception {
assumeFalse("Direct codec can OOME on this test", TestUtil.getDocValuesFormat(FIELD_NAME).equals("Direct"));
-assumeFalse("Memory codec can OOME on this test", TestUtil.getDocValuesFormat(FIELD_NAME).equals("Memory"));
doTestRandom(200000);
}

View File

@@ -46,8 +46,6 @@ import org.apache.lucene.codecs.memory.DirectDocValuesFormat;
import org.apache.lucene.codecs.memory.DirectPostingsFormat;
import org.apache.lucene.codecs.memory.FSTOrdPostingsFormat;
import org.apache.lucene.codecs.memory.FSTPostingsFormat;
-import org.apache.lucene.codecs.memory.MemoryDocValuesFormat;
-import org.apache.lucene.codecs.memory.MemoryPostingsFormat;
import org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat;
import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.store.Directory;
@@ -205,14 +203,11 @@ public class RandomCodec extends AssertingCodec {
new LuceneVarGapFixedInterval(TestUtil.nextInt(random, 1, 1000)),
new LuceneVarGapDocFreqInterval(TestUtil.nextInt(random, 1, 100), TestUtil.nextInt(random, 1, 1000)),
TestUtil.getDefaultPostingsFormat(),
-new AssertingPostingsFormat(),
-new MemoryPostingsFormat(true, random.nextFloat()),
-new MemoryPostingsFormat(false, random.nextFloat()));
+new AssertingPostingsFormat());
addDocValues(avoidCodecs,
TestUtil.getDefaultDocValuesFormat(),
new DirectDocValuesFormat(), // maybe not a great idea...
-new MemoryDocValuesFormat(),
TestUtil.getDefaultDocValuesFormat(),
new AssertingDocValuesFormat());

View File

@@ -980,7 +980,7 @@ public final class TestUtil {
// TODO: remove this, push this test to Lucene40/Lucene42 codec tests
public static boolean fieldSupportsHugeBinaryDocValues(String field) {
String dvFormat = getDocValuesFormat(field);
-if (dvFormat.equals("Lucene40") || dvFormat.equals("Lucene42") || dvFormat.equals("Memory")) {
+if (dvFormat.equals("Lucene40") || dvFormat.equals("Lucene42")) {
return false;
}
return true;

View File

@@ -41,6 +41,10 @@ Upgrade Notes
method signatures was required to fix it. Third party components that use this API
need to be updated. (Eros Taborelli, Erick Erickson, ab)
+* LUCENE-8267: Memory codecs have been removed from the codebase (MemoryPostings,
+MemoryDocValues). If you used postingsFormat="Memory" or docValuesFormat="Memory"
+switch to "Direct" instead. (Dawid Weiss)
================== 7.4.0 ==================
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.

View File

@@ -16,11 +16,10 @@
limitations under the License.
-->
<schema name="codec" version="1.2">
-<fieldType name="string_direct" class="solr.StrField" postingsFormat="Direct"/>
+<fieldType name="string_direct" class="solr.StrField" postingsFormat="Direct" docValuesFormat="Direct"/>
<fieldType name="string_standard" class="solr.StrField" postingsFormat="Lucene50"/>
<fieldType name="string_disk" class="solr.StrField" docValuesFormat="Lucene70"/>
-<fieldType name="string_memory" class="solr.StrField" docValuesFormat="Memory"/>
<fieldType name="string" class="solr.StrField"/>
@@ -39,7 +38,6 @@
<field name="string_standard_f" type="string_standard" indexed="true" stored="true"/>
<field name="string_disk_f" type="string_disk" indexed="false" stored="false" docValues="true" default=""/>
-<field name="string_memory_f" type="string_memory" indexed="false" stored="false" docValues="true" default=""/>
<field name="string_f" type="string" indexed="true" stored="true" docValues="true" required="true"/>
<field name="text" type="text_general" indexed="true" stored="true"/>
@@ -48,7 +46,6 @@
<dynamicField name="*_standard" type="string_standard" indexed="true" stored="true"/>
<dynamicField name="*_disk" type="string_disk" indexed="false" stored="false" docValues="true"/>
-<dynamicField name="*_memory" type="string_memory" indexed="false" stored="false" docValues="true"/>
<uniqueKey>string_f</uniqueKey>
</schema>

View File

@@ -63,9 +63,8 @@ public class TestCodecSupport extends SolrTestCaseJ4 {
SchemaField schemaField = fields.get("string_disk_f");
PerFieldDocValuesFormat format = (PerFieldDocValuesFormat) codec.docValuesFormat();
assertEquals(TestUtil.getDefaultDocValuesFormat().getName(), format.getDocValuesFormatForField(schemaField.getName()).getName());
-schemaField = fields.get("string_memory_f");
-assertEquals("Memory",
-format.getDocValuesFormatForField(schemaField.getName()).getName());
+schemaField = fields.get("string_direct_f");
+assertEquals("Direct", format.getDocValuesFormatForField(schemaField.getName()).getName());
schemaField = fields.get("string_f");
assertEquals(TestUtil.getDefaultDocValuesFormat().getName(),
format.getDocValuesFormatForField(schemaField.getName()).getName());
@@ -87,8 +86,8 @@ public class TestCodecSupport extends SolrTestCaseJ4 {
assertEquals(TestUtil.getDefaultDocValuesFormat().getName(), format.getDocValuesFormatForField("foo_disk").getName());
assertEquals(TestUtil.getDefaultDocValuesFormat().getName(), format.getDocValuesFormatForField("bar_disk").getName());
-assertEquals("Memory", format.getDocValuesFormatForField("foo_memory").getName());
-assertEquals("Memory", format.getDocValuesFormatForField("bar_memory").getName());
+assertEquals("Direct", format.getDocValuesFormatForField("foo_direct").getName());
+assertEquals("Direct", format.getDocValuesFormatForField("bar_direct").getName());
}
private void reloadCoreAndRecreateIndex() {

View File

@@ -40,7 +40,7 @@ import org.junit.BeforeClass;
* Tests the behavior of <code>field(foo,min|max)</code> on numerious types of multivalued 'foo' fields,
* as well as the beahvior of sorting on <code>foo asc|desc</code> to implicitly choose the min|max.
*/
-@SuppressCodecs({"Memory", "SimpleText"}) // see TestSortedSetSelector
+@SuppressCodecs({"SimpleText"}) // see TestSortedSetSelector
public class TestMinMaxOnMultiValuedField extends SolrTestCaseJ4 {
/** Initializes core and does some sanity checking of schema */

View File

@@ -24,7 +24,7 @@ import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
*
* @see SortByFunctionTest
**/
-@SuppressCodecs({"Memory", "SimpleText"}) // see TestSortedSetSelector
+@SuppressCodecs({"SimpleText"}) // see TestSortedSetSelector
public class TestSortByMinMaxFunction extends SortByFunctionTest {
@Override

View File

@@ -30,7 +30,6 @@ import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.solr.index.SlowCompositeReaderWrapper;
-@LuceneTestCase.SuppressCodecs({"Memory"})
public class TestDocTermOrdsUninvertLimit extends LuceneTestCase {
/* UnInvertedField had a reference block limitation of 2^24. This unit test triggered it.

View File

@@ -165,38 +165,11 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase {
b.length = bytes.length;
doc.add(new BinaryDocValuesField("field", b));
doc.add(new StringField("id", "" + docID, Field.Store.YES));
-try {
-w.addDocument(doc);
-} catch (IllegalArgumentException iae) {
-if (iae.getMessage().indexOf("is too large") == -1) {
-throw iae;
-} else {
-// OK: some codecs can't handle binary DV > 32K
-assertFalse(codecAcceptsHugeBinaryValues("field"));
-w.rollback();
-d.close();
-return;
-}
-}
+w.addDocument(doc);
}
-DirectoryReader r;
-try {
-r = DirectoryReader.open(w);
-} catch (IllegalArgumentException iae) {
-if (iae.getMessage().indexOf("is too large") == -1) {
-throw iae;
-} else {
-assertFalse(codecAcceptsHugeBinaryValues("field"));
-// OK: some codecs can't handle binary DV > 32K
-w.rollback();
-d.close();
-return;
-}
-}
+DirectoryReader r = DirectoryReader.open(w);
try (LeafReader ar = SlowCompositeReaderWrapper.wrap(r)) {
TestUtil.checkReader(ar);
@@ -210,86 +183,11 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase {
assertEquals(expected.length, bytes.length);
assertEquals(new BytesRef(expected), bytes);
}
-assertTrue(codecAcceptsHugeBinaryValues("field"));
}
}
}
}
-private static final int LARGE_BINARY_FIELD_LENGTH = (1 << 15) - 2;
-// TODO: get this out of here and into the deprecated codecs (4.0, 4.2)
-public void testHugeBinaryValueLimit() throws Exception {
-// We only test DVFormats that have a limit
-assumeFalse("test requires codec with limits on max binary field length", codecAcceptsHugeBinaryValues("field"));
-Analyzer analyzer = new MockAnalyzer(random());
-// FSDirectory because SimpleText will consume gobbs of
-// space when storing big binary values:
-Directory d = newFSDirectory(createTempDir("hugeBinaryValues"));
-boolean doFixed = random().nextBoolean();
-int numDocs;
-int fixedLength = 0;
-if (doFixed) {
-// Sometimes make all values fixed length since some
-// codecs have different code paths for this:
-numDocs = TestUtil.nextInt(random(), 10, 20);
-fixedLength = LARGE_BINARY_FIELD_LENGTH;
-} else {
-numDocs = TestUtil.nextInt(random(), 100, 200);
-}
-IndexWriter w = new IndexWriter(d, newIndexWriterConfig(analyzer));
-List<byte[]> docBytes = new ArrayList<>();
-long totalBytes = 0;
-for(int docID=0;docID<numDocs;docID++) {
-// we don't use RandomIndexWriter because it might add
-// more docvalues than we expect !!!!
-// Must be > 64KB in size to ensure more than 2 pages in
-// PagedBytes would be needed:
-int numBytes;
-if (doFixed) {
-numBytes = fixedLength;
-} else if (docID == 0 || random().nextInt(5) == 3) {
-numBytes = LARGE_BINARY_FIELD_LENGTH;
-} else {
-numBytes = TestUtil.nextInt(random(), 1, LARGE_BINARY_FIELD_LENGTH);
-}
-totalBytes += numBytes;
-if (totalBytes > 5 * 1024*1024) {
-break;
-}
-byte[] bytes = new byte[numBytes];
-random().nextBytes(bytes);
-docBytes.add(bytes);
-Document doc = new Document();
-BytesRef b = new BytesRef(bytes);
-b.length = bytes.length;
-doc.add(new BinaryDocValuesField("field", b));
-doc.add(new StringField("id", ""+docID, Field.Store.YES));
-w.addDocument(doc);
-}
-DirectoryReader r = DirectoryReader.open(w);
-w.close();
-LeafReader ar = SlowCompositeReaderWrapper.wrap(r);
-TestUtil.checkReader(ar);
-BinaryDocValues s = FieldCache.DEFAULT.getTerms(ar, "field");
-for(int docID=0;docID<docBytes.size();docID++) {
-assertEquals(docID, s.nextDoc());
-Document doc = ar.document(docID);
-BytesRef bytes = s.binaryValue();
-byte[] expected = docBytes.get(Integer.parseInt(doc.get("id")));
-assertEquals(expected.length, bytes.length);
-assertEquals(new BytesRef(expected), bytes);
-}
-ar.close();
-d.close();
-}
private void doTestSortedVsFieldCache(int minLength, int maxLength) throws Exception {
Directory dir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
@@ -616,9 +514,4 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase {
}
}
}
-protected boolean codecAcceptsHugeBinaryValues(String field) {
-String name = TestUtil.getDocValuesFormat(field);
-return !(name.equals("Memory")); // Direct has a different type of limit
-}
}