mirror of https://github.com/apache/lucene.git
LUCENE-3216: Store DocValues per segment instead of per field
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1143776 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
b448fc6ef7
commit
5abd2b1085
|
@ -25,7 +25,6 @@ import org.apache.lucene.index.SegmentInfo;
|
|||
import org.apache.lucene.index.SegmentReadState;
|
||||
import org.apache.lucene.index.SegmentWriteState;
|
||||
import org.apache.lucene.index.codecs.Codec;
|
||||
import org.apache.lucene.index.codecs.DocValuesConsumer;
|
||||
import org.apache.lucene.index.codecs.DefaultDocValuesProducer;
|
||||
import org.apache.lucene.index.codecs.FieldsConsumer;
|
||||
import org.apache.lucene.index.codecs.FieldsProducer;
|
||||
|
@ -58,7 +57,7 @@ public class AppendingCodec extends Codec {
|
|||
public static String CODEC_NAME = "Appending";
|
||||
|
||||
public AppendingCodec() {
|
||||
name = CODEC_NAME;
|
||||
super(CODEC_NAME);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -138,22 +137,22 @@ public class AppendingCodec extends Codec {
|
|||
StandardPostingsReader.files(dir, segmentInfo, codecId, files);
|
||||
BlockTermsReader.files(dir, segmentInfo, codecId, files);
|
||||
FixedGapTermsIndexReader.files(dir, segmentInfo, codecId, files);
|
||||
DefaultDocValuesConsumer.files(dir, segmentInfo, codecId, files);
|
||||
DefaultDocValuesConsumer.files(dir, segmentInfo, codecId, files, getDocValuesUseCFS());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void getExtensions(Set<String> extensions) {
|
||||
StandardCodec.getStandardExtensions(extensions);
|
||||
DefaultDocValuesConsumer.getDocValuesExtensions(extensions);
|
||||
DefaultDocValuesConsumer.getDocValuesExtensions(extensions, getDocValuesUseCFS());
|
||||
}
|
||||
|
||||
@Override
|
||||
public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException {
|
||||
return new DefaultDocValuesConsumer(state, BytesRef.getUTF8SortedAsUnicodeComparator());
|
||||
return new DefaultDocValuesConsumer(state, getDocValuesSortComparator(), getDocValuesUseCFS());
|
||||
}
|
||||
|
||||
@Override
|
||||
public PerDocValues docsProducer(SegmentReadState state) throws IOException {
|
||||
return new DefaultDocValuesProducer(state.segmentInfo, state.dir, state.fieldInfos, state.codecId);
|
||||
return new DefaultDocValuesProducer(state.segmentInfo, state.dir, state.fieldInfos, state.codecId, getDocValuesUseCFS(), getDocValuesSortComparator());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -51,7 +51,7 @@ final class PerFieldCodecWrapper extends Codec {
|
|||
private final SegmentCodecs segmentCodecs;
|
||||
|
||||
PerFieldCodecWrapper(SegmentCodecs segmentCodecs) {
|
||||
name = "PerField";
|
||||
super("PerField");
|
||||
this.segmentCodecs = segmentCodecs;
|
||||
}
|
||||
|
||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.index.codecs;
|
|||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Comparator;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.index.PerDocWriteState;
|
||||
|
@ -25,13 +26,21 @@ import org.apache.lucene.index.SegmentInfo;
|
|||
import org.apache.lucene.index.SegmentWriteState;
|
||||
import org.apache.lucene.index.SegmentReadState;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/** @lucene.experimental */
|
||||
public abstract class Codec {
|
||||
public static final Codec[] EMPTY = new Codec[0];
|
||||
/** Unique name that's used to retrieve this codec when
|
||||
* reading the index */
|
||||
public String name;
|
||||
public final String name;
|
||||
private boolean dvUseCompoundFile = true;
|
||||
private Comparator<BytesRef> docValuesSortComparator = BytesRef
|
||||
.getUTF8SortedAsUnicodeComparator();
|
||||
|
||||
protected Codec(String name) {
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
/** Writes a new segment */
|
||||
public abstract FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException;
|
||||
|
@ -68,7 +77,48 @@ public abstract class Codec {
|
|||
|
||||
/** Records all file extensions this codec uses */
|
||||
public abstract void getExtensions(Set<String> extensions);
|
||||
|
||||
|
||||
/**
|
||||
* If set to <code>true</code> this codec will use a compound file for
|
||||
* IndexDocValues, otherwise each IndexDocValues field will create up to 2
|
||||
* files per segment.
|
||||
* <p>
|
||||
* NOTE: The default value is <code>true</code>.
|
||||
*/
|
||||
public void setDocValuesUseCFS(boolean docValuesUseCFS) {
|
||||
this.dvUseCompoundFile = docValuesUseCFS;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns <code>true</code> iff compound file should be used for
|
||||
* IndexDocValues, otherwise <code>false</code>.
|
||||
*
|
||||
* @see #setDocValuesUseCFS(boolean)
|
||||
* @return <code>true</code> iff compound file should be used for
|
||||
* IndexDocValues, otherwise <code>false</code>.
|
||||
*/
|
||||
public boolean getDocValuesUseCFS() {
|
||||
return dvUseCompoundFile;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the {@link BytesRef} comparator for sorted IndexDocValues variants. The
|
||||
* default is {@link BytesRef#getUTF8SortedAsUnicodeComparator()}.
|
||||
*/
|
||||
public void setDocValuesSortComparator(
|
||||
Comparator<BytesRef> docValuesSortComparator) {
|
||||
this.docValuesSortComparator = docValuesSortComparator;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the {@link BytesRef} comparator for sorted IndexDocValues variants.
|
||||
* The default is {@link BytesRef#getUTF8SortedAsUnicodeComparator()}.
|
||||
*/
|
||||
public Comparator<BytesRef> getDocValuesSortComparator() {
|
||||
return docValuesSortComparator;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return name;
|
||||
|
|
|
@ -44,7 +44,7 @@ public class CoreCodecProvider extends CodecProvider {
|
|||
public CoreCodecProvider() {
|
||||
register(new StandardCodec());
|
||||
register(new PreFlexCodec());
|
||||
register(new PulsingCodec(1));
|
||||
register(new PulsingCodec());
|
||||
register(new SimpleTextCodec());
|
||||
register(new MemoryCodec());
|
||||
}
|
||||
|
|
|
@ -31,79 +31,102 @@ import org.apache.lucene.index.values.Writer;
|
|||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/**
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class DefaultDocValuesConsumer extends PerDocConsumer {
|
||||
private final String segmentName;
|
||||
private final int codecId;
|
||||
private final Directory directory;
|
||||
private final AtomicLong bytesUsed;
|
||||
private final Comparator<BytesRef> comparator;
|
||||
|
||||
public DefaultDocValuesConsumer(PerDocWriteState state, Comparator<BytesRef> comparator) {
|
||||
private boolean useCompoundFile;
|
||||
|
||||
public DefaultDocValuesConsumer(PerDocWriteState state, Comparator<BytesRef> comparator, boolean useCompoundFile) throws IOException {
|
||||
this.segmentName = state.segmentName;
|
||||
this.codecId = state.codecId;
|
||||
this.bytesUsed = state.bytesUsed;
|
||||
this.directory = state.directory;
|
||||
//TODO maybe we should enable a global CFS that all codecs can pull on demand to further reduce the number of files?
|
||||
this.directory = useCompoundFile ? state.directory.createCompoundOutput(IndexFileNames.segmentFileName(segmentName, state.codecId, IndexFileNames.COMPOUND_FILE_EXTENSION)) : state.directory;
|
||||
this.comparator = comparator;
|
||||
this.useCompoundFile = useCompoundFile;
|
||||
}
|
||||
|
||||
|
||||
public void close() throws IOException {
|
||||
if (useCompoundFile) {
|
||||
this.directory.close();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocValuesConsumer addValuesField(FieldInfo field) throws IOException {
|
||||
return Writer.create(field.getDocValues(),
|
||||
docValuesId(segmentName, codecId, field.number),
|
||||
// TODO can we have a compound file per segment and codec for
|
||||
// docvalues?
|
||||
directory, comparator, bytesUsed);
|
||||
}
|
||||
|
||||
@SuppressWarnings("fallthrough")
|
||||
public static void files(Directory dir, SegmentInfo segmentInfo, int codecId,
|
||||
Set<String> files) throws IOException {
|
||||
Set<String> files, boolean useCompoundFile) throws IOException {
|
||||
FieldInfos fieldInfos = segmentInfo.getFieldInfos();
|
||||
for (FieldInfo fieldInfo : fieldInfos) {
|
||||
if (fieldInfo.getCodecId() == codecId && fieldInfo.hasDocValues()) {
|
||||
String filename = docValuesId(segmentInfo.name, codecId,
|
||||
fieldInfo.number);
|
||||
switch (fieldInfo.getDocValues()) {
|
||||
case BYTES_FIXED_DEREF:
|
||||
case BYTES_VAR_DEREF:
|
||||
case BYTES_VAR_SORTED:
|
||||
case BYTES_FIXED_SORTED:
|
||||
case BYTES_VAR_STRAIGHT:
|
||||
files.add(IndexFileNames.segmentFileName(filename, "",
|
||||
Writer.INDEX_EXTENSION));
|
||||
assert dir.fileExists(IndexFileNames.segmentFileName(filename, "",
|
||||
Writer.INDEX_EXTENSION));
|
||||
// until here all types use an index
|
||||
case BYTES_FIXED_STRAIGHT:
|
||||
case FLOAT_32:
|
||||
case FLOAT_64:
|
||||
case VAR_INTS:
|
||||
case FIXED_INTS_16:
|
||||
case FIXED_INTS_32:
|
||||
case FIXED_INTS_64:
|
||||
case FIXED_INTS_8:
|
||||
files.add(IndexFileNames.segmentFileName(filename, "",
|
||||
Writer.DATA_EXTENSION));
|
||||
assert dir.fileExists(IndexFileNames.segmentFileName(filename, "",
|
||||
Writer.DATA_EXTENSION));
|
||||
break;
|
||||
|
||||
default:
|
||||
assert false;
|
||||
if (useCompoundFile) {
|
||||
files.add(IndexFileNames.segmentFileName(segmentInfo.name, codecId, IndexFileNames.COMPOUND_FILE_EXTENSION));
|
||||
files.add(IndexFileNames.segmentFileName(segmentInfo.name, codecId, IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION));
|
||||
assert dir.fileExists(IndexFileNames.segmentFileName(segmentInfo.name, codecId, IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION));
|
||||
assert dir.fileExists(IndexFileNames.segmentFileName(segmentInfo.name, codecId, IndexFileNames.COMPOUND_FILE_EXTENSION));
|
||||
return;
|
||||
} else {
|
||||
switch (fieldInfo.getDocValues()) {
|
||||
case BYTES_FIXED_DEREF:
|
||||
case BYTES_VAR_DEREF:
|
||||
case BYTES_VAR_SORTED:
|
||||
case BYTES_FIXED_SORTED:
|
||||
case BYTES_VAR_STRAIGHT:
|
||||
files.add(IndexFileNames.segmentFileName(filename, "",
|
||||
Writer.INDEX_EXTENSION));
|
||||
assert dir.fileExists(IndexFileNames.segmentFileName(filename, "",
|
||||
Writer.INDEX_EXTENSION));
|
||||
// until here all types use an index
|
||||
case BYTES_FIXED_STRAIGHT:
|
||||
case FLOAT_32:
|
||||
case FLOAT_64:
|
||||
case VAR_INTS:
|
||||
case FIXED_INTS_16:
|
||||
case FIXED_INTS_32:
|
||||
case FIXED_INTS_64:
|
||||
case FIXED_INTS_8:
|
||||
files.add(IndexFileNames.segmentFileName(filename, "",
|
||||
Writer.DATA_EXTENSION));
|
||||
assert dir.fileExists(IndexFileNames.segmentFileName(filename, "",
|
||||
Writer.DATA_EXTENSION));
|
||||
break;
|
||||
|
||||
default:
|
||||
assert false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static String docValuesId(String segmentsName, int codecID, int fieldId) {
|
||||
return segmentsName + "_" + codecID + "-" + fieldId;
|
||||
}
|
||||
|
||||
public static void getDocValuesExtensions(Set<String> extensions) {
|
||||
extensions.add(Writer.DATA_EXTENSION);
|
||||
extensions.add(Writer.INDEX_EXTENSION);
|
||||
|
||||
public static void getDocValuesExtensions(Set<String> extensions, boolean useCompoundFile) {
|
||||
if (useCompoundFile) {
|
||||
extensions.add(IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION);
|
||||
extensions.add(IndexFileNames.COMPOUND_FILE_EXTENSION);
|
||||
} else {
|
||||
extensions.add(Writer.DATA_EXTENSION);
|
||||
extensions.add(Writer.INDEX_EXTENSION);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -16,12 +16,16 @@ package org.apache.lucene.index.codecs;
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Comparator;
|
||||
import java.util.TreeMap;
|
||||
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.SegmentInfo;
|
||||
import org.apache.lucene.index.values.Bytes;
|
||||
import org.apache.lucene.index.values.IndexDocValues;
|
||||
|
@ -29,6 +33,8 @@ import org.apache.lucene.index.values.Floats;
|
|||
import org.apache.lucene.index.values.Ints;
|
||||
import org.apache.lucene.index.values.ValueType;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
/**
|
||||
* Abstract base class for FieldsProducer implementations supporting
|
||||
|
@ -39,8 +45,12 @@ import org.apache.lucene.store.Directory;
|
|||
public class DefaultDocValuesProducer extends PerDocValues {
|
||||
|
||||
protected final TreeMap<String, IndexDocValues> docValues;
|
||||
private final boolean useCompoundFile;
|
||||
private final Closeable cfs;
|
||||
private final Comparator<BytesRef> sortComparator;
|
||||
|
||||
/**
|
||||
*
|
||||
* Creates a new {@link DefaultDocValuesProducer} instance and loads all
|
||||
* {@link IndexDocValues} instances for this segment and codec.
|
||||
*
|
||||
|
@ -52,12 +62,27 @@ public class DefaultDocValuesProducer extends PerDocValues {
|
|||
* the {@link FieldInfos}
|
||||
* @param codecId
|
||||
* the codec ID
|
||||
* @param useCompoundFile
|
||||
* if <code>true</code> this producer opens a compound file to read
|
||||
* IndexDocValues fields, otherwise each field defines its own set of
|
||||
* files.
|
||||
* @param sortComparator
|
||||
* defines the sort order for sorted IndexDocValues variants
|
||||
* @throws IOException
|
||||
* if an {@link IOException} occurs
|
||||
*/
|
||||
public DefaultDocValuesProducer(SegmentInfo si, Directory dir,
|
||||
FieldInfos fieldInfo, int codecId) throws IOException {
|
||||
docValues = load(fieldInfo, si.name, si.docCount, dir, codecId);
|
||||
public DefaultDocValuesProducer(SegmentInfo si, Directory dir,
|
||||
FieldInfos fieldInfo, int codecId, boolean useCompoundFile, Comparator<BytesRef> sortComparator) throws IOException {
|
||||
this.useCompoundFile = useCompoundFile;
|
||||
this.sortComparator = sortComparator;
|
||||
final Directory directory;
|
||||
if (useCompoundFile) {
|
||||
cfs = directory = dir.openCompoundInput(IndexFileNames.segmentFileName(si.name, codecId, IndexFileNames.COMPOUND_FILE_EXTENSION), 1024);
|
||||
} else {
|
||||
cfs = null;
|
||||
directory = dir;
|
||||
}
|
||||
docValues = load(fieldInfo, si.name, si.docCount, directory, codecId);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -85,14 +110,14 @@ public class DefaultDocValuesProducer extends PerDocValues {
|
|||
final String id = DefaultDocValuesConsumer.docValuesId(segment,
|
||||
codecId, fieldInfo.number);
|
||||
values.put(field,
|
||||
loadDocValues(docCount, dir, id, fieldInfo.getDocValues()));
|
||||
loadDocValues(docCount, dir, id, fieldInfo.getDocValues(), sortComparator));
|
||||
}
|
||||
}
|
||||
success = true;
|
||||
} finally {
|
||||
if (!success) {
|
||||
// if we fail we must close all opened resources if there are any
|
||||
closeDocValues(values.values());
|
||||
closeInternal(values.values());
|
||||
}
|
||||
}
|
||||
return values;
|
||||
|
@ -112,6 +137,7 @@ public class DefaultDocValuesProducer extends PerDocValues {
|
|||
* the unique file ID within the segment
|
||||
* @param type
|
||||
* the type to load
|
||||
* @param sortComparator byte comparator used by sorted variants
|
||||
* @return a {@link IndexDocValues} instance for the given type
|
||||
* @throws IOException
|
||||
* if an {@link IOException} occurs
|
||||
|
@ -119,7 +145,7 @@ public class DefaultDocValuesProducer extends PerDocValues {
|
|||
* if the given {@link ValueType} is not supported
|
||||
*/
|
||||
protected IndexDocValues loadDocValues(int docCount, Directory dir, String id,
|
||||
ValueType type) throws IOException {
|
||||
ValueType type, Comparator<BytesRef> sortComparator) throws IOException {
|
||||
switch (type) {
|
||||
case FIXED_INTS_16:
|
||||
case FIXED_INTS_32:
|
||||
|
@ -132,39 +158,37 @@ public class DefaultDocValuesProducer extends PerDocValues {
|
|||
case FLOAT_64:
|
||||
return Floats.getValues(dir, id, docCount);
|
||||
case BYTES_FIXED_STRAIGHT:
|
||||
return Bytes.getValues(dir, id, Bytes.Mode.STRAIGHT, true, docCount);
|
||||
return Bytes.getValues(dir, id, Bytes.Mode.STRAIGHT, true, docCount, sortComparator);
|
||||
case BYTES_FIXED_DEREF:
|
||||
return Bytes.getValues(dir, id, Bytes.Mode.DEREF, true, docCount);
|
||||
return Bytes.getValues(dir, id, Bytes.Mode.DEREF, true, docCount, sortComparator);
|
||||
case BYTES_FIXED_SORTED:
|
||||
return Bytes.getValues(dir, id, Bytes.Mode.SORTED, true, docCount);
|
||||
return Bytes.getValues(dir, id, Bytes.Mode.SORTED, true, docCount, sortComparator);
|
||||
case BYTES_VAR_STRAIGHT:
|
||||
return Bytes.getValues(dir, id, Bytes.Mode.STRAIGHT, false, docCount);
|
||||
return Bytes.getValues(dir, id, Bytes.Mode.STRAIGHT, false, docCount, sortComparator);
|
||||
case BYTES_VAR_DEREF:
|
||||
return Bytes.getValues(dir, id, Bytes.Mode.DEREF, false, docCount);
|
||||
return Bytes.getValues(dir, id, Bytes.Mode.DEREF, false, docCount, sortComparator);
|
||||
case BYTES_VAR_SORTED:
|
||||
return Bytes.getValues(dir, id, Bytes.Mode.SORTED, false, docCount);
|
||||
return Bytes.getValues(dir, id, Bytes.Mode.SORTED, false, docCount, sortComparator);
|
||||
default:
|
||||
throw new IllegalStateException("unrecognized index values mode " + type);
|
||||
}
|
||||
}
|
||||
|
||||
public void close() throws IOException {
|
||||
closeDocValues(docValues.values());
|
||||
closeInternal(docValues.values());
|
||||
}
|
||||
|
||||
private void closeDocValues(final Collection<IndexDocValues> values)
|
||||
throws IOException {
|
||||
IOException ex = null;
|
||||
for (IndexDocValues docValues : values) {
|
||||
try {
|
||||
docValues.close();
|
||||
} catch (IOException e) {
|
||||
ex = e;
|
||||
}
|
||||
}
|
||||
if (ex != null) {
|
||||
throw ex;
|
||||
}
|
||||
private void closeInternal(Collection<? extends Closeable> closeables) throws IOException {
|
||||
final Collection<? extends Closeable> toClose;
|
||||
if (useCompoundFile) {
|
||||
final ArrayList<Closeable> list = new ArrayList<Closeable>(closeables);
|
||||
list.add(cfs);
|
||||
toClose = list;
|
||||
} else {
|
||||
toClose = docValues.values();
|
||||
|
||||
}
|
||||
IOUtils.closeSafely(false, toClose);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -76,9 +76,9 @@ import org.apache.lucene.util.fst.FST;
|
|||
* @lucene.experimental */
|
||||
|
||||
public class MemoryCodec extends Codec {
|
||||
|
||||
|
||||
public MemoryCodec() {
|
||||
name = "Memory";
|
||||
super("Memory");
|
||||
}
|
||||
|
||||
private static final boolean VERBOSE = false;
|
||||
|
@ -778,22 +778,22 @@ public class MemoryCodec extends Codec {
|
|||
@Override
|
||||
public void files(Directory dir, SegmentInfo segmentInfo, int id, Set<String> files) throws IOException {
|
||||
files.add(IndexFileNames.segmentFileName(segmentInfo.name, id, EXTENSION));
|
||||
DefaultDocValuesConsumer.files(dir, segmentInfo, id, files);
|
||||
DefaultDocValuesConsumer.files(dir, segmentInfo, id, files, getDocValuesUseCFS());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void getExtensions(Set<String> extensions) {
|
||||
extensions.add(EXTENSION);
|
||||
DefaultDocValuesConsumer.getDocValuesExtensions(extensions);
|
||||
DefaultDocValuesConsumer.getDocValuesExtensions(extensions, getDocValuesUseCFS());
|
||||
}
|
||||
|
||||
@Override
|
||||
public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException {
|
||||
return new DefaultDocValuesConsumer(state, BytesRef.getUTF8SortedAsUnicodeComparator());
|
||||
return new DefaultDocValuesConsumer(state, getDocValuesSortComparator(), getDocValuesUseCFS());
|
||||
}
|
||||
|
||||
@Override
|
||||
public PerDocValues docsProducer(SegmentReadState state) throws IOException {
|
||||
return new DefaultDocValuesProducer(state.segmentInfo, state.dir, state.fieldInfos, state.codecId);
|
||||
return new DefaultDocValuesProducer(state.segmentInfo, state.dir, state.fieldInfos, state.codecId, getDocValuesUseCFS(), getDocValuesSortComparator());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -55,7 +55,7 @@ public class PreFlexCodec extends Codec {
|
|||
public static final String PROX_EXTENSION = "prx";
|
||||
|
||||
public PreFlexCodec() {
|
||||
name = "PreFlex";
|
||||
super("PreFlex");
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -43,7 +43,6 @@ import org.apache.lucene.index.codecs.TermsIndexReaderBase;
|
|||
import org.apache.lucene.index.codecs.TermsIndexWriterBase;
|
||||
import org.apache.lucene.index.codecs.standard.StandardCodec;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
/** This codec "inlines" the postings for terms that have
|
||||
|
@ -58,10 +57,19 @@ public class PulsingCodec extends Codec {
|
|||
|
||||
private final int freqCutoff;
|
||||
|
||||
/**
|
||||
* Creates a {@link PulsingCodec} with <tt>freqCutoff = 1</tt>
|
||||
*
|
||||
* @see PulsingCodec#PulsingCodec(int)
|
||||
*/
|
||||
public PulsingCodec() {
|
||||
this(1);
|
||||
}
|
||||
|
||||
/** Terms with freq <= freqCutoff are inlined into terms
|
||||
* dict. */
|
||||
public PulsingCodec(int freqCutoff) {
|
||||
name = "Pulsing";
|
||||
super("Pulsing");
|
||||
this.freqCutoff = freqCutoff;
|
||||
}
|
||||
|
||||
|
@ -157,22 +165,22 @@ public class PulsingCodec extends Codec {
|
|||
StandardPostingsReader.files(dir, segmentInfo, id, files);
|
||||
BlockTermsReader.files(dir, segmentInfo, id, files);
|
||||
VariableGapTermsIndexReader.files(dir, segmentInfo, id, files);
|
||||
DefaultDocValuesConsumer.files(dir, segmentInfo, id, files);
|
||||
DefaultDocValuesConsumer.files(dir, segmentInfo, id, files, getDocValuesUseCFS());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void getExtensions(Set<String> extensions) {
|
||||
StandardCodec.getStandardExtensions(extensions);
|
||||
DefaultDocValuesConsumer.getDocValuesExtensions(extensions);
|
||||
DefaultDocValuesConsumer.getDocValuesExtensions(extensions, getDocValuesUseCFS());
|
||||
}
|
||||
|
||||
@Override
|
||||
public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException {
|
||||
return new DefaultDocValuesConsumer(state, BytesRef.getUTF8SortedAsUnicodeComparator());
|
||||
return new DefaultDocValuesConsumer(state, getDocValuesSortComparator(), getDocValuesUseCFS());
|
||||
}
|
||||
|
||||
@Override
|
||||
public PerDocValues docsProducer(SegmentReadState state) throws IOException {
|
||||
return new DefaultDocValuesProducer(state.segmentInfo, state.dir, state.fieldInfos, state.codecId);
|
||||
return new DefaultDocValuesProducer(state.segmentInfo, state.dir, state.fieldInfos, state.codecId, getDocValuesUseCFS(), getDocValuesSortComparator());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -33,7 +33,6 @@ import org.apache.lucene.index.codecs.PerDocConsumer;
|
|||
import org.apache.lucene.index.codecs.DefaultDocValuesConsumer;
|
||||
import org.apache.lucene.index.codecs.PerDocValues;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/** For debugging, curiosity, transparency only!! Do not
|
||||
* use this codec in production.
|
||||
|
@ -44,11 +43,12 @@ import org.apache.lucene.util.BytesRef;
|
|||
*
|
||||
* @lucene.experimental */
|
||||
public class SimpleTextCodec extends Codec {
|
||||
|
||||
|
||||
public SimpleTextCodec() {
|
||||
name = "SimpleText";
|
||||
super("SimpleText");
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
|
||||
return new SimpleTextFieldsWriter(state);
|
||||
|
@ -69,23 +69,23 @@ public class SimpleTextCodec extends Codec {
|
|||
@Override
|
||||
public void files(Directory dir, SegmentInfo segmentInfo, int id, Set<String> files) throws IOException {
|
||||
files.add(getPostingsFileName(segmentInfo.name, id));
|
||||
DefaultDocValuesConsumer.files(dir, segmentInfo, id, files);
|
||||
DefaultDocValuesConsumer.files(dir, segmentInfo, id, files, getDocValuesUseCFS());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void getExtensions(Set<String> extensions) {
|
||||
extensions.add(POSTINGS_EXTENSION);
|
||||
DefaultDocValuesConsumer.getDocValuesExtensions(extensions);
|
||||
DefaultDocValuesConsumer.getDocValuesExtensions(extensions, getDocValuesUseCFS());
|
||||
}
|
||||
|
||||
// TODO: would be great if these used a plain text impl
|
||||
@Override
|
||||
public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException {
|
||||
return new DefaultDocValuesConsumer(state, BytesRef.getUTF8SortedAsUnicodeComparator());
|
||||
return new DefaultDocValuesConsumer(state, getDocValuesSortComparator(), getDocValuesUseCFS());
|
||||
}
|
||||
|
||||
@Override
|
||||
public PerDocValues docsProducer(SegmentReadState state) throws IOException {
|
||||
return new DefaultDocValuesProducer(state.segmentInfo, state.dir, state.fieldInfos, state.codecId);
|
||||
return new DefaultDocValuesProducer(state.segmentInfo, state.dir, state.fieldInfos, state.codecId, getDocValuesUseCFS(), getDocValuesSortComparator());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -40,14 +40,13 @@ import org.apache.lucene.index.codecs.BlockTermsWriter;
|
|||
import org.apache.lucene.index.codecs.BlockTermsReader;
|
||||
import org.apache.lucene.index.codecs.DefaultDocValuesProducer;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/** Default codec.
|
||||
* @lucene.experimental */
|
||||
public class StandardCodec extends Codec {
|
||||
|
||||
public StandardCodec() {
|
||||
name = "Standard";
|
||||
super("Standard");
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -140,13 +139,13 @@ public class StandardCodec extends Codec {
|
|||
StandardPostingsReader.files(dir, segmentInfo, id, files);
|
||||
BlockTermsReader.files(dir, segmentInfo, id, files);
|
||||
VariableGapTermsIndexReader.files(dir, segmentInfo, id, files);
|
||||
DefaultDocValuesConsumer.files(dir, segmentInfo, id, files);
|
||||
DefaultDocValuesConsumer.files(dir, segmentInfo, id, files, getDocValuesUseCFS());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void getExtensions(Set<String> extensions) {
|
||||
getStandardExtensions(extensions);
|
||||
DefaultDocValuesConsumer.getDocValuesExtensions(extensions);
|
||||
DefaultDocValuesConsumer.getDocValuesExtensions(extensions, getDocValuesUseCFS());
|
||||
}
|
||||
|
||||
public static void getStandardExtensions(Set<String> extensions) {
|
||||
|
@ -158,11 +157,11 @@ public class StandardCodec extends Codec {
|
|||
|
||||
@Override
|
||||
public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException {
|
||||
return new DefaultDocValuesConsumer(state, BytesRef.getUTF8SortedAsUnicodeComparator());
|
||||
return new DefaultDocValuesConsumer(state, getDocValuesSortComparator(), getDocValuesUseCFS());
|
||||
}
|
||||
|
||||
@Override
|
||||
public PerDocValues docsProducer(SegmentReadState state) throws IOException {
|
||||
return new DefaultDocValuesProducer(state.segmentInfo, state.dir, state.fieldInfos, state.codecId);
|
||||
return new DefaultDocValuesProducer(state.segmentInfo, state.dir, state.fieldInfos, state.codecId, getDocValuesUseCFS(), getDocValuesSortComparator());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -151,12 +151,13 @@ public final class Bytes {
|
|||
* otherwise <code>false</code>
|
||||
* @param maxDoc
|
||||
* the number of document values stored for the given ID
|
||||
* @param sortComparator byte comparator used by sorted variants
|
||||
* @return an initialized {@link IndexDocValues} instance.
|
||||
* @throws IOException
|
||||
* if an {@link IOException} occurs
|
||||
*/
|
||||
public static IndexDocValues getValues(Directory dir, String id, Mode mode,
|
||||
boolean fixedSize, int maxDoc) throws IOException {
|
||||
boolean fixedSize, int maxDoc, Comparator<BytesRef> sortComparator) throws IOException {
|
||||
// TODO -- I can peek @ header to determine fixed/mode?
|
||||
if (fixedSize) {
|
||||
if (mode == Mode.STRAIGHT) {
|
||||
|
@ -172,7 +173,7 @@ public final class Bytes {
|
|||
} else if (mode == Mode.DEREF) {
|
||||
return new VarDerefBytesImpl.Reader(dir, id, maxDoc);
|
||||
} else if (mode == Mode.SORTED) {
|
||||
return new VarSortedBytesImpl.Reader(dir, id, maxDoc);
|
||||
return new VarSortedBytesImpl.Reader(dir, id, maxDoc, sortComparator);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -130,6 +130,18 @@ public abstract class IndexDocValues implements Closeable {
|
|||
throws IOException {
|
||||
return cache.loadSorted(this, comparator);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a {@link SortedSource} instance using a default {@link BytesRef}
|
||||
* comparator for this {@link IndexDocValues} field instance like
|
||||
* {@link #getSource()}.
|
||||
* <p>
|
||||
* This method will return null iff this {@link IndexDocValues} represents a
|
||||
* {@link Source} instead of a {@link SortedSource}.
|
||||
*/
|
||||
public SortedSource getSortedSorted() throws IOException {
|
||||
return getSortedSorted(null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads and returns a {@link SortedSource} instance for this
|
||||
|
@ -142,7 +154,19 @@ public abstract class IndexDocValues implements Closeable {
|
|||
throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Loads and returns a {@link SortedSource} instance using a default
|
||||
* {@link BytesRef} comparator for this {@link IndexDocValues} field instance
|
||||
* like {@link #load()}.
|
||||
* <p>
|
||||
* This method will return null iff this {@link IndexDocValues} represents a
|
||||
* {@link Source} instead of a {@link SortedSource}.
|
||||
*/
|
||||
public SortedSource loadSorted() throws IOException {
|
||||
return loadSorted(null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the {@link ValueType} of this {@link IndexDocValues} instance
|
||||
*/
|
||||
|
|
|
@ -167,14 +167,16 @@ class VarSortedBytesImpl {
|
|||
|
||||
public static class Reader extends BytesReaderBase {
|
||||
|
||||
Reader(Directory dir, String id, int maxDoc) throws IOException {
|
||||
private final Comparator<BytesRef> defaultComp;
|
||||
Reader(Directory dir, String id, int maxDoc, Comparator<BytesRef> comparator) throws IOException {
|
||||
super(dir, id, CODEC_NAME, VERSION_START, true);
|
||||
this.defaultComp = comparator;
|
||||
}
|
||||
|
||||
@Override
|
||||
public org.apache.lucene.index.values.IndexDocValues.Source load()
|
||||
throws IOException {
|
||||
return loadSorted(null);
|
||||
return loadSorted(defaultComp);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -60,7 +60,7 @@ public abstract class CompoundFileDirectory extends Directory {
|
|||
* NOTE: subclasses must call {@link #initForRead(Map)} before the directory can be used.
|
||||
*/
|
||||
public CompoundFileDirectory(Directory directory, String fileName, int readBufferSize) throws IOException {
|
||||
assert !(directory instanceof CompoundFileDirectory) : "compound file inside of compound file: " + fileName;
|
||||
|
||||
this.directory = directory;
|
||||
this.fileName = fileName;
|
||||
this.readBufferSize = readBufferSize;
|
||||
|
@ -75,6 +75,7 @@ public abstract class CompoundFileDirectory extends Directory {
|
|||
}
|
||||
|
||||
protected final void initForWrite() {
|
||||
assert !(directory instanceof CompoundFileDirectory) : "compound file inside of compound file: " + fileName;
|
||||
this.entries = SENTINEL;
|
||||
this.openForWrite = true;
|
||||
this.isOpen = true;
|
||||
|
@ -174,7 +175,11 @@ public abstract class CompoundFileDirectory extends Directory {
|
|||
|
||||
@Override
|
||||
public synchronized void close() throws IOException {
|
||||
ensureOpen();
|
||||
if (!isOpen) {
|
||||
// allow double close - usually to be consistent with other closeables
|
||||
assert entries == null;
|
||||
return; // already closed
|
||||
}
|
||||
entries = null;
|
||||
isOpen = false;
|
||||
if (writer != null) {
|
||||
|
@ -285,12 +290,13 @@ public abstract class CompoundFileDirectory extends Directory {
|
|||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
/** Not implemented
|
||||
* @throws UnsupportedOperationException */
|
||||
@Override
|
||||
public final CompoundFileDirectory openCompoundInput(String name, int bufferSize) throws IOException {
|
||||
// NOTE: final to make nested compounding impossible.
|
||||
throw new UnsupportedOperationException();
|
||||
public CompoundFileDirectory openCompoundInput(String name, int bufferSize) throws IOException {
|
||||
FileEntry fileEntry = this.entries.get(IndexFileNames.stripSegmentName(name));
|
||||
if (fileEntry == null) {
|
||||
throw new FileNotFoundException("file " + name + " does not exists in this CFS");
|
||||
}
|
||||
return new NestedCompoundFileDirectory(name, bufferSize, fileEntry.offset, fileEntry.length);
|
||||
}
|
||||
|
||||
/** Not implemented
|
||||
|
@ -298,8 +304,36 @@ public abstract class CompoundFileDirectory extends Directory {
|
|||
@Override
|
||||
public CompoundFileDirectory createCompoundOutput(String name)
|
||||
throws IOException {
|
||||
// NOTE: final to make nested compounding impossible.
|
||||
throw new UnsupportedOperationException();
|
||||
throw new UnsupportedOperationException("can not create nested CFS, create seperately and use Directory.copy instead");
|
||||
}
|
||||
|
||||
private class NestedCompoundFileDirectory extends CompoundFileDirectory {
|
||||
|
||||
private final long cfsOffset;
|
||||
private final long cfsLength;
|
||||
|
||||
public NestedCompoundFileDirectory(String fileName, int readBufferSize, long offset, long length)
|
||||
throws IOException {
|
||||
super(directory, fileName, readBufferSize);
|
||||
this.cfsOffset = offset;
|
||||
this.cfsLength = length;
|
||||
IndexInput input = null;
|
||||
try {
|
||||
input = CompoundFileDirectory.this.openInput(fileName, 128);
|
||||
initForRead(CompoundFileDirectory.readEntries(input,
|
||||
CompoundFileDirectory.this, fileName));
|
||||
} finally {
|
||||
IOUtils.closeSafely(false, input);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexInput openInputSlice(String id, long offset, long length,
|
||||
int readBufferSize) throws IOException {
|
||||
assert offset + length <= cfsLength;
|
||||
return CompoundFileDirectory.this.openInputSlice(id, cfsOffset + offset, length, readBufferSize);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -17,6 +17,7 @@ package org.apache.lucene.store;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
|
@ -55,7 +56,7 @@ import org.apache.lucene.util.IOUtils;
|
|||
*
|
||||
* @lucene.internal
|
||||
*/
|
||||
final class CompoundFileWriter {
|
||||
final class CompoundFileWriter implements Closeable{
|
||||
|
||||
private static final class FileEntry {
|
||||
/** source file */
|
||||
|
@ -89,8 +90,8 @@ final class CompoundFileWriter {
|
|||
private boolean closed = false;
|
||||
private volatile IndexOutput dataOut;
|
||||
private final AtomicBoolean outputTaken = new AtomicBoolean(false);
|
||||
private final String entryTableName;
|
||||
private final String dataFileName;
|
||||
final String entryTableName;
|
||||
final String dataFileName;
|
||||
|
||||
/**
|
||||
* Create the compound stream in the specified file. The file name is the
|
||||
|
@ -128,7 +129,7 @@ final class CompoundFileWriter {
|
|||
* if close() had been called before or if no file has been added to
|
||||
* this object
|
||||
*/
|
||||
void close() throws IOException {
|
||||
public void close() throws IOException {
|
||||
if (closed) {
|
||||
throw new IllegalStateException("already closed");
|
||||
}
|
||||
|
@ -144,12 +145,18 @@ final class CompoundFileWriter {
|
|||
assert dataOut != null;
|
||||
long finalLength = dataOut.getFilePointer();
|
||||
assert assertFileLength(finalLength, dataOut);
|
||||
} catch (IOException e) {
|
||||
priorException = e;
|
||||
} finally {
|
||||
IOUtils.closeSafely(priorException, dataOut);
|
||||
}
|
||||
try {
|
||||
entryTableOut = directory.createOutput(entryTableName);
|
||||
writeEntryTable(entries.values(), entryTableOut);
|
||||
} catch (IOException e) {
|
||||
priorException = e;
|
||||
} finally {
|
||||
IOUtils.closeSafely(priorException, dataOut, entryTableOut);
|
||||
IOUtils.closeSafely(priorException, entryTableOut);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -318,6 +325,7 @@ final class CompoundFileWriter {
|
|||
closed = true;
|
||||
entry.length = writtenBytes;
|
||||
if (isSeparate) {
|
||||
delegate.close();
|
||||
// we are a separate file - push into the pending entries
|
||||
pendingEntries.add(entry);
|
||||
} else {
|
||||
|
|
|
@ -32,6 +32,7 @@ import org.apache.lucene.index.codecs.sep.IntIndexInput;
|
|||
import org.apache.lucene.index.codecs.sep.IntIndexOutput;
|
||||
import org.apache.lucene.index.codecs.sep.SepPostingsReaderImpl;
|
||||
import org.apache.lucene.index.codecs.sep.SepPostingsWriterImpl;
|
||||
import org.apache.lucene.index.codecs.standard.StandardCodec;
|
||||
import org.apache.lucene.index.codecs.intblock.FixedIntBlockIndexInput;
|
||||
import org.apache.lucene.index.codecs.intblock.FixedIntBlockIndexOutput;
|
||||
import org.apache.lucene.index.codecs.DefaultDocValuesProducer;
|
||||
|
@ -46,7 +47,6 @@ import org.apache.lucene.index.codecs.BlockTermsReader;
|
|||
import org.apache.lucene.index.codecs.BlockTermsWriter;
|
||||
import org.apache.lucene.index.codecs.TermsIndexReaderBase;
|
||||
import org.apache.lucene.index.codecs.TermsIndexWriterBase;
|
||||
import org.apache.lucene.index.codecs.standard.StandardCodec;
|
||||
import org.apache.lucene.store.*;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
@ -62,8 +62,8 @@ public class MockFixedIntBlockCodec extends Codec {
|
|||
private final int blockSize;
|
||||
|
||||
public MockFixedIntBlockCodec(int blockSize) {
|
||||
super("MockFixedIntBlock");
|
||||
this.blockSize = blockSize;
|
||||
name = "MockFixedIntBlock";
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -206,7 +206,7 @@ public class MockFixedIntBlockCodec extends Codec {
|
|||
SepPostingsReaderImpl.files(segmentInfo, codecId, files);
|
||||
BlockTermsReader.files(dir, segmentInfo, codecId, files);
|
||||
FixedGapTermsIndexReader.files(dir, segmentInfo, codecId, files);
|
||||
DefaultDocValuesConsumer.files(dir, segmentInfo, codecId, files);
|
||||
DefaultDocValuesConsumer.files(dir, segmentInfo, codecId, files, getDocValuesUseCFS());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -214,16 +214,16 @@ public class MockFixedIntBlockCodec extends Codec {
|
|||
SepPostingsWriterImpl.getExtensions(extensions);
|
||||
BlockTermsReader.getExtensions(extensions);
|
||||
FixedGapTermsIndexReader.getIndexExtensions(extensions);
|
||||
DefaultDocValuesConsumer.getDocValuesExtensions(extensions);
|
||||
DefaultDocValuesConsumer.getDocValuesExtensions(extensions, getDocValuesUseCFS());
|
||||
}
|
||||
|
||||
@Override
|
||||
public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException {
|
||||
return new DefaultDocValuesConsumer(state, BytesRef.getUTF8SortedAsUnicodeComparator());
|
||||
return new DefaultDocValuesConsumer(state, getDocValuesSortComparator(), getDocValuesUseCFS());
|
||||
}
|
||||
|
||||
@Override
|
||||
public PerDocValues docsProducer(SegmentReadState state) throws IOException {
|
||||
return new DefaultDocValuesProducer(state.segmentInfo, state.dir, state.fieldInfos, state.codecId);
|
||||
return new DefaultDocValuesProducer(state.segmentInfo, state.dir, state.fieldInfos, state.codecId, getDocValuesUseCFS(), getDocValuesSortComparator());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -32,6 +32,7 @@ import org.apache.lucene.index.codecs.sep.IntIndexInput;
|
|||
import org.apache.lucene.index.codecs.sep.IntIndexOutput;
|
||||
import org.apache.lucene.index.codecs.sep.SepPostingsReaderImpl;
|
||||
import org.apache.lucene.index.codecs.sep.SepPostingsWriterImpl;
|
||||
import org.apache.lucene.index.codecs.standard.StandardCodec;
|
||||
import org.apache.lucene.index.codecs.intblock.VariableIntBlockIndexInput;
|
||||
import org.apache.lucene.index.codecs.intblock.VariableIntBlockIndexOutput;
|
||||
import org.apache.lucene.index.codecs.DefaultDocValuesProducer;
|
||||
|
@ -46,7 +47,6 @@ import org.apache.lucene.index.codecs.BlockTermsReader;
|
|||
import org.apache.lucene.index.codecs.BlockTermsWriter;
|
||||
import org.apache.lucene.index.codecs.TermsIndexReaderBase;
|
||||
import org.apache.lucene.index.codecs.TermsIndexWriterBase;
|
||||
import org.apache.lucene.index.codecs.standard.StandardCodec;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
|
@ -62,9 +62,9 @@ import org.apache.lucene.util.IOUtils;
|
|||
|
||||
public class MockVariableIntBlockCodec extends Codec {
|
||||
private final int baseBlockSize;
|
||||
|
||||
|
||||
public MockVariableIntBlockCodec(int baseBlockSize) {
|
||||
name = "MockVariableIntBlock";
|
||||
super("MockVariableIntBlock");
|
||||
this.baseBlockSize = baseBlockSize;
|
||||
}
|
||||
|
||||
|
@ -229,7 +229,7 @@ public class MockVariableIntBlockCodec extends Codec {
|
|||
SepPostingsReaderImpl.files(segmentInfo, codecId, files);
|
||||
BlockTermsReader.files(dir, segmentInfo, codecId, files);
|
||||
FixedGapTermsIndexReader.files(dir, segmentInfo, codecId, files);
|
||||
DefaultDocValuesConsumer.files(dir, segmentInfo, codecId, files);
|
||||
DefaultDocValuesConsumer.files(dir, segmentInfo, codecId, files, getDocValuesUseCFS());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -237,16 +237,16 @@ public class MockVariableIntBlockCodec extends Codec {
|
|||
SepPostingsWriterImpl.getExtensions(extensions);
|
||||
BlockTermsReader.getExtensions(extensions);
|
||||
FixedGapTermsIndexReader.getIndexExtensions(extensions);
|
||||
DefaultDocValuesConsumer.getDocValuesExtensions(extensions);
|
||||
DefaultDocValuesConsumer.getDocValuesExtensions(extensions, getDocValuesUseCFS());
|
||||
}
|
||||
|
||||
@Override
|
||||
public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException {
|
||||
return new DefaultDocValuesConsumer(state, BytesRef.getUTF8SortedAsUnicodeComparator());
|
||||
return new DefaultDocValuesConsumer(state, getDocValuesSortComparator(), getDocValuesUseCFS());
|
||||
}
|
||||
|
||||
@Override
|
||||
public PerDocValues docsProducer(SegmentReadState state) throws IOException {
|
||||
return new DefaultDocValuesProducer(state.segmentInfo, state.dir, state.fieldInfos, state.codecId);
|
||||
return new DefaultDocValuesProducer(state.segmentInfo, state.dir, state.fieldInfos, state.codecId, getDocValuesUseCFS(), getDocValuesSortComparator());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -75,9 +75,9 @@ public class MockRandomCodec extends Codec {
|
|||
|
||||
private final Random seedRandom;
|
||||
private final String SEED_EXT = "sd";
|
||||
|
||||
|
||||
public MockRandomCodec(Random random) {
|
||||
name = "MockRandom";
|
||||
super("MockRandom");
|
||||
this.seedRandom = new Random(random.nextLong());
|
||||
}
|
||||
|
||||
|
@ -354,7 +354,7 @@ public class MockRandomCodec extends Codec {
|
|||
BlockTermsReader.files(dir, segmentInfo, codecId, files);
|
||||
FixedGapTermsIndexReader.files(dir, segmentInfo, codecId, files);
|
||||
VariableGapTermsIndexReader.files(dir, segmentInfo, codecId, files);
|
||||
DefaultDocValuesConsumer.files(dir, segmentInfo, codecId, files);
|
||||
DefaultDocValuesConsumer.files(dir, segmentInfo, codecId, files, getDocValuesUseCFS());
|
||||
// hackish!
|
||||
Iterator<String> it = files.iterator();
|
||||
while(it.hasNext()) {
|
||||
|
@ -372,7 +372,7 @@ public class MockRandomCodec extends Codec {
|
|||
BlockTermsReader.getExtensions(extensions);
|
||||
FixedGapTermsIndexReader.getIndexExtensions(extensions);
|
||||
VariableGapTermsIndexReader.getIndexExtensions(extensions);
|
||||
DefaultDocValuesConsumer.getDocValuesExtensions(extensions);
|
||||
DefaultDocValuesConsumer.getDocValuesExtensions(extensions, getDocValuesUseCFS());
|
||||
extensions.add(SEED_EXT);
|
||||
//System.out.println("MockRandom.getExtensions return " + extensions);
|
||||
}
|
||||
|
@ -380,11 +380,11 @@ public class MockRandomCodec extends Codec {
|
|||
// can we make this more evil?
|
||||
@Override
|
||||
public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException {
|
||||
return new DefaultDocValuesConsumer(state, BytesRef.getUTF8SortedAsUnicodeComparator());
|
||||
return new DefaultDocValuesConsumer(state, getDocValuesSortComparator(), getDocValuesUseCFS());
|
||||
}
|
||||
|
||||
@Override
|
||||
public PerDocValues docsProducer(SegmentReadState state) throws IOException {
|
||||
return new DefaultDocValuesProducer(state.segmentInfo, state.dir, state.fieldInfos, state.codecId);
|
||||
return new DefaultDocValuesProducer(state.segmentInfo, state.dir, state.fieldInfos, state.codecId, getDocValuesUseCFS(), getDocValuesSortComparator());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -54,7 +54,7 @@ import org.apache.lucene.util.BytesRef;
|
|||
public class MockSepCodec extends Codec {
|
||||
|
||||
public MockSepCodec() {
|
||||
name = "MockSep";
|
||||
super("MockSep");
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -139,13 +139,13 @@ public class MockSepCodec extends Codec {
|
|||
SepPostingsReaderImpl.files(segmentInfo, codecId, files);
|
||||
BlockTermsReader.files(dir, segmentInfo, codecId, files);
|
||||
FixedGapTermsIndexReader.files(dir, segmentInfo, codecId, files);
|
||||
DefaultDocValuesConsumer.files(dir, segmentInfo, codecId, files);
|
||||
DefaultDocValuesConsumer.files(dir, segmentInfo, codecId, files, getDocValuesUseCFS());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void getExtensions(Set<String> extensions) {
|
||||
getSepExtensions(extensions);
|
||||
DefaultDocValuesConsumer.getDocValuesExtensions(extensions);
|
||||
DefaultDocValuesConsumer.getDocValuesExtensions(extensions, getDocValuesUseCFS());
|
||||
}
|
||||
|
||||
public static void getSepExtensions(Set<String> extensions) {
|
||||
|
@ -156,11 +156,11 @@ public class MockSepCodec extends Codec {
|
|||
|
||||
@Override
|
||||
public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException {
|
||||
return new DefaultDocValuesConsumer(state, BytesRef.getUTF8SortedAsUnicodeComparator());
|
||||
return new DefaultDocValuesConsumer(state, getDocValuesSortComparator(), getDocValuesUseCFS());
|
||||
}
|
||||
|
||||
@Override
|
||||
public PerDocValues docsProducer(SegmentReadState state) throws IOException {
|
||||
return new DefaultDocValuesProducer(state.segmentInfo, state.dir, state.fieldInfos, state.codecId);
|
||||
return new DefaultDocValuesProducer(state.segmentInfo, state.dir, state.fieldInfos, state.codecId, getDocValuesUseCFS(), getDocValuesSortComparator());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -37,7 +37,6 @@ public class PreFlexRWCodec extends PreFlexCodec {
|
|||
public PreFlexRWCodec() {
|
||||
// NOTE: we impersonate the PreFlex codec so that it can
|
||||
// read the segments we write!
|
||||
super();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -139,4 +139,9 @@ public class MockCompoundFileDirectoryWrapper extends CompoundFileDirectory {
|
|||
public CompoundFileDirectory createCompoundOutput(String name) throws IOException {
|
||||
return delegate.createCompoundOutput(name);
|
||||
}
|
||||
|
||||
public CompoundFileDirectory openCompoundInput(String name, int bufferSize) throws IOException {
|
||||
return delegate.openCompoundInput(name, bufferSize);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -239,7 +239,7 @@ public abstract class LuceneTestCase extends Assert {
|
|||
if (prior != null) {
|
||||
cp.unregister(prior);
|
||||
}
|
||||
cp.register(c);
|
||||
cp.register(randomizCodec(random, c));
|
||||
}
|
||||
|
||||
// returns current default codec
|
||||
|
@ -277,7 +277,7 @@ public abstract class LuceneTestCase extends Assert {
|
|||
}
|
||||
|
||||
swapCodec(new MockSepCodec(), cp);
|
||||
swapCodec(new PulsingCodec(codecHasParam && "Pulsing".equals(codec) ? codecParam : _TestUtil.nextInt(random, 1, 20)), cp);
|
||||
swapCodec(new PulsingCodec(codecHasParam && "Pulsing".equals(codec) ? codecParam : 1 + random.nextInt(20)), cp);
|
||||
swapCodec(new MockFixedIntBlockCodec(codecHasParam && "MockFixedIntBlock".equals(codec) ? codecParam : _TestUtil.nextInt(random, 1, 2000)), cp);
|
||||
// baseBlockSize cannot be over 127:
|
||||
swapCodec(new MockVariableIntBlockCodec(codecHasParam && "MockVariableIntBlock".equals(codec) ? codecParam : _TestUtil.nextInt(random, 1, 127)), cp);
|
||||
|
@ -285,6 +285,11 @@ public abstract class LuceneTestCase extends Assert {
|
|||
|
||||
return cp.lookup(codec);
|
||||
}
|
||||
|
||||
public static Codec randomizCodec(Random random, Codec codec) {
|
||||
codec.setDocValuesUseCFS(random.nextBoolean());
|
||||
return codec;
|
||||
}
|
||||
|
||||
// returns current PreFlex codec
|
||||
static void removeTestCodecs(Codec codec, CodecProvider cp) {
|
||||
|
@ -1464,11 +1469,11 @@ public abstract class LuceneTestCase extends Assert {
|
|||
|
||||
RandomCodecProvider(Random random) {
|
||||
this.perFieldSeed = random.nextInt();
|
||||
register(new StandardCodec());
|
||||
register(new PreFlexCodec());
|
||||
register(new PulsingCodec(1));
|
||||
register(new SimpleTextCodec());
|
||||
register(new MemoryCodec());
|
||||
register(randomizCodec(random, new StandardCodec()));
|
||||
register(randomizCodec(random, new PreFlexCodec()));
|
||||
register(randomizCodec(random, new PulsingCodec( 1 + random.nextInt(20))));
|
||||
register(randomizCodec(random, new SimpleTextCodec()));
|
||||
register(randomizCodec(random, new MemoryCodec()));
|
||||
Collections.shuffle(knownCodecs, random);
|
||||
}
|
||||
|
||||
|
|
|
@ -24,8 +24,6 @@ import org.apache.lucene.document.*;
|
|||
import org.apache.lucene.search.*;
|
||||
import org.apache.lucene.analysis.*;
|
||||
import org.apache.lucene.index.codecs.*;
|
||||
import org.apache.lucene.index.codecs.standard.*;
|
||||
import org.apache.lucene.index.codecs.pulsing.*;
|
||||
import org.apache.lucene.store.*;
|
||||
import java.util.*;
|
||||
import java.io.*;
|
||||
|
@ -75,7 +73,7 @@ public class TestExternalCodecs extends LuceneTestCase {
|
|||
public static class RAMOnlyCodec extends Codec {
|
||||
|
||||
public RAMOnlyCodec() {
|
||||
name = "RamOnly";
|
||||
super("RamOnly");
|
||||
}
|
||||
// Postings state:
|
||||
static class RAMPostings extends FieldsProducer {
|
||||
|
|
|
@ -1160,7 +1160,7 @@ public class TestAddIndexes extends LuceneTestCase {
|
|||
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT,
|
||||
new MockAnalyzer(random));
|
||||
CodecProvider provider = new CodecProvider();
|
||||
provider.register(new PulsingCodec(1 + random.nextInt(10)));
|
||||
provider.register(new PulsingCodec(1 + random.nextInt(20)));
|
||||
conf.setCodecProvider(provider);
|
||||
IndexWriter w = new IndexWriter(dir, conf);
|
||||
try {
|
||||
|
@ -1181,7 +1181,7 @@ public class TestAddIndexes extends LuceneTestCase {
|
|||
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT,
|
||||
new MockAnalyzer(random));
|
||||
CodecProvider provider = new CodecProvider();
|
||||
provider.register(new PulsingCodec(1 + random.nextInt(10)));
|
||||
provider.register(new PulsingCodec(1 + random.nextInt(20)));
|
||||
conf.setCodecProvider(provider);
|
||||
IndexWriter w = new IndexWriter(dir, conf);
|
||||
IndexReader indexReader = IndexReader.open(toAdd);
|
||||
|
|
|
@ -710,4 +710,62 @@ public class TestCompoundFile extends LuceneTestCase
|
|||
|
||||
newDir.close();
|
||||
}
|
||||
|
||||
public void testReadNestedCFP() throws IOException {
|
||||
Directory newDir = newDirectory();
|
||||
CompoundFileDirectory csw = newDir.createCompoundOutput("d.cfs");
|
||||
CompoundFileDirectory nested = newDir.createCompoundOutput("b.cfs");
|
||||
IndexOutput out = nested.createOutput("b.xyz");
|
||||
IndexOutput out1 = nested.createOutput("b_1.xyz");
|
||||
out.writeInt(0);
|
||||
out1.writeInt(1);
|
||||
out.close();
|
||||
out1.close();
|
||||
nested.close();
|
||||
newDir.copy(csw, "b.cfs", "b.cfs");
|
||||
newDir.copy(csw, "b.cfe", "b.cfe");
|
||||
newDir.deleteFile("b.cfs");
|
||||
newDir.deleteFile("b.cfe");
|
||||
csw.close();
|
||||
|
||||
assertEquals(2, newDir.listAll().length);
|
||||
csw = newDir.openCompoundInput("d.cfs", 1024);
|
||||
|
||||
assertEquals(2, csw.listAll().length);
|
||||
nested = csw.openCompoundInput("b.cfs", 1024);
|
||||
|
||||
assertEquals(2, nested.listAll().length);
|
||||
IndexInput openInput = nested.openInput("b.xyz");
|
||||
assertEquals(0, openInput.readInt());
|
||||
openInput.close();
|
||||
openInput = nested.openInput("b_1.xyz");
|
||||
assertEquals(1, openInput.readInt());
|
||||
openInput.close();
|
||||
nested.close();
|
||||
csw.close();
|
||||
newDir.close();
|
||||
}
|
||||
|
||||
public void testDoubleClose() throws IOException {
|
||||
Directory newDir = newDirectory();
|
||||
CompoundFileDirectory csw = newDir.createCompoundOutput("d.cfs");
|
||||
IndexOutput out = csw.createOutput("d.xyz");
|
||||
out.writeInt(0);
|
||||
out.close();
|
||||
|
||||
csw.close();
|
||||
// close a second time - must have no effect according to Closeable
|
||||
csw.close();
|
||||
|
||||
csw = newDir.openCompoundInput("d.cfs", 1024);
|
||||
IndexInput openInput = csw.openInput("d.xyz");
|
||||
assertEquals(0, openInput.readInt());
|
||||
openInput.close();
|
||||
csw.close();
|
||||
// close a second time - must have no effect according to Closeable
|
||||
csw.close();
|
||||
|
||||
newDir.close();
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -105,8 +105,9 @@ public class TestDocTermOrds extends LuceneTestCase {
|
|||
}
|
||||
|
||||
private static class StandardCodecWithOrds extends Codec {
|
||||
|
||||
public StandardCodecWithOrds() {
|
||||
name = "StandardOrds";
|
||||
super("StandardOrds");
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -200,13 +201,13 @@ public class TestDocTermOrds extends LuceneTestCase {
|
|||
StandardPostingsReader.files(dir, segmentInfo, id, files);
|
||||
BlockTermsReader.files(dir, segmentInfo, id, files);
|
||||
FixedGapTermsIndexReader.files(dir, segmentInfo, id, files);
|
||||
DefaultDocValuesConsumer.files(dir, segmentInfo, id, files);
|
||||
DefaultDocValuesConsumer.files(dir, segmentInfo, id, files, getDocValuesUseCFS());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void getExtensions(Set<String> extensions) {
|
||||
getStandardExtensions(extensions);
|
||||
DefaultDocValuesConsumer.getDocValuesExtensions(extensions);
|
||||
DefaultDocValuesConsumer.getDocValuesExtensions(extensions, getDocValuesUseCFS());
|
||||
}
|
||||
|
||||
public static void getStandardExtensions(Set<String> extensions) {
|
||||
|
@ -218,12 +219,12 @@ public class TestDocTermOrds extends LuceneTestCase {
|
|||
|
||||
@Override
|
||||
public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException {
|
||||
return new DefaultDocValuesConsumer(state, BytesRef.getUTF8SortedAsUnicodeComparator());
|
||||
return new DefaultDocValuesConsumer(state, getDocValuesSortComparator(), getDocValuesUseCFS());
|
||||
}
|
||||
|
||||
@Override
|
||||
public PerDocValues docsProducer(SegmentReadState state) throws IOException {
|
||||
return new DefaultDocValuesProducer(state.segmentInfo, state.dir, state.fieldInfos, state.codecId);
|
||||
return new DefaultDocValuesProducer(state.segmentInfo, state.dir, state.fieldInfos, state.codecId, getDocValuesUseCFS(), getDocValuesSortComparator());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -279,7 +279,7 @@ public class TestPerFieldCodecSupport extends LuceneTestCase {
|
|||
CodecProvider provider = new CodecProvider();
|
||||
Codec[] codecs = new Codec[] { new StandardCodec(),
|
||||
new SimpleTextCodec(), new MockSepCodec(),
|
||||
new PulsingCodec(1 + random.nextInt(10)),
|
||||
new PulsingCodec(1 + random.nextInt(20)),
|
||||
new MockVariableIntBlockCodec(1 + random.nextInt(10)),
|
||||
new MockFixedIntBlockCodec(1 + random.nextInt(10)) };
|
||||
for (Codec codec : codecs) {
|
||||
|
|
|
@ -81,7 +81,7 @@ public class TestDocValues extends LuceneTestCase {
|
|||
w.finish(maxDoc);
|
||||
assertEquals(0, trackBytes.get());
|
||||
|
||||
IndexDocValues r = Bytes.getValues(dir, "test", mode, fixedSize, maxDoc);
|
||||
IndexDocValues r = Bytes.getValues(dir, "test", mode, fixedSize, maxDoc, comp);
|
||||
for (int iter = 0; iter < 2; iter++) {
|
||||
ValuesEnum bytesEnum = getEnum(r);
|
||||
assertNotNull("enum is null", bytesEnum);
|
||||
|
@ -105,7 +105,8 @@ public class TestDocValues extends LuceneTestCase {
|
|||
Source s;
|
||||
IndexDocValues.SortedSource ss;
|
||||
if (mode == Bytes.Mode.SORTED) {
|
||||
s = ss = getSortedSource(r, comp);
|
||||
// default is unicode so we can simply pass null here
|
||||
s = ss = getSortedSource(r, random.nextBoolean() ? comp : null);
|
||||
} else {
|
||||
s = getSource(r);
|
||||
ss = null;
|
||||
|
|
|
@ -42,7 +42,7 @@ public class MockCodecProviderFactory extends CodecProviderFactory {
|
|||
public CodecProvider create() {
|
||||
CodecProvider cp = new CodecProvider();
|
||||
cp.register(new StandardCodec());
|
||||
cp.register(new PulsingCodec(1));
|
||||
cp.register(new PulsingCodec());
|
||||
if (codecs != null) {
|
||||
for (Object codec : codecs.getAll("name")) {
|
||||
if (!cp.isCodecRegistered((String)codec)) {
|
||||
|
|
Loading…
Reference in New Issue