4.1 -> 4.2

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1436544 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2013-01-21 18:45:43 +00:00
parent 02fe1c42c9
commit 24d701180e
40 changed files with 650 additions and 194 deletions

View File

@ -18,10 +18,9 @@ package org.apache.lucene.codecs.blockterms;
*/
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene41.Lucene41Codec;
import org.apache.lucene.codecs.lucene41ords.Lucene41WithOrds;
import org.apache.lucene.index.BasePostingsFormatTestCase;
import org.apache.lucene.util._TestUtil;
/**
* Basic tests of a PF using FixedGap terms dictionary
@ -29,13 +28,7 @@ import org.apache.lucene.index.BasePostingsFormatTestCase;
// TODO: we should add an instantiation for VarGap too to TestFramework, and a test in this package
// TODO: ensure both of these are also in rotation in RandomCodec
public class TestFixedGapPostingsFormat extends BasePostingsFormatTestCase {
private final PostingsFormat postings = new Lucene41WithOrds();
private final Codec codec = new Lucene41Codec() {
@Override
public PostingsFormat getPostingsFormatForField(String field) {
return postings;
}
};
private final Codec codec = _TestUtil.alwaysPostingsFormat(new Lucene41WithOrds());
@Override
protected Codec getCodec() {

View File

@ -18,21 +18,14 @@ package org.apache.lucene.codecs.bloom;
*/
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene41.Lucene41Codec;
import org.apache.lucene.index.BasePostingsFormatTestCase;
import org.apache.lucene.util._TestUtil;
/**
* Basic tests for BloomPostingsFormat
*/
public class TestBloomPostingsFormat extends BasePostingsFormatTestCase {
private final PostingsFormat postings = new TestBloomFilteredLucene41Postings();
private final Codec codec = new Lucene41Codec() {
@Override
public PostingsFormat getPostingsFormatForField(String field) {
return postings;
}
};
private final Codec codec = _TestUtil.alwaysPostingsFormat(new TestBloomFilteredLucene41Postings());
@Override
protected Codec getCodec() {

View File

@ -18,23 +18,16 @@ package org.apache.lucene.codecs.intblock;
*/
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene41.Lucene41Codec;
import org.apache.lucene.codecs.mockintblock.MockFixedIntBlockPostingsFormat;
import org.apache.lucene.index.BasePostingsFormatTestCase;
import org.apache.lucene.util._TestUtil;
/**
* Basic tests for FixedIntBlock
*/
public class TestFixedIntBlockPostingsFormat extends BasePostingsFormatTestCase {
// TODO: randomize blocksize
private final PostingsFormat postings = new MockFixedIntBlockPostingsFormat();
private final Codec codec = new Lucene41Codec() {
@Override
public PostingsFormat getPostingsFormatForField(String field) {
return postings;
}
};
private final Codec codec = _TestUtil.alwaysPostingsFormat(new MockFixedIntBlockPostingsFormat());
@Override
protected Codec getCodec() {

View File

@ -18,23 +18,16 @@ package org.apache.lucene.codecs.intblock;
*/
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene41.Lucene41Codec;
import org.apache.lucene.codecs.mockintblock.MockVariableIntBlockPostingsFormat;
import org.apache.lucene.index.BasePostingsFormatTestCase;
import org.apache.lucene.util._TestUtil;
/**
* Basic tests for VariableIntBlock
*/
public class TestVariableIntBlockPostingsFormat extends BasePostingsFormatTestCase {
// TODO: randomize blocksize
private final PostingsFormat postings = new MockVariableIntBlockPostingsFormat();
private final Codec codec = new Lucene41Codec() {
@Override
public PostingsFormat getPostingsFormatForField(String field) {
return postings;
}
};
private final Codec codec = _TestUtil.alwaysPostingsFormat( new MockVariableIntBlockPostingsFormat());
@Override
protected Codec getCodec() {

View File

@ -18,23 +18,15 @@ package org.apache.lucene.codecs.memory;
*/
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene41.Lucene41Codec;
import org.apache.lucene.index.BasePostingsFormatTestCase;
import org.apache.lucene.util._TestUtil;
/**
* Tests DirectPostingsFormat
*/
public class TestDirectPostingsFormat extends BasePostingsFormatTestCase {
// TODO: randomize parameters
private final PostingsFormat postings = new DirectPostingsFormat();
private final Codec codec = new Lucene41Codec() {
@Override
public PostingsFormat getPostingsFormatForField(String field) {
return postings;
}
};
private final Codec codec = _TestUtil.alwaysPostingsFormat(new DirectPostingsFormat());
@Override
protected Codec getCodec() {

View File

@ -18,22 +18,15 @@ package org.apache.lucene.codecs.memory;
*/
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene41.Lucene41Codec;
import org.apache.lucene.index.BasePostingsFormatTestCase;
import org.apache.lucene.util._TestUtil;
/**
* Tests MemoryPostingsFormat
*/
public class TestMemoryPostingsFormat extends BasePostingsFormatTestCase {
// TODO: randomize doPack
private final PostingsFormat postings = new MemoryPostingsFormat();
private final Codec codec = new Lucene41Codec() {
@Override
public PostingsFormat getPostingsFormatForField(String field) {
return postings;
}
};
private final Codec codec = _TestUtil.alwaysPostingsFormat(new MemoryPostingsFormat());
@Override
protected Codec getCodec() {

View File

@ -18,22 +18,15 @@ package org.apache.lucene.codecs.pulsing;
*/
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene41.Lucene41Codec;
import org.apache.lucene.index.BasePostingsFormatTestCase;
import org.apache.lucene.util._TestUtil;
/**
* Tests PulsingPostingsFormat
*/
public class TestPulsingPostingsFormat extends BasePostingsFormatTestCase {
// TODO: randomize cutoff
private final PostingsFormat postings = new Pulsing41PostingsFormat();
private final Codec codec = new Lucene41Codec() {
@Override
public PostingsFormat getPostingsFormatForField(String field) {
return postings;
}
};
private final Codec codec = _TestUtil.alwaysPostingsFormat(new Pulsing41PostingsFormat());
@Override
protected Codec getCodec() {

View File

@ -18,23 +18,16 @@ package org.apache.lucene.codecs.sep;
*/
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene41.Lucene41Codec;
import org.apache.lucene.codecs.mocksep.MockSepPostingsFormat;
import org.apache.lucene.index.BasePostingsFormatTestCase;
import org.apache.lucene.util._TestUtil;
/**
* Tests sep layout
*/
public class TestSepPostingsFormat extends BasePostingsFormatTestCase {
// TODO: randomize cutoff
private final PostingsFormat postings = new MockSepPostingsFormat();
private final Codec codec = new Lucene41Codec() {
@Override
public PostingsFormat getPostingsFormatForField(String field) {
return postings;
}
};
private final Codec codec = _TestUtil.alwaysPostingsFormat(new MockSepPostingsFormat());
@Override
protected Codec getCodec() {

View File

@ -119,7 +119,7 @@ public abstract class Codec implements NamedSPILoader.NamedSPI {
loader.reload(classloader);
}
private static Codec defaultCodec = Codec.forName("Lucene41");
private static Codec defaultCodec = Codec.forName("Lucene42");
/** expert: returns the default codec used for newly created
* {@link IndexWriterConfig}s.

View File

@ -21,13 +21,13 @@ package org.apache.lucene.codecs;
* A codec that forwards all its method calls to another codec.
* <p>
* Extend this class when you need to reuse the functionality of an existing
* codec. For example, if you want to build a codec that redefines Lucene41's
* codec. For example, if you want to build a codec that redefines Lucene42's
* {@link LiveDocsFormat}:
* <pre class="prettyprint">
* public final class CustomCodec extends FilterCodec {
*
* public CustomCodec() {
* super("CustomCodec", new Lucene41Codec());
* super("CustomCodec", new Lucene42Codec());
* }
*
* public LiveDocsFormat liveDocsFormat() {

View File

@ -27,7 +27,7 @@ import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.NormsFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.lucene41.Lucene41NormsFormat;
import org.apache.lucene.codecs.lucene42.Lucene42NormsFormat;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
/**
@ -91,17 +91,17 @@ public final class Lucene40Codec extends Codec {
private final DocValuesFormat defaultDVFormat = DocValuesFormat.forName("Disk");
@Override
public DocValuesFormat docValuesFormat() {
public final DocValuesFormat docValuesFormat() {
// nocommit
return defaultDVFormat;
}
// nocommit need a read-only Lucene40NormsFormat / read-write in the impersonator
private final NormsFormat simpleNormsFormat = new Lucene41NormsFormat();
private final NormsFormat normsFormat = new Lucene42NormsFormat();
@Override
public NormsFormat normsFormat() {
return simpleNormsFormat;
public final NormsFormat normsFormat() {
return normsFormat;
}
@Override

View File

@ -31,7 +31,7 @@ import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosFormat;
import org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat;
import org.apache.lucene.codecs.lucene40.Lucene40SegmentInfoFormat;
import org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
import org.apache.lucene.codecs.lucene42.Lucene42NormsFormat;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
/**
@ -43,9 +43,7 @@ import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
* @see org.apache.lucene.codecs.lucene41 package documentation for file format details.
* @lucene.experimental
*/
// NOTE: if we make largish changes in a minor release, easier to just make Lucene42Codec or whatever
// if they are backwards compatible or smallish we can probably do the backwards in the postingsreader
// (it writes a minor version, etc).
// nocommit: make readonly
public class Lucene41Codec extends Codec {
private final StoredFieldsFormat fieldsFormat = new Lucene41StoredFieldsFormat();
private final TermVectorsFormat vectorsFormat = new Lucene40TermVectorsFormat();
@ -59,14 +57,6 @@ public class Lucene41Codec extends Codec {
return Lucene41Codec.this.getPostingsFormatForField(field);
}
};
private final DocValuesFormat simpleDocValuesFormat = new PerFieldDocValuesFormat() {
@Override
public DocValuesFormat getDocValuesFormatForField(String field) {
return Lucene41Codec.this.getDocValuesFormatForField(field);
}
};
/** Sole constructor. */
public Lucene41Codec() {
@ -112,28 +102,19 @@ public class Lucene41Codec extends Codec {
return defaultFormat;
}
/** Returns the docvalues format that should be used for writing
* new segments of <code>field</code>.
*
* The default implementation always returns "Lucene41"
*/
public DocValuesFormat getDocValuesFormatForField(String field) {
return defaultDVFormat;
}
@Override
public DocValuesFormat docValuesFormat() {
return simpleDocValuesFormat;
public final DocValuesFormat docValuesFormat() {
return dvFormat;
}
private final PostingsFormat defaultFormat = PostingsFormat.forName("Lucene41");
// nocommit
private final DocValuesFormat defaultDVFormat = DocValuesFormat.forName("Lucene41");
private final NormsFormat simpleNormsFormat = new Lucene41NormsFormat();
private final DocValuesFormat dvFormat = DocValuesFormat.forName("Lucene42");
// nocommit
private final NormsFormat normsFormat = new Lucene42NormsFormat();
@Override
public NormsFormat normsFormat() {
return simpleNormsFormat;
public final NormsFormat normsFormat() {
return normsFormat;
}
}

View File

@ -0,0 +1,139 @@
package org.apache.lucene.codecs.lucene42;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.LiveDocsFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.SegmentInfoFormat;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.NormsFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosFormat;
import org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat;
import org.apache.lucene.codecs.lucene40.Lucene40SegmentInfoFormat;
import org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat;
import org.apache.lucene.codecs.lucene41.Lucene41StoredFieldsFormat;
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
/**
* Implements the Lucene 4.2 index format, with configurable per-field postings formats.
* <p>
* If you want to reuse functionality of this codec in another codec, extend
* {@link FilterCodec}.
*
* @see org.apache.lucene.codecs.lucene42 package documentation for file format details.
* @lucene.experimental
*/
// NOTE: if we make largish changes in a minor release, easier to just make Lucene43Codec or whatever
// if they are backwards compatible or smallish we can probably do the backwards in the postingsreader
// (it writes a minor version, etc).
public class Lucene42Codec extends Codec {
  // Formats that are the same for every field; these reuse the Lucene 4.0 / 4.1 format classes.
  private final StoredFieldsFormat storedFields = new Lucene41StoredFieldsFormat();
  private final TermVectorsFormat termVectors = new Lucene40TermVectorsFormat();
  private final FieldInfosFormat fieldInfos = new Lucene40FieldInfosFormat();
  private final SegmentInfoFormat segmentInfo = new Lucene40SegmentInfoFormat();
  private final LiveDocsFormat liveDocs = new Lucene40LiveDocsFormat();

  // Per-field dispatchers: each one asks this codec which concrete format to use for a field.
  private final PostingsFormat perFieldPostings = new PerFieldPostingsFormat() {
    @Override
    public PostingsFormat getPostingsFormatForField(String field) {
      return Lucene42Codec.this.getPostingsFormatForField(field);
    }
  };

  private final DocValuesFormat perFieldDocValues = new PerFieldDocValuesFormat() {
    @Override
    public DocValuesFormat getDocValuesFormatForField(String field) {
      return Lucene42Codec.this.getDocValuesFormatForField(field);
    }
  };

  // Defaults returned by the overridable get*FormatForField hooks below.
  private final PostingsFormat defaultPostings = PostingsFormat.forName("Lucene41");
  private final DocValuesFormat defaultDocValues = DocValuesFormat.forName("Lucene42");

  private final NormsFormat norms = new Lucene42NormsFormat();

  /** Sole constructor; registers this codec under the name "Lucene42". */
  public Lucene42Codec() {
    super("Lucene42");
  }

  /**
   * Chooses the postings format used when writing new segments of <code>field</code>.
   * <p>
   * Unless overridden, every field gets the "Lucene41" postings format.
   */
  public PostingsFormat getPostingsFormatForField(String field) {
    return defaultPostings;
  }

  /**
   * Chooses the docvalues format used when writing new segments of <code>field</code>.
   * <p>
   * Unless overridden, every field gets the "Lucene42" docvalues format.
   */
  public DocValuesFormat getDocValuesFormatForField(String field) {
    return defaultDocValues;
  }

  @Override
  public final StoredFieldsFormat storedFieldsFormat() {
    return storedFields;
  }

  @Override
  public final TermVectorsFormat termVectorsFormat() {
    return termVectors;
  }

  /** Returns the per-field wrapper that delegates to {@link #getPostingsFormatForField(String)}. */
  @Override
  public final PostingsFormat postingsFormat() {
    return perFieldPostings;
  }

  @Override
  public final FieldInfosFormat fieldInfosFormat() {
    return fieldInfos;
  }

  @Override
  public final SegmentInfoFormat segmentInfoFormat() {
    return segmentInfo;
  }

  @Override
  public final LiveDocsFormat liveDocsFormat() {
    return liveDocs;
  }

  /** Returns the per-field wrapper that delegates to {@link #getDocValuesFormatForField(String)}. */
  @Override
  public DocValuesFormat docValuesFormat() {
    return perFieldDocValues;
  }

  @Override
  public NormsFormat normsFormat() {
    return norms;
  }
}

View File

@ -1,4 +1,4 @@
package org.apache.lucene.codecs.lucene41;
package org.apache.lucene.codecs.lucene42;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -37,6 +37,7 @@ import org.apache.lucene.util.fst.FST.INPUT_TYPE;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedInts.FormatAndBits;
/**
* Writes numbers one of two ways:
@ -46,7 +47,7 @@ import org.apache.lucene.util.packed.PackedInts;
* the latter is typically much smaller with lucene's sims, as only some byte values are used,
* but it's often a nonlinear mapping, especially if you don't use crazy boosts.
*/
class Lucene41DocValuesConsumer extends DocValuesConsumer {
class Lucene42DocValuesConsumer extends DocValuesConsumer {
static final int VERSION_START = 0;
static final int VERSION_CURRENT = VERSION_START;
@ -56,7 +57,7 @@ class Lucene41DocValuesConsumer extends DocValuesConsumer {
final IndexOutput data, meta;
Lucene41DocValuesConsumer(SegmentWriteState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
Lucene42DocValuesConsumer(SegmentWriteState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
boolean success = false;
try {
String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
@ -141,14 +142,15 @@ class Lucene41DocValuesConsumer extends DocValuesConsumer {
}
data.writeLong(minValue);
final PackedInts.Writer writer = PackedInts.getWriter(data, count, bitsPerValue, PackedInts.COMPACT);
FormatAndBits formatAndBits = PackedInts.fastestFormatAndBits(count, bitsPerValue, PackedInts.COMPACT);
final PackedInts.Writer writer = PackedInts.getWriter(data, count, formatAndBits.bitsPerValue, 0);
for(Number nv : values) {
writer.add(nv.longValue() - minValue);
}
writer.finish();
}
@Override
public void close() throws IOException {
// nocommit: just write this to a RAMfile or something and flush it here, with #fields first.

View File

@ -1,4 +1,4 @@
package org.apache.lucene.codecs.lucene41;
package org.apache.lucene.codecs.lucene42;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -25,24 +25,24 @@ import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
public class Lucene41DocValuesFormat extends DocValuesFormat {
public class Lucene42DocValuesFormat extends DocValuesFormat {
public Lucene41DocValuesFormat() {
super("Lucene41");
public Lucene42DocValuesFormat() {
super("Lucene42");
}
@Override
public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
return new Lucene41DocValuesConsumer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION);
return new Lucene42DocValuesConsumer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION);
}
@Override
public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException {
return new Lucene41DocValuesProducer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION);
return new Lucene42DocValuesProducer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION);
}
private static final String DATA_CODEC = "Lucene41DocValuesData";
private static final String DATA_CODEC = "Lucene42DocValuesData";
private static final String DATA_EXTENSION = "dvd";
private static final String METADATA_CODEC = "Lucene41DocValuesMetadata";
private static final String METADATA_CODEC = "Lucene42DocValuesMetadata";
private static final String METADATA_EXTENSION = "dvm";
}

View File

@ -1,4 +1,4 @@
package org.apache.lucene.codecs.lucene41;
package org.apache.lucene.codecs.lucene42;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -31,7 +31,6 @@ import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
@ -45,7 +44,7 @@ import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util;
import org.apache.lucene.util.packed.PackedInts;
class Lucene41DocValuesProducer extends DocValuesProducer {
class Lucene42DocValuesProducer extends DocValuesProducer {
// metadata maps (just file pointers and minimal stuff)
private final Map<Integer,NumericEntry> numerics;
private final Map<Integer,BinaryEntry> binaries;
@ -64,15 +63,15 @@ class Lucene41DocValuesProducer extends DocValuesProducer {
private final Map<Integer,FST<Long>> fstInstances =
new HashMap<Integer,FST<Long>>();
Lucene41DocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
Lucene42DocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
// read in the entries from the metadata file.
IndexInput in = state.directory.openInput(metaName, state.context);
boolean success = false;
try {
CodecUtil.checkHeader(in, metaCodec,
Lucene41DocValuesConsumer.VERSION_START,
Lucene41DocValuesConsumer.VERSION_START);
Lucene42DocValuesConsumer.VERSION_START,
Lucene42DocValuesConsumer.VERSION_START);
numerics = new HashMap<Integer,NumericEntry>();
binaries = new HashMap<Integer,BinaryEntry>();
fsts = new HashMap<Integer,FSTEntry>();
@ -89,27 +88,27 @@ class Lucene41DocValuesProducer extends DocValuesProducer {
String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
data = state.directory.openInput(dataName, state.context);
CodecUtil.checkHeader(data, dataCodec,
Lucene41DocValuesConsumer.VERSION_START,
Lucene41DocValuesConsumer.VERSION_START);
Lucene42DocValuesConsumer.VERSION_START,
Lucene42DocValuesConsumer.VERSION_START);
}
private void readFields(IndexInput meta, FieldInfos infos) throws IOException {
int fieldNumber = meta.readVInt();
while (fieldNumber != -1) {
int fieldType = meta.readByte();
if (fieldType == Lucene41DocValuesConsumer.NUMBER) {
if (fieldType == Lucene42DocValuesConsumer.NUMBER) {
NumericEntry entry = new NumericEntry();
entry.offset = meta.readLong();
entry.tableized = meta.readByte() != 0;
numerics.put(fieldNumber, entry);
} else if (fieldType == Lucene41DocValuesConsumer.BYTES) {
} else if (fieldType == Lucene42DocValuesConsumer.BYTES) {
BinaryEntry entry = new BinaryEntry();
entry.offset = meta.readLong();
entry.numBytes = meta.readLong();
entry.minLength = meta.readVInt();
entry.maxLength = meta.readVInt();
binaries.put(fieldNumber, entry);
} else if (fieldType == Lucene41DocValuesConsumer.FST) {
} else if (fieldType == Lucene42DocValuesConsumer.FST) {
FSTEntry entry = new FSTEntry();
entry.offset = meta.readLong();
entry.numOrds = meta.readVInt();

View File

@ -1,4 +1,4 @@
package org.apache.lucene.codecs.lucene41;
package org.apache.lucene.codecs.lucene42;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -25,16 +25,16 @@ import org.apache.lucene.codecs.NormsFormat;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
public class Lucene41NormsFormat extends NormsFormat {
public class Lucene42NormsFormat extends NormsFormat {
@Override
public DocValuesConsumer normsConsumer(SegmentWriteState state) throws IOException {
return new Lucene41DocValuesConsumer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION);
return new Lucene42DocValuesConsumer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION);
}
@Override
public DocValuesProducer normsProducer(SegmentReadState state) throws IOException {
return new Lucene41DocValuesProducer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION);
return new Lucene42DocValuesProducer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION);
}
private static final String DATA_CODEC = "Lucene41NormsData";

View File

@ -0,0 +1,397 @@
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
</head>
<body>
Lucene 4.2 file format.
<h1>Apache Lucene - Index File Formats</h1>
<div>
<ul>
<li><a href="#Introduction">Introduction</a></li>
<li><a href="#Definitions">Definitions</a>
<ul>
<li><a href="#Inverted_Indexing">Inverted Indexing</a></li>
<li><a href="#Types_of_Fields">Types of Fields</a></li>
<li><a href="#Segments">Segments</a></li>
<li><a href="#Document_Numbers">Document Numbers</a></li>
</ul>
</li>
<li><a href="#Overview">Index Structure Overview</a></li>
<li><a href="#File_Naming">File Naming</a></li>
<li><a href="#file-names">Summary of File Extensions</a>
<ul>
<li><a href="#Lock_File">Lock File</a></li>
<li><a href="#History">History</a></li>
<li><a href="#Limitations">Limitations</a></li>
</ul>
</li>
</ul>
</div>
<a name="Introduction"></a>
<h2>Introduction</h2>
<div>
<p>This document defines the index file formats used in this version of Lucene.
If you are using a different version of Lucene, please consult the copy of
<code>docs/</code> that was distributed with
the version you are using.</p>
<p>Apache Lucene is written in Java, but several efforts are underway to write
<a href="http://wiki.apache.org/lucene-java/LuceneImplementations">versions of
Lucene in other programming languages</a>. If these versions are to remain
compatible with Apache Lucene, then a language-independent definition of the
Lucene index format is required. This document thus attempts to provide a
complete and independent definition of the Apache Lucene file formats.</p>
<p>As Lucene evolves, this document should evolve. Versions of Lucene in
different programming languages should endeavor to agree on file formats, and
generate new versions of this document.</p>
</div>
<a name="Definitions" id="Definitions"></a>
<h2>Definitions</h2>
<div>
<p>The fundamental concepts in Lucene are index, document, field and term.</p>
<p>An index contains a sequence of documents.</p>
<ul>
<li>A document is a sequence of fields.</li>
<li>A field is a named sequence of terms.</li>
<li>A term is a sequence of bytes.</li>
</ul>
<p>The same sequence of bytes in two different fields is considered a different
term. Thus terms are represented as a pair: the string naming the field, and the
bytes within the field.</p>
<a name="Inverted_Indexing"></a>
<h3>Inverted Indexing</h3>
<p>The index stores statistics about terms in order to make term-based search
more efficient. Lucene's index falls into the family of indexes known as an
<i>inverted index.</i> This is because it can list, for a term, the documents
that contain it. This is the inverse of the natural relationship, in which
documents list terms.</p>
<a name="Types_of_Fields"></a>
<h3>Types of Fields</h3>
<p>In Lucene, fields may be <i>stored</i>, in which case their text is stored
in the index literally, in a non-inverted manner. Fields that are inverted are
called <i>indexed</i>. A field may be both stored and indexed.</p>
<p>The text of a field may be <i>tokenized</i> into terms to be indexed, or the
text of a field may be used literally as a term to be indexed. Most fields are
tokenized, but sometimes it is useful for certain identifier fields to be
indexed literally.</p>
<p>See the {@link org.apache.lucene.document.Field Field}
java docs for more information on Fields.</p>
<a name="Segments" id="Segments"></a>
<h3>Segments</h3>
<p>Lucene indexes may be composed of multiple sub-indexes, or <i>segments</i>.
Each segment is a fully independent index, which could be searched separately.
Indexes evolve by:</p>
<ol>
<li>Creating new segments for newly added documents.</li>
<li>Merging existing segments.</li>
</ol>
<p>Searches may involve multiple segments and/or multiple indexes, each index
potentially composed of a set of segments.</p>
<a name="Document_Numbers"></a>
<h3>Document Numbers</h3>
<p>Internally, Lucene refers to documents by an integer <i>document number</i>.
The first document added to an index is numbered zero, and each subsequent
document added gets a number one greater than the previous.</p>
<p>Note that a document's number may change, so caution should be taken when
storing these numbers outside of Lucene. In particular, numbers may change in
the following situations:</p>
<ul>
<li>
<p>The numbers stored in each segment are unique only within the segment, and
must be converted before they can be used in a larger context. The standard
technique is to allocate each segment a range of values, based on the range of
numbers used in that segment. To convert a document number from a segment to an
external value, the segment's <i>base</i> document number is added. To convert
an external value back to a segment-specific value, the segment is identified
by the range that the external value is in, and the segment's base value is
subtracted. For example, two five-document segments might be combined, so that
the first segment has a base value of zero, and the second of five. Document
three from the second segment would have an external value of eight.</p>
</li>
<li>
<p>When documents are deleted, gaps are created in the numbering. These are
eventually removed as the index evolves through merging. Deleted documents are
dropped when segments are merged. A freshly-merged segment thus has no gaps in
its numbering.</p>
</li>
</ul>
</div>
<a name="Overview" id="Overview"></a>
<h2>Index Structure Overview</h2>
<div>
<p>Each segment index maintains the following:</p>
<ul>
<li>
{@link org.apache.lucene.codecs.lucene40.Lucene40SegmentInfoFormat Segment info}.
This contains metadata about a segment, such as the number of documents
and what files it uses.
</li>
<li>
{@link org.apache.lucene.codecs.lucene40.Lucene40FieldInfosFormat Field names}.
This contains the set of field names used in the index.
</li>
<li>
{@link org.apache.lucene.codecs.lucene41.Lucene41StoredFieldsFormat Stored Field values}.
This contains, for each document, a list of attribute-value pairs, where the attributes
are field names. These are used to store auxiliary information about the document, such as
its title, url, or an identifier to access a database. The set of stored fields is what is
returned for each hit when searching. This is keyed by document number.
</li>
<li>
{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term dictionary}.
A dictionary containing all of the terms used in all of the
indexed fields of all of the documents. The dictionary also contains the number
of documents which contain the term, and pointers to the term's frequency and
proximity data.
</li>
<li>
{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term Frequency data}.
For each term in the dictionary, the numbers of all the
documents that contain that term, and the frequency of the term in that
document, unless frequencies are omitted (IndexOptions.DOCS_ONLY)
</li>
<li>
{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term Proximity data}.
For each term in the dictionary, the positions at which the
term occurs in each document. Note that this will not exist if all fields in
all documents omit position data.
</li>
<li>
{@link org.apache.lucene.codecs.lucene42.Lucene42NormsFormat Normalization factors}.
For each field in each document, a value is stored
that is multiplied into the score for hits on that field.
</li>
<li>
{@link org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat Term Vectors}.
For each field in each document, the term vector (sometimes
called document vector) may be stored. A term vector consists of term text and
term frequency. To add Term Vectors to your index see the
{@link org.apache.lucene.document.Field Field} constructors
</li>
<li>
{@link org.apache.lucene.codecs.lucene42.Lucene42DocValuesFormat Per-document values}.
Like stored values, these are also keyed by document
number, but are generally intended to be loaded into main memory for fast
access. Whereas stored values are generally intended for summary results from
searches, per-document values are useful for things like scoring factors.
</li>
<li>
{@link org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat Deleted documents}.
An optional file indicating which documents are deleted.
</li>
</ul>
<p>Details on each of these are provided in their linked pages.</p>
</div>
<a name="File_Naming"></a>
<h2>File Naming</h2>
<div>
<p>All files belonging to a segment have the same name with varying extensions.
The extensions correspond to the different file formats described below. When
using the Compound File format (default in 1.4 and greater) these files (except
for the Segment info file, the Lock file, and Deleted documents file) are collapsed
into a single .cfs file (see below for details).</p>
<p>Typically, all segments in an index are stored in a single directory,
although this is not required.</p>
<p>As of version 2.1 (lock-less commits), file names are never re-used (there
is one exception, "segments.gen", see below). That is, when any file is saved
to the Directory it is given a never before used filename. This is achieved
using a simple generations approach. For example, the first segments file is
segments_1, then segments_2, etc. The generation is a sequential long integer
represented in alpha-numeric (base 36) form.</p>
</div>
<a name="file-names" id="file-names"></a>
<h2>Summary of File Extensions</h2>
<div>
<p>The following table summarizes the names and extensions of the files in
Lucene:</p>
<table cellspacing="1" cellpadding="4">
<tr>
<th>Name</th>
<th>Extension</th>
<th>Brief Description</th>
</tr>
<tr>
<td>{@link org.apache.lucene.index.SegmentInfos Segments File}</td>
<td>segments.gen, segments_N</td>
<td>Stores information about a commit point</td>
</tr>
<tr>
<td><a href="#Lock_File">Lock File</a></td>
<td>write.lock</td>
<td>The write lock prevents multiple IndexWriters from writing to the same
index.</td>
</tr>
<tr>
<td>{@link org.apache.lucene.codecs.lucene40.Lucene40SegmentInfoFormat Segment Info}</td>
<td>.si</td>
<td>Stores metadata about a segment</td>
</tr>
<tr>
<td>{@link org.apache.lucene.store.CompoundFileDirectory Compound File}</td>
<td>.cfs, .cfe</td>
<td>An optional "virtual" file consisting of all the other index files for
systems that frequently run out of file handles.</td>
</tr>
<tr>
<td>{@link org.apache.lucene.codecs.lucene40.Lucene40FieldInfosFormat Fields}</td>
<td>.fnm</td>
<td>Stores information about the fields</td>
</tr>
<tr>
<td>{@link org.apache.lucene.codecs.lucene41.Lucene41StoredFieldsFormat Field Index}</td>
<td>.fdx</td>
<td>Contains pointers to field data</td>
</tr>
<tr>
<td>{@link org.apache.lucene.codecs.lucene41.Lucene41StoredFieldsFormat Field Data}</td>
<td>.fdt</td>
<td>The stored fields for documents</td>
</tr>
<tr>
<td>{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term Dictionary}</td>
<td>.tim</td>
<td>The term dictionary, stores term info</td>
</tr>
<tr>
<td>{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term Index}</td>
<td>.tip</td>
<td>The index into the Term Dictionary</td>
</tr>
<tr>
<td>{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Frequencies}</td>
<td>.doc</td>
<td>Contains the list of docs which contain each term along with frequency</td>
</tr>
<tr>
<td>{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Positions}</td>
<td>.pos</td>
<td>Stores position information about where a term occurs in the index</td>
</tr>
<tr>
<td>{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Payloads}</td>
<td>.pay</td>
<td>Stores additional per-position metadata information such as character offsets and user payloads</td>
</tr>
<tr>
<td>{@link org.apache.lucene.codecs.lucene42.Lucene42NormsFormat Norms}</td>
<td>.nvd, .nvm</td>
<td>Encodes length and boost factors for docs and fields</td>
</tr>
<tr>
<td>{@link org.apache.lucene.codecs.lucene42.Lucene42DocValuesFormat Per-Document Values}</td>
<td>.dvd, .dvm</td>
<td>Encodes additional scoring factors or other per-document information.</td>
</tr>
<tr>
<td>{@link org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat Term Vector Index}</td>
<td>.tvx</td>
<td>Stores offset into the document data file</td>
</tr>
<tr>
<td>{@link org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat Term Vector Documents}</td>
<td>.tvd</td>
<td>Contains information about each document that has term vectors</td>
</tr>
<tr>
<td>{@link org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat Term Vector Fields}</td>
<td>.tvf</td>
<td>The field level info about term vectors</td>
</tr>
<tr>
<td>{@link org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat Deleted Documents}</td>
<td>.del</td>
<td>Info about what documents are deleted</td>
</tr>
</table>
</div>
<a name="Lock_File" id="Lock_File"></a>
<h2>Lock File</h2>
<p>The write lock, which is stored in the index directory by default, is named
"write.lock". If the lock directory is different from the index directory then
the write lock will be named "XXXX-write.lock" where XXXX is a unique prefix
derived from the full path to the index directory. When this file is present, a
writer is currently modifying the index (adding or removing documents). This
lock file ensures that only one writer is modifying the index at a time.</p>
<a name="History"></a>
<h2>History</h2>
<p>Compatibility notes are provided in this document, describing how file
formats have changed from prior versions:</p>
<ul>
<li>In version 2.1, the file format was changed to allow lock-less commits (ie,
no more commit lock). The change is fully backwards compatible: you can open a
pre-2.1 index for searching or adding/deleting of docs. When the new segments
file is saved (committed), it will be written in the new file format (meaning
no specific "upgrade" process is needed). But note that once a commit has
occurred, pre-2.1 Lucene will not be able to read the index.</li>
<li>In version 2.3, the file format was changed to allow segments to share a
single set of doc store (vectors &amp; stored fields) files. This allows for
faster indexing in certain cases. The change is fully backwards compatible (in
the same way as the lock-less commits change in 2.1).</li>
<li>In version 2.4, Strings are now written as true UTF-8 byte sequences, not
Java's modified UTF-8. See <a href="http://issues.apache.org/jira/browse/LUCENE-510">
LUCENE-510</a> for details.</li>
<li>In version 2.9, an optional opaque Map&lt;String,String&gt; CommitUserData
may be passed to IndexWriter's commit methods (and later retrieved), which is
recorded in the segments_N file. See <a href="http://issues.apache.org/jira/browse/LUCENE-1382">
LUCENE-1382</a> for details. Also,
diagnostics were added to each segment written recording details about why it
was written (due to flush, merge; which OS/JRE was used; etc.). See issue
<a href="http://issues.apache.org/jira/browse/LUCENE-1654">LUCENE-1654</a> for details.</li>
<li>In version 3.0, compressed fields are no longer written to the index (they
can still be read, but on merge the new segment will write them, uncompressed).
See issue <a href="http://issues.apache.org/jira/browse/LUCENE-1960">LUCENE-1960</a>
for details.</li>
<li>In version 3.1, segments record the code version that created them. See
<a href="http://issues.apache.org/jira/browse/LUCENE-2720">LUCENE-2720</a> for details.
Additionally segments track explicitly whether or not they have term vectors.
See <a href="http://issues.apache.org/jira/browse/LUCENE-2811">LUCENE-2811</a>
for details.</li>
<li>In version 3.2, numeric fields are written natively to the stored fields
file; previously they were stored in text format only.</li>
<li>In version 3.4, fields can omit position data while still indexing term
frequencies.</li>
<li>In version 4.0, the format of the inverted index became extensible via
the {@link org.apache.lucene.codecs.Codec Codec} api. Fast per-document storage
({@link org.apache.lucene.index.DocValues DocValues}) was introduced. Normalization
factors need no longer be a single byte; they can be any DocValues
{@link org.apache.lucene.index.DocValues.Type type}. Terms need not be Unicode
strings; they can be any byte sequence. Term offsets can optionally be indexed
into the postings lists. Payloads can be stored in the term vectors.</li>
<li>In version 4.1, the format of the postings list changed to use either
FOR compression or variable-byte encoding, depending upon the frequency
of the term.</li>
</ul>
<a name="Limitations" id="Limitations"></a>
<h2>Limitations</h2>
<div>
<p>When referring to term numbers, Lucene's current implementation uses a Java
<code>int</code> to hold the term index, which means the
maximum number of unique terms in any single index segment is ~2.1 billion
times the term index interval (default 128) = ~274 billion. This is technically
not a limitation of the index file format, just of Lucene's current
implementation.</p>
<p>Similarly, Lucene uses a Java <code>int</code> to refer to
document numbers, and the index file format uses an <code>Int32</code>
on-disk to store document numbers. This is a limitation
of both the index file format and the current implementation. Eventually these
should be replaced with either <code>UInt64</code> values, or
better yet, {@link org.apache.lucene.store.DataOutput#writeVInt VInt} values which have no limit.</p>
</div>
</body>
</html>

View File

@ -61,8 +61,8 @@ name of your codec.
If you just want to customise the {@link org.apache.lucene.codecs.PostingsFormat}, or use different postings
formats for different fields, then you can register your custom postings format in the same way (in
META-INF/services/org.apache.lucene.codecs.PostingsFormat), and then extend the default
{@link org.apache.lucene.codecs.lucene41.Lucene41Codec} and override
{@link org.apache.lucene.codecs.lucene41.Lucene41Codec#getPostingsFormatForField(String)} to return your custom
{@link org.apache.lucene.codecs.lucene42.Lucene42Codec} and override
{@link org.apache.lucene.codecs.lucene42.Lucene42Codec#getPostingsFormatForField(String)} to return your custom
postings format.
</p>
</body>

View File

@ -193,7 +193,7 @@ public class LiveIndexWriterConfig {
* <pre class="prettyprint">
* //customize Lucene41PostingsFormat, passing minBlockSize=50, maxBlockSize=100
* final PostingsFormat tweakedPostings = new Lucene41PostingsFormat(50, 100);
* iwc.setCodec(new Lucene41Codec() {
* iwc.setCodec(new Lucene42Codec() {
* &#64;Override
* public PostingsFormat getPostingsFormatForField(String field) {
* if (field.equals("fieldWithTonsOfTerms"))

View File

@ -15,3 +15,4 @@
org.apache.lucene.codecs.lucene40.Lucene40Codec
org.apache.lucene.codecs.lucene41.Lucene41Codec
org.apache.lucene.codecs.lucene42.Lucene42Codec

View File

@ -13,4 +13,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.
org.apache.lucene.codecs.lucene41.Lucene41DocValuesFormat
org.apache.lucene.codecs.lucene42.Lucene42DocValuesFormat

View File

@ -22,7 +22,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.lucene41.Lucene41Codec;
import org.apache.lucene.codecs.lucene42.Lucene42Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FloatDocValuesField;
@ -42,7 +42,6 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
import org.apache.lucene.util.LuceneTestCase;
/**
@ -705,9 +704,9 @@ public class TestDemoDocValue extends LuceneTestCase {
Directory directory = newDirectory();
// we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
final DocValuesFormat fast = DocValuesFormat.forName("Lucene41");
final DocValuesFormat fast = DocValuesFormat.forName("Lucene42");
final DocValuesFormat slow = DocValuesFormat.forName("SimpleText");
iwc.setCodec(new Lucene41Codec() {
iwc.setCodec(new Lucene42Codec() {
@Override
public DocValuesFormat getDocValuesFormatForField(String field) {
if ("dv1".equals(field)) {

View File

@ -19,7 +19,7 @@ package org.apache.lucene;
import org.apache.lucene.analysis.*;
import org.apache.lucene.codecs.*;
import org.apache.lucene.codecs.lucene41.Lucene41Codec;
import org.apache.lucene.codecs.lucene42.Lucene42Codec;
import org.apache.lucene.document.*;
import org.apache.lucene.index.*;
import org.apache.lucene.search.*;
@ -31,7 +31,7 @@ import org.apache.lucene.util.*;
public class TestExternalCodecs extends LuceneTestCase {
private static final class CustomPerFieldCodec extends Lucene41Codec {
private static final class CustomPerFieldCodec extends Lucene42Codec {
private final PostingsFormat ramFormat = PostingsFormat.forName("RAMOnly");
private final PostingsFormat defaultFormat = PostingsFormat.forName("Lucene41");

View File

@ -19,12 +19,13 @@ package org.apache.lucene.codecs.lucene41;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.BasePostingsFormatTestCase;
import org.apache.lucene.util._TestUtil;
/**
* Tests BlockPostingsFormat
*/
public class TestBlockPostingsFormat extends BasePostingsFormatTestCase {
private final Codec codec = new Lucene41Codec();
private final Codec codec = _TestUtil.alwaysPostingsFormat(new Lucene41PostingsFormat());
@Override
protected Codec getCodec() {

View File

@ -18,7 +18,6 @@ package org.apache.lucene.codecs.lucene41;
*/
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
@ -46,12 +45,7 @@ public class TestBlockPostingsFormat2 extends LuceneTestCase {
super.setUp();
dir = newFSDirectory(_TestUtil.getTempDir("testDFBlockSize"));
iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
iwc.setCodec(new Lucene41Codec() {
@Override
public PostingsFormat getPostingsFormatForField(String field) {
return PostingsFormat.forName("Lucene41");
}
});
iwc.setCodec(_TestUtil.alwaysPostingsFormat(new Lucene41PostingsFormat()));
iw = new RandomIndexWriter(random(), dir, iwc);
iw.setAddDocValuesFields(false);
iw.setDoRandomForceMerge(false); // we will ourselves

View File

@ -29,7 +29,6 @@ import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.MockVariableLengthPayloadFilter;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.codecs.lucene41.Lucene41Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
@ -84,7 +83,7 @@ public class TestBlockPostingsFormat3 extends LuceneTestCase {
}
};
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
iwc.setCodec(new Lucene41Codec());
iwc.setCodec(_TestUtil.alwaysPostingsFormat(new Lucene41PostingsFormat()));
// TODO we could actually add more fields implemented with different PFs
// or, just put this test into the usual rotation?
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);

View File

@ -21,7 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene41.Lucene41Codec;
import org.apache.lucene.codecs.lucene42.Lucene42Codec;
import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat;
import org.apache.lucene.codecs.mocksep.MockSepPostingsFormat;
import org.apache.lucene.codecs.pulsing.Pulsing41PostingsFormat;
@ -142,7 +142,7 @@ public class TestPerFieldPostingsFormat2 extends LuceneTestCase {
assertQuery(new Term("content", "ccc"), dir, 10);
assertQuery(new Term("content", "aaa"), dir, 10);
Lucene41Codec codec = (Lucene41Codec)iwconf.getCodec();
Codec codec = iwconf.getCodec();
iwconf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
.setOpenMode(OpenMode.APPEND).setCodec(codec);
@ -158,7 +158,7 @@ public class TestPerFieldPostingsFormat2 extends LuceneTestCase {
}
addDocs2(writer, 10);
writer.commit();
codec = (Lucene41Codec)iwconf.getCodec();
codec = iwconf.getCodec();
assertEquals(30, writer.maxDoc());
assertQuery(new Term("content", "bbb"), dir, 10);
assertQuery(new Term("content", "ccc"), dir, 10); ////
@ -200,7 +200,7 @@ public class TestPerFieldPostingsFormat2 extends LuceneTestCase {
}
public static class MockCodec extends Lucene41Codec {
public static class MockCodec extends Lucene42Codec {
final PostingsFormat lucene40 = new Lucene41PostingsFormat();
final PostingsFormat simpleText = new SimpleTextPostingsFormat();
final PostingsFormat mockSep = new MockSepPostingsFormat();
@ -217,7 +217,7 @@ public class TestPerFieldPostingsFormat2 extends LuceneTestCase {
}
}
public static class MockCodec2 extends Lucene41Codec {
public static class MockCodec2 extends Lucene42Codec {
final PostingsFormat lucene40 = new Lucene41PostingsFormat();
final PostingsFormat simpleText = new SimpleTextPostingsFormat();
@ -268,7 +268,7 @@ public class TestPerFieldPostingsFormat2 extends LuceneTestCase {
}
public void testSameCodecDifferentInstance() throws Exception {
Codec codec = new Lucene41Codec() {
Codec codec = new Lucene42Codec() {
@Override
public PostingsFormat getPostingsFormatForField(String field) {
if ("id".equals(field)) {
@ -284,7 +284,7 @@ public class TestPerFieldPostingsFormat2 extends LuceneTestCase {
}
public void testSameCodecDifferentParams() throws Exception {
Codec codec = new Lucene41Codec() {
Codec codec = new Lucene42Codec() {
@Override
public PostingsFormat getPostingsFormatForField(String field) {
if ("id".equals(field)) {

View File

@ -27,7 +27,7 @@ import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene41.Lucene41Codec;
import org.apache.lucene.codecs.lucene42.Lucene42Codec;
import org.apache.lucene.codecs.pulsing.Pulsing41PostingsFormat;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@ -1058,7 +1058,7 @@ public class TestAddIndexes extends LuceneTestCase {
aux2.close();
}
private static final class CustomPerFieldCodec extends Lucene41Codec {
private static final class CustomPerFieldCodec extends Lucene42Codec {
private final PostingsFormat simpleTextFormat = PostingsFormat.forName("SimpleText");
private final PostingsFormat defaultFormat = PostingsFormat.forName("Lucene41");
private final PostingsFormat mockSepFormat = PostingsFormat.forName("MockSep");
@ -1109,7 +1109,7 @@ public class TestAddIndexes extends LuceneTestCase {
private static final class UnRegisteredCodec extends FilterCodec {
public UnRegisteredCodec() {
super("NotRegistered", new Lucene41Codec());
super("NotRegistered", new Lucene42Codec());
}
}

View File

@ -20,8 +20,8 @@ package org.apache.lucene.index;
import java.io.IOException;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.lucene42.Lucene42Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexFileNames;
@ -33,7 +33,6 @@ import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
import org.junit.Ignore;
/**
* Test that a plain default puts codec headers in all files.
@ -42,7 +41,7 @@ public class TestAllFilesHaveCodecHeader extends LuceneTestCase {
public void test() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
conf.setCodec(Codec.forName("Lucene41"));
conf.setCodec(new Lucene42Codec());
// riw should sometimes create docvalues fields, etc
RandomIndexWriter riw = new RandomIndexWriter(random(), dir, conf);
Document doc = new Document();

View File

@ -60,7 +60,7 @@ public class TestDuelingCodecs extends LuceneTestCase {
public void setUp() throws Exception {
super.setUp();
// for now its SimpleText vs Lucene41(random postings format)
// for now its SimpleText vs Lucene42(random postings format)
// as this gives the best overall coverage. when we have more
// codecs we should probably pick 2 from Codec.availableCodecs()

View File

@ -25,8 +25,8 @@ import org.apache.lucene.codecs.Codec;
// enough to test the basics via Codec
public class TestNamedSPILoader extends LuceneTestCase {
public void testLookup() {
Codec codec = Codec.forName("Lucene41");
assertEquals("Lucene41", codec.getName());
Codec codec = Codec.forName("Lucene42");
assertEquals("Lucene42", codec.getName());
}
// we want an exception if its not found.
@ -39,6 +39,6 @@ public class TestNamedSPILoader extends LuceneTestCase {
public void testAvailableServices() {
Set<String> codecs = Codec.availableCodecs();
assertTrue(codecs.contains("Lucene41"));
assertTrue(codecs.contains("Lucene42"));
}
}

View File

@ -66,7 +66,7 @@
<li><a href="SYSTEM_REQUIREMENTS.html">System Requirements</a>: Minimum and supported Java versions.</li>
<li><a href="MIGRATE.html">Migration Guide</a>: What changed in Lucene 4; how to migrate code from Lucene 3.x.</li>
<li><a href="JRE_VERSION_MIGRATION.html">JRE Version Migration</a>: Information about upgrading between major JRE versions.</li>
<li><a href="core/org/apache/lucene/codecs/lucene41/package-summary.html#package_description">File Formats</a>: Guide to the supported index format used by Lucene. This can be customized by using <a href="core/org/apache/lucene/codecs/package-summary.html#package_description">an alternate codec</a>.</li>
<li><a href="core/org/apache/lucene/codecs/lucene42/package-summary.html#package_description">File Formats</a>: Guide to the supported index format used by Lucene. This can be customized by using <a href="core/org/apache/lucene/codecs/package-summary.html#package_description">an alternate codec</a>.</li>
<li><a href="core/org/apache/lucene/search/package-summary.html#package_description">Search and Scoring in Lucene</a>: Introduction to how Lucene scores documents.</li>
<li><a href="core/org/apache/lucene/search/similarities/TFIDFSimilarity.html">Classic Scoring Formula</a>: Formula of Lucene's classic <a href="http://en.wikipedia.org/wiki/Vector_Space_Model">Vector Space</a> implementation. (look <a href="core/org/apache/lucene/search/similarities/package-summary.html#package_description">here</a> for other models)</li>
<li><a href="queryparser/org/apache/lucene/queryparser/classic/package-summary.html#package_description">Classic QueryParser Syntax</a>: Overview of the Classic QueryParser's syntax and features.</li>

View File

@ -21,10 +21,10 @@ import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.lucene41.Lucene41Codec;
import org.apache.lucene.codecs.lucene42.Lucene42Codec;
/**
* Acts like {@link Lucene41Codec} but with additional asserts.
* Acts like {@link Lucene42Codec} but with additional asserts.
*/
public final class AssertingCodec extends FilterCodec {
@ -33,7 +33,7 @@ public final class AssertingCodec extends FilterCodec {
private final StoredFieldsFormat storedFields = new AssertingStoredFieldsFormat();
public AssertingCodec() {
super("Asserting", new Lucene41Codec());
super("Asserting", new Lucene42Codec());
}
@Override

View File

@ -22,13 +22,13 @@ import java.util.Random;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.compressing.dummy.DummyCompressingCodec;
import org.apache.lucene.codecs.lucene41.Lucene41Codec;
import org.apache.lucene.codecs.lucene42.Lucene42Codec;
import com.carrotsearch.randomizedtesting.generators.RandomInts;
/**
* A codec that uses {@link CompressingStoredFieldsFormat} for its stored
* fields and delegates to {@link Lucene41Codec} for everything else.
* fields and delegates to {@link Lucene42Codec} for everything else.
*/
public abstract class CompressingCodec extends FilterCodec {
@ -71,7 +71,7 @@ public abstract class CompressingCodec extends FilterCodec {
* Creates a compressing codec with a given segment suffix
*/
public CompressingCodec(String name, String segmentSuffix, CompressionMode compressionMode, int chunkSize) {
super(name, new Lucene41Codec());
super(name, new Lucene42Codec());
this.storedFieldsFormat = new CompressingStoredFieldsFormat(name, segmentSuffix, compressionMode, chunkSize);
}

View File

@ -30,7 +30,7 @@ import org.apache.lucene.codecs.blockterms.FixedGapTermsIndexReader;
import org.apache.lucene.codecs.blockterms.FixedGapTermsIndexWriter;
import org.apache.lucene.codecs.blockterms.TermsIndexReaderBase;
import org.apache.lucene.codecs.blockterms.TermsIndexWriterBase;
import org.apache.lucene.codecs.lucene41.Lucene41Codec; // javadocs
import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat; // javadocs
import org.apache.lucene.codecs.lucene41.Lucene41PostingsReader;
import org.apache.lucene.codecs.lucene41.Lucene41PostingsWriter;
import org.apache.lucene.index.SegmentReadState;
@ -41,7 +41,7 @@ import org.apache.lucene.util.BytesRef;
// any PostingsBaseFormat and make it ord-able...
/**
* Customized version of {@link Lucene41Codec} that uses
* Customized version of {@link Lucene41PostingsFormat} that uses
* {@link FixedGapTermsIndexWriter}.
*/
public final class Lucene41WithOrds extends PostingsFormat {

View File

@ -30,10 +30,10 @@ import java.util.Set;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.asserting.AssertingPostingsFormat;
import org.apache.lucene.codecs.lucene41.Lucene41Codec;
import org.apache.lucene.codecs.lucene41.Lucene41DocValuesFormat;
import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat;
import org.apache.lucene.codecs.lucene41ords.Lucene41WithOrds;
import org.apache.lucene.codecs.lucene42.Lucene42Codec;
import org.apache.lucene.codecs.lucene42.Lucene42DocValuesFormat;
import org.apache.lucene.codecs.bloom.TestBloomFilteredLucene41Postings;
import org.apache.lucene.codecs.diskdv.DiskDocValuesFormat;
import org.apache.lucene.codecs.memory.DirectPostingsFormat;
@ -58,7 +58,7 @@ import org.apache.lucene.util._TestUtil;
* documents in different orders and the test will still be deterministic
* and reproducable.
*/
public class RandomCodec extends Lucene41Codec {
public class RandomCodec extends Lucene42Codec {
/** Shuffled list of postings formats to use for new mappings */
private List<PostingsFormat> formats = new ArrayList<PostingsFormat>();
@ -142,7 +142,7 @@ public class RandomCodec extends Lucene41Codec {
new MemoryPostingsFormat(false, random.nextFloat()));
addDocValues(avoidCodecs,
new Lucene41DocValuesFormat(),
new Lucene42DocValuesFormat(),
new DiskDocValuesFormat(),
new SimpleTextDocValuesFormat());

View File

@ -34,6 +34,7 @@ import org.apache.lucene.codecs.compressing.CompressingCodec;
import org.apache.lucene.codecs.lucene40.Lucene40Codec;
import org.apache.lucene.codecs.lucene40.Lucene40RWPostingsFormat;
import org.apache.lucene.codecs.lucene41.Lucene41Codec;
import org.apache.lucene.codecs.lucene42.Lucene42Codec;
import org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat;
import org.apache.lucene.codecs.simpletext.SimpleTextCodec;
import org.apache.lucene.index.RandomCodec;
@ -133,7 +134,7 @@ final class TestRuleSetupAndRestoreClassEnv extends AbstractBeforeAfterRule {
savedCodec = Codec.getDefault();
int randomVal = random.nextInt(10);
// nocommit: add 41 impersonation
if ("Lucene40".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) &&
"random".equals(TEST_POSTINGSFORMAT) &&
randomVal < 2 &&
@ -147,7 +148,7 @@ final class TestRuleSetupAndRestoreClassEnv extends AbstractBeforeAfterRule {
} else {
format = PostingsFormat.forName(TEST_POSTINGSFORMAT);
}
codec = new Lucene41Codec() {
codec = new Lucene42Codec() {
@Override
public PostingsFormat getPostingsFormatForField(String field) {
return format;

View File

@ -44,7 +44,7 @@ import java.util.zip.ZipFile;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene41.Lucene41Codec;
import org.apache.lucene.codecs.lucene42.Lucene42Codec;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.ByteDocValuesField;
@ -702,7 +702,7 @@ public class _TestUtil {
if (LuceneTestCase.VERBOSE) {
System.out.println("forcing postings format to:" + format);
}
return new Lucene41Codec() {
return new Lucene42Codec() {
@Override
public PostingsFormat getPostingsFormatForField(String field) {
return format;

View File

@ -2,7 +2,7 @@ package org.apache.solr.core;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene41.Lucene41Codec;
import org.apache.lucene.codecs.lucene42.Lucene42Codec;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaAware;
import org.apache.solr.schema.SchemaField;
@ -42,7 +42,7 @@ public class SchemaCodecFactory extends CodecFactory implements SchemaAware {
@Override
public void inform(final IndexSchema schema) {
codec = new Lucene41Codec() {
codec = new Lucene42Codec() {
@Override
public PostingsFormat getPostingsFormatForField(String field) {
final SchemaField fieldOrNull = schema.getFieldOrNull(field);
@ -55,6 +55,7 @@ public class SchemaCodecFactory extends CodecFactory implements SchemaAware {
}
return super.getPostingsFormatForField(field);
}
// nocommit: dv too
};
}