mirror of https://github.com/apache/lucene.git
4.1 -> 4.2
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1436544 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
02fe1c42c9
commit
24d701180e
|
@ -18,10 +18,9 @@ package org.apache.lucene.codecs.blockterms;
|
|||
*/
|
||||
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.codecs.PostingsFormat;
|
||||
import org.apache.lucene.codecs.lucene41.Lucene41Codec;
|
||||
import org.apache.lucene.codecs.lucene41ords.Lucene41WithOrds;
|
||||
import org.apache.lucene.index.BasePostingsFormatTestCase;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
|
||||
/**
|
||||
* Basic tests of a PF using FixedGap terms dictionary
|
||||
|
@ -29,13 +28,7 @@ import org.apache.lucene.index.BasePostingsFormatTestCase;
|
|||
// TODO: we should add an instantiation for VarGap too to TestFramework, and a test in this package
|
||||
// TODO: ensure both of these are also in rotation in RandomCodec
|
||||
public class TestFixedGapPostingsFormat extends BasePostingsFormatTestCase {
|
||||
private final PostingsFormat postings = new Lucene41WithOrds();
|
||||
private final Codec codec = new Lucene41Codec() {
|
||||
@Override
|
||||
public PostingsFormat getPostingsFormatForField(String field) {
|
||||
return postings;
|
||||
}
|
||||
};
|
||||
private final Codec codec = _TestUtil.alwaysPostingsFormat(new Lucene41WithOrds());
|
||||
|
||||
@Override
|
||||
protected Codec getCodec() {
|
||||
|
|
|
@ -18,21 +18,14 @@ package org.apache.lucene.codecs.bloom;
|
|||
*/
|
||||
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.codecs.PostingsFormat;
|
||||
import org.apache.lucene.codecs.lucene41.Lucene41Codec;
|
||||
import org.apache.lucene.index.BasePostingsFormatTestCase;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
|
||||
/**
|
||||
* Basic tests for BloomPostingsFormat
|
||||
*/
|
||||
public class TestBloomPostingsFormat extends BasePostingsFormatTestCase {
|
||||
private final PostingsFormat postings = new TestBloomFilteredLucene41Postings();
|
||||
private final Codec codec = new Lucene41Codec() {
|
||||
@Override
|
||||
public PostingsFormat getPostingsFormatForField(String field) {
|
||||
return postings;
|
||||
}
|
||||
};
|
||||
private final Codec codec = _TestUtil.alwaysPostingsFormat(new TestBloomFilteredLucene41Postings());
|
||||
|
||||
@Override
|
||||
protected Codec getCodec() {
|
||||
|
|
|
@ -18,23 +18,16 @@ package org.apache.lucene.codecs.intblock;
|
|||
*/
|
||||
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.codecs.PostingsFormat;
|
||||
import org.apache.lucene.codecs.lucene41.Lucene41Codec;
|
||||
import org.apache.lucene.codecs.mockintblock.MockFixedIntBlockPostingsFormat;
|
||||
import org.apache.lucene.index.BasePostingsFormatTestCase;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
|
||||
/**
|
||||
* Basic tests for FixedIntBlock
|
||||
*/
|
||||
public class TestFixedIntBlockPostingsFormat extends BasePostingsFormatTestCase {
|
||||
// TODO: randomize blocksize
|
||||
private final PostingsFormat postings = new MockFixedIntBlockPostingsFormat();
|
||||
private final Codec codec = new Lucene41Codec() {
|
||||
@Override
|
||||
public PostingsFormat getPostingsFormatForField(String field) {
|
||||
return postings;
|
||||
}
|
||||
};
|
||||
private final Codec codec = _TestUtil.alwaysPostingsFormat(new MockFixedIntBlockPostingsFormat());
|
||||
|
||||
@Override
|
||||
protected Codec getCodec() {
|
||||
|
|
|
@ -18,23 +18,16 @@ package org.apache.lucene.codecs.intblock;
|
|||
*/
|
||||
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.codecs.PostingsFormat;
|
||||
import org.apache.lucene.codecs.lucene41.Lucene41Codec;
|
||||
import org.apache.lucene.codecs.mockintblock.MockVariableIntBlockPostingsFormat;
|
||||
import org.apache.lucene.index.BasePostingsFormatTestCase;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
|
||||
/**
|
||||
* Basic tests for VariableIntBlock
|
||||
*/
|
||||
public class TestVariableIntBlockPostingsFormat extends BasePostingsFormatTestCase {
|
||||
// TODO: randomize blocksize
|
||||
private final PostingsFormat postings = new MockVariableIntBlockPostingsFormat();
|
||||
private final Codec codec = new Lucene41Codec() {
|
||||
@Override
|
||||
public PostingsFormat getPostingsFormatForField(String field) {
|
||||
return postings;
|
||||
}
|
||||
};
|
||||
private final Codec codec = _TestUtil.alwaysPostingsFormat( new MockVariableIntBlockPostingsFormat());
|
||||
|
||||
@Override
|
||||
protected Codec getCodec() {
|
||||
|
|
|
@ -18,23 +18,15 @@ package org.apache.lucene.codecs.memory;
|
|||
*/
|
||||
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.codecs.PostingsFormat;
|
||||
import org.apache.lucene.codecs.lucene41.Lucene41Codec;
|
||||
import org.apache.lucene.index.BasePostingsFormatTestCase;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
|
||||
/**
|
||||
* Tests DirectPostingsFormat
|
||||
*/
|
||||
public class TestDirectPostingsFormat extends BasePostingsFormatTestCase {
|
||||
// TODO: randomize parameters
|
||||
private final PostingsFormat postings = new DirectPostingsFormat();
|
||||
|
||||
private final Codec codec = new Lucene41Codec() {
|
||||
@Override
|
||||
public PostingsFormat getPostingsFormatForField(String field) {
|
||||
return postings;
|
||||
}
|
||||
};
|
||||
private final Codec codec = _TestUtil.alwaysPostingsFormat(new DirectPostingsFormat());
|
||||
|
||||
@Override
|
||||
protected Codec getCodec() {
|
||||
|
|
|
@ -18,22 +18,15 @@ package org.apache.lucene.codecs.memory;
|
|||
*/
|
||||
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.codecs.PostingsFormat;
|
||||
import org.apache.lucene.codecs.lucene41.Lucene41Codec;
|
||||
import org.apache.lucene.index.BasePostingsFormatTestCase;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
|
||||
/**
|
||||
* Tests MemoryPostingsFormat
|
||||
*/
|
||||
public class TestMemoryPostingsFormat extends BasePostingsFormatTestCase {
|
||||
// TODO: randomize doPack
|
||||
private final PostingsFormat postings = new MemoryPostingsFormat();
|
||||
private final Codec codec = new Lucene41Codec() {
|
||||
@Override
|
||||
public PostingsFormat getPostingsFormatForField(String field) {
|
||||
return postings;
|
||||
}
|
||||
};
|
||||
private final Codec codec = _TestUtil.alwaysPostingsFormat(new MemoryPostingsFormat());
|
||||
|
||||
@Override
|
||||
protected Codec getCodec() {
|
||||
|
|
|
@ -18,22 +18,15 @@ package org.apache.lucene.codecs.pulsing;
|
|||
*/
|
||||
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.codecs.PostingsFormat;
|
||||
import org.apache.lucene.codecs.lucene41.Lucene41Codec;
|
||||
import org.apache.lucene.index.BasePostingsFormatTestCase;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
|
||||
/**
|
||||
* Tests PulsingPostingsFormat
|
||||
*/
|
||||
public class TestPulsingPostingsFormat extends BasePostingsFormatTestCase {
|
||||
// TODO: randomize cutoff
|
||||
private final PostingsFormat postings = new Pulsing41PostingsFormat();
|
||||
private final Codec codec = new Lucene41Codec() {
|
||||
@Override
|
||||
public PostingsFormat getPostingsFormatForField(String field) {
|
||||
return postings;
|
||||
}
|
||||
};
|
||||
private final Codec codec = _TestUtil.alwaysPostingsFormat(new Pulsing41PostingsFormat());
|
||||
|
||||
@Override
|
||||
protected Codec getCodec() {
|
||||
|
|
|
@ -18,23 +18,16 @@ package org.apache.lucene.codecs.sep;
|
|||
*/
|
||||
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.codecs.PostingsFormat;
|
||||
import org.apache.lucene.codecs.lucene41.Lucene41Codec;
|
||||
import org.apache.lucene.codecs.mocksep.MockSepPostingsFormat;
|
||||
import org.apache.lucene.index.BasePostingsFormatTestCase;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
|
||||
/**
|
||||
* Tests sep layout
|
||||
*/
|
||||
public class TestSepPostingsFormat extends BasePostingsFormatTestCase {
|
||||
// TODO: randomize cutoff
|
||||
private final PostingsFormat postings = new MockSepPostingsFormat();
|
||||
private final Codec codec = new Lucene41Codec() {
|
||||
@Override
|
||||
public PostingsFormat getPostingsFormatForField(String field) {
|
||||
return postings;
|
||||
}
|
||||
};
|
||||
private final Codec codec = _TestUtil.alwaysPostingsFormat(new MockSepPostingsFormat());
|
||||
|
||||
@Override
|
||||
protected Codec getCodec() {
|
||||
|
|
|
@ -119,7 +119,7 @@ public abstract class Codec implements NamedSPILoader.NamedSPI {
|
|||
loader.reload(classloader);
|
||||
}
|
||||
|
||||
private static Codec defaultCodec = Codec.forName("Lucene41");
|
||||
private static Codec defaultCodec = Codec.forName("Lucene42");
|
||||
|
||||
/** expert: returns the default codec used for newly created
|
||||
* {@link IndexWriterConfig}s.
|
||||
|
|
|
@ -21,13 +21,13 @@ package org.apache.lucene.codecs;
|
|||
* A codec that forwards all its method calls to another codec.
|
||||
* <p>
|
||||
* Extend this class when you need to reuse the functionality of an existing
|
||||
* codec. For example, if you want to build a codec that redefines Lucene41's
|
||||
* codec. For example, if you want to build a codec that redefines Lucene42's
|
||||
* {@link LiveDocsFormat}:
|
||||
* <pre class="prettyprint">
|
||||
* public final class CustomCodec extends FilterCodec {
|
||||
*
|
||||
* public CustomCodec() {
|
||||
* super("CustomCodec", new Lucene41Codec());
|
||||
* super("CustomCodec", new Lucene42Codec());
|
||||
* }
|
||||
*
|
||||
* public LiveDocsFormat liveDocsFormat() {
|
||||
|
|
|
@ -27,7 +27,7 @@ import org.apache.lucene.codecs.DocValuesFormat;
|
|||
import org.apache.lucene.codecs.NormsFormat;
|
||||
import org.apache.lucene.codecs.StoredFieldsFormat;
|
||||
import org.apache.lucene.codecs.TermVectorsFormat;
|
||||
import org.apache.lucene.codecs.lucene41.Lucene41NormsFormat;
|
||||
import org.apache.lucene.codecs.lucene42.Lucene42NormsFormat;
|
||||
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
|
||||
|
||||
/**
|
||||
|
@ -91,17 +91,17 @@ public final class Lucene40Codec extends Codec {
|
|||
private final DocValuesFormat defaultDVFormat = DocValuesFormat.forName("Disk");
|
||||
|
||||
@Override
|
||||
public DocValuesFormat docValuesFormat() {
|
||||
public final DocValuesFormat docValuesFormat() {
|
||||
// nocommit
|
||||
return defaultDVFormat;
|
||||
}
|
||||
|
||||
// nocommit need a read-only Lucene40NormsFormat / read-write in the impersonator
|
||||
private final NormsFormat simpleNormsFormat = new Lucene41NormsFormat();
|
||||
private final NormsFormat normsFormat = new Lucene42NormsFormat();
|
||||
|
||||
@Override
|
||||
public NormsFormat normsFormat() {
|
||||
return simpleNormsFormat;
|
||||
public final NormsFormat normsFormat() {
|
||||
return normsFormat;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -31,7 +31,7 @@ import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosFormat;
|
|||
import org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat;
|
||||
import org.apache.lucene.codecs.lucene40.Lucene40SegmentInfoFormat;
|
||||
import org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat;
|
||||
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
|
||||
import org.apache.lucene.codecs.lucene42.Lucene42NormsFormat;
|
||||
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
|
||||
|
||||
/**
|
||||
|
@ -43,9 +43,7 @@ import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
|
|||
* @see org.apache.lucene.codecs.lucene41 package documentation for file format details.
|
||||
* @lucene.experimental
|
||||
*/
|
||||
// NOTE: if we make largish changes in a minor release, easier to just make Lucene42Codec or whatever
|
||||
// if they are backwards compatible or smallish we can probably do the backwards in the postingsreader
|
||||
// (it writes a minor version, etc).
|
||||
// nocommit: make readonly
|
||||
public class Lucene41Codec extends Codec {
|
||||
private final StoredFieldsFormat fieldsFormat = new Lucene41StoredFieldsFormat();
|
||||
private final TermVectorsFormat vectorsFormat = new Lucene40TermVectorsFormat();
|
||||
|
@ -59,14 +57,6 @@ public class Lucene41Codec extends Codec {
|
|||
return Lucene41Codec.this.getPostingsFormatForField(field);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
private final DocValuesFormat simpleDocValuesFormat = new PerFieldDocValuesFormat() {
|
||||
@Override
|
||||
public DocValuesFormat getDocValuesFormatForField(String field) {
|
||||
return Lucene41Codec.this.getDocValuesFormatForField(field);
|
||||
}
|
||||
};
|
||||
|
||||
/** Sole constructor. */
|
||||
public Lucene41Codec() {
|
||||
|
@ -112,28 +102,19 @@ public class Lucene41Codec extends Codec {
|
|||
return defaultFormat;
|
||||
}
|
||||
|
||||
/** Returns the docvalues format that should be used for writing
|
||||
* new segments of <code>field</code>.
|
||||
*
|
||||
* The default implementation always returns "Lucene41"
|
||||
*/
|
||||
public DocValuesFormat getDocValuesFormatForField(String field) {
|
||||
return defaultDVFormat;
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocValuesFormat docValuesFormat() {
|
||||
return simpleDocValuesFormat;
|
||||
public final DocValuesFormat docValuesFormat() {
|
||||
return dvFormat;
|
||||
}
|
||||
|
||||
private final PostingsFormat defaultFormat = PostingsFormat.forName("Lucene41");
|
||||
// nocommit
|
||||
private final DocValuesFormat defaultDVFormat = DocValuesFormat.forName("Lucene41");
|
||||
|
||||
private final NormsFormat simpleNormsFormat = new Lucene41NormsFormat();
|
||||
private final DocValuesFormat dvFormat = DocValuesFormat.forName("Lucene42");
|
||||
// nocommit
|
||||
private final NormsFormat normsFormat = new Lucene42NormsFormat();
|
||||
|
||||
@Override
|
||||
public NormsFormat normsFormat() {
|
||||
return simpleNormsFormat;
|
||||
public final NormsFormat normsFormat() {
|
||||
return normsFormat;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,139 @@
|
|||
package org.apache.lucene.codecs.lucene42;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.codecs.FieldInfosFormat;
|
||||
import org.apache.lucene.codecs.FilterCodec;
|
||||
import org.apache.lucene.codecs.LiveDocsFormat;
|
||||
import org.apache.lucene.codecs.PostingsFormat;
|
||||
import org.apache.lucene.codecs.SegmentInfoFormat;
|
||||
import org.apache.lucene.codecs.DocValuesFormat;
|
||||
import org.apache.lucene.codecs.NormsFormat;
|
||||
import org.apache.lucene.codecs.StoredFieldsFormat;
|
||||
import org.apache.lucene.codecs.TermVectorsFormat;
|
||||
import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosFormat;
|
||||
import org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat;
|
||||
import org.apache.lucene.codecs.lucene40.Lucene40SegmentInfoFormat;
|
||||
import org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat;
|
||||
import org.apache.lucene.codecs.lucene41.Lucene41StoredFieldsFormat;
|
||||
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
|
||||
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
|
||||
|
||||
/**
|
||||
* Implements the Lucene 4.2 index format, with configurable per-field postings formats.
|
||||
* <p>
|
||||
* If you want to reuse functionality of this codec in another codec, extend
|
||||
* {@link FilterCodec}.
|
||||
*
|
||||
* @see org.apache.lucene.codecs.lucene42 package documentation for file format details.
|
||||
* @lucene.experimental
|
||||
*/
|
||||
// NOTE: if we make largish changes in a minor release, easier to just make Lucene43Codec or whatever
|
||||
// if they are backwards compatible or smallish we can probably do the backwards in the postingsreader
|
||||
// (it writes a minor version, etc).
|
||||
public class Lucene42Codec extends Codec {
|
||||
private final StoredFieldsFormat fieldsFormat = new Lucene41StoredFieldsFormat();
|
||||
private final TermVectorsFormat vectorsFormat = new Lucene40TermVectorsFormat();
|
||||
private final FieldInfosFormat fieldInfosFormat = new Lucene40FieldInfosFormat();
|
||||
private final SegmentInfoFormat infosFormat = new Lucene40SegmentInfoFormat();
|
||||
private final LiveDocsFormat liveDocsFormat = new Lucene40LiveDocsFormat();
|
||||
|
||||
private final PostingsFormat postingsFormat = new PerFieldPostingsFormat() {
|
||||
@Override
|
||||
public PostingsFormat getPostingsFormatForField(String field) {
|
||||
return Lucene42Codec.this.getPostingsFormatForField(field);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
private final DocValuesFormat docValuesFormat = new PerFieldDocValuesFormat() {
|
||||
@Override
|
||||
public DocValuesFormat getDocValuesFormatForField(String field) {
|
||||
return Lucene42Codec.this.getDocValuesFormatForField(field);
|
||||
}
|
||||
};
|
||||
|
||||
/** Sole constructor. */
|
||||
public Lucene42Codec() {
|
||||
super("Lucene42");
|
||||
}
|
||||
|
||||
@Override
|
||||
public final StoredFieldsFormat storedFieldsFormat() {
|
||||
return fieldsFormat;
|
||||
}
|
||||
|
||||
@Override
|
||||
public final TermVectorsFormat termVectorsFormat() {
|
||||
return vectorsFormat;
|
||||
}
|
||||
|
||||
@Override
|
||||
public final PostingsFormat postingsFormat() {
|
||||
return postingsFormat;
|
||||
}
|
||||
|
||||
@Override
|
||||
public final FieldInfosFormat fieldInfosFormat() {
|
||||
return fieldInfosFormat;
|
||||
}
|
||||
|
||||
@Override
|
||||
public final SegmentInfoFormat segmentInfoFormat() {
|
||||
return infosFormat;
|
||||
}
|
||||
|
||||
@Override
|
||||
public final LiveDocsFormat liveDocsFormat() {
|
||||
return liveDocsFormat;
|
||||
}
|
||||
|
||||
/** Returns the postings format that should be used for writing
|
||||
* new segments of <code>field</code>.
|
||||
*
|
||||
* The default implementation always returns "Lucene41"
|
||||
*/
|
||||
public PostingsFormat getPostingsFormatForField(String field) {
|
||||
return defaultFormat;
|
||||
}
|
||||
|
||||
/** Returns the docvalues format that should be used for writing
|
||||
* new segments of <code>field</code>.
|
||||
*
|
||||
* The default implementation always returns "Lucene42"
|
||||
*/
|
||||
public DocValuesFormat getDocValuesFormatForField(String field) {
|
||||
return defaultDVFormat;
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocValuesFormat docValuesFormat() {
|
||||
return docValuesFormat;
|
||||
}
|
||||
|
||||
private final PostingsFormat defaultFormat = PostingsFormat.forName("Lucene41");
|
||||
private final DocValuesFormat defaultDVFormat = DocValuesFormat.forName("Lucene42");
|
||||
|
||||
private final NormsFormat normsFormat = new Lucene42NormsFormat();
|
||||
|
||||
@Override
|
||||
public NormsFormat normsFormat() {
|
||||
return normsFormat;
|
||||
}
|
||||
}
|
|
@ -1,4 +1,4 @@
|
|||
package org.apache.lucene.codecs.lucene41;
|
||||
package org.apache.lucene.codecs.lucene42;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
|
@ -37,6 +37,7 @@ import org.apache.lucene.util.fst.FST.INPUT_TYPE;
|
|||
import org.apache.lucene.util.fst.PositiveIntOutputs;
|
||||
import org.apache.lucene.util.fst.Util;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
import org.apache.lucene.util.packed.PackedInts.FormatAndBits;
|
||||
|
||||
/**
|
||||
* Writes numbers one of two ways:
|
||||
|
@ -46,7 +47,7 @@ import org.apache.lucene.util.packed.PackedInts;
|
|||
* the latter is typically much smaller with lucene's sims, as only some byte values are used,
|
||||
* but its often a nonlinear mapping, especially if you dont use crazy boosts.
|
||||
*/
|
||||
class Lucene41DocValuesConsumer extends DocValuesConsumer {
|
||||
class Lucene42DocValuesConsumer extends DocValuesConsumer {
|
||||
static final int VERSION_START = 0;
|
||||
static final int VERSION_CURRENT = VERSION_START;
|
||||
|
||||
|
@ -56,7 +57,7 @@ class Lucene41DocValuesConsumer extends DocValuesConsumer {
|
|||
|
||||
final IndexOutput data, meta;
|
||||
|
||||
Lucene41DocValuesConsumer(SegmentWriteState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
|
||||
Lucene42DocValuesConsumer(SegmentWriteState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
|
||||
boolean success = false;
|
||||
try {
|
||||
String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
|
||||
|
@ -141,14 +142,15 @@ class Lucene41DocValuesConsumer extends DocValuesConsumer {
|
|||
}
|
||||
|
||||
data.writeLong(minValue);
|
||||
|
||||
final PackedInts.Writer writer = PackedInts.getWriter(data, count, bitsPerValue, PackedInts.COMPACT);
|
||||
|
||||
FormatAndBits formatAndBits = PackedInts.fastestFormatAndBits(count, bitsPerValue, PackedInts.COMPACT);
|
||||
final PackedInts.Writer writer = PackedInts.getWriter(data, count, formatAndBits.bitsPerValue, 0);
|
||||
for(Number nv : values) {
|
||||
writer.add(nv.longValue() - minValue);
|
||||
}
|
||||
writer.finish();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
// nocommit: just write this to a RAMfile or something and flush it here, with #fields first.
|
|
@ -1,4 +1,4 @@
|
|||
package org.apache.lucene.codecs.lucene41;
|
||||
package org.apache.lucene.codecs.lucene42;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
|
@ -25,24 +25,24 @@ import org.apache.lucene.codecs.DocValuesFormat;
|
|||
import org.apache.lucene.index.SegmentReadState;
|
||||
import org.apache.lucene.index.SegmentWriteState;
|
||||
|
||||
public class Lucene41DocValuesFormat extends DocValuesFormat {
|
||||
public class Lucene42DocValuesFormat extends DocValuesFormat {
|
||||
|
||||
public Lucene41DocValuesFormat() {
|
||||
super("Lucene41");
|
||||
public Lucene42DocValuesFormat() {
|
||||
super("Lucene42");
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
|
||||
return new Lucene41DocValuesConsumer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION);
|
||||
return new Lucene42DocValuesConsumer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION);
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException {
|
||||
return new Lucene41DocValuesProducer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION);
|
||||
return new Lucene42DocValuesProducer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION);
|
||||
}
|
||||
|
||||
private static final String DATA_CODEC = "Lucene41DocValuesData";
|
||||
private static final String DATA_CODEC = "Lucene42DocValuesData";
|
||||
private static final String DATA_EXTENSION = "dvd";
|
||||
private static final String METADATA_CODEC = "Lucene41DocValuesMetadata";
|
||||
private static final String METADATA_CODEC = "Lucene42DocValuesMetadata";
|
||||
private static final String METADATA_EXTENSION = "dvm";
|
||||
}
|
|
@ -1,4 +1,4 @@
|
|||
package org.apache.lucene.codecs.lucene41;
|
||||
package org.apache.lucene.codecs.lucene42;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
|
@ -31,7 +31,6 @@ import org.apache.lucene.index.IndexFileNames;
|
|||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.index.SegmentReadState;
|
||||
import org.apache.lucene.index.SortedDocValues;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
@ -45,7 +44,7 @@ import org.apache.lucene.util.fst.PositiveIntOutputs;
|
|||
import org.apache.lucene.util.fst.Util;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
|
||||
class Lucene41DocValuesProducer extends DocValuesProducer {
|
||||
class Lucene42DocValuesProducer extends DocValuesProducer {
|
||||
// metadata maps (just file pointers and minimal stuff)
|
||||
private final Map<Integer,NumericEntry> numerics;
|
||||
private final Map<Integer,BinaryEntry> binaries;
|
||||
|
@ -64,15 +63,15 @@ class Lucene41DocValuesProducer extends DocValuesProducer {
|
|||
private final Map<Integer,FST<Long>> fstInstances =
|
||||
new HashMap<Integer,FST<Long>>();
|
||||
|
||||
Lucene41DocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
|
||||
Lucene42DocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
|
||||
String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
|
||||
// read in the entries from the metadata file.
|
||||
IndexInput in = state.directory.openInput(metaName, state.context);
|
||||
boolean success = false;
|
||||
try {
|
||||
CodecUtil.checkHeader(in, metaCodec,
|
||||
Lucene41DocValuesConsumer.VERSION_START,
|
||||
Lucene41DocValuesConsumer.VERSION_START);
|
||||
Lucene42DocValuesConsumer.VERSION_START,
|
||||
Lucene42DocValuesConsumer.VERSION_START);
|
||||
numerics = new HashMap<Integer,NumericEntry>();
|
||||
binaries = new HashMap<Integer,BinaryEntry>();
|
||||
fsts = new HashMap<Integer,FSTEntry>();
|
||||
|
@ -89,27 +88,27 @@ class Lucene41DocValuesProducer extends DocValuesProducer {
|
|||
String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
|
||||
data = state.directory.openInput(dataName, state.context);
|
||||
CodecUtil.checkHeader(data, dataCodec,
|
||||
Lucene41DocValuesConsumer.VERSION_START,
|
||||
Lucene41DocValuesConsumer.VERSION_START);
|
||||
Lucene42DocValuesConsumer.VERSION_START,
|
||||
Lucene42DocValuesConsumer.VERSION_START);
|
||||
}
|
||||
|
||||
private void readFields(IndexInput meta, FieldInfos infos) throws IOException {
|
||||
int fieldNumber = meta.readVInt();
|
||||
while (fieldNumber != -1) {
|
||||
int fieldType = meta.readByte();
|
||||
if (fieldType == Lucene41DocValuesConsumer.NUMBER) {
|
||||
if (fieldType == Lucene42DocValuesConsumer.NUMBER) {
|
||||
NumericEntry entry = new NumericEntry();
|
||||
entry.offset = meta.readLong();
|
||||
entry.tableized = meta.readByte() != 0;
|
||||
numerics.put(fieldNumber, entry);
|
||||
} else if (fieldType == Lucene41DocValuesConsumer.BYTES) {
|
||||
} else if (fieldType == Lucene42DocValuesConsumer.BYTES) {
|
||||
BinaryEntry entry = new BinaryEntry();
|
||||
entry.offset = meta.readLong();
|
||||
entry.numBytes = meta.readLong();
|
||||
entry.minLength = meta.readVInt();
|
||||
entry.maxLength = meta.readVInt();
|
||||
binaries.put(fieldNumber, entry);
|
||||
} else if (fieldType == Lucene41DocValuesConsumer.FST) {
|
||||
} else if (fieldType == Lucene42DocValuesConsumer.FST) {
|
||||
FSTEntry entry = new FSTEntry();
|
||||
entry.offset = meta.readLong();
|
||||
entry.numOrds = meta.readVInt();
|
|
@ -1,4 +1,4 @@
|
|||
package org.apache.lucene.codecs.lucene41;
|
||||
package org.apache.lucene.codecs.lucene42;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
|
@ -25,16 +25,16 @@ import org.apache.lucene.codecs.NormsFormat;
|
|||
import org.apache.lucene.index.SegmentReadState;
|
||||
import org.apache.lucene.index.SegmentWriteState;
|
||||
|
||||
public class Lucene41NormsFormat extends NormsFormat {
|
||||
public class Lucene42NormsFormat extends NormsFormat {
|
||||
|
||||
@Override
|
||||
public DocValuesConsumer normsConsumer(SegmentWriteState state) throws IOException {
|
||||
return new Lucene41DocValuesConsumer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION);
|
||||
return new Lucene42DocValuesConsumer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION);
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocValuesProducer normsProducer(SegmentReadState state) throws IOException {
|
||||
return new Lucene41DocValuesProducer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION);
|
||||
return new Lucene42DocValuesProducer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION);
|
||||
}
|
||||
|
||||
private static final String DATA_CODEC = "Lucene41NormsData";
|
|
@ -0,0 +1,397 @@
|
|||
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
</head>
|
||||
<body>
|
||||
Lucene 4.2 file format.
|
||||
|
||||
<h1>Apache Lucene - Index File Formats</h1>
|
||||
<div>
|
||||
<ul>
|
||||
<li><a href="#Introduction">Introduction</a></li>
|
||||
<li><a href="#Definitions">Definitions</a>
|
||||
<ul>
|
||||
<li><a href="#Inverted_Indexing">Inverted Indexing</a></li>
|
||||
<li><a href="#Types_of_Fields">Types of Fields</a></li>
|
||||
<li><a href="#Segments">Segments</a></li>
|
||||
<li><a href="#Document_Numbers">Document Numbers</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li><a href="#Overview">Index Structure Overview</a></li>
|
||||
<li><a href="#File_Naming">File Naming</a></li>
|
||||
<li><a href="#file-names">Summary of File Extensions</a></li>
|
||||
<ul>
|
||||
<li><a href="#Lock_File">Lock File</a></li>
|
||||
<li><a href="#History">History</a></li>
|
||||
<li><a href="#Limitations">Limitations</a></li>
|
||||
</ul>
|
||||
</ul>
|
||||
</div>
|
||||
<a name="Introduction"></a>
|
||||
<h2>Introduction</h2>
|
||||
<div>
|
||||
<p>This document defines the index file formats used in this version of Lucene.
|
||||
If you are using a different version of Lucene, please consult the copy of
|
||||
<code>docs/</code> that was distributed with
|
||||
the version you are using.</p>
|
||||
<p>Apache Lucene is written in Java, but several efforts are underway to write
|
||||
<a href="http://wiki.apache.org/lucene-java/LuceneImplementations">versions of
|
||||
Lucene in other programming languages</a>. If these versions are to remain
|
||||
compatible with Apache Lucene, then a language-independent definition of the
|
||||
Lucene index format is required. This document thus attempts to provide a
|
||||
complete and independent definition of the Apache Lucene file formats.</p>
|
||||
<p>As Lucene evolves, this document should evolve. Versions of Lucene in
|
||||
different programming languages should endeavor to agree on file formats, and
|
||||
generate new versions of this document.</p>
|
||||
</div>
|
||||
<a name="Definitions" id="Definitions"></a>
|
||||
<h2>Definitions</h2>
|
||||
<div>
|
||||
<p>The fundamental concepts in Lucene are index, document, field and term.</p>
|
||||
<p>An index contains a sequence of documents.</p>
|
||||
<ul>
|
||||
<li>A document is a sequence of fields.</li>
|
||||
<li>A field is a named sequence of terms.</li>
|
||||
<li>A term is a sequence of bytes.</li>
|
||||
</ul>
|
||||
<p>The same sequence of bytes in two different fields is considered a different
|
||||
term. Thus terms are represented as a pair: the string naming the field, and the
|
||||
bytes within the field.</p>
|
||||
<a name="Inverted_Indexing"></a>
|
||||
<h3>Inverted Indexing</h3>
|
||||
<p>The index stores statistics about terms in order to make term-based search
|
||||
more efficient. Lucene's index falls into the family of indexes known as an
|
||||
<i>inverted index.</i> This is because it can list, for a term, the documents
|
||||
that contain it. This is the inverse of the natural relationship, in which
|
||||
documents list terms.</p>
|
||||
<a name="Types_of_Fields"></a>
|
||||
<h3>Types of Fields</h3>
|
||||
<p>In Lucene, fields may be <i>stored</i>, in which case their text is stored
|
||||
in the index literally, in a non-inverted manner. Fields that are inverted are
|
||||
called <i>indexed</i>. A field may be both stored and indexed.</p>
|
||||
<p>The text of a field may be <i>tokenized</i> into terms to be indexed, or the
|
||||
text of a field may be used literally as a term to be indexed. Most fields are
|
||||
tokenized, but sometimes it is useful for certain identifier fields to be
|
||||
indexed literally.</p>
|
||||
<p>See the {@link org.apache.lucene.document.Field Field}
|
||||
java docs for more information on Fields.</p>
|
||||
<a name="Segments" id="Segments"></a>
|
||||
<h3>Segments</h3>
|
||||
<p>Lucene indexes may be composed of multiple sub-indexes, or <i>segments</i>.
|
||||
Each segment is a fully independent index, which could be searched separately.
|
||||
Indexes evolve by:</p>
|
||||
<ol>
|
||||
<li>Creating new segments for newly added documents.</li>
|
||||
<li>Merging existing segments.</li>
|
||||
</ol>
|
||||
<p>Searches may involve multiple segments and/or multiple indexes, each index
|
||||
potentially composed of a set of segments.</p>
|
||||
<a name="Document_Numbers"></a>
|
||||
<h3>Document Numbers</h3>
|
||||
<p>Internally, Lucene refers to documents by an integer <i>document number</i>.
|
||||
The first document added to an index is numbered zero, and each subsequent
|
||||
document added gets a number one greater than the previous.</p>
|
||||
<p>Note that a document's number may change, so caution should be taken when
|
||||
storing these numbers outside of Lucene. In particular, numbers may change in
|
||||
the following situations:</p>
|
||||
<ul>
|
||||
<li>
|
||||
<p>The numbers stored in each segment are unique only within the segment, and
|
||||
must be converted before they can be used in a larger context. The standard
|
||||
technique is to allocate each segment a range of values, based on the range of
|
||||
numbers used in that segment. To convert a document number from a segment to an
|
||||
external value, the segment's <i>base</i> document number is added. To convert
|
||||
an external value back to a segment-specific value, the segment is identified
|
||||
by the range that the external value is in, and the segment's base value is
|
||||
subtracted. For example two five document segments might be combined, so that
|
||||
the first segment has a base value of zero, and the second of five. Document
|
||||
three from the second segment would have an external value of eight.</p>
|
||||
</li>
|
||||
<li>
|
||||
<p>When documents are deleted, gaps are created in the numbering. These are
|
||||
eventually removed as the index evolves through merging. Deleted documents are
|
||||
dropped when segments are merged. A freshly-merged segment thus has no gaps in
|
||||
its numbering.</p>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
<a name="Overview" id="Overview"></a>
|
||||
<h2>Index Structure Overview</h2>
|
||||
<div>
|
||||
<p>Each segment index maintains the following:</p>
|
||||
<ul>
|
||||
<li>
|
||||
{@link org.apache.lucene.codecs.lucene40.Lucene40SegmentInfoFormat Segment info}.
|
||||
This contains metadata about a segment, such as the number of documents,
|
||||
what files it uses,
|
||||
</li>
|
||||
<li>
|
||||
{@link org.apache.lucene.codecs.lucene40.Lucene40FieldInfosFormat Field names}.
|
||||
This contains the set of field names used in the index.
|
||||
</li>
|
||||
<li>
|
||||
{@link org.apache.lucene.codecs.lucene41.Lucene41StoredFieldsFormat Stored Field values}.
|
||||
This contains, for each document, a list of attribute-value pairs, where the attributes
|
||||
are field names. These are used to store auxiliary information about the document, such as
|
||||
its title, url, or an identifier to access a database. The set of stored fields are what is
|
||||
returned for each hit when searching. This is keyed by document number.
|
||||
</li>
|
||||
<li>
|
||||
{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term dictionary}.
|
||||
A dictionary containing all of the terms used in all of the
|
||||
indexed fields of all of the documents. The dictionary also contains the number
|
||||
of documents which contain the term, and pointers to the term's frequency and
|
||||
proximity data.
|
||||
</li>
|
||||
<li>
|
||||
{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term Frequency data}.
|
||||
For each term in the dictionary, the numbers of all the
|
||||
documents that contain that term, and the frequency of the term in that
|
||||
document, unless frequencies are omitted (IndexOptions.DOCS_ONLY)
|
||||
</li>
|
||||
<li>
|
||||
{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term Proximity data}.
|
||||
For each term in the dictionary, the positions that the
|
||||
term occurs in each document. Note that this will not exist if all fields in
|
||||
all documents omit position data.
|
||||
</li>
|
||||
<li>
|
||||
{@link org.apache.lucene.codecs.lucene42.Lucene42NormsFormat Normalization factors}.
|
||||
For each field in each document, a value is stored
|
||||
that is multiplied into the score for hits on that field.
|
||||
</li>
|
||||
<li>
|
||||
{@link org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat Term Vectors}.
|
||||
For each field in each document, the term vector (sometimes
|
||||
called document vector) may be stored. A term vector consists of term text and
|
||||
term frequency. To add Term Vectors to your index see the
|
||||
{@link org.apache.lucene.document.Field Field} constructors
|
||||
</li>
|
||||
<li>
|
||||
{@link org.apache.lucene.codecs.lucene42.Lucene42DocValuesFormat Per-document values}.
|
||||
Like stored values, these are also keyed by document
|
||||
number, but are generally intended to be loaded into main memory for fast
|
||||
access. Whereas stored values are generally intended for summary results from
|
||||
searches, per-document values are useful for things like scoring factors.
|
||||
</li>
|
||||
<li>
|
||||
{@link org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat Deleted documents}.
|
||||
An optional file indicating which documents are deleted.
|
||||
</li>
|
||||
</ul>
|
||||
<p>Details on each of these are provided in their linked pages.</p>
|
||||
</div>
|
||||
<a name="File_Naming"></a>
|
||||
<h2>File Naming</h2>
|
||||
<div>
|
||||
<p>All files belonging to a segment have the same name with varying extensions.
|
||||
The extensions correspond to the different file formats described below. When
|
||||
using the Compound File format (default in 1.4 and greater) these files (except
|
||||
for the Segment info file, the Lock file, and Deleted documents file) are collapsed
|
||||
into a single .cfs file (see below for details)</p>
|
||||
<p>Typically, all segments in an index are stored in a single directory,
|
||||
although this is not required.</p>
|
||||
<p>As of version 2.1 (lock-less commits), file names are never re-used (there
|
||||
is one exception, "segments.gen", see below). That is, when any file is saved
|
||||
to the Directory it is given a never before used filename. This is achieved
|
||||
using a simple generations approach. For example, the first segments file is
|
||||
segments_1, then segments_2, etc. The generation is a sequential long integer
|
||||
represented in alpha-numeric (base 36) form.</p>
|
||||
</div>
|
||||
<a name="file-names" id="file-names"></a>
|
||||
<h2>Summary of File Extensions</h2>
|
||||
<div>
|
||||
<p>The following table summarizes the names and extensions of the files in
|
||||
Lucene:</p>
|
||||
<table cellspacing="1" cellpadding="4">
|
||||
<tr>
|
||||
<th>Name</th>
|
||||
<th>Extension</th>
|
||||
<th>Brief Description</th>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>{@link org.apache.lucene.index.SegmentInfos Segments File}</td>
|
||||
<td>segments.gen, segments_N</td>
|
||||
<td>Stores information about a commit point</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="#Lock_File">Lock File</a></td>
|
||||
<td>write.lock</td>
|
||||
<td>The Write lock prevents multiple IndexWriters from writing to the same
|
||||
file.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>{@link org.apache.lucene.codecs.lucene40.Lucene40SegmentInfoFormat Segment Info}</td>
|
||||
<td>.si</td>
|
||||
<td>Stores metadata about a segment</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>{@link org.apache.lucene.store.CompoundFileDirectory Compound File}</td>
|
||||
<td>.cfs, .cfe</td>
|
||||
<td>An optional "virtual" file consisting of all the other index files for
|
||||
systems that frequently run out of file handles.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>{@link org.apache.lucene.codecs.lucene40.Lucene40FieldInfosFormat Fields}</td>
|
||||
<td>.fnm</td>
|
||||
<td>Stores information about the fields</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>{@link org.apache.lucene.codecs.lucene41.Lucene41StoredFieldsFormat Field Index}</td>
|
||||
<td>.fdx</td>
|
||||
<td>Contains pointers to field data</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>{@link org.apache.lucene.codecs.lucene41.Lucene41StoredFieldsFormat Field Data}</td>
|
||||
<td>.fdt</td>
|
||||
<td>The stored fields for documents</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term Dictionary}</td>
|
||||
<td>.tim</td>
|
||||
<td>The term dictionary, stores term info</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term Index}</td>
|
||||
<td>.tip</td>
|
||||
<td>The index into the Term Dictionary</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Frequencies}</td>
|
||||
<td>.doc</td>
|
||||
<td>Contains the list of docs which contain each term along with frequency</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Positions}</td>
|
||||
<td>.pos</td>
|
||||
<td>Stores position information about where a term occurs in the index</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Payloads}</td>
|
||||
<td>.pay</td>
|
||||
<td>Stores additional per-position metadata information such as character offsets and user payloads</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>{@link org.apache.lucene.codecs.lucene42.Lucene42NormsFormat Norms}</td>
|
||||
<td>.nvd, .nvm</td>
|
||||
<td>Encodes length and boost factors for docs and fields</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>{@link org.apache.lucene.codecs.lucene42.Lucene42DocValuesFormat Per-Document Values}</td>
|
||||
<td>.dvd, .dvm</td>
|
||||
<td>Encodes additional scoring factors or other per-document information.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>{@link org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat Term Vector Index}</td>
|
||||
<td>.tvx</td>
|
||||
<td>Stores offset into the document data file</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>{@link org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat Term Vector Documents}</td>
|
||||
<td>.tvd</td>
|
||||
<td>Contains information about each document that has term vectors</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>{@link org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat Term Vector Fields}</td>
|
||||
<td>.tvf</td>
|
||||
<td>The field level info about term vectors</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>{@link org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat Deleted Documents}</td>
|
||||
<td>.del</td>
|
||||
<td>Info about what files are deleted</td>
|
||||
</tr>
|
||||
</table>
|
||||
</div>
|
||||
<a name="Lock_File" id="Lock_File"></a>
|
||||
<h2>Lock File</h2>
|
||||
The write lock, which is stored in the index directory by default, is named
|
||||
"write.lock". If the lock directory is different from the index directory then
|
||||
the write lock will be named "XXXX-write.lock" where XXXX is a unique prefix
|
||||
derived from the full path to the index directory. When this file is present, a
|
||||
writer is currently modifying the index (adding or removing documents). This
|
||||
lock file ensures that only one writer is modifying the index at a time.</p>
|
||||
<a name="History"></a>
|
||||
<h2>History</h2>
|
||||
<p>Compatibility notes are provided in this document, describing how file
|
||||
formats have changed from prior versions:</p>
|
||||
<ul>
|
||||
<li>In version 2.1, the file format was changed to allow lock-less commits (ie,
|
||||
no more commit lock). The change is fully backwards compatible: you can open a
|
||||
pre-2.1 index for searching or adding/deleting of docs. When the new segments
|
||||
file is saved (committed), it will be written in the new file format (meaning
|
||||
no specific "upgrade" process is needed). But note that once a commit has
|
||||
occurred, pre-2.1 Lucene will not be able to read the index.</li>
|
||||
<li>In version 2.3, the file format was changed to allow segments to share a
|
||||
single set of doc store (vectors & stored fields) files. This allows for
|
||||
faster indexing in certain cases. The change is fully backwards compatible (in
|
||||
the same way as the lock-less commits change in 2.1).</li>
|
||||
<li>In version 2.4, Strings are now written as true UTF-8 byte sequence, not
|
||||
Java's modified UTF-8. See <a href="http://issues.apache.org/jira/browse/LUCENE-510">
|
||||
LUCENE-510</a> for details.</li>
|
||||
<li>In version 2.9, an optional opaque Map<String,String> CommitUserData
|
||||
may be passed to IndexWriter's commit methods (and later retrieved), which is
|
||||
recorded in the segments_N file. See <a href="http://issues.apache.org/jira/browse/LUCENE-1382">
|
||||
LUCENE-1382</a> for details. Also,
|
||||
diagnostics were added to each segment written recording details about why it
|
||||
was written (due to flush, merge; which OS/JRE was used; etc.). See issue
|
||||
<a href="http://issues.apache.org/jira/browse/LUCENE-1654">LUCENE-1654</a> for details.</li>
|
||||
<li>In version 3.0, compressed fields are no longer written to the index (they
|
||||
can still be read, but on merge the new segment will write them, uncompressed).
|
||||
See issue <a href="http://issues.apache.org/jira/browse/LUCENE-1960">LUCENE-1960</a>
|
||||
for details.</li>
|
||||
<li>In version 3.1, segments records the code version that created them. See
|
||||
<a href="http://issues.apache.org/jira/browse/LUCENE-2720">LUCENE-2720</a> for details.
|
||||
Additionally segments track explicitly whether or not they have term vectors.
|
||||
See <a href="http://issues.apache.org/jira/browse/LUCENE-2811">LUCENE-2811</a>
|
||||
for details.</li>
|
||||
<li>In version 3.2, numeric fields are written as natively to stored fields
|
||||
file, previously they were stored in text format only.</li>
|
||||
<li>In version 3.4, fields can omit position data while still indexing term
|
||||
frequencies.</li>
|
||||
<li>In version 4.0, the format of the inverted index became extensible via
|
||||
the {@link org.apache.lucene.codecs.Codec Codec} api. Fast per-document storage
|
||||
({@link org.apache.lucene.index.DocValues DocValues}) was introduced. Normalization
|
||||
factors need no longer be a single byte, they can be any DocValues
|
||||
{@link org.apache.lucene.index.DocValues.Type type}. Terms need not be unicode
|
||||
strings, they can be any byte sequence. Term offsets can optionally be indexed
|
||||
into the postings lists. Payloads can be stored in the term vectors.</li>
|
||||
<li>In version 4.1, the format of the postings list changed to use either
|
||||
of FOR compression or variable-byte encoding, depending upon the frequency
|
||||
of the term.</li>
|
||||
</ul>
|
||||
<a name="Limitations" id="Limitations"></a>
|
||||
<h2>Limitations</h2>
|
||||
<div>
|
||||
<p>When referring to term numbers, Lucene's current implementation uses a Java
|
||||
<code>int</code> to hold the term index, which means the
|
||||
maximum number of unique terms in any single index segment is ~2.1 billion
|
||||
times the term index interval (default 128) = ~274 billion. This is technically
|
||||
not a limitation of the index file format, just of Lucene's current
|
||||
implementation.</p>
|
||||
<p>Similarly, Lucene uses a Java <code>int</code> to refer to
|
||||
document numbers, and the index file format uses an <code>Int32</code>
|
||||
on-disk to store document numbers. This is a limitation
|
||||
of both the index file format and the current implementation. Eventually these
|
||||
should be replaced with either <code>UInt64</code> values, or
|
||||
better yet, {@link org.apache.lucene.store.DataOutput#writeVInt VInt} values which have no limit.</p>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
|
@ -61,8 +61,8 @@ name of your codec.
|
|||
If you just want to customise the {@link org.apache.lucene.codecs.PostingsFormat}, or use different postings
|
||||
formats for different fields, then you can register your custom postings format in the same way (in
|
||||
META-INF/services/org.apache.lucene.codecs.PostingsFormat), and then extend the default
|
||||
{@link org.apache.lucene.codecs.lucene41.Lucene41Codec} and override
|
||||
{@link org.apache.lucene.codecs.lucene41.Lucene41Codec#getPostingsFormatForField(String)} to return your custom
|
||||
{@link org.apache.lucene.codecs.lucene42.Lucene42Codec} and override
|
||||
{@link org.apache.lucene.codecs.lucene42.Lucene42Codec#getPostingsFormatForField(String)} to return your custom
|
||||
postings format.
|
||||
</p>
|
||||
</body>
|
||||
|
|
|
@ -193,7 +193,7 @@ public class LiveIndexWriterConfig {
|
|||
* <pre class="prettyprint">
|
||||
* //customize Lucene41PostingsFormat, passing minBlockSize=50, maxBlockSize=100
|
||||
* final PostingsFormat tweakedPostings = new Lucene41PostingsFormat(50, 100);
|
||||
* iwc.setCodec(new Lucene41Codec() {
|
||||
* iwc.setCodec(new Lucene42Codec() {
|
||||
* @Override
|
||||
* public PostingsFormat getPostingsFormatForField(String field) {
|
||||
* if (field.equals("fieldWithTonsOfTerms"))
|
||||
|
|
|
@ -15,3 +15,4 @@
|
|||
|
||||
org.apache.lucene.codecs.lucene40.Lucene40Codec
|
||||
org.apache.lucene.codecs.lucene41.Lucene41Codec
|
||||
org.apache.lucene.codecs.lucene42.Lucene42Codec
|
||||
|
|
|
@ -13,4 +13,4 @@
|
|||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
org.apache.lucene.codecs.lucene41.Lucene41DocValuesFormat
|
||||
org.apache.lucene.codecs.lucene42.Lucene42DocValuesFormat
|
||||
|
|
|
@ -22,7 +22,7 @@ import java.io.IOException;
|
|||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.codecs.DocValuesFormat;
|
||||
import org.apache.lucene.codecs.lucene41.Lucene41Codec;
|
||||
import org.apache.lucene.codecs.lucene42.Lucene42Codec;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FloatDocValuesField;
|
||||
|
@ -42,7 +42,6 @@ import org.apache.lucene.index.Term;
|
|||
import org.apache.lucene.search.*;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
/**
|
||||
|
@ -705,9 +704,9 @@ public class TestDemoDocValue extends LuceneTestCase {
|
|||
Directory directory = newDirectory();
|
||||
// we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
|
||||
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
|
||||
final DocValuesFormat fast = DocValuesFormat.forName("Lucene41");
|
||||
final DocValuesFormat fast = DocValuesFormat.forName("Lucene42");
|
||||
final DocValuesFormat slow = DocValuesFormat.forName("SimpleText");
|
||||
iwc.setCodec(new Lucene41Codec() {
|
||||
iwc.setCodec(new Lucene42Codec() {
|
||||
@Override
|
||||
public DocValuesFormat getDocValuesFormatForField(String field) {
|
||||
if ("dv1".equals(field)) {
|
||||
|
|
|
@ -19,7 +19,7 @@ package org.apache.lucene;
|
|||
|
||||
import org.apache.lucene.analysis.*;
|
||||
import org.apache.lucene.codecs.*;
|
||||
import org.apache.lucene.codecs.lucene41.Lucene41Codec;
|
||||
import org.apache.lucene.codecs.lucene42.Lucene42Codec;
|
||||
import org.apache.lucene.document.*;
|
||||
import org.apache.lucene.index.*;
|
||||
import org.apache.lucene.search.*;
|
||||
|
@ -31,7 +31,7 @@ import org.apache.lucene.util.*;
|
|||
|
||||
public class TestExternalCodecs extends LuceneTestCase {
|
||||
|
||||
private static final class CustomPerFieldCodec extends Lucene41Codec {
|
||||
private static final class CustomPerFieldCodec extends Lucene42Codec {
|
||||
|
||||
private final PostingsFormat ramFormat = PostingsFormat.forName("RAMOnly");
|
||||
private final PostingsFormat defaultFormat = PostingsFormat.forName("Lucene41");
|
||||
|
|
|
@ -19,12 +19,13 @@ package org.apache.lucene.codecs.lucene41;
|
|||
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.index.BasePostingsFormatTestCase;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
|
||||
/**
|
||||
* Tests BlockPostingsFormat
|
||||
*/
|
||||
public class TestBlockPostingsFormat extends BasePostingsFormatTestCase {
|
||||
private final Codec codec = new Lucene41Codec();
|
||||
private final Codec codec = _TestUtil.alwaysPostingsFormat(new Lucene41PostingsFormat());
|
||||
|
||||
@Override
|
||||
protected Codec getCodec() {
|
||||
|
|
|
@ -18,7 +18,6 @@ package org.apache.lucene.codecs.lucene41;
|
|||
*/
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.codecs.PostingsFormat;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
|
@ -46,12 +45,7 @@ public class TestBlockPostingsFormat2 extends LuceneTestCase {
|
|||
super.setUp();
|
||||
dir = newFSDirectory(_TestUtil.getTempDir("testDFBlockSize"));
|
||||
iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
|
||||
iwc.setCodec(new Lucene41Codec() {
|
||||
@Override
|
||||
public PostingsFormat getPostingsFormatForField(String field) {
|
||||
return PostingsFormat.forName("Lucene41");
|
||||
}
|
||||
});
|
||||
iwc.setCodec(_TestUtil.alwaysPostingsFormat(new Lucene41PostingsFormat()));
|
||||
iw = new RandomIndexWriter(random(), dir, iwc);
|
||||
iw.setAddDocValuesFields(false);
|
||||
iw.setDoRandomForceMerge(false); // we will ourselves
|
||||
|
|
|
@ -29,7 +29,6 @@ import org.apache.lucene.analysis.MockTokenizer;
|
|||
import org.apache.lucene.analysis.MockVariableLengthPayloadFilter;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.codecs.lucene41.Lucene41Codec;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
|
@ -84,7 +83,7 @@ public class TestBlockPostingsFormat3 extends LuceneTestCase {
|
|||
}
|
||||
};
|
||||
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
|
||||
iwc.setCodec(new Lucene41Codec());
|
||||
iwc.setCodec(_TestUtil.alwaysPostingsFormat(new Lucene41PostingsFormat()));
|
||||
// TODO we could actually add more fields implemented with different PFs
|
||||
// or, just put this test into the usual rotation?
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
|
||||
|
|
|
@ -21,7 +21,7 @@ import java.io.IOException;
|
|||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.codecs.PostingsFormat;
|
||||
import org.apache.lucene.codecs.lucene41.Lucene41Codec;
|
||||
import org.apache.lucene.codecs.lucene42.Lucene42Codec;
|
||||
import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat;
|
||||
import org.apache.lucene.codecs.mocksep.MockSepPostingsFormat;
|
||||
import org.apache.lucene.codecs.pulsing.Pulsing41PostingsFormat;
|
||||
|
@ -142,7 +142,7 @@ public class TestPerFieldPostingsFormat2 extends LuceneTestCase {
|
|||
|
||||
assertQuery(new Term("content", "ccc"), dir, 10);
|
||||
assertQuery(new Term("content", "aaa"), dir, 10);
|
||||
Lucene41Codec codec = (Lucene41Codec)iwconf.getCodec();
|
||||
Codec codec = iwconf.getCodec();
|
||||
|
||||
iwconf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
|
||||
.setOpenMode(OpenMode.APPEND).setCodec(codec);
|
||||
|
@ -158,7 +158,7 @@ public class TestPerFieldPostingsFormat2 extends LuceneTestCase {
|
|||
}
|
||||
addDocs2(writer, 10);
|
||||
writer.commit();
|
||||
codec = (Lucene41Codec)iwconf.getCodec();
|
||||
codec = iwconf.getCodec();
|
||||
assertEquals(30, writer.maxDoc());
|
||||
assertQuery(new Term("content", "bbb"), dir, 10);
|
||||
assertQuery(new Term("content", "ccc"), dir, 10); ////
|
||||
|
@ -200,7 +200,7 @@ public class TestPerFieldPostingsFormat2 extends LuceneTestCase {
|
|||
|
||||
}
|
||||
|
||||
public static class MockCodec extends Lucene41Codec {
|
||||
public static class MockCodec extends Lucene42Codec {
|
||||
final PostingsFormat lucene40 = new Lucene41PostingsFormat();
|
||||
final PostingsFormat simpleText = new SimpleTextPostingsFormat();
|
||||
final PostingsFormat mockSep = new MockSepPostingsFormat();
|
||||
|
@ -217,7 +217,7 @@ public class TestPerFieldPostingsFormat2 extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
public static class MockCodec2 extends Lucene41Codec {
|
||||
public static class MockCodec2 extends Lucene42Codec {
|
||||
final PostingsFormat lucene40 = new Lucene41PostingsFormat();
|
||||
final PostingsFormat simpleText = new SimpleTextPostingsFormat();
|
||||
|
||||
|
@ -268,7 +268,7 @@ public class TestPerFieldPostingsFormat2 extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void testSameCodecDifferentInstance() throws Exception {
|
||||
Codec codec = new Lucene41Codec() {
|
||||
Codec codec = new Lucene42Codec() {
|
||||
@Override
|
||||
public PostingsFormat getPostingsFormatForField(String field) {
|
||||
if ("id".equals(field)) {
|
||||
|
@ -284,7 +284,7 @@ public class TestPerFieldPostingsFormat2 extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void testSameCodecDifferentParams() throws Exception {
|
||||
Codec codec = new Lucene41Codec() {
|
||||
Codec codec = new Lucene42Codec() {
|
||||
@Override
|
||||
public PostingsFormat getPostingsFormatForField(String field) {
|
||||
if ("id".equals(field)) {
|
||||
|
|
|
@ -27,7 +27,7 @@ import org.apache.lucene.analysis.MockAnalyzer;
|
|||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.codecs.FilterCodec;
|
||||
import org.apache.lucene.codecs.PostingsFormat;
|
||||
import org.apache.lucene.codecs.lucene41.Lucene41Codec;
|
||||
import org.apache.lucene.codecs.lucene42.Lucene42Codec;
|
||||
import org.apache.lucene.codecs.pulsing.Pulsing41PostingsFormat;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
|
@ -1058,7 +1058,7 @@ public class TestAddIndexes extends LuceneTestCase {
|
|||
aux2.close();
|
||||
}
|
||||
|
||||
private static final class CustomPerFieldCodec extends Lucene41Codec {
|
||||
private static final class CustomPerFieldCodec extends Lucene42Codec {
|
||||
private final PostingsFormat simpleTextFormat = PostingsFormat.forName("SimpleText");
|
||||
private final PostingsFormat defaultFormat = PostingsFormat.forName("Lucene41");
|
||||
private final PostingsFormat mockSepFormat = PostingsFormat.forName("MockSep");
|
||||
|
@ -1109,7 +1109,7 @@ public class TestAddIndexes extends LuceneTestCase {
|
|||
|
||||
private static final class UnRegisteredCodec extends FilterCodec {
|
||||
public UnRegisteredCodec() {
|
||||
super("NotRegistered", new Lucene41Codec());
|
||||
super("NotRegistered", new Lucene42Codec());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -20,8 +20,8 @@ package org.apache.lucene.index;
|
|||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.codecs.lucene42.Lucene42Codec;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
|
@ -33,7 +33,6 @@ import org.apache.lucene.store.IndexInput;
|
|||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
import org.junit.Ignore;
|
||||
|
||||
/**
|
||||
* Test that a plain default puts codec headers in all files.
|
||||
|
@ -42,7 +41,7 @@ public class TestAllFilesHaveCodecHeader extends LuceneTestCase {
|
|||
public void test() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
|
||||
conf.setCodec(Codec.forName("Lucene41"));
|
||||
conf.setCodec(new Lucene42Codec());
|
||||
// riw should sometimes create docvalues fields, etc
|
||||
RandomIndexWriter riw = new RandomIndexWriter(random(), dir, conf);
|
||||
Document doc = new Document();
|
||||
|
|
|
@ -60,7 +60,7 @@ public class TestDuelingCodecs extends LuceneTestCase {
|
|||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
|
||||
// for now its SimpleText vs Lucene41(random postings format)
|
||||
// for now its SimpleText vs Lucene42(random postings format)
|
||||
// as this gives the best overall coverage. when we have more
|
||||
// codecs we should probably pick 2 from Codec.availableCodecs()
|
||||
|
||||
|
|
|
@ -25,8 +25,8 @@ import org.apache.lucene.codecs.Codec;
|
|||
// enough to test the basics via Codec
|
||||
public class TestNamedSPILoader extends LuceneTestCase {
|
||||
public void testLookup() {
|
||||
Codec codec = Codec.forName("Lucene41");
|
||||
assertEquals("Lucene41", codec.getName());
|
||||
Codec codec = Codec.forName("Lucene42");
|
||||
assertEquals("Lucene42", codec.getName());
|
||||
}
|
||||
|
||||
// we want an exception if its not found.
|
||||
|
@ -39,6 +39,6 @@ public class TestNamedSPILoader extends LuceneTestCase {
|
|||
|
||||
public void testAvailableServices() {
|
||||
Set<String> codecs = Codec.availableCodecs();
|
||||
assertTrue(codecs.contains("Lucene41"));
|
||||
assertTrue(codecs.contains("Lucene42"));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -66,7 +66,7 @@
|
|||
<li><a href="SYSTEM_REQUIREMENTS.html">System Requirements</a>: Minimum and supported Java versions.</li>
|
||||
<li><a href="MIGRATE.html">Migration Guide</a>: What changed in Lucene 4; how to migrate code from Lucene 3.x.</li>
|
||||
<li><a href="JRE_VERSION_MIGRATION.html">JRE Version Migration</a>: Information about upgrading between major JRE versions.</li>
|
||||
<li><a href="core/org/apache/lucene/codecs/lucene41/package-summary.html#package_description">File Formats</a>: Guide to the supported index format used by Lucene. This can be customized by using <a href="core/org/apache/lucene/codecs/package-summary.html#package_description">an alternate codec</a>.</li>
|
||||
<li><a href="core/org/apache/lucene/codecs/lucene42/package-summary.html#package_description">File Formats</a>: Guide to the supported index format used by Lucene. This can be customized by using <a href="core/org/apache/lucene/codecs/package-summary.html#package_description">an alternate codec</a>.</li>
|
||||
<li><a href="core/org/apache/lucene/search/package-summary.html#package_description">Search and Scoring in Lucene</a>: Introduction to how Lucene scores documents.</li>
|
||||
<li><a href="core/org/apache/lucene/search/similarities/TFIDFSimilarity.html">Classic Scoring Formula</a>: Formula of Lucene's classic <a href="http://en.wikipedia.org/wiki/Vector_Space_Model">Vector Space</a> implementation. (look <a href="core/org/apache/lucene/search/similarities/package-summary.html#package_description">here</a> for other models)</li>
|
||||
<li><a href="queryparser/org/apache/lucene/queryparser/classic/package-summary.html#package_description">Classic QueryParser Syntax</a>: Overview of the Classic QueryParser's syntax and features.</li>
|
||||
|
|
|
@ -21,10 +21,10 @@ import org.apache.lucene.codecs.FilterCodec;
|
|||
import org.apache.lucene.codecs.PostingsFormat;
|
||||
import org.apache.lucene.codecs.StoredFieldsFormat;
|
||||
import org.apache.lucene.codecs.TermVectorsFormat;
|
||||
import org.apache.lucene.codecs.lucene41.Lucene41Codec;
|
||||
import org.apache.lucene.codecs.lucene42.Lucene42Codec;
|
||||
|
||||
/**
|
||||
* Acts like {@link Lucene41Codec} but with additional asserts.
|
||||
* Acts like {@link Lucene42Codec} but with additional asserts.
|
||||
*/
|
||||
public final class AssertingCodec extends FilterCodec {
|
||||
|
||||
|
@ -33,7 +33,7 @@ public final class AssertingCodec extends FilterCodec {
|
|||
private final StoredFieldsFormat storedFields = new AssertingStoredFieldsFormat();
|
||||
|
||||
public AssertingCodec() {
|
||||
super("Asserting", new Lucene41Codec());
|
||||
super("Asserting", new Lucene42Codec());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -22,13 +22,13 @@ import java.util.Random;
|
|||
import org.apache.lucene.codecs.FilterCodec;
|
||||
import org.apache.lucene.codecs.StoredFieldsFormat;
|
||||
import org.apache.lucene.codecs.compressing.dummy.DummyCompressingCodec;
|
||||
import org.apache.lucene.codecs.lucene41.Lucene41Codec;
|
||||
import org.apache.lucene.codecs.lucene42.Lucene42Codec;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.generators.RandomInts;
|
||||
|
||||
/**
|
||||
* A codec that uses {@link CompressingStoredFieldsFormat} for its stored
|
||||
* fields and delegates to {@link Lucene41Codec} for everything else.
|
||||
* fields and delegates to {@link Lucene42Codec} for everything else.
|
||||
*/
|
||||
public abstract class CompressingCodec extends FilterCodec {
|
||||
|
||||
|
@ -71,7 +71,7 @@ public abstract class CompressingCodec extends FilterCodec {
|
|||
* Creates a compressing codec with a given segment suffix
|
||||
*/
|
||||
public CompressingCodec(String name, String segmentSuffix, CompressionMode compressionMode, int chunkSize) {
|
||||
super(name, new Lucene41Codec());
|
||||
super(name, new Lucene42Codec());
|
||||
this.storedFieldsFormat = new CompressingStoredFieldsFormat(name, segmentSuffix, compressionMode, chunkSize);
|
||||
}
|
||||
|
||||
|
|
|
@ -30,7 +30,7 @@ import org.apache.lucene.codecs.blockterms.FixedGapTermsIndexReader;
|
|||
import org.apache.lucene.codecs.blockterms.FixedGapTermsIndexWriter;
|
||||
import org.apache.lucene.codecs.blockterms.TermsIndexReaderBase;
|
||||
import org.apache.lucene.codecs.blockterms.TermsIndexWriterBase;
|
||||
import org.apache.lucene.codecs.lucene41.Lucene41Codec; // javadocs
|
||||
import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat; // javadocs
|
||||
import org.apache.lucene.codecs.lucene41.Lucene41PostingsReader;
|
||||
import org.apache.lucene.codecs.lucene41.Lucene41PostingsWriter;
|
||||
import org.apache.lucene.index.SegmentReadState;
|
||||
|
@ -41,7 +41,7 @@ import org.apache.lucene.util.BytesRef;
|
|||
// any PostingsBaseFormat and make it ord-able...
|
||||
|
||||
/**
|
||||
* Customized version of {@link Lucene41Codec} that uses
|
||||
* Customized version of {@link Lucene41PostingsFormat} that uses
|
||||
* {@link FixedGapTermsIndexWriter}.
|
||||
*/
|
||||
public final class Lucene41WithOrds extends PostingsFormat {
|
||||
|
|
|
@ -30,10 +30,10 @@ import java.util.Set;
|
|||
import org.apache.lucene.codecs.PostingsFormat;
|
||||
import org.apache.lucene.codecs.DocValuesFormat;
|
||||
import org.apache.lucene.codecs.asserting.AssertingPostingsFormat;
|
||||
import org.apache.lucene.codecs.lucene41.Lucene41Codec;
|
||||
import org.apache.lucene.codecs.lucene41.Lucene41DocValuesFormat;
|
||||
import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat;
|
||||
import org.apache.lucene.codecs.lucene41ords.Lucene41WithOrds;
|
||||
import org.apache.lucene.codecs.lucene42.Lucene42Codec;
|
||||
import org.apache.lucene.codecs.lucene42.Lucene42DocValuesFormat;
|
||||
import org.apache.lucene.codecs.bloom.TestBloomFilteredLucene41Postings;
|
||||
import org.apache.lucene.codecs.diskdv.DiskDocValuesFormat;
|
||||
import org.apache.lucene.codecs.memory.DirectPostingsFormat;
|
||||
|
@ -58,7 +58,7 @@ import org.apache.lucene.util._TestUtil;
|
|||
* documents in different orders and the test will still be deterministic
|
||||
* and reproducable.
|
||||
*/
|
||||
public class RandomCodec extends Lucene41Codec {
|
||||
public class RandomCodec extends Lucene42Codec {
|
||||
/** Shuffled list of postings formats to use for new mappings */
|
||||
private List<PostingsFormat> formats = new ArrayList<PostingsFormat>();
|
||||
|
||||
|
@ -142,7 +142,7 @@ public class RandomCodec extends Lucene41Codec {
|
|||
new MemoryPostingsFormat(false, random.nextFloat()));
|
||||
|
||||
addDocValues(avoidCodecs,
|
||||
new Lucene41DocValuesFormat(),
|
||||
new Lucene42DocValuesFormat(),
|
||||
new DiskDocValuesFormat(),
|
||||
new SimpleTextDocValuesFormat());
|
||||
|
||||
|
|
|
@ -34,6 +34,7 @@ import org.apache.lucene.codecs.compressing.CompressingCodec;
|
|||
import org.apache.lucene.codecs.lucene40.Lucene40Codec;
|
||||
import org.apache.lucene.codecs.lucene40.Lucene40RWPostingsFormat;
|
||||
import org.apache.lucene.codecs.lucene41.Lucene41Codec;
|
||||
import org.apache.lucene.codecs.lucene42.Lucene42Codec;
|
||||
import org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat;
|
||||
import org.apache.lucene.codecs.simpletext.SimpleTextCodec;
|
||||
import org.apache.lucene.index.RandomCodec;
|
||||
|
@ -133,7 +134,7 @@ final class TestRuleSetupAndRestoreClassEnv extends AbstractBeforeAfterRule {
|
|||
|
||||
savedCodec = Codec.getDefault();
|
||||
int randomVal = random.nextInt(10);
|
||||
|
||||
// nocommit: add 41 impersonation
|
||||
if ("Lucene40".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) &&
|
||||
"random".equals(TEST_POSTINGSFORMAT) &&
|
||||
randomVal < 2 &&
|
||||
|
@ -147,7 +148,7 @@ final class TestRuleSetupAndRestoreClassEnv extends AbstractBeforeAfterRule {
|
|||
} else {
|
||||
format = PostingsFormat.forName(TEST_POSTINGSFORMAT);
|
||||
}
|
||||
codec = new Lucene41Codec() {
|
||||
codec = new Lucene42Codec() {
|
||||
@Override
|
||||
public PostingsFormat getPostingsFormatForField(String field) {
|
||||
return format;
|
||||
|
|
|
@ -44,7 +44,7 @@ import java.util.zip.ZipFile;
|
|||
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.codecs.PostingsFormat;
|
||||
import org.apache.lucene.codecs.lucene41.Lucene41Codec;
|
||||
import org.apache.lucene.codecs.lucene42.Lucene42Codec;
|
||||
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
|
||||
import org.apache.lucene.document.BinaryDocValuesField;
|
||||
import org.apache.lucene.document.ByteDocValuesField;
|
||||
|
@ -702,7 +702,7 @@ public class _TestUtil {
|
|||
if (LuceneTestCase.VERBOSE) {
|
||||
System.out.println("forcing postings format to:" + format);
|
||||
}
|
||||
return new Lucene41Codec() {
|
||||
return new Lucene42Codec() {
|
||||
@Override
|
||||
public PostingsFormat getPostingsFormatForField(String field) {
|
||||
return format;
|
||||
|
|
|
@ -2,7 +2,7 @@ package org.apache.solr.core;
|
|||
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.codecs.PostingsFormat;
|
||||
import org.apache.lucene.codecs.lucene41.Lucene41Codec;
|
||||
import org.apache.lucene.codecs.lucene42.Lucene42Codec;
|
||||
import org.apache.solr.schema.IndexSchema;
|
||||
import org.apache.solr.schema.SchemaAware;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
|
@ -42,7 +42,7 @@ public class SchemaCodecFactory extends CodecFactory implements SchemaAware {
|
|||
|
||||
@Override
|
||||
public void inform(final IndexSchema schema) {
|
||||
codec = new Lucene41Codec() {
|
||||
codec = new Lucene42Codec() {
|
||||
@Override
|
||||
public PostingsFormat getPostingsFormatForField(String field) {
|
||||
final SchemaField fieldOrNull = schema.getFieldOrNull(field);
|
||||
|
@ -55,6 +55,7 @@ public class SchemaCodecFactory extends CodecFactory implements SchemaAware {
|
|||
}
|
||||
return super.getPostingsFormatForField(field);
|
||||
}
|
||||
// nocommit: dv too
|
||||
};
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue