LUCENE-5915: remove pulsing

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1621961 13f79535-47bb-0310-9956-ffa450edef68
Robert Muir 2014-09-02 11:24:24 +00:00
parent ec0a99552d
commit 7e69874ea5
30 changed files with 33 additions and 1977 deletions

View File

@@ -88,6 +88,8 @@ Other
* LUCENE-5858: Moved compatibility codecs to 'lucene-backward-codecs.jar'.
(Adrien Grand, Robert Muir)
+ * LUCENE-5915: Remove Pulsing postings format. (Robert Muir)
======================= Lucene 4.11.0 =======================
New Features

View File

@@ -839,7 +839,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
@Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) {
- // TODO: implement reuse, something like Pulsing:
+ // TODO: implement reuse
// it's hairy!
if (terms[termOrd] instanceof LowFreqTerm) {
@@ -916,7 +916,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
return null;
}
- // TODO: implement reuse, something like Pulsing:
+ // TODO: implement reuse
// it's hairy!
if (terms[termOrd] instanceof LowFreqTerm) {
@@ -1437,7 +1437,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
@Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) {
- // TODO: implement reuse, something like Pulsing:
+ // TODO: implement reuse
// it's hairy!
if (terms[termOrd] instanceof LowFreqTerm) {
@@ -1473,7 +1473,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
return null;
}
- // TODO: implement reuse, something like Pulsing:
+ // TODO: implement reuse
// it's hairy!
if (terms[termOrd] instanceof LowFreqTerm) {

View File

@@ -1,88 +0,0 @@
package org.apache.lucene.codecs.memory;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsBaseFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.codecs.PostingsWriterBase;
import org.apache.lucene.codecs.lucene41.Lucene41PostingsBaseFormat;
import org.apache.lucene.codecs.pulsing.PulsingPostingsReader;
import org.apache.lucene.codecs.pulsing.PulsingPostingsWriter;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.IOUtils;
/** FSTOrd + Pulsing41
* @lucene.experimental */
public class FSTOrdPulsing41PostingsFormat extends PostingsFormat {
private final PostingsBaseFormat wrappedPostingsBaseFormat;
private final int freqCutoff;
public FSTOrdPulsing41PostingsFormat() {
this(1);
}
public FSTOrdPulsing41PostingsFormat(int freqCutoff) {
super("FSTOrdPulsing41");
this.wrappedPostingsBaseFormat = new Lucene41PostingsBaseFormat();
this.freqCutoff = freqCutoff;
}
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
PostingsWriterBase docsWriter = null;
PostingsWriterBase pulsingWriter = null;
boolean success = false;
try {
docsWriter = wrappedPostingsBaseFormat.postingsWriterBase(state);
pulsingWriter = new PulsingPostingsWriter(state, freqCutoff, docsWriter);
FieldsConsumer ret = new FSTOrdTermsWriter(state, pulsingWriter);
success = true;
return ret;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(docsWriter, pulsingWriter);
}
}
}
@Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
PostingsReaderBase docsReader = null;
PostingsReaderBase pulsingReader = null;
boolean success = false;
try {
docsReader = wrappedPostingsBaseFormat.postingsReaderBase(state);
pulsingReader = new PulsingPostingsReader(state, docsReader);
FieldsProducer ret = new FSTOrdTermsReader(state, pulsingReader);
success = true;
return ret;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(docsReader, pulsingReader);
}
}
}
}
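Both methods above follow the same open-then-commit pattern: resources are opened in order, and ownership transfers to the returned object only after every step succeeds; on any failure, whatever was half-opened is closed without masking the original exception. A minimal self-contained sketch of the idiom follows; Opener, Pair, and CleanupIdiom are illustrative names, not Lucene API.

import java.io.Closeable;
import java.io.IOException;
import org.apache.lucene.util.IOUtils;

/** Sketch of the success-flag cleanup idiom used by fieldsConsumer and
 *  fieldsProducer above. */
final class CleanupIdiom {

  interface Opener {
    Closeable open() throws IOException;
  }

  static Closeable openPair(Opener first, Opener second) throws IOException {
    Closeable a = null, b = null;
    boolean success = false;
    try {
      a = first.open();   // may throw
      b = second.open();  // may throw
      success = true;
      return new Pair(a, b);
    } finally {
      if (!success) {
        // Close whatever did get opened, suppressing any secondary
        // exception so the original one propagates:
        IOUtils.closeWhileHandlingException(a, b);
      }
    }
  }

  private static final class Pair implements Closeable {
    private final Closeable a, b;
    Pair(Closeable a, Closeable b) { this.a = a; this.b = b; }
    @Override
    public void close() throws IOException {
      IOUtils.close(a, b);
    }
  }
}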

View File

@@ -1,89 +0,0 @@
package org.apache.lucene.codecs.memory;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsBaseFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.codecs.PostingsWriterBase;
import org.apache.lucene.codecs.lucene41.Lucene41PostingsBaseFormat;
import org.apache.lucene.codecs.pulsing.PulsingPostingsReader;
import org.apache.lucene.codecs.pulsing.PulsingPostingsWriter;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.IOUtils;
/** FST + Pulsing41, test only, since
* FST does no delta encoding here!
* @lucene.experimental */
public class FSTPulsing41PostingsFormat extends PostingsFormat {
private final PostingsBaseFormat wrappedPostingsBaseFormat;
private final int freqCutoff;
public FSTPulsing41PostingsFormat() {
this(1);
}
public FSTPulsing41PostingsFormat(int freqCutoff) {
super("FSTPulsing41");
this.wrappedPostingsBaseFormat = new Lucene41PostingsBaseFormat();
this.freqCutoff = freqCutoff;
}
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
PostingsWriterBase docsWriter = null;
PostingsWriterBase pulsingWriter = null;
boolean success = false;
try {
docsWriter = wrappedPostingsBaseFormat.postingsWriterBase(state);
pulsingWriter = new PulsingPostingsWriter(state, freqCutoff, docsWriter);
FieldsConsumer ret = new FSTTermsWriter(state, pulsingWriter);
success = true;
return ret;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(docsWriter, pulsingWriter);
}
}
}
@Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
PostingsReaderBase docsReader = null;
PostingsReaderBase pulsingReader = null;
boolean success = false;
try {
docsReader = wrappedPostingsBaseFormat.postingsReaderBase(state);
pulsingReader = new PulsingPostingsReader(state, docsReader);
FieldsProducer ret = new FSTTermsReader(state, pulsingReader);
success = true;
return ret;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(docsReader, pulsingReader);
}
}
}
}

View File

@@ -1,45 +0,0 @@
package org.apache.lucene.codecs.pulsing;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter;
import org.apache.lucene.codecs.lucene41.Lucene41PostingsBaseFormat;
import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat; // javadocs
/**
* Concrete pulsing implementation over {@link Lucene41PostingsFormat}.
*
* @lucene.experimental
*/
public class Pulsing41PostingsFormat extends PulsingPostingsFormat {
/** Inlines docFreq=1 terms, otherwise uses the normal "Lucene41" format. */
public Pulsing41PostingsFormat() {
this(1);
}
/** Inlines docFreq=<code>freqCutoff</code> terms, otherwise uses the normal "Lucene41" format. */
public Pulsing41PostingsFormat(int freqCutoff) {
this(freqCutoff, BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE, BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE);
}
/** Inlines docFreq=<code>freqCutoff</code> terms, otherwise uses the normal "Lucene41" format. */
public Pulsing41PostingsFormat(int freqCutoff, int minBlockSize, int maxBlockSize) {
super("Pulsing41", new Lucene41PostingsBaseFormat(), freqCutoff, minBlockSize, maxBlockSize);
}
}
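For context, this is how the tests removed later in this commit (Test10KPulsings, TestPulsingReuse) selected the format at index time. The sketch below assumes it runs inside a LuceneTestCase subclass, since TestUtil, RandomIndexWriter, and newIndexWriterConfig come from Lucene's test framework:

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.pulsing.Pulsing41PostingsFormat;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;

/** Hedged usage sketch, mirroring the removed tests below. */
public class PulsingUsageSketch extends LuceneTestCase {
  public void testOpenWriter() throws Exception {
    // Route every field through Pulsing41; freqCutoff=1 means only
    // docFreq=1 terms get inlined into the terms dictionary:
    Codec cp = TestUtil.alwaysPostingsFormat(new Pulsing41PostingsFormat(1));
    Directory dir = newDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir,
        newIndexWriterConfig(new MockAnalyzer(random())).setCodec(cp));
    iw.close();
    dir.close();
  }
}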

View File

@@ -1,119 +0,0 @@
package org.apache.lucene.codecs.pulsing;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsBaseFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.codecs.PostingsWriterBase;
import org.apache.lucene.codecs.blocktree.BlockTreeTermsReader;
import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.IOUtils;
/** This postings format "inlines" the postings for terms that have
* low docFreq. It wraps another postings format, which is used for
* writing the non-inlined terms.
*
* @lucene.experimental */
public abstract class PulsingPostingsFormat extends PostingsFormat {
private final int freqCutoff;
private final int minBlockSize;
private final int maxBlockSize;
private final PostingsBaseFormat wrappedPostingsBaseFormat;
public PulsingPostingsFormat(String name, PostingsBaseFormat wrappedPostingsBaseFormat, int freqCutoff) {
this(name, wrappedPostingsBaseFormat, freqCutoff, BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE, BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE);
}
/** Terms with freq <= freqCutoff are inlined into terms
* dict. */
public PulsingPostingsFormat(String name, PostingsBaseFormat wrappedPostingsBaseFormat, int freqCutoff, int minBlockSize, int maxBlockSize) {
super(name);
this.freqCutoff = freqCutoff;
this.minBlockSize = minBlockSize;
assert minBlockSize > 1;
this.maxBlockSize = maxBlockSize;
this.wrappedPostingsBaseFormat = wrappedPostingsBaseFormat;
}
@Override
public String toString() {
return getName() + "(freqCutoff=" + freqCutoff + " minBlockSize=" + minBlockSize + " maxBlockSize=" + maxBlockSize + ")";
}
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
PostingsWriterBase docsWriter = null;
// Terms that have <= freqCutoff number of docs are
// "pulsed" (inlined):
PostingsWriterBase pulsingWriter = null;
// Terms dict
boolean success = false;
try {
docsWriter = wrappedPostingsBaseFormat.postingsWriterBase(state);
// Terms that have <= freqCutoff number of docs are
// "pulsed" (inlined):
pulsingWriter = new PulsingPostingsWriter(state, freqCutoff, docsWriter);
FieldsConsumer ret = new BlockTreeTermsWriter(state, pulsingWriter, minBlockSize, maxBlockSize);
success = true;
return ret;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(docsWriter, pulsingWriter);
}
}
}
@Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
PostingsReaderBase docsReader = null;
PostingsReaderBase pulsingReader = null;
boolean success = false;
try {
docsReader = wrappedPostingsBaseFormat.postingsReaderBase(state);
pulsingReader = new PulsingPostingsReader(state, docsReader);
FieldsProducer ret = new BlockTreeTermsReader(
state.directory, state.fieldInfos, state.segmentInfo,
pulsingReader,
state.context,
state.segmentSuffix);
success = true;
return ret;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(docsReader, pulsingReader);
}
}
}
public int getFreqCutoff() {
return freqCutoff;
}
}
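The inlining rule itself is worth spelling out, since the reader and writer below both implement it: when positions are indexed, the cutoff applies to the term's total occurrence count (totalTermFreq); otherwise it applies to its docFreq. A hedged sketch of that rule, with PulseRule as an illustrative name rather than Lucene API:

import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;

/** Sketch of the decision made in PulsingPostingsReader.decodeTerm() and
 *  PulsingPostingsWriter.writeTerm() below. */
final class PulseRule {
  static boolean isPulsed(FieldInfo field, int docFreq, long totalTermFreq, int freqCutoff) {
    boolean hasPositions =
        field.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
    // With positions, "count" is the term's total occurrences across all
    // docs; without them it is simply the number of docs:
    long count = hasPositions ? totalTermFreq : docFreq;
    return count <= freqCutoff;
  }
}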

View File

@@ -1,661 +0,0 @@
package org.apache.lucene.codecs.pulsing;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.IdentityHashMap;
import java.util.Map;
import java.util.TreeMap;
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.TermState;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.IOUtils;
/** Concrete class that reads the current doc/freq/skip
* postings format
* @lucene.experimental */
// TODO: -- should we switch "hasProx" higher up? and
// create two separate docs readers, one that also reads
// prox and one that doesn't?
public class PulsingPostingsReader extends PostingsReaderBase {
// Fallback reader for non-pulsed terms:
final PostingsReaderBase wrappedPostingsReader;
final SegmentReadState segmentState;
int maxPositions;
int version;
TreeMap<Integer, Integer> fields;
public PulsingPostingsReader(SegmentReadState state, PostingsReaderBase wrappedPostingsReader) {
this.wrappedPostingsReader = wrappedPostingsReader;
this.segmentState = state;
}
@Override
public void init(IndexInput termsIn) throws IOException {
version = CodecUtil.checkHeader(termsIn, PulsingPostingsWriter.CODEC,
PulsingPostingsWriter.VERSION_START,
PulsingPostingsWriter.VERSION_CURRENT);
maxPositions = termsIn.readVInt();
wrappedPostingsReader.init(termsIn);
if (wrappedPostingsReader instanceof PulsingPostingsReader ||
version < PulsingPostingsWriter.VERSION_META_ARRAY) {
fields = null;
} else {
fields = new TreeMap<>();
String summaryFileName = IndexFileNames.segmentFileName(segmentState.segmentInfo.name, segmentState.segmentSuffix, PulsingPostingsWriter.SUMMARY_EXTENSION);
IndexInput in = null;
try {
in = segmentState.directory.openInput(summaryFileName, segmentState.context);
CodecUtil.checkHeader(in, PulsingPostingsWriter.CODEC, version,
PulsingPostingsWriter.VERSION_CURRENT);
int numField = in.readVInt();
for (int i = 0; i < numField; i++) {
int fieldNum = in.readVInt();
int longsSize = in.readVInt();
fields.put(fieldNum, longsSize);
}
} finally {
IOUtils.closeWhileHandlingException(in);
}
}
}
private static class PulsingTermState extends BlockTermState {
private boolean absolute = false;
private long[] longs;
private byte[] postings;
private int postingsSize; // -1 if this term was not inlined
private BlockTermState wrappedTermState;
@Override
public PulsingTermState clone() {
PulsingTermState clone;
clone = (PulsingTermState) super.clone();
if (postingsSize != -1) {
clone.postings = new byte[postingsSize];
System.arraycopy(postings, 0, clone.postings, 0, postingsSize);
} else {
assert wrappedTermState != null;
clone.wrappedTermState = (BlockTermState) wrappedTermState.clone();
clone.absolute = absolute;
if (longs != null) {
clone.longs = new long[longs.length];
System.arraycopy(longs, 0, clone.longs, 0, longs.length);
}
}
return clone;
}
@Override
public void copyFrom(TermState _other) {
super.copyFrom(_other);
PulsingTermState other = (PulsingTermState) _other;
postingsSize = other.postingsSize;
if (other.postingsSize != -1) {
if (postings == null || postings.length < other.postingsSize) {
postings = new byte[ArrayUtil.oversize(other.postingsSize, 1)];
}
System.arraycopy(other.postings, 0, postings, 0, other.postingsSize);
} else {
wrappedTermState.copyFrom(other.wrappedTermState);
}
}
@Override
public String toString() {
if (postingsSize == -1) {
return "PulsingTermState: not inlined: wrapped=" + wrappedTermState;
} else {
return "PulsingTermState: inlined size=" + postingsSize + " " + super.toString();
}
}
}
@Override
public BlockTermState newTermState() throws IOException {
PulsingTermState state = new PulsingTermState();
state.wrappedTermState = wrappedPostingsReader.newTermState();
return state;
}
@Override
public void decodeTerm(long[] empty, DataInput in, FieldInfo fieldInfo, BlockTermState _termState, boolean absolute) throws IOException {
//System.out.println("PR nextTerm");
PulsingTermState termState = (PulsingTermState) _termState;
assert empty.length == 0;
termState.absolute = termState.absolute || absolute;
// if we have positions, it's the total TF, otherwise it's computed based on docFreq.
long count = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 ? termState.totalTermFreq : termState.docFreq;
//System.out.println(" count=" + count + " threshold=" + maxPositions);
if (count <= maxPositions) {
// Inlined into terms dict -- just read the byte[] blob in,
// but don't decode it now (we only decode when a DocsEnum
// or D&PEnum is pulled):
termState.postingsSize = in.readVInt();
if (termState.postings == null || termState.postings.length < termState.postingsSize) {
termState.postings = new byte[ArrayUtil.oversize(termState.postingsSize, 1)];
}
// TODO: sort of silly to copy from one big byte[]
// (the blob holding all inlined terms' blobs for
// current term block) into another byte[] (just the
// blob for this term)...
in.readBytes(termState.postings, 0, termState.postingsSize);
//System.out.println(" inlined bytes=" + termState.postingsSize);
termState.absolute = termState.absolute || absolute;
} else {
//System.out.println(" not inlined");
final int longsSize = fields == null ? 0 : fields.get(fieldInfo.number);
if (termState.longs == null) {
termState.longs = new long[longsSize];
}
for (int i = 0; i < longsSize; i++) {
termState.longs[i] = in.readVLong();
}
termState.postingsSize = -1;
termState.wrappedTermState.docFreq = termState.docFreq;
termState.wrappedTermState.totalTermFreq = termState.totalTermFreq;
wrappedPostingsReader.decodeTerm(termState.longs, in, fieldInfo, termState.wrappedTermState, termState.absolute);
termState.absolute = false;
}
}
@Override
public DocsEnum docs(FieldInfo field, BlockTermState _termState, Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
PulsingTermState termState = (PulsingTermState) _termState;
if (termState.postingsSize != -1) {
PulsingDocsEnum postings;
if (reuse instanceof PulsingDocsEnum) {
postings = (PulsingDocsEnum) reuse;
if (!postings.canReuse(field)) {
postings = new PulsingDocsEnum(field);
}
} else {
// the 'reuse' is actually the wrapped enum
PulsingDocsEnum previous = (PulsingDocsEnum) getOther(reuse);
if (previous != null && previous.canReuse(field)) {
postings = previous;
} else {
postings = new PulsingDocsEnum(field);
}
}
if (reuse != postings) {
setOther(postings, reuse); // postings.other = reuse
}
return postings.reset(liveDocs, termState);
} else {
if (reuse instanceof PulsingDocsEnum) {
DocsEnum wrapped = wrappedPostingsReader.docs(field, termState.wrappedTermState, liveDocs, getOther(reuse), flags);
setOther(wrapped, reuse); // wrapped.other = reuse
return wrapped;
} else {
return wrappedPostingsReader.docs(field, termState.wrappedTermState, liveDocs, reuse, flags);
}
}
}
@Override
public DocsAndPositionsEnum docsAndPositions(FieldInfo field, BlockTermState _termState, Bits liveDocs, DocsAndPositionsEnum reuse,
int flags) throws IOException {
final PulsingTermState termState = (PulsingTermState) _termState;
if (termState.postingsSize != -1) {
PulsingDocsAndPositionsEnum postings;
if (reuse instanceof PulsingDocsAndPositionsEnum) {
postings = (PulsingDocsAndPositionsEnum) reuse;
if (!postings.canReuse(field)) {
postings = new PulsingDocsAndPositionsEnum(field);
}
} else {
// the 'reuse' is actually the wrapped enum
PulsingDocsAndPositionsEnum previous = (PulsingDocsAndPositionsEnum) getOther(reuse);
if (previous != null && previous.canReuse(field)) {
postings = previous;
} else {
postings = new PulsingDocsAndPositionsEnum(field);
}
}
if (reuse != postings) {
setOther(postings, reuse); // postings.other = reuse
}
return postings.reset(liveDocs, termState);
} else {
if (reuse instanceof PulsingDocsAndPositionsEnum) {
DocsAndPositionsEnum wrapped = wrappedPostingsReader.docsAndPositions(field, termState.wrappedTermState, liveDocs, (DocsAndPositionsEnum) getOther(reuse),
flags);
setOther(wrapped, reuse); // wrapped.other = reuse
return wrapped;
} else {
return wrappedPostingsReader.docsAndPositions(field, termState.wrappedTermState, liveDocs, reuse, flags);
}
}
}
private static class PulsingDocsEnum extends DocsEnum {
private byte[] postingsBytes;
private final ByteArrayDataInput postings = new ByteArrayDataInput();
private final IndexOptions indexOptions;
private final boolean storePayloads;
private final boolean storeOffsets;
private Bits liveDocs;
private int docID = -1;
private int accum;
private int freq;
private int payloadLength;
private int cost;
public PulsingDocsEnum(FieldInfo fieldInfo) {
indexOptions = fieldInfo.getIndexOptions();
storePayloads = fieldInfo.hasPayloads();
storeOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
}
public PulsingDocsEnum reset(Bits liveDocs, PulsingTermState termState) {
//System.out.println("PR docsEnum termState=" + termState + " docFreq=" + termState.docFreq);
assert termState.postingsSize != -1;
// Must make a copy of termState's byte[] so that if
// app does TermsEnum.next(), this DocsEnum is not affected
if (postingsBytes == null) {
postingsBytes = new byte[termState.postingsSize];
} else if (postingsBytes.length < termState.postingsSize) {
postingsBytes = ArrayUtil.grow(postingsBytes, termState.postingsSize);
}
System.arraycopy(termState.postings, 0, postingsBytes, 0, termState.postingsSize);
postings.reset(postingsBytes, 0, termState.postingsSize);
docID = -1;
accum = 0;
freq = 1;
cost = termState.docFreq;
payloadLength = 0;
this.liveDocs = liveDocs;
return this;
}
boolean canReuse(FieldInfo fieldInfo) {
return indexOptions == fieldInfo.getIndexOptions() && storePayloads == fieldInfo.hasPayloads();
}
@Override
public int nextDoc() throws IOException {
//System.out.println("PR nextDoc this= "+ this);
while(true) {
if (postings.eof()) {
//System.out.println("PR END");
return docID = NO_MORE_DOCS;
}
final int code = postings.readVInt();
//System.out.println(" read code=" + code);
if (indexOptions == IndexOptions.DOCS_ONLY) {
accum += code;
} else {
accum += code >>> 1; // shift off low bit
if ((code & 1) != 0) { // if low bit is set
freq = 1; // freq is one
} else {
freq = postings.readVInt(); // else read freq
}
if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
// Skip positions
if (storePayloads) {
for(int pos=0;pos<freq;pos++) {
final int posCode = postings.readVInt();
if ((posCode & 1) != 0) {
payloadLength = postings.readVInt();
}
if (storeOffsets && (postings.readVInt() & 1) != 0) {
// new offset length
postings.readVInt();
}
if (payloadLength != 0) {
postings.skipBytes(payloadLength);
}
}
} else {
for(int pos=0;pos<freq;pos++) {
// TODO: skipVInt
postings.readVInt();
if (storeOffsets && (postings.readVInt() & 1) != 0) {
// new offset length
postings.readVInt();
}
}
}
}
}
if (liveDocs == null || liveDocs.get(accum)) {
return (docID = accum);
}
}
}
@Override
public int freq() throws IOException {
return freq;
}
@Override
public int docID() {
return docID;
}
@Override
public int advance(int target) throws IOException {
return docID = slowAdvance(target);
}
@Override
public long cost() {
return cost;
}
}
private static class PulsingDocsAndPositionsEnum extends DocsAndPositionsEnum {
private byte[] postingsBytes;
private final ByteArrayDataInput postings = new ByteArrayDataInput();
private final boolean storePayloads;
private final boolean storeOffsets;
// note: we could actually reuse across different options, if we passed this to reset()
// and re-init'ed storeOffsets accordingly (made it non-final)
private final IndexOptions indexOptions;
private Bits liveDocs;
private int docID = -1;
private int accum;
private int freq;
private int posPending;
private int position;
private int payloadLength;
private BytesRefBuilder payload;
private int startOffset;
private int offsetLength;
private boolean payloadRetrieved;
private int cost;
public PulsingDocsAndPositionsEnum(FieldInfo fieldInfo) {
indexOptions = fieldInfo.getIndexOptions();
storePayloads = fieldInfo.hasPayloads();
storeOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
}
boolean canReuse(FieldInfo fieldInfo) {
return indexOptions == fieldInfo.getIndexOptions() && storePayloads == fieldInfo.hasPayloads();
}
public PulsingDocsAndPositionsEnum reset(Bits liveDocs, PulsingTermState termState) {
assert termState.postingsSize != -1;
if (postingsBytes == null) {
postingsBytes = new byte[termState.postingsSize];
} else if (postingsBytes.length < termState.postingsSize) {
postingsBytes = ArrayUtil.grow(postingsBytes, termState.postingsSize);
}
System.arraycopy(termState.postings, 0, postingsBytes, 0, termState.postingsSize);
postings.reset(postingsBytes, 0, termState.postingsSize);
this.liveDocs = liveDocs;
payloadLength = 0;
posPending = 0;
docID = -1;
accum = 0;
cost = termState.docFreq;
startOffset = storeOffsets ? 0 : -1; // always return -1 if no offsets are stored
offsetLength = 0;
//System.out.println("PR d&p reset storesPayloads=" + storePayloads + " bytes=" + bytes.length + " this=" + this);
return this;
}
@Override
public int nextDoc() throws IOException {
//System.out.println("PR d&p nextDoc this=" + this);
while(true) {
//System.out.println(" cycle skip posPending=" + posPending);
skipPositions();
if (postings.eof()) {
//System.out.println("PR END");
return docID = NO_MORE_DOCS;
}
final int code = postings.readVInt();
accum += code >>> 1; // shift off low bit
if ((code & 1) != 0) { // if low bit is set
freq = 1; // freq is one
} else {
freq = postings.readVInt(); // else read freq
}
posPending = freq;
startOffset = storeOffsets ? 0 : -1; // always return -1 if no offsets are stored
if (liveDocs == null || liveDocs.get(accum)) {
//System.out.println(" return docID=" + docID + " freq=" + freq);
position = 0;
return (docID = accum);
}
}
}
@Override
public int freq() throws IOException {
return freq;
}
@Override
public int docID() {
return docID;
}
@Override
public int advance(int target) throws IOException {
return docID = slowAdvance(target);
}
@Override
public int nextPosition() throws IOException {
//System.out.println("PR d&p nextPosition posPending=" + posPending + " vs freq=" + freq);
assert posPending > 0;
posPending--;
if (storePayloads) {
if (!payloadRetrieved) {
//System.out.println("PR skip payload=" + payloadLength);
postings.skipBytes(payloadLength);
}
final int code = postings.readVInt();
//System.out.println("PR code=" + code);
if ((code & 1) != 0) {
payloadLength = postings.readVInt();
//System.out.println("PR new payload len=" + payloadLength);
}
position += code >>> 1;
payloadRetrieved = false;
} else {
position += postings.readVInt();
}
if (storeOffsets) {
int offsetCode = postings.readVInt();
if ((offsetCode & 1) != 0) {
// new offset length
offsetLength = postings.readVInt();
}
startOffset += offsetCode >>> 1;
}
//System.out.println("PR d&p nextPos return pos=" + position + " this=" + this);
return position;
}
@Override
public int startOffset() {
return startOffset;
}
@Override
public int endOffset() {
return startOffset + offsetLength;
}
private void skipPositions() throws IOException {
while(posPending != 0) {
nextPosition();
}
if (storePayloads && !payloadRetrieved) {
//System.out.println(" skip payload len=" + payloadLength);
postings.skipBytes(payloadLength);
payloadRetrieved = true;
}
}
@Override
public BytesRef getPayload() throws IOException {
//System.out.println("PR getPayload payloadLength=" + payloadLength + " this=" + this);
if (payloadRetrieved) {
return payload.get();
} else if (storePayloads && payloadLength > 0) {
payloadRetrieved = true;
if (payload == null) {
payload = new BytesRefBuilder();
}
payload.grow(payloadLength);
postings.readBytes(payload.bytes(), 0, payloadLength);
payload.setLength(payloadLength);
return payload.get();
} else {
return null;
}
}
@Override
public long cost() {
return cost;
}
}
@Override
public void close() throws IOException {
wrappedPostingsReader.close();
}
/** for a docsenum, gets the 'other' reused enum.
* Example: Pulsing(Standard).
* when doing a term range query you are switching back and forth
* between Pulsing and Standard
*
* The way the reuse works is that Pulsing.other = Standard and
* Standard.other = Pulsing.
*/
private DocsEnum getOther(DocsEnum de) {
if (de == null) {
return null;
} else {
final AttributeSource atts = de.attributes();
return atts.addAttribute(PulsingEnumAttribute.class).enums().get(this);
}
}
/**
* for a docsenum, sets the 'other' reused enum.
* see getOther for an example.
*/
private DocsEnum setOther(DocsEnum de, DocsEnum other) {
final AttributeSource atts = de.attributes();
return atts.addAttribute(PulsingEnumAttribute.class).enums().put(this, other);
}
/**
* A per-docsenum attribute that stores additional reuse information
* so that pulsing enums can keep a reference to their wrapped enums,
* and vice versa. this way we can always reuse.
*
* @lucene.internal */
public static interface PulsingEnumAttribute extends Attribute {
public Map<PulsingPostingsReader,DocsEnum> enums();
}
/**
* Implementation of {@link PulsingEnumAttribute} for reuse of
* wrapped postings readers underneath pulsing.
*
* @lucene.internal */
public static final class PulsingEnumAttributeImpl extends AttributeImpl implements PulsingEnumAttribute {
// we could store 'other', but what if someone 'chained' multiple postings readers,
// this could cause problems?
// TODO: we should consider nuking this map and just making it so if you do this,
// you don't reuse? and maybe pulsingPostingsReader should throw an exc if it wraps
// another pulsing, because this is just stupid and wasteful.
// we still have to be careful in case someone does Pulsing(Stomping(Pulsing(...
private final Map<PulsingPostingsReader,DocsEnum> enums =
new IdentityHashMap<>();
@Override
public Map<PulsingPostingsReader,DocsEnum> enums() {
return enums;
}
@Override
public void clear() {
// our state is per-docsenum, so this makes no sense.
// its best not to clear, in case a wrapped enum has a per-doc attribute or something
// and is calling clearAttributes(), so they don't nuke the reuse information!
}
@Override
public void copyTo(AttributeImpl target) {
// this makes no sense for us, because our state is per-docsenum.
// we don't want to copy any stuff over to another docsenum ever!
}
}
@Override
public long ramBytesUsed() {
return ((wrappedPostingsReader!=null) ? wrappedPostingsReader.ramBytesUsed(): 0);
}
@Override
public void checkIntegrity() throws IOException {
wrappedPostingsReader.checkIntegrity();
}
}
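The getOther/setOther machinery above keeps a pulsing enum and its wrapped enum pointing at each other through a per-enum attribute keyed by reader identity, so alternating between inlined and non-inlined terms keeps reusing the same two objects. A toy model of that bookkeeping with plain collections; all names here are illustrative, not Lucene API:

import java.util.IdentityHashMap;
import java.util.Map;

/** Toy sketch of the cross-reuse done by getOther()/setOther() above:
 *  Pulsing.other = Standard and Standard.other = Pulsing. */
public class ReuseDemo {
  static final class DemoEnum {
    final String name;
    final Map<Object, DemoEnum> others = new IdentityHashMap<>();
    DemoEnum(String name) { this.name = name; }
  }

  public static void main(String[] args) {
    Object reader = new Object();        // stands in for PulsingPostingsReader
    DemoEnum pulsed  = new DemoEnum("pulsed");
    DemoEnum wrapped = new DemoEnum("wrapped");
    pulsed.others.put(reader, wrapped);  // setOther(pulsed, wrapped)
    wrapped.others.put(reader, pulsed);  // setOther(wrapped, pulsed)
    // getOther(): ask the enum at hand for its counterpart.
    System.out.println(pulsed.others.get(reader).name);   // prints "wrapped"
    System.out.println(wrapped.others.get(reader).name);  // prints "pulsed"
  }
}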

View File

@@ -1,378 +0,0 @@
package org.apache.lucene.codecs.pulsing;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.PostingsWriterBase;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils;
// TODO: we now inline based on total TF of the term,
// but it might be better to inline by "net bytes used"
// so that a term that has only 1 posting but a huge
// payload would not be inlined. Though this is
// presumably rare in practice...
/**
* Writer for the pulsing format.
* <p>
* Wraps another postings implementation and decides
* (based on total number of occurrences) whether a term's
* postings should be inlined into the term dictionary,
* or passed through to the wrapped writer.
*
* @lucene.experimental */
public final class PulsingPostingsWriter extends PostingsWriterBase {
final static String CODEC = "PulsedPostingsWriter";
// recording field summary
final static String SUMMARY_EXTENSION = "smy";
// To add a new version, increment from the last one, and
// change VERSION_CURRENT to point to your new version:
final static int VERSION_START = 0;
final static int VERSION_META_ARRAY = 1;
final static int VERSION_CURRENT = VERSION_META_ARRAY;
private SegmentWriteState segmentState;
private List<FieldMetaData> fields;
// Reused by writeTerm:
private DocsEnum docsEnum;
private DocsAndPositionsEnum posEnum;
private int enumFlags;
private final RAMOutputStream buffer = new RAMOutputStream();
private IndexOptions indexOptions;
// information for wrapped PF, in current field
private int longsSize;
private long[] longs;
private boolean fieldHasFreqs;
private boolean fieldHasPositions;
private boolean fieldHasOffsets;
private boolean fieldHasPayloads;
boolean absolute;
private static class PulsingTermState extends BlockTermState {
private byte[] bytes;
private BlockTermState wrappedState;
@Override
public String toString() {
if (bytes != null) {
return "inlined";
} else {
return "not inlined wrapped=" + wrappedState;
}
}
}
private static final class FieldMetaData {
int fieldNumber;
int longsSize;
FieldMetaData(int number, int size) {
fieldNumber = number;
longsSize = size;
}
}
// TODO: -- lazy init this? ie, if every single term
// was inlined (eg for a "primary key" field) then we
// never need to use this fallback? Fallback writer for
// non-inlined terms:
final PostingsWriterBase wrappedPostingsWriter;
final int maxPositions;
/** If the total number of positions (summed across all docs
* for this term) is <= maxPositions, then the postings are
* inlined into terms dict */
public PulsingPostingsWriter(SegmentWriteState state, int maxPositions, PostingsWriterBase wrappedPostingsWriter) {
fields = new ArrayList<>();
this.maxPositions = maxPositions;
// We simply wrap another postings writer, but only call
// on it when tot positions is >= the cutoff:
this.wrappedPostingsWriter = wrappedPostingsWriter;
this.segmentState = state;
}
@Override
public void init(IndexOutput termsOut) throws IOException {
CodecUtil.writeHeader(termsOut, CODEC, VERSION_CURRENT);
termsOut.writeVInt(maxPositions); // encode maxPositions in header
wrappedPostingsWriter.init(termsOut);
}
@Override
public BlockTermState writeTerm(BytesRef term, TermsEnum termsEnum, FixedBitSet docsSeen) throws IOException {
// First pass: figure out whether we should pulse this term
long posCount = 0;
if (fieldHasPositions == false) {
// No positions:
docsEnum = termsEnum.docs(null, docsEnum, enumFlags);
assert docsEnum != null;
while (posCount <= maxPositions) {
if (docsEnum.nextDoc() == DocsEnum.NO_MORE_DOCS) {
break;
}
posCount++;
}
} else {
posEnum = termsEnum.docsAndPositions(null, posEnum, enumFlags);
assert posEnum != null;
while (posCount <= maxPositions) {
if (posEnum.nextDoc() == DocsEnum.NO_MORE_DOCS) {
break;
}
posCount += posEnum.freq();
}
}
if (posCount == 0) {
// All docs were deleted
return null;
}
// Second pass: write postings
if (posCount > maxPositions) {
// Too many positions; do not pulse. Just let the
// wrapped postingsWriter encode the postings:
PulsingTermState state = new PulsingTermState();
state.wrappedState = wrappedPostingsWriter.writeTerm(term, termsEnum, docsSeen);
state.docFreq = state.wrappedState.docFreq;
state.totalTermFreq = state.wrappedState.totalTermFreq;
return state;
} else {
// Pulsed:
if (fieldHasPositions == false) {
docsEnum = termsEnum.docs(null, docsEnum, enumFlags);
} else {
posEnum = termsEnum.docsAndPositions(null, posEnum, enumFlags);
docsEnum = posEnum;
}
assert docsEnum != null;
// There were few enough total occurrences for this
// term, so we fully inline our postings data into
// terms dict, now:
// TODO: it'd be better to share this encoding logic
// in some inner codec that knows how to write a
// single doc / single position, etc. This way if a
// given codec wants to store other interesting
// stuff, it could use this pulsing codec to do so
int lastDocID = 0;
int lastPayloadLength = -1;
int lastOffsetLength = -1;
int docFreq = 0;
long totalTermFreq = 0;
while (true) {
int docID = docsEnum.nextDoc();
if (docID == DocsEnum.NO_MORE_DOCS) {
break;
}
docsSeen.set(docID);
int delta = docID - lastDocID;
lastDocID = docID;
docFreq++;
if (fieldHasFreqs) {
int freq = docsEnum.freq();
totalTermFreq += freq;
if (freq == 1) {
buffer.writeVInt((delta << 1) | 1);
} else {
buffer.writeVInt(delta << 1);
buffer.writeVInt(freq);
}
if (fieldHasPositions) {
int lastPos = 0;
int lastOffset = 0;
for(int posIDX=0;posIDX<freq;posIDX++) {
int pos = posEnum.nextPosition();
int posDelta = pos - lastPos;
lastPos = pos;
int payloadLength;
BytesRef payload;
if (fieldHasPayloads) {
payload = posEnum.getPayload();
payloadLength = payload == null ? 0 : payload.length;
if (payloadLength != lastPayloadLength) {
buffer.writeVInt((posDelta << 1)|1);
buffer.writeVInt(payloadLength);
lastPayloadLength = payloadLength;
} else {
buffer.writeVInt(posDelta << 1);
}
} else {
payloadLength = 0;
payload = null;
buffer.writeVInt(posDelta);
}
if (fieldHasOffsets) {
int startOffset = posEnum.startOffset();
int endOffset = posEnum.endOffset();
int offsetDelta = startOffset - lastOffset;
int offsetLength = endOffset - startOffset;
if (offsetLength != lastOffsetLength) {
buffer.writeVInt(offsetDelta << 1 | 1);
buffer.writeVInt(offsetLength);
} else {
buffer.writeVInt(offsetDelta << 1);
}
lastOffset = startOffset;
lastOffsetLength = offsetLength;
}
if (payloadLength > 0) {
assert fieldHasPayloads;
assert payload != null;
buffer.writeBytes(payload.bytes, payload.offset, payload.length);
}
}
}
} else {
buffer.writeVInt(delta);
}
}
PulsingTermState state = new PulsingTermState();
state.bytes = new byte[(int) buffer.getFilePointer()];
state.docFreq = docFreq;
state.totalTermFreq = fieldHasFreqs ? totalTermFreq : -1;
buffer.writeTo(state.bytes, 0);
buffer.reset();
return state;
}
}
// TODO: -- should we NOT reuse across fields? would
// be cleaner
// Currently, this instance is re-used across fields, so
// our parent calls setField whenever the field changes
@Override
public int setField(FieldInfo fieldInfo) {
this.indexOptions = fieldInfo.getIndexOptions();
//if (DEBUG) System.out.println("PW field=" + fieldInfo.name + " indexOptions=" + indexOptions);
fieldHasPayloads = fieldInfo.hasPayloads();
absolute = false;
longsSize = wrappedPostingsWriter.setField(fieldInfo);
longs = new long[longsSize];
fields.add(new FieldMetaData(fieldInfo.number, longsSize));
fieldHasFreqs = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
fieldHasPositions = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
fieldHasOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
if (fieldHasFreqs == false) {
enumFlags = 0;
} else if (fieldHasPositions == false) {
enumFlags = DocsEnum.FLAG_FREQS;
} else if (fieldHasOffsets == false) {
if (fieldHasPayloads) {
enumFlags = DocsAndPositionsEnum.FLAG_PAYLOADS;
} else {
enumFlags = 0;
}
} else {
if (fieldHasPayloads) {
enumFlags = DocsAndPositionsEnum.FLAG_PAYLOADS | DocsAndPositionsEnum.FLAG_OFFSETS;
} else {
enumFlags = DocsAndPositionsEnum.FLAG_OFFSETS;
}
}
return 0;
//DEBUG = BlockTreeTermsWriter.DEBUG;
}
@Override
public void encodeTerm(long[] empty, DataOutput out, FieldInfo fieldInfo, BlockTermState _state, boolean absolute) throws IOException {
PulsingTermState state = (PulsingTermState)_state;
assert empty.length == 0;
this.absolute = this.absolute || absolute;
if (state.bytes == null) {
wrappedPostingsWriter.encodeTerm(longs, buffer, fieldInfo, state.wrappedState, this.absolute);
for (int i = 0; i < longsSize; i++) {
out.writeVLong(longs[i]);
}
buffer.writeTo(out);
buffer.reset();
this.absolute = false;
} else {
out.writeVInt(state.bytes.length);
out.writeBytes(state.bytes, 0, state.bytes.length);
this.absolute = this.absolute || absolute;
}
}
@Override
public void close() throws IOException {
wrappedPostingsWriter.close();
if (wrappedPostingsWriter instanceof PulsingPostingsWriter ||
VERSION_CURRENT < VERSION_META_ARRAY) {
return;
}
String summaryFileName = IndexFileNames.segmentFileName(segmentState.segmentInfo.name, segmentState.segmentSuffix, SUMMARY_EXTENSION);
IndexOutput out = null;
try {
out = segmentState.directory.createOutput(summaryFileName, segmentState.context);
CodecUtil.writeHeader(out, CODEC, VERSION_CURRENT);
out.writeVInt(fields.size());
for (FieldMetaData field : fields) {
out.writeVInt(field.fieldNumber);
out.writeVInt(field.longsSize);
}
out.close();
} finally {
IOUtils.closeWhileHandlingException(out);
}
}
}
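The inlined blob format is symmetric: writeTerm above emits each doc delta shifted left one bit with the low bit flagging freq == 1, and PulsingDocsEnum.nextDoc() in the reader earlier undoes it. A self-contained round-trip sketch of just the doc/freq layer (positions, payloads, and offsets omitted), using the same Lucene byte stores the codec uses:

import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.RAMOutputStream;

/** Round-trip demo of the (delta << 1 | freqIsOne) encoding above. */
public class PulseCodingDemo {
  public static void main(String[] args) throws Exception {
    int[] docs  = {3, 7, 42};
    int[] freqs = {1, 5, 1};

    RAMOutputStream out = new RAMOutputStream();
    int lastDoc = 0;
    for (int i = 0; i < docs.length; i++) {
      int delta = docs[i] - lastDoc;    // doc IDs are delta-coded
      lastDoc = docs[i];
      if (freqs[i] == 1) {
        out.writeVInt((delta << 1) | 1); // low bit set: freq is one
      } else {
        out.writeVInt(delta << 1);
        out.writeVInt(freqs[i]);         // explicit freq only when > 1
      }
    }
    byte[] blob = new byte[(int) out.getFilePointer()];
    out.writeTo(blob, 0);

    ByteArrayDataInput in = new ByteArrayDataInput(blob);
    int accum = 0;
    while (!in.eof()) {
      int code = in.readVInt();
      accum += code >>> 1;               // shift off the freq flag
      int freq = ((code & 1) != 0) ? 1 : in.readVInt();
      System.out.println("doc=" + accum + " freq=" + freq);
    }
  }
}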

View File

@@ -1,25 +0,0 @@
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
</head>
<body>
Pulsing Codec: inlines low frequency terms' postings into terms dictionary.
</body>
</html>

View File

@@ -17,9 +17,6 @@ org.apache.lucene.codecs.blocktreeords.Ords41PostingsFormat
org.apache.lucene.codecs.bloom.BloomFilteringPostingsFormat
org.apache.lucene.codecs.memory.DirectPostingsFormat
org.apache.lucene.codecs.memory.FSTOrdPostingsFormat
- org.apache.lucene.codecs.memory.FSTOrdPulsing41PostingsFormat
org.apache.lucene.codecs.memory.FSTPostingsFormat
- org.apache.lucene.codecs.memory.FSTPulsing41PostingsFormat
org.apache.lucene.codecs.memory.MemoryPostingsFormat
- org.apache.lucene.codecs.pulsing.Pulsing41PostingsFormat
org.apache.lucene.codecs.simpletext.SimpleTextPostingsFormat

View File

@@ -1,34 +0,0 @@
package org.apache.lucene.codecs.memory;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.BasePostingsFormatTestCase;
import org.apache.lucene.util.TestUtil;
/**
* Tests FSTOrdPulsing41PostingsFormat
*/
public class TestFSTOrdPulsing41PostingsFormat extends BasePostingsFormatTestCase {
private final Codec codec = TestUtil.alwaysPostingsFormat(new FSTOrdPulsing41PostingsFormat());
@Override
protected Codec getCodec() {
return codec;
}
}

View File

@@ -1,34 +0,0 @@
package org.apache.lucene.codecs.memory;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.BasePostingsFormatTestCase;
import org.apache.lucene.util.TestUtil;
/**
* Tests FSTPulsing41PostingsFormat
*/
public class TestFSTPulsing41PostingsFormat extends BasePostingsFormatTestCase {
private final Codec codec = TestUtil.alwaysPostingsFormat(new FSTPulsing41PostingsFormat());
@Override
protected Codec getCodec() {
return codec;
}
}

View File

@@ -1,156 +0,0 @@
package org.apache.lucene.codecs.pulsing;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.File;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.text.NumberFormat;
import java.util.Locale;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.BaseDirectoryWrapper;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
/**
* Pulses 10k terms/docs,
* originally designed to find JRE bugs (https://issues.apache.org/jira/browse/LUCENE-3335)
*
* @lucene.experimental
*/
@LuceneTestCase.Nightly
public class Test10KPulsings extends LuceneTestCase {
public void test10kPulsed() throws Exception {
// we always run this test with pulsing codec.
Codec cp = TestUtil.alwaysPostingsFormat(new Pulsing41PostingsFormat(1));
File f = createTempDir("10kpulsed");
BaseDirectoryWrapper dir = newFSDirectory(f);
dir.setCheckIndexOnClose(false); // we do this ourselves explicitly
RandomIndexWriter iw = new RandomIndexWriter(random(), dir,
newIndexWriterConfig(new MockAnalyzer(random())).setCodec(cp));
Document document = new Document();
FieldType ft = new FieldType(TextField.TYPE_STORED);
switch(TestUtil.nextInt(random(), 0, 2)) {
case 0: ft.setIndexOptions(IndexOptions.DOCS_ONLY); break;
case 1: ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS); break;
default: ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); break;
}
Field field = newField("field", "", ft);
document.add(field);
NumberFormat df = new DecimalFormat("00000", new DecimalFormatSymbols(Locale.ROOT));
for (int i = 0; i < 10050; i++) {
field.setStringValue(df.format(i));
iw.addDocument(document);
}
IndexReader ir = iw.getReader();
iw.close();
TermsEnum te = MultiFields.getTerms(ir, "field").iterator(null);
DocsEnum de = null;
for (int i = 0; i < 10050; i++) {
String expected = df.format(i);
assertEquals(expected, te.next().utf8ToString());
de = TestUtil.docs(random(), te, null, de, DocsEnum.FLAG_NONE);
assertTrue(de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
assertEquals(DocIdSetIterator.NO_MORE_DOCS, de.nextDoc());
}
ir.close();
TestUtil.checkIndex(dir);
dir.close();
}
/** A variant that uses pulsing, but with a high TF to force pass-through to the underlying codec
*/
public void test10kNotPulsed() throws Exception {
// we always run this test with pulsing codec.
int freqCutoff = TestUtil.nextInt(random(), 1, 10);
Codec cp = TestUtil.alwaysPostingsFormat(new Pulsing41PostingsFormat(freqCutoff));
File f = createTempDir("10knotpulsed");
BaseDirectoryWrapper dir = newFSDirectory(f);
dir.setCheckIndexOnClose(false); // we do this ourselves explicitly
RandomIndexWriter iw = new RandomIndexWriter(random(), dir,
newIndexWriterConfig(new MockAnalyzer(random())).setCodec(cp));
Document document = new Document();
FieldType ft = new FieldType(TextField.TYPE_STORED);
switch(TestUtil.nextInt(random(), 0, 2)) {
case 0: ft.setIndexOptions(IndexOptions.DOCS_ONLY); break;
case 1: ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS); break;
default: ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); break;
}
Field field = newField("field", "", ft);
document.add(field);
NumberFormat df = new DecimalFormat("00000", new DecimalFormatSymbols(Locale.ROOT));
final int freq = freqCutoff + 1;
for (int i = 0; i < 10050; i++) {
StringBuilder sb = new StringBuilder();
for (int j = 0; j < freq; j++) {
sb.append(df.format(i));
sb.append(' '); // whitespace
}
field.setStringValue(sb.toString());
iw.addDocument(document);
}
IndexReader ir = iw.getReader();
iw.close();
TermsEnum te = MultiFields.getTerms(ir, "field").iterator(null);
DocsEnum de = null;
for (int i = 0; i < 10050; i++) {
String expected = df.format(i);
assertEquals(expected, te.next().utf8ToString());
de = TestUtil.docs(random(), te, null, de, DocsEnum.FLAG_NONE);
assertTrue(de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
assertEquals(DocIdSetIterator.NO_MORE_DOCS, de.nextDoc());
}
ir.close();
TestUtil.checkIndex(dir);
dir.close();
}
}

View File

@@ -1,36 +0,0 @@
package org.apache.lucene.codecs.pulsing;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.BasePostingsFormatTestCase;
import org.apache.lucene.util.TestUtil;
/**
* Tests PulsingPostingsFormat
*/
public class TestPulsingPostingsFormat extends BasePostingsFormatTestCase {
// TODO: randomize cutoff
private final Codec codec = TestUtil.alwaysPostingsFormat(new Pulsing41PostingsFormat());
@Override
protected Codec getCodec() {
return codec;
}
}

View File

@@ -1,122 +0,0 @@
package org.apache.lucene.codecs.pulsing;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.IdentityHashMap;
import java.util.Map;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.nestedpulsing.NestedPulsingPostingsFormat;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.BaseDirectoryWrapper;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
/**
* Tests that pulsing codec reuses its enums and wrapped enums
*/
public class TestPulsingReuse extends LuceneTestCase {
// TODO: this is a basic test. this thing is complicated, add more
public void testSophisticatedReuse() throws Exception {
// we always run this test with pulsing codec.
Codec cp = TestUtil.alwaysPostingsFormat(new Pulsing41PostingsFormat(1));
Directory dir = newDirectory();
RandomIndexWriter iw = new RandomIndexWriter(random(), dir,
newIndexWriterConfig(new MockAnalyzer(random())).setCodec(cp));
Document doc = new Document();
doc.add(new TextField("foo", "a b b c c c d e f g g h i i j j k", Field.Store.NO));
iw.addDocument(doc);
DirectoryReader ir = iw.getReader();
iw.close();
AtomicReader segment = getOnlySegmentReader(ir);
DocsEnum reuse = null;
Map<DocsEnum,Boolean> allEnums = new IdentityHashMap<>();
TermsEnum te = segment.terms("foo").iterator(null);
while (te.next() != null) {
reuse = te.docs(null, reuse, DocsEnum.FLAG_NONE);
allEnums.put(reuse, true);
}
assertEquals(2, allEnums.size());
allEnums.clear();
DocsAndPositionsEnum posReuse = null;
te = segment.terms("foo").iterator(null);
while (te.next() != null) {
posReuse = te.docsAndPositions(null, posReuse);
allEnums.put(posReuse, true);
}
assertEquals(2, allEnums.size());
ir.close();
dir.close();
}
/** tests reuse with Pulsing1(Pulsing2(Standard)) */
public void testNestedPulsing() throws Exception {
// we always run this test with pulsing codec.
Codec cp = TestUtil.alwaysPostingsFormat(new NestedPulsingPostingsFormat());
BaseDirectoryWrapper dir = newDirectory();
RandomIndexWriter iw = new RandomIndexWriter(random(), dir,
newIndexWriterConfig(new MockAnalyzer(random())).setCodec(cp));
Document doc = new Document();
doc.add(new TextField("foo", "a b b c c c d e f g g g h i i j j k l l m m m", Field.Store.NO));
// note: the reuse is imperfect, here we would have 4 enums (lost reuse when we get an enum for 'm')
// this is because we only track the 'last' enum we reused (not all).
// but this seems 'good enough' for now.
iw.addDocument(doc);
DirectoryReader ir = iw.getReader();
iw.close();
AtomicReader segment = getOnlySegmentReader(ir);
DocsEnum reuse = null;
Map<DocsEnum,Boolean> allEnums = new IdentityHashMap<>();
TermsEnum te = segment.terms("foo").iterator(null);
while (te.next() != null) {
reuse = te.docs(null, reuse, DocsEnum.FLAG_NONE);
allEnums.put(reuse, true);
}
assertEquals(4, allEnums.size());
allEnums.clear();
DocsAndPositionsEnum posReuse = null;
te = segment.terms("foo").iterator(null);
while (te.next() != null) {
posReuse = te.docsAndPositions(null, posReuse);
allEnums.put(posReuse, true);
}
assertEquals(4, allEnums.size());
ir.close();
dir.close();
}
}
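Why the two deleted tests above expect 2 and 4 distinct instances: with a single pulsing layer, all low-frequency terms share one inlined enum and all high-frequency terms share one wrapped enum, giving 2; in the nested case the inline comment attributes the fourth instance to reuse lost at term 'm', since only the last returned enum is tracked. The identity-counting idiom itself generalizes to any codec; a hedged sketch, reusing the imports of the deleted test (the helper name is illustrative):

// Sketch: count distinct DocsEnum instances handed out across a field's terms.
// IdentityHashMap is deliberate -- the question is object identity (was the
// instance reused?), not logical equality.
static int countDistinctDocsEnums(AtomicReader segment, String field) throws IOException {
  Map<DocsEnum,Boolean> seen = new IdentityHashMap<>();
  DocsEnum reuse = null;
  TermsEnum te = segment.terms(field).iterator(null);
  while (te.next() != null) {
    reuse = te.docs(null, reuse, DocsEnum.FLAG_NONE);
    seen.put(reuse, true);
  }
  return seen.size(); // 1 for a perfectly-reusing codec
}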

View File

@ -41,12 +41,12 @@ public class TestExternalCodecs extends LuceneTestCase {
private final PostingsFormat ramFormat = PostingsFormat.forName("RAMOnly");
private final PostingsFormat defaultFormat = PostingsFormat.forName("Lucene41");
private final PostingsFormat pulsingFormat = PostingsFormat.forName("Pulsing41");
private final PostingsFormat memoryFormat = PostingsFormat.forName("Memory");
@Override
public PostingsFormat getPostingsFormatForField(String field) {
if (field.equals("field2") || field.equals("id")) {
return pulsingFormat;
return memoryFormat;
} else if (field.equals("field1")) {
return defaultFormat;
} else {
@ -76,8 +76,8 @@ public class TestExternalCodecs extends LuceneTestCase {
Document doc = new Document();
// uses default codec:
doc.add(newTextField("field1", "this field uses the standard codec as the test", Field.Store.NO));
// uses pulsing codec:
Field field2 = newTextField("field2", "this field uses the pulsing codec as the test", Field.Store.NO);
// uses memory codec:
Field field2 = newTextField("field2", "this field uses the memory codec as the test", Field.Store.NO);
doc.add(field2);
Field idField = newStringField("id", "", Field.Store.NO);
@ -100,7 +100,7 @@ public class TestExternalCodecs extends LuceneTestCase {
assertEquals(NUM_DOCS-1, r.numDocs());
IndexSearcher s = newSearcher(r);
assertEquals(NUM_DOCS-1, s.search(new TermQuery(new Term("field1", "standard")), 1).totalHits);
assertEquals(NUM_DOCS-1, s.search(new TermQuery(new Term("field2", "pulsing")), 1).totalHits);
assertEquals(NUM_DOCS-1, s.search(new TermQuery(new Term("field2", "memory")), 1).totalHits);
r.close();
if (VERBOSE) {
@ -120,7 +120,7 @@ public class TestExternalCodecs extends LuceneTestCase {
assertEquals(NUM_DOCS-2, r.numDocs());
s = newSearcher(r);
assertEquals(NUM_DOCS-2, s.search(new TermQuery(new Term("field1", "standard")), 1).totalHits);
assertEquals(NUM_DOCS-2, s.search(new TermQuery(new Term("field2", "pulsing")), 1).totalHits);
assertEquals(NUM_DOCS-2, s.search(new TermQuery(new Term("field2", "memory")), 1).totalHits);
assertEquals(1, s.search(new TermQuery(new Term("id", "76")), 1).totalHits);
assertEquals(0, s.search(new TermQuery(new Term("id", "77")), 1).totalHits);
assertEquals(0, s.search(new TermQuery(new Term("id", "44")), 1).totalHits);

View File

@ -28,7 +28,6 @@ import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.blocktreeords.Ords41PostingsFormat;
import org.apache.lucene.codecs.lucene41ords.Lucene41WithOrds;
import org.apache.lucene.codecs.memory.FSTOrdPostingsFormat;
import org.apache.lucene.codecs.memory.FSTOrdPulsing41PostingsFormat;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.SortedSetDocValuesField;
@ -131,8 +130,6 @@ public class TestLucene410DocValuesFormat extends BaseCompressingDocValuesFormat
// TODO: these don't actually support ords!
//case 2: pf = new FSTOrdPostingsFormat();
// break;
//case 3: pf = new FSTOrdPulsing41PostingsFormat();
// break;
default: throw new AssertionError();
}
final DocValuesFormat dv = new Lucene410DocValuesFormat();

View File

@ -23,8 +23,8 @@ import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat;
import org.apache.lucene.codecs.lucene410.Lucene410Codec;
import org.apache.lucene.codecs.lucene41vargap.Lucene41VarGapFixedInterval;
import org.apache.lucene.codecs.memory.MemoryPostingsFormat;
import org.apache.lucene.codecs.pulsing.Pulsing41PostingsFormat;
import org.apache.lucene.codecs.simpletext.SimpleTextPostingsFormat;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@ -272,9 +272,9 @@ public class TestPerFieldPostingsFormat2 extends LuceneTestCase {
@Override
public PostingsFormat getPostingsFormatForField(String field) {
if ("id".equals(field)) {
return new Pulsing41PostingsFormat(1);
return new MemoryPostingsFormat();
} else if ("date".equals(field)) {
return new Pulsing41PostingsFormat(1);
return new MemoryPostingsFormat();
} else {
return super.getPostingsFormatForField(field);
}
@ -288,9 +288,9 @@ public class TestPerFieldPostingsFormat2 extends LuceneTestCase {
@Override
public PostingsFormat getPostingsFormatForField(String field) {
if ("id".equals(field)) {
return new Pulsing41PostingsFormat(1);
return new Lucene41VarGapFixedInterval(1);
} else if ("date".equals(field)) {
return new Pulsing41PostingsFormat(2);
return new Lucene41VarGapFixedInterval(2);
} else {
return super.getPostingsFormatForField(field);
}

View File

@ -29,7 +29,7 @@ import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene410.Lucene410Codec;
import org.apache.lucene.codecs.pulsing.Pulsing41PostingsFormat;
import org.apache.lucene.codecs.memory.MemoryPostingsFormat;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
@ -1149,7 +1149,7 @@ public class TestAddIndexes extends LuceneTestCase {
{
Directory dir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
conf.setCodec(TestUtil.alwaysPostingsFormat(new Pulsing41PostingsFormat(1 + random().nextInt(20))));
conf.setCodec(TestUtil.alwaysPostingsFormat(new MemoryPostingsFormat()));
IndexWriter w = new IndexWriter(dir, conf);
try {
w.addIndexes(toAdd);

View File

@ -34,9 +34,6 @@ public class TestForTooMuchCloning extends LuceneTestCase {
// Make sure we don't clone IndexInputs too frequently
// during merging:
public void test() throws Exception {
// NOTE: if we see a fail on this test with "NestedPulsing" its because its
// reuse isnt perfect (but reasonable). see TestPulsingReuse.testNestedPulsing
// for more details
final MockDirectoryWrapper dir = newMockDirectory();
final TieredMergePolicy tmp = new TieredMergePolicy();
tmp.setMaxMergeAtOnce(2);

View File

@ -279,7 +279,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
* @param openMode see {@link OpenMode}
*/
protected IndexWriterConfig createIndexWriterConfig(OpenMode openMode) {
// TODO: should we use a more optimized Codec, e.g. Pulsing (or write custom)?
// TODO: should we use a more optimized Codec?
// The taxonomy has a unique structure, where each term is associated with one document
// Make sure we use a MergePolicy which always merges adjacent segments and thus

View File

@ -44,8 +44,6 @@ import org.apache.lucene.codecs.memory.FSTOrdTermsReader;
import org.apache.lucene.codecs.memory.FSTOrdTermsWriter;
import org.apache.lucene.codecs.memory.FSTTermsReader;
import org.apache.lucene.codecs.memory.FSTTermsWriter;
import org.apache.lucene.codecs.pulsing.PulsingPostingsReader;
import org.apache.lucene.codecs.pulsing.PulsingPostingsWriter;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentReadState;
@ -121,14 +119,6 @@ public final class MockRandomPostingsFormat extends PostingsFormat {
PostingsWriterBase postingsWriter = new Lucene41PostingsWriter(state, skipInterval);
if (random.nextBoolean()) {
final int totTFCutoff = TestUtil.nextInt(random, 1, 20);
if (LuceneTestCase.VERBOSE) {
System.out.println("MockRandomCodec: writing pulsing postings with totTFCutoff=" + totTFCutoff);
}
postingsWriter = new PulsingPostingsWriter(state, totTFCutoff, postingsWriter);
}
final FieldsConsumer fields;
final int t1 = random.nextInt(5);
@ -292,14 +282,6 @@ public final class MockRandomPostingsFormat extends PostingsFormat {
PostingsReaderBase postingsReader = new Lucene41PostingsReader(state.directory, state.fieldInfos, state.segmentInfo, state.context, state.segmentSuffix);
if (random.nextBoolean()) {
final int totTFCutoff = TestUtil.nextInt(random, 1, 20);
if (LuceneTestCase.VERBOSE) {
System.out.println("MockRandomCodec: reading pulsing postings with totTFCutoff=" + totTFCutoff);
}
postingsReader = new PulsingPostingsReader(state, postingsReader);
}
final FieldsProducer fields;
final int t1 = random.nextInt(5);
if (t1 == 0) {
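Note how the writer-side and reader-side hunks above are removed in lockstep: MockRandomPostingsFormat replays the same saved seed on both paths, so the nextBoolean()/nextInt() draws are kept even though the pulsing wrapper they guarded is gone — presumably so the random stream stays aligned for every later choice. A hedged sketch of the invariant (seed plumbing elided; `seed` is an assumed variable):

// Invariant: write path and read path must consume the random stream identically.
Random random = new Random(seed);                          // same seed on both sides
if (random.nextBoolean()) {                                // draw preserved by this commit
  final int totTFCutoff = TestUtil.nextInt(random, 1, 20); // draw preserved too
  // the PulsingPostingsWriter/Reader wrapping that used these values is removed
}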

View File

@ -1,96 +0,0 @@
package org.apache.lucene.codecs.nestedpulsing;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.codecs.PostingsWriterBase;
import org.apache.lucene.codecs.blocktree.BlockTreeTermsReader;
import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter;
import org.apache.lucene.codecs.lucene41.Lucene41PostingsReader;
import org.apache.lucene.codecs.lucene41.Lucene41PostingsWriter;
import org.apache.lucene.codecs.pulsing.PulsingPostingsReader;
import org.apache.lucene.codecs.pulsing.PulsingPostingsWriter;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.IOUtils;
/**
* Pulsing(1, Pulsing(2, Lucene41))
* @lucene.experimental
*/
// TODO: if we create PulsingPostingsBaseFormat then we
// can simplify this? note: I don't like the *BaseFormat
// hierarchy, maybe we can clean that up...
public final class NestedPulsingPostingsFormat extends PostingsFormat {
public NestedPulsingPostingsFormat() {
super("NestedPulsing");
}
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
PostingsWriterBase docsWriter = null;
PostingsWriterBase pulsingWriterInner = null;
PostingsWriterBase pulsingWriter = null;
// Terms dict
boolean success = false;
try {
docsWriter = new Lucene41PostingsWriter(state);
pulsingWriterInner = new PulsingPostingsWriter(state, 2, docsWriter);
pulsingWriter = new PulsingPostingsWriter(state, 1, pulsingWriterInner);
FieldsConsumer ret = new BlockTreeTermsWriter(state, pulsingWriter,
BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE, BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE);
success = true;
return ret;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(docsWriter, pulsingWriterInner, pulsingWriter);
}
}
}
@Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
PostingsReaderBase docsReader = null;
PostingsReaderBase pulsingReaderInner = null;
PostingsReaderBase pulsingReader = null;
boolean success = false;
try {
docsReader = new Lucene41PostingsReader(state.directory, state.fieldInfos, state.segmentInfo, state.context, state.segmentSuffix);
pulsingReaderInner = new PulsingPostingsReader(state, docsReader);
pulsingReader = new PulsingPostingsReader(state, pulsingReaderInner);
FieldsProducer ret = new BlockTreeTermsReader(
state.directory, state.fieldInfos, state.segmentInfo,
pulsingReader,
state.context,
state.segmentSuffix);
success = true;
return ret;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(docsReader, pulsingReaderInner, pulsingReader);
}
}
}
}
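One idiom worth noting before this file disappears: both fieldsConsumer and fieldsProducer track every layer of the decorator chain so that a failure mid-construction closes whatever was already opened, while a success leaves everything open for the caller. The same success-flag pattern in generic form (openFirst/openSecond are hypothetical placeholders):

// Generic form of the success-flag cleanup used above.
Closeable first = null, second = null;
boolean success = false;
try {
  first = openFirst();
  second = openSecond(first);
  success = true;
  return second;
} finally {
  if (!success) {
    // closes both, suppressing secondary exceptions so the original one propagates
    IOUtils.closeWhileHandlingException(first, second);
  }
}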

View File

@ -1,25 +0,0 @@
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
</head>
<body>
Codec for testing that wraps {@link org.apache.lucene.codecs.pulsing.PulsingPostingsFormat} with itself.
</body>
</html>

View File

@ -40,14 +40,10 @@ import org.apache.lucene.codecs.lucene410.Lucene410Codec;
import org.apache.lucene.codecs.lucene410.Lucene410DocValuesFormat;
import org.apache.lucene.codecs.memory.DirectPostingsFormat;
import org.apache.lucene.codecs.memory.FSTOrdPostingsFormat;
import org.apache.lucene.codecs.memory.FSTOrdPulsing41PostingsFormat;
import org.apache.lucene.codecs.memory.FSTPostingsFormat;
import org.apache.lucene.codecs.memory.FSTPulsing41PostingsFormat;
import org.apache.lucene.codecs.memory.MemoryDocValuesFormat;
import org.apache.lucene.codecs.memory.MemoryPostingsFormat;
import org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat;
import org.apache.lucene.codecs.nestedpulsing.NestedPulsingPostingsFormat;
import org.apache.lucene.codecs.pulsing.Pulsing41PostingsFormat;
import org.apache.lucene.codecs.simpletext.SimpleTextDocValuesFormat;
import org.apache.lucene.codecs.simpletext.SimpleTextPostingsFormat;
import org.apache.lucene.util.LuceneTestCase;
@ -130,19 +126,13 @@ public class RandomCodec extends Lucene410Codec {
new Lucene41PostingsFormat(minItemsPerBlock, maxItemsPerBlock),
new FSTPostingsFormat(),
new FSTOrdPostingsFormat(),
new FSTPulsing41PostingsFormat(1 + random.nextInt(20)),
new FSTOrdPulsing41PostingsFormat(1 + random.nextInt(20)),
new DirectPostingsFormat(LuceneTestCase.rarely(random) ? 1 : (LuceneTestCase.rarely(random) ? Integer.MAX_VALUE : maxItemsPerBlock),
LuceneTestCase.rarely(random) ? 1 : (LuceneTestCase.rarely(random) ? Integer.MAX_VALUE : lowFreqCutoff)),
new Pulsing41PostingsFormat(1 + random.nextInt(20), minItemsPerBlock, maxItemsPerBlock),
// add pulsing again with (usually) different parameters
new Pulsing41PostingsFormat(1 + random.nextInt(20), minItemsPerBlock, maxItemsPerBlock),
//TODO as a PostingsFormat which wraps others, we should allow TestBloomFilteredLucene41Postings to be constructed
//with a choice of concrete PostingsFormats. Maybe useful to have a generic means of marking and dealing
//with such "wrapper" classes?
new TestBloomFilteredLucene41Postings(),
new MockRandomPostingsFormat(random),
new NestedPulsingPostingsFormat(),
new Lucene41WithOrds(TestUtil.nextInt(random, 1, 1000)),
new Lucene41VarGapFixedInterval(TestUtil.nextInt(random, 1, 1000)),
new Lucene41VarGapDocFreqInterval(TestUtil.nextInt(random, 1, 100), TestUtil.nextInt(random, 1, 1000)),

View File

@ -14,7 +14,6 @@
# limitations under the License.
org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat
org.apache.lucene.codecs.nestedpulsing.NestedPulsingPostingsFormat
org.apache.lucene.codecs.ramonly.RAMOnlyPostingsFormat
org.apache.lucene.codecs.lucene41ords.Lucene41WithOrds
org.apache.lucene.codecs.lucene41vargap.Lucene41VarGapFixedInterval

View File

@ -36,9 +36,9 @@ public class SchemaCodecFactory extends CodecFactory implements SolrCoreAware {
private volatile SolrCore core;
// TODO: we need to change how solr does this?
// rather than a string like "Pulsing" you need to be able to pass parameters
// rather than a string like "Direct" you need to be able to pass parameters
// and everything to a field in the schema, e.g. we should provide factories for
// the Lucene's core formats (Memory, Pulsing, ...) and such.
// the Lucene's core formats (Memory, Direct, ...) and such.
//
// So I think a FieldType should return PostingsFormat, not a String.
// how it constructs this from the XML... i don't care.
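A hedged sketch of the direction the TODO above argues for (this API does not exist in Solr; everything here is hypothetical): the schema would hand the codec a configured PostingsFormat instance instead of a bare format name to resolve:

// Hypothetical: "a FieldType should return PostingsFormat, not a String".
public PostingsFormat postingsFormatFor(SchemaField field) {
  // parameters come from the schema XML rather than from a format-name lookup
  return new DirectPostingsFormat(/*minSkipCount*/ 16, /*lowFreqCutoff*/ 32);
}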

View File

@ -19,7 +19,7 @@
<schema name="bad-schema-codec-global-vs-ft-mismatch" version="1.0">
<types>
<!-- BAD: postingsFormat here but no codec that allows it -->
<fieldType name="pulsing1" class="solr.TextField" postingsFormat="Pulsing">
<fieldType name="direct1" class="solr.TextField" postingsFormat="Direct">
<analyzer>
<tokenizer class="solr.MockTokenizerFactory"/>
</analyzer>
@ -27,10 +27,10 @@
</types>
<fields>
<field name="pulsing1text" type="pulsing1" indexed="true" stored="true"/>
<dynamicField name="*" type="pulsing1" />
<field name="direct1text" type="direct1" indexed="true" stored="true"/>
<dynamicField name="*" type="direct1" />
</fields>
<defaultSearchField>pulsing1text</defaultSearchField>
<defaultSearchField>direct1text</defaultSearchField>
</schema>

View File

@ -17,7 +17,7 @@
-->
<schema name="codec" version="1.2">
<types>
<fieldType name="string_pulsing" class="solr.StrField" postingsFormat="Pulsing41"/>
<fieldType name="string_direct" class="solr.StrField" postingsFormat="Direct"/>
<fieldType name="string_simpletext" class="solr.StrField" postingsFormat="SimpleText"/>
<fieldType name="string_standard" class="solr.StrField" postingsFormat="Lucene41"/>
@ -28,7 +28,7 @@
</types>
<fields>
<field name="string_pulsing_f" type="string_pulsing" indexed="true" stored="true" />
<field name="string_direct_f" type="string_direct" indexed="true" stored="true" />
<field name="string_simpletext_f" type="string_simpletext" indexed="true" stored="true" />
<field name="string_standard_f" type="string_standard" indexed="true" stored="true" />
@ -38,7 +38,7 @@
<field name="string_f" type="string" indexed="true" stored="true" docValues="true" required="true"/>
<dynamicField name="*_simple" type="string_simpletext" indexed="true" stored="true"/>
<dynamicField name="*_pulsing" type="string_pulsing" indexed="true" stored="true"/>
<dynamicField name="*_direct" type="string_direct" indexed="true" stored="true"/>
<dynamicField name="*_standard" type="string_standard" indexed="true" stored="true"/>
<dynamicField name="*_disk" type="string_disk" indexed="false" stored="false" docValues="true" />

View File

@ -36,9 +36,9 @@ public class TestCodecSupport extends SolrTestCaseJ4 {
public void testPostingsFormats() {
Codec codec = h.getCore().getCodec();
Map<String, SchemaField> fields = h.getCore().getLatestSchema().getFields();
SchemaField schemaField = fields.get("string_pulsing_f");
SchemaField schemaField = fields.get("string_direct_f");
PerFieldPostingsFormat format = (PerFieldPostingsFormat) codec.postingsFormat();
assertEquals("Pulsing41", format.getPostingsFormatForField(schemaField.getName()).getName());
assertEquals("Direct", format.getPostingsFormatForField(schemaField.getName()).getName());
schemaField = fields.get("string_simpletext_f");
assertEquals("SimpleText",
format.getPostingsFormatForField(schemaField.getName()).getName());
@ -68,8 +68,8 @@ public class TestCodecSupport extends SolrTestCaseJ4 {
assertEquals("SimpleText", format.getPostingsFormatForField("foo_simple").getName());
assertEquals("SimpleText", format.getPostingsFormatForField("bar_simple").getName());
assertEquals("Pulsing41", format.getPostingsFormatForField("foo_pulsing").getName());
assertEquals("Pulsing41", format.getPostingsFormatForField("bar_pulsing").getName());
assertEquals("Direct", format.getPostingsFormatForField("foo_direct").getName());
assertEquals("Direct", format.getPostingsFormatForField("bar_direct").getName());
assertEquals("Lucene41", format.getPostingsFormatForField("foo_standard").getName());
assertEquals("Lucene41", format.getPostingsFormatForField("bar_standard").getName());
}