mirror of https://github.com/apache/lucene.git
LUCENE-3069: writer part
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3069@1499744 13f79535-47bb-0310-9956-ffa450edef68
parent 1e3adfae1b
commit d6e2f4b663
@@ -1,47 +0,0 @@
package org.apache.lucene.codecs.temp;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.index.TermState;

public class TempBlockTermState extends TempTermState {
  /** the term's ord in the current block */
  public int termBlockOrd;

  /** Sole constructor. (For invocation by subclass
   *  constructors, typically implicit.) */
  protected TempBlockTermState() {
  }

  @Override
  public TempBlockTermState clone() {
    TempBlockTermState other = (TempBlockTermState) super.clone();
    return other;
  }

  @Override
  public void copyFrom(TermState _other) {
    assert _other instanceof TempBlockTermState : "can not copy from " + _other.getClass().getName();
    super.copyFrom(_other);
    TempBlockTermState other = (TempBlockTermState) _other;
    termBlockOrd = other.termBlockOrd;
  }

  @Override
  public String toString() {
    return super.toString() + " termBlockOrd=" + termBlockOrd;
  }
}
@@ -0,0 +1,282 @@
package org.apache.lucene.codecs.temp;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.io.PrintWriter;
import java.io.File;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.TreeMap;

import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.lucene.util.automaton.RunAutomaton;
import org.apache.lucene.util.automaton.Transition;
import org.apache.lucene.util.fst.ByteSequenceOutputs;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.Outputs;
import org.apache.lucene.util.fst.Util;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.TempPostingsReaderBase;
import org.apache.lucene.codecs.CodecUtil;

public class TempFSTTermsReader extends FieldsProducer {
  final TempPostingsReaderBase postingsReader;
  final IndexInput in;
  final TreeMap<String,FieldReader> fields = new TreeMap<String,FieldReader>();

  public TempFSTTermsReader(SegmentReadState state, TempPostingsReaderBase postingsReader) throws IOException {
    final String termsFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TempFSTTermsWriter.TERMS_EXTENSION);

    this.postingsReader = postingsReader;
    this.in = state.directory.openInput(termsFileName, state.context);

    boolean success = false;
    try {
      readHeader(in);
      this.postingsReader.init(in);
      seekDir(in);

      final FieldInfos fieldInfos = state.fieldInfos;
      final int numFields = in.readVInt();
      for (int i = 0; i < numFields; i++) {
        int fieldNumber = in.readVInt();
        FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber);
        long numTerms = in.readVLong();
        long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY ? -1 : in.readVLong();
        long sumDocFreq = in.readVLong();
        int docCount = in.readVInt();
        int longsSize = in.readVInt();
        FieldReader current = new FieldReader(fieldInfo, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize);
        FieldReader previous = fields.put(fieldInfo.name, current);
        checkFieldSummary(state.segmentInfo, current, previous);
      }
      success = true;
    } finally {
      if (!success) {
        in.close();
      }
    }
  }

  private int readHeader(IndexInput in) throws IOException {
    return CodecUtil.checkHeader(in, TempFSTTermsWriter.TERMS_CODEC_NAME,
                                     TempFSTTermsWriter.TERMS_VERSION_START,
                                     TempFSTTermsWriter.TERMS_VERSION_CURRENT);
  }

  private void seekDir(IndexInput in) throws IOException {
    in.seek(in.length() - 8);
    in.seek(in.readLong());
  }

  private void checkFieldSummary(SegmentInfo info, FieldReader field, FieldReader previous) throws IOException {
    // #docs with field must be <= #docs
    if (field.docCount < 0 || field.docCount > info.getDocCount()) {
      throw new CorruptIndexException("invalid docCount: " + field.docCount + " maxDoc: " + info.getDocCount() + " (resource=" + in + ")");
    }
    // #postings must be >= #docs with field
    if (field.sumDocFreq < field.docCount) {
      throw new CorruptIndexException("invalid sumDocFreq: " + field.sumDocFreq + " docCount: " + field.docCount + " (resource=" + in + ")");
    }
    // #positions must be >= #postings
    if (field.sumTotalTermFreq != -1 && field.sumTotalTermFreq < field.sumDocFreq) {
      throw new CorruptIndexException("invalid sumTotalTermFreq: " + field.sumTotalTermFreq + " sumDocFreq: " + field.sumDocFreq + " (resource=" + in + ")");
    }
    if (previous != null) {
      throw new CorruptIndexException("duplicate fields: " + field.fieldInfo.name + " (resource=" + in + ")");
    }
  }

  @Override
  public Iterator<String> iterator() {
    return Collections.unmodifiableSet(fields.keySet()).iterator();
  }

  @Override
  public Terms terms(String field) throws IOException {
    assert field != null;
    return fields.get(field);
  }

  @Override
  public int size() {
    return fields.size();
  }

  @Override
  public void close() throws IOException {
    try {
      IOUtils.close(in, postingsReader);
    } finally {
      fields.clear();
    }
  }

  final class FieldReader extends Terms {
    final FieldInfo fieldInfo;
    final long numTerms;
    final long sumTotalTermFreq;
    final long sumDocFreq;
    final int docCount;
    final int longsSize;
    final FST<TempTermOutputs.TempMetaData> dict;

    FieldReader(FieldInfo fieldInfo, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize) throws IOException {
      this.fieldInfo = fieldInfo;
      this.numTerms = numTerms;
      this.sumTotalTermFreq = sumTotalTermFreq;
      this.sumDocFreq = sumDocFreq;
      this.docCount = docCount;
      this.longsSize = longsSize;
      this.dict = new FST<TempTermOutputs.TempMetaData>(in, new TempTermOutputs(longsSize));
      //PrintWriter pw = new PrintWriter(new File("../temp/xxx.txt"));
      //Util.toDot(dict, pw, false, false);
    }

    // nocommit: implement intersect
    // nocommit: why do we need this comparator overridden again and again?
    @Override
    public Comparator<BytesRef> getComparator() {
      return BytesRef.getUTF8SortedAsUnicodeComparator();
    }

    @Override
    public TermsEnum iterator(TermsEnum reuse) throws IOException {
      return new SegmentTermsEnum();
    }

    @Override
    public boolean hasOffsets() {
      return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
    }

    @Override
    public boolean hasPositions() {
      return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
    }

    @Override
    public boolean hasPayloads() {
      return fieldInfo.hasPayloads();
    }

    @Override
    public long size() {
      return numTerms;
    }

    @Override
    public long getSumTotalTermFreq() {
      return sumTotalTermFreq;
    }

    @Override
    public long getSumDocFreq() throws IOException {
      return sumDocFreq;
    }

    @Override
    public int getDocCount() throws IOException {
      return docCount;
    }

    // Iterates through terms in this field.
    // NOTE: every method below is still an unimplemented stub; this
    // commit only covers the writer part.
    private final class SegmentTermsEnum extends TermsEnum {
      SegmentTermsEnum() {
      }

      @Override
      public Comparator<BytesRef> getComparator() {
        return BytesRef.getUTF8SortedAsUnicodeComparator();
      }

      @Override
      public SeekStatus seekCeil(final BytesRef target, final boolean useCache) throws IOException {
        return null;
      }

      @Override
      public BytesRef next() throws IOException {
        return null;
      }

      @Override
      public BytesRef term() {
        return null;
      }

      @Override
      public int docFreq() throws IOException {
        return 0;
      }

      @Override
      public long totalTermFreq() throws IOException {
        return 0;
      }

      @Override
      public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
        return null;
      }

      @Override
      public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
        return null;
      }

      @Override
      public void seekExact(BytesRef target, TermState otherState) {
      }

      @Override
      public TermState termState() throws IOException {
        return null;
      }

      @Override
      public void seekExact(long ord) throws IOException {
      }

      @Override
      public long ord() {
        return 0;
      }
    }
  }
}
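
Editor's note: seekDir() above resolves the field summary through a fixed-size trailer. The writer records the summary's start offset as the final 8 bytes of the file, so the reader seeks to length() - 8, reads the long stored there, and seeks once more. A self-contained sketch of the same scheme, using plain java.io in place of Lucene's Directory/IndexInput APIs (the file name and payload strings are illustrative only):

import java.io.DataOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.RandomAccessFile;

public class TrailerSketch {
  public static void main(String[] args) throws IOException {
    long summaryStart;
    // write: [payload][field summary][summaryStart as the final long]
    try (DataOutputStream out = new DataOutputStream(new FileOutputStream("terms.tmp"))) {
      out.writeUTF("...term/postings payload...");
      summaryStart = out.size();          // offset where the summary begins
      out.writeUTF("...field summary...");
      out.writeLong(summaryStart);        // trailer: always the last 8 bytes
    }
    // read: the same two seeks seekDir() performs
    try (RandomAccessFile in = new RandomAccessFile("terms.tmp", "r")) {
      in.seek(in.length() - 8);           // jump to the trailer
      in.seek(in.readLong());             // follow it to the summary
      System.out.println(in.readUTF());   // prints "...field summary..."
    }
  }
}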
@@ -0,0 +1,198 @@
package org.apache.lucene.codecs.temp;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.List;
import java.util.ArrayList;
import java.util.Comparator;

import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.Util;
import org.apache.lucene.codecs.TempPostingsWriterBase;
import org.apache.lucene.codecs.PostingsConsumer;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.TermsConsumer;
import org.apache.lucene.codecs.TermStats;
import org.apache.lucene.codecs.CodecUtil;

/** FST-based term dictionary, with all of the term
 *  metadata held as the output of the FST. */
public class TempFSTTermsWriter extends FieldsConsumer {
  static final String TERMS_EXTENSION = "tmp";
  static final String TERMS_CODEC_NAME = "FST_TERMS_DICT";
  public static final int TERMS_VERSION_START = 0;
  public static final int TERMS_VERSION_CURRENT = TERMS_VERSION_START;

  final TempPostingsWriterBase postingsWriter;
  final FieldInfos fieldInfos;
  final IndexOutput out;
  final List<FieldMetaData> fields = new ArrayList<FieldMetaData>();

  public TempFSTTermsWriter(SegmentWriteState state, TempPostingsWriterBase postingsWriter) throws IOException {
    final String termsFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TERMS_EXTENSION);

    this.postingsWriter = postingsWriter;
    this.fieldInfos = state.fieldInfos;
    this.out = state.directory.createOutput(termsFileName, state.context);

    // nocommit: why try catch here? not catching createOutput?
    boolean success = false;
    try {
      writeHeader(out);
      this.postingsWriter.start(out);
      success = true;
    } finally {
      if (!success) {
        IOUtils.closeWhileHandlingException(out);
      }
    }
  }

  private void writeHeader(IndexOutput out) throws IOException {
    CodecUtil.writeHeader(out, TERMS_CODEC_NAME, TERMS_VERSION_CURRENT);
  }

  private void writeTrailer(IndexOutput out, long dirStart) throws IOException {
    out.writeLong(dirStart);
  }

  @Override
  public TermsConsumer addField(FieldInfo field) throws IOException {
    return new TermsWriter(field);
  }

  @Override
  public void close() throws IOException {
    IOException ioe = null;
    try {
      // write field summary
      final long dirStart = out.getFilePointer();

      out.writeVInt(fields.size());
      for (FieldMetaData field : fields) {
        out.writeVInt(field.fieldInfo.number);
        out.writeVLong(field.numTerms);
        if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
          out.writeVLong(field.sumTotalTermFreq);
        }
        out.writeVLong(field.sumDocFreq);
        out.writeVInt(field.docCount);
        out.writeVInt(field.longsSize);
        field.dict.save(out);
      }
      writeTrailer(out, dirStart);
    } catch (IOException ioe2) {
      ioe = ioe2;
    } finally {
      IOUtils.closeWhileHandlingException(ioe, out, postingsWriter);
    }
  }

  private static class FieldMetaData {
    public final FieldInfo fieldInfo;
    public final long numTerms;
    public final long sumTotalTermFreq;
    public final long sumDocFreq;
    public final int docCount;
    public final int longsSize;
    public final FST<TempTermOutputs.TempMetaData> dict;

    public FieldMetaData(FieldInfo fieldInfo, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize, FST<TempTermOutputs.TempMetaData> fst) {
      this.fieldInfo = fieldInfo;
      this.numTerms = numTerms;
      this.sumTotalTermFreq = sumTotalTermFreq;
      this.sumDocFreq = sumDocFreq;
      this.docCount = docCount;
      this.longsSize = longsSize;
      this.dict = fst;
    }
  }

  class TermsWriter extends TermsConsumer {
    private final Builder<TempTermOutputs.TempMetaData> builder;
    private final TempTermOutputs outputs;
    private final FieldInfo fieldInfo;
    private final int longsSize;
    private long numTerms;

    private final IntsRef scratchTerm = new IntsRef();
    private final RAMOutputStream metaWriter = new RAMOutputStream();

    TermsWriter(FieldInfo fieldInfo) {
      this.numTerms = 0;
      this.fieldInfo = fieldInfo;
      this.longsSize = postingsWriter.setField(fieldInfo);
      this.outputs = new TempTermOutputs(longsSize);
      this.builder = new Builder<TempTermOutputs.TempMetaData>(FST.INPUT_TYPE.BYTE1, outputs);
    }

    @Override
    public Comparator<BytesRef> getComparator() {
      return BytesRef.getUTF8SortedAsUnicodeComparator();
    }

    @Override
    public PostingsConsumer startTerm(BytesRef text) throws IOException {
      postingsWriter.startTerm();
      return postingsWriter;
    }

    @Override
    public void finishTerm(BytesRef text, TermStats stats) throws IOException {
      final TempTermOutputs.TempMetaData meta = new TempTermOutputs.TempMetaData();
      meta.longs = new long[longsSize];
      meta.bytes = null;
      postingsWriter.finishTerm(meta.longs, metaWriter, stats);
      /*
      meta.bytes = new byte[(int) metaWriter.getFilePointer()];
      metaWriter.writeTo(meta.bytes, 0);
      metaWriter.reset();
      */
      int bytesSize = (int) metaWriter.getFilePointer();
      if (bytesSize > 0) {
        meta.bytes = new byte[bytesSize];
        metaWriter.writeTo(meta.bytes, 0);
        metaWriter.reset();
      }
      //System.out.println("add term:<"+text.utf8ToString()+", "+meta+">");
      builder.add(Util.toIntsRef(text, scratchTerm), meta);
      numTerms++;
    }

    @Override
    public void finish(long sumTotalTermFreq, long sumDocFreq, int docCount) throws IOException {
      // save FST dict
      if (numTerms > 0) {
        final FST<TempTermOutputs.TempMetaData> fst = builder.finish();
        fields.add(new FieldMetaData(fieldInfo, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize, fst));
      }
    }
  }
}
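
Editor's note: reading this writer's close() together with the reader's constructor and seekDir(), the implied layout of the .tmp terms file is (a summary sketch, not an authoritative spec):

    CodecUtil header                       writeHeader() / readHeader()
    postings metadata and term data        postingsWriter.start(out) / postingsReader.init(in)
    field summary (begins at dirStart):    written in close(), read after seekDir()
      numFields, then per field:
        fieldNumber, numTerms,
        sumTotalTermFreq (skipped for DOCS_ONLY fields),
        sumDocFreq, docCount, longsSize,
        the field's serialized FST (dict.save(out))
    dirStart as a long                     the file's last 8 bytes (writeTrailer)

The conditional sumTotalTermFreq is why the reader's loop skips that VLong for DOCS_ONLY fields.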
@@ -384,7 +384,10 @@ public final class TempPostingsFormat extends PostingsFormat {
  /** Creates {@code TempPostingsFormat} with default
   * settings. */
  public TempPostingsFormat() {
-   this(TempBlockTermsWriter.DEFAULT_MIN_BLOCK_SIZE, TempBlockTermsWriter.DEFAULT_MAX_BLOCK_SIZE);
+   super("TempFST");
+   minTermBlockSize = 0;
+   maxTermBlockSize = 0;
+   //this(TempBlockTermsWriter.DEFAULT_MIN_BLOCK_SIZE, TempBlockTermsWriter.DEFAULT_MAX_BLOCK_SIZE);
  }

  /** Creates {@code TempPostingsFormat} with custom
@@ -410,10 +413,11 @@ public final class TempPostingsFormat extends PostingsFormat {

  boolean success = false;
  try {
-   FieldsConsumer ret = new TempBlockTermsWriter(state,
-                                                 postingsWriter,
-                                                 minTermBlockSize,
-                                                 maxTermBlockSize);
+   //FieldsConsumer ret = new TempBlockTermsWriter(state,
+   //                                              postingsWriter,
+   //                                              minTermBlockSize,
+   //                                              maxTermBlockSize);
+   FieldsConsumer ret = new TempFSTTermsWriter(state, postingsWriter);
    success = true;
    return ret;
  } finally {
@@ -432,13 +436,14 @@ public final class TempPostingsFormat extends PostingsFormat {
                                                state.segmentSuffix);
  boolean success = false;
  try {
-   FieldsProducer ret = new TempBlockTermsReader(state.directory,
-                                                 state.fieldInfos,
-                                                 state.segmentInfo,
-                                                 postingsReader,
-                                                 state.context,
-                                                 state.segmentSuffix,
-                                                 state.termsIndexDivisor);
+   //FieldsProducer ret = new TempBlockTermsReader(state.directory,
+   //                                              state.fieldInfos,
+   //                                              state.segmentInfo,
+   //                                              postingsReader,
+   //                                              state.context,
+   //                                              state.segmentSuffix,
+   //                                              state.termsIndexDivisor);
+   FieldsProducer ret = new TempFSTTermsReader(state, postingsReader);
    success = true;
    return ret;
  } finally {
@@ -0,0 +1,274 @@
package org.apache.lucene.codecs.temp;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Arrays;

import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.fst.Outputs;
import org.apache.lucene.util.LongsRef;

// NOTE: outputs should be per-field, since
// longsSize is fixed for each field
public class TempTermOutputs extends Outputs<TempTermOutputs.TempMetaData> {
  private final static TempMetaData NO_OUTPUT = new TempMetaData();
  private static boolean DEBUG = false;
  private int longsSize;

  public static class TempMetaData {
    public long[] longs;
    public byte[] bytes;
    TempMetaData() {
      this.longs = null;
      this.bytes = null;
    }
    TempMetaData(long[] longs, byte[] bytes) {
      this.longs = longs;
      this.bytes = bytes;
    }
    @Override
    public int hashCode() {
      int hash = 0;
      if (longs != null) {
        final int end = longs.length;
        for (int i = 0; i < end; i++) {
          hash -= longs[i];
        }
      }
      if (bytes != null) {
        hash = -hash;
        final int end = bytes.length;
        for (int i = 0; i < end; i++) {
          hash += bytes[i];
        }
      }
      return hash;
    }
    @Override
    public String toString() {
      if (this == NO_OUTPUT) {
        return "no_output";
      }
      StringBuffer sb = new StringBuffer();
      if (longs != null) {
        sb.append("[ ");
        for (int i = 0; i < longs.length; i++) {
          sb.append(longs[i] + " ");
        }
        sb.append("]");
      } else {
        sb.append("null");
      }
      if (bytes != null) {
        sb.append(" [ ");
        for (int i = 0; i < bytes.length; i++) {
          sb.append(bytes[i] + " ");
        }
        sb.append("]");
      } else {
        sb.append(" null");
      }
      return sb.toString();
    }
  }

  private TempTermOutputs() {
  }

  protected TempTermOutputs(int longsSize) {
    this.longsSize = longsSize;
  }

  @Override
  //
  // Since the longs blob has fixed length, when the two outputs are 'comparable',
  // i.e. every value in one long[] is >= the corresponding value in the other,
  // the element-wise smaller one will be the result.
  //
  // NOTE: only long[] is 'shared', i.e. after sharing the common values,
  // the output of the smaller one becomes an all-zero long[] with its
  // original byte[] blob.
  //
  // nocommit: Builder.add() doesn't immediately consume the output data,
  // which means the longs after one add() should all be deeply copied
  // instead of being reused? quite hairy to detect it here, so the caller
  // must be careful about this.
  //
  public TempMetaData common(TempMetaData t1, TempMetaData t2) {
    if (DEBUG) System.out.print("common(" + t1 + ", " + t2 + ") = ");
    if (t1 == NO_OUTPUT || t2 == NO_OUTPUT) {
      if (DEBUG) System.out.println("ret:" + NO_OUTPUT);
      return NO_OUTPUT;
    }
    assert t1.longs != null;
    assert t2.longs != null;
    assert t1.longs.length == t2.longs.length;

    long accum = 0;
    long[] longs1 = t1.longs, longs2 = t2.longs;
    int pos = 0;
    boolean order = true;
    TempMetaData ret;

    while (pos < longsSize && longs1[pos] == longs2[pos]) {
      pos++;
    }
    if (pos < longsSize) {
      // unequal
      order = (longs1[pos] > longs2[pos]);
      if (order) {
        // check whether longs1 >= longs2, element-wise
        while (pos < longsSize && longs1[pos] >= longs2[pos]) {
          accum += longs2[pos];
          pos++;
        }
      } else {
        // check whether longs1 <= longs2, element-wise
        while (pos < longsSize && longs1[pos] <= longs2[pos]) {
          accum += longs1[pos];
          pos++;
        }
      }
      if (pos < longsSize || accum == 0) {
        ret = NO_OUTPUT;
      } else if (order) {
        ret = new TempMetaData(longs2, null);
      } else {
        ret = new TempMetaData(longs1, null);
      }
    } else {
      // equal
      if (t1.bytes != null && Arrays.equals(t1.bytes, t2.bytes)) { // all fields are equal
        ret = t1;
      } else if (accum == 0) { // all-zero case
        ret = NO_OUTPUT;
      } else {
        ret = new TempMetaData(longs1, null);
      }
    }
    if (DEBUG) System.out.println("ret:" + ret);
    return ret;
  }

  @Override
  // nocommit:
  // this *actually* always assumes that t2 <= t1 before the method is called
  public TempMetaData subtract(TempMetaData t1, TempMetaData t2) {
    if (DEBUG) System.out.print("subtract(" + t1 + ", " + t2 + ") = ");
    if (t2 == NO_OUTPUT) {
      if (DEBUG) System.out.println("ret:" + t1);
      return t1;
    }
    assert t1.longs != null;
    assert t2.longs != null;

    int pos = 0;
    long diff = 0;
    long[] share = new long[longsSize]; // nocommit: reuse

    while (pos < longsSize) {
      share[pos] = t1.longs[pos] - t2.longs[pos];
      diff += share[pos];
      pos++;
    }

    TempMetaData ret;
    if (diff == 0 && (t1.bytes == null || t1.bytes.length == 0)) {
      ret = NO_OUTPUT;
    } else {
      ret = new TempMetaData(share, t1.bytes);
    }
    if (DEBUG) System.out.println("ret:" + ret);
    return ret;
  }

  @Override
  // nocommit: need to check all-zero case?
  // so we can reuse one long[]
  public TempMetaData add(TempMetaData t1, TempMetaData t2) {
    if (DEBUG) System.out.print("add(" + t1 + ", " + t2 + ") = ");
    // nocommit: necessary?
    if (t1 == NO_OUTPUT) {
      if (DEBUG) System.out.println("ret:" + t2);
      return t2;
    } else if (t2 == NO_OUTPUT) {
      if (DEBUG) System.out.println("ret:" + t1);
      return t1;
    }
    assert t1.longs != null;
    assert t2.longs != null;

    int pos = 0;
    long[] accum = new long[longsSize]; // nocommit: reuse
    while (pos < longsSize) {
      accum[pos] = t1.longs[pos] + t2.longs[pos];
      assert(accum[pos] >= 0);
      pos++;
    }
    TempMetaData ret;
    if (t2.bytes != null) {
      ret = new TempMetaData(accum, t2.bytes);
    } else {
      ret = new TempMetaData(accum, t1.bytes);
    }
    if (DEBUG) System.out.println("ret:" + ret);
    return ret;
  }

  @Override
  public void write(TempMetaData data, DataOutput out) throws IOException {
    for (int pos = 0; pos < longsSize; pos++) {
      out.writeVLong(data.longs[pos]);
    }
    if (data.bytes != null) {
      out.writeVInt(data.bytes.length);
      out.writeBytes(data.bytes, 0, data.bytes.length);
    } else {
      out.writeVInt(0);
    }
  }
  // nocommit: can this non-null bytes case be used in the final output?

  @Override
  public TempMetaData read(DataInput in) throws IOException {
    long[] longs = new long[longsSize];
    for (int pos = 0; pos < longsSize; pos++) {
      longs[pos] = in.readVLong();
    }
    int bytesSize = in.readVInt();
    byte[] bytes = null;
    if (bytesSize > 0) {
      bytes = new byte[bytesSize];
      in.readBytes(bytes, 0, bytes.length);
    }
    TempMetaData meta = new TempMetaData(longs, bytes);
    return meta;
  }

  @Override
  public TempMetaData getNoOutput() {
    return NO_OUTPUT;
  }

  @Override
  public String outputToString(TempMetaData data) {
    return data.toString();
  }
}
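
Editor's note: common(), subtract() and add() form the output algebra the FST Builder uses to push shared term metadata toward the root of the automaton. A hand-worked trace with hypothetical values (longsSize = 2, no byte blobs) shows the intended round trip:

    t1.longs = {3, 5}, t2.longs = {2, 4}

    common(t1, t2)      -> {2, 4}     (3 >= 2 and 5 >= 4: t1 dominates element-wise,
                                       so the smaller side is shared; bytes are dropped)
    subtract(t1, {2,4}) -> {1, 1}     (t1's byte[] blob, if any, stays with this residue)
    subtract(t2, {2,4}) -> NO_OUTPUT  (all-zero residue and t2.bytes == null)
    add({2,4}, {1,1})   -> {3, 5}     (traversal re-assembles t1's original longs)

    With t1.longs = {3, 4} and t2.longs = {2, 7}, neither array dominates the
    other (3 > 2 but 4 < 7), so common() returns NO_OUTPUT and nothing is shared.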
@@ -68,7 +68,7 @@ final class NodeHash<T> {
  }

  // hash code for an unfrozen node. This must be identical
- // to the un-frozen case (below)!!
+ // to the frozen case (below)!!
  private long hash(Builder.UnCompiledNode<T> node) {
    final int PRIME = 31;
    //System.out.println("hash unfrozen");