LUCENE-3069: writer part

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3069@1499744 13f79535-47bb-0310-9956-ffa450edef68
Han Jiang 2013-07-04 13:13:54 +00:00
parent 1e3adfae1b
commit d6e2f4b663
6 changed files with 772 additions and 60 deletions

TempBlockTermState.java

@@ -1,47 +0,0 @@
package org.apache.lucene.codecs.temp;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.TermState;
public class TempBlockTermState extends TempTermState {
/** the term's ord in the current block */
public int termBlockOrd;
/** Sole constructor. (For invocation by subclass
* constructors, typically implicit.) */
protected TempBlockTermState() {
}
public TempBlockTermState clone() {
TempBlockTermState other = (TempBlockTermState)super.clone();
return other;
}
@Override
public void copyFrom(TermState _other) {
assert _other instanceof TempBlockTermState : "can not copy from " + _other.getClass().getName();
super.copyFrom(_other);
TempBlockTermState other = (TempBlockTermState) _other;
termBlockOrd = other.termBlockOrd;
}
@Override
public String toString() {
return super.toString() + " termBlockOrd=" + termBlockOrd;
}
}

TempFSTTermsReader.java

@@ -0,0 +1,282 @@
package org.apache.lucene.codecs.temp;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.io.PrintWriter;
import java.io.File;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.TreeMap;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.lucene.util.automaton.RunAutomaton;
import org.apache.lucene.util.automaton.Transition;
import org.apache.lucene.util.fst.ByteSequenceOutputs;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.Outputs;
import org.apache.lucene.util.fst.Util;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.TempPostingsReaderBase;
import org.apache.lucene.codecs.CodecUtil;
public class TempFSTTermsReader extends FieldsProducer {
final TempPostingsReaderBase postingsReader;
final IndexInput in;
final TreeMap<String, FieldReader> fields = new TreeMap<String, FieldReader>();
public TempFSTTermsReader(SegmentReadState state, TempPostingsReaderBase postingsReader) throws IOException {
final String termsFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TempFSTTermsWriter.TERMS_EXTENSION);
this.postingsReader = postingsReader;
this.in = state.directory.openInput(termsFileName, state.context);
boolean success = false;
try {
readHeader(in);
this.postingsReader.init(in);
seekDir(in);
final FieldInfos fieldInfos = state.fieldInfos;
final int numFields = in.readVInt();
for (int i = 0; i < numFields; i++) {
int fieldNumber = in.readVInt();
FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber);
long numTerms = in.readVLong();
long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY ? -1 : in.readVLong();
long sumDocFreq = in.readVLong();
int docCount = in.readVInt();
int longsSize = in.readVInt();
FieldReader current = new FieldReader(fieldInfo, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize);
FieldReader previous = fields.put(fieldInfo.name, current);
checkFieldSummary(state.segmentInfo, current, previous);
}
success = true;
} finally {
if (!success) {
in.close();
}
}
}
private int readHeader(IndexInput in) throws IOException {
return CodecUtil.checkHeader(in, TempFSTTermsWriter.TERMS_CODEC_NAME,
TempFSTTermsWriter.TERMS_VERSION_START,
TempFSTTermsWriter.TERMS_VERSION_CURRENT);
}
private void seekDir(IndexInput in) throws IOException {
in.seek(in.length() - 8);
in.seek(in.readLong());
}
private void checkFieldSummary(SegmentInfo info, FieldReader field, FieldReader previous) throws IOException {
// #docs with field must be <= #docs
if (field.docCount < 0 || field.docCount > info.getDocCount()) {
throw new CorruptIndexException("invalid docCount: " + field.docCount + " maxDoc: " + info.getDocCount() + " (resource=" + in + ")");
}
// #postings must be >= #docs with field
if (field.sumDocFreq < field.docCount) {
throw new CorruptIndexException("invalid sumDocFreq: " + field.sumDocFreq + " docCount: " + field.docCount + " (resource=" + in + ")");
}
// #positions must be >= #postings
if (field.sumTotalTermFreq != -1 && field.sumTotalTermFreq < field.sumDocFreq) {
throw new CorruptIndexException("invalid sumTotalTermFreq: " + field.sumTotalTermFreq + " sumDocFreq: " + field.sumDocFreq + " (resource=" + in + ")");
}
if (previous != null) {
throw new CorruptIndexException("duplicate fields: " + field.fieldInfo.name + " (resource=" + in + ")");
}
}
@Override
public Iterator<String> iterator() {
return Collections.unmodifiableSet(fields.keySet()).iterator();
}
@Override
public Terms terms(String field) throws IOException {
assert field != null;
return fields.get(field);
}
@Override
public int size() {
return fields.size();
}
@Override
public void close() throws IOException {
try {
IOUtils.close(in, postingsReader);
} finally {
fields.clear();
}
}
final class FieldReader extends Terms {
final FieldInfo fieldInfo;
final long numTerms;
final long sumTotalTermFreq;
final long sumDocFreq;
final int docCount;
final int longsSize;
final FST<TempTermOutputs.TempMetaData> dict;
FieldReader(FieldInfo fieldInfo, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize) throws IOException {
this.fieldInfo = fieldInfo;
this.numTerms = numTerms;
this.sumTotalTermFreq = sumTotalTermFreq;
this.sumDocFreq = sumDocFreq;
this.docCount = docCount;
this.longsSize = longsSize;
this.dict = new FST<TempTermOutputs.TempMetaData>(in, new TempTermOutputs(longsSize));
//PrintWriter pw = new PrintWriter(new File("../temp/xxx.txt"));
//Util.toDot(dict, pw, false, false);
}
// nocommit: implement intersect
// nocommit: why do we need this comparator overridden again and again?
@Override
public Comparator<BytesRef> getComparator() {
return BytesRef.getUTF8SortedAsUnicodeComparator();
}
@Override
public TermsEnum iterator(TermsEnum reuse) throws IOException {
return new SegmentTermsEnum();
}
@Override
public boolean hasOffsets() {
return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
}
@Override
public boolean hasPositions() {
return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
}
@Override
public boolean hasPayloads() {
return fieldInfo.hasPayloads();
}
@Override
public long size() {
return numTerms;
}
@Override
public long getSumTotalTermFreq() {
return sumTotalTermFreq;
}
@Override
public long getSumDocFreq() throws IOException {
return sumDocFreq;
}
@Override
public int getDocCount() throws IOException {
return docCount;
}
// Iterates through terms in this field
private final class SegmentTermsEnum extends TermsEnum {
SegmentTermsEnum() {
}
@Override
public Comparator<BytesRef> getComparator() {
return BytesRef.getUTF8SortedAsUnicodeComparator();
}
@Override
public SeekStatus seekCeil(final BytesRef target, final boolean useCache) throws IOException {
return null;
}
@Override
public BytesRef next() throws IOException {
return null;
}
@Override
public BytesRef term() {
return null;
}
@Override
public int docFreq() throws IOException {
return 0;
}
@Override
public long totalTermFreq() throws IOException {
return 0;
}
@Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
return null;
}
@Override
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
return null;
}
@Override
public void seekExact(BytesRef target, TermState otherState) {
}
@Override
public TermState termState() throws IOException {
return null;
}
@Override
public void seekExact(long ord) throws IOException {
}
@Override
public long ord() {
return 0;
}
}
}
}
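
The producer exposes the standard Fields/Terms API, but SegmentTermsEnum above is still a stub in this commit (only the writer part is done). Once it is implemented, consuming it would look roughly like the sketch below; the termsReader variable and the field name "body" are illustrative assumptions, not part of the patch:

  Terms terms = termsReader.terms("body");            // the FieldReader for this field
  TermsEnum termsEnum = terms.iterator(null);
  BytesRef term;
  while ((term = termsEnum.next()) != null) {         // terms in UTF8-as-unicode order
    DocsEnum docs = termsEnum.docs(null, null, DocsEnum.FLAG_FREQS);
    while (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
      // docs.docID() / docs.freq() are served by the Temp postings reader
    }
  }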

TempFSTTermsWriter.java

@@ -0,0 +1,198 @@
package org.apache.lucene.codecs.temp;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.List;
import java.util.ArrayList;
import java.util.Comparator;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.Util;
import org.apache.lucene.codecs.TempPostingsWriterBase;
import org.apache.lucene.codecs.PostingsConsumer;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.TermsConsumer;
import org.apache.lucene.codecs.TermStats;
import org.apache.lucene.codecs.CodecUtil;
/** FST-based term dictionary, with all of the term metadata held
 * as the output of the FST */
public class TempFSTTermsWriter extends FieldsConsumer {
static final String TERMS_EXTENSION = "tmp";
static final String TERMS_CODEC_NAME = "FST_TERMS_DICT";
public static final int TERMS_VERSION_START = 0;
public static final int TERMS_VERSION_CURRENT = TERMS_VERSION_START;
final TempPostingsWriterBase postingsWriter;
final FieldInfos fieldInfos;
final IndexOutput out;
final List<FieldMetaData> fields = new ArrayList<FieldMetaData>();
public TempFSTTermsWriter(SegmentWriteState state, TempPostingsWriterBase postingsWriter) throws IOException {
final String termsFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TERMS_EXTENSION);
this.postingsWriter = postingsWriter;
this.fieldInfos = state.fieldInfos;
this.out = state.directory.createOutput(termsFileName, state.context);
// nocommit: why the try/finally here? createOutput is not covered by it
boolean success = false;
try {
writeHeader(out);
this.postingsWriter.start(out);
success = true;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(out);
}
}
}
private void writeHeader(IndexOutput out) throws IOException {
CodecUtil.writeHeader(out, TERMS_CODEC_NAME, TERMS_VERSION_CURRENT);
}
private void writeTrailer(IndexOutput out, long dirStart) throws IOException {
out.writeLong(dirStart);
}
@Override
public TermsConsumer addField(FieldInfo field) throws IOException {
return new TermsWriter(field);
}
@Override
public void close() throws IOException {
IOException ioe = null;
try {
// write field summary
final long dirStart = out.getFilePointer();
out.writeVInt(fields.size());
for (FieldMetaData field : fields) {
out.writeVInt(field.fieldInfo.number);
out.writeVLong(field.numTerms);
if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
out.writeVLong(field.sumTotalTermFreq);
}
out.writeVLong(field.sumDocFreq);
out.writeVInt(field.docCount);
out.writeVInt(field.longsSize);
field.dict.save(out);
}
writeTrailer(out, dirStart);
} catch (IOException ioe2) {
ioe = ioe2;
} finally {
IOUtils.closeWhileHandlingException(ioe, out, postingsWriter);
}
}
private static class FieldMetaData {
public final FieldInfo fieldInfo;
public final long numTerms;
public final long sumTotalTermFreq;
public final long sumDocFreq;
public final int docCount;
public final int longsSize;
public final FST<TempTermOutputs.TempMetaData> dict;
public FieldMetaData(FieldInfo fieldInfo, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize, FST<TempTermOutputs.TempMetaData> fst) {
this.fieldInfo = fieldInfo;
this.numTerms = numTerms;
this.sumTotalTermFreq = sumTotalTermFreq;
this.sumDocFreq = sumDocFreq;
this.docCount = docCount;
this.longsSize = longsSize;
this.dict = fst;
}
}
class TermsWriter extends TermsConsumer {
private final Builder<TempTermOutputs.TempMetaData> builder;
private final TempTermOutputs outputs;
private final FieldInfo fieldInfo;
private final int longsSize;
private long numTerms;
private final IntsRef scratchTerm = new IntsRef();
private final RAMOutputStream metaWriter = new RAMOutputStream();
TermsWriter(FieldInfo fieldInfo) {
this.numTerms = 0;
this.fieldInfo = fieldInfo;
this.longsSize = postingsWriter.setField(fieldInfo);
this.outputs = new TempTermOutputs(longsSize);
this.builder = new Builder<TempTermOutputs.TempMetaData>(FST.INPUT_TYPE.BYTE1, outputs);
}
@Override
public Comparator<BytesRef> getComparator() {
return BytesRef.getUTF8SortedAsUnicodeComparator();
}
@Override
public PostingsConsumer startTerm(BytesRef text) throws IOException {
postingsWriter.startTerm();
return postingsWriter;
}
@Override
public void finishTerm(BytesRef text, TermStats stats) throws IOException {
final TempTermOutputs.TempMetaData meta = new TempTermOutputs.TempMetaData();
meta.longs = new long[longsSize];
meta.bytes = null;
postingsWriter.finishTerm(meta.longs, metaWriter, stats);
/*
meta.bytes = new byte[(int)metaWriter.getFilePointer()];
metaWriter.writeTo(meta.bytes, 0);
metaWriter.reset();
*/
int bytesSize = (int)metaWriter.getFilePointer();
if (bytesSize > 0) {
meta.bytes = new byte[bytesSize];
metaWriter.writeTo(meta.bytes, 0);
metaWriter.reset();
}
//System.out.println("add term:<"+text.utf8ToString()+", "+meta+">");
builder.add(Util.toIntsRef(text, scratchTerm), meta);
numTerms++;
}
@Override
public void finish(long sumTotalTermFreq, long sumDocFreq, int docCount) throws IOException {
// save FST dict
if (numTerms > 0) {
final FST<TempTermOutputs.TempMetaData> fst = builder.finish();
fields.add(new FieldMetaData(fieldInfo, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize, fst));
}
}
}
}
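
For orientation, here is a rough sketch (not part of the patch) of the call sequence the indexing chain drives against this consumer; writeState, postingsWriter, fieldInfo and the single-document stats are illustrative assumptions:

  FieldsConsumer consumer = new TempFSTTermsWriter(writeState, postingsWriter);
  TermsConsumer termsConsumer = consumer.addField(fieldInfo);          // one TermsWriter per field
  PostingsConsumer postings = termsConsumer.startTerm(new BytesRef("lucene"));
  postings.startDoc(42, 3);                                            // docID, freq within that doc
  postings.finishDoc();
  termsConsumer.finishTerm(new BytesRef("lucene"),
                           new TermStats(1, 3));                       // docFreq, totalTermFreq
  termsConsumer.finish(3, 1, 1);       // sumTotalTermFreq, sumDocFreq, docCount
  consumer.close();                    // saves each field's FST plus the field summary and trailer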

TempPostingsFormat.java

@@ -384,7 +384,10 @@ public final class TempPostingsFormat extends PostingsFormat {
/** Creates {@code TempPostingsFormat} with default
* settings. */
public TempPostingsFormat() {
-this(TempBlockTermsWriter.DEFAULT_MIN_BLOCK_SIZE, TempBlockTermsWriter.DEFAULT_MAX_BLOCK_SIZE);
+super("TempFST");
+minTermBlockSize = 0;
+maxTermBlockSize = 0;
+//this(TempBlockTermsWriter.DEFAULT_MIN_BLOCK_SIZE, TempBlockTermsWriter.DEFAULT_MAX_BLOCK_SIZE);
}
/** Creates {@code TempPostingsFormat} with custom
@@ -410,10 +413,11 @@ public final class TempPostingsFormat extends PostingsFormat {
boolean success = false;
try {
-FieldsConsumer ret = new TempBlockTermsWriter(state,
-postingsWriter,
-minTermBlockSize,
-maxTermBlockSize);
+//FieldsConsumer ret = new TempBlockTermsWriter(state,
+// postingsWriter,
+// minTermBlockSize,
+// maxTermBlockSize);
+FieldsConsumer ret = new TempFSTTermsWriter(state, postingsWriter);
success = true;
return ret;
} finally {
@@ -432,13 +436,14 @@ public final class TempPostingsFormat extends PostingsFormat {
state.segmentSuffix);
boolean success = false;
try {
-FieldsProducer ret = new TempBlockTermsReader(state.directory,
-state.fieldInfos,
-state.segmentInfo,
-postingsReader,
-state.context,
-state.segmentSuffix,
-state.termsIndexDivisor);
+//FieldsProducer ret = new TempBlockTermsReader(state.directory,
+// state.fieldInfos,
+// state.segmentInfo,
+// postingsReader,
+// state.context,
+// state.segmentSuffix,
+// state.termsIndexDivisor);
+FieldsProducer ret = new TempFSTTermsReader(state, postingsReader);
success = true;
return ret;
} finally {

TempTermOutputs.java

@@ -0,0 +1,274 @@
package org.apache.lucene.codecs.temp;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.fst.Outputs;
import org.apache.lucene.util.LongsRef;
// NOTE: outputs should be per-field, since
// longsSize is fixed for each field
public class TempTermOutputs extends Outputs<TempTermOutputs.TempMetaData> {
private final static TempMetaData NO_OUTPUT = new TempMetaData();
private static boolean DEBUG = false;
private int longsSize;
public static class TempMetaData {
public long[] longs;
public byte[] bytes;
TempMetaData() {
this.longs = null;
this.bytes = null;
}
TempMetaData(long[] longs, byte[] bytes) {
this.longs = longs;
this.bytes = bytes;
}
@Override
public int hashCode() {
int hash = 0;
if (longs != null) {
final int end = longs.length;
for (int i = 0; i < end; i++) {
hash -= longs[i];
}
}
if (bytes != null) {
hash = -hash;
final int end = bytes.length;
for (int i = 0; i < end; i++) {
hash += bytes[i];
}
}
return hash;
}
public String toString() {
if (this == NO_OUTPUT) {
return "no_output";
}
StringBuffer sb = new StringBuffer();
if (longs != null) {
sb.append("[ ");
for (int i = 0; i < longs.length; i++) {
sb.append(longs[i]+" ");
}
sb.append("]");
} else {
sb.append("null");
}
if (bytes != null) {
sb.append(" [ ");
for (int i = 0; i < bytes.length; i++) {
sb.append(bytes[i]+" ");
}
sb.append("]");
} else {
sb.append(" null");
}
return sb.toString();
}
}
private TempTermOutputs() {
}
protected TempTermOutputs(int longsSize) {
this.longsSize = longsSize;
}
@Override
//
// Since the longs blob has a fixed length, when the two outputs are 'comparable',
// i.e. every value in long[] follows the same ordering, the smaller one
// will be the result.
//
// NOTE: only long[] is 'shared', i.e. after sharing the common value,
// the output of the smaller one will be an all-zero long[] with its original byte[] blob.
//
// nocommit: Builder.add() doesn't immediately consume the output data,
// which means the longs after one add() should all be deeply copied
// instead of being reused? quite hairy to detect here, so the caller
// must be careful about this.
//
public TempMetaData common(TempMetaData t1, TempMetaData t2) {
if (DEBUG) System.out.print("common("+t1+", "+t2+") = ");
if (t1 == NO_OUTPUT || t2 == NO_OUTPUT) {
if (DEBUG) System.out.println("ret:"+NO_OUTPUT);
return NO_OUTPUT;
}
assert t1.longs != null;
assert t2.longs != null;
assert t1.longs.length == t2.longs.length;
long accum = 0;
long[] longs1 = t1.longs, longs2 = t2.longs;
int pos = 0;
boolean order = true;
TempMetaData ret;
while (pos < longsSize && longs1[pos] == longs2[pos]) {
pos++;
}
if (pos < longsSize) {
// unequal
order = (longs1[pos] > longs2[pos]);
if (order) {
// check whether strictly longs1 >= longs2
while (pos < longsSize && longs1[pos] >= longs2[pos]) {
accum += longs2[pos];
pos++;
}
} else {
// check whether strictly longs1 <= longs2
while (pos < longsSize && longs1[pos] <= longs2[pos]) {
accum += longs1[pos];
pos++;
}
}
if (pos < longsSize || accum == 0) {
ret = NO_OUTPUT;
} else if (order) {
ret = new TempMetaData(longs2, null);
} else {
ret = new TempMetaData(longs1, null);
}
} else {
// equal
if (t1.bytes != null && Arrays.equals(t1.bytes, t2.bytes)) { // all fields are equal
ret = t1;
} else if (accum == 0) { // all zero case
ret = NO_OUTPUT;
} else {
ret = new TempMetaData(longs1, null);
}
}
if (DEBUG) System.out.println("ret:"+ret);
return ret;
}
@Override
// nocommit:
// this *actually* always assumes that t2 <= t1 when the method is called
public TempMetaData subtract(TempMetaData t1, TempMetaData t2) {
if (DEBUG) System.out.print("subtract("+t1+", "+t2+") = ");
if (t2 == NO_OUTPUT) {
if (DEBUG) System.out.println("ret:"+t1);
return t1;
}
assert t1.longs != null;
assert t2.longs != null;
int pos = 0;
long diff = 0;
long[] share = new long[longsSize]; // nocommit: reuse
while (pos < longsSize) {
share[pos] = t1.longs[pos] - t2.longs[pos];
diff += share[pos];
pos++;
}
TempMetaData ret;
if (diff == 0 && (t1.bytes == null || t1.bytes.length == 0)) {
ret = NO_OUTPUT;
} else {
ret = new TempMetaData(share, t1.bytes);
}
if (DEBUG) System.out.println("ret:"+ret);
return ret;
}
@Override
// nocommit: need to check all-zero case?
// so we can reuse one long[]
public TempMetaData add(TempMetaData t1, TempMetaData t2) {
if (DEBUG) System.out.print("add("+t1+", "+t2+") = ");
// nocommit: necessary?
if (t1 == NO_OUTPUT) {
if (DEBUG) System.out.println("ret:"+t2);
return t2;
} else if (t2 == NO_OUTPUT) {
if (DEBUG) System.out.println("ret:"+t1);
return t1;
}
assert t1.longs != null;
assert t2.longs != null;
int pos = 0;
long[] accum = new long[longsSize]; // nocommit: reuse
while (pos < longsSize) {
accum[pos] = t1.longs[pos] + t2.longs[pos];
assert(accum[pos] >= 0);
pos++;
}
TempMetaData ret;
if (t2.bytes != null) {
ret = new TempMetaData(accum, t2.bytes);
} else {
ret = new TempMetaData(accum, t1.bytes);
}
if (DEBUG) System.out.println("ret:"+ret);
return ret;
}
@Override
public void write(TempMetaData data, DataOutput out) throws IOException {
for (int pos = 0; pos < longsSize; pos++) {
out.writeVLong(data.longs[pos]);
}
if (data.bytes != null) {
out.writeVInt(data.bytes.length);
out.writeBytes(data.bytes, 0, data.bytes.length);
} else {
out.writeVInt(0);
}
}
// nocommit: can this non-null byte case be used in Final Output?
@Override
public TempMetaData read(DataInput in) throws IOException {
long[] longs = new long[longsSize];
for (int pos = 0; pos < longsSize; pos++) {
longs[pos] = in.readVLong();
}
int bytesSize = in.readVInt();
byte[] bytes = null;
if (bytesSize > 0) {
bytes = new byte[bytesSize];
in.readBytes(bytes, 0, bytes.length);
}
TempMetaData meta = new TempMetaData(longs, bytes);
return meta;
}
@Override
public TempMetaData getNoOutput() {
return NO_OUTPUT;
}
@Override
public String outputToString(TempMetaData data) {
return data.toString();
}
}
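
To make the sharing rules above concrete, here is a small worked example (not part of the patch). It assumes longsSize = 2, and the hypothetical demo class sits in org.apache.lucene.codecs.temp so the protected constructor and the package-private TempMetaData constructor are visible:

  package org.apache.lucene.codecs.temp;

  public class TempTermOutputsDemo {
    public static void main(String[] args) {
      TempTermOutputs outputs = new TempTermOutputs(2);
      TempTermOutputs.TempMetaData t1 = new TempTermOutputs.TempMetaData(new long[] {5, 8}, new byte[] {42});
      TempTermOutputs.TempMetaData t2 = new TempTermOutputs.TempMetaData(new long[] {3, 2}, null);

      // {5,8} dominates {3,2} in every slot, so the smaller longs become the shared output.
      TempTermOutputs.TempMetaData shared = outputs.common(t1, t2);       // longs = [3, 2], no bytes
      TempTermOutputs.TempMetaData rest1 = outputs.subtract(t1, shared);  // longs = [2, 6], keeps t1.bytes
      TempTermOutputs.TempMetaData rest2 = outputs.subtract(t2, shared);  // all-zero, no bytes -> NO_OUTPUT
      System.out.println(outputs.add(shared, rest1));                     // [ 5 8 ] [ 42 ], original t1 restored
      System.out.println(rest2 == outputs.getNoOutput());                 // true

      // {3,5} vs {2,7}: the slots do not follow one ordering, so nothing can be shared.
      System.out.println(outputs.common(
          new TempTermOutputs.TempMetaData(new long[] {3, 5}, null),
          new TempTermOutputs.TempMetaData(new long[] {2, 7}, null)));    // no_output
    }
  }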

NodeHash.java

@@ -68,7 +68,7 @@ final class NodeHash<T> {
}
// hash code for an unfrozen node. This must be identical
-// to the un-frozen case (below)!!
+// to the frozen case (below)!!
private long hash(Builder.UnCompiledNode<T> node) {
final int PRIME = 31;
//System.out.println("hash unfrozen");