diff --git a/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockTermState.java b/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockTermState.java
deleted file mode 100644
index b6a027d3077..00000000000
--- a/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockTermState.java
+++ /dev/null
@@ -1,47 +0,0 @@
-package org.apache.lucene.codecs.temp;
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.index.TermState;
-
-public class TempBlockTermState extends TempTermState {
-  /** the term's ord in the current block */
-  public int termBlockOrd;
-
-  /** Sole constructor. (For invocation by subclass
-   *  constructors, typically implicit.) */
-  protected TempBlockTermState() {
-  }
-
-  public TempBlockTermState clone() {
-    TempBlockTermState other = (TempBlockTermState)super.clone();
-    return other;
-  }
-
-  @Override
-  public void copyFrom(TermState _other) {
-    assert _other instanceof TempBlockTermState : "can not copy from " + _other.getClass().getName();
-    super.copyFrom(_other);
-    TempBlockTermState other = (TempBlockTermState) _other;
-    termBlockOrd = other.termBlockOrd;
-  }
-
-  @Override
-  public String toString() {
-    return super.toString() + " termBlockOrd=" + termBlockOrd;
-  }
-}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/temp/TempFSTTermsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/temp/TempFSTTermsReader.java
new file mode 100644
index 00000000000..b044e41d333
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/codecs/temp/TempFSTTermsReader.java
@@ -0,0 +1,282 @@
+package org.apache.lucene.codecs.temp;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.io.File;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.TreeMap;
+
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.TermState;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.automaton.CompiledAutomaton;
+import org.apache.lucene.util.automaton.RunAutomaton;
+import org.apache.lucene.util.automaton.Transition;
+import org.apache.lucene.util.fst.ByteSequenceOutputs;
+import org.apache.lucene.util.fst.FST;
+import org.apache.lucene.util.fst.Outputs;
+import org.apache.lucene.util.fst.Util;
+import org.apache.lucene.codecs.FieldsProducer;
+import org.apache.lucene.codecs.TempPostingsReaderBase;
+import org.apache.lucene.codecs.CodecUtil;
+
+
+public class TempFSTTermsReader extends FieldsProducer {
+  final TempPostingsReaderBase postingsReader;
+  final IndexInput in;
+  final TreeMap<String, FieldReader> fields = new TreeMap<String, FieldReader>();
+
+
+  public TempFSTTermsReader(SegmentReadState state, TempPostingsReaderBase postingsReader) throws IOException {
+    final String termsFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TempFSTTermsWriter.TERMS_EXTENSION);
+
+    this.postingsReader = postingsReader;
+    this.in = state.directory.openInput(termsFileName, state.context);
+
+    boolean success = false;
+    try {
+      readHeader(in);
+      this.postingsReader.init(in);
+      seekDir(in);
+
+      final FieldInfos fieldInfos = state.fieldInfos;
+      final int numFields = in.readVInt();
+      for (int i = 0; i < numFields; i++) {
+        int fieldNumber = in.readVInt();
+        FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber);
+        long numTerms = in.readVLong();
+        long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY ?
+            -1 : in.readVLong();
+        long sumDocFreq = in.readVLong();
+        int docCount = in.readVInt();
+        int longsSize = in.readVInt();
+        FieldReader current = new FieldReader(fieldInfo, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize);
+        FieldReader previous = fields.put(fieldInfo.name, current);
+        checkFieldSummary(state.segmentInfo, current, previous);
+      }
+      success = true;
+    } finally {
+      if (!success) {
+        in.close();
+      }
+    }
+  }
+
+  private int readHeader(IndexInput in) throws IOException {
+    return CodecUtil.checkHeader(in, TempFSTTermsWriter.TERMS_CODEC_NAME,
+                                     TempFSTTermsWriter.TERMS_VERSION_START,
+                                     TempFSTTermsWriter.TERMS_VERSION_CURRENT);
+  }
+  private void seekDir(IndexInput in) throws IOException {
+    in.seek(in.length() - 8);
+    in.seek(in.readLong());
+  }
+  private void checkFieldSummary(SegmentInfo info, FieldReader field, FieldReader previous) throws IOException {
+    // #docs with field must be <= #docs
+    if (field.docCount < 0 || field.docCount > info.getDocCount()) {
+      throw new CorruptIndexException("invalid docCount: " + field.docCount + " maxDoc: " + info.getDocCount() + " (resource=" + in + ")");
+    }
+    // #postings must be >= #docs with field
+    if (field.sumDocFreq < field.docCount) {
+      throw new CorruptIndexException("invalid sumDocFreq: " + field.sumDocFreq + " docCount: " + field.docCount + " (resource=" + in + ")");
+    }
+    // #positions must be >= #postings
+    if (field.sumTotalTermFreq != -1 && field.sumTotalTermFreq < field.sumDocFreq) {
+      throw new CorruptIndexException("invalid sumTotalTermFreq: " + field.sumTotalTermFreq + " sumDocFreq: " + field.sumDocFreq + " (resource=" + in + ")");
+    }
+    if (previous != null) {
+      throw new CorruptIndexException("duplicate fields: " + field.fieldInfo.name + " (resource=" + in + ")");
+    }
+  }
+
+  @Override
+  public Iterator<String> iterator() {
+    return Collections.unmodifiableSet(fields.keySet()).iterator();
+  }
+
+  @Override
+  public Terms terms(String field) throws IOException {
+    assert field != null;
+    return fields.get(field);
+  }
+
+  @Override
+  public int size() {
+    return fields.size();
+  }
+
+  @Override
+  public void close() throws IOException {
+    try {
+      IOUtils.close(in, postingsReader);
+    } finally {
+      fields.clear();
+    }
+  }
+
+  final class FieldReader extends Terms {
+    final FieldInfo fieldInfo;
+    final long numTerms;
+    final long sumTotalTermFreq;
+    final long sumDocFreq;
+    final int docCount;
+    final int longsSize;
+    final FST<TempTermOutputs.TempMetaData> dict;
+
+    FieldReader(FieldInfo fieldInfo, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize) throws IOException {
+      this.fieldInfo = fieldInfo;
+      this.numTerms = numTerms;
+      this.sumTotalTermFreq = sumTotalTermFreq;
+      this.sumDocFreq = sumDocFreq;
+      this.docCount = docCount;
+      this.longsSize = longsSize;
+      this.dict = new FST<TempTermOutputs.TempMetaData>(in, new TempTermOutputs(longsSize));
+      //PrintWriter pw = new PrintWriter(new File("../temp/xxx.txt"));
+      //Util.toDot(dict, pw, false, false);
+    }
+
+    // nocommit: implement intersect
+    // nocommit: why do we need this comparator overridden again and again?
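+    // NOTE: the FST constructor above reads the entire per-field dictionary
+    // into memory; seekDir() locates the field summary block via the 8-byte
+    // trailer at the end of the terms file (in.seek(in.length() - 8), then
+    // in.seek(in.readLong())).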
+    @Override
+    public Comparator<BytesRef> getComparator() {
+      return BytesRef.getUTF8SortedAsUnicodeComparator();
+    }
+
+    @Override
+    public TermsEnum iterator(TermsEnum reuse) throws IOException {
+      return new SegmentTermsEnum();
+    }
+
+    @Override
+    public boolean hasOffsets() {
+      return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
+    }
+
+    @Override
+    public boolean hasPositions() {
+      return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+    }
+
+    @Override
+    public boolean hasPayloads() {
+      return fieldInfo.hasPayloads();
+    }
+
+    @Override
+    public long size() {
+      return numTerms;
+    }
+
+    @Override
+    public long getSumTotalTermFreq() {
+      return sumTotalTermFreq;
+    }
+
+    @Override
+    public long getSumDocFreq() throws IOException {
+      return sumDocFreq;
+    }
+
+    @Override
+    public int getDocCount() throws IOException {
+      return docCount;
+    }
+
+    // Iterates through terms in this field
+    private final class SegmentTermsEnum extends TermsEnum {
+      SegmentTermsEnum() {
+      }
+
+      @Override
+      public Comparator<BytesRef> getComparator() {
+        return BytesRef.getUTF8SortedAsUnicodeComparator();
+      }
+
+      @Override
+      public SeekStatus seekCeil(final BytesRef target, final boolean useCache) throws IOException {
+        return null;
+      }
+
+      @Override
+      public BytesRef next() throws IOException {
+        return null;
+      }
+
+      @Override
+      public BytesRef term() {
+        return null;
+      }
+
+      @Override
+      public int docFreq() throws IOException {
+        return 0;
+      }
+
+      @Override
+      public long totalTermFreq() throws IOException {
+        return 0;
+      }
+
+      @Override
+      public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
+        return null;
+      }
+
+      @Override
+      public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
+        return null;
+      }
+
+      @Override
+      public void seekExact(BytesRef target, TermState otherState) {
+      }
+
+      @Override
+      public TermState termState() throws IOException {
+        return null;
+      }
+
+      @Override
+      public void seekExact(long ord) throws IOException {
+      }
+
+      @Override
+      public long ord() {
+        return 0;
+      }
+    }
+  }
+}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/temp/TempFSTTermsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/temp/TempFSTTermsWriter.java
new file mode 100644
index 00000000000..fc0e097018d
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/codecs/temp/TempFSTTermsWriter.java
@@ -0,0 +1,198 @@
+package org.apache.lucene.codecs.temp;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.List;
+import java.util.ArrayList;
+import java.util.Comparator;
+
+import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.RAMOutputStream;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.IntsRef;
+import org.apache.lucene.util.fst.Builder;
+import org.apache.lucene.util.fst.FST;
+import org.apache.lucene.util.fst.Util;
+import org.apache.lucene.codecs.TempPostingsWriterBase;
+import org.apache.lucene.codecs.PostingsConsumer;
+import org.apache.lucene.codecs.FieldsConsumer;
+import org.apache.lucene.codecs.TermsConsumer;
+import org.apache.lucene.codecs.TermStats;
+import org.apache.lucene.codecs.CodecUtil;
+
+/** FST-based term dictionary: all of a term's metadata is
+ *  held as the output of the FST */
+
+public class TempFSTTermsWriter extends FieldsConsumer {
+  static final String TERMS_EXTENSION = "tmp";
+  static final String TERMS_CODEC_NAME = "FST_TERMS_DICT";
+  public static final int TERMS_VERSION_START = 0;
+  public static final int TERMS_VERSION_CURRENT = TERMS_VERSION_START;
+
+  final TempPostingsWriterBase postingsWriter;
+  final FieldInfos fieldInfos;
+  final IndexOutput out;
+  final List<FieldMetaData> fields = new ArrayList<FieldMetaData>();
+
+  public TempFSTTermsWriter(SegmentWriteState state, TempPostingsWriterBase postingsWriter) throws IOException {
+    final String termsFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TERMS_EXTENSION);
+
+    this.postingsWriter = postingsWriter;
+    this.fieldInfos = state.fieldInfos;
+    this.out = state.directory.createOutput(termsFileName, state.context);
+
+    // nocommit: why try catch here? not catching createOutput?
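+    // NOTE: if createOutput() above throws, `out` was never opened, so there
+    // is nothing to close; the try/finally below only needs to guard the
+    // writes that happen once the file exists.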
+    boolean success = false;
+    try {
+      writeHeader(out);
+      this.postingsWriter.start(out);
+      success = true;
+    } finally {
+      if (!success) {
+        IOUtils.closeWhileHandlingException(out);
+      }
+    }
+  }
+  private void writeHeader(IndexOutput out) throws IOException {
+    CodecUtil.writeHeader(out, TERMS_CODEC_NAME, TERMS_VERSION_CURRENT);
+  }
+  private void writeTrailer(IndexOutput out, long dirStart) throws IOException {
+    out.writeLong(dirStart);
+  }
+
+  @Override
+  public TermsConsumer addField(FieldInfo field) throws IOException {
+    return new TermsWriter(field);
+  }
+
+  @Override
+  public void close() throws IOException {
+    IOException ioe = null;
+    try {
+      // write field summary
+      final long dirStart = out.getFilePointer();
+
+      out.writeVInt(fields.size());
+      for (FieldMetaData field : fields) {
+        out.writeVInt(field.fieldInfo.number);
+        out.writeVLong(field.numTerms);
+        if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
+          out.writeVLong(field.sumTotalTermFreq);
+        }
+        out.writeVLong(field.sumDocFreq);
+        out.writeVInt(field.docCount);
+        out.writeVInt(field.longsSize);
+        field.dict.save(out);
+      }
+      writeTrailer(out, dirStart);
+    } catch (IOException ioe2) {
+      ioe = ioe2;
+    } finally {
+      IOUtils.closeWhileHandlingException(ioe, out, postingsWriter);
+    }
+  }
+
+  private static class FieldMetaData {
+    public final FieldInfo fieldInfo;
+    public final long numTerms;
+    public final long sumTotalTermFreq;
+    public final long sumDocFreq;
+    public final int docCount;
+    public final int longsSize;
+    public final FST<TempTermOutputs.TempMetaData> dict;
+
+    public FieldMetaData(FieldInfo fieldInfo, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize, FST<TempTermOutputs.TempMetaData> fst) {
+      this.fieldInfo = fieldInfo;
+      this.numTerms = numTerms;
+      this.sumTotalTermFreq = sumTotalTermFreq;
+      this.sumDocFreq = sumDocFreq;
+      this.docCount = docCount;
+      this.longsSize = longsSize;
+      this.dict = fst;
+    }
+  }
+
+  class TermsWriter extends TermsConsumer {
+    private final Builder<TempTermOutputs.TempMetaData> builder;
+    private final TempTermOutputs outputs;
+    private final FieldInfo fieldInfo;
+    private final int longsSize;
+    private long numTerms;
+
+    private final IntsRef scratchTerm = new IntsRef();
+    private final RAMOutputStream metaWriter = new RAMOutputStream();
+
+    TermsWriter(FieldInfo fieldInfo) {
+      this.numTerms = 0;
+      this.fieldInfo = fieldInfo;
+      this.longsSize = postingsWriter.setField(fieldInfo);
+      this.outputs = new TempTermOutputs(longsSize);
+      this.builder = new Builder<TempTermOutputs.TempMetaData>(FST.INPUT_TYPE.BYTE1, outputs);
+    }
+
+    @Override
+    public Comparator<BytesRef> getComparator() {
+      return BytesRef.getUTF8SortedAsUnicodeComparator();
+    }
+
+    @Override
+    public PostingsConsumer startTerm(BytesRef text) throws IOException {
+      postingsWriter.startTerm();
+      return postingsWriter;
+    }
+
+    @Override
+    public void finishTerm(BytesRef text, TermStats stats) throws IOException {
+      final TempTermOutputs.TempMetaData meta = new TempTermOutputs.TempMetaData();
+      meta.longs = new long[longsSize];
+      meta.bytes = null;
+      postingsWriter.finishTerm(meta.longs, metaWriter, stats);
+      /*
+      meta.bytes = new byte[(int)metaWriter.getFilePointer()];
+      metaWriter.writeTo(meta.bytes, 0);
+      metaWriter.reset();
+      */
+      int bytesSize = (int)metaWriter.getFilePointer();
+      if (bytesSize > 0) {
+        meta.bytes = new byte[bytesSize];
+        metaWriter.writeTo(meta.bytes, 0);
+        metaWriter.reset();
+      }
+      //System.out.println("add term:<"+text.utf8ToString()+", "+meta+">");
+      builder.add(Util.toIntsRef(text, scratchTerm), meta);
+      numTerms++;
+    }
+
+    @Override
+    public void finish(long sumTotalTermFreq,
+                       long sumDocFreq, int docCount) throws IOException {
+      // save FST dict
+      if (numTerms > 0) {
+        final FST<TempTermOutputs.TempMetaData> fst = builder.finish();
+        fields.add(new FieldMetaData(fieldInfo, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize, fst));
+      }
+    }
+  }
+}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/temp/TempPostingsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/temp/TempPostingsFormat.java
index 29a56e2cbb9..31736b366b3 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/temp/TempPostingsFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/temp/TempPostingsFormat.java
@@ -384,7 +384,10 @@ public final class TempPostingsFormat extends PostingsFormat {
   /** Creates {@code TempPostingsFormat} with default
    *  settings. */
   public TempPostingsFormat() {
-    this(TempBlockTermsWriter.DEFAULT_MIN_BLOCK_SIZE, TempBlockTermsWriter.DEFAULT_MAX_BLOCK_SIZE);
+    super("TempFST");
+    minTermBlockSize = 0;
+    maxTermBlockSize = 0;
+    //this(TempBlockTermsWriter.DEFAULT_MIN_BLOCK_SIZE, TempBlockTermsWriter.DEFAULT_MAX_BLOCK_SIZE);
   }
 
   /** Creates {@code TempPostingsFormat} with custom
@@ -410,10 +413,11 @@
 
     boolean success = false;
     try {
-      FieldsConsumer ret = new TempBlockTermsWriter(state,
-                                                    postingsWriter,
-                                                    minTermBlockSize,
-                                                    maxTermBlockSize);
+      //FieldsConsumer ret = new TempBlockTermsWriter(state,
+      //                                              postingsWriter,
+      //                                              minTermBlockSize,
+      //                                              maxTermBlockSize);
+      FieldsConsumer ret = new TempFSTTermsWriter(state, postingsWriter);
       success = true;
       return ret;
     } finally {
@@ -432,13 +436,14 @@
                                                                 state.segmentSuffix);
     boolean success = false;
     try {
-      FieldsProducer ret = new TempBlockTermsReader(state.directory,
-                                                    state.fieldInfos,
-                                                    state.segmentInfo,
-                                                    postingsReader,
-                                                    state.context,
-                                                    state.segmentSuffix,
-                                                    state.termsIndexDivisor);
+      //FieldsProducer ret = new TempBlockTermsReader(state.directory,
+      //                                              state.fieldInfos,
+      //                                              state.segmentInfo,
+      //                                              postingsReader,
+      //                                              state.context,
+      //                                              state.segmentSuffix,
+      //                                              state.termsIndexDivisor);
+      FieldsProducer ret = new TempFSTTermsReader(state, postingsReader);
       success = true;
       return ret;
     } finally {
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/temp/TempTermOutputs.java b/lucene/core/src/java/org/apache/lucene/codecs/temp/TempTermOutputs.java
new file mode 100644
index 00000000000..6f9d9818a9f
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/codecs/temp/TempTermOutputs.java
@@ -0,0 +1,274 @@
+package org.apache.lucene.codecs.temp;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.DataOutput;
+import org.apache.lucene.util.fst.Outputs;
+import org.apache.lucene.util.LongsRef;
+
+
+// NOTE: outputs should be per-field, since
+// longsSize is fixed for each field
+public class TempTermOutputs extends Outputs<TempTermOutputs.TempMetaData> {
+  private final static TempMetaData NO_OUTPUT = new TempMetaData();
+  private static boolean DEBUG = false;
+  private int longsSize;
+
+  public static class TempMetaData {
+    public long[] longs;
+    public byte[] bytes;
+    TempMetaData() {
+      this.longs = null;
+      this.bytes = null;
+    }
+    TempMetaData(long[] longs, byte[] bytes) {
+      this.longs = longs;
+      this.bytes = bytes;
+    }
+    @Override
+    public int hashCode() {
+      int hash = 0;
+      if (longs != null) {
+        final int end = longs.length;
+        for (int i = 0; i < end; i++) {
+          hash -= longs[i];
+        }
+      }
+      if (bytes != null) {
+        hash = -hash;
+        final int end = bytes.length;
+        for (int i = 0; i < end; i++) {
+          hash += bytes[i];
+        }
+      }
+      return hash;
+    }
+    public String toString() {
+      if (this == NO_OUTPUT) {
+        return "no_output";
+      }
+      StringBuffer sb = new StringBuffer();
+      if (longs != null) {
+        sb.append("[ ");
+        for (int i = 0; i < longs.length; i++) {
+          sb.append(longs[i]+" ");
+        }
+        sb.append("]");
+      } else {
+        sb.append("null");
+      }
+      if (bytes != null) {
+        sb.append(" [ ");
+        for (int i = 0; i < bytes.length; i++) {
+          sb.append(bytes[i]+" ");
+        }
+        sb.append("]");
+      } else {
+        sb.append(" null");
+      }
+      return sb.toString();
+    }
+  }
+
+  private TempTermOutputs() {
+  }
+
+  protected TempTermOutputs(int longsSize) {
+    this.longsSize = longsSize;
+  }
+
+  @Override
+  //
+  // Since the longs blob is fixed length, when these two are 'comparable',
+  // i.e. when every value in one long[] fits the same ordering against the
+  // other, the smaller one will be the result.
+  //
+  // NOTE: only the long[] is 'shared', i.e. after sharing the common value,
+  // the output of the smaller one will be an all-zero long[] with the
+  // original byte[] blob.
+  //
+  // nocommit: Builder.add() doesn't immediately consume the output data,
+  // which means the longs after one add() should all be deeply copied
+  // instead of being reused? quite hairy to detect here, so the caller
+  // must be careful about this.
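+  //
+  // Worked example (illustrative only, assuming longsSize=2):
+  //   common([3,5], [2,4]) = [2,4]     (comparable: 3>=2 and 5>=4, so the
+  //                                     smaller vector is the shared prefix)
+  //   common([3,1], [2,4]) = no_output (3>2 but 1<4: neither dominates)
+  // byte[] blobs are never shared between the two outputs.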
+  //
+  public TempMetaData common(TempMetaData t1, TempMetaData t2) {
+    if (DEBUG) System.out.print("common("+t1+", "+t2+") = ");
+    if (t1 == NO_OUTPUT || t2 == NO_OUTPUT) {
+      if (DEBUG) System.out.println("ret:"+NO_OUTPUT);
+      return NO_OUTPUT;
+    }
+    assert t1.longs != null;
+    assert t2.longs != null;
+    assert t1.longs.length == t2.longs.length;
+
+    long accum = 0;
+    long[] longs1 = t1.longs, longs2 = t2.longs;
+    int pos = 0;
+    boolean order = true;
+    TempMetaData ret;
+
+    while (pos < longsSize && longs1[pos] == longs2[pos]) {
+      pos++;
+    }
+    if (pos < longsSize) {
+      // unequal
+      order = (longs1[pos] > longs2[pos]);
+      if (order) {
+        // check whether strictly longs1 >= longs2
+        while (pos < longsSize && longs1[pos] >= longs2[pos]) {
+          accum += longs2[pos];
+          pos++;
+        }
+      } else {
+        // check whether strictly longs1 <= longs2
+        while (pos < longsSize && longs1[pos] <= longs2[pos]) {
+          accum += longs1[pos];
+          pos++;
+        }
+      }
+      if (pos < longsSize || accum == 0) {
+        ret = NO_OUTPUT;
+      } else if (order) {
+        ret = new TempMetaData(longs2, null);
+      } else {
+        ret = new TempMetaData(longs1, null);
+      }
+    } else {
+      // equal
+      if (t1.bytes != null && Arrays.equals(t1.bytes, t2.bytes)) {  // all fields are equal
+        ret = t1;
+      } else if (accum == 0) {  // all zero case
+        ret = NO_OUTPUT;
+      } else {
+        ret = new TempMetaData(longs1, null);
+      }
+    }
+    if (DEBUG) System.out.println("ret:"+ret);
+    return ret;
+  }
+
+  @Override
+  // nocommit:
+  // this *actually* always assumes that t2 <= t1 before calling the method
+  public TempMetaData subtract(TempMetaData t1, TempMetaData t2) {
+    if (DEBUG) System.out.print("subtract("+t1+", "+t2+") = ");
+    if (t2 == NO_OUTPUT) {
+      if (DEBUG) System.out.println("ret:"+t1);
+      return t1;
+    }
+    assert t1.longs != null;
+    assert t2.longs != null;
+
+    int pos = 0;
+    long diff = 0;
+    long[] share = new long[longsSize];  // nocommit: reuse
+
+    while (pos < longsSize) {
+      share[pos] = t1.longs[pos] - t2.longs[pos];
+      diff += share[pos];
+      pos++;
+    }
+
+    TempMetaData ret;
+    if (diff == 0 && (t1.bytes == null || t1.bytes.length == 0)) {
+      ret = NO_OUTPUT;
+    } else {
+      ret = new TempMetaData(share, t1.bytes);
+    }
+    if (DEBUG) System.out.println("ret:"+ret);
+    return ret;
+  }
+
+  @Override
+  // nocommit: need to check all-zero case?
+  // so we can reuse one long[]
+  public TempMetaData add(TempMetaData t1, TempMetaData t2) {
+    if (DEBUG) System.out.print("add("+t1+", "+t2+") = ");
+    // nocommit: necessary?
+    if (t1 == NO_OUTPUT) {
+      if (DEBUG) System.out.println("ret:"+t2);
+      return t2;
+    } else if (t2 == NO_OUTPUT) {
+      if (DEBUG) System.out.println("ret:"+t1);
+      return t1;
+    }
+    assert t1.longs != null;
+    assert t2.longs != null;
+
+    int pos = 0;
+    long[] accum = new long[longsSize];  // nocommit: reuse
+    while (pos < longsSize) {
+      accum[pos] = t1.longs[pos] + t2.longs[pos];
+      assert(accum[pos] >= 0);
+      pos++;
+    }
+    TempMetaData ret;
+    if (t2.bytes != null) {
+      ret = new TempMetaData(accum, t2.bytes);
+    } else {
+      ret = new TempMetaData(accum, t1.bytes);
+    }
+    if (DEBUG) System.out.println("ret:"+ret);
+    return ret;
+  }
+
+  @Override
+  public void write(TempMetaData data, DataOutput out) throws IOException {
+    for (int pos = 0; pos < longsSize; pos++) {
+      out.writeVLong(data.longs[pos]);
+    }
+    if (data.bytes != null) {
+      out.writeVInt(data.bytes.length);
+      out.writeBytes(data.bytes, 0, data.bytes.length);
+    } else {
+      out.writeVInt(0);
+    }
+  }
+  // nocommit: can this non-null byte case be used in Final Output?
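+  // For example (illustrative, assuming longsSize=2): an output with
+  // longs=[3,5] and a one-byte blob {7} is written as
+  //   VLong(3) VLong(5) VInt(1) 0x07
+  // and one with no byte blob as
+  //   VLong(3) VLong(5) VInt(0)
+  // read() below decodes the same layout.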
+
+  @Override
+  public TempMetaData read(DataInput in) throws IOException {
+    long[] longs = new long[longsSize];
+    for (int pos = 0; pos < longsSize; pos++) {
+      longs[pos] = in.readVLong();
+    }
+    int bytesSize = in.readVInt();
+    byte[] bytes = null;
+    if (bytesSize > 0) {
+      bytes = new byte[bytesSize];
+      in.readBytes(bytes, 0, bytes.length);
+    }
+    TempMetaData meta = new TempMetaData(longs, bytes);
+    return meta;
+  }
+
+  @Override
+  public TempMetaData getNoOutput() {
+    return NO_OUTPUT;
+  }
+
+  @Override
+  public String outputToString(TempMetaData data) {
+    return data.toString();
+  }
+}
+
diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/NodeHash.java b/lucene/core/src/java/org/apache/lucene/util/fst/NodeHash.java
index 7b6d7870368..32123303758 100644
--- a/lucene/core/src/java/org/apache/lucene/util/fst/NodeHash.java
+++ b/lucene/core/src/java/org/apache/lucene/util/fst/NodeHash.java
@@ -68,7 +68,7 @@ final class NodeHash {
   }
 
   // hash code for an unfrozen node.  This must be identical
-  // to the un-frozen case (below)!!
+  // to the frozen case (below)!!
   private long hash(Builder.UnCompiledNode<T> node) {
     final int PRIME = 31;
     //System.out.println("hash unfrozen");