diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexReader.java index d47e4c5f452..2aefd09cf9b 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexReader.java @@ -20,19 +20,16 @@ import static org.apache.lucene.util.fst.FST.readMetadata; import java.io.IOException; import java.util.Collection; -import java.util.Collections; import java.util.HashMap; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.SegmentReadState; -import org.apache.lucene.store.IOContext; -import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.ChecksumIndexInput; import org.apache.lucene.util.Accountable; import org.apache.lucene.util.Accountables; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.fst.BytesRefFSTEnum; import org.apache.lucene.util.fst.FST; import org.apache.lucene.util.fst.PositiveIntOutputs; @@ -46,53 +43,61 @@ public class VariableGapTermsIndexReader extends TermsIndexReaderBase { private final PositiveIntOutputs fstOutputs = PositiveIntOutputs.getSingleton(); - final HashMap fields = new HashMap<>(); + final HashMap> fields = new HashMap<>(); public VariableGapTermsIndexReader(SegmentReadState state) throws IOException { - String fileName = + String metaFileName = + IndexFileNames.segmentFileName( + state.segmentInfo.name, + state.segmentSuffix, + VariableGapTermsIndexWriter.TERMS_META_EXTENSION); + String indexFileName = IndexFileNames.segmentFileName( state.segmentInfo.name, state.segmentSuffix, VariableGapTermsIndexWriter.TERMS_INDEX_EXTENSION); - final IndexInput in = state.directory.openInput(fileName, IOContext.READONCE); - boolean success = false; - try { + try (ChecksumIndexInput metaIn = state.directory.openChecksumInput(metaFileName); + ChecksumIndexInput indexIn = state.directory.openChecksumInput(indexFileName)) { - CodecUtil.checkIndexHeader( - in, - VariableGapTermsIndexWriter.CODEC_NAME, - VariableGapTermsIndexWriter.VERSION_START, - VariableGapTermsIndexWriter.VERSION_CURRENT, - state.segmentInfo.getId(), - state.segmentSuffix); + Throwable priorE = null; + try { + CodecUtil.checkIndexHeader( + metaIn, + VariableGapTermsIndexWriter.META_CODEC_NAME, + VariableGapTermsIndexWriter.VERSION_START, + VariableGapTermsIndexWriter.VERSION_CURRENT, + state.segmentInfo.getId(), + state.segmentSuffix); - CodecUtil.checksumEntireFile(in); + CodecUtil.checkIndexHeader( + indexIn, + VariableGapTermsIndexWriter.CODEC_NAME, + VariableGapTermsIndexWriter.VERSION_START, + VariableGapTermsIndexWriter.VERSION_CURRENT, + state.segmentInfo.getId(), + state.segmentSuffix); - seekDir(in); - - // Read directory - final int numFields = in.readVInt(); - if (numFields < 0) { - throw new CorruptIndexException("invalid numFields: " + numFields, in); - } - - for (int i = 0; i < numFields; i++) { - final int field = in.readVInt(); - final long indexStart = in.readVLong(); - final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field); - FieldIndexData previous = - fields.put(fieldInfo.name, new FieldIndexData(in, fieldInfo, indexStart)); - if (previous != null) { - throw new CorruptIndexException("duplicate field: " + fieldInfo.name, in); + // Read directory + for (int field = metaIn.readInt(); field != -1; field = metaIn.readInt()) { + final long indexStart = metaIn.readVLong(); + final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field); + if (indexIn.getFilePointer() != indexStart) { + throw new CorruptIndexException( + "Gap in FST, expected position " + indexIn.getFilePointer() + ", got " + indexStart, + metaIn); + } + FST fst = new FST<>(readMetadata(metaIn, fstOutputs), indexIn); + FST previous = fields.put(fieldInfo.name, fst); + if (previous != null) { + throw new CorruptIndexException("duplicate field: " + fieldInfo.name, metaIn); + } } - } - success = true; - } finally { - if (success) { - IOUtils.close(in); - } else { - IOUtils.closeWhileHandlingException(in); + } catch (Throwable t) { + priorE = t; + } finally { + CodecUtil.checkFooter(metaIn, priorE); + CodecUtil.checkFooter(indexIn, priorE); } } } @@ -150,68 +155,26 @@ public class VariableGapTermsIndexReader extends TermsIndexReaderBase { return false; } - private final class FieldIndexData implements Accountable { - private final FST fst; - - public FieldIndexData(IndexInput in, FieldInfo fieldInfo, long indexStart) throws IOException { - IndexInput clone = in.clone(); - clone.seek(indexStart); - fst = new FST<>(readMetadata(clone, fstOutputs), clone); - clone.close(); - - /* - final String dotFileName = segment + "_" + fieldInfo.name + ".dot"; - Writer w = new OutputStreamWriter(new FileOutputStream(dotFileName)); - Util.toDot(fst, w, false, false); - System.out.println("FST INDEX: SAVED to " + dotFileName); - w.close(); - */ - } - - @Override - public long ramBytesUsed() { - return fst == null ? 0 : fst.ramBytesUsed(); - } - - @Override - public Collection getChildResources() { - if (fst == null) { - return Collections.emptyList(); - } else { - return Collections.singletonList(Accountables.namedAccountable("index data", fst)); - } - } - - @Override - public String toString() { - return "VarGapTermIndex"; - } - } - @Override public FieldIndexEnum getFieldEnum(FieldInfo fieldInfo) { - final FieldIndexData fieldData = fields.get(fieldInfo.name); - if (fieldData.fst == null) { + final FST fieldData = fields.get(fieldInfo.name); + if (fieldData == null) { return null; } else { - return new IndexEnum(fieldData.fst); + return new IndexEnum(fieldData); } } @Override public void close() throws IOException {} - private void seekDir(IndexInput input) throws IOException { - input.seek(input.length() - CodecUtil.footerLength() - 8); - long dirOffset = input.readLong(); - input.seek(dirOffset); - } - @Override public long ramBytesUsed() { long sizeInBytes = 0; - for (FieldIndexData entry : fields.values()) { - sizeInBytes += entry.ramBytesUsed(); + for (FST entry : fields.values()) { + if (entry != null) { + sizeInBytes += entry.ramBytesUsed(); + } } return sizeInBytes; } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexWriter.java index 70f1f9918dd..97a58a9ff97 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexWriter.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexWriter.java @@ -17,8 +17,6 @@ package org.apache.lucene.codecs.blockterms; import java.io.IOException; -import java.util.ArrayList; -import java.util.List; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.TermStats; import org.apache.lucene.index.FieldInfo; @@ -44,16 +42,19 @@ import org.apache.lucene.util.fst.Util; * @lucene.experimental */ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase { + protected IndexOutput metaOut; protected IndexOutput out; /** Extension of terms index file */ static final String TERMS_INDEX_EXTENSION = "tiv"; - static final String CODEC_NAME = "VariableGapTermsIndex"; - static final int VERSION_START = 3; - static final int VERSION_CURRENT = VERSION_START; + /** Extension of terms meta file */ + static final String TERMS_META_EXTENSION = "tmv"; - private final List fields = new ArrayList<>(); + static final String META_CODEC_NAME = "VariableGapTermsMeta"; + static final String CODEC_NAME = "VariableGapTermsIndex"; + static final int VERSION_START = 4; + static final int VERSION_CURRENT = VERSION_START; @SuppressWarnings("unused") private final FieldInfos fieldInfos; // unread @@ -176,20 +177,32 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase { public VariableGapTermsIndexWriter(SegmentWriteState state, IndexTermSelector policy) throws IOException { + fieldInfos = state.fieldInfos; + this.policy = policy; + + final String metaFileName = + IndexFileNames.segmentFileName( + state.segmentInfo.name, state.segmentSuffix, TERMS_META_EXTENSION); final String indexFileName = IndexFileNames.segmentFileName( state.segmentInfo.name, state.segmentSuffix, TERMS_INDEX_EXTENSION); - out = state.directory.createOutput(indexFileName, state.context); + boolean success = false; try { - fieldInfos = state.fieldInfos; - this.policy = policy; + metaOut = state.directory.createOutput(metaFileName, state.context); + out = state.directory.createOutput(indexFileName, state.context); + CodecUtil.writeIndexHeader( + metaOut, + META_CODEC_NAME, + VERSION_CURRENT, + state.segmentInfo.getId(), + state.segmentSuffix); CodecUtil.writeIndexHeader( out, CODEC_NAME, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix); success = true; } finally { if (!success) { - IOUtils.closeWhileHandlingException(out); + IOUtils.closeWhileHandlingException(this); } } } @@ -198,9 +211,7 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase { public FieldWriter addField(FieldInfo field, long termsFilePointer) throws IOException { //// System.out.println("VGW: field=" + field.name); policy.newField(field); - FSTFieldWriter writer = new FSTFieldWriter(field, termsFilePointer); - fields.add(writer); - return writer; + return new FSTFieldWriter(field, termsFilePointer); } /** @@ -230,7 +241,6 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase { final FieldInfo fieldInfo; FST fst; - final long indexStart; private final BytesRefBuilder lastTerm = new BytesRefBuilder(); private boolean first = true; @@ -239,7 +249,6 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase { this.fieldInfo = fieldInfo; fstOutputs = PositiveIntOutputs.getSingleton(); fstCompiler = new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE1, fstOutputs).build(); - indexStart = out.getFilePointer(); //// System.out.println("VGW: field=" + fieldInfo.name); // Always put empty string in @@ -285,44 +294,30 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase { public void finish(long termsFilePointer) throws IOException { fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()); if (fst != null) { - fst.save(out, out); + metaOut.writeInt(fieldInfo.number); + metaOut.writeVLong(out.getFilePointer()); + fst.save(metaOut, out); } } } @Override public void close() throws IOException { - if (out != null) { - try { - final long dirStart = out.getFilePointer(); - final int fieldCount = fields.size(); - - int nonNullFieldCount = 0; - for (int i = 0; i < fieldCount; i++) { - FSTFieldWriter field = fields.get(i); - if (field.fst != null) { - nonNullFieldCount++; - } - } - - out.writeVInt(nonNullFieldCount); - for (int i = 0; i < fieldCount; i++) { - FSTFieldWriter field = fields.get(i); - if (field.fst != null) { - out.writeVInt(field.fieldInfo.number); - out.writeVLong(field.indexStart); - } - } - writeTrailer(dirStart); + try { + if (metaOut != null) { + metaOut.writeInt(-1); + CodecUtil.writeFooter(metaOut); + } + if (out != null) { CodecUtil.writeFooter(out); + } + } finally { + try { + IOUtils.close(out, metaOut); } finally { - out.close(); out = null; + metaOut = null; } } } - - private void writeTrailer(long dirStart) throws IOException { - out.writeLong(dirStart); - } }