mirror of https://github.com/apache/lucene.git
LUCENE-5587: fix SimpleText so its doc-values .dat files are same size regardless of order that fields were added
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1585994 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
fa4669caf9
commit
7a0e6bf9e5
|
@ -17,7 +17,6 @@ package org.apache.lucene.codecs.simpletext;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.CHECKSUM;
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.END;
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.FIELD;
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.LENGTH;
|
||||
|
@ -481,7 +480,7 @@ class SimpleTextDocValuesReader extends DocValuesProducer {
|
|||
while(true) {
|
||||
SimpleTextUtil.readLine(input, scratch);
|
||||
if (scratch.equals(END)) {
|
||||
SimpleTextUtil.checkFooter(input, CHECKSUM);
|
||||
SimpleTextUtil.checkFooter(input);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -36,7 +36,6 @@ import org.apache.lucene.util.BytesRef;
|
|||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
class SimpleTextDocValuesWriter extends DocValuesConsumer {
|
||||
final static BytesRef CHECKSUM = new BytesRef("checksum ");
|
||||
final static BytesRef END = new BytesRef("END");
|
||||
final static BytesRef FIELD = new BytesRef("field ");
|
||||
final static BytesRef TYPE = new BytesRef(" type ");
|
||||
|
@ -397,10 +396,7 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer {
|
|||
// TODO: sheisty to do this here?
|
||||
SimpleTextUtil.write(data, END);
|
||||
SimpleTextUtil.writeNewline(data);
|
||||
String checksum = Long.toString(data.getChecksum());
|
||||
SimpleTextUtil.write(data, CHECKSUM);
|
||||
SimpleTextUtil.write(data, checksum, scratch);
|
||||
SimpleTextUtil.writeNewline(data);
|
||||
SimpleTextUtil.writeChecksum(data, scratch);
|
||||
success = true;
|
||||
} finally {
|
||||
if (success) {
|
||||
|
|
|
@ -129,7 +129,7 @@ public class SimpleTextFieldInfosReader extends FieldInfosReader {
|
|||
infos[i].setDocValuesGen(dvGen);
|
||||
}
|
||||
|
||||
SimpleTextUtil.checkFooter(input, CHECKSUM);
|
||||
SimpleTextUtil.checkFooter(input);
|
||||
|
||||
FieldInfos fieldInfos = new FieldInfos(infos);
|
||||
success = true;
|
||||
|
|
|
@ -58,7 +58,6 @@ public class SimpleTextFieldInfosWriter extends FieldInfosWriter {
|
|||
static final BytesRef NUM_ATTS = new BytesRef(" attributes ");
|
||||
final static BytesRef ATT_KEY = new BytesRef(" key ");
|
||||
final static BytesRef ATT_VALUE = new BytesRef(" value ");
|
||||
final static BytesRef CHECKSUM = new BytesRef("checksum ");
|
||||
|
||||
@Override
|
||||
public void write(Directory directory, String segmentName, String segmentSuffix, FieldInfos infos, IOContext context) throws IOException {
|
||||
|
@ -133,10 +132,7 @@ public class SimpleTextFieldInfosWriter extends FieldInfosWriter {
|
|||
}
|
||||
}
|
||||
}
|
||||
String checksum = Long.toString(out.getChecksum());
|
||||
SimpleTextUtil.write(out, CHECKSUM);
|
||||
SimpleTextUtil.write(out, checksum, scratch);
|
||||
SimpleTextUtil.writeNewline(out);
|
||||
SimpleTextUtil.writeChecksum(out, scratch);
|
||||
success = true;
|
||||
} finally {
|
||||
if (success) {
|
||||
|
|
|
@ -53,7 +53,6 @@ import org.apache.lucene.util.fst.PairOutputs;
|
|||
import org.apache.lucene.util.fst.PositiveIntOutputs;
|
||||
import org.apache.lucene.util.fst.Util;
|
||||
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.CHECKSUM;
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.END;
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.FIELD;
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.TERM;
|
||||
|
@ -93,7 +92,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
|||
while (true) {
|
||||
SimpleTextUtil.readLine(input, scratch);
|
||||
if (scratch.equals(END)) {
|
||||
SimpleTextUtil.checkFooter(input, CHECKSUM);
|
||||
SimpleTextUtil.checkFooter(input);
|
||||
return fields;
|
||||
} else if (StringHelper.startsWith(scratch, FIELD)) {
|
||||
String fieldName = new String(scratch.bytes, scratch.offset + FIELD.length, scratch.length - FIELD.length, StandardCharsets.UTF_8);
|
||||
|
|
|
@ -39,7 +39,6 @@ class SimpleTextFieldsWriter extends FieldsConsumer implements Closeable {
|
|||
private final BytesRef scratch = new BytesRef(10);
|
||||
private final SegmentWriteState writeState;
|
||||
|
||||
final static BytesRef CHECKSUM = new BytesRef("checksum ");
|
||||
final static BytesRef END = new BytesRef("END");
|
||||
final static BytesRef FIELD = new BytesRef("field ");
|
||||
final static BytesRef TERM = new BytesRef(" term ");
|
||||
|
@ -220,10 +219,7 @@ class SimpleTextFieldsWriter extends FieldsConsumer implements Closeable {
|
|||
try {
|
||||
write(END);
|
||||
newline();
|
||||
String checksum = Long.toString(out.getChecksum());
|
||||
write(CHECKSUM);
|
||||
write(checksum);
|
||||
newline();
|
||||
SimpleTextUtil.writeChecksum(out, scratch);
|
||||
} finally {
|
||||
out.close();
|
||||
out = null;
|
||||
|
|
|
@ -50,7 +50,6 @@ public class SimpleTextLiveDocsFormat extends LiveDocsFormat {
|
|||
final static BytesRef SIZE = new BytesRef("size ");
|
||||
final static BytesRef DOC = new BytesRef(" doc ");
|
||||
final static BytesRef END = new BytesRef("END");
|
||||
final static BytesRef CHECKSUM = new BytesRef("checksum ");
|
||||
|
||||
@Override
|
||||
public MutableBits newLiveDocs(int size) throws IOException {
|
||||
|
@ -89,7 +88,7 @@ public class SimpleTextLiveDocsFormat extends LiveDocsFormat {
|
|||
SimpleTextUtil.readLine(in, scratch);
|
||||
}
|
||||
|
||||
SimpleTextUtil.checkFooter(in, CHECKSUM);
|
||||
SimpleTextUtil.checkFooter(in);
|
||||
|
||||
success = true;
|
||||
return new SimpleTextBits(bits, size);
|
||||
|
@ -130,10 +129,7 @@ public class SimpleTextLiveDocsFormat extends LiveDocsFormat {
|
|||
|
||||
SimpleTextUtil.write(out, END);
|
||||
SimpleTextUtil.writeNewline(out);
|
||||
String checksum = Long.toString(out.getChecksum());
|
||||
SimpleTextUtil.write(out, CHECKSUM);
|
||||
SimpleTextUtil.write(out, checksum, scratch);
|
||||
SimpleTextUtil.writeNewline(out);
|
||||
SimpleTextUtil.writeChecksum(out, scratch);
|
||||
success = true;
|
||||
} finally {
|
||||
if (success) {
|
||||
|
|
|
@ -17,7 +17,6 @@ package org.apache.lucene.codecs.simpletext;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_CHECKSUM;
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DIAG_KEY;
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DIAG_VALUE;
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DOCCOUNT;
|
||||
|
@ -99,7 +98,7 @@ public class SimpleTextSegmentInfoReader extends SegmentInfoReader {
|
|||
files.add(fileName);
|
||||
}
|
||||
|
||||
SimpleTextUtil.checkFooter(input, SI_CHECKSUM);
|
||||
SimpleTextUtil.checkFooter(input);
|
||||
|
||||
SegmentInfo info = new SegmentInfo(directory, version, segmentName, docCount,
|
||||
isCompoundFile, null, diagnostics);
|
||||
|
|
|
@ -47,7 +47,6 @@ public class SimpleTextSegmentInfoWriter extends SegmentInfoWriter {
|
|||
final static BytesRef SI_DIAG_VALUE = new BytesRef(" value ");
|
||||
final static BytesRef SI_NUM_FILES = new BytesRef(" files ");
|
||||
final static BytesRef SI_FILE = new BytesRef(" file ");
|
||||
final static BytesRef SI_CHECKSUM = new BytesRef(" checksum ");
|
||||
|
||||
@Override
|
||||
public void write(Directory dir, SegmentInfo si, FieldInfos fis, IOContext ioContext) throws IOException {
|
||||
|
@ -105,10 +104,7 @@ public class SimpleTextSegmentInfoWriter extends SegmentInfoWriter {
|
|||
}
|
||||
}
|
||||
|
||||
String checksum = Long.toString(output.getChecksum());
|
||||
SimpleTextUtil.write(output, SI_CHECKSUM);
|
||||
SimpleTextUtil.write(output, checksum, scratch);
|
||||
SimpleTextUtil.writeNewline(output);
|
||||
SimpleTextUtil.writeChecksum(output, scratch);
|
||||
success = true;
|
||||
} finally {
|
||||
if (!success) {
|
||||
|
|
|
@ -91,7 +91,7 @@ public class SimpleTextStoredFieldsReader extends StoredFieldsReader {
|
|||
upto++;
|
||||
}
|
||||
}
|
||||
SimpleTextUtil.checkFooter(input, CHECKSUM);
|
||||
SimpleTextUtil.checkFooter(input);
|
||||
assert upto == offsets.length;
|
||||
}
|
||||
|
||||
|
@ -193,11 +193,6 @@ public class SimpleTextStoredFieldsReader extends StoredFieldsReader {
|
|||
return ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
|
||||
}
|
||||
|
||||
private String readString(int offset, BytesRef scratch) {
|
||||
UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+offset, scratch.length-offset, scratchUTF16);
|
||||
return scratchUTF16.toString();
|
||||
}
|
||||
|
||||
private boolean equalsAt(BytesRef a, BytesRef b, int bOffset) {
|
||||
return a.length == b.length - bOffset &&
|
||||
ArrayUtil.equals(a.bytes, a.offset, b.bytes, b.offset + bOffset, b.length - bOffset);
|
||||
|
|
|
@ -51,7 +51,6 @@ public class SimpleTextStoredFieldsWriter extends StoredFieldsWriter {
|
|||
final static BytesRef TYPE_FLOAT = new BytesRef("float");
|
||||
final static BytesRef TYPE_DOUBLE = new BytesRef("double");
|
||||
|
||||
final static BytesRef CHECKSUM = new BytesRef("checksum ");
|
||||
final static BytesRef END = new BytesRef("END");
|
||||
final static BytesRef DOC = new BytesRef("doc ");
|
||||
final static BytesRef NUM = new BytesRef(" numfields ");
|
||||
|
@ -172,10 +171,7 @@ public class SimpleTextStoredFieldsWriter extends StoredFieldsWriter {
|
|||
}
|
||||
write(END);
|
||||
newLine();
|
||||
String checksum = Long.toString(out.getChecksum());
|
||||
write(CHECKSUM);
|
||||
write(checksum);
|
||||
newLine();
|
||||
SimpleTextUtil.writeChecksum(out, scratch);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -94,7 +94,7 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
|
|||
upto++;
|
||||
}
|
||||
}
|
||||
SimpleTextUtil.checkFooter(input, CHECKSUM);
|
||||
SimpleTextUtil.checkFooter(input);
|
||||
assert upto == offsets.length;
|
||||
}
|
||||
|
||||
|
|
|
@ -37,7 +37,6 @@ import org.apache.lucene.util.IOUtils;
|
|||
*/
|
||||
public class SimpleTextTermVectorsWriter extends TermVectorsWriter {
|
||||
|
||||
static final BytesRef CHECKSUM = new BytesRef("checksum ");
|
||||
static final BytesRef END = new BytesRef("END");
|
||||
static final BytesRef DOC = new BytesRef("doc ");
|
||||
static final BytesRef NUMFIELDS = new BytesRef(" numfields ");
|
||||
|
@ -178,10 +177,7 @@ public class SimpleTextTermVectorsWriter extends TermVectorsWriter {
|
|||
}
|
||||
write(END);
|
||||
newLine();
|
||||
String checksum = Long.toString(out.getChecksum());
|
||||
write(CHECKSUM);
|
||||
write(checksum);
|
||||
newLine();
|
||||
SimpleTextUtil.writeChecksum(out, scratch);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -17,14 +17,14 @@ package org.apache.lucene.codecs.simpletext;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextStoredFieldsWriter.CHECKSUM;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Locale;
|
||||
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.store.ChecksumIndexInput;
|
||||
import org.apache.lucene.store.DataInput;
|
||||
import org.apache.lucene.store.DataOutput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
|
@ -32,6 +32,7 @@ import org.apache.lucene.util.UnicodeUtil;
|
|||
class SimpleTextUtil {
|
||||
public final static byte NEWLINE = 10;
|
||||
public final static byte ESCAPE = 92;
|
||||
final static BytesRef CHECKSUM = new BytesRef("checksum ");
|
||||
|
||||
public static void write(DataOutput out, String s, BytesRef scratch) throws IOException {
|
||||
UnicodeUtil.UTF16toUTF8(s, 0, s.length(), scratch);
|
||||
|
@ -73,12 +74,24 @@ class SimpleTextUtil {
|
|||
scratch.length = upto;
|
||||
}
|
||||
|
||||
public static void checkFooter(ChecksumIndexInput input, BytesRef prefix) throws IOException {
|
||||
public static void writeChecksum(IndexOutput out, BytesRef scratch) throws IOException {
|
||||
// Pad with zeros so different checksum values use the
|
||||
// same number of bytes
|
||||
// (BaseIndexFileFormatTestCase.testMergeStability cares):
|
||||
String checksum = String.format(Locale.ROOT, "%020d", out.getChecksum());
|
||||
SimpleTextUtil.write(out, CHECKSUM);
|
||||
SimpleTextUtil.write(out, checksum, scratch);
|
||||
SimpleTextUtil.writeNewline(out);
|
||||
}
|
||||
|
||||
public static void checkFooter(ChecksumIndexInput input) throws IOException {
|
||||
BytesRef scratch = new BytesRef();
|
||||
String expectedChecksum = Long.toString(input.getChecksum());
|
||||
String expectedChecksum = String.format(Locale.ROOT, "%020d", input.getChecksum());
|
||||
SimpleTextUtil.readLine(input, scratch);
|
||||
assert StringHelper.startsWith(scratch, prefix);
|
||||
String actualChecksum = new BytesRef(scratch.bytes, prefix.length, scratch.length - prefix.length).utf8ToString();
|
||||
if (StringHelper.startsWith(scratch, CHECKSUM) == false) {
|
||||
throw new CorruptIndexException("SimpleText failure: expected checksum line but got " + scratch.utf8ToString() + " (resource=" + input + ")");
|
||||
}
|
||||
String actualChecksum = new BytesRef(scratch.bytes, CHECKSUM.length, scratch.length - CHECKSUM.length).utf8ToString();
|
||||
if (!expectedChecksum.equals(actualChecksum)) {
|
||||
throw new CorruptIndexException("SimpleText checksum failure: " + actualChecksum + " != " + expectedChecksum + " (resource=" + input + ")");
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue