LUCENE-5587: fix SimpleText so that its doc-values .dat files are the same size regardless of the order in which fields were added

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1585994 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2014-04-09 15:35:45 +00:00
parent fa4669caf9
commit 7a0e6bf9e5
14 changed files with 33 additions and 56 deletions

View File

@ -17,7 +17,6 @@ package org.apache.lucene.codecs.simpletext;
* limitations under the License. * limitations under the License.
*/ */
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.CHECKSUM;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.END; import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.END;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.FIELD; import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.FIELD;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.LENGTH; import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.LENGTH;
@ -481,7 +480,7 @@ class SimpleTextDocValuesReader extends DocValuesProducer {
while(true) { while(true) {
SimpleTextUtil.readLine(input, scratch); SimpleTextUtil.readLine(input, scratch);
if (scratch.equals(END)) { if (scratch.equals(END)) {
SimpleTextUtil.checkFooter(input, CHECKSUM); SimpleTextUtil.checkFooter(input);
break; break;
} }
} }

View File

@ -36,7 +36,6 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.IOUtils;
class SimpleTextDocValuesWriter extends DocValuesConsumer { class SimpleTextDocValuesWriter extends DocValuesConsumer {
final static BytesRef CHECKSUM = new BytesRef("checksum ");
final static BytesRef END = new BytesRef("END"); final static BytesRef END = new BytesRef("END");
final static BytesRef FIELD = new BytesRef("field "); final static BytesRef FIELD = new BytesRef("field ");
final static BytesRef TYPE = new BytesRef(" type "); final static BytesRef TYPE = new BytesRef(" type ");
@ -397,10 +396,7 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer {
// TODO: sheisty to do this here? // TODO: sheisty to do this here?
SimpleTextUtil.write(data, END); SimpleTextUtil.write(data, END);
SimpleTextUtil.writeNewline(data); SimpleTextUtil.writeNewline(data);
String checksum = Long.toString(data.getChecksum()); SimpleTextUtil.writeChecksum(data, scratch);
SimpleTextUtil.write(data, CHECKSUM);
SimpleTextUtil.write(data, checksum, scratch);
SimpleTextUtil.writeNewline(data);
success = true; success = true;
} finally { } finally {
if (success) { if (success) {

View File

@ -129,7 +129,7 @@ public class SimpleTextFieldInfosReader extends FieldInfosReader {
infos[i].setDocValuesGen(dvGen); infos[i].setDocValuesGen(dvGen);
} }
SimpleTextUtil.checkFooter(input, CHECKSUM); SimpleTextUtil.checkFooter(input);
FieldInfos fieldInfos = new FieldInfos(infos); FieldInfos fieldInfos = new FieldInfos(infos);
success = true; success = true;

View File

@ -58,7 +58,6 @@ public class SimpleTextFieldInfosWriter extends FieldInfosWriter {
static final BytesRef NUM_ATTS = new BytesRef(" attributes "); static final BytesRef NUM_ATTS = new BytesRef(" attributes ");
final static BytesRef ATT_KEY = new BytesRef(" key "); final static BytesRef ATT_KEY = new BytesRef(" key ");
final static BytesRef ATT_VALUE = new BytesRef(" value "); final static BytesRef ATT_VALUE = new BytesRef(" value ");
final static BytesRef CHECKSUM = new BytesRef("checksum ");
@Override @Override
public void write(Directory directory, String segmentName, String segmentSuffix, FieldInfos infos, IOContext context) throws IOException { public void write(Directory directory, String segmentName, String segmentSuffix, FieldInfos infos, IOContext context) throws IOException {
@ -133,10 +132,7 @@ public class SimpleTextFieldInfosWriter extends FieldInfosWriter {
} }
} }
} }
String checksum = Long.toString(out.getChecksum()); SimpleTextUtil.writeChecksum(out, scratch);
SimpleTextUtil.write(out, CHECKSUM);
SimpleTextUtil.write(out, checksum, scratch);
SimpleTextUtil.writeNewline(out);
success = true; success = true;
} finally { } finally {
if (success) { if (success) {

View File

@ -53,7 +53,6 @@ import org.apache.lucene.util.fst.PairOutputs;
import org.apache.lucene.util.fst.PositiveIntOutputs; import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util; import org.apache.lucene.util.fst.Util;
import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.CHECKSUM;
import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.END; import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.END;
import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.FIELD; import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.FIELD;
import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.TERM; import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.TERM;
@ -93,7 +92,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
while (true) { while (true) {
SimpleTextUtil.readLine(input, scratch); SimpleTextUtil.readLine(input, scratch);
if (scratch.equals(END)) { if (scratch.equals(END)) {
SimpleTextUtil.checkFooter(input, CHECKSUM); SimpleTextUtil.checkFooter(input);
return fields; return fields;
} else if (StringHelper.startsWith(scratch, FIELD)) { } else if (StringHelper.startsWith(scratch, FIELD)) {
String fieldName = new String(scratch.bytes, scratch.offset + FIELD.length, scratch.length - FIELD.length, StandardCharsets.UTF_8); String fieldName = new String(scratch.bytes, scratch.offset + FIELD.length, scratch.length - FIELD.length, StandardCharsets.UTF_8);

View File

@ -39,7 +39,6 @@ class SimpleTextFieldsWriter extends FieldsConsumer implements Closeable {
private final BytesRef scratch = new BytesRef(10); private final BytesRef scratch = new BytesRef(10);
private final SegmentWriteState writeState; private final SegmentWriteState writeState;
final static BytesRef CHECKSUM = new BytesRef("checksum ");
final static BytesRef END = new BytesRef("END"); final static BytesRef END = new BytesRef("END");
final static BytesRef FIELD = new BytesRef("field "); final static BytesRef FIELD = new BytesRef("field ");
final static BytesRef TERM = new BytesRef(" term "); final static BytesRef TERM = new BytesRef(" term ");
@ -220,10 +219,7 @@ class SimpleTextFieldsWriter extends FieldsConsumer implements Closeable {
try { try {
write(END); write(END);
newline(); newline();
String checksum = Long.toString(out.getChecksum()); SimpleTextUtil.writeChecksum(out, scratch);
write(CHECKSUM);
write(checksum);
newline();
} finally { } finally {
out.close(); out.close();
out = null; out = null;

View File

@ -50,7 +50,6 @@ public class SimpleTextLiveDocsFormat extends LiveDocsFormat {
final static BytesRef SIZE = new BytesRef("size "); final static BytesRef SIZE = new BytesRef("size ");
final static BytesRef DOC = new BytesRef(" doc "); final static BytesRef DOC = new BytesRef(" doc ");
final static BytesRef END = new BytesRef("END"); final static BytesRef END = new BytesRef("END");
final static BytesRef CHECKSUM = new BytesRef("checksum ");
@Override @Override
public MutableBits newLiveDocs(int size) throws IOException { public MutableBits newLiveDocs(int size) throws IOException {
@ -89,7 +88,7 @@ public class SimpleTextLiveDocsFormat extends LiveDocsFormat {
SimpleTextUtil.readLine(in, scratch); SimpleTextUtil.readLine(in, scratch);
} }
SimpleTextUtil.checkFooter(in, CHECKSUM); SimpleTextUtil.checkFooter(in);
success = true; success = true;
return new SimpleTextBits(bits, size); return new SimpleTextBits(bits, size);
@ -130,10 +129,7 @@ public class SimpleTextLiveDocsFormat extends LiveDocsFormat {
SimpleTextUtil.write(out, END); SimpleTextUtil.write(out, END);
SimpleTextUtil.writeNewline(out); SimpleTextUtil.writeNewline(out);
String checksum = Long.toString(out.getChecksum()); SimpleTextUtil.writeChecksum(out, scratch);
SimpleTextUtil.write(out, CHECKSUM);
SimpleTextUtil.write(out, checksum, scratch);
SimpleTextUtil.writeNewline(out);
success = true; success = true;
} finally { } finally {
if (success) { if (success) {

View File

@ -17,7 +17,6 @@ package org.apache.lucene.codecs.simpletext;
* limitations under the License. * limitations under the License.
*/ */
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_CHECKSUM;
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DIAG_KEY; import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DIAG_KEY;
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DIAG_VALUE; import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DIAG_VALUE;
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DOCCOUNT; import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DOCCOUNT;
@ -99,7 +98,7 @@ public class SimpleTextSegmentInfoReader extends SegmentInfoReader {
files.add(fileName); files.add(fileName);
} }
SimpleTextUtil.checkFooter(input, SI_CHECKSUM); SimpleTextUtil.checkFooter(input);
SegmentInfo info = new SegmentInfo(directory, version, segmentName, docCount, SegmentInfo info = new SegmentInfo(directory, version, segmentName, docCount,
isCompoundFile, null, diagnostics); isCompoundFile, null, diagnostics);

View File

@ -47,7 +47,6 @@ public class SimpleTextSegmentInfoWriter extends SegmentInfoWriter {
final static BytesRef SI_DIAG_VALUE = new BytesRef(" value "); final static BytesRef SI_DIAG_VALUE = new BytesRef(" value ");
final static BytesRef SI_NUM_FILES = new BytesRef(" files "); final static BytesRef SI_NUM_FILES = new BytesRef(" files ");
final static BytesRef SI_FILE = new BytesRef(" file "); final static BytesRef SI_FILE = new BytesRef(" file ");
final static BytesRef SI_CHECKSUM = new BytesRef(" checksum ");
@Override @Override
public void write(Directory dir, SegmentInfo si, FieldInfos fis, IOContext ioContext) throws IOException { public void write(Directory dir, SegmentInfo si, FieldInfos fis, IOContext ioContext) throws IOException {
@ -105,10 +104,7 @@ public class SimpleTextSegmentInfoWriter extends SegmentInfoWriter {
} }
} }
String checksum = Long.toString(output.getChecksum()); SimpleTextUtil.writeChecksum(output, scratch);
SimpleTextUtil.write(output, SI_CHECKSUM);
SimpleTextUtil.write(output, checksum, scratch);
SimpleTextUtil.writeNewline(output);
success = true; success = true;
} finally { } finally {
if (!success) { if (!success) {

View File

@ -91,7 +91,7 @@ public class SimpleTextStoredFieldsReader extends StoredFieldsReader {
upto++; upto++;
} }
} }
SimpleTextUtil.checkFooter(input, CHECKSUM); SimpleTextUtil.checkFooter(input);
assert upto == offsets.length; assert upto == offsets.length;
} }
@ -193,11 +193,6 @@ public class SimpleTextStoredFieldsReader extends StoredFieldsReader {
return ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length); return ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
} }
private String readString(int offset, BytesRef scratch) {
UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+offset, scratch.length-offset, scratchUTF16);
return scratchUTF16.toString();
}
private boolean equalsAt(BytesRef a, BytesRef b, int bOffset) { private boolean equalsAt(BytesRef a, BytesRef b, int bOffset) {
return a.length == b.length - bOffset && return a.length == b.length - bOffset &&
ArrayUtil.equals(a.bytes, a.offset, b.bytes, b.offset + bOffset, b.length - bOffset); ArrayUtil.equals(a.bytes, a.offset, b.bytes, b.offset + bOffset, b.length - bOffset);

View File

@ -51,7 +51,6 @@ public class SimpleTextStoredFieldsWriter extends StoredFieldsWriter {
final static BytesRef TYPE_FLOAT = new BytesRef("float"); final static BytesRef TYPE_FLOAT = new BytesRef("float");
final static BytesRef TYPE_DOUBLE = new BytesRef("double"); final static BytesRef TYPE_DOUBLE = new BytesRef("double");
final static BytesRef CHECKSUM = new BytesRef("checksum ");
final static BytesRef END = new BytesRef("END"); final static BytesRef END = new BytesRef("END");
final static BytesRef DOC = new BytesRef("doc "); final static BytesRef DOC = new BytesRef("doc ");
final static BytesRef NUM = new BytesRef(" numfields "); final static BytesRef NUM = new BytesRef(" numfields ");
@ -172,10 +171,7 @@ public class SimpleTextStoredFieldsWriter extends StoredFieldsWriter {
} }
write(END); write(END);
newLine(); newLine();
String checksum = Long.toString(out.getChecksum()); SimpleTextUtil.writeChecksum(out, scratch);
write(CHECKSUM);
write(checksum);
newLine();
} }
@Override @Override

View File

@ -94,7 +94,7 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
upto++; upto++;
} }
} }
SimpleTextUtil.checkFooter(input, CHECKSUM); SimpleTextUtil.checkFooter(input);
assert upto == offsets.length; assert upto == offsets.length;
} }

View File

@ -37,7 +37,6 @@ import org.apache.lucene.util.IOUtils;
*/ */
public class SimpleTextTermVectorsWriter extends TermVectorsWriter { public class SimpleTextTermVectorsWriter extends TermVectorsWriter {
static final BytesRef CHECKSUM = new BytesRef("checksum ");
static final BytesRef END = new BytesRef("END"); static final BytesRef END = new BytesRef("END");
static final BytesRef DOC = new BytesRef("doc "); static final BytesRef DOC = new BytesRef("doc ");
static final BytesRef NUMFIELDS = new BytesRef(" numfields "); static final BytesRef NUMFIELDS = new BytesRef(" numfields ");
@ -178,10 +177,7 @@ public class SimpleTextTermVectorsWriter extends TermVectorsWriter {
} }
write(END); write(END);
newLine(); newLine();
String checksum = Long.toString(out.getChecksum()); SimpleTextUtil.writeChecksum(out, scratch);
write(CHECKSUM);
write(checksum);
newLine();
} }
@Override @Override

View File

@ -17,14 +17,14 @@ package org.apache.lucene.codecs.simpletext;
* limitations under the License. * limitations under the License.
*/ */
import static org.apache.lucene.codecs.simpletext.SimpleTextStoredFieldsWriter.CHECKSUM;
import java.io.IOException; import java.io.IOException;
import java.util.Locale;
import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.store.ChecksumIndexInput; import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.DataInput; import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput; import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.StringHelper; import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.UnicodeUtil; import org.apache.lucene.util.UnicodeUtil;
@ -32,6 +32,7 @@ import org.apache.lucene.util.UnicodeUtil;
class SimpleTextUtil { class SimpleTextUtil {
public final static byte NEWLINE = 10; public final static byte NEWLINE = 10;
public final static byte ESCAPE = 92; public final static byte ESCAPE = 92;
final static BytesRef CHECKSUM = new BytesRef("checksum ");
public static void write(DataOutput out, String s, BytesRef scratch) throws IOException { public static void write(DataOutput out, String s, BytesRef scratch) throws IOException {
UnicodeUtil.UTF16toUTF8(s, 0, s.length(), scratch); UnicodeUtil.UTF16toUTF8(s, 0, s.length(), scratch);
@ -72,13 +73,25 @@ class SimpleTextUtil {
scratch.offset = 0; scratch.offset = 0;
scratch.length = upto; scratch.length = upto;
} }
/**
 * Appends the checksum footer line to {@code out}: the {@code CHECKSUM} prefix, the
 * output's current checksum rendered as a decimal number, and a newline.
 *
 * <p>The number is left-padded with zeros to a fixed width of 20 characters, so the
 * footer takes the same number of bytes whatever the checksum value is
 * (BaseIndexFileFormatTestCase.testMergeStability cares about this).
 *
 * @param out output whose checksum is read and to which the footer line is written
 * @param scratch reusable buffer handed to the string-writing helper
 * @throws IOException if reading the checksum or writing the footer fails
 */
public static void writeChecksum(IndexOutput out, BytesRef scratch) throws IOException {
  // Capture the checksum first, before any footer bytes are written to the output.
  final long value = out.getChecksum();
  final String padded = String.format(Locale.ROOT, "%020d", value);
  SimpleTextUtil.write(out, CHECKSUM);
  SimpleTextUtil.write(out, padded, scratch);
  SimpleTextUtil.writeNewline(out);
}
public static void checkFooter(ChecksumIndexInput input, BytesRef prefix) throws IOException { public static void checkFooter(ChecksumIndexInput input) throws IOException {
BytesRef scratch = new BytesRef(); BytesRef scratch = new BytesRef();
String expectedChecksum = Long.toString(input.getChecksum()); String expectedChecksum = String.format(Locale.ROOT, "%020d", input.getChecksum());
SimpleTextUtil.readLine(input, scratch); SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch, prefix); if (StringHelper.startsWith(scratch, CHECKSUM) == false) {
String actualChecksum = new BytesRef(scratch.bytes, prefix.length, scratch.length - prefix.length).utf8ToString(); throw new CorruptIndexException("SimpleText failure: expected checksum line but got " + scratch.utf8ToString() + " (resource=" + input + ")");
}
String actualChecksum = new BytesRef(scratch.bytes, CHECKSUM.length, scratch.length - CHECKSUM.length).utf8ToString();
if (!expectedChecksum.equals(actualChecksum)) { if (!expectedChecksum.equals(actualChecksum)) {
throw new CorruptIndexException("SimpleText checksum failure: " + actualChecksum + " != " + expectedChecksum + " (resource=" + input + ")"); throw new CorruptIndexException("SimpleText checksum failure: " + actualChecksum + " != " + expectedChecksum + " (resource=" + input + ")");
} }