LUCENE-5587: fix SimpleText so that its doc-values .dat files are the same size regardless of the order in which fields were added

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1585994 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2014-04-09 15:35:45 +00:00
parent fa4669caf9
commit 7a0e6bf9e5
14 changed files with 33 additions and 56 deletions

View File

@ -17,7 +17,6 @@ package org.apache.lucene.codecs.simpletext;
* limitations under the License.
*/
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.CHECKSUM;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.END;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.FIELD;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.LENGTH;
@ -481,7 +480,7 @@ class SimpleTextDocValuesReader extends DocValuesProducer {
while(true) {
SimpleTextUtil.readLine(input, scratch);
if (scratch.equals(END)) {
SimpleTextUtil.checkFooter(input, CHECKSUM);
SimpleTextUtil.checkFooter(input);
break;
}
}

View File

@ -36,7 +36,6 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
class SimpleTextDocValuesWriter extends DocValuesConsumer {
final static BytesRef CHECKSUM = new BytesRef("checksum ");
final static BytesRef END = new BytesRef("END");
final static BytesRef FIELD = new BytesRef("field ");
final static BytesRef TYPE = new BytesRef(" type ");
@ -397,10 +396,7 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer {
// TODO: sheisty to do this here?
SimpleTextUtil.write(data, END);
SimpleTextUtil.writeNewline(data);
String checksum = Long.toString(data.getChecksum());
SimpleTextUtil.write(data, CHECKSUM);
SimpleTextUtil.write(data, checksum, scratch);
SimpleTextUtil.writeNewline(data);
SimpleTextUtil.writeChecksum(data, scratch);
success = true;
} finally {
if (success) {

View File

@ -129,7 +129,7 @@ public class SimpleTextFieldInfosReader extends FieldInfosReader {
infos[i].setDocValuesGen(dvGen);
}
SimpleTextUtil.checkFooter(input, CHECKSUM);
SimpleTextUtil.checkFooter(input);
FieldInfos fieldInfos = new FieldInfos(infos);
success = true;

View File

@ -58,7 +58,6 @@ public class SimpleTextFieldInfosWriter extends FieldInfosWriter {
static final BytesRef NUM_ATTS = new BytesRef(" attributes ");
final static BytesRef ATT_KEY = new BytesRef(" key ");
final static BytesRef ATT_VALUE = new BytesRef(" value ");
final static BytesRef CHECKSUM = new BytesRef("checksum ");
@Override
public void write(Directory directory, String segmentName, String segmentSuffix, FieldInfos infos, IOContext context) throws IOException {
@ -133,10 +132,7 @@ public class SimpleTextFieldInfosWriter extends FieldInfosWriter {
}
}
}
String checksum = Long.toString(out.getChecksum());
SimpleTextUtil.write(out, CHECKSUM);
SimpleTextUtil.write(out, checksum, scratch);
SimpleTextUtil.writeNewline(out);
SimpleTextUtil.writeChecksum(out, scratch);
success = true;
} finally {
if (success) {

View File

@ -53,7 +53,6 @@ import org.apache.lucene.util.fst.PairOutputs;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util;
import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.CHECKSUM;
import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.END;
import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.FIELD;
import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.TERM;
@ -93,7 +92,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
while (true) {
SimpleTextUtil.readLine(input, scratch);
if (scratch.equals(END)) {
SimpleTextUtil.checkFooter(input, CHECKSUM);
SimpleTextUtil.checkFooter(input);
return fields;
} else if (StringHelper.startsWith(scratch, FIELD)) {
String fieldName = new String(scratch.bytes, scratch.offset + FIELD.length, scratch.length - FIELD.length, StandardCharsets.UTF_8);

View File

@ -39,7 +39,6 @@ class SimpleTextFieldsWriter extends FieldsConsumer implements Closeable {
private final BytesRef scratch = new BytesRef(10);
private final SegmentWriteState writeState;
final static BytesRef CHECKSUM = new BytesRef("checksum ");
final static BytesRef END = new BytesRef("END");
final static BytesRef FIELD = new BytesRef("field ");
final static BytesRef TERM = new BytesRef(" term ");
@ -220,10 +219,7 @@ class SimpleTextFieldsWriter extends FieldsConsumer implements Closeable {
try {
write(END);
newline();
String checksum = Long.toString(out.getChecksum());
write(CHECKSUM);
write(checksum);
newline();
SimpleTextUtil.writeChecksum(out, scratch);
} finally {
out.close();
out = null;

View File

@ -50,7 +50,6 @@ public class SimpleTextLiveDocsFormat extends LiveDocsFormat {
final static BytesRef SIZE = new BytesRef("size ");
final static BytesRef DOC = new BytesRef(" doc ");
final static BytesRef END = new BytesRef("END");
final static BytesRef CHECKSUM = new BytesRef("checksum ");
@Override
public MutableBits newLiveDocs(int size) throws IOException {
@ -89,7 +88,7 @@ public class SimpleTextLiveDocsFormat extends LiveDocsFormat {
SimpleTextUtil.readLine(in, scratch);
}
SimpleTextUtil.checkFooter(in, CHECKSUM);
SimpleTextUtil.checkFooter(in);
success = true;
return new SimpleTextBits(bits, size);
@ -130,10 +129,7 @@ public class SimpleTextLiveDocsFormat extends LiveDocsFormat {
SimpleTextUtil.write(out, END);
SimpleTextUtil.writeNewline(out);
String checksum = Long.toString(out.getChecksum());
SimpleTextUtil.write(out, CHECKSUM);
SimpleTextUtil.write(out, checksum, scratch);
SimpleTextUtil.writeNewline(out);
SimpleTextUtil.writeChecksum(out, scratch);
success = true;
} finally {
if (success) {

View File

@ -17,7 +17,6 @@ package org.apache.lucene.codecs.simpletext;
* limitations under the License.
*/
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_CHECKSUM;
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DIAG_KEY;
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DIAG_VALUE;
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DOCCOUNT;
@ -99,7 +98,7 @@ public class SimpleTextSegmentInfoReader extends SegmentInfoReader {
files.add(fileName);
}
SimpleTextUtil.checkFooter(input, SI_CHECKSUM);
SimpleTextUtil.checkFooter(input);
SegmentInfo info = new SegmentInfo(directory, version, segmentName, docCount,
isCompoundFile, null, diagnostics);

View File

@ -47,7 +47,6 @@ public class SimpleTextSegmentInfoWriter extends SegmentInfoWriter {
final static BytesRef SI_DIAG_VALUE = new BytesRef(" value ");
final static BytesRef SI_NUM_FILES = new BytesRef(" files ");
final static BytesRef SI_FILE = new BytesRef(" file ");
final static BytesRef SI_CHECKSUM = new BytesRef(" checksum ");
@Override
public void write(Directory dir, SegmentInfo si, FieldInfos fis, IOContext ioContext) throws IOException {
@ -105,10 +104,7 @@ public class SimpleTextSegmentInfoWriter extends SegmentInfoWriter {
}
}
String checksum = Long.toString(output.getChecksum());
SimpleTextUtil.write(output, SI_CHECKSUM);
SimpleTextUtil.write(output, checksum, scratch);
SimpleTextUtil.writeNewline(output);
SimpleTextUtil.writeChecksum(output, scratch);
success = true;
} finally {
if (!success) {

View File

@ -91,7 +91,7 @@ public class SimpleTextStoredFieldsReader extends StoredFieldsReader {
upto++;
}
}
SimpleTextUtil.checkFooter(input, CHECKSUM);
SimpleTextUtil.checkFooter(input);
assert upto == offsets.length;
}
@ -193,11 +193,6 @@ public class SimpleTextStoredFieldsReader extends StoredFieldsReader {
return ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
}
private String readString(int offset, BytesRef scratch) {
UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+offset, scratch.length-offset, scratchUTF16);
return scratchUTF16.toString();
}
private boolean equalsAt(BytesRef a, BytesRef b, int bOffset) {
return a.length == b.length - bOffset &&
ArrayUtil.equals(a.bytes, a.offset, b.bytes, b.offset + bOffset, b.length - bOffset);

View File

@ -51,7 +51,6 @@ public class SimpleTextStoredFieldsWriter extends StoredFieldsWriter {
final static BytesRef TYPE_FLOAT = new BytesRef("float");
final static BytesRef TYPE_DOUBLE = new BytesRef("double");
final static BytesRef CHECKSUM = new BytesRef("checksum ");
final static BytesRef END = new BytesRef("END");
final static BytesRef DOC = new BytesRef("doc ");
final static BytesRef NUM = new BytesRef(" numfields ");
@ -172,10 +171,7 @@ public class SimpleTextStoredFieldsWriter extends StoredFieldsWriter {
}
write(END);
newLine();
String checksum = Long.toString(out.getChecksum());
write(CHECKSUM);
write(checksum);
newLine();
SimpleTextUtil.writeChecksum(out, scratch);
}
@Override

View File

@ -94,7 +94,7 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
upto++;
}
}
SimpleTextUtil.checkFooter(input, CHECKSUM);
SimpleTextUtil.checkFooter(input);
assert upto == offsets.length;
}

View File

@ -37,7 +37,6 @@ import org.apache.lucene.util.IOUtils;
*/
public class SimpleTextTermVectorsWriter extends TermVectorsWriter {
static final BytesRef CHECKSUM = new BytesRef("checksum ");
static final BytesRef END = new BytesRef("END");
static final BytesRef DOC = new BytesRef("doc ");
static final BytesRef NUMFIELDS = new BytesRef(" numfields ");
@ -178,10 +177,7 @@ public class SimpleTextTermVectorsWriter extends TermVectorsWriter {
}
write(END);
newLine();
String checksum = Long.toString(out.getChecksum());
write(CHECKSUM);
write(checksum);
newLine();
SimpleTextUtil.writeChecksum(out, scratch);
}
@Override

View File

@ -17,14 +17,14 @@ package org.apache.lucene.codecs.simpletext;
* limitations under the License.
*/
import static org.apache.lucene.codecs.simpletext.SimpleTextStoredFieldsWriter.CHECKSUM;
import java.io.IOException;
import java.util.Locale;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.UnicodeUtil;
@ -32,6 +32,7 @@ import org.apache.lucene.util.UnicodeUtil;
class SimpleTextUtil {
public final static byte NEWLINE = 10;
public final static byte ESCAPE = 92;
final static BytesRef CHECKSUM = new BytesRef("checksum ");
public static void write(DataOutput out, String s, BytesRef scratch) throws IOException {
UnicodeUtil.UTF16toUTF8(s, 0, s.length(), scratch);
@ -72,13 +73,25 @@ class SimpleTextUtil {
scratch.offset = 0;
scratch.length = upto;
}
/**
 * Writes the checksum footer line to {@code out}: the {@code CHECKSUM}
 * prefix, then the value of {@code out.getChecksum()} rendered as a
 * zero-padded 20-digit decimal, then a newline.
 *
 * @param out     output to read the checksum from and append the footer to
 * @param scratch reusable buffer handed to the string-writing helper
 * @throws IOException if writing to {@code out} fails
 */
public static void writeChecksum(IndexOutput out, BytesRef scratch) throws IOException {
  // Sample the checksum first, before any byte of the footer line is written.
  final long checksumValue = out.getChecksum();

  // Fixed-width, zero-padded rendering: every checksum value occupies the
  // same number of bytes, so file sizes do not vary with the checksum
  // (BaseIndexFileFormatTestCase.testMergeStability cares).
  final String paddedChecksum = String.format(Locale.ROOT, "%020d", checksumValue);

  write(out, CHECKSUM);
  write(out, paddedChecksum, scratch);
  writeNewline(out);
}
public static void checkFooter(ChecksumIndexInput input, BytesRef prefix) throws IOException {
public static void checkFooter(ChecksumIndexInput input) throws IOException {
BytesRef scratch = new BytesRef();
String expectedChecksum = Long.toString(input.getChecksum());
String expectedChecksum = String.format(Locale.ROOT, "%020d", input.getChecksum());
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch, prefix);
String actualChecksum = new BytesRef(scratch.bytes, prefix.length, scratch.length - prefix.length).utf8ToString();
if (StringHelper.startsWith(scratch, CHECKSUM) == false) {
throw new CorruptIndexException("SimpleText failure: expected checksum line but got " + scratch.utf8ToString() + " (resource=" + input + ")");
}
String actualChecksum = new BytesRef(scratch.bytes, CHECKSUM.length, scratch.length - CHECKSUM.length).utf8ToString();
if (!expectedChecksum.equals(actualChecksum)) {
throw new CorruptIndexException("SimpleText checksum failure: " + actualChecksum + " != " + expectedChecksum + " (resource=" + input + ")");
}