mirror of
https://github.com/apache/lucene.git
synced 2025-02-28 05:19:17 +00:00
LUCENE-9662: CheckIndex should be concurrent - parallelizing index check across segments (#128)
This commit is contained in:
parent
9c7f0d45ee
commit
424192e170
@ -258,6 +258,9 @@ Improvements
|
|||||||
* LUCENE-10019: Align file starts in CFS files to have proper alignment (8 bytes)
|
* LUCENE-10019: Align file starts in CFS files to have proper alignment (8 bytes)
|
||||||
(Uwe Schindler)
|
(Uwe Schindler)
|
||||||
|
|
||||||
|
* LUCENE-9662: Make CheckIndex concurrent by parallelizing index check across segments.
|
||||||
|
(Zach Chen, Mike McCandless, Dawid Weiss, Robert Muir)
|
||||||
|
|
||||||
Bug fixes
|
Bug fixes
|
||||||
|
|
||||||
* LUCENE-9686: Fix read past EOF handling in DirectIODirectory. (Zach Chen,
|
* LUCENE-9686: Fix read past EOF handling in DirectIODirectory. (Zach Chen,
|
||||||
|
@ -70,7 +70,7 @@ public class TestManyPointsInOldIndex extends LuceneTestCase {
|
|||||||
dir.setCheckIndexOnClose(false);
|
dir.setCheckIndexOnClose(false);
|
||||||
|
|
||||||
// ... because we check ourselves here:
|
// ... because we check ourselves here:
|
||||||
TestUtil.checkIndex(dir, false, true, null);
|
TestUtil.checkIndex(dir, false, true, true, null);
|
||||||
dir.close();
|
dir.close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -121,7 +121,7 @@ public class TestAllFilesDetectTruncation extends LuceneTestCase {
|
|||||||
// CheckIndex should also fail:
|
// CheckIndex should also fail:
|
||||||
expectThrowsAnyOf(
|
expectThrowsAnyOf(
|
||||||
Arrays.asList(CorruptIndexException.class, EOFException.class),
|
Arrays.asList(CorruptIndexException.class, EOFException.class),
|
||||||
() -> TestUtil.checkIndex(dirCopy, true, true, null));
|
() -> TestUtil.checkIndex(dirCopy, true, true, true, null));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -16,8 +16,19 @@
|
|||||||
*/
|
*/
|
||||||
package org.apache.lucene.index;
|
package org.apache.lucene.index;
|
||||||
|
|
||||||
|
import java.io.ByteArrayOutputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import org.apache.lucene.analysis.CannedTokenStream;
|
||||||
|
import org.apache.lucene.analysis.Token;
|
||||||
|
import org.apache.lucene.document.*;
|
||||||
|
import org.apache.lucene.search.Sort;
|
||||||
|
import org.apache.lucene.search.SortField;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.IOUtils;
|
||||||
|
import org.apache.lucene.util.NumericUtils;
|
||||||
|
import org.apache.lucene.util.TestUtil;
|
||||||
|
import org.apache.lucene.util.VectorUtil;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
public class TestCheckIndex extends BaseTestCheckIndex {
|
public class TestCheckIndex extends BaseTestCheckIndex {
|
||||||
@ -54,4 +65,142 @@ public class TestCheckIndex extends BaseTestCheckIndex {
|
|||||||
public void testObtainsLock() throws IOException {
|
public void testObtainsLock() throws IOException {
|
||||||
testObtainsLock(directory);
|
testObtainsLock(directory);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testCheckIndexAllValid() throws Exception {
|
||||||
|
try (Directory dir = newDirectory()) {
|
||||||
|
int liveDocCount = 1 + random().nextInt(10);
|
||||||
|
IndexWriterConfig conifg = newIndexWriterConfig();
|
||||||
|
conifg.setIndexSort(new Sort(new SortField("sort_field", SortField.Type.INT, true)));
|
||||||
|
conifg.setSoftDeletesField("soft_delete");
|
||||||
|
try (IndexWriter w = new IndexWriter(dir, conifg)) {
|
||||||
|
for (int i = 0; i < liveDocCount; i++) {
|
||||||
|
Document doc = new Document();
|
||||||
|
|
||||||
|
// stored field
|
||||||
|
doc.add(new StringField("id", Integer.toString(random().nextInt()), Field.Store.YES));
|
||||||
|
doc.add(new StoredField("field", "value" + TestUtil.randomSimpleString(random())));
|
||||||
|
|
||||||
|
// vector
|
||||||
|
doc.add(new KnnVectorField("v1", randomVector(3)));
|
||||||
|
doc.add(new KnnVectorField("v2", randomVector(3)));
|
||||||
|
|
||||||
|
// doc value
|
||||||
|
doc.add(new NumericDocValuesField("dv", random().nextLong()));
|
||||||
|
|
||||||
|
// point value
|
||||||
|
byte[] point = new byte[4];
|
||||||
|
NumericUtils.intToSortableBytes(random().nextInt(), point, 0);
|
||||||
|
doc.add(new BinaryPoint("point", point));
|
||||||
|
|
||||||
|
// term vector
|
||||||
|
Token token1 =
|
||||||
|
new Token("bar", 0, 3) {
|
||||||
|
{
|
||||||
|
setPayload(new BytesRef("pay1"));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
Token token2 =
|
||||||
|
new Token("bar", 4, 8) {
|
||||||
|
{
|
||||||
|
setPayload(new BytesRef("pay2"));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
|
||||||
|
ft.setStoreTermVectors(true);
|
||||||
|
ft.setStoreTermVectorPositions(true);
|
||||||
|
ft.setStoreTermVectorPayloads(true);
|
||||||
|
doc.add(new Field("termvector", new CannedTokenStream(token1, token2), ft));
|
||||||
|
|
||||||
|
w.addDocument(doc);
|
||||||
|
}
|
||||||
|
|
||||||
|
Document tombstone = new Document();
|
||||||
|
tombstone.add(new NumericDocValuesField("soft_delete", 1));
|
||||||
|
w.softUpdateDocument(
|
||||||
|
new Term("id", "1"), tombstone, new NumericDocValuesField("soft_delete", 1));
|
||||||
|
}
|
||||||
|
|
||||||
|
ByteArrayOutputStream output = new ByteArrayOutputStream();
|
||||||
|
CheckIndex.Status status = TestUtil.checkIndex(dir, false, true, true, output);
|
||||||
|
|
||||||
|
assertEquals(1, status.segmentInfos.size());
|
||||||
|
|
||||||
|
CheckIndex.Status.SegmentInfoStatus segStatus = status.segmentInfos.get(0);
|
||||||
|
|
||||||
|
// confirm live docs testing status
|
||||||
|
assertEquals(0, segStatus.liveDocStatus.numDeleted);
|
||||||
|
assertTrue(output.toString(IOUtils.UTF_8).contains("test: check live docs"));
|
||||||
|
assertNull(segStatus.liveDocStatus.error);
|
||||||
|
|
||||||
|
// confirm field infos testing status
|
||||||
|
assertEquals(8, segStatus.fieldInfoStatus.totFields);
|
||||||
|
assertTrue(output.toString(IOUtils.UTF_8).contains("test: field infos"));
|
||||||
|
assertNull(segStatus.fieldInfoStatus.error);
|
||||||
|
|
||||||
|
// confirm field norm (from term vector) testing status
|
||||||
|
assertEquals(1, segStatus.fieldNormStatus.totFields);
|
||||||
|
assertTrue(output.toString(IOUtils.UTF_8).contains("test: field norms"));
|
||||||
|
assertNull(segStatus.fieldNormStatus.error);
|
||||||
|
|
||||||
|
// confirm term index testing status
|
||||||
|
assertTrue(segStatus.termIndexStatus.termCount > 0);
|
||||||
|
assertTrue(segStatus.termIndexStatus.totFreq > 0);
|
||||||
|
assertTrue(segStatus.termIndexStatus.totPos > 0);
|
||||||
|
assertTrue(output.toString(IOUtils.UTF_8).contains("test: terms, freq, prox"));
|
||||||
|
assertNull(segStatus.termIndexStatus.error);
|
||||||
|
|
||||||
|
// confirm stored field testing status
|
||||||
|
// add storedField from tombstone doc
|
||||||
|
assertEquals(liveDocCount + 1, segStatus.storedFieldStatus.docCount);
|
||||||
|
assertEquals(2 * liveDocCount, segStatus.storedFieldStatus.totFields);
|
||||||
|
assertTrue(output.toString(IOUtils.UTF_8).contains("test: stored fields"));
|
||||||
|
assertNull(segStatus.storedFieldStatus.error);
|
||||||
|
|
||||||
|
// confirm term vector testing status
|
||||||
|
assertEquals(liveDocCount, segStatus.termVectorStatus.docCount);
|
||||||
|
assertEquals(liveDocCount, segStatus.termVectorStatus.totVectors);
|
||||||
|
assertTrue(output.toString(IOUtils.UTF_8).contains("test: term vectors"));
|
||||||
|
assertNull(segStatus.termVectorStatus.error);
|
||||||
|
|
||||||
|
// confirm doc values testing status
|
||||||
|
assertEquals(2, segStatus.docValuesStatus.totalNumericFields);
|
||||||
|
assertTrue(output.toString(IOUtils.UTF_8).contains("test: docvalues"));
|
||||||
|
assertNull(segStatus.docValuesStatus.error);
|
||||||
|
|
||||||
|
// confirm point values testing status
|
||||||
|
assertEquals(1, segStatus.pointsStatus.totalValueFields);
|
||||||
|
assertEquals(liveDocCount, segStatus.pointsStatus.totalValuePoints);
|
||||||
|
assertTrue(output.toString(IOUtils.UTF_8).contains("test: points"));
|
||||||
|
assertNull(segStatus.pointsStatus.error);
|
||||||
|
|
||||||
|
// confirm vector testing status
|
||||||
|
assertEquals(2 * liveDocCount, segStatus.vectorValuesStatus.totalVectorValues);
|
||||||
|
assertEquals(2, segStatus.vectorValuesStatus.totalKnnVectorFields);
|
||||||
|
assertTrue(output.toString(IOUtils.UTF_8).contains("test: vectors"));
|
||||||
|
assertNull(segStatus.vectorValuesStatus.error);
|
||||||
|
|
||||||
|
// confirm index sort testing status
|
||||||
|
assertTrue(output.toString(IOUtils.UTF_8).contains("test: index sort"));
|
||||||
|
assertNull(segStatus.indexSortStatus.error);
|
||||||
|
|
||||||
|
// confirm soft deletes testing status
|
||||||
|
assertTrue(output.toString(IOUtils.UTF_8).contains("test: check soft deletes"));
|
||||||
|
assertNull(segStatus.softDeletesStatus.error);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testInvalidThreadCountArgument() {
|
||||||
|
String[] args = new String[] {"-threadCount", "0"};
|
||||||
|
expectThrows(IllegalArgumentException.class, () -> CheckIndex.parseOptions(args));
|
||||||
|
}
|
||||||
|
|
||||||
|
private float[] randomVector(int dim) {
|
||||||
|
float[] v = new float[dim];
|
||||||
|
for (int i = 0; i < dim; i++) {
|
||||||
|
v[i] = random().nextFloat();
|
||||||
|
}
|
||||||
|
VectorUtil.l2normalize(v);
|
||||||
|
return v;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -726,7 +726,7 @@ public class TestPointValues extends LuceneTestCase {
|
|||||||
w.close();
|
w.close();
|
||||||
|
|
||||||
ByteArrayOutputStream output = new ByteArrayOutputStream();
|
ByteArrayOutputStream output = new ByteArrayOutputStream();
|
||||||
CheckIndex.Status status = TestUtil.checkIndex(dir, false, true, output);
|
CheckIndex.Status status = TestUtil.checkIndex(dir, false, true, true, output);
|
||||||
assertEquals(1, status.segmentInfos.size());
|
assertEquals(1, status.segmentInfos.size());
|
||||||
CheckIndex.Status.SegmentInfoStatus segStatus = status.segmentInfos.get(0);
|
CheckIndex.Status.SegmentInfoStatus segStatus = status.segmentInfos.get(0);
|
||||||
// total 3 point values were indexed:
|
// total 3 point values were indexed:
|
||||||
|
@ -116,8 +116,11 @@ public class TestSwappedIndexFiles extends LuceneTestCase {
|
|||||||
// CheckIndex should also fail:
|
// CheckIndex should also fail:
|
||||||
expectThrowsAnyOf(
|
expectThrowsAnyOf(
|
||||||
Arrays.asList(
|
Arrays.asList(
|
||||||
CorruptIndexException.class, EOFException.class, IndexFormatTooOldException.class),
|
CorruptIndexException.class,
|
||||||
() -> TestUtil.checkIndex(dirCopy, true, true, null));
|
EOFException.class,
|
||||||
|
IndexFormatTooOldException.class,
|
||||||
|
CheckIndex.CheckIndexException.class),
|
||||||
|
() -> TestUtil.checkIndex(dirCopy, true, true, true, null));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -926,7 +926,7 @@ public abstract class BaseKnnVectorsFormatTestCase extends BaseIndexFileFormatTe
|
|||||||
}
|
}
|
||||||
|
|
||||||
ByteArrayOutputStream output = new ByteArrayOutputStream();
|
ByteArrayOutputStream output = new ByteArrayOutputStream();
|
||||||
CheckIndex.Status status = TestUtil.checkIndex(dir, false, true, output);
|
CheckIndex.Status status = TestUtil.checkIndex(dir, false, true, true, output);
|
||||||
assertEquals(1, status.segmentInfos.size());
|
assertEquals(1, status.segmentInfos.size());
|
||||||
CheckIndex.Status.SegmentInfoStatus segStatus = status.segmentInfos.get(0);
|
CheckIndex.Status.SegmentInfoStatus segStatus = status.segmentInfos.get(0);
|
||||||
// total 3 vector values were indexed:
|
// total 3 vector values were indexed:
|
||||||
|
@ -895,7 +895,11 @@ public class MockDirectoryWrapper extends BaseDirectoryWrapper {
|
|||||||
System.out.println("\nNOTE: MockDirectoryWrapper: now run CheckIndex");
|
System.out.println("\nNOTE: MockDirectoryWrapper: now run CheckIndex");
|
||||||
}
|
}
|
||||||
|
|
||||||
TestUtil.checkIndex(this, getCrossCheckTermVectorsOnClose(), true, null);
|
// Methods in MockDirectoryWrapper hold the lock on this, which would cause a deadlock if
|
||||||
|
// TestUtil#checkIndex checked segments concurrently on other threads that call back into
|
||||||
|
// synchronized methods such as MockDirectoryWrapper#fileLength.
|
||||||
|
// Hence we pass concurrent = false to this method to turn off concurrent checks.
|
||||||
|
TestUtil.checkIndex(this, getCrossCheckTermVectorsOnClose(), true, false, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: factor this out / share w/ TestIW.assertNoUnreferencedFiles
|
// TODO: factor this out / share w/ TestIW.assertNoUnreferencedFiles
|
||||||
|
@ -16,6 +16,7 @@
|
|||||||
*/
|
*/
|
||||||
package org.apache.lucene.util;
|
package org.apache.lucene.util;
|
||||||
|
|
||||||
|
import com.carrotsearch.randomizedtesting.RandomizedTest;
|
||||||
import com.carrotsearch.randomizedtesting.generators.RandomNumbers;
|
import com.carrotsearch.randomizedtesting.generators.RandomNumbers;
|
||||||
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
|
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
|
||||||
import java.io.BufferedInputStream;
|
import java.io.BufferedInputStream;
|
||||||
@ -303,7 +304,7 @@ public final class TestUtil {
|
|||||||
|
|
||||||
public static CheckIndex.Status checkIndex(Directory dir, boolean doSlowChecks)
|
public static CheckIndex.Status checkIndex(Directory dir, boolean doSlowChecks)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
return checkIndex(dir, doSlowChecks, false, null);
|
return checkIndex(dir, doSlowChecks, false, true, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -311,7 +312,11 @@ public final class TestUtil {
|
|||||||
* moving on to other fields/segments to look for any other corruption.
|
* moving on to other fields/segments to look for any other corruption.
|
||||||
*/
|
*/
|
||||||
public static CheckIndex.Status checkIndex(
|
public static CheckIndex.Status checkIndex(
|
||||||
Directory dir, boolean doSlowChecks, boolean failFast, ByteArrayOutputStream output)
|
Directory dir,
|
||||||
|
boolean doSlowChecks,
|
||||||
|
boolean failFast,
|
||||||
|
boolean concurrent,
|
||||||
|
ByteArrayOutputStream output)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
if (output == null) {
|
if (output == null) {
|
||||||
output = new ByteArrayOutputStream(1024);
|
output = new ByteArrayOutputStream(1024);
|
||||||
@ -323,6 +328,11 @@ public final class TestUtil {
|
|||||||
checker.setDoSlowChecks(doSlowChecks);
|
checker.setDoSlowChecks(doSlowChecks);
|
||||||
checker.setFailFast(failFast);
|
checker.setFailFast(failFast);
|
||||||
checker.setInfoStream(new PrintStream(output, false, IOUtils.UTF_8), false);
|
checker.setInfoStream(new PrintStream(output, false, IOUtils.UTF_8), false);
|
||||||
|
if (concurrent) {
|
||||||
|
checker.setThreadCount(RandomizedTest.randomIntBetween(2, 5));
|
||||||
|
} else {
|
||||||
|
checker.setThreadCount(1);
|
||||||
|
}
|
||||||
CheckIndex.Status indexStatus = checker.checkIndex(null);
|
CheckIndex.Status indexStatus = checker.checkIndex(null);
|
||||||
|
|
||||||
if (indexStatus == null || indexStatus.clean == false) {
|
if (indexStatus == null || indexStatus.clean == false) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user