mirror of https://github.com/apache/lucene.git
Remove FixBrokenOffsets since 7.0+ indices cannot have broken offsets.
parent e809e095f0
commit 869a48ccac
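For context: "broken offsets" are token offsets that go backwards within a field, which Lucene 6.x and earlier would silently index. Per the commit message, indices created on 7.0+ cannot contain them, because IndexWriter now rejects such token streams at indexing time, making the repair tool below dead code. A minimal sketch of that rejection, assuming a 7.0+ lucene-core plus lucene-test-framework (for CannedTokenStream and Token) on the classpath; the class name here is illustrative, not part of the commit:

import java.nio.file.Files;

import org.apache.lucene.analysis.CannedTokenStream;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class BrokenOffsetsRejected {
  public static void main(String[] args) throws Exception {
    try (Directory dir = FSDirectory.open(Files.createTempDirectory("offsets"));
         IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig())) {
      FieldType fieldType = new FieldType(TextField.TYPE_STORED);
      fieldType.setStoreTermVectors(true);
      fieldType.setStoreTermVectorPositions(true);
      fieldType.setStoreTermVectorOffsets(true);
      Field field = new Field("foo", "bar", fieldType);
      // Same backwards offsets as the disabled test below: the second token
      // starts at offset 7, before the first token's start offset of 10.
      field.setTokenStream(new CannedTokenStream(new Token("foo", 10, 13), new Token("foo", 7, 9)));
      Document doc = new Document();
      doc.add(field);
      writer.addDocument(doc); // expected on 7.0+: IllegalArgumentException, offsets must not go backwards
    }
  }
}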
@ -1,138 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.index;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;

import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.SuppressForbidden;

/**
 * Command-line tool that reads from a source index and
 * writes to a dest index, correcting any broken offsets
 * in the process.
 *
 * @lucene.experimental
 */
public class FixBrokenOffsets {
  public SegmentInfos infos;

  FSDirectory fsDir;

  Path dir;

  @SuppressForbidden(reason = "System.out required: command line tool")
  public static void main(String[] args) throws IOException {
    if (args.length < 2) {
      System.err.println("Usage: FixBrokenOffsets <srcDir> <destDir>");
      return;
    }
    Path srcPath = Paths.get(args[0]);
    if (!Files.exists(srcPath)) {
      throw new RuntimeException("srcPath " + srcPath.toAbsolutePath() + " doesn't exist");
    }
    Path destPath = Paths.get(args[1]);
    if (Files.exists(destPath)) {
      throw new RuntimeException("destPath " + destPath.toAbsolutePath() + " already exists; please remove it and re-run");
    }
    Directory srcDir = FSDirectory.open(srcPath);
    DirectoryReader reader = DirectoryReader.open(srcDir);

    List<LeafReaderContext> leaves = reader.leaves();
    CodecReader[] filtered = new CodecReader[leaves.size()];
    for(int i=0;i<leaves.size();i++) {
      filtered[i] = SlowCodecReaderWrapper.wrap(new FilterLeafReader(leaves.get(i).reader()) {
        @Override
        public Fields getTermVectors(int docID) throws IOException {
          Fields termVectors = in.getTermVectors(docID);
          if (termVectors == null) {
            return null;
          }
          return new FilterFields(termVectors) {
            @Override
            public Terms terms(String field) throws IOException {
              return new FilterTerms(super.terms(field)) {
                @Override
                public TermsEnum iterator() throws IOException {
                  return new FilterTermsEnum(super.iterator()) {
                    @Override
                    public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException {
                      return new FilterPostingsEnum(super.postings(reuse, flags)) {
                        int nextLastStartOffset = 0;
                        int lastStartOffset = 0;

                        @Override
                        public int nextPosition() throws IOException {
                          int pos = super.nextPosition();
                          lastStartOffset = nextLastStartOffset;
                          nextLastStartOffset = startOffset();
                          return pos;
                        }

                        @Override
                        public int startOffset() throws IOException {
                          int offset = super.startOffset();
                          if (offset < lastStartOffset) {
                            offset = lastStartOffset;
                          }
                          return offset;
                        }

                        @Override
                        public int endOffset() throws IOException {
                          int offset = super.endOffset();
                          if (offset < lastStartOffset) {
                            offset = lastStartOffset;
                          }
                          return offset;
                        }
                      };
                    }
                  };
                }
              };
            }
          };
        }

        @Override
        public CacheHelper getCoreCacheHelper() {
          return null;
        }

        @Override
        public CacheHelper getReaderCacheHelper() {
          return null;
        }
      });
    }

    Directory destDir = FSDirectory.open(destPath);
    // We need to maintain the same major version
    int createdMajor = SegmentInfos.readLatestCommit(srcDir).getIndexCreatedVersionMajor();
    new SegmentInfos(createdMajor).commit(destDir);
    IndexWriter writer = new IndexWriter(destDir, new IndexWriterConfig());
    writer.addIndexes(filtered);
    IOUtils.close(writer, reader, srcDir, destDir);
  }
}
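The fix itself lives in the innermost FilterPostingsEnum above: it remembers the previous position's already-clamped start offset and never lets the current start or end offset fall below it, so the rewritten offsets are non-decreasing. The same logic on plain {start, end} pairs, as a standalone sketch (a hypothetical helper for illustration, not part of the commit):

public class ClampOffsets {
  // Mirrors the startOffset()/endOffset() overrides above: clamp each pair
  // up to the previous (already clamped) start offset.
  static int[][] fix(int[][] offsets) {
    int[][] fixed = new int[offsets.length][];
    int lastStartOffset = 0;
    for (int i = 0; i < offsets.length; i++) {
      int start = Math.max(offsets[i][0], lastStartOffset);
      int end = Math.max(offsets[i][1], lastStartOffset);
      fixed[i] = new int[] {start, end};
      lastStartOffset = start;
    }
    return fixed;
  }

  public static void main(String[] args) {
    // The backwards pairs from the test below, (10,13) then (7,9), come out
    // as (10,13) then (10,10): monotonic, at the cost of collapsing the
    // second token to an empty span.
    for (int[] pair : fix(new int[][] {{10, 13}, {7, 9}})) {
      System.out.println(pair[0] + "," + pair[1]);
    }
  }
}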
@ -1,112 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.index;

import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Path;
import java.util.List;

import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;

public class TestFixBrokenOffsets extends LuceneTestCase {

  // Run this in Lucene 6.x:
  //
  //   ant test -Dtestcase=TestFixBrokenOffsets -Dtestmethod=testCreateBrokenOffsetsIndex -Dtests.codec=default -Dtests.useSecurityManager=false
  /*
  public void testCreateBrokenOffsetsIndex() throws IOException {

    Path indexDir = Paths.get("/tmp/brokenoffsets");
    Files.deleteIfExists(indexDir);
    Directory dir = newFSDirectory(indexDir);
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());

    Document doc = new Document();
    FieldType fieldType = new FieldType(TextField.TYPE_STORED);
    fieldType.setStoreTermVectors(true);
    fieldType.setStoreTermVectorPositions(true);
    fieldType.setStoreTermVectorOffsets(true);
    Field field = new Field("foo", "bar", fieldType);
    field.setTokenStream(new CannedTokenStream(new Token("foo", 10, 13), new Token("foo", 7, 9)));
    doc.add(field);
    writer.addDocument(doc);
    writer.commit();

    // 2nd segment
    doc = new Document();
    field = new Field("foo", "bar", fieldType);
    field.setTokenStream(new CannedTokenStream(new Token("bar", 15, 17), new Token("bar", 1, 5)));
    doc.add(field);
    writer.addDocument(doc);

    writer.close();

    dir.close();
  }
  */

  public void testFixBrokenOffsetsIndex() throws IOException {
    InputStream resource = getClass().getResourceAsStream("index.630.brokenoffsets.zip");
    assertNotNull("Broken offsets index not found", resource);
    Path path = createTempDir("brokenoffsets");
    TestUtil.unzip(resource, path);
    Directory dir = newFSDirectory(path);

    // OK: index is 6.3.0 so offsets not checked:
    TestUtil.checkIndex(dir);

    MockDirectoryWrapper tmpDir = newMockDirectory();
    tmpDir.setCheckIndexOnClose(false);
    IndexWriter w = new IndexWriter(tmpDir, new IndexWriterConfig());
    IndexWriter finalW = w;
    IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> finalW.addIndexes(dir));
    assertTrue(e.getMessage(), e.getMessage().startsWith("Cannot use addIndexes(Directory) with indexes that have been created by a different Lucene version."));
    w.close();
    // OK: addIndexes(Directory...) refuses to execute if the index creation version is different so broken offsets are not carried over
    tmpDir.close();

    final MockDirectoryWrapper tmpDir2 = newMockDirectory();
    tmpDir2.setCheckIndexOnClose(false);
    w = new IndexWriter(tmpDir2, new IndexWriterConfig());
    DirectoryReader reader = DirectoryReader.open(dir);
    List<LeafReaderContext> leaves = reader.leaves();
    CodecReader[] codecReaders = new CodecReader[leaves.size()];
    for(int i=0;i<leaves.size();i++) {
      codecReaders[i] = (CodecReader) leaves.get(i).reader();
    }
    IndexWriter finalW2 = w;
    e = expectThrows(IllegalArgumentException.class, () -> finalW2.addIndexes(codecReaders));
    assertEquals("Cannot merge a segment that has been created with major version 6 into this index which has been created by major version 7", e.getMessage());
    reader.close();
    w.close();
    tmpDir2.close();

    // Now run the tool and confirm the broken offsets are fixed:
    Path path2 = createTempDir("fixedbrokenoffsets").resolve("subdir");
    FixBrokenOffsets.main(new String[] {path.toString(), path2.toString()});
    Directory tmpDir3 = FSDirectory.open(path2);
    TestUtil.checkIndex(tmpDir3);
    tmpDir3.close();

    dir.close();
  }
}