Remove FixBrokenOffsets since 7.0+ indices cannot have broken offsets.

This commit is contained in:
Adrien Grand 2017-07-03 14:12:25 +02:00
parent e809e095f0
commit 869a48ccac
2 changed files with 0 additions and 250 deletions

View File

@ -1,138 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.SuppressForbidden;
/**
* Command-line tool that reads from a source index and
* writes to a dest index, correcting any broken offsets
* in the process.
*
* @lucene.experimental
*/
public class FixBrokenOffsets {
  // NOTE(review): none of the three fields below is read or written anywhere in this class;
  // they are kept only so that any external reference to them keeps compiling.
  public SegmentInfos infos;
  FSDirectory fsDir;
  Path dir;

  /**
   * Copies the index found at {@code args[0]} into a brand new index at {@code args[1]},
   * reading every segment through a filter that clamps term-vector offsets so that they
   * never go backwards.
   *
   * <p>Prints a usage message to {@code System.err} and returns when fewer than two
   * arguments are given.
   *
   * @param args {@code <srcDir> <destDir>}; the source must exist and the destination must not
   * @throws RuntimeException if the source path is missing or the destination path already exists
   * @throws IOException if reading the source index or writing the destination index fails
   */
  @SuppressForbidden(reason = "System.out required: command line tool")
  public static void main(String[] args) throws IOException {
    if (args.length < 2) {
      System.err.println("Usage: FixBrokenOffsets <srcDir> <destDir>");
      return;
    }
    Path srcPath = Paths.get(args[0]);
    if (!Files.exists(srcPath)) {
      throw new RuntimeException("srcPath " + srcPath.toAbsolutePath() + " doesn't exist");
    }
    Path destPath = Paths.get(args[1]);
    if (Files.exists(destPath)) {
      throw new RuntimeException("destPath " + destPath.toAbsolutePath() + " already exists; please remove it and re-run");
    }

    // Declared up front so the finally block can release whatever was opened,
    // even when a later step throws.
    Directory srcDir = null;
    DirectoryReader reader = null;
    Directory destDir = null;
    IndexWriter writer = null;
    boolean success = false;
    try {
      srcDir = FSDirectory.open(srcPath);
      reader = DirectoryReader.open(srcDir);
      List<LeafReaderContext> leaves = reader.leaves();
      CodecReader[] filtered = new CodecReader[leaves.size()];
      for (int i = 0; i < filtered.length; i++) {
        filtered[i] = wrapWithOffsetFix(leaves.get(i));
      }

      destDir = FSDirectory.open(destPath);
      // We need to maintain the same major version
      int createdMajor = SegmentInfos.readLatestCommit(srcDir).getIndexCreatedVersionMajor();
      new SegmentInfos(createdMajor).commit(destDir);
      writer = new IndexWriter(destDir, new IndexWriterConfig());
      writer.addIndexes(filtered);
      success = true;
    } finally {
      if (success) {
        // Normal path: same close order as before, close failures are reported.
        IOUtils.close(writer, reader, srcDir, destDir);
      } else {
        // Failure path: release resources without masking the original exception.
        IOUtils.closeWhileHandlingException(writer, reader, srcDir, destDir);
      }
    }
  }

  /**
   * Wraps the reader of {@code context} so that term-vector postings read through it
   * report start/end offsets that are never smaller than the start offset of the
   * previous position.
   */
  private static CodecReader wrapWithOffsetFix(LeafReaderContext context) throws IOException {
    return SlowCodecReaderWrapper.wrap(new FilterLeafReader(context.reader()) {
      @Override
      public Fields getTermVectors(int docID) throws IOException {
        Fields termVectors = in.getTermVectors(docID);
        if (termVectors == null) {
          return null;
        }
        return new FilterFields(termVectors) {
          @Override
          public Terms terms(String field) throws IOException {
            Terms terms = super.terms(field);
            if (terms == null) {
              // No term vector for this field: pass the null through instead of wrapping it.
              return null;
            }
            return new FilterTerms(terms) {
              @Override
              public TermsEnum iterator() throws IOException {
                return new FilterTermsEnum(super.iterator()) {
                  @Override
                  public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException {
                    return new FilterPostingsEnum(super.postings(reuse, flags)) {
                      // Start offset (already clamped) of the position most recently
                      // returned by nextPosition(); becomes the floor for the next one.
                      int nextLastStartOffset = 0;
                      // Floor applied to the offsets of the current position.
                      int lastStartOffset = 0;

                      @Override
                      public int nextPosition() throws IOException {
                        int pos = super.nextPosition();
                        // Order matters: promote the previous start offset to the floor
                        // first, then record the (clamped) start offset of the new position.
                        lastStartOffset = nextLastStartOffset;
                        nextLastStartOffset = startOffset();
                        return pos;
                      }

                      @Override
                      public int startOffset() throws IOException {
                        int offset = super.startOffset();
                        if (offset < lastStartOffset) {
                          offset = lastStartOffset;
                        }
                        return offset;
                      }

                      @Override
                      public int endOffset() throws IOException {
                        int offset = super.endOffset();
                        if (offset < lastStartOffset) {
                          offset = lastStartOffset;
                        }
                        return offset;
                      }
                    };
                  }
                };
              }
            };
          }
        };
      }

      // NOTE(review): both cache helpers return null, presumably to opt this wrapper out
      // of caching because it alters what is read -- confirm against the CacheHelper contract.
      @Override
      public CacheHelper getCoreCacheHelper() {
        return null;
      }

      @Override
      public CacheHelper getReaderCacheHelper() {
        return null;
      }
    });
  }
}

View File

@ -1,112 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Path;
import java.util.List;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
public class TestFixBrokenOffsets extends LuceneTestCase {

  // To regenerate the zipped test index, enable the method below and run this in Lucene 6.x:
  //
  //   ant test -Dtestcase=TestFixBrokenOffsets -Dtestmethod=testCreateBrokenOffsetsIndex -Dtests.codec=default -Dtests.useSecurityManager=false

  /*
  public void testCreateBrokenOffsetsIndex() throws IOException {

    Path indexDir = Paths.get("/tmp/brokenoffsets");
    Files.deleteIfExists(indexDir);
    Directory dir = newFSDirectory(indexDir);
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());

    Document doc = new Document();
    FieldType fieldType = new FieldType(TextField.TYPE_STORED);
    fieldType.setStoreTermVectors(true);
    fieldType.setStoreTermVectorPositions(true);
    fieldType.setStoreTermVectorOffsets(true);
    Field field = new Field("foo", "bar", fieldType);
    field.setTokenStream(new CannedTokenStream(new Token("foo", 10, 13), new Token("foo", 7, 9)));
    doc.add(field);
    writer.addDocument(doc);
    writer.commit();

    // 2nd segment
    doc = new Document();
    field = new Field("foo", "bar", fieldType);
    field.setTokenStream(new CannedTokenStream(new Token("bar", 15, 17), new Token("bar", 1, 5)));
    doc.add(field);
    writer.addDocument(doc);

    writer.close();
    dir.close();
  }
  */

  /**
   * Unzips the bundled 6.3.0 index with broken offsets, checks that both flavours of
   * {@code addIndexes} reject it, then runs {@link FixBrokenOffsets} on it and runs
   * {@code checkIndex} on the tool's output.
   */
  public void testFixBrokenOffsetsIndex() throws IOException {
    InputStream zippedIndex = getClass().getResourceAsStream("index.630.brokenoffsets.zip");
    assertNotNull("Broken offsets index not found", zippedIndex);
    Path brokenPath = createTempDir("brokenoffsets");
    TestUtil.unzip(zippedIndex, brokenPath);
    Directory brokenDir = newFSDirectory(brokenPath);

    // OK: index is 6.3.0 so offsets not checked:
    TestUtil.checkIndex(brokenDir);

    // Phase 1: addIndexes(Directory...) must refuse the foreign-version index.
    MockDirectoryWrapper firstTarget = newMockDirectory();
    firstTarget.setCheckIndexOnClose(false);
    final IndexWriter directoryWriter = new IndexWriter(firstTarget, new IndexWriterConfig());
    IllegalArgumentException dirFailure =
        expectThrows(IllegalArgumentException.class, () -> directoryWriter.addIndexes(brokenDir));
    assertTrue(
        dirFailure.getMessage(),
        dirFailure.getMessage().startsWith("Cannot use addIndexes(Directory) with indexes that have been created by a different Lucene version."));
    directoryWriter.close();
    // OK: addIndexes(Directory...) refuses to execute if the index creation version is different so broken offsets are not carried over
    firstTarget.close();

    // Phase 2: addIndexes(CodecReader...) must refuse it as well.
    final MockDirectoryWrapper secondTarget = newMockDirectory();
    secondTarget.setCheckIndexOnClose(false);
    final IndexWriter readerWriter = new IndexWriter(secondTarget, new IndexWriterConfig());
    DirectoryReader brokenReader = DirectoryReader.open(brokenDir);
    List<LeafReaderContext> segments = brokenReader.leaves();
    CodecReader[] segmentReaders = new CodecReader[segments.size()];
    int slot = 0;
    for (LeafReaderContext segment : segments) {
      segmentReaders[slot++] = (CodecReader) segment.reader();
    }
    IllegalArgumentException readerFailure =
        expectThrows(IllegalArgumentException.class, () -> readerWriter.addIndexes(segmentReaders));
    assertEquals(
        "Cannot merge a segment that has been created with major version 6 into this index which has been created by major version 7",
        readerFailure.getMessage());
    brokenReader.close();
    readerWriter.close();
    secondTarget.close();

    // Now run the tool and confirm the broken offsets are fixed:
    Path fixedPath = createTempDir("fixedbrokenoffsets").resolve("subdir");
    FixBrokenOffsets.main(new String[] {brokenPath.toString(), fixedPath.toString()});
    Directory fixedDir = FSDirectory.open(fixedPath);
    TestUtil.checkIndex(fixedDir);
    fixedDir.close();
    brokenDir.close();
  }
}