LUCENE-5969, LUCENE-5412: make .si immutable again, and make ancient writers read-only

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5969@1627535 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2014-09-25 13:16:16 +00:00
parent dce58c7346
commit d1a67913d4
22 changed files with 158 additions and 86 deletions

View File

@ -72,7 +72,6 @@ import org.apache.lucene.store.DataOutput; // javadocs
@Deprecated
public class Lucene40SegmentInfoFormat extends SegmentInfoFormat {
private final SegmentInfoReader reader = new Lucene40SegmentInfoReader();
private final SegmentInfoWriter writer = new Lucene40SegmentInfoWriter();
/** Sole constructor. */
public Lucene40SegmentInfoFormat() {
@ -83,11 +82,9 @@ public class Lucene40SegmentInfoFormat extends SegmentInfoFormat {
return reader;
}
// we must unfortunately support write, to allow addIndexes to write a new .si with rewritten filenames:
// see LUCENE-5377
@Override
public SegmentInfoWriter getSegmentInfoWriter() {
return writer;
throw new UnsupportedOperationException("this codec can only be used for reading");
}
/** File extension used to store {@link SegmentInfo}. */

View File

@ -100,7 +100,7 @@ public class Lucene46Codec extends Codec {
}
@Override
public final SegmentInfoFormat segmentInfoFormat() {
public SegmentInfoFormat segmentInfoFormat() {
return segmentInfosFormat;
}

View File

@ -68,7 +68,6 @@ import org.apache.lucene.store.DataOutput; // javadocs
*/
public class Lucene46SegmentInfoFormat extends SegmentInfoFormat {
private final SegmentInfoReader reader = new Lucene46SegmentInfoReader();
private final SegmentInfoWriter writer = new Lucene46SegmentInfoWriter();
/** Sole constructor. */
public Lucene46SegmentInfoFormat() {
@ -81,7 +80,7 @@ public class Lucene46SegmentInfoFormat extends SegmentInfoFormat {
@Override
public SegmentInfoWriter getSegmentInfoWriter() {
return writer;
throw new UnsupportedOperationException("this codec can only be used for reading");
}
/** File extension used to store {@link SegmentInfo}. */

View File

@ -99,7 +99,7 @@ public class Lucene49Codec extends Codec {
}
@Override
public final SegmentInfoFormat segmentInfoFormat() {
public SegmentInfoFormat segmentInfoFormat() {
return segmentInfosFormat;
}

View File

@ -7,9 +7,9 @@ import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.codecs.FieldInfosWriter;
import org.apache.lucene.codecs.NormsFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.SegmentInfoFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.util.LuceneTestCase;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -74,4 +74,11 @@ public final class Lucene40RWCodec extends Lucene40Codec {
public PostingsFormat getPostingsFormatForField(String field) {
return postings;
}
private static final SegmentInfoFormat segmentInfos = new Lucene40RWSegmentInfoFormat();
// Replaces the segment info format inherited from Lucene40Codec with the shared
// Lucene40RWSegmentInfoFormat instance held in the static segmentInfos field.
@Override
public SegmentInfoFormat segmentInfoFormat() {
return segmentInfos;
}
}

View File

@ -0,0 +1,29 @@
package org.apache.lucene.codecs.lucene40;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.codecs.SegmentInfoWriter;
/**
 * Read-write version of the 4.0 segment info format, for testing.
 * <p>
 * The parent {@link Lucene40SegmentInfoFormat} is read-only: its
 * {@code getSegmentInfoWriter()} throws {@link UnsupportedOperationException}.
 * This subclass overrides that method so tests can still write segment infos
 * in the 4.0 format.
 */
public class Lucene40RWSegmentInfoFormat extends Lucene40SegmentInfoFormat {

  /** Returns a new {@link Lucene40SegmentInfoWriter} on every call. */
  @Override
  public SegmentInfoWriter getSegmentInfoWriter() {
    return new Lucene40SegmentInfoWriter();
  }
}

View File

@ -6,12 +6,14 @@ import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.codecs.FieldInfosWriter;
import org.apache.lucene.codecs.NormsFormat;
import org.apache.lucene.codecs.SegmentInfoFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosFormat;
import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosWriter;
import org.apache.lucene.codecs.lucene40.Lucene40RWDocValuesFormat;
import org.apache.lucene.codecs.lucene40.Lucene40RWNormsFormat;
import org.apache.lucene.codecs.lucene40.Lucene40RWSegmentInfoFormat;
import org.apache.lucene.codecs.lucene40.Lucene40RWTermVectorsFormat;
import org.apache.lucene.util.LuceneTestCase;
@ -73,4 +75,11 @@ public class Lucene41RWCodec extends Lucene41Codec {
public TermVectorsFormat termVectorsFormat() {
return vectors;
}
private static final SegmentInfoFormat segmentInfos = new Lucene40RWSegmentInfoFormat();
// Replaces the segment info format inherited from Lucene41Codec with the shared
// Lucene40RWSegmentInfoFormat instance held in the static segmentInfos field.
@Override
public SegmentInfoFormat segmentInfoFormat() {
return segmentInfos;
}
}

View File

@ -23,6 +23,8 @@ import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.codecs.FieldInfosWriter;
import org.apache.lucene.codecs.NormsFormat;
import org.apache.lucene.codecs.SegmentInfoFormat;
import org.apache.lucene.codecs.lucene40.Lucene40RWSegmentInfoFormat;
import org.apache.lucene.util.LuceneTestCase;
/**
@ -55,4 +57,11 @@ public class Lucene42RWCodec extends Lucene42Codec {
public FieldInfosFormat fieldInfosFormat() {
return fieldInfosFormat;
}
private static final SegmentInfoFormat segmentInfos = new Lucene40RWSegmentInfoFormat();
// Replaces the segment info format inherited from Lucene42Codec with the shared
// Lucene40RWSegmentInfoFormat instance held in the static segmentInfos field.
@Override
public SegmentInfoFormat segmentInfoFormat() {
return segmentInfos;
}
}

View File

@ -23,6 +23,8 @@ import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.codecs.FieldInfosWriter;
import org.apache.lucene.codecs.NormsFormat;
import org.apache.lucene.codecs.SegmentInfoFormat;
import org.apache.lucene.codecs.lucene40.Lucene40RWSegmentInfoFormat;
import org.apache.lucene.codecs.lucene42.Lucene42FieldInfosFormat;
import org.apache.lucene.codecs.lucene42.Lucene42FieldInfosWriter;
import org.apache.lucene.codecs.lucene42.Lucene42RWNormsFormat;
@ -58,4 +60,11 @@ public class Lucene45RWCodec extends Lucene45Codec {
public NormsFormat normsFormat() {
return norms;
}
private static final SegmentInfoFormat segmentInfos = new Lucene40RWSegmentInfoFormat();
// Replaces the segment info format inherited from Lucene45Codec with the shared
// Lucene40RWSegmentInfoFormat instance held in the static segmentInfos field.
@Override
public SegmentInfoFormat segmentInfoFormat() {
return segmentInfos;
}
}

View File

@ -19,6 +19,8 @@ package org.apache.lucene.codecs.lucene46;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.NormsFormat;
import org.apache.lucene.codecs.SegmentInfoFormat;
import org.apache.lucene.codecs.SegmentInfoWriter;
import org.apache.lucene.codecs.lucene42.Lucene42RWNormsFormat;
import org.apache.lucene.codecs.lucene45.Lucene45RWDocValuesFormat;
@ -41,4 +43,11 @@ public class Lucene46RWCodec extends Lucene46Codec {
public NormsFormat normsFormat() {
return norms;
}
private static final SegmentInfoFormat segmentInfos = new Lucene46RWSegmentInfoFormat();
// Replaces the segment info format inherited from Lucene46Codec with the shared
// Lucene46RWSegmentInfoFormat instance held in the static segmentInfos field.
@Override
public SegmentInfoFormat segmentInfoFormat() {
return segmentInfos;
}
}

View File

@ -0,0 +1,28 @@
package org.apache.lucene.codecs.lucene46;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.codecs.SegmentInfoWriter;
/**
 * Test-only variant of {@link Lucene46SegmentInfoFormat} (the 4.6 segment
 * info format) that is able to write as well as read.
 */
public class Lucene46RWSegmentInfoFormat extends Lucene46SegmentInfoFormat {

  /** Creates and returns a fresh {@link Lucene46SegmentInfoWriter}. */
  @Override
  public SegmentInfoWriter getSegmentInfoWriter() {
    final SegmentInfoWriter rwWriter = new Lucene46SegmentInfoWriter();
    return rwWriter;
  }
}

View File

@ -19,6 +19,8 @@ package org.apache.lucene.codecs.lucene49;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.NormsFormat;
import org.apache.lucene.codecs.SegmentInfoFormat;
import org.apache.lucene.codecs.lucene46.Lucene46RWSegmentInfoFormat;
/**
* Read-write version of {@link Lucene49Codec} for testing.
@ -39,4 +41,11 @@ public class Lucene49RWCodec extends Lucene49Codec {
public NormsFormat normsFormat() {
return norms;
}
private static final SegmentInfoFormat segmentInfos = new Lucene46RWSegmentInfoFormat();
// Replaces the segment info format inherited from Lucene49Codec with the shared
// Lucene46RWSegmentInfoFormat instance held in the static segmentInfos field.
@Override
public SegmentInfoFormat segmentInfoFormat() {
return segmentInfos;
}
}

View File

@ -1,25 +0,0 @@
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
</head>
<body>
Lucene 4.6 file format.
</body>
</html>

View File

@ -2591,67 +2591,33 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
}
/** Copies the segment files as-is into the IndexWriter's directory. */
private SegmentCommitInfo copySegmentAsIs(SegmentCommitInfo info, String segName, IOContext context)
throws IOException {
// note: we don't really need this fis (its copied), but we load it up
// so we don't pass a null value to the si writer
FieldInfos fis = SegmentReader.readFieldInfos(info);
private SegmentCommitInfo copySegmentAsIs(SegmentCommitInfo info, String segName, IOContext context) throws IOException {
//System.out.println("copy seg=" + info.info.name + " version=" + info.info.getVersion());
// Same SI as before but we change directory and name
SegmentInfo newInfo = new SegmentInfo(directory, info.info.getVersion(), segName, info.info.getDocCount(),
info.info.getUseCompoundFile(), info.info.getCodec(),
info.info.getDiagnostics(), StringHelper.randomId());
SegmentCommitInfo newInfoPerCommit = new SegmentCommitInfo(newInfo,
info.getDelCount(), info.getDelGen(), info.getFieldInfosGen(),
info.getDocValuesGen());
Set<String> segFiles = new HashSet<>();
// Build up new segment's file names. Must do this
// before writing SegmentInfo:
for (String file: info.files()) {
final String newFileName;
newFileName = segName + IndexFileNames.stripSegmentName(file);
segFiles.add(newFileName);
}
newInfo.setFiles(segFiles);
// We must rewrite the SI file because it references segment name in its list of files, etc
TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(directory);
info.info.getDiagnostics(), info.info.getId());
SegmentCommitInfo newInfoPerCommit = new SegmentCommitInfo(newInfo, info.getDelCount(), info.getDelGen(),
info.getFieldInfosGen(), info.getDocValuesGen());
newInfo.setFiles(info.files());
boolean success = false;
try {
newInfo.getCodec().segmentInfoFormat().getSegmentInfoWriter().write(trackingDir, newInfo, fis, context);
final Collection<String> siFiles = trackingDir.getCreatedFiles();
// Copy the segment's files
for (String file: info.files()) {
final String newFileName = newInfo.namedForThisSegment(file);
final String newFileName = segName + IndexFileNames.stripSegmentName(file);
if (siFiles.contains(newFileName)) {
// We already rewrote this above
continue;
}
assert !slowFileExists(directory, newFileName): "file \"" + newFileName + "\" already exists; siFiles=" + siFiles;
assert !slowFileExists(directory, newFileName): "file \"" + newFileName + "\" already exists; newInfo.files=" + newInfo.files();
info.info.dir.copy(directory, file, newFileName, context);
}
success = true;
} finally {
if (!success) {
for(String file : newInfo.files()) {
try {
directory.deleteFile(file);
} catch (Throwable t) {
}
}
IOUtils.deleteFilesIgnoringExceptions(directory, newInfo.files().toArray(new String[0]));
}
}

View File

@ -109,7 +109,14 @@ public class SegmentCommitInfo {
@Deprecated
public void setGenUpdatesFiles(Map<Long,Set<String>> genUpdatesFiles) {
this.genUpdatesFiles.clear();
this.genUpdatesFiles.putAll(genUpdatesFiles);
for (Map.Entry<Long,Set<String>> kv : genUpdatesFiles.entrySet()) {
// rename the set
Set<String> set = new HashSet<>();
for (String file : kv.getValue()) {
set.add(info.namedForThisSegment(file));
}
this.genUpdatesFiles.put(kv.getKey(), set);
}
}
/** Returns the per-field DocValues updates files. */
@ -120,7 +127,14 @@ public class SegmentCommitInfo {
/** Sets the DocValues updates file names, per field number. Each file set is copied into a new set, with every file name renamed for this segment. */
public void setDocValuesUpdatesFiles(Map<Integer,Set<String>> dvUpdatesFiles) {
this.dvUpdatesFiles.clear();
this.dvUpdatesFiles.putAll(dvUpdatesFiles);
for (Map.Entry<Integer,Set<String>> kv : dvUpdatesFiles.entrySet()) {
// rename the set
Set<String> set = new HashSet<>();
for (String file : kv.getValue()) {
set.add(info.namedForThisSegment(file));
}
this.dvUpdatesFiles.put(kv.getKey(), set);
}
}
/** Returns the FieldInfos file names. */
@ -131,7 +145,9 @@ public class SegmentCommitInfo {
/** Sets the FieldInfos file names. */
public void setFieldInfosFiles(Set<String> fieldInfosFiles) {
this.fieldInfosFiles.clear();
this.fieldInfosFiles.addAll(fieldInfosFiles);
for (String file : fieldInfosFiles) {
this.fieldInfosFiles.add(info.namedForThisSegment(file));
}
}
/** Called when we succeed in writing deletes */

View File

@ -20,6 +20,7 @@ package org.apache.lucene.index;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
@ -233,23 +234,25 @@ public final class SegmentInfo {
private Set<String> setFiles;
/** Sets the files written for this segment. */
public void setFiles(Set<String> files) {
checkFileNames(files);
setFiles = files;
public void setFiles(Collection<String> files) {
setFiles = new HashSet<>();
addFiles(files);
}
/** Add these files to the set of files written for this
* segment. */
public void addFiles(Collection<String> files) {
checkFileNames(files);
setFiles.addAll(files);
for (String f : files) {
setFiles.add(namedForThisSegment(f));
}
}
/** Add this file to the set of files written for this
* segment. */
public void addFile(String file) {
checkFileNames(Collections.singleton(file));
setFiles.add(file);
setFiles.add(namedForThisSegment(file));
}
private void checkFileNames(Collection<String> files) {
@ -261,5 +264,12 @@ public final class SegmentInfo {
}
}
}
/**
 * Renames {@code file} so that it belongs to this segment: the segment
 * name the file currently carries is stripped and this segment's
 * {@code name} is put in its place.
 * <p>
 * This is needed because segment names can change, e.g. by addIndexes(Dir).
 */
String namedForThisSegment(String file) {
  final String suffix = IndexFileNames.stripSegmentName(file);
  return name + suffix;
}
}