LUCENE-4055: start cleaning up 3.x SIreader

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4055@1342247 13f79535-47bb-0310-9956-ffa450edef68
Robert Muir 2012-05-24 13:21:55 +00:00
parent 89aba400df
commit d4f49a8959
1 changed file with 111 additions and 86 deletions


@@ -50,7 +50,7 @@ public class Lucene3xSegmentInfoReader extends SegmentInfoReader {
     infos.counter = input.readInt(); // read counter
     Lucene3xSegmentInfoReader reader = new Lucene3xSegmentInfoReader();
     for (int i = input.readInt(); i > 0; i--) { // read segmentInfos
-      SegmentInfoPerCommit siPerCommit = reader.readSegmentInfo(null, directory, format, input);
+      SegmentInfoPerCommit siPerCommit = reader.readLegacySegmentInfo(directory, format, input);
       SegmentInfo si = siPerCommit.info;
       if (si.getVersion() == null) {
@@ -94,11 +94,6 @@ public class Lucene3xSegmentInfoReader extends SegmentInfoReader {
   @Override
   public SegmentInfo read(Directory directory, String segmentName, IOContext context) throws IOException {
-    return read(directory, segmentName, Lucene3xSegmentInfoFormat.FORMAT_4X_UPGRADE, context);
-  }
-  public SegmentInfo read(Directory directory, String segmentName, int format, IOContext context) throws IOException {
     // NOTE: this is NOT how 3.x is really written...
     String fileName = IndexFileNames.segmentFileName(segmentName, "", Lucene3xSegmentInfoFormat.SI_EXTENSION);
@@ -107,7 +102,8 @@ public class Lucene3xSegmentInfoReader extends SegmentInfoReader {
     IndexInput input = directory.openInput(fileName, context);
     try {
-      SegmentInfo si = readSegmentInfo(segmentName, directory, format, input).info;
+      // nocommit: we need a version header
+      SegmentInfo si = readUpgradedSegmentInfo(segmentName, directory, input);
       success = true;
       return si;
     } finally {
@@ -125,13 +121,15 @@ public class Lucene3xSegmentInfoReader extends SegmentInfoReader {
     }
   }
-  private SegmentInfoPerCommit readSegmentInfo(String segmentName, Directory dir, int format, IndexInput input) throws IOException {
+  /** reads from legacy 3.x segments_N */
+  private SegmentInfoPerCommit readLegacySegmentInfo(Directory dir, int format, IndexInput input) throws IOException {
     // check that it is a format we can understand
+    assert format != Lucene3xSegmentInfoFormat.FORMAT_4X_UPGRADE;
     if (format > Lucene3xSegmentInfoFormat.FORMAT_DIAGNOSTICS) {
       throw new IndexFormatTooOldException(input, format,
         Lucene3xSegmentInfoFormat.FORMAT_DIAGNOSTICS, Lucene3xSegmentInfoFormat.FORMAT_4X_UPGRADE);
     }
-    if (format < Lucene3xSegmentInfoFormat.FORMAT_4X_UPGRADE) {
+    if (format < Lucene3xSegmentInfoFormat.FORMAT_3_1) {
       throw new IndexFormatTooNewException(input, format,
         Lucene3xSegmentInfoFormat.FORMAT_DIAGNOSTICS, Lucene3xSegmentInfoFormat.FORMAT_4X_UPGRADE);
     }
@@ -142,46 +140,26 @@ public class Lucene3xSegmentInfoReader extends SegmentInfoReader {
       version = null;
     }
-    // NOTE: we ignore this and use the incoming arg
-    // instead, if it's non-null:
     final String name = input.readString();
-    if (segmentName == null) {
-      segmentName = name;
-    }
     final int docCount = input.readInt();
     final long delGen = input.readLong();
-    final int docStoreOffset;
+    final int docStoreOffset = input.readInt();
+    final Map<String,String> attributes = new HashMap<String,String>();
+    // parse the docstore stuff and shove it into attributes
     final String docStoreSegment;
     final boolean docStoreIsCompoundFile;
-    final Map<String,String> attributes;
-    if (format == Lucene3xSegmentInfoFormat.FORMAT_4X_UPGRADE) {
-      // we already upgraded to 4.x si format: so shared docstore stuff is in the attributes map.
-      attributes = input.readStringStringMap();
-      String v = attributes.get(Lucene3xSegmentInfoFormat.DS_OFFSET_KEY);
-      docStoreOffset = v == null ? -1 : Integer.parseInt(v);
-      v = attributes.get(Lucene3xSegmentInfoFormat.DS_NAME_KEY);
-      docStoreSegment = v == null ? segmentName : v;
-      v = attributes.get(Lucene3xSegmentInfoFormat.DS_COMPOUND_KEY);
-      docStoreIsCompoundFile = v == null ? false : Boolean.parseBoolean(v);
+    if (docStoreOffset != -1) {
+      docStoreSegment = input.readString();
+      docStoreIsCompoundFile = input.readByte() == SegmentInfo.YES;
+      attributes.put(Lucene3xSegmentInfoFormat.DS_OFFSET_KEY, Integer.toString(docStoreOffset));
+      attributes.put(Lucene3xSegmentInfoFormat.DS_NAME_KEY, docStoreSegment);
+      attributes.put(Lucene3xSegmentInfoFormat.DS_COMPOUND_KEY, Boolean.toString(docStoreIsCompoundFile));
     } else {
-      // for older formats, parse the docstore stuff and shove it into attributes
-      attributes = new HashMap<String,String>();
-      docStoreOffset = input.readInt();
-      if (docStoreOffset != -1) {
-        docStoreSegment = input.readString();
-        docStoreIsCompoundFile = input.readByte() == SegmentInfo.YES;
-        attributes.put(Lucene3xSegmentInfoFormat.DS_OFFSET_KEY, Integer.toString(docStoreOffset));
-        attributes.put(Lucene3xSegmentInfoFormat.DS_NAME_KEY, docStoreSegment);
-        attributes.put(Lucene3xSegmentInfoFormat.DS_COMPOUND_KEY, Boolean.toString(docStoreIsCompoundFile));
-      } else {
-        docStoreSegment = name;
-        docStoreIsCompoundFile = false;
-      }
+      docStoreSegment = name;
+      docStoreIsCompoundFile = false;
     }
     // pre-4.0 indexes write a byte if there is a single norms file
@@ -214,63 +192,110 @@ public class Lucene3xSegmentInfoReader extends SegmentInfoReader {
       final int hasVectors = input.readByte();
     }
-    final Set<String> files;
-    if (format == Lucene3xSegmentInfoFormat.FORMAT_4X_UPGRADE) {
-      files = input.readStringSet();
+    // Replicate logic from 3.x's SegmentInfo.files():
+    final Set<String> files = new HashSet<String>();
+    if (isCompoundFile) {
+      files.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.COMPOUND_FILE_EXTENSION));
     } else {
-      // Replicate logic from 3.x's SegmentInfo.files():
-      files = new HashSet<String>();
-      if (isCompoundFile) {
-        files.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.COMPOUND_FILE_EXTENSION));
+      addIfExists(dir, files, IndexFileNames.segmentFileName(name, "", Lucene3xFieldInfosReader.FIELD_INFOS_EXTENSION));
+      addIfExists(dir, files, IndexFileNames.segmentFileName(name, "", Lucene3xPostingsFormat.FREQ_EXTENSION));
+      addIfExists(dir, files, IndexFileNames.segmentFileName(name, "", Lucene3xPostingsFormat.PROX_EXTENSION));
+      addIfExists(dir, files, IndexFileNames.segmentFileName(name, "", Lucene3xPostingsFormat.TERMS_EXTENSION));
+      addIfExists(dir, files, IndexFileNames.segmentFileName(name, "", Lucene3xPostingsFormat.TERMS_INDEX_EXTENSION));
+      addIfExists(dir, files, IndexFileNames.segmentFileName(name, "", Lucene3xNormsProducer.NORMS_EXTENSION));
+    }
+    if (docStoreOffset != -1) {
+      if (docStoreIsCompoundFile) {
+        files.add(IndexFileNames.segmentFileName(docStoreSegment, "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION));
       } else {
-        addIfExists(dir, files, IndexFileNames.segmentFileName(segmentName, "", Lucene3xFieldInfosReader.FIELD_INFOS_EXTENSION));
-        addIfExists(dir, files, IndexFileNames.segmentFileName(segmentName, "", Lucene3xPostingsFormat.FREQ_EXTENSION));
-        addIfExists(dir, files, IndexFileNames.segmentFileName(segmentName, "", Lucene3xPostingsFormat.PROX_EXTENSION));
-        addIfExists(dir, files, IndexFileNames.segmentFileName(segmentName, "", Lucene3xPostingsFormat.TERMS_EXTENSION));
-        addIfExists(dir, files, IndexFileNames.segmentFileName(segmentName, "", Lucene3xPostingsFormat.TERMS_INDEX_EXTENSION));
-        addIfExists(dir, files, IndexFileNames.segmentFileName(segmentName, "", Lucene3xNormsProducer.NORMS_EXTENSION));
+        files.add(IndexFileNames.segmentFileName(docStoreSegment, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION));
+        files.add(IndexFileNames.segmentFileName(docStoreSegment, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION));
+        addIfExists(dir, files, IndexFileNames.segmentFileName(docStoreSegment, "", Lucene3xTermVectorsReader.VECTORS_INDEX_EXTENSION));
+        addIfExists(dir, files, IndexFileNames.segmentFileName(docStoreSegment, "", Lucene3xTermVectorsReader.VECTORS_FIELDS_EXTENSION));
+        addIfExists(dir, files, IndexFileNames.segmentFileName(docStoreSegment, "", Lucene3xTermVectorsReader.VECTORS_DOCUMENTS_EXTENSION));
       }
+    } else if (!isCompoundFile) {
+      files.add(IndexFileNames.segmentFileName(name, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION));
+      files.add(IndexFileNames.segmentFileName(name, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION));
+      addIfExists(dir, files, IndexFileNames.segmentFileName(name, "", Lucene3xTermVectorsReader.VECTORS_INDEX_EXTENSION));
+      addIfExists(dir, files, IndexFileNames.segmentFileName(name, "", Lucene3xTermVectorsReader.VECTORS_FIELDS_EXTENSION));
+      addIfExists(dir, files, IndexFileNames.segmentFileName(name, "", Lucene3xTermVectorsReader.VECTORS_DOCUMENTS_EXTENSION));
+    }
-      if (docStoreOffset != -1) {
-        if (docStoreIsCompoundFile) {
-          files.add(IndexFileNames.segmentFileName(docStoreSegment, "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION));
+    if (normGen != null) {
+      for(Map.Entry<Integer,Long> ent : normGen.entrySet()) {
+        long gen = ent.getValue();
+        if (gen >= SegmentInfo.YES) {
+          // Definitely a separate norm file, with generation:
+          files.add(IndexFileNames.fileNameFromGeneration(name, "s" + ent.getKey(), gen));
+        } else if (gen == SegmentInfo.NO) {
+          // No separate norm
         } else {
-          files.add(IndexFileNames.segmentFileName(docStoreSegment, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION));
-          files.add(IndexFileNames.segmentFileName(docStoreSegment, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION));
-          addIfExists(dir, files, IndexFileNames.segmentFileName(docStoreSegment, "", Lucene3xTermVectorsReader.VECTORS_INDEX_EXTENSION));
-          addIfExists(dir, files, IndexFileNames.segmentFileName(docStoreSegment, "", Lucene3xTermVectorsReader.VECTORS_FIELDS_EXTENSION));
-          addIfExists(dir, files, IndexFileNames.segmentFileName(docStoreSegment, "", Lucene3xTermVectorsReader.VECTORS_DOCUMENTS_EXTENSION));
-        }
-      } else if (!isCompoundFile) {
-        files.add(IndexFileNames.segmentFileName(segmentName, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION));
-        files.add(IndexFileNames.segmentFileName(segmentName, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION));
-        addIfExists(dir, files, IndexFileNames.segmentFileName(segmentName, "", Lucene3xTermVectorsReader.VECTORS_INDEX_EXTENSION));
-        addIfExists(dir, files, IndexFileNames.segmentFileName(segmentName, "", Lucene3xTermVectorsReader.VECTORS_FIELDS_EXTENSION));
-        addIfExists(dir, files, IndexFileNames.segmentFileName(segmentName, "", Lucene3xTermVectorsReader.VECTORS_DOCUMENTS_EXTENSION));
-      }
-      if (normGen != null) {
-        for(Map.Entry<Integer,Long> ent : normGen.entrySet()) {
-          long gen = ent.getValue();
-          if (gen >= SegmentInfo.YES) {
-            // Definitely a separate norm file, with generation:
-            files.add(IndexFileNames.fileNameFromGeneration(segmentName, "s" + ent.getKey(), gen));
-          } else if (gen == SegmentInfo.NO) {
-            // No separate norm
-          } else {
-            // We should have already hit indexformat too old exception
-            assert false;
-          }
+          // We should have already hit indexformat too old exception
+          assert false;
         }
       }
     }
     // nocommit: convert normgen into attributes?
-    SegmentInfo info = new SegmentInfo(dir, version, segmentName, docCount, normGen, isCompoundFile,
+    SegmentInfo info = new SegmentInfo(dir, version, name, docCount, normGen, isCompoundFile,
                                        null, diagnostics, Collections.unmodifiableMap(attributes));
     info.setFiles(files);
     SegmentInfoPerCommit infoPerCommit = new SegmentInfoPerCommit(info, delCount, delGen);
     return infoPerCommit;
   }
+  private SegmentInfo readUpgradedSegmentInfo(String name, Directory dir, IndexInput input) throws IOException {
+    final String version = input.readString();
+    // nocommit: we ignore this and use the incoming arg: don't write this
+    input.readString();
+    final int docCount = input.readInt();
+    // nocommit: dont write this
+    final long delGen = input.readLong();
+    final Map<String,String> attributes = input.readStringStringMap();
+    // pre-4.0 indexes write a byte if there is a single norms file
+    byte b = input.readByte();
+    //System.out.println("version=" + version + " name=" + name + " docCount=" + docCount + " delGen=" + delGen + " dso=" + docStoreOffset + " dss=" + docStoreSegment + " dssCFs=" + docStoreIsCompoundFile + " b=" + b + " format=" + format);
+    // nocommit: don't write this
+    assert 1 == b : "expected 1 but was: "+ b;
+    final int numNormGen = input.readInt();
+    final Map<Integer,Long> normGen;
+    if (numNormGen == SegmentInfo.NO) {
+      normGen = null;
+    } else {
+      normGen = new HashMap<Integer, Long>();
+      for(int j=0;j<numNormGen;j++) {
+        normGen.put(j, input.readLong());
+      }
+    }
+    final boolean isCompoundFile = input.readByte() == SegmentInfo.YES;
+    final int delCount = input.readInt();
+    assert delCount <= docCount;
+    // nocommit: unused, dont write this
+    final boolean hasProx = input.readByte() == 1;
+    final Map<String,String> diagnostics = input.readStringStringMap();
+    // nocommit: unused, dont write this
+    final int hasVectors = input.readByte();
+    final Set<String> files = input.readStringSet();
+    // nocommit: convert normgen into attributes?
+    SegmentInfo info = new SegmentInfo(dir, version, name, docCount, normGen, isCompoundFile,
+                                       null, diagnostics, Collections.unmodifiableMap(attributes));
+    info.setFiles(files);
+    return info;
+  }
 }
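
Note: the rewritten files() replication in readLegacySegmentInfo calls an addIfExists(...) helper that lies outside the hunks shown above. Judging from its call sites it presumably just records a file name when it actually exists in the Directory, since optional 3.x files (separate norms, prox, vectors, ...) may be absent for a given segment. A minimal sketch under that assumption, written as a member of the same class so the Directory and Set imports are already in scope (this helper is not part of this diff):

  // Assumed shape of the helper used by readLegacySegmentInfo above:
  // only add the file if it is really present in the directory.
  private static void addIfExists(Directory dir, Set<String> files, String fileName) throws IOException {
    if (dir.fileExists(fileName)) {
      files.add(fileName);
    }
  }

Callers then stay uniform, e.g. addIfExists(dir, files, IndexFileNames.segmentFileName(name, "", Lucene3xPostingsFormat.FREQ_EXTENSION)); only files that exist end up in the set passed to SegmentInfo.setFiles(files).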