LUCENE-4055: add segment metadata attributes

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4055@1341590 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2012-05-22 18:53:13 +00:00
parent 8e596ef6ea
commit aec09e25cd
14 changed files with 97 additions and 19 deletions

View File

@ -244,9 +244,10 @@ public class Lucene3xSegmentInfoReader extends SegmentInfoReader {
}
}
// nocommit: convert 3.x specific stuff (shared docstores, normgen, etc) into attributes
SegmentInfo info = new SegmentInfo(dir, version, segmentName, docCount, docStoreOffset,
docStoreSegment, docStoreIsCompoundFile, normGen, isCompoundFile,
null, diagnostics);
null, diagnostics, null);
info.setFiles(files);
SegmentInfoPerCommit infoPerCommit = new SegmentInfoPerCommit(info, delCount, delGen);

View File

@ -91,6 +91,7 @@ import org.apache.lucene.store.DataOutput; // javadoc
* <li>13: variable-length sorted byte array values. ({@link Type#BYTES_VAR_SORTED BYTES_VAR_SORTED})</li>
* </ul>
* </li>
* <li>Attributes: a key-value map of codec-private attributes.</li>
* </ul>
*
* @lucene.experimental

View File

@ -29,7 +29,7 @@ import org.apache.lucene.store.DataOutput; // javadocs
* <p>
* Files:
* <ul>
* <li><tt>.si</tt>: SegVersion, SegSize, IsCompoundFile, Diagnostics, Files
* <li><tt>.si</tt>: SegVersion, SegSize, IsCompoundFile, Diagnostics, Attributes, Files
* </ul>
* </p>
* Data types:
@ -38,7 +38,7 @@ import org.apache.lucene.store.DataOutput; // javadocs
* <li>SegSize --&gt; {@link DataOutput#writeInt Int32}</li>
* <li>SegVersion --&gt; {@link DataOutput#writeString String}</li>
* <li>Files --&gt; {@link DataOutput#writeStringSet Set&lt;String&gt;}</li>
* <li>Diagnostics--&gt; {@link DataOutput#writeStringStringMap Map&lt;String,String&gt;}</li>
* <li>Diagnostics, Attributes --&gt; {@link DataOutput#writeStringStringMap Map&lt;String,String&gt;}</li>
* <li>IsCompoundFile --&gt; {@link DataOutput#writeByte Int8}</li>
* </ul>
* </p>
@ -57,6 +57,7 @@ import org.apache.lucene.store.DataOutput; // javadocs
* for each segment it creates. It includes metadata like the current Lucene
* version, OS, Java version, why the segment was created (merge, flush,
* addIndexes), etc.</li>
* <li>Attributes: a key-value map of codec-private attributes.</li>
* <li>Files is a list of files referred to by this segment.</li>
* </ul>
* </p>

View File

@ -51,11 +51,12 @@ public class Lucene40SegmentInfoReader extends SegmentInfoReader {
final Map<Integer,Long> normGen = null;
final boolean isCompoundFile = input.readByte() == SegmentInfo.YES;
final Map<String,String> diagnostics = input.readStringStringMap();
final Map<String,String> attributes = input.readStringStringMap();
final Set<String> files = input.readStringSet();
final SegmentInfo si = new SegmentInfo(dir, version, segment, docCount, docStoreOffset,
docStoreSegment, docStoreIsCompoundFile, normGen, isCompoundFile,
null, diagnostics);
null, diagnostics, attributes);
si.setFiles(files);
success = true;

View File

@ -56,6 +56,7 @@ public class Lucene40SegmentInfoWriter extends SegmentInfoWriter {
output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO));
output.writeStringStringMap(si.getDiagnostics());
output.writeStringStringMap(si.attributes());
output.writeStringSet(si.getFiles());
success = true;

View File

@ -84,6 +84,22 @@ public class SimpleTextSegmentInfosReader extends SegmentInfoReader {
String value = readString(SI_DIAG_VALUE.length, scratch);
diagnostics.put(key, value);
}
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch, SI_NUM_ATTS);
int numAtts = Integer.parseInt(readString(SI_NUM_ATTS.length, scratch));
Map<String,String> attributes = new HashMap<String,String>();
for (int i = 0; i < numAtts; i++) {
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch, SI_ATT_KEY);
String key = readString(SI_ATT_KEY.length, scratch);
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch, SI_ATT_VALUE);
String value = readString(SI_ATT_VALUE.length, scratch);
attributes.put(key, value);
}
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch, SI_NUM_FILES);
@ -99,7 +115,7 @@ public class SimpleTextSegmentInfosReader extends SegmentInfoReader {
SegmentInfo info = new SegmentInfo(directory, version, segmentName, docCount, -1,
segmentName, false, null, isCompoundFile,
null, diagnostics);
null, diagnostics, attributes);
info.setFiles(files);
success = true;
return info;

View File

@ -51,6 +51,9 @@ public class SimpleTextSegmentInfosWriter extends SegmentInfoWriter {
final static BytesRef SI_NUM_DIAG = new BytesRef(" diagnostics ");
final static BytesRef SI_DIAG_KEY = new BytesRef(" key ");
final static BytesRef SI_DIAG_VALUE = new BytesRef(" value ");
final static BytesRef SI_NUM_ATTS = new BytesRef(" attributes ");
final static BytesRef SI_ATT_KEY = new BytesRef(" key ");
final static BytesRef SI_ATT_VALUE = new BytesRef(" value ");
final static BytesRef SI_NUM_FILES = new BytesRef(" files ");
final static BytesRef SI_FILE = new BytesRef(" file ");
@ -95,6 +98,24 @@ public class SimpleTextSegmentInfosWriter extends SegmentInfoWriter {
SimpleTextUtil.writeNewline(output);
}
}
Map<String,String> atts = si.attributes();
int numAtts = atts == null ? 0 : atts.size();
SimpleTextUtil.write(output, SI_NUM_ATTS);
SimpleTextUtil.write(output, Integer.toString(numAtts), scratch);
SimpleTextUtil.writeNewline(output);
if (numAtts > 0) {
for (Map.Entry<String,String> entry : atts.entrySet()) {
SimpleTextUtil.write(output, SI_ATT_KEY);
SimpleTextUtil.write(output, entry.getKey(), scratch);
SimpleTextUtil.writeNewline(output);
SimpleTextUtil.write(output, SI_ATT_VALUE);
SimpleTextUtil.write(output, entry.getValue(), scratch);
SimpleTextUtil.writeNewline(output);
}
}
Set<String> files = si.getFiles();
int numFiles = files == null ? 0 : files.size();

View File

@ -487,7 +487,7 @@ class DocumentsWriterPerThread {
final SegmentInfo newSegment = new SegmentInfo(directoryOrig, Constants.LUCENE_MAIN_VERSION, segment, flushState.numDocs,
-1, segment, false, null, false,
flushState.codec,
null);
null, null);
newSegment.setFiles(new HashSet<String>(directory.getCreatedFiles()));
if (infoStream.isEnabled("DWPT")) {

View File

@ -2299,7 +2299,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
MergeState mergeState = merger.merge(); // merge 'em
SegmentInfo info = new SegmentInfo(directory, Constants.LUCENE_MAIN_VERSION, mergedName, mergeState.mergedDocCount,
-1, mergedName, false, null, false,
codec, null);
codec, null, null);
SegmentInfoPerCommit infoPerCommit = new SegmentInfoPerCommit(info, 0, -1L);
info.setFiles(new HashSet<String>(trackingDir.getCreatedFiles()));
@ -2398,7 +2398,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
// Same SI as before but we change directory, name and docStoreSegment:
SegmentInfo newInfo = new SegmentInfo(directory, info.info.getVersion(), segName, info.info.docCount, info.info.getDocStoreOffset(),
newDsName, info.info.getDocStoreIsCompoundFile(), info.info.getNormGen(), info.info.getUseCompoundFile(),
info.info.getCodec(), info.info.getDiagnostics());
info.info.getCodec(), info.info.getDiagnostics(), info.info.attributes());
SegmentInfoPerCommit newInfoPerCommit = new SegmentInfoPerCommit(newInfo, info.getDelCount(), info.getDelGen());
Set<String> segFiles = new HashSet<String>();
@ -3316,7 +3316,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
// ConcurrentMergePolicy we keep deterministic segment
// names.
final String mergeSegmentName = newSegmentName();
SegmentInfo si = new SegmentInfo(directory, Constants.LUCENE_MAIN_VERSION, mergeSegmentName, 0, -1, mergeSegmentName, false, null, false, codec, details);
SegmentInfo si = new SegmentInfo(directory, Constants.LUCENE_MAIN_VERSION, mergeSegmentName, 0, -1, mergeSegmentName, false, null, false, codec, details, null);
merge.info = new SegmentInfoPerCommit(si, 0, -1L);
// Lock order: IW -> BD

View File

@ -20,6 +20,7 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
@ -30,16 +31,13 @@ import org.apache.lucene.store.TrackingDirectoryWrapper;
// nocommit fix codec api to pass this around so they can
// store attrs
// nocommit add attrs api like FI
/**
* Information about a segment such as its name, directory, and files related
* to the segment.
*
* @lucene.experimental
*/
// nocommit make final again once atts are working here
public class SegmentInfo {
public final class SegmentInfo {
// TODO: remove these from this class, for now this is the representation
public static final int NO = -1; // e.g. no norms; no deletes;
@ -76,6 +74,8 @@ public class SegmentInfo {
private Codec codec;
private Map<String,String> diagnostics;
private Map<String,String> attributes;
// Tracks the Lucene version this segment was created with, since 3.1. Null
// indicates an older than 3.0 index, and it's used to detect a too old index.
@ -100,7 +100,7 @@ public class SegmentInfo {
*/
public SegmentInfo(Directory dir, String version, String name, int docCount, int docStoreOffset,
String docStoreSegment, boolean docStoreIsCompoundFile, Map<Integer,Long> normGen, boolean isCompoundFile,
Codec codec, Map<String,String> diagnostics) {
Codec codec, Map<String,String> diagnostics, Map<String,String> attributes) {
assert !(dir instanceof TrackingDirectoryWrapper);
this.dir = dir;
this.version = version;
@ -113,6 +113,7 @@ public class SegmentInfo {
this.isCompoundFile = isCompoundFile;
this.codec = codec;
this.diagnostics = diagnostics;
this.attributes = attributes;
}
/**
@ -331,4 +332,39 @@ public class SegmentInfo {
public Set<String> getFiles() {
return setFiles;
}
/**
 * Returns the value of the codec attribute mapped to the given key,
 * or null when no attribute with that key has been stored.
 *
 * @param key attribute name to look up
 * @return the stored value, or null if absent (or if no attributes exist yet)
 */
public String getAttribute(String key) {
  // attributes map is lazily allocated by putAttribute, so it may be null
  return attributes == null ? null : attributes.get(key);
}
/**
 * Puts a codec attribute value.
 * <p>
 * This is a key-value mapping for the segment that the codec can use
 * to store additional metadata, and will be available to the codec
 * when reading the segment via {@link #getAttribute(String)}.
 * <p>
 * If a value already exists for the key, it will be replaced with
 * the new value.
 *
 * @param key attribute name
 * @param value attribute value to store
 * @return the previous value associated with {@code key}, or null if
 *         there was no mapping for the key
 */
public String putAttribute(String key, String value) {
  if (attributes == null) {
    attributes = new HashMap<String,String>();
  }
  return attributes.put(key, value);
}
/**
 * Returns the internal codec attributes map.
 * <p>
 * NOTE(review): this appears to return the live internal map rather than a
 * copy — callers presumably should not modify it directly; confirm intent.
 *
 * @return internal codec attributes map. May be null if no mappings exist.
 */
public Map<String,String> attributes() {
  return attributes;
}
}

View File

@ -257,7 +257,7 @@ public class TestCodecs extends LuceneTestCase {
this.write(fieldInfos, dir, fields, true);
Codec codec = Codec.getDefault();
final SegmentInfo si = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, SEGMENT, 10000, -1, SEGMENT, false, null, false,
codec, null);
codec, null, null);
final FieldsProducer reader = codec.postingsFormat().fieldsProducer(new SegmentReadState(dir, si, fieldInfos, newIOContext(random()), DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR));
@ -315,7 +315,7 @@ public class TestCodecs extends LuceneTestCase {
Codec codec = Codec.getDefault();
final SegmentInfo si = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, SEGMENT, 10000, -1,
SEGMENT, false, null, false,
codec, null);
codec, null, null);
if (VERBOSE) {
System.out.println("TEST: now read postings");

View File

@ -206,7 +206,7 @@ public class TestDoc extends LuceneTestCase {
r2.close();
final SegmentInfo info = new SegmentInfo(si1.info.dir, Constants.LUCENE_MAIN_VERSION, merged,
si1.info.docCount + si2.info.docCount, -1, merged,
false, null, false, codec, null);
false, null, false, codec, null, null);
info.setFiles(new HashSet<String>(trackingDir.getCreatedFiles()));
if (useCompoundFile) {

View File

@ -86,7 +86,7 @@ public class TestSegmentMerger extends LuceneTestCase {
//Should be able to open a new SegmentReader against the new directory
SegmentReader mergedReader = new SegmentReader(new SegmentInfoPerCommit(
new SegmentInfo(mergedDir, Constants.LUCENE_MAIN_VERSION, mergedSegment, docsMerged, -1, mergedSegment,
false, null, false, codec, null),
false, null, false, codec, null, null),
0, -1L),
DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, newIOContext(random()));
assertTrue(mergedReader != null);

View File

@ -147,7 +147,7 @@ public class IndexSplitter {
// Same info just changing the dir:
SegmentInfo newInfo = new SegmentInfo(destFSDir, info.getVersion(), info.name, info.docCount, info.getDocStoreOffset(),
info.getDocStoreSegment(), info.getDocStoreIsCompoundFile(), info.getNormGen(), info.getUseCompoundFile(),
info.getCodec(), info.getDiagnostics());
info.getCodec(), info.getDiagnostics(), info.attributes());
destInfos.add(new SegmentInfoPerCommit(newInfo, infoPerCommit.getDelCount(), infoPerCommit.getDelGen()));
// nocommit is this right...?
// now copy files over