Mirror of https://github.com/apache/lucene.git (synced 2025-03-03 23:09:36 +00:00)
commit aec09e25cd
parent 8e596ef6ea

LUCENE-4055: add segment metadata attributes

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4055@1341590 13f79535-47bb-0310-9956-ffa450edef68
@@ -244,9 +244,10 @@ public class Lucene3xSegmentInfoReader extends SegmentInfoReader {
       }
     }
 
+    // nocommit: convert 3.x specific stuff (shared docstores, normgen, etc) into attributes
     SegmentInfo info = new SegmentInfo(dir, version, segmentName, docCount, docStoreOffset,
                                        docStoreSegment, docStoreIsCompoundFile, normGen, isCompoundFile,
-                                       null, diagnostics);
+                                       null, diagnostics, null);
     info.setFiles(files);
 
     SegmentInfoPerCommit infoPerCommit = new SegmentInfoPerCommit(info, delCount, delGen);
@@ -91,6 +91,7 @@ import org.apache.lucene.store.DataOutput; // javadoc
 *       <li>13: variable-length sorted byte array values. ({@link Type#BYTES_VAR_SORTED BYTES_VAR_SORTED})</li>
 *     </ul>
 *   </li>
+ *   <li>Attributes: a key-value map of codec-private attributes.</li>
 * </ul>
 *
 * @lucene.experimental
@@ -29,7 +29,7 @@ import org.apache.lucene.store.DataOutput; // javadocs
 * <p>
 * Files:
 * <ul>
- *   <li><tt>.si</tt>: SegVersion, SegSize, IsCompoundFile, Diagnostics, Files
+ *   <li><tt>.si</tt>: SegVersion, SegSize, IsCompoundFile, Diagnostics, Attributes, Files
 * </ul>
 * </p>
 * Data types:
@@ -38,7 +38,7 @@ import org.apache.lucene.store.DataOutput; // javadocs
 *   <li>SegSize --> {@link DataOutput#writeInt Int32}</li>
 *   <li>SegVersion --> {@link DataOutput#writeString String}</li>
 *   <li>Files --> {@link DataOutput#writeStringSet Set<String>}</li>
- *   <li>Diagnostics--> {@link DataOutput#writeStringStringMap Map<String,String>}</li>
+ *   <li>Diagnostics, Attributes --> {@link DataOutput#writeStringStringMap Map<String,String>}</li>
 *   <li>IsCompoundFile --> {@link DataOutput#writeByte Int8}</li>
 * </ul>
 * </p>
@@ -57,6 +57,7 @@ import org.apache.lucene.store.DataOutput; // javadocs
 *   for each segment it creates. It includes metadata like the current Lucene
 *   version, OS, Java version, why the segment was created (merge, flush,
 *   addIndexes), etc.</li>
+ *   <li>Attributes: a key-value map of codec-private attributes.</li>
 *   <li>Files is a list of files referred to by this segment.</li>
 * </ul>
 * </p>
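For orientation, here is a minimal sketch mapping the data-type list above onto the DataOutput primitives it names. It is illustrative only: the real Lucene40SegmentInfoWriter (its hunk appears further down) also writes a codec header plus the version and size fields, which are omitted here, and the class and method names in the sketch are invented for the example.

// Sketch only -- not part of this patch. Serializes the tail of the .si fields
// exactly as the javadoc's data-type list describes them.
import java.io.IOException;
import java.util.Map;
import java.util.Set;

import org.apache.lucene.store.DataOutput;

class SiTailWriteSketch {
  static void writeTail(DataOutput out, boolean isCompoundFile,
                        Map<String,String> diagnostics,
                        Map<String,String> attributes,
                        Set<String> files) throws IOException {
    out.writeByte((byte) (isCompoundFile ? 1 : -1)); // IsCompoundFile --> Int8 (SegmentInfo.YES / SegmentInfo.NO)
    out.writeStringStringMap(diagnostics);           // Diagnostics --> Map<String,String>
    out.writeStringStringMap(attributes);            // Attributes --> Map<String,String>
    out.writeStringSet(files);                       // Files --> Set<String>
  }
}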
@@ -51,11 +51,12 @@ public class Lucene40SegmentInfoReader extends SegmentInfoReader {
       final Map<Integer,Long> normGen = null;
       final boolean isCompoundFile = input.readByte() == SegmentInfo.YES;
       final Map<String,String> diagnostics = input.readStringStringMap();
+      final Map<String,String> attributes = input.readStringStringMap();
       final Set<String> files = input.readStringSet();
 
       final SegmentInfo si = new SegmentInfo(dir, version, segment, docCount, docStoreOffset,
                                              docStoreSegment, docStoreIsCompoundFile, normGen, isCompoundFile,
-                                             null, diagnostics);
+                                             null, diagnostics, attributes);
       si.setFiles(files);
 
       success = true;
@@ -56,6 +56,7 @@ public class Lucene40SegmentInfoWriter extends SegmentInfoWriter {
 
       output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO));
       output.writeStringStringMap(si.getDiagnostics());
+      output.writeStringStringMap(si.attributes());
       output.writeStringSet(si.getFiles());
 
       success = true;
@@ -84,6 +84,22 @@ public class SimpleTextSegmentInfosReader extends SegmentInfoReader {
         String value = readString(SI_DIAG_VALUE.length, scratch);
         diagnostics.put(key, value);
       }
 
+      SimpleTextUtil.readLine(input, scratch);
+      assert StringHelper.startsWith(scratch, SI_NUM_ATTS);
+      int numAtts = Integer.parseInt(readString(SI_NUM_ATTS.length, scratch));
+      Map<String,String> attributes = new HashMap<String,String>();
+
+      for (int i = 0; i < numAtts; i++) {
+        SimpleTextUtil.readLine(input, scratch);
+        assert StringHelper.startsWith(scratch, SI_ATT_KEY);
+        String key = readString(SI_ATT_KEY.length, scratch);
+
+        SimpleTextUtil.readLine(input, scratch);
+        assert StringHelper.startsWith(scratch, SI_ATT_VALUE);
+        String value = readString(SI_ATT_VALUE.length, scratch);
+        attributes.put(key, value);
+      }
+
       SimpleTextUtil.readLine(input, scratch);
       assert StringHelper.startsWith(scratch, SI_NUM_FILES);
@@ -99,7 +115,7 @@ public class SimpleTextSegmentInfosReader extends SegmentInfoReader {
 
       SegmentInfo info = new SegmentInfo(directory, version, segmentName, docCount, -1,
                                          segmentName, false, null, isCompoundFile,
-                                         null, diagnostics);
+                                         null, diagnostics, attributes);
       info.setFiles(files);
       success = true;
       return info;
@@ -51,6 +51,9 @@ public class SimpleTextSegmentInfosWriter extends SegmentInfoWriter {
   final static BytesRef SI_NUM_DIAG = new BytesRef(" diagnostics ");
   final static BytesRef SI_DIAG_KEY = new BytesRef(" key ");
   final static BytesRef SI_DIAG_VALUE = new BytesRef(" value ");
+  final static BytesRef SI_NUM_ATTS = new BytesRef(" attributes ");
+  final static BytesRef SI_ATT_KEY = new BytesRef(" key ");
+  final static BytesRef SI_ATT_VALUE = new BytesRef(" value ");
   final static BytesRef SI_NUM_FILES = new BytesRef(" files ");
   final static BytesRef SI_FILE = new BytesRef(" file ");
 
@@ -95,6 +98,24 @@ public class SimpleTextSegmentInfosWriter extends SegmentInfoWriter {
         SimpleTextUtil.writeNewline(output);
       }
     }
 
+    Map<String,String> atts = si.attributes();
+    int numAtts = atts == null ? 0 : atts.size();
+    SimpleTextUtil.write(output, SI_NUM_ATTS);
+    SimpleTextUtil.write(output, Integer.toString(numAtts), scratch);
+    SimpleTextUtil.writeNewline(output);
+
+    if (numAtts > 0) {
+      for (Map.Entry<String,String> entry : atts.entrySet()) {
+        SimpleTextUtil.write(output, SI_ATT_KEY);
+        SimpleTextUtil.write(output, entry.getKey(), scratch);
+        SimpleTextUtil.writeNewline(output);
+
+        SimpleTextUtil.write(output, SI_ATT_VALUE);
+        SimpleTextUtil.write(output, entry.getValue(), scratch);
+        SimpleTextUtil.writeNewline(output);
+      }
+    }
+
     Set<String> files = si.getFiles();
     int numFiles = files == null ? 0 : files.size();
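Putting the SimpleText reader and writer changes together: given the constants and the write loop above, the attributes block of a SimpleText .si file would look roughly like the sample below. The two attribute keys and values are made up for the example, and the exact leading whitespace is whatever the BytesRef prefixes contain, so it may differ from what is shown here.

 attributes 2
 key MyCodec.mode
 value PACKED
 key MyCodec.hasBloomFilters
 value false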
@@ -487,7 +487,7 @@ class DocumentsWriterPerThread {
     final SegmentInfo newSegment = new SegmentInfo(directoryOrig, Constants.LUCENE_MAIN_VERSION, segment, flushState.numDocs,
                                                    -1, segment, false, null, false,
                                                    flushState.codec,
-                                                   null);
+                                                   null, null);
     newSegment.setFiles(new HashSet<String>(directory.getCreatedFiles()));
 
     if (infoStream.isEnabled("DWPT")) {
@@ -2299,7 +2299,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
       MergeState mergeState = merger.merge(); // merge 'em
       SegmentInfo info = new SegmentInfo(directory, Constants.LUCENE_MAIN_VERSION, mergedName, mergeState.mergedDocCount,
                                          -1, mergedName, false, null, false,
-                                         codec, null);
+                                         codec, null, null);
       SegmentInfoPerCommit infoPerCommit = new SegmentInfoPerCommit(info, 0, -1L);
 
       info.setFiles(new HashSet<String>(trackingDir.getCreatedFiles()));
@@ -2398,7 +2398,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
     // Same SI as before but we change directory, name and docStoreSegment:
     SegmentInfo newInfo = new SegmentInfo(directory, info.info.getVersion(), segName, info.info.docCount, info.info.getDocStoreOffset(),
                                           newDsName, info.info.getDocStoreIsCompoundFile(), info.info.getNormGen(), info.info.getUseCompoundFile(),
-                                          info.info.getCodec(), info.info.getDiagnostics());
+                                          info.info.getCodec(), info.info.getDiagnostics(), info.info.attributes());
     SegmentInfoPerCommit newInfoPerCommit = new SegmentInfoPerCommit(newInfo, info.getDelCount(), info.getDelGen());
 
     Set<String> segFiles = new HashSet<String>();
@@ -3316,7 +3316,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
       // ConcurrentMergePolicy we keep deterministic segment
       // names.
       final String mergeSegmentName = newSegmentName();
-      SegmentInfo si = new SegmentInfo(directory, Constants.LUCENE_MAIN_VERSION, mergeSegmentName, 0, -1, mergeSegmentName, false, null, false, codec, details);
+      SegmentInfo si = new SegmentInfo(directory, Constants.LUCENE_MAIN_VERSION, mergeSegmentName, 0, -1, mergeSegmentName, false, null, false, codec, details, null);
       merge.info = new SegmentInfoPerCommit(si, 0, -1L);
 
       // Lock order: IW -> BD
@@ -20,6 +20,7 @@ package org.apache.lucene.index;
 
 import java.io.IOException;
 import java.util.Collection;
+import java.util.HashMap;
 import java.util.Map;
 import java.util.Set;
 
@@ -30,16 +31,13 @@ import org.apache.lucene.store.TrackingDirectoryWrapper;
 // nocommit fix codec api to pass this around so they can
 // store attrs
-
-// nocommit add attrs api like FI
 
 /**
  * Information about a segment such as it's name, directory, and files related
  * to the segment.
  *
  * @lucene.experimental
  */
-// nocommit make final again once atts are working here
-public class SegmentInfo {
+public final class SegmentInfo {
 
   // TODO: remove these from this class, for now this is the representation
   public static final int NO = -1; // e.g. no norms; no deletes;
@@ -76,6 +74,8 @@ public class SegmentInfo {
   private Codec codec;
 
   private Map<String,String> diagnostics;
+
+  private Map<String,String> attributes;
 
   // Tracks the Lucene version this segment was created with, since 3.1. Null
   // indicates an older than 3.0 index, and it's used to detect a too old index.
@@ -100,7 +100,7 @@ public class SegmentInfo {
    */
   public SegmentInfo(Directory dir, String version, String name, int docCount, int docStoreOffset,
                      String docStoreSegment, boolean docStoreIsCompoundFile, Map<Integer,Long> normGen, boolean isCompoundFile,
-                     Codec codec, Map<String,String> diagnostics) {
+                     Codec codec, Map<String,String> diagnostics, Map<String,String> attributes) {
     assert !(dir instanceof TrackingDirectoryWrapper);
     this.dir = dir;
     this.version = version;
@@ -113,6 +113,7 @@ public class SegmentInfo {
     this.isCompoundFile = isCompoundFile;
     this.codec = codec;
     this.diagnostics = diagnostics;
+    this.attributes = attributes;
   }
 
   /**
@@ -331,4 +332,39 @@ public class SegmentInfo {
   public Set<String> getFiles() {
     return setFiles;
   }
+
+  /**
+   * Get a codec attribute value, or null if it does not exist
+   */
+  public String getAttribute(String key) {
+    if (attributes == null) {
+      return null;
+    } else {
+      return attributes.get(key);
+    }
+  }
+
+  /**
+   * Puts a codec attribute value.
+   * <p>
+   * This is a key-value mapping for the field that the codec can use
+   * to store additional metadata, and will be available to the codec
+   * when reading the segment via {@link #getAttribute(String)}
+   * <p>
+   * If a value already exists for the field, it will be replaced with
+   * the new value.
+   */
+  public String putAttribute(String key, String value) {
+    if (attributes == null) {
+      attributes = new HashMap<String,String>();
+    }
+    return attributes.put(key, value);
+  }
+
+  /**
+   * @return internal codec attributes map. May be null if no mappings exist.
+   */
+  public Map<String,String> attributes() {
+    return attributes;
+  }
 }
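The methods added above give codecs a per-segment key-value store. A minimal usage sketch follows; the class, method, and attribute names are invented for illustration, and only putAttribute, getAttribute, and attributes() from this patch are assumed.

// Sketch only -- shows how a codec component might use the SegmentInfo
// attribute API introduced in this patch. The attribute key is hypothetical.
import org.apache.lucene.index.SegmentInfo;

class AttributeUsageSketch {
  static final String MODE_KEY = "MyPostingsFormat.mode";

  // Write path: remember which encoding this segment used. putAttribute
  // returns the previous value for the key, or null if none was set.
  static void recordMode(SegmentInfo si, String mode) {
    si.putAttribute(MODE_KEY, mode);
  }

  // Read path: recover the value when the segment is opened again.
  // getAttribute returns null if the attribute was never written.
  static String readMode(SegmentInfo si) {
    return si.getAttribute(MODE_KEY);
  }
}

Note that attributes() may still be null when nothing was ever put, which is why the SimpleText writer above guards with atts == null ? 0 : atts.size().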
@@ -257,7 +257,7 @@ public class TestCodecs extends LuceneTestCase {
     this.write(fieldInfos, dir, fields, true);
     Codec codec = Codec.getDefault();
     final SegmentInfo si = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, SEGMENT, 10000, -1, SEGMENT, false, null, false,
-                                           codec, null);
+                                           codec, null, null);
 
     final FieldsProducer reader = codec.postingsFormat().fieldsProducer(new SegmentReadState(dir, si, fieldInfos, newIOContext(random()), DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR));
 
@@ -315,7 +315,7 @@ public class TestCodecs extends LuceneTestCase {
     Codec codec = Codec.getDefault();
     final SegmentInfo si = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, SEGMENT, 10000, -1,
                                            SEGMENT, false, null, false,
-                                           codec, null);
+                                           codec, null, null);
 
     if (VERBOSE) {
       System.out.println("TEST: now read postings");
@@ -206,7 +206,7 @@ public class TestDoc extends LuceneTestCase {
       r2.close();
       final SegmentInfo info = new SegmentInfo(si1.info.dir, Constants.LUCENE_MAIN_VERSION, merged,
                                                si1.info.docCount + si2.info.docCount, -1, merged,
-                                               false, null, false, codec, null);
+                                               false, null, false, codec, null, null);
       info.setFiles(new HashSet<String>(trackingDir.getCreatedFiles()));
 
       if (useCompoundFile) {
@@ -86,7 +86,7 @@ public class TestSegmentMerger extends LuceneTestCase {
     //Should be able to open a new SegmentReader against the new directory
     SegmentReader mergedReader = new SegmentReader(new SegmentInfoPerCommit(
         new SegmentInfo(mergedDir, Constants.LUCENE_MAIN_VERSION, mergedSegment, docsMerged, -1, mergedSegment,
-                        false, null, false, codec, null),
+                        false, null, false, codec, null, null),
         0, -1L),
         DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, newIOContext(random()));
     assertTrue(mergedReader != null);
@@ -147,7 +147,7 @@ public class IndexSplitter {
       // Same info just changing the dir:
       SegmentInfo newInfo = new SegmentInfo(destFSDir, info.getVersion(), info.name, info.docCount, info.getDocStoreOffset(),
                                             info.getDocStoreSegment(), info.getDocStoreIsCompoundFile(), info.getNormGen(), info.getUseCompoundFile(),
-                                            info.getCodec(), info.getDiagnostics());
+                                            info.getCodec(), info.getDiagnostics(), info.attributes());
       destInfos.add(new SegmentInfoPerCommit(newInfo, infoPerCommit.getDelCount(), infoPerCommit.getDelGen()));
       // nocommit is this right...?
       // now copy files over