load files up-front

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1411336 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Simon Willnauer 2012-11-19 18:02:23 +00:00
parent 08812d6994
commit 27351a764f
4 changed files with 1221 additions and 52 deletions

View File

@ -36,7 +36,7 @@ public class Lucene41DocValuesFormat extends SimpleDocValuesFormat {
@Override
public SimpleDVProducer fieldsProducer(SegmentReadState state)
throws IOException {
return new Lucene41DocValuesProducer(state.directory, state.segmentInfo, state.context);
return new Lucene41DocValuesProducer(state.directory, state.segmentInfo, state.fieldInfos, state.context);
}
}

View File

@ -25,7 +25,9 @@ import java.util.Map;
import org.apache.lucene.codecs.SimpleDVProducer;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SegmentInfo;
@ -39,20 +41,33 @@ import org.apache.lucene.util.IOUtils;
public class Lucene41DocValuesProducer extends SimpleDVProducer {
private final CompoundFileDirectory cfs;
private IOContext context;
private final SegmentInfo info;
private final Map<String,DocValuesFactory<NumericDocValues>> numeric = new HashMap<String,DocValuesFactory<NumericDocValues>>();
private final Map<String,DocValuesFactory<BinaryDocValues>> binary = new HashMap<String,DocValuesFactory<BinaryDocValues>>();
private final Map<String,DocValuesFactory<SortedDocValues>> sorted = new HashMap<String,DocValuesFactory<SortedDocValues>>();
public Lucene41DocValuesProducer(Directory dir, SegmentInfo segmentInfo,
IOContext context) throws IOException {
FieldInfos fieldInfos, IOContext context) throws IOException {
this.cfs = new CompoundFileDirectory(dir, IndexFileNames.segmentFileName(
segmentInfo.name, Lucene41DocValuesConsumer.DV_SEGMENT_SUFFIX,
IndexFileNames.COMPOUND_FILE_EXTENSION), context, false);
this.context = context;
this.info = segmentInfo;
for (FieldInfo fieldInfo : fieldInfos) {
if (fieldInfo.hasDocValues()) {
if (DocValues.isNumber(fieldInfo.getDocValuesType())
|| DocValues.isFloat(fieldInfo.getDocValuesType())) {
numeric.put(fieldInfo.name, new Lucene41NumericDocValues.Factory(
this.cfs, this.info, fieldInfo, context));
} else if (DocValues.isBytes(fieldInfo.getDocValuesType())) {
binary.put(fieldInfo.name, new Lucene41BinaryDocValues.Factory(
this.cfs, this.info, fieldInfo, context));
} else {
assert DocValues.isSortedBytes(fieldInfo.getDocValuesType());
sorted.put(fieldInfo.name, new Lucene41SortedDocValues.Factory(
this.cfs, this.info, fieldInfo, context));
}
}
}
}
@Override
@ -69,39 +84,27 @@ public class Lucene41DocValuesProducer extends SimpleDVProducer {
@Override
public NumericDocValues getNumeric(FieldInfo field) throws IOException {
//nocommit do we need to sync that?
DocValuesFactory<NumericDocValues> docValuesFactory = numeric
.get(field.name);
if (docValuesFactory == null) {
numeric.put(field.name,
docValuesFactory = new Lucene41NumericDocValues.Factory(this.cfs,
this.info, field, context));
}
return docValuesFactory.getDirect();
return valueOrNull(numeric, field);
}
@Override
public BinaryDocValues getBinary(FieldInfo field) throws IOException {
//nocommit do we need to sync that?
DocValuesFactory<BinaryDocValues> docValuesFactory = binary.get(field.name);
if (docValuesFactory == null) {
binary.put(field.name,
docValuesFactory = new Lucene41BinaryDocValues.Factory(this.cfs,
this.info, field, context));
}
return docValuesFactory.getDirect();
return valueOrNull(binary, field);
}
@Override
public SortedDocValues getSorted(FieldInfo field) throws IOException {
//nocommit do we need to sync that?
DocValuesFactory<SortedDocValues> docValuesFactory = sorted.get(field.name);
if (docValuesFactory == null) {
sorted.put(field.name,
docValuesFactory = new Lucene41SortedDocValues.Factory(this.cfs,
this.info, field, context));
return valueOrNull(sorted, field);
}
private static <T> T valueOrNull(Map<String,DocValuesFactory<T>> map,
FieldInfo field) throws IOException {
final DocValuesFactory<T> docValuesFactory = map.get(field.name);
if (docValuesFactory != null) {
return docValuesFactory.getDirect();
}
return docValuesFactory.getDirect();
return null;
}
public static abstract class DocValuesFactory<T> implements Closeable {

View File

@ -94,7 +94,7 @@ public class TestDocValuesIndexing extends LuceneTestCase {
DirectoryReader reader = DirectoryReader.open(dir, 1);
assertEquals(1, reader.leaves().size());
IndexSearcher searcher = new IndexSearcher(reader);
BooleanQuery query = new BooleanQuery();
@ -107,11 +107,10 @@ public class TestDocValuesIndexing extends LuceneTestCase {
TopDocs search = searcher.search(query, 10);
assertEquals(5, search.totalHits);
ScoreDoc[] scoreDocs = search.scoreDocs;
DocValues docValues = MultiDocValues.getDocValues(reader, "docId");
Source source = docValues.getSource();
NumericDocValues docValues = numeric(reader, "docId");
for (int i = 0; i < scoreDocs.length; i++) {
assertEquals(i, scoreDocs[i].doc);
assertEquals(i, source.getInt(scoreDocs[i].doc));
assertEquals(i, docValues.get(scoreDocs[i].doc));
}
reader.close();
dir.close();
@ -165,13 +164,14 @@ public class TestDocValuesIndexing extends LuceneTestCase {
w.close();
AtomicReader sr = getOnlySegmentReader(r3);
assertEquals(2, sr.numDocs());
DocValues docValues = sr.docValues("dv");
NumericDocValues docValues = sr.getNumericDocValues("dv");
assertNotNull(docValues);
r3.close();
d3.close();
}
public void testAddIndexesRandom() throws IOException {
//nocommit convert
int valuesPerIndex = 10;
List<Type> values = Arrays.asList(Type.values());
Collections.shuffle(values, random());
@ -296,6 +296,7 @@ public class TestDocValuesIndexing extends LuceneTestCase {
@SuppressWarnings("fallthrough")
public void runTestNumerics(IndexWriterConfig cfg, boolean withDeletions)
throws IOException {
//nocommit convert
Directory d = newDirectory();
IndexWriter w = new IndexWriter(d, cfg);
final int numValues = 50 + atLeast(10);
@ -831,7 +832,7 @@ public class TestDocValuesIndexing extends LuceneTestCase {
w.forceMerge(1);
DirectoryReader r = w.getReader();
w.close();
assertEquals(17, getOnlySegmentReader(r).docValues("field").loadSource().getInt(0));
assertEquals(17, getOnlySegmentReader(r).getNumericDocValues("field").get(0));
r.close();
d.close();
}
@ -979,12 +980,12 @@ public class TestDocValuesIndexing extends LuceneTestCase {
writer.close();
final AtomicReader sr = getOnlySegmentReader(r);
final DocValues dv = sr.docValues("stringdv");
final SortedDocValues dv = sorted(sr, "stringdv").newRAMInstance();
assertNotNull(dv);
final long END_TIME = System.currentTimeMillis() + (TEST_NIGHTLY ? 30 : 1);
final DocValues.Source docIDToID = sr.docValues("id").getSource();
final NumericDocValues docIDToID = numeric(sr, "id").newRAMInstance();
final int NUM_THREADS = _TestUtil.nextInt(random(), 1, 10);
Thread[] threads = new Thread[NUM_THREADS];
@ -993,33 +994,29 @@ public class TestDocValuesIndexing extends LuceneTestCase {
@Override
public void run() {
Random random = random();
final DocValues.Source stringDVSource;
final DocValues.Source stringDVDirectSource;
final SortedDocValues stringDV = dv;
final SortedDocValues stringDVDirect;
try {
stringDVSource = dv.getSource();
assertNotNull(stringDVSource);
stringDVDirectSource = dv.getDirectSource();
assertNotNull(stringDVDirectSource);
assertNotNull(stringDV);
stringDVDirect = sr.getSortedDocValues("stringdv");
assertNotNull(stringDVDirect);
} catch (IOException ioe) {
throw new RuntimeException(ioe);
}
while(System.currentTimeMillis() < END_TIME) {
final DocValues.Source source;
final SortedDocValues source;
if (random.nextBoolean()) {
source = stringDVSource;
source = stringDV;
} else {
source = stringDVDirectSource;
source = stringDVDirect;
}
final DocValues.SortedSource sortedSource = source.asSortedSource();
assertNotNull(sortedSource);
final BytesRef scratch = new BytesRef();
for(int iter=0;iter<100;iter++) {
final int docID = random.nextInt(sr.maxDoc());
final BytesRef br = sortedSource.getBytes(docID, scratch);
assertEquals(docValues.get((int) docIDToID.getInt(docID)), br);
source.get(docID, scratch);
assertEquals(docValues.get((int) docIDToID.get(docID)), scratch);
}
}
}
@ -1084,6 +1081,7 @@ public class TestDocValuesIndexing extends LuceneTestCase {
}
public void testDocValuesUnstored() throws IOException {
//nocommit convert!
Directory dir = newDirectory();
IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
iwconfig.setMergePolicy(newLogMergePolicy());
@ -1132,4 +1130,37 @@ public class TestDocValuesIndexing extends LuceneTestCase {
public void invalidate(DocValues values) {}
}
/**
 * Test helper that looks up the numeric doc values of {@code field} on {@code reader}
 * and then picks at random between returning them as-is and returning the result of
 * their {@code newRAMInstance()} call.
 *
 * @param reader atomic reader the values are fetched from
 * @param field name of the doc-values field
 * @return the fetched values, their RAM instance, or {@code null} when the reader
 *         returned no values for the field
 * @throws IOException if the lookup or the RAM-instance creation fails
 */
public NumericDocValues numeric(AtomicReader reader, String field) throws IOException {
  final NumericDocValues docValues = reader.getNumericDocValues(field);
  // Draw the coin unconditionally so the random sequence is the same whether or not
  // the field has values.
  final boolean useRamInstance = random().nextBoolean();
  if (docValues == null || !useRamInstance) {
    // A missing field is passed through as null so the caller's own null check reports
    // it, instead of this helper throwing a NullPointerException on only some seeds.
    return docValues;
  }
  return docValues.newRAMInstance();
}
/**
 * Convenience overload of {@link #numeric(AtomicReader, String)}: obtains the reader's
 * segment reader through {@code getOnlySegmentReader} and delegates to the
 * atomic-reader variant.
 *
 * @param reader directory reader to unwrap
 * @param field name of the doc-values field
 * @return whatever the atomic-reader overload returns for that segment
 * @throws IOException if the delegate fails
 */
public NumericDocValues numeric(DirectoryReader reader, String field) throws IOException {
  final AtomicReader onlySegment = getOnlySegmentReader(reader);
  return numeric(onlySegment, field);
}
/**
 * Convenience overload of {@link #binary(AtomicReader, String)}: obtains the reader's
 * segment reader through {@code getOnlySegmentReader} and delegates to the
 * atomic-reader variant.
 *
 * @param reader directory reader to unwrap
 * @param field name of the doc-values field
 * @return whatever the atomic-reader overload returns for that segment
 * @throws IOException if the delegate fails
 */
public BinaryDocValues binary(DirectoryReader reader, String field) throws IOException {
  final AtomicReader onlySegment = getOnlySegmentReader(reader);
  return binary(onlySegment, field);
}
/**
 * Convenience overload of {@link #sorted(AtomicReader, String)}: obtains the reader's
 * segment reader through {@code getOnlySegmentReader} and delegates to the
 * atomic-reader variant.
 *
 * @param reader directory reader to unwrap
 * @param field name of the doc-values field
 * @return whatever the atomic-reader overload returns for that segment
 * @throws IOException if the delegate fails
 */
public SortedDocValues sorted(DirectoryReader reader, String field) throws IOException {
  final AtomicReader onlySegment = getOnlySegmentReader(reader);
  return sorted(onlySegment, field);
}
/**
 * Test helper that looks up the binary doc values of {@code field} on {@code reader}
 * and then picks at random between returning them as-is and returning the result of
 * their {@code newRAMInstance()} call.
 *
 * @param reader atomic reader the values are fetched from
 * @param field name of the doc-values field
 * @return the fetched values, their RAM instance, or {@code null} when the reader
 *         returned no values for the field
 * @throws IOException if the lookup or the RAM-instance creation fails
 */
public BinaryDocValues binary(AtomicReader reader, String field) throws IOException {
  final BinaryDocValues docValues = reader.getBinaryDocValues(field);
  // Draw the coin unconditionally so the random sequence is the same whether or not
  // the field has values.
  final boolean useRamInstance = random().nextBoolean();
  if (docValues == null || !useRamInstance) {
    // A missing field is passed through as null so the caller's own null check reports
    // it, instead of this helper throwing a NullPointerException on only some seeds.
    return docValues;
  }
  return docValues.newRAMInstance();
}
/**
 * Test helper that looks up the sorted doc values of {@code field} on {@code reader}
 * and then picks at random between returning them as-is and returning the result of
 * their {@code newRAMInstance()} call.
 *
 * @param reader atomic reader the values are fetched from
 * @param field name of the doc-values field
 * @return the fetched values, their RAM instance, or {@code null} when the reader
 *         returned no values for the field
 * @throws IOException if the lookup or the RAM-instance creation fails
 */
public SortedDocValues sorted(AtomicReader reader, String field) throws IOException {
  final SortedDocValues docValues = reader.getSortedDocValues(field);
  // Draw the coin unconditionally so the random sequence is the same whether or not
  // the field has values.
  final boolean useRamInstance = random().nextBoolean();
  if (docValues == null || !useRamInstance) {
    // A missing field is passed through as null so the caller's own null check reports
    // it, instead of this helper throwing a NullPointerException on only some seeds.
    return docValues;
  }
  return docValues.newRAMInstance();
}
}

File diff suppressed because it is too large Load Diff