From 27351a764f77cf8c4c9efa9889977b26dd7fe4d7 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Mon, 19 Nov 2012 18:02:23 +0000 Subject: [PATCH] load files up-front git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1411336 13f79535-47bb-0310-9956-ffa450edef68 --- .../values/Lucene41DocValuesFormat.java | 2 +- .../values/Lucene41DocValuesProducer.java | 59 +- .../lucene/index/TestDocValuesIndexing.java | 77 +- .../index/TestSimpleDocValuesIndexing.java | 1135 +++++++++++++++++ 4 files changed, 1221 insertions(+), 52 deletions(-) create mode 100644 lucene/core/src/test/org/apache/lucene/index/TestSimpleDocValuesIndexing.java diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/values/Lucene41DocValuesFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/values/Lucene41DocValuesFormat.java index e909b346f60..f7aba0932d4 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/values/Lucene41DocValuesFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/values/Lucene41DocValuesFormat.java @@ -36,7 +36,7 @@ public class Lucene41DocValuesFormat extends SimpleDocValuesFormat { @Override public SimpleDVProducer fieldsProducer(SegmentReadState state) throws IOException { - return new Lucene41DocValuesProducer(state.directory, state.segmentInfo, state.context); + return new Lucene41DocValuesProducer(state.directory, state.segmentInfo, state.fieldInfos, state.context); } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/values/Lucene41DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/values/Lucene41DocValuesProducer.java index 226c41013a3..abab16c7f08 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/values/Lucene41DocValuesProducer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/values/Lucene41DocValuesProducer.java @@ -25,7 +25,9 @@ import java.util.Map; import org.apache.lucene.codecs.SimpleDVProducer; import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.DocValues; import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.SegmentInfo; @@ -39,20 +41,33 @@ import org.apache.lucene.util.IOUtils; public class Lucene41DocValuesProducer extends SimpleDVProducer { private final CompoundFileDirectory cfs; - private IOContext context; private final SegmentInfo info; private final Map> numeric = new HashMap>(); private final Map> binary = new HashMap>(); - private final Map> sorted = new HashMap>(); public Lucene41DocValuesProducer(Directory dir, SegmentInfo segmentInfo, - IOContext context) throws IOException { + FieldInfos fieldInfos, IOContext context) throws IOException { this.cfs = new CompoundFileDirectory(dir, IndexFileNames.segmentFileName( segmentInfo.name, Lucene41DocValuesConsumer.DV_SEGMENT_SUFFIX, IndexFileNames.COMPOUND_FILE_EXTENSION), context, false); - this.context = context; this.info = segmentInfo; + for (FieldInfo fieldInfo : fieldInfos) { + if (fieldInfo.hasDocValues()) { + if (DocValues.isNumber(fieldInfo.getDocValuesType()) + || DocValues.isFloat(fieldInfo.getDocValuesType())) { + numeric.put(fieldInfo.name, new Lucene41NumericDocValues.Factory( + this.cfs, this.info, fieldInfo, context)); + } else if (DocValues.isBytes(fieldInfo.getDocValuesType())) { + binary.put(fieldInfo.name, new Lucene41BinaryDocValues.Factory( + this.cfs, this.info, fieldInfo, context)); + } else { + assert DocValues.isSortedBytes(fieldInfo.getDocValuesType()); + sorted.put(fieldInfo.name, new Lucene41SortedDocValues.Factory( + this.cfs, this.info, fieldInfo, context)); + } + } + } } @Override @@ -69,39 +84,27 @@ public class Lucene41DocValuesProducer extends SimpleDVProducer { @Override public NumericDocValues getNumeric(FieldInfo field) throws IOException { - //nocommit do we need to sync that? - DocValuesFactory docValuesFactory = numeric - .get(field.name); - if (docValuesFactory == null) { - numeric.put(field.name, - docValuesFactory = new Lucene41NumericDocValues.Factory(this.cfs, - this.info, field, context)); - } - return docValuesFactory.getDirect(); + return valueOrNull(numeric, field); } @Override public BinaryDocValues getBinary(FieldInfo field) throws IOException { - //nocommit do we need to sync that? - DocValuesFactory docValuesFactory = binary.get(field.name); - if (docValuesFactory == null) { - binary.put(field.name, - docValuesFactory = new Lucene41BinaryDocValues.Factory(this.cfs, - this.info, field, context)); - } - return docValuesFactory.getDirect(); + return valueOrNull(binary, field); + } @Override public SortedDocValues getSorted(FieldInfo field) throws IOException { - //nocommit do we need to sync that? - DocValuesFactory docValuesFactory = sorted.get(field.name); - if (docValuesFactory == null) { - sorted.put(field.name, - docValuesFactory = new Lucene41SortedDocValues.Factory(this.cfs, - this.info, field, context)); + return valueOrNull(sorted, field); + } + + private static T valueOrNull(Map> map, + FieldInfo field) throws IOException { + final DocValuesFactory docValuesFactory = map.get(field.name); + if (docValuesFactory != null) { + return docValuesFactory.getDirect(); } - return docValuesFactory.getDirect(); + return null; } public static abstract class DocValuesFactory implements Closeable { diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDocValuesIndexing.java b/lucene/core/src/test/org/apache/lucene/index/TestDocValuesIndexing.java index d64122c774c..80bb0ec6b86 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestDocValuesIndexing.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestDocValuesIndexing.java @@ -94,7 +94,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { DirectoryReader reader = DirectoryReader.open(dir, 1); assertEquals(1, reader.leaves().size()); - + IndexSearcher searcher = new IndexSearcher(reader); BooleanQuery query = new BooleanQuery(); @@ -107,11 +107,10 @@ public class TestDocValuesIndexing extends LuceneTestCase { TopDocs search = searcher.search(query, 10); assertEquals(5, search.totalHits); ScoreDoc[] scoreDocs = search.scoreDocs; - DocValues docValues = MultiDocValues.getDocValues(reader, "docId"); - Source source = docValues.getSource(); + NumericDocValues docValues = numeric(reader, "docId"); for (int i = 0; i < scoreDocs.length; i++) { assertEquals(i, scoreDocs[i].doc); - assertEquals(i, source.getInt(scoreDocs[i].doc)); + assertEquals(i, docValues.get(scoreDocs[i].doc)); } reader.close(); dir.close(); @@ -165,13 +164,14 @@ public class TestDocValuesIndexing extends LuceneTestCase { w.close(); AtomicReader sr = getOnlySegmentReader(r3); assertEquals(2, sr.numDocs()); - DocValues docValues = sr.docValues("dv"); + NumericDocValues docValues = sr.getNumericDocValues("dv"); assertNotNull(docValues); r3.close(); d3.close(); } public void testAddIndexesRandom() throws IOException { + //nocommit convert int valuesPerIndex = 10; List values = Arrays.asList(Type.values()); Collections.shuffle(values, random()); @@ -296,6 +296,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { @SuppressWarnings("fallthrough") public void runTestNumerics(IndexWriterConfig cfg, boolean withDeletions) throws IOException { + //nocommit convert Directory d = newDirectory(); IndexWriter w = new IndexWriter(d, cfg); final int numValues = 50 + atLeast(10); @@ -831,7 +832,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { w.forceMerge(1); DirectoryReader r = w.getReader(); w.close(); - assertEquals(17, getOnlySegmentReader(r).docValues("field").loadSource().getInt(0)); + assertEquals(17, getOnlySegmentReader(r).getNumericDocValues("field").get(0)); r.close(); d.close(); } @@ -979,12 +980,12 @@ public class TestDocValuesIndexing extends LuceneTestCase { writer.close(); final AtomicReader sr = getOnlySegmentReader(r); - final DocValues dv = sr.docValues("stringdv"); + final SortedDocValues dv = sorted(sr, "stringdv").newRAMInstance(); assertNotNull(dv); final long END_TIME = System.currentTimeMillis() + (TEST_NIGHTLY ? 30 : 1); - final DocValues.Source docIDToID = sr.docValues("id").getSource(); + final NumericDocValues docIDToID = numeric(sr, "id").newRAMInstance(); final int NUM_THREADS = _TestUtil.nextInt(random(), 1, 10); Thread[] threads = new Thread[NUM_THREADS]; @@ -993,33 +994,29 @@ public class TestDocValuesIndexing extends LuceneTestCase { @Override public void run() { Random random = random(); - final DocValues.Source stringDVSource; - final DocValues.Source stringDVDirectSource; + final SortedDocValues stringDV = dv; + final SortedDocValues stringDVDirect; try { - stringDVSource = dv.getSource(); - assertNotNull(stringDVSource); - stringDVDirectSource = dv.getDirectSource(); - assertNotNull(stringDVDirectSource); + + assertNotNull(stringDV); + stringDVDirect = sr.getSortedDocValues("stringdv"); + assertNotNull(stringDVDirect); } catch (IOException ioe) { throw new RuntimeException(ioe); } while(System.currentTimeMillis() < END_TIME) { - final DocValues.Source source; + final SortedDocValues source; if (random.nextBoolean()) { - source = stringDVSource; + source = stringDV; } else { - source = stringDVDirectSource; + source = stringDVDirect; } - - final DocValues.SortedSource sortedSource = source.asSortedSource(); - assertNotNull(sortedSource); - final BytesRef scratch = new BytesRef(); for(int iter=0;iter<100;iter++) { final int docID = random.nextInt(sr.maxDoc()); - final BytesRef br = sortedSource.getBytes(docID, scratch); - assertEquals(docValues.get((int) docIDToID.getInt(docID)), br); + source.get(docID, scratch); + assertEquals(docValues.get((int) docIDToID.get(docID)), scratch); } } } @@ -1084,6 +1081,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { } public void testDocValuesUnstored() throws IOException { + //nocommit convert! Directory dir = newDirectory(); IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); iwconfig.setMergePolicy(newLogMergePolicy()); @@ -1132,4 +1130,37 @@ public class TestDocValuesIndexing extends LuceneTestCase { public void invalidate(DocValues values) {} } + public NumericDocValues numeric(AtomicReader reader, String field) throws IOException { + NumericDocValues docValues = reader.getNumericDocValues(field); + if(random().nextBoolean()) { + return docValues.newRAMInstance(); + } + return docValues; + } + + public NumericDocValues numeric(DirectoryReader reader, String field) throws IOException { + return numeric(getOnlySegmentReader(reader), field); + } + public BinaryDocValues binary(DirectoryReader reader, String field) throws IOException { + return binary(getOnlySegmentReader(reader), field); + } + public SortedDocValues sorted(DirectoryReader reader, String field) throws IOException { + return sorted(getOnlySegmentReader(reader), field); + } + + public BinaryDocValues binary(AtomicReader reader, String field) throws IOException { + BinaryDocValues docValues = reader.getBinaryDocValues(field); + if(random().nextBoolean()) { + return docValues.newRAMInstance(); + } + return docValues; + } + public SortedDocValues sorted(AtomicReader reader, String field) throws IOException { + SortedDocValues docValues = reader.getSortedDocValues(field); + if(random().nextBoolean()) { + return docValues.newRAMInstance(); + } + return docValues; + } + } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestSimpleDocValuesIndexing.java b/lucene/core/src/test/org/apache/lucene/index/TestSimpleDocValuesIndexing.java new file mode 100644 index 00000000000..9d39d836e91 --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/index/TestSimpleDocValuesIndexing.java @@ -0,0 +1,1135 @@ +package org.apache.lucene.index; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +import java.io.Closeable; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.EnumSet; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Random; +import java.util.Set; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.ByteDocValuesField; +import org.apache.lucene.document.DerefBytesDocValuesField; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.DoubleDocValuesField; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FloatDocValuesField; +import org.apache.lucene.document.IntDocValuesField; +import org.apache.lucene.document.LongDocValuesField; +import org.apache.lucene.document.PackedLongDocValuesField; +import org.apache.lucene.document.ShortDocValuesField; +import org.apache.lucene.document.SortedBytesDocValuesField; +import org.apache.lucene.document.StraightBytesDocValuesField; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.DocValues.SortedSource; +import org.apache.lucene.index.DocValues.Source; +import org.apache.lucene.index.DocValues.SourceCache; +import org.apache.lucene.index.DocValues.Type; +import org.apache.lucene.index.DocValues.SourceCache.DirectSourceCache; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefHash; +import org.apache.lucene.util.FixedBitSet; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; + +/** + * + * Tests DocValues integration into IndexWriter & Codecs + * + */ +public class TestSimpleDocValuesIndexing extends LuceneTestCase { + /* + * - add test for multi segment case with deletes + * - add multithreaded tests / integrate into stress indexing? + */ + + /* + * Simple test case to show how to use the API + */ + public void testDocValuesSimple() throws IOException { + Directory dir = newDirectory(); + IndexWriter writer = new IndexWriter(dir, writerConfig(false)); + for (int i = 0; i < 5; i++) { + Document doc = new Document(); + doc.add(new PackedLongDocValuesField("docId", i)); + doc.add(new TextField("docId", "" + i, Field.Store.NO)); + writer.addDocument(doc); + } + writer.commit(); + writer.forceMerge(1, true); + + writer.close(true); + + DirectoryReader reader = DirectoryReader.open(dir, 1); + assertEquals(1, reader.leaves().size()); + + IndexSearcher searcher = new IndexSearcher(reader); + + BooleanQuery query = new BooleanQuery(); + query.add(new TermQuery(new Term("docId", "0")), BooleanClause.Occur.SHOULD); + query.add(new TermQuery(new Term("docId", "1")), BooleanClause.Occur.SHOULD); + query.add(new TermQuery(new Term("docId", "2")), BooleanClause.Occur.SHOULD); + query.add(new TermQuery(new Term("docId", "3")), BooleanClause.Occur.SHOULD); + query.add(new TermQuery(new Term("docId", "4")), BooleanClause.Occur.SHOULD); + + TopDocs search = searcher.search(query, 10); + assertEquals(5, search.totalHits); + ScoreDoc[] scoreDocs = search.scoreDocs; + DocValues docValues = MultiDocValues.getDocValues(reader, "docId"); + Source source = docValues.getSource(); + for (int i = 0; i < scoreDocs.length; i++) { + assertEquals(i, scoreDocs[i].doc); + assertEquals(i, source.getInt(scoreDocs[i].doc)); + } + reader.close(); + dir.close(); + } + + public void testIndexBytesNoDeletes() throws IOException { + runTestIndexBytes(writerConfig(random().nextBoolean()), false); + } + + public void testIndexBytesDeletes() throws IOException { + runTestIndexBytes(writerConfig(random().nextBoolean()), true); + } + + public void testIndexNumericsNoDeletes() throws IOException { + runTestNumerics(writerConfig(random().nextBoolean()), false); + } + + public void testIndexNumericsDeletes() throws IOException { + runTestNumerics(writerConfig(random().nextBoolean()), true); + } + + public void testAddIndexes() throws IOException { + Directory d1 = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random(), d1); + Document doc = new Document(); + doc.add(newStringField("id", "1", Field.Store.YES)); + doc.add(new PackedLongDocValuesField("dv", 1)); + w.addDocument(doc); + IndexReader r1 = w.getReader(); + w.close(); + + Directory d2 = newDirectory(); + w = new RandomIndexWriter(random(), d2); + doc = new Document(); + doc.add(newStringField("id", "2", Field.Store.YES)); + doc.add(new PackedLongDocValuesField("dv", 2)); + w.addDocument(doc); + IndexReader r2 = w.getReader(); + w.close(); + + Directory d3 = newDirectory(); + w = new RandomIndexWriter(random(), d3); + w.addIndexes(SlowCompositeReaderWrapper.wrap(r1), SlowCompositeReaderWrapper.wrap(r2)); + r1.close(); + d1.close(); + r2.close(); + d2.close(); + + w.forceMerge(1); + DirectoryReader r3 = w.getReader(); + w.close(); + AtomicReader sr = getOnlySegmentReader(r3); + assertEquals(2, sr.numDocs()); + DocValues docValues = sr.docValues("dv"); + assertNotNull(docValues); + r3.close(); + d3.close(); + } + + public void testAddIndexesRandom() throws IOException { + int valuesPerIndex = 10; + List values = Arrays.asList(Type.values()); + Collections.shuffle(values, random()); + Type first = values.get(0); + Type second = values.get(1); + // index first index + Directory d_1 = newDirectory(); + IndexWriter w_1 = new IndexWriter(d_1, writerConfig(random().nextBoolean())); + indexValues(w_1, valuesPerIndex, first, values, false, 7); + w_1.commit(); + assertEquals(valuesPerIndex, w_1.maxDoc()); + _TestUtil.checkIndex(d_1); + + // index second index + Directory d_2 = newDirectory(); + IndexWriter w_2 = new IndexWriter(d_2, writerConfig(random().nextBoolean())); + indexValues(w_2, valuesPerIndex, second, values, false, 7); + w_2.commit(); + assertEquals(valuesPerIndex, w_2.maxDoc()); + _TestUtil.checkIndex(d_2); + + Directory target = newDirectory(); + IndexWriter w = new IndexWriter(target, writerConfig(random().nextBoolean())); + DirectoryReader r_1 = DirectoryReader.open(w_1, true); + DirectoryReader r_2 = DirectoryReader.open(w_2, true); + if (random().nextBoolean()) { + w.addIndexes(d_1, d_2); + } else { + w.addIndexes(r_1, r_2); + } + w.forceMerge(1, true); + w.commit(); + + _TestUtil.checkIndex(target); + assertEquals(valuesPerIndex * 2, w.maxDoc()); + + // check values + + DirectoryReader merged = DirectoryReader.open(w, true); + Source source_1 = getSource(getDocValues(r_1, first.name())); + Source source_2 = getSource(getDocValues(r_2, second.name())); + Source source_1_merged = getSource(getDocValues(merged, first.name())); + Source source_2_merged = getSource(getDocValues(merged, second + .name())); + for (int i = 0; i < r_1.maxDoc(); i++) { + switch (first) { + case BYTES_FIXED_DEREF: + case BYTES_FIXED_STRAIGHT: + case BYTES_VAR_DEREF: + case BYTES_VAR_STRAIGHT: + case BYTES_FIXED_SORTED: + case BYTES_VAR_SORTED: + assertEquals(source_1.getBytes(i, new BytesRef()), + source_1_merged.getBytes(i, new BytesRef())); + break; + case FIXED_INTS_16: + case FIXED_INTS_32: + case FIXED_INTS_64: + case FIXED_INTS_8: + case VAR_INTS: + assertEquals(source_1.getInt(i), source_1_merged.getInt(i)); + break; + case FLOAT_32: + case FLOAT_64: + assertEquals(source_1.getFloat(i), source_1_merged.getFloat(i), 0.0d); + break; + default: + fail("unkonwn " + first); + } + } + + for (int i = r_1.maxDoc(); i < merged.maxDoc(); i++) { + switch (second) { + case BYTES_FIXED_DEREF: + case BYTES_FIXED_STRAIGHT: + case BYTES_VAR_DEREF: + case BYTES_VAR_STRAIGHT: + case BYTES_FIXED_SORTED: + case BYTES_VAR_SORTED: + assertEquals(source_2.getBytes(i - r_1.maxDoc(), new BytesRef()), + source_2_merged.getBytes(i, new BytesRef())); + break; + case FIXED_INTS_16: + case FIXED_INTS_32: + case FIXED_INTS_64: + case FIXED_INTS_8: + case VAR_INTS: + assertEquals(source_2.getInt(i - r_1.maxDoc()), + source_2_merged.getInt(i)); + break; + case FLOAT_32: + case FLOAT_64: + assertEquals(source_2.getFloat(i - r_1.maxDoc()), + source_2_merged.getFloat(i), 0.0d); + break; + default: + fail("unkonwn " + first); + } + } + // close resources + r_1.close(); + r_2.close(); + merged.close(); + w_1.close(true); + w_2.close(true); + w.close(true); + d_1.close(); + d_2.close(); + target.close(); + } + + private IndexWriterConfig writerConfig(boolean useCompoundFile) { + final IndexWriterConfig cfg = newIndexWriterConfig(TEST_VERSION_CURRENT, + new MockAnalyzer(random())); + cfg.setMergePolicy(newLogMergePolicy(random())); + LogMergePolicy policy = new LogDocMergePolicy(); + cfg.setMergePolicy(policy); + policy.setUseCompoundFile(useCompoundFile); + return cfg; + } + + @SuppressWarnings("fallthrough") + public void runTestNumerics(IndexWriterConfig cfg, boolean withDeletions) + throws IOException { + Directory d = newDirectory(); + IndexWriter w = new IndexWriter(d, cfg); + final int numValues = 50 + atLeast(10); + final List numVariantList = new ArrayList(NUMERICS); + + // run in random order to test if fill works correctly during merges + Collections.shuffle(numVariantList, random()); + for (Type val : numVariantList) { + FixedBitSet deleted = indexValues(w, numValues, val, numVariantList, + withDeletions, 7); + List closeables = new ArrayList(); + DirectoryReader r = DirectoryReader.open(w, true); + final int numRemainingValues = numValues - deleted.cardinality(); + final int base = r.numDocs() - numRemainingValues; + // for FIXED_INTS_8 we use value mod 128 - to enable testing in + // one go we simply use numValues as the mod for all other INT types + int mod = numValues; + switch (val) { + case FIXED_INTS_8: + mod = 128; + case FIXED_INTS_16: + case FIXED_INTS_32: + case FIXED_INTS_64: + case VAR_INTS: { + DocValues intsReader = getDocValues(r, val.name()); + assertNotNull(intsReader); + + Source ints = getSource(intsReader); + + for (int i = 0; i < base; i++) { + long value = ints.getInt(i); + assertEquals("index " + i, 0, value); + } + + int expected = 0; + for (int i = base; i < r.numDocs(); i++, expected++) { + while (deleted.get(expected)) { + expected++; + } + assertEquals(val + " mod: " + mod + " index: " + i, expected%mod, ints.getInt(i)); + } + } + break; + case FLOAT_32: + case FLOAT_64: { + DocValues floatReader = getDocValues(r, val.name()); + assertNotNull(floatReader); + Source floats = getSource(floatReader); + for (int i = 0; i < base; i++) { + double value = floats.getFloat(i); + assertEquals(val + " failed for doc: " + i + " base: " + base, + 0.0d, value, 0.0d); + } + int expected = 0; + for (int i = base; i < r.numDocs(); i++, expected++) { + while (deleted.get(expected)) { + expected++; + } + assertEquals("index " + i, 2.0 * expected, floats.getFloat(i), + 0.00001); + } + } + break; + default: + fail("unexpected value " + val); + } + + closeables.add(r); + for (Closeable toClose : closeables) { + toClose.close(); + } + } + w.close(); + d.close(); + } + + public void runTestIndexBytes(IndexWriterConfig cfg, boolean withDeletions) + throws IOException { + final Directory d = newDirectory(); + IndexWriter w = new IndexWriter(d, cfg); + final List byteVariantList = new ArrayList(BYTES); + // run in random order to test if fill works correctly during merges + Collections.shuffle(byteVariantList, random()); + final int numValues = 50 + atLeast(10); + for (Type byteIndexValue : byteVariantList) { + List closeables = new ArrayList(); + final int bytesSize = 1 + atLeast(50); + FixedBitSet deleted = indexValues(w, numValues, byteIndexValue, + byteVariantList, withDeletions, bytesSize); + final DirectoryReader r = DirectoryReader.open(w, withDeletions); + assertEquals(0, r.numDeletedDocs()); + final int numRemainingValues = numValues - deleted.cardinality(); + final int base = r.numDocs() - numRemainingValues; + DocValues bytesReader = getDocValues(r, byteIndexValue.name()); + assertNotNull("field " + byteIndexValue.name() + + " returned null reader - maybe merged failed", bytesReader); + Source bytes = getSource(bytesReader); + byte upto = 0; + + // test the filled up slots for correctness + for (int i = 0; i < base; i++) { + + BytesRef br = bytes.getBytes(i, new BytesRef()); + String msg = " field: " + byteIndexValue.name() + " at index: " + i + + " base: " + base + " numDocs:" + r.numDocs(); + switch (byteIndexValue) { + case BYTES_VAR_STRAIGHT: + case BYTES_FIXED_STRAIGHT: + case BYTES_FIXED_DEREF: + case BYTES_FIXED_SORTED: + // fixed straight returns bytesref with zero bytes all of fixed + // length + assertNotNull("expected none null - " + msg, br); + if (br.length != 0) { + assertEquals("expected zero bytes of length " + bytesSize + " - " + + msg + br.utf8ToString(), bytesSize, br.length); + for (int j = 0; j < br.length; j++) { + assertEquals("Byte at index " + j + " doesn't match - " + msg, 0, + br.bytes[br.offset + j]); + } + } + break; + default: + assertNotNull("expected none null - " + msg, br); + assertEquals(byteIndexValue + "", 0, br.length); + // make sure we advance at least until base + } + } + + // test the actual doc values added in this iteration + assertEquals(base + numRemainingValues, r.numDocs()); + int v = 0; + for (int i = base; i < r.numDocs(); i++) { + String msg = " field: " + byteIndexValue.name() + " at index: " + i + + " base: " + base + " numDocs:" + r.numDocs() + " bytesSize: " + + bytesSize + " src: " + bytes; + while (withDeletions && deleted.get(v++)) { + upto += bytesSize; + } + BytesRef br = bytes.getBytes(i, new BytesRef()); + assertTrue(msg, br.length > 0); + for (int j = 0; j < br.length; j++, upto++) { + if (!(br.bytes.length > br.offset + j)) + br = bytes.getBytes(i, new BytesRef()); + assertTrue("BytesRef index exceeded [" + msg + "] offset: " + + br.offset + " length: " + br.length + " index: " + + (br.offset + j), br.bytes.length > br.offset + j); + assertEquals("SourceRef Byte at index " + j + " doesn't match - " + + msg, upto, br.bytes[br.offset + j]); + } + } + + // clean up + closeables.add(r); + for (Closeable toClose : closeables) { + toClose.close(); + } + } + + w.close(); + d.close(); + } + + public void testGetArrayNumerics() throws IOException { + Directory d = newDirectory(); + IndexWriterConfig cfg = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); + IndexWriter w = new IndexWriter(d, cfg); + final int numValues = 50 + atLeast(10); + final List numVariantList = new ArrayList(NUMERICS); + Collections.shuffle(numVariantList, random()); + for (Type val : numVariantList) { + indexValues(w, numValues, val, numVariantList, + false, 7); + DirectoryReader r = DirectoryReader.open(w, true); + DocValues docValues = getDocValues(r, val.name()); + assertNotNull(docValues); + // make sure we don't get a direct source since they don't support getArray() + Source source = docValues.getSource(); + switch (source.getType()) { + case FIXED_INTS_8: + { + assertTrue(source.hasArray()); + byte[] values = (byte[]) source.getArray(); + for (int i = 0; i < numValues; i++) { + assertEquals((long)values[i], source.getInt(i)); + } + } + break; + case FIXED_INTS_16: + { + assertTrue(source.hasArray()); + short[] values = (short[]) source.getArray(); + for (int i = 0; i < numValues; i++) { + assertEquals((long)values[i], source.getInt(i)); + } + } + break; + case FIXED_INTS_32: + { + assertTrue(source.hasArray()); + int[] values = (int[]) source.getArray(); + for (int i = 0; i < numValues; i++) { + assertEquals((long)values[i], source.getInt(i)); + } + } + break; + case FIXED_INTS_64: + { + assertTrue(source.hasArray()); + long[] values = (long[]) source.getArray(); + for (int i = 0; i < numValues; i++) { + assertEquals(values[i], source.getInt(i)); + } + } + break; + case VAR_INTS: + assertFalse(source.hasArray()); + break; + case FLOAT_32: + { + assertTrue(source.hasArray()); + float[] values = (float[]) source.getArray(); + for (int i = 0; i < numValues; i++) { + assertEquals((double)values[i], source.getFloat(i), 0.0d); + } + } + break; + case FLOAT_64: + { + assertTrue(source.hasArray()); + double[] values = (double[]) source.getArray(); + for (int i = 0; i < numValues; i++) { + assertEquals(values[i], source.getFloat(i), 0.0d); + } + } + break; + default: + fail("unexpected value " + source.getType()); + } + r.close(); + } + w.close(); + d.close(); + } + + public void testGetArrayBytes() throws IOException { + Directory d = newDirectory(); + IndexWriterConfig cfg = newIndexWriterConfig(TEST_VERSION_CURRENT, + new MockAnalyzer(random())); + IndexWriter w = new IndexWriter(d, cfg); + final int numValues = 50 + atLeast(10); + // only single byte fixed straight supports getArray() + indexValues(w, numValues, Type.BYTES_FIXED_STRAIGHT, null, false, 1); + DirectoryReader r = DirectoryReader.open(w, true); + DocValues docValues = getDocValues(r, Type.BYTES_FIXED_STRAIGHT.name()); + assertNotNull(docValues); + // make sure we don't get a direct source since they don't support + // getArray() + Source source = docValues.getSource(); + + switch (source.getType()) { + case BYTES_FIXED_STRAIGHT: { + BytesRef ref = new BytesRef(); + if (source.hasArray()) { + byte[] values = (byte[]) source.getArray(); + for (int i = 0; i < numValues; i++) { + source.getBytes(i, ref); + assertEquals(1, ref.length); + assertEquals(values[i], ref.bytes[ref.offset]); + } + } + } + break; + default: + fail("unexpected value " + source.getType()); + } + r.close(); + w.close(); + d.close(); + } + + private DocValues getDocValues(IndexReader reader, String field) throws IOException { + final DocValues docValues = MultiDocValues.getDocValues(reader, field); + if (docValues == null) { + return docValues; + } + if (rarely()) { + docValues.setCache(new NotCachingSourceCache()); + } else { + if (!(docValues.getCache() instanceof DirectSourceCache)) { + docValues.setCache(new DirectSourceCache()); + } + } + return docValues; + } + + @SuppressWarnings("fallthrough") + private Source getSource(DocValues values) throws IOException { + // getSource uses cache internally + switch(random().nextInt(5)) { + case 3: + return values.loadSource(); + case 2: + return values.getDirectSource(); + case 1: + if(values.getType() == Type.BYTES_VAR_SORTED || values.getType() == Type.BYTES_FIXED_SORTED) { + return values.getSource().asSortedSource(); + } + default: + return values.getSource(); + } + } + + + private static EnumSet BYTES = EnumSet.of(Type.BYTES_FIXED_DEREF, + Type.BYTES_FIXED_STRAIGHT, Type.BYTES_VAR_DEREF, + Type.BYTES_VAR_STRAIGHT, Type.BYTES_FIXED_SORTED, Type.BYTES_VAR_SORTED); + + private static EnumSet NUMERICS = EnumSet.of(Type.VAR_INTS, + Type.FIXED_INTS_16, Type.FIXED_INTS_32, + Type.FIXED_INTS_64, + Type.FIXED_INTS_8, + Type.FLOAT_32, + Type.FLOAT_64); + + private FixedBitSet indexValues(IndexWriter w, int numValues, Type valueType, + List valueVarList, boolean withDeletions, int bytesSize) + throws IOException { + final boolean isNumeric = NUMERICS.contains(valueType); + FixedBitSet deleted = new FixedBitSet(numValues); + Document doc = new Document(); + final Field valField; + if (isNumeric) { + switch (valueType) { + case VAR_INTS: + valField = new PackedLongDocValuesField(valueType.name(), (long) 0); + break; + case FIXED_INTS_16: + valField = new ShortDocValuesField(valueType.name(), (short) 0); + break; + case FIXED_INTS_32: + valField = new IntDocValuesField(valueType.name(), 0); + break; + case FIXED_INTS_64: + valField = new LongDocValuesField(valueType.name(), (long) 0); + break; + case FIXED_INTS_8: + valField = new ByteDocValuesField(valueType.name(), (byte) 0); + break; + case FLOAT_32: + valField = new FloatDocValuesField(valueType.name(), (float) 0); + break; + case FLOAT_64: + valField = new DoubleDocValuesField(valueType.name(), (double) 0); + break; + default: + valField = null; + fail("unhandled case"); + } + } else { + switch (valueType) { + case BYTES_FIXED_STRAIGHT: + valField = new StraightBytesDocValuesField(valueType.name(), new BytesRef(), true); + break; + case BYTES_VAR_STRAIGHT: + valField = new StraightBytesDocValuesField(valueType.name(), new BytesRef(), false); + break; + case BYTES_FIXED_DEREF: + valField = new DerefBytesDocValuesField(valueType.name(), new BytesRef(), true); + break; + case BYTES_VAR_DEREF: + valField = new DerefBytesDocValuesField(valueType.name(), new BytesRef(), false); + break; + case BYTES_FIXED_SORTED: + valField = new SortedBytesDocValuesField(valueType.name(), new BytesRef(), true); + break; + case BYTES_VAR_SORTED: + valField = new SortedBytesDocValuesField(valueType.name(), new BytesRef(), false); + break; + default: + valField = null; + fail("unhandled case"); + } + } + doc.add(valField); + final BytesRef bytesRef = new BytesRef(); + + final String idBase = valueType.name() + "_"; + final byte[] b = new byte[bytesSize]; + if (bytesRef != null) { + bytesRef.bytes = b; + bytesRef.length = b.length; + bytesRef.offset = 0; + } + byte upto = 0; + for (int i = 0; i < numValues; i++) { + if (isNumeric) { + switch (valueType) { + case VAR_INTS: + valField.setLongValue((long)i); + break; + case FIXED_INTS_16: + valField.setShortValue((short)i); + break; + case FIXED_INTS_32: + valField.setIntValue(i); + break; + case FIXED_INTS_64: + valField.setLongValue((long)i); + break; + case FIXED_INTS_8: + valField.setByteValue((byte)(0xFF & (i % 128))); + break; + case FLOAT_32: + valField.setFloatValue(2.0f * i); + break; + case FLOAT_64: + valField.setDoubleValue(2.0d * i); + break; + default: + fail("unexpected value " + valueType); + } + } else { + for (int j = 0; j < b.length; j++) { + b[j] = upto++; + } + if (bytesRef != null) { + valField.setBytesValue(bytesRef); + } + } + doc.removeFields("id"); + doc.add(new StringField("id", idBase + i, Field.Store.YES)); + w.addDocument(doc); + + if (i % 7 == 0) { + if (withDeletions && random().nextBoolean()) { + Type val = valueVarList.get(random().nextInt(1 + valueVarList + .indexOf(valueType))); + final int randInt = val == valueType ? random().nextInt(1 + i) : random() + .nextInt(numValues); + w.deleteDocuments(new Term("id", val.name() + "_" + randInt)); + if (val == valueType) { + deleted.set(randInt); + } + } + if (random().nextInt(10) == 0) { + w.commit(); + } + } + } + w.commit(); + + // TODO test multi seg with deletions + if (withDeletions || random().nextBoolean()) { + w.forceMerge(1, true); + } + return deleted; + } + + public void testMultiValuedDocValuesField() throws Exception { + Directory d = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random(), d); + Document doc = new Document(); + Field f = new PackedLongDocValuesField("field", 17); + // Index doc values are single-valued so we should not + // be able to add same field more than once: + doc.add(f); + doc.add(f); + try { + w.addDocument(doc); + fail("didn't hit expected exception"); + } catch (IllegalArgumentException iae) { + // expected + } + + doc = new Document(); + doc.add(f); + w.addDocument(doc); + w.forceMerge(1); + DirectoryReader r = w.getReader(); + w.close(); + assertEquals(17, getOnlySegmentReader(r).docValues("field").loadSource().getInt(0)); + r.close(); + d.close(); + } + + public void testDifferentTypedDocValuesField() throws Exception { + Directory d = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random(), d); + Document doc = new Document(); + // Index doc values are single-valued so we should not + // be able to add same field more than once: + Field f; + doc.add(f = new PackedLongDocValuesField("field", 17)); + doc.add(new FloatDocValuesField("field", 22.0f)); + try { + w.addDocument(doc); + fail("didn't hit expected exception"); + } catch (IllegalArgumentException iae) { + // expected + } + + doc = new Document(); + doc.add(f); + w.addDocument(doc); + w.forceMerge(1); + DirectoryReader r = w.getReader(); + w.close(); + assertEquals(17, getOnlySegmentReader(r).docValues("field").loadSource().getInt(0)); + r.close(); + d.close(); + } + + public void testDifferentTypedDocValuesField2() throws Exception { + Directory d = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random(), d); + Document doc = new Document(); + // Index doc values are single-valued so we should not + // be able to add same field more than once: + Field f; + doc.add(f = new PackedLongDocValuesField("field", 17)); + doc.add(new SortedBytesDocValuesField("field", new BytesRef("hello"))); + try { + w.addDocument(doc); + fail("didn't hit expected exception"); + } catch (IllegalArgumentException iae) { + // expected + } + + doc = new Document(); + doc.add(f); + w.addDocument(doc); + w.forceMerge(1); + DirectoryReader r = w.getReader(); + w.close(); + assertEquals(17, getOnlySegmentReader(r).docValues("field").loadSource().getInt(0)); + r.close(); + d.close(); + } + + public void testSortedBytes() throws IOException { + Type[] types = new Type[] { Type.BYTES_FIXED_SORTED, Type.BYTES_VAR_SORTED }; + for (Type type : types) { + boolean fixed = type == Type.BYTES_FIXED_SORTED; + final Directory d = newDirectory(); + IndexWriterConfig cfg = newIndexWriterConfig(TEST_VERSION_CURRENT, + new MockAnalyzer(random())); + IndexWriter w = new IndexWriter(d, cfg); + int numDocs = atLeast(100); + BytesRefHash hash = new BytesRefHash(); + Map docToString = new HashMap(); + int len = 1 + random().nextInt(50); + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + doc.add(newTextField("id", "" + i, Field.Store.YES)); + String string = fixed ? _TestUtil.randomFixedByteLengthUnicodeString(random(), + len) : _TestUtil.randomRealisticUnicodeString(random(), 1, len); + BytesRef br = new BytesRef(string); + doc.add(new SortedBytesDocValuesField("field", br, type == Type.BYTES_FIXED_SORTED)); + hash.add(br); + docToString.put("" + i, string); + w.addDocument(doc); + } + if (rarely()) { + w.commit(); + } + int numDocsNoValue = atLeast(10); + for (int i = 0; i < numDocsNoValue; i++) { + Document doc = new Document(); + doc.add(newTextField("id", "noValue", Field.Store.YES)); + w.addDocument(doc); + } + BytesRef bytesRef = new BytesRef(fixed ? len : 0); + bytesRef.offset = 0; + bytesRef.length = fixed ? len : 0; + hash.add(bytesRef); // add empty value for the gaps + if (rarely()) { + w.commit(); + } + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + String id = "" + i + numDocs; + doc.add(newTextField("id", id, Field.Store.YES)); + String string = fixed ? _TestUtil.randomFixedByteLengthUnicodeString(random(), + len) : _TestUtil.randomRealisticUnicodeString(random(), 1, len); + BytesRef br = new BytesRef(string); + hash.add(br); + docToString.put(id, string); + doc.add(new SortedBytesDocValuesField("field", br, type == Type.BYTES_FIXED_SORTED)); + w.addDocument(doc); + } + w.commit(); + IndexReader reader = w.getReader(); + DocValues docValues = MultiDocValues.getDocValues(reader, "field"); + Source source = getSource(docValues); + SortedSource asSortedSource = source.asSortedSource(); + int[] sort = hash.sort(BytesRef.getUTF8SortedAsUnicodeComparator()); + BytesRef expected = new BytesRef(); + BytesRef actual = new BytesRef(); + assertEquals(hash.size(), asSortedSource.getValueCount()); + for (int i = 0; i < hash.size(); i++) { + hash.get(sort[i], expected); + asSortedSource.getByOrd(i, actual); + assertEquals(expected.utf8ToString(), actual.utf8ToString()); + int ord = asSortedSource.getOrdByValue(expected, actual); + assertEquals(i, ord); + } + AtomicReader slowR = SlowCompositeReaderWrapper.wrap(reader); + Set> entrySet = docToString.entrySet(); + + for (Entry entry : entrySet) { + int docId = docId(slowR, new Term("id", entry.getKey())); + expected = new BytesRef(entry.getValue()); + assertEquals(expected, asSortedSource.getBytes(docId, actual)); + } + + reader.close(); + w.close(); + d.close(); + } + } + + public int docId(AtomicReader reader, Term term) throws IOException { + int docFreq = reader.docFreq(term); + assertEquals(1, docFreq); + DocsEnum termDocsEnum = reader.termDocsEnum(term); + int nextDoc = termDocsEnum.nextDoc(); + assertEquals(DocIdSetIterator.NO_MORE_DOCS, termDocsEnum.nextDoc()); + return nextDoc; + } + + public void testWithThreads() throws Exception { + Random random = random(); + final int NUM_DOCS = atLeast(100); + final Directory dir = newDirectory(); + final RandomIndexWriter writer = new RandomIndexWriter(random, dir); + final boolean allowDups = random.nextBoolean(); + final Set seen = new HashSet(); + if (VERBOSE) { + System.out.println("TEST: NUM_DOCS=" + NUM_DOCS + " allowDups=" + allowDups); + } + int numDocs = 0; + final List docValues = new ArrayList(); + + // TODO: deletions + while (numDocs < NUM_DOCS) { + final String s; + if (random.nextBoolean()) { + s = _TestUtil.randomSimpleString(random); + } else { + s = _TestUtil.randomUnicodeString(random); + } + final BytesRef br = new BytesRef(s); + + if (!allowDups) { + if (seen.contains(s)) { + continue; + } + seen.add(s); + } + + if (VERBOSE) { + System.out.println(" " + numDocs + ": s=" + s); + } + + final Document doc = new Document(); + doc.add(new SortedBytesDocValuesField("stringdv", br)); + doc.add(new PackedLongDocValuesField("id", numDocs)); + docValues.add(br); + writer.addDocument(doc); + numDocs++; + + if (random.nextInt(40) == 17) { + // force flush + writer.getReader().close(); + } + } + + writer.forceMerge(1); + final DirectoryReader r = writer.getReader(); + writer.close(); + + final AtomicReader sr = getOnlySegmentReader(r); + final DocValues dv = sr.docValues("stringdv"); + assertNotNull(dv); + + final long END_TIME = System.currentTimeMillis() + (TEST_NIGHTLY ? 30 : 1); + + final DocValues.Source docIDToID = sr.docValues("id").getSource(); + + final int NUM_THREADS = _TestUtil.nextInt(random(), 1, 10); + Thread[] threads = new Thread[NUM_THREADS]; + for(int thread=0;thread