mirror of https://github.com/apache/lucene.git
LUCENE-8033: FieldInfos always use dense encoding
FieldInfos always use an array to store FieldInfo byNumber. Signed-off-by: Adrien Grand <jpountz@gmail.com>. Closes #320
This commit is contained in:
parent
899966b481
commit
7d07fbee5f
|
@ -147,6 +147,9 @@ Improvements
|
|||
* LUCENE-8152: Improve consumption of doc-value iterators. (Horatiu Lazu via
|
||||
Adrien Grand)
|
||||
|
||||
* LUCENE-8033: FieldInfos now always use a dense encoding. (Mayya Sharipova
|
||||
via Adrien Grand)
|
||||
|
||||
Bug Fixes
|
||||
|
||||
* LUCENE-8077: Fixed bug in how CheckIndex verifies doc-value iterators.
|
||||
|
|
|
@ -25,8 +25,8 @@ import java.util.HashSet;
|
|||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.SortedMap;
|
||||
import java.util.TreeMap;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
|
||||
|
@ -45,8 +45,7 @@ public class FieldInfos implements Iterable<FieldInfo> {
|
|||
private final boolean hasPointValues;
|
||||
|
||||
// used only by fieldInfo(int)
|
||||
private final FieldInfo[] byNumberTable; // contiguous
|
||||
private final SortedMap<Integer,FieldInfo> byNumberMap; // sparse
|
||||
private final FieldInfo[] byNumber;
|
||||
|
||||
private final HashMap<String,FieldInfo> byName = new HashMap<>();
|
||||
private final Collection<FieldInfo> values; // for an unmodifiable iterator
|
||||
|
@ -63,21 +62,28 @@ public class FieldInfos implements Iterable<FieldInfo> {
|
|||
boolean hasNorms = false;
|
||||
boolean hasDocValues = false;
|
||||
boolean hasPointValues = false;
|
||||
|
||||
TreeMap<Integer, FieldInfo> byNumber = new TreeMap<>();
|
||||
|
||||
int size = 0; // one more than the highest field number seen so far, i.e. the length of the dense byNumber array
|
||||
FieldInfo[] byNumberTemp = new FieldInfo[10]; // initial array capacity of 10
|
||||
for (FieldInfo info : infos) {
|
||||
if (info.number < 0) {
|
||||
throw new IllegalArgumentException("illegal field number: " + info.number + " for field " + info.name);
|
||||
}
|
||||
FieldInfo previous = byNumber.put(info.number, info);
|
||||
size = info.number >= size ? info.number+1 : size;
|
||||
if (info.number >= byNumberTemp.length){ //grow array
|
||||
byNumberTemp = ArrayUtil.grow(byNumberTemp, info.number + 1);
|
||||
}
|
||||
FieldInfo previous = byNumberTemp[info.number];
|
||||
if (previous != null) {
|
||||
throw new IllegalArgumentException("duplicate field numbers: " + previous.name + " and " + info.name + " have: " + info.number);
|
||||
}
|
||||
byNumberTemp[info.number] = info;
|
||||
|
||||
previous = byName.put(info.name, info);
|
||||
if (previous != null) {
|
||||
throw new IllegalArgumentException("duplicate field names: " + previous.number + " and " + info.number + " have: " + info.name);
|
||||
}
|
||||
|
||||
|
||||
hasVectors |= info.hasVectors();
|
||||
hasProx |= info.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
|
||||
hasFreq |= info.getIndexOptions() != IndexOptions.DOCS;
|
||||
|
@ -96,25 +102,16 @@ public class FieldInfos implements Iterable<FieldInfo> {
|
|||
this.hasNorms = hasNorms;
|
||||
this.hasDocValues = hasDocValues;
|
||||
this.hasPointValues = hasPointValues;
|
||||
Integer max = byNumber.isEmpty() ? null : byNumber.lastKey();
|
||||
|
||||
// Only use TreeMap in the very sparse case (< 1/16th of the numbers are used),
|
||||
// because TreeMap uses ~ 64 (32 bit JVM) or 120 (64 bit JVM w/o compressed oops)
|
||||
// overall bytes per entry, but array uses 4 (32 bit JVM) or 8
|
||||
// (64 bit JVM w/o compressed oops):
|
||||
if (max != null && max < ArrayUtil.MAX_ARRAY_LENGTH && max < 16L*byNumber.size()) {
|
||||
// Pull infos into an arraylist to avoid holding a reference to the TreeMap
|
||||
values = Collections.unmodifiableCollection(new ArrayList<>(byNumber.values()));
|
||||
byNumberMap = null;
|
||||
byNumberTable = new FieldInfo[max+1];
|
||||
for (Map.Entry<Integer,FieldInfo> entry : byNumber.entrySet()) {
|
||||
byNumberTable[entry.getKey()] = entry.getValue();
|
||||
|
||||
List<FieldInfo> valuesTemp = new ArrayList<>();
|
||||
byNumber = new FieldInfo[size];
|
||||
for(int i=0; i<size; i++){
|
||||
byNumber[i] = byNumberTemp[i];
|
||||
if (byNumberTemp[i] != null) {
|
||||
valuesTemp.add(byNumberTemp[i]);
|
||||
}
|
||||
} else {
|
||||
byNumberMap = byNumber;
|
||||
values = Collections.unmodifiableCollection(byNumber.values());
|
||||
byNumberTable = null;
|
||||
}
|
||||
values = Collections.unmodifiableCollection(Arrays.asList(valuesTemp.toArray(new FieldInfo[0])));
|
||||
}
|
||||
|
||||
/** Returns true if any fields have freqs */
|
||||
|
@ -192,14 +189,10 @@ public class FieldInfos implements Iterable<FieldInfo> {
|
|||
if (fieldNumber < 0) {
|
||||
throw new IllegalArgumentException("Illegal field number: " + fieldNumber);
|
||||
}
|
||||
if (byNumberTable != null) {
|
||||
if (fieldNumber >= byNumberTable.length) {
|
||||
return null;
|
||||
}
|
||||
return byNumberTable[fieldNumber];
|
||||
} else {
|
||||
return byNumberMap.get(fieldNumber);
|
||||
if (fieldNumber >= byNumber.length) {
|
||||
return null;
|
||||
}
|
||||
return byNumber[fieldNumber];
|
||||
}
|
||||
|
||||
static final class FieldDimensions {
|
||||
|
|
|
@ -0,0 +1,92 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.index;
|
||||
|
||||
|
||||
import java.util.Iterator;
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
public class TestFieldInfos extends LuceneTestCase {
|
||||
|
||||
public void testFieldInfos() throws Exception{
|
||||
Directory dir = newDirectory();
|
||||
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
|
||||
.setMergePolicy(NoMergePolicy.INSTANCE));
|
||||
|
||||
Document d1 = new Document();
|
||||
for (int i = 0; i < 15; i++) {
|
||||
d1.add(new StringField("f" + i, "v" + i, Field.Store.YES));
|
||||
}
|
||||
writer.addDocument(d1);
|
||||
writer.commit();
|
||||
|
||||
Document d2 = new Document();
|
||||
d2.add(new StringField("f0", "v0", Field.Store.YES));
|
||||
d2.add(new StringField("f15", "v15", Field.Store.YES));
|
||||
d2.add(new StringField("f16", "v16", Field.Store.YES));
|
||||
writer.addDocument(d2);
|
||||
writer.commit();
|
||||
|
||||
Document d3 = new Document();
|
||||
writer.addDocument(d3);
|
||||
writer.close();
|
||||
|
||||
SegmentInfos sis = SegmentInfos.readLatestCommit(dir);
|
||||
assertEquals(3, sis.size());
|
||||
|
||||
FieldInfos fis1 = IndexWriter.readFieldInfos(sis.info(0));
|
||||
FieldInfos fis2 = IndexWriter.readFieldInfos(sis.info(1));
|
||||
FieldInfos fis3 = IndexWriter.readFieldInfos(sis.info(2));
|
||||
|
||||
// testing dense FieldInfos
|
||||
Iterator<FieldInfo> it = fis1.iterator();
|
||||
int i = 0;
|
||||
while(it.hasNext()) {
|
||||
FieldInfo fi = it.next();
|
||||
assertEquals(i, fi.number);
|
||||
assertEquals("f" + i , fi.name);
|
||||
assertEquals("f" + i, fis1.fieldInfo(i).name); //lookup by number
|
||||
assertEquals("f" + i, fis1.fieldInfo("f" + i).name); //lookup by name
|
||||
i++;
|
||||
}
|
||||
|
||||
// testing sparse FieldInfos
|
||||
assertEquals("f0", fis2.fieldInfo(0).name); //lookup by number
|
||||
assertEquals("f0", fis2.fieldInfo("f0").name); //lookup by name
|
||||
assertNull(fis2.fieldInfo(1));
|
||||
assertNull(fis2.fieldInfo("f1"));
|
||||
assertEquals("f15", fis2.fieldInfo(15).name);
|
||||
assertEquals("f15", fis2.fieldInfo("f15").name);
|
||||
assertEquals("f16", fis2.fieldInfo(16).name);
|
||||
assertEquals("f16", fis2.fieldInfo("f16").name);
|
||||
|
||||
// testing empty FieldInfos
|
||||
assertNull(fis3.fieldInfo(0)); //lookup by number
|
||||
assertNull(fis3.fieldInfo("f0")); //lookup by name
|
||||
assertEquals(0, fis3.size());
|
||||
Iterator<FieldInfo> it3 = fis3.iterator();
|
||||
assertFalse(it3.hasNext());
|
||||
dir.close();
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue