From fba2d427974bca3331034df082ecaeacd4750d07 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Tue, 12 Oct 2010 05:39:37 +0000 Subject: [PATCH 001/116] Created docvalues branch - see LUCENE-2186 for reference git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1021634 13f79535-47bb-0310-9956-ffa450edef68 From 1c66cf571b0faace76c67b7b9616f0f3b5a245db Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Tue, 12 Oct 2010 06:15:03 +0000 Subject: [PATCH 002/116] LUCENE-2186: First cut at column-stride fields (index values storage) git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1021636 13f79535-47bb-0310-9956-ffa450edef68 --- .../contrib/benchmark/conf/sort-standard.alg | 5 +- .../benchmark/byTask/feeds/DocMaker.java | 37 +- .../byTask/tasks/SearchWithSortTask.java | 28 +- .../apache/lucene/document/AbstractField.java | 13 + .../org/apache/lucene/document/Fieldable.java | 4 + .../apache/lucene/document/ValuesField.java | 136 ++++ .../lucene/index/CompoundFileReader.java | 2 +- .../lucene/index/CompoundFileWriter.java | 13 +- .../apache/lucene/index/DirectoryReader.java | 264 +++++++ .../lucene/index/DocFieldProcessor.java | 166 +++++ .../index/DocFieldProcessorPerThread.java | 26 +- .../org/apache/lucene/index/FieldInfo.java | 18 + .../org/apache/lucene/index/FieldInfos.java | 94 ++- .../org/apache/lucene/index/FieldsEnum.java | 3 +- .../lucene/index/FilterIndexReader.java | 1 + .../apache/lucene/index/IndexFileNames.java | 8 + .../org/apache/lucene/index/IndexReader.java | 13 + .../org/apache/lucene/index/SegmentInfo.java | 9 +- .../apache/lucene/index/SegmentMerger.java | 116 ++- .../apache/lucene/index/SegmentReader.java | 74 +- .../org/apache/lucene/index/values/Bytes.java | 301 ++++++++ .../org/apache/lucene/index/values/Cache.java | 116 +++ .../index/values/FixedDerefBytesImpl.java | 262 +++++++ .../index/values/FixedSortedBytesImpl.java | 258 +++++++ .../index/values/FixedStraightBytesImpl.java | 221 ++++++ .../apache/lucene/index/values/Floats.java | 389 +++++++++++ .../org/apache/lucene/index/values/Ints.java | 32 + .../lucene/index/values/PackedIntsImpl.java | 240 +++++++ .../apache/lucene/index/values/Reader.java | 109 +++ .../apache/lucene/index/values/Values.java | 48 ++ .../lucene/index/values/ValuesAttribute.java | 34 + .../index/values/ValuesAttributeImpl.java | 151 ++++ .../lucene/index/values/ValuesEnum.java | 62 ++ .../index/values/VarDerefBytesImpl.java | 255 +++++++ .../index/values/VarSortedBytesImpl.java | 344 +++++++++ .../index/values/VarStraightBytesImpl.java | 232 ++++++ .../apache/lucene/index/values/Writer.java | 92 +++ .../apache/lucene/search/FieldComparator.java | 128 +++- .../apache/lucene/search/ReqExclScorer.java | 2 +- .../apache/lucene/search/ReqOptSumScorer.java | 2 +- .../org/apache/lucene/search/SortField.java | 41 +- .../org/apache/lucene/util/ArrayUtil.java | 13 + .../org/apache/lucene/util/ByteBlockPool.java | 12 + .../java/org/apache/lucene/util/BytesRef.java | 6 +- .../org/apache/lucene/util/BytesRefHash.java | 61 ++ .../org/apache/lucene/util/FloatsRef.java | 91 +++ .../java/org/apache/lucene/util/IOUtils.java | 30 + .../java/org/apache/lucene/util/LongsRef.java | 91 +++ .../org/apache/lucene/util/ParallelArray.java | 57 ++ .../apache/lucene/util/packed/Packed64.java | 2 +- .../lucene/index/TestIndexWriterConfig.java | 2 +- .../codecs/preflexrw/TermInfosWriter.java | 2 +- .../lucene/index/values/TestIndexValues.java | 658 ++++++++++++++++++ .../org/apache/lucene/util/_TestUtil.java | 31 + 54 
files changed, 5357 insertions(+), 48 deletions(-) create mode 100644 lucene/src/java/org/apache/lucene/document/ValuesField.java create mode 100644 lucene/src/java/org/apache/lucene/index/values/Bytes.java create mode 100644 lucene/src/java/org/apache/lucene/index/values/Cache.java create mode 100644 lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java create mode 100644 lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java create mode 100644 lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java create mode 100644 lucene/src/java/org/apache/lucene/index/values/Floats.java create mode 100644 lucene/src/java/org/apache/lucene/index/values/Ints.java create mode 100644 lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java create mode 100644 lucene/src/java/org/apache/lucene/index/values/Reader.java create mode 100644 lucene/src/java/org/apache/lucene/index/values/Values.java create mode 100644 lucene/src/java/org/apache/lucene/index/values/ValuesAttribute.java create mode 100644 lucene/src/java/org/apache/lucene/index/values/ValuesAttributeImpl.java create mode 100644 lucene/src/java/org/apache/lucene/index/values/ValuesEnum.java create mode 100644 lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java create mode 100644 lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java create mode 100644 lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java create mode 100644 lucene/src/java/org/apache/lucene/index/values/Writer.java create mode 100644 lucene/src/java/org/apache/lucene/util/FloatsRef.java create mode 100644 lucene/src/java/org/apache/lucene/util/LongsRef.java create mode 100644 lucene/src/java/org/apache/lucene/util/ParallelArray.java create mode 100644 lucene/src/test/org/apache/lucene/index/values/TestIndexValues.java diff --git a/lucene/contrib/benchmark/conf/sort-standard.alg b/lucene/contrib/benchmark/conf/sort-standard.alg index c7413fc594b..85fbf892bd1 100644 --- a/lucene/contrib/benchmark/conf/sort-standard.alg +++ b/lucene/contrib/benchmark/conf/sort-standard.alg @@ -26,6 +26,7 @@ analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer directory=FSDirectory #directory=RamDirectory +doc.index.props=true doc.stored=true doc.tokenized=true doc.term.vector=false @@ -66,6 +67,4 @@ log.queries=true } : 4 } - -RepSumByName - +RepSumByName \ No newline at end of file diff --git a/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java b/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java index 191e1ac34f7..de030dadf23 100644 --- a/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java +++ b/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java @@ -19,6 +19,7 @@ package org.apache.lucene.benchmark.byTask.feeds; import java.io.IOException; import java.io.UnsupportedEncodingException; +import java.util.Collections; import java.util.HashMap; import java.util.Map; import java.util.Properties; @@ -28,9 +29,11 @@ import org.apache.lucene.benchmark.byTask.utils.Config; import org.apache.lucene.benchmark.byTask.utils.Format; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.ValuesField; import org.apache.lucene.document.Field.Index; import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.Field.TermVector; +import 
org.apache.lucene.index.values.Values; /** * Creates {@link Document} objects. Uses a {@link ContentSource} to generate @@ -157,12 +160,13 @@ public class DocMaker { private long lastPrintedNumUniqueBytes = 0; private int printNum = 0; + private Map fieldVauleMap; // create a doc // use only part of the body, modify it to keep the rest (or use all if size==0). // reset the docdata properties so they are not added more than once. private Document createDocument(DocData docData, int size, int cnt) throws UnsupportedEncodingException { - + Values valueType; final DocState ds = getDocState(); final Document doc = reuseFields ? ds.doc : new Document(); doc.getFields().clear(); @@ -178,6 +182,7 @@ public class DocMaker { name = cnt < 0 ? name : name + "_" + cnt; Field nameField = ds.getField(NAME_FIELD, storeVal, indexVal, termVecVal); nameField.setValue(name); + trySetIndexValues(nameField); doc.add(nameField); // Set DATE_FIELD @@ -187,12 +192,14 @@ public class DocMaker { } Field dateField = ds.getField(DATE_FIELD, storeVal, indexVal, termVecVal); dateField.setValue(date); + trySetIndexValues(dateField); doc.add(dateField); // Set TITLE_FIELD String title = docData.getTitle(); Field titleField = ds.getField(TITLE_FIELD, storeVal, indexVal, termVecVal); titleField.setValue(title == null ? "" : title); + trySetIndexValues(titleField); doc.add(titleField); String body = docData.getBody(); @@ -214,12 +221,15 @@ public class DocMaker { } Field bodyField = ds.getField(BODY_FIELD, bodyStoreVal, bodyIndexVal, termVecVal); bodyField.setValue(bdy); + trySetIndexValues(bodyField); doc.add(bodyField); if (storeBytes) { Field bytesField = ds.getField(BYTES_FIELD, Store.YES, Index.NOT_ANALYZED_NO_NORMS, TermVector.NO); bytesField.setValue(bdy.getBytes("UTF-8")); + trySetIndexValues(bytesField); doc.add(bytesField); + } } @@ -229,6 +239,7 @@ public class DocMaker { for (final Map.Entry entry : props.entrySet()) { Field f = ds.getField((String) entry.getKey(), storeVal, indexVal, termVecVal); f.setValue((String) entry.getValue()); + trySetIndexValues(f); doc.add(f); } docData.setProps(null); @@ -238,6 +249,12 @@ public class DocMaker { //System.out.println("============== Created doc "+numDocsCreated+" :\n"+doc+"\n=========="); return doc; } + + private void trySetIndexValues(Field field) { + final Values valueType; + if((valueType = fieldVauleMap.get(field.name())) != null) + ValuesField.set(field, valueType); + } private void resetLeftovers() { leftovr.set(null); @@ -367,6 +384,22 @@ public class DocMaker { resetLeftovers(); } + private static final Map parseValueFields(String fields) { + if(fields == null) + return Collections.emptyMap(); + String[] split = fields.split(";"); + Map result = new HashMap(); + for (String tuple : split) { + final String[] nameValue = tuple.split(":"); + if (nameValue.length != 2) { + throw new IllegalArgumentException("illegal doc.stored.values format: " + + fields + " expected fieldname:ValuesType;...;...;"); + } + result.put(nameValue[0].trim(), Values.valueOf(nameValue[1])); + } + return result; + } + /** Set the configuration parameters of this doc maker. */ public void setConfig(Config config) { this.config = config; @@ -386,6 +419,7 @@ public class DocMaker { boolean norms = config.get("doc.tokenized.norms", false); boolean bodyNorms = config.get("doc.body.tokenized.norms", true); boolean termVec = config.get("doc.term.vector", false); + fieldVauleMap = parseValueFields(config.get("doc.stored.values", null)); storeVal = (stored ? 
Field.Store.YES : Field.Store.NO); bodyStoreVal = (bodyStored ? Field.Store.YES : Field.Store.NO); if (tokenized) { @@ -423,7 +457,6 @@ public class DocMaker { docState = new ThreadLocal(); indexProperties = config.get("doc.index.props", false); - updateDocIDLimit = config.get("doc.random.id.limit", -1); if (updateDocIDLimit != -1) { r = new Random(179); diff --git a/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchWithSortTask.java b/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchWithSortTask.java index e00583f6e28..4662b55a63b 100644 --- a/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchWithSortTask.java +++ b/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchWithSortTask.java @@ -75,8 +75,7 @@ public class SearchWithSortTask extends ReadTask { } else { throw new RuntimeException("You must specify the sort type ie page:int,subject:string"); } - int type = getType(typeString); - sortField0 = new SortField(fieldName, type); + sortField0 = getSortField(fieldName, typeString); } sortFields[upto++] = sortField0; } @@ -86,12 +85,26 @@ public class SearchWithSortTask extends ReadTask { System.arraycopy(sortFields, 0, newSortFields, 0, upto); sortFields = newSortFields; } + this.sort = new Sort(sortFields); } - private int getType(String typeString) { - int type; - if (typeString.equals("float")) { + private SortField getSortField(String fieldName, String typeString) { + boolean useIndexValues = false; + int type = -1; + if (typeString.equals("intvalues")) { + useIndexValues = true; + type = SortField.INT; + } else if (typeString.equals("floatvalues")) { + useIndexValues = true; + type = SortField.FLOAT; + } else if (typeString.equals("stringvalues")) { + useIndexValues = true; + type = SortField.STRING; + } else if (typeString.equals("bytesvalues")) { + useIndexValues = true; + type = SortField.BYTES; + } else if (typeString.equals("float")) { type = SortField.FLOAT; } else if (typeString.equals("double")) { type = SortField.DOUBLE; @@ -110,7 +123,10 @@ public class SearchWithSortTask extends ReadTask { } else { throw new RuntimeException("Unrecognized sort field type " + typeString); } - return type; + + SortField f = new SortField(fieldName, type); + f.setUseIndexValues(useIndexValues); + return f; } @Override diff --git a/lucene/src/java/org/apache/lucene/document/AbstractField.java b/lucene/src/java/org/apache/lucene/document/AbstractField.java index 8f025aca33c..0bbd6841265 100755 --- a/lucene/src/java/org/apache/lucene/document/AbstractField.java +++ b/lucene/src/java/org/apache/lucene/document/AbstractField.java @@ -19,6 +19,7 @@ import org.apache.lucene.search.PhraseQuery; // for javadocs import org.apache.lucene.search.spans.SpanQuery; // for javadocs import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.index.FieldInvertState; +import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.StringHelper; // for javadocs @@ -292,4 +293,16 @@ public abstract class AbstractField implements Fieldable { result.append('>'); return result.toString(); } + private AttributeSource source; + + public boolean hasFieldAttribute() { + return source != null; + } + + public AttributeSource getFieldAttributes() { + if(source == null) + source = new AttributeSource(); + return source; + } + } diff --git a/lucene/src/java/org/apache/lucene/document/Fieldable.java b/lucene/src/java/org/apache/lucene/document/Fieldable.java index 
3b48ad53350..eaed7090c85 100755 --- a/lucene/src/java/org/apache/lucene/document/Fieldable.java +++ b/lucene/src/java/org/apache/lucene/document/Fieldable.java @@ -20,6 +20,7 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.index.FieldInvertState; // for javadocs import org.apache.lucene.search.PhraseQuery; // for javadocs import org.apache.lucene.search.spans.SpanQuery; // for javadocs +import org.apache.lucene.util.AttributeSource; import java.io.Reader; import java.io.Serializable; @@ -209,4 +210,7 @@ public interface Fieldable extends Serializable { * silently fail to find results. */ void setOmitTermFreqAndPositions(boolean omitTermFreqAndPositions); + + boolean hasFieldAttribute(); + AttributeSource getFieldAttributes(); } diff --git a/lucene/src/java/org/apache/lucene/document/ValuesField.java b/lucene/src/java/org/apache/lucene/document/ValuesField.java new file mode 100644 index 00000000000..df9332dc42d --- /dev/null +++ b/lucene/src/java/org/apache/lucene/document/ValuesField.java @@ -0,0 +1,136 @@ +package org.apache.lucene.document; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +import java.io.IOException; +import java.io.Reader; +import java.util.Comparator; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.document.Field.Index; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.Field.TermVector; +import org.apache.lucene.index.values.Values; +import org.apache.lucene.index.values.ValuesAttribute; +import org.apache.lucene.util.AttributeSource; +import org.apache.lucene.util.BytesRef; + +/** + * + */ +@SuppressWarnings("serial") +public class ValuesField extends AbstractField { + private final ValuesAttribute attr; + private final AttributeSource fieldAttributes; + + + public ValuesField(String name) { + super(name, Store.NO, Index.NO, TermVector.NO); + fieldAttributes = getFieldAttributes(); + attr = fieldAttributes.addAttribute(ValuesAttribute.class); + } + + ValuesField() { + this(""); + } + + public Reader readerValue() { + return null; + } + + public String stringValue() { + return null; + } + + public TokenStream tokenStreamValue() { + return tokenStream; + } + + public void setInt(long value) { + attr.setType(Values.PACKED_INTS); + attr.ints().set(value); + } + + public void setFloat(float value) { + attr.setType(Values.SIMPLE_FLOAT_4BYTE); + attr.floats().set(value); + } + + public void setFloat(double value) { + attr.setType(Values.SIMPLE_FLOAT_8BYTE); + attr.floats().set(value); + } + + public void setBytes(BytesRef value, Values type) { + setBytes(value, type, null); + + } + + public void setBytes(BytesRef value, Values type, Comparator comp) { + attr.setType(type); + attr.bytes().copy(value); + attr.setBytesComparator(comp); + } + + public ValuesAttribute values() { + return attr; + } + + public T set(T field) { + AttributeSource src = field.getFieldAttributes(); + src.addAttribute(ValuesAttribute.class); + fieldAttributes.copyTo(field.getFieldAttributes()); + return field; + } + + public static ValuesAttribute values(Fieldable fieldable) { + return fieldable.getFieldAttributes().addAttribute(ValuesAttribute.class); + } + + public static T set(T field, Values type) { + if(field instanceof ValuesField) + return field; + final ValuesField valField = new ValuesField(); + switch (type) { + case BYTES_FIXED_DEREF: + case BYTES_FIXED_SORTED: + case BYTES_FIXED_STRAIGHT: + case BYTES_VAR_DEREF: + case BYTES_VAR_SORTED: + case BYTES_VAR_STRAIGHT: + BytesRef ref = field.isBinary() ? 
new BytesRef(field.getBinaryValue(), + field.getBinaryOffset(), field.getBinaryLength()) : new BytesRef(field + .stringValue()); + valField.setBytes(ref, type); + break; + case PACKED_INTS: + case PACKED_INTS_FIXED: + valField.setInt(Long.parseLong(field.stringValue())); + break; + case SIMPLE_FLOAT_4BYTE: + valField.setFloat(Float.parseFloat(field.stringValue())); + break; + case SIMPLE_FLOAT_8BYTE: + valField.setFloat(Double.parseDouble(field.stringValue())); + break; + default: + throw new IllegalArgumentException("unknown type: " + type); + } + + return valField.set(field); + } +} diff --git a/lucene/src/java/org/apache/lucene/index/CompoundFileReader.java b/lucene/src/java/org/apache/lucene/index/CompoundFileReader.java index 72813ae162c..fa13aaf656b 100644 --- a/lucene/src/java/org/apache/lucene/index/CompoundFileReader.java +++ b/lucene/src/java/org/apache/lucene/index/CompoundFileReader.java @@ -157,7 +157,7 @@ public class CompoundFileReader extends Directory { throw new IOException("Stream closed"); id = IndexFileNames.stripSegmentName(id); - FileEntry entry = entries.get(id); + final FileEntry entry = entries.get(id); if (entry == null) throw new IOException("No sub-file with id " + id + " found"); diff --git a/lucene/src/java/org/apache/lucene/index/CompoundFileWriter.java b/lucene/src/java/org/apache/lucene/index/CompoundFileWriter.java index e5c11d3275f..a11dab49d03 100644 --- a/lucene/src/java/org/apache/lucene/index/CompoundFileWriter.java +++ b/lucene/src/java/org/apache/lucene/index/CompoundFileWriter.java @@ -49,9 +49,13 @@ import java.io.IOException; */ final class CompoundFileWriter { - private static final class FileEntry { + static final class FileEntry { + + FileEntry(String file) { + this.file = file; + } /** source file */ - String file; + final String file; /** temporary holder for the start of directory entry for this file */ long directoryOffset; @@ -128,10 +132,7 @@ final class CompoundFileWriter { if (! ids.add(file)) throw new IllegalArgumentException( "File " + file + " already added"); - - FileEntry entry = new FileEntry(); - entry.file = file; - entries.add(entry); + entries.add(new FileEntry(file)); } /** Merge files with the extensions added up to now. 
diff --git a/lucene/src/java/org/apache/lucene/index/DirectoryReader.java b/lucene/src/java/org/apache/lucene/index/DirectoryReader.java index 523fb30e447..2d29a248847 100644 --- a/lucene/src/java/org/apache/lucene/index/DirectoryReader.java +++ b/lucene/src/java/org/apache/lucene/index/DirectoryReader.java @@ -36,7 +36,14 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.Lock; import org.apache.lucene.store.LockObtainFailedException; import org.apache.lucene.index.codecs.CodecProvider; +import org.apache.lucene.index.values.Reader; +import org.apache.lucene.index.values.Values; +import org.apache.lucene.index.values.ValuesEnum; +import org.apache.lucene.index.values.Reader.Source; +import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.Bits; +import org.apache.lucene.util.FloatsRef; +import org.apache.lucene.util.LongsRef; import org.apache.lucene.util.ReaderUtil; import org.apache.lucene.util.BytesRef; @@ -990,7 +997,264 @@ class DirectoryReader extends IndexReader implements Cloneable { return commits; } + + public Reader getIndexValues(String field) { + ensureOpen(); + if (subReaders.length == 1) { + return subReaders[0].getIndexValues(field); + } + return new MultiValueReader(field); + } + + private class MultiValueReader extends Reader { + private String id; + private Values value; + + public MultiValueReader(String id) { + this.id = id; + for (SegmentReader reader : subReaders) { + FieldInfo fieldInfo = reader.fieldInfos().fieldInfo(id); + if(fieldInfo != null){ + value = fieldInfo.getIndexValues(); + break; + } + } + } + + @Override + public ValuesEnum getEnum(AttributeSource source) throws IOException { + return new MultiValuesEnum(id, value); + } + + @Override + public Source load() throws IOException { + return new MultiSource(id); + } + + public void close() throws IOException { + // + } + + } + + private class MultiValuesEnum extends ValuesEnum { + private int numDocs_ = 0; + private int pos = -1; + private int start = 0; + private final String id; + private final ValuesEnum[] enumCache; + private ValuesEnum current; + + protected MultiValuesEnum(String id, Values enumType) { + super(enumType); + enumCache = new ValuesEnum[subReaders.length]; + this.id = id; + } + + @Override + public void close() throws IOException { + for (ValuesEnum valuesEnum : enumCache) { + if(valuesEnum != null) + valuesEnum.close(); + } + } + + @Override + public int advance( int target) throws IOException { + int n = target - start; + do { + if(target >= maxDoc) + return pos = NO_MORE_DOCS; + if (n >= numDocs_) { + int idx = readerIndex(target); + if (enumCache[idx] == null) { + try { + Reader indexValues = subReaders[idx].getIndexValues(id); + if (indexValues != null) // nocommit does that work with default + // values? + enumCache[idx] = indexValues.getEnum(this.attributes()); + else + enumCache[idx] = new DummyEnum(this.attributes(), + subReaders[idx].maxDoc(), attr.type()); + } catch (IOException ex) { + // nocommit what to do here? 
+ throw new RuntimeException(ex); + } + } + current = enumCache[idx]; + start = starts[idx]; + numDocs_ = subReaders[idx].maxDoc(); + n = target - start; + } + target = start+numDocs_; + } while ((n = current.advance(n)) == NO_MORE_DOCS); + return pos = start+current.docID(); + } + + + @Override + public int docID() { + return pos; + } + + @Override + public int nextDoc() throws IOException { + return advance(pos+1); + } + } + + private class MultiSource extends Source { + private int numDocs_ = 0; + private int start = 0; + private Source current; + private final String id; + + MultiSource(String id) { + this.id = id; + } + + public long ints(int docID) { + int n = docID - start; + if(n >= numDocs_) { + int idx = readerIndex(docID); + try{ + current = subReaders[idx].getIndexValuesCache().getInts(id); + if(current == null) //nocommit does that work with default values? + current = new DummySource(); + }catch(IOException ex) { + // nocommit what to do here? + throw new RuntimeException(ex); + } + start = starts[idx]; + numDocs_ = subReaders[idx].maxDoc(); + n = docID - start; + } + return current.ints(n); + } + + public double floats(int docID) { + int n = docID - start; + if(n >= numDocs_) { + int idx = readerIndex(docID); + try{ + current = subReaders[idx].getIndexValuesCache().getFloats(id); + if(current == null) //nocommit does that work with default values? + current = new DummySource(); + }catch(IOException ex) { + // nocommit what to do here? + throw new RuntimeException(ex); + } + numDocs_ = subReaders[idx].maxDoc(); + + start = starts[idx]; + n = docID - start; + } + return current.floats(n); + } + + public BytesRef bytes(int docID) { + int n = docID - start; + if(n >= numDocs_) { + int idx = readerIndex(docID); + try{ + current = subReaders[idx].getIndexValuesCache().getBytes(id); + if(current == null) //nocommit does that work with default values? + current = new DummySource(); + }catch(IOException ex) { + // nocommit what to do here? 
+ throw new RuntimeException(ex); + } + numDocs_ = subReaders[idx].maxDoc(); + start = starts[idx]; + n = docID - start; + } + return current.bytes(n); + } + + public long ramBytesUsed() { + return current.ramBytesUsed(); + } + + } + + private static class DummySource extends Source { + private final BytesRef ref = new BytesRef(); + @Override + public BytesRef bytes(int docID) { + return ref; + } + + + @Override + public double floats(int docID) { + return 0.0d; + } + + @Override + public long ints(int docID) { + return 0; + } + + public long ramBytesUsed() { + return 0; + } + } + + private static class DummyEnum extends ValuesEnum { + private int pos = -1; + private final int maxDoc; + + public DummyEnum(AttributeSource source, int maxDoc, Values type) { + super(source, type); + this.maxDoc = maxDoc; + switch (type) { + case BYTES_VAR_STRAIGHT: + case BYTES_FIXED_STRAIGHT: + case BYTES_FIXED_DEREF: + case BYTES_FIXED_SORTED: + case BYTES_VAR_DEREF: + case BYTES_VAR_SORTED: + // nocommit - this is not correct for Fixed_straight + BytesRef bytes = attr.bytes(); + bytes.length = 0; + bytes.offset = 0; + break; + case PACKED_INTS: + case PACKED_INTS_FIXED: + LongsRef ints = attr.ints(); + ints.set(0); + break; + + case SIMPLE_FLOAT_4BYTE: + case SIMPLE_FLOAT_8BYTE: + FloatsRef floats = attr.floats(); + floats.set(0d); + break; + default: + throw new IllegalArgumentException("unknown Values type: " + type); + } + } + @Override + public void close() throws IOException { + } + + @Override + public int advance(int target) throws IOException { + return pos = (pos < maxDoc ? target: NO_MORE_DOCS); + } + @Override + public int docID() { + return pos; + } + @Override + public int nextDoc() throws IOException { + return advance(pos+1); + } + + } + + private static final class ReaderCommit extends IndexCommit { private String segmentsFileName; Collection files; diff --git a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java index c2a586a6ca2..dee8168937f 100644 --- a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java +++ b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java @@ -17,8 +17,19 @@ package org.apache.lucene.index; * limitations under the License. 
*/ +import org.apache.lucene.store.Directory; +import org.apache.lucene.index.values.Ints; +import org.apache.lucene.index.values.Floats; +import org.apache.lucene.index.values.Bytes; +import org.apache.lucene.index.values.ValuesAttribute; +import org.apache.lucene.index.values.Writer; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.FloatsRef; +import org.apache.lucene.util.LongsRef; + import java.io.IOException; import java.util.Collection; +import java.util.Comparator; import java.util.Map; import java.util.HashMap; @@ -37,6 +48,153 @@ final class DocFieldProcessor extends DocConsumer { final FieldInfos fieldInfos = new FieldInfos(); final DocFieldConsumer consumer; final StoredFieldsWriter fieldsWriter; + final private Map indexValues = new HashMap(); + + synchronized IndexValuesProcessor getProcessor(Directory dir, String segment, String name, ValuesAttribute attr, FieldInfo fieldInfo) + throws IOException { + if(attr == null) + return null; + IndexValuesProcessor p = indexValues.get(name); + if (p == null) { + org.apache.lucene.index.values.Values v = attr.type(); + final String id = segment + "_" + fieldInfo.number; + switch(v) { + case PACKED_INTS: + p = new IntValuesProcessor(dir, id, false); + break; + case PACKED_INTS_FIXED: + p = new IntValuesProcessor(dir, id, true); + break; + case SIMPLE_FLOAT_4BYTE: + p = new FloatValuesProcessor(dir, id, 4); + break; + case SIMPLE_FLOAT_8BYTE: + p = new FloatValuesProcessor(dir, id, 8); + break; + case BYTES_FIXED_STRAIGHT: + p = new BytesValuesProcessor(dir, id, true, null, Bytes.Mode.STRAIGHT); + break; + case BYTES_FIXED_DEREF: + p = new BytesValuesProcessor(dir, id, true, null, Bytes.Mode.DEREF); + break; + case BYTES_FIXED_SORTED: + p = new BytesValuesProcessor(dir, id, true, attr.bytesComparator(), Bytes.Mode.SORTED); + break; + case BYTES_VAR_STRAIGHT: + p = new BytesValuesProcessor(dir, id, false, null, Bytes.Mode.STRAIGHT); + break; + case BYTES_VAR_DEREF: + p = new BytesValuesProcessor(dir, id, false, null, Bytes.Mode.DEREF); + break; + case BYTES_VAR_SORTED: + p = new BytesValuesProcessor(dir, id, false, attr.bytesComparator(), Bytes.Mode.SORTED); + break; + } + fieldInfo.setIndexValues(v); + indexValues.put(name, p); + } + + return p; + } + + static abstract class IndexValuesProcessor { + public abstract void add(int docID, String name, ValuesAttribute attr) throws IOException; + public abstract void finish(int docCount) throws IOException; + public abstract void files(Collection files) throws IOException; + } + + static class FloatValuesProcessor extends IndexValuesProcessor { + private final Writer writer; + private final String id; + + public FloatValuesProcessor(Directory dir, String id, int precision) throws IOException { + this.id = id; + writer = Floats.getWriter(dir, id, precision); + } + + @Override + public void add(int docID, String name, ValuesAttribute attr) throws IOException { + final FloatsRef floats = attr.floats(); + if(floats != null) { + writer.add(docID, floats.get()); + return; + } + throw new IllegalArgumentException("could not extract float/double from field " + name); + } + + @Override + public void finish(int docCount) throws IOException { + writer.finish(docCount); + } + + @Override + public void files(Collection files) { + Floats.files(id, files); + } + } + + static class IntValuesProcessor extends IndexValuesProcessor { + private final Writer writer; + private final String id; + + public IntValuesProcessor(Directory dir, String id, boolean fixedArray) throws IOException { + 
this.id = id; + writer = Ints.getWriter(dir, id, fixedArray); + } + + @Override + public void add(int docID, String name, ValuesAttribute attr) throws IOException { + final LongsRef ints = attr.ints(); + if(ints != null) { + writer.add(docID, ints.get()); + return; + } + throw new IllegalArgumentException("could not extract int/long from field " + name); + } + + @Override + public void finish(int docCount) throws IOException { + writer.finish(docCount); + } + + @Override + public void files(Collection files) throws IOException { + Ints.files(id, files); + } + } + + static class BytesValuesProcessor extends IndexValuesProcessor { + private final Writer writer; + private final String id; + private final Directory dir; + + public BytesValuesProcessor(Directory dir, String id, boolean fixedSize, Comparator comp, Bytes.Mode mode) throws IOException { + this.id = id; + writer = Bytes.getWriter(dir, id, mode,comp, fixedSize); + this.dir = dir; + } + + // nocommit -- make this thread private and not sync'd + @Override + public synchronized void add(int docID, String name, ValuesAttribute attr) throws IOException { + final BytesRef bytes = attr.bytes(); + if(bytes != null) { + writer.add(docID, bytes); + return; + } + throw new IllegalArgumentException("could not extract byte[] from field " + name); + } + + @Override + public void finish(int docCount) throws IOException { + writer.finish(docCount); + } + + @Override + public void files(Collection files) throws IOException { + Bytes.files(dir, id, files); + } + } public DocFieldProcessor(DocumentsWriter docWriter, DocFieldConsumer consumer) { this.docWriter = docWriter; @@ -63,6 +221,14 @@ final class DocFieldProcessor extends DocConsumer { fieldsWriter.flush(state); consumer.flush(childThreadsAndFields, state); + for(IndexValuesProcessor p : indexValues.values()) { + if (p != null) { + p.finish(state.numDocs); + p.files(state.flushedFiles); + } + } + indexValues.clear(); + // Important to save after asking consumer to flush so // consumer can alter the FieldInfo* if necessary. EG, // FreqProxTermsWriter does this with diff --git a/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java b/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java index 51e46201c50..56e7dea9597 100644 --- a/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java +++ b/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java @@ -20,10 +20,16 @@ package org.apache.lucene.index; import java.util.Collection; import java.util.HashSet; import java.util.List; +import java.util.Set; +import java.util.Map.Entry; import java.io.IOException; + +import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Document; import org.apache.lucene.document.Fieldable; +import org.apache.lucene.index.values.ValuesAttribute; import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.RamUsageEstimator; /** @@ -243,10 +249,24 @@ final class DocFieldProcessorPerThread extends DocConsumerPerThread { // enabled; we could save [small amount of] CPU // here. quickSort(fields, 0, fieldCount-1); + - for(int i=0;i= YES || dir.fileExists(delFileName))) { fileSet.add(delFileName); } - + //nocommit - is there a better way to get all the dat / idx files? 
+ for(String file : dir.listAll()) { + if(file.startsWith(name) && (file.endsWith("dat") || file.endsWith("idx"))){ + fileSet.add(file); + } + } if (normGen != null) { for (int i = 0; i < normGen.length; i++) { long gen = normGen[i]; diff --git a/lucene/src/java/org/apache/lucene/index/SegmentMerger.java b/lucene/src/java/org/apache/lucene/index/SegmentMerger.java index 70ed9f0807c..4546b3d8278 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentMerger.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentMerger.java @@ -31,6 +31,12 @@ import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.index.codecs.Codec; import org.apache.lucene.index.codecs.MergeState; import org.apache.lucene.index.codecs.FieldsConsumer; +import org.apache.lucene.index.values.Bytes; +import org.apache.lucene.index.values.Ints; +import org.apache.lucene.index.values.Reader; +import org.apache.lucene.index.values.Floats; +import org.apache.lucene.index.values.Values; +import org.apache.lucene.index.values.Writer; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; @@ -157,6 +163,8 @@ final class SegmentMerger { if (mergeDocStores && fieldInfos.hasVectors()) mergeVectors(); + mergeIndexValues(); + return mergedDocs; } @@ -170,6 +178,12 @@ final class SegmentMerger { reader.close(); } } + + private void addIfExists(Set files, String file, Directory dir) throws IOException{ + if(dir.fileExists(file)){ + files.add(file); + } + } final List createCompoundFile(String fileName, final SegmentInfo info) throws IOException { @@ -183,13 +197,20 @@ final class SegmentMerger { !ext.equals(IndexFileNames.FIELDS_INDEX_EXTENSION))) fileSet.add(IndexFileNames.segmentFileName(segment, "", ext)); } - codec.files(directory, info, fileSet); // Fieldable norm files - int numFIs = fieldInfos.size(); + final int numFIs = fieldInfos.size(); for (int i = 0; i < numFIs; i++) { - FieldInfo fi = fieldInfos.fieldInfo(i); + final FieldInfo fi = fieldInfos.fieldInfo(i); + // Index Values aka. CSF + if (fi.indexValues != null) { + addIfExists(fileSet, IndexFileNames.segmentFileName(segment, Integer + .toString(fi.number), IndexFileNames.CSF_DATA_EXTENSION), directory); + addIfExists(fileSet, IndexFileNames.segmentFileName(segment, Integer + .toString(fi.number), IndexFileNames.CSF_INDEX_EXTENSION), + directory); + } if (fi.isIndexed && !fi.omitNorms) { fileSet.add(IndexFileNames.segmentFileName(segment, "", IndexFileNames.NORMS_EXTENSION)); break; @@ -288,10 +309,18 @@ final class SegmentMerger { int numReaderFieldInfos = readerFieldInfos.size(); for (int j = 0; j < numReaderFieldInfos; j++) { FieldInfo fi = readerFieldInfos.fieldInfo(j); - fieldInfos.add(fi.name, fi.isIndexed, fi.storeTermVector, - fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, - !reader.hasNorms(fi.name), fi.storePayloads, - fi.omitTermFreqAndPositions); + FieldInfo merged = fieldInfos.add(fi.name, fi.isIndexed, fi.storeTermVector, + fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, + !reader.hasNorms(fi.name), fi.storePayloads, + fi.omitTermFreqAndPositions); + final Values fiIndexValues = fi.indexValues; + final Values mergedIndexValues = merged.indexValues; + if (mergedIndexValues == null) { + merged.setIndexValues(fiIndexValues); + } else if (mergedIndexValues != fiIndexValues) { + // nocommit -- what to do? 
+ throw new IllegalStateException("cannot merge field " + fi.name + " indexValues changed from " + mergedIndexValues + " to " + fiIndexValues); + } } } else { addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, false); @@ -302,6 +331,8 @@ final class SegmentMerger { addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.STORES_PAYLOADS), false, false, false, true, false); addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.INDEXED), false, false, false, false, false); fieldInfos.add(reader.getFieldNames(FieldOption.UNINDEXED), false); + + // nocommit -- how should we handle index values here? } } fieldInfos.write(directory, segment + ".fnm"); @@ -362,6 +393,77 @@ final class SegmentMerger { return docCount; } + private void mergeIndexValues() throws IOException { + final int numFields = fieldInfos.size(); + for (int i = 0; i < numFields; i++) { + final FieldInfo fieldInfo = fieldInfos.fieldInfo(i); + final Values v = fieldInfo.indexValues; + // nocommit we need some kind of compatibility notation for values such + // that two slighly different segments can be merged eg. fixed vs. + // variable byte len or float32 vs. float64 + + if (v != null) { + int docBase = 0; + final List mergeStates = new ArrayList(); + for (IndexReader reader : readers) { + Reader r = reader.getIndexValues(fieldInfo.name); + if (r != null) { + mergeStates.add(new Writer.MergeState(r, docBase, reader + .maxDoc(), reader.getDeletedDocs())); + } + docBase += reader.numDocs(); + } + if (mergeStates.isEmpty()) { + continue; + } + final String id = segment + "_" + fieldInfo.number; + final Writer writer; + switch (v) { + case PACKED_INTS: + case PACKED_INTS_FIXED: + writer = Ints.getWriter(directory, id, true); + break; + case SIMPLE_FLOAT_4BYTE: + writer = Floats.getWriter(directory, id, 4); + break; + case SIMPLE_FLOAT_8BYTE: + writer = Floats.getWriter(directory, id, 8); + break; + case BYTES_FIXED_STRAIGHT: + writer = Bytes.getWriter(directory, id, + Bytes.Mode.STRAIGHT, null, true); + break; + case BYTES_FIXED_DEREF: + writer = Bytes.getWriter(directory, id, + Bytes.Mode.DEREF, null, true); + break; + case BYTES_FIXED_SORTED: + // nocommit -- enable setting Comparator + writer = Bytes.getWriter(directory, id, + Bytes.Mode.SORTED, null, true); + break; + case BYTES_VAR_STRAIGHT: + writer = Bytes.getWriter(directory, id, + Bytes.Mode.STRAIGHT, null, false); + break; + case BYTES_VAR_DEREF: + writer = Bytes.getWriter(directory, id, + Bytes.Mode.DEREF, null, false); + break; + case BYTES_VAR_SORTED: + // nocommit -- enable setting Comparator + writer = Bytes.getWriter(directory, id, + Bytes.Mode.SORTED, null, false); + break; + default: + continue; + } + writer.add(mergeStates); + writer.finish(mergedDocs); + } + } + } + private int copyFieldsWithDeletions(final FieldsWriter fieldsWriter, final IndexReader reader, final FieldsReader matchingFieldsReader) throws IOException, MergeAbortedException, CorruptIndexException { diff --git a/lucene/src/java/org/apache/lucene/index/SegmentReader.java b/lucene/src/java/org/apache/lucene/index/SegmentReader.java index 3b71ab64c34..bbbd90e43b2 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentReader.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentReader.java @@ -30,6 +30,7 @@ import java.util.Set; import java.util.concurrent.atomic.AtomicInteger; import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; import 
org.apache.lucene.document.FieldSelector; import org.apache.lucene.search.Similarity; import org.apache.lucene.store.BufferedIndexInput; @@ -41,6 +42,11 @@ import org.apache.lucene.util.Bits; import org.apache.lucene.util.CloseableThreadLocal; import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.index.codecs.FieldsProducer; +import org.apache.lucene.index.values.Bytes; +import org.apache.lucene.index.values.Ints; +import org.apache.lucene.index.values.Reader; +import org.apache.lucene.index.values.Floats; +import org.apache.lucene.index.values.Values; import org.apache.lucene.search.FieldCache; // not great (circular); used only to purge FieldCache entry on close import org.apache.lucene.util.BytesRef; @@ -135,7 +141,7 @@ public class SegmentReader extends IndexReader implements Cloneable { // Ask codec for its Fields fields = si.getCodec().fieldsProducer(new SegmentReadState(cfsDir, si, fieldInfos, readBufferSize, termsIndexDivisor)); assert fields != null; - + openIndexValuesReaders(cfsDir, si); success = true; } finally { if (!success) { @@ -150,6 +156,57 @@ public class SegmentReader extends IndexReader implements Cloneable { this.origInstance = origInstance; } + final Map indexValues = new HashMap(); + + // Only opens files... doesn't actually load any values + private void openIndexValuesReaders(Directory dir, SegmentInfo si) throws IOException { + final int numFields = fieldInfos.size(); + for(int i=0;i + * NOTE: The total amount of byte[] data stored (across a single segment) cannot + * exceed 2GB. + *

+ * NOTE: Each byte[] must be <= 32768 bytes in length
+ */ +//nocommit - add mmap version +//nocommti - add bulk copy where possible +public final class Bytes { + + // don't instantiate! + private Bytes() { + } + + public static enum Mode { + STRAIGHT, DEREF, SORTED + }; + + public static void files(Directory dir, String id, Collection files) + throws IOException { + files.add(IndexFileNames.segmentFileName(id, "", + IndexFileNames.CSF_DATA_EXTENSION)); + final String idxFile = IndexFileNames.segmentFileName(id, "", + IndexFileNames.CSF_INDEX_EXTENSION); + if (dir.fileExists(idxFile)) { + files.add(idxFile); + } + } + + // nocommit -- i shouldn't have to specify fixed? can + // track itself & do the write thing at write time? + public static Writer getWriter(Directory dir, String id, Mode mode, + Comparator comp, boolean fixedSize) throws IOException { + + if (comp == null) { + comp = BytesRef.getUTF8SortedAsUnicodeComparator(); + } + + if (fixedSize) { + if (mode == Mode.STRAIGHT) { + return new FixedStraightBytesImpl.Writer(dir, id); + } else if (mode == Mode.DEREF) { + return new FixedDerefBytesImpl.Writer(dir, id); + } else if (mode == Mode.SORTED) { + return new FixedSortedBytesImpl.Writer(dir, id, comp); + } + } else { + if (mode == Mode.STRAIGHT) { + return new VarStraightBytesImpl.Writer(dir, id); + } else if (mode == Mode.DEREF) { + return new VarDerefBytesImpl.Writer(dir, id); + } else if (mode == Mode.SORTED) { + return new VarSortedBytesImpl.Writer(dir, id, comp); + } + } + + throw new IllegalArgumentException(""); + } + + // nocommit -- I can peek @ header to determing fixed/mode? + public static Reader getReader(Directory dir, String id, Mode mode, + boolean fixedSize, int maxDoc) throws IOException { + if (fixedSize) { + if (mode == Mode.STRAIGHT) { + try { + return new FixedStraightBytesImpl.Reader(dir, id, maxDoc); + } catch (IOException e) { + throw e; + } + } else if (mode == Mode.DEREF) { + try { + return new FixedDerefBytesImpl.Reader(dir, id, maxDoc); + } catch (IOException e) { + throw e; + } + } else if (mode == Mode.SORTED) { + return new FixedSortedBytesImpl.Reader(dir, id, maxDoc); + } + } else { + if (mode == Mode.STRAIGHT) { + return new VarStraightBytesImpl.Reader(dir, id, maxDoc); + } else if (mode == Mode.DEREF) { + return new VarDerefBytesImpl.Reader(dir, id, maxDoc); + } else if (mode == Mode.SORTED) { + return new VarSortedBytesImpl.Reader(dir, id, maxDoc); + } + } + + throw new IllegalArgumentException(""); + } + + static abstract class BytesBaseSource extends Source { + protected final IndexInput datIn; + protected final IndexInput idxIn; + protected final BytesRef defaultValue = new BytesRef(); + + protected BytesBaseSource(IndexInput datIn, IndexInput idxIn) { + this.datIn = datIn; + this.idxIn = idxIn; + } + + public void close() throws IOException { + if (datIn != null) + datIn.close(); + if (idxIn != null) // if straight + idxIn.close(); + + } + } + + static abstract class BytesBaseSortedSource extends SortedSource { + protected final IndexInput datIn; + protected final IndexInput idxIn; + protected final BytesRef defaultValue = new BytesRef(); + + protected BytesBaseSortedSource(IndexInput datIn, IndexInput idxIn) { + this.datIn = datIn; + this.idxIn = idxIn; + } + + public void close() throws IOException { + if (datIn != null) + datIn.close(); + if (idxIn != null) // if straight + idxIn.close(); + + } + } + + static abstract class BytesWriterBase extends Writer { + + private final Directory dir; + private final String id; + protected IndexOutput idxOut; + protected IndexOutput datOut; + protected 
BytesRef bytesRef; + private String codecName; + private int version; + protected final ByteBlockPool pool; + protected final AtomicLong bytesUsed; + + protected BytesWriterBase(Directory dir, String id, String codecName, + int version, boolean initIndex, boolean initData, ByteBlockPool pool, AtomicLong bytesUsed) throws IOException { + this.dir = dir; + this.id = id; + this.codecName = codecName; + this.version = version; + this.pool = pool; + this.bytesUsed = bytesUsed; + if (initData) + initDataOut(); + if (initIndex) + initIndexOut(); + } + + protected void initDataOut() throws IOException { + datOut = dir.createOutput(IndexFileNames.segmentFileName(id, "", + IndexFileNames.CSF_DATA_EXTENSION)); + CodecUtil.writeHeader(datOut, codecName, version); + } + + protected void initIndexOut() throws IOException { + idxOut = dir.createOutput(IndexFileNames.segmentFileName(id, "", + IndexFileNames.CSF_INDEX_EXTENSION)); + CodecUtil.writeHeader(idxOut, codecName, version); + } + + public long ramBytesUsed() { + return bytesUsed.get(); + } + + /** + * Must be called only with increasing docIDs. It's OK for some docIDs to be + * skipped; they will be filled with 0 bytes. + */ + @Override + public abstract void add(int docID, BytesRef bytes) throws IOException; + + @Override + public synchronized void finish(int docCount) throws IOException { + if (datOut != null) + datOut.close(); + if (idxOut != null) + idxOut.close(); + if(pool != null) + pool.reset(); + } + + @Override + protected void add(int docID) throws IOException { + add(docID, bytesRef); + } + + @Override + protected void setNextAttribute(ValuesAttribute attr) { + bytesRef = attr.bytes(); + assert bytesRef != null; + } + } + + /** + * Opens all necessary files, but does not read any data in until you call + * {@link #load}. + */ + static abstract class BytesReaderBase extends Reader { + protected final IndexInput idxIn; + protected final IndexInput datIn; + protected final int version; + protected final String id; + + protected BytesReaderBase(Directory dir, String id, String codecName, + int maxVersion, boolean doIndex) throws IOException { + this.id = id; + datIn = dir.openInput(IndexFileNames.segmentFileName(id, "", + IndexFileNames.CSF_DATA_EXTENSION)); + version = CodecUtil.checkHeader(datIn, codecName, maxVersion, maxVersion); + + if (doIndex) { + idxIn = dir.openInput(IndexFileNames.segmentFileName(id, "", + IndexFileNames.CSF_INDEX_EXTENSION)); + final int version2 = CodecUtil.checkHeader(idxIn, codecName, + maxVersion, maxVersion); + assert version == version2; + } else { + idxIn = null; + } + } + + protected final IndexInput cloneData() { + assert !isClosed.get():printEx(); + // is never NULL + return (IndexInput) datIn.clone(); + } + + protected final IndexInput cloneIndex() { + assert !isClosed.get():printEx(); + return idxIn == null ? 
null : (IndexInput) idxIn.clone(); + } + private final AtomicBoolean isClosed = new AtomicBoolean(false); + Exception ex; + public void close() throws IOException { + assert !isClosed.getAndSet(true); + ex =new Exception(); + if (datIn != null) { + datIn.close(); + } + if (idxIn != null) { + idxIn.close(); + } + } + + private String printEx() { + ex.printStackTrace(); + return ex.getMessage(); + } + } + +} \ No newline at end of file diff --git a/lucene/src/java/org/apache/lucene/index/values/Cache.java b/lucene/src/java/org/apache/lucene/index/values/Cache.java new file mode 100644 index 00000000000..3f3b9dc4890 --- /dev/null +++ b/lucene/src/java/org/apache/lucene/index/values/Cache.java @@ -0,0 +1,116 @@ +package org.apache.lucene.index.values; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.Comparator; +import java.util.HashMap; +import java.util.Map; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.values.Reader.SortedSource; +import org.apache.lucene.index.values.Reader.Source; +import org.apache.lucene.util.BytesRef; + +public class Cache { + final IndexReader r; + // TODO(simonw): use WeakHashMaps instead here? + final Map ints = new HashMap(); + final Map floats = new HashMap(); + final Map bytes = new HashMap(); + final Map sortedBytes = new HashMap(); + + public Cache(IndexReader r) { + this.r = r; + } + + synchronized public Source getInts(String id) throws IOException { + Source s = ints.get(id); + if (s == null) { + final Reader indexValues = r.getIndexValues(id); + if (indexValues == null) { + return null; + } + s = indexValues.load(); + ints.put(id, s); + } + + return s; + } + + synchronized public Source getFloats(String id) throws IOException { + Source s = floats.get(id); + if (s == null) { + final Reader indexValues = r.getIndexValues(id); + if (indexValues == null) { + return null; + } + s = indexValues.load(); + floats.put(id, s); + } + + return s; + } + + synchronized public SortedSource getSortedBytes(String id, + Comparator comp) throws IOException { + SortedSource s = sortedBytes.get(id); + if (s == null) { + final Reader indexValues = r.getIndexValues(id); + if (indexValues == null) { + return null; + } + s = indexValues.loadSorted(comp); + sortedBytes.put(id, s); + } else { + // TODO(simonw): verify comp is the same! 
+ } + + return s; + } + + synchronized public Source getBytes(String id) throws IOException { + Source s = bytes.get(id); + if (s == null) { + final Reader indexValues = r.getIndexValues(id); + if (indexValues == null) { + return null; + } + s = indexValues.load(); + bytes.put(id, s); + } + + return s; + } + + public void purgeInts(String id) { + ints.remove(id); + } + + public void purgeFloats(String id) { + floats.remove(id); + } + + public void purgeBytes(String id) { + bytes.remove(id); + } + + public void purgeSortedBytes(String id) { + sortedBytes.remove(id); + } +} diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java new file mode 100644 index 00000000000..3cac5b20ac2 --- /dev/null +++ b/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java @@ -0,0 +1,262 @@ +package org.apache.lucene.index.values; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.concurrent.atomic.AtomicLong; + +import org.apache.lucene.index.values.Bytes.BytesBaseSource; +import org.apache.lucene.index.values.Bytes.BytesReaderBase; +import org.apache.lucene.index.values.Bytes.BytesWriterBase; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.AttributeSource; +import org.apache.lucene.util.ByteBlockPool; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefHash; +import org.apache.lucene.util.RamUsageEstimator; +import org.apache.lucene.util.ByteBlockPool.Allocator; +import org.apache.lucene.util.ByteBlockPool.DirectAllocator; +import org.apache.lucene.util.packed.PackedInts; + +// Stores fixed-length byte[] by deref, ie when two docs +// have the same value, they store only 1 byte[] + +class FixedDerefBytesImpl { + + static final String CODEC_NAME = "FixedDerefBytes"; + static final int VERSION_START = 0; + static final int VERSION_CURRENT = VERSION_START; + + static class Writer extends BytesWriterBase { + private int size = -1; + private int[] docToID; + private final BytesRefHash hash = new BytesRefHash(pool); + + public Writer(Directory dir, String id) throws IOException { + this(dir, id, new DirectAllocator(ByteBlockPool.BYTE_BLOCK_SIZE), + new AtomicLong()); + } + + public Writer(Directory dir, String id, Allocator allocator, + AtomicLong bytesUsed) throws IOException { + super(dir, id, CODEC_NAME, VERSION_CURRENT, false, false, + new ByteBlockPool(allocator), bytesUsed); + docToID = new int[1]; + bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT); + } + + @Override + synchronized public void add(int docID, BytesRef bytes) throws IOException { + 
if(bytes.length == 0) // default value - skip it + return; + if (size == -1) { + size = bytes.length; + initDataOut(); + datOut.writeInt(size); + } else if (bytes.length != size) { + throw new IllegalArgumentException("expected bytes size=" + size + + " but got " + bytes.length); + } + int ord = hash.add(bytes); + + if (ord >= 0) { + // new added entry + datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length); + } else { + ord = (-ord)-1; + } + + if (docID >= docToID.length) { + int size = docToID.length; + docToID = ArrayUtil.grow(docToID, 1 + docID); + bytesUsed.addAndGet((docToID.length - size) * RamUsageEstimator.NUM_BYTES_INT); + } + docToID[docID] = 1+ord; + } + + + // Important that we get docCount, in case there were + // some last docs that we didn't see + @Override + synchronized public void finish(int docCount) throws IOException { + if (datOut == null) // no added data + return; + initIndexOut(); + final int count = 1+hash.size(); + idxOut.writeInt(count - 1); + // write index + final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount, + PackedInts.bitsRequired(count - 1)); + final int limit = docCount > docToID.length ? docToID.length : docCount; + for (int i = 0; i < limit; i++) { + w.add(docToID[i]); + } + // fill up remaining doc with zeros + for (int i = limit; i < docCount; i++) { + w.add(0); + } + w.finish(); + hash.clear(); + + super.finish(docCount); + } + } + + public static class Reader extends BytesReaderBase { + private final int size; + + Reader(Directory dir, String id, int maxDoc) throws IOException { + super(dir, id, CODEC_NAME, VERSION_START, true); + try { + size = datIn.readInt(); + } catch (IOException e) { + throw e; + } + } + + @Override + public Source load() throws IOException { + return new Source(cloneData(), cloneIndex(), size); + } + + private static class Source extends BytesBaseSource { + // TODO: paged data or mmap? + private final byte[] data; + private final BytesRef bytesRef = new BytesRef(); + private final PackedInts.Reader index; + private final int numValue; + private final int size; + + protected Source(IndexInput datIn, IndexInput idxIn, int size) + throws IOException { + super(datIn, idxIn); + this.size = size; + numValue = idxIn.readInt(); + data = new byte[size * numValue]; + datIn.readBytes(data, 0, size * numValue); + index = PackedInts.getReader(idxIn); + bytesRef.bytes = data; + bytesRef.length = size; + } + + @Override + public BytesRef bytes(int docID) { + final int id = (int) index.get(docID); + if (id == 0) { + return defaultValue; + } + bytesRef.offset = ((id - 1) * size); + return bytesRef; + } + + public long ramBytesUsed() { + // TODO(simonw): move ram calculation to PackedInts?! 
+ return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + + data.length + + (RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + index + .getBitsPerValue() + * index.size()); + } + + @Override + public int getValueCount() { + return numValue; + } + } + + @Override + public ValuesEnum getEnum(AttributeSource source) throws IOException { + return new DerefBytesEnum(source, cloneData(), cloneIndex(), CODEC_NAME, + size); + } + + static class DerefBytesEnum extends ValuesEnum { + protected final IndexInput datIn; + private final PackedInts.ReaderIterator idx; + protected final long fp; + private final int size; + protected final BytesRef ref; + private final int valueCount; + private int pos = -1; + + public DerefBytesEnum(AttributeSource source, IndexInput datIn, + IndexInput idxIn, String codecName, int size) throws IOException { + this(source, datIn, idxIn, codecName, size, Values.BYTES_FIXED_DEREF); + } + + protected DerefBytesEnum(AttributeSource source, IndexInput datIn, + IndexInput idxIn, String codecName, int size, Values enumType) + throws IOException { + super(source, enumType); + ref = attr.bytes(); + this.datIn = datIn; + this.size = size == -1 ? 128 : size; + idxIn.readInt();// read valueCount + idx = PackedInts.getReaderIterator(idxIn); + fp = datIn.getFilePointer(); + ref.grow(this.size); + ref.length = this.size; + ref.offset = 0; + valueCount = idx.size(); + } + + @Override + public int advance(int target) throws IOException { + if (target < valueCount) { + final long address = idx.advance(target); + pos = idx.ord(); + if(address == 0) { + // default is empty + ref.length = 0; + ref.offset = 0; + return pos; + } + fill(address, ref); + return pos; + } + return pos = NO_MORE_DOCS; + } + + @Override + public int nextDoc() throws IOException { + return advance(pos + 1); + } + + public void close() throws IOException { + datIn.close(); + idx.close(); + } + + protected void fill(long address, BytesRef ref) throws IOException { + datIn.seek(fp + ((address - 1) * size)); + datIn.readBytes(ref.bytes, 0, size); + ref.length = size; + ref.offset = 0; + } + + @Override + public int docID() { + return pos; + } + + } + } + +} diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java new file mode 100644 index 00000000000..350aa765530 --- /dev/null +++ b/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java @@ -0,0 +1,258 @@ +package org.apache.lucene.index.values; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+import java.io.IOException;
+import java.util.Comparator;
+import java.util.concurrent.atomic.AtomicLong;
+
+import org.apache.lucene.index.values.Bytes.BytesBaseSortedSource;
+import org.apache.lucene.index.values.Bytes.BytesReaderBase;
+import org.apache.lucene.index.values.Bytes.BytesWriterBase;
+import org.apache.lucene.index.values.FixedDerefBytesImpl.Reader.DerefBytesEnum;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.ByteBlockPool;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefHash;
+import org.apache.lucene.util.CodecUtil;
+import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.ByteBlockPool.Allocator;
+import org.apache.lucene.util.ByteBlockPool.DirectAllocator;
+import org.apache.lucene.util.packed.PackedInts;
+
+// Stores fixed-length byte[] in value-sorted order: each unique value is
+// written once, and every doc references its value via a dense ord into
+// that sorted order
+
+class FixedSortedBytesImpl {
+
+  static final String CODEC_NAME = "FixedSortedBytes";
+  static final int VERSION_START = 0;
+  static final int VERSION_CURRENT = VERSION_START;
+
+  static class Writer extends BytesWriterBase {
+    private int size = -1;
+    private int[] docToEntry;
+    private final Comparator<BytesRef> comp;
+
+    private final BytesRefHash hash = new BytesRefHash(pool);
+
+    public Writer(Directory dir, String id, Comparator<BytesRef> comp)
+        throws IOException {
+      this(dir, id, comp, new DirectAllocator(ByteBlockPool.BYTE_BLOCK_SIZE),
+          new AtomicLong());
+    }
+
+    public Writer(Directory dir, String id, Comparator<BytesRef> comp,
+        Allocator allocator, AtomicLong bytesUsed) throws IOException {
+      super(dir, id, CODEC_NAME, VERSION_CURRENT, false, false,
+          new ByteBlockPool(allocator), bytesUsed);
+      docToEntry = new int[1];
+//      docToEntry[0] = -1;
+      bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT);
+      this.comp = comp;
+    }
+
+    @Override
+    synchronized public void add(int docID, BytesRef bytes) throws IOException {
+      if (bytes.length == 0)
+        return; // default - skip it
+      if (size == -1) {
+        size = bytes.length;
+        initDataOut();
+        datOut.writeInt(size);
+      } else if (bytes.length != size) {
+        throw new IllegalArgumentException("expected bytes size=" + size
+            + " but got " + bytes.length);
+      }
+      if (docID >= docToEntry.length) {
+        int[] newArray = new int[ArrayUtil.oversize(1 + docID,
+            RamUsageEstimator.NUM_BYTES_INT)];
+        System.arraycopy(docToEntry, 0, newArray, 0, docToEntry.length);
+//        Arrays.fill(newArray, docToEntry.length, newArray.length, -1);
+        bytesUsed.addAndGet((newArray.length - docToEntry.length)
+            * RamUsageEstimator.NUM_BYTES_INT);
+        docToEntry = newArray;
+      }
+      int e = hash.add(bytes);
+      docToEntry[docID] = 1 + (e < 0 ? (-e) - 1 : e);
+    }
+
+    // Important that we get docCount, in case there were
+    // some last docs that we didn't see
+    @Override
+    synchronized public void finish(int docCount) throws IOException {
+      if (datOut == null) // no data added
+        return;
+      initIndexOut();
+      final int[] sortedEntries = hash.sort(comp);
+      final int count = hash.size();
+      int[] address = new int[count];
+      // first dump bytes data, recording address as we go
+      for (int i = 0; i < count; i++) {
+        final int e = sortedEntries[i];
+        final BytesRef bytes = hash.get(e);
+        assert bytes.length == size;
+        datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length);
+        address[e] = 1 + i;
+      }
+
+      idxOut.writeInt(count);
+      // write index
+      final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount,
+          PackedInts.bitsRequired(count));
+      final int limit;
+      if (docCount > docToEntry.length) {
+        limit = docToEntry.length;
+      } else {
+        limit = docCount;
+      }
+      for (int i = 0; i < limit; i++) {
+        final int e = docToEntry[i];
+        if (e == 0) { // no value for this doc
+          w.add(0);
+        } else {
+          assert e > 0 && e <= count : "index must be > 0 && <= " + count
+              + " was: " + e;
+          w.add(address[e - 1]);
+        }
+      }
+
+      for (int i = limit; i < docCount; i++) {
+        w.add(0);
+      }
+      w.finish();
+      hash.clear();
+      super.finish(docCount);
+    }
+  }
+
+  public static class Reader extends BytesReaderBase {
+    private final int size;
+
+    Reader(Directory dir, String id, int maxDoc) throws IOException {
+      super(dir, id, CODEC_NAME, VERSION_START, true);
+      size = datIn.readInt();
+    }
+
+    @Override
+    public org.apache.lucene.index.values.Reader.Source load()
+        throws IOException {
+      return loadSorted(null);
+    }
+
+    @Override
+    public SortedSource loadSorted(Comparator<BytesRef> comp) throws IOException {
+      return new Source(cloneData(), cloneIndex(), size, comp);
+    }
+
+    private static class Source extends BytesBaseSortedSource {
+
+      // TODO: paged data
+      private final byte[] data;
+      private final BytesRef bytesRef = new BytesRef();
+      private final PackedInts.Reader index;
+      private final LookupResult lookupResult = new LookupResult();
+      private final int numValue;
+      private final Comparator<BytesRef> comp;
+      private final int size;
+
+      public Source(IndexInput datIn, IndexInput idxIn, int size,
+          Comparator<BytesRef> comp) throws IOException {
+        super(datIn, idxIn);
+        this.size = size;
+        datIn.seek(CodecUtil.headerLength(CODEC_NAME) + 4);
+        idxIn.seek(CodecUtil.headerLength(CODEC_NAME));
+
+        numValue = idxIn.readInt();
+        data = new byte[size * numValue];
+        datIn.readBytes(data, 0, size * numValue);
+        datIn.close();
+
+        index = PackedInts.getReader(idxIn);
+        idxIn.close(); // do we need to close that here?
+
+        bytesRef.bytes = data;
+        bytesRef.length = size;
+        // default byte sort order
+        this.comp = comp == null ? BytesRef.getUTF8SortedAsUnicodeComparator()
+            : comp;
+      }
+
+      @Override
+      public int ord(int docID) {
+        return (int) index.get(docID);
+      }
+
+      @Override
+      public BytesRef getByOrd(int ord) {
+        if (ord == 0) {
+          return defaultValue;
+        } else {
+          bytesRef.offset = (ord - 1) * size;
+          return bytesRef;
+        }
+      }
+
+      @Override
+      public LookupResult getByValue(BytesRef bytes) {
+        return binarySearch(bytes, 0, numValue - 1);
+      }
+
+      public long ramBytesUsed() {
+        // TODO(simonw): move ram calculation to PackedInts?
+ return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + data.length + + (RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + index.getBitsPerValue() * index.size()); + } + + @Override + public int getValueCount() { + return numValue; + } + + private LookupResult binarySearch(BytesRef b, int low, int high) { + + while (low <= high) { + int mid = (low + high) >>> 1; + bytesRef.offset = mid * size; + int cmp = comp.compare(bytesRef, b); + if (cmp < 0) { + low = mid + 1; + } else if (cmp > 0) { + high = mid - 1; + } else { + lookupResult.ord = mid+1; + lookupResult.found = true; + return lookupResult; + } + } + lookupResult.ord = low; + lookupResult.found = false; + return lookupResult; + } + } + + @Override + public ValuesEnum getEnum(AttributeSource source) throws IOException { + // do unsorted + return new DerefBytesEnum(source, cloneData(), cloneIndex(), CODEC_NAME, size); + } + } +} diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java new file mode 100644 index 00000000000..00564264178 --- /dev/null +++ b/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java @@ -0,0 +1,221 @@ +package org.apache.lucene.index.values; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.index.values.Bytes.BytesBaseSource; +import org.apache.lucene.index.values.Bytes.BytesReaderBase; +import org.apache.lucene.index.values.Bytes.BytesWriterBase; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.AttributeSource; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.RamUsageEstimator; + +// Simplest storage: stores fixed length byte[] per +// document, with no dedup and no sorting. + +class FixedStraightBytesImpl { + + static final String CODEC_NAME = "FixedStraightBytes"; + static final int VERSION_START = 0; + static final int VERSION_CURRENT = VERSION_START; + + static class Writer extends BytesWriterBase { + private int size = -1; + // start at -1 if the first added value is > 0 + private int lastDocID = -1; + private byte[] oneRecord; + + protected Writer(Directory dir, String id) throws IOException { + super(dir, id, CODEC_NAME, VERSION_CURRENT, false, false, null, null); + } + + // nocommit - impl bulk copy here! 
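+    // On-disk layout sketch (as implemented below): codec header, one int
+    // holding the fixed record size, then exactly `size` bytes per document
+    // in docID order. E.g. with size=4 and maxDoc=3 the data file holds the
+    // header + 4 + 12 bytes, and a reader can seek straight to a document's
+    // value at fp + docID * size.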
+ + @Override + synchronized public void add(int docID, BytesRef bytes) throws IOException { + if (size == -1) { + size = bytes.length; + initDataOut(); + datOut.writeInt(size); + oneRecord = new byte[size]; + } else if (bytes.length != size) { + throw new IllegalArgumentException("expected bytes size=" + size + " but got " + bytes.length); + } + fill(docID); + assert bytes.bytes.length >= bytes.length; + datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length); + } + + /* (non-Javadoc) + * @see org.apache.lucene.index.values.Writer#merge(org.apache.lucene.index.values.Writer.MergeState) + */ + @Override + protected void merge(MergeState state) throws IOException { + if(state.bits == null && state.reader instanceof Reader){ + Reader reader = (Reader) state.reader; + final int maxDocs = reader.maxDoc; + if(maxDocs == 0) + return; + if(size == -1) { + size = reader.size; + initDataOut(); + datOut.writeInt(size); + oneRecord = new byte[size]; + } + fill(state.docBase); + // nocommit should we add a transfer to API to each reader? + datOut.copyBytes(reader.cloneData(), size * maxDocs); + lastDocID += maxDocs-1; + } else + super.merge(state); + } + + // Fills up to but not including this docID + private void fill(int docID) throws IOException { + assert size >= 0; + for(int i=lastDocID+1;i= maxDoc){ + ref.length = 0; + ref.offset = 0; + return pos = NO_MORE_DOCS; + } + if((target-1) != pos) // pos inc == 1 + datIn.seek(fp + target * size); + datIn.readBytes(ref.bytes, 0, size); + return pos = target; + } + + @Override + public int docID() { + return pos; + } + + @Override + public int nextDoc() throws IOException { + return advance(pos+1); + } + } + } +} diff --git a/lucene/src/java/org/apache/lucene/index/values/Floats.java b/lucene/src/java/org/apache/lucene/index/values/Floats.java new file mode 100644 index 00000000000..3caccdb7eb0 --- /dev/null +++ b/lucene/src/java/org/apache/lucene/index/values/Floats.java @@ -0,0 +1,389 @@ +package org.apache.lucene.index.values; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.DoubleBuffer; +import java.nio.FloatBuffer; +import java.util.Collection; + +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.util.AttributeSource; +import org.apache.lucene.util.CodecUtil; +import org.apache.lucene.util.FloatsRef; +import org.apache.lucene.util.RamUsageEstimator; + +/** + * Exposes writer/reader for floating point values. You can specify 4 (java + * float) or 8 (java double) byte precision. + */ +//nocommit - add mmap version +//nocommti - add bulk copy where possible +public class Floats { + private static final String CODEC_NAME = "SimpleFloats"; + static final int VERSION_START = 0; + static final int VERSION_CURRENT = VERSION_START; + private static final int INT_ZERO = Float.floatToRawIntBits(0.0f); + private static final long LONG_ZERO = Double.doubleToRawLongBits(0.0); + + public static void files(String id, Collection files) { + files.add(id + "." 
+ IndexFileNames.CSF_DATA_EXTENSION); + } + + public static Writer getWriter(Directory dir, String id, int precisionBytes) + throws IOException { + if (precisionBytes != 4 && precisionBytes != 8) { + throw new IllegalArgumentException("precisionBytes must be 4 or 8; got " + + precisionBytes); + } + if (precisionBytes == 4) { + return new Float4Writer(dir, id); + } else { + return new Float8Writer(dir, id); + } + } + + public static Reader getReader(Directory dir, String id, int maxDoc) + throws IOException { + return new FloatsReader(dir, id, maxDoc); + } + + abstract static class FloatsWriter extends Writer { + private final Directory dir; + private final String id; + private FloatsRef floatsRef; + protected int lastDocId = -1; + protected IndexOutput datOut; + private final byte precision; + + protected FloatsWriter(Directory dir, String id, int precision) + throws IOException { + this.dir = dir; + this.id = id; + this.precision = (byte) precision; + } + + protected void initDatOut() throws IOException { + datOut = dir.createOutput(IndexFileNames.segmentFileName(id, "", + IndexFileNames.CSF_DATA_EXTENSION)); + CodecUtil.writeHeader(datOut, CODEC_NAME, VERSION_CURRENT); + assert datOut.getFilePointer() == CodecUtil.headerLength(CODEC_NAME); + datOut.writeByte(precision); + } + + public long ramBytesUsed() { + return 0; + } + + @Override + protected void add(int docID) throws IOException { + add(docID, floatsRef.get()); + } + + @Override + protected void setNextAttribute(ValuesAttribute attr) { + floatsRef = attr.floats(); + } + + protected abstract int fillDefault(int num) throws IOException; + + @Override + protected void merge(MergeState state) throws IOException { + if (state.bits == null && state.reader instanceof FloatsReader) { + // no deletes - bulk copy + // nocommit - should be do bulks with deletes too? + final FloatsReader reader = (FloatsReader) state.reader; + assert reader.precisionBytes == (int) precision; + if (reader.maxDoc == 0) + return; + if (datOut == null) + initDatOut(); + final int docBase = state.docBase; + if (docBase - lastDocId > 1) { + // fill with default values + lastDocId += fillDefault(docBase - lastDocId - 1); + } + lastDocId += reader.transferTo(datOut); + } else + super.merge(state); + } + + } + + // Writes 4 bytes (float) per value + static class Float4Writer extends FloatsWriter { + + protected Float4Writer(Directory dir, String id) throws IOException { + super(dir, id, 4); + } + + @Override + synchronized public void add(final int docID, final double v) + throws IOException { + assert docID > lastDocId : "docID: " + docID + + " must be greater than the last added doc id: " + lastDocId; + if (datOut == null) { + initDatOut(); + } + if (docID - lastDocId > 1) { + // fill with default values + lastDocId += fillDefault(docID - lastDocId - 1); + } + assert datOut != null; + datOut.writeInt(Float.floatToRawIntBits((float) v)); + ++lastDocId; + } + + @Override + synchronized public void finish(int docCount) throws IOException { + if (datOut == null) + return; // no data added - don't create file! 
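+        // pad the tail: docs past the last added value get the default
+        // (0.0f) so the data file always holds exactly docCount values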
+      if (docCount > lastDocId + 1)
+        for (int i = lastDocId + 1; i < docCount; i++) {
+          datOut.writeInt(INT_ZERO); // default value
+        }
+      datOut.close();
+    }
+
+    @Override
+    protected int fillDefault(int numValues) throws IOException {
+      for (int i = 0; i < numValues; i++) {
+        datOut.writeInt(INT_ZERO);
+      }
+      return numValues;
+    }
+  }
+
+  // Writes 8 bytes (double) per value
+  static class Float8Writer extends FloatsWriter {
+
+    protected Float8Writer(Directory dir, String id) throws IOException {
+      super(dir, id, 8);
+    }
+
+    @Override
+    synchronized public void add(int docID, double v) throws IOException {
+      assert docID > lastDocId : "docID: " + docID
+          + " must be greater than the last added doc id: " + lastDocId;
+      if (datOut == null) {
+        initDatOut();
+      }
+      if (docID - lastDocId > 1) {
+        // fill with default values
+        lastDocId += fillDefault(docID - lastDocId - 1);
+      }
+      assert datOut != null;
+      datOut.writeLong(Double.doubleToRawLongBits(v));
+      ++lastDocId;
+    }
+
+    @Override
+    synchronized public void finish(int docCount) throws IOException {
+      if (datOut == null)
+        return; // no data added - don't create file!
+      if (docCount > lastDocId + 1)
+        for (int i = lastDocId + 1; i < docCount; i++) {
+          datOut.writeLong(LONG_ZERO); // default value
+        }
+      datOut.close();
+    }
+
+    @Override
+    protected int fillDefault(int numValues) throws IOException {
+      for (int i = 0; i < numValues; i++) {
+        datOut.writeLong(LONG_ZERO);
+      }
+      return numValues;
+    }
+  }
+
+  /**
+   * Opens all necessary files, but does not read any data in until you call
+   * {@link #load}.
+   */
+  static class FloatsReader extends Reader {
+
+    private final IndexInput datIn;
+    private final int precisionBytes;
+    // TODO(simonw) is ByteBuffer the way to go here?
+    private final int maxDoc;
+
+    protected FloatsReader(Directory dir, String id, int maxDoc)
+        throws IOException {
+      datIn = dir.openInput(IndexFileNames.segmentFileName(id, "",
+          IndexFileNames.CSF_DATA_EXTENSION));
+      CodecUtil.checkHeader(datIn, CODEC_NAME, VERSION_START, VERSION_START);
+      precisionBytes = datIn.readByte();
+      assert precisionBytes == 4 || precisionBytes == 8;
+      this.maxDoc = maxDoc;
+    }
+
+    int transferTo(IndexOutput out) throws IOException {
+      IndexInput indexInput = (IndexInput) datIn.clone();
+      try {
+        indexInput.seek(CodecUtil.headerLength(CODEC_NAME));
+        // skip precision:
+        indexInput.readByte();
+        out.copyBytes(indexInput, precisionBytes * maxDoc);
+      } finally {
+        indexInput.close();
+      }
+      return maxDoc;
+    }
+
+    /**
+     * Loads the actual values. You may call this more than once, eg if you
+     * already previously loaded but then discarded the Source.
+     */
+    @Override
+    public Source load() throws IOException {
+      ByteBuffer buffer = ByteBuffer.allocate(precisionBytes * maxDoc);
+      IndexInput indexInput = (IndexInput) datIn.clone();
+      indexInput.seek(CodecUtil.headerLength(CODEC_NAME));
+      // skip precision:
+      indexInput.readByte();
+      assert buffer.hasArray() : "Buffer must support Array";
+      final byte[] arr = buffer.array();
+      indexInput.readBytes(arr, 0, arr.length);
+      return precisionBytes == 4 ? new Source4(buffer) : new Source8(buffer);
+    }
+
+    private class Source4 extends Source {
+      private final FloatBuffer values;
+
+      Source4(ByteBuffer buffer) {
+        values = buffer.asFloatBuffer();
+      }
+
+      @Override
+      public double floats(int docID) {
+        final float f = values.get(docID);
+        // nocommit should we return NaN as default instead of 0.0?
+        return Float.isNaN(f) ? 0.0f : f;
+      }
+
+      public long ramBytesUsed() {
+        return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + values.limit()
+            * RamUsageEstimator.NUM_BYTES_FLOAT;
+      }
+    }
+
+    private class Source8 extends Source {
+      private final DoubleBuffer values;
+
+      Source8(ByteBuffer buffer) {
+        values = buffer.asDoubleBuffer();
+      }
+
+      @Override
+      public double floats(int docID) {
+        final double d = values.get(docID);
+        // nocommit should we return NaN as default instead of 0.0?
+        return Double.isNaN(d) ? 0.0d : d;
+      }
+
+      public long ramBytesUsed() {
+        return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + values.limit()
+            * RamUsageEstimator.NUM_BYTES_DOUBLE;
+      }
+    }
+
+    public void close() throws IOException {
+      datIn.close();
+    }
+
+    @Override
+    public ValuesEnum getEnum(AttributeSource source) throws IOException {
+      IndexInput indexInput = (IndexInput) datIn.clone();
+      indexInput.seek(CodecUtil.headerLength(CODEC_NAME));
+      // skip precision:
+      indexInput.readByte();
+      return precisionBytes == 4 ? new Floats4Enum(source, indexInput, maxDoc)
+          : new Floats8EnumImpl(source, indexInput, maxDoc);
+    }
+  }
+
+  static final class Floats4Enum extends FloatsEnumImpl {
+
+    Floats4Enum(AttributeSource source, IndexInput dataIn, int maxDoc)
+        throws IOException {
+      super(source, dataIn, 4, maxDoc, Values.SIMPLE_FLOAT_4BYTE);
+    }
+
+    @Override
+    public int advance(int target) throws IOException {
+      if (target >= maxDoc)
+        return pos = NO_MORE_DOCS;
+      dataIn.seek(fp + (target * precision));
+      ref.floats[0] = Float.intBitsToFloat(dataIn.readInt());
+      ref.offset = 0; // nocommit -- can we ignore this?
+      return pos = target;
+    }
+
+    @Override
+    public int docID() {
+      return pos;
+    }
+
+    @Override
+    public int nextDoc() throws IOException {
+      return advance(pos + 1);
+    }
+  }
+
+  private static final class Floats8EnumImpl extends FloatsEnumImpl {
+
+    Floats8EnumImpl(AttributeSource source, IndexInput dataIn, int maxDoc)
+        throws IOException {
+      super(source, dataIn, 8, maxDoc, Values.SIMPLE_FLOAT_8BYTE);
+    }
+
+    @Override
+    public int advance(int target) throws IOException {
+      if (target >= maxDoc)
+        return pos = NO_MORE_DOCS;
+      dataIn.seek(fp + (target * precision));
+      ref.floats[0] = Double.longBitsToDouble(dataIn.readLong());
+      ref.offset = 0; // nocommit -- can we ignore this?
+      return pos = target;
+    }
+
+    @Override
+    public int docID() {
+      return pos;
+    }
+
+    @Override
+    public int nextDoc() throws IOException {
+      return advance(pos + 1);
+    }
+  }
+
+  static abstract class FloatsEnumImpl extends ValuesEnum {
+    protected final IndexInput dataIn;
+    protected int pos = -1;
+    protected final int precision;
+    protected final int maxDoc;
+    protected final long fp;
+    protected final FloatsRef ref;
+
+    FloatsEnumImpl(AttributeSource source, IndexInput dataIn, int precision,
+        int maxDoc, Values type) throws IOException {
+      super(source, type);
+      this.dataIn = dataIn;
+      this.precision = precision;
+      this.maxDoc = maxDoc;
+      fp = dataIn.getFilePointer();
+      this.ref = attr.floats();
+      this.ref.offset = 0;
+    }
+
+    @Override
+    public void close() throws IOException {
+      dataIn.close();
+    }
+  }
+}
\ No newline at end of file
diff --git a/lucene/src/java/org/apache/lucene/index/values/Ints.java b/lucene/src/java/org/apache/lucene/index/values/Ints.java
new file mode 100644
index 00000000000..a5ea55204c5
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/index/values/Ints.java
@@ -0,0 +1,32 @@
+package org.apache.lucene.index.values;
+
+import java.io.IOException;
+import java.util.Collection;
+
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.values.PackedIntsImpl.IntsReader;
+import org.apache.lucene.index.values.PackedIntsImpl.IntsWriter;
+import org.apache.lucene.store.Directory;
+//nocommit - add mmap version
+//nocommit - add bulk copy where possible
+public class Ints {
+
+  private Ints() {
+  }
+
+  public static void files(String id, Collection<String> files)
+      throws IOException {
+    files.add(IndexFileNames.segmentFileName(id, "",
+        IndexFileNames.CSF_DATA_EXTENSION));
+  }
+
+  public static Writer getWriter(Directory dir, String id, boolean useFixedArray)
+      throws IOException {
+    //nocommit - implement fixed?!
+    return new IntsWriter(dir, id);
+  }
+
+  public static Reader getReader(Directory dir, String id, boolean useFixedArray) throws IOException {
+    return new IntsReader(dir, id);
+  }
+}
diff --git a/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java b/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java
new file mode 100644
index 00000000000..ac843859a97
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java
@@ -0,0 +1,240 @@
+package org.apache.lucene.index.values;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.io.IOException;
+
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.CodecUtil;
+import org.apache.lucene.util.LongsRef;
+import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.packed.PackedInts;
+
+/** Stores ints packed with fixed-bit precision. */
+class PackedIntsImpl {
+
+  private static final String CODEC_NAME = "PackedInts";
+
+  static final int VERSION_START = 0;
+  static final int VERSION_CURRENT = VERSION_START;
+
+  static class IntsWriter extends Writer {
+    // nocommit - can we bulk copy this on a merge?
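+    // Encoding sketch (see finish() below): each value is stored as
+    // (value - minValue) in a packed-ints block, so bits per value follow
+    // the spread of the data rather than its absolute magnitude. E.g. for
+    // the values {7, 9, 12}: minValue=7, stored deltas {0, 2, 5}, and with
+    // the "missing doc" marker maxValue - minValue + 1 = 6, three bits per
+    // value suffice.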
+ private LongsRef intsRef; + private long[] docToValue; + private long minValue; + private long maxValue; + private boolean started; + private final Directory dir; + private final String id; + private int maxDocID; + private int minDocID; + + protected IntsWriter(Directory dir, String id) throws IOException { + this.dir = dir; + this.id = id; + docToValue = new long[1]; + } + + @Override + synchronized public void add(int docID, long v) throws IOException { + + if (!started) { + minValue = maxValue = v; + minDocID = maxDocID = docID; + started = true; + + } else { + if (v < minValue) { + minValue = v; + } else if (v > maxValue) { + maxValue = v; + } + if (docID < minDocID) { + minDocID = docID; + } else if (docID > maxDocID) { + maxDocID = docID; + } + } + if (docID >= docToValue.length) { + docToValue = ArrayUtil.grow(docToValue, 1 + docID); + } + docToValue[docID] = v; + } + + @Override + synchronized public void finish(int docCount) throws IOException { + if(!started) + return; + final IndexOutput datOut = dir.createOutput(IndexFileNames + .segmentFileName(id, "", IndexFileNames.CSF_DATA_EXTENSION)); + CodecUtil.writeHeader(datOut, CODEC_NAME, VERSION_CURRENT); + + // nocommit -- long can't work right since it's signed + datOut.writeLong(minValue); + // write a default value to recognize docs without a value for that field + final long defaultValue = ++maxValue - minValue; + datOut.writeLong(defaultValue); + PackedInts.Writer w = PackedInts.getWriter(datOut, docCount, PackedInts.bitsRequired(maxValue-minValue)); + + final int limit = maxDocID + 1; + for (int i = 0; i < minDocID; i++) { + w.add(defaultValue); + } + for (int i = minDocID; i < limit; i++) { + w.add(docToValue[i] - minValue); + } + for (int i = limit; i < docCount; i++) { + w.add(defaultValue); + } + w.finish(); + + datOut.close(); + } + + public long ramBytesUsed() { + return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + docToValue.length + * RamUsageEstimator.NUM_BYTES_LONG; + } + + @Override + protected void add(int docID) throws IOException { + add(docID, intsRef.get()); + } + + @Override + protected void setNextAttribute(ValuesAttribute attr) { + intsRef = attr.ints(); + } + } + + /** + * Opens all necessary files, but does not read any data in until you call + * {@link #load}. + */ + static class IntsReader extends Reader { + private final IndexInput datIn; + + protected IntsReader(Directory dir, String id) throws IOException { + datIn = dir.openInput(IndexFileNames.segmentFileName(id, "", + IndexFileNames.CSF_DATA_EXTENSION)); + CodecUtil.checkHeader(datIn, CODEC_NAME, VERSION_START, VERSION_START); + } + + /** + * Loads the actual values. You may call this more than once, eg if you + * already previously loaded but then discarded the Source. + */ + @Override + public Source load() throws IOException { + return new IntsSource((IndexInput) datIn.clone()); + } + + private static class IntsSource extends Source { + private final long minValue; + private final long defaultValue; + private final PackedInts.Reader values; + + public IntsSource(IndexInput dataIn) throws IOException { + dataIn.seek(CodecUtil.headerLength(CODEC_NAME)); + minValue = dataIn.readLong(); + defaultValue = dataIn.readLong(); + values = PackedInts.getReader(dataIn); + } + + @Override + public long ints(int docID) { + // nocommit -- can we somehow avoid 2X method calls + // on each get? 
must push minValue down, and make + // PackedInts implement Ints.Source + final long val = values.get(docID); + // docs not having a value for that field must return a default value + return val == defaultValue ? 0 : minValue + val; + } + + public long ramBytesUsed() { + // TODO(simonw): move that to PackedInts? + return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + + values.getBitsPerValue() * values.size(); + } + } + + public void close() throws IOException { + datIn.close(); + } + + @Override + public ValuesEnum getEnum(AttributeSource source) throws IOException { + return new IntsEnumImpl(source, (IndexInput) datIn.clone()); + } + + } + + private static final class IntsEnumImpl extends ValuesEnum { + private final PackedInts.ReaderIterator ints; + private long minValue; + private final IndexInput dataIn; + private final long defaultValue; + private LongsRef ref; + private final int maxDoc; + private int pos = -1; + + private IntsEnumImpl(AttributeSource source, IndexInput dataIn) + throws IOException { + super(source, Values.PACKED_INTS); + this.ref = attr.ints(); + this.ref.offset = 0; + this.dataIn = dataIn; + dataIn.seek(CodecUtil.headerLength(CODEC_NAME)); + minValue = dataIn.readLong(); + defaultValue = dataIn.readLong(); + this.ints = PackedInts.getReaderIterator(dataIn); + maxDoc = ints.size(); + } + + @Override + public void close() throws IOException { + ints.close(); + dataIn.close(); + } + + @Override + public int advance(int target) throws IOException { + if (target >= maxDoc) + return pos = NO_MORE_DOCS; + final long val = ints.advance(target); + ref.ints[0] = val == defaultValue? 0:minValue + val; + ref.offset = 0; // can we skip this? + return pos = target; + } + + @Override + public int docID() { + return pos; + } + + @Override + public int nextDoc() throws IOException { + return advance(pos+1); + } + } +} \ No newline at end of file diff --git a/lucene/src/java/org/apache/lucene/index/values/Reader.java b/lucene/src/java/org/apache/lucene/index/values/Reader.java new file mode 100644 index 00000000000..0bbd90f3a59 --- /dev/null +++ b/lucene/src/java/org/apache/lucene/index/values/Reader.java @@ -0,0 +1,109 @@ +package org.apache.lucene.index.values; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +import java.io.Closeable; +import java.io.IOException; +import java.util.Comparator; + +import org.apache.lucene.util.AttributeSource; +import org.apache.lucene.util.BytesRef; + +public abstract class Reader implements Closeable { + + + public ValuesEnum getEnum() throws IOException{ + return getEnum(null); + } + + public abstract ValuesEnum getEnum(AttributeSource attrSource) throws IOException; + + public abstract Source load() throws IOException; + + public SortedSource loadSorted(Comparator comparator) throws IOException { + throw new UnsupportedOperationException(); + } + + + /** + * Source of integer (returned as java long), per document. The underlying + * implementation may use different numbers of bits per value; long is only + * used since it can handle all precisions. + */ + public static abstract class Source { + + public long ints(int docID) { + throw new UnsupportedOperationException("ints are not supported"); + } + + public double floats(int docID) { + throw new UnsupportedOperationException("floats are not supported"); + } + + public BytesRef bytes(int docID) { + throw new UnsupportedOperationException("bytes are not supported"); + } + + /** Returns number of unique values. Some impls may + * throw UnsupportedOperationException. */ + public int getValueCount() { + throw new UnsupportedOperationException(); + } + + public ValuesEnum getEnum() throws IOException{ + return getEnum(null); + } + + // nocommit - enable obtaining enum from source since this is already in memory + public /*abstract*/ ValuesEnum getEnum(AttributeSource attrSource) throws IOException { + throw new UnsupportedOperationException(); + } + + public abstract long ramBytesUsed(); + } + + public static abstract class SortedSource extends Source { + + @Override + public BytesRef bytes(int docID) { + return getByOrd(ord(docID)); + } + + /** + * Returns ord for specified docID. If this docID had not been added to the + * Writer, the ord is 0. Ord is dense, ie, starts at 0, then increments by 1 + * for the next (as defined by {@link Comparator} value. + */ + public abstract int ord(int docID); + + /** Returns value for specified ord. */ + public abstract BytesRef getByOrd(int ord); + + public static class LookupResult { + public boolean found; + public int ord; + } + + /** + * Finds the largest ord whose value is <= the requested value. If + * {@link LookupResult#found} is true, then ord is an exact match. The + * returned {@link LookupResult} may be reused across calls. + */ + public abstract LookupResult getByValue(BytesRef value); + } + +} diff --git a/lucene/src/java/org/apache/lucene/index/values/Values.java b/lucene/src/java/org/apache/lucene/index/values/Values.java new file mode 100644 index 00000000000..c806b1650f7 --- /dev/null +++ b/lucene/src/java/org/apache/lucene/index/values/Values.java @@ -0,0 +1,48 @@ +package org.apache.lucene.index.values; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** Controls whether per-field values are stored into + * index. This storage is non-sparse, so it's best to + * use this when all docs have the field, and loads all + * values into RAM, exposing a random access API, when + * loaded. + * + *

NOTE: This feature is experimental and the + * API is free to change in non-backwards-compatible ways. */ +public enum Values { + + /** Integral value is stored as packed ints. The bit + * precision is fixed across the segment, and + * determined by the min/max values in the field. */ + PACKED_INTS, + PACKED_INTS_FIXED, + SIMPLE_FLOAT_4BYTE, + SIMPLE_FLOAT_8BYTE, + + // nocommit -- shouldn't lucene decide/detect straight vs + // deref, as well fixed vs var? + BYTES_FIXED_STRAIGHT, + BYTES_FIXED_DEREF, + BYTES_FIXED_SORTED, + + BYTES_VAR_STRAIGHT, + BYTES_VAR_DEREF, + BYTES_VAR_SORTED + + // nocommit -- need STRING variants as well +} diff --git a/lucene/src/java/org/apache/lucene/index/values/ValuesAttribute.java b/lucene/src/java/org/apache/lucene/index/values/ValuesAttribute.java new file mode 100644 index 00000000000..35299482360 --- /dev/null +++ b/lucene/src/java/org/apache/lucene/index/values/ValuesAttribute.java @@ -0,0 +1,34 @@ +package org.apache.lucene.index.values; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+import java.util.Comparator;
+
+import org.apache.lucene.util.Attribute;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.FloatsRef;
+import org.apache.lucene.util.LongsRef;
+
+public interface ValuesAttribute extends Attribute {
+  public Values type();
+  public BytesRef bytes();
+  public FloatsRef floats();
+  public LongsRef ints();
+  public void setType(Values type);
+  public Comparator<BytesRef> bytesComparator();
+  public void setBytesComparator(Comparator<BytesRef> comp);
+
+}
\ No newline at end of file
diff --git a/lucene/src/java/org/apache/lucene/index/values/ValuesAttributeImpl.java b/lucene/src/java/org/apache/lucene/index/values/ValuesAttributeImpl.java
new file mode 100644
index 00000000000..714ba4b7969
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/index/values/ValuesAttributeImpl.java
@@ -0,0 +1,151 @@
+package org.apache.lucene.index.values;
+
+import java.util.Comparator;
+
+import org.apache.lucene.util.AttributeImpl;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.FloatsRef;
+import org.apache.lucene.util.LongsRef;
+
+public class ValuesAttributeImpl extends AttributeImpl implements ValuesAttribute {
+  private Values type;
+  private BytesRef bytes = null;
+  private FloatsRef floats = null;
+  private LongsRef ints = null;
+  private Comparator<BytesRef> bytesComp;
+
+  public BytesRef bytes() {
+    return bytes;
+  }
+
+  public FloatsRef floats() {
+    return floats;
+  }
+
+  public LongsRef ints() {
+    return ints;
+  }
+
+  public Values type() {
+    return type;
+  }
+
+  public void setType(Values type) {
+    this.type = type;
+    switch (type) {
+    case BYTES_FIXED_DEREF:
+    case BYTES_FIXED_SORTED:
+    case BYTES_FIXED_STRAIGHT:
+    case BYTES_VAR_DEREF:
+    case BYTES_VAR_SORTED:
+    case BYTES_VAR_STRAIGHT:
+      bytes = new BytesRef();
+      ints = null;
+      floats = null;
+      break;
+    case PACKED_INTS:
+    case PACKED_INTS_FIXED:
+      ints = new LongsRef(new long[1], 0, 1);
+      bytes = null;
+      floats = null;
+      break;
+    case SIMPLE_FLOAT_4BYTE:
+    case SIMPLE_FLOAT_8BYTE:
+      floats = new FloatsRef(new double[1], 0, 1);
+      ints = null;
+      bytes = null;
+      break;
+
+    }
+  }
+
+  @Override
+  public void clear() {
+    // TODO
+  }
+
+  @Override
+  public void copyTo(AttributeImpl target) {
+    ValuesAttributeImpl other = (ValuesAttributeImpl) target;
+    other.setType(type);
+
+    // copy this attribute's current value into the target, not the other
+    // way around
+    switch (type) {
+    case BYTES_FIXED_DEREF:
+    case BYTES_FIXED_SORTED:
+    case BYTES_FIXED_STRAIGHT:
+    case BYTES_VAR_DEREF:
+    case BYTES_VAR_SORTED:
+    case BYTES_VAR_STRAIGHT:
+      other.bytes = (BytesRef) bytes.clone();
+      break;
+    case PACKED_INTS:
+    case PACKED_INTS_FIXED:
+      other.ints = (LongsRef) ints.clone();
+      break;
+    case SIMPLE_FLOAT_4BYTE:
+    case SIMPLE_FLOAT_8BYTE:
+      other.floats = (FloatsRef) floats.clone();
+      break;
+
+    }
+  }
+
+  /* (non-Javadoc)
+   * @see java.lang.Object#hashCode()
+   */
+  @Override
+  public int hashCode() {
+    final int prime = 31;
+    int result = 0;
+    result = prime * result + ((bytes == null) ? 0 : bytes.hashCode());
+    result = prime * result + ((floats == null) ? 0 : floats.hashCode());
+    result = prime * result + ((ints == null) ? 0 : ints.hashCode());
+    result = prime * result + ((type == null) ? 0 : type.hashCode());
+    return result;
+  }
+
+  /* (non-Javadoc)
+   * @see java.lang.Object#equals(java.lang.Object)
+   */
+  @Override
+  public boolean equals(Object obj) {
+    if (this == obj)
+      return true;
+    if (obj == null || getClass() != obj.getClass())
+      return false;
+    ValuesAttributeImpl other = (ValuesAttributeImpl) obj;
+    if (bytes == null) {
+      if (other.bytes != null)
+        return false;
+    } else if (!bytes.equals(other.bytes))
+      return false;
+    if (floats == null) {
+      if (other.floats != null)
+        return false;
+    } else if (!floats.equals(other.floats))
+      return false;
+    if (ints == null) {
+      if (other.ints != null)
+        return false;
+    } else if (!ints.equals(other.ints))
+      return false;
+    if (type == null) {
+      if (other.type != null)
+        return false;
+    } else if (!type.equals(other.type))
+      return false;
+    return true;
+  }
+
+  public Comparator<BytesRef> bytesComparator() {
+    return bytesComp;
+  }
+
+  public void setBytesComparator(Comparator<BytesRef> comp) {
+    bytesComp = comp;
+  }
+
+}
diff --git a/lucene/src/java/org/apache/lucene/index/values/ValuesEnum.java b/lucene/src/java/org/apache/lucene/index/values/ValuesEnum.java
new file mode 100644
index 00000000000..eed33457380
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/index/values/ValuesEnum.java
@@ -0,0 +1,62 @@
+package org.apache.lucene.index.values;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.io.IOException;
+
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.util.Attribute;
+import org.apache.lucene.util.AttributeSource;
+
+public abstract class ValuesEnum extends DocIdSetIterator {
+  private AttributeSource source;
+  protected final ValuesAttribute attr;
+
+  protected ValuesEnum(Values enumType) {
+    this(null, enumType);
+  }
+
+  protected ValuesEnum(AttributeSource source, Values enumType) {
+    this.source = source;
+    boolean setType = !hasAttribute(ValuesAttribute.class);
+    attr = addAttribute(ValuesAttribute.class);
+    if (setType)
+      attr.setType(enumType);
+  }
+
+  public AttributeSource attributes() {
+    if (source == null)
+      source = new AttributeSource();
+    return source;
+  }
+
+  public <T extends Attribute> T addAttribute(Class<T> attr) {
+    return attributes().addAttribute(attr);
+  }
+
+  public <T extends Attribute> T getAttribute(Class<T> attr) {
+    return attributes().getAttribute(attr);
+  }
+
+  public boolean hasAttribute(Class<? extends Attribute> attr) {
+    return attributes().hasAttribute(attr);
+  }
+
+  public abstract void close() throws IOException;
+
+}
diff --git a/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java
new file mode 100644
index 00000000000..5a9f9d6093b
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java
@@ -0,0 +1,255 @@
+package org.apache.lucene.index.values;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.concurrent.atomic.AtomicLong;
+
+import org.apache.lucene.index.values.Bytes.BytesBaseSource;
+import org.apache.lucene.index.values.Bytes.BytesReaderBase;
+import org.apache.lucene.index.values.Bytes.BytesWriterBase;
+import org.apache.lucene.index.values.FixedDerefBytesImpl.Reader.DerefBytesEnum;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.ByteBlockPool;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefHash;
+import org.apache.lucene.util.CodecUtil;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.ByteBlockPool.Allocator;
+import org.apache.lucene.util.ByteBlockPool.DirectAllocator;
+import org.apache.lucene.util.BytesRefHash.ParallelArrayBase;
+import org.apache.lucene.util.BytesRefHash.ParallelBytesStartArray;
+import org.apache.lucene.util.packed.PackedInts;
+
+// Stores variable-length byte[] by deref, ie when two docs
+// have the same value, they store only 1 byte[] and both
+// docs reference that single source
+
+class VarDerefBytesImpl {
+
+  static final String CODEC_NAME = "VarDerefBytes";
+  static final int VERSION_START = 0;
+  static final int VERSION_CURRENT = VERSION_START;
+
+  private static class AddressParallelArray extends
+      ParallelArrayBase<AddressParallelArray> {
+    final int[] address;
+
+    AddressParallelArray(int size, AtomicLong bytesUsed) {
+      super(size, bytesUsed);
+      address = new int[size];
+    }
+
+    @Override
+    protected int bytesPerEntry() {
+      return RamUsageEstimator.NUM_BYTES_INT + super.bytesPerEntry();
+    }
+
+    @Override
+    protected void copyTo(AddressParallelArray toArray, int numToCopy) {
+      super.copyTo(toArray, numToCopy);
+      System.arraycopy(address, 0, toArray.address, 0, size);
+    }
+
+    @Override
+    public AddressParallelArray newInstance(int size) {
+      return new AddressParallelArray(size, bytesUsed);
+    }
+
+  }
+
+  static class Writer extends BytesWriterBase {
+    private int[] docToAddress;
+    private int address = 1;
+
+    private final ParallelBytesStartArray<AddressParallelArray> array =
+        new ParallelBytesStartArray<AddressParallelArray>(
+            new AddressParallelArray(0, bytesUsed));
+    private final BytesRefHash hash = new BytesRefHash(pool, 16, array);
+
+    public Writer(Directory dir, String id) throws IOException {
+      this(dir, id, new DirectAllocator(ByteBlockPool.BYTE_BLOCK_SIZE),
+          new AtomicLong());
+    }
+
+    public Writer(Directory dir, String id, Allocator allocator,
+        AtomicLong bytesUsed) throws IOException {
+      super(dir, id, CODEC_NAME, VERSION_CURRENT, false, false,
+          new ByteBlockPool(allocator), bytesUsed);
+      docToAddress = new int[1];
+      bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT);
+    }
+
+    @Override
+    synchronized public void add(int docID, BytesRef bytes) throws IOException {
+      if (bytes.length == 0)
+        return; // default
+      if (datOut == null)
+        initDataOut();
+      final int e = hash.add(bytes);
+
+      if (docID >= docToAddress.length) {
+        final int oldSize = docToAddress.length;
+        docToAddress = ArrayUtil.grow(docToAddress, 1 + docID);
+        bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT
+            * (docToAddress.length - oldSize));
+      }
+      final int docAddress;
+      if (e >= 0) {
+        docAddress = array.array.address[e] = address;
+        address += IOUtils.writeLength(datOut, bytes);
+        datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length);
+        address += bytes.length;
+      } else {
+        docAddress = array.array.address[(-e) - 1];
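+        // duplicate value: reuse the 1-based address recorded when the
+        // value was first written (0 is reserved for "no value")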
+ } + docToAddress[docID] = docAddress; + } + + public long ramBytesUsed() { + return bytesUsed.get(); + } + + // Important that we get docCount, in case there were + // some last docs that we didn't see + @Override + synchronized public void finish(int docCount) throws IOException { + if(datOut == null) + return; + initIndexOut(); + idxOut.writeInt(address-1); + + // write index + // nocommit -- allow forcing fixed array (not -1) + // TODO(simonw): check the address calculation / make it more intuitive + final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount, PackedInts.bitsRequired(address-1)); + final int limit; + if (docCount > docToAddress.length) { + limit = docToAddress.length; + } else { + limit = docCount; + } + for(int i=0;i comp; + + private final BytesRefHash hash = new BytesRefHash(pool); + + public Writer(Directory dir, String id, Comparator comp) + throws IOException { + this(dir, id, comp, new DirectAllocator(ByteBlockPool.BYTE_BLOCK_SIZE), + new AtomicLong()); + } + + public Writer(Directory dir, String id, Comparator comp, + Allocator allocator, AtomicLong bytesUsed) throws IOException { + super(dir, id, CODEC_NAME, VERSION_CURRENT, false, false, + new ByteBlockPool(allocator), bytesUsed); + this.comp = comp; + docToEntry = new int[1]; + docToEntry[0] = -1; + bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT); + + } + + @Override + synchronized public void add(int docID, BytesRef bytes) throws IOException { + if (bytes.length == 0) + return;// default + if (docID >= docToEntry.length) { + int[] newArray = new int[ArrayUtil.oversize(1 + docID, + RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; + System.arraycopy(docToEntry, 0, newArray, 0, docToEntry.length); + Arrays.fill(newArray, docToEntry.length, newArray.length, -1); + bytesUsed.addAndGet((newArray.length - docToEntry.length) + * RamUsageEstimator.NUM_BYTES_INT); + docToEntry = newArray; + } + final int e = hash.add(bytes); + docToEntry[docID] = e < 0 ? (-e) - 1 : e; + } + + // Important that we get docCount, in case there were + // some last docs that we didn't see + @Override + synchronized public void finish(int docCount) throws IOException { + final int count = hash.size(); + if (count == 0) + return; + initIndexOut(); + initDataOut(); + int[] sortedEntries = hash.sort(comp); + + // first dump bytes data, recording index & offset as + // we go + long offset = 0; + long lastOffset = 0; + final int[] index = new int[count]; + final long[] offsets = new long[count]; + for (int i = 0; i < count; i++) { + final int e = sortedEntries[i]; + offsets[i] = offset; + index[e] = 1 + i; + + final BytesRef bytes = hash.get(e); + // TODO: we could prefix code... + datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length); + lastOffset = offset; + offset += bytes.length; + } + + // total bytes of data + idxOut.writeLong(offset); + + // write index -- first doc -> 1+ord + // nocommit -- allow not -1: + final PackedInts.Writer indexWriter = PackedInts.getWriter(idxOut, + docCount, PackedInts.bitsRequired(count)); + final int limit = docCount > docToEntry.length ? docToEntry.length + : docCount; + for (int i = 0; i < limit; i++) { + final int e = docToEntry[i]; + indexWriter.add(e == -1 ? 
0 : index[e]);
+      }
+      for (int i = limit; i < docCount; i++) {
+        indexWriter.add(0);
+      }
+      indexWriter.finish();
+
+      // next ord (0-based) -> offset
+      // nocommit -- allow not -1:
+      PackedInts.Writer offsetWriter = PackedInts.getWriter(idxOut, count,
+          PackedInts.bitsRequired(lastOffset));
+      for (int i = 0; i < count; i++) {
+        offsetWriter.add(offsets[i]);
+      }
+      offsetWriter.finish();
+
+      super.finish(docCount);
+      bytesUsed.addAndGet((-docToEntry.length)
+          * RamUsageEstimator.NUM_BYTES_INT);
+    }
+  }
+
+  public static class Reader extends BytesReaderBase {
+
+    Reader(Directory dir, String id, int maxDoc) throws IOException {
+      super(dir, id, CODEC_NAME, VERSION_START, true);
+    }
+
+    @Override
+    public org.apache.lucene.index.values.Reader.Source load()
+        throws IOException {
+      return loadSorted(null);
+    }
+
+    @Override
+    public SortedSource loadSorted(Comparator<BytesRef> comp)
+        throws IOException {
+      return new Source(cloneData(), cloneIndex(), comp);
+    }
+
+    private static class Source extends BytesBaseSortedSource {
+      // TODO: paged data
+      private final byte[] data;
+      private final BytesRef bytesRef = new BytesRef();
+      private final PackedInts.Reader docToOrdIndex;
+      private final PackedInts.Reader ordToOffsetIndex; // 0-based
+      private final long totBytes;
+      private final int valueCount;
+      private final LookupResult lookupResult = new LookupResult();
+      private final Comparator<BytesRef> comp;
+
+      public Source(IndexInput datIn, IndexInput idxIn,
+          Comparator<BytesRef> comp) throws IOException {
+        super(datIn, idxIn);
+        totBytes = idxIn.readLong();
+        data = new byte[(int) totBytes];
+        datIn.readBytes(data, 0, (int) totBytes);
+        docToOrdIndex = PackedInts.getReader(idxIn);
+        ordToOffsetIndex = PackedInts.getReader(idxIn);
+        valueCount = ordToOffsetIndex.size();
+        bytesRef.bytes = data;
+        // default byte sort order
+        this.comp = comp == null ? BytesRef.getUTF8SortedAsUnicodeComparator()
+            : comp;
+      }
+
+      @Override
+      public BytesRef getByOrd(int ord) {
+        return ord == 0 ? defaultValue : deref(--ord);
+      }
+
+      @Override
+      public int ord(int docID) {
+        return (int) docToOrdIndex.get(docID);
+      }
+
+      @Override
+      public LookupResult getByValue(BytesRef bytes) {
+        return binarySearch(bytes, 0, valueCount - 1);
+      }
+
+      public long ramBytesUsed() {
+        // TODO(simonw): move ram usage to PackedInts?
+        return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER
+            + data.length
+            + (RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + docToOrdIndex
+                .getBitsPerValue()
+                * docToOrdIndex.size())
+            + (RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + ordToOffsetIndex
+                .getBitsPerValue()
+                * ordToOffsetIndex.size());
+      }
+
+      @Override
+      public int getValueCount() {
+        return valueCount;
+      }
+
+      // ord is 0-based
+      private BytesRef deref(int ord) {
+        bytesRef.offset = (int) ordToOffsetIndex.get(ord);
+        final long nextOffset;
+        if (ord == valueCount - 1) {
+          nextOffset = totBytes;
+        } else {
+          nextOffset = ordToOffsetIndex.get(1 + ord);
+        }
+        bytesRef.length = (int) (nextOffset - bytesRef.offset);
+        return bytesRef;
+      }
+
+      // TODO: share w/ FixedSortedBytesValues?
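+      // Binary-search sketch: deref(mid) materializes the value at the
+      // 0-based ord `mid` into the reusable bytesRef, so each probe costs
+      // an offset lookup plus one comparator call.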
+ private LookupResult binarySearch(BytesRef b, int low, int high) { + + while (low <= high) { + int mid = (low + high) >>> 1; + deref(mid); + final int cmp = comp.compare(bytesRef, b); + if (cmp < 0) { + low = mid + 1; + } else if (cmp > 0) { + high = mid - 1; + } else { + lookupResult.ord = mid + 1; + lookupResult.found = true; + return lookupResult; + } + } + assert comp.compare(bytesRef, b) != 0; + lookupResult.ord = low; + lookupResult.found = false; + return lookupResult; + } + } + + @Override + public ValuesEnum getEnum(AttributeSource source) throws IOException { + return new VarSortedBytesEnum(source, cloneData(), cloneIndex()); + } + + private static class VarSortedBytesEnum extends ValuesEnum { + + private PackedInts.Reader docToOrdIndex; + private PackedInts.Reader ordToOffsetIndex; + private IndexInput idxIn; + private IndexInput datIn; + private final BytesRef bytesRef; + private int valueCount; + private long totBytes; + private int docCount; + private int pos = -1; + private final long fp; + + protected VarSortedBytesEnum(AttributeSource source, IndexInput datIn, + IndexInput idxIn) throws IOException { + super(source, Values.BYTES_VAR_SORTED); + bytesRef = attr.bytes(); + totBytes = idxIn.readLong(); + // keep that in memory to prevent lots of disk seeks + docToOrdIndex = PackedInts.getReader(idxIn); + ordToOffsetIndex = PackedInts.getReader(idxIn); + valueCount = ordToOffsetIndex.size(); + docCount = docToOrdIndex.size(); + fp = datIn.getFilePointer(); + this.idxIn = idxIn; + this.datIn = datIn; + } + + @Override + public void close() throws IOException { + idxIn.close(); + datIn.close(); + } + + @Override + public int advance(int target) throws IOException { + if (target >= docCount) + return pos = NO_MORE_DOCS; + final int ord = (int) docToOrdIndex.get(target) - 1; + if (ord == -1) { + bytesRef.length = 0; + bytesRef.offset = 0; + return pos = target; + } + final long offset = ordToOffsetIndex.get(ord); + final long nextOffset; + if (ord == valueCount - 1) { + nextOffset = totBytes; + } else { + nextOffset = ordToOffsetIndex.get(1 + ord); + } + final int length = (int) (nextOffset - offset); + datIn.seek(fp + offset); + if (bytesRef.bytes.length < length) + bytesRef.grow(length); + datIn.readBytes(bytesRef.bytes, 0, length); + bytesRef.length = length; + bytesRef.offset = 0; + return pos = target; + } + + @Override + public int docID() { + return pos; + } + + @Override + public int nextDoc() throws IOException { + return advance(pos + 1); + } + } + } +} diff --git a/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java new file mode 100644 index 00000000000..83b97479171 --- /dev/null +++ b/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java @@ -0,0 +1,232 @@ +package org.apache.lucene.index.values; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.concurrent.atomic.AtomicLong; + +import org.apache.lucene.index.values.Bytes.BytesBaseSource; +import org.apache.lucene.index.values.Bytes.BytesReaderBase; +import org.apache.lucene.index.values.Bytes.BytesWriterBase; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.AttributeSource; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.RamUsageEstimator; +import org.apache.lucene.util.packed.PackedInts; + +// Variable length byte[] per document, no sharing + +class VarStraightBytesImpl { + + static final String CODEC_NAME = "VarStraightBytes"; + static final int VERSION_START = 0; + static final int VERSION_CURRENT = VERSION_START; + + static class Writer extends BytesWriterBase { + private int address; + // start at -1 if the first added value is > 0 + private int lastDocID = -1; + private int[] docToAddress; + + public Writer(Directory dir, String id, AtomicLong bytesUsed) throws IOException { + super(dir, id, CODEC_NAME, VERSION_CURRENT, false, false, null, bytesUsed); + docToAddress = new int[1]; + bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT); + } + + public Writer(Directory dir, String id) throws IOException { + this(dir, id, new AtomicLong()); + } + + // Fills up to but not including this docID + private void fill(final int docID) { + if (docID >= docToAddress.length) { + int oldSize = docToAddress.length; + docToAddress = ArrayUtil.grow(docToAddress, 1 + docID); + bytesUsed.addAndGet((docToAddress.length-oldSize)*RamUsageEstimator.NUM_BYTES_INT); + } + for (int i = lastDocID + 1; i < docID; i++) { + docToAddress[i] = address; + } + lastDocID = docID; + } + + @Override + synchronized public void add(int docID, BytesRef bytes) throws IOException { + if(bytes.length == 0) + return; // default + if (datOut == null) + initDataOut(); + fill(docID); + docToAddress[docID] = address; + datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length); + address += bytes.length; + } + + @Override + synchronized public void finish(int docCount) throws IOException { + if (datOut == null) + return; + initIndexOut(); + // write the doc -> address index + fill(docCount); + idxOut.writeVInt(address); + // nocommit -- allow not -1 + final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount, + PackedInts.bitsRequired(address)); + for (int i = 0; i < docCount; i++) { + w.add(docToAddress[i]); + } + w.finish(); + bytesUsed.addAndGet(-(docToAddress.length)*RamUsageEstimator.NUM_BYTES_INT); + docToAddress = null; + super.finish(docCount); + } + + public long ramBytesUsed() { + return bytesUsed.get(); + } + } + + public static class Reader extends BytesReaderBase { + private final int maxDoc; + + Reader(Directory dir, String id, int maxDoc) throws IOException { + super(dir, id, CODEC_NAME, VERSION_START, true); + this.maxDoc = maxDoc; + } + + @Override + public Source load() throws IOException { + return new Source(cloneData(), cloneIndex()); + } + + private class Source extends 
BytesBaseSource { + private final int totBytes; + // TODO: paged data + private final byte[] data; + private final BytesRef bytesRef = new BytesRef(); + private final PackedInts.Reader addresses; + + public Source(IndexInput datIn, IndexInput idxIn) throws IOException { + super(datIn, idxIn); + totBytes = idxIn.readVInt(); + data = new byte[totBytes]; + datIn.readBytes(data, 0, totBytes); + addresses = PackedInts.getReader(idxIn); + bytesRef.bytes = data; + } + + @Override + public BytesRef bytes(int docID) { + final int address = (int) addresses.get(docID); + bytesRef.offset = address; + if (docID == maxDoc - 1) { + bytesRef.length = totBytes - bytesRef.offset; + } else { + bytesRef.length = (int) addresses.get(1 + docID) - bytesRef.offset; + } + return bytesRef; + } + + @Override + public int getValueCount() { + throw new UnsupportedOperationException(); + } + + public long ramBytesUsed() { + // TODO(simonw): move address ram usage to PackedInts? + return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + + data.length + + (RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + addresses + .getBitsPerValue() + * addresses.size()); + } + } + + @Override + public ValuesEnum getEnum(AttributeSource source) throws IOException { + return new VarStraightBytesEnum(source, cloneData(), cloneIndex()); + } + + private class VarStraightBytesEnum extends ValuesEnum { + private final PackedInts.Reader addresses; + private final IndexInput datIn; + private final IndexInput idxIn; + private final long fp; + private final int totBytes; + private final BytesRef ref; + private int pos = -1; + + protected VarStraightBytesEnum(AttributeSource source, IndexInput datIn, + IndexInput idxIn) throws IOException { + super(source, Values.BYTES_VAR_STRAIGHT); + totBytes = idxIn.readVInt(); + fp = datIn.getFilePointer(); + addresses = PackedInts.getReader(idxIn); + this.datIn = datIn; + this.idxIn = idxIn; + ref = attr.bytes(); + + } + + @Override + public void close() throws IOException { + datIn.close(); + idxIn.close(); + } + + @Override + public int advance(final int target) throws IOException { + if (target >= maxDoc) { + ref.length = 0; + ref.offset = 0; + return pos = NO_MORE_DOCS; + } + final long addr = addresses.get(target); + if (addr == totBytes) { + // nocommit is that a valid default value + ref.length = 0; + ref.offset = 0; + return pos = target; + } + datIn.seek(fp + addr); + final int size = (int) (target == maxDoc - 1 ? totBytes - addr + : addresses.get(target + 1) - addr); + if (ref.bytes.length < size) + ref.grow(size); + ref.length = size; + datIn.readBytes(ref.bytes, 0, size); + return pos = target; + } + + @Override + public int docID() { + return pos; + } + + @Override + public int nextDoc() throws IOException { + return advance(pos+1); + } + } + } +} diff --git a/lucene/src/java/org/apache/lucene/index/values/Writer.java b/lucene/src/java/org/apache/lucene/index/values/Writer.java new file mode 100644 index 00000000000..13bf0947614 --- /dev/null +++ b/lucene/src/java/org/apache/lucene/index/values/Writer.java @@ -0,0 +1,92 @@ +package org.apache.lucene.index.values; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +import java.io.IOException; +import java.util.List; + +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; + +public abstract class Writer { + + /** Records the specified value for the docID */ + public void add(int docID, long value) throws IOException { + throw new UnsupportedOperationException(); + } + + /** Records the specified value for the docID */ + public void add(int docID, double value) throws IOException { + throw new UnsupportedOperationException(); + } + + /** Records the specified value for the docID */ + public void add(int docID, BytesRef value) throws IOException { + throw new UnsupportedOperationException(); + } + + /** Records the value from the current attribute for the docID */ + protected abstract void add(int docID) throws IOException; + + protected abstract void setNextAttribute(ValuesAttribute attr); + + /** Finish writing, close any files */ + public abstract void finish(int docCount) throws IOException; + + public static class MergeState { + public final Reader reader; + public final int docBase; + public final int docCount; + public final Bits bits; + + public MergeState(Reader reader, int docBase, int docCount, Bits bits) { + assert reader != null; + this.reader = reader; + this.docBase = docBase; + this.docCount = docCount; + this.bits = bits; + } + } + + public void add(List<MergeState> states) throws IOException { + for (MergeState state : states) { + merge(state); + } + } + + // enables bulk copies in subclasses per MergeState + protected void merge(MergeState state) throws IOException { + final ValuesEnum valEnum = state.reader.getEnum(); + assert valEnum != null; + try { + final ValuesAttribute attr = valEnum.addAttribute(ValuesAttribute.class); + setNextAttribute(attr); + int docID = state.docBase; + final Bits bits = state.bits; + final int docCount = state.docCount; + for (int i = 0; i < docCount; i++) { + if (bits == null || !bits.get(i)) { + if (valEnum.advance(i) == ValuesEnum.NO_MORE_DOCS) + break; + add(docID++); + } + } + } finally { + valEnum.close(); + } + } +} diff --git a/lucene/src/java/org/apache/lucene/search/FieldComparator.java b/lucene/src/java/org/apache/lucene/search/FieldComparator.java index 2677ef0d5b3..d71b89f6fa6 100644 --- a/lucene/src/java/org/apache/lucene/search/FieldComparator.java +++ b/lucene/src/java/org/apache/lucene/search/FieldComparator.java @@ -22,8 +22,9 @@ import java.text.Collator; import java.util.Locale; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.search.FieldCache.DocTermsIndex; +import org.apache.lucene.index.values.Reader.Source; import org.apache.lucene.search.FieldCache.DocTerms; +import org.apache.lucene.search.FieldCache.DocTermsIndex; import org.apache.lucene.search.cache.ByteValuesCreator; import org.apache.lucene.search.cache.CachedArray; import org.apache.lucene.search.cache.CachedArrayCreator; @@ -39,9 +40,9 @@ import org.apache.lucene.search.cache.CachedArray.IntValues; import org.apache.lucene.search.cache.CachedArray.LongValues; import org.apache.lucene.search.cache.CachedArray.ShortValues; import org.apache.lucene.util.BytesRef; -import 
org.apache.lucene.util.packed.Direct8; import org.apache.lucene.util.packed.Direct16; import org.apache.lucene.util.packed.Direct32; +import org.apache.lucene.util.packed.Direct8; import org.apache.lucene.util.packed.PackedInts; /** @@ -159,7 +160,6 @@ public abstract class FieldComparator { * comparators can just return "this" to reuse the same * comparator across segments * @throws IOException - * @throws IOException */ public abstract FieldComparator setNextReader(IndexReader reader, int docBase) throws IOException; @@ -309,6 +309,65 @@ public abstract class FieldComparator { } } + /** Uses float index values to sort by ascending value */ + public static final class FloatIndexValuesComparator extends FieldComparator { + private final double[] values; + private Source currentReaderValues; + private final String field; + private double bottom; + + FloatIndexValuesComparator(int numHits, String field) { + values = new double[numHits]; + this.field = field; + } + + @Override + public int compare(int slot1, int slot2) { + final double v1 = values[slot1]; + final double v2 = values[slot2]; + if (v1 > v2) { + return 1; + } else if (v1 < v2) { + return -1; + } else { + return 0; + } + } + + @Override + public int compareBottom(int doc) { + final double v2 = currentReaderValues.floats(doc); + if (bottom > v2) { + return 1; + } else if (bottom < v2) { + return -1; + } else { + return 0; + } + } + + @Override + public void copy(int slot, int doc) { + values[slot] = currentReaderValues.floats(doc); + } + + @Override + public FieldComparator setNextReader(IndexReader reader, int docBase) throws IOException { + currentReaderValues = reader.getIndexValuesCache().getFloats(field); + return this; + } + + @Override + public void setBottom(final int bottom) { + this.bottom = values[bottom]; + } + + @Override + public Comparable value(int slot) { + return Double.valueOf(values[slot]); + } + } + /** Parses field's values as float (using {@link * FieldCache#getFloats} and sorts by ascending value */ public static final class FloatComparator extends NumericComparator { @@ -448,6 +507,69 @@ public abstract class FieldComparator { } } + /** Loads int index values and sorts by ascending value. 
*/ + public static final class IntIndexValuesComparator extends FieldComparator { + private final long[] values; + private Source currentReaderValues; + private final String field; + private long bottom; + + IntIndexValuesComparator(int numHits, String field) { + values = new long[numHits]; + this.field = field; + } + + @Override + public int compare(int slot1, int slot2) { + // TODO: there are sneaky non-branch ways to compute + // -1/+1/0 sign + final long v1 = values[slot1]; + final long v2 = values[slot2]; + if (v1 > v2) { + return 1; + } else if (v1 < v2) { + return -1; + } else { + return 0; + } + } + + @Override + public int compareBottom(int doc) { + // TODO: there are sneaky non-branch ways to compute + // -1/+1/0 sign + final long v2 = currentReaderValues.ints(doc); + if (bottom > v2) { + return 1; + } else if (bottom < v2) { + return -1; + } else { + return 0; + } + } + + @Override + public void copy(int slot, int doc) { + values[slot] = currentReaderValues.ints(doc); + } + + @Override + public FieldComparator setNextReader(IndexReader reader, int docBase) throws IOException { + currentReaderValues = reader.getIndexValuesCache().getInts(field); + return this; + } + + @Override + public void setBottom(final int bottom) { + this.bottom = values[bottom]; + } + + @Override + public Comparable value(int slot) { + return Long.valueOf(values[slot]); + } + } + /** Parses field's values as long (using {@link * FieldCache#getLongs} and sorts by ascending value */ public static final class LongComparator extends NumericComparator { diff --git a/lucene/src/java/org/apache/lucene/search/ReqExclScorer.java b/lucene/src/java/org/apache/lucene/search/ReqExclScorer.java index a32922b1947..c365294fe55 100644 --- a/lucene/src/java/org/apache/lucene/search/ReqExclScorer.java +++ b/lucene/src/java/org/apache/lucene/search/ReqExclScorer.java @@ -23,7 +23,7 @@ import java.io.IOException; /** A Scorer for queries with a required subscorer * and an excluding (prohibited) sub DocIdSetIterator. *
- * This Scorer implements {@link Scorer#skipTo(int)}, + * This Scorer implements {@link Scorer#advance(int)}, * and it uses the skipTo() on the given scorers. */ class ReqExclScorer extends Scorer { diff --git a/lucene/src/java/org/apache/lucene/search/ReqOptSumScorer.java b/lucene/src/java/org/apache/lucene/search/ReqOptSumScorer.java index c8e1b81ff54..cab09c864c8 100644 --- a/lucene/src/java/org/apache/lucene/search/ReqOptSumScorer.java +++ b/lucene/src/java/org/apache/lucene/search/ReqOptSumScorer.java @@ -21,7 +21,7 @@ import java.io.IOException; /** A Scorer for queries with a required part and an optional part. * Delays skipTo() on the optional part until a score() is needed. *
- * This Scorer implements {@link Scorer#skipTo(int)}. + * This Scorer implements {@link Scorer#advance(int)}. */ class ReqOptSumScorer extends Scorer { /** The scorers passed from the constructor. diff --git a/lucene/src/java/org/apache/lucene/search/SortField.java b/lucene/src/java/org/apache/lucene/search/SortField.java index 58c4582c0e3..623b785489a 100644 --- a/lucene/src/java/org/apache/lucene/search/SortField.java +++ b/lucene/src/java/org/apache/lucene/search/SortField.java @@ -19,6 +19,7 @@ package org.apache.lucene.search; import java.io.IOException; import java.io.Serializable; +import java.util.Comparator; import java.util.Locale; import org.apache.lucene.search.cache.ByteValuesCreator; @@ -29,6 +30,11 @@ import org.apache.lucene.search.cache.IntValuesCreator; import org.apache.lucene.search.cache.LongValuesCreator; import org.apache.lucene.search.cache.ShortValuesCreator; import org.apache.lucene.util.StringHelper; +import org.apache.lucene.util.BytesRef; + +// nocommit -- for cleaner transition, maybe we should make +// a new SortField that subclasses this one and always uses +// index values? /** * Stores information about how to sort documents by terms in an individual @@ -90,6 +96,9 @@ implements Serializable { * uses ordinals to do the sorting. */ public static final int STRING_VAL = 11; + /** Sort using byte[] index values. */ + public static final int BYTES = 12; + /** Represents sorting by document score (relevancy). */ public static final SortField FIELD_SCORE = new SortField (null, SCORE); @@ -440,6 +449,26 @@ implements Serializable { field = StringHelper.intern(field); } + private boolean useIndexValues; + + public void setUseIndexValues(boolean b) { + useIndexValues = b; + } + + public boolean getUseIndexValues() { + return useIndexValues; + } + + private Comparator<BytesRef> bytesComparator = BytesRef.getUTF8SortedAsUnicodeComparator(); + + public void setBytesComparator(Comparator<BytesRef> b) { + bytesComparator = b; + } + + public Comparator<BytesRef> getBytesComparator() { + return bytesComparator; + } + /** Returns the {@link FieldComparator} to use for * sorting. 
* @@ -469,10 +498,18 @@ implements Serializable { return new FieldComparator.DocComparator(numHits); case SortField.INT: - return new FieldComparator.IntComparator(numHits, (IntValuesCreator)creator, (Integer)missingValue ); + if (useIndexValues) { + return new FieldComparator.IntIndexValuesComparator(numHits, field); + } else { + return new FieldComparator.IntComparator(numHits, (IntValuesCreator)creator, (Integer) missingValue); + } case SortField.FLOAT: - return new FieldComparator.FloatComparator(numHits, (FloatValuesCreator)creator, (Float)missingValue ); + if (useIndexValues) { + return new FieldComparator.FloatIndexValuesComparator(numHits, field); + } else { + return new FieldComparator.FloatComparator(numHits, (FloatValuesCreator) creator, (Float) missingValue); + } case SortField.LONG: return new FieldComparator.LongComparator(numHits, (LongValuesCreator)creator, (Long)missingValue ); diff --git a/lucene/src/java/org/apache/lucene/util/ArrayUtil.java b/lucene/src/java/org/apache/lucene/util/ArrayUtil.java index 151ae1a95db..4638313de98 100644 --- a/lucene/src/java/org/apache/lucene/util/ArrayUtil.java +++ b/lucene/src/java/org/apache/lucene/util/ArrayUtil.java @@ -247,6 +247,19 @@ public final class ArrayUtil { public static short[] grow(short[] array) { return grow(array, 1 + array.length); } + + public static double[] grow(double[] array, int minSize) { + if (array.length < minSize) { + double[] newArray = new double[oversize(minSize, RamUsageEstimator.NUM_BYTES_DOUBLE)]; + System.arraycopy(array, 0, newArray, 0, array.length); + return newArray; + } else + return array; + } + + public static double[] grow(double[] array) { + return grow(array, 1 + array.length); + } public static short[] shrink(short[] array, int targetSize) { final int newSize = getShrinkSize(array.length, targetSize, RamUsageEstimator.NUM_BYTES_SHORT); diff --git a/lucene/src/java/org/apache/lucene/util/ByteBlockPool.java b/lucene/src/java/org/apache/lucene/util/ByteBlockPool.java index a34e9fe7a3c..a79e6f57f33 100644 --- a/lucene/src/java/org/apache/lucene/util/ByteBlockPool.java +++ b/lucene/src/java/org/apache/lucene/util/ByteBlockPool.java @@ -62,6 +62,18 @@ public final class ByteBlockPool { return new byte[blockSize]; } } + + public static class DirectAllocator extends Allocator { + + public DirectAllocator(int blockSize) { + super(blockSize); + } + + @Override + public void recycleByteBlocks(byte[][] blocks, int start, int end) { + } + + } public byte[][] buffers = new byte[10][]; diff --git a/lucene/src/java/org/apache/lucene/util/BytesRef.java b/lucene/src/java/org/apache/lucene/util/BytesRef.java index 1ec291a5c65..342cc6d7aab 100644 --- a/lucene/src/java/org/apache/lucene/util/BytesRef.java +++ b/lucene/src/java/org/apache/lucene/util/BytesRef.java @@ -18,6 +18,7 @@ package org.apache.lucene.util; */ import java.util.Comparator; +import java.io.Serializable; import java.io.UnsupportedEncodingException; import java.io.ObjectInput; import java.io.ObjectOutput; @@ -271,12 +272,13 @@ public final class BytesRef implements Comparable, Externalizable { } private final static Comparator utf8SortedAsUnicodeSortOrder = new UTF8SortedAsUnicodeComparator(); - + public static Comparator getUTF8SortedAsUnicodeComparator() { return utf8SortedAsUnicodeSortOrder; } - private static class UTF8SortedAsUnicodeComparator implements Comparator { + @SuppressWarnings("serial") // serializable to work with contrib/remote + private static final class UTF8SortedAsUnicodeComparator implements Serializable, Comparator { // 
Only singleton private UTF8SortedAsUnicodeComparator() {}; diff --git a/lucene/src/java/org/apache/lucene/util/BytesRefHash.java b/lucene/src/java/org/apache/lucene/util/BytesRefHash.java index a34ad775178..5254e76f742 100644 --- a/lucene/src/java/org/apache/lucene/util/BytesRefHash.java +++ b/lucene/src/java/org/apache/lucene/util/BytesRefHash.java @@ -353,6 +353,7 @@ public final class BytesRefHash { // 1 byte to store length buffer[bufferUpto] = (byte) length; pool.byteUpto += length + 1; + assert length >= 0: "Length must be non-negative: " + length; System.arraycopy(bytes.bytes, bytes.offset, buffer, bufferUpto + 1, length); } else { @@ -569,4 +570,64 @@ public final class BytesRefHash { } } + + public static class ParallelBytesStartArray<T extends ParallelArrayBase<T>> extends BytesStartArray { + private final T prototype; + public T array; + + public ParallelBytesStartArray(T template) { + this.prototype = template; + } + @Override + public int[] init() { + if(array == null) { + array = prototype.newInstance(2); + } + return array.textStart; + } + + @Override + public int[] grow() { + array = array.grow(); + return array.textStart; + } + + @Override + public int[] clear() { + if(array != null) { + array.deref(); + array = null; + } + return null; + } + + @Override + public AtomicLong bytesUsed() { + return array.bytesUsed(); + } + + } + + public abstract static class ParallelArrayBase<T extends ParallelArrayBase<T>> extends ParallelArray<T> { + final int[] textStart; + + protected ParallelArrayBase(int size, AtomicLong bytesUsed) { + super(size, bytesUsed); + textStart = new int[size]; + } + + @Override + protected int bytesPerEntry() { + return RamUsageEstimator.NUM_BYTES_INT; + } + + @Override + protected void copyTo(T toArray, int numToCopy) { + System.arraycopy(textStart, 0, toArray.textStart, 0, size); + } + + @Override + public abstract T newInstance(int size); + + } } diff --git a/lucene/src/java/org/apache/lucene/util/FloatsRef.java b/lucene/src/java/org/apache/lucene/util/FloatsRef.java new file mode 100644 index 00000000000..9dd107e323d --- /dev/null +++ b/lucene/src/java/org/apache/lucene/util/FloatsRef.java @@ -0,0 +1,91 @@ +/** + * + */ +package org.apache.lucene.util; + + +public final class FloatsRef implements Cloneable { + public double[] floats; + public int offset; + public int length; + + public FloatsRef() { + } + + public FloatsRef(int capacity) { + floats = new double[capacity]; + } + + public void set(double value) { + floats[offset] = value; + } + + public double get() { + return floats[offset]; + } + + public FloatsRef(double[] floats, int offset, int length) { + this.floats = floats; + this.offset = offset; + this.length = length; + } + + public FloatsRef(FloatsRef other) { + copy(other); + } + + @Override + public Object clone() { + return new FloatsRef(this); + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 0; + final int end = offset + length; + for(int i = offset; i < end; i++) { + long value = Double.doubleToLongBits(floats[i]); + result = prime * result + (int) (value ^ (value >>> 32)); + } + return result; + } + + @Override + public boolean equals(Object other) { + return other instanceof FloatsRef && this.floatsEquals((FloatsRef) other); + } + + public boolean floatsEquals(FloatsRef other) { + if (length == other.length) { + int otherUpto = other.offset; + final double[] otherFloats = other.floats; + final int end = offset + length; + for(int upto = offset; upto < end; upto++, otherUpto++) { + if (floats[upto] != otherFloats[otherUpto]) { + return false; + } + } + return true; + } else { + return false; + } + } + + public void copy(FloatsRef other) { + if (floats == null || floats.length < other.length) { + floats = new double[other.length]; + } + System.arraycopy(other.floats, other.offset, floats, 0, other.length); + offset = 0; + length = other.length; + } +} diff --git a/lucene/src/java/org/apache/lucene/util/LongsRef.java 
b/lucene/src/java/org/apache/lucene/util/LongsRef.java new file mode 100644 index 00000000000..c5dee1a15b1 --- /dev/null +++ b/lucene/src/java/org/apache/lucene/util/LongsRef.java @@ -0,0 +1,91 @@ +/** + * + */ +package org.apache.lucene.util; + + +public final class LongsRef implements Cloneable { + public long[] ints; + public int offset; + public int length; + + public LongsRef() { + } + + public LongsRef(int capacity) { + ints = new long[capacity]; + } + + public LongsRef(long[] ints, int offset, int length) { + this.ints = ints; + this.offset = offset; + this.length = length; + } + + public LongsRef(LongsRef other) { + copy(other); + } + + @Override + public Object clone() { + return new LongsRef(this); + } + + public void set(long value) { + ints[offset] = value; + } + + public long get() { + return ints[offset]; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 0; + final int end = offset + length; + for(int i = offset; i < end; i++) { + long value = ints[i]; + result = prime * result + (int) (value ^ (value >>> 32)); + } + return result; + } + + @Override + public boolean equals(Object other) { + return other instanceof LongsRef && this.intsEquals((LongsRef) other); + } + + public boolean intsEquals(LongsRef other) { + if (length == other.length) { + int otherUpto = other.offset; + final long[] otherInts = other.ints; + final int end = offset + length; + for(int upto = offset; upto < end; upto++, otherUpto++) { + if (ints[upto] != otherInts[otherUpto]) { + return false; + } + } + return true; + } else { + return false; + } + } + + public void copy(LongsRef other) { + if (ints == null || ints.length < other.length) { + ints = new long[other.length]; + } + System.arraycopy(other.ints, other.offset, ints, 0, other.length); + offset = 0; + length = other.length; + } +} diff --git a/lucene/src/java/org/apache/lucene/util/ParallelArray.java b/lucene/src/java/org/apache/lucene/util/ParallelArray.java new file mode 100644 --- /dev/null +++ b/lucene/src/java/org/apache/lucene/util/ParallelArray.java @@ -0,0 +1,57 @@ +package org.apache.lucene.util; + +import java.util.concurrent.atomic.AtomicLong; + +public abstract class ParallelArray<T extends ParallelArray<T>> { + + public final int size; + protected final AtomicLong bytesUsed; + + protected ParallelArray(final int size, AtomicLong bytesUsed) { + this.size = size; + this.bytesUsed = bytesUsed; + bytesUsed.addAndGet((size) * bytesPerEntry()); + + } + + protected abstract int bytesPerEntry(); + + public AtomicLong bytesUsed() { + return bytesUsed; + } + + public void deref() { + bytesUsed.addAndGet((-size) * bytesPerEntry()); + } + + public abstract T newInstance(int size); + + public final T grow() { + int newSize = ArrayUtil.oversize(size + 1, bytesPerEntry()); + T newArray = newInstance(newSize); + copyTo(newArray, size); + bytesUsed.addAndGet((newSize - size) * bytesPerEntry()); + return newArray; + } + + protected abstract void copyTo(T toArray, int numToCopy); +} diff --git a/lucene/src/java/org/apache/lucene/util/packed/Packed64.java b/lucene/src/java/org/apache/lucene/util/packed/Packed64.java index b3826676503..691cec42a77 100644 --- a/lucene/src/java/org/apache/lucene/util/packed/Packed64.java +++ b/lucene/src/java/org/apache/lucene/util/packed/Packed64.java @@ -182,7 +182,7 @@ class Packed64 extends PackedInts.ReaderImpl implements PackedInts.Mutable { final int bitPos = (int)(majorBitPos & MOD_MASK); // % BLOCK_SIZE); final int base = bitPos * FAC_BITPOS; - + assert elementPos < blocks.length : "elementPos: " + elementPos + "; blocks.len: " + blocks.length; return ((blocks[elementPos] << shifts[base]) >>> shifts[base+1]) | ((blocks[elementPos+1] >>> shifts[base+2]) & readMasks[bitPos]); } diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexWriterConfig.java b/lucene/src/test/org/apache/lucene/index/TestIndexWriterConfig.java index db77cb88d4d..07098d41e59 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexWriterConfig.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexWriterConfig.java @@ -47,7 +47,7 @@ public class TestIndexWriterConfig extends LuceneTestCase { // Does not implement anything - used only for type checking on IndexWriterConfig. 
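// A sketch of the subclassing pattern the ParallelArray base class added
// earlier in this patch is designed for; IntColumn is an illustrative name
// and shape, not part of the change:
//
//   static final class IntColumn extends ParallelArray<IntColumn> {
//     final int[] column;
//     IntColumn(int size, AtomicLong bytesUsed) {
//       super(size, bytesUsed); // base constructor accounts size * bytesPerEntry() of RAM
//       column = new int[size];
//     }
//     @Override protected int bytesPerEntry() { return RamUsageEstimator.NUM_BYTES_INT; }
//     @Override public IntColumn newInstance(int size) { return new IntColumn(size, bytesUsed); }
//     @Override protected void copyTo(IntColumn to, int numToCopy) {
//       System.arraycopy(column, 0, to.column, 0, numToCopy);
//     }
//   }
//
// grow() then allocates an oversized instance via ArrayUtil.oversize and
// newInstance, copies with copyTo, and keeps the shared AtomicLong RAM
// accounting in sync through the bytesUsed() hooks.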
@Override - DocConsumer getChain(DocumentsWriter documentsWriter) { + public DocConsumer getChain(DocumentsWriter documentsWriter) { return null; } diff --git a/lucene/src/test/org/apache/lucene/index/codecs/preflexrw/TermInfosWriter.java b/lucene/src/test/org/apache/lucene/index/codecs/preflexrw/TermInfosWriter.java index 782cd3a2a01..97e1963ccd9 100644 --- a/lucene/src/test/org/apache/lucene/index/codecs/preflexrw/TermInfosWriter.java +++ b/lucene/src/test/org/apache/lucene/index/codecs/preflexrw/TermInfosWriter.java @@ -61,7 +61,7 @@ final class TermInfosWriter { int indexInterval = 128; /** Expert: The fraction of {@link TermDocs} entries stored in skip tables, - * used to accelerate {@link TermDocs#skipTo(int)}. Larger values result in + * used to accelerate {@link TermDocs#advance(int)}. Larger values result in * smaller indexes, greater acceleration, but fewer accelerable cases, while * smaller values result in bigger indexes, less acceleration and more * accelerable cases. More detailed experiments would be useful here. */ diff --git a/lucene/src/test/org/apache/lucene/index/values/TestIndexValues.java b/lucene/src/test/org/apache/lucene/index/values/TestIndexValues.java new file mode 100644 index 00000000000..87efd06e5c4 --- /dev/null +++ b/lucene/src/test/org/apache/lucene/index/values/TestIndexValues.java @@ -0,0 +1,658 @@ +package org.apache.lucene.index.values; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.Closeable; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.EnumSet; +import java.util.List; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.Fieldable; +import org.apache.lucene.document.ValuesField; +import org.apache.lucene.document.Field.Index; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.LogDocMergePolicy; +import org.apache.lucene.index.LogMergePolicy; +import org.apache.lucene.index.MergePolicy; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.values.Reader.SortedSource; +import org.apache.lucene.index.values.Reader.Source; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.LockObtainFailedException; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.FloatsRef; +import org.apache.lucene.util.LongsRef; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.OpenBitSet; +import org.apache.lucene.util.UnicodeUtil; +import org.apache.lucene.util._TestUtil; + +public class TestIndexValues extends LuceneTestCase { + + public void testBytesStraight() throws IOException { + runTestBytes(Bytes.Mode.STRAIGHT, true); + runTestBytes(Bytes.Mode.STRAIGHT, false); + } + + public void testBytesDeref() throws IOException { + runTestBytes(Bytes.Mode.DEREF, true); + runTestBytes(Bytes.Mode.DEREF, false); + } + + public void testBytesSorted() throws IOException { + runTestBytes(Bytes.Mode.SORTED, true); + runTestBytes(Bytes.Mode.SORTED, false); + } + + // nocommit -- for sorted test, do our own Sort of the + // values and verify it's identical + public void runTestBytes(final Bytes.Mode mode, + final boolean fixedSize) throws IOException { + + final BytesRef bytesRef = new BytesRef(); + + final Comparator comp = mode == Bytes.Mode.SORTED ? 
BytesRef + .getUTF8SortedAsUnicodeComparator() + : null; + + Directory dir = newDirectory(); + Writer w = Bytes + .getWriter(dir, "test", mode, comp, fixedSize); + int maxDoc = 220; + final String[] values = new String[maxDoc]; + final int lenMin, lenMax; + if (fixedSize) { + lenMin = lenMax = 3 + random.nextInt(7); + } else { + lenMin = 1; + lenMax = 15 + random.nextInt(6); + } + for (int i = 0; i < 100; i++) { + final String s; + if (i > 0 && random.nextInt(5) <= 2) { + // use prior value + s = values[2 * random.nextInt(i)]; + } else { + s = _TestUtil.randomUnicodeString(random, lenMin, lenMax); + } + values[2 * i] = s; + + UnicodeUtil.UTF16toUTF8(s, 0, s.length(), bytesRef); + w.add(2 * i, bytesRef); + } + w.finish(maxDoc); + + Reader r = Bytes.getReader(dir, "test", mode, fixedSize, maxDoc); + for (int iter = 0; iter < 2; iter++) { + ValuesEnum bytesEnum = r.getEnum(); + assertNotNull("enum is null", bytesEnum); + ValuesAttribute attr = bytesEnum.addAttribute(ValuesAttribute.class); + assertNotNull("attribute is null", attr); + BytesRef ref = attr.bytes(); + assertNotNull("BytesRef is null - enum not initialized to use bytes", attr); + + for (int i = 0; i < 2; i++) { + final int idx = 2 * i; + assertEquals("doc: " + idx, idx, bytesEnum.advance(idx)); + String utf8String = ref.utf8ToString(); + assertEquals("doc: " + idx + " lenLeft: " + values[idx].length() + " lenRight: " + utf8String.length() , values[idx], utf8String); + } + assertEquals(ValuesEnum.NO_MORE_DOCS, bytesEnum.advance(maxDoc)); + assertEquals(ValuesEnum.NO_MORE_DOCS, bytesEnum.advance(maxDoc+1)); + + bytesEnum.close(); + } + + + // Verify we can load source twice: + for (int iter = 0; iter < 2; iter++) { + Source s; + Reader.SortedSource ss; + if (mode == Bytes.Mode.SORTED) { + s = ss = r.loadSorted(comp); + } else { + s = r.load(); + ss = null; + } + + for (int i = 0; i < 100; i++) { + final int idx = 2 * i; + assertNotNull("doc " + idx + "; value=" + values[idx], s.bytes(idx)); + assertEquals("doc " + idx, values[idx], s.bytes(idx).utf8ToString()); + if (ss != null) { + assertEquals("doc " + idx, values[idx], ss.getByOrd(ss.ord(idx)) + .utf8ToString()); + Reader.SortedSource.LookupResult result = ss.getByValue(new BytesRef( + values[idx])); + assertTrue(result.found); + assertEquals(ss.ord(idx), result.ord); + } + } + + // Lookup random strings: + if (mode == Bytes.Mode.SORTED) { + final int numValues = ss.getValueCount(); + for (int i = 0; i < 1000; i++) { + BytesRef bytesValue = new BytesRef(_TestUtil.randomUnicodeString( + random, lenMin, lenMax)); + SortedSource.LookupResult result = ss.getByValue(bytesValue); + if (result.found) { + assert result.ord > 0; + assertTrue(bytesValue.bytesEquals(ss.getByOrd(result.ord))); + int count = 0; + for (int k = 0; k < 100; k++) { + if (bytesValue.utf8ToString().equals(values[2 * k])) { + assertEquals(ss.ord(2 * k), result.ord); + count++; + } + } + assertTrue(count > 0); + } else { + assert result.ord >= 0; + if (result.ord == 0) { + final BytesRef firstRef = ss.getByOrd(1); + // random string was before our first + assertTrue(firstRef.compareTo(bytesValue) > 0); + } else if (result.ord == numValues) { + final BytesRef lastRef = ss.getByOrd(numValues); + // random string was after our last + assertTrue(lastRef.compareTo(bytesValue) < 0); + } else { + // random string fell between two of our values + final BytesRef before = (BytesRef) ss.getByOrd(result.ord) + .clone(); + final BytesRef after = ss.getByOrd(result.ord + 1); + assertTrue(before.compareTo(bytesValue) < 0); + 
assertTrue(bytesValue.compareTo(after) < 0); + + } + } + } + } + } + + r.close(); + dir.close(); + } + + public void testInts() throws IOException { + long maxV = 1; + final int NUM_VALUES = 1000; + final long[] values = new long[NUM_VALUES]; + for (int rx = 1; rx < 63; rx++, maxV *= 2) { + for (int b = 0; b < 2; b++) { + Directory dir = newDirectory(); + boolean useFixedArrays = b == 0; + Writer w = Ints.getWriter(dir, "test", useFixedArrays); + for (int i = 0; i < NUM_VALUES; i++) { + final long v = random.nextLong() % (1 + maxV); + values[i] = v; + w.add(i, v); + } + final int additionalDocs = 1 + random.nextInt(9); + w.finish(NUM_VALUES + additionalDocs); + + Reader r = Ints.getReader(dir, "test", useFixedArrays); + for (int iter = 0; iter < 2; iter++) { + Source s = r.load(); + for (int i = 0; i < NUM_VALUES; i++) { + final long v = s.ints(i); + assertEquals("index " + i + " b: " + b, values[i], v); + } + } + + for (int iter = 0; iter < 2; iter++) { + ValuesEnum iEnum = r.getEnum(); + ValuesAttribute attr = iEnum.addAttribute(ValuesAttribute.class); + LongsRef ints = attr.ints(); + for (int i = 0; i < NUM_VALUES; i++) { + assertEquals(i, iEnum.nextDoc()); + assertEquals(values[i], ints.get()); + } + for (int i = NUM_VALUES; i < NUM_VALUES + additionalDocs; i++) { + assertEquals(i, iEnum.nextDoc()); + assertEquals("" + i, 0, ints.get()); + } + + iEnum.close(); + } + + for (int iter = 0; iter < 2; iter++) { + ValuesEnum iEnum = r.getEnum(); + ValuesAttribute attr = iEnum.addAttribute(ValuesAttribute.class); + LongsRef ints = attr.ints(); + for (int i = 0; i < NUM_VALUES; i += 1 + random.nextInt(25)) { + assertEquals(i, iEnum.advance(i)); + assertEquals(values[i], ints.get()); + } + for (int i = NUM_VALUES; i < NUM_VALUES + additionalDocs; i++) { + assertEquals(i, iEnum.advance(i)); + assertEquals("" + i, 0, ints.get()); + } + + iEnum.close(); + } + r.close(); + dir.close(); + } + } + } + + public void testFloats4() throws IOException { + runTestFloats(4, 0.00001); + } + + private void runTestFloats(int precision, double delta) + throws IOException { + Directory dir = newDirectory(); + Writer w = Floats.getWriter(dir, "test", precision); + final int NUM_VALUES = 1000; + final double[] values = new double[NUM_VALUES]; + for (int i = 0; i < NUM_VALUES; i++) { + final double v = precision == 4 ? 
random.nextFloat() : random.nextDouble(); + values[i] = v; + w.add(i, v); + } + final int additionalValues = 1 + random.nextInt(10); + w.finish(NUM_VALUES + additionalValues); + + Reader r = Floats.getReader(dir, "test", NUM_VALUES + + additionalValues); + for (int iter = 0; iter < 2; iter++) { + Source s = r.load(); + for (int i = 0; i < NUM_VALUES; i++) { + assertEquals(values[i], s.floats(i), 0.0f); + } + } + + for (int iter = 0; iter < 2; iter++) { + ValuesEnum fEnum = r.getEnum(); + ValuesAttribute attr = fEnum.addAttribute(ValuesAttribute.class); + FloatsRef floats = attr.floats(); + for (int i = 0; i < NUM_VALUES; i++) { + assertEquals(i, fEnum.nextDoc()); + assertEquals(values[i], floats.get(), delta); + } + for(int i = NUM_VALUES; i < NUM_VALUES + additionalValues; i++) { + assertEquals(i, fEnum.nextDoc()); + assertEquals(0.0, floats.get(), delta); + } + fEnum.close(); + } + for (int iter = 0; iter < 2; iter++) { + ValuesEnum fEnum = r.getEnum(); + ValuesAttribute attr = fEnum.addAttribute(ValuesAttribute.class); + FloatsRef floats = attr.floats(); + for (int i = 0; i < NUM_VALUES; i += 1 + random.nextInt(25)) { + assertEquals(i, fEnum.advance(i)); + assertEquals(values[i], floats.get(), delta); + } + for(int i = NUM_VALUES; i < NUM_VALUES + additionalValues; i++) { + assertEquals(i, fEnum.advance(i)); + assertEquals(0.0, floats.get(), delta); + } + fEnum.close(); + } + + r.close(); + dir.close(); + } + + public void testFloats8() throws IOException { + runTestFloats(8, 0.0); + } + + /** + * Tests complete indexing of {@link Values} including deletions, merging and + * sparse value fields on Compound-File + */ + public void testCFSIndex() throws IOException { + // without deletions + IndexWriterConfig cfg = writerConfig(true); + // primitives - no deletes + runTestNumerics(cfg,false); + + cfg = writerConfig(true); + // bytes - no deletes + runTestIndexBytes(cfg, false); + + // with deletions + cfg = writerConfig(true); + // primitives + runTestNumerics(cfg, true); + + cfg = writerConfig(true); + // bytes + runTestIndexBytes(cfg, true); + } + + /** + * Tests complete indexing of {@link Values} including deletions, merging and + * sparse value fields on None-Compound-File + */ + public void testIndex() throws IOException { + // + // without deletions + IndexWriterConfig cfg = writerConfig(false); + // primitives - no deletes + runTestNumerics(cfg, false); + + cfg = writerConfig(false); + // bytes - no deletes + runTestIndexBytes(cfg, false); + + // with deletions + cfg = writerConfig(false); + // primitives + runTestNumerics(cfg, true); + + cfg = writerConfig(false); + // bytes + runTestIndexBytes(cfg, true); + } + + private IndexWriterConfig writerConfig(boolean useCompoundFile) { + final IndexWriterConfig cfg = newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer()); + MergePolicy mergePolicy = cfg.getMergePolicy(); + if(mergePolicy instanceof LogMergePolicy) { + ((LogMergePolicy)mergePolicy).setUseCompoundFile(useCompoundFile); + } else if(useCompoundFile) { + LogMergePolicy policy = new LogDocMergePolicy(); + policy.setUseCompoundFile(useCompoundFile); + cfg.setMergePolicy(policy); + } + return cfg; + } + + public void runTestNumerics(IndexWriterConfig cfg, + boolean withDeletions) throws IOException { + Directory d = newDirectory(); + IndexWriter w = new IndexWriter(d, cfg); + final int numValues = 350; + final List numVariantList = new ArrayList(NUMERICS); + + // run in random order to test if fill works correctly during merges + Collections.shuffle(numVariantList, 
random); + for (Values val : numVariantList) { + OpenBitSet deleted = indexValues(w, numValues, val, numVariantList, + withDeletions, 7); + List closeables = new ArrayList(); + IndexReader r = IndexReader.open(w); + final int numRemainingValues = (int) (numValues - deleted.cardinality()); + final int base = r.numDocs() - numRemainingValues; + switch (val) { + case PACKED_INTS: + case PACKED_INTS_FIXED: { + Reader intsReader = r.getIndexValues(val.name()); + Source ints = intsReader.load(); + ValuesEnum intsEnum = intsReader.getEnum(); + assertNotNull(intsEnum); + LongsRef enumRef = intsEnum.addAttribute(ValuesAttribute.class).ints(); + for (int i = 0; i < base; i++) { + assertEquals(0, ints.ints(i)); + assertEquals(val.name() + " base: " + base + " index: " + i, i, random.nextBoolean()?intsEnum.advance(i): intsEnum.nextDoc()); + assertEquals(0, enumRef.get()); + } + int expected = 0; + for (int i = base; i < r.numDocs(); i++, expected++) { + while (deleted.get(expected)) { + expected++; + } + assertEquals("advance failed at index: " + i + " of " + r.numDocs() + " docs", i, intsEnum.advance(i)); + assertEquals(expected, ints.ints(i)); + assertEquals(expected, enumRef.get()); + + } + } + break; + case SIMPLE_FLOAT_4BYTE: + case SIMPLE_FLOAT_8BYTE: { + Reader floatReader = r.getIndexValues(val.name()); + Source floats = floatReader.load(); + ValuesEnum floatEnum = floatReader.getEnum(); + assertNotNull(floatEnum); + FloatsRef enumRef = floatEnum.addAttribute(ValuesAttribute.class).floats(); + + for (int i = 0; i < base; i++) { + assertEquals(0.0d, floats.floats(i), 0.0d); + assertEquals(i, random.nextBoolean()?floatEnum.advance(i): floatEnum.nextDoc()); + assertEquals("index " + i, 0.0 ,enumRef.get(), 0.0); + } + int expected = 0; + for (int i = base; i < r.numDocs(); i++, expected++) { + while (deleted.get(expected)) { + expected++; + } + assertEquals("advance failed at index: " + i + " of " + r.numDocs() + " docs base:" + base, i, floatEnum.advance(i)); + assertEquals("index " + i, 2.0 * expected ,enumRef.get() , 0.00001); + assertEquals("index " + i, 2.0 * expected, floats.floats(i), 0.00001); + } + } + break; + default: + fail("unexpected value " + val); + } + + closeables.add(r); + for (Closeable toClose : closeables) { + toClose.close(); + } + } + w.close(); + d.close(); + } + + private static EnumSet BYTES = EnumSet.of( + Values.BYTES_FIXED_DEREF, + Values.BYTES_FIXED_SORTED, + Values.BYTES_FIXED_STRAIGHT, + Values.BYTES_VAR_DEREF , + Values.BYTES_VAR_SORTED, + Values.BYTES_VAR_STRAIGHT + ); + + private static EnumSet STRAIGHT_BYTES = EnumSet.of( + Values.BYTES_FIXED_STRAIGHT, + Values.BYTES_VAR_STRAIGHT + ); + + private static EnumSet NUMERICS = EnumSet.of(Values.PACKED_INTS, Values.PACKED_INTS_FIXED, Values.SIMPLE_FLOAT_4BYTE, Values.SIMPLE_FLOAT_8BYTE); + + private static Index[] IDX_VALUES = new Index[] { Index.ANALYZED, Index.ANALYZED_NO_NORMS, Index.NOT_ANALYZED, Index.NOT_ANALYZED_NO_NORMS}; + private OpenBitSet indexValues(IndexWriter w, int numValues, + Values value, List valueVarList, boolean withDeletions, + int multOfSeven) throws CorruptIndexException, IOException { + final boolean isNumeric = NUMERICS.contains(value); + OpenBitSet deleted = new OpenBitSet(numValues); + Document doc = new Document(); + Fieldable field = random.nextBoolean()? 
new ValuesField(value.name()):newField(value.name(), _TestUtil.randomRealisticUnicodeString(random, 10), IDX_VALUES[random.nextInt(IDX_VALUES.length)]); + doc.add(field); + + ValuesAttribute valuesAttribute = ValuesField.values(field); + valuesAttribute.setType(value); + final LongsRef intsRef = valuesAttribute.ints(); + final FloatsRef floatsRef = valuesAttribute.floats(); + final BytesRef bytesRef = valuesAttribute.bytes(); + + final String idBase = value.name() + "_"; + final byte[] b = new byte[multOfSeven]; + if (bytesRef != null) { + bytesRef.bytes = b; + bytesRef.length = b.length; + bytesRef.offset = 0; + } + // + byte upto = 0; + for (int i = 0; i < numValues; i++) { + if (isNumeric) { + switch (value) { + case PACKED_INTS: + case PACKED_INTS_FIXED: + intsRef.set(i); + break; + case SIMPLE_FLOAT_4BYTE: + case SIMPLE_FLOAT_8BYTE: + floatsRef.set(2.0f * i); + break; + default: + fail("unexpected value " + value); + } + } else { + for (int j = 0; j < b.length; j++) { + b[j] = upto++; + } + } + doc.removeFields("id"); + doc.add(new Field("id", idBase + i, Store.YES, + Index.NOT_ANALYZED_NO_NORMS)); + w.addDocument(doc); + + if (i % 7 == 0) { + if (withDeletions && random.nextBoolean()) { + Values val = valueVarList.get(random.nextInt(1 + valueVarList + .indexOf(value))); + final int randInt = val == value ? random.nextInt(1 + i) : random + .nextInt(numValues); + w.deleteDocuments(new Term("id", val.name() + "_" + randInt)); + if (val == value) { + deleted.set(randInt); + } + } + w.commit(); + + } + } + w.commit(); + + // nocommit test unoptimized with deletions + if(withDeletions || random.nextBoolean()) + w.optimize(); + return deleted; + } + + public void runTestIndexBytes(IndexWriterConfig cfg, + boolean withDeletions) throws CorruptIndexException, + LockObtainFailedException, IOException { + Directory d = newDirectory(); + IndexWriter w = new IndexWriter(d, cfg); + final List<Values> byteVariantList = new ArrayList<Values>(BYTES); + + // run in random order to test if fill works correctly during merges + Collections.shuffle(byteVariantList, random); + final int numValues = 350; + for (Values byteIndexValue : byteVariantList) { + List<Closeable> closeables = new ArrayList<Closeable>(); + + int bytesSize = 7 + random.nextInt(128); + OpenBitSet deleted = indexValues(w, numValues, byteIndexValue, + byteVariantList, withDeletions, bytesSize); + final IndexReader r = IndexReader.open(w); + assertEquals(0, r.numDeletedDocs()); + final int numRemainingValues = (int) (numValues - deleted.cardinality()); + final int base = r.numDocs() - numRemainingValues; + + Reader bytesReader = r.getIndexValues(byteIndexValue.name()); +// closeables.add(bytesReader); + assertNotNull("field " + byteIndexValue.name() + + " returned null reader - maybe merge failed", bytesReader); + Source bytes = bytesReader.load(); + ValuesEnum bytesEnum = bytesReader.getEnum(); + assertNotNull(bytesEnum); + final ValuesAttribute attr = bytesEnum.addAttribute(ValuesAttribute.class); + byte upto = 0; + // test the filled up slots for correctness + for (int i = 0; i < base; i++) { + final BytesRef br = bytes.bytes(i); + String msg = " field: " + byteIndexValue.name() + " at index: " + i + + " base: " + base + " numDocs:" + r.numDocs(); + switch (byteIndexValue) { + case BYTES_VAR_STRAIGHT: + case BYTES_FIXED_STRAIGHT: + assertEquals(i, bytesEnum.advance(i)); + // fixed straight returns bytesref with zero bytes all of fixed + // length + assertNotNull("expected non-null - " + msg, br); + if(br.length != 0) { + assertEquals("expected zero bytes of length " + 
bytesSize + " - " + + msg, bytesSize, br.length); + for (int j = 0; j < br.length; j++) { + assertEquals("Byte at index " + j + " doesn't match - " + msg, 0, + br.bytes[br.offset + j]); + } + } + break; + case BYTES_VAR_SORTED: + case BYTES_FIXED_SORTED: + case BYTES_VAR_DEREF: + case BYTES_FIXED_DEREF: + default: + assertNotNull("expected non-null - " + msg, br); + if(br.length != 0){ + bytes.bytes(i); + } + assertEquals("expected empty bytes - " + br.utf8ToString() + msg, 0, br.length); + } + } + final BytesRef enumRef = attr.bytes(); + + + // test the actual doc values added in this iteration + assertEquals(base + numRemainingValues, r.numDocs()); + int v = 0; + for (int i = base; i < r.numDocs(); i++) { + + String msg = " field: " + byteIndexValue.name() + " at index: " + i + + " base: " + base + " numDocs:" + r.numDocs() + " bytesSize: " + bytesSize; + while (withDeletions && deleted.get(v++)) { + upto += bytesSize; + } + + BytesRef br = bytes.bytes(i); + if(bytesEnum.docID() != i) + assertEquals("seek failed for index " + i + " " + msg, i, bytesEnum.advance(i)); + for (int j = 0; j < br.length; j++, upto++) { + assertEquals("EnumRef Byte at index " + j + " doesn't match - " + msg, + upto, enumRef.bytes[enumRef.offset + j]); + assertEquals("SourceRef Byte at index " + j + " doesn't match - " + msg, + upto, br.bytes[br.offset + j]); + } + } + + // clean up + closeables.add(r); + for (Closeable toClose : closeables) { + toClose.close(); + } + } + + w.close(); + d.close(); + } + +} diff --git a/lucene/src/test/org/apache/lucene/util/_TestUtil.java b/lucene/src/test/org/apache/lucene/util/_TestUtil.java index 380e321f02d..c19044b5578 100644 --- a/lucene/src/test/org/apache/lucene/util/_TestUtil.java +++ b/lucene/src/test/org/apache/lucene/util/_TestUtil.java @@ -117,6 +117,37 @@ public class _TestUtil { } return new String(buffer, 0, end); } + + public static String randomUnicodeString(Random r, int minLength, int maxLength) { + if(minLength > maxLength) + throw new IllegalArgumentException("minLength must be <= maxLength"); + final boolean lenEqual = minLength==maxLength; + final int end = lenEqual?minLength:minLength + r.nextInt(maxLength-minLength+1); + if (end == 0) { + // allow 0 length + return ""; + } + + // TODO(simonw): check this + final int fixedPlane = 5;//minLength % 5; + final char[] buffer = new char[end]; + for (int i = 0; i < end; i++) { + int t = lenEqual? 
fixedPlane: r.nextInt(5); + //buffer[i] = (char) (97 + r.nextInt(26)); + if (0 == t && i < end - 1 && !lenEqual) { + // Make a surrogate pair + // High surrogate + buffer[i++] = (char) nextInt(r, 0xd800, 0xdbff); + // Low surrogate + buffer[i] = (char) nextInt(r, 0xdc00, 0xdfff); + } + else if (t <= 1) buffer[i] = (char) r.nextInt(0x80); + else if (2 == t) buffer[i] = (char) nextInt(r, 0x80, 0x800); + else if (3 == t) buffer[i] = (char) nextInt(r, 0x800, 0xd7ff); + else if (4 == t) buffer[i] = (char) nextInt(r, 0xe000, 0xffff); + } + return new String(buffer, 0, end); + } private static final int[] blockStarts = { 0x0000, 0x0080, 0x0100, 0x0180, 0x0250, 0x02B0, 0x0300, 0x0370, 0x0400, From 26833e93156f57011732d50859302941c6a3eba9 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Thu, 21 Oct 2010 07:09:39 +0000 Subject: [PATCH 003/116] Merged from trunk due to SOLR-2185: QueryElevationComponentTest depends on execution order - failed on Hudson only on this branch git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1025859 13f79535-47bb-0310-9956-ffa450edef68 --- .../handler/component/QueryElevationComponentTest.java | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/solr/src/test/org/apache/solr/handler/component/QueryElevationComponentTest.java b/solr/src/test/org/apache/solr/handler/component/QueryElevationComponentTest.java index ebaacdfdb0e..725abebcd6f 100644 --- a/solr/src/test/org/apache/solr/handler/component/QueryElevationComponentTest.java +++ b/solr/src/test/org/apache/solr/handler/component/QueryElevationComponentTest.java @@ -49,6 +49,15 @@ public class QueryElevationComponentTest extends SolrTestCaseJ4 { public static void beforeClass() throws Exception { initCore("solrconfig-elevate.xml","schema12.xml"); } + + @Before + @Override + public void setUp() throws Exception{ + super.setUp(); + clearIndex(); + assertU(commit()); + assertU(optimize()); + } @Test public void testInterface() throws Exception From 6528add69fe199a2feaad555c09e632a380d142c Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Fri, 22 Oct 2010 10:19:37 +0000 Subject: [PATCH 004/116] Taking SolrInfoMBeanTest out of the loop see SOLR-2160 git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1026275 13f79535-47bb-0310-9956-ffa450edef68 --- solr/src/test/org/apache/solr/SolrInfoMBeanTest.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/solr/src/test/org/apache/solr/SolrInfoMBeanTest.java b/solr/src/test/org/apache/solr/SolrInfoMBeanTest.java index 2177f823a7f..d712c6cb2bd 100644 --- a/solr/src/test/org/apache/solr/SolrInfoMBeanTest.java +++ b/solr/src/test/org/apache/solr/SolrInfoMBeanTest.java @@ -41,7 +41,9 @@ public class SolrInfoMBeanTest extends LuceneTestCase * Gets a list of everything we can find in the classpath and makes sure it has * a name, description, etc... 
*/ + @Ignore // TODO: reenable once SOLR-2160 is fixed public void testCallMBeanInfo() throws Exception { +// Object[] init = org.apache.solr.search.QParserPlugin.standardPlugins; List classes = new ArrayList(); classes.addAll(getClassesForPackage(StandardRequestHandler.class.getPackage().getName())); classes.addAll(getClassesForPackage(SearchHandler.class.getPackage().getName())); @@ -82,7 +84,7 @@ public class SolrInfoMBeanTest extends LuceneTestCase } } assertTrue( "there are at least 10 SolrInfoMBean that should be found in the classpath, found " + checked, checked > 10 ); - } + } static final String FOLDER = File.separator + "build" + File.separator + "solr" + File.separator + "org" + File.separator + "apache" + File.separator + "solr" + File.separator; From 3a4d4aa0c6e10a522c713ed1f9768b205845aca3 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Fri, 22 Oct 2010 19:12:12 +0000 Subject: [PATCH 005/116] fixed copy method in ValuesAttribute git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1026453 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/lucene/benchmark/byTask/feeds/DocMaker.java | 8 ++++---- .../src/java/org/apache/lucene/document/ValuesField.java | 1 - .../apache/lucene/index/values/ValuesAttributeImpl.java | 6 +++--- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java b/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java index de030dadf23..b28300713ed 100644 --- a/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java +++ b/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java @@ -175,7 +175,8 @@ public class DocMaker { Field idField = ds.getField(ID_FIELD, storeVal, Index.NOT_ANALYZED_NO_NORMS, termVecVal); idField.setValue("doc" + (r != null ? r.nextInt(updateDocIDLimit) : incrNumDocsCreated())); doc.add(idField); - + trySetIndexValues(idField); + // Set NAME_FIELD String name = docData.getName(); if (name == null) name = ""; @@ -390,16 +391,15 @@ public class DocMaker { String[] split = fields.split(";"); Map result = new HashMap(); for (String tuple : split) { - final String[] nameValue = tuple.split(":"); + final String[] nameValue = tuple.split("="); if (nameValue.length != 2) { throw new IllegalArgumentException("illegal doc.stored.values format: " - + fields + " expected fieldname:ValuesType;...;...;"); + + fields + " expected fieldname=ValuesType;...;...;"); } result.put(nameValue[0].trim(), Values.valueOf(nameValue[1])); } return result; } - /** Set the configuration parameters of this doc maker. 
*/ public void setConfig(Config config) { this.config = config; diff --git a/lucene/src/java/org/apache/lucene/document/ValuesField.java b/lucene/src/java/org/apache/lucene/document/ValuesField.java index df9332dc42d..43fc6bd6623 100644 --- a/lucene/src/java/org/apache/lucene/document/ValuesField.java +++ b/lucene/src/java/org/apache/lucene/document/ValuesField.java @@ -130,7 +130,6 @@ public class ValuesField extends AbstractField { default: throw new IllegalArgumentException("unknown type: " + type); } - return valField.set(field); } } diff --git a/lucene/src/java/org/apache/lucene/index/values/ValuesAttributeImpl.java b/lucene/src/java/org/apache/lucene/index/values/ValuesAttributeImpl.java index 714ba4b7969..fe504514ba7 100644 --- a/lucene/src/java/org/apache/lucene/index/values/ValuesAttributeImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/ValuesAttributeImpl.java @@ -77,15 +77,15 @@ public class ValuesAttributeImpl extends AttributeImpl implements ValuesAttribut case BYTES_VAR_DEREF: case BYTES_VAR_SORTED: case BYTES_VAR_STRAIGHT: - bytes = (BytesRef) other.bytes.clone(); + other.bytes.copy(bytes); break; case PACKED_INTS: case PACKED_INTS_FIXED: - ints = (LongsRef) other.ints.clone(); + other.ints.copy(ints); break; case SIMPLE_FLOAT_4BYTE: case SIMPLE_FLOAT_8BYTE: - floats = (FloatsRef) other.floats.clone(); + other.floats.copy(floats); break; } From 99fd795df2dd4d2545ff20cdb4973187b7fa588a Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Tue, 26 Oct 2010 08:29:35 +0000 Subject: [PATCH 006/116] First step towards LUCENE-2700: Expose DocValues via Fields git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1027401 13f79535-47bb-0310-9956-ffa450edef68 --- .../instantiated/InstantiatedIndexReader.java | 11 + .../lucene/index/memory/MemoryIndex.java | 13 + .../apache/lucene/index/DirectoryReader.java | 266 +----------------- .../lucene/index/DocFieldProcessor.java | 171 ++--------- .../index/DocFieldProcessorPerThread.java | 11 +- .../apache/lucene/index/DocumentsWriter.java | 10 +- .../org/apache/lucene/index/FieldInfo.java | 2 +- .../java/org/apache/lucene/index/Fields.java | 12 + .../org/apache/lucene/index/FieldsEnum.java | 16 ++ .../lucene/index/FilterIndexReader.java | 11 + .../org/apache/lucene/index/IndexReader.java | 16 +- .../org/apache/lucene/index/MultiFields.java | 41 +++ .../apache/lucene/index/MultiFieldsEnum.java | 19 ++ .../apache/lucene/index/ParallelReader.java | 25 +- .../org/apache/lucene/index/SegmentInfo.java | 7 +- .../apache/lucene/index/SegmentMerger.java | 95 +------ .../apache/lucene/index/SegmentReader.java | 73 +---- .../lucene/index/codecs/FieldsConsumer.java | 50 +++- .../lucene/index/codecs/FieldsProducer.java | 38 ++- .../index/codecs/PerFieldCodecWrapper.java | 27 ++ .../index/codecs/PrefixCodedTermsReader.java | 7 + .../index/codecs/PrefixCodedTermsWriter.java | 2 +- .../index/codecs/preflex/PreFlexFields.java | 7 + .../simpletext/SimpleTextFieldsReader.java | 7 + .../org/apache/lucene/index/values/Bytes.java | 55 ++-- .../org/apache/lucene/index/values/Cache.java | 12 +- .../values/{Reader.java => DocValues.java} | 6 +- .../index/values/FixedDerefBytesImpl.java | 5 + .../index/values/FixedSortedBytesImpl.java | 7 +- .../index/values/FixedStraightBytesImpl.java | 5 + .../apache/lucene/index/values/Floats.java | 30 +- .../org/apache/lucene/index/values/Ints.java | 14 +- .../lucene/index/values/PackedIntsImpl.java | 24 +- .../index/values/VarDerefBytesImpl.java | 5 + .../index/values/VarSortedBytesImpl.java | 7 
+- .../index/values/VarStraightBytesImpl.java | 5 + .../apache/lucene/index/values/Writer.java | 58 ++-- .../apache/lucene/search/FieldComparator.java | 2 +- .../org/apache/lucene/TestExternalCodecs.java | 14 + .../codecs/preflexrw/PreFlexFieldsWriter.java | 7 + .../lucene/index/values/TestIndexValues.java | 231 +++++++++------ 41 files changed, 661 insertions(+), 763 deletions(-) rename lucene/src/java/org/apache/lucene/index/values/{Reader.java => DocValues.java} (95%) diff --git a/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java b/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java index 8fede649e66..96d9198146d 100644 --- a/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java +++ b/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java @@ -31,6 +31,7 @@ import java.util.Comparator; import org.apache.lucene.document.Document; import org.apache.lucene.document.FieldSelector; import org.apache.lucene.index.*; +import org.apache.lucene.index.values.DocValues; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BitVector; import org.apache.lucene.util.BytesRef; @@ -394,6 +395,11 @@ public class InstantiatedIndexReader extends IndexReader { public TermsEnum terms() { return new InstantiatedTermsEnum(orderedTerms, upto, currentField); } + + @Override + public DocValues docValues() throws IOException { + return null; + } }; } @@ -422,6 +428,11 @@ public class InstantiatedIndexReader extends IndexReader { } }; } + + @Override + public DocValues docValues(String field) throws IOException { + return null; + } }; } diff --git a/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java b/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java index 11dd692ff83..5072cf3a762 100644 --- a/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java +++ b/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java @@ -48,6 +48,7 @@ import org.apache.lucene.index.TermFreqVector; import org.apache.lucene.index.TermPositionVector; import org.apache.lucene.index.TermVectorMapper; import org.apache.lucene.index.FieldInvertState; +import org.apache.lucene.index.values.DocValues; import org.apache.lucene.search.Collector; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; @@ -789,6 +790,12 @@ public class MemoryIndex implements Serializable { public TermsEnum terms() { return new MemoryTermsEnum(sortedFields[upto].getValue()); } + + @Override + public DocValues docValues() throws IOException { + // TODO + throw new UnsupportedOperationException("not implemented"); + } }; } @@ -819,6 +826,12 @@ public class MemoryIndex implements Serializable { }; } } + + @Override + public DocValues docValues(String field) throws IOException { + // TODO + throw new UnsupportedOperationException("not implemented"); + } }; } diff --git a/lucene/src/java/org/apache/lucene/index/DirectoryReader.java b/lucene/src/java/org/apache/lucene/index/DirectoryReader.java index 2d29a248847..9863fb961d6 100644 --- a/lucene/src/java/org/apache/lucene/index/DirectoryReader.java +++ b/lucene/src/java/org/apache/lucene/index/DirectoryReader.java @@ -36,14 +36,7 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.Lock; import org.apache.lucene.store.LockObtainFailedException; import 
org.apache.lucene.index.codecs.CodecProvider; -import org.apache.lucene.index.values.Reader; -import org.apache.lucene.index.values.Values; -import org.apache.lucene.index.values.ValuesEnum; -import org.apache.lucene.index.values.Reader.Source; -import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.Bits; -import org.apache.lucene.util.FloatsRef; -import org.apache.lucene.util.LongsRef; import org.apache.lucene.util.ReaderUtil; import org.apache.lucene.util.BytesRef; @@ -996,264 +989,7 @@ class DirectoryReader extends IndexReader implements Cloneable { } return commits; - } - - public Reader getIndexValues(String field) { - ensureOpen(); - if (subReaders.length == 1) { - return subReaders[0].getIndexValues(field); - } - return new MultiValueReader(field); - } - - private class MultiValueReader extends Reader { - - private String id; - private Values value; - - public MultiValueReader(String id) { - this.id = id; - for (SegmentReader reader : subReaders) { - FieldInfo fieldInfo = reader.fieldInfos().fieldInfo(id); - if(fieldInfo != null){ - value = fieldInfo.getIndexValues(); - break; - } - } - } - - @Override - public ValuesEnum getEnum(AttributeSource source) throws IOException { - return new MultiValuesEnum(id, value); - } - - @Override - public Source load() throws IOException { - return new MultiSource(id); - } - - public void close() throws IOException { - // - } - - } - - private class MultiValuesEnum extends ValuesEnum { - private int numDocs_ = 0; - private int pos = -1; - private int start = 0; - private final String id; - private final ValuesEnum[] enumCache; - private ValuesEnum current; - - protected MultiValuesEnum(String id, Values enumType) { - super(enumType); - enumCache = new ValuesEnum[subReaders.length]; - this.id = id; - } - - @Override - public void close() throws IOException { - for (ValuesEnum valuesEnum : enumCache) { - if(valuesEnum != null) - valuesEnum.close(); - } - } - - @Override - public int advance( int target) throws IOException { - int n = target - start; - do { - if(target >= maxDoc) - return pos = NO_MORE_DOCS; - if (n >= numDocs_) { - int idx = readerIndex(target); - if (enumCache[idx] == null) { - try { - Reader indexValues = subReaders[idx].getIndexValues(id); - if (indexValues != null) // nocommit does that work with default - // values? - enumCache[idx] = indexValues.getEnum(this.attributes()); - else - enumCache[idx] = new DummyEnum(this.attributes(), - subReaders[idx].maxDoc(), attr.type()); - } catch (IOException ex) { - // nocommit what to do here? - throw new RuntimeException(ex); - } - } - current = enumCache[idx]; - start = starts[idx]; - numDocs_ = subReaders[idx].maxDoc(); - n = target - start; - } - target = start+numDocs_; - } while ((n = current.advance(n)) == NO_MORE_DOCS); - return pos = start+current.docID(); - } - - - @Override - public int docID() { - return pos; - } - - @Override - public int nextDoc() throws IOException { - return advance(pos+1); - } - } - - private class MultiSource extends Source { - private int numDocs_ = 0; - private int start = 0; - private Source current; - private final String id; - - MultiSource(String id) { - this.id = id; - } - - public long ints(int docID) { - int n = docID - start; - if(n >= numDocs_) { - int idx = readerIndex(docID); - try{ - current = subReaders[idx].getIndexValuesCache().getInts(id); - if(current == null) //nocommit does that work with default values? - current = new DummySource(); - }catch(IOException ex) { - // nocommit what to do here? 
- throw new RuntimeException(ex); - } - start = starts[idx]; - numDocs_ = subReaders[idx].maxDoc(); - n = docID - start; - } - return current.ints(n); - } - - public double floats(int docID) { - int n = docID - start; - if(n >= numDocs_) { - int idx = readerIndex(docID); - try{ - current = subReaders[idx].getIndexValuesCache().getFloats(id); - if(current == null) //nocommit does that work with default values? - current = new DummySource(); - }catch(IOException ex) { - // nocommit what to do here? - throw new RuntimeException(ex); - } - numDocs_ = subReaders[idx].maxDoc(); - - start = starts[idx]; - n = docID - start; - } - return current.floats(n); - } - - public BytesRef bytes(int docID) { - int n = docID - start; - if(n >= numDocs_) { - int idx = readerIndex(docID); - try{ - current = subReaders[idx].getIndexValuesCache().getBytes(id); - if(current == null) //nocommit does that work with default values? - current = new DummySource(); - }catch(IOException ex) { - // nocommit what to do here? - throw new RuntimeException(ex); - } - numDocs_ = subReaders[idx].maxDoc(); - start = starts[idx]; - n = docID - start; - } - return current.bytes(n); - } - - public long ramBytesUsed() { - return current.ramBytesUsed(); - } - - } - - private static class DummySource extends Source { - private final BytesRef ref = new BytesRef(); - @Override - public BytesRef bytes(int docID) { - return ref; - } - - - @Override - public double floats(int docID) { - return 0.0d; - } - - @Override - public long ints(int docID) { - return 0; - } - - public long ramBytesUsed() { - return 0; - } - } - - private static class DummyEnum extends ValuesEnum { - private int pos = -1; - private final int maxDoc; - - public DummyEnum(AttributeSource source, int maxDoc, Values type) { - super(source, type); - this.maxDoc = maxDoc; - switch (type) { - case BYTES_VAR_STRAIGHT: - case BYTES_FIXED_STRAIGHT: - case BYTES_FIXED_DEREF: - case BYTES_FIXED_SORTED: - case BYTES_VAR_DEREF: - case BYTES_VAR_SORTED: - // nocommit - this is not correct for Fixed_straight - BytesRef bytes = attr.bytes(); - bytes.length = 0; - bytes.offset = 0; - break; - case PACKED_INTS: - case PACKED_INTS_FIXED: - LongsRef ints = attr.ints(); - ints.set(0); - break; - - case SIMPLE_FLOAT_4BYTE: - case SIMPLE_FLOAT_8BYTE: - FloatsRef floats = attr.floats(); - floats.set(0d); - break; - default: - throw new IllegalArgumentException("unknown Values type: " + type); - } - } - @Override - public void close() throws IOException { - } - - @Override - public int advance(int target) throws IOException { - return pos = (pos < maxDoc ? 
target: NO_MORE_DOCS); - } - @Override - public int docID() { - return pos; - } - @Override - public int nextDoc() throws IOException { - return advance(pos+1); - } - - } - + } private static final class ReaderCommit extends IndexCommit { private String segmentsFileName; diff --git a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java index dee8168937f..4db1363e722 100644 --- a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java +++ b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java @@ -18,11 +18,13 @@ package org.apache.lucene.index; */ import org.apache.lucene.store.Directory; +import org.apache.lucene.index.codecs.FieldsConsumer; import org.apache.lucene.index.values.Ints; import org.apache.lucene.index.values.Floats; import org.apache.lucene.index.values.Bytes; import org.apache.lucene.index.values.ValuesAttribute; import org.apache.lucene.index.values.Writer; +import org.apache.lucene.index.values.codec.DocValuesConsumer; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.FloatsRef; import org.apache.lucene.util.LongsRef; @@ -48,154 +50,33 @@ final class DocFieldProcessor extends DocConsumer { final FieldInfos fieldInfos = new FieldInfos(); final DocFieldConsumer consumer; final StoredFieldsWriter fieldsWriter; - final private Map indexValues = new HashMap(); + final private Map docValues = new HashMap(); + private FieldsConsumer fieldsConsumer; // TODO this should be encapsulated in DocumentsWriter - synchronized IndexValuesProcessor getProcessor(Directory dir, String segment, String name, ValuesAttribute attr, FieldInfo fieldInfo) - throws IOException { - if(attr == null) - return null; - IndexValuesProcessor p = indexValues.get(name); - if (p == null) { - org.apache.lucene.index.values.Values v = attr.type(); - final String id = segment + "_" + fieldInfo.number; - switch(v) { - case PACKED_INTS: - p = new IntValuesProcessor(dir, id, false); - break; - case PACKED_INTS_FIXED: - p = new IntValuesProcessor(dir, id, true); - break; - case SIMPLE_FLOAT_4BYTE: - p = new FloatValuesProcessor(dir, id, 4); - break; - case SIMPLE_FLOAT_8BYTE: - p = new FloatValuesProcessor(dir, id, 8); - break; - case BYTES_FIXED_STRAIGHT: - p = new BytesValuesProcessor(dir, id, true, null, Bytes.Mode.STRAIGHT); - break; - case BYTES_FIXED_DEREF: - p = new BytesValuesProcessor(dir, id, true, null, Bytes.Mode.DEREF); - break; - case BYTES_FIXED_SORTED: - p = new BytesValuesProcessor(dir, id, true, attr.bytesComparator(), Bytes.Mode.SORTED); - break; - case BYTES_VAR_STRAIGHT: - p = new BytesValuesProcessor(dir, id, false, null, Bytes.Mode.STRAIGHT); - break; - case BYTES_VAR_DEREF: - p = new BytesValuesProcessor(dir, id, false, null, Bytes.Mode.DEREF); - break; - case BYTES_VAR_SORTED: - p = new BytesValuesProcessor(dir, id, false, attr.bytesComparator(), Bytes.Mode.SORTED); - break; - } - fieldInfo.setIndexValues(v); - indexValues.put(name, p); - } + synchronized DocValuesConsumer docValuesConsumer(Directory dir, + String segment, String name, ValuesAttribute attr, FieldInfo fieldInfo) + throws IOException { + DocValuesConsumer valuesConsumer; + if ((valuesConsumer = docValues.get(name)) == null) { + fieldInfo.setIndexValues(attr.type()); - return p; - } - - static abstract class IndexValuesProcessor { - public abstract void add(int docID, String name, ValuesAttribute attr) throws IOException; - public abstract void finish(int docCount) throws IOException; - public abstract void 
files(Collection files) throws IOException; - } - - static class FloatValuesProcessor extends IndexValuesProcessor { - private final Writer writer; - private final String id; - - public FloatValuesProcessor(Directory dir, String id, int precision) throws IOException { - this.id = id; - writer = Floats.getWriter(dir, id, precision); - } - - @Override - public void add(int docID, String name, ValuesAttribute attr) throws IOException { - final FloatsRef floats = attr.floats(); - if(floats != null) { - writer.add(docID, floats.get()); - return; - } - throw new IllegalArgumentException("could not extract float/double from field " + name); - } - - @Override - public void finish(int docCount) throws IOException { - writer.finish(docCount); - } - - @Override - public void files(Collection files) { - Floats.files(id, files); - } - } - - static class IntValuesProcessor extends IndexValuesProcessor { - private final Writer writer; - private final String id; - - public IntValuesProcessor(Directory dir, String id, boolean fixedArray) throws IOException { - this.id = id; - writer = Ints.getWriter(dir, id, fixedArray); - } - - @Override - public void add(int docID, String name, ValuesAttribute attr) throws IOException { - final LongsRef ints = attr.ints(); - if(ints != null) { - writer.add(docID, ints.get()); - return; - } - throw new IllegalArgumentException("could not extract int/long from field " + name); - } - - @Override - public void finish(int docCount) throws IOException { - writer.finish(docCount); - } - - @Override - public void files(Collection files) throws IOException { - Ints.files(id, files); - } - } - - static class BytesValuesProcessor extends IndexValuesProcessor { - private final Writer writer; - private final String id; - private final Directory dir; - - public BytesValuesProcessor(Directory dir, String id, boolean fixedSize, Comparator comp, Bytes.Mode mode) throws IOException { - this.id = id; - writer = Bytes.getWriter(dir, id, mode,comp, fixedSize); - this.dir = dir; - } - - // nocommit -- make this thread private and not sync'd - @Override - public synchronized void add(int docID, String name, ValuesAttribute attr) throws IOException { - final BytesRef bytes = attr.bytes(); - if(bytes != null) { - writer.add(docID, bytes); - return; + if(fieldsConsumer == null) { + /* nocommit -- this is a hack and only works since DocValuesCodec supports initializing the FieldsConsumer twice. + * we need to find a way that allows us to obtain a FieldsConsumer per DocumentsWriter. Currently some codecs rely on + * the SegmentsWriteState passed in right at the moment when the segment is flushed (doccount etc) but we need the consumer earlier + * to support docvalues and later on stored fields too. 
+ */ + SegmentWriteState state = docWriter.segWriteState(); + fieldsConsumer = state.codec.fieldsConsumer(state); } - throw new IllegalArgumentException("could not extract byte[] from field " + name); + valuesConsumer = fieldsConsumer.addValuesField(fieldInfo); + docValues.put(name, valuesConsumer); } + return valuesConsumer; - @Override - public void finish(int docCount) throws IOException { - writer.finish(docCount); - } - - @Override - public void files(Collection files) throws IOException { - Bytes.files(dir, id, files); - } } + public DocFieldProcessor(DocumentsWriter docWriter, DocFieldConsumer consumer) { this.docWriter = docWriter; this.consumer = consumer; @@ -221,13 +102,17 @@ final class DocFieldProcessor extends DocConsumer { fieldsWriter.flush(state); consumer.flush(childThreadsAndFields, state); - for(IndexValuesProcessor p : indexValues.values()) { + for(DocValuesConsumer p : docValues.values()) { if (p != null) { p.finish(state.numDocs); p.files(state.flushedFiles); } } - indexValues.clear(); + docValues.clear(); + if(fieldsConsumer != null) { + fieldsConsumer.close(); // nocommit this should go away + fieldsConsumer = null; + } // Important to save after asking consumer to flush so // consumer can alter the FieldInfo* if necessary. EG, diff --git a/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java b/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java index 56e7dea9597..0f2fed91c6d 100644 --- a/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java +++ b/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java @@ -20,14 +20,12 @@ package org.apache.lucene.index; import java.util.Collection; import java.util.HashSet; import java.util.List; -import java.util.Set; -import java.util.Map.Entry; import java.io.IOException; -import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Document; import org.apache.lucene.document.Fieldable; import org.apache.lucene.index.values.ValuesAttribute; +import org.apache.lucene.index.values.codec.DocValuesConsumer; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.RamUsageEstimator; @@ -255,17 +253,16 @@ final class DocFieldProcessorPerThread extends DocConsumerPerThread { final DocFieldProcessorPerField perField = fields[i]; final Fieldable fieldable = perField.fields[0]; perField.consumer.processFields(perField.fields, perField.fieldCount); + if(!fieldable.hasFieldAttribute()) continue; final AttributeSource attrSource = fieldable.getFieldAttributes(); if(!attrSource.hasAttribute(ValuesAttribute.class)) continue; final ValuesAttribute attribute = attrSource.getAttribute(ValuesAttribute.class); - final DocFieldProcessor.IndexValuesProcessor processor = docFieldProcessor - .getProcessor(docState.docWriter.directory, + final DocValuesConsumer consumer = docFieldProcessor.docValuesConsumer(docState.docWriter.directory, docState.docWriter.segment, fieldable.name(), attribute, perField.fieldInfo); - if (processor != null) - processor.add(docState.docID, fieldable.name(), attribute); + consumer.add(docState.docID, attribute); } if (docState.maxTermPrefix != null && docState.infoStream != null) { docState.infoStream.println("WARNING: document contains at least one immense term (whose UTF8 encoding is longer than the max length " + DocumentsWriter.MAX_TERM_LENGTH_UTF8 + "), all of which were skipped. Please correct the analyzer to not produce such terms. 
The prefix of the first immense term is: '" + docState.maxTermPrefix + "...'"); diff --git a/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java b/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java index 27784c261a3..acc20d4b842 100644 --- a/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java +++ b/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java @@ -604,9 +604,13 @@ final class DocumentsWriter { synchronized private void initFlushState(boolean onlyDocStore) { initSegmentName(onlyDocStore); - flushState = new SegmentWriteState(infoStream, directory, segment, docFieldProcessor.fieldInfos, - docStoreSegment, numDocsInRAM, numDocsInStore, writer.getConfig().getTermIndexInterval(), - writer.codecs); + flushState = segWriteState(); + } + + SegmentWriteState segWriteState() { + return new SegmentWriteState(infoStream, directory, segment, docFieldProcessor.fieldInfos, + docStoreSegment, numDocsInRAM, numDocsInStore, writer.getConfig().getTermIndexInterval(), + writer.codecs); } /** Returns the codec used to flush the last segment */ diff --git a/lucene/src/java/org/apache/lucene/index/FieldInfo.java b/lucene/src/java/org/apache/lucene/index/FieldInfo.java index d0bdd1cc68f..d7529874599 100644 --- a/lucene/src/java/org/apache/lucene/index/FieldInfo.java +++ b/lucene/src/java/org/apache/lucene/index/FieldInfo.java @@ -103,7 +103,7 @@ public final class FieldInfo { } } - Values getIndexValues() { + public Values getIndexValues() { return indexValues; } } diff --git a/lucene/src/java/org/apache/lucene/index/Fields.java b/lucene/src/java/org/apache/lucene/index/Fields.java index a14ca1d52c3..f3fe6542775 100644 --- a/lucene/src/java/org/apache/lucene/index/Fields.java +++ b/lucene/src/java/org/apache/lucene/index/Fields.java @@ -19,6 +19,8 @@ package org.apache.lucene.index; import java.io.IOException; +import org.apache.lucene.index.values.DocValues; + /** Flex API for access to fields and terms * @lucene.experimental */ @@ -31,6 +33,16 @@ public abstract class Fields { /** Get the {@link Terms} for this field. This may return * null if the field does not exist. */ public abstract Terms terms(String field) throws IOException; + + /** + * Returns {@link DocValues} for the current field. + * + * @param field the field name + * @return the {@link DocValues} for this field or null if not + * applicable. + * @throws IOException + */ + public abstract DocValues docValues(String field) throws IOException; public final static Fields[] EMPTY_ARRAY = new Fields[0]; } diff --git a/lucene/src/java/org/apache/lucene/index/FieldsEnum.java b/lucene/src/java/org/apache/lucene/index/FieldsEnum.java index 4a2d2dc0d35..e3112ca8b5b 100644 --- a/lucene/src/java/org/apache/lucene/index/FieldsEnum.java +++ b/lucene/src/java/org/apache/lucene/index/FieldsEnum.java @@ -19,6 +19,7 @@ package org.apache.lucene.index; import java.io.IOException; +import org.apache.lucene.index.values.DocValues; import org.apache.lucene.index.values.ValuesEnum; import org.apache.lucene.util.AttributeSource; @@ -57,6 +58,16 @@ public abstract class FieldsEnum { * will not return null. */ public abstract TermsEnum terms() throws IOException; + /** + * Returns {@link DocValues} for the current field. + * + * @return the {@link DocValues} for this field or null if not + * applicable. 
+ * @throws IOException + */ + public abstract DocValues docValues() throws IOException; + + public final static FieldsEnum[] EMPTY_ARRAY = new FieldsEnum[0]; /** Provides zero fields */ @@ -71,5 +82,10 @@ public abstract class FieldsEnum { public TermsEnum terms() { throw new IllegalStateException("this method should never be called"); } + + @Override + public DocValues docValues() throws IOException { + throw new IllegalStateException("this method should never be called"); + } }; } diff --git a/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java b/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java index 0731a1c3553..838e939945e 100644 --- a/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java +++ b/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java @@ -19,6 +19,7 @@ package org.apache.lucene.index; import org.apache.lucene.document.Document; import org.apache.lucene.document.FieldSelector; +import org.apache.lucene.index.values.DocValues; import org.apache.lucene.index.values.ValuesEnum; import org.apache.lucene.store.Directory; import org.apache.lucene.util.Bits; @@ -59,6 +60,11 @@ public class FilterIndexReader extends IndexReader { public Terms terms(String field) throws IOException { return in.terms(field); } + + @Override + public DocValues docValues(String field) throws IOException { + return in.docValues(field); + } } /** Base class for filtering {@link Terms} @@ -117,6 +123,11 @@ public class FilterIndexReader extends IndexReader { public TermsEnum terms() throws IOException { return in.terms(); } + + @Override + public DocValues docValues() throws IOException { + return in.docValues(); + } } /** Base class for filtering {@link TermsEnum} implementations. */ diff --git a/lucene/src/java/org/apache/lucene/index/IndexReader.java b/lucene/src/java/org/apache/lucene/index/IndexReader.java index c28f13856ee..2cb8d6d9d72 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexReader.java +++ b/lucene/src/java/org/apache/lucene/index/IndexReader.java @@ -22,7 +22,7 @@ import org.apache.lucene.document.FieldSelector; import org.apache.lucene.search.Similarity; import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.index.values.Cache; -import org.apache.lucene.index.values.Reader; +import org.apache.lucene.index.values.DocValues; import org.apache.lucene.store.*; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; @@ -117,6 +117,9 @@ public abstract class IndexReader implements Cloneable,Closeable { public static final FieldOption TERMVECTOR_WITH_OFFSET = new FieldOption ("TERMVECTOR_WITH_OFFSET"); /** All fields with termvectors with offset values and position values enabled */ public static final FieldOption TERMVECTOR_WITH_POSITION_OFFSET = new FieldOption ("TERMVECTOR_WITH_POSITION_OFFSET"); + /** All fields holding doc values */ + public static final FieldOption DOC_VALUES = new FieldOption ("DOC_VALUES"); + } private boolean closed; @@ -1374,10 +1377,13 @@ public abstract class IndexReader implements Cloneable,Closeable { public int getTermInfosIndexDivisor() { throw new UnsupportedOperationException("This reader does not support this method."); } - - // nocommit -- should this expose the iterator API via Fields and access Source only via getIndexValuesCache? 
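// A minimal usage sketch of the API this hunk introduces (not from the patch
// itself): per-field values are now reached through Fields/IndexReader rather
// than the removed getIndexValues. The field name "price" and the use of a
// PACKED_INTS field are illustrative assumptions.
//
//   DocValues values = reader.docValues("price");
//   if (values != null) {
//     DocValues.Source source = values.load(); // RAM-resident, random access
//     long count = source.ints(docID);         // accessor matching the field's Values type
//   }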
- public Reader getIndexValues(String field) { - throw new UnsupportedOperationException(); + + public DocValues docValues(String field) throws IOException { + final Fields fields = fields(); + if (fields == null) { + return null; + } + return fields.docValues(field); } private final Cache indexValuesCache = new Cache(this); diff --git a/lucene/src/java/org/apache/lucene/index/MultiFields.java b/lucene/src/java/org/apache/lucene/index/MultiFields.java index fc3beb7e74c..f642383c36b 100644 --- a/lucene/src/java/org/apache/lucene/index/MultiFields.java +++ b/lucene/src/java/org/apache/lucene/index/MultiFields.java @@ -22,6 +22,10 @@ import java.util.Map; import java.util.HashMap; import java.util.List; import java.util.ArrayList; + +import org.apache.lucene.index.values.DocValues; +import org.apache.lucene.index.values.MultiDocValues; +import org.apache.lucene.index.values.MultiDocValues.DocValuesIndex; import org.apache.lucene.util.ReaderUtil; import org.apache.lucene.util.ReaderUtil.Gather; // for javadocs import org.apache.lucene.util.Bits; @@ -46,6 +50,7 @@ public final class MultiFields extends Fields { private final Fields[] subs; private final ReaderUtil.Slice[] subSlices; private final Map terms = new HashMap(); + private final Map docValues = new HashMap(); /** Returns a single {@link Fields} instance for this * reader, merging fields/terms/docs/positions on the @@ -186,6 +191,12 @@ public final class MultiFields extends Fields { return fields.terms(field); } } + + /** This method may return null if the field does not exist.*/ + public static DocValues getDocValues(IndexReader r, String field) throws IOException { + final Fields fields = getFields(r); + return fields == null? null: fields.docValues(field); + } /** Returns {@link DocsEnum} for the specified field & * term. 
This may return null if the term does not @@ -270,5 +281,35 @@ public final class MultiFields extends Fields { return result; } + + @Override + public DocValues docValues(String field) throws IOException { + final DocValues result; + + if (!docValues.containsKey(field)) { + + // Lazy init: first time this field is requested, we + // create & add to docValues: + final List subs2 = new ArrayList(); + final List slices2 = new ArrayList(); + + // Gather all sub-readers that share this field + for(int i=0;i values = new ArrayList(); + for (int i = 0; i < numTop; i++) { + final DocValues docValues = top[i].fields.docValues(); + if (docValues != null) { + values.add(new MultiDocValues.DocValuesIndex(docValues, + top[i].index)); + } + } + // TODO return an empty docvalues instance if values are empty + return docValues.reset(values.toArray(MultiDocValues.DocValuesIndex.EMPTY_ARRAY)); + } } diff --git a/lucene/src/java/org/apache/lucene/index/ParallelReader.java b/lucene/src/java/org/apache/lucene/index/ParallelReader.java index 0aa19ae4d5d..e553f2998a8 100644 --- a/lucene/src/java/org/apache/lucene/index/ParallelReader.java +++ b/lucene/src/java/org/apache/lucene/index/ParallelReader.java @@ -21,7 +21,9 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.FieldSelector; import org.apache.lucene.document.FieldSelectorResult; import org.apache.lucene.document.Fieldable; +import org.apache.lucene.index.values.DocValues; import org.apache.lucene.util.Bits; +import org.apache.lucene.util.Pair; import org.apache.lucene.search.FieldCache; // not great (circular); used only to purge FieldCache entry on close import org.apache.lucene.util.BytesRef; @@ -174,14 +176,22 @@ public class ParallelReader extends IndexReader { return TermsEnum.EMPTY; } } + + @Override + public DocValues docValues() throws IOException { + assert currentReader != null; + return MultiFields.getDocValues(currentReader, currentField); + } } // Single instance of this, per ParallelReader instance private class ParallelFields extends Fields { - final HashMap fields = new HashMap(); + final HashMap> fields = new HashMap>(); public void addField(String field, IndexReader r) throws IOException { - fields.put(field, MultiFields.getFields(r).terms(field)); + Fields multiFields = MultiFields.getFields(r); + fields.put(field, new Pair( multiFields.terms(field), + multiFields.docValues(field))); } @Override @@ -190,11 +200,16 @@ public class ParallelReader extends IndexReader { } @Override public Terms terms(String field) throws IOException { - return fields.get(field); + return fields.get(field).cur; + } + + @Override + public DocValues docValues(String field) throws IOException { + return fields.get(field).cud; } } - - @Override + + @Override public Bits getDeletedDocs() { return MultiFields.getDeletedDocs(readers.get(0)); } diff --git a/lucene/src/java/org/apache/lucene/index/SegmentInfo.java b/lucene/src/java/org/apache/lucene/index/SegmentInfo.java index e8804653432..830072e7329 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentInfo.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentInfo.java @@ -478,12 +478,7 @@ public final class SegmentInfo { if (delFileName != null && (delGen >= YES || dir.fileExists(delFileName))) { fileSet.add(delFileName); } - //nocommit - is there a better way to get all the dat / idx files? 
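// The Multi* implementations in the hunks above all rebase a global docID
// into a sub-reader's doc space before delegating. A minimal sketch of that
// mapping, assuming ReaderUtil.subIndex and a "starts" array of per-reader
// doc bases (variable names are illustrative):
//
//   int idx = ReaderUtil.subIndex(docID, starts); // which sub-reader?
//   int localDoc = docID - starts[idx];           // rebase into that reader
//   long value = subSources[idx].ints(localDoc);  // delegate to its Source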
- for(String file : dir.listAll()) { - if(file.startsWith(name) && (file.endsWith("dat") || file.endsWith("idx"))){ - fileSet.add(file); - } - } + if (normGen != null) { for (int i = 0; i < normGen.length; i++) { long gen = normGen[i]; diff --git a/lucene/src/java/org/apache/lucene/index/SegmentMerger.java b/lucene/src/java/org/apache/lucene/index/SegmentMerger.java index 4546b3d8278..95577954852 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentMerger.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentMerger.java @@ -33,7 +33,7 @@ import org.apache.lucene.index.codecs.MergeState; import org.apache.lucene.index.codecs.FieldsConsumer; import org.apache.lucene.index.values.Bytes; import org.apache.lucene.index.values.Ints; -import org.apache.lucene.index.values.Reader; +import org.apache.lucene.index.values.DocValues; import org.apache.lucene.index.values.Floats; import org.apache.lucene.index.values.Values; import org.apache.lucene.index.values.Writer; @@ -162,9 +162,6 @@ final class SegmentMerger { if (mergeDocStores && fieldInfos.hasVectors()) mergeVectors(); - - mergeIndexValues(); - return mergedDocs; } @@ -178,12 +175,6 @@ final class SegmentMerger { reader.close(); } } - - private void addIfExists(Set files, String file, Directory dir) throws IOException{ - if(dir.fileExists(file)){ - files.add(file); - } - } final List createCompoundFile(String fileName, final SegmentInfo info) throws IOException { @@ -203,14 +194,6 @@ final class SegmentMerger { final int numFIs = fieldInfos.size(); for (int i = 0; i < numFIs; i++) { final FieldInfo fi = fieldInfos.fieldInfo(i); - // Index Values aka. CSF - if (fi.indexValues != null) { - addIfExists(fileSet, IndexFileNames.segmentFileName(segment, Integer - .toString(fi.number), IndexFileNames.CSF_DATA_EXTENSION), directory); - addIfExists(fileSet, IndexFileNames.segmentFileName(segment, Integer - .toString(fi.number), IndexFileNames.CSF_INDEX_EXTENSION), - directory); - } if (fi.isIndexed && !fi.omitNorms) { fileSet.add(IndexFileNames.segmentFileName(segment, "", IndexFileNames.NORMS_EXTENSION)); break; @@ -318,7 +301,7 @@ final class SegmentMerger { if (mergedIndexValues == null) { merged.setIndexValues(fiIndexValues); } else if (mergedIndexValues != fiIndexValues) { - // nocommit -- what to do? + // TODO -- can we recover from this? throw new IllegalStateException("cannot merge field " + fi.name + " indexValues changed from " + mergedIndexValues + " to " + fiIndexValues); } } @@ -331,8 +314,7 @@ final class SegmentMerger { addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.STORES_PAYLOADS), false, false, false, true, false); addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.INDEXED), false, false, false, false, false); fieldInfos.add(reader.getFieldNames(FieldOption.UNINDEXED), false); - - // nocommit -- how should we handle index values here? + fieldInfos.add(reader.getFieldNames(FieldOption.DOC_VALUES), false); } } fieldInfos.write(directory, segment + ".fnm"); @@ -393,77 +375,6 @@ final class SegmentMerger { return docCount; } - private void mergeIndexValues() throws IOException { - final int numFields = fieldInfos.size(); - for (int i = 0; i < numFields; i++) { - final FieldInfo fieldInfo = fieldInfos.fieldInfo(i); - final Values v = fieldInfo.indexValues; - // nocommit we need some kind of compatibility notation for values such - // that two slighly different segments can be merged eg. fixed vs. - // variable byte len or float32 vs. 
float64 - - if (v != null) { - int docBase = 0; - final List mergeStates = new ArrayList(); - for (IndexReader reader : readers) { - Reader r = reader.getIndexValues(fieldInfo.name); - if (r != null) { - mergeStates.add(new Writer.MergeState(r, docBase, reader - .maxDoc(), reader.getDeletedDocs())); - } - docBase += reader.numDocs(); - } - if (mergeStates.isEmpty()) { - continue; - } - final String id = segment + "_" + fieldInfo.number; - final Writer writer; - switch (v) { - case PACKED_INTS: - case PACKED_INTS_FIXED: - writer = Ints.getWriter(directory, id, true); - break; - case SIMPLE_FLOAT_4BYTE: - writer = Floats.getWriter(directory, id, 4); - break; - case SIMPLE_FLOAT_8BYTE: - writer = Floats.getWriter(directory, id, 8); - break; - case BYTES_FIXED_STRAIGHT: - writer = Bytes.getWriter(directory, id, - Bytes.Mode.STRAIGHT, null, true); - break; - case BYTES_FIXED_DEREF: - writer = Bytes.getWriter(directory, id, - Bytes.Mode.DEREF, null, true); - break; - case BYTES_FIXED_SORTED: - // nocommit -- enable setting Comparator - writer = Bytes.getWriter(directory, id, - Bytes.Mode.SORTED, null, true); - break; - case BYTES_VAR_STRAIGHT: - writer = Bytes.getWriter(directory, id, - Bytes.Mode.STRAIGHT, null, false); - break; - case BYTES_VAR_DEREF: - writer = Bytes.getWriter(directory, id, - Bytes.Mode.DEREF, null, false); - break; - case BYTES_VAR_SORTED: - // nocommit -- enable setting Comparator - writer = Bytes.getWriter(directory, id, - Bytes.Mode.SORTED, null, false); - break; - default: - continue; - } - writer.add(mergeStates); - writer.finish(mergedDocs); - } - } - } - private int copyFieldsWithDeletions(final FieldsWriter fieldsWriter, final IndexReader reader, final FieldsReader matchingFieldsReader) throws IOException, MergeAbortedException, CorruptIndexException { diff --git a/lucene/src/java/org/apache/lucene/index/SegmentReader.java b/lucene/src/java/org/apache/lucene/index/SegmentReader.java index bbbd90e43b2..9c854662a24 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentReader.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentReader.java @@ -44,7 +44,7 @@ import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.index.codecs.FieldsProducer; import org.apache.lucene.index.values.Bytes; import org.apache.lucene.index.values.Ints; -import org.apache.lucene.index.values.Reader; +import org.apache.lucene.index.values.DocValues; import org.apache.lucene.index.values.Floats; import org.apache.lucene.index.values.Values; import org.apache.lucene.search.FieldCache; // not great (circular); used only to purge FieldCache entry on close @@ -141,7 +141,6 @@ public class SegmentReader extends IndexReader implements Cloneable { // Ask codec for its Fields fields = si.getCodec().fieldsProducer(new SegmentReadState(cfsDir, si, fieldInfos, readBufferSize, termsIndexDivisor)); assert fields != null; - openIndexValuesReaders(cfsDir, si); success = true; } finally { if (!success) { @@ -155,57 +154,8 @@ public class SegmentReader extends IndexReader implements Cloneable { // not assigned yet). this.origInstance = origInstance; } - - final Map indexValues = new HashMap(); - - // Only opens files... 
doesn't actually load any values - private void openIndexValuesReaders(Directory dir, SegmentInfo si) throws IOException { - final int numFields = fieldInfos.size(); - for(int i=0;i mergeStates = new ArrayList(); + for (IndexReader reader : mergeState.readers) { + DocValues r = reader.docValues(mergeState.fieldInfo.name); + if (r != null) { + mergeStates.add(new Writer.MergeState(r, docBase, reader + .maxDoc(), reader.getDeletedDocs())); + } + docBase += reader.numDocs(); + } + if (mergeStates.isEmpty()) { + continue; + } + final DocValuesConsumer docValuesConsumer = addValuesField(mergeState.fieldInfo); + docValuesConsumer.merge(mergeStates); + docValuesConsumer.finish(mergeState.mergedDocCount); + } + + // merge doc values +// } } + } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/FieldsProducer.java b/lucene/src/java/org/apache/lucene/index/codecs/FieldsProducer.java index a378680328e..a4ce963828b 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/FieldsProducer.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/FieldsProducer.java @@ -17,10 +17,13 @@ package org.apache.lucene.index.codecs; * limitations under the License. */ -import org.apache.lucene.index.Fields; - -import java.io.IOException; import java.io.Closeable; +import java.io.IOException; + +import org.apache.lucene.index.Fields; +import org.apache.lucene.index.FieldsEnum; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.values.DocValues; /** Abstract API that consumes terms, doc, freq, prox and * payloads postings. Concrete implementations of this @@ -33,4 +36,33 @@ import java.io.Closeable; public abstract class FieldsProducer extends Fields implements Closeable { public abstract void close() throws IOException; public abstract void loadTermsIndex(int indexDivisor) throws IOException; + + @Override + public DocValues docValues(String field) throws IOException { + return null; + } + + public static final FieldsProducer EMPTY = new FieldsProducer() { + + @Override + public Terms terms(String field) throws IOException { + return null; + } + + @Override + public FieldsEnum iterator() throws IOException { + return FieldsEnum.EMPTY; + } + + @Override + public void loadTermsIndex(int indexDivisor) throws IOException { + + } + + @Override + public void close() throws IOException { + + } + }; + } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/PerFieldCodecWrapper.java b/lucene/src/java/org/apache/lucene/index/codecs/PerFieldCodecWrapper.java index 8839e8f2c30..cf21d6c3620 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/PerFieldCodecWrapper.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/PerFieldCodecWrapper.java @@ -35,6 +35,8 @@ import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.values.DocValues; +import org.apache.lucene.index.values.codec.DocValuesConsumer; import org.apache.lucene.store.Directory; @@ -112,6 +114,18 @@ public class PerFieldCodecWrapper extends Codec { throw err; } } + + @Override + public DocValuesConsumer addValuesField(FieldInfo field) throws IOException { + fieldsSeen.add(field.name); + Codec codec = getCodec(field.name); + FieldsConsumer fields = codecs.get(codec); + if (fields == null) { + fields = codec.fieldsConsumer(state); + codecs.put(codec, fields); + } + return fields.addValuesField(field); + } } private class FieldsReader extends FieldsProducer { @@ 
-164,6 +178,11 @@ public class PerFieldCodecWrapper extends Codec { return null; } } + + @Override + public DocValues docValues() throws IOException { + return codecs.get(getCodec(current)).docValues(current); + } } @Override @@ -207,6 +226,14 @@ public class PerFieldCodecWrapper extends Codec { it.next().loadTermsIndex(indexDivisor); } } + + @Override + public DocValues docValues(String field) throws IOException { + final Codec codec = getCodec(field); + FieldsProducer fields = codecs.get(codec); + assert fields != null; + return fields.docValues(field); + } } public FieldsProducer fieldsProducer(SegmentReadState state) diff --git a/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java b/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java index d3cd2ce7597..5f5f607cb1a 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java @@ -41,6 +41,7 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CodecUtil; import org.apache.lucene.index.codecs.standard.StandardPostingsReader; // javadocs +import org.apache.lucene.index.values.DocValues; /** Handles a terms dict, but decouples all details of * doc/freqs/positions reading to an instance of {@link @@ -245,6 +246,12 @@ public class PrefixCodedTermsReader extends FieldsProducer { public TermsEnum terms() throws IOException { return current.iterator(); } + + @Override + public DocValues docValues() throws IOException { + // TODO Auto-generated method stub + return null; + } } private class FieldReader extends Terms implements Closeable { diff --git a/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsWriter.java b/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsWriter.java index 198ed7dac50..c9929903fd0 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsWriter.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsWriter.java @@ -94,7 +94,7 @@ public class PrefixCodedTermsWriter extends FieldsConsumer { @Override public TermsConsumer addField(FieldInfo field) { - assert currentField == null || currentField.name.compareTo(field.name) < 0; + assert currentField == null || currentField.name.compareTo(field.name) < 0 : "current field name " + (currentField == null? 
null: currentField.name) + " given: " +field.name; currentField = field; TermsIndexWriterBase.FieldWriter fieldIndexWriter = termsIndexWriter.addField(field); TermsConsumer terms = new TermsWriter(fieldIndexWriter, field, postingsWriter); diff --git a/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java b/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java index 57072463ca0..61781aeecca 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java @@ -37,6 +37,7 @@ import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.CompoundFileReader; import org.apache.lucene.index.codecs.FieldsProducer; +import org.apache.lucene.index.values.DocValues; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.Bits; @@ -222,6 +223,12 @@ public class PreFlexFields extends FieldsProducer { termsEnum.reset(current); return termsEnum; } + + @Override + public DocValues docValues() throws IOException { + //DocValues are not available on PreFlex indices + return null; + } } private class PreTerms extends Terms { diff --git a/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java b/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java index 14c72b8a919..cc9f7de9be8 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java @@ -19,6 +19,7 @@ package org.apache.lucene.index.codecs.simpletext; import org.apache.lucene.util.BytesRef; import org.apache.lucene.index.codecs.FieldsProducer; +import org.apache.lucene.index.values.DocValues; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.FieldsEnum; import org.apache.lucene.index.Terms; @@ -103,6 +104,12 @@ class SimpleTextFieldsReader extends FieldsProducer { public TermsEnum terms() throws IOException { return new SimpleTextTermsEnum(in.getFilePointer(), omitTF); } + + @Override + public DocValues docValues() throws IOException { + // TODO Auto-generated method stub + return null; + } } private class SimpleTextTermsEnum extends TermsEnum { diff --git a/lucene/src/java/org/apache/lucene/index/values/Bytes.java b/lucene/src/java/org/apache/lucene/index/values/Bytes.java index 34e79758293..bd9fd4544f4 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Bytes.java +++ b/lucene/src/java/org/apache/lucene/index/values/Bytes.java @@ -25,8 +25,8 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; import org.apache.lucene.index.IndexFileNames; -import org.apache.lucene.index.values.Reader.SortedSource; -import org.apache.lucene.index.values.Reader.Source; +import org.apache.lucene.index.values.DocValues.SortedSource; +import org.apache.lucene.index.values.DocValues.Source; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; @@ -47,8 +47,7 @@ import org.apache.lucene.util.CodecUtil; * NOTE: Each byte[] must be <= 32768 bytes in length *

*/ -//nocommit - add mmap version -//nocommti - add bulk copy where possible +//TODO - add bulk copy where possible public final class Bytes { // don't instantiate! @@ -59,17 +58,7 @@ public final class Bytes { STRAIGHT, DEREF, SORTED }; - public static void files(Directory dir, String id, Collection files) - throws IOException { - files.add(IndexFileNames.segmentFileName(id, "", - IndexFileNames.CSF_DATA_EXTENSION)); - final String idxFile = IndexFileNames.segmentFileName(id, "", - IndexFileNames.CSF_INDEX_EXTENSION); - if (dir.fileExists(idxFile)) { - files.add(idxFile); - } - } - + // nocommit -- i shouldn't have to specify fixed? can // track itself & do the write thing at write time? public static Writer getWriter(Directory dir, String id, Mode mode, @@ -101,7 +90,7 @@ public final class Bytes { } // nocommit -- I can peek @ header to determing fixed/mode? - public static Reader getReader(Directory dir, String id, Mode mode, + public static DocValues getValues(Directory dir, String id, Mode mode, boolean fixedSize, int maxDoc) throws IOException { if (fixedSize) { if (mode == Mode.STRAIGHT) { @@ -172,6 +161,7 @@ public final class Bytes { static abstract class BytesWriterBase extends Writer { + private final Directory dir; private final String id; protected IndexOutput idxOut; @@ -239,13 +229,32 @@ public final class Bytes { bytesRef = attr.bytes(); assert bytesRef != null; } + + @Override + public void add(int docID, ValuesAttribute attr) throws IOException { + final BytesRef ref; + if((ref = attr.bytes()) != null) { + add(docID, ref); + } + } + + @Override + public void files(Collection files) throws IOException { + files.add(IndexFileNames.segmentFileName(id, "", + IndexFileNames.CSF_DATA_EXTENSION)); + final String idxFile = IndexFileNames.segmentFileName(id, "", + IndexFileNames.CSF_INDEX_EXTENSION); + if (dir.fileExists(idxFile)) { // TODO is this correct? could be initialized lazy + files.add(idxFile); + } + } } /** * Opens all necessary files, but does not read any data in until you call * {@link #load}. */ - static abstract class BytesReaderBase extends Reader { + static abstract class BytesReaderBase extends DocValues { protected final IndexInput idxIn; protected final IndexInput datIn; protected final int version; @@ -270,20 +279,15 @@ public final class Bytes { } protected final IndexInput cloneData() { - assert !isClosed.get():printEx(); // is never NULL return (IndexInput) datIn.clone(); } protected final IndexInput cloneIndex() { - assert !isClosed.get():printEx(); return idxIn == null ? 
null : (IndexInput) idxIn.clone(); } - private final AtomicBoolean isClosed = new AtomicBoolean(false); - Exception ex; + public void close() throws IOException { - assert !isClosed.getAndSet(true); - ex =new Exception(); if (datIn != null) { datIn.close(); } @@ -291,11 +295,6 @@ public final class Bytes { idxIn.close(); } } - - private String printEx() { - ex.printStackTrace(); - return ex.getMessage(); - } } } \ No newline at end of file diff --git a/lucene/src/java/org/apache/lucene/index/values/Cache.java b/lucene/src/java/org/apache/lucene/index/values/Cache.java index 3f3b9dc4890..711e11cdb3d 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Cache.java +++ b/lucene/src/java/org/apache/lucene/index/values/Cache.java @@ -23,8 +23,8 @@ import java.util.HashMap; import java.util.Map; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.values.Reader.SortedSource; -import org.apache.lucene.index.values.Reader.Source; +import org.apache.lucene.index.values.DocValues.SortedSource; +import org.apache.lucene.index.values.DocValues.Source; import org.apache.lucene.util.BytesRef; public class Cache { @@ -42,7 +42,7 @@ public class Cache { synchronized public Source getInts(String id) throws IOException { Source s = ints.get(id); if (s == null) { - final Reader indexValues = r.getIndexValues(id); + final DocValues indexValues = r.docValues(id); if (indexValues == null) { return null; } @@ -56,7 +56,7 @@ public class Cache { synchronized public Source getFloats(String id) throws IOException { Source s = floats.get(id); if (s == null) { - final Reader indexValues = r.getIndexValues(id); + final DocValues indexValues = r.docValues(id); if (indexValues == null) { return null; } @@ -71,7 +71,7 @@ public class Cache { Comparator comp) throws IOException { SortedSource s = sortedBytes.get(id); if (s == null) { - final Reader indexValues = r.getIndexValues(id); + final DocValues indexValues = r.docValues(id); if (indexValues == null) { return null; } @@ -87,7 +87,7 @@ public class Cache { synchronized public Source getBytes(String id) throws IOException { Source s = bytes.get(id); if (s == null) { - final Reader indexValues = r.getIndexValues(id); + final DocValues indexValues = r.docValues(id); if (indexValues == null) { return null; } diff --git a/lucene/src/java/org/apache/lucene/index/values/Reader.java b/lucene/src/java/org/apache/lucene/index/values/DocValues.java similarity index 95% rename from lucene/src/java/org/apache/lucene/index/values/Reader.java rename to lucene/src/java/org/apache/lucene/index/values/DocValues.java index 0bbd90f3a59..501a2c981fe 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Reader.java +++ b/lucene/src/java/org/apache/lucene/index/values/DocValues.java @@ -23,9 +23,11 @@ import java.util.Comparator; import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.BytesRef; -public abstract class Reader implements Closeable { +public abstract class DocValues implements Closeable { + public static final DocValues[] EMPTY_ARRAY = new DocValues[0]; + public ValuesEnum getEnum() throws IOException{ return getEnum(null); } @@ -38,6 +40,8 @@ public abstract class Reader implements Closeable { throw new UnsupportedOperationException(); } + public abstract Values type(); + /** * Source of integer (returned as java long), per document. 
The underlying diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java index 3cac5b20ac2..7e30711b465 100644 --- a/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java @@ -257,6 +257,11 @@ class FixedDerefBytesImpl { } } + + @Override + public Values type() { + return Values.BYTES_FIXED_DEREF; + } } } diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java index 75e26eb588c..810c6a0a82c 100644 --- a/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java @@ -156,7 +156,7 @@ class FixedSortedBytesImpl { } @Override - public org.apache.lucene.index.values.Reader.Source load() throws IOException { + public org.apache.lucene.index.values.DocValues.Source load() throws IOException { return loadSorted(null); } @@ -254,5 +254,10 @@ class FixedSortedBytesImpl { // do unsorted return new DerefBytesEnum(source, cloneData(), cloneIndex(), CODEC_NAME, size); } + + @Override + public Values type() { + return Values.BYTES_FIXED_SORTED; + } } } diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java index 00564264178..3566e336764 100644 --- a/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java @@ -217,5 +217,10 @@ class FixedStraightBytesImpl { return advance(pos+1); } } + + @Override + public Values type() { + return Values.BYTES_FIXED_STRAIGHT; + } } } diff --git a/lucene/src/java/org/apache/lucene/index/values/Floats.java b/lucene/src/java/org/apache/lucene/index/values/Floats.java index 3caccdb7eb0..e343565c9b5 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Floats.java +++ b/lucene/src/java/org/apache/lucene/index/values/Floats.java @@ -28,10 +28,6 @@ public class Floats { private static final int INT_ZERO = Float.floatToRawIntBits(0.0f); private static final long LONG_ZERO = Double.doubleToRawLongBits(0.0); - public static void files(String id, Collection files) { - files.add(id + "." 
+ IndexFileNames.CSF_DATA_EXTENSION); - } - public static Writer getWriter(Directory dir, String id, int precisionBytes) throws IOException { if (precisionBytes != 4 && precisionBytes != 8) { @@ -45,12 +41,14 @@ public class Floats { } } - public static Reader getReader(Directory dir, String id, int maxDoc) + public static DocValues getValues(Directory dir, String id, int maxDoc) throws IOException { return new FloatsReader(dir, id, maxDoc); } abstract static class FloatsWriter extends Writer { + + private final Directory dir; private final String id; private FloatsRef floatsRef; @@ -81,6 +79,13 @@ public class Floats { protected void add(int docID) throws IOException { add(docID, floatsRef.get()); } + + @Override + public void add(int docID, ValuesAttribute attr) throws IOException { + final FloatsRef ref; + if((ref = attr.floats()) != null) + add(docID, ref.get()); + } @Override protected void setNextAttribute(ValuesAttribute attr) { @@ -109,6 +114,13 @@ public class Floats { } else super.merge(state); } + + @Override + public void files(Collection files) throws IOException { + files.add(IndexFileNames.segmentFileName(id, "", + IndexFileNames.CSF_DATA_EXTENSION)); + } + } @@ -203,7 +215,7 @@ public class Floats { * Opens all necessary files, but does not read any data in until you call * {@link #load}. */ - static class FloatsReader extends Reader { + static class FloatsReader extends DocValues { private final IndexInput datIn; private final int precisionBytes; @@ -303,6 +315,12 @@ public class Floats { return precisionBytes == 4 ? new Floats4Enum(source, indexInput, maxDoc) : new Floats8EnumImpl(source, indexInput, maxDoc); } + + @Override + public Values type() { + return precisionBytes == 4 ? Values.SIMPLE_FLOAT_4BYTE + : Values.SIMPLE_FLOAT_8BYTE; + } } static final class Floats4Enum extends FloatsEnumImpl { diff --git a/lucene/src/java/org/apache/lucene/index/values/Ints.java b/lucene/src/java/org/apache/lucene/index/values/Ints.java index a5ea55204c5..9b4e585c64d 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Ints.java +++ b/lucene/src/java/org/apache/lucene/index/values/Ints.java @@ -1,32 +1,24 @@ package org.apache.lucene.index.values; import java.io.IOException; -import java.util.Collection; -import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.values.PackedIntsImpl.IntsReader; import org.apache.lucene.index.values.PackedIntsImpl.IntsWriter; import org.apache.lucene.store.Directory; -//nocommit - add mmap version -//nocommti - add bulk copy where possible +//TODO - add bulk copy where possible public class Ints { private Ints() { } - public static void files(String id, Collection files) - throws IOException { - files.add(IndexFileNames.segmentFileName(id, "", - IndexFileNames.CSF_DATA_EXTENSION)); - } public static Writer getWriter(Directory dir, String id, boolean useFixedArray) throws IOException { - //nocommit - implement fixed?! + //TODO - implement fixed?! 
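+    // A usage sketch for this writer/reader pair (mirrors TestIndexValues;
+    // the directory, the field id "test" and the values are illustrative,
+    // not part of this patch):
+    //   Writer w = Ints.getWriter(dir, "test", true);
+    //   for (int docID = 0; docID < maxDoc; docID++) {
+    //     w.add(docID, 42L); // one long per document; missing docs default to 0
+    //   }
+    //   w.finish(maxDoc);
+    //   DocValues values = Ints.getValues(dir, "test", true);
+    //   long v = values.load().ints(0); // RAM-resident random access by docID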
return new IntsWriter(dir, id); } - public static Reader getReader(Directory dir, String id, boolean useFixedArray) throws IOException { + public static DocValues getValues(Directory dir, String id, boolean useFixedArray) throws IOException { return new IntsReader(dir, id); } } diff --git a/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java b/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java index ac843859a97..64735a6597c 100644 --- a/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java @@ -17,6 +17,7 @@ package org.apache.lucene.index.values; * limitations under the License. */ import java.io.IOException; +import java.util.Collection; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.store.Directory; @@ -38,6 +39,8 @@ class PackedIntsImpl { static final int VERSION_CURRENT = VERSION_START; static class IntsWriter extends Writer { + + // nocommit - can we bulkcopy this on a merge? private LongsRef intsRef; private long[] docToValue; @@ -125,13 +128,27 @@ class PackedIntsImpl { protected void setNextAttribute(ValuesAttribute attr) { intsRef = attr.ints(); } + + @Override + public void add(int docID, ValuesAttribute attr) throws IOException { + final LongsRef ref; + if((ref = attr.ints()) != null) { + add(docID, ref.get()); + } + } + + @Override + public void files(Collection files) throws IOException { + files.add(IndexFileNames.segmentFileName(id, "", + IndexFileNames.CSF_DATA_EXTENSION)); + } } /** * Opens all necessary files, but does not read any data in until you call * {@link #load}. */ - static class IntsReader extends Reader { + static class IntsReader extends DocValues { private final IndexInput datIn; protected IntsReader(Directory dir, String id) throws IOException { @@ -186,6 +203,11 @@ class PackedIntsImpl { public ValuesEnum getEnum(AttributeSource source) throws IOException { return new IntsEnumImpl(source, (IndexInput) datIn.clone()); } + + @Override + public Values type() { + return Values.PACKED_INTS; + } } diff --git a/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java index 5a9f9d6093b..dccbd3bba08 100644 --- a/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java @@ -251,5 +251,10 @@ class VarDerefBytesImpl { datIn.readBytes(ref.bytes, 0, size); } } + + @Override + public Values type() { + return Values.BYTES_VAR_DEREF; + } } } diff --git a/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java index 9987343d684..c8536d8dc0c 100644 --- a/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java @@ -157,7 +157,7 @@ class VarSortedBytesImpl { } @Override - public org.apache.lucene.index.values.Reader.Source load() + public org.apache.lucene.index.values.DocValues.Source load() throws IOException { return loadSorted(null); } @@ -340,5 +340,10 @@ class VarSortedBytesImpl { return advance(pos + 1); } } + + @Override + public Values type() { + return Values.BYTES_VAR_SORTED; + } } } diff --git a/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java index 83b97479171..436a9799fc4 100644 --- 
a/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java @@ -228,5 +228,10 @@ class VarStraightBytesImpl { return advance(pos+1); } } + + @Override + public Values type() { + return Values.BYTES_VAR_STRAIGHT; + } } } diff --git a/lucene/src/java/org/apache/lucene/index/values/Writer.java b/lucene/src/java/org/apache/lucene/index/values/Writer.java index 13bf0947614..ae081778769 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Writer.java +++ b/lucene/src/java/org/apache/lucene/index/values/Writer.java @@ -17,12 +17,17 @@ package org.apache.lucene.index.values; * limitations under the License. */ import java.io.IOException; -import java.util.List; +import java.util.Comparator; +import org.apache.lucene.index.values.codec.DocValuesConsumer; +import org.apache.lucene.store.Directory; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; -public abstract class Writer { +public abstract class Writer extends DocValuesConsumer { + + public static final String INDEX_EXTENSION = "idx"; + public static final String DATA_EXTENSION = "dat"; /** Records the specfied value for the docID */ public void add(int docID, long value) throws IOException { @@ -47,28 +52,8 @@ public abstract class Writer { /** Finish writing, close any files */ public abstract void finish(int docCount) throws IOException; - public static class MergeState { - public final Reader reader; - public final int docBase; - public final int docCount; - public final Bits bits; - - public MergeState(Reader reader, int docBase, int docCount, Bits bits) { - assert reader != null; - this.reader = reader; - this.docBase = docBase; - this.docCount = docCount; - this.bits = bits; - } - } - - public void add(List states) throws IOException { - for (MergeState state : states) { - merge(state); - } - } - // enables bulk copies in subclasses per MergeState + @Override protected void merge(MergeState state) throws IOException { final ValuesEnum valEnum = state.reader.getEnum(); assert valEnum != null; @@ -89,4 +74,31 @@ public abstract class Writer { valEnum.close(); } } + + public static Writer create(Values v, String id, + Directory directory, Comparator comp) throws IOException { + switch (v) { + case PACKED_INTS: + case PACKED_INTS_FIXED: + return Ints.getWriter(directory, id, true); + case SIMPLE_FLOAT_4BYTE: + return Floats.getWriter(directory, id, 4); + case SIMPLE_FLOAT_8BYTE: + return Floats.getWriter(directory, id, 8); + case BYTES_FIXED_STRAIGHT: + return Bytes.getWriter(directory, id, Bytes.Mode.STRAIGHT, comp, true); + case BYTES_FIXED_DEREF: + return Bytes.getWriter(directory, id, Bytes.Mode.DEREF, comp, true); + case BYTES_FIXED_SORTED: + return Bytes.getWriter(directory, id, Bytes.Mode.SORTED, comp, true); + case BYTES_VAR_STRAIGHT: + return Bytes.getWriter(directory, id, Bytes.Mode.STRAIGHT, comp, false); + case BYTES_VAR_DEREF: + return Bytes.getWriter(directory, id, Bytes.Mode.DEREF, comp, false); + case BYTES_VAR_SORTED: + return Bytes.getWriter(directory, id, Bytes.Mode.SORTED, comp, false); + default: + throw new IllegalArgumentException("Unknown Values: " + v); + } + } } diff --git a/lucene/src/java/org/apache/lucene/search/FieldComparator.java b/lucene/src/java/org/apache/lucene/search/FieldComparator.java index d71b89f6fa6..2b322d6d4f8 100644 --- a/lucene/src/java/org/apache/lucene/search/FieldComparator.java +++ b/lucene/src/java/org/apache/lucene/search/FieldComparator.java @@ -22,7 +22,7 @@ 
 import java.text.Collator;
 import java.util.Locale;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.values.Reader.Source;
+import org.apache.lucene.index.values.DocValues.Source;
 import org.apache.lucene.search.FieldCache.DocTerms;
 import org.apache.lucene.search.FieldCache.DocTermsIndex;
 import org.apache.lucene.search.cache.ByteValuesCreator;
diff --git a/lucene/src/test/org/apache/lucene/TestExternalCodecs.java b/lucene/src/test/org/apache/lucene/TestExternalCodecs.java
index 2d421b03808..cf552bf058d 100644
--- a/lucene/src/test/org/apache/lucene/TestExternalCodecs.java
+++ b/lucene/src/test/org/apache/lucene/TestExternalCodecs.java
@@ -25,6 +25,8 @@ import org.apache.lucene.analysis.*;
 import org.apache.lucene.index.codecs.*;
 import org.apache.lucene.index.codecs.standard.*;
 import org.apache.lucene.index.codecs.pulsing.*;
+import org.apache.lucene.index.values.DocValues;
+import org.apache.lucene.index.values.codec.DocValuesConsumer;
 import org.apache.lucene.store.*;
 import java.util.*;
 import java.io.*;
@@ -159,6 +161,13 @@ public class TestExternalCodecs extends LuceneTestCase {
     public void close() {
       // TODO: finalize stuff
     }
+
+    @Override
+    public DocValuesConsumer addValuesField(FieldInfo field)
+        throws IOException {
+      //TODO(simonw): can we fix this easily?
+      throw new UnsupportedOperationException("not implemented");
+    }
   }
 
   private static class RAMTermsConsumer extends TermsConsumer {
@@ -257,6 +266,11 @@ public class TestExternalCodecs extends LuceneTestCase {
     public TermsEnum terms() {
       return new RAMTermsEnum(postings.fieldToTerms.get(current));
     }
+
+    @Override
+    public DocValues docValues() throws IOException {
+      throw new UnsupportedOperationException("not implemented");
+    }
   }
 
   static class RAMTermsEnum extends TermsEnum {
diff --git a/lucene/src/test/org/apache/lucene/index/codecs/preflexrw/PreFlexFieldsWriter.java b/lucene/src/test/org/apache/lucene/index/codecs/preflexrw/PreFlexFieldsWriter.java
index 2b0a4167174..deb23f079f9 100644
--- a/lucene/src/test/org/apache/lucene/index/codecs/preflexrw/PreFlexFieldsWriter.java
+++ b/lucene/src/test/org/apache/lucene/index/codecs/preflexrw/PreFlexFieldsWriter.java
@@ -28,6 +28,7 @@ import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.codecs.preflex.TermInfo;
+import org.apache.lucene.index.values.codec.DocValuesConsumer;
 import org.apache.lucene.store.IndexOutput;
 
 import java.io.IOException;
 
@@ -209,4 +210,10 @@ class PreFlexFieldsWriter extends FieldsConsumer {
       return BytesRef.getUTF8SortedAsUTF16Comparator();
     }
   }
+
+  @Override
+  public DocValuesConsumer addValuesField(FieldInfo field) throws IOException {
+    //TODO(simonw): can we fix this easily?
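+    // PreFlex writes the pre-4.0 index format, which has no doc values
+    // representation, so throwing is the honest answer here. A codec that
+    // does support doc values would hand out a per-field consumer instead,
+    // roughly (a sketch using the DocValuesConsumer.create factory that is
+    // added later in this series; the arguments are illustrative):
+    //   return DocValuesConsumer.create(segmentName, dir, field, null);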
+ throw new UnsupportedOperationException("no implemented"); + } } \ No newline at end of file diff --git a/lucene/src/test/org/apache/lucene/index/values/TestIndexValues.java b/lucene/src/test/org/apache/lucene/index/values/TestIndexValues.java index 87efd06e5c4..5bc064965c1 100644 --- a/lucene/src/test/org/apache/lucene/index/values/TestIndexValues.java +++ b/lucene/src/test/org/apache/lucene/index/values/TestIndexValues.java @@ -33,15 +33,20 @@ import org.apache.lucene.document.ValuesField; import org.apache.lucene.document.Field.Index; import org.apache.lucene.document.Field.Store; import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.Fields; +import org.apache.lucene.index.FieldsEnum; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.LogDocMergePolicy; import org.apache.lucene.index.LogMergePolicy; import org.apache.lucene.index.MergePolicy; +import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.Term; -import org.apache.lucene.index.values.Reader.SortedSource; -import org.apache.lucene.index.values.Reader.Source; +import org.apache.lucene.index.codecs.CodecProvider; +import org.apache.lucene.index.values.DocValues.SortedSource; +import org.apache.lucene.index.values.DocValues.Source; +import org.apache.lucene.index.values.codec.DocValuesCodec; import org.apache.lucene.store.Directory; import org.apache.lucene.store.LockObtainFailedException; import org.apache.lucene.util.BytesRef; @@ -51,9 +56,33 @@ import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.OpenBitSet; import org.apache.lucene.util.UnicodeUtil; import org.apache.lucene.util._TestUtil; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; public class TestIndexValues extends LuceneTestCase { + // TODO test addIndexes + private static DocValuesCodec docValuesCodec; + + @BeforeClass + public static void beforeClassLuceneTestCaseJ4() { + LuceneTestCase.beforeClassLuceneTestCaseJ4(); + final CodecProvider cp = CodecProvider.getDefault(); + docValuesCodec = new DocValuesCodec(cp.lookup(CodecProvider.getDefaultCodec())); + cp.register(docValuesCodec); + CodecProvider.setDefaultCodec(docValuesCodec.name); + } + + @AfterClass + public static void afterClassLuceneTestCaseJ4() { + final CodecProvider cp = CodecProvider.getDefault(); + cp.unregister(docValuesCodec); + LuceneTestCase.afterClassLuceneTestCaseJ4(); + } + + public void testBytesStraight() throws IOException { runTestBytes(Bytes.Mode.STRAIGHT, true); runTestBytes(Bytes.Mode.STRAIGHT, false); @@ -71,18 +100,16 @@ public class TestIndexValues extends LuceneTestCase { // nocommit -- for sorted test, do our own Sort of the // values and verify it's identical - public void runTestBytes(final Bytes.Mode mode, - final boolean fixedSize) throws IOException { + public void runTestBytes(final Bytes.Mode mode, final boolean fixedSize) + throws IOException { final BytesRef bytesRef = new BytesRef(); final Comparator comp = mode == Bytes.Mode.SORTED ? 
BytesRef - .getUTF8SortedAsUnicodeComparator() - : null; + .getUTF8SortedAsUnicodeComparator() : null; Directory dir = newDirectory(); - Writer w = Bytes - .getWriter(dir, "test", mode, comp, fixedSize); + Writer w = Bytes.getWriter(dir, "test", mode, comp, fixedSize); int maxDoc = 220; final String[] values = new String[maxDoc]; final int lenMin, lenMax; @@ -107,32 +134,33 @@ public class TestIndexValues extends LuceneTestCase { } w.finish(maxDoc); - Reader r = Bytes.getReader(dir, "test", mode, fixedSize, maxDoc); + DocValues r = Bytes.getValues(dir, "test", mode, fixedSize, maxDoc); for (int iter = 0; iter < 2; iter++) { ValuesEnum bytesEnum = r.getEnum(); assertNotNull("enum is null", bytesEnum); ValuesAttribute attr = bytesEnum.addAttribute(ValuesAttribute.class); assertNotNull("attribute is null", attr); BytesRef ref = attr.bytes(); - assertNotNull("BytesRef is null - enum not initialized to use bytes", attr); + assertNotNull("BytesRef is null - enum not initialized to use bytes", + attr); for (int i = 0; i < 2; i++) { final int idx = 2 * i; assertEquals("doc: " + idx, idx, bytesEnum.advance(idx)); String utf8String = ref.utf8ToString(); - assertEquals("doc: " + idx + " lenLeft: " + values[idx].length() + " lenRight: " + utf8String.length() , values[idx], utf8String); + assertEquals("doc: " + idx + " lenLeft: " + values[idx].length() + + " lenRight: " + utf8String.length(), values[idx], utf8String); } assertEquals(ValuesEnum.NO_MORE_DOCS, bytesEnum.advance(maxDoc)); - assertEquals(ValuesEnum.NO_MORE_DOCS, bytesEnum.advance(maxDoc+1)); + assertEquals(ValuesEnum.NO_MORE_DOCS, bytesEnum.advance(maxDoc + 1)); bytesEnum.close(); } - - + // Verify we can load source twice: for (int iter = 0; iter < 2; iter++) { Source s; - Reader.SortedSource ss; + DocValues.SortedSource ss; if (mode == Bytes.Mode.SORTED) { s = ss = r.loadSorted(comp); } else { @@ -147,8 +175,8 @@ public class TestIndexValues extends LuceneTestCase { if (ss != null) { assertEquals("doc " + idx, values[idx], ss.getByOrd(ss.ord(idx)) .utf8ToString()); - Reader.SortedSource.LookupResult result = ss.getByValue(new BytesRef( - values[idx])); + DocValues.SortedSource.LookupResult result = ss + .getByValue(new BytesRef(values[idx])); assertTrue(result.found); assertEquals(ss.ord(idx), result.ord); } @@ -217,7 +245,7 @@ public class TestIndexValues extends LuceneTestCase { final int additionalDocs = 1 + random.nextInt(9); w.finish(NUM_VALUES + additionalDocs); - Reader r = Ints.getReader(dir, "test", useFixedArrays); + DocValues r = Ints.getValues(dir, "test", useFixedArrays); for (int iter = 0; iter < 2; iter++) { Source s = r.load(); for (int i = 0; i < NUM_VALUES; i++) { @@ -254,7 +282,7 @@ public class TestIndexValues extends LuceneTestCase { assertEquals(i, iEnum.advance(i)); assertEquals("" + i, 0, ints.get()); } - + iEnum.close(); } r.close(); @@ -267,22 +295,21 @@ public class TestIndexValues extends LuceneTestCase { runTestFloats(4, 0.00001); } - private void runTestFloats(int precision, double delta) - throws IOException { + private void runTestFloats(int precision, double delta) throws IOException { Directory dir = newDirectory(); Writer w = Floats.getWriter(dir, "test", precision); final int NUM_VALUES = 1000; final double[] values = new double[NUM_VALUES]; for (int i = 0; i < NUM_VALUES; i++) { - final double v = precision == 4 ? random.nextFloat() : random.nextDouble(); + final double v = precision == 4 ? 
random.nextFloat() : random + .nextDouble(); values[i] = v; w.add(i, v); } final int additionalValues = 1 + random.nextInt(10); w.finish(NUM_VALUES + additionalValues); - Reader r = Floats.getReader(dir, "test", NUM_VALUES - + additionalValues); + DocValues r = Floats.getValues(dir, "test", NUM_VALUES + additionalValues); for (int iter = 0; iter < 2; iter++) { Source s = r.load(); for (int i = 0; i < NUM_VALUES; i++) { @@ -298,7 +325,7 @@ public class TestIndexValues extends LuceneTestCase { assertEquals(i, fEnum.nextDoc()); assertEquals(values[i], floats.get(), delta); } - for(int i = NUM_VALUES; i < NUM_VALUES + additionalValues; i++) { + for (int i = NUM_VALUES; i < NUM_VALUES + additionalValues; i++) { assertEquals(i, fEnum.nextDoc()); assertEquals(0.0, floats.get(), delta); } @@ -312,7 +339,7 @@ public class TestIndexValues extends LuceneTestCase { assertEquals(i, fEnum.advance(i)); assertEquals(values[i], floats.get(), delta); } - for(int i = NUM_VALUES; i < NUM_VALUES + additionalValues; i++) { + for (int i = NUM_VALUES; i < NUM_VALUES + additionalValues; i++) { assertEquals(i, fEnum.advance(i)); assertEquals(0.0, floats.get(), delta); } @@ -335,7 +362,7 @@ public class TestIndexValues extends LuceneTestCase { // without deletions IndexWriterConfig cfg = writerConfig(true); // primitives - no deletes - runTestNumerics(cfg,false); + runTestNumerics(cfg, false); cfg = writerConfig(true); // bytes - no deletes @@ -377,12 +404,12 @@ public class TestIndexValues extends LuceneTestCase { } private IndexWriterConfig writerConfig(boolean useCompoundFile) { - final IndexWriterConfig cfg = newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()); + final IndexWriterConfig cfg = newIndexWriterConfig(TEST_VERSION_CURRENT, + new MockAnalyzer()); MergePolicy mergePolicy = cfg.getMergePolicy(); - if(mergePolicy instanceof LogMergePolicy) { - ((LogMergePolicy)mergePolicy).setUseCompoundFile(useCompoundFile); - } else if(useCompoundFile) { + if (mergePolicy instanceof LogMergePolicy) { + ((LogMergePolicy) mergePolicy).setUseCompoundFile(useCompoundFile); + } else if (useCompoundFile) { LogMergePolicy policy = new LogDocMergePolicy(); policy.setUseCompoundFile(useCompoundFile); cfg.setMergePolicy(policy); @@ -390,8 +417,8 @@ public class TestIndexValues extends LuceneTestCase { return cfg; } - public void runTestNumerics(IndexWriterConfig cfg, - boolean withDeletions) throws IOException { + public void runTestNumerics(IndexWriterConfig cfg, boolean withDeletions) + throws IOException { Directory d = newDirectory(); IndexWriter w = new IndexWriter(d, cfg); final int numValues = 350; @@ -409,14 +436,15 @@ public class TestIndexValues extends LuceneTestCase { switch (val) { case PACKED_INTS: case PACKED_INTS_FIXED: { - Reader intsReader = r.getIndexValues(val.name()); + DocValues intsReader = getDocValues(r, val.name()); Source ints = intsReader.load(); ValuesEnum intsEnum = intsReader.getEnum(); assertNotNull(intsEnum); LongsRef enumRef = intsEnum.addAttribute(ValuesAttribute.class).ints(); for (int i = 0; i < base; i++) { assertEquals(0, ints.ints(i)); - assertEquals(val.name() + " base: " + base + " index: " + i, i, random.nextBoolean()?intsEnum.advance(i): intsEnum.nextDoc()); + assertEquals(val.name() + " base: " + base + " index: " + i, i, + random.nextBoolean() ? 
intsEnum.advance(i) : intsEnum.nextDoc()); assertEquals(0, enumRef.get()); } int expected = 0; @@ -424,7 +452,8 @@ public class TestIndexValues extends LuceneTestCase { while (deleted.get(expected)) { expected++; } - assertEquals("advance failed at index: " + i + " of " + r.numDocs() + " docs", i, intsEnum.advance(i)); + assertEquals("advance failed at index: " + i + " of " + r.numDocs() + + " docs", i, intsEnum.advance(i)); assertEquals(expected, ints.ints(i)); assertEquals(expected, enumRef.get()); @@ -433,24 +462,27 @@ public class TestIndexValues extends LuceneTestCase { break; case SIMPLE_FLOAT_4BYTE: case SIMPLE_FLOAT_8BYTE: { - Reader floatReader = r.getIndexValues(val.name()); + DocValues floatReader = getDocValues(r, val.name()); Source floats = floatReader.load(); ValuesEnum floatEnum = floatReader.getEnum(); assertNotNull(floatEnum); - FloatsRef enumRef = floatEnum.addAttribute(ValuesAttribute.class).floats(); + FloatsRef enumRef = floatEnum.addAttribute(ValuesAttribute.class) + .floats(); for (int i = 0; i < base; i++) { assertEquals(0.0d, floats.floats(i), 0.0d); - assertEquals(i, random.nextBoolean()?floatEnum.advance(i): floatEnum.nextDoc()); - assertEquals("index " + i, 0.0 ,enumRef.get(), 0.0); + assertEquals(i, random.nextBoolean() ? floatEnum.advance(i) + : floatEnum.nextDoc()); + assertEquals("index " + i, 0.0, enumRef.get(), 0.0); } int expected = 0; for (int i = base; i < r.numDocs(); i++, expected++) { while (deleted.get(expected)) { expected++; } - assertEquals("advance failed at index: " + i + " of " + r.numDocs() + " docs base:" + base, i, floatEnum.advance(i)); - assertEquals("index " + i, 2.0 * expected ,enumRef.get() , 0.00001); + assertEquals("advance failed at index: " + i + " of " + r.numDocs() + + " docs base:" + base, i, floatEnum.advance(i)); + assertEquals("index " + i, 2.0 * expected, enumRef.get(), 0.00001); assertEquals("index " + i, 2.0 * expected, floats.floats(i), 0.00001); } } @@ -468,30 +500,30 @@ public class TestIndexValues extends LuceneTestCase { d.close(); } - private static EnumSet BYTES = EnumSet.of( - Values.BYTES_FIXED_DEREF, - Values.BYTES_FIXED_SORTED, - Values.BYTES_FIXED_STRAIGHT, - Values.BYTES_VAR_DEREF , - Values.BYTES_VAR_SORTED, - Values.BYTES_VAR_STRAIGHT - ); - - private static EnumSet STRAIGHT_BYTES = EnumSet.of( - Values.BYTES_FIXED_STRAIGHT, - Values.BYTES_VAR_STRAIGHT - ); + private static EnumSet BYTES = EnumSet.of(Values.BYTES_FIXED_DEREF, + Values.BYTES_FIXED_SORTED, Values.BYTES_FIXED_STRAIGHT, + Values.BYTES_VAR_DEREF, Values.BYTES_VAR_SORTED, + Values.BYTES_VAR_STRAIGHT); - private static EnumSet NUMERICS = EnumSet.of(Values.PACKED_INTS, Values.PACKED_INTS_FIXED, Values.SIMPLE_FLOAT_4BYTE, Values.SIMPLE_FLOAT_8BYTE); - - private static Index[] IDX_VALUES = new Index[] { Index.ANALYZED, Index.ANALYZED_NO_NORMS, Index.NOT_ANALYZED, Index.NOT_ANALYZED_NO_NORMS}; - private OpenBitSet indexValues(IndexWriter w, int numValues, - Values value, List valueVarList, boolean withDeletions, - int multOfSeven) throws CorruptIndexException, IOException { + private static EnumSet STRAIGHT_BYTES = EnumSet.of( + Values.BYTES_FIXED_STRAIGHT, Values.BYTES_VAR_STRAIGHT); + + private static EnumSet NUMERICS = EnumSet.of(Values.PACKED_INTS, + Values.PACKED_INTS_FIXED, Values.SIMPLE_FLOAT_4BYTE, + Values.SIMPLE_FLOAT_8BYTE); + + private static Index[] IDX_VALUES = new Index[] { Index.ANALYZED, + Index.ANALYZED_NO_NORMS, Index.NOT_ANALYZED, Index.NOT_ANALYZED_NO_NORMS }; + + private OpenBitSet indexValues(IndexWriter w, int numValues, Values 
value, + List valueVarList, boolean withDeletions, int multOfSeven) + throws CorruptIndexException, IOException { final boolean isNumeric = NUMERICS.contains(value); OpenBitSet deleted = new OpenBitSet(numValues); Document doc = new Document(); - Fieldable field = random.nextBoolean()? new ValuesField(value.name()):newField(value.name(), _TestUtil.randomRealisticUnicodeString(random, 10), IDX_VALUES[random.nextInt(IDX_VALUES.length)]); + Fieldable field = random.nextBoolean() ? new ValuesField(value.name()) + : newField(value.name(), _TestUtil.randomRealisticUnicodeString(random, + 10), IDX_VALUES[random.nextInt(IDX_VALUES.length)]); doc.add(field); ValuesAttribute valuesAttribute = ValuesField.values(field); @@ -549,16 +581,15 @@ public class TestIndexValues extends LuceneTestCase { } } w.commit(); - + // nocommit test unoptimized with deletions - if(withDeletions || random.nextBoolean()) + if (true || withDeletions || random.nextBoolean()) w.optimize(); return deleted; } - public void runTestIndexBytes(IndexWriterConfig cfg, - boolean withDeletions) throws CorruptIndexException, - LockObtainFailedException, IOException { + public void runTestIndexBytes(IndexWriterConfig cfg, boolean withDeletions) + throws CorruptIndexException, LockObtainFailedException, IOException { Directory d = newDirectory(); IndexWriter w = new IndexWriter(d, cfg); final List byteVariantList = new ArrayList(BYTES); @@ -577,14 +608,14 @@ public class TestIndexValues extends LuceneTestCase { final int numRemainingValues = (int) (numValues - deleted.cardinality()); final int base = r.numDocs() - numRemainingValues; - Reader bytesReader = r.getIndexValues(byteIndexValue.name()); -// closeables.add(bytesReader); + DocValues bytesReader = getDocValues(r, byteIndexValue.name()); assertNotNull("field " + byteIndexValue.name() + " returned null reader - maybe merged failed", bytesReader); Source bytes = bytesReader.load(); ValuesEnum bytesEnum = bytesReader.getEnum(); assertNotNull(bytesEnum); - final ValuesAttribute attr = bytesEnum.addAttribute(ValuesAttribute.class); + final ValuesAttribute attr = bytesEnum + .addAttribute(ValuesAttribute.class); byte upto = 0; // test the filled up slots for correctness for (int i = 0; i < base; i++) { @@ -598,7 +629,7 @@ public class TestIndexValues extends LuceneTestCase { // fixed straight returns bytesref with zero bytes all of fixed // length assertNotNull("expected none null - " + msg, br); - if(br.length != 0) { + if (br.length != 0) { assertEquals("expected zero bytes of length " + bytesSize + " - " + msg, bytesSize, br.length); for (int j = 0; j < br.length; j++) { @@ -613,35 +644,38 @@ public class TestIndexValues extends LuceneTestCase { case BYTES_FIXED_DEREF: default: assertNotNull("expected none null - " + msg, br); - if(br.length != 0){ + if (br.length != 0) { bytes.bytes(i); } - assertEquals("expected empty bytes - " + br.utf8ToString() + msg, 0, br.length); + assertEquals("expected empty bytes - " + br.utf8ToString() + msg, 0, + br.length); } } final BytesRef enumRef = attr.bytes(); - // test the actual doc values added in this iteration assertEquals(base + numRemainingValues, r.numDocs()); int v = 0; for (int i = base; i < r.numDocs(); i++) { - + String msg = " field: " + byteIndexValue.name() + " at index: " + i - + " base: " + base + " numDocs:" + r.numDocs() + " bytesSize: " + bytesSize; + + " base: " + base + " numDocs:" + r.numDocs() + " bytesSize: " + + bytesSize; while (withDeletions && deleted.get(v++)) { upto += bytesSize; } - + BytesRef br = bytes.bytes(i); 
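+      // both access paths are verified against each other here: the Source
+      // gives RAM-resident random access by docID, while the ValuesEnum
+      // iterates in docID order and exposes its current value through the
+      // BytesRef obtained from its ValuesAttribute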
- if(bytesEnum.docID() != i) - assertEquals("seek failed for index " + i + " " + msg, i, bytesEnum.advance(i)); + if (bytesEnum.docID() != i) + assertEquals("seek failed for index " + i + " " + msg, i, bytesEnum + .advance(i)); for (int j = 0; j < br.length; j++, upto++) { - assertEquals("EnumRef Byte at index " + j + " doesn't match - " + msg, - upto, enumRef.bytes[enumRef.offset + j]); - assertEquals("SourceRef Byte at index " + j + " doesn't match - " + msg, - upto, br.bytes[br.offset + j]); - } + assertEquals( + "EnumRef Byte at index " + j + " doesn't match - " + msg, upto, + enumRef.bytes[enumRef.offset + j]); + assertEquals("SourceRef Byte at index " + j + " doesn't match - " + + msg, upto, br.bytes[br.offset + j]); + } } // clean up @@ -650,9 +684,32 @@ public class TestIndexValues extends LuceneTestCase { toClose.close(); } } - + w.close(); d.close(); } - + + private DocValues getDocValues(IndexReader reader, String field) + throws IOException { + boolean optimized = reader.isOptimized(); + Fields fields = optimized ? reader.getSequentialSubReaders()[0].fields() : MultiFields + .getFields(reader); +// return fields.docValues(field); + switch (random.nextInt(optimized ? 3 : 2)) { + case 0: + return fields.docValues(field); + case 1: + FieldsEnum iterator = fields.iterator(); + String name; + while ((name = iterator.next()) != null) { + if (name.equals(field)) + return iterator.docValues(); + } + throw new RuntimeException("no such field " + field); + case 2: + return reader.getSequentialSubReaders()[0].docValues(field); + } +throw new RuntimeException(); +} + } From 211ab616b04954659abb37a1c1027114ad948bc8 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Tue, 26 Oct 2010 09:28:52 +0000 Subject: [PATCH 007/116] LUCENE-2700: added missing files git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1027415 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene/index/DocFieldProcessor.java | 19 +- .../index/DocFieldProcessorPerThread.java | 4 +- .../lucene/index/codecs/FieldsConsumer.java | 3 +- .../index/codecs/PerFieldCodecWrapper.java | 3 +- .../codecs/docvalues/DocValuesCodec.java | 298 +++++++++++++++++ .../codecs/docvalues/DocValuesConsumer.java | 67 ++++ .../docvalues/DocValuesProducerBase.java | 99 ++++++ .../lucene/index/values/MultiDocValues.java | 300 ++++++++++++++++++ .../src/java/org/apache/lucene/util/Pair.java | 36 +++ 9 files changed, 809 insertions(+), 20 deletions(-) create mode 100644 lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java create mode 100644 lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesConsumer.java create mode 100644 lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java create mode 100644 lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java create mode 100644 lucene/src/java/org/apache/lucene/util/Pair.java diff --git a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java index 4db1363e722..030979cb5e7 100644 --- a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java +++ b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java @@ -17,24 +17,15 @@ package org.apache.lucene.index; * limitations under the License. 
*/ -import org.apache.lucene.store.Directory; -import org.apache.lucene.index.codecs.FieldsConsumer; -import org.apache.lucene.index.values.Ints; -import org.apache.lucene.index.values.Floats; -import org.apache.lucene.index.values.Bytes; -import org.apache.lucene.index.values.ValuesAttribute; -import org.apache.lucene.index.values.Writer; -import org.apache.lucene.index.values.codec.DocValuesConsumer; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.FloatsRef; -import org.apache.lucene.util.LongsRef; - import java.io.IOException; import java.util.Collection; -import java.util.Comparator; -import java.util.Map; import java.util.HashMap; +import java.util.Map; +import org.apache.lucene.index.codecs.FieldsConsumer; +import org.apache.lucene.index.codecs.docvalues.DocValuesConsumer; +import org.apache.lucene.index.values.ValuesAttribute; +import org.apache.lucene.store.Directory; /** * This is a DocConsumer that gathers all fields under the diff --git a/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java b/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java index 0f2fed91c6d..5bd7321fce1 100644 --- a/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java +++ b/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java @@ -24,8 +24,8 @@ import java.io.IOException; import org.apache.lucene.document.Document; import org.apache.lucene.document.Fieldable; +import org.apache.lucene.index.codecs.docvalues.DocValuesConsumer; import org.apache.lucene.index.values.ValuesAttribute; -import org.apache.lucene.index.values.codec.DocValuesConsumer; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.RamUsageEstimator; @@ -407,4 +407,4 @@ final class DocFieldProcessorPerThread extends DocConsumerPerThread { } } } -} \ No newline at end of file +} diff --git a/lucene/src/java/org/apache/lucene/index/codecs/FieldsConsumer.java b/lucene/src/java/org/apache/lucene/index/codecs/FieldsConsumer.java index 5bc0b48b6ef..0f90deeeff1 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/FieldsConsumer.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/FieldsConsumer.java @@ -22,10 +22,9 @@ import org.apache.lucene.index.Fields; import org.apache.lucene.index.FieldsEnum; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.codecs.docvalues.DocValuesConsumer; import org.apache.lucene.index.values.DocValues; import org.apache.lucene.index.values.Writer; -import org.apache.lucene.index.values.Values; -import org.apache.lucene.index.values.codec.DocValuesConsumer; import java.io.IOException; import java.io.Closeable; diff --git a/lucene/src/java/org/apache/lucene/index/codecs/PerFieldCodecWrapper.java b/lucene/src/java/org/apache/lucene/index/codecs/PerFieldCodecWrapper.java index cf21d6c3620..b00d4dbc774 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/PerFieldCodecWrapper.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/PerFieldCodecWrapper.java @@ -23,7 +23,6 @@ import java.util.Set; import java.util.HashSet; import java.util.Iterator; import java.util.IdentityHashMap; -import java.util.TreeMap; import java.util.TreeSet; import java.io.IOException; @@ -35,8 +34,8 @@ import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.SegmentReadState; +import 
org.apache.lucene.index.codecs.docvalues.DocValuesConsumer;
 import org.apache.lucene.index.values.DocValues;
-import org.apache.lucene.index.values.codec.DocValuesConsumer;
 import org.apache.lucene.store.Directory;
 
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java
new file mode 100644
index 00000000000..821f766bd83
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java
@@ -0,0 +1,298 @@
+package org.apache.lucene.index.codecs.docvalues;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.Map.Entry;
+
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.FieldsEnum;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.codecs.FieldsConsumer;
+import org.apache.lucene.index.codecs.FieldsProducer;
+import org.apache.lucene.index.codecs.TermsConsumer;
+import org.apache.lucene.index.values.DocValues;
+import org.apache.lucene.index.values.Writer;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.AttributeSource;
+
+/**
+ * A codec that adds DocValues support to a given codec transparently.
+ */
+public class DocValuesCodec extends Codec {
+  private final Map<String, WrappingFieldsConsumer> consumers = new HashMap<String, WrappingFieldsConsumer>();
+  private final Codec other;
+
+  public DocValuesCodec(Codec other) {
+    this.name = "docvalues_" + other.name;
+    this.other = other;
+  }
+
+  @Override
+  public FieldsConsumer fieldsConsumer(SegmentWriteState state)
+      throws IOException {
+    WrappingFieldsConsumer consumer;
+    if ((consumer = consumers.get(state.segmentName)) == null) {
+      consumer = new WrappingFieldsConsumer(other);
+      consumers.put(state.segmentName, consumer); // otherwise the lookup above can never hit
+    }
+    consumer.state = state; // nocommit this is a hack and only necessary since
+    // we want to initialize the wrapped fieldsConsumer lazily with a
+    // SegmentWriteState that is created after the docvalues one. We should
+    // fix this in DocumentsWriter I guess. See DocFieldProcessor too!
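+    // A registration sketch (mirrors the TestIndexValues setup): wrap the
+    // default codec and make the wrapper the default, so every segment gets
+    // doc values support on top of its regular postings format:
+    //   CodecProvider cp = CodecProvider.getDefault();
+    //   DocValuesCodec codec = new DocValuesCodec(cp.lookup(CodecProvider.getDefaultCodec()));
+    //   cp.register(codec);
+    //   CodecProvider.setDefaultCodec(codec.name);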
+    return consumer;
+  }
+
+  private static class WrappingFieldsConsumer extends FieldsConsumer {
+    SegmentWriteState state;
+    private final List<DocValuesConsumer> docValuesConsumers = new ArrayList<DocValuesConsumer>();
+    private FieldsConsumer wrappedConsumer;
+    private final Codec other;
+
+    public WrappingFieldsConsumer(Codec other) {
+      this.other = other;
+    }
+
+    @Override
+    public void close() throws IOException {
+      synchronized (this) {
+        if (wrappedConsumer != null)
+          wrappedConsumer.close();
+      }
+    }
+
+    @Override
+    public synchronized DocValuesConsumer addValuesField(FieldInfo field)
+        throws IOException {
+      DocValuesConsumer consumer = DocValuesConsumer.create(state.segmentName,
+          state.directory, field, null); // TODO: set comparator here
+      docValuesConsumers.add(consumer);
+      return consumer;
+    }
+
+    @Override
+    public TermsConsumer addField(FieldInfo field) throws IOException {
+      synchronized (this) {
+        if (wrappedConsumer == null)
+          wrappedConsumer = other.fieldsConsumer(state);
+      }
+      return wrappedConsumer.addField(field);
+    }
+  }
+
+  @Override
+  public FieldsProducer fieldsProducer(SegmentReadState state)
+      throws IOException {
+    Directory dir = state.dir;
+    Set<String> files = new HashSet<String>();
+
+    other.files(dir, state.segmentInfo, files);
+    for (String string : files) {
+      if (dir.fileExists(string))
+        return new WrappingFieldsProducer(state.segmentInfo, state.dir,
+            state.fieldInfos, other.fieldsProducer(state));
+    }
+    return new WrappingFieldsProducer(state.segmentInfo, state.dir,
+        state.fieldInfos, FieldsProducer.EMPTY);
+
+  }
+
+  @Override
+  public void files(Directory dir, SegmentInfo segmentInfo, Set<String> files)
+      throws IOException {
+    Set<String> otherFiles = new HashSet<String>();
+    other.files(dir, segmentInfo, otherFiles);
+    for (String string : otherFiles) {
+      if (dir.fileExists(string))
+        files.add(string);
+    }
+
+    for (String file : dir.listAll()) {
+      if (file.startsWith(segmentInfo.name)
+          && (file.endsWith(Writer.DATA_EXTENSION) || file
+              .endsWith(Writer.INDEX_EXTENSION))) {
+        files.add(file);
+      }
+    }
+    // files.add(IndexFileNames.segmentFileName(segmentInfo.name, "",
+    // Writer.DATA_EXTENSION));
+    // files.add(IndexFileNames.segmentFileName(segmentInfo.name, "",
+    // Writer.INDEX_EXTENSION));
+
+  }
+
+  @Override
+  public void getExtensions(Set<String> extensions) {
+    other.getExtensions(extensions);
+    extensions.add(Writer.DATA_EXTENSION);
+    extensions.add(Writer.INDEX_EXTENSION);
+  }
+
+  static class WrappingFieldsProducer extends DocValuesProducerBase {
+
+    private final FieldsProducer other;
+
+    WrappingFieldsProducer(SegmentInfo si, Directory dir, FieldInfos fieldInfo,
+        FieldsProducer other) throws IOException {
+      super(si, dir, fieldInfo);
+      this.other = other;
+    }
+
+    @Override
+    public void close() throws IOException {
+      try {
+        other.close();
+      } finally {
+        super.close();
+      }
+    }
+
+    @Override
+    public void loadTermsIndex(int indexDivisor) throws IOException {
+      other.loadTermsIndex(indexDivisor);
+    }
+
+    @Override
+    public FieldsEnum iterator() throws IOException {
+      return new WrappingFieldsEnum(other.iterator(), docValues.entrySet()
+          .iterator());
+    }
+
+    @Override
+    public Terms terms(String field) throws IOException {
+      return other.terms(field);
+    }
+  }
+
+  static abstract class NameValue<V> {
+    String name;
+    V value;
+
+    NameValue<?> smaller(NameValue<?> other) throws IOException {
+      if (other.name == null) {
+        if (this.name == null) {
+          return null;
+        }
+        return this;
+      } else if (this.name == null) {
+        return other;
+      }
+      final int res = this.name.compareTo(other.name);
+      if (res < 0)
+        return this;
+      if (res == 0)
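+        // alias the two names so the reference-equality (==) checks in
+        // WrappingFieldsEnum below can cheaply detect that both cursors
+        // are positioned on the same field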
+        other.name = this.name;
+      return other;
+    }
+
+    abstract NameValue<?> next() throws IOException;
+  }
+
+  static class FieldsEnumNameValue extends NameValue<FieldsEnum> {
+    @Override
+    NameValue<?> next() throws IOException {
+      name = value.next();
+      return this;
+    }
+
+  }
+
+  static class DocValueNameValue extends NameValue<DocValues> {
+    Iterator<Entry<String, DocValues>> iter;
+
+    @Override
+    NameValue<?> next() {
+      if (iter.hasNext()) {
+        Entry<String, DocValues> next = iter.next();
+        value = next.getValue();
+        name = next.getKey();
+      } else {
+        name = null;
+      }
+      return this;
+    }
+
+  }
+
+  static class WrappingFieldsEnum extends FieldsEnum {
+    private final DocValueNameValue docValues = new DocValueNameValue();
+    private final NameValue<FieldsEnum> fieldsEnum = new FieldsEnumNameValue();
+    private NameValue<?> coordinator;
+
+    @Override
+    public AttributeSource attributes() {
+      return fieldsEnum.value.attributes();
+    }
+
+    public WrappingFieldsEnum(FieldsEnum wrapped,
+        Iterator<Entry<String, DocValues>> docValues) {
+      this.docValues.iter = docValues;
+      this.fieldsEnum.value = wrapped;
+      coordinator = null;
+
+    }
+
+    @Override
+    public DocValues docValues() throws IOException {
+      if (docValues.name == coordinator.name)
+        return docValues.value;
+      return null;
+    }
+
+    @Override
+    public String next() throws IOException {
+      if (coordinator == null) {
+        coordinator = fieldsEnum.next().smaller(docValues.next());
+        // old = coordinator.name;
+      } else {
+        String current = coordinator.name;
+        if (current == docValues.name) {
+          docValues.next();
+        }
+        if (current == fieldsEnum.name) {
+          fieldsEnum.next();
+        }
+        coordinator = docValues.smaller(fieldsEnum);
+
+      }
+      return coordinator == null ? null : coordinator.name;
+
+    }
+
+    @Override
+    public TermsEnum terms() throws IOException {
+      if (fieldsEnum.name == coordinator.name)
+        return fieldsEnum.value.terms();
+      return null;
+    }
+
+  }
+
+}
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesConsumer.java b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesConsumer.java
new file mode 100644
index 00000000000..22b04137d27
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesConsumer.java
@@ -0,0 +1,67 @@
+package org.apache.lucene.index.codecs.docvalues;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +import java.io.IOException; +import java.util.Collection; +import java.util.Comparator; +import java.util.List; + +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.values.DocValues; +import org.apache.lucene.index.values.ValuesAttribute; +import org.apache.lucene.index.values.Writer; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; + +public abstract class DocValuesConsumer { + public abstract void add(int docID, ValuesAttribute attr) throws IOException; + + public abstract void finish(int docCount) throws IOException; + + public abstract void files(Collection files) throws IOException; + + public void merge(List states) throws IOException { + for (MergeState state : states) { + merge(state); + } + } + + protected abstract void merge(MergeState mergeState) throws IOException; + + + public static class MergeState { + public final DocValues reader; + public final int docBase; + public final int docCount; + public final Bits bits; + + public MergeState(DocValues reader, int docBase, int docCount, Bits bits) { + assert reader != null; + this.reader = reader; + this.docBase = docBase; + this.docCount = docCount; + this.bits = bits; + } + } + + public static DocValuesConsumer create(String segmentName, Directory directory, + FieldInfo field, Comparator comp) throws IOException { + final String id = segmentName + "_" + field.number; + return Writer.create(field.getIndexValues(), id, directory, comp); + } +} diff --git a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java new file mode 100644 index 00000000000..ce016755455 --- /dev/null +++ b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java @@ -0,0 +1,99 @@ +package org.apache.lucene.index.codecs.docvalues; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +import java.io.IOException; +import java.util.Collection; +import java.util.TreeMap; + +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.index.codecs.FieldsProducer; +import org.apache.lucene.index.values.Bytes; +import org.apache.lucene.index.values.DocValues; +import org.apache.lucene.index.values.Floats; +import org.apache.lucene.index.values.Ints; +import org.apache.lucene.index.values.Values; +import org.apache.lucene.index.values.Writer; +import org.apache.lucene.store.Directory; + +public abstract class DocValuesProducerBase extends FieldsProducer{ + + protected final TreeMap docValues = new TreeMap(); + + protected DocValuesProducerBase(SegmentInfo si, Directory dir, FieldInfos fieldInfo) throws IOException { + load(fieldInfo, si.name, si.docCount, dir); + } + + @Override + public DocValues docValues(String field) throws IOException { + return docValues.get(field); + } + + // Only opens files... doesn't actually load any values + protected void load(FieldInfos fieldInfos, String segment, int docCount, + Directory dir) throws IOException { + final int numFields = fieldInfos.size(); + for (int i = 0; i < numFields; i++) { + final FieldInfo fieldInfo = fieldInfos.fieldInfo(i); + final Values v = fieldInfo.getIndexValues(); + final String field = fieldInfo.name; + final String id = IndexFileNames.segmentFileName(segment, Integer + .toString(fieldInfo.number), ""); + if (v != null && dir.fileExists(id + "." + Writer.DATA_EXTENSION)) { + docValues.put(field, loadDocValues(docCount, dir, id, v)); + } + } + } + + protected DocValues loadDocValues(int docCount, Directory dir, String id, + Values v) throws IOException { + switch (v) { + case PACKED_INTS: + return Ints.getValues(dir, id, false); + case PACKED_INTS_FIXED: + return Ints.getValues(dir, id, true); + case SIMPLE_FLOAT_4BYTE: + return Floats.getValues(dir, id, docCount); + case SIMPLE_FLOAT_8BYTE: + return Floats.getValues(dir, id, docCount); + case BYTES_FIXED_STRAIGHT: + return Bytes.getValues(dir, id, Bytes.Mode.STRAIGHT, true, docCount); + case BYTES_FIXED_DEREF: + return Bytes.getValues(dir, id, Bytes.Mode.DEREF, true, docCount); + case BYTES_FIXED_SORTED: + return Bytes.getValues(dir, id, Bytes.Mode.SORTED, true, docCount); + case BYTES_VAR_STRAIGHT: + return Bytes.getValues(dir, id, Bytes.Mode.STRAIGHT, false, docCount); + case BYTES_VAR_DEREF: + return Bytes.getValues(dir, id, Bytes.Mode.DEREF, false, docCount); + case BYTES_VAR_SORTED: + return Bytes.getValues(dir, id, Bytes.Mode.SORTED, false, docCount); + default: + throw new IllegalStateException("unrecognized index values mode " + v); + } + } + + @Override + public void close() throws IOException { + Collection values = docValues.values(); + for (DocValues docValues : values) { + docValues.close(); + } + } +} diff --git a/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java b/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java new file mode 100644 index 00000000000..cd6216a2530 --- /dev/null +++ b/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java @@ -0,0 +1,300 @@ +package org.apache.lucene.index.values; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +import java.io.IOException; +import java.util.List; + +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.MultiTermsEnum.TermsEnumIndex; +import org.apache.lucene.util.AttributeSource; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.FloatsRef; +import org.apache.lucene.util.LongsRef; +import org.apache.lucene.util.ReaderUtil.Slice; + +public class MultiDocValues extends DocValues { + + public static class DocValuesIndex { + public final static DocValuesIndex[] EMPTY_ARRAY = new DocValuesIndex[0]; + final int subIndex; + final DocValues docValues; + + public DocValuesIndex(DocValues docValues, int subIndex) { + this.docValues = docValues; + this.subIndex = subIndex; + } + } + + private DocValuesIndex[] docValuesIdx; + private Slice[] subSlices; + + public MultiDocValues(Slice[] subSlices) { + this.subSlices = subSlices; + } + + public MultiDocValues(DocValuesIndex[] docValuesIdx, Slice[] subSlices) { + this(subSlices); + reset(docValuesIdx); + } + + @Override + public ValuesEnum getEnum(AttributeSource source) throws IOException { + return new MultiValuesEnum(subSlices, docValuesIdx, docValuesIdx[0].docValues.type()); + } + + @Override + public Source load() throws IOException { + return new MultiSource(subSlices, docValuesIdx); + } + + public void close() throws IOException { + // + } + + public DocValues reset(DocValuesIndex[] docValuesIdx) { + this.docValuesIdx = docValuesIdx; + return this; + } + + private static class MultiValuesEnum extends ValuesEnum { + private int numDocs_ = 0; + private int pos = -1; + private int start = 0; + private ValuesEnum current; + private Slice[] subSlices; + private DocValuesIndex[] docValuesIdx; + private final int maxDoc; + + public MultiValuesEnum(Slice[] subSlices, DocValuesIndex[] docValuesIdx, Values type) { + super(type); + this.subSlices = subSlices; + this.docValuesIdx = docValuesIdx; + Slice slice = subSlices[subSlices.length-1]; + maxDoc = slice.start + slice.length; + } + + @Override + public void close() throws IOException { + + } + + @Override + public int advance(int target) throws IOException { +// int n = target - start; +// do { +// if (target >= maxDoc) +// return pos = NO_MORE_DOCS; +// if (n >= numDocs_) { +// int idx = readerIndex(target); +// if (enumCache[idx] == null) { +// try { +// DocValues indexValues = subReaders[idx].docValues(id); +// if (indexValues != null) // nocommit does that work with default +// // values? +// enumCache[idx] = indexValues.getEnum(this.attributes()); +// else +// enumCache[idx] = new DummyEnum(this.attributes(), +// subSlices[idx].length, attr.type()); +// } catch (IOException ex) { +// // nocommit what to do here? 
+// throw new RuntimeException(ex); +// } +// } +// current = enumCache[idx]; +// start = subSlices[idx].start; +// numDocs_ = subSlices[idx].length; +// n = target - start; +// } +// target = start + numDocs_; +// } while ((n = current.advance(n)) == NO_MORE_DOCS); + return pos = start + current.docID(); + } + + @Override + public int docID() { + return pos; + } + + @Override + public int nextDoc() throws IOException { + return advance(pos + 1); + } + } + + private class MultiSource extends Source { + private int numDocs_ = 0; + private int start = 0; + private Source current; + private Slice[] subSlices; + private DocValuesIndex[] docVAluesIdx; + + public MultiSource(Slice[] subSlices, DocValuesIndex[] docValuesIdx) { + this.subSlices = subSlices; + this.docVAluesIdx = docValuesIdx; + } + + public long ints(int docID) { +// int n = docID - start; +// if (n >= numDocs_) { +// int idx = readerIndex(docID); +// try { +// current = subReaders[idx].getIndexValuesCache().getInts(id); +// if (current == null) // nocommit does that work with default values? +// current = new DummySource(); +// } catch (IOException ex) { +// // nocommit what to do here? +// throw new RuntimeException(ex); +// } +// start = starts[idx]; +// numDocs_ = subReaders[idx].maxDoc(); +// n = docID - start; +// } +// return current.ints(n); + return 0l; + } + + public double floats(int docID) { +// int n = docID - start; +// if (n >= numDocs_) { +// int idx = readerIndex(docID); +// try { +// current = subReaders[idx].getIndexValuesCache().getFloats(id); +// if (current == null) // nocommit does that work with default values? +// current = new DummySource(); +// } catch (IOException ex) { +// // nocommit what to do here? +// throw new RuntimeException(ex); +// } +// numDocs_ = subReaders[idx].maxDoc(); +// +// start = starts[idx]; +// n = docID - start; +// } +// return current.floats(n); + return 0d; + } + + public BytesRef bytes(int docID) { +// int n = docID - start; +// if (n >= numDocs_) { +// int idx = readerIndex(docID); +// try { +// current = subReaders[idx].getIndexValuesCache().getBytes(id); +// if (current == null) // nocommit does that work with default values? +// current = new DummySource(); +// } catch (IOException ex) { +// // nocommit what to do here? 
+// throw new RuntimeException(ex); +// } +// numDocs_ = subReaders[idx].maxDoc(); +// start = starts[idx]; +// n = docID - start; +// } +// return current.bytes(n); + return null; + } + + public long ramBytesUsed() { + return current.ramBytesUsed(); + } + + } + + private static class DummySource extends Source { + private final BytesRef ref = new BytesRef(); + + @Override + public BytesRef bytes(int docID) { + return ref; + } + + @Override + public double floats(int docID) { + return 0.0d; + } + + @Override + public long ints(int docID) { + return 0; + } + + public long ramBytesUsed() { + return 0; + } + } + + private static class DummyEnum extends ValuesEnum { + private int pos = -1; + private final int maxDoc; + + public DummyEnum(AttributeSource source, int maxDoc, Values type) { + super(source, type); + this.maxDoc = maxDoc; + switch (type) { + case BYTES_VAR_STRAIGHT: + case BYTES_FIXED_STRAIGHT: + case BYTES_FIXED_DEREF: + case BYTES_FIXED_SORTED: + case BYTES_VAR_DEREF: + case BYTES_VAR_SORTED: + // nocommit - this is not correct for Fixed_straight + BytesRef bytes = attr.bytes(); + bytes.length = 0; + bytes.offset = 0; + break; + case PACKED_INTS: + case PACKED_INTS_FIXED: + LongsRef ints = attr.ints(); + ints.set(0); + break; + + case SIMPLE_FLOAT_4BYTE: + case SIMPLE_FLOAT_8BYTE: + FloatsRef floats = attr.floats(); + floats.set(0d); + break; + default: + throw new IllegalArgumentException("unknown Values type: " + type); + } + } + + @Override + public void close() throws IOException { + } + + @Override + public int advance(int target) throws IOException { + return pos = (pos < maxDoc ? target : NO_MORE_DOCS); + } + + @Override + public int docID() { + return pos; + } + + @Override + public int nextDoc() throws IOException { + return advance(pos + 1); + } + } + + @Override + public Values type() { + return this.docValuesIdx[0].docValues.type(); + } + +} diff --git a/lucene/src/java/org/apache/lucene/util/Pair.java b/lucene/src/java/org/apache/lucene/util/Pair.java new file mode 100644 index 00000000000..9459a7548db --- /dev/null +++ b/lucene/src/java/org/apache/lucene/util/Pair.java @@ -0,0 +1,36 @@ +package org.apache.lucene.util; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+/**
+ * Simple Pair
+ * @lucene.internal
+ */
+public class Pair<Cur, Cud> {
+  public final Cur cur;
+  public final Cud cud;
+
+  /**
+   * Create a simple pair
+   * @param cur the first element
+   * @param cud the second element
+   */
+  public Pair(Cur cur, Cud cud) {
+    this.cur = cur;
+    this.cud = cud;
+  }
+}
\ No newline at end of file

From beb1bb74fa56760ef3f5ce0d3037f35b335165c5 Mon Sep 17 00:00:00 2001
From: Simon Willnauer
Date: Tue, 26 Oct 2010 09:37:49 +0000
Subject: [PATCH 008/116] fixed remaining imports - sorry for the noise

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1027420 13f79535-47bb-0310-9956-ffa450edef68
---
 lucene/src/java/org/apache/lucene/index/values/Writer.java    | 2 +-
 lucene/src/test/org/apache/lucene/TestExternalCodecs.java     | 2 +-
 .../lucene/index/codecs/preflexrw/PreFlexFieldsWriter.java    | 2 +-
 .../test/org/apache/lucene/index/values/TestIndexValues.java  | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/lucene/src/java/org/apache/lucene/index/values/Writer.java b/lucene/src/java/org/apache/lucene/index/values/Writer.java
index ae081778769..b73b8ab4113 100644
--- a/lucene/src/java/org/apache/lucene/index/values/Writer.java
+++ b/lucene/src/java/org/apache/lucene/index/values/Writer.java
@@ -19,7 +19,7 @@ package org.apache.lucene.index.values;
 import java.io.IOException;
 import java.util.Comparator;
 
-import org.apache.lucene.index.values.codec.DocValuesConsumer;
+import org.apache.lucene.index.codecs.docvalues.DocValuesConsumer;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
diff --git a/lucene/src/test/org/apache/lucene/TestExternalCodecs.java b/lucene/src/test/org/apache/lucene/TestExternalCodecs.java
index cf552bf058d..ef6596e43ca 100644
--- a/lucene/src/test/org/apache/lucene/TestExternalCodecs.java
+++ b/lucene/src/test/org/apache/lucene/TestExternalCodecs.java
@@ -26,7 +26,7 @@ import org.apache.lucene.index.codecs.*;
 import org.apache.lucene.index.codecs.standard.*;
 import org.apache.lucene.index.codecs.pulsing.*;
 import org.apache.lucene.index.values.DocValues;
-import org.apache.lucene.index.values.codec.DocValuesConsumer;
+import org.apache.lucene.index.codecs.docvalues.DocValuesConsumer;
 import org.apache.lucene.store.*;
 import java.util.*;
 import java.io.*;
diff --git a/lucene/src/test/org/apache/lucene/index/codecs/preflexrw/PreFlexFieldsWriter.java b/lucene/src/test/org/apache/lucene/index/codecs/preflexrw/PreFlexFieldsWriter.java
index deb23f079f9..4a196d9b7f5 100644
--- a/lucene/src/test/org/apache/lucene/index/codecs/preflexrw/PreFlexFieldsWriter.java
+++ b/lucene/src/test/org/apache/lucene/index/codecs/preflexrw/PreFlexFieldsWriter.java
@@ -28,7 +28,7 @@ import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.codecs.preflex.TermInfo;
-import org.apache.lucene.index.values.codec.DocValuesConsumer;
+import org.apache.lucene.index.codecs.docvalues.DocValuesConsumer;
 import org.apache.lucene.store.IndexOutput;
 
 import java.io.IOException;
diff --git a/lucene/src/test/org/apache/lucene/index/values/TestIndexValues.java b/lucene/src/test/org/apache/lucene/index/values/TestIndexValues.java
index 5bc064965c1..3e046296f4b 100644
--- a/lucene/src/test/org/apache/lucene/index/values/TestIndexValues.java
+++ b/lucene/src/test/org/apache/lucene/index/values/TestIndexValues.java
@@ -46,7 +46,7 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.index.values.DocValues.SortedSource; import org.apache.lucene.index.values.DocValues.Source; -import org.apache.lucene.index.values.codec.DocValuesCodec; +import org.apache.lucene.index.codecs.docvalues.DocValuesCodec; import org.apache.lucene.store.Directory; import org.apache.lucene.store.LockObtainFailedException; import org.apache.lucene.util.BytesRef; From 1fced2aa40b2500ccb6cb3a02cea33b5b543d459 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Fri, 29 Oct 2010 20:33:36 +0000 Subject: [PATCH 009/116] LUCENE-2700: Added support for MultiFields, MultiFieldsEnum and moved actual merge code to DocValuesCodec git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1028899 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene/index/DocFieldProcessor.java | 2 +- .../org/apache/lucene/index/FieldInfo.java | 20 +- .../org/apache/lucene/index/FieldInfos.java | 30 +- .../org/apache/lucene/index/IndexReader.java | 10 +- .../org/apache/lucene/index/MultiFields.java | 28 +- .../apache/lucene/index/MultiFieldsEnum.java | 48 +++- .../apache/lucene/index/SegmentMerger.java | 17 +- .../apache/lucene/index/SegmentReader.java | 2 +- .../lucene/index/codecs/FieldsConsumer.java | 29 +- .../codecs/docvalues/DocValuesConsumer.java | 52 +++- .../docvalues/DocValuesProducerBase.java | 8 +- .../org/apache/lucene/index/values/Bytes.java | 2 + .../org/apache/lucene/index/values/Cache.java | 116 -------- .../apache/lucene/index/values/DocValues.java | 66 +++-- .../index/values/FixedDerefBytesImpl.java | 2 +- .../index/values/FixedStraightBytesImpl.java | 2 +- .../apache/lucene/index/values/Floats.java | 8 +- .../lucene/index/values/MultiDocValues.java | 264 ++++++++++-------- .../lucene/index/values/PackedIntsImpl.java | 6 +- .../apache/lucene/index/values/Values.java | 4 +- .../index/values/VarDerefBytesImpl.java | 4 +- .../index/values/VarSortedBytesImpl.java | 4 +- .../index/values/VarStraightBytesImpl.java | 7 +- .../apache/lucene/search/FieldComparator.java | 12 +- .../org/apache/lucene/search/SortField.java | 2 +- .../org/apache/lucene/util/ReaderUtil.java | 22 ++ .../lucene/index/values/TestIndexValues.java | 84 +++--- 27 files changed, 428 insertions(+), 423 deletions(-) delete mode 100644 lucene/src/java/org/apache/lucene/index/values/Cache.java diff --git a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java index 030979cb5e7..d1b46e1b9c9 100644 --- a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java +++ b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java @@ -49,7 +49,7 @@ final class DocFieldProcessor extends DocConsumer { throws IOException { DocValuesConsumer valuesConsumer; if ((valuesConsumer = docValues.get(name)) == null) { - fieldInfo.setIndexValues(attr.type()); + fieldInfo.setDocValues(attr.type()); if(fieldsConsumer == null) { /* nocommit -- this is a hack and only works since DocValuesCodec supports initializing the FieldsConsumer twice. 
diff --git a/lucene/src/java/org/apache/lucene/index/FieldInfo.java b/lucene/src/java/org/apache/lucene/index/FieldInfo.java index d7529874599..f07f73764c9 100644 --- a/lucene/src/java/org/apache/lucene/index/FieldInfo.java +++ b/lucene/src/java/org/apache/lucene/index/FieldInfo.java @@ -24,7 +24,7 @@ public final class FieldInfo { public String name; public boolean isIndexed; public int number; - Values indexValues; + Values docValues; // true if term vector for this field should be stored @@ -93,17 +93,21 @@ public final class FieldInfo { } } - void setIndexValues(Values v) { - if (indexValues != null) { - if (indexValues != v) { - throw new IllegalArgumentException("indexValues is already set to " + indexValues + "; cannot change to " + v); + void setDocValues(Values v) { + if (docValues != null) { + if (docValues != v) { + throw new IllegalArgumentException("indexValues is already set to " + docValues + "; cannot change to " + v); } } else{ - indexValues = v; + docValues = v; } } + + public boolean hasDocValues() { + return docValues != null; + } - public Values getIndexValues() { - return indexValues; + public Values getDocValues() { + return docValues; } } diff --git a/lucene/src/java/org/apache/lucene/index/FieldInfos.java b/lucene/src/java/org/apache/lucene/index/FieldInfos.java index 26ed7136d97..aa11aa77fbd 100644 --- a/lucene/src/java/org/apache/lucene/index/FieldInfos.java +++ b/lucene/src/java/org/apache/lucene/index/FieldInfos.java @@ -311,10 +311,10 @@ public final class FieldInfos { final byte b; - if (fi.indexValues == null) { + if (fi.docValues == null) { b = 0; } else { - switch(fi.indexValues) { + switch(fi.docValues) { case PACKED_INTS: b = 1; break; @@ -346,7 +346,7 @@ public final class FieldInfos { b = 10; break; default: - throw new IllegalStateException("unhandled indexValues type " + fi.indexValues); + throw new IllegalStateException("unhandled indexValues type " + fi.docValues); } } output.writeByte(b); @@ -377,43 +377,41 @@ public final class FieldInfos { boolean omitTermFreqAndPositions = (bits & OMIT_TERM_FREQ_AND_POSITIONS) != 0; FieldInfo fi = addInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions); - if (format <= FORMAT_INDEX_VALUES) { final byte b = input.readByte(); - switch(b) { case 0: - fi.indexValues = null; + fi.docValues = null; break; case 1: - fi.indexValues = Values.PACKED_INTS; + fi.docValues = Values.PACKED_INTS; break; case 2: - fi.indexValues = Values.SIMPLE_FLOAT_4BYTE; + fi.docValues = Values.SIMPLE_FLOAT_4BYTE; break; case 3: - fi.indexValues = Values.SIMPLE_FLOAT_8BYTE; + fi.docValues = Values.SIMPLE_FLOAT_8BYTE; break; case 4: - fi.indexValues = Values.BYTES_FIXED_STRAIGHT; + fi.docValues = Values.BYTES_FIXED_STRAIGHT; break; case 5: - fi.indexValues = Values.BYTES_FIXED_DEREF; + fi.docValues = Values.BYTES_FIXED_DEREF; break; case 6: - fi.indexValues = Values.BYTES_FIXED_SORTED; + fi.docValues = Values.BYTES_FIXED_SORTED; break; case 7: - fi.indexValues = Values.BYTES_VAR_STRAIGHT; + fi.docValues = Values.BYTES_VAR_STRAIGHT; break; case 8: - fi.indexValues = Values.BYTES_VAR_DEREF; + fi.docValues = Values.BYTES_VAR_DEREF; break; case 9: - fi.indexValues = Values.BYTES_VAR_SORTED; + fi.docValues = Values.BYTES_VAR_SORTED; break; case 10: - fi.indexValues = Values.PACKED_INTS_FIXED; + fi.docValues = Values.PACKED_INTS_FIXED; break; default: throw new IllegalStateException("unhandled indexValues type " + b); diff --git 
a/lucene/src/java/org/apache/lucene/index/IndexReader.java b/lucene/src/java/org/apache/lucene/index/IndexReader.java index 2cb8d6d9d72..c292460d2f5 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexReader.java +++ b/lucene/src/java/org/apache/lucene/index/IndexReader.java @@ -21,7 +21,6 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.FieldSelector; import org.apache.lucene.search.Similarity; import org.apache.lucene.index.codecs.CodecProvider; -import org.apache.lucene.index.values.Cache; import org.apache.lucene.index.values.DocValues; import org.apache.lucene.store.*; import org.apache.lucene.util.Bits; @@ -1090,7 +1089,7 @@ public abstract class IndexReader implements Cloneable,Closeable { if (docs == null) return 0; int n = 0; int doc; - while ((doc = docs.nextDoc()) != docs.NO_MORE_DOCS) { + while ((doc = docs.nextDoc()) != DocsEnum.NO_MORE_DOCS) { deleteDocument(doc); n++; } @@ -1386,13 +1385,6 @@ public abstract class IndexReader implements Cloneable,Closeable { return fields.docValues(field); } - private final Cache indexValuesCache = new Cache(this); - - // nocommit -- don't expose readers if we have this? - public Cache getIndexValuesCache() { - return indexValuesCache; - } - private Fields fields; /** @lucene.internal */ diff --git a/lucene/src/java/org/apache/lucene/index/MultiFields.java b/lucene/src/java/org/apache/lucene/index/MultiFields.java index f642383c36b..9137d6077ff 100644 --- a/lucene/src/java/org/apache/lucene/index/MultiFields.java +++ b/lucene/src/java/org/apache/lucene/index/MultiFields.java @@ -25,6 +25,7 @@ import java.util.ArrayList; import org.apache.lucene.index.values.DocValues; import org.apache.lucene.index.values.MultiDocValues; +import org.apache.lucene.index.values.Values; import org.apache.lucene.index.values.MultiDocValues.DocValuesIndex; import org.apache.lucene.util.ReaderUtil; import org.apache.lucene.util.ReaderUtil.Gather; // for javadocs @@ -290,19 +291,30 @@ public final class MultiFields extends Fields { // Lazy init: first time this field is requested, we // create & add to docValues: - final List subs2 = new ArrayList(); - final List slices2 = new ArrayList(); - + final List docValuesIndex = new ArrayList(); + int docsUpto = 0; + Values type = null; // Gather all sub-readers that share this field for(int i=0;i enumWithSlices = new ArrayList(); // Init q for(int i=0;i values = new ArrayList(); - for (int i = 0; i < numTop; i++) { - final DocValues docValues = top[i].fields.docValues(); - if (docValues != null) { - values.add(new MultiDocValues.DocValuesIndex(docValues, - top[i].index)); + final List docValuesIndex = new ArrayList(); + int docsUpto = 0; + Values type = null; + final int numEnums = enumWithSlices.length; + for (int i = 0; i < numEnums; i++) { + FieldsEnumWithSlice withSlice = enumWithSlices[i]; + Slice slice = withSlice.slice; + final DocValues values = withSlice.fields.docValues(); + + final int start = slice.start; + final int length = slice.length; + if (values != null) { + if (docsUpto != start) { + type = values.type(); + docValuesIndex.add(new MultiDocValues.DocValuesIndex( + new MultiDocValues.DummyDocValues(start, type), docsUpto, start + - docsUpto)); + } + docValuesIndex.add(new MultiDocValues.DocValuesIndex(values, start, + length)); + docsUpto = start + length; + + + } else if (i+1 == numEnums && !docValuesIndex.isEmpty()) { + docValuesIndex.add(new MultiDocValues.DocValuesIndex( + new MultiDocValues.DummyDocValues(start, type), docsUpto, start + - docsUpto)); } } - // 
TODO return an empty docvalues instance if values are empty - return docValues.reset(values.toArray(MultiDocValues.DocValuesIndex.EMPTY_ARRAY)); + return docValuesIndex.isEmpty() ? null : docValues.reset(docValuesIndex + .toArray(MultiDocValues.DocValuesIndex.EMPTY_ARRAY)); } } diff --git a/lucene/src/java/org/apache/lucene/index/SegmentMerger.java b/lucene/src/java/org/apache/lucene/index/SegmentMerger.java index 95577954852..64a7b479f41 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentMerger.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentMerger.java @@ -31,12 +31,7 @@ import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.index.codecs.Codec; import org.apache.lucene.index.codecs.MergeState; import org.apache.lucene.index.codecs.FieldsConsumer; -import org.apache.lucene.index.values.Bytes; -import org.apache.lucene.index.values.Ints; -import org.apache.lucene.index.values.DocValues; -import org.apache.lucene.index.values.Floats; import org.apache.lucene.index.values.Values; -import org.apache.lucene.index.values.Writer; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; @@ -296,13 +291,13 @@ final class SegmentMerger { fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.hasNorms(fi.name), fi.storePayloads, fi.omitTermFreqAndPositions); - final Values fiIndexValues = fi.indexValues; - final Values mergedIndexValues = merged.indexValues; - if (mergedIndexValues == null) { - merged.setIndexValues(fiIndexValues); - } else if (mergedIndexValues != fiIndexValues) { + final Values fiIndexValues = fi.docValues; + final Values mergedDocValues = merged.docValues; + if (mergedDocValues == null) { + merged.setDocValues(fiIndexValues); + } else if (mergedDocValues != fiIndexValues) { // TODO -- can we recover from this? 
- throw new IllegalStateException("cannot merge field " + fi.name + " indexValues changed from " + mergedIndexValues + " to " + fiIndexValues); + throw new IllegalStateException("cannot merge field " + fi.name + " indexValues changed from " + mergedDocValues + " to " + fiIndexValues); } } } else { diff --git a/lucene/src/java/org/apache/lucene/index/SegmentReader.java b/lucene/src/java/org/apache/lucene/index/SegmentReader.java index 9c854662a24..b1b73929caf 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentReader.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentReader.java @@ -968,7 +968,7 @@ public class SegmentReader extends IndexReader implements Cloneable { fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET) { fieldSet.add(fi.name); } - else if (fi.indexValues != null && fieldOption == IndexReader.FieldOption.DOC_VALUES) { + else if (fi.docValues != null && fieldOption == IndexReader.FieldOption.DOC_VALUES) { fieldSet.add(fi.name); } } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/FieldsConsumer.java b/lucene/src/java/org/apache/lucene/index/codecs/FieldsConsumer.java index 0f90deeeff1..de57c651824 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/FieldsConsumer.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/FieldsConsumer.java @@ -65,32 +65,13 @@ public abstract class FieldsConsumer implements Closeable { final TermsConsumer termsConsumer = addField(mergeState.fieldInfo); termsConsumer.merge(mergeState, terms); } - - DocValues docValues = fieldsEnum.docValues(); // fix this - does not work due to multi fields - if(docValues != null) { - // TODO we need some kind of compatibility notation for values such - // that two slighly different segments can be merged eg. fixed vs. - // variable byte len or float32 vs. float64 - int docBase = 0; - final List mergeStates = new ArrayList(); - for (IndexReader reader : mergeState.readers) { - DocValues r = reader.docValues(mergeState.fieldInfo.name); - if (r != null) { - mergeStates.add(new Writer.MergeState(r, docBase, reader - .maxDoc(), reader.getDeletedDocs())); - } - docBase += reader.numDocs(); - } - if (mergeStates.isEmpty()) { - continue; - } + if (mergeState.fieldInfo.hasDocValues()) { + final DocValues docValues = fieldsEnum.docValues(); + assert docValues != null : "DocValues are null for " + mergeState.fieldInfo.getDocValues(); final DocValuesConsumer docValuesConsumer = addValuesField(mergeState.fieldInfo); - docValuesConsumer.merge(mergeStates); - docValuesConsumer.finish(mergeState.mergedDocCount); + assert docValuesConsumer != null; + docValuesConsumer.merge(mergeState, docValues); } - - // merge doc values -// } } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesConsumer.java b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesConsumer.java index 22b04137d27..0ca72d38261 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesConsumer.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesConsumer.java @@ -1,4 +1,5 @@ package org.apache.lucene.index.codecs.docvalues; + /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. 
See the NOTICE file distributed with @@ -18,9 +19,9 @@ package org.apache.lucene.index.codecs.docvalues; import java.io.IOException; import java.util.Collection; import java.util.Comparator; -import java.util.List; import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.values.DocValues; import org.apache.lucene.index.values.ValuesAttribute; import org.apache.lucene.index.values.Writer; @@ -28,22 +29,50 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; +/** + * @lucene.experimental + */ +// TODO this might need to go in the codec package since is a direct relative to +// TermsConsumer public abstract class DocValuesConsumer { + public abstract void add(int docID, ValuesAttribute attr) throws IOException; public abstract void finish(int docCount) throws IOException; public abstract void files(Collection files) throws IOException; - - public void merge(List states) throws IOException { - for (MergeState state : states) { - merge(state); + + public void merge(org.apache.lucene.index.codecs.MergeState mergeState, + DocValues values) throws IOException { + // TODO we need some kind of compatibility notation for values such + // that two slightly different segments can be merged eg. fixed vs. + // variable byte len or float32 vs. float64 + int docBase = 0; + boolean merged = false; + /* + * We ignore the given DocValues here and merge from the subReaders directly + * to support bulk copies on the DocValues Writer level. if this gets merged + * with MultiDocValues the writer can not optimize for bulk-copyable data + */ + for (final IndexReader reader : mergeState.readers) { + final DocValues r = reader.docValues(mergeState.fieldInfo.name); + if (r != null) { + merged = true; + merge(new Writer.MergeState(r, docBase, reader.maxDoc(), reader + .getDeletedDocs())); + } + docBase += reader.numDocs(); } + if (merged) + finish(mergeState.mergedDocCount); } - + protected abstract void merge(MergeState mergeState) throws IOException; - - + + /* + * specialized auxiliary MergeState is necessary since we don't want to + * exploit internals up to the codec ones + */ public static class MergeState { public final DocValues reader; public final int docBase; @@ -59,9 +88,10 @@ public abstract class DocValuesConsumer { } } - public static DocValuesConsumer create(String segmentName, Directory directory, - FieldInfo field, Comparator comp) throws IOException { + public static DocValuesConsumer create(String segmentName, + Directory directory, FieldInfo field, Comparator comp) + throws IOException { final String id = segmentName + "_" + field.number; - return Writer.create(field.getIndexValues(), id, directory, comp); + return Writer.create(field.getDocValues(), id, directory, comp); } } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java index ce016755455..8cdc41b5161 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java @@ -51,13 +51,13 @@ public abstract class DocValuesProducerBase extends FieldsProducer{ final int numFields = fieldInfos.size(); for (int i = 0; i < numFields; i++) { final FieldInfo fieldInfo = fieldInfos.fieldInfo(i); - final Values v = fieldInfo.getIndexValues(); + final Values v = 
fieldInfo.getDocValues(); final String field = fieldInfo.name; final String id = IndexFileNames.segmentFileName(segment, Integer - .toString(fieldInfo.number), ""); - if (v != null && dir.fileExists(id + "." + Writer.DATA_EXTENSION)) { + .toString(fieldInfo.number),""); + if (v != null && dir.fileExists(id + "." + Writer.DATA_EXTENSION)) { docValues.put(field, loadDocValues(docCount, dir, id, v)); - } + } } } diff --git a/lucene/src/java/org/apache/lucene/index/values/Bytes.java b/lucene/src/java/org/apache/lucene/index/values/Bytes.java index bd9fd4544f4..fb1d2738d8e 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Bytes.java +++ b/lucene/src/java/org/apache/lucene/index/values/Bytes.java @@ -287,7 +287,9 @@ public final class Bytes { return idxIn == null ? null : (IndexInput) idxIn.clone(); } + @Override public void close() throws IOException { + super.close(); if (datIn != null) { datIn.close(); } diff --git a/lucene/src/java/org/apache/lucene/index/values/Cache.java b/lucene/src/java/org/apache/lucene/index/values/Cache.java deleted file mode 100644 index 711e11cdb3d..00000000000 --- a/lucene/src/java/org/apache/lucene/index/values/Cache.java +++ /dev/null @@ -1,116 +0,0 @@ -package org.apache.lucene.index.values; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; -import java.util.Comparator; -import java.util.HashMap; -import java.util.Map; - -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.values.DocValues.SortedSource; -import org.apache.lucene.index.values.DocValues.Source; -import org.apache.lucene.util.BytesRef; - -public class Cache { - final IndexReader r; - // TODO(simonw): use WeakHashMaps instead here? 
- final Map ints = new HashMap(); - final Map floats = new HashMap(); - final Map bytes = new HashMap(); - final Map sortedBytes = new HashMap(); - - public Cache(IndexReader r) { - this.r = r; - } - - synchronized public Source getInts(String id) throws IOException { - Source s = ints.get(id); - if (s == null) { - final DocValues indexValues = r.docValues(id); - if (indexValues == null) { - return null; - } - s = indexValues.load(); - ints.put(id, s); - } - - return s; - } - - synchronized public Source getFloats(String id) throws IOException { - Source s = floats.get(id); - if (s == null) { - final DocValues indexValues = r.docValues(id); - if (indexValues == null) { - return null; - } - s = indexValues.load(); - floats.put(id, s); - } - - return s; - } - - synchronized public SortedSource getSortedBytes(String id, - Comparator comp) throws IOException { - SortedSource s = sortedBytes.get(id); - if (s == null) { - final DocValues indexValues = r.docValues(id); - if (indexValues == null) { - return null; - } - s = indexValues.loadSorted(comp); - sortedBytes.put(id, s); - } else { - // TODO(simonw): verify comp is the same! - } - - return s; - } - - synchronized public Source getBytes(String id) throws IOException { - Source s = bytes.get(id); - if (s == null) { - final DocValues indexValues = r.docValues(id); - if (indexValues == null) { - return null; - } - s = indexValues.load(); - bytes.put(id, s); - } - - return s; - } - - public void purgeInts(String id) { - ints.remove(id); - } - - public void purgeFloats(String id) { - floats.remove(id); - } - - public void purgeBytes(String id) { - bytes.remove(id); - } - - public void purgeSortedBytes(String id) { - sortedBytes.remove(id); - } -} diff --git a/lucene/src/java/org/apache/lucene/index/values/DocValues.java b/lucene/src/java/org/apache/lucene/index/values/DocValues.java index 501a2c981fe..44a2ae05722 100644 --- a/lucene/src/java/org/apache/lucene/index/values/DocValues.java +++ b/lucene/src/java/org/apache/lucene/index/values/DocValues.java @@ -24,24 +24,48 @@ import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.BytesRef; public abstract class DocValues implements Closeable { - - + + private final Object lock = new Object(); + + private Source cachedReference; + public static final DocValues[] EMPTY_ARRAY = new DocValues[0]; - public ValuesEnum getEnum() throws IOException{ + public ValuesEnum getEnum() throws IOException { return getEnum(null); } - public abstract ValuesEnum getEnum(AttributeSource attrSource) throws IOException; + public abstract ValuesEnum getEnum(AttributeSource attrSource) + throws IOException; public abstract Source load() throws IOException; - public SortedSource loadSorted(Comparator comparator) throws IOException { + public Source getCached(boolean load) throws IOException { + synchronized (lock) { // TODO make sorted source cachable too + if (load && cachedReference == null) + cachedReference = load(); + return cachedReference; + } + } + + public Source releaseCached() { + synchronized (lock) { + final Source retVal = cachedReference; + cachedReference = null; + return retVal; + } + } + + public SortedSource loadSorted(Comparator comparator) + throws IOException { throw new UnsupportedOperationException(); } - + public abstract Values type(); + public void close() throws IOException { + releaseCached(); + } /** * Source of integer (returned as java long), per document. 
The underlying @@ -50,30 +74,34 @@ public abstract class DocValues implements Closeable { */ public static abstract class Source { - public long ints(int docID) { + public long getInt(int docID) { throw new UnsupportedOperationException("ints are not supported"); } - public double floats(int docID) { + public double getFloat(int docID) { throw new UnsupportedOperationException("floats are not supported"); } - public BytesRef bytes(int docID) { + public BytesRef getBytes(int docID) { throw new UnsupportedOperationException("bytes are not supported"); } - - /** Returns number of unique values. Some impls may - * throw UnsupportedOperationException. */ + + /** + * Returns number of unique values. Some impls may throw + * UnsupportedOperationException. + */ public int getValueCount() { throw new UnsupportedOperationException(); } - - public ValuesEnum getEnum() throws IOException{ + + public ValuesEnum getEnum() throws IOException { return getEnum(null); } - - // nocommit - enable obtaining enum from source since this is already in memory - public /*abstract*/ ValuesEnum getEnum(AttributeSource attrSource) throws IOException { + + // nocommit - enable obtaining enum from source since this is already in + // memory + public/* abstract */ValuesEnum getEnum(AttributeSource attrSource) + throws IOException { throw new UnsupportedOperationException(); } @@ -83,7 +111,7 @@ public abstract class DocValues implements Closeable { public static abstract class SortedSource extends Source { @Override - public BytesRef bytes(int docID) { + public BytesRef getBytes(int docID) { return getByOrd(ord(docID)); } @@ -109,5 +137,5 @@ public abstract class DocValues implements Closeable { */ public abstract LookupResult getByValue(BytesRef value); } - + } diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java index 7e30711b465..b1e2449cbfd 100644 --- a/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java @@ -157,7 +157,7 @@ class FixedDerefBytesImpl { } @Override - public BytesRef bytes(int docID) { + public BytesRef getBytes(int docID) { final int id = (int) index.get(docID); if (id == 0) { return defaultValue; diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java index 3566e336764..6df5217788b 100644 --- a/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java @@ -150,7 +150,7 @@ class FixedStraightBytesImpl { } @Override - public BytesRef bytes(int docID) { + public BytesRef getBytes(int docID) { bytesRef.offset = docID * size; return bytesRef; } diff --git a/lucene/src/java/org/apache/lucene/index/values/Floats.java b/lucene/src/java/org/apache/lucene/index/values/Floats.java index e343565c9b5..36dc00fadd5 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Floats.java +++ b/lucene/src/java/org/apache/lucene/index/values/Floats.java @@ -270,7 +270,7 @@ public class Floats { } @Override - public double floats(int docID) { + public double getFloat(int docID) { final float f = values.get(docID); // nocommit should we return NaN as default instead of 0.0? return Float.isNaN(f) ? 
0.0f : f; @@ -290,9 +290,9 @@ public class Floats { } @Override - public double floats(int docID) { + public double getFloat(int docID) { final double d = values.get(docID); - // nocommit should we return NaN as default instead of 0.0? + // TODO should we return NaN as default instead of 0.0? return Double.isNaN(d) ? 0.0d : d; } @@ -302,7 +302,9 @@ public class Floats { } } + @Override public void close() throws IOException { + super.close(); datIn.close(); } diff --git a/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java b/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java index cd6216a2530..77a78c25121 100644 --- a/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java +++ b/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java @@ -1,4 +1,5 @@ package org.apache.lucene.index.values; + /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -16,196 +17,214 @@ package org.apache.lucene.index.values; * limitations under the License. */ import java.io.IOException; -import java.util.List; +import java.util.Arrays; -import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.index.MultiTermsEnum.TermsEnumIndex; +import org.apache.lucene.index.values.DocValues.Source; import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.FloatsRef; import org.apache.lucene.util.LongsRef; -import org.apache.lucene.util.ReaderUtil.Slice; +import org.apache.lucene.util.ReaderUtil; public class MultiDocValues extends DocValues { - public static class DocValuesIndex { + public static class DocValuesIndex { // nocommit is this necessary? public final static DocValuesIndex[] EMPTY_ARRAY = new DocValuesIndex[0]; - final int subIndex; + final int start; + final int length; final DocValues docValues; - public DocValuesIndex(DocValues docValues, int subIndex) { + public DocValuesIndex(DocValues docValues, int start, int length) { this.docValues = docValues; - this.subIndex = subIndex; + this.start = start; + this.length = length; } } private DocValuesIndex[] docValuesIdx; - private Slice[] subSlices; + private int[] starts; - public MultiDocValues(Slice[] subSlices) { - this.subSlices = subSlices; + public MultiDocValues() { + starts = new int[0]; + docValuesIdx = new DocValuesIndex[0]; } - public MultiDocValues(DocValuesIndex[] docValuesIdx, Slice[] subSlices) { - this(subSlices); + public MultiDocValues(DocValuesIndex[] docValuesIdx) { reset(docValuesIdx); } @Override public ValuesEnum getEnum(AttributeSource source) throws IOException { - return new MultiValuesEnum(subSlices, docValuesIdx, docValuesIdx[0].docValues.type()); + return new MultiValuesEnum(docValuesIdx, starts); } @Override public Source load() throws IOException { - return new MultiSource(subSlices, docValuesIdx); + return new MultiSource(docValuesIdx, starts); } public void close() throws IOException { - // + super.close(); } public DocValues reset(DocValuesIndex[] docValuesIdx) { + int[] start = new int[docValuesIdx.length]; + for (int i = 0; i < docValuesIdx.length; i++) { + start[i] = docValuesIdx[i].start; + } + this.starts = start; this.docValuesIdx = docValuesIdx; return this; } + public static class DummyDocValues extends DocValues { + final int maxDoc; + final Values type; + static final Source DUMMY = new DummySource(); + + public DummyDocValues(int maxDoc, Values type) { + this.type = type; + this.maxDoc = maxDoc; + } + + 
@Override + public ValuesEnum getEnum(AttributeSource attrSource) throws IOException { + return new DummyEnum(attrSource, maxDoc, type); + } + + @Override + public Source load() throws IOException { + return DUMMY; + } + + @Override + public Source getCached(boolean load) throws IOException { + return DUMMY; + } + + @Override + public Source releaseCached() { + return DUMMY; + } + + @Override + public Values type() { + return type; + } + + public void close() throws IOException { + super.close(); + } + + } + private static class MultiValuesEnum extends ValuesEnum { - private int numDocs_ = 0; - private int pos = -1; - private int start = 0; - private ValuesEnum current; - private Slice[] subSlices; private DocValuesIndex[] docValuesIdx; private final int maxDoc; + private int currentStart; + private int currentMax; + private int currentDoc = -1; + private ValuesEnum currentEnum; + private final int[] starts; - public MultiValuesEnum(Slice[] subSlices, DocValuesIndex[] docValuesIdx, Values type) { - super(type); - this.subSlices = subSlices; + public MultiValuesEnum(DocValuesIndex[] docValuesIdx, int[] starts) + throws IOException { + super(docValuesIdx[0].docValues.type()); this.docValuesIdx = docValuesIdx; - Slice slice = subSlices[subSlices.length-1]; - maxDoc = slice.start + slice.length; + final DocValuesIndex last = docValuesIdx[docValuesIdx.length - 1]; + maxDoc = last.start + last.length; + final DocValuesIndex idx = docValuesIdx[0]; + currentEnum = idx.docValues.getEnum(this.attributes()); + currentMax = idx.length; + currentStart = 0; + this.starts = starts; } @Override public void close() throws IOException { - + currentEnum.close(); } @Override public int advance(int target) throws IOException { -// int n = target - start; -// do { -// if (target >= maxDoc) -// return pos = NO_MORE_DOCS; -// if (n >= numDocs_) { -// int idx = readerIndex(target); -// if (enumCache[idx] == null) { -// try { -// DocValues indexValues = subReaders[idx].docValues(id); -// if (indexValues != null) // nocommit does that work with default -// // values? -// enumCache[idx] = indexValues.getEnum(this.attributes()); -// else -// enumCache[idx] = new DummyEnum(this.attributes(), -// subSlices[idx].length, attr.type()); -// } catch (IOException ex) { -// // nocommit what to do here? 
-// throw new RuntimeException(ex); -// } -// } -// current = enumCache[idx]; -// start = subSlices[idx].start; -// numDocs_ = subSlices[idx].length; -// n = target - start; -// } -// target = start + numDocs_; -// } while ((n = current.advance(n)) == NO_MORE_DOCS); - return pos = start + current.docID(); + assert target > currentDoc : "target " + target + + " must be > than the current doc " + currentDoc; + int relativeDoc = target - currentStart; + do { + if (target >= maxDoc) // we are beyond max doc + return currentDoc = NO_MORE_DOCS; + if (target >= currentMax) { + final int idx = ReaderUtil.subIndex(target, starts); + currentEnum.close(); + currentEnum = docValuesIdx[idx].docValues.getEnum(this.attributes()); + currentStart = docValuesIdx[idx].start; + currentMax = currentStart + docValuesIdx[idx].length; + relativeDoc = target - currentStart; + } else { + return currentDoc = currentStart + currentEnum.advance(relativeDoc); + } + } while ((relativeDoc = currentEnum.advance(relativeDoc)) == NO_MORE_DOCS); + return currentDoc = currentStart + relativeDoc; } @Override public int docID() { - return pos; + return currentDoc; } @Override public int nextDoc() throws IOException { - return advance(pos + 1); + return advance(currentDoc + 1); } } - private class MultiSource extends Source { - private int numDocs_ = 0; + private static class MultiSource extends Source { + private int numDocs = 0; private int start = 0; private Source current; - private Slice[] subSlices; - private DocValuesIndex[] docVAluesIdx; + private final int[] starts; + private final DocValuesIndex[] docValuesIdx; + + public MultiSource(DocValuesIndex[] docValuesIdx, int[] starts) { + this.docValuesIdx = docValuesIdx; + this.starts = starts; - public MultiSource(Slice[] subSlices, DocValuesIndex[] docValuesIdx) { - this.subSlices = subSlices; - this.docVAluesIdx = docValuesIdx; } - public long ints(int docID) { -// int n = docID - start; -// if (n >= numDocs_) { -// int idx = readerIndex(docID); -// try { -// current = subReaders[idx].getIndexValuesCache().getInts(id); -// if (current == null) // nocommit does that work with default values? -// current = new DummySource(); -// } catch (IOException ex) { -// // nocommit what to do here? -// throw new RuntimeException(ex); -// } -// start = starts[idx]; -// numDocs_ = subReaders[idx].maxDoc(); -// n = docID - start; -// } -// return current.ints(n); - return 0l; + public long getInt(int docID) { + final int doc = ensureSource(docID); + return current.getInt(doc); } - public double floats(int docID) { -// int n = docID - start; -// if (n >= numDocs_) { -// int idx = readerIndex(docID); -// try { -// current = subReaders[idx].getIndexValuesCache().getFloats(id); -// if (current == null) // nocommit does that work with default values? -// current = new DummySource(); -// } catch (IOException ex) { -// // nocommit what to do here? 
-// throw new RuntimeException(ex); -// } -// numDocs_ = subReaders[idx].maxDoc(); -// -// start = starts[idx]; -// n = docID - start; -// } -// return current.floats(n); - return 0d; + private final int ensureSource(int docID) { + int n = docID - start; + if (n >= numDocs) { + final int idx = ReaderUtil.subIndex(docID, starts); + assert idx >= 0 && idx < docValuesIdx.length : "idx was " + idx + + " for doc id: " + docID + " slices : " + Arrays.toString(starts); + assert docValuesIdx[idx] != null; + try { + current = docValuesIdx[idx].docValues.load(); + } catch (IOException e) { + throw new RuntimeException("load failed", e); // TODO how should we + // handle this + } + + start = docValuesIdx[idx].start; + numDocs = docValuesIdx[idx].length; + n = docID - start; + } + return n; } - public BytesRef bytes(int docID) { -// int n = docID - start; -// if (n >= numDocs_) { -// int idx = readerIndex(docID); -// try { -// current = subReaders[idx].getIndexValuesCache().getBytes(id); -// if (current == null) // nocommit does that work with default values? -// current = new DummySource(); -// } catch (IOException ex) { -// // nocommit what to do here? -// throw new RuntimeException(ex); -// } -// numDocs_ = subReaders[idx].maxDoc(); -// start = starts[idx]; -// n = docID - start; -// } -// return current.bytes(n); - return null; + public double getFloat(int docID) { + final int doc = ensureSource(docID); + return current.getFloat(doc); + } + + public BytesRef getBytes(int docID) { + final int doc = ensureSource(docID); + return current.getBytes(doc); } public long ramBytesUsed() { @@ -218,17 +237,17 @@ public class MultiDocValues extends DocValues { private final BytesRef ref = new BytesRef(); @Override - public BytesRef bytes(int docID) { + public BytesRef getBytes(int docID) { return ref; } @Override - public double floats(int docID) { + public double getFloat(int docID) { return 0.0d; } @Override - public long ints(int docID) { + public long getInt(int docID) { return 0; } @@ -296,5 +315,4 @@ public class MultiDocValues extends DocValues { public Values type() { return this.docValuesIdx[0].docValues.type(); } - } diff --git a/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java b/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java index 64735a6597c..f0c7a6cdcf5 100644 --- a/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java @@ -179,8 +179,8 @@ class PackedIntsImpl { } @Override - public long ints(int docID) { - // nocommit -- can we somehow avoid 2X method calls + public long getInt(int docID) { + // TODO -- can we somehow avoid 2X method calls // on each get? must push minValue down, and make // PackedInts implement Ints.Source final long val = values.get(docID); @@ -195,7 +195,9 @@ class PackedIntsImpl { } } + @Override public void close() throws IOException { + super.close(); datIn.close(); } diff --git a/lucene/src/java/org/apache/lucene/index/values/Values.java b/lucene/src/java/org/apache/lucene/index/values/Values.java index c806b1650f7..d7d613c0510 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Values.java +++ b/lucene/src/java/org/apache/lucene/index/values/Values.java @@ -34,7 +34,7 @@ public enum Values { SIMPLE_FLOAT_4BYTE, SIMPLE_FLOAT_8BYTE, - // nocommit -- shouldn't lucene decide/detect straight vs + // TODO(simonw): -- shouldn't lucene decide/detect straight vs // deref, as well fixed vs var? 
BYTES_FIXED_STRAIGHT, BYTES_FIXED_DEREF, @@ -44,5 +44,5 @@ public enum Values { BYTES_VAR_DEREF, BYTES_VAR_SORTED - // nocommit -- need STRING variants as well + // TODO(simonw): -- need STRING variants as well } diff --git a/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java index dccbd3bba08..9ab2adc6dde 100644 --- a/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java @@ -135,7 +135,7 @@ class VarDerefBytesImpl { idxOut.writeInt(address-1); // write index - // nocommit -- allow forcing fixed array (not -1) + // TODO(simonw): -- allow forcing fixed array (not -1) // TODO(simonw): check the address calculation / make it more intuitive final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount, PackedInts.bitsRequired(address-1)); final int limit; @@ -188,7 +188,7 @@ class VarDerefBytesImpl { } @Override - public BytesRef bytes(int docID) { + public BytesRef getBytes(int docID) { int address = (int) index.get(docID); if (address == 0) { assert defaultValue.length == 0: " default value manipulated"; diff --git a/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java index c8536d8dc0c..7b291520a16 100644 --- a/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java @@ -120,7 +120,7 @@ class VarSortedBytesImpl { idxOut.writeLong(offset); // write index -- first doc -> 1+ord - // nocommit -- allow not -1: + // TODO(simonw): allow not -1: final PackedInts.Writer indexWriter = PackedInts.getWriter(idxOut, docCount, PackedInts.bitsRequired(count)); final int limit = docCount > docToEntry.length ? 
docToEntry.length @@ -135,7 +135,7 @@ class VarSortedBytesImpl { indexWriter.finish(); // next ord (0-based) -> offset - // nocommit -- allow not -1: + // TODO(simonw): -- allow not -1: PackedInts.Writer offsetWriter = PackedInts.getWriter(idxOut, count, PackedInts.bitsRequired(lastOffset)); for (int i = 0; i < count; i++) { diff --git a/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java index 436a9799fc4..f747bb06139 100644 --- a/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java @@ -82,14 +82,15 @@ class VarStraightBytesImpl { @Override synchronized public void finish(int docCount) throws IOException { - if (datOut == null) + if (datOut == null) { return; + } initIndexOut(); // write all lengths to index // write index fill(docCount); idxOut.writeVInt(address); - // nocommit -- allow not -1 + // TODO(simonw): allow not -1 final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount, PackedInts.bitsRequired(address)); for (int i = 0; i < docCount; i++) { @@ -136,7 +137,7 @@ class VarStraightBytesImpl { } @Override - public BytesRef bytes(int docID) { + public BytesRef getBytes(int docID) { final int address = (int) addresses.get(docID); bytesRef.offset = address; if (docID == maxDoc - 1) { diff --git a/lucene/src/java/org/apache/lucene/search/FieldComparator.java b/lucene/src/java/org/apache/lucene/search/FieldComparator.java index 2b322d6d4f8..49ae53060b6 100644 --- a/lucene/src/java/org/apache/lucene/search/FieldComparator.java +++ b/lucene/src/java/org/apache/lucene/search/FieldComparator.java @@ -336,7 +336,7 @@ public abstract class FieldComparator { @Override public int compareBottom(int doc) { - final double v2 = currentReaderValues.floats(doc); + final double v2 = currentReaderValues.getFloat(doc); if (bottom > v2) { return 1; } else if (bottom < v2) { @@ -348,12 +348,12 @@ public abstract class FieldComparator { @Override public void copy(int slot, int doc) { - values[slot] = currentReaderValues.floats(doc); + values[slot] = currentReaderValues.getFloat(doc); } @Override public FieldComparator setNextReader(IndexReader reader, int docBase) throws IOException { - currentReaderValues = reader.getIndexValuesCache().getFloats(field); + currentReaderValues = reader.docValues(field).getCached(true); return this; } @@ -538,7 +538,7 @@ public abstract class FieldComparator { public int compareBottom(int doc) { // TODO: there are sneaky non-branch ways to compute // -1/+1/0 sign - final long v2 = currentReaderValues.ints(doc); + final long v2 = currentReaderValues.getInt(doc); if (bottom > v2) { return 1; } else if (bottom < v2) { @@ -550,12 +550,12 @@ public abstract class FieldComparator { @Override public void copy(int slot, int doc) { - values[slot] = currentReaderValues.ints(doc); + values[slot] = currentReaderValues.getInt(doc); } @Override public FieldComparator setNextReader(IndexReader reader, int docBase) throws IOException { - currentReaderValues = reader.getIndexValuesCache().getInts(field); + currentReaderValues = reader.docValues(field).getCached(true); return this; } diff --git a/lucene/src/java/org/apache/lucene/search/SortField.java b/lucene/src/java/org/apache/lucene/search/SortField.java index 623b785489a..0e512de4a9f 100644 --- a/lucene/src/java/org/apache/lucene/search/SortField.java +++ b/lucene/src/java/org/apache/lucene/search/SortField.java @@ -32,7 +32,7 @@ import 
org.apache.lucene.search.cache.ShortValuesCreator; import org.apache.lucene.util.StringHelper; import org.apache.lucene.util.BytesRef; -// nocommit -- for cleaner transition, maybe we should make +// TODO(simonw) -- for cleaner transition, maybe we should make // a new SortField that subclasses this one and always uses // index values? diff --git a/lucene/src/java/org/apache/lucene/util/ReaderUtil.java b/lucene/src/java/org/apache/lucene/util/ReaderUtil.java index 7d971e989b7..875e6208195 100644 --- a/lucene/src/java/org/apache/lucene/util/ReaderUtil.java +++ b/lucene/src/java/org/apache/lucene/util/ReaderUtil.java @@ -173,4 +173,26 @@ public class ReaderUtil { } return hi; } + + public static int subIndex(int n, Slice[] slices) { + // searcher/reader for doc n: + int size = slices.length; + int lo = 0; // search starts array + int hi = size - 1; // for first element less than n, return its index + while (hi >= lo) { + int mid = (lo + hi) >>> 1; + int midValue = slices[mid].start; + if (n < midValue) + hi = mid - 1; + else if (n > midValue) + lo = mid + 1; + else { // found a match + while (mid + 1 < size && slices[mid + 1].start == midValue) { + mid++; // scan to last match + } + return mid; + } + } + return hi; + } } diff --git a/lucene/src/test/org/apache/lucene/index/values/TestIndexValues.java b/lucene/src/test/org/apache/lucene/index/values/TestIndexValues.java index 3e046296f4b..4677935740d 100644 --- a/lucene/src/test/org/apache/lucene/index/values/TestIndexValues.java +++ b/lucene/src/test/org/apache/lucene/index/values/TestIndexValues.java @@ -70,19 +70,19 @@ public class TestIndexValues extends LuceneTestCase { public static void beforeClassLuceneTestCaseJ4() { LuceneTestCase.beforeClassLuceneTestCaseJ4(); final CodecProvider cp = CodecProvider.getDefault(); - docValuesCodec = new DocValuesCodec(cp.lookup(CodecProvider.getDefaultCodec())); + docValuesCodec = new DocValuesCodec(cp.lookup(CodecProvider + .getDefaultCodec())); cp.register(docValuesCodec); CodecProvider.setDefaultCodec(docValuesCodec.name); } - + @AfterClass public static void afterClassLuceneTestCaseJ4() { final CodecProvider cp = CodecProvider.getDefault(); cp.unregister(docValuesCodec); - LuceneTestCase.afterClassLuceneTestCaseJ4(); + LuceneTestCase.afterClassLuceneTestCaseJ4(); } - - + public void testBytesStraight() throws IOException { runTestBytes(Bytes.Mode.STRAIGHT, true); runTestBytes(Bytes.Mode.STRAIGHT, false); @@ -164,14 +164,14 @@ public class TestIndexValues extends LuceneTestCase { if (mode == Bytes.Mode.SORTED) { s = ss = r.loadSorted(comp); } else { - s = r.load(); + s = getSource(r); ss = null; } for (int i = 0; i < 100; i++) { final int idx = 2 * i; - assertNotNull("doc " + idx + "; value=" + values[idx], s.bytes(idx)); - assertEquals("doc " + idx, values[idx], s.bytes(idx).utf8ToString()); + assertNotNull("doc " + idx + "; value=" + values[idx], s.getBytes(idx)); + assertEquals("doc " + idx, values[idx], s.getBytes(idx).utf8ToString()); if (ss != null) { assertEquals("doc " + idx, values[idx], ss.getByOrd(ss.ord(idx)) .utf8ToString()); @@ -247,9 +247,9 @@ public class TestIndexValues extends LuceneTestCase { DocValues r = Ints.getValues(dir, "test", useFixedArrays); for (int iter = 0; iter < 2; iter++) { - Source s = r.load(); + Source s = getSource(r); for (int i = 0; i < NUM_VALUES; i++) { - final long v = s.ints(i); + final long v = s.getInt(i); assertEquals("index " + i + " b: " + b, values[i], v); } } @@ -311,9 +311,9 @@ public class TestIndexValues extends LuceneTestCase { DocValues r = 
Floats.getValues(dir, "test", NUM_VALUES + additionalValues); for (int iter = 0; iter < 2; iter++) { - Source s = r.load(); + Source s = getSource(r); for (int i = 0; i < NUM_VALUES; i++) { - assertEquals(values[i], s.floats(i), 0.0f); + assertEquals(values[i], s.getFloat(i), 0.0f); } } @@ -437,12 +437,12 @@ public class TestIndexValues extends LuceneTestCase { case PACKED_INTS: case PACKED_INTS_FIXED: { DocValues intsReader = getDocValues(r, val.name()); - Source ints = intsReader.load(); + Source ints = getSource(intsReader); ValuesEnum intsEnum = intsReader.getEnum(); assertNotNull(intsEnum); LongsRef enumRef = intsEnum.addAttribute(ValuesAttribute.class).ints(); for (int i = 0; i < base; i++) { - assertEquals(0, ints.ints(i)); + assertEquals(0, ints.getInt(i)); assertEquals(val.name() + " base: " + base + " index: " + i, i, random.nextBoolean() ? intsEnum.advance(i) : intsEnum.nextDoc()); assertEquals(0, enumRef.get()); @@ -454,8 +454,8 @@ public class TestIndexValues extends LuceneTestCase { } assertEquals("advance failed at index: " + i + " of " + r.numDocs() + " docs", i, intsEnum.advance(i)); - assertEquals(expected, ints.ints(i)); assertEquals(expected, enumRef.get()); + assertEquals(expected, ints.getInt(i)); } } @@ -463,14 +463,16 @@ public class TestIndexValues extends LuceneTestCase { case SIMPLE_FLOAT_4BYTE: case SIMPLE_FLOAT_8BYTE: { DocValues floatReader = getDocValues(r, val.name()); - Source floats = floatReader.load(); + assertNotNull(floatReader); + Source floats = getSource(floatReader); ValuesEnum floatEnum = floatReader.getEnum(); assertNotNull(floatEnum); FloatsRef enumRef = floatEnum.addAttribute(ValuesAttribute.class) .floats(); for (int i = 0; i < base; i++) { - assertEquals(0.0d, floats.floats(i), 0.0d); + assertEquals(" floats failed for doc: " + i + " base: " + base, 0.0d, + floats.getFloat(i), 0.0d); assertEquals(i, random.nextBoolean() ? floatEnum.advance(i) : floatEnum.nextDoc()); assertEquals("index " + i, 0.0, enumRef.get(), 0.0); @@ -483,7 +485,8 @@ public class TestIndexValues extends LuceneTestCase { assertEquals("advance failed at index: " + i + " of " + r.numDocs() + " docs base:" + base, i, floatEnum.advance(i)); assertEquals("index " + i, 2.0 * expected, enumRef.get(), 0.00001); - assertEquals("index " + i, 2.0 * expected, floats.floats(i), 0.00001); + assertEquals("index " + i, 2.0 * expected, floats.getFloat(i), + 0.00001); } } break; @@ -505,15 +508,13 @@ public class TestIndexValues extends LuceneTestCase { Values.BYTES_VAR_DEREF, Values.BYTES_VAR_SORTED, Values.BYTES_VAR_STRAIGHT); - private static EnumSet STRAIGHT_BYTES = EnumSet.of( - Values.BYTES_FIXED_STRAIGHT, Values.BYTES_VAR_STRAIGHT); - private static EnumSet NUMERICS = EnumSet.of(Values.PACKED_INTS, Values.PACKED_INTS_FIXED, Values.SIMPLE_FLOAT_4BYTE, Values.SIMPLE_FLOAT_8BYTE); private static Index[] IDX_VALUES = new Index[] { Index.ANALYZED, - Index.ANALYZED_NO_NORMS, Index.NOT_ANALYZED, Index.NOT_ANALYZED_NO_NORMS }; + Index.ANALYZED_NO_NORMS, Index.NOT_ANALYZED, Index.NOT_ANALYZED_NO_NORMS, + Index.NO }; private OpenBitSet indexValues(IndexWriter w, int numValues, Values value, List valueVarList, boolean withDeletions, int multOfSeven) @@ -521,9 +522,10 @@ public class TestIndexValues extends LuceneTestCase { final boolean isNumeric = NUMERICS.contains(value); OpenBitSet deleted = new OpenBitSet(numValues); Document doc = new Document(); + Index idx = IDX_VALUES[random.nextInt(IDX_VALUES.length)]; Fieldable field = random.nextBoolean() ? 
new ValuesField(value.name()) : newField(value.name(), _TestUtil.randomRealisticUnicodeString(random, - 10), IDX_VALUES[random.nextInt(IDX_VALUES.length)]); + 10), idx == Index.NO ? Store.YES : Store.NO, idx); doc.add(field); ValuesAttribute valuesAttribute = ValuesField.values(field); @@ -582,9 +584,10 @@ public class TestIndexValues extends LuceneTestCase { } w.commit(); - // nocommit test unoptimized with deletions - if (true || withDeletions || random.nextBoolean()) - w.optimize(); + // TODO test unoptimized with deletions + if (withDeletions || random.nextBoolean()) + ; + w.optimize(); return deleted; } @@ -593,10 +596,9 @@ public class TestIndexValues extends LuceneTestCase { Directory d = newDirectory(); IndexWriter w = new IndexWriter(d, cfg); final List byteVariantList = new ArrayList(BYTES); - // run in random order to test if fill works correctly during merges Collections.shuffle(byteVariantList, random); - final int numValues = 350; + final int numValues = 333 + random.nextInt(150); for (Values byteIndexValue : byteVariantList) { List closeables = new ArrayList(); @@ -607,11 +609,10 @@ public class TestIndexValues extends LuceneTestCase { assertEquals(0, r.numDeletedDocs()); final int numRemainingValues = (int) (numValues - deleted.cardinality()); final int base = r.numDocs() - numRemainingValues; - DocValues bytesReader = getDocValues(r, byteIndexValue.name()); assertNotNull("field " + byteIndexValue.name() + " returned null reader - maybe merged failed", bytesReader); - Source bytes = bytesReader.load(); + Source bytes = getSource(bytesReader); ValuesEnum bytesEnum = bytesReader.getEnum(); assertNotNull(bytesEnum); final ValuesAttribute attr = bytesEnum @@ -619,7 +620,7 @@ public class TestIndexValues extends LuceneTestCase { byte upto = 0; // test the filled up slots for correctness for (int i = 0; i < base; i++) { - final BytesRef br = bytes.bytes(i); + final BytesRef br = bytes.getBytes(i); String msg = " field: " + byteIndexValue.name() + " at index: " + i + " base: " + base + " numDocs:" + r.numDocs(); switch (byteIndexValue) { @@ -645,7 +646,7 @@ public class TestIndexValues extends LuceneTestCase { default: assertNotNull("expected none null - " + msg, br); if (br.length != 0) { - bytes.bytes(i); + bytes.getBytes(i); } assertEquals("expected empty bytes - " + br.utf8ToString() + msg, 0, br.length); @@ -665,7 +666,7 @@ public class TestIndexValues extends LuceneTestCase { upto += bytesSize; } - BytesRef br = bytes.bytes(i); + BytesRef br = bytes.getBytes(i); if (bytesEnum.docID() != i) assertEquals("seek failed for index " + i + " " + msg, i, bytesEnum .advance(i)); @@ -692,10 +693,9 @@ public class TestIndexValues extends LuceneTestCase { private DocValues getDocValues(IndexReader reader, String field) throws IOException { boolean optimized = reader.isOptimized(); - Fields fields = optimized ? reader.getSequentialSubReaders()[0].fields() : MultiFields - .getFields(reader); -// return fields.docValues(field); - switch (random.nextInt(optimized ? 3 : 2)) { + Fields fields = optimized ? reader.getSequentialSubReaders()[0].fields() + : MultiFields.getFields(reader); + switch (random.nextInt(optimized ? 3 : 2)) { // case 2 only if optimized case 0: return fields.docValues(field); case 1: @@ -706,10 +706,14 @@ public class TestIndexValues extends LuceneTestCase { return iterator.docValues(); } throw new RuntimeException("no such field " + field); - case 2: + case 2:// this only works if we are on an optimized index! 
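+ // case 2 asks the first segment reader directly and therefore only
+ // covers the whole index when it has been optimized down to a single
+ // segment; cases 0 and 1 go through the merged Fields/FieldsEnum views
+ // and work on unoptimized indexes as well.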
return reader.getSequentialSubReaders()[0].docValues(field); } -throw new RuntimeException(); -} + throw new RuntimeException(); + } + + private Source getSource(DocValues values) throws IOException { + return random.nextBoolean() ? values.load() : values.getCached(true); + } } From 977bb062846ba65fe3c837efa8d8e601e60f0617 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Sat, 30 Oct 2010 06:58:15 +0000 Subject: [PATCH 010/116] removed unused function git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1029001 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/lucene/util/ReaderUtil.java | 22 ------------------- 1 file changed, 22 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/util/ReaderUtil.java b/lucene/src/java/org/apache/lucene/util/ReaderUtil.java index 875e6208195..7d971e989b7 100644 --- a/lucene/src/java/org/apache/lucene/util/ReaderUtil.java +++ b/lucene/src/java/org/apache/lucene/util/ReaderUtil.java @@ -173,26 +173,4 @@ public class ReaderUtil { } return hi; } - - public static int subIndex(int n, Slice[] slices) { - // searcher/reader for doc n: - int size = slices.length; - int lo = 0; // search starts array - int hi = size - 1; // for first element less than n, return its index - while (hi >= lo) { - int mid = (lo + hi) >>> 1; - int midValue = slices[mid].start; - if (n < midValue) - hi = mid - 1; - else if (n > midValue) - lo = mid + 1; - else { // found a match - while (mid + 1 < size && slices[mid + 1].start == midValue) { - mid++; // scan to last match - } - return mid; - } - } - return hi; - } } From aff234e85e438fdae512079565ffab82ae173ef6 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Sat, 30 Oct 2010 10:56:01 +0000 Subject: [PATCH 011/116] LUCENE-2700: Enabled MultiField tests & introduced SourceCache git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1029017 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/lucene/index/MultiFieldsEnum.java | 6 +- .../lucene/index/codecs/FieldsConsumer.java | 6 +- .../apache/lucene/index/values/DocValues.java | 34 +++++----- .../lucene/index/values/MultiDocValues.java | 15 +---- .../lucene/index/values/SourceCache.java | 65 +++++++++++++++++++ .../index/values/ValuesAttributeImpl.java | 12 ++-- .../apache/lucene/search/FieldComparator.java | 4 +- .../lucene/index/values/TestIndexValues.java | 24 ++++--- 8 files changed, 110 insertions(+), 56 deletions(-) create mode 100644 lucene/src/java/org/apache/lucene/index/values/SourceCache.java diff --git a/lucene/src/java/org/apache/lucene/index/MultiFieldsEnum.java b/lucene/src/java/org/apache/lucene/index/MultiFieldsEnum.java index 57505592a1b..f59587107e1 100644 --- a/lucene/src/java/org/apache/lucene/index/MultiFieldsEnum.java +++ b/lucene/src/java/org/apache/lucene/index/MultiFieldsEnum.java @@ -163,10 +163,9 @@ public final class MultiFieldsEnum extends FieldsEnum { FieldsEnumWithSlice withSlice = enumWithSlices[i]; Slice slice = withSlice.slice; final DocValues values = withSlice.fields.docValues(); - final int start = slice.start; final int length = slice.length; - if (values != null) { + if (values != null && currentField.equals(withSlice.current)) { if (docsUpto != start) { type = values.type(); docValuesIndex.add(new MultiDocValues.DocValuesIndex( @@ -176,9 +175,8 @@ public final class MultiFieldsEnum extends FieldsEnum { docValuesIndex.add(new MultiDocValues.DocValuesIndex(values, start, length)); docsUpto = start + length; - - } else if (i+1 == numEnums && !docValuesIndex.isEmpty()) { + } else if (i + 1 
== numEnums && !docValuesIndex.isEmpty()) { docValuesIndex.add(new MultiDocValues.DocValuesIndex( new MultiDocValues.DummyDocValues(start, type), docsUpto, start - docsUpto)); diff --git a/lucene/src/java/org/apache/lucene/index/codecs/FieldsConsumer.java b/lucene/src/java/org/apache/lucene/index/codecs/FieldsConsumer.java index de57c651824..e4e29b79bd1 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/FieldsConsumer.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/FieldsConsumer.java @@ -67,7 +67,11 @@ public abstract class FieldsConsumer implements Closeable { } if (mergeState.fieldInfo.hasDocValues()) { final DocValues docValues = fieldsEnum.docValues(); - assert docValues != null : "DocValues are null for " + mergeState.fieldInfo.getDocValues(); + // TODO: is this assert valid, and if so when? +// assert docValues != null : "DocValues are null for " + mergeState.fieldInfo.getDocValues(); + if(docValues == null) { // for now just continue + continue; + } final DocValuesConsumer docValuesConsumer = addValuesField(mergeState.fieldInfo); assert docValuesConsumer != null; docValuesConsumer.merge(mergeState, docValues); diff --git a/lucene/src/java/org/apache/lucene/index/values/DocValues.java b/lucene/src/java/org/apache/lucene/index/values/DocValues.java index 44a2ae05722..2ed2192f831 100644 --- a/lucene/src/java/org/apache/lucene/index/values/DocValues.java +++ b/lucene/src/java/org/apache/lucene/index/values/DocValues.java @@ -25,11 +25,8 @@ import org.apache.lucene.util.BytesRef; public abstract class DocValues implements Closeable { - private final Object lock = new Object(); - - private Source cachedReference; - public static final DocValues[] EMPTY_ARRAY = new DocValues[0]; + private SourceCache cache = new SourceCache.DirectSourceCache(); public ValuesEnum getEnum() throws IOException { return getEnum(null); } @@ -40,20 +37,12 @@ public abstract class DocValues implements Closeable { public abstract Source load() throws IOException; - public Source getCached(boolean load) throws IOException { - synchronized (lock) { // TODO make sorted source cachable too - if (load && cachedReference == null) - cachedReference = load(); - return cachedReference; - } + public Source getSource() throws IOException { + return cache.load(this); } - - public Source releaseCached() { - synchronized (lock) { - final Source retVal = cachedReference; - cachedReference = null; - return retVal; - } + + public SortedSource getSortedSorted(Comparator comparator) throws IOException { + return cache.loadSorted(this, comparator); } public SortedSource loadSorted(Comparator comparator) @@ -64,7 +53,14 @@ public abstract class DocValues implements Closeable { public abstract Values type(); public void close() throws IOException { - releaseCached(); + this.cache.close(this); + } + + public void setCache(SourceCache cache) { + synchronized (this.cache) { + this.cache.close(this); + this.cache = cache; + } } /** @@ -137,5 +133,5 @@ public abstract class DocValues implements Closeable { */ public abstract LookupResult getByValue(BytesRef value); } - + } diff --git a/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java b/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java index 77a78c25121..0c458cf205e 100644 --- a/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java +++ b/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java @@ -19,7 +19,6 @@ package org.apache.lucene.index.values; import java.io.IOException; import java.util.Arrays; -import 
org.apache.lucene.index.values.DocValues.Source; import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.FloatsRef; @@ -28,7 +27,7 @@ import org.apache.lucene.util.ReaderUtil; public class MultiDocValues extends DocValues { - public static class DocValuesIndex { // nocommit is this necessary? + public static class DocValuesIndex { public final static DocValuesIndex[] EMPTY_ARRAY = new DocValuesIndex[0]; final int start; final int length; @@ -96,17 +95,7 @@ public class MultiDocValues extends DocValues { public Source load() throws IOException { return DUMMY; } - - @Override - public Source getCached(boolean load) throws IOException { - return DUMMY; - } - - @Override - public Source releaseCached() { - return DUMMY; - } - + @Override public Values type() { return type; diff --git a/lucene/src/java/org/apache/lucene/index/values/SourceCache.java b/lucene/src/java/org/apache/lucene/index/values/SourceCache.java new file mode 100644 index 00000000000..69037b28a68 --- /dev/null +++ b/lucene/src/java/org/apache/lucene/index/values/SourceCache.java @@ -0,0 +1,65 @@ +package org.apache.lucene.index.values; + +import java.io.IOException; +import java.util.Comparator; + +import org.apache.lucene.index.values.DocValues.SortedSource; +import org.apache.lucene.index.values.DocValues.Source; +import org.apache.lucene.util.BytesRef; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Per {@link DocValues} {@link Source} cache. 
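+ * <p>
+ * A minimal usage sketch from the consumer side (the field name is
+ * illustrative only):
+ * <pre>
+ *   DocValues values = reader.docValues("field");
+ *   Source source = values.getSource(); // loaded once, then served from the cache
+ *   long value = source.getInt(docID);
+ * </pre>
+ * Implementations can be swapped per instance via
+ * {@link DocValues#setCache(SourceCache)}; closing the {@link DocValues}
+ * invalidates the cached reference via {@link #close(DocValues)}.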
+ */ +public abstract class SourceCache { + public abstract Source load(DocValues values) throws IOException; + + public abstract SortedSource loadSorted(DocValues values, + Comparator comp) throws IOException; + + public abstract void invalidate(DocValues values); + + public synchronized void close(DocValues values) { + invalidate(values); + } + + public static final class DirectSourceCache extends SourceCache { + private Source ref; + private SortedSource sortedRef; + + public synchronized Source load(DocValues values) throws IOException { + if (ref == null) + ref = values.load(); + return ref; + } + + public synchronized SortedSource loadSorted(DocValues values, + Comparator comp) throws IOException { + if (sortedRef == null) + sortedRef = values.loadSorted(comp); + return sortedRef; + } + + public synchronized void invalidate(DocValues values) { + ref = null; + sortedRef = null; + } + } + +} diff --git a/lucene/src/java/org/apache/lucene/index/values/ValuesAttributeImpl.java b/lucene/src/java/org/apache/lucene/index/values/ValuesAttributeImpl.java index fe504514ba7..6cd1e0294c7 100644 --- a/lucene/src/java/org/apache/lucene/index/values/ValuesAttributeImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/ValuesAttributeImpl.java @@ -62,7 +62,11 @@ public class ValuesAttributeImpl extends AttributeImpl implements ValuesAttribut @Override public void clear() { - // TODO + bytes = null; + ints = null; + floats = null; + type = null; + bytesComp = null; } @Override @@ -91,9 +95,6 @@ public class ValuesAttributeImpl extends AttributeImpl implements ValuesAttribut } } - /* (non-Javadoc) - * @see java.lang.Object#hashCode() - */ @Override public int hashCode() { final int prime = 31; @@ -105,9 +106,6 @@ public class ValuesAttributeImpl extends AttributeImpl implements ValuesAttribut return result; } - /* (non-Javadoc) - * @see java.lang.Object#equals(java.lang.Object) - */ @Override public boolean equals(Object obj) { if (this == obj) diff --git a/lucene/src/java/org/apache/lucene/search/FieldComparator.java b/lucene/src/java/org/apache/lucene/search/FieldComparator.java index 49ae53060b6..d5f34bb4e72 100644 --- a/lucene/src/java/org/apache/lucene/search/FieldComparator.java +++ b/lucene/src/java/org/apache/lucene/search/FieldComparator.java @@ -353,7 +353,7 @@ public abstract class FieldComparator { @Override public FieldComparator setNextReader(IndexReader reader, int docBase) throws IOException { - currentReaderValues = reader.docValues(field).getCached(true); + currentReaderValues = reader.docValues(field).getSource(); return this; } @@ -555,7 +555,7 @@ public abstract class FieldComparator { @Override public FieldComparator setNextReader(IndexReader reader, int docBase) throws IOException { - currentReaderValues = reader.docValues(field).getCached(true); + currentReaderValues = reader.docValues(field).getSource(); return this; } diff --git a/lucene/src/test/org/apache/lucene/index/values/TestIndexValues.java b/lucene/src/test/org/apache/lucene/index/values/TestIndexValues.java index 4677935740d..0912e0d1204 100644 --- a/lucene/src/test/org/apache/lucene/index/values/TestIndexValues.java +++ b/lucene/src/test/org/apache/lucene/index/values/TestIndexValues.java @@ -44,9 +44,9 @@ import org.apache.lucene.index.MergePolicy; import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.Term; import org.apache.lucene.index.codecs.CodecProvider; +import org.apache.lucene.index.codecs.docvalues.DocValuesCodec; import org.apache.lucene.index.values.DocValues.SortedSource; 
import org.apache.lucene.index.values.DocValues.Source; -import org.apache.lucene.index.codecs.docvalues.DocValuesCodec; import org.apache.lucene.store.Directory; import org.apache.lucene.store.LockObtainFailedException; import org.apache.lucene.util.BytesRef; @@ -56,9 +56,7 @@ import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.OpenBitSet; import org.apache.lucene.util.UnicodeUtil; import org.apache.lucene.util._TestUtil; -import org.junit.After; import org.junit.AfterClass; -import org.junit.Before; import org.junit.BeforeClass; public class TestIndexValues extends LuceneTestCase { @@ -162,7 +160,7 @@ public class TestIndexValues extends LuceneTestCase { Source s; DocValues.SortedSource ss; if (mode == Bytes.Mode.SORTED) { - s = ss = r.loadSorted(comp); + s = ss = getSortedSource(r, comp); } else { s = getSource(r); ss = null; @@ -436,13 +434,16 @@ public class TestIndexValues extends LuceneTestCase { switch (val) { case PACKED_INTS: case PACKED_INTS_FIXED: { + if(val == Values.PACKED_INTS_FIXED) + getDocValues(r, val.name()); DocValues intsReader = getDocValues(r, val.name()); Source ints = getSource(intsReader); + ValuesEnum intsEnum = intsReader.getEnum(); assertNotNull(intsEnum); LongsRef enumRef = intsEnum.addAttribute(ValuesAttribute.class).ints(); for (int i = 0; i < base; i++) { - assertEquals(0, ints.getInt(i)); + assertEquals("index " + i, 0, ints.getInt(i)); assertEquals(val.name() + " base: " + base + " index: " + i, i, random.nextBoolean() ? intsEnum.advance(i) : intsEnum.nextDoc()); assertEquals(0, enumRef.get()); @@ -586,8 +587,7 @@ public class TestIndexValues extends LuceneTestCase { // TODO test unoptimized with deletions if (withDeletions || random.nextBoolean()) - ; - w.optimize(); + w.optimize(); return deleted; } @@ -598,7 +598,7 @@ public class TestIndexValues extends LuceneTestCase { final List byteVariantList = new ArrayList(BYTES); // run in random order to test if fill works correctly during merges Collections.shuffle(byteVariantList, random); - final int numValues = 333 + random.nextInt(150); + final int numValues = 179 + random.nextInt(151); for (Values byteIndexValue : byteVariantList) { List closeables = new ArrayList(); @@ -713,7 +713,11 @@ public class TestIndexValues extends LuceneTestCase { } private Source getSource(DocValues values) throws IOException { - return random.nextBoolean() ? values.load() : values.getCached(true); + // getSource uses cache internally + return random.nextBoolean() ? values.load() : values.getSource(); + } + private SortedSource getSortedSource(DocValues values, Comparator comparator) throws IOException { + // getSortedSource uses cache internally + return random.nextBoolean() ? 
values.loadSorted(comparator) : values.getSortedSorted(comparator); } - } From ee763f345b597660ada713c523b261dd21dbe9f4 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Fri, 12 Nov 2010 17:07:39 +0000 Subject: [PATCH 012/116] split DocValues TestCase into two and converted some nocommits to TODOs git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1034471 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene/index/DocFieldProcessor.java | 2 +- .../codecs/docvalues/DocValuesCodec.java | 7 +- .../org/apache/lucene/index/values/Bytes.java | 12 +- .../apache/lucene/index/values/Floats.java | 3 +- .../lucene/index/values/PackedIntsImpl.java | 2 +- .../lucene/index/values/TestDocValues.java | 318 ++++++++++++++++++ ...Values.java => TestDocValuesIndexing.java} | 297 +--------------- 7 files changed, 342 insertions(+), 299 deletions(-) create mode 100644 lucene/src/test/org/apache/lucene/index/values/TestDocValues.java rename lucene/src/test/org/apache/lucene/index/values/{TestIndexValues.java => TestDocValuesIndexing.java} (61%) diff --git a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java index 097113a1532..dee260a976d 100644 --- a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java +++ b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java @@ -41,7 +41,7 @@ final class DocFieldProcessor extends DocConsumer { final FieldInfos fieldInfos; final DocFieldConsumer consumer; final StoredFieldsWriter fieldsWriter; - final private Map docValues = new HashMap(); + final private Map docValues = new HashMap(); private FieldsConsumer fieldsConsumer; // TODO this should be encapsulated in DocumentsWriter synchronized DocValuesConsumer docValuesConsumer(Directory dir, diff --git a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java index 821f766bd83..dec6204f30b 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java @@ -129,7 +129,8 @@ public class DocValuesCodec extends Codec { throws IOException { Set otherFiles = new HashSet(); other.files(dir, segmentInfo, otherFiles); - for (String string : otherFiles) { + for (String string : otherFiles) { // under some circumstances we only write DocValues + // so other files will be added even if they don't exist if (dir.fileExists(string)) files.add(string); } @@ -141,10 +142,6 @@ public class DocValuesCodec extends Codec { files.add(file); } } - // files.add(IndexFileNames.segmentFileName(segmentInfo.name, "", - // Writer.DATA_EXTENSION)); - // files.add(IndexFileNames.segmentFileName(segmentInfo.name, "", - // Writer.INDEX_EXTENSION)); } diff --git a/lucene/src/java/org/apache/lucene/index/values/Bytes.java b/lucene/src/java/org/apache/lucene/index/values/Bytes.java index fb1d2738d8e..3080cbecdb0 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Bytes.java +++ b/lucene/src/java/org/apache/lucene/index/values/Bytes.java @@ -21,7 +21,6 @@ package org.apache.lucene.index.values; import java.io.IOException; import java.util.Collection; import java.util.Comparator; -import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; import org.apache.lucene.index.IndexFileNames; @@ -59,7 +58,7 @@ public final class Bytes { }; - // nocommit -- i shouldn't have to specify fixed? 
can + // TODO -- i shouldn't have to specify fixed? can // track itself & do the write thing at write time? public static Writer getWriter(Directory dir, String id, Mode mode, Comparator comp, boolean fixedSize) throws IOException { @@ -240,11 +239,12 @@ public final class Bytes { @Override public void files(Collection files) throws IOException { + assert datOut != null; files.add(IndexFileNames.segmentFileName(id, "", IndexFileNames.CSF_DATA_EXTENSION)); - final String idxFile = IndexFileNames.segmentFileName(id, "", + if(idxOut != null) { // called after flush - so this must be initialized if needed or present + final String idxFile = IndexFileNames.segmentFileName(id, "", IndexFileNames.CSF_INDEX_EXTENSION); - if (dir.fileExists(idxFile)) { // TODO is this correct? could be initialized lazy files.add(idxFile); } } @@ -279,11 +279,11 @@ public final class Bytes { } protected final IndexInput cloneData() { - // is never NULL + assert datIn != null; return (IndexInput) datIn.clone(); } - protected final IndexInput cloneIndex() { + protected final IndexInput cloneIndex() { // TODO assert here for null rather than return null return idxIn == null ? null : (IndexInput) idxIn.clone(); } diff --git a/lucene/src/java/org/apache/lucene/index/values/Floats.java b/lucene/src/java/org/apache/lucene/index/values/Floats.java index 36dc00fadd5..f844bba40e4 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Floats.java +++ b/lucene/src/java/org/apache/lucene/index/values/Floats.java @@ -19,8 +19,7 @@ import org.apache.lucene.util.RamUsageEstimator; * Exposes writer/reader for floating point values. You can specify 4 (java * float) or 8 (java double) byte precision. */ -//nocommit - add mmap version -//nocommti - add bulk copy where possible +//TODO - add bulk copy where possible public class Floats { private static final String CODEC_NAME = "SimpleFloats"; static final int VERSION_START = 0; diff --git a/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java b/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java index f0c7a6cdcf5..f37f7bbbdff 100644 --- a/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java @@ -41,7 +41,7 @@ class PackedIntsImpl { static class IntsWriter extends Writer { - // nocommit - can we bulkcopy this on a merge? + // TODO: can we bulkcopy this on a merge? private LongsRef intsRef; private long[] docToValue; private long minValue; diff --git a/lucene/src/test/org/apache/lucene/index/values/TestDocValues.java b/lucene/src/test/org/apache/lucene/index/values/TestDocValues.java new file mode 100644 index 00000000000..8086871678c --- /dev/null +++ b/lucene/src/test/org/apache/lucene/index/values/TestDocValues.java @@ -0,0 +1,318 @@ +package org.apache.lucene.index.values; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.Comparator; + +import org.apache.lucene.index.values.DocValues.SortedSource; +import org.apache.lucene.index.values.DocValues.Source; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.FloatsRef; +import org.apache.lucene.util.LongsRef; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.UnicodeUtil; +import org.apache.lucene.util._TestUtil; + +public class TestDocValues extends LuceneTestCase { + + // TODO -- for sorted test, do our own Sort of the + // values and verify it's identical + + public void testBytesStraight() throws IOException { + runTestBytes(Bytes.Mode.STRAIGHT, true); + runTestBytes(Bytes.Mode.STRAIGHT, false); + } + + public void testBytesDeref() throws IOException { + runTestBytes(Bytes.Mode.DEREF, true); + runTestBytes(Bytes.Mode.DEREF, false); + } + + public void testBytesSorted() throws IOException { + runTestBytes(Bytes.Mode.SORTED, true); + runTestBytes(Bytes.Mode.SORTED, false); + } + + public void runTestBytes(final Bytes.Mode mode, final boolean fixedSize) + throws IOException { + + final BytesRef bytesRef = new BytesRef(); + + final Comparator comp = mode == Bytes.Mode.SORTED ? BytesRef + .getUTF8SortedAsUnicodeComparator() : null; + + Directory dir = newDirectory(); + Writer w = Bytes.getWriter(dir, "test", mode, comp, fixedSize); + int maxDoc = 220; + final String[] values = new String[maxDoc]; + final int lenMin, lenMax; + if (fixedSize) { + lenMin = lenMax = 3 + random.nextInt(7); + } else { + lenMin = 1; + lenMax = 15 + random.nextInt(6); + } + for (int i = 0; i < 100; i++) { + final String s; + if (i > 0 && random.nextInt(5) <= 2) { + // use prior value + s = values[2 * random.nextInt(i)]; + } else { + s = _TestUtil.randomUnicodeString(random, lenMin, lenMax); + } + values[2 * i] = s; + + UnicodeUtil.UTF16toUTF8(s, 0, s.length(), bytesRef); + w.add(2 * i, bytesRef); + } + w.finish(maxDoc); + + DocValues r = Bytes.getValues(dir, "test", mode, fixedSize, maxDoc); + for (int iter = 0; iter < 2; iter++) { + ValuesEnum bytesEnum = r.getEnum(); + assertNotNull("enum is null", bytesEnum); + ValuesAttribute attr = bytesEnum.addAttribute(ValuesAttribute.class); + assertNotNull("attribute is null", attr); + BytesRef ref = attr.bytes(); + assertNotNull("BytesRef is null - enum not initialized to use bytes", + attr); + + for (int i = 0; i < 2; i++) { + final int idx = 2 * i; + assertEquals("doc: " + idx, idx, bytesEnum.advance(idx)); + String utf8String = ref.utf8ToString(); + assertEquals("doc: " + idx + " lenLeft: " + values[idx].length() + + " lenRight: " + utf8String.length(), values[idx], utf8String); + } + assertEquals(ValuesEnum.NO_MORE_DOCS, bytesEnum.advance(maxDoc)); + assertEquals(ValuesEnum.NO_MORE_DOCS, bytesEnum.advance(maxDoc + 1)); + + bytesEnum.close(); + } + + // Verify we can load source twice: + for (int iter = 0; iter < 2; iter++) { + Source s; + DocValues.SortedSource ss; + if (mode == Bytes.Mode.SORTED) { + s = ss = getSortedSource(r, comp); + } else { + s = getSource(r); + ss = null; + } + + for (int i = 0; i < 100; i++) { + final int idx = 2 * i; + assertNotNull("doc " + idx + "; value=" + values[idx], s.getBytes(idx)); + assertEquals("doc " + idx, values[idx], s.getBytes(idx).utf8ToString()); + if (ss != null) { + assertEquals("doc " + idx, values[idx], ss.getByOrd(ss.ord(idx)) + 
.utf8ToString()); + DocValues.SortedSource.LookupResult result = ss + .getByValue(new BytesRef(values[idx])); + assertTrue(result.found); + assertEquals(ss.ord(idx), result.ord); + } + } + + // Lookup random strings: + if (mode == Bytes.Mode.SORTED) { + final int numValues = ss.getValueCount(); + for (int i = 0; i < 1000; i++) { + BytesRef bytesValue = new BytesRef(_TestUtil.randomUnicodeString( + random, lenMin, lenMax)); + SortedSource.LookupResult result = ss.getByValue(bytesValue); + if (result.found) { + assert result.ord > 0; + assertTrue(bytesValue.bytesEquals(ss.getByOrd(result.ord))); + int count = 0; + for (int k = 0; k < 100; k++) { + if (bytesValue.utf8ToString().equals(values[2 * k])) { + assertEquals(ss.ord(2 * k), result.ord); + count++; + } + } + assertTrue(count > 0); + } else { + assert result.ord >= 0; + if (result.ord == 0) { + final BytesRef firstRef = ss.getByOrd(1); + // random string was before our first + assertTrue(firstRef.compareTo(bytesValue) > 0); + } else if (result.ord == numValues) { + final BytesRef lastRef = ss.getByOrd(numValues); + // random string was after our last + assertTrue(lastRef.compareTo(bytesValue) < 0); + } else { + // random string fell between two of our values + final BytesRef before = (BytesRef) ss.getByOrd(result.ord) + .clone(); + final BytesRef after = ss.getByOrd(result.ord + 1); + assertTrue(before.compareTo(bytesValue) < 0); + assertTrue(bytesValue.compareTo(after) < 0); + + } + } + } + } + } + + r.close(); + dir.close(); + } + + public void testInts() throws IOException { + long maxV = 1; + final int NUM_VALUES = 1000; + final long[] values = new long[NUM_VALUES]; + for (int rx = 1; rx < 63; rx++, maxV *= 2) { + for (int b = 0; b < 2; b++) { + Directory dir = newDirectory(); + boolean useFixedArrays = b == 0; + Writer w = Ints.getWriter(dir, "test", useFixedArrays); + for (int i = 0; i < NUM_VALUES; i++) { + final long v = random.nextLong() % (1 + maxV); + values[i] = v; + w.add(i, v); + } + final int additionalDocs = 1 + random.nextInt(9); + w.finish(NUM_VALUES + additionalDocs); + + DocValues r = Ints.getValues(dir, "test", useFixedArrays); + for (int iter = 0; iter < 2; iter++) { + Source s = getSource(r); + for (int i = 0; i < NUM_VALUES; i++) { + final long v = s.getInt(i); + assertEquals("index " + i + " b: " + b, values[i], v); + } + } + + for (int iter = 0; iter < 2; iter++) { + ValuesEnum iEnum = r.getEnum(); + ValuesAttribute attr = iEnum.addAttribute(ValuesAttribute.class); + LongsRef ints = attr.ints(); + for (int i = 0; i < NUM_VALUES; i++) { + assertEquals(i, iEnum.nextDoc()); + assertEquals(values[i], ints.get()); + } + for (int i = NUM_VALUES; i < NUM_VALUES + additionalDocs; i++) { + assertEquals(i, iEnum.nextDoc()); + assertEquals("" + i, 0, ints.get()); + } + + iEnum.close(); + } + + for (int iter = 0; iter < 2; iter++) { + ValuesEnum iEnum = r.getEnum(); + ValuesAttribute attr = iEnum.addAttribute(ValuesAttribute.class); + LongsRef ints = attr.ints(); + for (int i = 0; i < NUM_VALUES; i += 1 + random.nextInt(25)) { + assertEquals(i, iEnum.advance(i)); + assertEquals(values[i], ints.get()); + } + for (int i = NUM_VALUES; i < NUM_VALUES + additionalDocs; i++) { + assertEquals(i, iEnum.advance(i)); + assertEquals("" + i, 0, ints.get()); + } + + iEnum.close(); + } + r.close(); + dir.close(); + } + } + } + + public void testFloats4() throws IOException { + runTestFloats(4, 0.00001); + } + + private void runTestFloats(int precision, double delta) throws IOException { + Directory dir = newDirectory(); + Writer w = 
Floats.getWriter(dir, "test", precision); + final int NUM_VALUES = 1000; + final double[] values = new double[NUM_VALUES]; + for (int i = 0; i < NUM_VALUES; i++) { + final double v = precision == 4 ? random.nextFloat() : random + .nextDouble(); + values[i] = v; + w.add(i, v); + } + final int additionalValues = 1 + random.nextInt(10); + w.finish(NUM_VALUES + additionalValues); + + DocValues r = Floats.getValues(dir, "test", NUM_VALUES + additionalValues); + for (int iter = 0; iter < 2; iter++) { + Source s = getSource(r); + for (int i = 0; i < NUM_VALUES; i++) { + assertEquals(values[i], s.getFloat(i), 0.0f); + } + } + + for (int iter = 0; iter < 2; iter++) { + ValuesEnum fEnum = r.getEnum(); + ValuesAttribute attr = fEnum.addAttribute(ValuesAttribute.class); + FloatsRef floats = attr.floats(); + for (int i = 0; i < NUM_VALUES; i++) { + assertEquals(i, fEnum.nextDoc()); + assertEquals(values[i], floats.get(), delta); + } + for (int i = NUM_VALUES; i < NUM_VALUES + additionalValues; i++) { + assertEquals(i, fEnum.nextDoc()); + assertEquals(0.0, floats.get(), delta); + } + fEnum.close(); + } + for (int iter = 0; iter < 2; iter++) { + ValuesEnum fEnum = r.getEnum(); + ValuesAttribute attr = fEnum.addAttribute(ValuesAttribute.class); + FloatsRef floats = attr.floats(); + for (int i = 0; i < NUM_VALUES; i += 1 + random.nextInt(25)) { + assertEquals(i, fEnum.advance(i)); + assertEquals(values[i], floats.get(), delta); + } + for (int i = NUM_VALUES; i < NUM_VALUES + additionalValues; i++) { + assertEquals(i, fEnum.advance(i)); + assertEquals(0.0, floats.get(), delta); + } + fEnum.close(); + } + + r.close(); + dir.close(); + } + + public void testFloats8() throws IOException { + runTestFloats(8, 0.0); + } + + private Source getSource(DocValues values) throws IOException { + // getSource uses cache internally + return random.nextBoolean() ? values.load() : values.getSource(); + } + + private SortedSource getSortedSource(DocValues values, + Comparator comparator) throws IOException { + // getSortedSource uses cache internally + return random.nextBoolean() ? values.loadSorted(comparator) : values + .getSortedSorted(comparator); + } +} diff --git a/lucene/src/test/org/apache/lucene/index/values/TestIndexValues.java b/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java similarity index 61% rename from lucene/src/test/org/apache/lucene/index/values/TestIndexValues.java rename to lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java index 5728c396600..e2f3a206256 100644 --- a/lucene/src/test/org/apache/lucene/index/values/TestIndexValues.java +++ b/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java @@ -16,12 +16,10 @@ package org.apache.lucene.index.values; * See the License for the specific language governing permissions and * limitations under the License. 
*/ - import java.io.Closeable; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; -import java.util.Comparator; import java.util.EnumSet; import java.util.List; @@ -45,7 +43,6 @@ import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.Term; import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.index.codecs.docvalues.DocValuesCodec; -import org.apache.lucene.index.values.DocValues.SortedSource; import org.apache.lucene.index.values.DocValues.Source; import org.apache.lucene.store.Directory; import org.apache.lucene.store.LockObtainFailedException; @@ -54,22 +51,28 @@ import org.apache.lucene.util.FloatsRef; import org.apache.lucene.util.LongsRef; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.OpenBitSet; -import org.apache.lucene.util.UnicodeUtil; import org.apache.lucene.util._TestUtil; import org.junit.AfterClass; import org.junit.BeforeClass; -public class TestIndexValues extends LuceneTestCase { +/** + * + * Tests DocValues integration into IndexWriter & Codecs + * + */ +public class TestDocValuesIndexing extends LuceneTestCase { + // TODO Add a test for addIndexes + // TODO add test for unoptimized case with deletes - // TODO test addIndexes private static DocValuesCodec docValuesCodec; private static CodecProvider provider; + @BeforeClass public static void beforeClassLuceneTestCaseJ4() { LuceneTestCase.beforeClassLuceneTestCaseJ4(); provider = new CodecProvider(); - docValuesCodec = new DocValuesCodec(CodecProvider.getDefault().lookup(CodecProvider - .getDefaultCodec())); + docValuesCodec = new DocValuesCodec(CodecProvider.getDefault().lookup( + CodecProvider.getDefaultCodec())); provider.register(docValuesCodec); provider.setDefaultFieldCodec(docValuesCodec.name); } @@ -79,277 +82,6 @@ public class TestIndexValues extends LuceneTestCase { LuceneTestCase.afterClassLuceneTestCaseJ4(); } - public void testBytesStraight() throws IOException { - runTestBytes(Bytes.Mode.STRAIGHT, true); - runTestBytes(Bytes.Mode.STRAIGHT, false); - } - - public void testBytesDeref() throws IOException { - runTestBytes(Bytes.Mode.DEREF, true); - runTestBytes(Bytes.Mode.DEREF, false); - } - - public void testBytesSorted() throws IOException { - runTestBytes(Bytes.Mode.SORTED, true); - runTestBytes(Bytes.Mode.SORTED, false); - } - - // nocommit -- for sorted test, do our own Sort of the - // values and verify it's identical - public void runTestBytes(final Bytes.Mode mode, final boolean fixedSize) - throws IOException { - - final BytesRef bytesRef = new BytesRef(); - - final Comparator comp = mode == Bytes.Mode.SORTED ? 
BytesRef - .getUTF8SortedAsUnicodeComparator() : null; - - Directory dir = newDirectory(); - Writer w = Bytes.getWriter(dir, "test", mode, comp, fixedSize); - int maxDoc = 220; - final String[] values = new String[maxDoc]; - final int lenMin, lenMax; - if (fixedSize) { - lenMin = lenMax = 3 + random.nextInt(7); - } else { - lenMin = 1; - lenMax = 15 + random.nextInt(6); - } - for (int i = 0; i < 100; i++) { - final String s; - if (i > 0 && random.nextInt(5) <= 2) { - // use prior value - s = values[2 * random.nextInt(i)]; - } else { - s = _TestUtil.randomUnicodeString(random, lenMin, lenMax); - } - values[2 * i] = s; - - UnicodeUtil.UTF16toUTF8(s, 0, s.length(), bytesRef); - w.add(2 * i, bytesRef); - } - w.finish(maxDoc); - - DocValues r = Bytes.getValues(dir, "test", mode, fixedSize, maxDoc); - for (int iter = 0; iter < 2; iter++) { - ValuesEnum bytesEnum = r.getEnum(); - assertNotNull("enum is null", bytesEnum); - ValuesAttribute attr = bytesEnum.addAttribute(ValuesAttribute.class); - assertNotNull("attribute is null", attr); - BytesRef ref = attr.bytes(); - assertNotNull("BytesRef is null - enum not initialized to use bytes", - attr); - - for (int i = 0; i < 2; i++) { - final int idx = 2 * i; - assertEquals("doc: " + idx, idx, bytesEnum.advance(idx)); - String utf8String = ref.utf8ToString(); - assertEquals("doc: " + idx + " lenLeft: " + values[idx].length() - + " lenRight: " + utf8String.length(), values[idx], utf8String); - } - assertEquals(ValuesEnum.NO_MORE_DOCS, bytesEnum.advance(maxDoc)); - assertEquals(ValuesEnum.NO_MORE_DOCS, bytesEnum.advance(maxDoc + 1)); - - bytesEnum.close(); - } - - // Verify we can load source twice: - for (int iter = 0; iter < 2; iter++) { - Source s; - DocValues.SortedSource ss; - if (mode == Bytes.Mode.SORTED) { - s = ss = getSortedSource(r, comp); - } else { - s = getSource(r); - ss = null; - } - - for (int i = 0; i < 100; i++) { - final int idx = 2 * i; - assertNotNull("doc " + idx + "; value=" + values[idx], s.getBytes(idx)); - assertEquals("doc " + idx, values[idx], s.getBytes(idx).utf8ToString()); - if (ss != null) { - assertEquals("doc " + idx, values[idx], ss.getByOrd(ss.ord(idx)) - .utf8ToString()); - DocValues.SortedSource.LookupResult result = ss - .getByValue(new BytesRef(values[idx])); - assertTrue(result.found); - assertEquals(ss.ord(idx), result.ord); - } - } - - // Lookup random strings: - if (mode == Bytes.Mode.SORTED) { - final int numValues = ss.getValueCount(); - for (int i = 0; i < 1000; i++) { - BytesRef bytesValue = new BytesRef(_TestUtil.randomUnicodeString( - random, lenMin, lenMax)); - SortedSource.LookupResult result = ss.getByValue(bytesValue); - if (result.found) { - assert result.ord > 0; - assertTrue(bytesValue.bytesEquals(ss.getByOrd(result.ord))); - int count = 0; - for (int k = 0; k < 100; k++) { - if (bytesValue.utf8ToString().equals(values[2 * k])) { - assertEquals(ss.ord(2 * k), result.ord); - count++; - } - } - assertTrue(count > 0); - } else { - assert result.ord >= 0; - if (result.ord == 0) { - final BytesRef firstRef = ss.getByOrd(1); - // random string was before our first - assertTrue(firstRef.compareTo(bytesValue) > 0); - } else if (result.ord == numValues) { - final BytesRef lastRef = ss.getByOrd(numValues); - // random string was after our last - assertTrue(lastRef.compareTo(bytesValue) < 0); - } else { - // random string fell between two of our values - final BytesRef before = (BytesRef) ss.getByOrd(result.ord) - .clone(); - final BytesRef after = ss.getByOrd(result.ord + 1); - 
assertTrue(before.compareTo(bytesValue) < 0); - assertTrue(bytesValue.compareTo(after) < 0); - - } - } - } - } - } - - r.close(); - dir.close(); - } - - public void testInts() throws IOException { - long maxV = 1; - final int NUM_VALUES = 1000; - final long[] values = new long[NUM_VALUES]; - for (int rx = 1; rx < 63; rx++, maxV *= 2) { - for (int b = 0; b < 2; b++) { - Directory dir = newDirectory(); - boolean useFixedArrays = b == 0; - Writer w = Ints.getWriter(dir, "test", useFixedArrays); - for (int i = 0; i < NUM_VALUES; i++) { - final long v = random.nextLong() % (1 + maxV); - values[i] = v; - w.add(i, v); - } - final int additionalDocs = 1 + random.nextInt(9); - w.finish(NUM_VALUES + additionalDocs); - - DocValues r = Ints.getValues(dir, "test", useFixedArrays); - for (int iter = 0; iter < 2; iter++) { - Source s = getSource(r); - for (int i = 0; i < NUM_VALUES; i++) { - final long v = s.getInt(i); - assertEquals("index " + i + " b: " + b, values[i], v); - } - } - - for (int iter = 0; iter < 2; iter++) { - ValuesEnum iEnum = r.getEnum(); - ValuesAttribute attr = iEnum.addAttribute(ValuesAttribute.class); - LongsRef ints = attr.ints(); - for (int i = 0; i < NUM_VALUES; i++) { - assertEquals(i, iEnum.nextDoc()); - assertEquals(values[i], ints.get()); - } - for (int i = NUM_VALUES; i < NUM_VALUES + additionalDocs; i++) { - assertEquals(i, iEnum.nextDoc()); - assertEquals("" + i, 0, ints.get()); - } - - iEnum.close(); - } - - for (int iter = 0; iter < 2; iter++) { - ValuesEnum iEnum = r.getEnum(); - ValuesAttribute attr = iEnum.addAttribute(ValuesAttribute.class); - LongsRef ints = attr.ints(); - for (int i = 0; i < NUM_VALUES; i += 1 + random.nextInt(25)) { - assertEquals(i, iEnum.advance(i)); - assertEquals(values[i], ints.get()); - } - for (int i = NUM_VALUES; i < NUM_VALUES + additionalDocs; i++) { - assertEquals(i, iEnum.advance(i)); - assertEquals("" + i, 0, ints.get()); - } - - iEnum.close(); - } - r.close(); - dir.close(); - } - } - } - - public void testFloats4() throws IOException { - runTestFloats(4, 0.00001); - } - - private void runTestFloats(int precision, double delta) throws IOException { - Directory dir = newDirectory(); - Writer w = Floats.getWriter(dir, "test", precision); - final int NUM_VALUES = 1000; - final double[] values = new double[NUM_VALUES]; - for (int i = 0; i < NUM_VALUES; i++) { - final double v = precision == 4 ? 
random.nextFloat() : random - .nextDouble(); - values[i] = v; - w.add(i, v); - } - final int additionalValues = 1 + random.nextInt(10); - w.finish(NUM_VALUES + additionalValues); - - DocValues r = Floats.getValues(dir, "test", NUM_VALUES + additionalValues); - for (int iter = 0; iter < 2; iter++) { - Source s = getSource(r); - for (int i = 0; i < NUM_VALUES; i++) { - assertEquals(values[i], s.getFloat(i), 0.0f); - } - } - - for (int iter = 0; iter < 2; iter++) { - ValuesEnum fEnum = r.getEnum(); - ValuesAttribute attr = fEnum.addAttribute(ValuesAttribute.class); - FloatsRef floats = attr.floats(); - for (int i = 0; i < NUM_VALUES; i++) { - assertEquals(i, fEnum.nextDoc()); - assertEquals(values[i], floats.get(), delta); - } - for (int i = NUM_VALUES; i < NUM_VALUES + additionalValues; i++) { - assertEquals(i, fEnum.nextDoc()); - assertEquals(0.0, floats.get(), delta); - } - fEnum.close(); - } - for (int iter = 0; iter < 2; iter++) { - ValuesEnum fEnum = r.getEnum(); - ValuesAttribute attr = fEnum.addAttribute(ValuesAttribute.class); - FloatsRef floats = attr.floats(); - for (int i = 0; i < NUM_VALUES; i += 1 + random.nextInt(25)) { - assertEquals(i, fEnum.advance(i)); - assertEquals(values[i], floats.get(), delta); - } - for (int i = NUM_VALUES; i < NUM_VALUES + additionalValues; i++) { - assertEquals(i, fEnum.advance(i)); - assertEquals(0.0, floats.get(), delta); - } - fEnum.close(); - } - - r.close(); - dir.close(); - } - - public void testFloats8() throws IOException { - runTestFloats(8, 0.0); - } - /** * Tests complete indexing of {@link Values} including deletions, merging and * sparse value fields on Compound-File @@ -437,7 +169,7 @@ public class TestIndexValues extends LuceneTestCase { assertNotNull(intsReader); Source ints = getSource(intsReader); - + ValuesEnum intsEnum = intsReader.getEnum(); assertNotNull(intsEnum); LongsRef enumRef = intsEnum.addAttribute(ValuesAttribute.class).ints(); @@ -715,8 +447,5 @@ public class TestIndexValues extends LuceneTestCase { // getSource uses cache internally return random.nextBoolean() ? values.load() : values.getSource(); } - private SortedSource getSortedSource(DocValues values, Comparator comparator) throws IOException { - // getSortedSource uses cache internally - return random.nextBoolean() ? 
values.loadSorted(comparator) : values.getSortedSorted(comparator); - } + } From eabf2867ebbfdce70494056807048a25f8a4ecdb Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Tue, 16 Nov 2010 18:34:16 +0000 Subject: [PATCH 013/116] Use pagedBytes in DocValues Byte variants instead of direct byte arrays git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1035731 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/lucene/index/DocumentsWriter.java | 2 +- .../org/apache/lucene/index/values/Bytes.java | 98 +++++++++--- .../index/values/FixedDerefBytesImpl.java | 33 ++--- .../index/values/FixedSortedBytesImpl.java | 139 +++++++----------- .../index/values/FixedStraightBytesImpl.java | 23 +-- .../index/values/VarDerefBytesImpl.java | 40 ++--- .../index/values/VarSortedBytesImpl.java | 57 ++----- .../index/values/VarStraightBytesImpl.java | 49 +++--- 8 files changed, 194 insertions(+), 247 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java b/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java index ea971b93370..175ab09bc68 100644 --- a/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java +++ b/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java @@ -601,7 +601,7 @@ final class DocumentsWriter { } synchronized private void initFlushState(boolean onlyDocStore) { - initSegmentName(onlyDocStore); // nocommit - this does not work for docValues + initSegmentName(onlyDocStore); final SegmentCodecs info = SegmentCodecs.build(docFieldProcessor.fieldInfos, writer.codecs); flushState = new SegmentWriteState(infoStream, directory, segment, docFieldProcessor.fieldInfos, docStoreSegment, numDocsInRAM, numDocsInStore, writer.getConfig().getTermIndexInterval(), info); diff --git a/lucene/src/java/org/apache/lucene/index/values/Bytes.java b/lucene/src/java/org/apache/lucene/index/values/Bytes.java index 3080cbecdb0..691abe588bf 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Bytes.java +++ b/lucene/src/java/org/apache/lucene/index/values/Bytes.java @@ -32,6 +32,7 @@ import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.ByteBlockPool; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CodecUtil; +import org.apache.lucene.util.PagedBytes; /** * Provides concrete Writer/Reader impls for byte[] value per document. There @@ -46,7 +47,7 @@ import org.apache.lucene.util.CodecUtil; * NOTE: Each byte[] must be <= 32768 bytes in length *
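+ * <p>
+ * A minimal write/read round trip as a sketch (directory, id and the
+ * single value are illustrative):
+ * <pre>
+ *   Writer w = Bytes.getWriter(dir, "test", Bytes.Mode.STRAIGHT, null, true);
+ *   w.add(0, new BytesRef("value"));
+ *   w.finish(maxDoc);
+ *   DocValues values = Bytes.getValues(dir, "test", Bytes.Mode.STRAIGHT, true, maxDoc);
+ *   BytesRef ref = values.getSource().getBytes(0);
+ * </pre>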

*/ -//TODO - add bulk copy where possible +// TODO - add bulk copy where possible public final class Bytes { // don't instantiate! @@ -57,7 +58,6 @@ public final class Bytes { STRAIGHT, DEREF, SORTED }; - // TODO -- i shouldn't have to specify fixed? can // track itself & do the write thing at write time? public static Writer getWriter(Directory dir, String id, Mode mode, Comparator comp, boolean fixedSize) throws IOException { @@ -124,29 +124,65 @@ public final class Bytes { protected final IndexInput datIn; protected final IndexInput idxIn; protected final BytesRef defaultValue = new BytesRef(); + protected final static int PAGED_BYTES_BITS = 15; + private final PagedBytes pagedBytes; + protected final PagedBytes.Reader data; + protected final long totalLengthInBytes; - protected BytesBaseSource(IndexInput datIn, IndexInput idxIn) { + protected BytesBaseSource(IndexInput datIn, IndexInput idxIn, PagedBytes pagedBytes, long bytesToRead) + throws IOException { + assert bytesToRead <= datIn.length() : " file size is less than the expected size diff: " + (bytesToRead - datIn.length()) + " pos: " + datIn.getFilePointer(); this.datIn = datIn; + this.totalLengthInBytes = bytesToRead; + this.pagedBytes = pagedBytes; + this.pagedBytes.copy(datIn, bytesToRead); + data = pagedBytes.freeze(true); this.idxIn = idxIn; } public void close() throws IOException { - if (datIn != null) - datIn.close(); - if (idxIn != null) // if straight - idxIn.close(); - + data.close(); + try { + if (datIn != null) + datIn.close(); + } finally { + if (idxIn != null) // if straight + idxIn.close(); + } } + public long ramBytesUsed() { + return 0; //TODO + } + } static abstract class BytesBaseSortedSource extends SortedSource { protected final IndexInput datIn; protected final IndexInput idxIn; protected final BytesRef defaultValue = new BytesRef(); + protected final static int PAGED_BYTES_BITS = 15; + private final PagedBytes pagedBytes; + protected final PagedBytes.Reader data; + protected final BytesRef bytesRef = new BytesRef(); + protected final LookupResult lookupResult = new LookupResult(); + private final Comparator comp; - protected BytesBaseSortedSource(IndexInput datIn, IndexInput idxIn) { + protected BytesBaseSortedSource(IndexInput datIn, IndexInput idxIn, Comparator comp, PagedBytes pagedBytes, long bytesToRead) throws IOException { + assert bytesToRead <= datIn.length() : " file size is less than the expected size diff: " + (bytesToRead - datIn.length()) + " pos: " + datIn.getFilePointer(); this.datIn = datIn; + this.pagedBytes = pagedBytes; + this.pagedBytes.copy(datIn, bytesToRead); + data = pagedBytes.freeze(true); this.idxIn = idxIn; + this.comp = comp == null ? BytesRef.getUTF8SortedAsUnicodeComparator() + : comp; + + } + + @Override + public BytesRef getByOrd(int ord) { + return ord == 0 ? 
defaultValue : deref(--ord); } public void close() throws IOException { @@ -154,13 +190,35 @@ public final class Bytes { datIn.close(); if (idxIn != null) // if straight idxIn.close(); + } + + protected abstract BytesRef deref(int ord); + + protected LookupResult binarySearch(BytesRef b, int low, int high) { + while (low <= high) { + int mid = (low + high) >>> 1; + deref(mid); + final int cmp = comp.compare(bytesRef, b); + if (cmp < 0) { + low = mid + 1; + } else if (cmp > 0) { + high = mid - 1; + } else { + lookupResult.ord = mid + 1; + lookupResult.found = true; + return lookupResult; + } + } + assert comp.compare(bytesRef, b) != 0; + lookupResult.ord = low; + lookupResult.found = false; + return lookupResult; } } static abstract class BytesWriterBase extends Writer { - private final Directory dir; private final String id; protected IndexOutput idxOut; @@ -172,7 +230,8 @@ public final class Bytes { protected final AtomicLong bytesUsed; protected BytesWriterBase(Directory dir, String id, String codecName, - int version, boolean initIndex, boolean initData, ByteBlockPool pool, AtomicLong bytesUsed) throws IOException { + int version, boolean initIndex, boolean initData, ByteBlockPool pool, + AtomicLong bytesUsed) throws IOException { this.dir = dir; this.id = id; this.codecName = codecName; @@ -214,7 +273,7 @@ public final class Bytes { datOut.close(); if (idxOut != null) idxOut.close(); - if(pool != null) + if (pool != null) pool.reset(); } @@ -228,11 +287,11 @@ public final class Bytes { bytesRef = attr.bytes(); assert bytesRef != null; } - + @Override public void add(int docID, ValuesAttribute attr) throws IOException { final BytesRef ref; - if((ref = attr.bytes()) != null) { + if ((ref = attr.bytes()) != null) { add(docID, ref); } } @@ -242,9 +301,10 @@ public final class Bytes { assert datOut != null; files.add(IndexFileNames.segmentFileName(id, "", IndexFileNames.CSF_DATA_EXTENSION)); - if(idxOut != null) { // called after flush - so this must be initialized if needed or present + if (idxOut != null) { // called after flush - so this must be initialized + // if needed or present final String idxFile = IndexFileNames.segmentFileName(id, "", - IndexFileNames.CSF_INDEX_EXTENSION); + IndexFileNames.CSF_INDEX_EXTENSION); files.add(idxFile); } } @@ -254,7 +314,7 @@ public final class Bytes { * Opens all necessary files, but does not read any data in until you call * {@link #load}. */ - static abstract class BytesReaderBase extends DocValues { + static abstract class BytesReaderBase extends DocValues { protected final IndexInput idxIn; protected final IndexInput datIn; protected final int version; @@ -276,6 +336,7 @@ public final class Bytes { } else { idxIn = null; } + } protected final IndexInput cloneData() { @@ -283,7 +344,8 @@ public final class Bytes { return (IndexInput) datIn.clone(); } - protected final IndexInput cloneIndex() { // TODO assert here for null rather than return null + protected final IndexInput cloneIndex() { // TODO assert here for null + // rather than return null return idxIn == null ? 
null : (IndexInput) idxIn.clone(); } diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java index b1e2449cbfd..3be9918d55d 100644 --- a/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java @@ -30,6 +30,7 @@ import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.ByteBlockPool; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefHash; +import org.apache.lucene.util.PagedBytes; import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.ByteBlockPool.Allocator; import org.apache.lucene.util.ByteBlockPool.DirectAllocator; @@ -133,27 +134,22 @@ class FixedDerefBytesImpl { @Override public Source load() throws IOException { - return new Source(cloneData(), cloneIndex(), size); + final IndexInput index = cloneIndex(); + return new Source(cloneData(), index , size, index.readInt()); } private static class Source extends BytesBaseSource { - // TODO: paged data or mmap? - private final byte[] data; private final BytesRef bytesRef = new BytesRef(); private final PackedInts.Reader index; - private final int numValue; private final int size; + private final int numValues; - protected Source(IndexInput datIn, IndexInput idxIn, int size) + protected Source(IndexInput datIn, IndexInput idxIn, int size, int numValues) throws IOException { - super(datIn, idxIn); + super(datIn, idxIn, new PagedBytes(PAGED_BYTES_BITS), size * numValues); this.size = size; - numValue = idxIn.readInt(); - data = new byte[size * numValue]; - datIn.readBytes(data, 0, size * numValue); + this.numValues = numValues; index = PackedInts.getReader(idxIn); - bytesRef.bytes = data; - bytesRef.length = size; } @Override @@ -162,22 +158,13 @@ class FixedDerefBytesImpl { if (id == 0) { return defaultValue; } - bytesRef.offset = ((id - 1) * size); - return bytesRef; - } - - public long ramBytesUsed() { - // TODO(simonw): move ram calculation to PackedInts?! 
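// A toy model of the deref addressing used just above: each distinct value is
// stored exactly once, docID maps to a 1-based ord (0 meaning "no value for
// this doc"), and the payload of ord n starts at (n - 1) * size. This sketch
// substitutes plain arrays for PackedInts.Reader and PagedBytes.Reader; the
// names are illustrative, not part of the patch.
class DerefLookupSketch {
  static byte[] lookup(int docID, int[] docToOrd, byte[] data, int size) {
    final int ord = docToOrd[docID];
    if (ord == 0) {
      return null; // document has no value
    }
    final byte[] result = new byte[size];
    System.arraycopy(data, (ord - 1) * size, result, 0, size);
    return result;
  }
}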
- return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER - + data.length - + (RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + index - .getBitsPerValue() - * index.size()); + return data.fill(bytesRef, ((id - 1) * size), size); } + @Override public int getValueCount() { - return numValue; + return numValues; } } diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java index 810c6a0a82c..e445cfb29bd 100644 --- a/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java @@ -33,6 +33,7 @@ import org.apache.lucene.util.ByteBlockPool; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefHash; import org.apache.lucene.util.CodecUtil; +import org.apache.lucene.util.PagedBytes; import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.ByteBlockPool.Allocator; import org.apache.lucene.util.ByteBlockPool.DirectAllocator; @@ -52,88 +53,93 @@ class FixedSortedBytesImpl { private int[] docToEntry; private final Comparator comp; - private final BytesRefHash hash = new BytesRefHash(pool); - - public Writer(Directory dir, String id, Comparator comp) throws IOException { + + public Writer(Directory dir, String id, Comparator comp) + throws IOException { this(dir, id, comp, new DirectAllocator(ByteBlockPool.BYTE_BLOCK_SIZE), new AtomicLong()); } - public Writer(Directory dir, String id, Comparator comp, Allocator allocator, AtomicLong bytesUsed) throws IOException { - super(dir, id, CODEC_NAME, VERSION_CURRENT, false, false, new ByteBlockPool(allocator), bytesUsed); + public Writer(Directory dir, String id, Comparator comp, + Allocator allocator, AtomicLong bytesUsed) throws IOException { + super(dir, id, CODEC_NAME, VERSION_CURRENT, false, false, + new ByteBlockPool(allocator), bytesUsed); docToEntry = new int[1]; -// docToEntry[0] = -1; + // docToEntry[0] = -1; bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT); this.comp = comp; } @Override synchronized public void add(int docID, BytesRef bytes) throws IOException { - if(bytes.length == 0) + if (bytes.length == 0) return; // default - skip it if (size == -1) { size = bytes.length; initDataOut(); datOut.writeInt(size); } else if (bytes.length != size) { - throw new IllegalArgumentException("expected bytes size=" + size + " but got " + bytes.length); + throw new IllegalArgumentException("expected bytes size=" + size + + " but got " + bytes.length); } if (docID >= docToEntry.length) { int[] newArray = new int[ArrayUtil.oversize(1 + docID, RamUsageEstimator.NUM_BYTES_INT)]; System.arraycopy(docToEntry, 0, newArray, 0, docToEntry.length); -// Arrays.fill(newArray, docToEntry.length, newArray.length, -1); + // Arrays.fill(newArray, docToEntry.length, newArray.length, -1); - bytesUsed.addAndGet((newArray.length - docToEntry.length) * RamUsageEstimator.NUM_BYTES_INT); + bytesUsed.addAndGet((newArray.length - docToEntry.length) + * RamUsageEstimator.NUM_BYTES_INT); docToEntry = newArray; } int e = hash.add(bytes); - docToEntry[docID] = 1+(e < 0? (-e)-1: e); + docToEntry[docID] = 1 + (e < 0 ? 
(-e) - 1 : e); } - // Important that we get docCount, in case there were // some last docs that we didn't see @Override synchronized public void finish(int docCount) throws IOException { - if(datOut == null)// no data added + if (datOut == null)// no data added return; initIndexOut(); final int[] sortedEntries = hash.sort(comp); final int count = hash.size(); - int[] address= new int[count]; + int[] address = new int[count]; // first dump bytes data, recording address as we go - for(int i=0;i docToEntry.length) { limit = docToEntry.length; } else { limit = docCount; } - for(int i=0;i 0 && e <= count: "index must be > 0 && <= " + count + " was: " + e; - w.add(address[e-1]); + assert e > 0 && e <= count : "index must be > 0 && <= " + count + + " was: " + e; + w.add(address[e - 1]); } } - for(int i=limit;i comp) throws IOException { - return new Source(cloneData(), cloneIndex(), size, comp); + public SortedSource loadSorted(Comparator comp) + throws IOException { + final IndexInput idxInput = cloneIndex(); + final IndexInput datInput = cloneData(); + datInput.seek(CodecUtil.headerLength(CODEC_NAME) + 4); + idxInput.seek(CodecUtil.headerLength(CODEC_NAME)); + return new Source(datInput, idxInput, size, idxInput.readInt(), comp); } private static class Source extends BytesBaseSortedSource { - // TODO: paged data - private final byte[] data; - private final BytesRef bytesRef = new BytesRef(); private final PackedInts.Reader index; - private final LookupResult lookupResult = new LookupResult(); private final int numValue; - private final Comparator comp; private final int size; - public Source(IndexInput datIn, IndexInput idxIn, int size, Comparator comp) throws IOException { - super(datIn, idxIn); + public Source(IndexInput datIn, IndexInput idxIn, int size, int numValues, + Comparator comp) throws IOException { + super(datIn, idxIn, comp, new PagedBytes(PAGED_BYTES_BITS), size*numValues ); this.size = size; - datIn.seek(CodecUtil.headerLength(CODEC_NAME) + 4); - idxIn.seek(CodecUtil.headerLength(CODEC_NAME)); - - numValue = idxIn.readInt(); - data = new byte[size*numValue]; - datIn.readBytes(data, 0, size*numValue); - datIn.close(); - + this.numValue = numValues; index = PackedInts.getReader(idxIn); - idxIn.close(); // do we need to close that here? - bytesRef.bytes = data; bytesRef.length = size; - // default byte sort order - this.comp = comp==null?BytesRef.getUTF8SortedAsUnicodeComparator():comp; } @Override @@ -201,60 +196,38 @@ class FixedSortedBytesImpl { return (int) index.get(docID); } - @Override - public BytesRef getByOrd(int ord) { - if (ord == 0) { - return defaultValue; - } else { - bytesRef.offset = ((ord-1) * size); - return bytesRef; - } - } - @Override public LookupResult getByValue(BytesRef bytes) { - return binarySearch(bytes, 0, numValue-1); + return binarySearch(bytes, 0, numValue - 1); } public long ramBytesUsed() { // TODO(simonw): move ram calculation to PackedInts? 
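// In miniature, finish() above does three things: sort the distinct values,
// write them out while recording each value's start address, then remap every
// document's hash entry to a 1-based sorted ord (0 still means "no value").
// A toy equivalent over Strings with natural ordering, standing in for the
// BytesRefHash/comparator machinery (all names here are illustrative):
import java.util.*;

class SortedOrdsSketch {
  static int[] docToSortedOrd(String[] perDoc) {
    TreeSet<String> distinct = new TreeSet<>();
    for (String v : perDoc) if (v != null) distinct.add(v);
    Map<String, Integer> ords = new HashMap<>();
    int ord = 1; // 1-based; 0 is reserved for missing values
    for (String v : distinct) ords.put(v, ord++);
    int[] docToOrd = new int[perDoc.length];
    for (int i = 0; i < perDoc.length; i++)
      docToOrd[i] = perDoc[i] == null ? 0 : ords.get(perDoc[i]);
    return docToOrd;
  }
}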
- return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + data.length + - (RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + index.getBitsPerValue() * index.size()); + return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + + size + * numValue + + (RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + index + .getBitsPerValue() + * index.size()); } @Override public int getValueCount() { return numValue; } - - private LookupResult binarySearch(BytesRef b, int low, int high) { - - while (low <= high) { - int mid = (low + high) >>> 1; - bytesRef.offset = mid * size; - int cmp = comp.compare(bytesRef, b); - if (cmp < 0) { - low = mid + 1; - } else if (cmp > 0) { - high = mid - 1; - } else { - lookupResult.ord = mid+1; - lookupResult.found = true; - return lookupResult; - } - } - lookupResult.ord = low; - lookupResult.found = false; - return lookupResult; + @Override + protected BytesRef deref(int ord) { + return data.fill(bytesRef, (ord* size), size); } } @Override public ValuesEnum getEnum(AttributeSource source) throws IOException { - // do unsorted - return new DerefBytesEnum(source, cloneData(), cloneIndex(), CODEC_NAME, size); + // do unsorted + return new DerefBytesEnum(source, cloneData(), cloneIndex(), CODEC_NAME, + size); } - + @Override public Values type() { return Values.BYTES_FIXED_SORTED; diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java index 6df5217788b..202947c5eef 100644 --- a/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java @@ -26,7 +26,7 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.RamUsageEstimator; +import org.apache.lucene.util.PagedBytes; // Simplest storage: stores fixed length byte[] per // document, with no dedup and no sorting. 
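The straight fixed-width layout needs no index file at all: the value for document d starts at byte d * size. A self-contained sketch of that arithmetic over a plain byte array (standing in for the PagedBytes.Reader this hunk switches to; names are illustrative):

class FixedStraightSketch {
  /** Copies the fixed-width value of document docID out of a flat store. */
  static byte[] valueFor(int docID, byte[] data, int size) {
    final byte[] result = new byte[size];
    System.arraycopy(data, docID * size, result, 0, size);
    return result;
  }

  public static void main(String[] args) {
    byte[] data = {1, 2, 3, 4, 5, 6}; // three 2-byte values
    byte[] doc1 = valueFor(1, data, 2);
    System.out.println(doc1[0] + "," + doc1[1]); // prints 3,4
  }
}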
@@ -133,30 +133,17 @@ class FixedStraightBytesImpl { } private static class Source extends BytesBaseSource { - // TODO: paged data - private final byte[] data; private final BytesRef bytesRef = new BytesRef(); private final int size; public Source(IndexInput datIn, IndexInput idxIn, int size, int maxDoc) throws IOException { - super(datIn, idxIn); + super(datIn, idxIn, new PagedBytes(PAGED_BYTES_BITS), size*maxDoc); this.size = size; - final int sizeInBytes = size*maxDoc; - data = new byte[sizeInBytes]; - assert data.length <= datIn.length() : " file size is less than the expected size diff: " + (data.length - datIn.length()) + " size: " + size + " maxDoc " + maxDoc + " pos: " + datIn.getFilePointer(); - datIn.readBytes(data, 0, sizeInBytes); - bytesRef.bytes = data; - bytesRef.length = size; } - + @Override - public BytesRef getBytes(int docID) { - bytesRef.offset = docID * size; - return bytesRef; - } - - public long ramBytesUsed() { - return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + data.length; + public BytesRef getBytes(int docID) { + return data.fill(bytesRef, docID * size, size); } @Override diff --git a/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java index 9ab2adc6dde..e07104f5d0e 100644 --- a/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java @@ -33,6 +33,7 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefHash; import org.apache.lucene.util.CodecUtil; import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.PagedBytes; import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.ByteBlockPool.Allocator; import org.apache.lucene.util.ByteBlockPool.DirectAllocator; @@ -165,44 +166,31 @@ class VarDerefBytesImpl { @Override public Source load() throws IOException { - return new Source(cloneData(), cloneIndex()); + final IndexInput data = cloneData(); + final IndexInput index = cloneIndex(); + data.seek(CodecUtil.headerLength(CODEC_NAME)); + index.seek(CodecUtil.headerLength(CODEC_NAME)); + final long totalBytes = index.readInt(); // should be long + return new Source(data,index, totalBytes); } private static class Source extends BytesBaseSource { - // TODO: paged data - private final byte[] data; private final BytesRef bytesRef = new BytesRef(); private final PackedInts.Reader index; - public Source(IndexInput datIn, IndexInput idxIn) throws IOException { - super(datIn, idxIn); - datIn.seek(CodecUtil.headerLength(CODEC_NAME)); - idxIn.seek(CodecUtil.headerLength(CODEC_NAME)); - - final int totBytes = idxIn.readInt(); - data = new byte[totBytes]; - datIn.readBytes(data, 0, totBytes); - + public Source(IndexInput datIn, IndexInput idxIn, long totalBytes) throws IOException { + super(datIn, idxIn, new PagedBytes(PAGED_BYTES_BITS), totalBytes); index = PackedInts.getReader(idxIn); - bytesRef.bytes = data; } @Override public BytesRef getBytes(int docID) { - int address = (int) index.get(docID); + long address = index.get(docID); if (address == 0) { assert defaultValue.length == 0: " default value manipulated"; return defaultValue; } else { - address--; - if ((data[address] & 0x80) == 0) { - // length is 1 byte - bytesRef.length = data[address]; - bytesRef.offset = address+1; - } else { - bytesRef.length = (data[address]&0x7f) + ((data[address+1]&0xff)<<7); - bytesRef.offset = address+2; - } + data.fillUsingLengthPrefix2(bytesRef, --address); 
return bytesRef; } } @@ -211,12 +199,6 @@ class VarDerefBytesImpl { public int getValueCount() { return index.size(); } - - public long ramBytesUsed() { - // TODO(simonw): move address ram usage to PackedInts? - return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + data.length + - (RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + index.getBitsPerValue() * index.size()); - } } @Override diff --git a/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java index 7b291520a16..a6eb7d0deb0 100644 --- a/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java @@ -32,6 +32,7 @@ import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.ByteBlockPool; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefHash; +import org.apache.lucene.util.PagedBytes; import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.ByteBlockPool.Allocator; import org.apache.lucene.util.ByteBlockPool.DirectAllocator; @@ -165,41 +166,28 @@ class VarSortedBytesImpl { @Override public SortedSource loadSorted(Comparator comp) throws IOException { - return new Source(cloneData(), cloneIndex(), comp); + IndexInput indexIn = cloneIndex(); + return new Source(cloneData(), indexIn , comp, indexIn.readLong()); } private static class Source extends BytesBaseSortedSource { // TODO: paged data - private final byte[] data; - private final BytesRef bytesRef = new BytesRef(); private final PackedInts.Reader docToOrdIndex; private final PackedInts.Reader ordToOffsetIndex; // 0-based private final long totBytes; private final int valueCount; - private final LookupResult lookupResult = new LookupResult(); - private final Comparator comp; public Source(IndexInput datIn, IndexInput idxIn, - Comparator comp) throws IOException { - super(datIn, idxIn); - totBytes = idxIn.readLong(); - data = new byte[(int) totBytes]; - datIn.readBytes(data, 0, (int) totBytes); + Comparator comp, long dataLength) throws IOException { + super(datIn, idxIn, comp, new PagedBytes(PAGED_BYTES_BITS), dataLength); + totBytes = dataLength; docToOrdIndex = PackedInts.getReader(idxIn); ordToOffsetIndex = PackedInts.getReader(idxIn); valueCount = ordToOffsetIndex.size(); - bytesRef.bytes = data; // default byte sort order - this.comp = comp == null ? BytesRef.getUTF8SortedAsUnicodeComparator() - : comp; } - @Override - public BytesRef getByOrd(int ord) { - return ord == 0 ? defaultValue : deref(--ord); - } - @Override public int ord(int docID) { return (int) docToOrdIndex.get(docID); @@ -213,7 +201,7 @@ class VarSortedBytesImpl { public long ramBytesUsed() { // TODO(simonw): move ram usage to PackedInts? 
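// The variable-length sorted layout above keeps two packed arrays: docToOrd
// and ordToOffset. A value's length is implicit: it runs from its own offset
// to the next value's offset, and the last value runs to the end of the data.
// Sketch with plain arrays in place of PackedInts.Reader and PagedBytes
// (illustrative names, not the patch's API):
class VarSortedSliceSketch {
  static byte[] valueForOrd(int ord, long[] ordToOffset, byte[] data) {
    final long start = ordToOffset[ord];
    final long end =
        ord == ordToOffset.length - 1 ? data.length : ordToOffset[ord + 1];
    final byte[] result = new byte[(int) (end - start)];
    System.arraycopy(data, (int) start, result, 0, result.length);
    return result;
  }
}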
return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER - + data.length + + totBytes + (RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + docToOrdIndex .getBitsPerValue() * docToOrdIndex.getBitsPerValue()) @@ -228,40 +216,21 @@ class VarSortedBytesImpl { } // ord is 0-based - private BytesRef deref(int ord) { - bytesRef.offset = (int) ordToOffsetIndex.get(ord); + @Override + protected BytesRef deref(int ord) { + final long nextOffset; if (ord == valueCount - 1) { nextOffset = totBytes; } else { nextOffset = ordToOffsetIndex.get(1 + ord); } - bytesRef.length = (int) (nextOffset - bytesRef.offset); + final long offset = ordToOffsetIndex.get(ord); + data.fill(bytesRef, offset , (int)(nextOffset - offset)); return bytesRef; } - // TODO: share w/ FixedSortedBytesValues? - private LookupResult binarySearch(BytesRef b, int low, int high) { - - while (low <= high) { - int mid = (low + high) >>> 1; - deref(mid); - final int cmp = comp.compare(bytesRef, b); - if (cmp < 0) { - low = mid + 1; - } else if (cmp > 0) { - high = mid - 1; - } else { - lookupResult.ord = mid + 1; - lookupResult.found = true; - return lookupResult; - } - } - assert comp.compare(bytesRef, b) != 0; - lookupResult.ord = low; - lookupResult.found = false; - return lookupResult; - } + } @Override diff --git a/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java index f747bb06139..04fd5939d34 100644 --- a/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java @@ -28,6 +28,7 @@ import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.PagedBytes; import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.packed.PackedInts; @@ -44,8 +45,9 @@ class VarStraightBytesImpl { // start at -1 if the first added value is > 0 private int lastDocID = -1; private int[] docToAddress; - - public Writer(Directory dir, String id, AtomicLong bytesUsed) throws IOException { + + public Writer(Directory dir, String id, AtomicLong bytesUsed) + throws IOException { super(dir, id, CODEC_NAME, VERSION_CURRENT, false, false, null, bytesUsed); docToAddress = new int[1]; bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT); @@ -60,7 +62,8 @@ class VarStraightBytesImpl { if (docID >= docToAddress.length) { int oldSize = docToAddress.length; docToAddress = ArrayUtil.grow(docToAddress, 1 + docID); - bytesUsed.addAndGet(-(docToAddress.length-oldSize)*RamUsageEstimator.NUM_BYTES_INT); + bytesUsed.addAndGet(-(docToAddress.length - oldSize) + * RamUsageEstimator.NUM_BYTES_INT); } for (int i = lastDocID + 1; i < docID; i++) { docToAddress[i] = address; @@ -70,7 +73,7 @@ class VarStraightBytesImpl { @Override synchronized public void add(int docID, BytesRef bytes) throws IOException { - if(bytes.length == 0) + if (bytes.length == 0) return; // default if (datOut == null) initDataOut(); @@ -97,7 +100,8 @@ class VarStraightBytesImpl { w.add(docToAddress[i]); } w.finish(); - bytesUsed.addAndGet(-(docToAddress.length)*RamUsageEstimator.NUM_BYTES_INT); + bytesUsed.addAndGet(-(docToAddress.length) + * RamUsageEstimator.NUM_BYTES_INT); docToAddress = null; super.finish(docCount); } @@ -121,46 +125,29 @@ class VarStraightBytesImpl { } private class Source extends BytesBaseSource { - private final int totBytes; - // TODO: paged data - private 
final byte[] data; private final BytesRef bytesRef = new BytesRef(); private final PackedInts.Reader addresses; public Source(IndexInput datIn, IndexInput idxIn) throws IOException { - super(datIn, idxIn); - totBytes = idxIn.readVInt(); - data = new byte[totBytes]; - datIn.readBytes(data, 0, totBytes); + super(datIn, idxIn, new PagedBytes(PAGED_BYTES_BITS), idxIn.readVInt()); // TODO + // should + // be + // long addresses = PackedInts.getReader(idxIn); - bytesRef.bytes = data; } @Override public BytesRef getBytes(int docID) { final int address = (int) addresses.get(docID); - bytesRef.offset = address; - if (docID == maxDoc - 1) { - bytesRef.length = totBytes - bytesRef.offset; - } else { - bytesRef.length = (int) addresses.get(1 + docID) - bytesRef.offset; - } - return bytesRef; + final int length = docID == maxDoc - 1 ? (int) (totalLengthInBytes - address) + : (int) (addresses.get(1 + docID) - address); + return data.fill(bytesRef, address, length); } @Override public int getValueCount() { throw new UnsupportedOperationException(); } - - public long ramBytesUsed() { - // TODO(simonw): move address ram usage to PackedInts? - return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER - + data.length - + (RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + addresses - .getBitsPerValue() - * addresses.size()); - } } @Override @@ -226,10 +213,10 @@ class VarStraightBytesImpl { @Override public int nextDoc() throws IOException { - return advance(pos+1); + return advance(pos + 1); } } - + @Override public Values type() { return Values.BYTES_VAR_STRAIGHT; From 0aa416f0cb4cae123bd32d89ffe0d6bdcf1d2278 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Wed, 17 Nov 2010 14:29:59 +0000 Subject: [PATCH 014/116] fixed length prefix for use with PagedBytes git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1036060 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/lucene/index/values/Bytes.java | 2 +- .../index/values/VarDerefBytesImpl.java | 22 +++++++++---- .../java/org/apache/lucene/util/IOUtils.java | 33 ++----------------- .../index/values/TestDocValuesIndexing.java | 4 +-- 4 files changed, 20 insertions(+), 41 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/index/values/Bytes.java b/lucene/src/java/org/apache/lucene/index/values/Bytes.java index 691abe588bf..70343ca1eca 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Bytes.java +++ b/lucene/src/java/org/apache/lucene/index/values/Bytes.java @@ -243,7 +243,7 @@ public final class Bytes { if (initIndex) initIndexOut(); } - + protected void initDataOut() throws IOException { datOut = dir.createOutput(IndexFileNames.segmentFileName(id, "", IndexFileNames.CSF_DATA_EXTENSION)); diff --git a/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java index e07104f5d0e..beb0c14704c 100644 --- a/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java @@ -24,6 +24,7 @@ import org.apache.lucene.index.values.Bytes.BytesBaseSource; import org.apache.lucene.index.values.Bytes.BytesReaderBase; import org.apache.lucene.index.values.Bytes.BytesWriterBase; import org.apache.lucene.index.values.FixedDerefBytesImpl.Reader.DerefBytesEnum; +import org.apache.lucene.store.DataOutput; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.ArrayUtil; @@ -32,7 +33,6 @@ import 
org.apache.lucene.util.ByteBlockPool; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefHash; import org.apache.lucene.util.CodecUtil; -import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.PagedBytes; import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.ByteBlockPool.Allocator; @@ -113,7 +113,7 @@ class VarDerefBytesImpl { final int docAddress; if (e >= 0) { docAddress = array.array.address[e] = address; - address += IOUtils.writeLength(datOut, bytes); + address += writePrefixLength(datOut, bytes); datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length); address += bytes.length; } else { @@ -121,7 +121,18 @@ class VarDerefBytesImpl { } docToAddress[docID] = docAddress; } - + + private static int writePrefixLength(DataOutput datOut, BytesRef bytes) throws IOException{ + if (bytes.length < 128) { + datOut.writeByte((byte) bytes.length); + return 1; + } else { + datOut.writeByte((byte) (0x80 | (bytes.length >> 8))); + datOut.writeByte((byte) (bytes.length & 0xff)); + return 2; + } + } + public long ramBytesUsed() { return bytesUsed.get(); } @@ -216,15 +227,14 @@ class VarDerefBytesImpl { @Override protected void fill(long address, BytesRef ref) throws IOException { - // TODO(simonw): use pages here datIn.seek(fp + --address); final byte sizeByte = datIn.readByte(); final int size; - if ((sizeByte & 0x80) == 0) { + if ((sizeByte & 128) == 0) { // length is 1 byte size = sizeByte; } else { - size = (sizeByte & 0x7f) + ((datIn.readByte() & 0xff) << 7); + size = ((sizeByte & 0x7f)<<8) | ((datIn.readByte() & 0xff)); } if(ref.bytes.length < size) ref.grow(size); diff --git a/lucene/src/java/org/apache/lucene/util/IOUtils.java b/lucene/src/java/org/apache/lucene/util/IOUtils.java index 43120dde1bd..40cde1b9095 100644 --- a/lucene/src/java/org/apache/lucene/util/IOUtils.java +++ b/lucene/src/java/org/apache/lucene/util/IOUtils.java @@ -20,8 +20,6 @@ package org.apache.lucene.util; import java.io.Closeable; import java.io.IOException; -import org.apache.lucene.store.DataOutput; - /** @lucene.internal */ public final class IOUtils { @@ -61,37 +59,10 @@ public final class IOUtils { } } - if (priorException != null) + if (priorException != null) { throw priorException; - else if (firstIOE != null) + } else if (firstIOE != null) { throw firstIOE; - } - - /** - * Writes the length of the {@link BytesRef} as either a one or two bytes to - * the {@link DataOutput} and returns the number of bytes used. - * - * @param datOut - * the output to write to - * @param bytes - * the length to write - * @return the length of the {@link BytesRef} as either a one or two bytes to - * the {@link DataOutput} and returns the number of bytes used. 
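// Round-trip check for the prefix layout introduced above: lengths < 128 take
// one byte; otherwise the first byte carries the marker bit plus the high
// bits and the second byte the low eight bits, capping encodable lengths at
// 32767. Standalone sketch over byte arrays (the patch itself writes through
// DataOutput; this class and its names are illustrative only):
class PrefixRoundTripSketch {
  static int encode(byte[] dst, int pos, int len) {
    if (len < 128) { dst[pos] = (byte) len; return 1; }
    dst[pos] = (byte) (0x80 | (len >> 8));
    dst[pos + 1] = (byte) (len & 0xff);
    return 2;
  }
  static int decode(byte[] src, int pos) {
    final int first = src[pos] & 0xff;
    if ((first & 0x80) == 0) return first;
    return ((first & 0x7f) << 8) | (src[pos + 1] & 0xff);
  }
  public static void main(String[] args) {
    byte[] buf = new byte[2];
    for (int len : new int[] {5, 127, 128, 300, 32767}) {
      encode(buf, 0, len);
      if (decode(buf, 0) != len) throw new AssertionError("mismatch at " + len);
    }
    System.out.println("round trip ok");
  }
}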
- * @throws IOException - * if datOut throws an {@link IOException} - */ - public static int writeLength(DataOutput datOut, BytesRef bytes) - throws IOException { - final int length = bytes.length; - if (length < 128) { - // 1 byte to store length - datOut.writeByte((byte) length); - return 1; - } else { - // 2 byte to store length - datOut.writeByte((byte) (0x80 | (length & 0x7f))); - datOut.writeByte((byte) ((length >> 7) & 0xff)); - return 2; } } }
diff --git a/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java b/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java index e2f3a206256..fcac9640ef7 100644 --- a/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java +++ b/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java @@ -273,7 +273,6 @@ public class TestDocValuesIndexing extends LuceneTestCase { bytesRef.length = b.length; bytesRef.offset = 0; } - // byte upto = 0; for (int i = 0; i < numValues; i++) { if (isNumeric) { @@ -324,7 +323,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { public void runTestIndexBytes(IndexWriterConfig cfg, boolean withDeletions) throws CorruptIndexException, LockObtainFailedException, IOException { - Directory d = newDirectory(); + final Directory d = newDirectory(); IndexWriter w = new IndexWriter(d, cfg); final List byteVariantList = new ArrayList(BYTES); // run in random order to test if fill works correctly during merges @@ -389,7 +388,6 @@ public class TestDocValuesIndexing extends LuceneTestCase { assertEquals(base + numRemainingValues, r.numDocs()); int v = 0; for (int i = base; i < r.numDocs(); i++) { - String msg = " field: " + byteIndexValue.name() + " at index: " + i + " base: " + base + " numDocs:" + r.numDocs() + " bytesSize: " + bytesSize;
From 8dbe034b5928d41136fa04879157fb841da143ba Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Tue, 23 Nov 2010 22:33:50 +0000 Subject: [PATCH 015/116] - Fixed IndexOutOfBoundsException at PagedBytes page boundaries - Added Source-based ValuesEnums for all types - Cut over to PagedBytes - Shared the binary search impl across all sorted implementations - DocValuesCodec now directly computes the file names belonging to it, based on per-codec metadata - Split up test cases into more, smaller tests - Removed several nocommit markers git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1038364 13f79535-47bb-0310-9956-ffa450edef68
--- .../apache/lucene/document/ValuesField.java | 2 - .../lucene/index/DocFieldProcessor.java | 10 +- .../org/apache/lucene/index/FieldInfos.java | 6 - .../apache/lucene/index/IndexFileNames.java | 8 - .../apache/lucene/index/SegmentCodecs.java | 1 - .../codecs/docvalues/DocValuesCodec.java | 94 +++-- .../codecs/docvalues/DocValuesCodecInfo.java | 119 ++++++ .../codecs/docvalues/DocValuesConsumer.java | 5 +- .../docvalues/DocValuesProducerBase.java | 20 +- .../org/apache/lucene/index/values/Bytes.java | 110 ++++-- .../apache/lucene/index/values/DocValues.java | 79 +++- .../index/values/FixedDerefBytesImpl.java | 60 +-- .../index/values/FixedSortedBytesImpl.java | 20 +- .../index/values/FixedStraightBytesImpl.java | 23 +- .../apache/lucene/index/values/Floats.java | 123 +++++-- .../lucene/index/values/MultiDocValues.java | 109 ++---- .../lucene/index/values/PackedIntsImpl.java | 112 ++++-- .../apache/lucene/index/values/Values.java | 1 - .../index/values/ValuesAttributeImpl.java | 3 - .../lucene/index/values/ValuesEnum.java | 48 ++- .../index/values/VarDerefBytesImpl.java | 111 
+++--- .../index/values/VarSortedBytesImpl.java | 38 +- .../index/values/VarStraightBytesImpl.java | 43 +-- .../apache/lucene/index/values/Writer.java | 28 +- .../org/apache/lucene/util/PagedBytes.java | 45 ++- .../lucene/index/values/TestDocValues.java | 149 ++++---- .../index/values/TestDocValuesIndexing.java | 343 ++++++++++-------- 27 files changed, 1072 insertions(+), 638 deletions(-) create mode 100644 lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodecInfo.java diff --git a/lucene/src/java/org/apache/lucene/document/ValuesField.java b/lucene/src/java/org/apache/lucene/document/ValuesField.java index 43fc6bd6623..d71a273a3d9 100644 --- a/lucene/src/java/org/apache/lucene/document/ValuesField.java +++ b/lucene/src/java/org/apache/lucene/document/ValuesField.java @@ -16,7 +16,6 @@ package org.apache.lucene.document; * See the License for the specific language governing permissions and * limitations under the License. */ -import java.io.IOException; import java.io.Reader; import java.util.Comparator; @@ -118,7 +117,6 @@ public class ValuesField extends AbstractField { valField.setBytes(ref, type); break; case PACKED_INTS: - case PACKED_INTS_FIXED: valField.setInt(Long.parseLong(field.stringValue())); break; case SIMPLE_FLOAT_4BYTE: diff --git a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java index dee260a976d..1eeeacb1176 100644 --- a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java +++ b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java @@ -43,6 +43,8 @@ final class DocFieldProcessor extends DocConsumer { final StoredFieldsWriter fieldsWriter; final private Map docValues = new HashMap(); private FieldsConsumer fieldsConsumer; // TODO this should be encapsulated in DocumentsWriter + private SegmentWriteState docValuesConsumerState; // TODO this should be encapsulated in DocumentsWriter + synchronized DocValuesConsumer docValuesConsumer(Directory dir, String segment, String name, ValuesAttribute attr, FieldInfo fieldInfo) @@ -57,8 +59,8 @@ final class DocFieldProcessor extends DocConsumer { * the SegmentsWriteState passed in right at the moment when the segment is flushed (doccount etc) but we need the consumer earlier * to support docvalues and later on stored fields too. 
*/ - SegmentWriteState state = docWriter.segWriteState(); - fieldsConsumer = state.segmentCodecs.codec().fieldsConsumer(state); + docValuesConsumerState = docWriter.segWriteState(); + fieldsConsumer = docValuesConsumerState.segmentCodecs.codec().fieldsConsumer(docValuesConsumerState); } valuesConsumer = fieldsConsumer.addValuesField(fieldInfo); docValues.put(name, valuesConsumer); @@ -102,7 +104,9 @@ final class DocFieldProcessor extends DocConsumer { } docValues.clear(); if(fieldsConsumer != null) { - fieldsConsumer.close(); // nocommit this should go away + fieldsConsumer.close(); // TODO remove this once docvalues are fully supported by codecs + state.flushedFiles.addAll(docValuesConsumerState.flushedFiles); + docValuesConsumerState = null; fieldsConsumer = null; } diff --git a/lucene/src/java/org/apache/lucene/index/FieldInfos.java b/lucene/src/java/org/apache/lucene/index/FieldInfos.java index 0c8aef3b11e..a6baae51324 100644 --- a/lucene/src/java/org/apache/lucene/index/FieldInfos.java +++ b/lucene/src/java/org/apache/lucene/index/FieldInfos.java @@ -344,9 +344,6 @@ public final class FieldInfos { case BYTES_VAR_SORTED: b = 9; break; - case PACKED_INTS_FIXED: - b = 10; - break; default: throw new IllegalStateException("unhandled indexValues type " + fi.docValues); } @@ -413,9 +410,6 @@ public final class FieldInfos { case 9: fi.docValues = Values.BYTES_VAR_SORTED; break; - case 10: - fi.docValues = Values.PACKED_INTS_FIXED; - break; default: throw new IllegalStateException("unhandled indexValues type " + b); } diff --git a/lucene/src/java/org/apache/lucene/index/IndexFileNames.java b/lucene/src/java/org/apache/lucene/index/IndexFileNames.java index 1917b1e7dd8..ef9c4b419c6 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexFileNames.java +++ b/lucene/src/java/org/apache/lucene/index/IndexFileNames.java @@ -79,12 +79,6 @@ public final class IndexFileNames { /** Extension of separate norms */ public static final String SEPARATE_NORMS_EXTENSION = "s"; - /** Extension of Column-Stride Filed data files */ - public static final String CSF_DATA_EXTENSION = "dat"; - - /** Extension of Column-Stride Filed index files */ - public static final String CSF_INDEX_EXTENSION = "idx"; - /** * This array contains all filename extensions used by * Lucene's index files, with one exception, namely the @@ -104,8 +98,6 @@ public final class IndexFileNames { GEN_EXTENSION, NORMS_EXTENSION, COMPOUND_FILE_STORE_EXTENSION, - CSF_DATA_EXTENSION, - CSF_INDEX_EXTENSION }; public static final String[] STORE_INDEX_EXTENSIONS = new String[] { diff --git a/lucene/src/java/org/apache/lucene/index/SegmentCodecs.java b/lucene/src/java/org/apache/lucene/index/SegmentCodecs.java index 3c707e0d281..0f31e7854e2 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentCodecs.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentCodecs.java @@ -92,7 +92,6 @@ final class SegmentCodecs implements Cloneable { } } return new SegmentCodecs(provider, codecs.toArray(Codec.EMPTY)); - } Codec codec() { diff --git a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java index 75b330daa65..2a4a880b790 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java @@ -17,12 +17,9 @@ package org.apache.lucene.index.codecs.docvalues; * limitations under the License. 
*/ import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; +import java.util.Comparator; import java.util.HashSet; import java.util.Iterator; -import java.util.List; -import java.util.Map; import java.util.Set; import java.util.Map.Entry; @@ -41,28 +38,32 @@ import org.apache.lucene.index.values.DocValues; import org.apache.lucene.index.values.Writer; import org.apache.lucene.store.Directory; import org.apache.lucene.util.AttributeSource; +import org.apache.lucene.util.BytesRef; /** * A codec that adds DocValues support to a given codec transparently. */ public class DocValuesCodec extends Codec { - private final Map consumers = new HashMap(); private final Codec other; + private final Comparator comparator; - public DocValuesCodec(Codec other) { + public DocValuesCodec(Codec other, Comparator comparator) { this.name = "docvalues_" + other.name; this.other = other; + this.comparator = comparator; + } + + public DocValuesCodec(Codec other) { + this(other, null); } @Override public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException { - WrappingFieldsConsumer consumer; - if ((consumer = consumers.get(state.segmentName)) == null) { - consumer = new WrappingFieldsConsumer(other); - } - consumer.state = state; // nocommit this is a hack and only necessary since - // we want to initialized the wrapped + final WrappingFieldsConsumer consumer; + consumer = new WrappingFieldsConsumer(other, comparator, state); + // nocommit this is a hack and only necessary since + // we want to initialized the wrapped // fieldsConsumer lazily with a SegmentWriteState created after the docvalue // ones is. We should fix this in DocumentWriter I guess. See // DocFieldProcessor too! @@ -70,31 +71,44 @@ public class DocValuesCodec extends Codec { } private static class WrappingFieldsConsumer extends FieldsConsumer { - SegmentWriteState state; - private final List docValuesConsumers = new ArrayList(); + private final SegmentWriteState state; private FieldsConsumer wrappedConsumer; private final Codec other; + private final Comparator comparator; + private DocValuesCodecInfo info; - public WrappingFieldsConsumer(Codec other) { + public WrappingFieldsConsumer(Codec other, Comparator comparator, SegmentWriteState state) { this.other = other; + this.comparator = comparator; + this.state = state; } @Override public void close() throws IOException { synchronized (this) { - if (wrappedConsumer != null) + if (info != null) { + info.write(state); + info = null; + } + if (wrappedConsumer != null) { wrappedConsumer.close(); + } } + } @Override public synchronized DocValuesConsumer addValuesField(FieldInfo field) throws IOException { - DocValuesConsumer consumer = DocValuesConsumer.create(state.segmentName, - // TODO: set comparator here - //TODO can we have a compound file per segment and codec for docvalues? - state.directory, field, state.codecId +"-"+ field.number, null); - docValuesConsumers.add(consumer); + if(info == null) { + info = new DocValuesCodecInfo(); + } + final DocValuesConsumer consumer = DocValuesConsumer.create(info.docValuesId(state.segmentName, state.codecId, "" + + field.number), + // TODO can we have a compound file per segment and codec for + // docvalues? 
+ state.directory, field, comparator); + info.add(field.number); return consumer; } @@ -115,35 +129,23 @@ public class DocValuesCodec extends Codec { Set files = new HashSet(); other.files(dir, state.segmentInfo, state.codecId, files); - for (String string : files) { + for (String string : files) { // for now we just check if one of the files + // exists and open the producer if (dir.fileExists(string)) return new WrappingFielsdProducer(state, other.fieldsProducer(state)); } return new WrappingFielsdProducer(state, FieldsProducer.EMPTY); - } @Override public void files(Directory dir, SegmentInfo segmentInfo, String codecId, Set files) throws IOException { - Set otherFiles = new HashSet(); - other.files(dir, segmentInfo, codecId, otherFiles); - for (String string : otherFiles) { // under some circumstances we only write - // DocValues - // so other files will be added even if - // they don't exist - if (dir.fileExists(string)) - files.add(string); - } - //TODO can we have a compound file per segment and codec for docvalues? - for (String file : dir.listAll()) { - if (file.startsWith(segmentInfo.name+"_" + codecId) - && (file.endsWith(Writer.DATA_EXTENSION) || file - .endsWith(Writer.INDEX_EXTENSION))) { - files.add(file); - } - } - + other.files(dir, segmentInfo, codecId, files); + // TODO can we have a compound file per segment and codec for docvalues? + DocValuesCodecInfo info = new DocValuesCodecInfo(); // TODO can we do that + // only once? + info.read(dir, segmentInfo, codecId); + info.files(dir, segmentInfo, codecId, files); } @Override @@ -151,6 +153,7 @@ public class DocValuesCodec extends Codec { other.getExtensions(extensions); extensions.add(Writer.DATA_EXTENSION); extensions.add(Writer.INDEX_EXTENSION); + extensions.add(DocValuesCodecInfo.INFO_FILE_EXT); } static class WrappingFielsdProducer extends DocValuesProducerBase { @@ -219,7 +222,6 @@ public class DocValuesCodec extends Codec { name = value.next(); return this; } - } static class DocValueNameValue extends NameValue { @@ -236,7 +238,6 @@ public class DocValuesCodec extends Codec { } return this; } - } static class WrappingFieldsEnum extends FieldsEnum { @@ -254,7 +255,6 @@ public class DocValuesCodec extends Codec { this.docValues.iter = docValues; this.fieldsEnum.value = wrapped; coordinator = null; - } @Override @@ -268,7 +268,6 @@ public class DocValuesCodec extends Codec { public String next() throws IOException { if (coordinator == null) { coordinator = fieldsEnum.next().smaller(docValues.next()); - // old = coordinator.name; } else { String current = coordinator.name; if (current == docValues.name) { @@ -281,16 +280,15 @@ public class DocValuesCodec extends Codec { } return coordinator == null ? null : coordinator.name; - } @Override public TermsEnum terms() throws IOException { - if (fieldsEnum.name == coordinator.name) + if (fieldsEnum.name == coordinator.name) { return fieldsEnum.value.terms(); + } return null; } - } } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodecInfo.java b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodecInfo.java new file mode 100644 index 00000000000..cbe9ca6c0fd --- /dev/null +++ b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodecInfo.java @@ -0,0 +1,119 @@ +package org.apache.lucene.index.codecs.docvalues; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +import java.io.IOException; +import java.util.Set; + +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.IntsRef; +import org.apache.lucene.util.packed.PackedInts; +import org.apache.lucene.util.packed.PackedInts.Reader; +import org.apache.lucene.util.packed.PackedInts.Writer; + +/** + * @lucene.internal + */ +class DocValuesCodecInfo { + public static final int FORMAT_CURRENT = 0; + static final String INFO_FILE_EXT = "inf"; + private int[] docValuesFields = new int[1]; + private int max; + private int pos; + + public DocValuesCodecInfo() { + } + + void add(int fieldId) { + if (pos >= docValuesFields.length) { + docValuesFields = ArrayUtil.grow(docValuesFields, pos + 1); + } + docValuesFields[pos++] = fieldId; + if (fieldId > max) { + max = fieldId; + } + } + + String docValuesId(String segmentsName, String codecID, String fieldId) { + return segmentsName + "_" + codecID + "-" + fieldId; + } + + void files(Directory dir, SegmentInfo segmentInfo, String codecId, + Set files) throws IOException { + final String file = IndexFileNames.segmentFileName(segmentInfo.name, codecId, + INFO_FILE_EXT); + files.add(file); + for (int i = 0; i < pos; i++) { + int field = docValuesFields[i]; + String docValuesID = docValuesId(segmentInfo.name, codecId, "" + field); + files.add(IndexFileNames.segmentFileName(docValuesID, "", + org.apache.lucene.index.values.Writer.DATA_EXTENSION)); + String idxFile = IndexFileNames.segmentFileName(docValuesID, "", + org.apache.lucene.index.values.Writer.INDEX_EXTENSION); + if (dir.fileExists(idxFile)) { + files.add(idxFile); + } + } + } + + void write(SegmentWriteState state) throws IOException { + final String fileName = IndexFileNames.segmentFileName(state.segmentName, + state.codecId, INFO_FILE_EXT); + final IndexOutput out = state.directory.createOutput(fileName); + state.flushedFiles.add(fileName); + try { + out.writeInt(FORMAT_CURRENT); + Writer writer = PackedInts.getWriter(out, pos, PackedInts + .bitsRequired(max)); + for (int i = 0; i < pos; i++) { + writer.add(docValuesFields[i]); + } + writer.finish(); + } finally { + out.close(); + } + + } + + void read(Directory directory, SegmentInfo info, String codecId) + throws IOException { + final String fileName = IndexFileNames.segmentFileName(info.name, codecId, + INFO_FILE_EXT); + final IndexInput in = directory.openInput(fileName); + try { + in.readInt(); + final Reader reader = PackedInts.getReader(in); + docValuesFields = new int[reader.size()]; + for (int i = 0; i < docValuesFields.length; i++) { + docValuesFields[i] = (int) 
reader.get(i); + } + pos = docValuesFields.length; + } finally { + in.close(); + } + } + + IntsRef fieldIDs() { + return new IntsRef(docValuesFields, 0, pos); + } +} diff --git a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesConsumer.java b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesConsumer.java index 3d39701779d..5f9cd9702b9 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesConsumer.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesConsumer.java @@ -88,10 +88,9 @@ public abstract class DocValuesConsumer { } } - public static DocValuesConsumer create(String segmentName, - Directory directory, FieldInfo field, String codecId, Comparator comp) + public static DocValuesConsumer create(String id, + Directory directory, FieldInfo field, Comparator comp) throws IOException { - final String id = segmentName + "_" + codecId; return Writer.create(field.getDocValues(), id, directory, comp); } } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java index 426f9277894..adf9349e766 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java @@ -31,12 +31,15 @@ import org.apache.lucene.index.values.Ints; import org.apache.lucene.index.values.Values; import org.apache.lucene.index.values.Writer; import org.apache.lucene.store.Directory; +import org.apache.lucene.util.IntsRef; public abstract class DocValuesProducerBase extends FieldsProducer{ protected final TreeMap docValues = new TreeMap(); + private final DocValuesCodecInfo info = new DocValuesCodecInfo(); protected DocValuesProducerBase(SegmentInfo si, Directory dir, FieldInfos fieldInfo, String codecId) throws IOException { + info.read(dir, si, codecId); load(fieldInfo, si.name, si.docCount, dir, codecId); } @@ -48,16 +51,15 @@ public abstract class DocValuesProducerBase extends FieldsProducer{ // Only opens files... doesn't actually load any values protected void load(FieldInfos fieldInfos, String segment, int docCount, Directory dir, String codecId) throws IOException { - final int numFields = fieldInfos.size(); - for (int i = 0; i < numFields; i++) { - final FieldInfo fieldInfo = fieldInfos.fieldInfo(i); - final Values v = fieldInfo.getDocValues(); + final IntsRef valueFields = info.fieldIDs(); + for (int i = valueFields.offset; i < valueFields.length; i++) { + final int fieldNumber = valueFields.ints[i]; + final FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber); + assert fieldInfo.hasDocValues(); final String field = fieldInfo.name; //TODO can we have a compound file per segment and codec for docvalues? - final String id = IndexFileNames.segmentFileName(segment, codecId+"-"+fieldInfo.number, ""); - if (v != null && dir.fileExists(id + "." 
+ Writer.DATA_EXTENSION)) { - docValues.put(field, loadDocValues(docCount, dir, id, v)); - } + final String id = info.docValuesId( segment, codecId, fieldNumber+""); + docValues.put(field, loadDocValues(docCount, dir, id, fieldInfo.getDocValues())); } } @@ -66,8 +68,6 @@ public abstract class DocValuesProducerBase extends FieldsProducer{ switch (v) { case PACKED_INTS: return Ints.getValues(dir, id, false); - case PACKED_INTS_FIXED: - return Ints.getValues(dir, id, true); case SIMPLE_FLOAT_4BYTE: return Floats.getValues(dir, id, docCount); case SIMPLE_FLOAT_8BYTE: diff --git a/lucene/src/java/org/apache/lucene/index/values/Bytes.java b/lucene/src/java/org/apache/lucene/index/values/Bytes.java index 70343ca1eca..f9eeff57093 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Bytes.java +++ b/lucene/src/java/org/apache/lucene/index/values/Bytes.java @@ -24,11 +24,14 @@ import java.util.Comparator; import java.util.concurrent.atomic.AtomicLong; import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.values.DocValues.MissingValues; import org.apache.lucene.index.values.DocValues.SortedSource; import org.apache.lucene.index.values.DocValues.Source; +import org.apache.lucene.index.values.DocValues.SourceEnum; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.ByteBlockPool; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CodecUtil; @@ -88,7 +91,7 @@ public final class Bytes { throw new IllegalArgumentException(""); } - // nocommit -- I can peek @ header to determing fixed/mode? + // TODO -- I can peek @ header to determing fixed/mode? public static DocValues getValues(Directory dir, String id, Mode mode, boolean fixedSize, int maxDoc) throws IOException { if (fixedSize) { @@ -123,15 +126,15 @@ public final class Bytes { static abstract class BytesBaseSource extends Source { protected final IndexInput datIn; protected final IndexInput idxIn; - protected final BytesRef defaultValue = new BytesRef(); protected final static int PAGED_BYTES_BITS = 15; private final PagedBytes pagedBytes; protected final PagedBytes.Reader data; protected final long totalLengthInBytes; - protected BytesBaseSource(IndexInput datIn, IndexInput idxIn, PagedBytes pagedBytes, long bytesToRead) - throws IOException { - assert bytesToRead <= datIn.length() : " file size is less than the expected size diff: " + (bytesToRead - datIn.length()) + " pos: " + datIn.getFilePointer(); + protected BytesBaseSource(IndexInput datIn, IndexInput idxIn, + PagedBytes pagedBytes, long bytesToRead) throws IOException { + assert bytesToRead <= datIn.length() : " file size is less than the expected size diff: " + + (bytesToRead - datIn.length()) + " pos: " + datIn.getFilePointer(); this.datIn = datIn; this.totalLengthInBytes = bytesToRead; this.pagedBytes = pagedBytes; @@ -146,12 +149,36 @@ public final class Bytes { if (datIn != null) datIn.close(); } finally { - if (idxIn != null) // if straight + if (idxIn != null) // if straight - no index needed idxIn.close(); } } + + protected abstract int maxDoc(); + public long ramBytesUsed() { - return 0; //TOODO + return 0; // TODO + } + + @Override + public ValuesEnum getEnum(AttributeSource attrSource) throws IOException { + final MissingValues missing = getMissing(); + return new SourceEnum(attrSource, type(), this, maxDoc()) { + final BytesRef bytesRef = attr.bytes(); + + @Override + public 
int advance(int target) throws IOException { + if (target >= numDocs) { + return pos = NO_MORE_DOCS; + } + while (source.getBytes(target, bytesRef) == missing.bytesValue) { + if (++target >= numDocs) { + return pos = NO_MORE_DOCS; + } + } + return pos = target; + } + }; } } @@ -163,13 +190,14 @@ public final class Bytes { protected final static int PAGED_BYTES_BITS = 15; private final PagedBytes pagedBytes; protected final PagedBytes.Reader data; - protected final BytesRef bytesRef = new BytesRef(); protected final LookupResult lookupResult = new LookupResult(); private final Comparator comp; - - protected BytesBaseSortedSource(IndexInput datIn, IndexInput idxIn, Comparator comp, PagedBytes pagedBytes, long bytesToRead) throws IOException { - assert bytesToRead <= datIn.length() : " file size is less than the expected size diff: " + (bytesToRead - datIn.length()) + " pos: " + datIn.getFilePointer(); + protected BytesBaseSortedSource(IndexInput datIn, IndexInput idxIn, + Comparator comp, PagedBytes pagedBytes, long bytesToRead) + throws IOException { + assert bytesToRead <= datIn.length() : " file size is less than the expected size diff: " + + (bytesToRead - datIn.length()) + " pos: " + datIn.getFilePointer(); this.datIn = datIn; this.pagedBytes = pagedBytes; this.pagedBytes.copy(datIn, bytesToRead); @@ -177,12 +205,12 @@ public final class Bytes { this.idxIn = idxIn; this.comp = comp == null ? BytesRef.getUTF8SortedAsUnicodeComparator() : comp; - + } - + @Override - public BytesRef getByOrd(int ord) { - return ord == 0 ? defaultValue : deref(--ord); + public BytesRef getByOrd(int ord, BytesRef bytesRef) { + return ord == 0 ? null : deref(--ord, bytesRef); } public void close() throws IOException { @@ -191,14 +219,16 @@ public final class Bytes { if (idxIn != null) // if straight idxIn.close(); } - - protected abstract BytesRef deref(int ord); - - protected LookupResult binarySearch(BytesRef b, int low, int high) { + protected abstract int maxDoc(); + + protected abstract BytesRef deref(int ord, BytesRef bytesRef); + + protected LookupResult binarySearch(BytesRef b, BytesRef bytesRef, int low, + int high) { while (low <= high) { int mid = (low + high) >>> 1; - deref(mid); + deref(mid, bytesRef); final int cmp = comp.compare(bytesRef, b); if (cmp < 0) { low = mid + 1; @@ -215,6 +245,27 @@ public final class Bytes { lookupResult.found = false; return lookupResult; } + + @Override + public ValuesEnum getEnum(AttributeSource attrSource) throws IOException { + final MissingValues missing = getMissing(); + return new SourceEnum(attrSource, type(), this, maxDoc()) { + final BytesRef bytesRef = attr.bytes(); + + @Override + public int advance(int target) throws IOException { + if (target >= numDocs) { + return pos = NO_MORE_DOCS; + } + while (source.getBytes(target, bytesRef) == missing.bytesValue) { + if (++target >= numDocs) { + return pos = NO_MORE_DOCS; + } + } + return pos = target; + } + }; + } } static abstract class BytesWriterBase extends Writer { @@ -243,16 +294,16 @@ public final class Bytes { if (initIndex) initIndexOut(); } - + protected void initDataOut() throws IOException { datOut = dir.createOutput(IndexFileNames.segmentFileName(id, "", - IndexFileNames.CSF_DATA_EXTENSION)); + DATA_EXTENSION)); CodecUtil.writeHeader(datOut, codecName, version); } protected void initIndexOut() throws IOException { idxOut = dir.createOutput(IndexFileNames.segmentFileName(id, "", - IndexFileNames.CSF_INDEX_EXTENSION)); + INDEX_EXTENSION)); CodecUtil.writeHeader(idxOut, codecName, version); } @@ 
-299,12 +350,11 @@ public final class Bytes { @Override public void files(Collection files) throws IOException { assert datOut != null; - files.add(IndexFileNames.segmentFileName(id, "", - IndexFileNames.CSF_DATA_EXTENSION)); + files.add(IndexFileNames.segmentFileName(id, "", DATA_EXTENSION)); if (idxOut != null) { // called after flush - so this must be initialized - // if needed or present + // if needed or present final String idxFile = IndexFileNames.segmentFileName(id, "", - IndexFileNames.CSF_INDEX_EXTENSION); + INDEX_EXTENSION); files.add(idxFile); } } @@ -324,12 +374,12 @@ public final class Bytes { int maxVersion, boolean doIndex) throws IOException { this.id = id; datIn = dir.openInput(IndexFileNames.segmentFileName(id, "", - IndexFileNames.CSF_DATA_EXTENSION)); + Writer.DATA_EXTENSION)); version = CodecUtil.checkHeader(datIn, codecName, maxVersion, maxVersion); if (doIndex) { idxIn = dir.openInput(IndexFileNames.segmentFileName(id, "", - IndexFileNames.CSF_INDEX_EXTENSION)); + Writer.INDEX_EXTENSION)); final int version2 = CodecUtil.checkHeader(idxIn, codecName, maxVersion, maxVersion); assert version == version2; @@ -345,7 +395,7 @@ public final class Bytes { } protected final IndexInput cloneIndex() { // TODO assert here for null - // rather than return null + // rather than return null return idxIn == null ? null : (IndexInput) idxIn.clone(); } diff --git a/lucene/src/java/org/apache/lucene/index/values/DocValues.java b/lucene/src/java/org/apache/lucene/index/values/DocValues.java index 2ed2192f831..a0d84ff1d79 100644 --- a/lucene/src/java/org/apache/lucene/index/values/DocValues.java +++ b/lucene/src/java/org/apache/lucene/index/values/DocValues.java @@ -40,8 +40,9 @@ public abstract class DocValues implements Closeable { public Source getSource() throws IOException { return cache.load(this); } - - public SortedSource getSortedSorted(Comparator comparator) throws IOException { + + public SortedSource getSortedSorted(Comparator comparator) + throws IOException { return cache.laodSorted(this, comparator); } @@ -51,7 +52,7 @@ public abstract class DocValues implements Closeable { } public abstract Values type(); - + public void close() throws IOException { this.cache.close(this); } @@ -69,6 +70,7 @@ public abstract class DocValues implements Closeable { * used since it can handle all precisions. 
*/ public static abstract class Source { + protected final MissingValues missingValues = new MissingValues(); public long getInt(int docID) { throw new UnsupportedOperationException("ints are not supported"); @@ -78,7 +80,7 @@ public abstract class DocValues implements Closeable { throw new UnsupportedOperationException("floats are not supported"); } - public BytesRef getBytes(int docID) { + public BytesRef getBytes(int docID, BytesRef ref) { throw new UnsupportedOperationException("bytes are not supported"); } @@ -91,24 +93,56 @@ public abstract class DocValues implements Closeable { } public ValuesEnum getEnum() throws IOException { - return getEnum(null); + return getEnum(new AttributeSource()); } + + public MissingValues getMissing() { + return missingValues; + } + + public abstract Values type(); - // nocommit - enable obtaining enum from source since this is already in - // memory - public/* abstract */ValuesEnum getEnum(AttributeSource attrSource) - throws IOException { - throw new UnsupportedOperationException(); - } + public abstract ValuesEnum getEnum(AttributeSource attrSource) + throws IOException; public abstract long ramBytesUsed(); + + } + + abstract static class SourceEnum extends ValuesEnum { + protected final Source source; + protected final int numDocs; + protected int pos = -1; + + SourceEnum(AttributeSource attrs, Values type, Source source, int numDocs) { + super(attrs, type); + + this.source = source; + this.numDocs = numDocs; + } + + @Override + public void close() throws IOException { + } + + @Override + public int docID() { + return pos; + } + + @Override + public int nextDoc() throws IOException { + if(pos == NO_MORE_DOCS) + return NO_MORE_DOCS; + return advance(pos + 1); + } } public static abstract class SortedSource extends Source { @Override - public BytesRef getBytes(int docID) { - return getByOrd(ord(docID)); + public BytesRef getBytes(int docID, BytesRef bytesRef) { + return getByOrd(ord(docID), bytesRef); } /** @@ -119,7 +153,7 @@ public abstract class DocValues implements Closeable { public abstract int ord(int docID); /** Returns value for specified ord. */ - public abstract BytesRef getByOrd(int ord); + public abstract BytesRef getByOrd(int ord, BytesRef bytesRef); public static class LookupResult { public boolean found; @@ -131,7 +165,22 @@ public abstract class DocValues implements Closeable { * {@link LookupResult#found} is true, then ord is an exact match. The * returned {@link LookupResult} may be reused across calls. 
*/ - public abstract LookupResult getByValue(BytesRef value); + public final LookupResult getByValue(BytesRef value) { + return getByValue(value, new BytesRef()); + } + public abstract LookupResult getByValue(BytesRef value, BytesRef tmpRef); } + public final static class MissingValues { + public long longValue; + public double doubleValue; + public BytesRef bytesValue; + + public final void copy(MissingValues values) { + longValue = values.longValue; + doubleValue = values.doubleValue; + bytesValue = values.bytesValue; + } + } + } diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java index 3be9918d55d..06a322b9972 100644 --- a/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java @@ -49,7 +49,7 @@ class FixedDerefBytesImpl { private int size = -1; private int[] docToID; private final BytesRefHash hash = new BytesRefHash(pool); - + public Writer(Directory dir, String id) throws IOException { this(dir, id, new DirectAllocator(ByteBlockPool.BYTE_BLOCK_SIZE), new AtomicLong()); @@ -65,7 +65,7 @@ class FixedDerefBytesImpl { @Override synchronized public void add(int docID, BytesRef bytes) throws IOException { - if(bytes.length == 0) // default value - skip it + if (bytes.length == 0) // default value - skip it return; if (size == -1) { size = bytes.length; @@ -81,18 +81,18 @@ class FixedDerefBytesImpl { // new added entry datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length); } else { - ord = (-ord)-1; + ord = (-ord) - 1; } if (docID >= docToID.length) { int size = docToID.length; docToID = ArrayUtil.grow(docToID, 1 + docID); - bytesUsed.addAndGet((docToID.length - size) * RamUsageEstimator.NUM_BYTES_INT); + bytesUsed.addAndGet((docToID.length - size) + * RamUsageEstimator.NUM_BYTES_INT); } - docToID[docID] = 1+ord; + docToID[docID] = 1 + ord; } - // Important that we get docCount, in case there were // some last docs that we didn't see @Override @@ -100,7 +100,7 @@ class FixedDerefBytesImpl { if (datOut == null) // no added data return; initIndexOut(); - final int count = 1+hash.size(); + final int count = 1 + hash.size(); idxOut.writeInt(count - 1); // write index final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount, @@ -135,17 +135,16 @@ class FixedDerefBytesImpl { @Override public Source load() throws IOException { final IndexInput index = cloneIndex(); - return new Source(cloneData(), index , size, index.readInt()); + return new Source(cloneData(), index, size, index.readInt()); } private static class Source extends BytesBaseSource { - private final BytesRef bytesRef = new BytesRef(); private final PackedInts.Reader index; private final int size; private final int numValues; - protected Source(IndexInput datIn, IndexInput idxIn, int size, int numValues) - throws IOException { + protected Source(IndexInput datIn, IndexInput idxIn, int size, + int numValues) throws IOException { super(datIn, idxIn, new PagedBytes(PAGED_BYTES_BITS), size * numValues); this.size = size; this.numValues = numValues; @@ -153,24 +152,33 @@ class FixedDerefBytesImpl { } @Override - public BytesRef getBytes(int docID) { + public BytesRef getBytes(int docID, BytesRef bytesRef) { final int id = (int) index.get(docID); if (id == 0) { - return defaultValue; + return null; } return data.fill(bytesRef, ((id - 1) * size), size); } - @Override public int getValueCount() { return numValues; } + + @Override + 
public Values type() { + return Values.BYTES_FIXED_DEREF; + } + + @Override + protected int maxDoc() { + return index.size(); + } } @Override public ValuesEnum getEnum(AttributeSource source) throws IOException { - return new DerefBytesEnum(source, cloneData(), cloneIndex(), CODEC_NAME, + return new DerefBytesEnum(source, cloneData(), cloneIndex(), size); } @@ -184,12 +192,12 @@ class FixedDerefBytesImpl { private int pos = -1; public DerefBytesEnum(AttributeSource source, IndexInput datIn, - IndexInput idxIn, String codecName, int size) throws IOException { - this(source, datIn, idxIn, codecName, size, Values.BYTES_FIXED_DEREF); + IndexInput idxIn, int size) throws IOException { + this(source, datIn, idxIn, size, Values.BYTES_FIXED_DEREF); } protected DerefBytesEnum(AttributeSource source, IndexInput datIn, - IndexInput idxIn, String codecName, int size, Values enumType) + IndexInput idxIn, int size, Values enumType) throws IOException { super(source, enumType); ref = attr.bytes(); @@ -207,14 +215,13 @@ class FixedDerefBytesImpl { @Override public int advance(int target) throws IOException { if (target < valueCount) { - final long address = idx.advance(target); - pos = idx.ord(); - if(address == 0) { - // default is empty - ref.length = 0; - ref.offset = 0; - return pos; + long address; + while ((address = idx.advance(target)) == 0) { + if (++target >= valueCount) { + return pos = NO_MORE_DOCS; + } } + pos = idx.ord(); fill(address, ref); return pos; } @@ -223,6 +230,9 @@ @Override public int nextDoc() throws IOException { + if (pos >= valueCount) { + return pos = NO_MORE_DOCS; + } return advance(pos + 1); } diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java index e445cfb29bd..e826a709ee9 100644 --- a/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java @@ -187,8 +187,6 @@ class FixedSortedBytesImpl { this.size = size; this.numValue = numValues; index = PackedInts.getReader(idxIn); - - bytesRef.length = size; } @Override @@ -197,8 +195,8 @@ class FixedSortedBytesImpl { } @Override - public LookupResult getByValue(BytesRef bytes) { - return binarySearch(bytes, 0, numValue - 1); + public LookupResult getByValue(BytesRef bytes, BytesRef tmpRef) { + return binarySearch(bytes, tmpRef, 0, numValue - 1); } public long ramBytesUsed() { @@ -216,15 +214,25 @@ class FixedSortedBytesImpl { return numValue; } @Override - protected BytesRef deref(int ord) { + protected BytesRef deref(int ord, BytesRef bytesRef) { return data.fill(bytesRef, (ord* size), size); } + + @Override + public Values type() { + return Values.BYTES_FIXED_SORTED; + } + + @Override + protected int maxDoc() { + return index.size(); + } } @Override public ValuesEnum getEnum(AttributeSource source) throws IOException { // do unsorted - return new DerefBytesEnum(source, cloneData(), cloneIndex(), CODEC_NAME, + return new DerefBytesEnum(source, cloneData(), cloneIndex(), size); } diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java index 202947c5eef..1ee7b6e996a 100644 --- a/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java @@ -47,7 +47,7 @@ class FixedStraightBytesImpl { super(dir, id,
CODEC_NAME, VERSION_CURRENT, false, false, null, null); } - // nocommit - impl bulk copy here! + // TODO - impl bulk copy here! @Override synchronized public void add(int docID, BytesRef bytes) throws IOException { @@ -133,16 +133,18 @@ class FixedStraightBytesImpl { } private static class Source extends BytesBaseSource { - private final BytesRef bytesRef = new BytesRef(); private final int size; + private final int maxDoc; public Source(IndexInput datIn, IndexInput idxIn, int size, int maxDoc) throws IOException { super(datIn, idxIn, new PagedBytes(PAGED_BYTES_BITS), size*maxDoc); this.size = size; + this.missingValues.bytesValue = new BytesRef(size); + this.maxDoc = maxDoc; } @Override - public BytesRef getBytes(int docID) { + public BytesRef getBytes(int docID, BytesRef bytesRef) { return data.fill(bytesRef, docID * size, size); } @@ -150,6 +152,16 @@ class FixedStraightBytesImpl { public int getValueCount() { throw new UnsupportedOperationException(); } + + @Override + public Values type() { + return Values.BYTES_FIXED_STRAIGHT; + } + + @Override + protected int maxDoc() { + return maxDoc; + } } @Override @@ -184,8 +196,6 @@ class FixedStraightBytesImpl { @Override public int advance(int target) throws IOException { if(target >= maxDoc){ - ref.length = 0; - ref.offset = 0; return pos = NO_MORE_DOCS; } if((target-1) != pos) // pos inc == 1 @@ -201,6 +211,9 @@ class FixedStraightBytesImpl { @Override public int nextDoc() throws IOException { + if(pos >= maxDoc){ + return pos = NO_MORE_DOCS; + } return advance(pos+1); } } diff --git a/lucene/src/java/org/apache/lucene/index/values/Floats.java b/lucene/src/java/org/apache/lucene/index/values/Floats.java index f844bba40e4..dcf984b0774 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Floats.java +++ b/lucene/src/java/org/apache/lucene/index/values/Floats.java @@ -19,13 +19,15 @@ import org.apache.lucene.util.RamUsageEstimator; * Exposes writer/reader for floating point values. You can specify 4 (java * float) or 8 (java double) byte precision. 
*/ -//TODO - add bulk copy where possible +// TODO - add bulk copy where possible public class Floats { private static final String CODEC_NAME = "SimpleFloats"; static final int VERSION_START = 0; static final int VERSION_CURRENT = VERSION_START; - private static final int INT_ZERO = Float.floatToRawIntBits(0.0f); - private static final long LONG_ZERO = Double.doubleToRawLongBits(0.0); + private static final int INT_DEFAULT = Float + .floatToRawIntBits(Float.NEGATIVE_INFINITY); + private static final long LONG_DEFAULT = Double + .doubleToRawLongBits(Double.NEGATIVE_INFINITY); public static Writer getWriter(Directory dir, String id, int precisionBytes) throws IOException { @@ -47,7 +49,6 @@ public class Floats { abstract static class FloatsWriter extends Writer { - private final Directory dir; private final String id; private FloatsRef floatsRef; @@ -64,7 +65,7 @@ public class Floats { protected void initDatOut() throws IOException { datOut = dir.createOutput(IndexFileNames.segmentFileName(id, "", - IndexFileNames.CSF_DATA_EXTENSION)); + Writer.DATA_EXTENSION)); CodecUtil.writeHeader(datOut, CODEC_NAME, VERSION_CURRENT); assert datOut.getFilePointer() == CodecUtil.headerLength(CODEC_NAME); datOut.writeByte(precision); @@ -78,12 +79,12 @@ public class Floats { protected void add(int docID) throws IOException { add(docID, floatsRef.get()); } - + @Override public void add(int docID, ValuesAttribute attr) throws IOException { final FloatsRef ref; - if((ref = attr.floats()) != null) - add(docID, ref.get()); + if ((ref = attr.floats()) != null) + add(docID, ref.get()); } @Override @@ -113,14 +114,12 @@ public class Floats { } else super.merge(state); } - + @Override public void files(Collection files) throws IOException { - files.add(IndexFileNames.segmentFileName(id, "", - IndexFileNames.CSF_DATA_EXTENSION)); + files.add(IndexFileNames.segmentFileName(id, "", Writer.DATA_EXTENSION)); } - } // Writes 4 bytes (float) per value @@ -153,7 +152,7 @@ public class Floats { return; // no data added - don't create file! if (docCount > lastDocId + 1) for (int i = lastDocId; i < docCount; i++) { - datOut.writeInt(INT_ZERO); // default value + datOut.writeInt(INT_DEFAULT); // default value } datOut.close(); } @@ -161,7 +160,7 @@ public class Floats { @Override protected int fillDefault(int numValues) throws IOException { for (int i = 0; i < numValues; i++) { - datOut.writeInt(INT_ZERO); + datOut.writeInt(INT_DEFAULT); } return numValues; } @@ -196,7 +195,7 @@ public class Floats { return; // no data added - don't create file! 
if (docCount > lastDocId + 1) for (int i = lastDocId; i < docCount; i++) { - datOut.writeLong(LONG_ZERO); // default value + datOut.writeLong(LONG_DEFAULT); // default value } datOut.close(); } @@ -204,7 +203,7 @@ public class Floats { @Override protected int fillDefault(int numValues) throws IOException { for (int i = 0; i < numValues; i++) { - datOut.writeLong(LONG_ZERO); + datOut.writeLong(LONG_DEFAULT); } return numValues; } @@ -224,7 +223,7 @@ public class Floats { protected FloatsReader(Directory dir, String id, int maxDoc) throws IOException { datIn = dir.openInput(IndexFileNames.segmentFileName(id, "", - IndexFileNames.CSF_DATA_EXTENSION)); + Writer.DATA_EXTENSION)); CodecUtil.checkHeader(datIn, CODEC_NAME, VERSION_START, VERSION_START); precisionBytes = datIn.readByte(); assert precisionBytes == 4 || precisionBytes == 8; @@ -266,19 +265,43 @@ public class Floats { Source4(ByteBuffer buffer) { values = buffer.asFloatBuffer(); + missingValues.doubleValue = Float.NEGATIVE_INFINITY; } @Override public double getFloat(int docID) { - final float f = values.get(docID); - // nocommit should we return NaN as default instead of 0.0? - return Float.isNaN(f) ? 0.0f : f; + return values.get(docID); } public long ramBytesUsed() { return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + values.limit() * RamUsageEstimator.NUM_BYTES_FLOAT; } + + @Override + public ValuesEnum getEnum(AttributeSource attrSource) throws IOException { + final MissingValues missing = getMissing(); + return new SourceEnum(attrSource, Values.SIMPLE_FLOAT_4BYTE, this, maxDoc) { + private final FloatsRef ref = attr.floats(); + @Override + public int advance(int target) throws IOException { + if (target >= numDocs) + return pos = NO_MORE_DOCS; + while (missing.doubleValue == source.getFloat(target)) { + if (++target >= numDocs) { + return pos = NO_MORE_DOCS; + } + } + ref.floats[ref.offset] = source.getFloat(target); + return pos = target; + } + }; + } + + @Override + public Values type() { + return Values.SIMPLE_FLOAT_4BYTE; + } } private class Source8 extends Source { @@ -286,19 +309,44 @@ public class Floats { Source8(ByteBuffer buffer) { values = buffer.asDoubleBuffer(); + missingValues.doubleValue = Double.NEGATIVE_INFINITY; + } @Override public double getFloat(int docID) { - final double d = values.get(docID); - // TODO should we return NaN as default instead of 0.0? - return Double.isNaN(d) ? 0.0d : d; + return values.get(docID); } public long ramBytesUsed() { return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + values.limit() * RamUsageEstimator.NUM_BYTES_DOUBLE; } + + @Override + public ValuesEnum getEnum(AttributeSource attrSource) throws IOException { + final MissingValues missing = getMissing(); + return new SourceEnum(attrSource, type(), this, maxDoc) { + private final FloatsRef ref = attr.floats(); + @Override + public int advance(int target) throws IOException { + if (target >= numDocs) + return pos = NO_MORE_DOCS; + while (missing.doubleValue == source.getFloat(target)) { + if (++target >= numDocs) { + return pos = NO_MORE_DOCS; + } + } + ref.floats[ref.offset] = source.getFloat(target); + return pos = target; + } + }; + } + + @Override + public Values type() { + return Values.SIMPLE_FLOAT_8BYTE; + } } @Override @@ -316,7 +364,7 @@ public class Floats { return precisionBytes == 4 ? new Floats4Enum(source, indexInput, maxDoc) : new Floats8EnumImpl(source, indexInput, maxDoc); } - + @Override public Values type() { return precisionBytes == 4 ? 
Values.SIMPLE_FLOAT_4BYTE @@ -336,8 +384,13 @@ if (target >= maxDoc) return pos = NO_MORE_DOCS; dataIn.seek(fp + (target * precision)); - ref.floats[0] = Float.intBitsToFloat(dataIn.readInt()); - ref.offset = 0; // nocommit -- can we igore this? + int intBits; + while ((intBits = dataIn.readInt()) == INT_DEFAULT) { + if (++target >= maxDoc) + return pos = NO_MORE_DOCS; + } + ref.floats[0] = Float.intBitsToFloat(intBits); + ref.offset = 0; return pos = target; } @@ -348,6 +401,9 @@ @Override public int nextDoc() throws IOException { + if (pos >= maxDoc) { + return pos = NO_MORE_DOCS; + } return advance(pos + 1); } } @@ -361,11 +417,17 @@ @Override public int advance(int target) throws IOException { - if (target >= maxDoc) + if (target >= maxDoc) { return pos = NO_MORE_DOCS; + } dataIn.seek(fp + (target * precision)); - ref.floats[0] = Double.longBitsToDouble(dataIn.readLong()); - ref.offset = 0; // nocommit -- can we igore this? + long value; + while ((value = dataIn.readLong()) == LONG_DEFAULT) { + if (++target >= maxDoc) + return pos = NO_MORE_DOCS; + } + ref.floats[0] = Double.longBitsToDouble(value); + ref.offset = 0; return pos = target; } @@ -376,6 +438,9 @@ @Override public int nextDoc() throws IOException { + if (pos >= maxDoc) { + return pos = NO_MORE_DOCS; + } return advance(pos + 1); } } diff --git a/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java b/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java index 0c458cf205e..d1780936f5c 100644 --- a/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java +++ b/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java @@ -21,8 +21,6 @@ import java.util.Arrays; import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.FloatsRef; -import org.apache.lucene.util.LongsRef; import org.apache.lucene.util.ReaderUtil; public class MultiDocValues extends DocValues { @@ -78,27 +76,26 @@ public class MultiDocValues extends DocValues { public static class DummyDocValues extends DocValues { final int maxDoc; - final Values type; - static final Source DUMMY = new DummySource(); + final Source emptySource; public DummyDocValues(int maxDoc, Values type) { - this.type = type; this.maxDoc = maxDoc; + this.emptySource = new EmptySource(type); } @Override public ValuesEnum getEnum(AttributeSource attrSource) throws IOException { - return new DummyEnum(attrSource, maxDoc, type); + return emptySource.getEnum(attrSource); } @Override public Source load() throws IOException { - return DUMMY; + return emptySource; } - + @Override public Values type() { - return type; + return emptySource.type(); } public void close() throws IOException { @@ -177,6 +174,7 @@ public class MultiDocValues extends DocValues { public MultiSource(DocValuesIndex[] docValuesIdx, int[] starts) { this.docValuesIdx = docValuesIdx; this.starts = starts; + assert docValuesIdx.length != 0; } @@ -193,7 +191,8 @@ + " for doc id: " + docID + " slices : " + Arrays.toString(starts); assert docValuesIdx[idx] != null; try { - current = docValuesIdx[idx].docValues.load(); + current = docValuesIdx[idx].docValues.getSource(); + missingValues.copy(current.getMissing()); } catch (IOException e) { throw new RuntimeException("load failed", e); // TODO how should we // handle this @@ -211,92 +210,62 @@ return
current.getFloat(doc); } - public BytesRef getBytes(int docID) { + public BytesRef getBytes(int docID, BytesRef bytesRef) { final int doc = ensureSource(docID); - return current.getBytes(doc); + return current.getBytes(doc, bytesRef); } public long ramBytesUsed() { return current.ramBytesUsed(); } - } - - private static class DummySource extends Source { - private final BytesRef ref = new BytesRef(); + @Override + public ValuesEnum getEnum(AttributeSource attrSource) throws IOException { + throw new UnsupportedOperationException(); // TODO + } @Override - public BytesRef getBytes(int docID) { - return ref; + public Values type() { + return docValuesIdx[0].docValues.type(); + } + + } + + private static class EmptySource extends Source { + private final Values type; + + public EmptySource(Values type) { + this.type = type; + } + + @Override + public BytesRef getBytes(int docID, BytesRef ref) { + return this.missingValues.bytesValue; + } @Override public double getFloat(int docID) { - return 0.0d; + return missingValues.doubleValue; } @Override public long getInt(int docID) { - return 0; + return missingValues.longValue; } public long ramBytesUsed() { return 0; } - } - private static class DummyEnum extends ValuesEnum { - private int pos = -1; - private final int maxDoc; - - public DummyEnum(AttributeSource source, int maxDoc, Values type) { - super(source, type); - this.maxDoc = maxDoc; - switch (type) { - case BYTES_VAR_STRAIGHT: - case BYTES_FIXED_STRAIGHT: - case BYTES_FIXED_DEREF: - case BYTES_FIXED_SORTED: - case BYTES_VAR_DEREF: - case BYTES_VAR_SORTED: - // nocommit - this is not correct for Fixed_straight - BytesRef bytes = attr.bytes(); - bytes.length = 0; - bytes.offset = 0; - break; - case PACKED_INTS: - case PACKED_INTS_FIXED: - LongsRef ints = attr.ints(); - ints.set(0); - break; - - case SIMPLE_FLOAT_4BYTE: - case SIMPLE_FLOAT_8BYTE: - FloatsRef floats = attr.floats(); - floats.set(0d); - break; - default: - throw new IllegalArgumentException("unknown Values type: " + type); - } + @Override + public ValuesEnum getEnum(AttributeSource attrSource) throws IOException { + return ValuesEnum.emptyEnum(type); } @Override - public void close() throws IOException { - } - - @Override - public int advance(int target) throws IOException { - return pos = (pos < maxDoc ? 
target : NO_MORE_DOCS); - } - - @Override - public int docID() { - return pos; - } - - @Override - public int nextDoc() throws IOException { - return advance(pos + 1); + public Values type() { + return type; } } diff --git a/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java b/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java index f37f7bbbdff..91f56bdd660 100644 --- a/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.util.Collection; import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.values.DocValues.MissingValues; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; @@ -27,6 +28,7 @@ import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.CodecUtil; import org.apache.lucene.util.LongsRef; +import org.apache.lucene.util.OpenBitSet; import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.packed.PackedInts; @@ -39,7 +41,6 @@ class PackedIntsImpl { static final int VERSION_CURRENT = VERSION_START; static class IntsWriter extends Writer { - // TODO: can we bulkcopy this on a merge? private LongsRef intsRef; @@ -49,8 +50,8 @@ class PackedIntsImpl { private boolean started; private final Directory dir; private final String id; - private int maxDocID; - private int minDocID; + private OpenBitSet defaultValues = new OpenBitSet(1); + private int lastDocId = -1; protected IntsWriter(Directory dir, String id) throws IOException { this.dir = dir; @@ -59,54 +60,58 @@ class PackedIntsImpl { } @Override - synchronized public void add(int docID, long v) throws IOException { - + public synchronized void add(int docID, long v) throws IOException { + assert lastDocId < docID; if (!started) { - minValue = maxValue = v; - minDocID = maxDocID = docID; started = true; - + minValue = maxValue = v; } else { if (v < minValue) { minValue = v; } else if (v > maxValue) { maxValue = v; } - if (docID < minDocID) { - minDocID = docID; - } else if (docID > maxDocID) { - maxDocID = docID; - } } + defaultValues.set(docID); + lastDocId = docID; + if (docID >= docToValue.length) { docToValue = ArrayUtil.grow(docToValue, 1 + docID); + defaultValues.ensureCapacity(docToValue.length); + } docToValue[docID] = v; } @Override - synchronized public void finish(int docCount) throws IOException { - if(!started) + public synchronized void finish(int docCount) throws IOException { + if (!started) return; final IndexOutput datOut = dir.createOutput(IndexFileNames - .segmentFileName(id, "", IndexFileNames.CSF_DATA_EXTENSION)); + .segmentFileName(id, "", DATA_EXTENSION)); CodecUtil.writeHeader(datOut, CODEC_NAME, VERSION_CURRENT); - // nocommit -- long can't work right since it's signed + // TODO -- long can't work right since it's signed datOut.writeLong(minValue); // write a default value to recognize docs without a value for that field final long defaultValue = ++maxValue - minValue; datOut.writeLong(defaultValue); - PackedInts.Writer w = PackedInts.getWriter(datOut, docCount, PackedInts.bitsRequired(maxValue-minValue)); - - final int limit = maxDocID + 1; - for (int i = 0; i < minDocID; i++) { - w.add(defaultValue); + PackedInts.Writer w = PackedInts.getWriter(datOut, docCount, PackedInts + .bitsRequired(maxValue - minValue)); + final int firstDoc = 
defaultValues.nextSetBit(0); + assert firstDoc >= 0; // we have at least one value! + for (int i = 0; i < firstDoc; i++) { + w.add(defaultValue); // fill with defaults until first bit set } - for (int i = minDocID; i < limit; i++) { + lastDocId++; + for (int i = firstDoc; i < lastDocId;) { w.add(docToValue[i] - minValue); + final int nextValue = defaultValues.nextSetBit(i); + for (i++; i < nextValue; i++) { + w.add(defaultValue); // fill all gaps + } } - for (int i = limit; i < docCount; i++) { + for (int i = lastDocId; i < docCount; i++) { w.add(defaultValue); } w.finish(); @@ -128,19 +133,18 @@ protected void setNextAttribute(ValuesAttribute attr) { intsRef = attr.ints(); } - + @Override public void add(int docID, ValuesAttribute attr) throws IOException { final LongsRef ref; - if((ref = attr.ints()) != null) { + if ((ref = attr.ints()) != null) { add(docID, ref.get()); } } @Override public void files(Collection files) throws IOException { - files.add(IndexFileNames.segmentFileName(id, "", - IndexFileNames.CSF_DATA_EXTENSION)); + files.add(IndexFileNames.segmentFileName(id, "", DATA_EXTENSION)); } } @@ -153,7 +157,7 @@ protected IntsReader(Directory dir, String id) throws IOException { datIn = dir.openInput(IndexFileNames.segmentFileName(id, "", - IndexFileNames.CSF_DATA_EXTENSION)); + Writer.DATA_EXTENSION)); CodecUtil.checkHeader(datIn, CODEC_NAME, VERSION_START, VERSION_START); } @@ -176,6 +180,7 @@ minValue = dataIn.readLong(); defaultValue = dataIn.readLong(); values = PackedInts.getReader(dataIn); + missingValues.longValue = minValue + defaultValue; } @Override @@ -183,9 +188,7 @@ // TODO -- can we somehow avoid 2X method calls // on each get? must push minValue down, and make // PackedInts implement Ints.Source - final long val = values.get(docID); - // docs not having a value for that field must return a default value - return val == defaultValue ? 0 : minValue + val; + return minValue + values.get(docID); } public long ramBytesUsed() { @@ -193,6 +196,31 @@ return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + values.getBitsPerValue() * values.size(); } + + @Override + public ValuesEnum getEnum(AttributeSource attrSource) throws IOException { + final MissingValues missing = getMissing(); + return new SourceEnum(attrSource, type(), this, values.size()) { + private final LongsRef ref = attr.ints(); + @Override + public int advance(int target) throws IOException { + if (target >= numDocs) + return pos = NO_MORE_DOCS; + while (source.getInt(target) == missing.longValue) { + if (++target >= numDocs) { + return pos = NO_MORE_DOCS; + } + } + ref.ints[ref.offset] = source.getInt(target); + return pos = target; + } + }; + } + + @Override + public Values type() { + return Values.PACKED_INTS; + } } @Override @@ -205,7 +233,7 @@ public ValuesEnum getEnum(AttributeSource source) throws IOException { return new IntsEnumImpl(source, (IndexInput) datIn.clone()); } - + @Override public Values type() { return Values.PACKED_INTS; @@ -243,10 +271,17 @@ @Override public int advance(int target) throws IOException { - if (target >= maxDoc) + if (target >= maxDoc) { return pos = NO_MORE_DOCS; - final long val = ints.advance(target); - ref.ints[0] = val == defaultValue?
0:minValue + val; + } + long val = ints.advance(target); + while (val == defaultValue) { + if (++target >= maxDoc) { + return pos = NO_MORE_DOCS; + } + val = ints.advance(target); + } + ref.ints[0] = minValue + val; ref.offset = 0; // can we skip this? return pos = target; } @@ -258,7 +293,10 @@ class PackedIntsImpl { @Override public int nextDoc() throws IOException { - return advance(pos+1); + if (pos >= maxDoc) { + return pos = NO_MORE_DOCS; + } + return advance(pos + 1); } } } \ No newline at end of file diff --git a/lucene/src/java/org/apache/lucene/index/values/Values.java b/lucene/src/java/org/apache/lucene/index/values/Values.java index d7d613c0510..e33c0cb9b1b 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Values.java +++ b/lucene/src/java/org/apache/lucene/index/values/Values.java @@ -30,7 +30,6 @@ public enum Values { * precision is fixed across the segment, and * determined by the min/max values in the field. */ PACKED_INTS, - PACKED_INTS_FIXED, SIMPLE_FLOAT_4BYTE, SIMPLE_FLOAT_8BYTE, diff --git a/lucene/src/java/org/apache/lucene/index/values/ValuesAttributeImpl.java b/lucene/src/java/org/apache/lucene/index/values/ValuesAttributeImpl.java index 6cd1e0294c7..b69217bcbbe 100644 --- a/lucene/src/java/org/apache/lucene/index/values/ValuesAttributeImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/ValuesAttributeImpl.java @@ -6,7 +6,6 @@ import org.apache.lucene.util.AttributeImpl; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.FloatsRef; import org.apache.lucene.util.LongsRef; -import org.apache.lucene.util.SetOnce; public class ValuesAttributeImpl extends AttributeImpl implements ValuesAttribute { private Values type; @@ -45,7 +44,6 @@ public class ValuesAttributeImpl extends AttributeImpl implements ValuesAttribut floats = null; break; case PACKED_INTS: - case PACKED_INTS_FIXED: ints = new LongsRef(new long[1], 0, 1); bytes = null; floats = null; @@ -84,7 +82,6 @@ public class ValuesAttributeImpl extends AttributeImpl implements ValuesAttribut other.bytes.copy(bytes); break; case PACKED_INTS: - case PACKED_INTS_FIXED: other.ints.copy(ints); break; case SIMPLE_FLOAT_4BYTE: diff --git a/lucene/src/java/org/apache/lucene/index/values/ValuesEnum.java b/lucene/src/java/org/apache/lucene/index/values/ValuesEnum.java index eed33457380..54bc8bf2c7e 100644 --- a/lucene/src/java/org/apache/lucene/index/values/ValuesEnum.java +++ b/lucene/src/java/org/apache/lucene/index/values/ValuesEnum.java @@ -21,14 +21,16 @@ import java.io.IOException; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.util.Attribute; import org.apache.lucene.util.AttributeSource; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.FloatsRef; +import org.apache.lucene.util.LongsRef; -public abstract class ValuesEnum extends DocIdSetIterator{ +public abstract class ValuesEnum extends DocIdSetIterator { private AttributeSource source; protected final ValuesAttribute attr; - protected ValuesEnum(Values enumType) { - this(null, enumType); + this(null, enumType); } protected ValuesEnum(AttributeSource source, Values enumType) { @@ -39,6 +41,22 @@ public abstract class ValuesEnum extends DocIdSetIterator{ attr.setType(enumType); } + public Values type() { + return attr.type(); + } + + public BytesRef bytes() { + return attr.bytes(); + } + + public FloatsRef getFloat() { + return attr.floats(); + } + + public LongsRef getInt() { + return attr.ints(); + } + public AttributeSource attributes() { if (source == null) source = new 
AttributeSource(); @@ -59,4 +77,28 @@ public abstract class ValuesEnum extends DocIdSetIterator{ public abstract void close() throws IOException; + public static ValuesEnum emptyEnum(Values type) { + return new ValuesEnum(type) { + @Override + public int nextDoc() throws IOException { + return NO_MORE_DOCS; + } + + @Override + public int docID() { + return NO_MORE_DOCS; + } + + @Override + public int advance(int target) throws IOException { + return NO_MORE_DOCS; + } + + @Override + public void close() throws IOException { + + } + }; + } + } diff --git a/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java index beb0c14704c..2dfa5bdc8a1 100644 --- a/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java @@ -50,16 +50,16 @@ class VarDerefBytesImpl { static final String CODEC_NAME = "VarDerefBytes"; static final int VERSION_START = 0; static final int VERSION_CURRENT = VERSION_START; - - - private static class AddressParallelArray extends ParallelArrayBase { + private static class AddressParallelArray extends + ParallelArrayBase { final int[] address; - + AddressParallelArray(int size, AtomicLong bytesUsed) { super(size, bytesUsed); - address = new int[size]; + address = new int[size]; } + @Override protected int bytesPerEntry() { return RamUsageEstimator.NUM_BYTES_INT + super.bytesPerEntry(); @@ -69,46 +69,50 @@ class VarDerefBytesImpl { protected void copyTo(AddressParallelArray toArray, int numToCopy) { super.copyTo(toArray, numToCopy); System.arraycopy(address, 0, toArray.address, 0, size); - + } @Override public AddressParallelArray newInstance(int size) { return new AddressParallelArray(size, bytesUsed); } - - } + } static class Writer extends BytesWriterBase { private int[] docToAddress; private int address = 1; - - private final ParallelBytesStartArray array = new ParallelBytesStartArray(new AddressParallelArray(0, bytesUsed)); - private final BytesRefHash hash = new BytesRefHash(pool, 16, array) ; - public Writer(Directory dir, String id) throws IOException { + private final ParallelBytesStartArray array = new ParallelBytesStartArray( + new AddressParallelArray(0, bytesUsed)); + private final BytesRefHash hash = new BytesRefHash(pool, 16, array); + + public Writer(Directory dir, String id) throws IOException { this(dir, id, new DirectAllocator(ByteBlockPool.BYTE_BLOCK_SIZE), new AtomicLong()); } - public Writer(Directory dir, String id, Allocator allocator, AtomicLong bytesUsed) throws IOException { - super(dir, id, CODEC_NAME, VERSION_CURRENT, false, false, new ByteBlockPool(allocator), bytesUsed); + + public Writer(Directory dir, String id, Allocator allocator, + AtomicLong bytesUsed) throws IOException { + super(dir, id, CODEC_NAME, VERSION_CURRENT, false, false, + new ByteBlockPool(allocator), bytesUsed); docToAddress = new int[1]; bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT); } @Override synchronized public void add(int docID, BytesRef bytes) throws IOException { - if(bytes.length == 0) + if (bytes.length == 0) return; // default - if(datOut == null) + if (datOut == null) initDataOut(); final int e = hash.add(bytes); if (docID >= docToAddress.length) { final int oldSize = docToAddress.length; - docToAddress = ArrayUtil.grow(docToAddress, 1+docID); - bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT * (docToAddress.length - oldSize)); + docToAddress = ArrayUtil.grow(docToAddress, 1 + docID); 
+ bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT + * (docToAddress.length - oldSize)); } final int docAddress; if (e >= 0) { @@ -117,12 +121,13 @@ class VarDerefBytesImpl { datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length); address += bytes.length; } else { - docAddress = array.array.address[(-e)-1]; + docAddress = array.array.address[(-e) - 1]; } docToAddress[docID] = docAddress; } - - private static int writePrefixLength(DataOutput datOut, BytesRef bytes) throws IOException{ + + private static int writePrefixLength(DataOutput datOut, BytesRef bytes) + throws IOException { if (bytes.length < 128) { datOut.writeByte((byte) bytes.length); return 1; @@ -132,7 +137,7 @@ class VarDerefBytesImpl { return 2; } } - + public long ramBytesUsed() { return bytesUsed.get(); } @@ -141,25 +146,26 @@ class VarDerefBytesImpl { // some last docs that we didn't see @Override synchronized public void finish(int docCount) throws IOException { - if(datOut == null) + if (datOut == null) return; initIndexOut(); - idxOut.writeInt(address-1); + idxOut.writeInt(address - 1); // write index // TODO(simonw): -- allow forcing fixed array (not -1) // TODO(simonw): check the address calculation / make it more intuitive - final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount, PackedInts.bitsRequired(address-1)); + final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount, + PackedInts.bitsRequired(address - 1)); final int limit; if (docCount > docToAddress.length) { limit = docToAddress.length; } else { limit = docCount; } - for(int i=0;i= docCount) + if (target >= docCount) { return pos = NO_MORE_DOCS; - final int ord = (int) docToOrdIndex.get(target) - 1; - if (ord == -1) { - bytesRef.length = 0; - bytesRef.offset = 0; - return pos = target; } - final long offset = ordToOffsetIndex.get(ord); + int ord; + while((ord =(int) docToOrdIndex.get(target)) == 0) { + if(++target >= docCount) { + return pos = NO_MORE_DOCS; + } + } + final long offset = ordToOffsetIndex.get(--ord); final long nextOffset; if (ord == valueCount - 1) { nextOffset = totBytes; @@ -306,6 +311,9 @@ class VarSortedBytesImpl { @Override public int nextDoc() throws IOException { + if (pos >= docCount) { + return pos = NO_MORE_DOCS; + } return advance(pos + 1); } } diff --git a/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java index 04fd5939d34..0f3f6dff28f 100644 --- a/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java @@ -41,15 +41,15 @@ class VarStraightBytesImpl { static final int VERSION_CURRENT = VERSION_START; static class Writer extends BytesWriterBase { - private int address; + private long address; // start at -1 if the first added value is > 0 private int lastDocID = -1; - private int[] docToAddress; + private long[] docToAddress; public Writer(Directory dir, String id, AtomicLong bytesUsed) throws IOException { super(dir, id, CODEC_NAME, VERSION_CURRENT, false, false, null, bytesUsed); - docToAddress = new int[1]; + docToAddress = new long[1]; bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT); } @@ -89,11 +89,8 @@ class VarStraightBytesImpl { return; } initIndexOut(); - // write all lengths to index - // write index fill(docCount); - idxOut.writeVInt(address); - // TODO(simonw): allow not -1 + idxOut.writeVLong(address); final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount, 
PackedInts.bitsRequired(address)); for (int i = 0; i < docCount; i++) { @@ -125,20 +122,17 @@ class VarStraightBytesImpl { } private class Source extends BytesBaseSource { - private final BytesRef bytesRef = new BytesRef(); private final PackedInts.Reader addresses; public Source(IndexInput datIn, IndexInput idxIn) throws IOException { - super(datIn, idxIn, new PagedBytes(PAGED_BYTES_BITS), idxIn.readVInt()); // TODO - // should - // be - // long + super(datIn, idxIn, new PagedBytes(PAGED_BYTES_BITS), idxIn.readVLong()); addresses = PackedInts.getReader(idxIn); + missingValues.bytesValue = new BytesRef(0); // empty } @Override - public BytesRef getBytes(int docID) { - final int address = (int) addresses.get(docID); + public BytesRef getBytes(int docID, BytesRef bytesRef) { + final long address = addresses.get(docID); final int length = docID == maxDoc - 1 ? (int) (totalLengthInBytes - address) : (int) (addresses.get(1 + docID) - address); return data.fill(bytesRef, address, length); @@ -148,14 +142,24 @@ class VarStraightBytesImpl { public int getValueCount() { throw new UnsupportedOperationException(); } + + @Override + public Values type() { + return Values.BYTES_VAR_STRAIGHT; + } + + @Override + protected int maxDoc() { + return addresses.size(); + } } @Override public ValuesEnum getEnum(AttributeSource source) throws IOException { - return new VarStrainghtBytesEnum(source, cloneData(), cloneIndex()); + return new VarStraightBytesEnum(source, cloneData(), cloneIndex()); } - private class VarStrainghtBytesEnum extends ValuesEnum { + private class VarStraightBytesEnum extends ValuesEnum { private final PackedInts.Reader addresses; private final IndexInput datIn; private final IndexInput idxIn; @@ -164,7 +168,7 @@ class VarStraightBytesImpl { private final BytesRef ref; private int pos = -1; - protected VarStrainghtBytesEnum(AttributeSource source, IndexInput datIn, + protected VarStraightBytesEnum(AttributeSource source, IndexInput datIn, IndexInput idxIn) throws IOException { super(source, Values.BYTES_VAR_STRAIGHT); totBytes = idxIn.readVInt(); @@ -185,13 +189,10 @@ class VarStraightBytesImpl { @Override public int advance(final int target) throws IOException { if (target >= maxDoc) { - ref.length = 0; - ref.offset = 0; return pos = NO_MORE_DOCS; } final long addr = addresses.get(target); - if (addr == totBytes) { - // nocommit is that a valid default value + if (addr == totBytes) { // empty values at the end ref.length = 0; ref.offset = 0; return pos = target; diff --git a/lucene/src/java/org/apache/lucene/index/values/Writer.java b/lucene/src/java/org/apache/lucene/index/values/Writer.java index b73b8ab4113..04471b25427 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Writer.java +++ b/lucene/src/java/org/apache/lucene/index/values/Writer.java @@ -25,7 +25,7 @@ import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; public abstract class Writer extends DocValuesConsumer { - + public static final String INDEX_EXTENSION = "idx"; public static final String DATA_EXTENSION = "dat"; @@ -63,23 +63,31 @@ public abstract class Writer extends DocValuesConsumer { int docID = state.docBase; final Bits bits = state.bits; final int docCount = state.docCount; - for (int i = 0; i < docCount; i++) { - if (bits == null || !bits.get(i)) { - if (valEnum.advance(i) == ValuesEnum.NO_MORE_DOCS) - break; - add(docID++); + int currentDocId; + if ((currentDocId = valEnum.advance(0)) != ValuesEnum.NO_MORE_DOCS) { + for (int i = 0; i < docCount; i++) { + if (bits == null || 
!bits.get(i)) { + if (currentDocId < i) { + if ((currentDocId = valEnum.advance(i)) == ValuesEnum.NO_MORE_DOCS) { + break; // advance can jump over default values + } + } + if (currentDocId == i) { // we are on the doc to merge + add(docID); + } + ++docID; + } } } } finally { valEnum.close(); } } - - public static Writer create(Values v, String id, - Directory directory, Comparator comp) throws IOException { + + public static Writer create(Values v, String id, Directory directory, + Comparator comp) throws IOException { switch (v) { case PACKED_INTS: - case PACKED_INTS_FIXED: return Ints.getWriter(directory, id, true); case SIMPLE_FLOAT_4BYTE: return Floats.getWriter(directory, id, 4); diff --git a/lucene/src/java/org/apache/lucene/util/PagedBytes.java b/lucene/src/java/org/apache/lucene/util/PagedBytes.java index 9d42cdd94f7..d09ef809ab7 100644 --- a/lucene/src/java/org/apache/lucene/util/PagedBytes.java +++ b/lucene/src/java/org/apache/lucene/util/PagedBytes.java @@ -90,7 +90,7 @@ public final class PagedBytes { } return b; } - + /** Reads length as 1 or 2 byte vInt prefix, starting @ start */ public BytesRef fillUsingLengthPrefix(BytesRef b, long start) { final int index = (int) (start >> blockBits); @@ -145,6 +145,49 @@ public final class PagedBytes { } return start; } + + /** + * Reads length as 1 or 2 byte vInt prefix, starting @ start and fill the + * given {@link BytesRef} with the byte slice starting after the length + * prefix. + * @lucene.internal + **/ + public BytesRef fillUsingLengthPrefix4(BytesRef b, long start) { + final int index = (int) (start >> blockBits); + int offset = (int) (start & blockMask); + final byte[] block = blocks[index]; + final int length; + if ((block[offset] & 128) == 0) { + length = block[offset]; + offset = offset+1; + } else { + length = ((block[offset] & 0x7f) << 8) | (block[1+offset] & 0xff); + offset = offset+2; + assert length > 0; + } + assert length >= 0: "length=" + length; + b.length = length; + if (blockSize - offset >= length) { + // Within block + b.offset = offset; + b.bytes = blocks[index]; + } else { + // Split + byte[] buffer = threadBuffers.get(); + if (buffer == null) { + buffer = new byte[length]; + threadBuffers.set(buffer); + } else if (buffer.length < length) { + buffer = ArrayUtil.grow(buffer, length); + threadBuffers.set(buffer); + } + b.bytes = buffer; + b.offset = 0; + System.arraycopy(blocks[index], offset, buffer, 0, blockSize-offset); + System.arraycopy(blocks[1+index], 0, buffer, blockSize-offset, length-(blockSize-offset)); + } + return b; + } /** @lucene.internal */ public byte[][] getBlocks() { diff --git a/lucene/src/test/org/apache/lucene/index/values/TestDocValues.java b/lucene/src/test/org/apache/lucene/index/values/TestDocValues.java index 8086871678c..2b2015cf5f1 100644 --- a/lucene/src/test/org/apache/lucene/index/values/TestDocValues.java +++ b/lucene/src/test/org/apache/lucene/index/values/TestDocValues.java @@ -117,14 +117,15 @@ public class TestDocValues extends LuceneTestCase { s = getSource(r); ss = null; } - for (int i = 0; i < 100; i++) { final int idx = 2 * i; - assertNotNull("doc " + idx + "; value=" + values[idx], s.getBytes(idx)); - assertEquals("doc " + idx, values[idx], s.getBytes(idx).utf8ToString()); + assertNotNull("doc " + idx + "; value=" + values[idx], s.getBytes(idx, + bytesRef)); + assertEquals("doc " + idx, values[idx], s.getBytes(idx, bytesRef) + .utf8ToString()); if (ss != null) { - assertEquals("doc " + idx, values[idx], ss.getByOrd(ss.ord(idx)) - .utf8ToString()); + assertEquals("doc 
" + idx, values[idx], ss.getByOrd(ss.ord(idx), + bytesRef).utf8ToString()); DocValues.SortedSource.LookupResult result = ss .getByValue(new BytesRef(values[idx])); assertTrue(result.found); @@ -141,7 +142,8 @@ public class TestDocValues extends LuceneTestCase { SortedSource.LookupResult result = ss.getByValue(bytesValue); if (result.found) { assert result.ord > 0; - assertTrue(bytesValue.bytesEquals(ss.getByOrd(result.ord))); + assertTrue(bytesValue + .bytesEquals(ss.getByOrd(result.ord, bytesRef))); int count = 0; for (int k = 0; k < 100; k++) { if (bytesValue.utf8ToString().equals(values[2 * k])) { @@ -153,18 +155,18 @@ public class TestDocValues extends LuceneTestCase { } else { assert result.ord >= 0; if (result.ord == 0) { - final BytesRef firstRef = ss.getByOrd(1); + final BytesRef firstRef = ss.getByOrd(1, bytesRef); // random string was before our first assertTrue(firstRef.compareTo(bytesValue) > 0); } else if (result.ord == numValues) { - final BytesRef lastRef = ss.getByOrd(numValues); + final BytesRef lastRef = ss.getByOrd(numValues, bytesRef); // random string was after our last assertTrue(lastRef.compareTo(bytesValue) < 0); } else { // random string fell between two of our values - final BytesRef before = (BytesRef) ss.getByOrd(result.ord) - .clone(); - final BytesRef after = ss.getByOrd(result.ord + 1); + final BytesRef before = (BytesRef) ss.getByOrd(result.ord, + bytesRef).clone(); + final BytesRef after = ss.getByOrd(result.ord + 1, bytesRef); assertTrue(before.compareTo(bytesValue) < 0); assertTrue(bytesValue.compareTo(after) < 0); @@ -180,64 +182,65 @@ public class TestDocValues extends LuceneTestCase { public void testInts() throws IOException { long maxV = 1; - final int NUM_VALUES = 1000; + final int NUM_VALUES = 777 + random.nextInt(777); final long[] values = new long[NUM_VALUES]; for (int rx = 1; rx < 63; rx++, maxV *= 2) { - for (int b = 0; b < 2; b++) { - Directory dir = newDirectory(); - boolean useFixedArrays = b == 0; - Writer w = Ints.getWriter(dir, "test", useFixedArrays); - for (int i = 0; i < NUM_VALUES; i++) { - final long v = random.nextLong() % (1 + maxV); - values[i] = v; - w.add(i, v); - } - final int additionalDocs = 1 + random.nextInt(9); - w.finish(NUM_VALUES + additionalDocs); - - DocValues r = Ints.getValues(dir, "test", useFixedArrays); - for (int iter = 0; iter < 2; iter++) { - Source s = getSource(r); - for (int i = 0; i < NUM_VALUES; i++) { - final long v = s.getInt(i); - assertEquals("index " + i + " b: " + b, values[i], v); - } - } - - for (int iter = 0; iter < 2; iter++) { - ValuesEnum iEnum = r.getEnum(); - ValuesAttribute attr = iEnum.addAttribute(ValuesAttribute.class); - LongsRef ints = attr.ints(); - for (int i = 0; i < NUM_VALUES; i++) { - assertEquals(i, iEnum.nextDoc()); - assertEquals(values[i], ints.get()); - } - for (int i = NUM_VALUES; i < NUM_VALUES + additionalDocs; i++) { - assertEquals(i, iEnum.nextDoc()); - assertEquals("" + i, 0, ints.get()); - } - - iEnum.close(); - } - - for (int iter = 0; iter < 2; iter++) { - ValuesEnum iEnum = r.getEnum(); - ValuesAttribute attr = iEnum.addAttribute(ValuesAttribute.class); - LongsRef ints = attr.ints(); - for (int i = 0; i < NUM_VALUES; i += 1 + random.nextInt(25)) { - assertEquals(i, iEnum.advance(i)); - assertEquals(values[i], ints.get()); - } - for (int i = NUM_VALUES; i < NUM_VALUES + additionalDocs; i++) { - assertEquals(i, iEnum.advance(i)); - assertEquals("" + i, 0, ints.get()); - } - - iEnum.close(); - } - r.close(); - dir.close(); + Directory dir = newDirectory(); + Writer w 
= Ints.getWriter(dir, "test", false); + for (int i = 0; i < NUM_VALUES; i++) { + final long v = random.nextLong() % (1 + maxV); + values[i] = v; + w.add(i, v); } + final int additionalDocs = 1 + random.nextInt(9); + w.finish(NUM_VALUES + additionalDocs); + + DocValues r = Ints.getValues(dir, "test", false); + for (int iter = 0; iter < 2; iter++) { + Source s = getSource(r); + for (int i = 0; i < NUM_VALUES; i++) { + final long v = s.getInt(i); + assertEquals("index " + i, values[i], v); + } + } + + for (int iter = 0; iter < 2; iter++) { + ValuesEnum iEnum = r.getEnum(); + ValuesAttribute attr = iEnum.addAttribute(ValuesAttribute.class); + LongsRef ints = attr.ints(); + for (int i = 0; i < NUM_VALUES; i++) { + assertEquals(i, iEnum.nextDoc()); + assertEquals(values[i], ints.get()); + } + if (iEnum.docID() < NUM_VALUES - 1) { + assertEquals(NUM_VALUES - 1, iEnum.advance(NUM_VALUES - 1)); + } + for (int i = NUM_VALUES; i < NUM_VALUES + additionalDocs; i++) { + assertEquals(ValuesEnum.NO_MORE_DOCS, iEnum.nextDoc()); + } + + iEnum.close(); + } + + for (int iter = 0; iter < 2; iter++) { + ValuesEnum iEnum = r.getEnum(); + ValuesAttribute attr = iEnum.addAttribute(ValuesAttribute.class); + LongsRef ints = attr.ints(); + for (int i = 0; i < NUM_VALUES; i += 1 + random.nextInt(25)) { + assertEquals(i, iEnum.advance(i)); + assertEquals(values[i], ints.get()); + } + if (iEnum.docID() < NUM_VALUES - 1) { + assertEquals(NUM_VALUES - 1, iEnum.advance(NUM_VALUES - 1)); + } + for (int i = NUM_VALUES; i < NUM_VALUES + additionalDocs; i++) { + assertEquals(ValuesEnum.NO_MORE_DOCS, iEnum.nextDoc()); + } + + iEnum.close(); + } + r.close(); + dir.close(); } } @@ -248,7 +251,7 @@ private void runTestFloats(int precision, double delta) throws IOException { Directory dir = newDirectory(); Writer w = Floats.getWriter(dir, "test", precision); - final int NUM_VALUES = 1000; + final int NUM_VALUES = 777 + random.nextInt(777); final double[] values = new double[NUM_VALUES]; for (int i = 0; i < NUM_VALUES; i++) { final double v = precision == 4 ?
random.nextFloat() : random @@ -269,29 +272,25 @@ public class TestDocValues extends LuceneTestCase { for (int iter = 0; iter < 2; iter++) { ValuesEnum fEnum = r.getEnum(); - ValuesAttribute attr = fEnum.addAttribute(ValuesAttribute.class); - FloatsRef floats = attr.floats(); + FloatsRef floats = fEnum.getFloat(); for (int i = 0; i < NUM_VALUES; i++) { assertEquals(i, fEnum.nextDoc()); assertEquals(values[i], floats.get(), delta); } for (int i = NUM_VALUES; i < NUM_VALUES + additionalValues; i++) { - assertEquals(i, fEnum.nextDoc()); - assertEquals(0.0, floats.get(), delta); + assertEquals(ValuesEnum.NO_MORE_DOCS, fEnum.nextDoc()); } fEnum.close(); } for (int iter = 0; iter < 2; iter++) { ValuesEnum fEnum = r.getEnum(); - ValuesAttribute attr = fEnum.addAttribute(ValuesAttribute.class); - FloatsRef floats = attr.floats(); + FloatsRef floats = fEnum.getFloat(); for (int i = 0; i < NUM_VALUES; i += 1 + random.nextInt(25)) { assertEquals(i, fEnum.advance(i)); assertEquals(values[i], floats.get(), delta); } for (int i = NUM_VALUES; i < NUM_VALUES + additionalValues; i++) { - assertEquals(i, fEnum.advance(i)); - assertEquals(0.0, floats.get(), delta); + assertEquals(ValuesEnum.NO_MORE_DOCS, fEnum.advance(i)); } fEnum.close(); } diff --git a/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java b/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java index fcac9640ef7..5ab0c649adf 100644 --- a/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java +++ b/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java @@ -43,6 +43,7 @@ import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.Term; import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.index.codecs.docvalues.DocValuesCodec; +import org.apache.lucene.index.values.DocValues.MissingValues; import org.apache.lucene.index.values.DocValues.Source; import org.apache.lucene.store.Directory; import org.apache.lucene.store.LockObtainFailedException; @@ -86,49 +87,40 @@ public class TestDocValuesIndexing extends LuceneTestCase { * Tests complete indexing of {@link Values} including deletions, merging and * sparse value fields on Compound-File */ - public void testCFSIndex() throws IOException { - // without deletions - IndexWriterConfig cfg = writerConfig(true); - // primitives - no deletes - runTestNumerics(cfg, false); + public void testIndexBytesNoDeletesCFS() throws IOException { + runTestIndexBytes(writerConfig(true), false); + } - cfg = writerConfig(true); - // bytes - no deletes - runTestIndexBytes(cfg, false); + public void testIndexBytesDeletesCFS() throws IOException { + runTestIndexBytes(writerConfig(true), true); + } - // with deletions - cfg = writerConfig(true); - // primitives - runTestNumerics(cfg, true); + public void testIndexNumericsNoDeletesCFS() throws IOException { + runTestNumerics(writerConfig(true), false); + } - cfg = writerConfig(true); - // bytes - runTestIndexBytes(cfg, true); + public void testIndexNumericsDeletesCFS() throws IOException { + runTestNumerics(writerConfig(true), true); } /** * Tests complete indexing of {@link Values} including deletions, merging and * sparse value fields on None-Compound-File */ - public void testIndex() throws IOException { - // - // without deletions - IndexWriterConfig cfg = writerConfig(false); - // primitives - no deletes - runTestNumerics(cfg, false); + public void testIndexBytesNoDeletes() throws IOException { + runTestIndexBytes(writerConfig(false), false); + } - 
cfg = writerConfig(false); - // bytes - no deletes - runTestIndexBytes(cfg, false); + public void testIndexBytesDeletes() throws IOException { + runTestIndexBytes(writerConfig(false), true); + } - // with deletions - cfg = writerConfig(false); - // primitives - runTestNumerics(cfg, true); + public void testIndexNumericsNoDeletes() throws IOException { + runTestNumerics(writerConfig(false), false); + } - cfg = writerConfig(false); - // bytes - runTestIndexBytes(cfg, true); + public void testIndexNumericsDeletes() throws IOException { + runTestNumerics(writerConfig(false), true); } private IndexWriterConfig writerConfig(boolean useCompoundFile) { @@ -150,7 +142,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { throws IOException { Directory d = newDirectory(); IndexWriter w = new IndexWriter(d, cfg); - final int numValues = 350; + final int numValues = 179 + random.nextInt(151); final List numVariantList = new ArrayList(NUMERICS); // run in random order to test if fill works correctly during merges @@ -163,22 +155,24 @@ public class TestDocValuesIndexing extends LuceneTestCase { final int numRemainingValues = (int) (numValues - deleted.cardinality()); final int base = r.numDocs() - numRemainingValues; switch (val) { - case PACKED_INTS: - case PACKED_INTS_FIXED: { + case PACKED_INTS: { DocValues intsReader = getDocValues(r, val.name()); assertNotNull(intsReader); Source ints = getSource(intsReader); + MissingValues missing = ints.getMissing(); - ValuesEnum intsEnum = intsReader.getEnum(); - assertNotNull(intsEnum); - LongsRef enumRef = intsEnum.addAttribute(ValuesAttribute.class).ints(); for (int i = 0; i < base; i++) { - assertEquals("index " + i, 0, ints.getInt(i)); - assertEquals(val.name() + " base: " + base + " index: " + i, i, - random.nextBoolean() ? intsEnum.advance(i) : intsEnum.nextDoc()); - assertEquals(0, enumRef.get()); + long value = ints.getInt(i); + assertEquals("index " + i, missing.longValue, value); } + + ValuesEnum intsEnum = getValuesEnum(intsReader); + assertTrue(intsEnum.advance(0) >= base); + + intsEnum = getValuesEnum(intsReader); + LongsRef enumRef = intsEnum.getInt(); + int expected = 0; for (int i = base; i < r.numDocs(); i++, expected++) { while (deleted.get(expected)) { @@ -197,18 +191,18 @@ public class TestDocValuesIndexing extends LuceneTestCase { DocValues floatReader = getDocValues(r, val.name()); assertNotNull(floatReader); Source floats = getSource(floatReader); - ValuesEnum floatEnum = floatReader.getEnum(); - assertNotNull(floatEnum); - FloatsRef enumRef = floatEnum.addAttribute(ValuesAttribute.class) - .floats(); + MissingValues missing = floats.getMissing(); for (int i = 0; i < base; i++) { - assertEquals(" floats failed for doc: " + i + " base: " + base, 0.0d, - floats.getFloat(i), 0.0d); - assertEquals(i, random.nextBoolean() ? 
floatEnum.advance(i) - : floatEnum.nextDoc()); - assertEquals("index " + i, 0.0, enumRef.get(), 0.0); + double value = floats.getFloat(i); + assertEquals(" floats failed for doc: " + i + " base: " + base, + missing.doubleValue, value, 0.0d); } + ValuesEnum floatEnum = getValuesEnum(floatReader); + assertTrue(floatEnum.advance(0) >= base); + + floatEnum = getValuesEnum(floatReader); + FloatsRef enumRef = floatEnum.getFloat(); int expected = 0; for (int i = base; i < r.numDocs(); i++, expected++) { while (deleted.get(expected)) { @@ -235,92 +229,6 @@ public class TestDocValuesIndexing extends LuceneTestCase { d.close(); } - private static EnumSet BYTES = EnumSet.of(Values.BYTES_FIXED_DEREF, - Values.BYTES_FIXED_SORTED, Values.BYTES_FIXED_STRAIGHT, - Values.BYTES_VAR_DEREF, Values.BYTES_VAR_SORTED, - Values.BYTES_VAR_STRAIGHT); - - private static EnumSet NUMERICS = EnumSet.of(Values.PACKED_INTS, - Values.PACKED_INTS_FIXED, Values.SIMPLE_FLOAT_4BYTE, - Values.SIMPLE_FLOAT_8BYTE); - - private static Index[] IDX_VALUES = new Index[] { Index.ANALYZED, - Index.ANALYZED_NO_NORMS, Index.NOT_ANALYZED, Index.NOT_ANALYZED_NO_NORMS, - Index.NO }; - - private OpenBitSet indexValues(IndexWriter w, int numValues, Values value, - List valueVarList, boolean withDeletions, int multOfSeven) - throws CorruptIndexException, IOException { - final boolean isNumeric = NUMERICS.contains(value); - OpenBitSet deleted = new OpenBitSet(numValues); - Document doc = new Document(); - Index idx = IDX_VALUES[random.nextInt(IDX_VALUES.length)]; - Fieldable field = random.nextBoolean() ? new ValuesField(value.name()) - : newField(value.name(), _TestUtil.randomRealisticUnicodeString(random, - 10), idx == Index.NO ? Store.YES : Store.NO, idx); - doc.add(field); - - ValuesAttribute valuesAttribute = ValuesField.values(field); - valuesAttribute.setType(value); - final LongsRef intsRef = valuesAttribute.ints(); - final FloatsRef floatsRef = valuesAttribute.floats(); - final BytesRef bytesRef = valuesAttribute.bytes(); - - final String idBase = value.name() + "_"; - final byte[] b = new byte[multOfSeven]; - if (bytesRef != null) { - bytesRef.bytes = b; - bytesRef.length = b.length; - bytesRef.offset = 0; - } - byte upto = 0; - for (int i = 0; i < numValues; i++) { - if (isNumeric) { - switch (value) { - case PACKED_INTS: - case PACKED_INTS_FIXED: - intsRef.set(i); - break; - case SIMPLE_FLOAT_4BYTE: - case SIMPLE_FLOAT_8BYTE: - floatsRef.set(2.0f * i); - break; - default: - fail("unexpected value " + value); - } - } else { - for (int j = 0; j < b.length; j++) { - b[j] = upto++; - } - } - doc.removeFields("id"); - doc.add(new Field("id", idBase + i, Store.YES, - Index.NOT_ANALYZED_NO_NORMS)); - w.addDocument(doc); - - if (i % 7 == 0) { - if (withDeletions && random.nextBoolean()) { - Values val = valueVarList.get(random.nextInt(1 + valueVarList - .indexOf(value))); - final int randInt = val == value ? 
random.nextInt(1 + i) : random - .nextInt(numValues); - w.deleteDocuments(new Term("id", val.name() + "_" + randInt)); - if (val == value) { - deleted.set(randInt); - } - } - w.commit(); - - } - } - w.commit(); - - // TODO test unoptimized with deletions - if (withDeletions || random.nextBoolean()) - w.optimize(); - return deleted; - } - public void runTestIndexBytes(IndexWriterConfig cfg, boolean withDeletions) throws CorruptIndexException, LockObtainFailedException, IOException { final Directory d = newDirectory(); @@ -343,30 +251,32 @@ public class TestDocValuesIndexing extends LuceneTestCase { assertNotNull("field " + byteIndexValue.name() + " returned null reader - maybe merged failed", bytesReader); Source bytes = getSource(bytesReader); - ValuesEnum bytesEnum = bytesReader.getEnum(); - assertNotNull(bytesEnum); - final ValuesAttribute attr = bytesEnum - .addAttribute(ValuesAttribute.class); byte upto = 0; + // test the filled up slots for correctness + MissingValues missing = bytes.getMissing(); for (int i = 0; i < base; i++) { - final BytesRef br = bytes.getBytes(i); + + BytesRef br = bytes.getBytes(i, new BytesRef()); String msg = " field: " + byteIndexValue.name() + " at index: " + i + " base: " + base + " numDocs:" + r.numDocs(); switch (byteIndexValue) { case BYTES_VAR_STRAIGHT: case BYTES_FIXED_STRAIGHT: - assertEquals(i, bytesEnum.advance(i)); // fixed straight returns bytesref with zero bytes all of fixed // length - assertNotNull("expected none null - " + msg, br); - if (br.length != 0) { - assertEquals("expected zero bytes of length " + bytesSize + " - " - + msg, bytesSize, br.length); - for (int j = 0; j < br.length; j++) { - assertEquals("Byte at index " + j + " doesn't match - " + msg, 0, - br.bytes[br.offset + j]); + if (missing.bytesValue != null) { + assertNotNull("expected none null - " + msg, br); + if (br.length != 0) { + assertEquals("expected zero bytes of length " + bytesSize + " - " + + msg, bytesSize, br.length); + for (int j = 0; j < br.length; j++) { + assertEquals("Byte at index " + j + " doesn't match - " + msg, + 0, br.bytes[br.offset + j]); + } } + } else { + assertNull("expected null - " + msg + " " + br, br); } break; case BYTES_VAR_SORTED: @@ -374,16 +284,18 @@ public class TestDocValuesIndexing extends LuceneTestCase { case BYTES_VAR_DEREF: case BYTES_FIXED_DEREF: default: - assertNotNull("expected none null - " + msg, br); - if (br.length != 0) { - bytes.getBytes(i); - } - assertEquals("expected empty bytes - " + br.utf8ToString() + msg, 0, - br.length); + assertNull("expected null - " + msg + " " + br, br); + // make sure we advance at least until base + ValuesEnum bytesEnum = getValuesEnum(bytesReader); + final int advancedTo = bytesEnum.advance(0); + assertTrue(byteIndexValue.name() + " advanced failed base:" + base + + " advancedTo: " + advancedTo, base <= advancedTo); + } } - final BytesRef enumRef = attr.bytes(); + ValuesEnum bytesEnum = getValuesEnum(bytesReader); + final BytesRef enumRef = bytesEnum.bytes(); // test the actual doc values added in this iteration assertEquals(base + numRemainingValues, r.numDocs()); int v = 0; @@ -395,14 +307,20 @@ public class TestDocValuesIndexing extends LuceneTestCase { upto += bytesSize; } - BytesRef br = bytes.getBytes(i); - if (bytesEnum.docID() != i) + BytesRef br = bytes.getBytes(i, new BytesRef()); + if (bytesEnum.docID() != i) { assertEquals("seek failed for index " + i + " " + msg, i, bytesEnum .advance(i)); + } for (int j = 0; j < br.length; j++, upto++) { assertEquals( "EnumRef Byte at index " + 
j + " doesn't match - " + msg, upto, enumRef.bytes[enumRef.offset + j]); + if (!(br.bytes.length > br.offset + j)) + br = bytes.getBytes(i, new BytesRef()); + assertTrue("BytesRef index exceeded [" + msg + "] offset: " + + br.offset + " length: " + br.length + " index: " + + (br.offset + j), br.bytes.length > br.offset + j); assertEquals("SourceRef Byte at index " + j + " doesn't match - " + msg, upto, br.bytes[br.offset + j]); } @@ -442,8 +360,113 @@ public class TestDocValuesIndexing extends LuceneTestCase { } private Source getSource(DocValues values) throws IOException { - // getSource uses cache internally - return random.nextBoolean() ? values.load() : values.getSource(); + Source source; + if (random.nextInt(10) == 0) { + source = values.load(); + } else { + // getSource uses cache internally + source = values.getSource(); + } + assertNotNull(source); + return source; + } + + private ValuesEnum getValuesEnum(DocValues values) throws IOException { + ValuesEnum valuesEnum; + if (!(values instanceof MultiDocValues) && random.nextInt(10) == 0) { + // TODO not supported by MultiDocValues yet! + valuesEnum = getSource(values).getEnum(); + } else { + valuesEnum = values.getEnum(); + + } + assertNotNull(valuesEnum); + return valuesEnum; + } + + private static EnumSet BYTES = EnumSet.of(Values.BYTES_FIXED_DEREF, + Values.BYTES_FIXED_SORTED, Values.BYTES_FIXED_STRAIGHT, + Values.BYTES_VAR_DEREF, Values.BYTES_VAR_SORTED, + Values.BYTES_VAR_STRAIGHT); + + private static EnumSet NUMERICS = EnumSet.of(Values.PACKED_INTS, + Values.SIMPLE_FLOAT_4BYTE, Values.SIMPLE_FLOAT_8BYTE); + + private static Index[] IDX_VALUES = new Index[] { Index.ANALYZED, + Index.ANALYZED_NO_NORMS, Index.NOT_ANALYZED, Index.NOT_ANALYZED_NO_NORMS, + Index.NO }; + + private OpenBitSet indexValues(IndexWriter w, int numValues, Values value, + List valueVarList, boolean withDeletions, int multOfSeven) + throws CorruptIndexException, IOException { + final boolean isNumeric = NUMERICS.contains(value); + OpenBitSet deleted = new OpenBitSet(numValues); + Document doc = new Document(); + Index idx = IDX_VALUES[random.nextInt(IDX_VALUES.length)]; + Fieldable field = random.nextBoolean() ? new ValuesField(value.name()) + : newField(value.name(), _TestUtil.randomRealisticUnicodeString(random, + 10), idx == Index.NO ? Store.YES : Store.NO, idx); + doc.add(field); + + ValuesAttribute valuesAttribute = ValuesField.values(field); + valuesAttribute.setType(value); + final LongsRef intsRef = valuesAttribute.ints(); + final FloatsRef floatsRef = valuesAttribute.floats(); + final BytesRef bytesRef = valuesAttribute.bytes(); + + final String idBase = value.name() + "_"; + final byte[] b = new byte[multOfSeven]; + if (bytesRef != null) { + bytesRef.bytes = b; + bytesRef.length = b.length; + bytesRef.offset = 0; + } + byte upto = 0; + for (int i = 0; i < numValues; i++) { + if (isNumeric) { + switch (value) { + case PACKED_INTS: + intsRef.set(i); + break; + case SIMPLE_FLOAT_4BYTE: + case SIMPLE_FLOAT_8BYTE: + floatsRef.set(2.0f * i); + break; + default: + fail("unexpected value " + value); + } + } else { + for (int j = 0; j < b.length; j++) { + b[j] = upto++; + } + } + doc.removeFields("id"); + doc.add(new Field("id", idBase + i, Store.YES, + Index.NOT_ANALYZED_NO_NORMS)); + w.addDocument(doc); + + if (i % 7 == 0) { + if (withDeletions && random.nextBoolean()) { + Values val = valueVarList.get(random.nextInt(1 + valueVarList + .indexOf(value))); + final int randInt = val == value ? 
random.nextInt(1 + i) : random + .nextInt(numValues); + w.deleteDocuments(new Term("id", val.name() + "_" + randInt)); + if (val == value) { + deleted.set(randInt); + } + } + if (random.nextInt(10) == 0) { + w.commit(); + } + } + } + w.commit(); + + // TODO test unoptimized with deletions + if (withDeletions || random.nextBoolean()) + w.optimize(); + return deleted; } } From d936615b0afc5f6c1a9ac2a8d60f97a08bc117d9 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Tue, 23 Nov 2010 22:45:54 +0000 Subject: [PATCH 016/116] added missing ASL headers git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1038368 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/lucene/index/values/Floats.java | 17 +++++++++++++- .../org/apache/lucene/index/values/Ints.java | 23 ++++++++++++++++--- 2 files changed, 36 insertions(+), 4 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/index/values/Floats.java b/lucene/src/java/org/apache/lucene/index/values/Floats.java index dcf984b0774..38afe7da467 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Floats.java +++ b/lucene/src/java/org/apache/lucene/index/values/Floats.java @@ -1,5 +1,20 @@ package org.apache.lucene.index.values; - +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ import java.io.IOException; import java.nio.ByteBuffer; import java.nio.DoubleBuffer; diff --git a/lucene/src/java/org/apache/lucene/index/values/Ints.java b/lucene/src/java/org/apache/lucene/index/values/Ints.java index 9b4e585c64d..d3f0e691c1c 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Ints.java +++ b/lucene/src/java/org/apache/lucene/index/values/Ints.java @@ -1,24 +1,41 @@ package org.apache.lucene.index.values; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
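The indexValues helper shown above interleaves adds, sparse deletes and commits so that later read passes can verify both filled-up and deleted slots across merges. A condensed sketch of its delete bookkeeping, under the assumption of documents keyed by a unique "id" field; the method name, the "doc_" id scheme and the bare IndexWriter/Random parameters are illustrative stand-ins, not the test's exact names:

    // Add numDocs documents keyed by a unique "id" term; every seventh doc,
    // maybe delete one that was already added, recording the deletion in an
    // OpenBitSet so the verification pass knows which slots must be missing.
    static OpenBitSet addAndDelete(IndexWriter writer, Random random, int numDocs)
        throws IOException {
      OpenBitSet deleted = new OpenBitSet(numDocs);
      for (int i = 0; i < numDocs; i++) {
        Document doc = new Document();
        doc.add(new Field("id", "doc_" + i, Store.YES, Index.NOT_ANALYZED_NO_NORMS));
        writer.addDocument(doc);
        if (i % 7 == 0 && random.nextBoolean()) {
          final int victim = random.nextInt(1 + i); // only ids that already exist
          writer.deleteDocuments(new Term("id", "doc_" + victim));
          deleted.set(victim);
        }
      }
      writer.commit();
      return deleted; // the read pass checks these slots report the missing value
    }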
+ */ import java.io.IOException; import org.apache.lucene.index.values.PackedIntsImpl.IntsReader; import org.apache.lucene.index.values.PackedIntsImpl.IntsWriter; import org.apache.lucene.store.Directory; + //TODO - add bulk copy where possible public class Ints { private Ints() { } - public static Writer getWriter(Directory dir, String id, boolean useFixedArray) throws IOException { - //TODO - implement fixed?! + // TODO - implement fixed?! return new IntsWriter(dir, id); } - public static DocValues getValues(Directory dir, String id, boolean useFixedArray) throws IOException { + public static DocValues getValues(Directory dir, String id, + boolean useFixedArray) throws IOException { return new IntsReader(dir, id); } } From 399c93850c5a80cf2f6ddd2491bc9656daa8119a Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Tue, 30 Nov 2010 14:45:45 +0000 Subject: [PATCH 017/116] LUCENE-2186: added tracking of used bytes during indexing git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1040544 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene/index/DocFieldProcessor.java | 1 + .../apache/lucene/index/DocumentsWriter.java | 4 +- .../apache/lucene/index/SegmentMerger.java | 3 +- .../lucene/index/SegmentWriteState.java | 6 +- .../lucene/index/codecs/FieldsConsumer.java | 2 +- .../codecs/docvalues/DocValuesCodec.java | 3 +- .../codecs/docvalues/DocValuesConsumer.java | 15 +- .../org/apache/lucene/index/values/Bytes.java | 25 ++-- .../apache/lucene/index/values/DocValues.java | 12 +- .../index/values/FixedDerefBytesImpl.java | 18 ++- .../index/values/FixedSortedBytesImpl.java | 19 +-- .../index/values/FixedStraightBytesImpl.java | 4 +- .../apache/lucene/index/values/Floats.java | 29 ++-- .../org/apache/lucene/index/values/Ints.java | 5 +- .../lucene/index/values/MultiDocValues.java | 12 +- .../lucene/index/values/PackedIntsImpl.java | 24 ++-- .../apache/lucene/index/values/Values.java | 4 +- .../index/values/VarDerefBytesImpl.java | 70 +++++---- .../index/values/VarSortedBytesImpl.java | 22 +-- .../index/values/VarStraightBytesImpl.java | 4 +- .../apache/lucene/index/values/Writer.java | 25 ++-- .../org/apache/lucene/util/BytesRefHash.java | 135 +++++++++--------- .../org/apache/lucene/util/FloatsRef.java | 24 +++- .../java/org/apache/lucene/util/LongsRef.java | 34 +++-- .../org/apache/lucene/util/PagedBytes.java | 14 +- .../org/apache/lucene/index/TestCodecs.java | 3 +- .../lucene/index/values/TestDocValues.java | 14 +- .../index/values/TestDocValuesIndexing.java | 73 +++++++++- 28 files changed, 369 insertions(+), 235 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java index 1eeeacb1176..4b37b0481c0 100644 --- a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java +++ b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java @@ -64,6 +64,7 @@ final class DocFieldProcessor extends DocConsumer { } valuesConsumer = fieldsConsumer.addValuesField(fieldInfo); docValues.put(name, valuesConsumer); + } return valuesConsumer; diff --git a/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java b/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java index 175ab09bc68..296c57af867 100644 --- a/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java +++ b/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java @@ -604,14 +604,14 @@ final class DocumentsWriter { initSegmentName(onlyDocStore); final SegmentCodecs info = 
SegmentCodecs.build(docFieldProcessor.fieldInfos, writer.codecs); flushState = new SegmentWriteState(infoStream, directory, segment, docFieldProcessor.fieldInfos, - docStoreSegment, numDocsInRAM, numDocsInStore, writer.getConfig().getTermIndexInterval(), info); + docStoreSegment, numDocsInRAM, numDocsInStore, writer.getConfig().getTermIndexInterval(), info, bytesUsed); } SegmentWriteState segWriteState() { final SegmentCodecs info = SegmentCodecs.build(docFieldProcessor.fieldInfos, writer.codecs); return new SegmentWriteState(infoStream, directory, segment, docFieldProcessor.fieldInfos, docStoreSegment, numDocsInRAM, numDocsInStore, writer.getConfig().getTermIndexInterval(), - info); + info, bytesUsed); } /** Returns the SegmentCodecs used to flush the last segment */ diff --git a/lucene/src/java/org/apache/lucene/index/SegmentMerger.java b/lucene/src/java/org/apache/lucene/index/SegmentMerger.java index 05ded0aa5ba..d66de3d41b1 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentMerger.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentMerger.java @@ -23,6 +23,7 @@ import java.util.Collection; import java.util.Set; import java.util.HashSet; import java.util.List; +import java.util.concurrent.atomic.AtomicLong; import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexReader.FieldOption; @@ -366,7 +367,7 @@ final class SegmentMerger { } } - segmentWriteState = new SegmentWriteState(null, directory, segment, fieldInfos, null, docCount, 0, termIndexInterval, codecInfo); + segmentWriteState = new SegmentWriteState(null, directory, segment, fieldInfos, null, docCount, 0, termIndexInterval, codecInfo, new AtomicLong(0)); return docCount; } diff --git a/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java b/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java index 427e6ba70b5..30d8db64c03 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java @@ -20,6 +20,7 @@ package org.apache.lucene.index; import java.io.PrintStream; import java.util.Collection; import java.util.HashSet; +import java.util.concurrent.atomic.AtomicLong; import org.apache.lucene.store.Directory; @@ -35,6 +36,7 @@ public class SegmentWriteState { public final int numDocs; public int numDocsInStore; public final Collection flushedFiles; + public final AtomicLong bytesUsed; final SegmentCodecs segmentCodecs; public final String codecId; @@ -62,7 +64,7 @@ public class SegmentWriteState { public SegmentWriteState(PrintStream infoStream, Directory directory, String segmentName, FieldInfos fieldInfos, String docStoreSegmentName, int numDocs, - int numDocsInStore, int termIndexInterval, SegmentCodecs segmentCodecs) { + int numDocsInStore, int termIndexInterval, SegmentCodecs segmentCodecs, AtomicLong bytesUsed) { this.infoStream = infoStream; this.directory = directory; this.segmentName = segmentName; @@ -74,6 +76,7 @@ public class SegmentWriteState { this.segmentCodecs = segmentCodecs; flushedFiles = new HashSet(); codecId = ""; + this.bytesUsed = bytesUsed; } /** @@ -91,5 +94,6 @@ public class SegmentWriteState { segmentCodecs = state.segmentCodecs; flushedFiles = state.flushedFiles; this.codecId = codecId; + bytesUsed = state.bytesUsed; } } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/FieldsConsumer.java b/lucene/src/java/org/apache/lucene/index/codecs/FieldsConsumer.java index e4e29b79bd1..27d1a87d0c1 100644 --- 
a/lucene/src/java/org/apache/lucene/index/codecs/FieldsConsumer.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/FieldsConsumer.java @@ -44,7 +44,7 @@ public abstract class FieldsConsumer implements Closeable { public abstract TermsConsumer addField(FieldInfo field) throws IOException; /** Adds a new DocValuesField */ - public /*abstract*/ DocValuesConsumer addValuesField(FieldInfo field) throws IOException { + public DocValuesConsumer addValuesField(FieldInfo field) throws IOException { throw new UnsupportedOperationException("docvalues are not supported"); } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java index 2a4a880b790..43264f67b84 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java @@ -22,6 +22,7 @@ import java.util.HashSet; import java.util.Iterator; import java.util.Set; import java.util.Map.Entry; +import java.util.concurrent.atomic.AtomicLong; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldsEnum; @@ -107,7 +108,7 @@ public class DocValuesCodec extends Codec { + field.number), // TODO can we have a compound file per segment and codec for // docvalues? - state.directory, field, comparator); + state.directory, field, comparator, state.bytesUsed); info.add(field.number); return consumer; } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesConsumer.java b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesConsumer.java index 5f9cd9702b9..2a6a7c7a88f 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesConsumer.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesConsumer.java @@ -19,6 +19,7 @@ package org.apache.lucene.index.codecs.docvalues; import java.io.IOException; import java.util.Collection; import java.util.Comparator; +import java.util.concurrent.atomic.AtomicLong; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.IndexReader; @@ -35,6 +36,16 @@ import org.apache.lucene.util.BytesRef; // TODO this might need to go in the codec package since is a direct relative to // TermsConsumer public abstract class DocValuesConsumer { + + protected AtomicLong bytesUsed = new AtomicLong(0); + + protected DocValuesConsumer(AtomicLong bytesUsed) { + this.bytesUsed = bytesUsed; + } + + public final long bytesUsed() { + return this.bytesUsed.get(); + } public abstract void add(int docID, ValuesAttribute attr) throws IOException; @@ -89,8 +100,8 @@ public abstract class DocValuesConsumer { } public static DocValuesConsumer create(String id, - Directory directory, FieldInfo field, Comparator comp) + Directory directory, FieldInfo field, Comparator comp, AtomicLong bytesUsed) throws IOException { - return Writer.create(field.getDocValues(), id, directory, comp); + return Writer.create(field.getDocValues(), id, directory, comp, bytesUsed); } } diff --git a/lucene/src/java/org/apache/lucene/index/values/Bytes.java b/lucene/src/java/org/apache/lucene/index/values/Bytes.java index f9eeff57093..89cd3441c51 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Bytes.java +++ b/lucene/src/java/org/apache/lucene/index/values/Bytes.java @@ -24,7 +24,7 @@ import java.util.Comparator; import java.util.concurrent.atomic.AtomicLong; import org.apache.lucene.index.IndexFileNames; -import 
org.apache.lucene.index.values.DocValues.MissingValues; +import org.apache.lucene.index.values.DocValues.MissingValue; import org.apache.lucene.index.values.DocValues.SortedSource; import org.apache.lucene.index.values.DocValues.Source; import org.apache.lucene.index.values.DocValues.SourceEnum; @@ -64,7 +64,7 @@ public final class Bytes { // TODO -- i shouldn't have to specify fixed? can // track itself & do the write thing at write time? public static Writer getWriter(Directory dir, String id, Mode mode, - Comparator comp, boolean fixedSize) throws IOException { + Comparator comp, boolean fixedSize, AtomicLong bytesUsed) throws IOException { if (comp == null) { comp = BytesRef.getUTF8SortedAsUnicodeComparator(); @@ -74,17 +74,17 @@ public final class Bytes { if (mode == Mode.STRAIGHT) { return new FixedStraightBytesImpl.Writer(dir, id); } else if (mode == Mode.DEREF) { - return new FixedDerefBytesImpl.Writer(dir, id); + return new FixedDerefBytesImpl.Writer(dir, id, bytesUsed); } else if (mode == Mode.SORTED) { - return new FixedSortedBytesImpl.Writer(dir, id, comp); + return new FixedSortedBytesImpl.Writer(dir, id, comp, bytesUsed); } } else { if (mode == Mode.STRAIGHT) { - return new VarStraightBytesImpl.Writer(dir, id); + return new VarStraightBytesImpl.Writer(dir, id, bytesUsed); } else if (mode == Mode.DEREF) { - return new VarDerefBytesImpl.Writer(dir, id); + return new VarDerefBytesImpl.Writer(dir, id, bytesUsed); } else if (mode == Mode.SORTED) { - return new VarSortedBytesImpl.Writer(dir, id, comp); + return new VarSortedBytesImpl.Writer(dir, id, comp, bytesUsed); } } @@ -162,7 +162,7 @@ public final class Bytes { @Override public ValuesEnum getEnum(AttributeSource attrSource) throws IOException { - final MissingValues missing = getMissing(); + final MissingValue missing = getMissing(); return new SourceEnum(attrSource, type(), this, maxDoc()) { final BytesRef bytesRef = attr.bytes(); @@ -248,7 +248,7 @@ public final class Bytes { @Override public ValuesEnum getEnum(AttributeSource attrSource) throws IOException { - final MissingValues missing = getMissing(); + final MissingValue missing = getMissing(); return new SourceEnum(attrSource, type(), this, maxDoc()) { final BytesRef bytesRef = attr.bytes(); @@ -275,20 +275,19 @@ public final class Bytes { protected IndexOutput idxOut; protected IndexOutput datOut; protected BytesRef bytesRef; - private String codecName; - private int version; + private final String codecName; + private final int version; protected final ByteBlockPool pool; - protected final AtomicLong bytesUsed; protected BytesWriterBase(Directory dir, String id, String codecName, int version, boolean initIndex, boolean initData, ByteBlockPool pool, AtomicLong bytesUsed) throws IOException { + super(bytesUsed); this.dir = dir; this.id = id; this.codecName = codecName; this.version = version; this.pool = pool; - this.bytesUsed = bytesUsed; if (initData) initDataOut(); if (initIndex) diff --git a/lucene/src/java/org/apache/lucene/index/values/DocValues.java b/lucene/src/java/org/apache/lucene/index/values/DocValues.java index a0d84ff1d79..d734d5e7b0b 100644 --- a/lucene/src/java/org/apache/lucene/index/values/DocValues.java +++ b/lucene/src/java/org/apache/lucene/index/values/DocValues.java @@ -70,7 +70,7 @@ public abstract class DocValues implements Closeable { * used since it can handle all precisions. 
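Bytes.getWriter above is the single factory for all six byte[] layouts: the Mode (STRAIGHT, DEREF, SORTED) together with the fixedSize flag picks the implementation, and the shared bytesUsed counter is handed down to it. A minimal caller sketch, assuming a Directory dir, a per-field id and a docCount:

    AtomicLong bytesUsed = new AtomicLong(0);
    // a null comparator falls back to BytesRef.getUTF8SortedAsUnicodeComparator()
    Writer writer = Bytes.getWriter(dir, "field", Bytes.Mode.SORTED,
        null, /* fixedSize */ false, bytesUsed);
    // ... feed values, then close out the files for docCount documents:
    writer.finish(docCount);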
*/ public static abstract class Source { - protected final MissingValues missingValues = new MissingValues(); + protected final MissingValue missingValue = new MissingValue(); public long getInt(int docID) { throw new UnsupportedOperationException("ints are not supported"); @@ -96,8 +96,8 @@ public abstract class DocValues implements Closeable { return getEnum(new AttributeSource()); } - public MissingValues getMissing() { - return missingValues; + public MissingValue getMissing() { + return missingValue; } public abstract Values type(); @@ -105,8 +105,6 @@ public abstract class DocValues implements Closeable { public abstract ValuesEnum getEnum(AttributeSource attrSource) throws IOException; - public abstract long ramBytesUsed(); - } abstract static class SourceEnum extends ValuesEnum { @@ -171,12 +169,12 @@ public abstract class DocValues implements Closeable { public abstract LookupResult getByValue(BytesRef value, BytesRef tmpRef); } - public final static class MissingValues { + public final static class MissingValue { public long longValue; public double doubleValue; public BytesRef bytesValue; - public final void copy(MissingValues values) { + public final void copy(MissingValue values) { longValue = values.longValue; doubleValue = values.doubleValue; bytesValue = values.bytesValue; diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java index 06a322b9972..f5df15dd7d6 100644 --- a/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java @@ -34,6 +34,7 @@ import org.apache.lucene.util.PagedBytes; import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.ByteBlockPool.Allocator; import org.apache.lucene.util.ByteBlockPool.DirectAllocator; +import org.apache.lucene.util.BytesRefHash.TrackingDirectBytesStartArray; import org.apache.lucene.util.packed.PackedInts; // Stores fixed-length byte[] by deref, ie when two docs @@ -48,11 +49,12 @@ class FixedDerefBytesImpl { static class Writer extends BytesWriterBase { private int size = -1; private int[] docToID; - private final BytesRefHash hash = new BytesRefHash(pool); + private final BytesRefHash hash = new BytesRefHash(pool, BytesRefHash.DEFAULT_CAPACITY, + new TrackingDirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, bytesUsed)); - public Writer(Directory dir, String id) throws IOException { + public Writer(Directory dir, String id, AtomicLong bytesUsed) throws IOException { this(dir, id, new DirectAllocator(ByteBlockPool.BYTE_BLOCK_SIZE), - new AtomicLong()); + bytesUsed); } public Writer(Directory dir, String id, Allocator allocator, @@ -60,7 +62,7 @@ class FixedDerefBytesImpl { super(dir, id, CODEC_NAME, VERSION_CURRENT, false, false, new ByteBlockPool(allocator), bytesUsed); docToID = new int[1]; - bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT); + bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT); // TODO BytesRefHash uses bytes too! 
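The constructor above charges the initial docToID slot to the shared counter; every later growth in add() charges only the delta, since ArrayUtil.grow oversizes. The idiom in isolation, using the same names as the patch:

    if (docID >= docToID.length) {
      final int size = docToID.length;
      docToID = ArrayUtil.grow(docToID, 1 + docID);
      // account only for the ints actually added by this grow
      bytesUsed.addAndGet((docToID.length - size) * RamUsageEstimator.NUM_BYTES_INT);
    }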
} @Override @@ -85,7 +87,7 @@ class FixedDerefBytesImpl { } if (docID >= docToID.length) { - int size = docToID.length; + final int size = docToID.length; docToID = ArrayUtil.grow(docToID, 1 + docID); bytesUsed.addAndGet((docToID.length - size) * RamUsageEstimator.NUM_BYTES_INT); @@ -114,9 +116,11 @@ class FixedDerefBytesImpl { w.add(0); } w.finish(); - hash.clear(); - + hash.close(); super.finish(docCount); + bytesUsed.addAndGet((-docToID.length) + * RamUsageEstimator.NUM_BYTES_INT); + docToID = null; } } diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java index e826a709ee9..f19ac893f49 100644 --- a/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java @@ -37,6 +37,7 @@ import org.apache.lucene.util.PagedBytes; import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.ByteBlockPool.Allocator; import org.apache.lucene.util.ByteBlockPool.DirectAllocator; +import org.apache.lucene.util.BytesRefHash.TrackingDirectBytesStartArray; import org.apache.lucene.util.packed.PackedInts; // Stores fixed-length byte[] by deref, ie when two docs @@ -53,12 +54,13 @@ class FixedSortedBytesImpl { private int[] docToEntry; private final Comparator comp; - private final BytesRefHash hash = new BytesRefHash(pool); + private final BytesRefHash hash = new BytesRefHash(pool, BytesRefHash.DEFAULT_CAPACITY, + new TrackingDirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, bytesUsed)); - public Writer(Directory dir, String id, Comparator comp) + public Writer(Directory dir, String id, Comparator comp, AtomicLong bytesUsed) throws IOException { this(dir, id, comp, new DirectAllocator(ByteBlockPool.BYTE_BLOCK_SIZE), - new AtomicLong()); + bytesUsed); } public Writer(Directory dir, String id, Comparator comp, @@ -148,6 +150,7 @@ class FixedSortedBytesImpl { bytesUsed.addAndGet((-docToEntry.length) * RamUsageEstimator.NUM_BYTES_INT); docToEntry = null; + hash.close(); } } @@ -199,16 +202,6 @@ class FixedSortedBytesImpl { return binarySearch(bytes, tmpRef, 0, numValue - 1); } - public long ramBytesUsed() { - // TODO(simonw): move ram calcultation to PackedInts? - return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER - + size - * numValue - + (RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + index - .getBitsPerValue() - * index.size()); - } - @Override public int getValueCount() { return numValue; diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java index 1ee7b6e996a..ef760835b64 100644 --- a/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java @@ -81,7 +81,7 @@ class FixedStraightBytesImpl { oneRecord = new byte[size]; } fill(state.docBase); - // nocommit should we add a transfer to API to each reader? + // TODO should we add a transfer to API to each reader? 
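The fixed-straight merge around that TODO takes the fast path when the merge state carries no deletions and the source is another reader of the same layout: the records are streamed as one block of size * maxDocs raw bytes (the copyBytes call that follows) instead of being re-added one document at a time. A sketch of the shape of that decision, where FixedStraightReader is a stand-in name for the concrete reader type and maxDocs comes from the merged reader as in the hunk:

    protected void merge(MergeState state) throws IOException {
      if (state.bits == null && state.reader instanceof FixedStraightReader) {
        FixedStraightReader reader = (FixedStraightReader) state.reader;
        fill(state.docBase);                       // pad the gap before this segment
        datOut.copyBytes(reader.cloneData(), size * maxDocs); // one sequential copy
        lastDocID += maxDocs - 1;
      } else {
        super.merge(state);                        // per-document fallback path
      }
    }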
datOut.copyBytes(reader.cloneData(), size * maxDocs); lastDocID += maxDocs-1; } else @@ -139,7 +139,7 @@ public Source(IndexInput datIn, IndexInput idxIn, int size, int maxDoc) throws IOException { super(datIn, idxIn, new PagedBytes(PAGED_BYTES_BITS), size*maxDoc); this.size = size; - this.missingValues.bytesValue = new BytesRef(size); + this.missingValue.bytesValue = new BytesRef(size); this.maxDoc = maxDoc; } diff --git a/lucene/src/java/org/apache/lucene/index/values/Floats.java b/lucene/src/java/org/apache/lucene/index/values/Floats.java index 38afe7da467..f53345c9c3d 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Floats.java +++ b/lucene/src/java/org/apache/lucene/index/values/Floats.java @@ -20,6 +20,7 @@ import java.nio.ByteBuffer; import java.nio.DoubleBuffer; import java.nio.FloatBuffer; import java.util.Collection; +import java.util.concurrent.atomic.AtomicLong; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.store.Directory; @@ -44,16 +45,16 @@ public class Floats { private static final long LONG_DEFAULT = Double .doubleToRawLongBits(Double.NEGATIVE_INFINITY); - public static Writer getWriter(Directory dir, String id, int precisionBytes) + public static Writer getWriter(Directory dir, String id, int precisionBytes, AtomicLong bytesUsed) throws IOException { if (precisionBytes != 4 && precisionBytes != 8) { throw new IllegalArgumentException("precisionBytes must be 4 or 8; got " + precisionBytes); } if (precisionBytes == 4) { - return new Float4Writer(dir, id); + return new Float4Writer(dir, id, bytesUsed); } else { - return new Float8Writer(dir, id); + return new Float8Writer(dir, id, bytesUsed); } } @@ -63,7 +64,6 @@ public class Floats { } abstract static class FloatsWriter extends Writer { - private final Directory dir; private final String id; private FloatsRef floatsRef; @@ -71,8 +71,9 @@ public class Floats { protected IndexOutput datOut; private final byte precision; - protected FloatsWriter(Directory dir, String id, int precision) + protected FloatsWriter(Directory dir, String id, int precision, AtomicLong bytesUsed) throws IOException { + super(bytesUsed); this.dir = dir; this.id = id; this.precision = (byte) precision; @@ -113,7 +114,7 @@ public class Floats { protected void merge(MergeState state) throws IOException { if (state.bits == null && state.reader instanceof FloatsReader) { // no deletes - bulk copy - // nocommit - should be do bulks with deletes too? + // TODO: should we do bulk copies with deletes too?
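getWriter above validates the precision eagerly, so a field is either four bytes (float) or eight bytes (double) per document for its whole life. A minimal usage sketch, assuming a Directory dir; the field ids are illustrative:

    AtomicLong bytesUsed = new AtomicLong(0);
    Writer floats4 = Floats.getWriter(dir, "price", 4, bytesUsed); // float per doc
    Writer floats8 = Floats.getWriter(dir, "score", 8, bytesUsed); // double per doc
    // Floats.getWriter(dir, "bad", 2, bytesUsed) throws IllegalArgumentException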
final FloatsReader reader = (FloatsReader) state.reader; assert reader.precisionBytes == (int) precision; if (reader.maxDoc == 0) @@ -140,8 +141,8 @@ public class Floats { // Writes 4 bytes (float) per value static class Float4Writer extends FloatsWriter { - protected Float4Writer(Directory dir, String id) throws IOException { - super(dir, id, 4); + protected Float4Writer(Directory dir, String id, AtomicLong bytesUsed) throws IOException { + super(dir, id, 4, bytesUsed); } @Override @@ -184,8 +185,8 @@ public class Floats { // Writes 8 bytes (double) per value static class Float8Writer extends FloatsWriter { - protected Float8Writer(Directory dir, String id) throws IOException { - super(dir, id, 8); + protected Float8Writer(Directory dir, String id, AtomicLong bytesUsed) throws IOException { + super(dir, id, 8, bytesUsed); } @Override @@ -280,7 +281,7 @@ public class Floats { Source4(ByteBuffer buffer) { values = buffer.asFloatBuffer(); - missingValues.doubleValue = Float.NEGATIVE_INFINITY; + missingValue.doubleValue = Float.NEGATIVE_INFINITY; } @Override @@ -295,7 +296,7 @@ public class Floats { @Override public ValuesEnum getEnum(AttributeSource attrSource) throws IOException { - final MissingValues missing = getMissing(); + final MissingValue missing = getMissing(); return new SourceEnum(attrSource, Values.SIMPLE_FLOAT_4BYTE, this, maxDoc) { private final FloatsRef ref = attr.floats(); @Override @@ -324,7 +325,7 @@ public class Floats { Source8(ByteBuffer buffer) { values = buffer.asDoubleBuffer(); - missingValues.doubleValue = Double.NEGATIVE_INFINITY; + missingValue.doubleValue = Double.NEGATIVE_INFINITY; } @@ -340,7 +341,7 @@ public class Floats { @Override public ValuesEnum getEnum(AttributeSource attrSource) throws IOException { - final MissingValues missing = getMissing(); + final MissingValue missing = getMissing(); return new SourceEnum(attrSource, type(), this, maxDoc) { private final FloatsRef ref = attr.floats(); @Override diff --git a/lucene/src/java/org/apache/lucene/index/values/Ints.java b/lucene/src/java/org/apache/lucene/index/values/Ints.java index d3f0e691c1c..7955d7c7753 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Ints.java +++ b/lucene/src/java/org/apache/lucene/index/values/Ints.java @@ -17,6 +17,7 @@ package org.apache.lucene.index.values; */ import java.io.IOException; +import java.util.concurrent.atomic.AtomicLong; import org.apache.lucene.index.values.PackedIntsImpl.IntsReader; import org.apache.lucene.index.values.PackedIntsImpl.IntsWriter; @@ -28,10 +29,10 @@ public class Ints { private Ints() { } - public static Writer getWriter(Directory dir, String id, boolean useFixedArray) + public static Writer getWriter(Directory dir, String id, boolean useFixedArray, AtomicLong bytesUsed) throws IOException { // TODO - implement fixed?! 
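Both float sources above seed their MissingValue sentinel with negative infinity, so a reader can tell apart documents that never received a value. A consumer sketch, assuming a loaded DocValues instance for the field and its maxDoc; process is a hypothetical callback:

    Source floats = docValues.getSource();
    MissingValue missing = floats.getMissing();
    for (int docID = 0; docID < maxDoc; docID++) {
      final double v = floats.getFloat(docID);
      if (v == missing.doubleValue) {
        continue; // no value was ever added for this document
      }
      process(docID, v);
    }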
- return new IntsWriter(dir, id); + return new IntsWriter(dir, id, bytesUsed); } public static DocValues getValues(Directory dir, String id, diff --git a/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java b/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java index d1780936f5c..0e81c25d725 100644 --- a/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java +++ b/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java @@ -192,7 +192,7 @@ public class MultiDocValues extends DocValues { assert docValuesIdx[idx] != null; try { current = docValuesIdx[idx].docValues.getSource(); - missingValues.copy(current.getMissing()); + missingValue.copy(current.getMissing()); } catch (IOException e) { throw new RuntimeException("load failed", e); // TODO how should we // handle this @@ -215,10 +215,6 @@ public class MultiDocValues extends DocValues { return current.getBytes(doc, bytesRef); } - public long ramBytesUsed() { - return current.ramBytesUsed(); - } - @Override public ValuesEnum getEnum(AttributeSource attrSource) throws IOException { throw new UnsupportedOperationException(); // TODO @@ -240,18 +236,18 @@ public class MultiDocValues extends DocValues { @Override public BytesRef getBytes(int docID, BytesRef ref) { - return this.missingValues.bytesValue; + return this.missingValue.bytesValue; } @Override public double getFloat(int docID) { - return missingValues.doubleValue; + return missingValue.doubleValue; } @Override public long getInt(int docID) { - return missingValues.longValue; + return missingValue.longValue; } public long ramBytesUsed() { diff --git a/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java b/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java index 91f56bdd660..61f19e7cb5a 100644 --- a/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java @@ -18,9 +18,9 @@ package org.apache.lucene.index.values; */ import java.io.IOException; import java.util.Collection; +import java.util.concurrent.atomic.AtomicLong; import org.apache.lucene.index.IndexFileNames; -import org.apache.lucene.index.values.DocValues.MissingValues; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; @@ -50,13 +50,15 @@ class PackedIntsImpl { private boolean started; private final Directory dir; private final String id; - private OpenBitSet defaultValues = new OpenBitSet(1); + private final OpenBitSet defaultValues = new OpenBitSet(1); private int lastDocId = -1; - protected IntsWriter(Directory dir, String id) throws IOException { + protected IntsWriter(Directory dir, String id, AtomicLong bytesUsed) throws IOException { + super(bytesUsed); this.dir = dir; this.id = id; docToValue = new long[1]; + bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_LONG); // TODO the bitset needs memory too } @Override @@ -76,9 +78,10 @@ class PackedIntsImpl { lastDocId = docID; if (docID >= docToValue.length) { + final long len = docToValue.length ; docToValue = ArrayUtil.grow(docToValue, 1 + docID); defaultValues.ensureCapacity(docToValue.length); - + bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_LONG * ((docToValue.length) - len)); } docToValue[docID] = v; } @@ -115,13 +118,10 @@ class PackedIntsImpl { w.add(defaultValue); } w.finish(); - datOut.close(); - } - - public long ramBytesUsed() { - return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + docToValue.length - * 
RamUsageEstimator.NUM_BYTES_LONG; + bytesUsed.addAndGet(-(RamUsageEstimator.NUM_BYTES_LONG * docToValue.length )); + docToValue = null; + } @Override @@ -180,7 +180,7 @@ class PackedIntsImpl { minValue = dataIn.readLong(); defaultValue = dataIn.readLong(); values = PackedInts.getReader(dataIn); - missingValues.longValue = minValue + defaultValue; + missingValue.longValue = minValue + defaultValue; } @Override @@ -199,7 +199,7 @@ class PackedIntsImpl { @Override public ValuesEnum getEnum(AttributeSource attrSource) throws IOException { - final MissingValues missing = getMissing(); + final MissingValue missing = getMissing(); return new SourceEnum(attrSource, type(), this, values.size()) { private final LongsRef ref = attr.ints(); @Override diff --git a/lucene/src/java/org/apache/lucene/index/values/Values.java b/lucene/src/java/org/apache/lucene/index/values/Values.java index e33c0cb9b1b..56921dd36c2 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Values.java +++ b/lucene/src/java/org/apache/lucene/index/values/Values.java @@ -22,8 +22,8 @@ package org.apache.lucene.index.values; * values into RAM, exposing a random access API, when * loaded. * - *

NOTE: This feature is experimental and the - * API is free to change in non-backwards-compatible ways. */ + * @lucene.experimental + */ public enum Values { /** Integral value is stored as packed ints. The bit diff --git a/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java index 2dfa5bdc8a1..b0e89ce92cd 100644 --- a/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java @@ -37,8 +37,7 @@ import org.apache.lucene.util.PagedBytes; import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.ByteBlockPool.Allocator; import org.apache.lucene.util.ByteBlockPool.DirectAllocator; -import org.apache.lucene.util.BytesRefHash.ParallelArrayBase; -import org.apache.lucene.util.BytesRefHash.ParallelBytesStartArray; +import org.apache.lucene.util.BytesRefHash.TrackingDirectBytesStartArray; import org.apache.lucene.util.packed.PackedInts; // Stores variable-length byte[] by deref, ie when two docs @@ -51,30 +50,47 @@ class VarDerefBytesImpl { static final int VERSION_START = 0; static final int VERSION_CURRENT = VERSION_START; - private static class AddressParallelArray extends - ParallelArrayBase { - final int[] address; + private static final class AddressByteStartArray extends + TrackingDirectBytesStartArray { + int[] address; - AddressParallelArray(int size, AtomicLong bytesUsed) { + AddressByteStartArray(int size, AtomicLong bytesUsed) { super(size, bytesUsed); - address = new int[size]; } @Override - protected int bytesPerEntry() { - return RamUsageEstimator.NUM_BYTES_INT + super.bytesPerEntry(); + public AtomicLong bytesUsed() { + return bytesUsed; } @Override - protected void copyTo(AddressParallelArray toArray, int numToCopy) { - super.copyTo(toArray, numToCopy); - System.arraycopy(address, 0, toArray.address, 0, size); - + public int[] clear() { + if (address != null) { + bytesUsed.addAndGet(-address.length * RamUsageEstimator.NUM_BYTES_INT); + address = null; + } + return super.clear(); } @Override - public AddressParallelArray newInstance(int size) { - return new AddressParallelArray(size, bytesUsed); + public int[] grow() { + assert address != null; + final int oldSize = address.length; + final int[] retVal = super.grow(); + address = ArrayUtil.grow(address, retVal.length); + bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT + * (address.length - oldSize)); + return retVal; + } + + @Override + public int[] init() { + if (address == null) { + address = new int[ArrayUtil.oversize(initSize, + RamUsageEstimator.NUM_BYTES_INT)]; + bytesUsed.addAndGet((address.length) * RamUsageEstimator.NUM_BYTES_INT); + } + return super.init(); } } @@ -83,13 +99,14 @@ class VarDerefBytesImpl { private int[] docToAddress; private int address = 1; - private final ParallelBytesStartArray array = new ParallelBytesStartArray( - new AddressParallelArray(0, bytesUsed)); + private final AddressByteStartArray array = new AddressByteStartArray(1, + bytesUsed); + private final BytesRefHash hash = new BytesRefHash(pool, 16, array); - public Writer(Directory dir, String id) throws IOException { + public Writer(Directory dir, String id, AtomicLong bytesUsed) + throws IOException { this(dir, id, new DirectAllocator(ByteBlockPool.BYTE_BLOCK_SIZE), - new AtomicLong()); + bytesUsed); } public Writer(Directory dir, String id, Allocator allocator, @@ -116,12 +133,12 @@ class VarDerefBytesImpl { } final int docAddress; if (e >= 0) { -
docAddress = array.array.address[e] = address; + docAddress = array.address[e] = address; address += writePrefixLength(datOut, bytes); datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length); address += bytes.length; } else { - docAddress = array.array.address[(-e) - 1]; + docAddress = array.address[(-e) - 1]; } docToAddress[docID] = docAddress; } @@ -138,10 +155,6 @@ class VarDerefBytesImpl { } } - public long ramBytesUsed() { - return bytesUsed.get(); - } - // Important that we get docCount, in case there were // some last docs that we didn't see @Override @@ -169,8 +182,11 @@ class VarDerefBytesImpl { w.add(0); } w.finish(); - hash.clear(true); + hash.close(); super.finish(docCount); + bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT + * (-docToAddress.length)); + docToAddress = null; } } @@ -202,7 +218,7 @@ class VarDerefBytesImpl { @Override public BytesRef getBytes(int docID, BytesRef bytesRef) { long address = index.get(docID); - return address == 0 ? null : data.fillUsingLengthPrefix4(bytesRef, + return address == 0 ? null : data.fillSliceWithPrefix(bytesRef, --address); } diff --git a/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java index 4504ee4f431..0c22fd8ae54 100644 --- a/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java @@ -36,6 +36,7 @@ import org.apache.lucene.util.PagedBytes; import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.ByteBlockPool.Allocator; import org.apache.lucene.util.ByteBlockPool.DirectAllocator; +import org.apache.lucene.util.BytesRefHash.TrackingDirectBytesStartArray; import org.apache.lucene.util.packed.PackedInts; // Stores variable-length byte[] by deref, ie when two docs @@ -52,12 +53,12 @@ class VarSortedBytesImpl { private int[] docToEntry; private final Comparator comp; - private final BytesRefHash hash = new BytesRefHash(pool); + private final BytesRefHash hash = new BytesRefHash(pool, BytesRefHash.DEFAULT_CAPACITY, + new TrackingDirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, bytesUsed)); - public Writer(Directory dir, String id, Comparator comp) + public Writer(Directory dir, String id, Comparator comp, AtomicLong bytesUsed) throws IOException { - this(dir, id, comp, new DirectAllocator(ByteBlockPool.BYTE_BLOCK_SIZE), - new AtomicLong()); + this(dir, id, comp, new DirectAllocator(ByteBlockPool.BYTE_BLOCK_SIZE), bytesUsed); } public Writer(Directory dir, String id, Comparator comp, @@ -147,6 +148,7 @@ class VarSortedBytesImpl { super.finish(docCount); bytesUsed.addAndGet((-docToEntry.length) * RamUsageEstimator.NUM_BYTES_INT); + hash.close(); } } @@ -195,18 +197,6 @@ class VarSortedBytesImpl { return binarySearch(bytes, tmpRef, 0, valueCount - 1); } - public long ramBytesUsed() { - // TODO(simonw): move ram usage to PackedInts? 
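The add() path of VarDerefBytesImpl above leans on the BytesRefHash contract: a non-negative return is the ord of a newly added entry, while a negative return encodes an already-present entry as -(ord)-1. That contract is what makes dereferencing work, since each distinct value is written to disk exactly once. The branch, restated as in the patch (e being the value returned by hash.add):

    final int docAddress;
    if (e >= 0) {                        // first occurrence: write the bytes
      docAddress = array.address[e] = address;
      address += writePrefixLength(datOut, bytes); // 1 or 2 byte vInt prefix
      datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length);
      address += bytes.length;
    } else {                             // duplicate: point at the stored copy
      docAddress = array.address[(-e) - 1];
    }
    docToAddress[docID] = docAddress;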
- return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER - + totBytes - + (RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + docToOrdIndex - .getBitsPerValue() - * docToOrdIndex.getBitsPerValue()) - + (RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + ordToOffsetIndex - .getBitsPerValue() - * ordToOffsetIndex.getBitsPerValue()); - } - @Override public int getValueCount() { return valueCount; diff --git a/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java index 0f3f6dff28f..ccff45e1c5b 100644 --- a/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java @@ -62,7 +62,7 @@ class VarStraightBytesImpl { if (docID >= docToAddress.length) { int oldSize = docToAddress.length; docToAddress = ArrayUtil.grow(docToAddress, 1 + docID); - bytesUsed.addAndGet(-(docToAddress.length - oldSize) + bytesUsed.addAndGet((docToAddress.length - oldSize) * RamUsageEstimator.NUM_BYTES_INT); } for (int i = lastDocID + 1; i < docID; i++) { @@ -127,7 +127,7 @@ class VarStraightBytesImpl { public Source(IndexInput datIn, IndexInput idxIn) throws IOException { super(datIn, idxIn, new PagedBytes(PAGED_BYTES_BITS), idxIn.readVLong()); addresses = PackedInts.getReader(idxIn); - missingValues.bytesValue = new BytesRef(0); // empty + missingValue.bytesValue = new BytesRef(0); // empty } @Override diff --git a/lucene/src/java/org/apache/lucene/index/values/Writer.java b/lucene/src/java/org/apache/lucene/index/values/Writer.java index 04471b25427..82dc03b6f2b 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Writer.java +++ b/lucene/src/java/org/apache/lucene/index/values/Writer.java @@ -18,6 +18,7 @@ package org.apache.lucene.index.values; */ import java.io.IOException; import java.util.Comparator; +import java.util.concurrent.atomic.AtomicLong; import org.apache.lucene.index.codecs.docvalues.DocValuesConsumer; import org.apache.lucene.store.Directory; @@ -26,6 +27,10 @@ import org.apache.lucene.util.BytesRef; public abstract class Writer extends DocValuesConsumer { + protected Writer(AtomicLong bytesUsed) { + super(bytesUsed); + } + public static final String INDEX_EXTENSION = "idx"; public static final String DATA_EXTENSION = "dat"; @@ -85,26 +90,26 @@ public abstract class Writer extends DocValuesConsumer { } public static Writer create(Values v, String id, Directory directory, - Comparator comp) throws IOException { + Comparator comp, AtomicLong bytesUsed) throws IOException { switch (v) { case PACKED_INTS: - return Ints.getWriter(directory, id, true); + return Ints.getWriter(directory, id, true, bytesUsed); case SIMPLE_FLOAT_4BYTE: - return Floats.getWriter(directory, id, 4); + return Floats.getWriter(directory, id, 4, bytesUsed); case SIMPLE_FLOAT_8BYTE: - return Floats.getWriter(directory, id, 8); + return Floats.getWriter(directory, id, 8, bytesUsed); case BYTES_FIXED_STRAIGHT: - return Bytes.getWriter(directory, id, Bytes.Mode.STRAIGHT, comp, true); + return Bytes.getWriter(directory, id, Bytes.Mode.STRAIGHT, comp, true, bytesUsed); case BYTES_FIXED_DEREF: - return Bytes.getWriter(directory, id, Bytes.Mode.DEREF, comp, true); + return Bytes.getWriter(directory, id, Bytes.Mode.DEREF, comp, true, bytesUsed); case BYTES_FIXED_SORTED: - return Bytes.getWriter(directory, id, Bytes.Mode.SORTED, comp, true); + return Bytes.getWriter(directory, id, Bytes.Mode.SORTED, comp, true, bytesUsed); case BYTES_VAR_STRAIGHT: - return 
Bytes.getWriter(directory, id, Bytes.Mode.STRAIGHT, comp, false); + return Bytes.getWriter(directory, id, Bytes.Mode.STRAIGHT, comp, false, bytesUsed); case BYTES_VAR_DEREF: - return Bytes.getWriter(directory, id, Bytes.Mode.DEREF, comp, false); + return Bytes.getWriter(directory, id, Bytes.Mode.DEREF, comp, false, bytesUsed); case BYTES_VAR_SORTED: - return Bytes.getWriter(directory, id, Bytes.Mode.SORTED, comp, false); + return Bytes.getWriter(directory, id, Bytes.Mode.SORTED, comp, false, bytesUsed); default: throw new IllegalArgumentException("Unknown Values: " + v); } diff --git a/lucene/src/java/org/apache/lucene/util/BytesRefHash.java b/lucene/src/java/org/apache/lucene/util/BytesRefHash.java index 5a4336f2d32..c5b180cfa44 100644 --- a/lucene/src/java/org/apache/lucene/util/BytesRefHash.java +++ b/lucene/src/java/org/apache/lucene/util/BytesRefHash.java @@ -227,8 +227,9 @@ public final class BytesRefHash { public void clear(boolean resetPool) { lastCount = count; count = 0; - if (resetPool) + if (resetPool) { pool.reset(); + } bytesStart = bytesStartArray.clear(); if (lastCount != -1 && shrink(lastCount)) { // shrink clears the hash entries @@ -240,6 +241,16 @@ public final class BytesRefHash { public void clear() { clear(true); } + + /** + * Closes the BytesRefHash and releases all internally used memory + */ + public void close() { + clear(true); + ords = null; + bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT + * -hashSize); + } /** * Adds a new {@link BytesRef} @@ -453,8 +464,14 @@ public final class BytesRefHash { * effect. */ public void reinit() { - if (bytesStart == null) + if (bytesStart == null) { bytesStart = bytesStartArray.init(); + } + + if (ords == null) { + ords = new int[hashSize]; + bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT * hashSize); + } } /** @@ -515,17 +532,62 @@ public final class BytesRefHash { */ public abstract AtomicLong bytesUsed(); } - - public static class DirectBytesStartArray extends BytesStartArray { - + + /** + * A direct {@link BytesStartArray} that tracks all memory allocation using an {@link AtomicLong} instance. 
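All of the writers touched by this patch wire the tracking array into their BytesRefHash the same way, so the hash's ords and bytesStart allocations land on the writer's shared counter instead of a private one. The construction as used by FixedDerefBytesImpl and the sorted/var variants above, assuming a ByteBlockPool pool:

    AtomicLong bytesUsed = new AtomicLong(0);
    BytesRefHash hash = new BytesRefHash(pool, BytesRefHash.DEFAULT_CAPACITY,
        new TrackingDirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, bytesUsed));
    // ... fill the hash; close() clears the entries and subtracts the
    // accounted bytes from the counter again:
    hash.close();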
+ */ + public static class TrackingDirectBytesStartArray extends BytesStartArray { protected final int initSize; private int[] bytesStart; - private final AtomicLong bytesUsed = new AtomicLong(0); + protected final AtomicLong bytesUsed; + + public TrackingDirectBytesStartArray(int initSize, AtomicLong bytesUsed) { + this.initSize = initSize; + this.bytesUsed = bytesUsed; + } + @Override + public int[] clear() { + if (bytesStart != null) { + bytesUsed.addAndGet(-bytesStart.length * RamUsageEstimator.NUM_BYTES_INT); + } + return bytesStart = null; + } + + @Override + public int[] grow() { + assert bytesStart != null; + final int oldSize = bytesStart.length; + bytesStart = ArrayUtil.grow(bytesStart, bytesStart.length + 1); + bytesUsed.addAndGet((bytesStart.length - oldSize) * RamUsageEstimator.NUM_BYTES_INT); + return bytesStart; + } + + @Override + public int[] init() { + bytesStart = new int[ArrayUtil.oversize(initSize, + RamUsageEstimator.NUM_BYTES_INT)]; + bytesUsed.addAndGet((bytesStart.length) * RamUsageEstimator.NUM_BYTES_INT); + return bytesStart; + } + + @Override + public AtomicLong bytesUsed() { + return bytesUsed; + } + } + + public static class DirectBytesStartArray extends BytesStartArray { + protected final int initSize; + private int[] bytesStart; + private final AtomicLong bytesUsed; + public DirectBytesStartArray(int initSize) { + this.bytesUsed = new AtomicLong(0); this.initSize = initSize; } + @Override public int[] clear() { return bytesStart = null; @@ -547,66 +609,5 @@ public final class BytesRefHash { public AtomicLong bytesUsed() { return bytesUsed; } - - } - - public static class ParallelBytesStartArray> extends BytesStartArray { - private final T prototype; - public T array; - - public ParallelBytesStartArray(T template) { - this.prototype = template; - } - @Override - public int[] init() { - if(array == null) { - array = prototype.newInstance(2); - } - return array.textStart; - } - - @Override - public int[] grow() { - array = array.grow(); - return array.textStart; - } - - @Override - public int[] clear() { - if(array != null) { - array.deref(); - array = null; - } - return null; - } - - @Override - public AtomicLong bytesUsed() { - return array.bytesUsed(); - } - - } - - public abstract static class ParallelArrayBase> extends ParallelArray { - final int[] textStart; - - protected ParallelArrayBase(int size, AtomicLong bytesUsed) { - super(size, bytesUsed); - textStart = new int[size]; - } - - @Override - protected int bytesPerEntry() { - return RamUsageEstimator.NUM_BYTES_INT; - } - - @Override - protected void copyTo(T toArray, int numToCopy) { - System.arraycopy(textStart, 0, toArray.textStart, 0, size); - } - - @Override - public abstract T newInstance(int size); - } } diff --git a/lucene/src/java/org/apache/lucene/util/FloatsRef.java b/lucene/src/java/org/apache/lucene/util/FloatsRef.java index 9dd107e323d..67066748395 100644 --- a/lucene/src/java/org/apache/lucene/util/FloatsRef.java +++ b/lucene/src/java/org/apache/lucene/util/FloatsRef.java @@ -1,9 +1,27 @@ -/** - * - */ package org.apache.lucene.util; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * Represents float[], as a slice (offset + length) into an existing float[]. + * + * @lucene.internal + */ public final class FloatsRef implements Cloneable{ public double[] floats; public int offset; diff --git a/lucene/src/java/org/apache/lucene/util/LongsRef.java b/lucene/src/java/org/apache/lucene/util/LongsRef.java index c5dee1a15b1..2a9bb2e385a 100644 --- a/lucene/src/java/org/apache/lucene/util/LongsRef.java +++ b/lucene/src/java/org/apache/lucene/util/LongsRef.java @@ -1,9 +1,27 @@ -/** - * - */ package org.apache.lucene.util; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * Represents long[], as a slice (offset + length) into an existing long[]. + * + * @lucene.internal + */ public final class LongsRef implements Cloneable { public long[] ints; public int offset; @@ -30,11 +48,11 @@ public final class LongsRef implements Cloneable { public Object clone() { return new LongsRef(this); } - + public void set(long value) { ints[offset] = value; } - + public long get() { return ints[offset]; } @@ -44,13 +62,13 @@ public final class LongsRef implements Cloneable { final int prime = 31; int result = 0; final int end = offset + length; - for(int i = offset; i < end; i++) { + for (int i = offset; i < end; i++) { long value = ints[i]; result = prime * result + (int) (value ^ (value >>> 32)); } return result; } - + @Override public boolean equals(Object other) { return this.intsEquals((LongsRef) other); @@ -61,7 +79,7 @@ public final class LongsRef implements Cloneable { int otherUpto = other.offset; final long[] otherInts = other.ints; final int end = offset + length; - for(int upto=offset;uptostart, the + * length is read as 1 or 2 byte vInt prefix. Iff the slice spans across a + * block border this method will allocate sufficient resources and copy the + * paged data. + *
+ * Slices spanning more than two blocks are not supported.
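+ * <p>
+ * A hypothetical call pattern, not part of this patch ({@code addresses}
+ * is an assumed docID-to-offset reader):
+ * <pre>
+ *   BytesRef ref = new BytesRef();
+ *   // the start offset points at the 1 or 2 byte vInt length prefix
+ *   pagedBytes.fillSliceWithPrefix(ref, addresses.get(docID));
+ *   // ref now addresses the value bytes; ref.length comes from the prefix
+ * </pre>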
+ * * @lucene.internal **/ - public BytesRef fillUsingLengthPrefix4(BytesRef b, long start) { + public BytesRef fillSliceWithPrefix(BytesRef b, long start) { final int index = (int) (start >> blockBits); int offset = (int) (start & blockMask); final byte[] block = blocks[index]; diff --git a/lucene/src/test/org/apache/lucene/index/TestCodecs.java b/lucene/src/test/org/apache/lucene/index/TestCodecs.java index 769ea0e9bee..38c80c30bec 100644 --- a/lucene/src/test/org/apache/lucene/index/TestCodecs.java +++ b/lucene/src/test/org/apache/lucene/index/TestCodecs.java @@ -20,6 +20,7 @@ package org.apache.lucene.index; import java.io.IOException; import java.util.Arrays; import java.util.HashSet; +import java.util.concurrent.atomic.AtomicLong; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; @@ -607,7 +608,7 @@ public class TestCodecs extends LuceneTestCase { final int termIndexInterval = this.nextInt(13, 27); final SegmentCodecs codecInfo = SegmentCodecs.build(fieldInfos, CodecProvider.getDefault()); - final SegmentWriteState state = new SegmentWriteState(null, dir, SEGMENT, fieldInfos, null, 10000, 10000, termIndexInterval, codecInfo); + final SegmentWriteState state = new SegmentWriteState(null, dir, SEGMENT, fieldInfos, null, 10000, 10000, termIndexInterval, codecInfo, new AtomicLong()); final FieldsConsumer consumer = state.segmentCodecs.codec().fieldsConsumer(state); Arrays.sort(fields); diff --git a/lucene/src/test/org/apache/lucene/index/values/TestDocValues.java b/lucene/src/test/org/apache/lucene/index/values/TestDocValues.java index 2b2015cf5f1..02a49ccd847 100644 --- a/lucene/src/test/org/apache/lucene/index/values/TestDocValues.java +++ b/lucene/src/test/org/apache/lucene/index/values/TestDocValues.java @@ -19,6 +19,7 @@ package org.apache.lucene.index.values; import java.io.IOException; import java.util.Comparator; +import java.util.concurrent.atomic.AtomicLong; import org.apache.lucene.index.values.DocValues.SortedSource; import org.apache.lucene.index.values.DocValues.Source; @@ -59,7 +60,8 @@ public class TestDocValues extends LuceneTestCase { .getUTF8SortedAsUnicodeComparator() : null; Directory dir = newDirectory(); - Writer w = Bytes.getWriter(dir, "test", mode, comp, fixedSize); + final AtomicLong trackBytes = new AtomicLong(0); + Writer w = Bytes.getWriter(dir, "test", mode, comp, fixedSize, trackBytes); int maxDoc = 220; final String[] values = new String[maxDoc]; final int lenMin, lenMax; @@ -83,6 +85,7 @@ public class TestDocValues extends LuceneTestCase { w.add(2 * i, bytesRef); } w.finish(maxDoc); + assertEquals(0, trackBytes.get()); DocValues r = Bytes.getValues(dir, "test", mode, fixedSize, maxDoc); for (int iter = 0; iter < 2; iter++) { @@ -186,7 +189,8 @@ public class TestDocValues extends LuceneTestCase { final long[] values = new long[NUM_VALUES]; for (int rx = 1; rx < 63; rx++, maxV *= 2) { Directory dir = newDirectory(); - Writer w = Ints.getWriter(dir, "test", false); + final AtomicLong trackBytes = new AtomicLong(0); + Writer w = Ints.getWriter(dir, "test", false, trackBytes); for (int i = 0; i < NUM_VALUES; i++) { final long v = random.nextLong() % (1 + maxV); values[i] = v; @@ -194,6 +198,8 @@ public class TestDocValues extends LuceneTestCase { } final int additionalDocs = 1 + random.nextInt(9); w.finish(NUM_VALUES + additionalDocs); + assertEquals(0, trackBytes.get()); + DocValues r = Ints.getValues(dir, "test", false); for (int iter = 0; iter < 2; iter++) { @@ -250,7 +256,8 @@ public class TestDocValues extends 
LuceneTestCase {
   private void runTestFloats(int precision, double delta) throws IOException {
     Directory dir = newDirectory();
-    Writer w = Floats.getWriter(dir, "test", precision);
+    final AtomicLong trackBytes = new AtomicLong(0);
+    Writer w = Floats.getWriter(dir, "test", precision, trackBytes);
     final int NUM_VALUES = 777 + random.nextInt(777);;
     final double[] values = new double[NUM_VALUES];
     for (int i = 0; i < NUM_VALUES; i++) {
@@ -261,6 +268,7 @@ public class TestDocValues extends LuceneTestCase {
     }
     final int additionalValues = 1 + random.nextInt(10);
     w.finish(NUM_VALUES + additionalValues);
+    assertEquals(0, trackBytes.get());
     DocValues r = Floats.getValues(dir, "test", NUM_VALUES + additionalValues);
     for (int iter = 0; iter < 2; iter++) {
diff --git a/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java b/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java
index 5ab0c649adf..9cca81fbec1 100644
--- a/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java
+++ b/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java
@@ -43,8 +43,13 @@ import org.apache.lucene.index.MultiFields;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.codecs.CodecProvider;
 import org.apache.lucene.index.codecs.docvalues.DocValuesCodec;
-import org.apache.lucene.index.values.DocValues.MissingValues;
+import org.apache.lucene.index.values.DocValues.MissingValue;
 import org.apache.lucene.index.values.DocValues.Source;
+import org.apache.lucene.queryParser.ParseException;
+import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.LockObtainFailedException;
 import org.apache.lucene.util.BytesRef;
@@ -62,8 +67,27 @@ import org.junit.BeforeClass;
  *
  */
 public class TestDocValuesIndexing extends LuceneTestCase {
-  // TODO Add a test for addIndexes
-  // TODO add test for unoptimized case with deletes
+  /*
+   * TODO:
+   * Roadmap to land on trunk
+   *   - Cut over to a direct API on ValuesEnum vs. ValuesAttribute
+   *   - Add documentation for:
+   *     - Source and ValuesEnum
+   *     - DocValues
+   *     - ValuesField
+   *     - ValuesAttribute
+   *     - Values
+   *   - Add @lucene.experimental to all necessary classes
+   *   - Try to make ValuesField more lightweight -> AttributeSource
+   *   - add test for unoptimized case with deletes
+   *   - add a test for addIndexes
+   *   - split up existing test cases and give them meaningful names
+   *   - use consistent naming throughout DocValues
+   *     - Values -> DocValueType
+   *     - PackedIntsImpl -> Ints
+   *   - run RAT
+   *   - add tests for FieldComparator FloatIndexValuesComparator vs. FloatValuesComparator etc.
+ */ private static DocValuesCodec docValuesCodec; private static CodecProvider provider; @@ -82,6 +106,43 @@ public class TestDocValuesIndexing extends LuceneTestCase { public static void afterClassLuceneTestCaseJ4() { LuceneTestCase.afterClassLuceneTestCaseJ4(); } + + /* + * Simple test case to show how to use the API + */ + public void testDocValuesSimple() throws CorruptIndexException, IOException, ParseException { + Directory dir = newDirectory(); + IndexWriter writer = new IndexWriter(dir, writerConfig(false)); + for (int i = 0; i < 5; i++) { + Document doc = new Document(); + ValuesField valuesField = new ValuesField("docId"); + valuesField.setInt(i); + doc.add(valuesField); + doc.add(new Field("docId", "" + i, Store.NO, Index.ANALYZED)); + writer.addDocument(doc); + } + writer.commit(); + writer.optimize(true); + + writer.close(); + + IndexReader reader = IndexReader.open(dir, null, true, 1, provider); + assertTrue(reader.isOptimized()); + + IndexSearcher searcher = new IndexSearcher(reader); + QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, "docId", new MockAnalyzer()); + TopDocs search = searcher.search(parser.parse("0 OR 1 OR 2 OR 3 OR 4"), 10); + assertEquals(5, search.totalHits); + ScoreDoc[] scoreDocs = search.scoreDocs; + DocValues docValues = MultiFields.getDocValues(reader, "docId"); + Source source = docValues.getSource(); + for (int i = 0; i < scoreDocs.length; i++) { + assertEquals(i, scoreDocs[i].doc); + assertEquals(i, source.getInt(scoreDocs[i].doc)); + } + reader.close(); + dir.close(); + } /** * Tests complete indexing of {@link Values} including deletions, merging and @@ -160,7 +221,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { assertNotNull(intsReader); Source ints = getSource(intsReader); - MissingValues missing = ints.getMissing(); + MissingValue missing = ints.getMissing(); for (int i = 0; i < base; i++) { long value = ints.getInt(i); @@ -191,7 +252,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { DocValues floatReader = getDocValues(r, val.name()); assertNotNull(floatReader); Source floats = getSource(floatReader); - MissingValues missing = floats.getMissing(); + MissingValue missing = floats.getMissing(); for (int i = 0; i < base; i++) { double value = floats.getFloat(i); @@ -254,7 +315,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { byte upto = 0; // test the filled up slots for correctness - MissingValues missing = bytes.getMissing(); + MissingValue missing = bytes.getMissing(); for (int i = 0; i < base; i++) { BytesRef br = bytes.getBytes(i, new BytesRef()); From 964eccc435084f0c5a8aa6ff516bc12cf9398930 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Mon, 6 Dec 2010 00:57:55 +0000 Subject: [PATCH 018/116] fixed naming inconsistency git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1042503 13f79535-47bb-0310-9956-ffa450edef68 --- .../benchmark/byTask/feeds/DocMaker.java | 14 +++--- .../apache/lucene/document/ValuesField.java | 20 ++++----- .../org/apache/lucene/index/FieldInfo.java | 8 ++-- .../org/apache/lucene/index/FieldInfos.java | 20 ++++----- .../org/apache/lucene/index/FieldsEnum.java | 2 +- .../lucene/index/FilterIndexReader.java | 2 +- .../org/apache/lucene/index/MultiFields.java | 4 +- .../apache/lucene/index/MultiFieldsEnum.java | 4 +- .../apache/lucene/index/SegmentMerger.java | 6 +-- .../apache/lucene/index/SegmentReader.java | 2 +- .../docvalues/DocValuesProducerBase.java | 4 +- .../org/apache/lucene/index/values/Bytes.java | 6 +-- 
.../apache/lucene/index/values/DocValues.java | 16 +++---- .../{ValuesEnum.java => DocValuesEnum.java} | 16 +++---- .../index/values/FixedDerefBytesImpl.java | 18 ++++---- .../index/values/FixedSortedBytesImpl.java | 10 ++--- .../index/values/FixedStraightBytesImpl.java | 16 +++---- .../apache/lucene/index/values/Floats.java | 36 +++++++-------- .../lucene/index/values/MultiDocValues.java | 28 ++++++------ .../lucene/index/values/PackedIntsImpl.java | 18 ++++---- .../index/values/PerDocFieldValues.java | 8 ++-- .../lucene/index/values/SourceCache.java | 1 + .../index/values/{Values.java => Type.java} | 2 +- .../index/values/VarDerefBytesImpl.java | 12 ++--- .../index/values/VarSortedBytesImpl.java | 14 +++--- .../index/values/VarStraightBytesImpl.java | 14 +++--- .../apache/lucene/index/values/Writer.java | 10 ++--- .../lucene/index/values/TestDocValues.java | 22 ++++----- .../index/values/TestDocValuesIndexing.java | 45 +++++++++---------- 29 files changed, 188 insertions(+), 190 deletions(-) rename lucene/src/java/org/apache/lucene/index/values/{ValuesEnum.java => DocValuesEnum.java} (89%) rename lucene/src/java/org/apache/lucene/index/values/{Values.java => Type.java} (98%) diff --git a/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java b/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java index b28300713ed..493745b1ac4 100644 --- a/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java +++ b/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java @@ -33,7 +33,7 @@ import org.apache.lucene.document.ValuesField; import org.apache.lucene.document.Field.Index; import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.Field.TermVector; -import org.apache.lucene.index.values.Values; +import org.apache.lucene.index.values.Type; /** * Creates {@link Document} objects. Uses a {@link ContentSource} to generate @@ -160,13 +160,13 @@ public class DocMaker { private long lastPrintedNumUniqueBytes = 0; private int printNum = 0; - private Map fieldVauleMap; + private Map fieldVauleMap; // create a doc // use only part of the body, modify it to keep the rest (or use all if size==0). // reset the docdata properties so they are not added more than once. private Document createDocument(DocData docData, int size, int cnt) throws UnsupportedEncodingException { - Values valueType; + Type valueType; final DocState ds = getDocState(); final Document doc = reuseFields ? 
ds.doc : new Document(); doc.getFields().clear(); @@ -252,7 +252,7 @@ public class DocMaker { } private void trySetIndexValues(Field field) { - final Values valueType; + final Type valueType; if((valueType = fieldVauleMap.get(field.name())) != null) ValuesField.set(field, valueType); } @@ -385,18 +385,18 @@ public class DocMaker { resetLeftovers(); } - private static final Map parseValueFields(String fields) { + private static final Map parseValueFields(String fields) { if(fields == null) return Collections.emptyMap(); String[] split = fields.split(";"); - Map result = new HashMap(); + Map result = new HashMap(); for (String tuple : split) { final String[] nameValue = tuple.split("="); if (nameValue.length != 2) { throw new IllegalArgumentException("illegal doc.stored.values format: " + fields + " expected fieldname=ValuesType;...;...;"); } - result.put(nameValue[0].trim(), Values.valueOf(nameValue[1])); + result.put(nameValue[0].trim(), Type.valueOf(nameValue[1])); } return result; } diff --git a/lucene/src/java/org/apache/lucene/document/ValuesField.java b/lucene/src/java/org/apache/lucene/document/ValuesField.java index 5acb6f272c7..105ac86a355 100644 --- a/lucene/src/java/org/apache/lucene/document/ValuesField.java +++ b/lucene/src/java/org/apache/lucene/document/ValuesField.java @@ -24,7 +24,7 @@ import org.apache.lucene.document.Field.Index; import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.Field.TermVector; import org.apache.lucene.index.values.PerDocFieldValues; -import org.apache.lucene.index.values.Values; +import org.apache.lucene.index.values.Type; import org.apache.lucene.util.BytesRef; /** @@ -36,7 +36,7 @@ public class ValuesField extends AbstractField implements PerDocFieldValues { protected BytesRef bytes; protected double doubleValue; protected long longValue; - protected Values type; + protected Type type; protected Comparator bytesComparator; public ValuesField(String name) { @@ -49,26 +49,26 @@ public class ValuesField extends AbstractField implements PerDocFieldValues { } public void setInt(long value) { - type = Values.PACKED_INTS; + type = Type.PACKED_INTS; longValue = value; } public void setFloat(float value) { - type = Values.SIMPLE_FLOAT_4BYTE; + type = Type.SIMPLE_FLOAT_4BYTE; doubleValue = value; } public void setFloat(double value) { - type = Values.SIMPLE_FLOAT_8BYTE; + type = Type.SIMPLE_FLOAT_8BYTE; doubleValue = value; } - public void setBytes(BytesRef value, Values type) { + public void setBytes(BytesRef value, Type type) { setBytes(value, type, null); } - public void setBytes(BytesRef value, Values type, Comparator comp) { + public void setBytes(BytesRef value, Type type, Comparator comp) { this.type = type; if (bytes == null) { this.bytes = new BytesRef(); @@ -97,11 +97,11 @@ public class ValuesField extends AbstractField implements PerDocFieldValues { this.bytesComparator = comp; } - public void setType(Values type) { + public void setType(Type type) { this.type = type; } - public Values type() { + public Type type() { return type; } @@ -122,7 +122,7 @@ public class ValuesField extends AbstractField implements PerDocFieldValues { return field; } - public static T set(T field, Values type) { + public static T set(T field, Type type) { if (field instanceof ValuesField) return field; final ValuesField valField = new ValuesField(); diff --git a/lucene/src/java/org/apache/lucene/index/FieldInfo.java b/lucene/src/java/org/apache/lucene/index/FieldInfo.java index e2fbf14f9d3..96ace5f1f1b 100644 --- 
a/lucene/src/java/org/apache/lucene/index/FieldInfo.java +++ b/lucene/src/java/org/apache/lucene/index/FieldInfo.java @@ -1,6 +1,6 @@ package org.apache.lucene.index; -import org.apache.lucene.index.values.Values; +import org.apache.lucene.index.values.Type; /** * Licensed to the Apache Software Foundation (ASF) under one or more @@ -24,7 +24,7 @@ public final class FieldInfo { public String name; public boolean isIndexed; public int number; - Values docValues; + Type docValues; // true if term vector for this field should be stored @@ -94,7 +94,7 @@ public final class FieldInfo { } } - void setDocValues(Values v) { + void setDocValues(Type v) { if (docValues != null) { if (docValues != v) { throw new IllegalArgumentException("indexValues is already set to " + docValues + "; cannot change to " + v); @@ -108,7 +108,7 @@ public final class FieldInfo { return docValues != null; } - public Values getDocValues() { + public Type getDocValues() { return docValues; } } diff --git a/lucene/src/java/org/apache/lucene/index/FieldInfos.java b/lucene/src/java/org/apache/lucene/index/FieldInfos.java index a6baae51324..ea5d528ca78 100644 --- a/lucene/src/java/org/apache/lucene/index/FieldInfos.java +++ b/lucene/src/java/org/apache/lucene/index/FieldInfos.java @@ -19,7 +19,7 @@ package org.apache.lucene.index; import org.apache.lucene.document.Document; import org.apache.lucene.document.Fieldable; -import org.apache.lucene.index.values.Values; +import org.apache.lucene.index.values.Type; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; @@ -384,31 +384,31 @@ public final class FieldInfos { fi.docValues = null; break; case 1: - fi.docValues = Values.PACKED_INTS; + fi.docValues = Type.PACKED_INTS; break; case 2: - fi.docValues = Values.SIMPLE_FLOAT_4BYTE; + fi.docValues = Type.SIMPLE_FLOAT_4BYTE; break; case 3: - fi.docValues = Values.SIMPLE_FLOAT_8BYTE; + fi.docValues = Type.SIMPLE_FLOAT_8BYTE; break; case 4: - fi.docValues = Values.BYTES_FIXED_STRAIGHT; + fi.docValues = Type.BYTES_FIXED_STRAIGHT; break; case 5: - fi.docValues = Values.BYTES_FIXED_DEREF; + fi.docValues = Type.BYTES_FIXED_DEREF; break; case 6: - fi.docValues = Values.BYTES_FIXED_SORTED; + fi.docValues = Type.BYTES_FIXED_SORTED; break; case 7: - fi.docValues = Values.BYTES_VAR_STRAIGHT; + fi.docValues = Type.BYTES_VAR_STRAIGHT; break; case 8: - fi.docValues = Values.BYTES_VAR_DEREF; + fi.docValues = Type.BYTES_VAR_DEREF; break; case 9: - fi.docValues = Values.BYTES_VAR_SORTED; + fi.docValues = Type.BYTES_VAR_SORTED; break; default: throw new IllegalStateException("unhandled indexValues type " + b); diff --git a/lucene/src/java/org/apache/lucene/index/FieldsEnum.java b/lucene/src/java/org/apache/lucene/index/FieldsEnum.java index e3112ca8b5b..290cd107cfb 100644 --- a/lucene/src/java/org/apache/lucene/index/FieldsEnum.java +++ b/lucene/src/java/org/apache/lucene/index/FieldsEnum.java @@ -20,7 +20,7 @@ package org.apache.lucene.index; import java.io.IOException; import org.apache.lucene.index.values.DocValues; -import org.apache.lucene.index.values.ValuesEnum; +import org.apache.lucene.index.values.DocValuesEnum; import org.apache.lucene.util.AttributeSource; /** Enumerates indexed fields. 
You must first call {@link diff --git a/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java b/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java index 9cde6140ae1..3393a71fdfb 100644 --- a/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java +++ b/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java @@ -20,7 +20,7 @@ package org.apache.lucene.index; import org.apache.lucene.document.Document; import org.apache.lucene.document.FieldSelector; import org.apache.lucene.index.values.DocValues; -import org.apache.lucene.index.values.ValuesEnum; +import org.apache.lucene.index.values.DocValuesEnum; import org.apache.lucene.store.Directory; import org.apache.lucene.util.Bits; import org.apache.lucene.search.FieldCache; // not great (circular); used only to purge FieldCache entry on close diff --git a/lucene/src/java/org/apache/lucene/index/MultiFields.java b/lucene/src/java/org/apache/lucene/index/MultiFields.java index 9137d6077ff..a871ce87f7a 100644 --- a/lucene/src/java/org/apache/lucene/index/MultiFields.java +++ b/lucene/src/java/org/apache/lucene/index/MultiFields.java @@ -25,7 +25,7 @@ import java.util.ArrayList; import org.apache.lucene.index.values.DocValues; import org.apache.lucene.index.values.MultiDocValues; -import org.apache.lucene.index.values.Values; +import org.apache.lucene.index.values.Type; import org.apache.lucene.index.values.MultiDocValues.DocValuesIndex; import org.apache.lucene.util.ReaderUtil; import org.apache.lucene.util.ReaderUtil.Gather; // for javadocs @@ -293,7 +293,7 @@ public final class MultiFields extends Fields { // create & add to docValues: final List docValuesIndex = new ArrayList(); int docsUpto = 0; - Values type = null; + Type type = null; // Gather all sub-readers that share this field for(int i=0;i docValuesIndex = new ArrayList(); int docsUpto = 0; - Values type = null; + Type type = null; final int numEnums = enumWithSlices.length; for (int i = 0; i < numEnums; i++) { FieldsEnumWithSlice withSlice = enumWithSlices[i]; diff --git a/lucene/src/java/org/apache/lucene/index/SegmentMerger.java b/lucene/src/java/org/apache/lucene/index/SegmentMerger.java index 48f516b903c..5535bbe7725 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentMerger.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentMerger.java @@ -32,7 +32,7 @@ import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.index.codecs.Codec; import org.apache.lucene.index.codecs.MergeState; import org.apache.lucene.index.codecs.FieldsConsumer; -import org.apache.lucene.index.values.Values; +import org.apache.lucene.index.values.Type; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; @@ -274,8 +274,8 @@ final class SegmentMerger { fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.hasNorms(fi.name), fi.storePayloads, fi.omitTermFreqAndPositions); - final Values fiIndexValues = fi.docValues; - final Values mergedDocValues = merged.docValues; + final Type fiIndexValues = fi.docValues; + final Type mergedDocValues = merged.docValues; if (mergedDocValues == null) { merged.setDocValues(fiIndexValues); } else if (mergedDocValues != fiIndexValues) { diff --git a/lucene/src/java/org/apache/lucene/index/SegmentReader.java b/lucene/src/java/org/apache/lucene/index/SegmentReader.java index 47446854af3..10974b45fcf 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentReader.java +++ 
b/lucene/src/java/org/apache/lucene/index/SegmentReader.java @@ -45,7 +45,7 @@ import org.apache.lucene.index.values.Bytes; import org.apache.lucene.index.values.Ints; import org.apache.lucene.index.values.DocValues; import org.apache.lucene.index.values.Floats; -import org.apache.lucene.index.values.Values; +import org.apache.lucene.index.values.Type; import org.apache.lucene.search.FieldCache; // not great (circular); used only to purge FieldCache entry on close import org.apache.lucene.util.BytesRef; diff --git a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java index 08307d4dd97..4cf1d5a916b 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java @@ -27,7 +27,7 @@ import org.apache.lucene.index.values.Bytes; import org.apache.lucene.index.values.DocValues; import org.apache.lucene.index.values.Floats; import org.apache.lucene.index.values.Ints; -import org.apache.lucene.index.values.Values; +import org.apache.lucene.index.values.Type; import org.apache.lucene.store.Directory; import org.apache.lucene.util.IntsRef; @@ -65,7 +65,7 @@ public abstract class DocValuesProducerBase extends FieldsProducer{ } protected DocValues loadDocValues(int docCount, Directory dir, String id, - Values v) throws IOException { + Type v) throws IOException { switch (v) { case PACKED_INTS: return Ints.getValues(dir, id, false); diff --git a/lucene/src/java/org/apache/lucene/index/values/Bytes.java b/lucene/src/java/org/apache/lucene/index/values/Bytes.java index e54e575f4c0..5dabb6cdd41 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Bytes.java +++ b/lucene/src/java/org/apache/lucene/index/values/Bytes.java @@ -162,7 +162,7 @@ public final class Bytes { } @Override - public ValuesEnum getEnum(AttributeSource attrSource) throws IOException { + public DocValuesEnum getEnum(AttributeSource attrSource) throws IOException { final MissingValue missing = getMissing(); return new SourceEnum(attrSource, type(), this, maxDoc()) { @Override @@ -246,7 +246,7 @@ public final class Bytes { } @Override - public ValuesEnum getEnum(AttributeSource attrSource) throws IOException { + public DocValuesEnum getEnum(AttributeSource attrSource) throws IOException { final MissingValue missing = getMissing(); return new SourceEnum(attrSource, type(), this, maxDoc()) { @@ -340,7 +340,7 @@ public final class Bytes { } @Override - protected void setNextEnum(ValuesEnum valuesEnum) { + protected void setNextEnum(DocValuesEnum valuesEnum) { bytesRef = valuesEnum.bytes(); } diff --git a/lucene/src/java/org/apache/lucene/index/values/DocValues.java b/lucene/src/java/org/apache/lucene/index/values/DocValues.java index 39d21ac4398..4de684ce526 100644 --- a/lucene/src/java/org/apache/lucene/index/values/DocValues.java +++ b/lucene/src/java/org/apache/lucene/index/values/DocValues.java @@ -31,11 +31,11 @@ public abstract class DocValues implements Closeable { public static final DocValues[] EMPTY_ARRAY = new DocValues[0]; private SourceCache cache = new SourceCache.DirectSourceCache(); - public ValuesEnum getEnum() throws IOException { + public DocValuesEnum getEnum() throws IOException { return getEnum(null); } - public abstract ValuesEnum getEnum(AttributeSource attrSource) + public abstract DocValuesEnum getEnum(AttributeSource attrSource) throws IOException; public abstract Source 
load() throws IOException; @@ -54,7 +54,7 @@ public abstract class DocValues implements Closeable { throw new UnsupportedOperationException(); } - public abstract Values type(); + public abstract Type type(); public void close() throws IOException { this.cache.close(this); @@ -95,7 +95,7 @@ public abstract class DocValues implements Closeable { throw new UnsupportedOperationException(); } - public ValuesEnum getEnum() throws IOException { + public DocValuesEnum getEnum() throws IOException { return getEnum(null); } @@ -103,19 +103,19 @@ public abstract class DocValues implements Closeable { return missingValue; } - public abstract Values type(); + public abstract Type type(); - public abstract ValuesEnum getEnum(AttributeSource attrSource) + public abstract DocValuesEnum getEnum(AttributeSource attrSource) throws IOException; } - abstract static class SourceEnum extends ValuesEnum { + abstract static class SourceEnum extends DocValuesEnum { protected final Source source; protected final int numDocs; protected int pos = -1; - SourceEnum(AttributeSource attrs, Values type, Source source, int numDocs) { + SourceEnum(AttributeSource attrs, Type type, Source source, int numDocs) { super(attrs, type); this.source = source; this.numDocs = numDocs; diff --git a/lucene/src/java/org/apache/lucene/index/values/ValuesEnum.java b/lucene/src/java/org/apache/lucene/index/values/DocValuesEnum.java similarity index 89% rename from lucene/src/java/org/apache/lucene/index/values/ValuesEnum.java rename to lucene/src/java/org/apache/lucene/index/values/DocValuesEnum.java index 8008929dabc..ecdeec8cf42 100644 --- a/lucene/src/java/org/apache/lucene/index/values/ValuesEnum.java +++ b/lucene/src/java/org/apache/lucene/index/values/DocValuesEnum.java @@ -28,18 +28,18 @@ import org.apache.lucene.util.LongsRef; * * @lucene.experimental */ -public abstract class ValuesEnum extends DocIdSetIterator { +public abstract class DocValuesEnum extends DocIdSetIterator { private AttributeSource source; - private Values enumType; + private Type enumType; protected BytesRef bytesRef; protected FloatsRef floatsRef; protected LongsRef intsRef; - protected ValuesEnum(Values enumType) { + protected DocValuesEnum(Type enumType) { this(null, enumType); } - protected ValuesEnum(AttributeSource source, Values enumType) { + protected DocValuesEnum(AttributeSource source, Type enumType) { this.source = source; this.enumType = enumType; switch (enumType) { @@ -61,7 +61,7 @@ public abstract class ValuesEnum extends DocIdSetIterator { } } - public Values type() { + public Type type() { return enumType; } @@ -77,7 +77,7 @@ public abstract class ValuesEnum extends DocIdSetIterator { return intsRef; } - protected void copyReferences(ValuesEnum valuesEnum) { + protected void copyReferences(DocValuesEnum valuesEnum) { intsRef = valuesEnum.intsRef; floatsRef = valuesEnum.floatsRef; bytesRef = valuesEnum.bytesRef; @@ -103,8 +103,8 @@ public abstract class ValuesEnum extends DocIdSetIterator { public abstract void close() throws IOException; - public static ValuesEnum emptyEnum(Values type) { - return new ValuesEnum(type) { + public static DocValuesEnum emptyEnum(Type type) { + return new DocValuesEnum(type) { @Override public int nextDoc() throws IOException { return NO_MORE_DOCS; diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java index c987ca37816..e662de58565 100644 --- 
a/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java @@ -172,8 +172,8 @@ class FixedDerefBytesImpl { } @Override - public Values type() { - return Values.BYTES_FIXED_DEREF; + public Type type() { + return Type.BYTES_FIXED_DEREF; } @Override @@ -183,12 +183,12 @@ class FixedDerefBytesImpl { } @Override - public ValuesEnum getEnum(AttributeSource source) throws IOException { + public DocValuesEnum getEnum(AttributeSource source) throws IOException { return new DerefBytesEnum(source, cloneData(), cloneIndex(), size); } - static class DerefBytesEnum extends ValuesEnum { + static class DerefBytesEnum extends DocValuesEnum { protected final IndexInput datIn; private final PackedInts.ReaderIterator idx; protected final long fp; @@ -198,11 +198,11 @@ class FixedDerefBytesImpl { public DerefBytesEnum(AttributeSource source, IndexInput datIn, IndexInput idxIn, int size) throws IOException { - this(source, datIn, idxIn, size, Values.BYTES_FIXED_DEREF); + this(source, datIn, idxIn, size, Type.BYTES_FIXED_DEREF); } protected DerefBytesEnum(AttributeSource source, IndexInput datIn, - IndexInput idxIn, int size, Values enumType) + IndexInput idxIn, int size, Type enumType) throws IOException { super(source, enumType); this.datIn = datIn; @@ -217,7 +217,7 @@ class FixedDerefBytesImpl { } - protected void copyReferences(ValuesEnum valuesEnum) { + protected void copyReferences(DocValuesEnum valuesEnum) { bytesRef = valuesEnum.bytesRef; if(bytesRef.bytes.length < size) { bytesRef.grow(size); @@ -270,8 +270,8 @@ class FixedDerefBytesImpl { } @Override - public Values type() { - return Values.BYTES_FIXED_DEREF; + public Type type() { + return Type.BYTES_FIXED_DEREF; } } diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java index adf1c2167c2..583d93b3d7b 100644 --- a/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java @@ -215,8 +215,8 @@ class FixedSortedBytesImpl { } @Override - public Values type() { - return Values.BYTES_FIXED_SORTED; + public Type type() { + return Type.BYTES_FIXED_SORTED; } @Override @@ -226,15 +226,15 @@ class FixedSortedBytesImpl { } @Override - public ValuesEnum getEnum(AttributeSource source) throws IOException { + public DocValuesEnum getEnum(AttributeSource source) throws IOException { // do unsorted return new DerefBytesEnum(source, cloneData(), cloneIndex(), size); } @Override - public Values type() { - return Values.BYTES_FIXED_SORTED; + public Type type() { + return Type.BYTES_FIXED_SORTED; } } } diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java index 9f8047304ca..77d51a6f431 100644 --- a/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java @@ -156,8 +156,8 @@ class FixedStraightBytesImpl { } @Override - public Values type() { - return Values.BYTES_FIXED_STRAIGHT; + public Type type() { + return Type.BYTES_FIXED_STRAIGHT; } @Override @@ -167,11 +167,11 @@ class FixedStraightBytesImpl { } @Override - public ValuesEnum getEnum(AttributeSource source) throws IOException { + public DocValuesEnum getEnum(AttributeSource source) throws IOException { return new 
FixedStraightBytesEnum(source, cloneData(), size, maxDoc); } - private static final class FixedStraightBytesEnum extends ValuesEnum { + private static final class FixedStraightBytesEnum extends DocValuesEnum { private final IndexInput datIn; private final int size; private final int maxDoc; @@ -179,7 +179,7 @@ class FixedStraightBytesImpl { private final long fp; public FixedStraightBytesEnum(AttributeSource source, IndexInput datIn, int size, int maxDoc) throws IOException{ - super(source, Values.BYTES_FIXED_STRAIGHT); + super(source, Type.BYTES_FIXED_STRAIGHT); this.datIn = datIn; this.size = size; this.maxDoc = maxDoc; @@ -189,7 +189,7 @@ class FixedStraightBytesImpl { fp = datIn.getFilePointer(); } - protected void copyReferences(ValuesEnum valuesEnum) { + protected void copyReferences(DocValuesEnum valuesEnum) { bytesRef = valuesEnum.bytesRef; if(bytesRef.bytes.length < size) { bytesRef.grow(size); @@ -228,8 +228,8 @@ class FixedStraightBytesImpl { } @Override - public Values type() { - return Values.BYTES_FIXED_STRAIGHT; + public Type type() { + return Type.BYTES_FIXED_STRAIGHT; } } } diff --git a/lucene/src/java/org/apache/lucene/index/values/Floats.java b/lucene/src/java/org/apache/lucene/index/values/Floats.java index d2d3fe6a500..0bf082ab98b 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Floats.java +++ b/lucene/src/java/org/apache/lucene/index/values/Floats.java @@ -102,7 +102,7 @@ public class Floats { } @Override - protected void setNextEnum(ValuesEnum valuesEnum) { + protected void setNextEnum(DocValuesEnum valuesEnum) { floatsRef = valuesEnum.getFloat(); } @@ -288,9 +288,9 @@ public class Floats { } @Override - public ValuesEnum getEnum(AttributeSource attrSource) throws IOException { + public DocValuesEnum getEnum(AttributeSource attrSource) throws IOException { final MissingValue missing = getMissing(); - return new SourceEnum(attrSource, Values.SIMPLE_FLOAT_4BYTE, this, maxDoc) { + return new SourceEnum(attrSource, Type.SIMPLE_FLOAT_4BYTE, this, maxDoc) { @Override public int advance(int target) throws IOException { if (target >= numDocs) @@ -307,8 +307,8 @@ public class Floats { } @Override - public Values type() { - return Values.SIMPLE_FLOAT_4BYTE; + public Type type() { + return Type.SIMPLE_FLOAT_4BYTE; } } @@ -327,7 +327,7 @@ public class Floats { } @Override - public ValuesEnum getEnum(AttributeSource attrSource) throws IOException { + public DocValuesEnum getEnum(AttributeSource attrSource) throws IOException { final MissingValue missing = getMissing(); return new SourceEnum(attrSource, type(), this, maxDoc) { @Override @@ -346,8 +346,8 @@ public class Floats { } @Override - public Values type() { - return Values.SIMPLE_FLOAT_8BYTE; + public Type type() { + return Type.SIMPLE_FLOAT_8BYTE; } } @@ -358,7 +358,7 @@ public class Floats { } @Override - public ValuesEnum getEnum(AttributeSource source) throws IOException { + public DocValuesEnum getEnum(AttributeSource source) throws IOException { IndexInput indexInput = (IndexInput) datIn.clone(); indexInput.seek(CodecUtil.headerLength(CODEC_NAME)); // skip precision: @@ -368,9 +368,9 @@ public class Floats { } @Override - public Values type() { - return precisionBytes == 4 ? Values.SIMPLE_FLOAT_4BYTE - : Values.SIMPLE_FLOAT_8BYTE; + public Type type() { + return precisionBytes == 4 ? 
Type.SIMPLE_FLOAT_4BYTE + : Type.SIMPLE_FLOAT_8BYTE; } } @@ -378,7 +378,7 @@ public class Floats { Floats4Enum(AttributeSource source, IndexInput dataIn, int maxDoc) throws IOException { - super(source, dataIn, 4, maxDoc, Values.SIMPLE_FLOAT_4BYTE); + super(source, dataIn, 4, maxDoc, Type.SIMPLE_FLOAT_4BYTE); } @Override @@ -414,7 +414,7 @@ public class Floats { Floats8EnumImpl(AttributeSource source, IndexInput dataIn, int maxDoc) throws IOException { - super(source, dataIn, 8, maxDoc, Values.SIMPLE_FLOAT_8BYTE); + super(source, dataIn, 8, maxDoc, Type.SIMPLE_FLOAT_8BYTE); } @Override @@ -447,7 +447,7 @@ public class Floats { } } - static abstract class FloatsEnumImpl extends ValuesEnum { + static abstract class FloatsEnumImpl extends DocValuesEnum { protected final IndexInput dataIn; protected int pos = -1; protected final int precision; @@ -455,9 +455,9 @@ public class Floats { protected final long fp; FloatsEnumImpl(AttributeSource source, IndexInput dataIn, int precision, - int maxDoc, Values type) throws IOException { - super(source, precision == 4 ? Values.SIMPLE_FLOAT_4BYTE - : Values.SIMPLE_FLOAT_8BYTE); + int maxDoc, Type type) throws IOException { + super(source, precision == 4 ? Type.SIMPLE_FLOAT_4BYTE + : Type.SIMPLE_FLOAT_8BYTE); this.dataIn = dataIn; this.precision = precision; this.maxDoc = maxDoc; diff --git a/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java b/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java index dfd7478a106..f0b6e7dbfe3 100644 --- a/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java +++ b/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java @@ -54,7 +54,7 @@ public class MultiDocValues extends DocValues { } @Override - public ValuesEnum getEnum(AttributeSource source) throws IOException { + public DocValuesEnum getEnum(AttributeSource source) throws IOException { return new MultiValuesEnum(docValuesIdx, starts); } @@ -81,13 +81,13 @@ public class MultiDocValues extends DocValues { final int maxDoc; final Source emptySoruce; - public DummyDocValues(int maxDoc, Values type) { + public DummyDocValues(int maxDoc, Type type) { this.maxDoc = maxDoc; this.emptySoruce = new EmptySource(type); } @Override - public ValuesEnum getEnum(AttributeSource attrSource) throws IOException { + public DocValuesEnum getEnum(AttributeSource attrSource) throws IOException { return emptySoruce.getEnum(attrSource); } @@ -97,7 +97,7 @@ public class MultiDocValues extends DocValues { } @Override - public Values type() { + public Type type() { return emptySoruce.type(); } @@ -107,13 +107,13 @@ public class MultiDocValues extends DocValues { } - private static class MultiValuesEnum extends ValuesEnum { + private static class MultiValuesEnum extends DocValuesEnum { private DocValuesIndex[] docValuesIdx; private final int maxDoc; private int currentStart; private int currentMax; private int currentDoc = -1; - private ValuesEnum currentEnum; + private DocValuesEnum currentEnum; private final int[] starts; public MultiValuesEnum(DocValuesIndex[] docValuesIdx, int[] starts) @@ -222,21 +222,21 @@ public class MultiDocValues extends DocValues { } @Override - public ValuesEnum getEnum(AttributeSource attrSource) throws IOException { + public DocValuesEnum getEnum(AttributeSource attrSource) throws IOException { throw new UnsupportedOperationException(); // TODO } @Override - public Values type() { + public Type type() { return docValuesIdx[0].docValues.type(); } } private static class EmptySource extends Source { - private 
final Values type; + private final Type type; - public EmptySource(Values type) { + public EmptySource(Type type) { this.type = type; } @@ -257,18 +257,18 @@ public class MultiDocValues extends DocValues { } @Override - public ValuesEnum getEnum(AttributeSource attrSource) throws IOException { - return ValuesEnum.emptyEnum(type); + public DocValuesEnum getEnum(AttributeSource attrSource) throws IOException { + return DocValuesEnum.emptyEnum(type); } @Override - public Values type() { + public Type type() { return type; } } @Override - public Values type() { + public Type type() { return this.docValuesIdx[0].docValues.type(); } } diff --git a/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java b/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java index 9842352c571..8466bb2db7f 100644 --- a/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java @@ -133,7 +133,7 @@ class PackedIntsImpl { } @Override - protected void setNextEnum(ValuesEnum valuesEnum) { + protected void setNextEnum(DocValuesEnum valuesEnum) { intsRef = valuesEnum.getInt(); } @@ -192,7 +192,7 @@ class PackedIntsImpl { } @Override - public ValuesEnum getEnum(AttributeSource attrSource) throws IOException { + public DocValuesEnum getEnum(AttributeSource attrSource) throws IOException { final MissingValue missing = getMissing(); return new SourceEnum(attrSource, type(), this, values.size()) { @Override @@ -211,8 +211,8 @@ class PackedIntsImpl { } @Override - public Values type() { - return Values.PACKED_INTS; + public Type type() { + return Type.PACKED_INTS; } } @@ -223,18 +223,18 @@ class PackedIntsImpl { } @Override - public ValuesEnum getEnum(AttributeSource source) throws IOException { + public DocValuesEnum getEnum(AttributeSource source) throws IOException { return new IntsEnumImpl(source, (IndexInput) datIn.clone()); } @Override - public Values type() { - return Values.PACKED_INTS; + public Type type() { + return Type.PACKED_INTS; } } - private static final class IntsEnumImpl extends ValuesEnum { + private static final class IntsEnumImpl extends DocValuesEnum { private final PackedInts.ReaderIterator ints; private long minValue; private final IndexInput dataIn; @@ -244,7 +244,7 @@ class PackedIntsImpl { private IntsEnumImpl(AttributeSource source, IndexInput dataIn) throws IOException { - super(source, Values.PACKED_INTS); + super(source, Type.PACKED_INTS); intsRef.offset = 0; this.dataIn = dataIn; dataIn.seek(CodecUtil.headerLength(CODEC_NAME)); diff --git a/lucene/src/java/org/apache/lucene/index/values/PerDocFieldValues.java b/lucene/src/java/org/apache/lucene/index/values/PerDocFieldValues.java index e94b7d5505f..f60fcd4e0e5 100644 --- a/lucene/src/java/org/apache/lucene/index/values/PerDocFieldValues.java +++ b/lucene/src/java/org/apache/lucene/index/values/PerDocFieldValues.java @@ -32,9 +32,9 @@ public interface PerDocFieldValues { public void setFloat(double value); - public void setBytes(BytesRef value, Values type); + public void setBytes(BytesRef value, Type type); - public void setBytes(BytesRef value, Values type, Comparator comp); + public void setBytes(BytesRef value, Type type, Comparator comp); public BytesRef getBytes(); @@ -46,8 +46,8 @@ public interface PerDocFieldValues { public void setBytesComparator(Comparator comp); - public void setType(Values type); + public void setType(Type type); - public Values type(); + public Type type(); } \ No newline at end of file diff --git 
a/lucene/src/java/org/apache/lucene/index/values/SourceCache.java b/lucene/src/java/org/apache/lucene/index/values/SourceCache.java index e2f4c63c9aa..07e33633b44 100644 --- a/lucene/src/java/org/apache/lucene/index/values/SourceCache.java +++ b/lucene/src/java/org/apache/lucene/index/values/SourceCache.java @@ -26,6 +26,7 @@ import org.apache.lucene.util.BytesRef; /** * Per {@link DocValues} {@link Source} cache. + * @lucene.experimental */ public abstract class SourceCache { public abstract Source load(DocValues values) throws IOException; diff --git a/lucene/src/java/org/apache/lucene/index/values/Values.java b/lucene/src/java/org/apache/lucene/index/values/Type.java similarity index 98% rename from lucene/src/java/org/apache/lucene/index/values/Values.java rename to lucene/src/java/org/apache/lucene/index/values/Type.java index 31adfd2a51a..717741abab8 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Values.java +++ b/lucene/src/java/org/apache/lucene/index/values/Type.java @@ -24,7 +24,7 @@ package org.apache.lucene.index.values; * * @lucene.experimental */ -public enum Values { +public enum Type { /** Integral value is stored as packed ints. The bit * precision is fixed across the segment, and diff --git a/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java index 985b960c5a2..c28e7e57759 100644 --- a/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java @@ -231,8 +231,8 @@ class VarDerefBytesImpl { } @Override - public Values type() { - return Values.BYTES_VAR_DEREF; + public Type type() { + return Type.BYTES_VAR_DEREF; } @Override @@ -242,7 +242,7 @@ class VarDerefBytesImpl { } @Override - public ValuesEnum getEnum(AttributeSource source) throws IOException { + public DocValuesEnum getEnum(AttributeSource source) throws IOException { return new VarDerefBytesEnum(source, cloneData(), cloneIndex()); } @@ -250,7 +250,7 @@ class VarDerefBytesImpl { public VarDerefBytesEnum(AttributeSource source, IndexInput datIn, IndexInput idxIn) throws IOException { - super(source, datIn, idxIn, -1, Values.BYTES_VAR_DEREF); + super(source, datIn, idxIn, -1, Type.BYTES_VAR_DEREF); } @Override @@ -273,8 +273,8 @@ class VarDerefBytesImpl { } @Override - public Values type() { - return Values.BYTES_VAR_DEREF; + public Type type() { + return Type.BYTES_VAR_DEREF; } } } diff --git a/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java index 53dddf31c73..ab023a1f817 100644 --- a/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java @@ -220,8 +220,8 @@ class VarSortedBytesImpl { } @Override - public Values type() { - return Values.BYTES_VAR_SORTED; + public Type type() { + return Type.BYTES_VAR_SORTED; } @Override @@ -231,11 +231,11 @@ class VarSortedBytesImpl { } @Override - public ValuesEnum getEnum(AttributeSource source) throws IOException { + public DocValuesEnum getEnum(AttributeSource source) throws IOException { return new VarSortedBytesEnum(source, cloneData(), cloneIndex()); } - private static class VarSortedBytesEnum extends ValuesEnum { + private static class VarSortedBytesEnum extends DocValuesEnum { private PackedInts.Reader docToOrdIndex; private PackedInts.Reader ordToOffsetIndex; private IndexInput idxIn; @@ -248,7 
+248,7 @@ class VarSortedBytesImpl { protected VarSortedBytesEnum(AttributeSource source, IndexInput datIn, IndexInput idxIn) throws IOException { - super(source, Values.BYTES_VAR_SORTED); + super(source, Type.BYTES_VAR_SORTED); totBytes = idxIn.readLong(); // keep that in memory to prevent lots of disk seeks docToOrdIndex = PackedInts.getReader(idxIn); @@ -309,8 +309,8 @@ class VarSortedBytesImpl { } @Override - public Values type() { - return Values.BYTES_VAR_SORTED; + public Type type() { + return Type.BYTES_VAR_SORTED; } } } diff --git a/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java index c259fe2c242..18f0272a268 100644 --- a/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java @@ -147,8 +147,8 @@ class VarStraightBytesImpl { } @Override - public Values type() { - return Values.BYTES_VAR_STRAIGHT; + public Type type() { + return Type.BYTES_VAR_STRAIGHT; } @Override @@ -158,11 +158,11 @@ class VarStraightBytesImpl { } @Override - public ValuesEnum getEnum(AttributeSource source) throws IOException { + public DocValuesEnum getEnum(AttributeSource source) throws IOException { return new VarStraightBytesEnum(source, cloneData(), cloneIndex()); } - private class VarStraightBytesEnum extends ValuesEnum { + private class VarStraightBytesEnum extends DocValuesEnum { private final PackedInts.Reader addresses; private final IndexInput datIn; private final IndexInput idxIn; @@ -172,7 +172,7 @@ class VarStraightBytesImpl { protected VarStraightBytesEnum(AttributeSource source, IndexInput datIn, IndexInput idxIn) throws IOException { - super(source, Values.BYTES_VAR_STRAIGHT); + super(source, Type.BYTES_VAR_STRAIGHT); totBytes = idxIn.readVInt(); fp = datIn.getFilePointer(); addresses = PackedInts.getReader(idxIn); @@ -220,8 +220,8 @@ class VarStraightBytesImpl { } @Override - public Values type() { - return Values.BYTES_VAR_STRAIGHT; + public Type type() { + return Type.BYTES_VAR_STRAIGHT; } } } diff --git a/lucene/src/java/org/apache/lucene/index/values/Writer.java b/lucene/src/java/org/apache/lucene/index/values/Writer.java index 376c09291ff..c254e0ae3d5 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Writer.java +++ b/lucene/src/java/org/apache/lucene/index/values/Writer.java @@ -55,7 +55,7 @@ public abstract class Writer extends DocValuesConsumer { /** Records the specfied value for the docID */ protected abstract void add(int docID) throws IOException; - protected abstract void setNextEnum(ValuesEnum valuesEnum); + protected abstract void setNextEnum(DocValuesEnum valuesEnum); /** Finish writing, close any files */ public abstract void finish(int docCount) throws IOException; @@ -63,7 +63,7 @@ public abstract class Writer extends DocValuesConsumer { // enables bulk copies in subclasses per MergeState @Override protected void merge(MergeState state) throws IOException { - final ValuesEnum valEnum = state.reader.getEnum(); + final DocValuesEnum valEnum = state.reader.getEnum(); assert valEnum != null; try { setNextEnum(valEnum); @@ -71,11 +71,11 @@ public abstract class Writer extends DocValuesConsumer { final Bits bits = state.bits; final int docCount = state.docCount; int currentDocId; - if ((currentDocId = valEnum.advance(0)) != ValuesEnum.NO_MORE_DOCS) { + if ((currentDocId = valEnum.advance(0)) != DocValuesEnum.NO_MORE_DOCS) { for (int i = 0; i < docCount; i++) { if (bits == null 
|| !bits.get(i)) { if (currentDocId < i) { - if ((currentDocId = valEnum.advance(i)) == ValuesEnum.NO_MORE_DOCS) { + if ((currentDocId = valEnum.advance(i)) == DocValuesEnum.NO_MORE_DOCS) { break; // advance can jump over default values } } @@ -91,7 +91,7 @@ public abstract class Writer extends DocValuesConsumer { } } - public static Writer create(Values v, String id, Directory directory, + public static Writer create(Type v, String id, Directory directory, Comparator comp, AtomicLong bytesUsed) throws IOException { switch (v) { case PACKED_INTS: diff --git a/lucene/src/test/org/apache/lucene/index/values/TestDocValues.java b/lucene/src/test/org/apache/lucene/index/values/TestDocValues.java index 94b247e3c36..694ed68d1a4 100644 --- a/lucene/src/test/org/apache/lucene/index/values/TestDocValues.java +++ b/lucene/src/test/org/apache/lucene/index/values/TestDocValues.java @@ -89,7 +89,7 @@ public class TestDocValues extends LuceneTestCase { DocValues r = Bytes.getValues(dir, "test", mode, fixedSize, maxDoc); for (int iter = 0; iter < 2; iter++) { - ValuesEnum bytesEnum = r.getEnum(); + DocValuesEnum bytesEnum = r.getEnum(); assertNotNull("enum is null", bytesEnum); BytesRef ref = bytesEnum.bytes(); @@ -100,8 +100,8 @@ public class TestDocValues extends LuceneTestCase { assertEquals("doc: " + idx + " lenLeft: " + values[idx].length() + " lenRight: " + utf8String.length(), values[idx], utf8String); } - assertEquals(ValuesEnum.NO_MORE_DOCS, bytesEnum.advance(maxDoc)); - assertEquals(ValuesEnum.NO_MORE_DOCS, bytesEnum.advance(maxDoc + 1)); + assertEquals(DocValuesEnum.NO_MORE_DOCS, bytesEnum.advance(maxDoc)); + assertEquals(DocValuesEnum.NO_MORE_DOCS, bytesEnum.advance(maxDoc + 1)); bytesEnum.close(); } @@ -207,7 +207,7 @@ public class TestDocValues extends LuceneTestCase { } for (int iter = 0; iter < 2; iter++) { - ValuesEnum iEnum = r.getEnum(); + DocValuesEnum iEnum = r.getEnum(); LongsRef ints = iEnum.getInt(); for (int i = 0; i < NUM_VALUES; i++) { assertEquals(i, iEnum.nextDoc()); @@ -217,14 +217,14 @@ public class TestDocValues extends LuceneTestCase { assertEquals(NUM_VALUES - 1, iEnum.advance(NUM_VALUES - 1)); } for (int i = NUM_VALUES; i < NUM_VALUES + additionalDocs; i++) { - assertEquals(ValuesEnum.NO_MORE_DOCS, iEnum.nextDoc()); + assertEquals(DocValuesEnum.NO_MORE_DOCS, iEnum.nextDoc()); } iEnum.close(); } for (int iter = 0; iter < 2; iter++) { - ValuesEnum iEnum = r.getEnum(); + DocValuesEnum iEnum = r.getEnum(); LongsRef ints = iEnum.getInt(); for (int i = 0; i < NUM_VALUES; i += 1 + random.nextInt(25)) { assertEquals(i, iEnum.advance(i)); @@ -234,7 +234,7 @@ public class TestDocValues extends LuceneTestCase { assertEquals(NUM_VALUES - 1, iEnum.advance(NUM_VALUES - 1)); } for (int i = NUM_VALUES; i < NUM_VALUES + additionalDocs; i++) { - assertEquals(ValuesEnum.NO_MORE_DOCS, iEnum.nextDoc()); + assertEquals(DocValuesEnum.NO_MORE_DOCS, iEnum.nextDoc()); } iEnum.close(); @@ -273,26 +273,26 @@ public class TestDocValues extends LuceneTestCase { } for (int iter = 0; iter < 2; iter++) { - ValuesEnum fEnum = r.getEnum(); + DocValuesEnum fEnum = r.getEnum(); FloatsRef floats = fEnum.getFloat(); for (int i = 0; i < NUM_VALUES; i++) { assertEquals(i, fEnum.nextDoc()); assertEquals(values[i], floats.get(), delta); } for (int i = NUM_VALUES; i < NUM_VALUES + additionalValues; i++) { - assertEquals(ValuesEnum.NO_MORE_DOCS, fEnum.nextDoc()); + assertEquals(DocValuesEnum.NO_MORE_DOCS, fEnum.nextDoc()); } fEnum.close(); } for (int iter = 0; iter < 2; iter++) { - ValuesEnum fEnum = r.getEnum(); + 
DocValuesEnum fEnum = r.getEnum(); FloatsRef floats = fEnum.getFloat(); for (int i = 0; i < NUM_VALUES; i += 1 + random.nextInt(25)) { assertEquals(i, fEnum.advance(i)); assertEquals(values[i], floats.get(), delta); } for (int i = NUM_VALUES; i < NUM_VALUES + additionalValues; i++) { - assertEquals(ValuesEnum.NO_MORE_DOCS, fEnum.advance(i)); + assertEquals(DocValuesEnum.NO_MORE_DOCS, fEnum.advance(i)); } fEnum.close(); } diff --git a/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java b/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java index 5600b005bac..334414b0d8a 100644 --- a/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java +++ b/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java @@ -80,9 +80,6 @@ public class TestDocValuesIndexing extends LuceneTestCase { * - add test for unoptimized case with deletes * - add a test for addIndexes * - split up existing testcases and give them meaningfull names - * - use consistent naming throughout DocValues - * - Values -> DocValueType - * - PackedIntsImpl -> Ints * - run RAT * - add tests for FieldComparator FloatIndexValuesComparator vs. FloatValuesComparator etc. */ @@ -139,7 +136,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { } /** - * Tests complete indexing of {@link Values} including deletions, merging and + * Tests complete indexing of {@link Type} including deletions, merging and * sparse value fields on Compound-File */ public void testIndexBytesNoDeletesCFS() throws IOException { @@ -159,7 +156,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { } /** - * Tests complete indexing of {@link Values} including deletions, merging and + * Tests complete indexing of {@link Type} including deletions, merging and * sparse value fields on None-Compound-File */ public void testIndexBytesNoDeletes() throws IOException { @@ -198,11 +195,11 @@ public class TestDocValuesIndexing extends LuceneTestCase { Directory d = newDirectory(); IndexWriter w = new IndexWriter(d, cfg); final int numValues = 179 + random.nextInt(151); - final List numVariantList = new ArrayList(NUMERICS); + final List numVariantList = new ArrayList(NUMERICS); // run in random order to test if fill works correctly during merges Collections.shuffle(numVariantList, random); - for (Values val : numVariantList) { + for (Type val : numVariantList) { OpenBitSet deleted = indexValues(w, numValues, val, numVariantList, withDeletions, 7); List closeables = new ArrayList(); @@ -222,7 +219,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { assertEquals("index " + i, missing.longValue, value); } - ValuesEnum intsEnum = getValuesEnum(intsReader); + DocValuesEnum intsEnum = getValuesEnum(intsReader); assertTrue(intsEnum.advance(0) >= base); intsEnum = getValuesEnum(intsReader); @@ -253,7 +250,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { assertEquals(" floats failed for doc: " + i + " base: " + base, missing.doubleValue, value, 0.0d); } - ValuesEnum floatEnum = getValuesEnum(floatReader); + DocValuesEnum floatEnum = getValuesEnum(floatReader); assertTrue(floatEnum.advance(0) >= base); floatEnum = getValuesEnum(floatReader); @@ -288,11 +285,11 @@ public class TestDocValuesIndexing extends LuceneTestCase { throws CorruptIndexException, LockObtainFailedException, IOException { final Directory d = newDirectory(); IndexWriter w = new IndexWriter(d, cfg); - final List byteVariantList = new ArrayList(BYTES); + final List byteVariantList = new 
ArrayList(BYTES); // run in random order to test if fill works correctly during merges Collections.shuffle(byteVariantList, random); final int numValues = 179 + random.nextInt(151); - for (Values byteIndexValue : byteVariantList) { + for (Type byteIndexValue : byteVariantList) { List closeables = new ArrayList(); int bytesSize = 7 + random.nextInt(128); @@ -341,7 +338,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { default: assertNull("expected null - " + msg + " " + br, br); // make sure we advance at least until base - ValuesEnum bytesEnum = getValuesEnum(bytesReader); + DocValuesEnum bytesEnum = getValuesEnum(bytesReader); final int advancedTo = bytesEnum.advance(0); assertTrue(byteIndexValue.name() + " advanced failed base:" + base + " advancedTo: " + advancedTo, base <= advancedTo); @@ -349,7 +346,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { } } - ValuesEnum bytesEnum = getValuesEnum(bytesReader); + DocValuesEnum bytesEnum = getValuesEnum(bytesReader); final BytesRef enumRef = bytesEnum.bytes(); // test the actual doc values added in this iteration assertEquals(base + numRemainingValues, r.numDocs()); @@ -427,8 +424,8 @@ public class TestDocValuesIndexing extends LuceneTestCase { return source; } - private ValuesEnum getValuesEnum(DocValues values) throws IOException { - ValuesEnum valuesEnum; + private DocValuesEnum getValuesEnum(DocValues values) throws IOException { + DocValuesEnum valuesEnum; if (!(values instanceof MultiDocValues) && random.nextInt(10) == 0) { // TODO not supported by MultiDocValues yet! valuesEnum = getSource(values).getEnum(); @@ -440,20 +437,20 @@ public class TestDocValuesIndexing extends LuceneTestCase { return valuesEnum; } - private static EnumSet BYTES = EnumSet.of(Values.BYTES_FIXED_DEREF, - Values.BYTES_FIXED_SORTED, Values.BYTES_FIXED_STRAIGHT, - Values.BYTES_VAR_DEREF, Values.BYTES_VAR_SORTED, - Values.BYTES_VAR_STRAIGHT); + private static EnumSet BYTES = EnumSet.of(Type.BYTES_FIXED_DEREF, + Type.BYTES_FIXED_SORTED, Type.BYTES_FIXED_STRAIGHT, + Type.BYTES_VAR_DEREF, Type.BYTES_VAR_SORTED, + Type.BYTES_VAR_STRAIGHT); - private static EnumSet NUMERICS = EnumSet.of(Values.PACKED_INTS, - Values.SIMPLE_FLOAT_4BYTE, Values.SIMPLE_FLOAT_8BYTE); + private static EnumSet NUMERICS = EnumSet.of(Type.PACKED_INTS, + Type.SIMPLE_FLOAT_4BYTE, Type.SIMPLE_FLOAT_8BYTE); private static Index[] IDX_VALUES = new Index[] { Index.ANALYZED, Index.ANALYZED_NO_NORMS, Index.NOT_ANALYZED, Index.NOT_ANALYZED_NO_NORMS, Index.NO }; - private OpenBitSet indexValues(IndexWriter w, int numValues, Values value, - List valueVarList, boolean withDeletions, int multOfSeven) + private OpenBitSet indexValues(IndexWriter w, int numValues, Type value, + List valueVarList, boolean withDeletions, int multOfSeven) throws CorruptIndexException, IOException { final boolean isNumeric = NUMERICS.contains(value); OpenBitSet deleted = new OpenBitSet(numValues); @@ -501,7 +498,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { if (i % 7 == 0) { if (withDeletions && random.nextBoolean()) { - Values val = valueVarList.get(random.nextInt(1 + valueVarList + Type val = valueVarList.get(random.nextInt(1 + valueVarList .indexOf(value))); final int randInt = val == value ? 
random.nextInt(1 + i) : random .nextInt(numValues); From 7da321229ee003f80d3ea71d728560920fb03c2d Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Wed, 8 Dec 2010 03:25:25 +0000 Subject: [PATCH 019/116] Added DocValuesIndexing#addIndexes() and merged LUCENE-2802 & LUCENE-2805 git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1043289 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 20 +++ .../apache/lucene/index/IndexSplitter.java | 2 + .../org/apache/lucene/index/CheckIndex.java | 1 + .../apache/lucene/index/DirectoryReader.java | 11 +- .../org/apache/lucene/index/IndexWriter.java | 22 ++- .../org/apache/lucene/index/SegmentInfos.java | 21 ++- .../codecs/DefaultSegmentInfosWriter.java | 3 +- .../index/values/FixedDerefBytesImpl.java | 2 +- .../lucene/index/values/MultiDocValues.java | 7 +- .../lucene/index/TestIndexWriterReader.java | 89 +++++++++++- .../index/values/TestDocValuesIndexing.java | 131 ++++++++++++++---- 11 files changed, 249 insertions(+), 60 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index c4ca3d6aead..17852b48009 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -127,6 +127,15 @@ Changes in Runtime Behavior changed to enable unmapping by default if supported by the JRE. (Mike McCandless, Uwe Schindler, Robert Muir) +* LUCENE-2790: LogMergePolicy.useCompoundFile's logic now factors in noCFSRatio + to determine whether the passed in segment should be compound. + (Shai Erera, Earwin Burrfoot) + +* LUCENE-2805: IndexWriter now increments the index version on every change to + the index instead of for every commit. Committing or closing the IndexWriter + without any changes to the index will not cause any index version increment. + (Simon Willnauer, Mike Mccandless) + API Changes * LUCENE-2302, LUCENE-1458, LUCENE-2111, LUCENE-2514: Terms are no longer @@ -335,6 +344,17 @@ Bug fixes * LUCENE-2633: PackedInts Packed32 and Packed64 did not support internal structures larger than 256MB (Toke Eskildsen via Mike McCandless) +* LUCENE-2803: The FieldCache can miss values if an entry for a reader + with more document deletions is requested before a reader with fewer + deletions, provided they share some segments. (yonik) + +* LUCENE-2802: NRT DirectoryReader returned incorrect values from + getVersion, isOptimized, getCommitUserData, getIndexCommit and isCurrent due + to a mutable reference to the IndexWriters SegmentInfos. 
+ (Simon Willnauer, Earwin Burrfoot) + + + ======================= Lucene 3.x (not yet released) ======================= Changes in backwards compatibility policy diff --git a/lucene/contrib/misc/src/java/org/apache/lucene/index/IndexSplitter.java b/lucene/contrib/misc/src/java/org/apache/lucene/index/IndexSplitter.java index e2f3ecf52e3..0fe5c771460 100644 --- a/lucene/contrib/misc/src/java/org/apache/lucene/index/IndexSplitter.java +++ b/lucene/contrib/misc/src/java/org/apache/lucene/index/IndexSplitter.java @@ -134,6 +134,7 @@ public class IndexSplitter { int idx = getIdx(n); infos.remove(idx); } + infos.changed(); infos.commit(fsDir); } @@ -152,6 +153,7 @@ public class IndexSplitter { copyFile(srcFile, destFile); } } + destInfos.changed(); destInfos.commit(destFSDir); // System.out.println("destDir:"+destDir.getAbsolutePath()); } diff --git a/lucene/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/src/java/org/apache/lucene/index/CheckIndex.java index ee7793e5235..e9339e8aa23 100644 --- a/lucene/src/java/org/apache/lucene/index/CheckIndex.java +++ b/lucene/src/java/org/apache/lucene/index/CheckIndex.java @@ -879,6 +879,7 @@ public class CheckIndex { public void fixIndex(Status result) throws IOException { if (result.partial) throw new IllegalArgumentException("can only fix an index that was fully checked (this status checked a subset of segments)"); + result.newSegments.changed(); result.newSegments.commit(result.dir); } diff --git a/lucene/src/java/org/apache/lucene/index/DirectoryReader.java b/lucene/src/java/org/apache/lucene/index/DirectoryReader.java index ed2adf6054e..8ed4b8746f2 100644 --- a/lucene/src/java/org/apache/lucene/index/DirectoryReader.java +++ b/lucene/src/java/org/apache/lucene/index/DirectoryReader.java @@ -55,8 +55,7 @@ class DirectoryReader extends IndexReader implements Cloneable { private IndexDeletionPolicy deletionPolicy; private Lock writeLock; - private SegmentInfos segmentInfos; - private SegmentInfos segmentInfosStart; + private final SegmentInfos segmentInfos; private boolean stale; private final int termInfosIndexDivisor; @@ -106,7 +105,6 @@ class DirectoryReader extends IndexReader implements Cloneable { this.segmentInfos = sis; this.deletionPolicy = deletionPolicy; this.termInfosIndexDivisor = termInfosIndexDivisor; - if (codecs == null) { this.codecs = CodecProvider.getDefault(); } else { @@ -145,8 +143,7 @@ class DirectoryReader extends IndexReader implements Cloneable { DirectoryReader(IndexWriter writer, SegmentInfos infos, int termInfosIndexDivisor, CodecProvider codecs) throws IOException { this.directory = writer.getDirectory(); this.readOnly = true; - segmentInfos = infos; - segmentInfosStart = (SegmentInfos) infos.clone(); + segmentInfos = (SegmentInfos) infos.clone();// make sure we clone otherwise we share mutable state with IW this.termInfosIndexDivisor = termInfosIndexDivisor; if (codecs == null) { this.codecs = CodecProvider.getDefault(); @@ -770,6 +767,7 @@ class DirectoryReader extends IndexReader implements Cloneable { deletionPolicy == null ? 
new KeepOnlyLastCommitDeletionPolicy() : deletionPolicy, segmentInfos, null, null, codecs); segmentInfos.updateGeneration(deleter.getLastSegmentInfos()); + segmentInfos.changed(); // Checkpoint the state we are about to change, in // case we have to roll back: @@ -782,7 +780,6 @@ class DirectoryReader extends IndexReader implements Cloneable { // Sync all files we just wrote directory.sync(segmentInfos.files(directory, false)); - segmentInfos.commit(directory); success = true; } finally { @@ -860,7 +857,7 @@ class DirectoryReader extends IndexReader implements Cloneable { // we loaded SegmentInfos from the directory return SegmentInfos.readCurrentVersion(directory, codecs) == segmentInfos.getVersion(); } else { - return writer.nrtIsCurrent(segmentInfosStart); + return writer.nrtIsCurrent(segmentInfos); } } diff --git a/lucene/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/src/java/org/apache/lucene/index/IndexWriter.java index 0d2d543b4ab..0f111be36dd 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/src/java/org/apache/lucene/index/IndexWriter.java @@ -741,6 +741,7 @@ public class IndexWriter implements Closeable { // Record that we have a change (zero out all // segments) pending: changeCount++; + segmentInfos.changed(); } else { segmentInfos.read(directory, codecs); @@ -757,6 +758,7 @@ public class IndexWriter implements Closeable { oldInfos.read(directory, commit.getSegmentsFileName(), codecs); segmentInfos.replace(oldInfos); changeCount++; + segmentInfos.changed(); if (infoStream != null) message("init: loaded commit \"" + commit.getSegmentsFileName() + "\""); } @@ -774,12 +776,14 @@ public class IndexWriter implements Closeable { conf.getIndexDeletionPolicy(), segmentInfos, infoStream, docWriter, codecs); - if (deleter.startingCommitDeleted) + if (deleter.startingCommitDeleted) { // Deletion policy deleted the "head" commit point. // We have to mark ourself as changed so that if we // are closed w/o any further changes we write a new // segments_N file. changeCount++; + segmentInfos.changed(); + } docWriter.setMaxBufferedDeleteTerms(conf.getMaxBufferedDeleteTerms()); docWriter.setRAMBufferSizeMB(conf.getRAMBufferSizeMB()); @@ -1537,6 +1541,7 @@ public class IndexWriter implements Closeable { // name that was previously returned which can cause // problems at least with ConcurrentMergeScheduler. 
changeCount++; + segmentInfos.changed(); return "_" + Integer.toString(segmentInfos.counter++, Character.MAX_RADIX); } } @@ -2038,6 +2043,7 @@ public class IndexWriter implements Closeable { // Mark that the index has changed ++changeCount; + segmentInfos.changed(); } catch (OutOfMemoryError oom) { handleOOM(oom, "deleteAll"); } finally { @@ -2119,6 +2125,7 @@ public class IndexWriter implements Closeable { */ private synchronized void checkpoint() throws IOException { changeCount++; + segmentInfos.changed(); deleter.checkpoint(segmentInfos, false); } @@ -3697,6 +3704,7 @@ public class IndexWriter implements Closeable { } toSync.remove(toSync.size()-1); changeCount++; + segmentInfos.changed(); } } assert filesExist(toSync); @@ -3829,17 +3837,7 @@ public class IndexWriter implements Closeable { } synchronized boolean nrtIsCurrent(SegmentInfos infos) { - if (!infos.equals(segmentInfos)) { - // if any structural changes (new segments), we are - // stale - return false; - } else if (infos.getGeneration() != segmentInfos.getGeneration()) { - // if any commit took place since we were opened, we - // are stale - return false; - } else { - return !docWriter.anyChanges(); - } + return infos.version == segmentInfos.version && !docWriter.anyChanges(); } synchronized boolean isClosed() { diff --git a/lucene/src/java/org/apache/lucene/index/SegmentInfos.java b/lucene/src/java/org/apache/lucene/index/SegmentInfos.java index 2db03c9180d..e1757dda6fc 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentInfos.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentInfos.java @@ -696,7 +696,6 @@ public final class SegmentInfos extends Vector { void updateGeneration(SegmentInfos other) { lastGeneration = other.lastGeneration; generation = other.generation; - version = other.version; } final void rollbackCommit(Directory dir) throws IOException { @@ -727,7 +726,12 @@ public final class SegmentInfos extends Vector { * segments file, but writes an invalid checksum at the * end, so that it is not visible to readers. Once this * is called you must call {@link #finishCommit} to complete - * the commit or {@link #rollbackCommit} to abort it. */ + * the commit or {@link #rollbackCommit} to abort it. + *
<p>
+ * Note: {@link #changed()} should be called prior to this
+ * method if changes have been made to this {@link SegmentInfos} instance
+ * </p>
+ **/
 final void prepareCommit(Directory dir) throws IOException { if (pendingSegnOutput != null) throw new IllegalStateException("prepareCommit was already called"); @@ -811,7 +815,12 @@ } /** Writes & syncs to the Directory dir, taking care to
- * remove the segments file on exception */
+ * remove the segments file on exception
+ * <p>
+ * Note: {@link #changed()} should be called prior to this
+ * method if changes have been made to this {@link SegmentInfos} instance
+ * </p>
+ **/ final void commit(Directory dir) throws IOException { prepareCommit(dir); finishCommit(dir); @@ -862,4 +871,10 @@ public final class SegmentInfos extends Vector { } return count; } + + /** Call this before committing if changes have been made to the + * segments. */ + public void changed() { + version++; + } } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/DefaultSegmentInfosWriter.java b/lucene/src/java/org/apache/lucene/index/codecs/DefaultSegmentInfosWriter.java index 9f5fb58419f..317644a69b4 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/DefaultSegmentInfosWriter.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/DefaultSegmentInfosWriter.java @@ -51,8 +51,7 @@ public class DefaultSegmentInfosWriter extends SegmentInfosWriter { throws IOException { IndexOutput out = createOutput(dir, segmentFileName); out.writeInt(FORMAT_CURRENT); // write FORMAT - out.writeLong(++infos.version); // every write changes - // the index + out.writeLong(infos.version); out.writeInt(infos.counter); // write counter out.writeInt(infos.size()); // write infos for (SegmentInfo si : infos) { diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java index e662de58565..15962b84548 100644 --- a/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java @@ -244,7 +244,7 @@ class FixedDerefBytesImpl { @Override public int nextDoc() throws IOException { - if (pos < valueCount) { + if (pos >= valueCount) { return pos = NO_MORE_DOCS; } return advance(pos + 1); diff --git a/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java b/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java index f0b6e7dbfe3..f310378cf54 100644 --- a/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java +++ b/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java @@ -142,8 +142,9 @@ public class MultiDocValues extends DocValues { + " must be > than the current doc " + currentDoc; int relativeDoc = target - currentStart; do { - if (target >= maxDoc) // we are beyond max doc + if (target >= maxDoc) {// we are beyond max doc return currentDoc = NO_MORE_DOCS; + } if (target >= currentMax) { final int idx = ReaderUtil.subIndex(target, starts); currentEnum.close(); @@ -152,9 +153,9 @@ public class MultiDocValues extends DocValues { currentStart = docValuesIdx[idx].start; currentMax = currentStart + docValuesIdx[idx].length; relativeDoc = target - currentStart; - } else { - return currentDoc = currentStart + currentEnum.advance(relativeDoc); } + target = currentMax; // make sure that we advance to the next enum if the current is exhausted + } while ((relativeDoc = currentEnum.advance(relativeDoc)) == NO_MORE_DOCS); return currentDoc = currentStart + relativeDoc; } diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexWriterReader.java b/lucene/src/test/org/apache/lucene/index/TestIndexWriterReader.java index 692dda665de..5c357927e33 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexWriterReader.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexWriterReader.java @@ -61,7 +61,54 @@ public class TestIndexWriterReader extends LuceneTestCase { } return count; } - + + public void testAddCloseOpen() throws IOException { + Directory dir1 = newDirectory(); + IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()); + + 
IndexWriter writer = new IndexWriter(dir1, iwc); + for (int i = 0; i < 97 ; i++) { + IndexReader reader = writer.getReader(); + if (i == 0) { + writer.addDocument(createDocument(i, "x", 1 + random.nextInt(5))); + } else { + int previous = random.nextInt(i); + // a check if the reader is current here could fail since there might be + // merges going on. + switch (random.nextInt(5)) { + case 0: + case 1: + case 2: + writer.addDocument(createDocument(i, "x", 1 + random.nextInt(5))); + break; + case 3: + writer.updateDocument(new Term("id", "" + previous), createDocument( + previous, "x", 1 + random.nextInt(5))); + break; + case 4: + writer.deleteDocuments(new Term("id", "" + previous)); + } + } + assertFalse(reader.isCurrent()); + reader.close(); + } + writer.optimize(); // make sure all merging is done etc. + IndexReader reader = writer.getReader(); + writer.commit(); // no changes that are not visible to the reader + assertTrue(reader.isCurrent()); + writer.close(); + assertTrue(reader.isCurrent()); // all changes are visible to the reader + iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()); + writer = new IndexWriter(dir1, iwc); + assertTrue(reader.isCurrent()); + writer.addDocument(createDocument(1, "x", 1+random.nextInt(5))); + assertTrue(reader.isCurrent()); // segments in ram but IW is different to the readers one + writer.close(); + assertFalse(reader.isCurrent()); // segments written + reader.close(); + dir1.close(); + } + public void testUpdateDocument() throws Exception { boolean optimize = true; @@ -128,6 +175,44 @@ public class TestIndexWriterReader extends LuceneTestCase { dir1.close(); } + public void testIsCurrent() throws IOException { + Directory dir = newDirectory(); + IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()); + + IndexWriter writer = new IndexWriter(dir, iwc); + Document doc = new Document(); + doc.add(newField("field", "a b c", Field.Store.NO, Field.Index.ANALYZED)); + writer.addDocument(doc); + writer.close(); + + iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()); + writer = new IndexWriter(dir, iwc); + doc = new Document(); + doc.add(newField("field", "a b c", Field.Store.NO, Field.Index.ANALYZED)); + IndexReader nrtReader = writer.getReader(); + assertTrue(nrtReader.isCurrent()); + writer.addDocument(doc); + assertFalse(nrtReader.isCurrent()); // should see the changes + writer.optimize(); // make sure we don't have a merge going on + assertFalse(nrtReader.isCurrent()); + nrtReader.close(); + + IndexReader dirReader = IndexReader.open(dir); + nrtReader = writer.getReader(); + + assertTrue(dirReader.isCurrent()); + assertTrue(nrtReader.isCurrent()); // nothing was committed yet so we are still current + assertEquals(2, nrtReader.maxDoc()); // sees the actual document added + assertEquals(1, dirReader.maxDoc()); + writer.close(); // close is actually a commit both should see the changes + assertTrue(nrtReader.isCurrent()); + assertFalse(dirReader.isCurrent()); // this reader has been opened before the writer was closed / committed + + dirReader.close(); + nrtReader.close(); + dir.close(); + } + /** * Test using IW.addIndexes * @@ -171,7 +256,7 @@ public class TestIndexWriterReader extends LuceneTestCase { assertTrue(r1.isCurrent()); writer.commit(); - assertFalse(r1.isCurrent()); + assertTrue(r1.isCurrent()); // we have seen all changes - no change after opening the NRT reader assertEquals(200, r1.maxDoc()); diff --git 
a/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java b/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java index 334414b0d8a..e3961c9418c 100644 --- a/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java +++ b/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java @@ -19,6 +19,7 @@ package org.apache.lucene.index.values; import java.io.Closeable; import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.EnumSet; import java.util.List; @@ -67,21 +68,14 @@ import org.junit.Before; */ public class TestDocValuesIndexing extends LuceneTestCase { /* - * TODO: - * Roadmap to land on trunk - * - * - Add documentation for: - * - Source and ValuesEnum - * - DocValues - * - ValuesField - * - ValuesAttribute - * - Values - * - Add @lucene.experimental to all necessary classes - * - add test for unoptimized case with deletes - * - add a test for addIndexes - * - split up existing testcases and give them meaningfull names - * - run RAT - * - add tests for FieldComparator FloatIndexValuesComparator vs. FloatValuesComparator etc. + * TODO: Roadmap to land on trunk + * + * - Add documentation for: - Source and ValuesEnum - DocValues - ValuesField + * - ValuesAttribute - Values - Add @lucene.experimental to all necessary + * classes - add test for unoptimized case with deletes - add a test for + * addIndexes - split up existing testcases and give them meaningfull names - + * run RAT - add tests for FieldComparator FloatIndexValuesComparator vs. + * FloatValuesComparator etc. */ private DocValuesCodec docValuesCodec; @@ -90,18 +84,20 @@ public class TestDocValuesIndexing extends LuceneTestCase { @Before public void setUp() throws Exception { super.setUp(); - String defaultFieldCodec = CodecProvider.getDefault().getDefaultFieldCodec(); + String defaultFieldCodec = CodecProvider.getDefault() + .getDefaultFieldCodec(); provider = new CodecProvider(); - docValuesCodec = new DocValuesCodec(CodecProvider.getDefault().lookup(defaultFieldCodec)); + docValuesCodec = new DocValuesCodec(CodecProvider.getDefault().lookup( + defaultFieldCodec)); provider.register(docValuesCodec); provider.setDefaultFieldCodec(docValuesCodec.name); } - - + /* * Simple test case to show how to use the API */ - public void testDocValuesSimple() throws CorruptIndexException, IOException, ParseException { + public void testDocValuesSimple() throws CorruptIndexException, IOException, + ParseException { Directory dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, writerConfig(false)); for (int i = 0; i < 5; i++) { @@ -114,14 +110,15 @@ public class TestDocValuesIndexing extends LuceneTestCase { } writer.commit(); writer.optimize(true); - + writer.close(); - + IndexReader reader = IndexReader.open(dir, null, true, 1, provider); assertTrue(reader.isOptimized()); - + IndexSearcher searcher = new IndexSearcher(reader); - QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, "docId", new MockAnalyzer()); + QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, "docId", + new MockAnalyzer()); TopDocs search = searcher.search(parser.parse("0 OR 1 OR 2 OR 3 OR 4"), 10); assertEquals(5, search.totalHits); ScoreDoc[] scoreDocs = search.scoreDocs; @@ -175,6 +172,79 @@ public class TestDocValuesIndexing extends LuceneTestCase { runTestNumerics(writerConfig(false), true); } + public void testAddIndexes() throws IOException { + int valuesPerIndex = 10; + List values = 
Arrays.asList(Type.values()); + Collections.shuffle(values, random); + Type first = values.get(0); + Type second = values.get(1); + String msg = "[first=" + first.name() + ", second=" + second.name() + "]"; + // index first index + Directory d_1 = newDirectory(); + IndexWriter w_1 = new IndexWriter(d_1, writerConfig(random.nextBoolean())); + indexValues(w_1, valuesPerIndex, first, values, false, 7); + w_1.commit(); + assertEquals(valuesPerIndex, w_1.maxDoc()); + _TestUtil.checkIndex(d_1, w_1.getConfig().getCodecProvider()); + + // index second index + Directory d_2 = newDirectory(); + IndexWriter w_2 = new IndexWriter(d_2, writerConfig(random.nextBoolean())); + indexValues(w_2, valuesPerIndex, second, values, false, 7); + w_2.commit(); + assertEquals(valuesPerIndex, w_2.maxDoc()); + _TestUtil.checkIndex(d_2, w_2.getConfig().getCodecProvider()); + + Directory target = newDirectory(); + IndexWriter w = new IndexWriter(target, writerConfig(random.nextBoolean())); + IndexReader r_1 = IndexReader.open(w_1); + IndexReader r_2 = IndexReader.open(w_2); + if (random.nextBoolean()) { + w.addIndexes(d_1, d_2); + } else { + w.addIndexes(r_1, r_2); + } + w.optimize(); + w.commit(); + + _TestUtil.checkIndex(target, w.getConfig().getCodecProvider()); + assertEquals(valuesPerIndex * 2, w.maxDoc()); + + // check values + + IndexReader merged = IndexReader.open(w); + DocValuesEnum vE_1 = getValuesEnum(getDocValues(r_1, first.name())); + DocValuesEnum vE_2 = getValuesEnum(getDocValues(r_2, second.name())); + DocValuesEnum vE_1_merged = getValuesEnum(getDocValues(merged, first.name())); + DocValuesEnum vE_2_merged = getValuesEnum(getDocValues(merged, second + .name())); + if (second == Type.BYTES_VAR_STRAIGHT || second == Type.BYTES_FIXED_STRAIGHT) { + assertEquals(msg, valuesPerIndex-1, vE_2_merged.advance(valuesPerIndex-1)); + } + for (int i = 0; i < valuesPerIndex; i++) { + assertEquals(msg, i, vE_1.nextDoc()); + assertEquals(msg, i, vE_1_merged.nextDoc()); + + assertEquals(msg, i, vE_2.nextDoc()); + assertEquals(msg, i + valuesPerIndex, vE_2_merged.nextDoc()); + } + assertEquals(msg, DocValuesEnum.NO_MORE_DOCS, vE_1.nextDoc()); + assertEquals(msg, DocValuesEnum.NO_MORE_DOCS, vE_2.nextDoc()); + assertEquals(msg, DocValuesEnum.NO_MORE_DOCS, vE_1_merged.advance(valuesPerIndex*2)); + assertEquals(msg, DocValuesEnum.NO_MORE_DOCS, vE_2_merged.nextDoc()); + + // close resources + r_1.close(); + r_2.close(); + merged.close(); + w_1.close(); + w_2.close(); + w.close(); + d_1.close(); + d_2.close(); + target.close(); + } + private IndexWriterConfig writerConfig(boolean useCompoundFile) { final IndexWriterConfig cfg = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()); @@ -262,7 +332,8 @@ public class TestDocValuesIndexing extends LuceneTestCase { } assertEquals("advance failed at index: " + i + " of " + r.numDocs() + " docs base:" + base, i, floatEnum.advance(i)); - assertEquals(floatEnum.getClass() + " index " + i, 2.0 * expected, enumRef.get(), 0.00001); + assertEquals(floatEnum.getClass() + " index " + i, 2.0 * expected, + enumRef.get(), 0.00001); assertEquals("index " + i, 2.0 * expected, floats.getFloat(i), 0.00001); } @@ -365,7 +436,8 @@ public class TestDocValuesIndexing extends LuceneTestCase { .advance(i)); } for (int j = 0; j < br.length; j++, upto++) { - assertTrue(bytesEnum.getClass() + " enumRef not initialized " + msg, enumRef.bytes.length > 0); + assertTrue(" enumRef not initialized " + msg, + enumRef.bytes.length > 0); assertEquals( "EnumRef Byte at index " + j + " doesn't match - " + 
msg, upto, enumRef.bytes[enumRef.offset + j]); @@ -393,6 +465,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { private DocValues getDocValues(IndexReader reader, String field) throws IOException { boolean optimized = reader.isOptimized(); + reader.isCurrent(); Fields fields = optimized ? reader.getSequentialSubReaders()[0].fields() : MultiFields.getFields(reader); switch (random.nextInt(optimized ? 3 : 2)) { // case 2 only if optimized @@ -438,9 +511,8 @@ public class TestDocValuesIndexing extends LuceneTestCase { } private static EnumSet BYTES = EnumSet.of(Type.BYTES_FIXED_DEREF, - Type.BYTES_FIXED_SORTED, Type.BYTES_FIXED_STRAIGHT, - Type.BYTES_VAR_DEREF, Type.BYTES_VAR_SORTED, - Type.BYTES_VAR_STRAIGHT); + Type.BYTES_FIXED_SORTED, Type.BYTES_FIXED_STRAIGHT, Type.BYTES_VAR_DEREF, + Type.BYTES_VAR_SORTED, Type.BYTES_VAR_STRAIGHT); private static EnumSet NUMERICS = EnumSet.of(Type.PACKED_INTS, Type.SIMPLE_FLOAT_4BYTE, Type.SIMPLE_FLOAT_8BYTE); @@ -519,5 +591,4 @@ public class TestDocValuesIndexing extends LuceneTestCase { w.optimize(); return deleted; } - } From 452684f1f1a4cfc8f54aa91ace1cb3a1ffeabd1e Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Wed, 8 Dec 2010 18:35:41 +0000 Subject: [PATCH 020/116] catch up with trunk git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1043582 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/contrib/CHANGES.txt | 10 +- .../index/BalancedSegmentMergePolicy.java | 16 +- .../apache/lucene/store/NativePosixUtil.cpp | 4 +- .../apache/lucene/store/WindowsDirectory.cpp | 175 ++++++++++++++++++ .../apache/lucene/store/WindowsDirectory.java | 124 +++++++++++++ .../lucene/index/TestIndexSplitter.java | 10 +- .../org/apache/lucene/index/IndexWriter.java | 85 ++++----- .../apache/lucene/index/LogMergePolicy.java | 60 +++--- .../org/apache/lucene/index/MergePolicy.java | 9 +- .../search/cache/ByteValuesCreator.java | 5 +- .../search/cache/CachedArrayCreator.java | 22 +-- .../search/cache/DocTermsIndexCreator.java | 3 +- .../search/cache/DoubleValuesCreator.java | 5 +- .../search/cache/FloatValuesCreator.java | 5 +- .../lucene/search/cache/IntValuesCreator.java | 5 +- .../search/cache/LongValuesCreator.java | 5 +- .../search/cache/ShortValuesCreator.java | 5 +- .../apache/lucene/index/TestAddIndexes.java | 29 +++ .../index/TestBackwardsCompatibility.java | 5 +- .../lucene/index/TestIndexFileDeleter.java | 11 +- .../apache/lucene/index/TestIndexWriter.java | 6 +- .../lucene/search/TestSearchWithThreads.java | 109 +++++++++++ .../util/LuceneJUnitResultFormatter.java | 69 ++++--- 23 files changed, 607 insertions(+), 170 deletions(-) create mode 100644 lucene/contrib/misc/src/java/org/apache/lucene/store/WindowsDirectory.cpp create mode 100644 lucene/contrib/misc/src/java/org/apache/lucene/store/WindowsDirectory.java create mode 100644 lucene/src/test/org/apache/lucene/search/TestSearchWithThreads.java diff --git a/lucene/contrib/CHANGES.txt b/lucene/contrib/CHANGES.txt index 2a3c9ee7161..494b0ef9162 100644 --- a/lucene/contrib/CHANGES.txt +++ b/lucene/contrib/CHANGES.txt @@ -32,6 +32,11 @@ New Features * LUCENE-2507: Added DirectSpellChecker, which retrieves correction candidates directly from the term dictionary using levenshtein automata. (Robert Muir) + * LUCENE-2791: Added WindowsDirectory, a Windows-specific Directory impl + that doesn't synchronize on the file handle. This can be useful to + avoid the performance problems of SimpleFSDirectory and NIOFSDirectory. 
+ (Robert Muir, Simon Willnauer, Uwe Schindler, Michael McCandless) + API Changes * LUCENE-2606: Changed RegexCapabilities interface to fix thread @@ -166,6 +171,9 @@ API Changes new SpanMultiTermQueryWrapper(new RegexQuery()) instead. (Robert Muir, Uwe Schindler) + * LUCENE-2747: Deprecated ArabicLetterTokenizer. StandardTokenizer now tokenizes + most languages correctly including Arabic. (Steven Rowe, Robert Muir) + New features * LUCENE-2306: Add NumericRangeFilter and NumericRangeQuery support to XMLQueryParser. @@ -274,7 +282,7 @@ Build dependency management between contribs by a new ANT macro. (Uwe Schindler, Shai Erera) - * LUCENE-2399, LUCENE-2683: Upgrade contrib/icu's ICU jar file to ICU 4.4.2 + * LUCENE-2797: Upgrade contrib/icu's ICU jar file to ICU 4.6 (Robert Muir) Optimizations diff --git a/lucene/contrib/misc/src/java/org/apache/lucene/index/BalancedSegmentMergePolicy.java b/lucene/contrib/misc/src/java/org/apache/lucene/index/BalancedSegmentMergePolicy.java index 484d3326fef..9d6186bf9fc 100644 --- a/lucene/contrib/misc/src/java/org/apache/lucene/index/BalancedSegmentMergePolicy.java +++ b/lucene/contrib/misc/src/java/org/apache/lucene/index/BalancedSegmentMergePolicy.java @@ -132,11 +132,10 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy { // Since we must optimize down to 1 segment, the // choice is simple: - boolean useCompoundFile = getUseCompoundFile(); if (last > 1 || !isOptimized(infos.info(0))) { spec = new MergeSpecification(); - spec.add(new OneMerge(infos.range(0, last), useCompoundFile)); + spec.add(new OneMerge(infos.range(0, last))); } } else if (last > maxNumSegments) { @@ -153,7 +152,6 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy { if (infoLen <= maxNumSegments) return null; MergeSpecification spec = new MergeSpecification(); - boolean useCompoundFile = getUseCompoundFile(); // use Viterbi algorithm to find the best segmentation. // we will try to minimize the size variance of resulting segments. 
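[Editorial sketch, not part of the patch: the comment above names the idea of a dynamic program that chooses consecutive split points minimizing the size variance of the resulting merged segments. The toy below illustrates that idea standalone; BalancedSplitSketch and bestSplit are invented names, and the cost function (squared deviation from an even group size) is an assumption for illustration, not the policy's actual code.]

  import java.util.Arrays;

  public class BalancedSplitSketch {

    /** Start index of each of the k consecutive groups in a split that
     *  minimizes squared deviation from the ideal (even) group size. */
    static int[] bestSplit(long[] sizes, int k) {
      final int n = sizes.length;
      final long[] prefix = new long[n + 1];
      for (int i = 0; i < n; i++) {
        prefix[i + 1] = prefix[i] + sizes[i];
      }
      final double ideal = (double) prefix[n] / k; // even merged-segment size
      // cost[i][j]: best cost of splitting sizes[0..i) into j groups
      final double[][] cost = new double[n + 1][k + 1];
      final int[][] back = new int[n + 1][k + 1];
      for (double[] row : cost) {
        Arrays.fill(row, Double.POSITIVE_INFINITY);
      }
      cost[0][0] = 0;
      for (int i = 1; i <= n; i++) {
        for (int j = 1; j <= Math.min(i, k); j++) {
          for (int s = j - 1; s < i; s++) { // last group is sizes[s..i)
            double dev = (prefix[i] - prefix[s]) - ideal;
            double c = cost[s][j - 1] + dev * dev;
            if (c < cost[i][j]) {
              cost[i][j] = c;
              back[i][j] = s;
            }
          }
        }
      }
      final int[] starts = new int[k];
      for (int i = n, j = k; j > 0; j--) {
        starts[j - 1] = back[i][j];
        i = back[i][j];
      }
      return starts;
    }

    public static void main(String[] args) {
      long[] segmentSizes = {100, 90, 40, 35, 30, 8, 7, 6, 5};
      // prints [0, 1, 3]: groups {100}, {90, 40}, {35, 30, 8, 7, 6, 5}
      System.out.println(Arrays.toString(bestSplit(segmentSizes, 3)));
    }
  }

The table fill is O(k * n^2) for n input segments and k merges, which is cheap at the segment counts a merge policy ever sees.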
@@ -194,7 +192,7 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy { prev = backLink[i][prev]; int mergeStart = i + prev; if((mergeEnd - mergeStart) > 1) { - spec.add(new OneMerge(infos.range(mergeStart, mergeEnd), useCompoundFile)); + spec.add(new OneMerge(infos.range(mergeStart, mergeEnd))); } else { if(partialExpunge) { SegmentInfo info = infos.info(mergeStart); @@ -210,7 +208,7 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy { if(partialExpunge && maxDelCount > 0) { // expunge deletes - spec.add(new OneMerge(infos.range(expungeCandidate, expungeCandidate + 1), useCompoundFile)); + spec.add(new OneMerge(infos.range(expungeCandidate, expungeCandidate + 1))); } return spec; @@ -260,7 +258,7 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy { for(int i = 0; i < numLargeSegs; i++) { SegmentInfo info = infos.info(i); if(info.hasDeletions()) { - spec.add(new OneMerge(infos.range(i, i + 1), getUseCompoundFile())); + spec.add(new OneMerge(infos.range(i, i + 1))); } } return spec; @@ -298,7 +296,7 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy { if(totalSmallSegSize < targetSegSize * 2) { MergeSpecification spec = findBalancedMerges(infos, numLargeSegs, (numLargeSegs - 1), _partialExpunge); if(spec == null) spec = new MergeSpecification(); // should not happen - spec.add(new OneMerge(infos.range(numLargeSegs, numSegs), getUseCompoundFile())); + spec.add(new OneMerge(infos.range(numLargeSegs, numSegs))); return spec; } else { return findBalancedMerges(infos, numSegs, numLargeSegs, _partialExpunge); @@ -313,7 +311,7 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy { if(size(info) < sizeThreshold) break; startSeg++; } - spec.add(new OneMerge(infos.range(startSeg, numSegs), getUseCompoundFile())); + spec.add(new OneMerge(infos.range(startSeg, numSegs))); return spec; } else { // apply the log merge policy to small segments. @@ -344,7 +342,7 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy { } } if (maxDelCount > 0) { - return new OneMerge(infos.range(expungeCandidate, expungeCandidate + 1), getUseCompoundFile()); + return new OneMerge(infos.range(expungeCandidate, expungeCandidate + 1)); } return null; } diff --git a/lucene/contrib/misc/src/java/org/apache/lucene/store/NativePosixUtil.cpp b/lucene/contrib/misc/src/java/org/apache/lucene/store/NativePosixUtil.cpp index ced785fff6d..7ccf7e7b445 100644 --- a/lucene/contrib/misc/src/java/org/apache/lucene/store/NativePosixUtil.cpp +++ b/lucene/contrib/misc/src/java/org/apache/lucene/store/NativePosixUtil.cpp @@ -97,9 +97,9 @@ JNIEXPORT jobject JNICALL Java_org_apache_lucene_store_NativePosixUtil_open_1dir fname = (char *) env->GetStringUTFChars(filename, NULL); if (readOnly) { - fd = open(fname, O_RDONLY | O_DIRECT); + fd = open(fname, O_RDONLY | O_DIRECT | O_NOATIME); } else { - fd = open(fname, O_RDWR | O_CREAT | O_DIRECT, 0666); + fd = open(fname, O_RDWR | O_CREAT | O_DIRECT | O_NOATIME, 0666); } //printf("open %s -> %d; ro %d\n", fname, fd, readOnly); fflush(stdout); diff --git a/lucene/contrib/misc/src/java/org/apache/lucene/store/WindowsDirectory.cpp b/lucene/contrib/misc/src/java/org/apache/lucene/store/WindowsDirectory.cpp new file mode 100644 index 00000000000..e6063b936f8 --- /dev/null +++ b/lucene/contrib/misc/src/java/org/apache/lucene/store/WindowsDirectory.cpp @@ -0,0 +1,175 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +#include +#include "windows.h" + +/** + * Windows Native IO methods. + */ +extern "C" { + +/** + * Utility to format a Windows system error code into an exception. + */ +void throwIOException(JNIEnv *env, DWORD error) +{ + jclass ioex; + char *msg; + + ioex = env->FindClass("java/io/IOException"); + + if (ioex != NULL) { + FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, + NULL, error, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPTSTR) &msg, 0, NULL ); + env->ThrowNew(ioex, msg); + LocalFree(msg); + } +} + +/** + * Utility to throw Exceptions on bad input + */ +void throwException(JNIEnv *env, const char *clazz, const char *msg) +{ + jclass exc = env->FindClass(clazz); + + if (exc != NULL) { + env->ThrowNew(exc, msg); + } +} + +/** + * Opens a handle to a file. + * + * Class: org_apache_lucene_store_WindowsDirectory + * Method: open + * Signature: (Ljava/lang/String;)J + */ +JNIEXPORT jlong JNICALL Java_org_apache_lucene_store_WindowsDirectory_open + (JNIEnv *env, jclass ignored, jstring filename) +{ + char *fname; + HANDLE handle; + + if (filename == NULL) { + throwException(env, "java/lang/NullPointerException", "filename cannot be null"); + return -1; + } + + fname = (char *) env->GetStringUTFChars(filename, NULL); + + if (fname == NULL) { + throwException(env, "java/lang/IllegalArgumentException", "invalid filename"); + return -1; + } + + handle = CreateFile(fname, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE, + NULL, OPEN_EXISTING, FILE_FLAG_RANDOM_ACCESS, NULL); + + env->ReleaseStringUTFChars(filename, fname); + + if (handle == INVALID_HANDLE_VALUE) { + throwIOException(env, GetLastError()); + return -1; + } + + return (jlong) handle; +} + +/** + * Reads data into the byte array, starting at offset, for length characters. + * The read is positioned at pos. 
+ * + * Class: org_apache_lucene_store_WindowsDirectory + * Method: read + * Signature: (J[BIIJ)I + */ +JNIEXPORT jint JNICALL Java_org_apache_lucene_store_WindowsDirectory_read + (JNIEnv *env, jclass ignored, jlong fd, jbyteArray bytes, jint offset, jint length, jlong pos) +{ + OVERLAPPED io = { 0 }; + DWORD numRead = -1; + + io.Offset = (DWORD) (pos & 0xFFFFFFFF); + io.OffsetHigh = (DWORD) ((pos >> 0x20) & 0x7FFFFFFF); + + if (bytes == NULL) { + throwException(env, "java/lang/NullPointerException", "bytes cannot be null"); + return -1; + } + + if (length <= 4096) { /* For small buffers, avoid GetByteArrayElements' copy */ + char buffer[length]; + + if (ReadFile((HANDLE) fd, &buffer, length, &numRead, &io)) { + env->SetByteArrayRegion(bytes, offset, numRead, (const jbyte *) buffer); + } else { + throwIOException(env, GetLastError()); + numRead = -1; + } + + } else { + jbyte *buffer = env->GetByteArrayElements (bytes, NULL); + + if (!ReadFile((HANDLE) fd, (void *)(buffer+offset), length, &numRead, &io)) { + throwIOException(env, GetLastError()); + numRead = -1; + } + + env->ReleaseByteArrayElements(bytes, buffer, numRead == 0 || numRead == -1 ? JNI_ABORT : 0); + } + + return numRead; +} + +/** + * Closes a handle to a file + * + * Class: org_apache_lucene_store_WindowsDirectory + * Method: close + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_apache_lucene_store_WindowsDirectory_close + (JNIEnv *env, jclass ignored, jlong fd) +{ + if (!CloseHandle((HANDLE) fd)) { + throwIOException(env, GetLastError()); + } +} + +/** + * Returns the length in bytes of a file. + * + * Class: org_apache_lucene_store_WindowsDirectory + * Method: length + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_apache_lucene_store_WindowsDirectory_length + (JNIEnv *env, jclass ignored, jlong fd) +{ + BY_HANDLE_FILE_INFORMATION info; + + if (GetFileInformationByHandle((HANDLE) fd, (LPBY_HANDLE_FILE_INFORMATION) &info)) { + return (jlong) (((DWORDLONG) info.nFileSizeHigh << 0x20) + info.nFileSizeLow); + } else { + throwIOException(env, GetLastError()); + return -1; + } +} + +} /* extern "C" */ diff --git a/lucene/contrib/misc/src/java/org/apache/lucene/store/WindowsDirectory.java b/lucene/contrib/misc/src/java/org/apache/lucene/store/WindowsDirectory.java new file mode 100644 index 00000000000..870ebfade1e --- /dev/null +++ b/lucene/contrib/misc/src/java/org/apache/lucene/store/WindowsDirectory.java @@ -0,0 +1,124 @@ +package org.apache.lucene.store; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +import java.io.File; +import java.io.IOException; + +/** + * Native {@link Directory} implementation for Microsoft Windows. + *
<p>
+ * Steps:
+ * <ol>
+ *   <li>Compile the source code to create WindowsDirectory.dll:
+ *       <blockquote>
+ * c:\mingw\bin\g++ -Wall -D_JNI_IMPLEMENTATION_ -Wl,--kill-at
+ * -I"%JAVA_HOME%\include" -I"%JAVA_HOME%\include\win32" -static-libgcc
+ * -static-libstdc++ -shared WindowsDirectory.cpp -o WindowsDirectory.dll
+ *       </blockquote>
+ * For 64-bit JREs, use mingw64, with the -m64 option.
+ *   <li>Put WindowsDirectory.dll into some directory in your windows PATH
+ *   <li>Open indexes with WindowsDirectory and use it.
+ * </ol>
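[Editorial sketch, not part of the patch: step 3 above in code. A minimal sketch that assumes WindowsDirectory.dll is already loadable per steps 1 and 2; the index path is hypothetical, and it uses only the WindowsDirectory(File) constructor the patch adds plus the era's IndexReader.open(Directory).]

  import java.io.File;

  import org.apache.lucene.index.IndexReader;
  import org.apache.lucene.store.Directory;
  import org.apache.lucene.store.WindowsDirectory;

  public class WindowsDirectoryUsage {
    public static void main(String[] args) throws Exception {
      // Hypothetical index location; the class's static initializer loads
      // WindowsDirectory.dll from java.library.path.
      Directory dir = new WindowsDirectory(new File("C:\\indexes\\demo"));
      IndexReader reader = IndexReader.open(dir);
      System.out.println("maxDoc=" + reader.maxDoc());
      reader.close();
      dir.close();
    }
  }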
    + * @lucene.experimental + */ +public class WindowsDirectory extends FSDirectory { + private static final int DEFAULT_BUFFERSIZE = 4096; /* default pgsize on ia32/amd64 */ + + static { + System.loadLibrary("WindowsDirectory"); + } + + /** Create a new WindowsDirectory for the named location. + * + * @param path the path of the directory + * @param lockFactory the lock factory to use, or null for the default + * ({@link NativeFSLockFactory}); + * @throws IOException + */ + public WindowsDirectory(File path, LockFactory lockFactory) throws IOException { + super(path, lockFactory); + } + + /** Create a new WindowsDirectory for the named location and {@link NativeFSLockFactory}. + * + * @param path the path of the directory + * @throws IOException + */ + public WindowsDirectory(File path) throws IOException { + super(path, null); + } + + public IndexInput openInput(String name, int bufferSize) throws IOException { + ensureOpen(); + return new WindowsIndexInput(new File(getDirectory(), name), Math.max(bufferSize, DEFAULT_BUFFERSIZE)); + } + + protected static class WindowsIndexInput extends BufferedIndexInput { + private final long fd; + private final long length; + boolean isClone; + boolean isOpen; + + public WindowsIndexInput(File file, int bufferSize) throws IOException { + super(bufferSize); + fd = WindowsDirectory.open(file.getPath()); + length = WindowsDirectory.length(fd); + isOpen = true; + } + + protected void readInternal(byte[] b, int offset, int length) throws IOException { + if (WindowsDirectory.read(fd, b, offset, length, getFilePointer()) != length) + throw new IOException("Read past EOF"); + } + + protected void seekInternal(long pos) throws IOException { + } + + public synchronized void close() throws IOException { + // NOTE: we synchronize and track "isOpen" because Lucene sometimes closes IIs twice! + if (!isClone && isOpen) { + WindowsDirectory.close(fd); + isOpen = false; + } + } + + public long length() { + return length; + } + + @Override + public Object clone() { + WindowsIndexInput clone = (WindowsIndexInput)super.clone(); + clone.isClone = true; + return clone; + } + } + + /** Opens a handle to a file. */ + private static native long open(String filename) throws IOException; + + /** Reads data from a file at pos into bytes */ + private static native int read(long fd, byte bytes[], int offset, int length, long pos) throws IOException; + + /** Closes a handle to a file */ + private static native void close(long fd) throws IOException; + + /** Returns the length of a file */ + private static native long length(long fd) throws IOException; +} diff --git a/lucene/contrib/misc/src/test/org/apache/lucene/index/TestIndexSplitter.java b/lucene/contrib/misc/src/test/org/apache/lucene/index/TestIndexSplitter.java index 24efd037b3e..9e4d20fb916 100644 --- a/lucene/contrib/misc/src/test/org/apache/lucene/index/TestIndexSplitter.java +++ b/lucene/contrib/misc/src/test/org/apache/lucene/index/TestIndexSplitter.java @@ -34,7 +34,15 @@ public class TestIndexSplitter extends LuceneTestCase { _TestUtil.rmDir(destDir); destDir.mkdirs(); FSDirectory fsDir = FSDirectory.open(dir); - IndexWriter iw = new IndexWriter(fsDir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.CREATE)); + + LogMergePolicy mergePolicy = new LogByteSizeMergePolicy(); + mergePolicy.setNoCFSRatio(1); + IndexWriter iw = new IndexWriter( + fsDir, + new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). + setOpenMode(OpenMode.CREATE). 
+ setMergePolicy(mergePolicy) + ); for (int x=0; x < 100; x++) { Document doc = TestIndexWriterReader.createDocument(x, "index", 5); iw.addDocument(doc); diff --git a/lucene/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/src/java/org/apache/lucene/index/IndexWriter.java index 0f111be36dd..83de1c1a969 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/src/java/org/apache/lucene/index/IndexWriter.java @@ -50,13 +50,13 @@ import java.util.Date; An IndexWriter creates and maintains an index.
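[Editorial sketch, not part of the patch: a minimal usage example consistent with the IndexWriterConfig-based constructors the javadoc hunks below now reference. The analyzer, RAMDirectory, and Version.LUCENE_CURRENT choices are assumptions for illustration only.]

  import org.apache.lucene.analysis.standard.StandardAnalyzer;
  import org.apache.lucene.document.Document;
  import org.apache.lucene.document.Field;
  import org.apache.lucene.index.IndexWriter;
  import org.apache.lucene.index.IndexWriterConfig;
  import org.apache.lucene.store.Directory;
  import org.apache.lucene.store.RAMDirectory;
  import org.apache.lucene.util.Version;

  public class IndexWriterConfigUsage {
    public static void main(String[] args) throws Exception {
      Directory dir = new RAMDirectory();
      // Tuning that used to live in IndexWriter setters/ctor args is now
      // configured up front on IndexWriterConfig.
      IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_CURRENT,
          new StandardAnalyzer(Version.LUCENE_CURRENT));
      IndexWriter writer = new IndexWriter(dir, conf);
      Document doc = new Document();
      doc.add(new Field("body", "hello world", Field.Store.YES, Field.Index.ANALYZED));
      writer.addDocument(doc); // buffered in RAM until a flush is triggered
      writer.commit();         // durably publishes the change to new readers
      writer.close();
      dir.close();
    }
  }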
<p>
    The create argument to the {@link - #IndexWriter(Directory, Analyzer, boolean, MaxFieldLength) constructor} determines + #IndexWriter(Directory, IndexWriterConfig) constructor} determines whether a new index is created, or whether an existing index is opened. Note that you can open an index with create=true even while readers are using the index. The old readers will continue to search the "point in time" snapshot they had opened, and won't see the newly created index until they re-open. There are - also {@link #IndexWriter(Directory, Analyzer, MaxFieldLength) constructors} + also {@link #IndexWriter(Directory, IndexWriterConfig) constructors} with no create argument which will create a new index if there is not already an index at the provided path and otherwise open the existing index.
</p> @@ -72,11 +72,11 @@ <p>
    These changes are buffered in memory and periodically flushed to the {@link Directory} (during the above method calls). A flush is triggered when there are enough - buffered deletes (see {@link #setMaxBufferedDeleteTerms}) + buffered deletes (see {@link IndexWriterConfig#setMaxBufferedDeleteTerms}) or enough added documents since the last flush, whichever is sooner. For the added documents, flushing is triggered either by RAM usage of the documents (see {@link - #setRAMBufferSizeMB}) or the number of added documents. + IndexWriterConfig#setRAMBufferSizeMB}) or the number of added documents. The default is to flush when RAM usage hits 16 MB. For best indexing speed you should flush by RAM usage with a large RAM buffer. Note that flushing just moves the @@ -1252,8 +1252,8 @@ public class IndexWriter implements Closeable { /** * Adds a document to this index. If the document contains more than - * {@link #setMaxFieldLength(int)} terms for a given field, the remainder are - * discarded. + * {@link IndexWriterConfig#setMaxFieldLength(int)} terms for a given field, + * the remainder are discarded. * *
<p>
    Note that if an Exception is hit (for example disk full) * then the index will be consistent, but this document @@ -1301,7 +1301,7 @@ public class IndexWriter implements Closeable { /** * Adds a document to this index, using the provided analyzer instead of the * value of {@link #getAnalyzer()}. If the document contains more than - * {@link #setMaxFieldLength(int)} terms for a given field, the remainder are + * {@link IndexWriterConfig#setMaxFieldLength(int)} terms for a given field, the remainder are * discarded. * *

    See {@link #addDocument(Document)} for details on @@ -1608,7 +1608,7 @@ public class IndexWriter implements Closeable { * * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error - * @see LogMergePolicy#findMergesForOptimize + * @see MergePolicy#findMergesForOptimize */ public void optimize() throws CorruptIndexException, IOException { optimize(true); @@ -2289,8 +2289,7 @@ public class IndexWriter implements Closeable { * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ - public void addIndexes(IndexReader... readers) - throws CorruptIndexException, IOException { + public void addIndexes(IndexReader... readers) throws CorruptIndexException, IOException { ensureOpen(); try { @@ -2303,47 +2302,33 @@ public class IndexWriter implements Closeable { int docCount = merger.merge(); // merge 'em - SegmentInfo info = null; - synchronized(this) { - info = new SegmentInfo(mergedName, docCount, directory, false, -1, - null, false, merger.hasProx(), merger.getSegmentCodecs()); - setDiagnostics(info, "addIndexes(IndexReader...)"); - segmentInfos.add(info); - checkpoint(); - - // Notify DocumentsWriter that the flushed count just increased - docWriter.updateFlushedDocCount(docCount); + SegmentInfo info = new SegmentInfo(mergedName, docCount, directory, + false, -1, null, false, merger.hasProx(), merger.getSegmentCodecs()); + setDiagnostics(info, "addIndexes(IndexReader...)"); + + boolean useCompoundFile; + synchronized(this) { // Guard segmentInfos + useCompoundFile = mergePolicy.useCompoundFile(segmentInfos, info); } // Now create the compound file if needed - if (mergePolicy instanceof LogMergePolicy && ((LogMergePolicy) mergePolicy).getUseCompoundFile()) { + if (useCompoundFile) { + merger.createCompoundFile(mergedName + ".cfs", info); + info.setUseCompoundFile(true); + + // delete new non cfs files directly: they were never + // registered with IFD + deleter.deleteNewFiles(merger.getMergedFiles(info)); + } - List files = null; - - synchronized(this) { - // Must incRef our files so that if another thread - // is running merge/optimize, it doesn't delete our - // segment's files before we have a chance to - // finish making the compound file. 
- if (segmentInfos.contains(info)) { - files = info.files(); - deleter.incRef(files); - } - } - - if (files != null) { - try { - merger.createCompoundFile(mergedName + ".cfs", info); - synchronized(this) { - info.setUseCompoundFile(true); - checkpoint(); - } - } finally { - synchronized(this) { - deleter.decRef(files); - } - } - } + // Register the new segment + synchronized(this) { + segmentInfos.add(info); + + // Notify DocumentsWriter that the flushed count just increased + docWriter.updateFlushedDocCount(docCount); + + checkpoint(); } } catch (OutOfMemoryError oom) { handleOOM(oom, "addIndexes(IndexReader...)"); @@ -3447,8 +3432,12 @@ public class IndexWriter implements Closeable { //System.out.println("merger set hasProx=" + merger.hasProx() + " seg=" + merge.info.name); merge.info.setHasProx(merger.hasProx()); - if (merge.useCompoundFile) { + boolean useCompoundFile; + synchronized (this) { // Guard segmentInfos + useCompoundFile = mergePolicy.useCompoundFile(segmentInfos, merge.info); + } + if (useCompoundFile) { success = false; final String compoundFileName = IndexFileNames.segmentFileName(mergedName, "", IndexFileNames.COMPOUND_FILE_EXTENSION); diff --git a/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java b/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java index 876d5f9271d..17e42350901 100644 --- a/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java +++ b/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java @@ -127,8 +127,21 @@ public abstract class LogMergePolicy extends MergePolicy { // Javadoc inherited @Override - public boolean useCompoundFile(SegmentInfos infos, SegmentInfo info) { - return useCompoundFile; + public boolean useCompoundFile(SegmentInfos infos, SegmentInfo mergedInfo) throws IOException { + final boolean doCFS; + + if (!useCompoundFile) { + doCFS = false; + } else if (noCFSRatio == 1.0) { + doCFS = true; + } else { + long totalSize = 0; + for (SegmentInfo info : infos) + totalSize += size(info); + + doCFS = size(mergedInfo) <= noCFSRatio * totalSize; + } + return doCFS; } /** Sets whether compound file format should be used for @@ -254,12 +267,12 @@ public abstract class LogMergePolicy extends MergePolicy { // unless there is only 1 which is optimized. if (last - start - 1 > 1 || (start != last - 1 && !isOptimized(infos.info(start + 1)))) { // there is more than 1 segment to the right of this one, or an unoptimized single segment. - spec.add(makeOneMerge(infos, infos.range(start + 1, last))); + spec.add(new OneMerge(infos.range(start + 1, last))); } last = start; } else if (last - start == mergeFactor) { // mergeFactor eligible segments were found, add them as a merge. - spec.add(makeOneMerge(infos, infos.range(start, last))); + spec.add(new OneMerge(infos.range(start, last))); last = start; } --start; @@ -267,7 +280,7 @@ public abstract class LogMergePolicy extends MergePolicy { // Add any left-over segments, unless there is just 1 already optimized. if (last > 0 && (++start + 1 < last || !isOptimized(infos.info(start)))) { - spec.add(makeOneMerge(infos, infos.range(start, last))); + spec.add(new OneMerge(infos.range(start, last))); } return spec.merges.size() == 0 ? 
null : spec; @@ -284,7 +297,7 @@ public abstract class LogMergePolicy extends MergePolicy { // First, enroll all "full" merges (size // mergeFactor) to potentially be run concurrently: while (last - maxNumSegments + 1 >= mergeFactor) { - spec.add(makeOneMerge(infos, infos.range(last-mergeFactor, last))); + spec.add(new OneMerge(infos.range(last - mergeFactor, last))); last -= mergeFactor; } @@ -296,7 +309,7 @@ public abstract class LogMergePolicy extends MergePolicy { // Since we must optimize down to 1 segment, the // choice is simple: if (last > 1 || !isOptimized(infos.info(0))) { - spec.add(makeOneMerge(infos, infos.range(0, last))); + spec.add(new OneMerge(infos.range(0, last))); } } else if (last > maxNumSegments) { @@ -325,7 +338,7 @@ public abstract class LogMergePolicy extends MergePolicy { } } - spec.add(makeOneMerge(infos, infos.range(bestStart, bestStart+finalMergeSize))); + spec.add(new OneMerge(infos.range(bestStart, bestStart + finalMergeSize))); } } return spec.merges.size() == 0 ? null : spec; @@ -413,7 +426,7 @@ public abstract class LogMergePolicy extends MergePolicy { // deletions, so force a merge now: if (verbose()) message(" add merge " + firstSegmentWithDeletions + " to " + (i-1) + " inclusive"); - spec.add(makeOneMerge(segmentInfos, segmentInfos.range(firstSegmentWithDeletions, i))); + spec.add(new OneMerge(segmentInfos.range(firstSegmentWithDeletions, i))); firstSegmentWithDeletions = i; } } else if (firstSegmentWithDeletions != -1) { @@ -422,7 +435,7 @@ public abstract class LogMergePolicy extends MergePolicy { // mergeFactor segments if (verbose()) message(" add merge " + firstSegmentWithDeletions + " to " + (i-1) + " inclusive"); - spec.add(makeOneMerge(segmentInfos, segmentInfos.range(firstSegmentWithDeletions, i))); + spec.add(new OneMerge(segmentInfos.range(firstSegmentWithDeletions, i))); firstSegmentWithDeletions = -1; } } @@ -430,7 +443,7 @@ public abstract class LogMergePolicy extends MergePolicy { if (firstSegmentWithDeletions != -1) { if (verbose()) message(" add merge " + firstSegmentWithDeletions + " to " + (numSegments-1) + " inclusive"); - spec.add(makeOneMerge(segmentInfos, segmentInfos.range(firstSegmentWithDeletions, numSegments))); + spec.add(new OneMerge(segmentInfos.range(firstSegmentWithDeletions, numSegments))); } return spec; @@ -530,7 +543,7 @@ public abstract class LogMergePolicy extends MergePolicy { spec = new MergeSpecification(); if (verbose()) message(" " + start + " to " + end + ": add this merge"); - spec.add(makeOneMerge(infos, infos.range(start, end))); + spec.add(new OneMerge(infos.range(start, end))); } else if (verbose()) message(" " + start + " to " + end + ": contains segment over maxMergeSize or maxMergeDocs; skipping"); @@ -544,29 +557,6 @@ public abstract class LogMergePolicy extends MergePolicy { return spec; } - protected OneMerge makeOneMerge(SegmentInfos infos, SegmentInfos infosToMerge) throws IOException { - final boolean doCFS; - if (!useCompoundFile) { - doCFS = false; - } else if (noCFSRatio == 1.0) { - doCFS = true; - } else { - - long totSize = 0; - for(SegmentInfo info : infos) { - totSize += size(info); - } - long mergeSize = 0; - for(SegmentInfo info : infosToMerge) { - mergeSize += size(info); - } - - doCFS = mergeSize <= noCFSRatio * totSize; - } - - return new OneMerge(infosToMerge, doCFS); - } - /**
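Restating the compound-file decision that replaces makeOneMerge above (a sketch of the same logic; size(info) is LogMergePolicy's existing size helper):

    // Build a CFS only if the merged segment would be no larger than
    // noCFSRatio of the total index size (noCFSRatio == 1.0 means always).
    long totalSize = 0;
    for (SegmentInfo si : infos) {
      totalSize += size(si);
    }
    boolean doCFS = useCompoundFile
        && (noCFSRatio == 1.0 || size(mergedInfo) <= noCFSRatio * totalSize);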

    Determines the largest segment (measured by * document count) that may be merged with other segments. * Small values (e.g., less than 10,000) are best for diff --git a/lucene/src/java/org/apache/lucene/index/MergePolicy.java b/lucene/src/java/org/apache/lucene/index/MergePolicy.java index 205a4060ba5..5be40254489 100644 --- a/lucene/src/java/org/apache/lucene/index/MergePolicy.java +++ b/lucene/src/java/org/apache/lucene/index/MergePolicy.java @@ -76,16 +76,14 @@ public abstract class MergePolicy implements java.io.Closeable { SegmentReader[] readers; // used by IndexWriter SegmentReader[] readersClone; // used by IndexWriter public final SegmentInfos segments; - public final boolean useCompoundFile; boolean aborted; Throwable error; boolean paused; - public OneMerge(SegmentInfos segments, boolean useCompoundFile) { + public OneMerge(SegmentInfos segments) { if (0 == segments.size()) throw new RuntimeException("segments must include at least one segment"); this.segments = segments; - this.useCompoundFile = useCompoundFile; } /** Record that an exception occurred while executing @@ -314,10 +312,9 @@ public abstract class MergePolicy implements java.io.Closeable { public abstract void close(); /** - * Returns true if a newly flushed (not from merge) - * segment should use the compound file format. + * Returns true if a new segment (regardless of its origin) should use the compound file format. */ - public abstract boolean useCompoundFile(SegmentInfos segments, SegmentInfo newSegment); + public abstract boolean useCompoundFile(SegmentInfos segments, SegmentInfo newSegment) throws IOException; /** * Returns true if the doc store files should use the diff --git a/lucene/src/java/org/apache/lucene/search/cache/ByteValuesCreator.java b/lucene/src/java/org/apache/lucene/search/cache/ByteValuesCreator.java index 04b092ab7a5..d28494b5db8 100644 --- a/lucene/src/java/org/apache/lucene/search/cache/ByteValuesCreator.java +++ b/lucene/src/java/org/apache/lucene/search/cache/ByteValuesCreator.java @@ -110,7 +110,6 @@ public class ByteValuesCreator extends CachedArrayCreator vals.values = new byte[maxDoc]; if (terms != null) { final TermsEnum termsEnum = terms.iterator(); - final Bits delDocs = MultiFields.getDeletedDocs(reader); OpenBitSet validBits = (hasOption(OPTION_CACHE_BITS)) ? 
new OpenBitSet( maxDoc ) : null; DocsEnum docs = null; try { @@ -120,7 +119,7 @@ public class ByteValuesCreator extends CachedArrayCreator break; } final byte termval = parser.parseByte(term); - docs = termsEnum.docs(delDocs, docs); + docs = termsEnum.docs(null, docs); while (true) { final int docID = docs.nextDoc(); if (docID == DocIdSetIterator.NO_MORE_DOCS) { @@ -137,7 +136,7 @@ public class ByteValuesCreator extends CachedArrayCreator } catch (FieldCache.StopFillCacheException stop) {} if( vals.valid == null ) { - vals.valid = checkMatchAllBits( delDocs, validBits, vals.numDocs, maxDoc ); + vals.valid = checkMatchAllBits( validBits, vals.numDocs, maxDoc ); } } if( vals.valid == null && vals.numDocs < 1 ) { diff --git a/lucene/src/java/org/apache/lucene/search/cache/CachedArrayCreator.java b/lucene/src/java/org/apache/lucene/search/cache/CachedArrayCreator.java index b65b61478ae..3129c755495 100644 --- a/lucene/src/java/org/apache/lucene/search/cache/CachedArrayCreator.java +++ b/lucene/src/java/org/apache/lucene/search/cache/CachedArrayCreator.java @@ -100,22 +100,13 @@ public abstract class CachedArrayCreator extends EntryCre /** * Utility function to help check what bits are valid */ - protected Bits checkMatchAllBits( Bits deleted, OpenBitSet valid, int numDocs, int maxDocs ) + protected Bits checkMatchAllBits( OpenBitSet valid, int numDocs, int maxDocs ) { if( numDocs != maxDocs ) { if( hasOption( OPTION_CACHE_BITS ) ) { - if( deleted == null ) { - for( int i=0; i extends EntryCre Terms terms = MultiFields.getTerms(reader, field); if (terms != null) { final TermsEnum termsEnum = terms.iterator(); - final Bits delDocs = MultiFields.getDeletedDocs(reader); OpenBitSet validBits = new OpenBitSet( reader.maxDoc() ); DocsEnum docs = null; while(true) { @@ -140,7 +130,7 @@ public abstract class CachedArrayCreator extends EntryCre if (term == null) { break; } - docs = termsEnum.docs(delDocs, docs); + docs = termsEnum.docs(null, docs); while (true) { final int docID = docs.nextDoc(); if (docID == DocIdSetIterator.NO_MORE_DOCS) { @@ -152,7 +142,7 @@ public abstract class CachedArrayCreator extends EntryCre vals.numTerms++; } - vals.valid = checkMatchAllBits( delDocs, validBits, vals.numDocs, reader.maxDoc() ); + vals.valid = checkMatchAllBits( validBits, vals.numDocs, reader.maxDoc() ); } if( vals.numDocs < 1 ) { vals.valid = new Bits.MatchNoBits( reader.maxDoc() ); diff --git a/lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java b/lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java index b204111353e..2f0bb06e79d 100644 --- a/lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java +++ b/lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java @@ -130,7 +130,6 @@ public class DocTermsIndexCreator extends EntryCreatorWithOptions if (terms != null) { final TermsEnum termsEnum = terms.iterator(); - final Bits delDocs = MultiFields.getDeletedDocs(reader); DocsEnum docs = null; while(true) { @@ -149,7 +148,7 @@ public class DocTermsIndexCreator extends EntryCreatorWithOptions termOrdToBytesOffset = termOrdToBytesOffset.resize(ArrayUtil.oversize(1+termOrd, 1)); } termOrdToBytesOffset.set(termOrd, bytes.copyUsingLengthPrefix(term)); - docs = termsEnum.docs(delDocs, docs); + docs = termsEnum.docs(null, docs); while (true) { final int docID = docs.nextDoc(); if (docID == DocIdSetIterator.NO_MORE_DOCS) { diff --git a/lucene/src/java/org/apache/lucene/search/cache/DoubleValuesCreator.java 
b/lucene/src/java/org/apache/lucene/search/cache/DoubleValuesCreator.java index de5171a00b4..a72a322e0dc 100644 --- a/lucene/src/java/org/apache/lucene/search/cache/DoubleValuesCreator.java +++ b/lucene/src/java/org/apache/lucene/search/cache/DoubleValuesCreator.java @@ -120,7 +120,6 @@ public class DoubleValuesCreator extends CachedArrayCreator vals.values = null; if (terms != null) { final TermsEnum termsEnum = terms.iterator(); - final Bits delDocs = MultiFields.getDeletedDocs(reader); OpenBitSet validBits = (hasOption(OPTION_CACHE_BITS)) ? new OpenBitSet( maxDoc ) : null; DocsEnum docs = null; try { @@ -130,7 +129,7 @@ public class DoubleValuesCreator extends CachedArrayCreator break; } final double termval = parser.parseDouble(term); - docs = termsEnum.docs(delDocs, docs); + docs = termsEnum.docs(null, docs); while (true) { final int docID = docs.nextDoc(); if (docID == DocIdSetIterator.NO_MORE_DOCS) { @@ -150,7 +149,7 @@ public class DoubleValuesCreator extends CachedArrayCreator } catch (FieldCache.StopFillCacheException stop) {} if( vals.valid == null ) { - vals.valid = checkMatchAllBits( delDocs, validBits, vals.numDocs, maxDoc ); + vals.valid = checkMatchAllBits( validBits, vals.numDocs, maxDoc ); } } diff --git a/lucene/src/java/org/apache/lucene/search/cache/FloatValuesCreator.java b/lucene/src/java/org/apache/lucene/search/cache/FloatValuesCreator.java index b111dfa5492..42191168361 100644 --- a/lucene/src/java/org/apache/lucene/search/cache/FloatValuesCreator.java +++ b/lucene/src/java/org/apache/lucene/search/cache/FloatValuesCreator.java @@ -121,7 +121,6 @@ public class FloatValuesCreator extends CachedArrayCreator vals.values = null; if (terms != null) { final TermsEnum termsEnum = terms.iterator(); - final Bits delDocs = MultiFields.getDeletedDocs(reader); OpenBitSet validBits = (hasOption(OPTION_CACHE_BITS)) ? new OpenBitSet( maxDoc ) : null; DocsEnum docs = null; try { @@ -131,7 +130,7 @@ public class FloatValuesCreator extends CachedArrayCreator break; } final float termval = parser.parseFloat(term); - docs = termsEnum.docs(delDocs, docs); + docs = termsEnum.docs(null, docs); while (true) { final int docID = docs.nextDoc(); if (docID == DocIdSetIterator.NO_MORE_DOCS) { @@ -151,7 +150,7 @@ public class FloatValuesCreator extends CachedArrayCreator } catch (FieldCache.StopFillCacheException stop) {} if( vals.valid == null ) { - vals.valid = checkMatchAllBits( delDocs, validBits, vals.numDocs, maxDoc ); + vals.valid = checkMatchAllBits( validBits, vals.numDocs, maxDoc ); } } diff --git a/lucene/src/java/org/apache/lucene/search/cache/IntValuesCreator.java b/lucene/src/java/org/apache/lucene/search/cache/IntValuesCreator.java index a739dca4048..287fcb43113 100644 --- a/lucene/src/java/org/apache/lucene/search/cache/IntValuesCreator.java +++ b/lucene/src/java/org/apache/lucene/search/cache/IntValuesCreator.java @@ -121,7 +121,6 @@ public class IntValuesCreator extends CachedArrayCreator vals.values = null; if (terms != null) { final TermsEnum termsEnum = terms.iterator(); - final Bits delDocs = MultiFields.getDeletedDocs(reader); OpenBitSet validBits = (hasOption(OPTION_CACHE_BITS)) ? 
new OpenBitSet( maxDoc ) : null; DocsEnum docs = null; try { @@ -131,7 +130,7 @@ public class IntValuesCreator extends CachedArrayCreator break; } final int termval = parser.parseInt(term); - docs = termsEnum.docs(delDocs, docs); + docs = termsEnum.docs(null, docs); while (true) { final int docID = docs.nextDoc(); if (docID == DocIdSetIterator.NO_MORE_DOCS) { @@ -151,7 +150,7 @@ public class IntValuesCreator extends CachedArrayCreator } catch (FieldCache.StopFillCacheException stop) {} if( vals.valid == null ) { - vals.valid = checkMatchAllBits( delDocs, validBits, vals.numDocs, maxDoc ); + vals.valid = checkMatchAllBits( validBits, vals.numDocs, maxDoc ); } } diff --git a/lucene/src/java/org/apache/lucene/search/cache/LongValuesCreator.java b/lucene/src/java/org/apache/lucene/search/cache/LongValuesCreator.java index 60174dab46c..f28eee7ce0d 100644 --- a/lucene/src/java/org/apache/lucene/search/cache/LongValuesCreator.java +++ b/lucene/src/java/org/apache/lucene/search/cache/LongValuesCreator.java @@ -121,7 +121,6 @@ public class LongValuesCreator extends CachedArrayCreator vals.values = null; if (terms != null) { final TermsEnum termsEnum = terms.iterator(); - final Bits delDocs = MultiFields.getDeletedDocs(reader); OpenBitSet validBits = (hasOption(OPTION_CACHE_BITS)) ? new OpenBitSet( maxDoc ) : null; DocsEnum docs = null; try { @@ -131,7 +130,7 @@ public class LongValuesCreator extends CachedArrayCreator break; } final long termval = parser.parseLong(term); - docs = termsEnum.docs(delDocs, docs); + docs = termsEnum.docs(null, docs); while (true) { final int docID = docs.nextDoc(); if (docID == DocIdSetIterator.NO_MORE_DOCS) { @@ -151,7 +150,7 @@ public class LongValuesCreator extends CachedArrayCreator } catch (FieldCache.StopFillCacheException stop) {} if( vals.valid == null ) { - vals.valid = checkMatchAllBits( delDocs, validBits, vals.numDocs, maxDoc ); + vals.valid = checkMatchAllBits( validBits, vals.numDocs, maxDoc ); } } diff --git a/lucene/src/java/org/apache/lucene/search/cache/ShortValuesCreator.java b/lucene/src/java/org/apache/lucene/search/cache/ShortValuesCreator.java index af723846aae..603fcbbb1f8 100644 --- a/lucene/src/java/org/apache/lucene/search/cache/ShortValuesCreator.java +++ b/lucene/src/java/org/apache/lucene/search/cache/ShortValuesCreator.java @@ -111,7 +111,6 @@ public class ShortValuesCreator extends CachedArrayCreator vals.values = new short[maxDoc]; if (terms != null) { final TermsEnum termsEnum = terms.iterator(); - final Bits delDocs = MultiFields.getDeletedDocs(reader); OpenBitSet validBits = (hasOption(OPTION_CACHE_BITS)) ? 
new OpenBitSet( maxDoc ) : null; DocsEnum docs = null; try { @@ -121,7 +120,7 @@ public class ShortValuesCreator extends CachedArrayCreator break; } final Short termval = parser.parseShort(term); - docs = termsEnum.docs(delDocs, docs); + docs = termsEnum.docs(null, docs); while (true) { final int docID = docs.nextDoc(); if (docID == DocIdSetIterator.NO_MORE_DOCS) { @@ -138,7 +137,7 @@ public class ShortValuesCreator extends CachedArrayCreator } catch (FieldCache.StopFillCacheException stop) {} if( vals.valid == null ) { - vals.valid = checkMatchAllBits( delDocs, validBits, vals.numDocs, maxDoc ); + vals.valid = checkMatchAllBits( validBits, vals.numDocs, maxDoc ); } } if( vals.valid == null && vals.numDocs < 1 ) { diff --git a/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java b/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java index 6b2714dfff7..e7b3eed046c 100755 --- a/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java +++ b/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java @@ -24,6 +24,9 @@ import java.util.List; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.Field.Index; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.Field.TermVector; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.index.codecs.mocksep.MockSepCodec; @@ -36,6 +39,7 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.MockDirectoryWrapper; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.Version; import org.apache.lucene.util._TestUtil; public class TestAddIndexes extends LuceneTestCase { @@ -1014,5 +1018,30 @@ public class TestAddIndexes extends LuceneTestCase { setFieldCodec("content", mockSepCodec.name); } } + + // LUCENE-2790: tests that the non CFS files were deleted by addIndexes + public void testNonCFSLeftovers() throws Exception { + Directory[] dirs = new Directory[2]; + for (int i = 0; i < dirs.length; i++) { + dirs[i] = new RAMDirectory(); + IndexWriter w = new IndexWriter(dirs[i], new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())); + Document d = new Document(); + d.add(new Field("c", "v", Store.YES, Index.ANALYZED, TermVector.YES)); + w.addDocument(d); + w.close(); + } + + IndexReader[] readers = new IndexReader[] { IndexReader.open(dirs[0]), IndexReader.open(dirs[1]) }; + + Directory dir = new RAMDirectory(); + IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()); + LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy(); + lmp.setNoCFSRatio(1.0); // Force creation of CFS + IndexWriter w3 = new IndexWriter(dir, conf); + w3.addIndexes(readers); + w3.close(); + + assertEquals("Only one compound segment should exist", 3, dir.listAll().length); + } } diff --git a/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java b/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java index cef3c30d69f..f178f058e53 100644 --- a/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java +++ b/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java @@ -527,12 +527,15 @@ public class TestBackwardsCompatibility extends LuceneTestCase { try { Directory dir = FSDirectory.open(new File(fullDir(outputDir))); + LogMergePolicy mergePolicy = 
newLogMergePolicy(true, 10); + mergePolicy.setNoCFSRatio(1); // This test expects all of its segments to be in CFS + IndexWriter writer = new IndexWriter( dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). setMaxBufferedDocs(-1). setRAMBufferSizeMB(16.0). - setMergePolicy(newLogMergePolicy(true, 10)) + setMergePolicy(mergePolicy) ); for(int i=0;i<35;i++) { addDoc(writer, i); diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexFileDeleter.java b/lucene/src/test/org/apache/lucene/index/TestIndexFileDeleter.java index 0ff5d34d962..025f7c061de 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexFileDeleter.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexFileDeleter.java @@ -40,18 +40,23 @@ public class TestIndexFileDeleter extends LuceneTestCase { public void testDeleteLeftoverFiles() throws IOException { MockDirectoryWrapper dir = newDirectory(); dir.setPreventDoubleWrite(false); + + LogMergePolicy mergePolicy = newLogMergePolicy(true, 10); + mergePolicy.setNoCFSRatio(1); // This test expects all of its segments to be in CFS + IndexWriter writer = new IndexWriter( dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). setMaxBufferedDocs(10). - setMergePolicy(newLogMergePolicy(true, 10)) + setMergePolicy(mergePolicy) ); + int i; for(i=0;i<35;i++) { addDoc(writer, i); } - ((LogMergePolicy) writer.getConfig().getMergePolicy()).setUseCompoundFile(false); - ((LogMergePolicy) writer.getConfig().getMergePolicy()).setUseCompoundDocStore(false); + mergePolicy.setUseCompoundFile(false); + mergePolicy.setUseCompoundDocStore(false); for(;i<45;i++) { addDoc(writer, i); } diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java b/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java index c41c765f6aa..ecbf3fa92fa 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java @@ -2479,10 +2479,14 @@ public class TestIndexWriter extends LuceneTestCase { public void testDeleteUnusedFiles() throws Exception { for(int iter=0;iter<2;iter++) { Directory dir = newDirectory(); + + LogMergePolicy mergePolicy = newLogMergePolicy(true); + mergePolicy.setNoCFSRatio(1); // This test expects all of its segments to be in CFS + IndexWriter w = new IndexWriter( dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). - setMergePolicy(newLogMergePolicy(true)) + setMergePolicy(mergePolicy) ); Document doc = new Document(); doc.add(newField("field", "go", Field.Store.NO, Field.Index.ANALYZED)); diff --git a/lucene/src/test/org/apache/lucene/search/TestSearchWithThreads.java b/lucene/src/test/org/apache/lucene/search/TestSearchWithThreads.java new file mode 100644 index 00000000000..97d6e86aa56 --- /dev/null +++ b/lucene/src/test/org/apache/lucene/search/TestSearchWithThreads.java @@ -0,0 +1,109 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicLong; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; + +public class TestSearchWithThreads extends LuceneTestCase { + + final int NUM_DOCS = 10000; + final int NUM_SEARCH_THREADS = 5; + final int RUN_TIME_MSEC = 1000 * RANDOM_MULTIPLIER; + + public void test() throws Exception { + final Directory dir = newDirectory(); + final RandomIndexWriter w = new RandomIndexWriter(random, dir); + + final long startTime = System.currentTimeMillis(); + + // TODO: replace w/ the @nightly test data; make this + // into an optional @nightly stress test + final Document doc = new Document(); + final Field body = newField("body", "", Field.Index.ANALYZED); + doc.add(body); + final StringBuilder sb = new StringBuilder(); + for(int docCount=0;docCount 0); + netSearch.addAndGet(totSearch); + } catch (Exception exc) { + failed.set(true); + throw new RuntimeException(exc); + } + } + }; + threads[threadID].setDaemon(true); + threads[threadID].start(); + } + + for(int threadID=0;threadID 0) { - sb.append("------------- Standard Output ---------------") + append("------------- Standard Output ---------------") .append(StringUtils.LINE_SEP) .append(systemOutput) .append("------------- ---------------- ---------------") @@ -152,7 +152,7 @@ public class LuceneJUnitResultFormatter implements JUnitResultFormatter { } if (systemError != null && systemError.length() > 0) { - sb.append("------------- Standard Error -----------------") + append("------------- Standard Error -----------------") .append(StringUtils.LINE_SEP) .append(systemError) .append("------------- ---------------- ---------------") @@ -163,7 +163,7 @@ public class LuceneJUnitResultFormatter implements JUnitResultFormatter { try { lock.obtain(5000); try { - out.write(sb.toString().getBytes()); + sb.writeTo(out); out.flush(); } finally { try { @@ -252,14 +252,29 @@ public class LuceneJUnitResultFormatter implements JUnitResultFormatter { endTest(test); } - sb.append(formatTest(test) + type); - sb.append(StringUtils.LINE_SEP); - sb.append(error.getMessage()); - sb.append(StringUtils.LINE_SEP); + append(formatTest(test) + type); + append(StringUtils.LINE_SEP); + append(error.getMessage()); + append(StringUtils.LINE_SEP); String strace = JUnitTestRunner.getFilteredTrace(error); - sb.append(strace); - sb.append(StringUtils.LINE_SEP); - sb.append(StringUtils.LINE_SEP); + append(strace); + append(StringUtils.LINE_SEP); + append(StringUtils.LINE_SEP); + } + + public LuceneJUnitResultFormatter append(String s) { + if (s == null) + s = "(null)"; + try { + sb.write(s.getBytes()); // intentionally use default charset, its a console. 
+ } catch (IOException e) { + throw new RuntimeException(e); + } + return this; + } + + public LuceneJUnitResultFormatter append(long l) { + return append(Long.toString(l)); } } From 5c2158a97f83b6bf66a5ada7b03a7eeb90004cc6 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Mon, 20 Dec 2010 16:41:51 +0000 Subject: [PATCH 021/116] added JavaDoc to DocValues git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1051201 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/lucene/index/values/DocValues.java | 108 ++++++++++++++++-- .../lucene/index/values/SourceCache.java | 4 +- 2 files changed, 100 insertions(+), 12 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/index/values/DocValues.java b/lucene/src/java/org/apache/lucene/index/values/DocValues.java index 4de684ce526..e9afea38ebb 100644 --- a/lucene/src/java/org/apache/lucene/index/values/DocValues.java +++ b/lucene/src/java/org/apache/lucene/index/values/DocValues.java @@ -20,47 +20,131 @@ import java.io.Closeable; import java.io.IOException; import java.util.Comparator; +import org.apache.lucene.index.Fields; +import org.apache.lucene.index.FieldsEnum; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.BytesRef; + + /** * + * @see FieldsEnum#docValues() + * @see Fields#docValues(String) * @lucene.experimental */ public abstract class DocValues implements Closeable { public static final DocValues[] EMPTY_ARRAY = new DocValues[0]; + private SourceCache cache = new SourceCache.DirectSourceCache(); + /** + * Returns an iterator that steps through all document values for this + * {@link DocValues} field instance. {@link DocValuesEnum} will skip documents + * without a value if applicable. + */ public DocValuesEnum getEnum() throws IOException { return getEnum(null); } + /** + * Returns an iterator that steps through all document values for this + * {@link DocValues} field instance. {@link DocValuesEnum} will skip documents + * without a value if applicable. + *
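A minimal consumption sketch for the iterator described here, assuming a DocValues instance named values; the NO_MORE_DOCS sentinel follows the DocIdSetIterator convention used throughout these diffs:

    DocValuesEnum docValuesEnum = values.getEnum();
    int doc;
    while ((doc = docValuesEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
      // consume the value for doc, e.g. through the enum's attributes
    }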

+ * If an {@link AttributeSource} is supplied to this method, the + * {@link DocValuesEnum} will use the given source to access implementation + * related attributes. + */ public abstract DocValuesEnum getEnum(AttributeSource attrSource) throws IOException; + /** + * Loads a new {@link Source} instance for this {@link DocValues} field + * instance. Source instances returned from this method are not cached. It is + * the caller's responsibility to maintain the instance and release its + * resources once the source is not needed anymore. + *

+ * This method will return null iff this {@link DocValues} represents a + * {@link SortedSource}. + *

+ * For managed {@link Source} instances, see {@link #getSource()}. + * + * @see #getSource() + * @see #setCache(SourceCache) + */ public abstract Source load() throws IOException; + /** + * Returns a {@link Source} instance through the current {@link SourceCache}. + * Iff no {@link Source} has been loaded into the cache so far, the source will + * be loaded through {@link #load()} and passed to the {@link SourceCache}. + * The caller of this method should not close the obtained {@link Source} + * instance unless it is not needed for the rest of its lifetime. + *
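The two access paths side by side (a sketch; values is an assumed DocValues instance):

    // Unmanaged: the caller owns this instance and must release it itself.
    Source direct = values.load();

    // Managed: served from the SourceCache and released when this
    // DocValues instance is closed.
    Source cached = values.getSource();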

    + * {@link Source} instances obtained from this method are closed / released + * from the cache once this {@link DocValues} instance is closed by the + * {@link IndexReader}, {@link Fields} or {@link FieldsEnum} the + * {@link DocValues} was created from. + *

+ * This method will return null iff this {@link DocValues} represents a + * {@link SortedSource}. + */ public Source getSource() throws IOException { return cache.load(this); } + /** + * Returns a {@link SortedSource} instance for this {@link DocValues} field + * instance like {@link #getSource()}. + *

+ * This method will return null iff this {@link DocValues} represents a + * {@link Source} instead of a {@link SortedSource}. + */ public SortedSource getSortedSorted(Comparator comparator) throws IOException { - return cache.laodSorted(this, comparator); + return cache.loadSorted(this, comparator); } + /** + * Loads and returns a {@link SortedSource} instance for this + * {@link DocValues} field instance like {@link #load()}. + *

+ * This method will return null iff this {@link DocValues} represents a + * {@link Source} instead of a {@link SortedSource}. + */ public SortedSource loadSorted(Comparator comparator) throws IOException { throw new UnsupportedOperationException(); } + /** + * Returns the {@link Type} of this {@link DocValues} instance. + */ public abstract Type type(); + /** + * Closes this {@link DocValues} instance. This method should only be called + * by the creator of this {@link DocValues} instance. API users should not + * close {@link DocValues} instances. + */ public void close() throws IOException { this.cache.close(this); } - + + /** + * Sets the {@link SourceCache} used by this {@link DocValues} instance. This + * method should be called before {@link #load()} or + * {@link #loadSorted(Comparator)} is called. All {@link Source} or + * {@link SortedSource} instances in the currently used cache will be closed + * before the new cache is installed. + *
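For example (a sketch; DirectSourceCache is the default cache shown earlier in this patch):

    // Install the cache before load()/loadSorted(); all Sources loaded
    // through the previous cache are closed when the new one is set.
    values.setCache(new SourceCache.DirectSourceCache());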

+ * Note: All instances previously obtained from {@link #load()} or + * {@link #loadSorted(Comparator)} will be closed. + */ public void setCache(SourceCache cache) { + assert cache != null : "cache must not be null"; synchronized (this.cache) { this.cache.close(this); this.cache = cache; @@ -68,9 +152,12 @@ public abstract class DocValues implements Closeable { } /** - * Source of integer (returned as java long), per document. The underlying - * implementation may use different numbers of bits per value; long is only - * used since it can handle all precisions. + * Source of per document values like long, double or {@link BytesRef} + * depending on the {@link DocValues} field's {@link Type}. Source + * implementations provide random access semantics similar to array lookups + * and typically are entirely memory resident. + *
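Array-style access together with the missing-value check might look like this (a sketch; docID is assumed to be a valid document number for the segment):

    Source source = values.getSource();
    MissingValue missing = source.getMissing();
    long v = source.getInt(docID);
    if (v == missing.longValue) {
      // no value was indexed for this document
    }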

    + * {@link Source} defines 3 {@link Type} //TODO finish this */ public static abstract class Source { protected final MissingValue missingValue = new MissingValue(); @@ -98,11 +185,11 @@ public abstract class DocValues implements Closeable { public DocValuesEnum getEnum() throws IOException { return getEnum(null); } - + public MissingValue getMissing() { return missingValue; } - + public abstract Type type(); public abstract DocValuesEnum getEnum(AttributeSource attrSource) @@ -132,7 +219,7 @@ public abstract class DocValues implements Closeable { @Override public int nextDoc() throws IOException { - if(pos == NO_MORE_DOCS) + if (pos == NO_MORE_DOCS) return NO_MORE_DOCS; return advance(pos + 1); } @@ -168,14 +255,15 @@ public abstract class DocValues implements Closeable { public final LookupResult getByValue(BytesRef value) { return getByValue(value, new BytesRef()); } + public abstract LookupResult getByValue(BytesRef value, BytesRef tmpRef); } - + public final static class MissingValue { public long longValue; public double doubleValue; public BytesRef bytesValue; - + public final void copy(MissingValue values) { longValue = values.longValue; doubleValue = values.doubleValue; diff --git a/lucene/src/java/org/apache/lucene/index/values/SourceCache.java b/lucene/src/java/org/apache/lucene/index/values/SourceCache.java index 07e33633b44..81ef54f4677 100644 --- a/lucene/src/java/org/apache/lucene/index/values/SourceCache.java +++ b/lucene/src/java/org/apache/lucene/index/values/SourceCache.java @@ -31,7 +31,7 @@ import org.apache.lucene.util.BytesRef; public abstract class SourceCache { public abstract Source load(DocValues values) throws IOException; - public abstract SortedSource laodSorted(DocValues values, + public abstract SortedSource loadSorted(DocValues values, Comparator comp) throws IOException; public abstract void invalidate(DocValues values); @@ -50,7 +50,7 @@ public abstract class SourceCache { return ref; } - public synchronized SortedSource laodSorted(DocValues values, + public synchronized SortedSource loadSorted(DocValues values, Comparator comp) throws IOException { if (sortedRef == null) sortedRef = values.loadSorted(comp); From 55612869048d232d70e9d8a51e3738efc61256b9 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Sun, 2 Jan 2011 18:49:23 +0000 Subject: [PATCH 022/116] added javadoc & cleaned up some API git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1054457 13f79535-47bb-0310-9956-ffa450edef68 --- .../codecs/docvalues/DocValuesCodec.java | 4 +- .../codecs/docvalues/DocValuesConsumer.java | 110 ++++++++--- .../docvalues/DocValuesProducerBase.java | 72 ++++++- .../org/apache/lucene/index/values/Bytes.java | 179 ++++++++++++------ .../apache/lucene/index/values/DocValues.java | 119 +++++++++++- .../lucene/index/values/DocValuesEnum.java | 18 +- .../index/values/FixedStraightBytesImpl.java | 6 +- .../apache/lucene/index/values/Floats.java | 11 +- .../index/values/VarSortedBytesImpl.java | 1 - .../apache/lucene/index/values/Writer.java | 143 ++++++++++++-- 10 files changed, 522 insertions(+), 141 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java index 935e3247229..abe247d4936 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java @@ -104,11 +104,11 @@ public class DocValuesCodec extends 
Codec { if(info == null) { info = new DocValuesCodecInfo(); } - final DocValuesConsumer consumer = DocValuesConsumer.create(info.docValuesId(state.segmentName, state.codecId, "" + final DocValuesConsumer consumer = Writer.create(field.getDocValues(), info.docValuesId(state.segmentName, state.codecId, "" + field.number), // TODO can we have a compound file per segment and codec for // docvalues? - state.directory, field, comparator, state.bytesUsed); + state.directory, comparator, state.bytesUsed); info.add(field.number); return consumer; } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesConsumer.java b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesConsumer.java index b89e7ace246..ac2b5b7d63d 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesConsumer.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesConsumer.java @@ -18,43 +18,89 @@ package org.apache.lucene.index.codecs.docvalues; */ import java.io.IOException; import java.util.Collection; -import java.util.Comparator; import java.util.concurrent.atomic.AtomicLong; -import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.values.DocValues; import org.apache.lucene.index.values.PerDocFieldValues; import org.apache.lucene.index.values.Writer; -import org.apache.lucene.store.Directory; import org.apache.lucene.util.Bits; -import org.apache.lucene.util.BytesRef; /** + * Abstract API that consumes {@link PerDocFieldValues}. + * A {@link DocValuesConsumer} is always associated with a specific field and + * segment. Concrete implementations of this API write the given + * {@link PerDocFieldValues} into an implementation-specific format depending on + * the field's meta-data. + * * @lucene.experimental */ -// TODO this might need to go in the codec package since is a direct relative to -// TermsConsumer public abstract class DocValuesConsumer { - - protected AtomicLong bytesUsed = new AtomicLong(0); - + // TODO this might need to go in the codec package since it is a direct relative + // to TermsConsumer + protected final AtomicLong bytesUsed; + + /** + * Creates a new {@link DocValuesConsumer}. + * + * @param bytesUsed + * bytes-usage tracking reference used by implementation to track + * internally allocated memory. All tracked bytes must be released + * once {@link #finish(int)} has been called. + */ protected DocValuesConsumer(AtomicLong bytesUsed) { - this.bytesUsed = bytesUsed; + this.bytesUsed = bytesUsed == null ? new AtomicLong(0) : bytesUsed; } - public final long bytesUsed() { - return this.bytesUsed.get(); - } - - public abstract void add(int docID, PerDocFieldValues docValues) throws IOException; + /** + * Adds the given {@link PerDocFieldValues} instance to this + * {@link DocValuesConsumer}. + * + * @param docID + * the document ID to add the value for. The docID must always + * increase or be 0 if it is the first call to this method. + * @param docValues + * the values to add + * @throws IOException + * if an {@link IOException} occurs + */ + public abstract void add(int docID, PerDocFieldValues docValues) + throws IOException; + /** + * Called when the consumer of this API is done with adding + * {@link PerDocFieldValues} to this {@link DocValuesConsumer}. + * + * @param docCount + * the total number of documents in this {@link DocValuesConsumer}. + * Must be greater than or equal to the last docID given to + * {@link #add(int, PerDocFieldValues)}. 
+ * @throws IOException + */ public abstract void finish(int docCount) throws IOException; + /** + * Gathers files associated with this {@link DocValuesConsumer}. + * + * @param files + * the list of files to add the consumer's files to. + */ public abstract void files(Collection files) throws IOException; + /** + * Merges the given {@link org.apache.lucene.index.codecs.MergeState} into + * this {@link DocValuesConsumer}. + * + * @param mergeState + * the state to merge + * @param values + * the docValues to merge in + * @throws IOException + * if an {@link IOException} occurs + */ public void merge(org.apache.lucene.index.codecs.MergeState mergeState, DocValues values) throws IOException { + assert mergeState != null; // TODO we need some kind of compatibility notation for values such // that two slightly different segments can be merged eg. fixed vs. // variable byte len or float32 vs. float64 @@ -74,20 +120,40 @@ public abstract class DocValuesConsumer { } docBase += reader.numDocs(); } - if (merged) + if (merged) { finish(mergeState.mergedDocCount); + } } + /** + * Merges the given {@link MergeState} into this {@link DocValuesConsumer}. + * {@link MergeState#docBase} must always be increasing. Merging segments out + * of order is not supported. + * + * @param mergeState + * the {@link MergeState} to merge + * @throws IOException + * if an {@link IOException} occurs + */ protected abstract void merge(MergeState mergeState) throws IOException; - /* - * specialized auxiliary MergeState is necessary since we don't want to - * exploit internals up to the codec ones + /** + * Specialized auxiliary MergeState is necessary since we don't want to + * exploit internals up to the codec's consumer. An instance of this class is + * created for each merged low level {@link IndexReader} we are merging to + * support low level bulk copies. */ public static class MergeState { + /** + * the source reader for this MergeState - merged values should be read from + * this instance + */ public final DocValues reader; + /** the absolute docBase for this MergeState within the resulting segment */ public final int docBase; + /** the number of documents in this MergeState */ public final int docCount; + /** the deleted bits for this MergeState */ public final Bits bits; public MergeState(DocValues reader, int docBase, int docCount, Bits bits) { @@ -98,10 +164,4 @@ public abstract class DocValuesConsumer { this.bits = bits; } } - - public static DocValuesConsumer create(String id, - Directory directory, FieldInfo field, Comparator comp, AtomicLong bytesUsed) - throws IOException { - return Writer.create(field.getDocValues(), id, directory, comp, bytesUsed); - } } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java index 4cf1d5a916b..b771a86ba65 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java @@ -1,4 +1,5 @@ package org.apache.lucene.index.codecs.docvalues; + /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -32,18 +33,41 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.util.IntsRef; /** + * Abstract base class for FieldsProducer implementations supporting + * {@link DocValues}. 
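The add/finish contract spelled out above could be exercised like this (a sketch; hasValue and valuesFor are hypothetical helpers standing in for the indexing chain):

    // docIDs must be passed in increasing order, starting at or after 0.
    for (int docID = 0; docID < docCount; docID++) {
      if (hasValue(docID)) {                    // hypothetical predicate
        consumer.add(docID, valuesFor(docID));  // hypothetical PerDocFieldValues lookup
      }
    }
    consumer.finish(docCount); // docCount >= the last docID added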
+ * * @lucene.experimental */ -public abstract class DocValuesProducerBase extends FieldsProducer{ - +public abstract class DocValuesProducerBase extends FieldsProducer { + protected final TreeMap docValues = new TreeMap(); private final DocValuesCodecInfo info = new DocValuesCodecInfo(); - protected DocValuesProducerBase(SegmentInfo si, Directory dir, FieldInfos fieldInfo, String codecId) throws IOException { + /** + * Creates a new {@link DocValuesProducerBase} instance and loads all + * {@link DocValues} instances for this segment and codec. + * + * @param si + * the segment info to load the {@link DocValues} for. + * @param dir + * the directory to load the {@link DocValues} from. + * @param fieldInfo + * the {@link FieldInfos} + * @param codecId + * the codec ID + * @throws IOException + * if an {@link IOException} occurs + */ + protected DocValuesProducerBase(SegmentInfo si, Directory dir, + FieldInfos fieldInfo, String codecId) throws IOException { info.read(dir, si, codecId); load(fieldInfo, si.name, si.docCount, dir, codecId); } + /** + * Returns a {@link DocValues} instance for the given field name or + * null if this field has no {@link DocValues}. + */ @Override public DocValues docValues(String field) throws IOException { return docValues.get(field); @@ -58,15 +82,35 @@ public abstract class DocValuesProducerBase extends FieldsProducer{ final FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber); assert fieldInfo.hasDocValues(); final String field = fieldInfo.name; - //TODO can we have a compound file per segment and codec for docvalues? - final String id = info.docValuesId( segment, codecId, fieldNumber+""); - docValues.put(field, loadDocValues(docCount, dir, id, fieldInfo.getDocValues())); + // TODO can we have a compound file per segment and codec for docvalues? + final String id = info.docValuesId(segment, codecId, fieldNumber + ""); + docValues.put(field, loadDocValues(docCount, dir, id, fieldInfo + .getDocValues())); } } + /** + * Loads a {@link DocValues} instance depending on the given {@link Type}. + * Codecs that use different implementations for a certain {@link Type} can + * simply override this method and return their custom implementations. 
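A codec could hook a custom reader in for a single Type along these lines (a sketch; CustomFloatValues is a hypothetical implementation):

    @Override
    protected DocValues loadDocValues(int docCount, Directory dir, String id,
        Type type) throws IOException {
      if (type == Type.SIMPLE_FLOAT_4BYTE) {
        return new CustomFloatValues(dir, id, docCount); // hypothetical reader
      }
      return super.loadDocValues(docCount, dir, id, type);
    }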
+ * + * @param docCount + * number of documents in the segment + * @param dir + * the {@link Directory} to load the {@link DocValues} from + * @param id + * the unique file ID within the segment + * @param type + * the type to load + * @return a {@link DocValues} instance for the given type + * @throws IOException + * if an {@link IOException} occurs + * @throws IllegalArgumentException + * if the given {@link Type} is not supported + */ protected DocValues loadDocValues(int docCount, Directory dir, String id, - Type v) throws IOException { - switch (v) { + Type type) throws IOException { + switch (type) { case PACKED_INTS: return Ints.getValues(dir, id, false); case SIMPLE_FLOAT_4BYTE: @@ -86,15 +130,23 @@ public abstract class DocValuesProducerBase extends FieldsProducer{ case BYTES_VAR_SORTED: return Bytes.getValues(dir, id, Bytes.Mode.SORTED, false, docCount); default: - throw new IllegalStateException("unrecognized index values mode " + v); + throw new IllegalStateException("unrecognized index values mode " + type); } } @Override public void close() throws IOException { Collection values = docValues.values(); + IOException ex = null; for (DocValues docValues : values) { - docValues.close(); + try { + docValues.close(); + } catch (IOException e) { + ex = e; + } + } + if (ex != null) { + throw ex; } } } diff --git a/lucene/src/java/org/apache/lucene/index/values/Bytes.java b/lucene/src/java/org/apache/lucene/index/values/Bytes.java index 65346296f0f..5521e373425 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Bytes.java +++ b/lucene/src/java/org/apache/lucene/index/values/Bytes.java @@ -38,17 +38,19 @@ import org.apache.lucene.util.CodecUtil; import org.apache.lucene.util.PagedBytes; /** - * Provides concrete Writer/Reader impls for byte[] value per document. There - * are 6 package-private impls of this, for all combinations of - * STRAIGHT/DEREF/SORTED X fixed/not fixed. + * Provides concrete Writer/Reader implementations for byte[] value per + * document. There are 6 package-private default implementations of this, for + * all combinations of {@link Mode#DEREF}/{@link Mode#STRAIGHT}/ + * {@link Mode#SORTED} x fixed-length/variable-length. * *

    - * NOTE: The total amount of byte[] data stored (across a single segment) cannot - * exceed 2GB. + * NOTE: Currently the total amount of byte[] data stored (across a single + * segment) cannot exceed 2GB. *

    *

    * NOTE: Each byte[] must be <= 32768 bytes in length *
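Putting the factory pair described here together (a sketch; the id "seg0_42" is a placeholder, and passing a null comparator falls back to the UTF-8 order as getWriter below shows):

    // Write side: a sorted, fixed-size byte[] writer for one field of one segment.
    Writer writer = Bytes.getWriter(dir, "seg0_42", Bytes.Mode.SORTED,
        null, true, new AtomicLong());

    // Read side: mode and fixedSize must match what the values were written with.
    DocValues values = Bytes.getValues(dir, "seg0_42", Bytes.Mode.SORTED, true, maxDoc);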

+ * * @lucene.experimental */ public final class Bytes { private Bytes() { /* don't instantiate! */ } /** - * - * + * Defines the {@link Writer}'s store mode. The writer will either store the + * bytes sequentially ({@link #STRAIGHT}), dereferenced ({@link #DEREF}) or + * sorted ({@link #SORTED}). + * */ public static enum Mode { - STRAIGHT, DEREF, SORTED + /** + * Mode for sequentially stored bytes + */ + STRAIGHT, + /** + * Mode for dereferenced stored bytes + */ + DEREF, + /** + * Mode for sorted stored bytes + */ + SORTED }; - // TODO -- i shouldn't have to specify fixed? can - // track itself & do the write thing at write time? + /** + * Creates a new byte[] {@link Writer} instance for the given + * directory. + * + * @param dir + * the directory to write the values to + * @param id + * the id used to create a unique file name. Usually composed out of + * the segment name and a unique id per segment. + * @param mode + * the writer's store mode + * @param comp + * a {@link BytesRef} comparator - only used with {@link Mode#SORTED} + * @param fixedSize + * true if all bytes subsequently passed to the + * {@link Writer} will have the same length + * @param bytesUsed + * an {@link AtomicLong} instance to track the used bytes within the + * {@link Writer}. A call to {@link Writer#finish(int)} will release + * all internally used resources and free the memory tracking + * reference. + * @return a new {@link Writer} instance + * @throws IOException + * if the files for the writer cannot be created. + */ public static Writer getWriter(Directory dir, String id, Mode mode, - Comparator comp, boolean fixedSize, AtomicLong bytesUsed) throws IOException { - + Comparator comp, boolean fixedSize, AtomicLong bytesUsed) + throws IOException { + // TODO -- i shouldn't have to specify fixed? can + // track itself & do the write thing at write time? if (comp == null) { comp = BytesRef.getUTF8SortedAsUnicodeComparator(); } @@ -93,22 +133,35 @@ public final class Bytes { throw new IllegalArgumentException(""); } - // TODO -- I can peek @ header to determing fixed/mode? + /** + * Creates a new {@link DocValues} instance that provides either memory + * resident or iterative access to a per-document stored byte[] + * value. The returned {@link DocValues} instance will be initialized without + * consuming a significant amount of memory. + * + * @param dir + * the directory to load the {@link DocValues} from. + * @param id + * the file ID in the {@link Directory} to load the values from. + * @param mode + * the mode used to store the values + * @param fixedSize + * true iff the values are stored with fixed-size, + * otherwise false + * @param maxDoc + * the number of document values stored for the given ID + * @return an initialized {@link DocValues} instance. + * @throws IOException + * if an {@link IOException} occurs + */ public static DocValues getValues(Directory dir, String id, Mode mode, boolean fixedSize, int maxDoc) throws IOException { + // TODO -- I can peek @ header to determine fixed/mode? 
if (fixedSize) { if (mode == Mode.STRAIGHT) { - try { - return new FixedStraightBytesImpl.Reader(dir, id, maxDoc); - } catch (IOException e) { - throw e; - } + return new FixedStraightBytesImpl.Reader(dir, id, maxDoc); } else if (mode == Mode.DEREF) { - try { - return new FixedDerefBytesImpl.Reader(dir, id, maxDoc); - } catch (IOException e) { - throw e; - } + return new FixedDerefBytesImpl.Reader(dir, id, maxDoc); } else if (mode == Mode.SORTED) { return new FixedSortedBytesImpl.Reader(dir, id, maxDoc); } @@ -122,7 +175,7 @@ public final class Bytes { } } - throw new IllegalArgumentException(""); + throw new IllegalArgumentException("Illegal Mode: " + mode); } static abstract class BytesBaseSource extends Source { @@ -146,21 +199,23 @@ public final class Bytes { } public void close() throws IOException { - data.close(); try { - if (datIn != null) - datIn.close(); + data.close(); // close data } finally { - if (idxIn != null) // if straight - no index needed - idxIn.close(); + try { + if (datIn != null) + datIn.close(); + } finally { + if (idxIn != null) // if straight - no index needed + idxIn.close(); + } } } - - protected abstract int maxDoc(); - public long ramBytesUsed() { - return 0; // TODO - } + /** + * Returns one greater than the largest possible document number. + */ + protected abstract int maxDoc(); @Override public DocValuesEnum getEnum(AttributeSource attrSource) throws IOException { @@ -214,10 +269,13 @@ public final class Bytes { } public void close() throws IOException { - if (datIn != null) - datIn.close(); - if (idxIn != null) // if straight - idxIn.close(); + try { + if (datIn != null) + datIn.close(); + } finally { + if (idxIn != null) // if straight + idxIn.close(); + } } protected abstract int maxDoc(); @@ -318,12 +376,18 @@ public final class Bytes { @Override public synchronized void finish(int docCount) throws IOException { - if (datOut != null) - datOut.close(); - if (idxOut != null) - idxOut.close(); - if (pool != null) - pool.reset(); + try { + if (datOut != null) + datOut.close(); + } finally { + try { + if (idxOut != null) + idxOut.close(); + } finally { + if (pool != null) + pool.reset(); + } + } } @Override @@ -331,7 +395,6 @@ public final class Bytes { add(docID, bytesRef); } - @Override public void add(int docID, PerDocFieldValues docValues) throws IOException { final BytesRef ref; @@ -339,7 +402,7 @@ public final class Bytes { add(docID, ref); } } - + @Override protected void setNextEnum(DocValuesEnum valuesEnum) { bytesRef = valuesEnum.bytes(); @@ -391,19 +454,25 @@ public final class Bytes { return (IndexInput) datIn.clone(); } - protected final IndexInput cloneIndex() { // TODO assert here for null - // rather than return null - return idxIn == null ? 
null : (IndexInput) idxIn.clone(); + protected final IndexInput cloneIndex() { + assert idxIn != null; + return (IndexInput) idxIn.clone(); } @Override public void close() throws IOException { - super.close(); - if (datIn != null) { - datIn.close(); - } - if (idxIn != null) { - idxIn.close(); + try { + super.close(); + } finally { + try { + if (datIn != null) { + datIn.close(); + } + } finally { + if (idxIn != null) { + idxIn.close(); + } + } } } } diff --git a/lucene/src/java/org/apache/lucene/index/values/DocValues.java b/lucene/src/java/org/apache/lucene/index/values/DocValues.java index e9afea38ebb..f6d928ca734 100644 --- a/lucene/src/java/org/apache/lucene/index/values/DocValues.java +++ b/lucene/src/java/org/apache/lucene/index/values/DocValues.java @@ -26,9 +26,8 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.BytesRef; - /** - * + * TODO * @see FieldsEnum#docValues() * @see Fields#docValues(String) * @lucene.experimental @@ -157,52 +156,117 @@ public abstract class DocValues implements Closeable { * implementations provide random access semantics similar to array lookups * and typically are entirely memory resident. *

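+ * 
+ * A minimal read sketch, assuming access through {@link Fields#docValues(String)}
+ * (the field name and reader-side setup are illustrative only):
+ * 
+ *  DocValues docValues = fields.docValues("popularity");
+ *  Source source = docValues.getSource(); // cached, entirely memory resident
+ *  long value = source.getInt(docID);
+ *  if (value == source.getMissing().longValue) {
+ *    // the document had no value assigned during indexing
+ *  }
+ * 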
- * {@link Source} defines 3 {@link Type} //TODO finish this + * {@link Source} defines 3 {@link Type} //TODO finish this */ public static abstract class Source { protected final MissingValue missingValue = new MissingValue(); + /** + * Returns a long for the given document id or throws an + * {@link UnsupportedOperationException} if this source doesn't support + * long values. + * + * @throws UnsupportedOperationException + * if this source doesn't support long values. + * @see MissingValue + * @see #getMissing() + */ public long getInt(int docID) { throw new UnsupportedOperationException("ints are not supported"); } + /** + * Returns a double for the given document id or throws an + * {@link UnsupportedOperationException} if this source doesn't support + * double values. + * + * @throws UnsupportedOperationException + * if this source doesn't support double values. + * @see MissingValue + * @see #getMissing() + */ public double getFloat(int docID) { throw new UnsupportedOperationException("floats are not supported"); } + /** + * Returns a {@link BytesRef} for the given document id or throws an + * {@link UnsupportedOperationException} if this source doesn't support + * byte[] values. + * + * @throws UnsupportedOperationException + * if this source doesn't support byte[] values. + * @see MissingValue + * @see #getMissing() + */ public BytesRef getBytes(int docID, BytesRef ref) { throw new UnsupportedOperationException("bytes are not supported"); } /** - * Returns number of unique values. Some impls may throw + * Returns number of unique values. Some implementations may throw * UnsupportedOperationException. */ public int getValueCount() { throw new UnsupportedOperationException(); } + /** + * Returns a {@link DocValuesEnum} for this source. + */ public DocValuesEnum getEnum() throws IOException { return getEnum(null); } + /** + * Returns a {@link MissingValue} instance for this {@link Source}. + * Depending on the type of this {@link Source} consumers of the API should + * check if the value returned from one of the getter methods represents an + * actual value or rather the value for a document that had no value + * assigned during indexing. + */ public MissingValue getMissing() { return missingValue; } + /** + * Returns the {@link Type} of this source. + * + * @return the {@link Type} of this source. + */ public abstract Type type(); + /** + * Returns a {@link DocValuesEnum} for this source which uses the given + * {@link AttributeSource}. + */ public abstract DocValuesEnum getEnum(AttributeSource attrSource) throws IOException; - } - abstract static class SourceEnum extends DocValuesEnum { + /** + * {@link DocValuesEnum} utility for {@link Source} implementations. + * + */ + public abstract static class SourceEnum extends DocValuesEnum { protected final Source source; protected final int numDocs; protected int pos = -1; - SourceEnum(AttributeSource attrs, Type type, Source source, int numDocs) { + /** + * Creates a new {@link SourceEnum} + * + * @param attrs + * the {@link AttributeSource} for this enum + * @param type + * the enum's {@link Type} + * @param source + * the source this enum operates on + * @param numDocs + * the number of documents within the source + */ + protected SourceEnum(AttributeSource attrs, Type type, Source source, + int numDocs) { super(attrs, type); this.source = source; this.numDocs = numDocs; @@ -225,6 +289,12 @@ public abstract class DocValues implements Closeable { } } + /** + * A sorted variant of {@link Source} for byte[] values per document. 
+ *

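+ * 
+ * A lookup sketch, assuming a {@link SortedSource} obtained via
+ * {@link DocValues#loadSorted(Comparator)} (the comparator and value are
+ * illustrative):
+ * 
+ *  SortedSource sorted = docValues.loadSorted(BytesRef.getUTF8SortedAsUnicodeComparator());
+ *  LookupResult result = sorted.getByValue(new BytesRef("lucene"));
+ *  if (result.found) {
+ *    BytesRef spare = sorted.getByOrd(result.ord, new BytesRef());
+ *  }
+ * 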
+ * Note: {@link DocValuesEnum} obtained from a {@link SortedSource} will + * enumerate values in document order and not in sorted order. + */ public static abstract class SortedSource extends Source { @Override @@ -243,27 +313,56 @@ public abstract class DocValues implements Closeable { public abstract BytesRef getByOrd(int ord, BytesRef bytesRef); public static class LookupResult { + /** true iff the value was found */ public boolean found; + /** + * the ordinal of the value if found, or the ordinal the value would + * have if it were present in the source + */ public int ord; } /** - * Finds the largest ord whose value is <= the requested value. If - * {@link LookupResult#found} is true, then ord is an exact match. The - * returned {@link LookupResult} may be reused across calls. + * Finds the largest ord whose value is less than or equal to the requested + * value. If {@link LookupResult#found} is true, then ord is an exact match. + * The returned {@link LookupResult} may be reused across calls. */ public final LookupResult getByValue(BytesRef value) { return getByValue(value, new BytesRef()); } + /** + * Performs a lookup by value. + * + * @param value + * the value to look up + * @param tmpRef + * a temporary {@link BytesRef} instance used to compare internal + * values to the given value. Must not be null + * @return the {@link LookupResult} + */ public abstract LookupResult getByValue(BytesRef value, BytesRef tmpRef); } + /** + * {@link MissingValue} is used by {@link Source} implementations to define an + * implementation-dependent value for documents that had no value assigned + * during indexing. Its purpose is similar to a default value but since a + * missing value across {@link Type} and its implementations can be highly + * dynamic, the actual values are not constant but defined per {@link Source} + * through the {@link MissingValue} struct. The actual value used to indicate + * a missing value can even change within the same field from one segment to + * another. Certain {@link Ints} implementations for instance use a value + * outside of the value set as the missing value. + */ public final static class MissingValue { public long longValue; public double doubleValue; public BytesRef bytesValue; + /** + * Copies the values from the given {@link MissingValue}. 
+ */ public final void copy(MissingValue values) { longValue = values.longValue; doubleValue = values.doubleValue; diff --git a/lucene/src/java/org/apache/lucene/index/values/DocValuesEnum.java b/lucene/src/java/org/apache/lucene/index/values/DocValuesEnum.java index ecdeec8cf42..11914e79213 100644 --- a/lucene/src/java/org/apache/lucene/index/values/DocValuesEnum.java +++ b/lucene/src/java/org/apache/lucene/index/values/DocValuesEnum.java @@ -19,7 +19,6 @@ package org.apache.lucene.index.values; import java.io.IOException; import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.util.Attribute; import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.FloatsRef; @@ -30,7 +29,7 @@ import org.apache.lucene.util.LongsRef; */ public abstract class DocValuesEnum extends DocIdSetIterator { private AttributeSource source; - private Type enumType; + private final Type enumType; protected BytesRef bytesRef; protected FloatsRef floatsRef; protected LongsRef intsRef; @@ -84,23 +83,12 @@ public abstract class DocValuesEnum extends DocIdSetIterator { } public AttributeSource attributes() { - if (source == null) + if (source == null) { source = new AttributeSource(); + } return source; } - public T addAttribute(Class attr) { - return attributes().addAttribute(attr); - } - - public T getAttribute(Class attr) { - return attributes().getAttribute(attr); - } - - public boolean hasAttribute(Class attr) { - return attributes().hasAttribute(attr); - } - public abstract void close() throws IOException; public static DocValuesEnum emptyEnum(Type type) { diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java index a907eefea23..993e0898f8a 100644 --- a/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java @@ -133,7 +133,7 @@ class FixedStraightBytesImpl { @Override public Source load() throws IOException { - return new Source(cloneData(), cloneIndex(), size, maxDoc); + return new Source(cloneData(), size, maxDoc); } @Override @@ -145,9 +145,9 @@ class FixedStraightBytesImpl { private final int size; private final int maxDoc; - public Source(IndexInput datIn, IndexInput idxIn, int size, int maxDoc) + public Source(IndexInput datIn, int size, int maxDoc) throws IOException { - super(datIn, idxIn, new PagedBytes(PAGED_BYTES_BITS), size * maxDoc); + super(datIn, null, new PagedBytes(PAGED_BYTES_BITS), size * maxDoc); this.size = size; this.missingValue.bytesValue = new BytesRef(size); this.maxDoc = maxDoc; diff --git a/lucene/src/java/org/apache/lucene/index/values/Floats.java b/lucene/src/java/org/apache/lucene/index/values/Floats.java index c728eacf3a8..994f45726ee 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Floats.java +++ b/lucene/src/java/org/apache/lucene/index/values/Floats.java @@ -24,6 +24,7 @@ import java.util.Collection; import java.util.concurrent.atomic.AtomicLong; import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.values.DocValues.Source; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; @@ -32,13 +33,16 @@ import org.apache.lucene.util.CodecUtil; import org.apache.lucene.util.FloatsRef; /** - * Exposes writer/reader for floating point values. 
You can specify 4 (java - * float) or 8 (java double) byte precision. + * Exposes {@link Writer} and reader ({@link Source}) for 32-bit and 64-bit + * floating point values. + *

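+ * 
+ * A writer sketch, assuming an existing directory, file id and byte-usage
+ * tracking reference:
+ * 
+ *  Writer writer = Floats.getWriter(dir, id, 4, bytesUsed);
+ *  writer.add(0, 2.5d);
+ *  writer.finish(1);
+ * 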
+ * Current implementations store either 4-byte or 8-byte floating point + * values with full precision and without any compression. * * @lucene.experimental */ -// TODO - add bulk copy where possible public class Floats { + // TODO - add bulk copy where possible private static final String CODEC_NAME = "SimpleFloats"; static final int VERSION_START = 0; static final int VERSION_CURRENT = VERSION_START; @@ -47,6 +51,7 @@ public class Floats { private static final long LONG_DEFAULT = Double .doubleToRawLongBits(Double.NEGATIVE_INFINITY); + public static Writer getWriter(Directory dir, String id, int precisionBytes, AtomicLong bytesUsed) throws IOException { if (precisionBytes != 4 && precisionBytes != 8) { diff --git a/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java index 2711b435923..9f65b9a86c9 100644 --- a/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java @@ -101,7 +101,6 @@ class VarSortedBytesImpl { final int count = hash.size(); try { final int[] sortedEntries = hash.sort(comp); - // first dump bytes data, recording index & offset as // we go long offset = 0; diff --git a/lucene/src/java/org/apache/lucene/index/values/Writer.java b/lucene/src/java/org/apache/lucene/index/values/Writer.java index c254e0ae3d5..aa060cf09fe 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Writer.java +++ b/lucene/src/java/org/apache/lucene/index/values/Writer.java @@ -26,47 +26,126 @@ import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; /** + * Abstract API for per-document stored primitive values of type byte[], + * long or double. The API accepts a single value for each + * document. The underlying storage mechanism, file formats, data structures and + * representations depend on the actual implementation. + *

    + * Document IDs passed to this API must always be increasing unless stated + * otherwise. + *

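+ * 
+ * A minimal call-sequence sketch, assuming a writer obtained from
+ * {@link #create(Type, String, Directory, Comparator, AtomicLong)} (ids and
+ * values are illustrative):
+ * 
+ *  Writer writer = Writer.create(Type.PACKED_INTS, id, dir, null, bytesUsed);
+ *  writer.add(0, 42L);
+ *  writer.add(3, 7L);  // docIDs 1 and 2 are filled with the default value
+ *  writer.finish(4);   // docCount covers the largest added docID
+ * 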
+ * * @lucene.experimental */ public abstract class Writer extends DocValuesConsumer { + /** + * Creates a new {@link Writer}. + * + * @param bytesUsed + * bytes-usage tracking reference used by the implementation to track + * internally allocated memory. All tracked bytes must be released + * once {@link #finish(int)} has been called. + */ protected Writer(AtomicLong bytesUsed) { super(bytesUsed); } + /** + * Filename extension for index files + */ public static final String INDEX_EXTENSION = "idx"; + + /** + * Filename extension for data files. + */ public static final String DATA_EXTENSION = "dat"; - /** Records the specfied value for the docID */ + /** + * Records the specified long value for the docID or throws an + * {@link UnsupportedOperationException} if this {@link Writer} doesn't record + * long values. + * + * @throws UnsupportedOperationException + * if this writer doesn't record long values + */ public void add(int docID, long value) throws IOException { throw new UnsupportedOperationException(); } - /** Records the specfied value for the docID */ + /** + * Records the specified double value for the docID or throws an + * {@link UnsupportedOperationException} if this {@link Writer} doesn't record + * double values. + * + * @throws UnsupportedOperationException + * if this writer doesn't record double values + */ public void add(int docID, double value) throws IOException { throw new UnsupportedOperationException(); } - /** Records the specfied value for the docID */ + /** + * Records the specified {@link BytesRef} value for the docID or throws an + * {@link UnsupportedOperationException} if this {@link Writer} doesn't record + * {@link BytesRef} values. + * + * @throws UnsupportedOperationException + * if this writer doesn't record {@link BytesRef} values + */ public void add(int docID, BytesRef value) throws IOException { throw new UnsupportedOperationException(); } - /** Records the specfied value for the docID */ + /** + * Records a value from the given document id. The method's implementation + * obtains the value for the document id from the last {@link DocValuesEnum} + * passed to {@link #setNextEnum(DocValuesEnum)}. + *

+ * This method is used during merging to provide an implementation-agnostic + * default merge implementation. + *

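+ * 
+ * A subclass sketch of the expected pairing, mirroring the byte[]
+ * variants in this package:
+ * 
+ *  protected void setNextEnum(DocValuesEnum valuesEnum) {
+ *    bytesRef = valuesEnum.bytes(); // keep the reference for the supported type
+ *  }
+ * 
+ *  protected void add(int docID) throws IOException {
+ *    add(docID, bytesRef); // bytesRef was filled by the current enum
+ *  }
+ * 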
    + *

+ * The given document id must be the same document id returned from + * {@link DocValuesEnum#docID()} when this method is called. All document IDs + * between the given ID and the previously given ID (or 0 if the + * method is called for the first time) are filled with default values depending on + * the {@link Writer} implementation. The given document ID must always be + * greater than the previous ID or 0 if called the first time. + */ protected abstract void add(int docID) throws IOException; - + + /** + * Sets the next {@link DocValuesEnum} to consume values from on calls to + * {@link #add(int)} + * + * @param valuesEnum + * the next {@link DocValuesEnum}; it must not be null + */ protected abstract void setNextEnum(DocValuesEnum valuesEnum); - /** Finish writing, close any files */ + /** + * Finish writing and close any files and resources used by this Writer. + * + * @param docCount + * the total number of documents for this writer. This must be + * greater than or equal to the largest document id passed to one of + * the add methods after the {@link Writer} was created. + */ public abstract void finish(int docCount) throws IOException; - // enables bulk copies in subclasses per MergeState @Override protected void merge(MergeState state) throws IOException { + // This enables bulk copies in subclasses per MergeState; subclasses can + // simply override this and decide if they want to merge + // segments using this generic implementation or if a bulk merge is + // possible / feasible. final DocValuesEnum valEnum = state.reader.getEnum(); assert valEnum != null; try { - setNextEnum(valEnum); + setNextEnum(valEnum); // set the current enum we are working on - the + // impl. will get the correct reference for the type + // it supports int docID = state.docBase; final Bits bits = state.bits; final int docCount = state.docCount; @@ -91,9 +170,33 @@ public abstract class Writer extends DocValuesConsumer { } } - public static Writer create(Type v, String id, Directory directory, + /** + * Factory method to create a {@link Writer} instance for a given type. This + * method returns default implementations for each of the different types + * defined in the {@link Type} enumeration. + * + * @param type + * the {@link Type} to create the {@link Writer} for + * @param id + * the file name id used to create files within the writer. + * @param directory + * the {@link Directory} to create the files from. + * @param comp + * a {@link BytesRef} comparator used for {@link Bytes} variants. If + * null + * {@link BytesRef#getUTF8SortedAsUnicodeComparator()} is used as the + * default. 
+ * @param bytesUsed + * a byte-usage tracking reference + * @return a new {@link Writer} instance for the given {@link Type} + * @throws IOException + */ + public static Writer create(Type type, String id, Directory directory, Comparator comp, AtomicLong bytesUsed) throws IOException { - switch (v) { + if (comp == null) { + comp = BytesRef.getUTF8SortedAsUnicodeComparator(); + } + switch (type) { case PACKED_INTS: return Ints.getWriter(directory, id, true, bytesUsed); case SIMPLE_FLOAT_4BYTE: @@ -101,19 +204,25 @@ public abstract class Writer extends DocValuesConsumer { case SIMPLE_FLOAT_8BYTE: return Floats.getWriter(directory, id, 8, bytesUsed); case BYTES_FIXED_STRAIGHT: - return Bytes.getWriter(directory, id, Bytes.Mode.STRAIGHT, comp, true, bytesUsed); + return Bytes.getWriter(directory, id, Bytes.Mode.STRAIGHT, comp, true, + bytesUsed); case BYTES_FIXED_DEREF: - return Bytes.getWriter(directory, id, Bytes.Mode.DEREF, comp, true, bytesUsed); + return Bytes.getWriter(directory, id, Bytes.Mode.DEREF, comp, true, + bytesUsed); case BYTES_FIXED_SORTED: - return Bytes.getWriter(directory, id, Bytes.Mode.SORTED, comp, true, bytesUsed); + return Bytes.getWriter(directory, id, Bytes.Mode.SORTED, comp, true, + bytesUsed); case BYTES_VAR_STRAIGHT: - return Bytes.getWriter(directory, id, Bytes.Mode.STRAIGHT, comp, false, bytesUsed); + return Bytes.getWriter(directory, id, Bytes.Mode.STRAIGHT, comp, false, + bytesUsed); case BYTES_VAR_DEREF: - return Bytes.getWriter(directory, id, Bytes.Mode.DEREF, comp, false, bytesUsed); + return Bytes.getWriter(directory, id, Bytes.Mode.DEREF, comp, false, + bytesUsed); case BYTES_VAR_SORTED: - return Bytes.getWriter(directory, id, Bytes.Mode.SORTED, comp, false, bytesUsed); + return Bytes.getWriter(directory, id, Bytes.Mode.SORTED, comp, false, + bytesUsed); default: - throw new IllegalArgumentException("Unknown Values: " + v); + throw new IllegalArgumentException("Unknown Values: " + type); } } } From ace9fcae35db6eb07437077c1287acb868254bb0 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Wed, 5 Jan 2011 17:13:30 +0000 Subject: [PATCH 023/116] merged TestIndexSplitter git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1055536 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/contrib/benchmark/res-docvalues.txt | 286 ++++++++++++++++++ .../lucene/index/TestIndexSplitter.java | 22 +- 2 files changed, 292 insertions(+), 16 deletions(-) create mode 100644 lucene/contrib/benchmark/res-docvalues.txt diff --git a/lucene/contrib/benchmark/res-docvalues.txt b/lucene/contrib/benchmark/res-docvalues.txt new file mode 100644 index 00000000000..8402fa88672 --- /dev/null +++ b/lucene/contrib/benchmark/res-docvalues.txt @@ -0,0 +1,286 @@ +Using org.apache.lucene.store.NIOFSDirectory +Using TaskType: perf.values.DocValuesSearchTask +open commit=single +found commit=delsingle +found commit=delmulti +found commit=multi +found commit=single +reader=DirectoryReader(_2:C100000->_0 ) + +HITS q=body:state s=null tot=66586 + 0 doc=31683 score=0.6216621 + 1 doc=6465 score=0.52749777 + 2 doc=89702 score=0.5102744 + 3 doc=34617 score=0.49732968 + 4 doc=37939 score=0.49732968 + 5 doc=40168 score=0.49732968 + 6 doc=91695 score=0.49732968 + 7 doc=88878 score=0.4865276 + 8 doc=7515 score=0.45682645 + 9 doc=33356 score=0.44300243 + +HITS q=body:unit* s=null tot=64020 + 0 doc=0 score=1.0 + 1 doc=1 score=1.0 + 2 doc=2 score=1.0 + 3 doc=3 score=1.0 + 4 doc=4 score=1.0 + 5 doc=5 score=1.0 + 6 doc=6 score=1.0 + 7 doc=7 score=1.0 + 8 doc=8 score=1.0 + 9 doc=9 
score=1.0 + 166 expanded terms + +HITS q=body:un*d s=null tot=14719 + 0 doc=0 score=1.0 + 1 doc=1 score=1.0 + 2 doc=3 score=1.0 + 3 doc=4 score=1.0 + 4 doc=6 score=1.0 + 5 doc=7 score=1.0 + 6 doc=8 score=1.0 + 7 doc=14 score=1.0 + 8 doc=15 score=1.0 + 9 doc=16 score=1.0 + 283 expanded terms + +HITS q=body:united~0.6 s=null tot=1517 + 0 doc=94559 score=0.21607591 + 1 doc=10514 score=0.2085325 + 2 doc=9180 score=0.15598921 + 3 doc=98900 score=0.15598921 + 4 doc=29180 score=0.13552018 + 5 doc=45805 score=0.13174331 + 6 doc=94568 score=0.122231 + 7 doc=11270 score=0.117062256 + 8 doc=39076 score=0.115145996 + 9 doc=36171 score=0.100617126 + 50 expanded terms + +HITS q=body:united~0.7 s=null tot=15 + 0 doc=10514 score=0.27043164 + 1 doc=9180 score=0.20229185 + 2 doc=98900 score=0.20229185 + 3 doc=57708 score=0.121375114 + 4 doc=75166 score=0.11443355 + 5 doc=18073 score=0.096111685 + 6 doc=31928 score=0.096111685 + 7 doc=36408 score=0.08450989 + 8 doc=11020 score=0.07592164 + 9 doc=21473 score=0.06760791 + 5 expanded terms + +HITS q=body:unit body:state s=null tot=71542 + 0 doc=31683 score=0.89471126 + 1 doc=89702 score=0.71672213 + 2 doc=34617 score=0.71576905 + 3 doc=37939 score=0.71576905 + 4 doc=40168 score=0.71576905 + 5 doc=91695 score=0.71576905 + 6 doc=88878 score=0.7002223 + 7 doc=89820 score=0.6991013 + 8 doc=55403 score=0.6326564 + 9 doc=55404 score=0.6326564 + +HITS q=+body:unit +body:state s=null tot=58425 + 0 doc=31683 score=0.89471126 + 1 doc=89702 score=0.71672213 + 2 doc=34617 score=0.71576905 + 3 doc=37939 score=0.71576905 + 4 doc=40168 score=0.71576905 + 5 doc=91695 score=0.71576905 + 6 doc=88878 score=0.7002223 + 7 doc=89820 score=0.6991013 + 8 doc=55403 score=0.6326564 + 9 doc=55404 score=0.6326564 + +HITS q=body:"unit state" s=null tot=54506 + 0 doc=31683 score=1.2651248 + 1 doc=34617 score=1.0120999 + 2 doc=37939 score=1.0120999 + 3 doc=40168 score=1.0120999 + 4 doc=91695 score=1.0120999 + 5 doc=88878 score=0.99011683 + 6 doc=89702 score=0.99011683 + 7 doc=55403 score=0.8945784 + 8 doc=55404 score=0.8945784 + 9 doc=32555 score=0.8855874 +checksum=68434440 +ns by query/coll: + q=body:state s=null h=66586 + t=0 + 4541285 c=462018 + 2521923 c=462018 + 2915687 c=462018 + 2059937 c=462018 + 2004176 c=462018 + 2727423 c=462018 + 2623427 c=462018 + 1978686 c=462018 + 1968359 c=462018 + 2014628 c=462018 + 1960483 c=462018 + 11575285 c=462018 + 2009970 c=462018 + 1958026 c=462018 + 2020333 c=462018 + 2004770 c=462018 + 2036261 c=462018 + 1988123 c=462018 + 3135525 c=462018 + 1957758 c=462018 ** + q=body:unit* s=null h=64020 + t=0 + 4643808 c=45 + 3172345 c=45 + 7674774 c=45 + 16807995 c=45 + 10921573 c=45 + 3437976 c=45 + 3452401 c=45 + 2387546 c=45 + 2235433 c=45 + 2259292 c=45 + 2284227 c=45 + 2946973 c=45 + 2262370 c=45 + 2202474 c=45 + 2200461 c=45 + 2209527 c=45 + 2251586 c=45 + 2215512 c=45 + 2237371 c=45 + 2165564 c=45 ** + q=body:un*d s=null h=14719 + t=0 + 284963221 c=74 + 2881482 c=74 + 3413104 c=74 + 2619053 c=74 + 8096969 c=74 + 26424168 c=74 + 14795044 c=74 + 2062666 c=74 + 2059954 c=74 + 2806232 c=74 + 1981718 c=74 + 19105887 c=74 + 1927838 c=74 + 1967447 c=74 + 1947438 c=74 + 1948304 c=74 + 1939775 c=74 + 2001071 c=74 + 16598882 c=74 + 1835379 c=74 ** + q=body:united~0.6 s=null h=1517 + t=0 + 103826471 c=469223 + 555717303 c=469223 + 1080376099 c=469223 + 220017651 c=469223 + 61706415 c=469223 + 282379259 c=469223 + 39197097 c=469223 + 36378629 c=469223 + 38290239 c=469223 + 36514786 c=469223 + 135201864 c=469223 + 57833228 c=469223 + 34226735 c=469223 + 34172475 
c=469223 ** + 34190557 c=469223 + 34488326 c=469223 + 36086069 c=469223 + 34802333 c=469223 + 36801995 c=469223 + 34283121 c=469223 + q=body:united~0.7 s=null h=15 + t=0 + 32861609 c=370370 + 28054777 c=370370 + 92952691 c=370370 + 16755846 c=370370 + 20157696 c=370370 + 9575311 c=370370 + 10457266 c=370370 + 9222218 c=370370 + 9193130 c=370370 + 9226210 c=370370 + 12061566 c=370370 + 9734193 c=370370 + 8858844 c=370370 + 8915963 c=370370 + 8948563 c=370370 + 8970609 c=370370 + 8944755 c=370370 + 8847053 c=370370 + 8946026 c=370370 + 8815974 c=370370 ** + q=body:unit body:state s=null h=71542 + t=0 + 10110094 c=615309 + 6409071 c=615309 + 10406489 c=615309 + 8814748 c=615309 + 6617722 c=615309 + 7808115 c=615309 + 6153731 c=615309 + 6230989 c=615309 + 6156351 c=615309 + 6165167 c=615309 + 8189550 c=615309 + 6154614 c=615309 + 6142561 c=615309 + 6136166 c=615309 ** + 6204193 c=615309 + 6850011 c=615309 + 6172851 c=615309 + 6154971 c=615309 + 6159624 c=615309 + 6146062 c=615309 + q=+body:unit +body:state s=null h=58425 + t=0 + 61650331 c=615309 + 9063414 c=615309 + 13550026 c=615309 + 9021896 c=615309 + 7951216 c=615309 + 9605724 c=615309 + 7246070 c=615309 + 7297794 c=615309 + 7264085 c=615309 + 7236206 c=615309 + 9566315 c=615309 + 7237453 c=615309 + 7199994 c=615309 + 7162410 c=615309 + 7183931 c=615309 + 7427852 c=615309 + 7149552 c=615309 + 7629251 c=615309 + 7087746 c=615309 ** + 7166387 c=615309 + q=body:"unit state" s=null h=54506 + t=0 + 94524392 c=558044 + 31604183 c=558044 + 26322724 c=558044 + 23596029 c=558044 + 31573744 c=558044 + 31345234 c=558044 + 23844613 c=558044 + 23516075 c=558044 + 23907187 c=558044 + 23513933 c=558044 + 31254583 c=558044 + 23532671 c=558044 + 23500264 c=558044 + 23418273 c=558044 + 23520715 c=558044 + 23457058 c=558044 + 23545091 c=558044 + 23398826 c=558044 ** + 23683312 c=558044 + 23467857 c=558044 diff --git a/lucene/contrib/misc/src/test/org/apache/lucene/index/TestIndexSplitter.java b/lucene/contrib/misc/src/test/org/apache/lucene/index/TestIndexSplitter.java index 441f1b5f964..9e4d20fb916 100644 --- a/lucene/contrib/misc/src/test/org/apache/lucene/index/TestIndexSplitter.java +++ b/lucene/contrib/misc/src/test/org/apache/lucene/index/TestIndexSplitter.java @@ -21,7 +21,7 @@ import java.io.File; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexWriterConfig.OpenMode; -import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util._TestUtil; @@ -33,7 +33,7 @@ public class TestIndexSplitter extends LuceneTestCase { File destDir = new File(TEMP_DIR, "testfilesplitterdest"); _TestUtil.rmDir(destDir); destDir.mkdirs(); - Directory fsDir = newFSDirectory(dir); + FSDirectory fsDir = FSDirectory.open(dir); LogMergePolicy mergePolicy = new LogByteSizeMergePolicy(); mergePolicy.setNoCFSRatio(1); @@ -58,19 +58,14 @@ public class TestIndexSplitter extends LuceneTestCase { iw.addDocument(doc); } iw.commit(); - IndexReader iwReader = iw.getReader(); - assertEquals(3, iwReader.getSequentialSubReaders().length); - iwReader.close(); + assertEquals(3, iw.getReader().getSequentialSubReaders().length); iw.close(); // we should have 2 segments now IndexSplitter is = new IndexSplitter(dir); String splitSegName = is.infos.info(1).name; is.split(destDir, new String[] {splitSegName}); - Directory fsDirDest = newFSDirectory(destDir); - IndexReader r = IndexReader.open(fsDirDest, true); + 
IndexReader r = IndexReader.open(FSDirectory.open(destDir), true); assertEquals(50, r.maxDoc()); - r.close(); - fsDirDest.close(); // now test cmdline File destDir2 = new File(TEMP_DIR, "testfilesplitterdest2"); @@ -78,17 +73,12 @@ public class TestIndexSplitter extends LuceneTestCase { destDir2.mkdirs(); IndexSplitter.main(new String[] {dir.getAbsolutePath(), destDir2.getAbsolutePath(), splitSegName}); assertEquals(3, destDir2.listFiles().length); - Directory fsDirDest2 = newFSDirectory(destDir2); - r = IndexReader.open(fsDirDest2, true); + r = IndexReader.open(FSDirectory.open(destDir2), true); assertEquals(50, r.maxDoc()); - r.close(); - fsDirDest2.close(); // now remove the copied segment from src IndexSplitter.main(new String[] {dir.getAbsolutePath(), "-d", splitSegName}); - r = IndexReader.open(fsDir, true); + r = IndexReader.open(FSDirectory.open(dir), true); assertEquals(2, r.getSequentialSubReaders().length); - r.close(); - fsDir.close(); } } From a798ef362b9da271b79909c9502f1a11f264af46 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Tue, 18 Jan 2011 00:07:04 +0000 Subject: [PATCH 024/116] LUCENE-2186: more javadocs and code cleanups git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1060144 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/lucene/document/AbstractField.java | 5 +- .../lucene/document/DocValuesField.java | 287 ++++++++++++++++++ .../org/apache/lucene/document/Fieldable.java | 26 +- .../apache/lucene/document/ValuesField.java | 156 ---------- .../org/apache/lucene/index/values/Bytes.java | 2 + .../apache/lucene/index/values/DocValues.java | 7 +- .../lucene/index/values/DocValuesEnum.java | 69 ++++- .../index/values/FixedDerefBytesImpl.java | 2 +- .../index/values/FixedStraightBytesImpl.java | 2 +- .../org/apache/lucene/index/values/Ints.java | 2 +- .../lucene/index/values/MultiDocValues.java | 6 +- .../index/values/PerDocFieldValues.java | 48 +++ .../lucene/index/values/SourceCache.java | 60 +++- .../org/apache/lucene/index/values/Type.java | 5 +- .../index/values/TestDocValuesIndexing.java | 15 +- 15 files changed, 501 insertions(+), 191 deletions(-) create mode 100644 lucene/src/java/org/apache/lucene/document/DocValuesField.java delete mode 100644 lucene/src/java/org/apache/lucene/document/ValuesField.java diff --git a/lucene/src/java/org/apache/lucene/document/AbstractField.java b/lucene/src/java/org/apache/lucene/document/AbstractField.java index 2fd4397d12f..54ea023ba28 100755 --- a/lucene/src/java/org/apache/lucene/document/AbstractField.java +++ b/lucene/src/java/org/apache/lucene/document/AbstractField.java @@ -49,6 +49,8 @@ public abstract class AbstractField implements Fieldable { // length/offset for all primitive types protected int binaryLength; protected int binaryOffset; + protected PerDocFieldValues docValues; + protected AbstractField() { @@ -294,8 +296,7 @@ public abstract class AbstractField implements Fieldable { result.append('>'); return result.toString(); } - protected PerDocFieldValues docValues; - + public PerDocFieldValues getDocValues() { return docValues; } diff --git a/lucene/src/java/org/apache/lucene/document/DocValuesField.java b/lucene/src/java/org/apache/lucene/document/DocValuesField.java new file mode 100644 index 00000000000..1f9295859d4 --- /dev/null +++ b/lucene/src/java/org/apache/lucene/document/DocValuesField.java @@ -0,0 +1,287 @@ +package org.apache.lucene.document; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +import java.io.Reader; +import java.util.Comparator; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.document.Field.Index; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.Field.TermVector; +import org.apache.lucene.index.values.PerDocFieldValues; +import org.apache.lucene.index.values.Type; +import org.apache.lucene.util.BytesRef; + +/** + *

+ * This class provides an {@link AbstractField} that enables storing typed + * per-document values for scoring, sorting or value retrieval. Here's an + * example usage, adding an int value: + * + *

    + * document.add(new DocValuesField(name).setInt(value));
    + * 
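+ * 
+ * A self-contained variant of the one-liner above, assuming an already
+ * configured IndexWriter (field name and value are illustrative):
+ * 
+ *  Document document = new Document();
+ *  DocValuesField field = new DocValuesField("docValues");
+ *  field.setInt(42);
+ *  document.add(field);
+ *  indexWriter.addDocument(document);
+ * 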
+ * + * For optimal performance, re-use the DocValuesField and + * {@link Document} instances for more than one document: + *
    + *  DocValuesField field = new DocValuesField(name);
    + *  Document document = new Document();
    + *  document.add(field);
    + * 
    + *  for(all documents) {
    + *    ...
+ *    field.setInt(value);
    + *    writer.addDocument(document);
    + *    ...
    + *  }
    + * 
    + * + *

+ * If doc values are stored in addition to an indexed ({@link Index}) or stored + * ({@link Store}) value, it's recommended to use the {@link DocValuesField}'s + * {@link #set(AbstractField)} API: + *

    + *  DocValuesField field = new DocValuesField(name);
+ *  Field indexedField = new Field(name, stringValue, Store.NO, Index.ANALYZED);
    + *  Document document = new Document();
    + *  document.add(indexedField);
    + *  field.set(indexedField);
    + *  for(all documents) {
    + *    ...
+ *    field.setInt(value);
    + *    writer.addDocument(document);
    + *    ...
    + *  }
    + * 
+ * + * */ +@SuppressWarnings("serial") +public class DocValuesField extends AbstractField implements PerDocFieldValues { + + protected BytesRef bytes; + protected double doubleValue; + protected long longValue; + protected Type type; + protected Comparator bytesComparator; + + /** + * Creates a new {@link DocValuesField} with the given name. + */ + public DocValuesField(String name) { + super(name, Store.NO, Index.NO, TermVector.NO); + setDocValues(this); + } + + /** + * Creates a {@link DocValuesField} prototype. + */ + DocValuesField() { + this(""); + } + + /** + * Sets the given long value and sets the field's {@link Type} to + * {@link Type#PACKED_INTS} unless already set. If you want to change the + * default type use {@link #setType(Type)}. + */ + public void setInt(long value) { + if (type == null) { + type = Type.PACKED_INTS; + } + longValue = value; + } + + /** + * Sets the given float value and sets the field's {@link Type} + * to {@link Type#SIMPLE_FLOAT_4BYTE} unless already set. If you want to + * change the default type use {@link #setType(Type)}. + */ + public void setFloat(float value) { + if (type == null) { + type = Type.SIMPLE_FLOAT_4BYTE; + } + doubleValue = value; + } + + /** + * Sets the given double value and sets the field's {@link Type} + * to {@link Type#SIMPLE_FLOAT_8BYTE} unless already set. If you want to + * change the default type use {@link #setType(Type)}. + */ + public void setFloat(double value) { + if (type == null) { + type = Type.SIMPLE_FLOAT_8BYTE; + } + doubleValue = value; + } + + /** + * Sets the given {@link BytesRef} value and the field's {@link Type}. The + * comparator for this field is set to null. If a + * null comparator is set the default comparator for the given + * {@link Type} is used. + */ + public void setBytes(BytesRef value, Type type) { + setBytes(value, type, null); + } + + /** + * Sets the given {@link BytesRef} value, the field's {@link Type} and the + * field's comparator. If the {@link Comparator} is set to null + * the default for the given {@link Type} is used instead. + * + * @throws IllegalArgumentException + * if the value or the type are null + */ + public void setBytes(BytesRef value, Type type, Comparator comp) { + if (value == null) { + throw new IllegalArgumentException("value must not be null"); + } + setType(type); + if (bytes == null) { + bytes = new BytesRef(value); + } else { + bytes.copy(value); + } + bytesComparator = comp; + } + + /** + * Returns the set {@link BytesRef} or null if not set. + */ + public BytesRef getBytes() { + return bytes; + } + + /** + * Returns the set {@link BytesRef} comparator or null if not set + */ + public Comparator bytesComparator() { + return bytesComparator; + } + + /** + * Returns the set floating point value or 0.0d if not set. + */ + public double getFloat() { + return doubleValue; + } + + /** + * Returns the set long value or 0 if not set. + */ + public long getInt() { + return longValue; + } + + /** + * Sets the {@link BytesRef} comparator for this field. If the field has a + * numeric {@link Type} the comparator will be ignored. + */ + public void setBytesComparator(Comparator comp) { + this.bytesComparator = comp; + } + + /** + * Sets the {@link Type} for this field. 
+ */ public void setType(Type type) { + if (type == null) { + throw new IllegalArgumentException("Type must not be null"); + } + this.type = type; + } + + /** + * Returns the field's {@link Type}. + */ + public Type type() { + return type; + } + + /** + * Always returns null + */ + public Reader readerValue() { + return null; + } + + /** + * Always returns null + */ + public String stringValue() { + return null; + } + + /** + * Always returns null + */ + public TokenStream tokenStreamValue() { + return null; + } + + /** + * Sets this {@link DocValuesField} to the given {@link AbstractField} and + * returns the given field. Any modifications to this instance will be visible + * to the given field. + */ + public T set(T field) { + field.setDocValues(this); + return field; + } + + /** + * Sets a new {@link PerDocFieldValues} instance on the given field with the + * given type and returns it. + * + */ + public static T set(T field, Type type) { + if (field instanceof DocValuesField) + return field; + final DocValuesField valField = new DocValuesField(); + switch (type) { + case BYTES_FIXED_DEREF: + case BYTES_FIXED_SORTED: + case BYTES_FIXED_STRAIGHT: + case BYTES_VAR_DEREF: + case BYTES_VAR_SORTED: + case BYTES_VAR_STRAIGHT: + BytesRef ref = field.isBinary() ? new BytesRef(field.getBinaryValue(), + field.getBinaryOffset(), field.getBinaryLength()) : new BytesRef( + field.stringValue()); + valField.setBytes(ref, type); + break; + case PACKED_INTS: + valField.setInt(Long.parseLong(field.stringValue())); + break; + case SIMPLE_FLOAT_4BYTE: + valField.setFloat(Float.parseFloat(field.stringValue())); + break; + case SIMPLE_FLOAT_8BYTE: + valField.setFloat(Double.parseDouble(field.stringValue())); + break; + default: + throw new IllegalArgumentException("unknown type: " + type); + } + return valField.set(field); + } + +} diff --git a/lucene/src/java/org/apache/lucene/document/Fieldable.java b/lucene/src/java/org/apache/lucene/document/Fieldable.java index 60a456af33d..35d2d06d611 100755 --- a/lucene/src/java/org/apache/lucene/document/Fieldable.java +++ b/lucene/src/java/org/apache/lucene/document/Fieldable.java @@ -18,6 +18,7 @@ package org.apache.lucene.document; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.index.FieldInvertState; // for javadocs +import org.apache.lucene.index.values.DocValues; import org.apache.lucene.index.values.PerDocFieldValues; import org.apache.lucene.index.values.Type; import org.apache.lucene.search.PhraseQuery; // for javadocs @@ -212,11 +213,28 @@ public interface Fieldable extends Serializable { */ void setOmitTermFreqAndPositions(boolean omitTermFreqAndPositions); - public PerDocFieldValues getDocValues() ; - + /** + * Returns the {@link PerDocFieldValues} + */ + public PerDocFieldValues getDocValues(); + + /** + * Sets the {@link PerDocFieldValues} for this field. If + * {@link PerDocFieldValues} is set this field will store per-document values + * + * @see DocValues + */ public void setDocValues(PerDocFieldValues docValues); - + + /** + * Returns true iff {@link PerDocFieldValues} are set on this + * field. + */ public boolean hasDocValues(); - + + /** + * Returns the {@link Type} of the set {@link PerDocFieldValues} or + * null if not set. 
+ */ public Type docValuesType(); } diff --git a/lucene/src/java/org/apache/lucene/document/ValuesField.java b/lucene/src/java/org/apache/lucene/document/ValuesField.java deleted file mode 100644 index 105ac86a355..00000000000 --- a/lucene/src/java/org/apache/lucene/document/ValuesField.java +++ /dev/null @@ -1,156 +0,0 @@ -package org.apache.lucene.document; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -import java.io.Reader; -import java.util.Comparator; - -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.document.Field.Index; -import org.apache.lucene.document.Field.Store; -import org.apache.lucene.document.Field.TermVector; -import org.apache.lucene.index.values.PerDocFieldValues; -import org.apache.lucene.index.values.Type; -import org.apache.lucene.util.BytesRef; - -/** - * - */ -@SuppressWarnings("serial") -public class ValuesField extends AbstractField implements PerDocFieldValues { - - protected BytesRef bytes; - protected double doubleValue; - protected long longValue; - protected Type type; - protected Comparator bytesComparator; - - public ValuesField(String name) { - super(name, Store.NO, Index.NO, TermVector.NO); - setDocValues(this); - } - - ValuesField() { - this(""); - } - - public void setInt(long value) { - type = Type.PACKED_INTS; - longValue = value; - } - - public void setFloat(float value) { - type = Type.SIMPLE_FLOAT_4BYTE; - doubleValue = value; - } - - public void setFloat(double value) { - type = Type.SIMPLE_FLOAT_8BYTE; - doubleValue = value; - } - - public void setBytes(BytesRef value, Type type) { - setBytes(value, type, null); - - } - - public void setBytes(BytesRef value, Type type, Comparator comp) { - this.type = type; - if (bytes == null) { - this.bytes = new BytesRef(); - } - bytes.copy(value); - bytesComparator = comp; - } - - public BytesRef getBytes() { - return bytes; - } - - public Comparator bytesComparator() { - return bytesComparator; - } - - public double getFloat() { - return doubleValue; - } - - public long getInt() { - return longValue; - } - - public void setBytesComparator(Comparator comp) { - this.bytesComparator = comp; - } - - public void setType(Type type) { - this.type = type; - } - - public Type type() { - return type; - } - - public Reader readerValue() { - return null; - } - - public String stringValue() { - return null; - } - - public TokenStream tokenStreamValue() { - return tokenStream; - } - - public T set(T field) { - field.setDocValues(this); - return field; - } - - public static T set(T field, Type type) { - if (field instanceof ValuesField) - return field; - final ValuesField valField = new ValuesField(); - switch (type) { - case BYTES_FIXED_DEREF: - case BYTES_FIXED_SORTED: - case BYTES_FIXED_STRAIGHT: - case BYTES_VAR_DEREF: - case 
BYTES_VAR_SORTED: - case BYTES_VAR_STRAIGHT: - BytesRef ref = field.isBinary() ? new BytesRef(field.getBinaryValue(), - field.getBinaryOffset(), field.getBinaryLength()) : new BytesRef( - field.stringValue()); - valField.setBytes(ref, type); - break; - case PACKED_INTS: - valField.setInt(Long.parseLong(field.stringValue())); - break; - case SIMPLE_FLOAT_4BYTE: - valField.setFloat(Float.parseFloat(field.stringValue())); - break; - case SIMPLE_FLOAT_8BYTE: - valField.setFloat(Double.parseDouble(field.stringValue())); - break; - default: - throw new IllegalArgumentException("unknown type: " + type); - } - return valField.set(field); - } - -} diff --git a/lucene/src/java/org/apache/lucene/index/values/Bytes.java b/lucene/src/java/org/apache/lucene/index/values/Bytes.java index 5521e373425..d3dedd406a2 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Bytes.java +++ b/lucene/src/java/org/apache/lucene/index/values/Bytes.java @@ -178,6 +178,7 @@ public final class Bytes { throw new IllegalArgumentException("Illegal Mode: " + mode); } + // TODO open up this API? static abstract class BytesBaseSource extends Source { protected final IndexInput datIn; protected final IndexInput idxIn; @@ -325,6 +326,7 @@ public final class Bytes { } } + // TODO: open up this API?! static abstract class BytesWriterBase extends Writer { private final Directory dir; diff --git a/lucene/src/java/org/apache/lucene/index/values/DocValues.java b/lucene/src/java/org/apache/lucene/index/values/DocValues.java index f6d928ca734..44ae7873e35 100644 --- a/lucene/src/java/org/apache/lucene/index/values/DocValues.java +++ b/lucene/src/java/org/apache/lucene/index/values/DocValues.java @@ -129,7 +129,7 @@ public abstract class DocValues implements Closeable { * close {@link DocValues} instances. */ public void close() throws IOException { - this.cache.close(this); + cache.close(this); } /** @@ -141,9 +141,12 @@ public abstract class DocValues implements Closeable { *

    * Note: All instances previously obtained from {@link #load()} or * {@link #loadSorted(Comparator)} will be closed. + * @throws IllegalArgumentException if the given cache is null + * */ public void setCache(SourceCache cache) { - assert cache != null : "cache must not be null"; + if (cache == null) + throw new IllegalArgumentException("cache must not be null"); synchronized (this.cache) { this.cache.close(this); this.cache = cache; diff --git a/lucene/src/java/org/apache/lucene/index/values/DocValuesEnum.java b/lucene/src/java/org/apache/lucene/index/values/DocValuesEnum.java index 11914e79213..60f9d655732 100644 --- a/lucene/src/java/org/apache/lucene/index/values/DocValuesEnum.java +++ b/lucene/src/java/org/apache/lucene/index/values/DocValuesEnum.java @@ -23,7 +23,21 @@ import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.FloatsRef; import org.apache.lucene.util.LongsRef; + /** + * {@link DocValuesEnum} is a {@link DocIdSetIterator} iterating byte[] + * , long and double stored per document. Depending on the + * enum's {@link Type} ({@link #type()}) the enum might skip over documents that + * have no value stored. Types like {@link Type#BYTES_VAR_STRAIGHT} might not + * skip over documents even if there is no value associated with a document. The + * value for document without values again depends on the types implementation + * although a reference for a {@link Type} returned from a accessor method + * {@link #getFloat()}, {@link #getInt()} or {@link #bytes()} will never be + * null even if a document has no value. + *

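+ * 
+ * An iteration sketch, assuming an enum obtained from a {@link DocValues}
+ * instance that stores long values (the field access is illustrative):
+ * 
+ *  DocValuesEnum valuesEnum = docValues.getEnum();
+ *  LongsRef ref = valuesEnum.getInt();
+ *  while (valuesEnum.nextDoc() != DocValuesEnum.NO_MORE_DOCS) {
+ *    // the current document's value is available through ref
+ *  }
+ *  valuesEnum.close();
+ * 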
+ * Note: Only the reference for the enum's type is initialized to non-null, + * e.g. {@link #getInt()} will always return null + * if the enum's type is {@link Type#SIMPLE_FLOAT_4BYTE}. * * @lucene.experimental */ @@ -34,10 +48,17 @@ public abstract class DocValuesEnum extends DocIdSetIterator { protected FloatsRef floatsRef; protected LongsRef intsRef; + /** + * Creates a new {@link DocValuesEnum} for the given type. The + * {@link AttributeSource} for this enum is set to null. + */ protected DocValuesEnum(Type enumType) { this(null, enumType); } + /** + * Creates a new {@link DocValuesEnum} for the given type. + */ protected DocValuesEnum(AttributeSource source, Type enumType) { this.source = source; this.enumType = enumType; @@ -56,32 +77,57 @@ public abstract class DocValuesEnum extends DocIdSetIterator { case SIMPLE_FLOAT_4BYTE: case SIMPLE_FLOAT_8BYTE: floatsRef = new FloatsRef(1); - break; + break; } } + /** + * Returns the type of this enum + */ public Type type() { return enumType; } + /** + * Returns a {@link BytesRef} or null if this enum doesn't + * enumerate byte[] values + */ public BytesRef bytes() { return bytesRef; } + /** + * Returns a {@link FloatsRef} or null if this enum doesn't + * enumerate floating point values + */ public FloatsRef getFloat() { return floatsRef; } + /** + * Returns a {@link LongsRef} or null if this enum doesn't + * enumerate integer values. + */ public LongsRef getInt() { return intsRef; } - - protected void copyReferences(DocValuesEnum valuesEnum) { + + /** + * Copies the internal state from the given enum + */ + protected void copyFrom(DocValuesEnum valuesEnum) { intsRef = valuesEnum.intsRef; floatsRef = valuesEnum.floatsRef; bytesRef = valuesEnum.bytesRef; + source = valuesEnum.source; } + /** + * Returns the {@link AttributeSource} associated with this enum. + *

+ * Note: this method might create a new AttributeSource if no + * {@link AttributeSource} has been provided during enum creation. + */ public AttributeSource attributes() { if (source == null) { @@ -89,28 +135,37 @@ public abstract class DocValuesEnum extends DocIdSetIterator { return source; } + /** + * Closes the enum. + * + * @throws IOException + * if an {@link IOException} occurs + */ public abstract void close() throws IOException; + /** + * Returns an empty {@link DocValuesEnum} for the given {@link Type}. + */ public static DocValuesEnum emptyEnum(Type type) { return new DocValuesEnum(type) { @Override public int nextDoc() throws IOException { return NO_MORE_DOCS; } - + @Override public int docID() { return NO_MORE_DOCS; } - + @Override public int advance(int target) throws IOException { return NO_MORE_DOCS; } - + @Override public void close() throws IOException { - + } }; } diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java index 04da04ce9fd..4f582c5c7f1 100644 --- a/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java @@ -215,7 +215,7 @@ class FixedDerefBytesImpl { valueCount = idx.size(); } - protected void copyReferences(DocValuesEnum valuesEnum) { + protected void copyFrom(DocValuesEnum valuesEnum) { bytesRef = valuesEnum.bytesRef; if (bytesRef.bytes.length < size) { bytesRef.grow(size); diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java index 993e0898f8a..82ae24b7355 100644 --- a/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java @@ -198,7 +198,7 @@ class FixedStraightBytesImpl { fp = datIn.getFilePointer(); } - protected void copyReferences(DocValuesEnum valuesEnum) { + protected void copyFrom(DocValuesEnum valuesEnum) { bytesRef = valuesEnum.bytesRef; if (bytesRef.bytes.length < size) { bytesRef.grow(size); diff --git a/lucene/src/java/org/apache/lucene/index/values/Ints.java b/lucene/src/java/org/apache/lucene/index/values/Ints.java index b374b039571..52dbcc6f2e5 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Ints.java +++ b/lucene/src/java/org/apache/lucene/index/values/Ints.java @@ -28,7 +28,7 @@ import org.apache.lucene.store.Directory; * @lucene.experimental */ public class Ints { -//TODO - add bulk copy where possible + // TODO - add bulk copy where possible private Ints() { } diff --git a/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java b/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java index f310378cf54..5ffa1b1a9cf 100644 --- a/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java +++ b/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java @@ -124,7 +124,7 @@ public class MultiDocValues extends DocValues { maxDoc = last.start + last.length; final DocValuesIndex idx = docValuesIdx[0]; currentEnum = idx.docValues.getEnum(this.attributes()); - currentEnum.copyReferences(this); + currentEnum.copyFrom(this); intsRef = currentEnum.intsRef; currentMax = idx.length; currentStart = 0; @@ -148,8 +148,8 @@ public class MultiDocValues extends DocValues { if (target >= currentMax) { final int idx = ReaderUtil.subIndex(target, starts); 
currentEnum.close(); - currentEnum = docValuesIdx[idx].docValues.getEnum(this.attributes()); - currentEnum.copyReferences(this); + currentEnum = docValuesIdx[idx].docValues.getEnum(); + currentEnum.copyFrom(this); currentStart = docValuesIdx[idx].start; currentMax = currentStart + docValuesIdx[idx].length; relativeDoc = target - currentStart; diff --git a/lucene/src/java/org/apache/lucene/index/values/PerDocFieldValues.java b/lucene/src/java/org/apache/lucene/index/values/PerDocFieldValues.java index f60fcd4e0e5..78958c23ca3 100644 --- a/lucene/src/java/org/apache/lucene/index/values/PerDocFieldValues.java +++ b/lucene/src/java/org/apache/lucene/index/values/PerDocFieldValues.java @@ -18,36 +18,84 @@ package org.apache.lucene.index.values; */ import java.util.Comparator; +import org.apache.lucene.document.DocValuesField; +import org.apache.lucene.document.Fieldable; +import org.apache.lucene.index.codecs.docvalues.DocValuesConsumer; import org.apache.lucene.util.BytesRef; /** + * Per document and field values consumed by {@link DocValuesConsumer}. + * @see DocValuesField + * @see Fieldable#setDocValues(PerDocFieldValues) * * @lucene.experimental */ public interface PerDocFieldValues { + /** + * Sets the given long value. + */ public void setInt(long value); + /** + * Sets the given float value. + */ public void setFloat(float value); + /** + * Sets the given double value. + */ public void setFloat(double value); + /** + * Sets the given {@link BytesRef} value and the field's {@link Type}. The + * comparator for this field is set to null. If a + * null comparator is set the default comparator for the given + * {@link Type} is used. + */ public void setBytes(BytesRef value, Type type); + /** + * Sets the given {@link BytesRef} value, the field's {@link Type} and the + * field's comparator. If the {@link Comparator} is set to null + * the default for the given {@link Type} is used instead. + */ public void setBytes(BytesRef value, Type type, Comparator comp); + /** + * Returns the set {@link BytesRef} or null if not set. + */ public BytesRef getBytes(); + /** + * Returns the set {@link BytesRef} comparator or null if not set + */ public Comparator bytesComparator(); + /** + * Returns the set floating point value or 0.0d if not set. + */ public double getFloat(); + /** + * Returns the set long value or 0 if not set. + */ public long getInt(); + /** + * Sets the {@link BytesRef} comparator for this field. If the field has a + * numeric {@link Type} the comparator will be ignored. + */ public void setBytesComparator(Comparator comp); + /** + * Sets the {@link Type} + */ public void setType(Type type); + /** + * Returns the {@link Type} + */ public Type type(); } \ No newline at end of file diff --git a/lucene/src/java/org/apache/lucene/index/values/SourceCache.java b/lucene/src/java/org/apache/lucene/index/values/SourceCache.java index 81ef54f4677..565df06eb02 100644 --- a/lucene/src/java/org/apache/lucene/index/values/SourceCache.java +++ b/lucene/src/java/org/apache/lucene/index/values/SourceCache.java @@ -20,40 +20,94 @@ package org.apache.lucene.index.values; import java.io.IOException; import java.util.Comparator; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.values.DocValues.SortedSource; import org.apache.lucene.index.values.DocValues.Source; import org.apache.lucene.util.BytesRef; /** - * Per {@link DocValues} {@link Source} cache. + * Abstract base class for {@link DocValues} {@link Source} / + * {@link SortedSource} cache. + *

+ * {@link Source} and {@link SortedSource} instances loaded via + * {@link DocValues#load()} and {@link DocValues#loadSorted(Comparator)} are + * entirely memory resident and need to be maintained by the caller. Each call + * to {@link DocValues#load()} or {@link DocValues#loadSorted(Comparator)} will + * cause an entire reload of the underlying data. Source and + * {@link SortedSource} instances obtained from {@link DocValues#getSource()} + * and {@link DocValues#getSortedSorted(Comparator)} respectively are maintained by a + * {@link SourceCache} that is closed ({@link #close(DocValues)}) once the + * {@link IndexReader} that created the {@link DocValues} instance is closed. + *

+ * Unless {@link Source} and {@link SortedSource} instances are managed by + * another entity, it is recommended to use the cached variants to obtain a + * source instance. + *

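[Editor's note: a minimal sketch of the load() vs. getSource() contrast described above, using only API names introduced in this patch series; the field name and reader setup are illustrative, not part of the patch.]

    import java.io.IOException;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.values.DocValues;
    import org.apache.lucene.index.values.DocValues.Source;

    void sourceExample(IndexReader reader) throws IOException {
      DocValues values = reader.docValues("field");
      // Cached variant: maintained by the SourceCache and released when the
      // IndexReader that created this DocValues instance is closed.
      Source cached = values.getSource();
      // Uncached variant: reloads the underlying data entirely on every call;
      // the caller is responsible for managing the returned instance.
      Source fresh = values.load();
    }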
+ * Implementations of this API must be thread-safe. + * + * @see DocValues#setCache(SourceCache) + * @see DocValues#getSource() + * @see DocValues#getSortedSorted(Comparator) + * + * @lucene.experimental */ public abstract class SourceCache { + + /** + * Atomically loads a {@link Source} into the cache from the given + * {@link DocValues} and returns it iff no other {@link Source} has already + * been cached. Otherwise the cached source is returned. + *

+ * This method will not return null. + */ public abstract Source load(DocValues values) throws IOException; + /** + * Atomically loads a {@link SortedSource} into the cache from the given + * {@link DocValues} and returns it iff no other {@link SortedSource} has + * already been cached. Otherwise the cached source is returned. + *

+ * This method will not return null. + */ public abstract SortedSource loadSorted(DocValues values, Comparator<BytesRef> comp) throws IOException; + /** + * Atomically invalidates the cached {@link Source} and {@link SortedSource} + * instances, if any, and empties the cache. + */ public abstract void invalidate(DocValues values); + /** + * Atomically closes the cache and frees all resources. + */ public synchronized void close(DocValues values) { invalidate(values); } + /** + * Simple per {@link DocValues} instance cache implementation that holds a + * {@link Source} and {@link SortedSource} reference as a member variable. + *

+ * If a {@link DirectSourceCache} instance is closed or invalidated, the cached + * references are simply set to null. + */ public static final class DirectSourceCache extends SourceCache { private Source ref; private SortedSource sortedRef; public synchronized Source load(DocValues values) throws IOException { - if (ref == null) { + if (ref == null) { ref = values.load(); + } return ref; } public synchronized SortedSource loadSorted(DocValues values, Comparator<BytesRef> comp) throws IOException { - if (sortedRef == null) + if (sortedRef == null) { sortedRef = values.loadSorted(comp); + } return sortedRef; } diff --git a/lucene/src/java/org/apache/lucene/index/values/Type.java b/lucene/src/java/org/apache/lucene/index/values/Type.java index b02fd0a9194..446f8a7b810 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Type.java +++ b/lucene/src/java/org/apache/lucene/index/values/Type.java @@ -1,7 +1,4 @@ package org.apache.lucene.index.values; - -import org.apache.lucene.index.values.DocValues.SortedSource; - /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -19,6 +16,8 @@ import org.apache.lucene.index.values.DocValues.SortedSource; * limitations under the License. */ +import org.apache.lucene.index.values.DocValues.SortedSource; + /** * {@link Type} specifies the type of the {@link DocValues} for a certain field. * A {@link Type} can specify the actual data type for a field, used compression diff --git a/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java b/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java index b3ad9f5a213..32ad59b7132 100644 --- a/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java +++ b/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java @@ -28,7 +28,7 @@ import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.AbstractField; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; -import org.apache.lucene.document.ValuesField; +import org.apache.lucene.document.DocValuesField; import org.apache.lucene.document.Field.Index; import org.apache.lucene.document.Field.Store; import org.apache.lucene.index.CorruptIndexException; @@ -71,10 +71,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { * TODO: Roadmap to land on trunk * * - Add documentation for: - * - Source and ValuesEnum * - DocValues - * - ValuesField - * - Values * - Add @lucene.experimental to all necessary classes * - add test for unoptimized case with deletes * - run RAT @@ -106,7 +103,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { IndexWriter writer = new IndexWriter(dir, writerConfig(false)); for (int i = 0; i < 5; i++) { Document doc = new Document(); - ValuesField valuesField = new ValuesField("docId"); + DocValuesField valuesField = new DocValuesField("docId"); valuesField.setInt(i); doc.add(valuesField); doc.add(new Field("docId", "" + i, Store.NO, Index.ANALYZED)); @@ -532,11 +529,11 @@ public class TestDocValuesIndexing extends LuceneTestCase { OpenBitSet deleted = new OpenBitSet(numValues); Document doc = new Document(); Index idx = IDX_VALUES[random.nextInt(IDX_VALUES.length)]; - AbstractField field = random.nextBoolean() ? new ValuesField(value.name()) + AbstractField field = random.nextBoolean() ? new DocValuesField(value.name()) : newField(value.name(), _TestUtil.randomRealisticUnicodeString(random, 10), idx == Index.NO ?
Store.YES : Store.NO, idx); doc.add(field); - ValuesField valField = new ValuesField("prototype"); + DocValuesField valField = new DocValuesField("prototype"); final BytesRef bytesRef = new BytesRef(); final String idBase = value.name() + "_"; @@ -564,7 +561,9 @@ public class TestDocValuesIndexing extends LuceneTestCase { for (int j = 0; j < b.length; j++) { b[j] = upto++; } - valField.setBytes(bytesRef, value); + if (bytesRef != null) { + valField.setBytes(bytesRef, value); + } } doc.removeFields("id"); doc.add(new Field("id", idBase + i, Store.YES, From cf8ac7866a4110f827c33aaa05c8cd771de33e64 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Wed, 9 Feb 2011 08:24:19 +0000 Subject: [PATCH 025/116] LUCENE-2186: made class and var naming consistent, added javadoc and specialized codec provider git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1068786 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene/document/DocValuesField.java | 18 +++---- .../lucene/index/DocFieldProcessor.java | 3 +- .../index/DocFieldProcessorPerThread.java | 7 +-- .../org/apache/lucene/index/FieldInfos.java | 12 ++--- .../lucene/index/codecs/CodecProvider.java | 21 ++++++++ .../codecs/docvalues/DocValuesCodec.java | 2 +- .../docvalues/DocValuesCodecProvider.java | 34 +++++++++++++ .../docvalues/DocValuesProducerBase.java | 6 +-- .../org/apache/lucene/index/values/Bytes.java | 48 ++++++++++++------- .../apache/lucene/index/values/DocValues.java | 14 +++++- .../lucene/index/values/DocValuesEnum.java | 8 ++-- .../index/values/FixedSortedBytesImpl.java | 1 + .../apache/lucene/index/values/Floats.java | 18 +++---- .../lucene/index/values/PackedIntsImpl.java | 8 ++-- .../org/apache/lucene/index/values/Type.java | 31 +++++++----- .../index/values/VarDerefBytesImpl.java | 9 ++++ .../index/values/VarSortedBytesImpl.java | 1 + .../apache/lucene/index/values/Writer.java | 6 +-- .../apache/lucene/search/FieldComparator.java | 8 ++-- .../org/apache/lucene/search/SortField.java | 4 +- .../index/values/TestDocValuesIndexing.java | 28 +++++------ 21 files changed, 188 insertions(+), 99 deletions(-) create mode 100644 lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodecProvider.java diff --git a/lucene/src/java/org/apache/lucene/document/DocValuesField.java b/lucene/src/java/org/apache/lucene/document/DocValuesField.java index 1f9295859d4..c5f13e17159 100644 --- a/lucene/src/java/org/apache/lucene/document/DocValuesField.java +++ b/lucene/src/java/org/apache/lucene/document/DocValuesField.java @@ -99,36 +99,36 @@ public class DocValuesField extends AbstractField implements PerDocFieldValues { /** * Sets the given long value and sets the field's {@link Type} to - * {@link Type#PACKED_INTS} unless already set. If you want to change the + * {@link Type#INTS} unless already set. If you want to change the * default type use {@link #setType(Type)}. */ public void setInt(long value) { if (type == null) { - type = Type.PACKED_INTS; + type = Type.INTS; } longValue = value; } /** * Sets the given float value and sets the field's {@link Type} - * to {@link Type#SIMPLE_FLOAT_4BYTE} unless already set. If you want to + * to {@link Type#FLOAT_32} unless already set. If you want to * change the type use {@link #setType(Type)}. */ public void setFloat(float value) { if (type == null) { - type = Type.SIMPLE_FLOAT_4BYTE; + type = Type.FLOAT_32; } doubleValue = value; } /** * Sets the given double value and sets the field's {@link Type} - * to {@link Type#SIMPLE_FLOAT_8BYTE} unless already set. 
If you want to + * to {@link Type#FLOAT_64} unless already set. If you want to * change the default type use {@link #setType(Type)}. */ public void setFloat(double value) { if (type == null) { - type = Type.SIMPLE_FLOAT_8BYTE; + type = Type.FLOAT_64; } doubleValue = value; } @@ -269,13 +269,13 @@ public class DocValuesField extends AbstractField implements PerDocFieldValues { field.stringValue()); valField.setBytes(ref, type); break; - case PACKED_INTS: + case INTS: valField.setInt(Long.parseLong(field.stringValue())); break; - case SIMPLE_FLOAT_4BYTE: + case FLOAT_32: valField.setFloat(Float.parseFloat(field.stringValue())); break; - case SIMPLE_FLOAT_8BYTE: + case FLOAT_64: valField.setFloat(Double.parseDouble(field.stringValue())); break; default: diff --git a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java index bb7da337247..deba256feda 100644 --- a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java +++ b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java @@ -54,7 +54,7 @@ final class DocFieldProcessor extends DocConsumer { fieldInfo.setDocValues(values.type()); if(fieldsConsumer == null) { - /* nocommit -- this is a hack and only works since DocValuesCodec supports initializing the FieldsConsumer twice. + /* TODO (close to no commit) -- this is a hack and only works since DocValuesCodec supports initializing the FieldsConsumer twice. * we need to find a way that allows us to obtain a FieldsConsumer per DocumentsWriter. Currently some codecs rely on * the SegmentsWriteState passed in right at the moment when the segment is flushed (doccount etc) but we need the consumer earlier * to support docvalues and later on stored fields too. @@ -64,7 +64,6 @@ final class DocFieldProcessor extends DocConsumer { } valuesConsumer = fieldsConsumer.addValuesField(fieldInfo); docValues.put(name, valuesConsumer); - } return valuesConsumer; diff --git a/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java b/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java index 90b96ca061a..86d8aafbd26 100644 --- a/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java +++ b/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java @@ -23,7 +23,6 @@ import java.util.HashSet; import java.util.List; import java.io.IOException; -import org.apache.lucene.document.AbstractField; import org.apache.lucene.document.Document; import org.apache.lucene.document.Fieldable; import org.apache.lucene.index.codecs.docvalues.DocValuesConsumer; @@ -253,11 +252,7 @@ final class DocFieldProcessorPerThread extends DocConsumerPerThread { final DocFieldProcessorPerField perField = fields[i]; final Fieldable fieldable = perField.fields[0]; perField.consumer.processFields(perField.fields, perField.fieldCount); - - if (!(fieldable instanceof AbstractField)) { - continue; - } - final PerDocFieldValues docValues = ((AbstractField)fieldable).getDocValues(); + final PerDocFieldValues docValues = fieldable.getDocValues(); if (docValues == null) { continue; } diff --git a/lucene/src/java/org/apache/lucene/index/FieldInfos.java b/lucene/src/java/org/apache/lucene/index/FieldInfos.java index 70efdc407ba..47d21177f58 100644 --- a/lucene/src/java/org/apache/lucene/index/FieldInfos.java +++ b/lucene/src/java/org/apache/lucene/index/FieldInfos.java @@ -326,13 +326,13 @@ public final class FieldInfos { b = 0; } else { switch(fi.docValues) { - case PACKED_INTS: + case INTS: b = 1; 
break; - case SIMPLE_FLOAT_4BYTE: + case FLOAT_32: b = 2; break; - case SIMPLE_FLOAT_8BYTE: + case FLOAT_64: b = 3; break; case BYTES_FIXED_STRAIGHT: @@ -393,13 +393,13 @@ public final class FieldInfos { docValuesType = null; break; case 1: - docValuesType = Type.PACKED_INTS; + docValuesType = Type.INTS; break; case 2: - docValuesType = Type.SIMPLE_FLOAT_4BYTE; + docValuesType = Type.FLOAT_32; break; case 3: - docValuesType = Type.SIMPLE_FLOAT_8BYTE; + docValuesType = Type.FLOAT_64; break; case 4: docValuesType = Type.BYTES_FIXED_STRAIGHT; diff --git a/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java b/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java index c249116eb09..31b0c230bc5 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java @@ -22,6 +22,7 @@ import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Set; +import java.util.Map.Entry; import org.apache.lucene.index.codecs.preflex.PreFlexCodec; import org.apache.lucene.index.codecs.pulsing.PulsingCodec; @@ -163,6 +164,26 @@ public class CodecProvider { public synchronized void setDefaultFieldCodec(String codec) { defaultFieldCodec = codec; } + + /** + * Registers all codecs from the given provider including the field to codec + * mapping and the default field codec. + *

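[Editor's note: for illustration, the registration pattern this method enables; the same two lines appear verbatim in the test setup later in this series.]

    import org.apache.lucene.index.codecs.CodecProvider;
    import org.apache.lucene.index.codecs.docvalues.DocValuesCodecProvider;

    // Wrap every codec registered with the default provider so that all
    // fields gain doc values support.
    CodecProvider provider = new DocValuesCodecProvider();
    provider.copyFrom(CodecProvider.getDefault());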
+ * NOTE: This method will pass any codec from the given provider to + * {@link #register(Codec)} and sets field codecs via + * {@link #setFieldCodec(String, String)}. + */ + public void copyFrom(CodecProvider other) { + final Collection<Codec> values = other.codecs.values(); + for (Codec codec : values) { + register(codec); + } + final Set<Entry<String, String>> entrySet = other.perFieldMap.entrySet(); + for (Entry<String, String> entry : entrySet) { + setFieldCodec(entry.getKey(), entry.getValue()); + } + setDefaultFieldCodec(other.getDefaultFieldCodec()); + } } class DefaultCodecProvider extends CodecProvider { diff --git a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java index abe247d4936..3fe8bfeda5b 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java @@ -49,7 +49,7 @@ public class DocValuesCodec extends Codec { private final Comparator<BytesRef> comparator; public DocValuesCodec(Codec other, Comparator<BytesRef> comparator) { - this.name = "docvalues_" + other.name; + this.name = other.name; this.other = other; this.comparator = comparator; } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodecProvider.java b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodecProvider.java new file mode 100644 index 00000000000..ab54101f0fb --- /dev/null +++ b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodecProvider.java @@ -0,0 +1,34 @@ +package org.apache.lucene.index.codecs.docvalues; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +import org.apache.lucene.index.codecs.Codec; +import org.apache.lucene.index.codecs.CodecProvider; + +/** + * Simple Codec provider that wraps all registered codecs into a {@link DocValuesCodec}. + */ +public class DocValuesCodecProvider extends CodecProvider { + + @Override + public synchronized void register(Codec codec) { + if (codec instanceof DocValuesCodec) { + super.register(codec); + } else { + super.register(new DocValuesCodec(codec)); + } + } +} diff --git a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java index b771a86ba65..914a7e3c738 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java @@ -111,11 +111,11 @@ public abstract class DocValuesProducerBase extends FieldsProducer { protected DocValues loadDocValues(int docCount, Directory dir, String id, Type type) throws IOException { switch (type) { - case PACKED_INTS: + case INTS: return Ints.getValues(dir, id, false); - case SIMPLE_FLOAT_4BYTE: + case FLOAT_32: return Floats.getValues(dir, id, docCount); - case SIMPLE_FLOAT_8BYTE: + case FLOAT_64: return Floats.getValues(dir, id, docCount); case BYTES_FIXED_STRAIGHT: return Bytes.getValues(dir, id, Bytes.Mode.STRAIGHT, true, docCount); diff --git a/lucene/src/java/org/apache/lucene/index/values/Bytes.java b/lucene/src/java/org/apache/lucene/index/values/Bytes.java index d3dedd406a2..a8a7c2d4d3d 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Bytes.java +++ b/lucene/src/java/org/apache/lucene/index/values/Bytes.java @@ -54,14 +54,16 @@ import org.apache.lucene.util.PagedBytes; * @lucene.experimental */ public final class Bytes { - //TODO - add bulk copy where possible - private Bytes() { /* don't instantiate! */ } + // TODO - add bulk copy where possible + private Bytes() { /* don't instantiate! */ + } /** * Defines the {@link Writer}'s store mode. The writer will either store the * bytes sequentially ({@link #STRAIGHT}), dereferenced ({@link #DEREF}) or * sorted ({@link #SORTED}). * + * @lucene.experimental */ public static enum Mode { /** @@ -180,10 +182,10 @@ public final class Bytes { // TODO open up this API? static abstract class BytesBaseSource extends Source { + private final PagedBytes pagedBytes; protected final IndexInput datIn; protected final IndexInput idxIn; protected final static int PAGED_BYTES_BITS = 15; - private final PagedBytes pagedBytes; protected final PagedBytes.Reader data; protected final long totalLengthInBytes; @@ -204,11 +206,13 @@ public final class Bytes { data.close(); // close data } finally { try { - if (datIn != null) + if (datIn != null) { datIn.close(); + } } finally { - if (idxIn != null) // if straight - no index needed + if (idxIn != null) {// if straight - no index needed idxIn.close(); + } } } } @@ -269,18 +273,27 @@ public final class Bytes { return ord == 0 ? null : deref(--ord, bytesRef); } - public void close() throws IOException { + protected void closeIndexInput() throws IOException { try { - if (datIn != null) + if (datIn != null) { datIn.close(); + } } finally { - if (idxIn != null) // if straight + if (idxIn != null) {// if straight idxIn.close(); + } } } + /** + * Returns the largest doc id + 1 in this doc values source. + */ protected abstract int maxDoc(); + /** + * Copies the value for the given ord to the given {@link BytesRef} and + * returns it.
+ */ protected abstract BytesRef deref(int ord, BytesRef bytesRef); protected LookupResult binarySearch(BytesRef b, BytesRef bytesRef, int low, @@ -328,7 +341,6 @@ // TODO: open up this API?! static abstract class BytesWriterBase extends Writer { - private final Directory dir; private final String id; protected IndexOutput idxOut; @@ -347,10 +359,13 @@ this.codecName = codecName; this.version = version; this.pool = pool; - if (initData) + if (initData) { initDataOut(); - if (initIndex) + } + + if (initIndex) { initIndexOut(); + } } private void initDataOut() throws IOException { @@ -365,10 +380,6 @@ CodecUtil.writeHeader(idxOut, codecName, version); } - public long ramBytesUsed() { - return bytesUsed.get(); - } - /** * Must be called only with increasing docIDs. It's OK for some docIDs to be * skipped; they will be filled with 0 bytes. @@ -448,14 +459,19 @@ } else { idxIn = null; } - } + /** + * Clones and returns the data {@link IndexInput}. + */ protected final IndexInput cloneData() { assert datIn != null; return (IndexInput) datIn.clone(); } + /** + * Clones and returns the indexing {@link IndexInput}. + */ protected final IndexInput cloneIndex() { assert idxIn != null; return (IndexInput) idxIn.clone(); diff --git a/lucene/src/java/org/apache/lucene/index/values/DocValues.java b/lucene/src/java/org/apache/lucene/index/values/DocValues.java index 44ae7873e35..c63c3e8da95 100644 --- a/lucene/src/java/org/apache/lucene/index/values/DocValues.java +++ b/lucene/src/java/org/apache/lucene/index/values/DocValues.java @@ -28,12 +28,19 @@ import org.apache.lucene.util.BytesRef; /** * TODO + * * @see FieldsEnum#docValues() * @see Fields#docValues(String) * @lucene.experimental */ public abstract class DocValues implements Closeable { - + /* + * TODO: it might be useful to add another Random Access enum for some + * implementations like packed ints and only return such a random access enum + * if the impl supports random access. For super large segments it might be + * useful or even required in certain environments to have disk-based random + * access + */ public static final DocValues[] EMPTY_ARRAY = new DocValues[0]; private SourceCache cache = new SourceCache.DirectSourceCache(); @@ -141,7 +148,9 @@ *

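[Editor's note: a short sketch of the cache swap this setter enables; DirectSourceCache is already the default, so the concrete cache below is purely illustrative, as is the field name.]

    import java.io.IOException;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.values.DocValues;
    import org.apache.lucene.index.values.SourceCache;

    void cacheExample(IndexReader reader) throws IOException {
      DocValues values = reader.docValues("field");
      // Replaces the current cache; as the note below states, previously
      // loaded Source instances are closed by this call.
      values.setCache(new SourceCache.DirectSourceCache());
    }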
    * Note: All instances previously obtained from {@link #load()} or * {@link #loadSorted(Comparator)} will be closed. - * @throws IllegalArgumentException if the given cache is null + * + * @throws IllegalArgumentException + * if the given cache is null * */ public void setCache(SourceCache cache) { @@ -162,6 +171,7 @@ public abstract class DocValues implements Closeable { * {@link Source} defines 3 {@link Type} //TODO finish this */ public static abstract class Source { + // TODO we might need a close method here to null out the internal used arrays?! protected final MissingValue missingValue = new MissingValue(); /** diff --git a/lucene/src/java/org/apache/lucene/index/values/DocValuesEnum.java b/lucene/src/java/org/apache/lucene/index/values/DocValuesEnum.java index 60f9d655732..60dc7d539f9 100644 --- a/lucene/src/java/org/apache/lucene/index/values/DocValuesEnum.java +++ b/lucene/src/java/org/apache/lucene/index/values/DocValuesEnum.java @@ -37,7 +37,7 @@ import org.apache.lucene.util.LongsRef; *

* Note: Only the reference for the enum's type is initialized to non * null, i.e. {@link #getInt()} will always return null - * if the enum's Type is {@link Type#SIMPLE_FLOAT_4BYTE}. + * if the enum's Type is {@link Type#FLOAT_32}. * * @lucene.experimental */ @@ -71,11 +71,11 @@ public abstract class DocValuesEnum extends DocIdSetIterator { case BYTES_VAR_STRAIGHT: bytesRef = new BytesRef(); break; - case PACKED_INTS: + case INTS: intsRef = new LongsRef(1); break; - case SIMPLE_FLOAT_4BYTE: - case SIMPLE_FLOAT_8BYTE: + case FLOAT_32: + case FLOAT_64: floatsRef = new FloatsRef(1); break; } diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java index 108f382b480..47f8f2a6c99 100644 --- a/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java @@ -194,6 +194,7 @@ class FixedSortedBytesImpl { this.size = size; this.numValue = numValues; index = PackedInts.getReader(idxIn); + closeIndexInput(); } @Override diff --git a/lucene/src/java/org/apache/lucene/index/values/Floats.java b/lucene/src/java/org/apache/lucene/index/values/Floats.java index 994f45726ee..31272553abf 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Floats.java +++ b/lucene/src/java/org/apache/lucene/index/values/Floats.java @@ -297,7 +297,7 @@ public class Floats { public DocValuesEnum getEnum(AttributeSource attrSource) throws IOException { final MissingValue missing = getMissing(); - return new SourceEnum(attrSource, Type.SIMPLE_FLOAT_4BYTE, this, maxDoc) { + return new SourceEnum(attrSource, Type.FLOAT_32, this, maxDoc) { @Override public int advance(int target) throws IOException { if (target >= numDocs) @@ -315,7 +315,7 @@ @Override public Type type() { - return Type.SIMPLE_FLOAT_4BYTE; + return Type.FLOAT_32; } } @@ -355,7 +355,7 @@ @Override public Type type() { - return Type.SIMPLE_FLOAT_8BYTE; + return Type.FLOAT_64; } } @@ -377,8 +377,8 @@ @Override public Type type() { - return precisionBytes == 4 ? Type.SIMPLE_FLOAT_4BYTE - : Type.SIMPLE_FLOAT_8BYTE; + return precisionBytes == 4 ? Type.FLOAT_32 + : Type.FLOAT_64; } } @@ -386,7 +386,7 @@ Floats4Enum(AttributeSource source, IndexInput dataIn, int maxDoc) throws IOException { - super(source, dataIn, 4, maxDoc, Type.SIMPLE_FLOAT_4BYTE); + super(source, dataIn, 4, maxDoc, Type.FLOAT_32); } @Override @@ -422,7 +422,7 @@ Floats8EnumImpl(AttributeSource source, IndexInput dataIn, int maxDoc) throws IOException { - super(source, dataIn, 8, maxDoc, Type.SIMPLE_FLOAT_8BYTE); + super(source, dataIn, 8, maxDoc, Type.FLOAT_64); } @Override @@ -464,8 +464,8 @@ FloatsEnumImpl(AttributeSource source, IndexInput dataIn, int precision, int maxDoc, Type type) throws IOException { - super(source, precision == 4 ?
Type.FLOAT_32 + : Type.FLOAT_64); this.dataIn = dataIn; this.precision = precision; this.maxDoc = maxDoc; diff --git a/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java b/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java index 74ec4abe6d5..e2f7642b828 100644 --- a/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java @@ -111,7 +111,7 @@ class PackedIntsImpl { PackedInts.bitsRequired(maxValue - minValue)); final int firstDoc = defaultValues.nextSetBit(0); lastDocId++; - if(firstDoc != -1) { + if (firstDoc != -1) { for (int i = 0; i < firstDoc; i++) { w.add(defaultValue); // fill with defaults until first bit set } @@ -223,7 +223,7 @@ class PackedIntsImpl { @Override public Type type() { - return Type.PACKED_INTS; + return Type.INTS; } } @@ -240,7 +240,7 @@ class PackedIntsImpl { @Override public Type type() { - return Type.PACKED_INTS; + return Type.INTS; } } @@ -255,7 +255,7 @@ class PackedIntsImpl { private IntsEnumImpl(AttributeSource source, IndexInput dataIn) throws IOException { - super(source, Type.PACKED_INTS); + super(source, Type.INTS); intsRef.offset = 0; this.dataIn = dataIn; dataIn.seek(CodecUtil.headerLength(CODEC_NAME)); diff --git a/lucene/src/java/org/apache/lucene/index/values/Type.java b/lucene/src/java/org/apache/lucene/index/values/Type.java index 446f8a7b810..1d5dd0c2701 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Type.java +++ b/lucene/src/java/org/apache/lucene/index/values/Type.java @@ -1,4 +1,5 @@ package org.apache.lucene.index.values; + /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -16,30 +17,38 @@ package org.apache.lucene.index.values; * limitations under the License. */ +import org.apache.lucene.index.codecs.Codec; +import org.apache.lucene.index.codecs.FieldsConsumer; import org.apache.lucene.index.values.DocValues.SortedSource; /** * {@link Type} specifies the type of the {@link DocValues} for a certain field. - * A {@link Type} can specify the actual data type for a field, used compression - * schemes and high-level data-structures. + * A {@link Type} only defines the data type for a field while the actual + * implementation used to encode and decode the values depends on the field's + * {@link Codec}. It is up to the {@link Codec} implementing + * {@link FieldsConsumer#addValuesField(org.apache.lucene.index.FieldInfo)} to + * use different low-level implementations to write the stored values for a + * field. * * @lucene.experimental */ public enum Type { - - /** - * Integral value is stored as packed ints. The bit precision is fixed across - * the segment, and determined by the min/max values in the field. + /* + * TODO: Add INT_32 INT_64 INT_16 & INT_8?! */ - PACKED_INTS, /** - * 32 bit floating point value stored without modification or compression. + * Integer values. */ - SIMPLE_FLOAT_4BYTE, + INTS, + /** - * 64 bit floating point value stored without modification or compression. + * 32 bit floating point values. */ - SIMPLE_FLOAT_8BYTE, + FLOAT_32, + /** + * 64 bit floating point values. + */ + FLOAT_64, // TODO(simonw): -- shouldn't lucene decide/detect straight vs // deref, as well fixed vs var?
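[Editor's note: to make the renamed constants concrete, a hedged sketch of how a document would pick these types at index time through the DocValuesField setters shown earlier in this patch; field names and values are illustrative, not part of the patch.]

    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.DocValuesField;
    import org.apache.lucene.index.values.Type;
    import org.apache.lucene.util.BytesRef;

    Document doc = new Document();
    DocValuesField count = new DocValuesField("count");
    count.setInt(42L);    // Type defaults to Type.INTS
    doc.add(count);
    DocValuesField ratio = new DocValuesField("ratio");
    ratio.setFloat(0.5f); // Type defaults to Type.FLOAT_32
    doc.add(ratio);
    DocValuesField id = new DocValuesField("id");
    id.setBytes(new BytesRef("doc-1"), Type.BYTES_VAR_DEREF); // explicit bytes type
    doc.add(id);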
diff --git a/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java index 132a5e4336f..bb9f9be53b6 100644 --- a/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java @@ -98,6 +98,15 @@ class VarDerefBytesImpl { } + /* + * TODO: if impls like this are merged we are bound to the amount of memory we + * can store into a BytesRefHash and therefore how much memory a ByteBlockPool + * can address. This is currently limited to 2GB. While we could extend that + * and use 64bit for addressing this still limits us to the existing main + * memory as all distinct bytes will be loaded up into main memory. We could + * move the byte[] writing to #finish(int) and store the bytes in sorted + * order and merge them in a streamed fashion. + */ static class Writer extends BytesWriterBase { private int[] docToAddress; private int address = 1; diff --git a/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java index 9f65b9a86c9..c781c4f2a5d 100644 --- a/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java @@ -186,6 +186,7 @@ class VarSortedBytesImpl { docToOrdIndex = PackedInts.getReader(idxIn); ordToOffsetIndex = PackedInts.getReader(idxIn); valueCount = ordToOffsetIndex.size(); + closeIndexInput(); } @Override diff --git a/lucene/src/java/org/apache/lucene/index/values/Writer.java b/lucene/src/java/org/apache/lucene/index/values/Writer.java index aa060cf09fe..15f466f7de4 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Writer.java +++ b/lucene/src/java/org/apache/lucene/index/values/Writer.java @@ -197,11 +197,11 @@ public abstract class Writer extends DocValuesConsumer { comp = BytesRef.getUTF8SortedAsUnicodeComparator(); } switch (type) { - case PACKED_INTS: + case INTS: return Ints.getWriter(directory, id, true, bytesUsed); - case SIMPLE_FLOAT_4BYTE: + case FLOAT_32: return Floats.getWriter(directory, id, 4, bytesUsed); - case SIMPLE_FLOAT_8BYTE: + case FLOAT_64: return Floats.getWriter(directory, id, 8, bytesUsed); case BYTES_FIXED_STRAIGHT: return Bytes.getWriter(directory, id, Bytes.Mode.STRAIGHT, comp, true, diff --git a/lucene/src/java/org/apache/lucene/search/FieldComparator.java b/lucene/src/java/org/apache/lucene/search/FieldComparator.java index 3f9bade3350..27d10aecd41 100644 --- a/lucene/src/java/org/apache/lucene/search/FieldComparator.java +++ b/lucene/src/java/org/apache/lucene/search/FieldComparator.java @@ -332,13 +332,13 @@ public abstract class FieldComparator { } /** Uses float index values to sort by ascending value */ - public static final class FloatIndexValuesComparator extends FieldComparator { + public static final class FloatDocValuesComparator extends FieldComparator { private final double[] values; private Source currentReaderValues; private final String field; private double bottom; - FloatIndexValuesComparator(int numHits, String field) { + FloatDocValuesComparator(int numHits, String field) { values = new double[numHits]; this.field = field; } @@ -599,13 +599,13 @@ public abstract class FieldComparator { } /** Loads int index values and sorts by ascending value. 
*/ - public static final class IntIndexValuesComparator extends FieldComparator { + public static final class IntDocValuesComparator extends FieldComparator { private final long[] values; private Source currentReaderValues; private final String field; private long bottom; - IntIndexValuesComparator(int numHits, String field) { + IntDocValuesComparator(int numHits, String field) { values = new long[numHits]; this.field = field; } diff --git a/lucene/src/java/org/apache/lucene/search/SortField.java b/lucene/src/java/org/apache/lucene/search/SortField.java index 316b097165f..a5bfc761cef 100644 --- a/lucene/src/java/org/apache/lucene/search/SortField.java +++ b/lucene/src/java/org/apache/lucene/search/SortField.java @@ -493,14 +493,14 @@ implements Serializable { case SortField.INT: if (useIndexValues) { - return new FieldComparator.IntIndexValuesComparator(numHits, field); + return new FieldComparator.IntDocValuesComparator(numHits, field); } else { return new FieldComparator.IntComparator(numHits, (IntValuesCreator)creator, (Integer) missingValue); } case SortField.FLOAT: if (useIndexValues) { - return new FieldComparator.FloatIndexValuesComparator(numHits, field); + return new FieldComparator.FloatDocValuesComparator(numHits, field); } else { return new FieldComparator.FloatComparator(numHits, (FloatValuesCreator) creator, (Float) missingValue); } diff --git a/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java b/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java index 32ad59b7132..1883958eb03 100644 --- a/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java +++ b/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java @@ -43,7 +43,7 @@ import org.apache.lucene.index.MergePolicy; import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.Term; import org.apache.lucene.index.codecs.CodecProvider; -import org.apache.lucene.index.codecs.docvalues.DocValuesCodec; +import org.apache.lucene.index.codecs.docvalues.DocValuesCodecProvider; import org.apache.lucene.index.values.DocValues.MissingValue; import org.apache.lucene.index.values.DocValues.Source; import org.apache.lucene.queryParser.ParseException; @@ -79,19 +79,13 @@ public class TestDocValuesIndexing extends LuceneTestCase { * */ - private DocValuesCodec docValuesCodec; private CodecProvider provider; @Before public void setUp() throws Exception { super.setUp(); - String defaultFieldCodec = CodecProvider.getDefault() - .getDefaultFieldCodec(); - provider = new CodecProvider(); - docValuesCodec = new DocValuesCodec(CodecProvider.getDefault().lookup( - defaultFieldCodec)); - provider.register(docValuesCodec); - provider.setDefaultFieldCodec(docValuesCodec.name); + provider = new DocValuesCodecProvider(); + provider.copyFrom(CodecProvider.getDefault()); } /* @@ -278,7 +272,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { final int numRemainingValues = (int) (numValues - deleted.cardinality()); final int base = r.numDocs() - numRemainingValues; switch (val) { - case PACKED_INTS: { + case INTS: { DocValues intsReader = getDocValues(r, val.name()); assertNotNull(intsReader); @@ -309,8 +303,8 @@ public class TestDocValuesIndexing extends LuceneTestCase { } } break; - case SIMPLE_FLOAT_4BYTE: - case SIMPLE_FLOAT_8BYTE: { + case FLOAT_32: + case FLOAT_64: { DocValues floatReader = getDocValues(r, val.name()); assertNotNull(floatReader); Source floats = getSource(floatReader); @@ -515,8 +509,8 @@ public class TestDocValuesIndexing 
extends LuceneTestCase { Type.BYTES_FIXED_SORTED, Type.BYTES_FIXED_STRAIGHT, Type.BYTES_VAR_DEREF, Type.BYTES_VAR_SORTED, Type.BYTES_VAR_STRAIGHT); - private static EnumSet NUMERICS = EnumSet.of(Type.PACKED_INTS, - Type.SIMPLE_FLOAT_4BYTE, Type.SIMPLE_FLOAT_8BYTE); + private static EnumSet NUMERICS = EnumSet.of(Type.INTS, + Type.FLOAT_32, Type.FLOAT_64); private static Index[] IDX_VALUES = new Index[] { Index.ANALYZED, Index.ANALYZED_NO_NORMS, Index.NOT_ANALYZED, Index.NOT_ANALYZED_NO_NORMS, @@ -547,11 +541,11 @@ public class TestDocValuesIndexing extends LuceneTestCase { for (int i = 0; i < numValues; i++) { if (isNumeric) { switch (value) { - case PACKED_INTS: + case INTS: valField.setInt(i); break; - case SIMPLE_FLOAT_4BYTE: - case SIMPLE_FLOAT_8BYTE: + case FLOAT_32: + case FLOAT_64: valField.setFloat(2.0f * i); break; default: From dee30b64c35d68b2daf0957501c490f364ad6166 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Sat, 12 Mar 2011 18:24:35 +0000 Subject: [PATCH 026/116] merge r1079712,r1079716 from trunk to fix the docvalues hudson build git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1080963 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/src/java/org/apache/lucene/search/SortField.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lucene/src/java/org/apache/lucene/search/SortField.java b/lucene/src/java/org/apache/lucene/search/SortField.java index 08047106f4c..b83318e5c21 100644 --- a/lucene/src/java/org/apache/lucene/search/SortField.java +++ b/lucene/src/java/org/apache/lucene/search/SortField.java @@ -101,7 +101,11 @@ public class SortField { private int type; // defaults to determining type dynamically private Locale locale; // defaults to "natural order" (no Locale) boolean reverse = false; // defaults to natural order - private CachedArrayCreator creator; + + // This is not generified because of a javac bug in early JDK 1.5 (e.g. used by FreeBSD). + // With as type param, getComparator() fails to compile because of cast problems. + @SuppressWarnings("unchecked") private CachedArrayCreator creator; + public Object missingValue = null; // used for 'sortMissingFirst/Last' // Used for CUSTOM sort From ef2e92aff4b41ff0acf37abbd07ee6c074a783d6 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Sun, 13 Mar 2011 02:46:01 +0000 Subject: [PATCH 027/116] revert hack git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1081034 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/src/java/org/apache/lucene/search/SortField.java | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/search/SortField.java b/lucene/src/java/org/apache/lucene/search/SortField.java index b83318e5c21..08047106f4c 100644 --- a/lucene/src/java/org/apache/lucene/search/SortField.java +++ b/lucene/src/java/org/apache/lucene/search/SortField.java @@ -101,11 +101,7 @@ public class SortField { private int type; // defaults to determining type dynamically private Locale locale; // defaults to "natural order" (no Locale) boolean reverse = false; // defaults to natural order - - // This is not generified because of a javac bug in early JDK 1.5 (e.g. used by FreeBSD). - // With as type param, getComparator() fails to compile because of cast problems. 
- @SuppressWarnings("unchecked") private CachedArrayCreator creator; - + private CachedArrayCreator creator; public Object missingValue = null; // used for 'sortMissingFirst/Last' // Used for CUSTOM sort From 621b75c56740e6f96243de48a88d94c508ad55c6 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Mon, 2 May 2011 14:23:13 +0000 Subject: [PATCH 028/116] removed @Override annotations for java5 git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1098593 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/src/java/org/apache/lucene/index/MultiPerDocValues.java | 1 - .../src/java/org/apache/lucene/index/PerFieldCodecWrapper.java | 2 -- .../apache/lucene/index/codecs/docvalues/DocValuesCodec.java | 2 -- .../lucene/index/codecs/docvalues/DocValuesProducerBase.java | 1 - 4 files changed, 6 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/index/MultiPerDocValues.java b/lucene/src/java/org/apache/lucene/index/MultiPerDocValues.java index bf10a43f6a0..fcce3a2a68b 100644 --- a/lucene/src/java/org/apache/lucene/index/MultiPerDocValues.java +++ b/lucene/src/java/org/apache/lucene/index/MultiPerDocValues.java @@ -133,7 +133,6 @@ public class MultiPerDocValues extends PerDocValues { return result; } - @Override public void close() throws IOException { PerDocValues[] perDocValues = this.subs; for (PerDocValues values : perDocValues) { diff --git a/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java b/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java index d1acaf46a5e..7c30702d01c 100644 --- a/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java +++ b/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java @@ -293,7 +293,6 @@ final class PerFieldCodecWrapper extends Codec { return perDocProducer.docValues(field); } - @Override public void close() throws IOException { final Iterator it = codecs.values().iterator(); IOException err = null; @@ -325,7 +324,6 @@ final class PerFieldCodecWrapper extends Codec { } } - @Override public void close() throws IOException { Iterator it = consumers.iterator(); IOException err = null; diff --git a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java index 60ddccaa52d..93832932f42 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java @@ -58,8 +58,6 @@ public class DocValuesCodec extends Codec { public PerDocConsumer docsConsumer(final PerDocWriteState state) throws IOException { return new PerDocConsumer() { - - @Override public void close() throws IOException { } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java index bb6c8556a3a..86e214db841 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java @@ -130,7 +130,6 @@ public class DocValuesProducerBase extends PerDocValues { } } - @Override public void close() throws IOException { Collection values = docValues.values(); IOException ex = null; From 471cb20526c55d4c9e294be2edb4bac2c3cf0abd Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Tue, 3 May 2011 09:19:32 +0000 Subject: [PATCH 029/116] removed synchronization on value writers, added test for docValues 
comparators git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1098980 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene/index/PerFieldCodecWrapper.java | 5 +- .../codecs/docvalues/DocValuesCodec.java | 4 - .../org/apache/lucene/index/values/Bytes.java | 2 +- .../index/values/FixedDerefBytesImpl.java | 4 +- .../index/values/FixedSortedBytesImpl.java | 4 +- .../index/values/FixedStraightBytesImpl.java | 4 +- .../apache/lucene/index/values/Floats.java | 8 +- .../lucene/index/values/MultiDocValues.java | 8 +- .../lucene/index/values/PackedIntsImpl.java | 9 +- .../index/values/VarDerefBytesImpl.java | 4 +- .../index/values/VarSortedBytesImpl.java | 4 +- .../index/values/VarStraightBytesImpl.java | 4 +- .../apache/lucene/search/FieldComparator.java | 30 ++++-- .../org/apache/lucene/search/SortField.java | 4 +- .../index/values/TestDocValuesIndexing.java | 33 ++----- .../org/apache/lucene/search/TestSort.java | 91 ++++++++++++++++++- 16 files changed, 151 insertions(+), 67 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java b/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java index 7c30702d01c..174eb5a1683 100644 --- a/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java +++ b/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java @@ -298,7 +298,10 @@ final class PerFieldCodecWrapper extends Codec { IOException err = null; while (it.hasNext()) { try { - it.next().close(); + PerDocValues next = it.next(); + if (next != null) { + next.close(); + } } catch (IOException ioe) { // keep first IOException we hit but keep // closing the rest diff --git a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java index 93832932f42..c8ff645ebfd 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java @@ -76,11 +76,7 @@ public class DocValuesCodec extends Codec { @Override public PerDocValues docsProducer(SegmentReadState state) throws IOException { - try { return new DocValuesProducerBase(state.segmentInfo, state.dir, state.fieldInfos, state.codecId); - }catch (IOException e) { - return new DocValuesProducerBase(state.segmentInfo, state.dir, state.fieldInfos, state.codecId); - } } @Override diff --git a/lucene/src/java/org/apache/lucene/index/values/Bytes.java b/lucene/src/java/org/apache/lucene/index/values/Bytes.java index a8a7c2d4d3d..b386b932fb9 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Bytes.java +++ b/lucene/src/java/org/apache/lucene/index/values/Bytes.java @@ -388,7 +388,7 @@ public final class Bytes { public abstract void add(int docID, BytesRef bytes) throws IOException; @Override - public synchronized void finish(int docCount) throws IOException { + public void finish(int docCount) throws IOException { try { if (datOut != null) datOut.close(); diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java index 4f582c5c7f1..01f89742d41 100644 --- a/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java @@ -71,7 +71,7 @@ class FixedDerefBytesImpl { } @Override - synchronized public void add(int docID, BytesRef bytes) throws IOException { + public void add(int docID, BytesRef 
bytes) throws IOException { if (bytes.length == 0) // default value - skip it return; if (size == -1) { @@ -102,7 +102,7 @@ class FixedDerefBytesImpl { // Important that we get docCount, in case there were // some last docs that we didn't see @Override - synchronized public void finish(int docCount) throws IOException { + public void finish(int docCount) throws IOException { try { if (size == -1) { datOut.writeInt(size); diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java index 47f8f2a6c99..161b2788393 100644 --- a/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java @@ -78,7 +78,7 @@ class FixedSortedBytesImpl { } @Override - synchronized public void add(int docID, BytesRef bytes) throws IOException { + public void add(int docID, BytesRef bytes) throws IOException { if (bytes.length == 0) return; // default - skip it if (size == -1) { @@ -103,7 +103,7 @@ class FixedSortedBytesImpl { // Important that we get docCount, in case there were // some last docs that we didn't see @Override - synchronized public void finish(int docCount) throws IOException { + public void finish(int docCount) throws IOException { try { if (size == -1) {// no data added datOut.writeInt(size); diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java index 82ae24b7355..0a89587a037 100644 --- a/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java @@ -52,7 +52,7 @@ class FixedStraightBytesImpl { // TODO - impl bulk copy here! 
@Override - synchronized public void add(int docID, BytesRef bytes) throws IOException { + public void add(int docID, BytesRef bytes) throws IOException { if (size == -1) { size = bytes.length; datOut.writeInt(size); @@ -103,7 +103,7 @@ class FixedStraightBytesImpl { } @Override - synchronized public void finish(int docCount) throws IOException { + public void finish(int docCount) throws IOException { try { if (size == -1) {// no data added datOut.writeInt(0); diff --git a/lucene/src/java/org/apache/lucene/index/values/Floats.java b/lucene/src/java/org/apache/lucene/index/values/Floats.java index 31272553abf..46e8e88a733 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Floats.java +++ b/lucene/src/java/org/apache/lucene/index/values/Floats.java @@ -153,7 +153,7 @@ public class Floats { } @Override - synchronized public void add(final int docID, final double v) + public void add(final int docID, final double v) throws IOException { assert docID > lastDocId : "docID: " + docID + " must be greater than the last added doc id: " + lastDocId; @@ -167,7 +167,7 @@ public class Floats { } @Override - synchronized public void finish(int docCount) throws IOException { + public void finish(int docCount) throws IOException { try { if (docCount > lastDocId + 1) for (int i = lastDocId; i < docCount; i++) { @@ -196,7 +196,7 @@ public class Floats { } @Override - synchronized public void add(int docID, double v) throws IOException { + public void add(int docID, double v) throws IOException { assert docID > lastDocId : "docID: " + docID + " must be greater than the last added doc id: " + lastDocId; if (docID - lastDocId > 1) { @@ -209,7 +209,7 @@ public class Floats { } @Override - synchronized public void finish(int docCount) throws IOException { + public void finish(int docCount) throws IOException { try { if (docCount > lastDocId + 1) for (int i = lastDocId; i < docCount; i++) { diff --git a/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java b/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java index 5ffa1b1a9cf..9eb38a2f5a0 100644 --- a/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java +++ b/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java @@ -191,8 +191,9 @@ public class MultiDocValues extends DocValues { } private final int ensureSource(int docID) { - int n = docID - start; - if (n >= numDocs) { + if (docID >= start && docID < start+numDocs) { + return docID - start; + } else { final int idx = ReaderUtil.subIndex(docID, starts); assert idx >= 0 && idx < docValuesIdx.length : "idx was " + idx + " for doc id: " + docID + " slices : " + Arrays.toString(starts); @@ -207,9 +208,8 @@ public class MultiDocValues extends DocValues { start = docValuesIdx[idx].start; numDocs = docValuesIdx[idx].length; - n = docID - start; + return docID - start; } - return n; } public double getFloat(int docID) { diff --git a/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java b/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java index e2f7642b828..246aafcb0cc 100644 --- a/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java @@ -70,7 +70,7 @@ class PackedIntsImpl { } @Override - public synchronized void add(int docID, long v) throws IOException { + public void add(int docID, long v) throws IOException { assert lastDocId < docID; if (!started) { started = true; @@ -96,7 +96,7 @@ class PackedIntsImpl { } @Override - public synchronized 
void finish(int docCount) throws IOException { + public void finish(int docCount) throws IOException { try { if (!started) { minValue = maxValue = 0; @@ -118,8 +118,8 @@ class PackedIntsImpl { for (int i = firstDoc; i < lastDocId;) { w.add(docToValue[i] - minValue); - final int nextValue = defaultValues.nextSetBit(i); - for (i++; i < nextValue; i++) { + final int nextValue = defaultValues.nextSetBit(++i); + for (; i < nextValue; i++) { w.add(defaultValue); // fill all gaps } } @@ -198,6 +198,7 @@ class PackedIntsImpl { // TODO -- can we somehow avoid 2X method calls // on each get? must push minValue down, and make // PackedInts implement Ints.Source + assert docID >= 0; return minValue + values.get(docID); } diff --git a/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java index bb9f9be53b6..a960a44b0d9 100644 --- a/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java @@ -130,7 +130,7 @@ class VarDerefBytesImpl { } @Override - synchronized public void add(int docID, BytesRef bytes) throws IOException { + public void add(int docID, BytesRef bytes) throws IOException { if (bytes.length == 0) return; // default final int e = hash.add(bytes); @@ -168,7 +168,7 @@ class VarDerefBytesImpl { // Important that we get docCount, in case there were // some last docs that we didn't see @Override - synchronized public void finish(int docCount) throws IOException { + public void finish(int docCount) throws IOException { try { idxOut.writeInt(address - 1); // write index diff --git a/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java index c781c4f2a5d..3582d282a86 100644 --- a/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java @@ -78,7 +78,7 @@ class VarSortedBytesImpl { } @Override - synchronized public void add(int docID, BytesRef bytes) throws IOException { + public void add(int docID, BytesRef bytes) throws IOException { if (bytes.length == 0) return;// default if (docID >= docToEntry.length) { @@ -97,7 +97,7 @@ class VarSortedBytesImpl { // Important that we get docCount, in case there were // some last docs that we didn't see @Override - synchronized public void finish(int docCount) throws IOException { + public void finish(int docCount) throws IOException { final int count = hash.size(); try { final int[] sortedEntries = hash.sort(comp); diff --git a/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java index 239b1c141ef..7bee3ccb4df 100644 --- a/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java @@ -75,7 +75,7 @@ class VarStraightBytesImpl { } @Override - synchronized public void add(int docID, BytesRef bytes) throws IOException { + public void add(int docID, BytesRef bytes) throws IOException { if (bytes.length == 0) return; // default fill(docID); @@ -85,7 +85,7 @@ class VarStraightBytesImpl { } @Override - synchronized public void finish(int docCount) throws IOException { + public void finish(int docCount) throws IOException { try { if (lastDocID == -1) { idxOut.writeVLong(0); diff --git 
a/lucene/src/java/org/apache/lucene/search/FieldComparator.java b/lucene/src/java/org/apache/lucene/search/FieldComparator.java index 7e2a8cc9670..52c5bdb2ea0 100644 --- a/lucene/src/java/org/apache/lucene/search/FieldComparator.java +++ b/lucene/src/java/org/apache/lucene/search/FieldComparator.java @@ -20,6 +20,8 @@ package org.apache.lucene.search; import java.io.IOException; import org.apache.lucene.index.IndexReader.AtomicReaderContext; +import org.apache.lucene.index.values.DocValues; +import org.apache.lucene.index.values.DocValues.MissingValue; import org.apache.lucene.index.values.DocValues.Source; import org.apache.lucene.search.FieldCache.DocTerms; import org.apache.lucene.search.FieldCache.DocTermsIndex; @@ -334,10 +336,13 @@ public abstract class FieldComparator { private Source currentReaderValues; private final String field; private double bottom; + private final float missingValue; + private MissingValue missing; - FloatDocValuesComparator(int numHits, String field) { + FloatDocValuesComparator(int numHits, String field, Float missingValue) { values = new double[numHits]; this.field = field; + this.missingValue = missingValue == null ? 0 : missingValue.floatValue(); } @Override @@ -367,12 +372,17 @@ public abstract class FieldComparator { @Override public void copy(int slot, int doc) { - values[slot] = currentReaderValues.getFloat(doc); + final double value = currentReaderValues.getFloat(doc); + values[slot] = value == missing.doubleValue ? missingValue : value; } @Override public FieldComparator setNextReader(AtomicReaderContext context) throws IOException { - currentReaderValues = context.reader.docValues(field).getSource(); + final DocValues docValues = context.reader.docValues(field); + if (docValues != null) { + currentReaderValues = docValues.getSource(); + missing = currentReaderValues.getMissing(); + } return this; } @@ -601,10 +611,13 @@ public abstract class FieldComparator { private Source currentReaderValues; private final String field; private long bottom; + private int missingValue; + private MissingValue missing; - IntDocValuesComparator(int numHits, String field) { + IntDocValuesComparator(int numHits, String field, Integer missingValue) { values = new long[numHits]; this.field = field; + this.missingValue = missingValue == null ? 0 : missingValue.intValue(); } @Override @@ -638,12 +651,17 @@ public abstract class FieldComparator { @Override public void copy(int slot, int doc) { - values[slot] = currentReaderValues.getInt(doc); + final long value = currentReaderValues.getInt(doc); + values[slot] = value == missing.longValue ? 
missingValue : value; } @Override public FieldComparator setNextReader(AtomicReaderContext context) throws IOException { - currentReaderValues = context.reader.docValues(field).getSource(); + DocValues docValues = context.reader.docValues(field); + if (docValues != null) { + currentReaderValues = docValues.getSource(); + missing = currentReaderValues.getMissing(); + } return this; } diff --git a/lucene/src/java/org/apache/lucene/search/SortField.java b/lucene/src/java/org/apache/lucene/search/SortField.java index 8fdc66c36e8..55e8425d600 100644 --- a/lucene/src/java/org/apache/lucene/search/SortField.java +++ b/lucene/src/java/org/apache/lucene/search/SortField.java @@ -442,14 +442,14 @@ public class SortField { case SortField.INT: if (useIndexValues) { - return new FieldComparator.IntDocValuesComparator(numHits, field); + return new FieldComparator.IntDocValuesComparator(numHits, field, (Integer) missingValue); } else { return new FieldComparator.IntComparator(numHits, (IntValuesCreator)creator, (Integer) missingValue); } case SortField.FLOAT: if (useIndexValues) { - return new FieldComparator.FloatDocValuesComparator(numHits, field); + return new FieldComparator.FloatDocValuesComparator(numHits, field, (Float) missingValue); } else { return new FieldComparator.FloatComparator(numHits, (FloatValuesCreator) creator, (Float) missingValue); } diff --git a/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java b/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java index 9ca0e86d35d..b38c0cc5e57 100644 --- a/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java +++ b/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java @@ -51,7 +51,6 @@ import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.store.LockObtainFailedException; -import org.apache.lucene.store.MockDirectoryWrapper; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.FloatsRef; import org.apache.lucene.util.LongsRef; @@ -73,9 +72,8 @@ public class TestDocValuesIndexing extends LuceneTestCase { * - DocValues * - Add @lucene.experimental to all necessary classes * - add test for unoptimized case with deletes + * - add multithreaded tests / integrate into stress indexing? * - run RAT - * - add tests for FieldComparator FloatIndexValuesComparator vs. FloatValuesComparator etc. 
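The FieldComparator and SortField hunks above share one pattern: setNextReader() pulls the per-segment Source and its MissingValue sentinel, and copy() substitutes the caller-supplied missingValue whenever the sentinel comes back. On the query side the switch is SortField.setUseIndexValues; a minimal sketch using only APIs visible in this patch (index setup omitted, so this is a fragment for illustration, not a runnable test):

    import org.apache.lucene.search.Sort;
    import org.apache.lucene.search.SortField;

    // Ask SortField.createComparator() for IntDocValuesComparator
    // instead of the FieldCache-based IntComparator.
    SortField byInt = new SortField("int", SortField.INT);
    byInt.setUseIndexValues(true);
    Sort sort = new Sort(byInt, SortField.FIELD_DOC);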
- * */ private CodecProvider provider; @@ -87,18 +85,12 @@ public class TestDocValuesIndexing extends LuceneTestCase { provider.copyFrom(CodecProvider.getDefault()); } - private Directory newDirectory2() throws IOException { - MockDirectoryWrapper newDirectory = newDirectory(); - newDirectory.setCheckIndexOnClose(false); - return newDirectory; - } - /* * Simple test case to show how to use the API */ public void testDocValuesSimple() throws CorruptIndexException, IOException, ParseException { - Directory dir = newDirectory2(); + Directory dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, writerConfig(false)); for (int i = 0; i < 5; i++) { Document doc = new Document(); @@ -109,15 +101,6 @@ public class TestDocValuesIndexing extends LuceneTestCase { writer.addDocument(doc); } writer.commit(); - for (int i = 0; i < 5; i++) { - Document doc = new Document(); - DocValuesField valuesField = new DocValuesField("docId1"); - valuesField.setFloat(i); - doc.add(valuesField); - doc.add(new Field("docId1", "" + i, Store.NO, Index.ANALYZED)); - writer.addDocument(doc); - } - writer.commit(); writer.optimize(true); writer.close(true); @@ -189,7 +172,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { Type second = values.get(1); String msg = "[first=" + first.name() + ", second=" + second.name() + "]"; // index first index - Directory d_1 = newDirectory2(); + Directory d_1 = newDirectory(); IndexWriter w_1 = new IndexWriter(d_1, writerConfig(random.nextBoolean())); indexValues(w_1, valuesPerIndex, first, values, false, 7); w_1.commit(); @@ -197,14 +180,14 @@ public class TestDocValuesIndexing extends LuceneTestCase { _TestUtil.checkIndex(d_1, w_1.getConfig().getCodecProvider()); // index second index - Directory d_2 = newDirectory2(); + Directory d_2 = newDirectory(); IndexWriter w_2 = new IndexWriter(d_2, writerConfig(random.nextBoolean())); indexValues(w_2, valuesPerIndex, second, values, false, 7); w_2.commit(); assertEquals(valuesPerIndex, w_2.maxDoc()); _TestUtil.checkIndex(d_2, w_2.getConfig().getCodecProvider()); - Directory target = newDirectory2(); + Directory target = newDirectory(); IndexWriter w = new IndexWriter(target, writerConfig(random.nextBoolean())); IndexReader r_1 = IndexReader.open(w_1, true); IndexReader r_2 = IndexReader.open(w_2, true); @@ -267,7 +250,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { public void runTestNumerics(IndexWriterConfig cfg, boolean withDeletions) throws IOException { - Directory d = newDirectory2(); + Directory d = newDirectory(); IndexWriter w = new IndexWriter(d, cfg); final int numValues = 179 + random.nextInt(151); final List numVariantList = new ArrayList(NUMERICS); @@ -359,7 +342,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { public void runTestIndexBytes(IndexWriterConfig cfg, boolean withDeletions) throws CorruptIndexException, LockObtainFailedException, IOException { - final Directory d = newDirectory2(); + final Directory d = newDirectory(); IndexWriter w = new IndexWriter(d, cfg); final List byteVariantList = new ArrayList(BYTES); // run in random order to test if fill works correctly during merges @@ -430,7 +413,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { for (int i = base; i < r.numDocs(); i++) { String msg = " field: " + byteIndexValue.name() + " at index: " + i + " base: " + base + " numDocs:" + r.numDocs() + " bytesSize: " - + bytesSize; + + bytesSize + " src: " + bytes; while (withDeletions && deleted.get(v++)) { upto += bytesSize; } diff --git 
a/lucene/src/test/org/apache/lucene/search/TestSort.java b/lucene/src/test/org/apache/lucene/search/TestSort.java index 0f5b5b1c9cd..489aa659b7e 100644 --- a/lucene/src/test/org/apache/lucene/search/TestSort.java +++ b/lucene/src/test/org/apache/lucene/search/TestSort.java @@ -25,6 +25,7 @@ import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.DocValuesField; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.CorruptIndexException; @@ -35,6 +36,9 @@ import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.MultiReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; +import org.apache.lucene.index.codecs.CodecProvider; +import org.apache.lucene.index.codecs.docvalues.DocValuesCodecProvider; +import org.apache.lucene.index.values.Type; import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.FieldValueHitQueue.Entry; @@ -118,13 +122,19 @@ public class TestSort extends LuceneTestCase { Document doc = new Document(); doc.add (new Field ("tracer", data[i][0], Field.Store.YES, Field.Index.NO)); doc.add (new Field ("contents", data[i][1], Field.Store.NO, Field.Index.ANALYZED)); - if (data[i][2] != null) doc.add (new Field ("int", data[i][2], Field.Store.NO, Field.Index.NOT_ANALYZED)); - if (data[i][3] != null) doc.add (new Field ("float", data[i][3], Field.Store.NO, Field.Index.NOT_ANALYZED)); + if (data[i][2] != null) { + doc.add (DocValuesField.set(new Field ("int", data[i][2], Field.Store.NO, Field.Index.NOT_ANALYZED), Type.INTS)); + } + if (data[i][3] != null) { + doc.add (DocValuesField.set(new Field ("float", data[i][3], Field.Store.NO, Field.Index.NOT_ANALYZED), Type.FLOAT_32)); + } if (data[i][4] != null) doc.add (new Field ("string", data[i][4], Field.Store.NO, Field.Index.NOT_ANALYZED)); if (data[i][5] != null) doc.add (new Field ("custom", data[i][5], Field.Store.NO, Field.Index.NOT_ANALYZED)); if (data[i][6] != null) doc.add (new Field ("i18n", data[i][6], Field.Store.NO, Field.Index.NOT_ANALYZED)); if (data[i][7] != null) doc.add (new Field ("long", data[i][7], Field.Store.NO, Field.Index.NOT_ANALYZED)); - if (data[i][8] != null) doc.add (new Field ("double", data[i][8], Field.Store.NO, Field.Index.NOT_ANALYZED)); + if (data[i][8] != null) { + doc.add (DocValuesField.set(new Field ("double", data[i][8], Field.Store.NO, Field.Index.NOT_ANALYZED), Type.FLOAT_64)); + } if (data[i][9] != null) doc.add (new Field ("short", data[i][9], Field.Store.NO, Field.Index.NOT_ANALYZED)); if (data[i][10] != null) doc.add (new Field ("byte", data[i][10], Field.Store.NO, Field.Index.NOT_ANALYZED)); if (data[i][11] != null) doc.add (new Field ("parser", data[i][11], Field.Store.NO, Field.Index.NOT_ANALYZED)); @@ -217,6 +227,11 @@ public class TestSort extends LuceneTestCase { @Override public void setUp() throws Exception { super.setUp(); + + //nocommit - enable doc values by default for all tests + DocValuesCodecProvider provider = new DocValuesCodecProvider(); + provider.copyFrom(CodecProvider.getDefault()); + CodecProvider.setDefault(provider); full = getFullIndex(); searchX = getXIndex(); searchY = getYIndex(); @@ -228,6 +243,7 @@ public class TestSort extends LuceneTestCase { queryG = new TermQuery (new Term ("contents", "g")); queryM = new TermQuery (new Term ("contents", "m")); sort = 
new Sort(); + } private ArrayList dirs = new ArrayList(); @@ -256,15 +272,27 @@ public class TestSort extends LuceneTestCase { assertMatches (full, queryY, sort, "BDFHJ"); } + private static SortField useDocValues(SortField field) { + field.setUseIndexValues(true); + return field; + } // test sorts where the type of field is specified public void testTypedSort() throws Exception { sort.setSort (new SortField ("int", SortField.INT), SortField.FIELD_DOC ); assertMatches (full, queryX, sort, "IGAEC"); assertMatches (full, queryY, sort, "DHFJB"); + + sort.setSort (useDocValues(new SortField ("int", SortField.INT)), SortField.FIELD_DOC ); + assertMatches (full, queryX, sort, "IGAEC"); + assertMatches (full, queryY, sort, "DHFJB"); sort.setSort (new SortField ("float", SortField.FLOAT), SortField.FIELD_DOC ); assertMatches (full, queryX, sort, "GCIEA"); assertMatches (full, queryY, sort, "DHJFB"); + + sort.setSort (useDocValues(new SortField ("float", SortField.FLOAT)), SortField.FIELD_DOC ); + assertMatches (full, queryX, sort, "GCIEA"); + assertMatches (full, queryY, sort, "DHJFB"); sort.setSort (new SortField ("long", SortField.LONG), SortField.FIELD_DOC ); assertMatches (full, queryX, sort, "EACGI"); @@ -273,7 +301,11 @@ public class TestSort extends LuceneTestCase { sort.setSort (new SortField ("double", SortField.DOUBLE), SortField.FIELD_DOC ); assertMatches (full, queryX, sort, "AGICE"); assertMatches (full, queryY, sort, "DJHBF"); - + + sort.setSort (useDocValues(new SortField ("double", SortField.DOUBLE)), SortField.FIELD_DOC ); + assertMatches (full, queryX, sort, "AGICE"); + assertMatches (full, queryY, sort, "DJHBF"); + sort.setSort (new SortField ("byte", SortField.BYTE), SortField.FIELD_DOC ); assertMatches (full, queryX, sort, "CIGAE"); assertMatches (full, queryY, sort, "DHFBJ"); @@ -458,12 +490,18 @@ public class TestSort extends LuceneTestCase { sort.setSort (new SortField ("int", SortField.INT), SortField.FIELD_DOC ); assertMatches (empty, queryX, sort, ""); + + sort.setSort (useDocValues(new SortField ("int", SortField.INT)), SortField.FIELD_DOC ); + assertMatches (empty, queryX, sort, ""); sort.setSort (new SortField ("string", SortField.STRING, true), SortField.FIELD_DOC ); assertMatches (empty, queryX, sort, ""); sort.setSort (new SortField ("float", SortField.FLOAT), new SortField ("string", SortField.STRING) ); assertMatches (empty, queryX, sort, ""); + + sort.setSort (useDocValues(new SortField ("float", SortField.FLOAT)), new SortField ("string", SortField.STRING) ); + assertMatches (empty, queryX, sort, ""); } static class MyFieldComparator extends FieldComparator { @@ -539,10 +577,18 @@ public class TestSort extends LuceneTestCase { sort.setSort (new SortField ("int", SortField.INT, true) ); assertMatches (full, queryX, sort, "CAEGI"); assertMatches (full, queryY, sort, "BJFHD"); + + sort.setSort (useDocValues(new SortField ("int", SortField.INT, true)) ); + assertMatches (full, queryX, sort, "CAEGI"); + assertMatches (full, queryY, sort, "BJFHD"); sort.setSort (new SortField ("float", SortField.FLOAT, true) ); assertMatches (full, queryX, sort, "AECIG"); assertMatches (full, queryY, sort, "BFJHD"); + + sort.setSort (useDocValues(new SortField ("float", SortField.FLOAT, true)) ); + assertMatches (full, queryX, sort, "AECIG"); + assertMatches (full, queryY, sort, "BFJHD"); sort.setSort (new SortField ("string", SortField.STRING, true) ); assertMatches (full, queryX, sort, "CEGIA"); @@ -560,11 +606,17 @@ public class TestSort extends LuceneTestCase { sort.setSort (new 
SortField ("int", SortField.INT) ); assertMatches (full, queryF, sort, "IZJ"); + sort.setSort (useDocValues(new SortField ("int", SortField.INT)) ); + assertMatches (full, queryF, sort, "IZJ"); + sort.setSort (new SortField ("int", SortField.INT, true) ); assertMatches (full, queryF, sort, "JZI"); sort.setSort (new SortField ("float", SortField.FLOAT) ); assertMatches (full, queryF, sort, "ZJI"); + + sort.setSort (useDocValues(new SortField ("float", SortField.FLOAT)) ); + assertMatches (full, queryF, sort, "ZJI"); // using a nonexisting field as first sort key shouldn't make a difference: sort.setSort (new SortField ("nosuchfield", SortField.STRING), @@ -887,12 +939,30 @@ public class TestSort extends LuceneTestCase { sort.setSort(new SortField("int", SortField.INT)); expected = isFull ? "IDHFGJABEC" : "IDHFGJAEBC"; assertMatches(multi, queryA, sort, expected); + + sort.setSort(useDocValues(new SortField ("int", SortField.INT))); + expected = isFull ? "IDHFGJABEC" : "IDHFGJAEBC"; + assertMatches(multi, queryA, sort, expected); + + sort.setSort(useDocValues(new SortField ("int", SortField.INT)), SortField.FIELD_DOC); + expected = isFull ? "IDHFGJABEC" : "IDHFGJAEBC"; + assertMatches(multi, queryA, sort, expected); + + sort.setSort(useDocValues(new SortField("int", SortField.INT))); + expected = isFull ? "IDHFGJABEC" : "IDHFGJAEBC"; + assertMatches(multi, queryA, sort, expected); sort.setSort(new SortField ("float", SortField.FLOAT), SortField.FIELD_DOC); assertMatches(multi, queryA, sort, "GDHJCIEFAB"); sort.setSort(new SortField("float", SortField.FLOAT)); assertMatches(multi, queryA, sort, "GDHJCIEFAB"); + + sort.setSort(useDocValues(new SortField ("float", SortField.FLOAT)), SortField.FIELD_DOC); + assertMatches(multi, queryA, sort, "GDHJCIEFAB"); + + sort.setSort(useDocValues(new SortField("float", SortField.FLOAT))); + assertMatches(multi, queryA, sort, "GDHJCIEFAB"); sort.setSort(new SortField("string", SortField.STRING)); assertMatches(multi, queryA, sort, "DJAIHGFEBC"); @@ -900,6 +970,10 @@ public class TestSort extends LuceneTestCase { sort.setSort(new SortField("int", SortField.INT, true)); expected = isFull ? "CABEJGFHDI" : "CAEBJGFHDI"; assertMatches(multi, queryA, sort, expected); + + sort.setSort(useDocValues(new SortField("int", SortField.INT, true))); + expected = isFull ? 
"CABEJGFHDI" : "CAEBJGFHDI"; + assertMatches(multi, queryA, sort, expected); sort.setSort(new SortField("float", SortField.FLOAT, true)); assertMatches(multi, queryA, sort, "BAFECIJHDG"); @@ -909,6 +983,9 @@ public class TestSort extends LuceneTestCase { sort.setSort(new SortField("int", SortField.INT),new SortField("float", SortField.FLOAT)); assertMatches(multi, queryA, sort, "IDHFGJEABC"); + + sort.setSort(useDocValues(new SortField("int", SortField.INT)), useDocValues(new SortField("float", SortField.FLOAT))); + assertMatches(multi, queryA, sort, "IDHFGJEABC"); sort.setSort(new SortField("float", SortField.FLOAT),new SortField("string", SortField.STRING)); assertMatches(multi, queryA, sort, "GDHJICEFAB"); @@ -918,6 +995,12 @@ public class TestSort extends LuceneTestCase { sort.setSort(new SortField ("int", SortField.INT, true)); assertMatches(multi, queryF, sort, "JZI"); + + sort.setSort(useDocValues(new SortField ("int", SortField.INT))); + assertMatches(multi, queryF, sort, "IZJ"); + + sort.setSort(useDocValues(new SortField ("int", SortField.INT, true))); + assertMatches(multi, queryF, sort, "JZI"); sort.setSort(new SortField ("float", SortField.FLOAT)); assertMatches(multi, queryF, sort, "ZJI"); From 9e77d3cdc68bc7b44c3bfaccd59091933086b5ac Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Thu, 5 May 2011 08:35:40 +0000 Subject: [PATCH 030/116] LUCENE-3070: enable docvalues by default for every codec git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1099717 13f79535-47bb-0310-9956-ffa450edef68 --- .../codecs/appending/AppendingCodec.java | 18 ++++ .../lucene/index/DocFieldProcessor.java | 2 +- .../lucene/index/PerFieldCodecWrapper.java | 2 +- .../org/apache/lucene/index/codecs/Codec.java | 8 +- ...dec.java => DefaultDocValuesConsumer.java} | 94 ++++++------------- ...ase.java => DefaultDocValuesProducer.java} | 10 +- .../{docvalues => }/DocValuesConsumer.java | 2 +- .../lucene/index/codecs/PerDocConsumer.java | 1 - .../docvalues/DocValuesCodecProvider.java | 34 ------- .../index/codecs/preflex/PreFlexCodec.java | 13 +++ .../index/codecs/pulsing/PulsingCodec.java | 19 ++++ .../codecs/simpletext/SimpleTextCodec.java | 20 ++++ .../index/codecs/standard/StandardCodec.java | 20 ++++ .../index/values/PerDocFieldValues.java | 2 +- .../apache/lucene/index/values/Writer.java | 2 +- .../mockintblock/MockFixedIntBlockCodec.java | 20 +++- .../MockVariableIntBlockCodec.java | 20 +++- .../codecs/mockrandom/MockRandomCodec.java | 20 +++- .../index/codecs/mocksep/MockSepCodec.java | 20 +++- .../org/apache/lucene/TestExternalCodecs.java | 10 ++ .../apache/lucene/index/TestDocTermOrds.java | 17 ++++ .../index/values/TestDocValuesIndexing.java | 9 +- .../org/apache/lucene/search/TestSort.java | 7 +- 23 files changed, 237 insertions(+), 133 deletions(-) rename lucene/src/java/org/apache/lucene/index/codecs/{docvalues/DocValuesCodec.java => DefaultDocValuesConsumer.java} (57%) rename lucene/src/java/org/apache/lucene/index/codecs/{docvalues/DocValuesProducerBase.java => DefaultDocValuesProducer.java} (93%) rename lucene/src/java/org/apache/lucene/index/codecs/{docvalues => }/DocValuesConsumer.java (99%) delete mode 100644 lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodecProvider.java diff --git a/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingCodec.java b/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingCodec.java index 423e66afd5d..8e46e0420ce 100644 --- 
a/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingCodec.java +++ b/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingCodec.java @@ -20,13 +20,19 @@ package org.apache.lucene.index.codecs.appending; import java.io.IOException; import java.util.Set; +import org.apache.lucene.index.PerDocWriteState; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.codecs.Codec; +import org.apache.lucene.index.codecs.DocValuesConsumer; +import org.apache.lucene.index.codecs.DefaultDocValuesProducer; import org.apache.lucene.index.codecs.FieldsConsumer; import org.apache.lucene.index.codecs.FieldsProducer; import org.apache.lucene.index.codecs.FixedGapTermsIndexReader; +import org.apache.lucene.index.codecs.PerDocConsumer; +import org.apache.lucene.index.codecs.DefaultDocValuesConsumer; +import org.apache.lucene.index.codecs.PerDocValues; import org.apache.lucene.index.codecs.standard.StandardCodec; import org.apache.lucene.index.codecs.PostingsReaderBase; import org.apache.lucene.index.codecs.standard.StandardPostingsReader; @@ -133,10 +139,22 @@ public class AppendingCodec extends Codec { StandardPostingsReader.files(dir, segmentInfo, codecIdAsString, files); BlockTermsReader.files(dir, segmentInfo, codecIdAsString, files); FixedGapTermsIndexReader.files(dir, segmentInfo, codecIdAsString, files); + DefaultDocValuesConsumer.files(dir, segmentInfo, codecId, files); } @Override public void getExtensions(Set extensions) { StandardCodec.getStandardExtensions(extensions); + DefaultDocValuesConsumer.getDocValuesExtensions(extensions); + } + + @Override + public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException { + return new DefaultDocValuesConsumer(state, BytesRef.getUTF8SortedAsUnicodeComparator()); + } + + @Override + public PerDocValues docsProducer(SegmentReadState state) throws IOException { + return new DefaultDocValuesProducer(state.segmentInfo, state.dir, state.fieldInfos, state.codecId); } } diff --git a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java index cb4a0bfa8b4..154b4884b8d 100644 --- a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java +++ b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java @@ -30,7 +30,7 @@ import org.apache.lucene.document.Fieldable; import org.apache.lucene.index.DocumentsWriterPerThread.DocState; import org.apache.lucene.index.codecs.Codec; import org.apache.lucene.index.codecs.PerDocConsumer; -import org.apache.lucene.index.codecs.docvalues.DocValuesConsumer; +import org.apache.lucene.index.codecs.DocValuesConsumer; import org.apache.lucene.util.ArrayUtil; diff --git a/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java b/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java index 174eb5a1683..f3cb616877c 100644 --- a/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java +++ b/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java @@ -32,7 +32,7 @@ import org.apache.lucene.index.codecs.FieldsProducer; import org.apache.lucene.index.codecs.PerDocConsumer; import org.apache.lucene.index.codecs.PerDocValues; import org.apache.lucene.index.codecs.TermsConsumer; -import org.apache.lucene.index.codecs.docvalues.DocValuesConsumer; +import org.apache.lucene.index.codecs.DocValuesConsumer; import 
org.apache.lucene.index.values.DocValues; import org.apache.lucene.store.Directory; diff --git a/lucene/src/java/org/apache/lucene/index/codecs/Codec.java b/lucene/src/java/org/apache/lucene/index/codecs/Codec.java index 464f736dc17..736ceed67dd 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/Codec.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/Codec.java @@ -52,13 +52,9 @@ public abstract class Codec { * use; else, those files may be deleted. */ public abstract FieldsProducer fieldsProducer(SegmentReadState state) throws IOException; - public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException { - return null; - } + public abstract PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException; - public PerDocValues docsProducer(SegmentReadState state) throws IOException { - return null; - } + public abstract PerDocValues docsProducer(SegmentReadState state) throws IOException; /** * Gathers files associated with this segment diff --git a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java b/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesConsumer.java similarity index 57% rename from lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java rename to lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesConsumer.java index c8ff645ebfd..6ae431cbf39 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesConsumer.java @@ -1,4 +1,4 @@ -package org.apache.lucene.index.codecs.docvalues; +package org.apache.lucene.index.codecs; /** * Licensed to the Apache Software Foundation (ASF) under one or more @@ -16,87 +16,50 @@ package org.apache.lucene.index.codecs.docvalues; * See the License for the specific language governing permissions and * limitations under the License. */ + import java.io.IOException; import java.util.Comparator; import java.util.Set; +import java.util.concurrent.atomic.AtomicLong; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.PerDocWriteState; import org.apache.lucene.index.SegmentInfo; -import org.apache.lucene.index.SegmentReadState; -import org.apache.lucene.index.SegmentWriteState; -import org.apache.lucene.index.codecs.Codec; -import org.apache.lucene.index.codecs.FieldsConsumer; -import org.apache.lucene.index.codecs.FieldsProducer; -import org.apache.lucene.index.codecs.PerDocConsumer; -import org.apache.lucene.index.codecs.PerDocValues; import org.apache.lucene.index.values.Writer; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; -/** - * A codec that adds DocValues support to a given codec transparently. 
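The wrapper described by this javadoc is on its way out: DocValuesCodec is rewritten into DefaultDocValuesConsumer, and Codec itself grows two abstract hooks (see the Codec.java hunk above). Codecs that want the default doc-values storage all implement them the same way; the recurring pattern, exactly as it appears in the Standard, Pulsing, SimpleText and mock codecs later in this patch:

    @Override
    public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException {
      return new DefaultDocValuesConsumer(state,
          BytesRef.getUTF8SortedAsUnicodeComparator());
    }

    @Override
    public PerDocValues docsProducer(SegmentReadState state) throws IOException {
      return new DefaultDocValuesProducer(state.segmentInfo, state.dir,
          state.fieldInfos, state.codecId);
    }

    // Codecs without doc-values support (PreFlex, the RAM codec in
    // TestExternalCodecs) return null from both hooks instead.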
- * @lucene.experimental - */ -public class DocValuesCodec extends Codec { - private final Codec other; +public class DefaultDocValuesConsumer extends PerDocConsumer { + private final String segmentName; + private final int codecId; + private final Directory directory; + private final AtomicLong bytesUsed; private final Comparator comparator; - public DocValuesCodec(Codec other, Comparator comparator) { - this.name = other.name; - this.other = other; + public DefaultDocValuesConsumer(PerDocWriteState state, Comparator comparator) { + this.segmentName = state.segmentName; + this.codecId = state.codecId; + this.bytesUsed = state.bytesUsed; + this.directory = state.directory; this.comparator = comparator; } - - public DocValuesCodec(Codec other) { - this(other, null); + + @Override + public void close() throws IOException { } @Override - public PerDocConsumer docsConsumer(final PerDocWriteState state) - throws IOException { - return new PerDocConsumer() { - public void close() throws IOException { - } - - @Override - public DocValuesConsumer addValuesField(FieldInfo field) - throws IOException { - final DocValuesConsumer consumer = Writer.create(field.getDocValues(), - docValuesId(state.segmentName, state.codecId, field.number), - // TODO can we have a compound file per segment and codec for - // docvalues? - state.directory, comparator, state.bytesUsed); - return consumer; - } - }; - } - - @Override - public PerDocValues docsProducer(SegmentReadState state) throws IOException { - return new DocValuesProducerBase(state.segmentInfo, state.dir, state.fieldInfos, state.codecId); - } - - @Override - public FieldsConsumer fieldsConsumer(SegmentWriteState state) - throws IOException { - return other.fieldsConsumer(state); - } - - @Override - public FieldsProducer fieldsProducer(SegmentReadState state) - throws IOException { - return other.fieldsProducer(state); + public DocValuesConsumer addValuesField(FieldInfo field) throws IOException { + return Writer.create(field.getDocValues(), + docValuesId(segmentName, codecId, field.number), + // TODO can we have a compound file per segment and codec for + // docvalues? 
+ directory, comparator, bytesUsed); } - static String docValuesId(String segmentsName, int codecID, int fieldId) { - return segmentsName + "_" + codecID + "-" + fieldId; - } - - @Override - public void files(Directory dir, SegmentInfo segmentInfo, int codecId, + public static void files(Directory dir, SegmentInfo segmentInfo, int codecId, Set files) throws IOException { FieldInfos fieldInfos = segmentInfo.getFieldInfos(); boolean indexed = false; @@ -131,14 +94,13 @@ public class DocValuesCodec extends Codec { } } - if (indexed) { - other.files(dir, segmentInfo, codecId, files); - } + } + + static String docValuesId(String segmentsName, int codecID, int fieldId) { + return segmentsName + "_" + codecID + "-" + fieldId; } - @Override - public void getExtensions(Set extensions) { - other.getExtensions(extensions); + public static void getDocValuesExtensions(Set extensions) { extensions.add(Writer.DATA_EXTENSION); extensions.add(Writer.INDEX_EXTENSION); } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java b/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesProducer.java similarity index 93% rename from lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java rename to lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesProducer.java index 86e214db841..e3274dd67c1 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesProducer.java @@ -1,4 +1,4 @@ -package org.apache.lucene.index.codecs.docvalues; +package org.apache.lucene.index.codecs; /** * Licensed to the Apache Software Foundation (ASF) under one or more @@ -37,12 +37,12 @@ import org.apache.lucene.store.Directory; * * @lucene.experimental */ -public class DocValuesProducerBase extends PerDocValues { +public class DefaultDocValuesProducer extends PerDocValues { protected final TreeMap docValues = new TreeMap(); /** - * Creates a new {@link DocValuesProducerBase} instance and loads all + * Creates a new {@link DefaultDocValuesProducer} instance and loads all * {@link DocValues} instances for this segment and codec. * * @param si @@ -56,7 +56,7 @@ public class DocValuesProducerBase extends PerDocValues { * @throws IOException * if an {@link IOException} occurs */ - protected DocValuesProducerBase(SegmentInfo si, Directory dir, + public DefaultDocValuesProducer(SegmentInfo si, Directory dir, FieldInfos fieldInfo, int codecId) throws IOException { load(fieldInfo, si.name, si.docCount, dir, codecId); } @@ -77,7 +77,7 @@ public class DocValuesProducerBase extends PerDocValues { if (codecId == fieldInfo.getCodecId() && fieldInfo.hasDocValues()) { final String field = fieldInfo.name; // TODO can we have a compound file per segment and codec for docvalues? 
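Consumer and producer agree on file identity through docValuesId, kept as a package-private helper: segment name, then codec id, then field number. Restated with a worked example:

    static String docValuesId(String segmentsName, int codecID, int fieldId) {
      return segmentsName + "_" + codecID + "-" + fieldId;
    }
    // e.g. docValuesId("_0", 0, 3) == "_0_0-3"; Writer appends its
    // DATA_EXTENSION / INDEX_EXTENSION when creating that field's files.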
- final String id = DocValuesCodec.docValuesId(segment, codecId, fieldInfo.number); + final String id = DefaultDocValuesConsumer.docValuesId(segment, codecId, fieldInfo.number); docValues.put(field, loadDocValues(docCount, dir, id, fieldInfo .getDocValues())); } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesConsumer.java b/lucene/src/java/org/apache/lucene/index/codecs/DocValuesConsumer.java similarity index 99% rename from lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesConsumer.java rename to lucene/src/java/org/apache/lucene/index/codecs/DocValuesConsumer.java index ac2b5b7d63d..caf98aa8316 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesConsumer.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/DocValuesConsumer.java @@ -1,4 +1,4 @@ -package org.apache.lucene.index.codecs.docvalues; +package org.apache.lucene.index.codecs; /** * Licensed to the Apache Software Foundation (ASF) under one or more diff --git a/lucene/src/java/org/apache/lucene/index/codecs/PerDocConsumer.java b/lucene/src/java/org/apache/lucene/index/codecs/PerDocConsumer.java index 81c11af1a35..4a644ee91a9 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/PerDocConsumer.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/PerDocConsumer.java @@ -19,7 +19,6 @@ import java.io.Closeable; import java.io.IOException; import org.apache.lucene.index.FieldInfo; -import org.apache.lucene.index.codecs.docvalues.DocValuesConsumer; import org.apache.lucene.index.values.DocValues; /** diff --git a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodecProvider.java b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodecProvider.java deleted file mode 100644 index ab54101f0fb..00000000000 --- a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodecProvider.java +++ /dev/null @@ -1,34 +0,0 @@ -package org.apache.lucene.index.codecs.docvalues; -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
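With the hooks on Codec, this provider has nothing left to wrap and is deleted outright. The test-side effect (visible in the TestSort and TestDocValuesIndexing hunks below) is that installing doc-values support is no longer a setup step; a before/after sketch assembled from those hunks:

    // Before: wrap every registered codec to gain doc-values support.
    //   DocValuesCodecProvider provider = new DocValuesCodecProvider();
    //   provider.copyFrom(CodecProvider.getDefault());
    //   CodecProvider.setDefault(provider);

    // After: the default codecs already support doc values; tests only
    // need to skip the read-only 3.x codec.
    assumeFalse("cannot work with preflex codec",
        CodecProvider.getDefault().getDefaultFieldCodec().equals("PreFlex"));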
- */ -import org.apache.lucene.index.codecs.Codec; -import org.apache.lucene.index.codecs.CodecProvider; - -/** - * Simple Codec provider that wrapps all registered codecs into a {@link DocValuesCodec} - */ -public class DocValuesCodecProvider extends CodecProvider { - - @Override - public synchronized void register(Codec codec) { - if (codec instanceof DocValuesCodec) { - super.register(codec); - } else { - super.register(new DocValuesCodec(codec)); - } - } -} diff --git a/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexCodec.java b/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexCodec.java index 3c279d32cb9..c3860d8ca3d 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexCodec.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexCodec.java @@ -22,11 +22,14 @@ import java.io.IOException; import org.apache.lucene.store.Directory; import org.apache.lucene.index.codecs.Codec; +import org.apache.lucene.index.PerDocWriteState; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.codecs.FieldsConsumer; import org.apache.lucene.index.codecs.FieldsProducer; +import org.apache.lucene.index.codecs.PerDocConsumer; +import org.apache.lucene.index.codecs.PerDocValues; /** Codec that reads the pre-flex-indexing postings * format. It does not provide a writer because newly @@ -78,4 +81,14 @@ public class PreFlexCodec extends Codec { extensions.add(TERMS_EXTENSION); extensions.add(TERMS_INDEX_EXTENSION); } + + @Override + public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException { + return null; + } + + @Override + public PerDocValues docsProducer(SegmentReadState state) throws IOException { + return null; + } } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingCodec.java b/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingCodec.java index 3691f803072..2ecae8a3bb2 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingCodec.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingCodec.java @@ -20,6 +20,7 @@ package org.apache.lucene.index.codecs.pulsing; import java.io.IOException; import java.util.Set; +import org.apache.lucene.index.PerDocWriteState; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.SegmentReadState; @@ -28,8 +29,13 @@ import org.apache.lucene.index.codecs.PostingsWriterBase; import org.apache.lucene.index.codecs.standard.StandardPostingsWriter; import org.apache.lucene.index.codecs.PostingsReaderBase; import org.apache.lucene.index.codecs.standard.StandardPostingsReader; +import org.apache.lucene.index.codecs.DocValuesConsumer; +import org.apache.lucene.index.codecs.DefaultDocValuesProducer; import org.apache.lucene.index.codecs.FieldsConsumer; import org.apache.lucene.index.codecs.FieldsProducer; +import org.apache.lucene.index.codecs.PerDocConsumer; +import org.apache.lucene.index.codecs.DefaultDocValuesConsumer; +import org.apache.lucene.index.codecs.PerDocValues; import org.apache.lucene.index.codecs.VariableGapTermsIndexReader; import org.apache.lucene.index.codecs.VariableGapTermsIndexWriter; import org.apache.lucene.index.codecs.BlockTermsReader; @@ -38,6 +44,7 @@ import org.apache.lucene.index.codecs.TermsIndexReaderBase; import org.apache.lucene.index.codecs.TermsIndexWriterBase; import 
org.apache.lucene.index.codecs.standard.StandardCodec; import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; /** This codec "inlines" the postings for terms that have * low docFreq. It wraps another codec, which is used for @@ -155,10 +162,22 @@ public class PulsingCodec extends Codec { StandardPostingsReader.files(dir, segmentInfo, codecId, files); BlockTermsReader.files(dir, segmentInfo, codecId, files); VariableGapTermsIndexReader.files(dir, segmentInfo, codecId, files); + DefaultDocValuesConsumer.files(dir, segmentInfo, id, files); } @Override public void getExtensions(Set extensions) { StandardCodec.getStandardExtensions(extensions); + DefaultDocValuesConsumer.getDocValuesExtensions(extensions); + } + + @Override + public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException { + return new DefaultDocValuesConsumer(state, BytesRef.getUTF8SortedAsUnicodeComparator()); + } + + @Override + public PerDocValues docsProducer(SegmentReadState state) throws IOException { + return new DefaultDocValuesProducer(state.segmentInfo, state.dir, state.fieldInfos, state.codecId); } } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextCodec.java b/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextCodec.java index a733acf157c..89456109500 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextCodec.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextCodec.java @@ -20,14 +20,21 @@ package org.apache.lucene.index.codecs.simpletext; import java.io.IOException; import java.util.Set; +import org.apache.lucene.index.PerDocWriteState; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.codecs.Codec; +import org.apache.lucene.index.codecs.DocValuesConsumer; +import org.apache.lucene.index.codecs.DefaultDocValuesProducer; import org.apache.lucene.index.codecs.FieldsConsumer; import org.apache.lucene.index.codecs.FieldsProducer; +import org.apache.lucene.index.codecs.PerDocConsumer; +import org.apache.lucene.index.codecs.DefaultDocValuesConsumer; +import org.apache.lucene.index.codecs.PerDocValues; import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; /** For debugging, curiosity, transparency only!! Do not * use this codec in production. 
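Delegating the consumer and producer is not enough on its own: files() and getExtensions() must also report the doc-values files, otherwise the index's file tracking would treat them as orphans and delete them. The SimpleTextCodec hunk below shows the shape every codec in this commit follows:

    @Override
    public void files(Directory dir, SegmentInfo segmentInfo, int id,
        Set<String> files) throws IOException {
      files.add(getPostingsFileName(segmentInfo.name, "" + id));
      DefaultDocValuesConsumer.files(dir, segmentInfo, id, files);
    }

    @Override
    public void getExtensions(Set<String> extensions) {
      extensions.add(POSTINGS_EXTENSION);
      DefaultDocValuesConsumer.getDocValuesExtensions(extensions);
    }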
@@ -63,10 +70,23 @@ public class SimpleTextCodec extends Codec { @Override public void files(Directory dir, SegmentInfo segmentInfo, int id, Set files) throws IOException { files.add(getPostingsFileName(segmentInfo.name, ""+id)); + DefaultDocValuesConsumer.files(dir, segmentInfo, id, files); } @Override public void getExtensions(Set extensions) { extensions.add(POSTINGS_EXTENSION); + DefaultDocValuesConsumer.getDocValuesExtensions(extensions); + } + + // TODO: would be great if these used a plain text impl + @Override + public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException { + return new DefaultDocValuesConsumer(state, BytesRef.getUTF8SortedAsUnicodeComparator()); + } + + @Override + public PerDocValues docsProducer(SegmentReadState state) throws IOException { + return new DefaultDocValuesProducer(state.segmentInfo, state.dir, state.fieldInfos, state.codecId); } } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java b/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java index c30becba5dd..8b61d8fa2cb 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java @@ -20,12 +20,17 @@ package org.apache.lucene.index.codecs.standard; import java.io.IOException; import java.util.Set; +import org.apache.lucene.index.PerDocWriteState; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.codecs.Codec; +import org.apache.lucene.index.codecs.DocValuesConsumer; import org.apache.lucene.index.codecs.FieldsConsumer; import org.apache.lucene.index.codecs.FieldsProducer; +import org.apache.lucene.index.codecs.PerDocConsumer; +import org.apache.lucene.index.codecs.DefaultDocValuesConsumer; +import org.apache.lucene.index.codecs.PerDocValues; import org.apache.lucene.index.codecs.PostingsWriterBase; import org.apache.lucene.index.codecs.PostingsReaderBase; import org.apache.lucene.index.codecs.TermsIndexWriterBase; @@ -34,7 +39,10 @@ import org.apache.lucene.index.codecs.VariableGapTermsIndexWriter; import org.apache.lucene.index.codecs.VariableGapTermsIndexReader; import org.apache.lucene.index.codecs.BlockTermsWriter; import org.apache.lucene.index.codecs.BlockTermsReader; +import org.apache.lucene.index.codecs.DefaultDocValuesProducer; +import org.apache.lucene.index.values.Writer; import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; /** Default codec. 
* @lucene.experimental */ @@ -135,11 +143,13 @@ public class StandardCodec extends Codec { StandardPostingsReader.files(dir, segmentInfo, codecId, files); BlockTermsReader.files(dir, segmentInfo, codecId, files); VariableGapTermsIndexReader.files(dir, segmentInfo, codecId, files); + DefaultDocValuesConsumer.files(dir, segmentInfo, id, files); } @Override public void getExtensions(Set extensions) { getStandardExtensions(extensions); + DefaultDocValuesConsumer.getDocValuesExtensions(extensions); } public static void getStandardExtensions(Set extensions) { @@ -148,4 +158,14 @@ public class StandardCodec extends Codec { BlockTermsReader.getExtensions(extensions); VariableGapTermsIndexReader.getIndexExtensions(extensions); } + + @Override + public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException { + return new DefaultDocValuesConsumer(state, BytesRef.getUTF8SortedAsUnicodeComparator()); + } + + @Override + public PerDocValues docsProducer(SegmentReadState state) throws IOException { + return new DefaultDocValuesProducer(state.segmentInfo, state.dir, state.fieldInfos, state.codecId); + } } diff --git a/lucene/src/java/org/apache/lucene/index/values/PerDocFieldValues.java b/lucene/src/java/org/apache/lucene/index/values/PerDocFieldValues.java index 78958c23ca3..057537e347a 100644 --- a/lucene/src/java/org/apache/lucene/index/values/PerDocFieldValues.java +++ b/lucene/src/java/org/apache/lucene/index/values/PerDocFieldValues.java @@ -20,7 +20,7 @@ import java.util.Comparator; import org.apache.lucene.document.DocValuesField; import org.apache.lucene.document.Fieldable; -import org.apache.lucene.index.codecs.docvalues.DocValuesConsumer; +import org.apache.lucene.index.codecs.DocValuesConsumer; import org.apache.lucene.util.BytesRef; /** diff --git a/lucene/src/java/org/apache/lucene/index/values/Writer.java b/lucene/src/java/org/apache/lucene/index/values/Writer.java index 15f466f7de4..eb46fbebe80 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Writer.java +++ b/lucene/src/java/org/apache/lucene/index/values/Writer.java @@ -20,7 +20,7 @@ import java.io.IOException; import java.util.Comparator; import java.util.concurrent.atomic.AtomicLong; -import org.apache.lucene.index.codecs.docvalues.DocValuesConsumer; +import org.apache.lucene.index.codecs.DocValuesConsumer; import org.apache.lucene.store.Directory; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; diff --git a/lucene/src/test-framework/org/apache/lucene/index/codecs/mockintblock/MockFixedIntBlockCodec.java b/lucene/src/test-framework/org/apache/lucene/index/codecs/mockintblock/MockFixedIntBlockCodec.java index 222d25aaa35..70d9842a784 100644 --- a/lucene/src/test-framework/org/apache/lucene/index/codecs/mockintblock/MockFixedIntBlockCodec.java +++ b/lucene/src/test-framework/org/apache/lucene/index/codecs/mockintblock/MockFixedIntBlockCodec.java @@ -20,6 +20,7 @@ package org.apache.lucene.index.codecs.mockintblock; import java.io.IOException; import java.util.Set; +import org.apache.lucene.index.PerDocWriteState; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.SegmentReadState; @@ -33,8 +34,13 @@ import org.apache.lucene.index.codecs.sep.SepPostingsReaderImpl; import org.apache.lucene.index.codecs.sep.SepPostingsWriterImpl; import org.apache.lucene.index.codecs.intblock.FixedIntBlockIndexInput; import org.apache.lucene.index.codecs.intblock.FixedIntBlockIndexOutput; +import 
org.apache.lucene.index.codecs.DocValuesConsumer; +import org.apache.lucene.index.codecs.DefaultDocValuesProducer; import org.apache.lucene.index.codecs.FixedGapTermsIndexReader; import org.apache.lucene.index.codecs.FixedGapTermsIndexWriter; +import org.apache.lucene.index.codecs.PerDocConsumer; +import org.apache.lucene.index.codecs.DefaultDocValuesConsumer; +import org.apache.lucene.index.codecs.PerDocValues; import org.apache.lucene.index.codecs.PostingsWriterBase; import org.apache.lucene.index.codecs.PostingsReaderBase; import org.apache.lucene.index.codecs.BlockTermsReader; @@ -186,11 +192,12 @@ public class MockFixedIntBlockCodec extends Codec { } @Override - public void files(Directory dir, SegmentInfo segmentInfo, int codecId, Set files) { + public void files(Directory dir, SegmentInfo segmentInfo, int codecId, Set files) throws IOException { final String codecIdAsString = "" + codecId; SepPostingsReaderImpl.files(segmentInfo, codecIdAsString, files); BlockTermsReader.files(dir, segmentInfo, codecIdAsString, files); FixedGapTermsIndexReader.files(dir, segmentInfo, codecIdAsString, files); + DefaultDocValuesConsumer.files(dir, segmentInfo, codecId, files); } @Override @@ -198,5 +205,16 @@ public class MockFixedIntBlockCodec extends Codec { SepPostingsWriterImpl.getExtensions(extensions); BlockTermsReader.getExtensions(extensions); FixedGapTermsIndexReader.getIndexExtensions(extensions); + DefaultDocValuesConsumer.getDocValuesExtensions(extensions); + } + + @Override + public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException { + return new DefaultDocValuesConsumer(state, BytesRef.getUTF8SortedAsUnicodeComparator()); + } + + @Override + public PerDocValues docsProducer(SegmentReadState state) throws IOException { + return new DefaultDocValuesProducer(state.segmentInfo, state.dir, state.fieldInfos, state.codecId); } } diff --git a/lucene/src/test-framework/org/apache/lucene/index/codecs/mockintblock/MockVariableIntBlockCodec.java b/lucene/src/test-framework/org/apache/lucene/index/codecs/mockintblock/MockVariableIntBlockCodec.java index f796f7b08ac..c98d880aa8b 100644 --- a/lucene/src/test-framework/org/apache/lucene/index/codecs/mockintblock/MockVariableIntBlockCodec.java +++ b/lucene/src/test-framework/org/apache/lucene/index/codecs/mockintblock/MockVariableIntBlockCodec.java @@ -20,6 +20,7 @@ package org.apache.lucene.index.codecs.mockintblock; import java.io.IOException; import java.util.Set; +import org.apache.lucene.index.PerDocWriteState; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.SegmentReadState; @@ -33,8 +34,13 @@ import org.apache.lucene.index.codecs.sep.SepPostingsReaderImpl; import org.apache.lucene.index.codecs.sep.SepPostingsWriterImpl; import org.apache.lucene.index.codecs.intblock.VariableIntBlockIndexInput; import org.apache.lucene.index.codecs.intblock.VariableIntBlockIndexOutput; +import org.apache.lucene.index.codecs.DocValuesConsumer; +import org.apache.lucene.index.codecs.DefaultDocValuesProducer; import org.apache.lucene.index.codecs.FixedGapTermsIndexReader; import org.apache.lucene.index.codecs.FixedGapTermsIndexWriter; +import org.apache.lucene.index.codecs.PerDocConsumer; +import org.apache.lucene.index.codecs.DefaultDocValuesConsumer; +import org.apache.lucene.index.codecs.PerDocValues; import org.apache.lucene.index.codecs.PostingsWriterBase; import org.apache.lucene.index.codecs.PostingsReaderBase; import 
org.apache.lucene.index.codecs.BlockTermsReader; @@ -211,11 +217,12 @@ public class MockVariableIntBlockCodec extends Codec { } @Override - public void files(Directory dir, SegmentInfo segmentInfo, int codecId, Set files) { + public void files(Directory dir, SegmentInfo segmentInfo, int codecId, Set files) throws IOException { final String codecIdAsString = "" + codecId; SepPostingsReaderImpl.files(segmentInfo, codecIdAsString, files); BlockTermsReader.files(dir, segmentInfo, codecIdAsString, files); FixedGapTermsIndexReader.files(dir, segmentInfo, codecIdAsString, files); + DefaultDocValuesConsumer.files(dir, segmentInfo, codecId, files); } @Override @@ -223,5 +230,16 @@ public class MockVariableIntBlockCodec extends Codec { SepPostingsWriterImpl.getExtensions(extensions); BlockTermsReader.getExtensions(extensions); FixedGapTermsIndexReader.getIndexExtensions(extensions); + DefaultDocValuesConsumer.getDocValuesExtensions(extensions); + } + + @Override + public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException { + return new DefaultDocValuesConsumer(state, BytesRef.getUTF8SortedAsUnicodeComparator()); + } + + @Override + public PerDocValues docsProducer(SegmentReadState state) throws IOException { + return new DefaultDocValuesProducer(state.segmentInfo, state.dir, state.fieldInfos, state.codecId); } } diff --git a/lucene/src/test-framework/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java b/lucene/src/test-framework/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java index 2f14297b3ab..c09a48a6e3b 100644 --- a/lucene/src/test-framework/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java +++ b/lucene/src/test-framework/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java @@ -26,16 +26,22 @@ import java.util.Set; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.PerDocWriteState; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.codecs.BlockTermsReader; import org.apache.lucene.index.codecs.BlockTermsWriter; import org.apache.lucene.index.codecs.Codec; +import org.apache.lucene.index.codecs.DocValuesConsumer; +import org.apache.lucene.index.codecs.DefaultDocValuesProducer; import org.apache.lucene.index.codecs.FieldsConsumer; import org.apache.lucene.index.codecs.FieldsProducer; import org.apache.lucene.index.codecs.FixedGapTermsIndexReader; import org.apache.lucene.index.codecs.FixedGapTermsIndexWriter; +import org.apache.lucene.index.codecs.PerDocConsumer; +import org.apache.lucene.index.codecs.DefaultDocValuesConsumer; +import org.apache.lucene.index.codecs.PerDocValues; import org.apache.lucene.index.codecs.PostingsReaderBase; import org.apache.lucene.index.codecs.PostingsWriterBase; import org.apache.lucene.index.codecs.TermStats; @@ -338,7 +344,7 @@ public class MockRandomCodec extends Codec { BlockTermsReader.files(dir, segmentInfo, codecIdAsString, files); FixedGapTermsIndexReader.files(dir, segmentInfo, codecIdAsString, files); VariableGapTermsIndexReader.files(dir, segmentInfo, codecIdAsString, files); - + DefaultDocValuesConsumer.files(dir, segmentInfo, codecId, files); // hackish! 
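// The "hackish" loop opening below exists because MockRandomCodec picks its
// postings implementation per segment at random: files() first collects every
// candidate name, then prunes the ones that were never written. The loop body
// is elided by this hunk's context; a sketch of it, assuming the pruning is
// done with Directory.fileExists:
//
//   Iterator<String> it = files.iterator();
//   while (it.hasNext()) {
//     if (!dir.fileExists(it.next())) {
//       it.remove();
//     }
//   }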
Iterator it = files.iterator(); while(it.hasNext()) { @@ -356,7 +362,19 @@ public class MockRandomCodec extends Codec { BlockTermsReader.getExtensions(extensions); FixedGapTermsIndexReader.getIndexExtensions(extensions); VariableGapTermsIndexReader.getIndexExtensions(extensions); + DefaultDocValuesConsumer.getDocValuesExtensions(extensions); extensions.add(SEED_EXT); //System.out.println("MockRandom.getExtensions return " + extensions); } + + // can we make this more evil? + @Override + public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException { + return new DefaultDocValuesConsumer(state, BytesRef.getUTF8SortedAsUnicodeComparator()); + } + + @Override + public PerDocValues docsProducer(SegmentReadState state) throws IOException { + return new DefaultDocValuesProducer(state.segmentInfo, state.dir, state.fieldInfos, state.codecId); + } } diff --git a/lucene/src/test-framework/org/apache/lucene/index/codecs/mocksep/MockSepCodec.java b/lucene/src/test-framework/org/apache/lucene/index/codecs/mocksep/MockSepCodec.java index 1bde11d6610..ca08b6e84ba 100644 --- a/lucene/src/test-framework/org/apache/lucene/index/codecs/mocksep/MockSepCodec.java +++ b/lucene/src/test-framework/org/apache/lucene/index/codecs/mocksep/MockSepCodec.java @@ -20,14 +20,20 @@ package org.apache.lucene.index.codecs.mocksep; import java.io.IOException; import java.util.Set; +import org.apache.lucene.index.PerDocWriteState; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.codecs.Codec; +import org.apache.lucene.index.codecs.DocValuesConsumer; +import org.apache.lucene.index.codecs.DefaultDocValuesProducer; import org.apache.lucene.index.codecs.FieldsConsumer; import org.apache.lucene.index.codecs.FieldsProducer; import org.apache.lucene.index.codecs.FixedGapTermsIndexReader; import org.apache.lucene.index.codecs.FixedGapTermsIndexWriter; +import org.apache.lucene.index.codecs.PerDocConsumer; +import org.apache.lucene.index.codecs.DefaultDocValuesConsumer; +import org.apache.lucene.index.codecs.PerDocValues; import org.apache.lucene.index.codecs.PostingsReaderBase; import org.apache.lucene.index.codecs.PostingsWriterBase; import org.apache.lucene.index.codecs.BlockTermsReader; @@ -130,16 +136,18 @@ public class MockSepCodec extends Codec { } @Override - public void files(Directory dir, SegmentInfo segmentInfo, int codecId, Set files) { + public void files(Directory dir, SegmentInfo segmentInfo, int codecId, Set files) throws IOException { final String codecIdAsString = "" + codecId; SepPostingsReaderImpl.files(segmentInfo, codecIdAsString, files); BlockTermsReader.files(dir, segmentInfo, codecIdAsString, files); FixedGapTermsIndexReader.files(dir, segmentInfo, codecIdAsString, files); + DefaultDocValuesConsumer.files(dir, segmentInfo, codecId, files); } @Override public void getExtensions(Set extensions) { getSepExtensions(extensions); + DefaultDocValuesConsumer.getDocValuesExtensions(extensions); } public static void getSepExtensions(Set extensions) { @@ -147,4 +155,14 @@ public class MockSepCodec extends Codec { BlockTermsReader.getExtensions(extensions); FixedGapTermsIndexReader.getIndexExtensions(extensions); } + + @Override + public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException { + return new DefaultDocValuesConsumer(state, BytesRef.getUTF8SortedAsUnicodeComparator()); + } + + @Override + public PerDocValues docsProducer(SegmentReadState state) throws 
IOException { + return new DefaultDocValuesProducer(state.segmentInfo, state.dir, state.fieldInfos, state.codecId); + } } diff --git a/lucene/src/test/org/apache/lucene/TestExternalCodecs.java b/lucene/src/test/org/apache/lucene/TestExternalCodecs.java index b8ede7f4111..722a437d32c 100644 --- a/lucene/src/test/org/apache/lucene/TestExternalCodecs.java +++ b/lucene/src/test/org/apache/lucene/TestExternalCodecs.java @@ -489,6 +489,16 @@ public class TestExternalCodecs extends LuceneTestCase { } } + @Override + public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException { + return null; + } + + @Override + public PerDocValues docsProducer(SegmentReadState state) throws IOException { + return null; + } + @Override public void getExtensions(Set extensions) { } diff --git a/lucene/src/test/org/apache/lucene/index/TestDocTermOrds.java b/lucene/src/test/org/apache/lucene/index/TestDocTermOrds.java index fa6c25f7714..472375454fc 100644 --- a/lucene/src/test/org/apache/lucene/index/TestDocTermOrds.java +++ b/lucene/src/test/org/apache/lucene/index/TestDocTermOrds.java @@ -33,10 +33,15 @@ import org.apache.lucene.index.codecs.BlockTermsReader; import org.apache.lucene.index.codecs.BlockTermsWriter; import org.apache.lucene.index.codecs.Codec; import org.apache.lucene.index.codecs.CoreCodecProvider; +import org.apache.lucene.index.codecs.DocValuesConsumer; +import org.apache.lucene.index.codecs.DefaultDocValuesProducer; import org.apache.lucene.index.codecs.FieldsConsumer; import org.apache.lucene.index.codecs.FieldsProducer; import org.apache.lucene.index.codecs.FixedGapTermsIndexReader; import org.apache.lucene.index.codecs.FixedGapTermsIndexWriter; +import org.apache.lucene.index.codecs.PerDocConsumer; +import org.apache.lucene.index.codecs.DefaultDocValuesConsumer; +import org.apache.lucene.index.codecs.PerDocValues; import org.apache.lucene.index.codecs.PostingsReaderBase; import org.apache.lucene.index.codecs.PostingsWriterBase; import org.apache.lucene.index.codecs.TermsIndexReaderBase; @@ -196,11 +201,13 @@ public class TestDocTermOrds extends LuceneTestCase { StandardPostingsReader.files(dir, segmentInfo, ""+id, files); BlockTermsReader.files(dir, segmentInfo, ""+id, files); FixedGapTermsIndexReader.files(dir, segmentInfo, ""+id, files); + DefaultDocValuesConsumer.files(dir, segmentInfo, id, files); } @Override public void getExtensions(Set extensions) { getStandardExtensions(extensions); + DefaultDocValuesConsumer.getDocValuesExtensions(extensions); } public static void getStandardExtensions(Set extensions) { @@ -209,6 +216,16 @@ public class TestDocTermOrds extends LuceneTestCase { BlockTermsReader.getExtensions(extensions); FixedGapTermsIndexReader.getIndexExtensions(extensions); } + + @Override + public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException { + return new DefaultDocValuesConsumer(state, BytesRef.getUTF8SortedAsUnicodeComparator()); + } + + @Override + public PerDocValues docsProducer(SegmentReadState state) throws IOException { + return new DefaultDocValuesProducer(state.segmentInfo, state.dir, state.fieldInfos, state.codecId); + } } public void testRandom() throws Exception { diff --git a/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java b/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java index b38c0cc5e57..f7a0767f9f7 100644 --- a/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java +++ b/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java @@ 
-41,7 +41,6 @@ import org.apache.lucene.index.MultiPerDocValues; import org.apache.lucene.index.Term; import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.index.codecs.PerDocValues; -import org.apache.lucene.index.codecs.docvalues.DocValuesCodecProvider; import org.apache.lucene.index.values.DocValues.MissingValue; import org.apache.lucene.index.values.DocValues.Source; import org.apache.lucene.queryParser.ParseException; @@ -76,13 +75,10 @@ public class TestDocValuesIndexing extends LuceneTestCase { * - run RAT */ - private CodecProvider provider; - @Before public void setUp() throws Exception { super.setUp(); - provider = new DocValuesCodecProvider(); - provider.copyFrom(CodecProvider.getDefault()); + assumeFalse("cannot work with preflex codec", CodecProvider.getDefault().getDefaultFieldCodec().equals("PreFlex")); } /* @@ -105,7 +101,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { writer.close(true); - IndexReader reader = IndexReader.open(dir, null, true, 1, provider); + IndexReader reader = IndexReader.open(dir, null, true, 1); assertTrue(reader.isOptimized()); IndexSearcher searcher = new IndexSearcher(reader); @@ -244,7 +240,6 @@ public class TestDocValuesIndexing extends LuceneTestCase { LogMergePolicy policy = new LogDocMergePolicy(); cfg.setMergePolicy(policy); policy.setUseCompoundFile(useCompoundFile); - cfg.setCodecProvider(provider); return cfg; } diff --git a/lucene/src/test/org/apache/lucene/search/TestSort.java b/lucene/src/test/org/apache/lucene/search/TestSort.java index 489aa659b7e..2a083f8c0b4 100644 --- a/lucene/src/test/org/apache/lucene/search/TestSort.java +++ b/lucene/src/test/org/apache/lucene/search/TestSort.java @@ -37,7 +37,6 @@ import org.apache.lucene.index.MultiReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.index.codecs.CodecProvider; -import org.apache.lucene.index.codecs.docvalues.DocValuesCodecProvider; import org.apache.lucene.index.values.Type; import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.search.BooleanClause.Occur; @@ -228,10 +227,8 @@ public class TestSort extends LuceneTestCase { public void setUp() throws Exception { super.setUp(); - //nocommit - enable doc values by default for all tests - DocValuesCodecProvider provider = new DocValuesCodecProvider(); - provider.copyFrom(CodecProvider.getDefault()); - CodecProvider.setDefault(provider); + //nocommit - we need to be able to run this test with preflex, but without docvalues!!!! 
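
Stepping back to the codec hunks above: they all repeat one contract. A codec that stores doc values must (1) list its per-segment doc-values files, (2) register the doc-values file extensions, and (3) expose a consumer/producer pair. A condensed sketch of that contract, using only calls that appear in these patches (the class name is hypothetical, and the postings side is left abstract for brevity):

    import java.io.IOException;
    import java.util.Set;
    import org.apache.lucene.index.PerDocWriteState;
    import org.apache.lucene.index.SegmentInfo;
    import org.apache.lucene.index.SegmentReadState;
    import org.apache.lucene.index.codecs.Codec;
    import org.apache.lucene.index.codecs.DefaultDocValuesConsumer;
    import org.apache.lucene.index.codecs.DefaultDocValuesProducer;
    import org.apache.lucene.index.codecs.PerDocConsumer;
    import org.apache.lucene.index.codecs.PerDocValues;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.util.BytesRef;

    // Hypothetical codec showing only the doc-values wiring;
    // fieldsConsumer()/fieldsProducer() stay abstract here.
    public abstract class DocValuesAwareCodec extends Codec {
      @Override
      public void files(Directory dir, SegmentInfo segmentInfo, int codecId,
          Set<String> files) throws IOException {
        // ... add postings files here, then the doc-values files:
        DefaultDocValuesConsumer.files(dir, segmentInfo, codecId, files);
      }

      @Override
      public void getExtensions(Set<String> extensions) {
        // ... add postings extensions here, then the doc-values extensions:
        DefaultDocValuesConsumer.getDocValuesExtensions(extensions);
      }

      @Override
      public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException {
        return new DefaultDocValuesConsumer(state,
            BytesRef.getUTF8SortedAsUnicodeComparator());
      }

      @Override
      public PerDocValues docsProducer(SegmentReadState state) throws IOException {
        return new DefaultDocValuesProducer(state.segmentInfo, state.dir,
            state.fieldInfos, state.codecId);
      }
    }
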
+ assumeFalse("cannot work with preflex codec", CodecProvider.getDefault().getDefaultFieldCodec().equals("PreFlex")); full = getFullIndex(); searchX = getXIndex(); searchY = getYIndex(); From 4c68e43556c4b4b884d91061b892b35587556ad0 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Thu, 5 May 2011 09:09:31 +0000 Subject: [PATCH 031/116] removed @Override annotation inherited from Interface git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1099726 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/lucene/index/codecs/DefaultDocValuesConsumer.java | 1 - 1 file changed, 1 deletion(-) diff --git a/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesConsumer.java b/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesConsumer.java index 6ae431cbf39..b95dc748810 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesConsumer.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesConsumer.java @@ -46,7 +46,6 @@ public class DefaultDocValuesConsumer extends PerDocConsumer { this.comparator = comparator; } - @Override public void close() throws IOException { } From 998a94fa78bc5cd467b78db08e638e57fb95e6ec Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Thu, 5 May 2011 09:22:17 +0000 Subject: [PATCH 032/116] LUCENE-3070: fix TestSort to work with preflex codec git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1099732 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/lucene/search/TestSort.java | 142 ++++++++++-------- 1 file changed, 79 insertions(+), 63 deletions(-) diff --git a/lucene/src/test/org/apache/lucene/search/TestSort.java b/lucene/src/test/org/apache/lucene/search/TestSort.java index 2a083f8c0b4..96d909db83d 100644 --- a/lucene/src/test/org/apache/lucene/search/TestSort.java +++ b/lucene/src/test/org/apache/lucene/search/TestSort.java @@ -64,7 +64,8 @@ import org.apache.lucene.util._TestUtil; */ public class TestSort extends LuceneTestCase { - + // true if our codec supports docvalues: true unless codec is preflex (3.x) + boolean supportsDocValues = CodecProvider.getDefault().getDefaultFieldCodec().equals("PreFlex") == false; private static final int NUM_STRINGS = 6000 * RANDOM_MULTIPLIER; private IndexSearcher full; private IndexSearcher searchX; @@ -122,17 +123,26 @@ public class TestSort extends LuceneTestCase { doc.add (new Field ("tracer", data[i][0], Field.Store.YES, Field.Index.NO)); doc.add (new Field ("contents", data[i][1], Field.Store.NO, Field.Index.ANALYZED)); if (data[i][2] != null) { - doc.add (DocValuesField.set(new Field ("int", data[i][2], Field.Store.NO, Field.Index.NOT_ANALYZED), Type.INTS)); + Field f = supportsDocValues ? + DocValuesField.set(new Field ("int", data[i][2], Field.Store.NO, Field.Index.NOT_ANALYZED), Type.INTS) + : new Field ("int", data[i][2], Field.Store.NO, Field.Index.NOT_ANALYZED); + doc.add(f); } if (data[i][3] != null) { - doc.add (DocValuesField.set(new Field ("float", data[i][3], Field.Store.NO, Field.Index.NOT_ANALYZED), Type.FLOAT_32)); + Field f = supportsDocValues ? 
+ DocValuesField.set(new Field ("float", data[i][3], Field.Store.NO, Field.Index.NOT_ANALYZED), Type.FLOAT_32) + : new Field ("float", data[i][3], Field.Store.NO, Field.Index.NOT_ANALYZED); + doc.add(f); } if (data[i][4] != null) doc.add (new Field ("string", data[i][4], Field.Store.NO, Field.Index.NOT_ANALYZED)); if (data[i][5] != null) doc.add (new Field ("custom", data[i][5], Field.Store.NO, Field.Index.NOT_ANALYZED)); if (data[i][6] != null) doc.add (new Field ("i18n", data[i][6], Field.Store.NO, Field.Index.NOT_ANALYZED)); if (data[i][7] != null) doc.add (new Field ("long", data[i][7], Field.Store.NO, Field.Index.NOT_ANALYZED)); - if (data[i][8] != null) { - doc.add (DocValuesField.set(new Field ("double", data[i][8], Field.Store.NO, Field.Index.NOT_ANALYZED), Type.FLOAT_64)); + if (data[i][8] != null) { + Field f = supportsDocValues ? + DocValuesField.set(new Field ("double", data[i][8], Field.Store.NO, Field.Index.NOT_ANALYZED), Type.FLOAT_64) + : new Field ("double", data[i][8], Field.Store.NO, Field.Index.NOT_ANALYZED); + doc.add(f); } if (data[i][9] != null) doc.add (new Field ("short", data[i][9], Field.Store.NO, Field.Index.NOT_ANALYZED)); if (data[i][10] != null) doc.add (new Field ("byte", data[i][10], Field.Store.NO, Field.Index.NOT_ANALYZED)); @@ -227,8 +237,6 @@ public class TestSort extends LuceneTestCase { public void setUp() throws Exception { super.setUp(); - //nocommit - we need to be able to run this test with preflex, but without docvalues!!!! - assumeFalse("cannot work with preflex codec", CodecProvider.getDefault().getDefaultFieldCodec().equals("PreFlex")); full = getFullIndex(); searchX = getXIndex(); searchY = getYIndex(); @@ -279,17 +287,9 @@ public class TestSort extends LuceneTestCase { assertMatches (full, queryX, sort, "IGAEC"); assertMatches (full, queryY, sort, "DHFJB"); - sort.setSort (useDocValues(new SortField ("int", SortField.INT)), SortField.FIELD_DOC ); - assertMatches (full, queryX, sort, "IGAEC"); - assertMatches (full, queryY, sort, "DHFJB"); - sort.setSort (new SortField ("float", SortField.FLOAT), SortField.FIELD_DOC ); assertMatches (full, queryX, sort, "GCIEA"); assertMatches (full, queryY, sort, "DHJFB"); - - sort.setSort (useDocValues(new SortField ("float", SortField.FLOAT)), SortField.FIELD_DOC ); - assertMatches (full, queryX, sort, "GCIEA"); - assertMatches (full, queryY, sort, "DHJFB"); sort.setSort (new SortField ("long", SortField.LONG), SortField.FIELD_DOC ); assertMatches (full, queryX, sort, "EACGI"); @@ -299,10 +299,6 @@ public class TestSort extends LuceneTestCase { assertMatches (full, queryX, sort, "AGICE"); assertMatches (full, queryY, sort, "DJHBF"); - sort.setSort (useDocValues(new SortField ("double", SortField.DOUBLE)), SortField.FIELD_DOC ); - assertMatches (full, queryX, sort, "AGICE"); - assertMatches (full, queryY, sort, "DJHBF"); - sort.setSort (new SortField ("byte", SortField.BYTE), SortField.FIELD_DOC ); assertMatches (full, queryX, sort, "CIGAE"); assertMatches (full, queryY, sort, "DHFBJ"); @@ -314,6 +310,20 @@ public class TestSort extends LuceneTestCase { sort.setSort (new SortField ("string", SortField.STRING), SortField.FIELD_DOC ); assertMatches (full, queryX, sort, "AIGEC"); assertMatches (full, queryY, sort, "DJHFB"); + + if (supportsDocValues) { + sort.setSort (useDocValues(new SortField ("int", SortField.INT)), SortField.FIELD_DOC ); + assertMatches (full, queryX, sort, "IGAEC"); + assertMatches (full, queryY, sort, "DHFJB"); + + sort.setSort (useDocValues(new SortField ("float", SortField.FLOAT)), 
SortField.FIELD_DOC ); + assertMatches (full, queryX, sort, "GCIEA"); + assertMatches (full, queryY, sort, "DHJFB"); + + sort.setSort (useDocValues(new SortField ("double", SortField.DOUBLE)), SortField.FIELD_DOC ); + assertMatches (full, queryX, sort, "AGICE"); + assertMatches (full, queryY, sort, "DJHBF"); + } } private static class SortMissingLastTestHelper { @@ -574,22 +584,24 @@ public class TestSort extends LuceneTestCase { sort.setSort (new SortField ("int", SortField.INT, true) ); assertMatches (full, queryX, sort, "CAEGI"); assertMatches (full, queryY, sort, "BJFHD"); - - sort.setSort (useDocValues(new SortField ("int", SortField.INT, true)) ); - assertMatches (full, queryX, sort, "CAEGI"); - assertMatches (full, queryY, sort, "BJFHD"); sort.setSort (new SortField ("float", SortField.FLOAT, true) ); assertMatches (full, queryX, sort, "AECIG"); assertMatches (full, queryY, sort, "BFJHD"); - sort.setSort (useDocValues(new SortField ("float", SortField.FLOAT, true)) ); - assertMatches (full, queryX, sort, "AECIG"); - assertMatches (full, queryY, sort, "BFJHD"); - sort.setSort (new SortField ("string", SortField.STRING, true) ); assertMatches (full, queryX, sort, "CEGIA"); assertMatches (full, queryY, sort, "BFHJD"); + + if (supportsDocValues) { + sort.setSort (useDocValues(new SortField ("int", SortField.INT, true)) ); + assertMatches (full, queryX, sort, "CAEGI"); + assertMatches (full, queryY, sort, "BJFHD"); + + sort.setSort (useDocValues(new SortField ("float", SortField.FLOAT, true)) ); + assertMatches (full, queryX, sort, "AECIG"); + assertMatches (full, queryY, sort, "BFJHD"); + } } // test sorting when the sort field is empty (undefined) for some of the documents @@ -603,17 +615,19 @@ public class TestSort extends LuceneTestCase { sort.setSort (new SortField ("int", SortField.INT) ); assertMatches (full, queryF, sort, "IZJ"); - sort.setSort (useDocValues(new SortField ("int", SortField.INT)) ); - assertMatches (full, queryF, sort, "IZJ"); - sort.setSort (new SortField ("int", SortField.INT, true) ); assertMatches (full, queryF, sort, "JZI"); sort.setSort (new SortField ("float", SortField.FLOAT) ); assertMatches (full, queryF, sort, "ZJI"); + + if (supportsDocValues) { + sort.setSort (useDocValues(new SortField ("int", SortField.INT)) ); + assertMatches (full, queryF, sort, "IZJ"); - sort.setSort (useDocValues(new SortField ("float", SortField.FLOAT)) ); - assertMatches (full, queryF, sort, "ZJI"); + sort.setSort (useDocValues(new SortField ("float", SortField.FLOAT)) ); + assertMatches (full, queryF, sort, "ZJI"); + } // using a nonexisting field as first sort key shouldn't make a difference: sort.setSort (new SortField ("nosuchfield", SortField.STRING), @@ -937,29 +951,11 @@ public class TestSort extends LuceneTestCase { expected = isFull ? "IDHFGJABEC" : "IDHFGJAEBC"; assertMatches(multi, queryA, sort, expected); - sort.setSort(useDocValues(new SortField ("int", SortField.INT))); - expected = isFull ? "IDHFGJABEC" : "IDHFGJAEBC"; - assertMatches(multi, queryA, sort, expected); - - sort.setSort(useDocValues(new SortField ("int", SortField.INT)), SortField.FIELD_DOC); - expected = isFull ? "IDHFGJABEC" : "IDHFGJAEBC"; - assertMatches(multi, queryA, sort, expected); - - sort.setSort(useDocValues(new SortField("int", SortField.INT))); - expected = isFull ? 
"IDHFGJABEC" : "IDHFGJAEBC"; - assertMatches(multi, queryA, sort, expected); - sort.setSort(new SortField ("float", SortField.FLOAT), SortField.FIELD_DOC); assertMatches(multi, queryA, sort, "GDHJCIEFAB"); sort.setSort(new SortField("float", SortField.FLOAT)); assertMatches(multi, queryA, sort, "GDHJCIEFAB"); - - sort.setSort(useDocValues(new SortField ("float", SortField.FLOAT)), SortField.FIELD_DOC); - assertMatches(multi, queryA, sort, "GDHJCIEFAB"); - - sort.setSort(useDocValues(new SortField("float", SortField.FLOAT))); - assertMatches(multi, queryA, sort, "GDHJCIEFAB"); sort.setSort(new SortField("string", SortField.STRING)); assertMatches(multi, queryA, sort, "DJAIHGFEBC"); @@ -967,10 +963,6 @@ public class TestSort extends LuceneTestCase { sort.setSort(new SortField("int", SortField.INT, true)); expected = isFull ? "CABEJGFHDI" : "CAEBJGFHDI"; assertMatches(multi, queryA, sort, expected); - - sort.setSort(useDocValues(new SortField("int", SortField.INT, true))); - expected = isFull ? "CABEJGFHDI" : "CAEBJGFHDI"; - assertMatches(multi, queryA, sort, expected); sort.setSort(new SortField("float", SortField.FLOAT, true)); assertMatches(multi, queryA, sort, "BAFECIJHDG"); @@ -980,9 +972,6 @@ public class TestSort extends LuceneTestCase { sort.setSort(new SortField("int", SortField.INT),new SortField("float", SortField.FLOAT)); assertMatches(multi, queryA, sort, "IDHFGJEABC"); - - sort.setSort(useDocValues(new SortField("int", SortField.INT)), useDocValues(new SortField("float", SortField.FLOAT))); - assertMatches(multi, queryA, sort, "IDHFGJEABC"); sort.setSort(new SortField("float", SortField.FLOAT),new SortField("string", SortField.STRING)); assertMatches(multi, queryA, sort, "GDHJICEFAB"); @@ -992,12 +981,6 @@ public class TestSort extends LuceneTestCase { sort.setSort(new SortField ("int", SortField.INT, true)); assertMatches(multi, queryF, sort, "JZI"); - - sort.setSort(useDocValues(new SortField ("int", SortField.INT))); - assertMatches(multi, queryF, sort, "IZJ"); - - sort.setSort(useDocValues(new SortField ("int", SortField.INT, true))); - assertMatches(multi, queryF, sort, "JZI"); sort.setSort(new SortField ("float", SortField.FLOAT)); assertMatches(multi, queryF, sort, "ZJI"); @@ -1008,6 +991,39 @@ public class TestSort extends LuceneTestCase { sort.setSort(new SortField ("string", SortField.STRING, true)); assertMatches(multi, queryF, sort, "IJZ"); + if (supportsDocValues) { + sort.setSort(useDocValues(new SortField ("int", SortField.INT))); + expected = isFull ? "IDHFGJABEC" : "IDHFGJAEBC"; + assertMatches(multi, queryA, sort, expected); + + sort.setSort(useDocValues(new SortField ("int", SortField.INT)), SortField.FIELD_DOC); + expected = isFull ? "IDHFGJABEC" : "IDHFGJAEBC"; + assertMatches(multi, queryA, sort, expected); + + sort.setSort(useDocValues(new SortField("int", SortField.INT))); + expected = isFull ? "IDHFGJABEC" : "IDHFGJAEBC"; + assertMatches(multi, queryA, sort, expected); + + sort.setSort(useDocValues(new SortField ("float", SortField.FLOAT)), SortField.FIELD_DOC); + assertMatches(multi, queryA, sort, "GDHJCIEFAB"); + + sort.setSort(useDocValues(new SortField("float", SortField.FLOAT))); + assertMatches(multi, queryA, sort, "GDHJCIEFAB"); + + sort.setSort(useDocValues(new SortField("int", SortField.INT, true))); + expected = isFull ? 
"CABEJGFHDI" : "CAEBJGFHDI"; + assertMatches(multi, queryA, sort, expected); + + sort.setSort(useDocValues(new SortField("int", SortField.INT)), useDocValues(new SortField("float", SortField.FLOAT))); + assertMatches(multi, queryA, sort, "IDHFGJEABC"); + + sort.setSort(useDocValues(new SortField ("int", SortField.INT))); + assertMatches(multi, queryF, sort, "IZJ"); + + sort.setSort(useDocValues(new SortField ("int", SortField.INT, true))); + assertMatches(multi, queryF, sort, "JZI"); + } + // up to this point, all of the searches should have "sane" // FieldCache behavior, and should have reused hte cache in several cases assertSaneFieldCaches(getName() + " various"); From d63f39c17d8257670f7b926e45295bbb63434d9b Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Mon, 16 May 2011 11:43:51 +0000 Subject: [PATCH 033/116] LUCENE-3070: Added UOE to PreFlex Codec, Added Random DocValues injection to RandomIndexWriter, Added basic DocValues verification to CheckIndex git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1103699 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene/document/DocValuesField.java | 1 - .../org/apache/lucene/index/CheckIndex.java | 73 +++++++++++++++++ .../lucene/index/DocFieldProcessor.java | 6 +- .../org/apache/lucene/index/FieldInfo.java | 10 +++ .../org/apache/lucene/index/FieldInfos.java | 2 +- .../lucene/index/PerFieldCodecWrapper.java | 25 +++--- .../codecs/DefaultDocValuesConsumer.java | 1 + .../index/codecs/preflex/PreFlexCodec.java | 4 +- .../lucene/index/RandomIndexWriter.java | 78 +++++++++++++++++++ 9 files changed, 182 insertions(+), 18 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/document/DocValuesField.java b/lucene/src/java/org/apache/lucene/document/DocValuesField.java index c5f13e17159..0b14860dc44 100644 --- a/lucene/src/java/org/apache/lucene/document/DocValuesField.java +++ b/lucene/src/java/org/apache/lucene/document/DocValuesField.java @@ -73,7 +73,6 @@ import org.apache.lucene.util.BytesRef; * * * */ -@SuppressWarnings("serial") public class DocValuesField extends AbstractField implements PerDocFieldValues { protected BytesRef bytes; diff --git a/lucene/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/src/java/org/apache/lucene/index/CheckIndex.java index 61b3fc07da0..145a43801ae 100644 --- a/lucene/src/java/org/apache/lucene/index/CheckIndex.java +++ b/lucene/src/java/org/apache/lucene/index/CheckIndex.java @@ -27,6 +27,9 @@ import org.apache.lucene.document.AbstractField; // for javadocs import org.apache.lucene.document.Document; import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.index.codecs.DefaultSegmentInfosWriter; +import org.apache.lucene.index.codecs.PerDocValues; +import org.apache.lucene.index.values.DocValues; +import org.apache.lucene.index.values.DocValuesEnum; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; @@ -195,6 +198,9 @@ public class CheckIndex { /** Status for testing of term vectors (null if term vectors could not be tested). */ public TermVectorStatus termVectorStatus; + + /** Status for testing of DocValues (null if DocValues could not be tested). */ + public DocValuesStatus docValuesStatus; } /** @@ -254,6 +260,15 @@ public class CheckIndex { /** Exception thrown during term vector test (null on success) */ public Throwable error = null; } + + public static final class DocValuesStatus { + /** Number of documents tested. */ + public int docCount; + /** Total number of docValues tested. 
*/ + public long totalValueFields; + /** Exception thrown during doc values test (null on success) */ + public Throwable error = null; + } } /** Create a new CheckIndex on the directory. */ @@ -499,6 +514,8 @@ public class CheckIndex { // Test Term Vectors segInfoStat.termVectorStatus = testTermVectors(info, reader, nf); + + segInfoStat.docValuesStatus = testDocValues(info, reader); // Rethrow the first exception we encountered // This will cause stats for failed segments to be incremented properly @@ -510,6 +527,8 @@ public class CheckIndex { throw new RuntimeException("Stored Field test failed"); } else if (segInfoStat.termVectorStatus.error != null) { throw new RuntimeException("Term Vector test failed"); + } else if (segInfoStat.docValuesStatus.error != null) { + throw new RuntimeException("DocValues test failed"); } msg(""); @@ -920,6 +939,60 @@ public class CheckIndex { return status; } + + private Status.DocValuesStatus testDocValues(SegmentInfo info, + SegmentReader reader) { + final Status.DocValuesStatus status = new Status.DocValuesStatus(); + try { + if (infoStream != null) { + infoStream.print(" test: DocValues........"); + } + final FieldInfos fieldInfos = info.getFieldInfos(); + for (FieldInfo fieldInfo : fieldInfos) { + if (fieldInfo.hasDocValues()) { + status.totalValueFields++; + final PerDocValues perDocValues = reader.perDocValues(); + final DocValues docValues = perDocValues.docValues(fieldInfo.name); + if (docValues == null) { + continue; + } + final DocValuesEnum values = docValues.getEnum(); + while (values.nextDoc() != DocValuesEnum.NO_MORE_DOCS) { + switch (fieldInfo.docValues) { + case BYTES_FIXED_DEREF: + case BYTES_FIXED_SORTED: + case BYTES_FIXED_STRAIGHT: + case BYTES_VAR_DEREF: + case BYTES_VAR_SORTED: + case BYTES_VAR_STRAIGHT: + values.bytes(); + break; + case FLOAT_32: + case FLOAT_64: + values.getFloat(); + break; + case INTS: + values.getInt(); + break; + default: + throw new IllegalArgumentException("Field: " + fieldInfo.name + + " - no such DocValues type: " + fieldInfo.docValues); + } + } + } + } + + msg("OK [" + status.docCount + " total doc Count; Num DocValues Fields " + + status.totalValueFields); + } catch (Throwable e) { + msg("ERROR [" + String.valueOf(e.getMessage()) + "]"); + status.error = e; + if (infoStream != null) { + e.printStackTrace(infoStream); + } + } + return status; + } /** * Test term vectors for a segment. 
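
The new testDocValues check doubles as a recipe for reading doc values directly. A minimal sketch, assuming an already-open SegmentReader and a hypothetical INTS field named "counts" (other types follow the same switch as in the check above):

    PerDocValues perDocValues = reader.perDocValues();
    DocValues docValues = perDocValues.docValues("counts");
    if (docValues != null) {
      DocValuesEnum values = docValues.getEnum();
      while (values.nextDoc() != DocValuesEnum.NO_MORE_DOCS) {
        values.getInt(); // FLOAT_32/FLOAT_64 fields use getFloat(), BYTES_* use bytes()
      }
    }
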
diff --git a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java index 53765f84f1d..003bcdeafd8 100644 --- a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java +++ b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java @@ -251,7 +251,7 @@ final class DocFieldProcessor extends DocConsumer { fieldsWriter.addField(field, fp.fieldInfo); } if (field.hasDocValues()) { - final DocValuesConsumer docValuesConsumer = docValuesConsumer(docState, fp.fieldInfo, fieldInfos); + final DocValuesConsumer docValuesConsumer = docValuesConsumer(docState, fp.fieldInfo); docValuesConsumer.add(docState.docID, field.getDocValues()); } } @@ -292,7 +292,7 @@ final class DocFieldProcessor extends DocConsumer { final private Map docValues = new HashMap(); final private Map perDocConsumers = new HashMap(); - DocValuesConsumer docValuesConsumer(DocState docState, FieldInfo fieldInfo, FieldInfos infos) + DocValuesConsumer docValuesConsumer(DocState docState, FieldInfo fieldInfo) throws IOException { DocValuesConsumer docValuesConsumer = docValues.get(fieldInfo.name); if (docValuesConsumer != null) { @@ -303,12 +303,12 @@ final class DocFieldProcessor extends DocConsumer { PerDocWriteState perDocWriteState = docState.docWriter.newPerDocWriteState(fieldInfo.getCodecId()); SegmentCodecs codecs = perDocWriteState.segmentCodecs; assert codecs.codecs.length > fieldInfo.getCodecId(); - Codec codec = codecs.codecs[fieldInfo.getCodecId()]; perDocConsumer = codec.docsConsumer(perDocWriteState); perDocConsumers.put(Integer.valueOf(fieldInfo.getCodecId()), perDocConsumer); } docValuesConsumer = perDocConsumer.addValuesField(fieldInfo); + fieldInfo.commitDocValues(); docValues.put(fieldInfo.name, docValuesConsumer); return docValuesConsumer; } diff --git a/lucene/src/java/org/apache/lucene/index/FieldInfo.java b/lucene/src/java/org/apache/lucene/index/FieldInfo.java index 26b8d30a3ea..472c48619d8 100644 --- a/lucene/src/java/org/apache/lucene/index/FieldInfo.java +++ b/lucene/src/java/org/apache/lucene/index/FieldInfo.java @@ -127,6 +127,7 @@ public final class FieldInfo { } private boolean vectorsCommitted; + private boolean docValuesCommitted; /** * Reverts all uncommitted changes on this {@link FieldInfo} @@ -138,6 +139,10 @@ public final class FieldInfo { storePositionWithTermVector = false; storeTermVector = false; } + + if (docValues != null && !docValuesCommitted) { + docValues = null; + } } /** @@ -150,4 +155,9 @@ public final class FieldInfo { assert storeTermVector; vectorsCommitted = true; } + + void commitDocValues() { + assert hasDocValues(); + docValuesCommitted = true; + } } diff --git a/lucene/src/java/org/apache/lucene/index/FieldInfos.java b/lucene/src/java/org/apache/lucene/index/FieldInfos.java index 422560ea057..569597963f7 100644 --- a/lucene/src/java/org/apache/lucene/index/FieldInfos.java +++ b/lucene/src/java/org/apache/lucene/index/FieldInfos.java @@ -750,5 +750,5 @@ public final class FieldInfos implements Iterable { } return roFis; } - + } diff --git a/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java b/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java index f3cb616877c..06d8a0339ff 100644 --- a/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java +++ b/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java @@ -317,22 +317,22 @@ final class PerFieldCodecWrapper extends Codec { } private final class PerDocConsumers extends PerDocConsumer { - private final 
ArrayList consumers = new ArrayList(); + private final PerDocConsumer[] consumers; + private final Codec[] codecs; + private final PerDocWriteState state; public PerDocConsumers(PerDocWriteState state) throws IOException { assert segmentCodecs == state.segmentCodecs; - final Codec[] codecs = segmentCodecs.codecs; - for (int i = 0; i < codecs.length; i++) { - consumers.add(codecs[i].docsConsumer(new PerDocWriteState(state, i))); - } + this.state = state; + codecs = segmentCodecs.codecs; + consumers = new PerDocConsumer[codecs.length]; } public void close() throws IOException { - Iterator it = consumers.iterator(); IOException err = null; - while (it.hasNext()) { + for (int i = 0; i < consumers.length; i++) { try { - PerDocConsumer next = it.next(); + final PerDocConsumer next = consumers[i]; if (next != null) { next.close(); } @@ -351,10 +351,13 @@ final class PerFieldCodecWrapper extends Codec { @Override public DocValuesConsumer addValuesField(FieldInfo field) throws IOException { - assert field.getCodecId() != FieldInfo.UNASSIGNED_CODEC_ID; - final PerDocConsumer perDoc = consumers.get(field.getCodecId()); + final int codecId = field.getCodecId(); + assert codecId != FieldInfo.UNASSIGNED_CODEC_ID; + PerDocConsumer perDoc = consumers[codecId]; if (perDoc == null) { - return null; + perDoc = codecs[codecId].docsConsumer(new PerDocWriteState(state, codecId)); + assert perDoc != null; + consumers[codecId] = perDoc; } return perDoc.addValuesField(field); } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesConsumer.java b/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesConsumer.java index b95dc748810..0c1d1a1da1a 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesConsumer.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesConsumer.java @@ -77,6 +77,7 @@ public class DefaultDocValuesConsumer extends PerDocConsumer { Writer.INDEX_EXTENSION)); assert dir.fileExists(IndexFileNames.segmentFileName(filename, "", Writer.INDEX_EXTENSION)); + // until here all types use an index case BYTES_FIXED_STRAIGHT: case FLOAT_32: case FLOAT_64: diff --git a/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexCodec.java b/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexCodec.java index c3860d8ca3d..763457fa71f 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexCodec.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexCodec.java @@ -84,11 +84,11 @@ public class PreFlexCodec extends Codec { @Override public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException { - return null; + throw new UnsupportedOperationException("PerDocConsumer is not supported by Preflex codec"); } @Override public PerDocValues docsProducer(SegmentReadState state) throws IOException { - return null; + throw new UnsupportedOperationException("PerDocValues is not supported by Preflex codec"); } } diff --git a/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java b/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java index 0712e4104c4..4aa5f7c339a 100644 --- a/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java +++ b/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java @@ -23,9 +23,13 @@ import java.util.Random; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.DocValuesField; import 
org.apache.lucene.document.Document; import org.apache.lucene.index.IndexWriter; // javadoc +import org.apache.lucene.index.codecs.CodecProvider; +import org.apache.lucene.index.values.Type; import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.Version; import org.apache.lucene.util._TestUtil; @@ -44,6 +48,10 @@ public class RandomIndexWriter implements Closeable { int flushAt; private double flushAtFactor = 1.0; private boolean getReaderCalled; + private final int fixedBytesLength; + private final long docValuesFieldPrefix; + private volatile boolean doDocValues; + private CodecProvider codecProvider; // Randomly calls Thread.yield so we mixup thread scheduling private static final class MockIndexWriter extends IndexWriter { @@ -91,16 +99,79 @@ public class RandomIndexWriter implements Closeable { System.out.println("codec default=" + w.getConfig().getCodecProvider().getDefaultFieldCodec()); w.setInfoStream(System.out); } + /* TODO: find some what to make that random... + * This must be fixed across all fixed bytes + * fields in one index. so if you open another writer + * this might change if I use r.nextInt(x) + * maybe we can peek at the existing files here? + */ + fixedBytesLength = 37; + docValuesFieldPrefix = r.nextLong(); + codecProvider = w.getConfig().getCodecProvider(); + switchDoDocValues(); } + private void switchDoDocValues() { + // randomly enable / disable docValues + doDocValues = r.nextInt(10) != 0; + } + /** * Adds a Document. * @see IndexWriter#addDocument(Document) */ public void addDocument(Document doc) throws IOException { + if (doDocValues) { + randomPerDocFieldValues(r, doc); + } w.addDocument(doc); + maybeCommit(); } + + private void randomPerDocFieldValues(Random random, Document doc) { + + Type[] values = Type.values(); + Type type = values[random.nextInt(values.length)]; + String name = "random_" + type.name() + "" + docValuesFieldPrefix; + if ("PreFlex".equals(codecProvider.getFieldCodec(name)) || doc.getFieldable(name) != null) + return; + DocValuesField docValuesField = new DocValuesField(name); + switch (type) { + case BYTES_FIXED_DEREF: + case BYTES_FIXED_SORTED: + case BYTES_FIXED_STRAIGHT: + final String randomUnicodeString = _TestUtil.randomUnicodeString(random, fixedBytesLength); + BytesRef fixedRef = new BytesRef(randomUnicodeString); + if (fixedRef.length > fixedBytesLength) { + fixedRef = new BytesRef(fixedRef.bytes, 0, fixedBytesLength); + } else { + fixedRef.grow(fixedBytesLength); + fixedRef.length = fixedBytesLength; + } + docValuesField.setBytes(fixedRef, type); + break; + case BYTES_VAR_DEREF: + case BYTES_VAR_SORTED: + case BYTES_VAR_STRAIGHT: + BytesRef ref = new BytesRef(_TestUtil.randomUnicodeString(random, 200)); + docValuesField.setBytes(ref, type); + break; + case FLOAT_32: + docValuesField.setFloat(random.nextFloat()); + break; + case FLOAT_64: + docValuesField.setFloat(random.nextDouble()); + break; + case INTS: + docValuesField.setInt(random.nextInt()); + break; + default: + throw new IllegalArgumentException("no such type: " + type); + } + + doc.add(docValuesField); + } private void maybeCommit() throws IOException { if (docCount++ == flushAt) { @@ -113,6 +184,7 @@ public class RandomIndexWriter implements Closeable { // gradually but exponentially increase time b/w flushes flushAtFactor *= 1.05; } + switchDoDocValues(); } } @@ -121,6 +193,9 @@ public class RandomIndexWriter implements Closeable { * @see 
IndexWriter#updateDocument(Term, Document) */ public void updateDocument(Term t, Document doc) throws IOException { + if (doDocValues) { + randomPerDocFieldValues(r, doc); + } w.updateDocument(t, doc); maybeCommit(); } @@ -135,6 +210,7 @@ public class RandomIndexWriter implements Closeable { public void commit() throws CorruptIndexException, IOException { w.commit(); + switchDoDocValues(); } public int numDocs() throws IOException { @@ -164,6 +240,7 @@ public class RandomIndexWriter implements Closeable { w.optimize(limit); assert w.getSegmentCount() <= limit: "limit=" + limit + " actual=" + w.getSegmentCount(); } + switchDoDocValues(); } public IndexReader getReader(boolean applyDeletions) throws IOException { @@ -184,6 +261,7 @@ public class RandomIndexWriter implements Closeable { System.out.println("RIW.getReader: open new reader"); } w.commit(); + switchDoDocValues(); return IndexReader.open(w.getDirectory(), new KeepOnlyLastCommitDeletionPolicy(), r.nextBoolean(), _TestUtil.nextInt(r, 1, 10), w.getConfig().getCodecProvider()); } } From b1af75007bc11196ce9a25453a54297fcf9a60cc Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Mon, 16 May 2011 11:44:57 +0000 Subject: [PATCH 034/116] added db jars to svn:ignore git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1103700 13f79535-47bb-0310-9956-ffa450edef68 From f50134fe1b3275ab929f190863e22a43bbf90834 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Mon, 16 May 2011 12:00:32 +0000 Subject: [PATCH 035/116] removed invalid nocommit git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1103705 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java index 003bcdeafd8..4d0d8ca22ba 100644 --- a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java +++ b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java @@ -104,7 +104,7 @@ final class DocFieldProcessor extends DocConsumer { try { consumer.close(); // TODO add abort to PerDocConsumer! } catch (IOException e) { - // nocommit handle exce + // ignore on abort! 
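
One detail of the RandomIndexWriter change above deserves a note: every value of a BYTES_FIXED_* field must have exactly the same byte length across the index, which is why the injection clamps or pads each random string to the hard-coded fixedBytesLength of 37. Isolated, that normalization looks like this (variable names as in the patch):

    BytesRef fixedRef = new BytesRef(_TestUtil.randomUnicodeString(random, fixedBytesLength));
    if (fixedRef.length > fixedBytesLength) {
      // a random unicode string may encode to more bytes than chars: truncate
      fixedRef = new BytesRef(fixedRef.bytes, 0, fixedBytesLength);
    } else {
      // shorter than the fixed size: grow the backing array and zero-pad
      fixedRef.grow(fixedBytesLength);
      fixedRef.length = fixedBytesLength;
    }
    docValuesField.setBytes(fixedRef, type);
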
} } From 8603fac79a1ec7a06489aee064e4b38ef44832ca Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Tue, 17 May 2011 08:09:17 +0000 Subject: [PATCH 036/116] added some javadoc git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1104045 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene/index/DocFieldProcessor.java | 2 +- .../org/apache/lucene/index/IndexReader.java | 14 +++++++++- .../lucene/index/MultiPerDocValues.java | 21 +++++++++++++-- .../apache/lucene/index/PerDocWriteState.java | 27 +++++++++---------- .../lucene/index/SegmentWriteState.java | 1 - .../lucene/index/codecs/FieldsProducer.java | 1 - .../lucene/index/codecs/PerDocConsumer.java | 15 ++++++++--- .../lucene/index/codecs/PerDocValues.java | 15 +++++++++-- .../apache/lucene/index/values/DocValues.java | 2 +- .../index/values/TestDocValuesIndexing.java | 2 -- 10 files changed, 71 insertions(+), 29 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java index 4d0d8ca22ba..1a3b9c5dae7 100644 --- a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java +++ b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java @@ -144,7 +144,7 @@ final class DocFieldProcessor extends DocConsumer { try { consumer.close(); } catch (IOException e) { - // nocommit handle exce + // ignore and continue closing remaining consumers } } perDocConsumers.clear(); diff --git a/lucene/src/java/org/apache/lucene/index/IndexReader.java b/lucene/src/java/org/apache/lucene/index/IndexReader.java index ed7d472c33f..2fc8173c06e 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexReader.java +++ b/lucene/src/java/org/apache/lucene/index/IndexReader.java @@ -1069,7 +1069,19 @@ public abstract class IndexReader implements Cloneable,Closeable { * through them yourself. */ public abstract Fields fields() throws IOException; - // nocommit javadoc + /** + * Flex API: returns {@link PerDocValues} for this reader. + * This method may return null if the reader has no per-document + * values stored. + * + *
<p>
    NOTE: if this is a multi reader ({@link + * #getSequentialSubReaders} is not null) then this + * method will throw UnsupportedOperationException. If + * you really need {@link PerDocValues} for such a reader, + * use {@link MultiPerDocValues#getPerDocs(IndexReader)}. However, for + * performance reasons, it's best to get all sub-readers + * using {@link ReaderUtil#gatherSubReaders} and iterate + * through them yourself. */ public abstract PerDocValues perDocValues() throws IOException; public int docFreq(Term term) throws IOException { diff --git a/lucene/src/java/org/apache/lucene/index/MultiPerDocValues.java b/lucene/src/java/org/apache/lucene/index/MultiPerDocValues.java index fcce3a2a68b..849eb584c91 100644 --- a/lucene/src/java/org/apache/lucene/index/MultiPerDocValues.java +++ b/lucene/src/java/org/apache/lucene/index/MultiPerDocValues.java @@ -29,11 +29,20 @@ import org.apache.lucene.index.values.MultiDocValues; import org.apache.lucene.index.values.Type; import org.apache.lucene.index.values.MultiDocValues.DocValuesIndex; import org.apache.lucene.util.ReaderUtil; +import org.apache.lucene.util.ReaderUtil.Gather; /** + * Exposes per-document flex API, merged from per-document flex API of + * sub-segments. This is useful when you're interacting with an + * {@link IndexReader} implementation that consists of sequential sub-readers + * (eg DirectoryReader or {@link MultiReader}). * - * nocommit - javadoc - * @experimental + *
<p>
    + * NOTE: for multi readers, you'll get better performance by gathering + * the sub readers using {@link ReaderUtil#gatherSubReaders} and then operate + * per-reader, instead of using this class. + * + * @lucene.experimental */ public class MultiPerDocValues extends PerDocValues { private final PerDocValues[] subs; @@ -50,6 +59,14 @@ public class MultiPerDocValues extends PerDocValues { } } + /** + * Returns a single {@link PerDocValues} instance for this reader, merging + * their values on the fly. This method will not return null. + * + *
<p>
    + * NOTE: this is a slow way to access postings. It's better to get the + * sub-readers (using {@link Gather}) and iterate through them yourself. + */ public static PerDocValues getPerDocs(IndexReader r) throws IOException { final IndexReader[] subs = r.getSequentialSubReaders(); if (subs == null) { diff --git a/lucene/src/java/org/apache/lucene/index/PerDocWriteState.java b/lucene/src/java/org/apache/lucene/index/PerDocWriteState.java index 652f1b6d5a5..9ee8bbcf163 100644 --- a/lucene/src/java/org/apache/lucene/index/PerDocWriteState.java +++ b/lucene/src/java/org/apache/lucene/index/PerDocWriteState.java @@ -18,10 +18,13 @@ package org.apache.lucene.index; import java.io.PrintStream; import java.util.concurrent.atomic.AtomicLong; +import org.apache.lucene.index.codecs.PerDocConsumer; import org.apache.lucene.store.Directory; /** - * nocommit - javadoc + * Encapsulates all necessary state to initiate a {@link PerDocConsumer} and + * create all necessary files in order to consume and merge per-document values. + * * @lucene.experimental */ public class PerDocWriteState { @@ -33,14 +36,9 @@ public class PerDocWriteState { public final SegmentCodecs segmentCodecs; public final int codecId; - /** Expert: The fraction of terms in the "dictionary" which should be stored - * in RAM. Smaller values use more memory, but make searching slightly - * faster, while larger values use less memory and make searching slightly - * slower. Searching is typically not dominated by dictionary lookup, so - * tweaking this is rarely useful.*/ - public int termIndexInterval; // TODO: this should be private to the codec, not settable here or in IWC - - public PerDocWriteState(PrintStream infoStream, Directory directory, String segmentName, FieldInfos fieldInfos, AtomicLong bytesUsed, int codecId) { + PerDocWriteState(PrintStream infoStream, Directory directory, + String segmentName, FieldInfos fieldInfos, AtomicLong bytesUsed, + int codecId) { this.infoStream = infoStream; this.directory = directory; this.segmentName = segmentName; @@ -49,8 +47,8 @@ public class PerDocWriteState { this.codecId = codecId; this.bytesUsed = bytesUsed; } - - public PerDocWriteState(SegmentWriteState state) { + + PerDocWriteState(SegmentWriteState state) { infoStream = state.infoStream; directory = state.directory; segmentCodecs = state.segmentCodecs; @@ -59,8 +57,8 @@ public class PerDocWriteState { codecId = state.codecId; bytesUsed = new AtomicLong(0); } - - public PerDocWriteState(PerDocWriteState state, int codecId) { + + PerDocWriteState(PerDocWriteState state, int codecId) { this.infoStream = state.infoStream; this.directory = state.directory; this.segmentName = state.segmentName; @@ -69,8 +67,7 @@ public class PerDocWriteState { this.codecId = codecId; this.bytesUsed = state.bytesUsed; } - - + public String codecIdAsString() { return "" + codecId; } diff --git a/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java b/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java index 79c2638add4..7e910286bbc 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java @@ -18,7 +18,6 @@ package org.apache.lucene.index; */ import java.io.PrintStream; -import java.util.concurrent.atomic.AtomicLong; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BitVector; diff --git a/lucene/src/java/org/apache/lucene/index/codecs/FieldsProducer.java b/lucene/src/java/org/apache/lucene/index/codecs/FieldsProducer.java index 
d170699d72b..8a8e3f5b0d1 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/FieldsProducer.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/FieldsProducer.java @@ -23,7 +23,6 @@ import java.io.IOException; import org.apache.lucene.index.Fields; import org.apache.lucene.index.FieldsEnum; import org.apache.lucene.index.Terms; -import org.apache.lucene.index.values.DocValues; /** Abstract API that consumes terms, doc, freq, prox and * payloads postings. Concrete implementations of this diff --git a/lucene/src/java/org/apache/lucene/index/codecs/PerDocConsumer.java b/lucene/src/java/org/apache/lucene/index/codecs/PerDocConsumer.java index 4a644ee91a9..1749414a747 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/PerDocConsumer.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/PerDocConsumer.java @@ -22,15 +22,24 @@ import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.values.DocValues; /** - * nocommit - javadoc - * @experimental - * + * Abstract API that consumes per document values. Concrete implementations of + * this convert field values into a Codec specific format during indexing. + *
<p>
    + * The {@link PerDocConsumer} API is accessible through flexible indexing / the + * {@link Codec} - API providing per field consumers and producers for inverted + * data (terms, postings) as well as per-document data. + * + * @lucene.experimental */ public abstract class PerDocConsumer implements Closeable{ /** Adds a new DocValuesField */ public abstract DocValuesConsumer addValuesField(FieldInfo field) throws IOException; + /** + * Consumes and merges the given {@link PerDocValues} producer + * into this consumers format. + */ public void merge(MergeState mergeState, PerDocValues producer) throws IOException { Iterable fields = producer.fields(); diff --git a/lucene/src/java/org/apache/lucene/index/codecs/PerDocValues.java b/lucene/src/java/org/apache/lucene/index/codecs/PerDocValues.java index cfeda8f40c9..19b0c4e0728 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/PerDocValues.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/PerDocValues.java @@ -22,9 +22,16 @@ import java.util.Collection; import org.apache.lucene.index.values.DocValues; /** + * Abstract API that provides access to one or more per-document storage + * features. The concrete implementations provide access to the underlying + * storage on a per-document basis corresponding to their actual + * {@link PerDocConsumer} counterpart. + *
<p>
    + * The {@link PerDocValues} API is accessible through flexible indexing / the + * {@link Codec} - API providing per field consumers and producers for inverted + * data (terms, postings) as well as per-document data. * - * nocommit javadoc - * @experimental + * @lucene.experimental */ public abstract class PerDocValues implements Closeable { /** @@ -40,5 +47,9 @@ public abstract class PerDocValues implements Closeable { public static final PerDocValues[] EMPTY_ARRAY = new PerDocValues[0]; + /** + * Returns all fields this {@link PerDocValues} contains values for. + */ public abstract Collection fields(); + } diff --git a/lucene/src/java/org/apache/lucene/index/values/DocValues.java b/lucene/src/java/org/apache/lucene/index/values/DocValues.java index c63c3e8da95..e8569d9a11f 100644 --- a/lucene/src/java/org/apache/lucene/index/values/DocValues.java +++ b/lucene/src/java/org/apache/lucene/index/values/DocValues.java @@ -27,7 +27,7 @@ import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.BytesRef; /** - * TODO + * nocommit - javadoc * * @see FieldsEnum#docValues() * @see Fields#docValues(String) diff --git a/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java b/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java index f7a0767f9f7..7e5f7079630 100644 --- a/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java +++ b/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java @@ -69,10 +69,8 @@ public class TestDocValuesIndexing extends LuceneTestCase { * * - Add documentation for: * - DocValues - * - Add @lucene.experimental to all necessary classes * - add test for unoptimized case with deletes * - add multithreaded tests / integrate into stress indexing? 
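
Read together, the javadocs added in this patch describe a small read-side API: PerDocValues is the per-segment entry point, fields() enumerates what was stored, and docValues(String) hands back the per-field DocValues. A hedged sketch of walking everything a segment stores (reader acquisition and per-type handling elided):

    PerDocValues perDocValues = reader.perDocValues();   // segment reader
    for (String field : perDocValues.fields()) {
      DocValues docValues = perDocValues.docValues(field);
      // consume via docValues.getEnum(), as in the CheckIndex example earlier
    }
    // composite readers must go through the merging view instead:
    PerDocValues merged = MultiPerDocValues.getPerDocs(multiReader);
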
- * - run RAT */ @Before From 153475ad31e803f40dc02201908f475345490f1c Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Wed, 18 May 2011 09:38:17 +0000 Subject: [PATCH 037/116] LUCENE-3108: removed legacy code, fixed minor javadoc issues and renamed Type to ValueType git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1124144 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/lucene/document/AbstractField.java | 4 +- .../lucene/document/DocValuesField.java | 56 +++++++++--------- .../org/apache/lucene/document/Fieldable.java | 6 +- .../org/apache/lucene/index/FieldInfo.java | 10 ++-- .../org/apache/lucene/index/FieldInfos.java | 28 ++++----- .../org/apache/lucene/index/MultiFields.java | 2 +- .../lucene/index/MultiPerDocValues.java | 4 +- .../codecs/DefaultDocValuesProducer.java | 10 ++-- .../apache/lucene/index/values/DocValues.java | 20 +++---- .../lucene/index/values/DocValuesEnum.java | 20 +++---- .../index/values/FixedDerefBytesImpl.java | 12 ++-- .../index/values/FixedSortedBytesImpl.java | 8 +-- .../index/values/FixedStraightBytesImpl.java | 10 ++-- .../apache/lucene/index/values/Floats.java | 33 ++++++----- .../lucene/index/values/MultiDocValues.java | 14 ++--- .../lucene/index/values/PackedIntsImpl.java | 10 ++-- .../index/values/PerDocFieldValues.java | 22 +++---- .../values/{Type.java => ValueType.java} | 6 +- .../index/values/VarDerefBytesImpl.java | 10 ++-- .../index/values/VarSortedBytesImpl.java | 10 ++-- .../index/values/VarStraightBytesImpl.java | 10 ++-- .../apache/lucene/index/values/Writer.java | 8 +-- .../org/apache/lucene/util/FloatsRef.java | 2 +- .../src/java/org/apache/lucene/util/Pair.java | 36 ------------ .../org/apache/lucene/util/ParallelArray.java | 57 ------------------- .../lucene/index/RandomIndexWriter.java | 6 +- .../index/values/TestDocValuesIndexing.java | 36 ++++++------ .../org/apache/lucene/search/TestSort.java | 8 +-- 28 files changed, 185 insertions(+), 273 deletions(-) rename lucene/src/java/org/apache/lucene/index/values/{Type.java => ValueType.java} (92%) delete mode 100644 lucene/src/java/org/apache/lucene/util/Pair.java delete mode 100644 lucene/src/java/org/apache/lucene/util/ParallelArray.java diff --git a/lucene/src/java/org/apache/lucene/document/AbstractField.java b/lucene/src/java/org/apache/lucene/document/AbstractField.java index 6fb4d1c1089..5eac62ab623 100755 --- a/lucene/src/java/org/apache/lucene/document/AbstractField.java +++ b/lucene/src/java/org/apache/lucene/document/AbstractField.java @@ -20,7 +20,7 @@ import org.apache.lucene.search.spans.SpanQuery; // for javadocs import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.index.FieldInvertState; import org.apache.lucene.index.values.PerDocFieldValues; -import org.apache.lucene.index.values.Type; +import org.apache.lucene.index.values.ValueType; import org.apache.lucene.util.StringHelper; // for javadocs @@ -306,7 +306,7 @@ public abstract class AbstractField implements Fieldable { return docValues != null; } - public Type docValuesType() { + public ValueType docValuesType() { return docValues == null? 
null : docValues.type(); } } diff --git a/lucene/src/java/org/apache/lucene/document/DocValuesField.java b/lucene/src/java/org/apache/lucene/document/DocValuesField.java index 0b14860dc44..38d6c3df3a2 100644 --- a/lucene/src/java/org/apache/lucene/document/DocValuesField.java +++ b/lucene/src/java/org/apache/lucene/document/DocValuesField.java @@ -24,7 +24,7 @@ import org.apache.lucene.document.Field.Index; import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.Field.TermVector; import org.apache.lucene.index.values.PerDocFieldValues; -import org.apache.lucene.index.values.Type; +import org.apache.lucene.index.values.ValueType; import org.apache.lucene.util.BytesRef; /** @@ -47,7 +47,7 @@ import org.apache.lucene.util.BytesRef; * * for(all documents) { * ... - * field.setIntValue(value) + * field.setInt(value) * writer.addDocument(document); * ... * } @@ -66,7 +66,7 @@ import org.apache.lucene.util.BytesRef; * field.set(indexedField); * for(all documents) { * ... - * field.setIntValue(value) + * field.setInt(value) * writer.addDocument(document); * ... * } @@ -78,7 +78,7 @@ public class DocValuesField extends AbstractField implements PerDocFieldValues { protected BytesRef bytes; protected double doubleValue; protected long longValue; - protected Type type; + protected ValueType type; protected Comparator bytesComparator; /** @@ -97,60 +97,60 @@ public class DocValuesField extends AbstractField implements PerDocFieldValues { } /** - * Sets the given long value and sets the field's {@link Type} to - * {@link Type#INTS} unless already set. If you want to change the - * default type use {@link #setType(Type)}. + * Sets the given long value and sets the field's {@link ValueType} to + * {@link ValueType#INTS} unless already set. If you want to change the + * default type use {@link #setType(ValueType)}. */ public void setInt(long value) { if (type == null) { - type = Type.INTS; + type = ValueType.INTS; } longValue = value; } /** - * Sets the given float value and sets the field's {@link Type} - * to {@link Type#FLOAT_32} unless already set. If you want to - * change the type use {@link #setType(Type)}. + * Sets the given float value and sets the field's {@link ValueType} + * to {@link ValueType#FLOAT_32} unless already set. If you want to + * change the type use {@link #setType(ValueType)}. */ public void setFloat(float value) { if (type == null) { - type = Type.FLOAT_32; + type = ValueType.FLOAT_32; } doubleValue = value; } /** - * Sets the given double value and sets the field's {@link Type} - * to {@link Type#FLOAT_64} unless already set. If you want to - * change the default type use {@link #setType(Type)}. + * Sets the given double value and sets the field's {@link ValueType} + * to {@link ValueType#FLOAT_64} unless already set. If you want to + * change the default type use {@link #setType(ValueType)}. */ public void setFloat(double value) { if (type == null) { - type = Type.FLOAT_64; + type = ValueType.FLOAT_64; } doubleValue = value; } /** - * Sets the given {@link BytesRef} value and the field's {@link Type}. The + * Sets the given {@link BytesRef} value and the field's {@link ValueType}. The * comparator for this field is set to null. If a * null comparator is set the default comparator for the given - * {@link Type} is used. + * {@link ValueType} is used. 
*/ - public void setBytes(BytesRef value, Type type) { + public void setBytes(BytesRef value, ValueType type) { setBytes(value, type, null); } /** - * Sets the given {@link BytesRef} value, the field's {@link Type} and the + * Sets the given {@link BytesRef} value, the field's {@link ValueType} and the * field's comparator. If the {@link Comparator} is set to null - * the default for the given {@link Type} is used instead. + * the default for the given {@link ValueType} is used instead. * * @throws IllegalArgumentException * if the value or the type are null */ - public void setBytes(BytesRef value, Type type, Comparator comp) { + public void setBytes(BytesRef value, ValueType type, Comparator comp) { if (value == null) { throw new IllegalArgumentException("value must not be null"); } @@ -193,16 +193,16 @@ public class DocValuesField extends AbstractField implements PerDocFieldValues { /** * Sets the {@link BytesRef} comparator for this field. If the field has a - * numeric {@link Type} the comparator will be ignored. + * numeric {@link ValueType} the comparator will be ignored. */ public void setBytesComparator(Comparator comp) { this.bytesComparator = comp; } /** - * Sets the {@link Type} for this field. + * Sets the {@link ValueType} for this field. */ - public void setType(Type type) { + public void setType(ValueType type) { if (type == null) { throw new IllegalArgumentException("Type must not be null"); } @@ -210,9 +210,9 @@ public class DocValuesField extends AbstractField implements PerDocFieldValues { } /** - * Returns the field's {@link Type} + * Returns the field's {@link ValueType} */ - public Type type() { + public ValueType type() { return type; } @@ -252,7 +252,7 @@ public class DocValuesField extends AbstractField implements PerDocFieldValues { * given type and returns it. * */ - public static T set(T field, Type type) { + public static T set(T field, ValueType type) { if (field instanceof DocValuesField) return field; final DocValuesField valField = new DocValuesField(); diff --git a/lucene/src/java/org/apache/lucene/document/Fieldable.java b/lucene/src/java/org/apache/lucene/document/Fieldable.java index 8d168c45b6f..8b9c325d3f8 100755 --- a/lucene/src/java/org/apache/lucene/document/Fieldable.java +++ b/lucene/src/java/org/apache/lucene/document/Fieldable.java @@ -20,7 +20,7 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.index.FieldInvertState; // for javadocs import org.apache.lucene.index.values.DocValues; import org.apache.lucene.index.values.PerDocFieldValues; -import org.apache.lucene.index.values.Type; +import org.apache.lucene.index.values.ValueType; import org.apache.lucene.search.PhraseQuery; // for javadocs import org.apache.lucene.search.spans.SpanQuery; // for javadocs @@ -230,8 +230,8 @@ public interface Fieldable { public boolean hasDocValues(); /** - * Returns the {@link Type} of the set {@link PerDocFieldValues} or + * Returns the {@link ValueType} of the set {@link PerDocFieldValues} or * null if not set. 
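
The renamed setters keep their implicit-typing behavior: each setter picks a default ValueType unless one was chosen beforehand via setType(ValueType). A minimal indexing sketch (field name and value are hypothetical):

    Document doc = new Document();
    DocValuesField priceField = new DocValuesField("price");
    priceField.setFloat(2.5d); // no type set yet, so this implies ValueType.FLOAT_64
    doc.add(priceField);
    writer.addDocument(doc);
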
*/ - public Type docValuesType(); + public ValueType docValuesType(); } diff --git a/lucene/src/java/org/apache/lucene/index/FieldInfo.java b/lucene/src/java/org/apache/lucene/index/FieldInfo.java index 472c48619d8..af76b2bb322 100644 --- a/lucene/src/java/org/apache/lucene/index/FieldInfo.java +++ b/lucene/src/java/org/apache/lucene/index/FieldInfo.java @@ -1,6 +1,6 @@ package org.apache.lucene.index; -import org.apache.lucene.index.values.Type; +import org.apache.lucene.index.values.ValueType; /** * Licensed to the Apache Software Foundation (ASF) under one or more @@ -26,7 +26,7 @@ public final class FieldInfo { public final int number; public boolean isIndexed; - Type docValues; + ValueType docValues; // true if term vector for this field should be stored @@ -42,7 +42,7 @@ public final class FieldInfo { FieldInfo(String na, boolean tk, int nu, boolean storeTermVector, boolean storePositionWithTermVector, boolean storeOffsetWithTermVector, - boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions, Type docValues) { + boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions, ValueType docValues) { name = na; isIndexed = tk; number = nu; @@ -112,7 +112,7 @@ public final class FieldInfo { } assert !this.omitTermFreqAndPositions || !this.storePayloads; } - void setDocValues(Type v) { + void setDocValues(ValueType v) { if (docValues == null) { docValues = v; } @@ -122,7 +122,7 @@ public final class FieldInfo { return docValues != null; } - public Type getDocValues() { + public ValueType getDocValues() { return docValues; } diff --git a/lucene/src/java/org/apache/lucene/index/FieldInfos.java b/lucene/src/java/org/apache/lucene/index/FieldInfos.java index 569597963f7..594301326f4 100644 --- a/lucene/src/java/org/apache/lucene/index/FieldInfos.java +++ b/lucene/src/java/org/apache/lucene/index/FieldInfos.java @@ -31,7 +31,7 @@ import java.util.Map.Entry; import org.apache.lucene.index.SegmentCodecs; // Required for Java 1.5 javadocs import org.apache.lucene.index.SegmentCodecs.SegmentCodecsBuilder; import org.apache.lucene.index.codecs.CodecProvider; -import org.apache.lucene.index.values.Type; +import org.apache.lucene.index.values.ValueType; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; @@ -433,14 +433,14 @@ public final class FieldInfos implements Iterable { */ synchronized public FieldInfo addOrUpdate(String name, boolean isIndexed, boolean storeTermVector, boolean storePositionWithTermVector, boolean storeOffsetWithTermVector, - boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions, Type docValues) { + boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions, ValueType docValues) { return addOrUpdateInternal(name, -1, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions, docValues); } synchronized private FieldInfo addOrUpdateInternal(String name, int preferredFieldNumber, boolean isIndexed, boolean storeTermVector, boolean storePositionWithTermVector, boolean storeOffsetWithTermVector, - boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions, Type docValues) { + boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions, ValueType docValues) { if (globalFieldNumbers == null) { throw new IllegalStateException("FieldInfos are read-only, create a new instance with a global field map to make modifications to 
FieldInfos"); } @@ -473,7 +473,7 @@ public final class FieldInfos implements Iterable { */ private FieldInfo addInternal(String name, int fieldNumber, boolean isIndexed, boolean storeTermVector, boolean storePositionWithTermVector, - boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions, Type docValuesType) { + boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions, ValueType docValuesType) { // don't check modifiable here since we use that to initially build up FIs name = StringHelper.intern(name); if (globalFieldNumbers != null) { @@ -680,7 +680,7 @@ public final class FieldInfos implements Iterable { } hasVectors |= storeTermVector; hasProx |= isIndexed && !omitTermFreqAndPositions; - Type docValuesType = null; + ValueType docValuesType = null; if (format <= FORMAT_INDEX_VALUES) { final byte b = input.readByte(); switch(b) { @@ -688,31 +688,31 @@ public final class FieldInfos implements Iterable { docValuesType = null; break; case 1: - docValuesType = Type.INTS; + docValuesType = ValueType.INTS; break; case 2: - docValuesType = Type.FLOAT_32; + docValuesType = ValueType.FLOAT_32; break; case 3: - docValuesType = Type.FLOAT_64; + docValuesType = ValueType.FLOAT_64; break; case 4: - docValuesType = Type.BYTES_FIXED_STRAIGHT; + docValuesType = ValueType.BYTES_FIXED_STRAIGHT; break; case 5: - docValuesType = Type.BYTES_FIXED_DEREF; + docValuesType = ValueType.BYTES_FIXED_DEREF; break; case 6: - docValuesType = Type.BYTES_FIXED_SORTED; + docValuesType = ValueType.BYTES_FIXED_SORTED; break; case 7: - docValuesType = Type.BYTES_VAR_STRAIGHT; + docValuesType = ValueType.BYTES_VAR_STRAIGHT; break; case 8: - docValuesType = Type.BYTES_VAR_DEREF; + docValuesType = ValueType.BYTES_VAR_DEREF; break; case 9: - docValuesType = Type.BYTES_VAR_SORTED; + docValuesType = ValueType.BYTES_VAR_SORTED; break; default: throw new IllegalStateException("unhandled indexValues type " + b); diff --git a/lucene/src/java/org/apache/lucene/index/MultiFields.java b/lucene/src/java/org/apache/lucene/index/MultiFields.java index 841349a4a33..453bce7cbc0 100644 --- a/lucene/src/java/org/apache/lucene/index/MultiFields.java +++ b/lucene/src/java/org/apache/lucene/index/MultiFields.java @@ -24,7 +24,7 @@ import java.util.ArrayList; import org.apache.lucene.index.values.DocValues; import org.apache.lucene.index.values.MultiDocValues; -import org.apache.lucene.index.values.Type; +import org.apache.lucene.index.values.ValueType; import org.apache.lucene.index.values.MultiDocValues.DocValuesIndex; import java.util.concurrent.ConcurrentHashMap; import org.apache.lucene.util.ReaderUtil; diff --git a/lucene/src/java/org/apache/lucene/index/MultiPerDocValues.java b/lucene/src/java/org/apache/lucene/index/MultiPerDocValues.java index 849eb584c91..181e058e528 100644 --- a/lucene/src/java/org/apache/lucene/index/MultiPerDocValues.java +++ b/lucene/src/java/org/apache/lucene/index/MultiPerDocValues.java @@ -26,7 +26,7 @@ import java.util.concurrent.ConcurrentHashMap; import org.apache.lucene.index.codecs.PerDocValues; import org.apache.lucene.index.values.DocValues; import org.apache.lucene.index.values.MultiDocValues; -import org.apache.lucene.index.values.Type; +import org.apache.lucene.index.values.ValueType; import org.apache.lucene.index.values.MultiDocValues.DocValuesIndex; import org.apache.lucene.util.ReaderUtil; import org.apache.lucene.util.ReaderUtil.Gather; @@ -117,7 +117,7 @@ public class MultiPerDocValues extends 
PerDocValues { // create & add to docValues: final List docValuesIndex = new ArrayList(); int docsUpto = 0; - Type type = null; + ValueType type = null; // Gather all sub-readers that share this field for (int i = 0; i < subs.length; i++) { DocValues values = subs[i].docValues(field); diff --git a/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesProducer.java b/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesProducer.java index e3274dd67c1..de1dcfd8f8f 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesProducer.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesProducer.java @@ -28,7 +28,7 @@ import org.apache.lucene.index.values.Bytes; import org.apache.lucene.index.values.DocValues; import org.apache.lucene.index.values.Floats; import org.apache.lucene.index.values.Ints; -import org.apache.lucene.index.values.Type; +import org.apache.lucene.index.values.ValueType; import org.apache.lucene.store.Directory; /** @@ -86,8 +86,8 @@ public class DefaultDocValuesProducer extends PerDocValues { /** - * Loads a {@link DocValues} instance depending on the given {@link Type}. - * Codecs that use different implementations for a certain {@link Type} can + * Loads a {@link DocValues} instance depending on the given {@link ValueType}. + * Codecs that use different implementations for a certain {@link ValueType} can * simply override this method and return their custom implementations. * * @param docCount @@ -102,10 +102,10 @@ public class DefaultDocValuesProducer extends PerDocValues { * @throws IOException * if an {@link IOException} occurs * @throws IllegalArgumentException - * if the given {@link Type} is not supported + * if the given {@link ValueType} is not supported */ protected DocValues loadDocValues(int docCount, Directory dir, String id, - Type type) throws IOException { + ValueType type) throws IOException { switch (type) { case INTS: return Ints.getValues(dir, id, false); diff --git a/lucene/src/java/org/apache/lucene/index/values/DocValues.java b/lucene/src/java/org/apache/lucene/index/values/DocValues.java index e8569d9a11f..ef25436cc73 100644 --- a/lucene/src/java/org/apache/lucene/index/values/DocValues.java +++ b/lucene/src/java/org/apache/lucene/index/values/DocValues.java @@ -126,9 +126,9 @@ public abstract class DocValues implements Closeable { } /** - * Returns the {@link Type} of this {@link DocValues} instance + * Returns the {@link ValueType} of this {@link DocValues} instance */ - public abstract Type type(); + public abstract ValueType type(); /** * Closes this {@link DocValues} instance. This method should only be called @@ -164,11 +164,11 @@ public abstract class DocValues implements Closeable { /** * Source of per document values like long, double or {@link BytesRef} - * depending on the {@link DocValues} fields {@link Type}. Source + * depending on the {@link DocValues} fields {@link ValueType}. Source * implementations provide random access semantics similar to array lookups * and typically are entirely memory resident. *

    - * {@link Source} defines 3 {@link Type} //TODO finish this + * {@link Source} defines 3 {@link ValueType} //TODO finish this */ public static abstract class Source { // TODO we might need a close method here to null out the internal used arrays?! @@ -243,11 +243,11 @@ public abstract class DocValues implements Closeable { } /** - * Returns the {@link Type} of this source. + * Returns the {@link ValueType} of this source. * - * @return the {@link Type} of this source. + * @return the {@link ValueType} of this source. */ - public abstract Type type(); + public abstract ValueType type(); /** * Returns a {@link DocValuesEnum} for this source which uses the given * @@ -272,13 +272,13 @@ * @param attrs * the {@link AttributeSource} for this enum * @param type - * the enums {@link Type} + * the enums {@link ValueType} * @param source * the source this enum operates on * @param numDocs * the number of documents within the source */ - protected SourceEnum(AttributeSource attrs, Type type, Source source, + protected SourceEnum(AttributeSource attrs, ValueType type, Source source, int numDocs) { super(attrs, type); this.source = source; @@ -361,7 +361,7 @@ public abstract class DocValues implements Closeable { * {@link MissingValue} is used by {@link Source} implementations to define an * implementation dependent value for documents that had no value assigned * during indexing. Its purpose is similar to a default value but since a - * missing value across {@link Type} and its implementations can be highly + * missing value across {@link ValueType} and its implementations can be highly * dynamic the actual values are not constant but defined per {@link Source} * through the {@link MissingValue} struct. The actual value used to indicate * a missing value can even change within the same field from one segment to diff --git a/lucene/src/java/org/apache/lucene/index/values/DocValuesEnum.java b/lucene/src/java/org/apache/lucene/index/values/DocValuesEnum.java index 60dc7d539f9..2c9ef2b8fb9 100644 --- a/lucene/src/java/org/apache/lucene/index/values/DocValuesEnum.java +++ b/lucene/src/java/org/apache/lucene/index/values/DocValuesEnum.java @@ -27,23 +27,23 @@ import org.apache.lucene.util.LongsRef; /** * {@link DocValuesEnum} is a {@link DocIdSetIterator} iterating byte[] * , long and double stored per document. Depending on the - * enum's {@link Type} ({@link #type()}) the enum might skip over documents that - * have no value stored. Types like {@link Type#BYTES_VAR_STRAIGHT} might not + * enum's {@link ValueType} ({@link #type()}) the enum might skip over documents that + * have no value stored. Types like {@link ValueType#BYTES_VAR_STRAIGHT} might not * skip over documents even if there is no value associated with a document. The * value for documents without values again depends on the type's implementation - * although a reference for a {@link Type} returned from a accessor method + * although a reference for a {@link ValueType} returned from an accessor method * {@link #getFloat()}, {@link #getInt()} or {@link #bytes()} will never be * null even if a document has no value. *

    * Note: Only the reference for the enum's type is initialized to non * null, i.e. {@link #getInt()} will always return null - * if the enum's Type is {@link Type#FLOAT_32}. + * if the enum's Type is {@link ValueType#FLOAT_32}. * * @lucene.experimental */ public abstract class DocValuesEnum extends DocIdSetIterator { private AttributeSource source; - private final Type enumType; + private final ValueType enumType; protected BytesRef bytesRef; protected FloatsRef floatsRef; protected LongsRef intsRef; @@ -52,14 +52,14 @@ public abstract class DocValuesEnum extends DocIdSetIterator { * Creates a new {@link DocValuesEnum} for the given type. The * {@link AttributeSource} for this enum is set to null */ - protected DocValuesEnum(Type enumType) { + protected DocValuesEnum(ValueType enumType) { this(null, enumType); } /** * Creates a new {@link DocValuesEnum} for the given type. */ - protected DocValuesEnum(AttributeSource source, Type enumType) { + protected DocValuesEnum(AttributeSource source, ValueType enumType) { this.source = source; this.enumType = enumType; switch (enumType) { @@ -84,7 +84,7 @@ public abstract class DocValuesEnum extends DocIdSetIterator { /** * Returns the type of this enum */ - public Type type() { + public ValueType type() { return enumType; } @@ -144,9 +144,9 @@ public abstract class DocValuesEnum extends DocIdSetIterator { public abstract void close() throws IOException; /** - * Returns an empty {@link DocValuesEnum} for the given {@link Type}. + * Returns an empty {@link DocValuesEnum} for the given {@link ValueType}. */ - public static DocValuesEnum emptyEnum(Type type) { + public static DocValuesEnum emptyEnum(ValueType type) { return new DocValuesEnum(type) { @Override public int nextDoc() throws IOException { diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java index 01f89742d41..51afb512e83 100644 --- a/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java @@ -173,8 +173,8 @@ class FixedDerefBytesImpl { } @Override - public Type type() { - return Type.BYTES_FIXED_DEREF; + public ValueType type() { + return ValueType.BYTES_FIXED_DEREF; } @Override @@ -198,11 +198,11 @@ class FixedDerefBytesImpl { public DerefBytesEnum(AttributeSource source, IndexInput datIn, IndexInput idxIn, int size) throws IOException { - this(source, datIn, idxIn, size, Type.BYTES_FIXED_DEREF); + this(source, datIn, idxIn, size, ValueType.BYTES_FIXED_DEREF); } protected DerefBytesEnum(AttributeSource source, IndexInput datIn, - IndexInput idxIn, int size, Type enumType) throws IOException { + IndexInput idxIn, int size, ValueType enumType) throws IOException { super(source, enumType); this.datIn = datIn; this.size = size; @@ -268,8 +268,8 @@ class FixedDerefBytesImpl { } @Override - public Type type() { - return Type.BYTES_FIXED_DEREF; + public ValueType type() { + return ValueType.BYTES_FIXED_DEREF; } } diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java index 161b2788393..beedfe2addb 100644 --- a/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java @@ -218,8 +218,8 @@ class FixedSortedBytesImpl { } @Override - public Type type() { - return Type.BYTES_FIXED_SORTED; + public ValueType
type() { + return ValueType.BYTES_FIXED_SORTED; } @Override @@ -235,8 +235,8 @@ class FixedSortedBytesImpl { } @Override - public Type type() { - return Type.BYTES_FIXED_SORTED; + public ValueType type() { + return ValueType.BYTES_FIXED_SORTED; } } } diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java index 0a89587a037..f718853d2f9 100644 --- a/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java @@ -164,8 +164,8 @@ class FixedStraightBytesImpl { } @Override - public Type type() { - return Type.BYTES_FIXED_STRAIGHT; + public ValueType type() { + return ValueType.BYTES_FIXED_STRAIGHT; } @Override @@ -188,7 +188,7 @@ class FixedStraightBytesImpl { public FixedStraightBytesEnum(AttributeSource source, IndexInput datIn, int size, int maxDoc) throws IOException { - super(source, Type.BYTES_FIXED_STRAIGHT); + super(source, ValueType.BYTES_FIXED_STRAIGHT); this.datIn = datIn; this.size = size; this.maxDoc = maxDoc; @@ -237,8 +237,8 @@ class FixedStraightBytesImpl { } @Override - public Type type() { - return Type.BYTES_FIXED_STRAIGHT; + public ValueType type() { + return ValueType.BYTES_FIXED_STRAIGHT; } } } diff --git a/lucene/src/java/org/apache/lucene/index/values/Floats.java b/lucene/src/java/org/apache/lucene/index/values/Floats.java index 46e8e88a733..e09beac898b 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Floats.java +++ b/lucene/src/java/org/apache/lucene/index/values/Floats.java @@ -269,7 +269,12 @@ public class Floats { */ @Override public Source load() throws IOException { - ByteBuffer buffer = ByteBuffer.allocate(precisionBytes * maxDoc); + /* + * the allocated byteBuffer always uses BIG_ENDIAN here + * and since the writer uses DataOutput#writeInt() / writeLong() + * we can always assume BIG_ENDIAN + */ + final ByteBuffer buffer = ByteBuffer.allocate(precisionBytes * maxDoc); IndexInput indexInput = (IndexInput) datIn.clone(); indexInput.seek(CodecUtil.headerLength(CODEC_NAME)); // skip precision: @@ -297,7 +302,7 @@ public class Floats { public DocValuesEnum getEnum(AttributeSource attrSource) throws IOException { final MissingValue missing = getMissing(); - return new SourceEnum(attrSource, Type.FLOAT_32, this, maxDoc) { + return new SourceEnum(attrSource, ValueType.FLOAT_32, this, maxDoc) { @Override public int advance(int target) throws IOException { if (target >= numDocs) @@ -314,8 +319,8 @@ } @Override - public Type type() { - return Type.FLOAT_32; + public ValueType type() { + return ValueType.FLOAT_32; } } @@ -354,8 +359,8 @@ } @Override - public Type type() { - return Type.FLOAT_64; + public ValueType type() { + return ValueType.FLOAT_64; } } @@ -376,9 +381,9 @@ } @Override - public Type type() { - return precisionBytes == 4 ? Type.FLOAT_32 - : Type.FLOAT_64; + public ValueType type() { + return precisionBytes == 4 ?
ValueType.FLOAT_32 + : ValueType.FLOAT_64; } } @@ -386,7 +391,7 @@ public class Floats { Floats4Enum(AttributeSource source, IndexInput dataIn, int maxDoc) throws IOException { - super(source, dataIn, 4, maxDoc, Type.FLOAT_32); + super(source, dataIn, 4, maxDoc, ValueType.FLOAT_32); } @Override @@ -422,7 +427,7 @@ public class Floats { Floats8EnumImpl(AttributeSource source, IndexInput dataIn, int maxDoc) throws IOException { - super(source, dataIn, 8, maxDoc, Type.FLOAT_64); + super(source, dataIn, 8, maxDoc, ValueType.FLOAT_64); } @Override @@ -463,9 +468,9 @@ public class Floats { protected final long fp; FloatsEnumImpl(AttributeSource source, IndexInput dataIn, int precision, - int maxDoc, Type type) throws IOException { - super(source, precision == 4 ? Type.FLOAT_32 - : Type.FLOAT_64); + int maxDoc, ValueType type) throws IOException { + super(source, precision == 4 ? ValueType.FLOAT_32 + : ValueType.FLOAT_64); this.dataIn = dataIn; this.precision = precision; this.maxDoc = maxDoc; diff --git a/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java b/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java index 9eb38a2f5a0..cd27d4563af 100644 --- a/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java +++ b/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java @@ -81,7 +81,7 @@ public class MultiDocValues extends DocValues { final int maxDoc; final Source emptySoruce; - public DummyDocValues(int maxDoc, Type type) { + public DummyDocValues(int maxDoc, ValueType type) { this.maxDoc = maxDoc; this.emptySoruce = new EmptySource(type); } @@ -97,7 +97,7 @@ public class MultiDocValues extends DocValues { } @Override - public Type type() { + public ValueType type() { return emptySoruce.type(); } @@ -228,16 +228,16 @@ public class MultiDocValues extends DocValues { } @Override - public Type type() { + public ValueType type() { return docValuesIdx[0].docValues.type(); } } private static class EmptySource extends Source { - private final Type type; + private final ValueType type; - public EmptySource(Type type) { + public EmptySource(ValueType type) { this.type = type; } @@ -263,13 +263,13 @@ public class MultiDocValues extends DocValues { } @Override - public Type type() { + public ValueType type() { return type; } } @Override - public Type type() { + public ValueType type() { return this.docValuesIdx[0].docValues.type(); } } diff --git a/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java b/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java index 246aafcb0cc..987f7486ff0 100644 --- a/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java @@ -223,8 +223,8 @@ class PackedIntsImpl { } @Override - public Type type() { - return Type.INTS; + public ValueType type() { + return ValueType.INTS; } } @@ -240,8 +240,8 @@ class PackedIntsImpl { } @Override - public Type type() { - return Type.INTS; + public ValueType type() { + return ValueType.INTS; } } @@ -256,7 +256,7 @@ class PackedIntsImpl { private IntsEnumImpl(AttributeSource source, IndexInput dataIn) throws IOException { - super(source, Type.INTS); + super(source, ValueType.INTS); intsRef.offset = 0; this.dataIn = dataIn; dataIn.seek(CodecUtil.headerLength(CODEC_NAME)); diff --git a/lucene/src/java/org/apache/lucene/index/values/PerDocFieldValues.java b/lucene/src/java/org/apache/lucene/index/values/PerDocFieldValues.java index 057537e347a..cbd0bb79fe2 100644 --- 
a/lucene/src/java/org/apache/lucene/index/values/PerDocFieldValues.java +++ b/lucene/src/java/org/apache/lucene/index/values/PerDocFieldValues.java @@ -48,19 +48,19 @@ public interface PerDocFieldValues { public void setFloat(double value); /** - * Sets the given {@link BytesRef} value and the field's {@link Type}. The + * Sets the given {@link BytesRef} value and the field's {@link ValueType}. The * comparator for this field is set to null. If a * null comparator is set the default comparator for the given - * {@link Type} is used. + * {@link ValueType} is used. */ - public void setBytes(BytesRef value, Type type); + public void setBytes(BytesRef value, ValueType type); /** - * Sets the given {@link BytesRef} value, the field's {@link Type} and the + * Sets the given {@link BytesRef} value, the field's {@link ValueType} and the * field's comparator. If the {@link Comparator} is set to null - * the default for the given {@link Type} is used instead. + * the default for the given {@link ValueType} is used instead. */ - public void setBytes(BytesRef value, Type type, Comparator comp); + public void setBytes(BytesRef value, ValueType type, Comparator comp); /** * Returns the set {@link BytesRef} or null if not set. @@ -84,18 +84,18 @@ public interface PerDocFieldValues { /** * Sets the {@link BytesRef} comparator for this field. If the field has a - * numeric {@link Type} the comparator will be ignored. + * numeric {@link ValueType} the comparator will be ignored. */ public void setBytesComparator(Comparator comp); /** - * Sets the {@link Type} + * Sets the {@link ValueType} */ - public void setType(Type type); + public void setType(ValueType type); /** - * Returns the {@link Type} + * Returns the {@link ValueType} */ - public Type type(); + public ValueType type(); } \ No newline at end of file diff --git a/lucene/src/java/org/apache/lucene/index/values/Type.java b/lucene/src/java/org/apache/lucene/index/values/ValueType.java similarity index 92% rename from lucene/src/java/org/apache/lucene/index/values/Type.java rename to lucene/src/java/org/apache/lucene/index/values/ValueType.java index 1d5dd0c2701..aebbd614788 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Type.java +++ b/lucene/src/java/org/apache/lucene/index/values/ValueType.java @@ -22,8 +22,8 @@ import org.apache.lucene.index.codecs.FieldsConsumer; import org.apache.lucene.index.values.DocValues.SortedSource; /** - * {@link Type} specifies the type of the {@link DocValues} for a certain field. - * A {@link Type} only defines the data type for a field while the actual + * {@link ValueType} specifies the type of the {@link DocValues} for a certain field. + * A {@link ValueType} only defines the data type for a field while the actual * implementation used to encode and decode the values depends on the field's * {@link Codec}. It is up to the {@link Codec} implementing * {@link FieldsConsumer#addValuesField(org.apache.lucene.index.FieldInfo)} and @@ -32,7 +32,7 @@ import org.apache.lucene.index.values.DocValues.SortedSource; * * @lucene.experimental */ -public enum Type { +public enum ValueType { /* * TODO: Add INT_32 INT_64 INT_16 & INT_8?!
*/ diff --git a/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java index a960a44b0d9..8cb5a7374a6 100644 --- a/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java @@ -237,8 +237,8 @@ class VarDerefBytesImpl { } @Override - public Type type() { - return Type.BYTES_VAR_DEREF; + public ValueType type() { + return ValueType.BYTES_VAR_DEREF; } @Override @@ -256,7 +256,7 @@ class VarDerefBytesImpl { public VarDerefBytesEnum(AttributeSource source, IndexInput datIn, IndexInput idxIn) throws IOException { - super(source, datIn, idxIn, -1, Type.BYTES_VAR_DEREF); + super(source, datIn, idxIn, -1, ValueType.BYTES_VAR_DEREF); } @Override @@ -279,8 +279,8 @@ class VarDerefBytesImpl { } @Override - public Type type() { - return Type.BYTES_VAR_DEREF; + public ValueType type() { + return ValueType.BYTES_VAR_DEREF; } } } diff --git a/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java index 3582d282a86..3764387f8f7 100644 --- a/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java @@ -219,8 +219,8 @@ class VarSortedBytesImpl { } @Override - public Type type() { - return Type.BYTES_VAR_SORTED; + public ValueType type() { + return ValueType.BYTES_VAR_SORTED; } @Override @@ -247,7 +247,7 @@ class VarSortedBytesImpl { protected VarSortedBytesEnum(AttributeSource source, IndexInput datIn, IndexInput idxIn) throws IOException { - super(source, Type.BYTES_VAR_SORTED); + super(source, ValueType.BYTES_VAR_SORTED); totBytes = idxIn.readLong(); // keep that in memory to prevent lots of disk seeks docToOrdIndex = PackedInts.getReader(idxIn); @@ -308,8 +308,8 @@ class VarSortedBytesImpl { } @Override - public Type type() { - return Type.BYTES_VAR_SORTED; + public ValueType type() { + return ValueType.BYTES_VAR_SORTED; } } } diff --git a/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java index 7bee3ccb4df..1012019cb04 100644 --- a/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java @@ -154,8 +154,8 @@ class VarStraightBytesImpl { } @Override - public Type type() { - return Type.BYTES_VAR_STRAIGHT; + public ValueType type() { + return ValueType.BYTES_VAR_STRAIGHT; } @Override @@ -179,7 +179,7 @@ class VarStraightBytesImpl { protected VarStraightBytesEnum(AttributeSource source, IndexInput datIn, IndexInput idxIn) throws IOException { - super(source, Type.BYTES_VAR_STRAIGHT); + super(source, ValueType.BYTES_VAR_STRAIGHT); totBytes = idxIn.readVLong(); fp = datIn.getFilePointer(); addresses = PackedInts.getReader(idxIn); @@ -227,8 +227,8 @@ class VarStraightBytesImpl { } @Override - public Type type() { - return Type.BYTES_VAR_STRAIGHT; + public ValueType type() { + return ValueType.BYTES_VAR_STRAIGHT; } } } diff --git a/lucene/src/java/org/apache/lucene/index/values/Writer.java b/lucene/src/java/org/apache/lucene/index/values/Writer.java index eb46fbebe80..4c6132424e5 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Writer.java +++ b/lucene/src/java/org/apache/lucene/index/values/Writer.java @@ -173,10 +173,10 @@ public abstract class Writer extends 
DocValuesConsumer { /** * Factory method to create a {@link Writer} instance for a given type. This * method returns default implementations for each of the different types - * defined in the {@link Type} enumeration. + * defined in the {@link ValueType} enumeration. * * @param type - * the {@link Type} to create the {@link Writer} for + * the {@link ValueType} to create the {@link Writer} for * @param id * the file name id used to create files within the writer. * @param directory @@ -188,10 +188,10 @@ public abstract class Writer extends DocValuesConsumer { * default. * @param bytesUsed * a byte-usage tracking reference - * @return a new {@link Writer} instance for the given {@link Type} + * @return a new {@link Writer} instance for the given {@link ValueType} * @throws IOException */ - public static Writer create(Type type, String id, Directory directory, + public static Writer create(ValueType type, String id, Directory directory, Comparator comp, AtomicLong bytesUsed) throws IOException { if (comp == null) { comp = BytesRef.getUTF8SortedAsUnicodeComparator(); diff --git a/lucene/src/java/org/apache/lucene/util/FloatsRef.java b/lucene/src/java/org/apache/lucene/util/FloatsRef.java index 67066748395..e6f8bdd1573 100644 --- a/lucene/src/java/org/apache/lucene/util/FloatsRef.java +++ b/lucene/src/java/org/apache/lucene/util/FloatsRef.java @@ -18,7 +18,7 @@ package org.apache.lucene.util; */ /** - * Represents float[], as a slice (offset + length) into an existing float[]. + * Represents double[], as a slice (offset + length) into an existing double[]. * * @lucene.internal */ diff --git a/lucene/src/java/org/apache/lucene/util/Pair.java b/lucene/src/java/org/apache/lucene/util/Pair.java deleted file mode 100644 index 9459a7548db..00000000000 --- a/lucene/src/java/org/apache/lucene/util/Pair.java +++ /dev/null @@ -1,36 +0,0 @@ -package org.apache.lucene.util; -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * Simple Pair - * @lucene.internal - */ -public class Pair { - public final Cur cur; - public final Cud cud; - - /** - * Create a simple pair - * @param cur the first element - * @param cud the second element - */ - public Pair(Cur cur, Cud cud) { - this.cur = cur; - this.cud = cud; - } -} \ No newline at end of file diff --git a/lucene/src/java/org/apache/lucene/util/ParallelArray.java b/lucene/src/java/org/apache/lucene/util/ParallelArray.java deleted file mode 100644 index e2012754552..00000000000 --- a/lucene/src/java/org/apache/lucene/util/ParallelArray.java +++ /dev/null @@ -1,57 +0,0 @@ -package org.apache.lucene.util; -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements.
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -import java.util.concurrent.atomic.AtomicLong; - -/** - * - * @lucene.internal - */ -public abstract class ParallelArray> { - - public final int size; - protected final AtomicLong bytesUsed; - - protected ParallelArray(final int size, AtomicLong bytesUsed) { - this.size = size; - this.bytesUsed = bytesUsed; - bytesUsed.addAndGet((size) * bytesPerEntry()); - - } - - protected abstract int bytesPerEntry(); - - public AtomicLong bytesUsed() { - return bytesUsed; - } - - public void deref() { - bytesUsed.addAndGet((-size) * bytesPerEntry()); - } - - public abstract T newInstance(int size); - - public final T grow() { - int newSize = ArrayUtil.oversize(size + 1, bytesPerEntry()); - T newArray = newInstance(newSize); - copyTo(newArray, size); - bytesUsed.addAndGet((newSize - size) * bytesPerEntry()); - return newArray; - } - - protected abstract void copyTo(T toArray, int numToCopy); -} diff --git a/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java b/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java index 4aa5f7c339a..d2a2d9fd355 100644 --- a/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java +++ b/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java @@ -27,7 +27,7 @@ import org.apache.lucene.document.DocValuesField; import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexWriter; // javadoc import org.apache.lucene.index.codecs.CodecProvider; -import org.apache.lucene.index.values.Type; +import org.apache.lucene.index.values.ValueType; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; @@ -131,8 +131,8 @@ public class RandomIndexWriter implements Closeable { private void randomPerDocFieldValues(Random random, Document doc) { - Type[] values = Type.values(); - Type type = values[random.nextInt(values.length)]; + ValueType[] values = ValueType.values(); + ValueType type = values[random.nextInt(values.length)]; String name = "random_" + type.name() + "" + docValuesFieldPrefix; if ("PreFlex".equals(codecProvider.getFieldCodec(name)) || doc.getFieldable(name) != null) return; diff --git a/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java b/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java index 7e5f7079630..514af9fb4ea 100644 --- a/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java +++ b/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java @@ -119,7 +119,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { } /** - * Tests complete indexing of {@link Type} including deletions, merging and + * Tests complete indexing of {@link ValueType} including deletions, merging and * sparse value fields on Compound-File */ public void 
testIndexBytesNoDeletesCFS() throws IOException { @@ -139,7 +139,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { } /** - * Tests complete indexing of {@link Type} including deletions, merging and + * Tests complete indexing of {@link ValueType} including deletions, merging and * sparse value fields on None-Compound-File */ public void testIndexBytesNoDeletes() throws IOException { @@ -160,10 +160,10 @@ public class TestDocValuesIndexing extends LuceneTestCase { public void testAddIndexes() throws IOException { int valuesPerIndex = 10; - List values = Arrays.asList(Type.values()); + List values = Arrays.asList(ValueType.values()); Collections.shuffle(values, random); - Type first = values.get(0); - Type second = values.get(1); + ValueType first = values.get(0); + ValueType second = values.get(1); String msg = "[first=" + first.name() + ", second=" + second.name() + "]"; // index first index Directory d_1 = newDirectory(); @@ -204,7 +204,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { DocValuesEnum vE_1_merged = getValuesEnum(getDocValues(merged, first.name())); DocValuesEnum vE_2_merged = getValuesEnum(getDocValues(merged, second .name())); - if (second == Type.BYTES_VAR_STRAIGHT || second == Type.BYTES_FIXED_STRAIGHT) { + if (second == ValueType.BYTES_VAR_STRAIGHT || second == ValueType.BYTES_FIXED_STRAIGHT) { assertEquals(msg, valuesPerIndex-1, vE_2_merged.advance(valuesPerIndex-1)); } for (int i = 0; i < valuesPerIndex; i++) { @@ -246,11 +246,11 @@ public class TestDocValuesIndexing extends LuceneTestCase { Directory d = newDirectory(); IndexWriter w = new IndexWriter(d, cfg); final int numValues = 179 + random.nextInt(151); - final List numVariantList = new ArrayList(NUMERICS); + final List numVariantList = new ArrayList(NUMERICS); // run in random order to test if fill works correctly during merges Collections.shuffle(numVariantList, random); - for (Type val : numVariantList) { + for (ValueType val : numVariantList) { OpenBitSet deleted = indexValues(w, numValues, val, numVariantList, withDeletions, 7); List closeables = new ArrayList(); @@ -337,11 +337,11 @@ public class TestDocValuesIndexing extends LuceneTestCase { throws CorruptIndexException, LockObtainFailedException, IOException { final Directory d = newDirectory(); IndexWriter w = new IndexWriter(d, cfg); - final List byteVariantList = new ArrayList(BYTES); + final List byteVariantList = new ArrayList(BYTES); // run in random order to test if fill works correctly during merges Collections.shuffle(byteVariantList, random); final int numValues = 179 + random.nextInt(151); - for (Type byteIndexValue : byteVariantList) { + for (ValueType byteIndexValue : byteVariantList) { List closeables = new ArrayList(); int bytesSize = 7 + random.nextInt(128); @@ -488,19 +488,19 @@ public class TestDocValuesIndexing extends LuceneTestCase { return valuesEnum; } - private static EnumSet BYTES = EnumSet.of(Type.BYTES_FIXED_DEREF, - Type.BYTES_FIXED_SORTED, Type.BYTES_FIXED_STRAIGHT, Type.BYTES_VAR_DEREF, - Type.BYTES_VAR_SORTED, Type.BYTES_VAR_STRAIGHT); + private static EnumSet BYTES = EnumSet.of(ValueType.BYTES_FIXED_DEREF, + ValueType.BYTES_FIXED_SORTED, ValueType.BYTES_FIXED_STRAIGHT, ValueType.BYTES_VAR_DEREF, + ValueType.BYTES_VAR_SORTED, ValueType.BYTES_VAR_STRAIGHT); - private static EnumSet NUMERICS = EnumSet.of(Type.INTS, - Type.FLOAT_32, Type.FLOAT_64); + private static EnumSet NUMERICS = EnumSet.of(ValueType.INTS, + ValueType.FLOAT_32, ValueType.FLOAT_64); private static Index[] IDX_VALUES = new 
Index[] { Index.ANALYZED, Index.ANALYZED_NO_NORMS, Index.NOT_ANALYZED, Index.NOT_ANALYZED_NO_NORMS, Index.NO }; - private OpenBitSet indexValues(IndexWriter w, int numValues, Type value, - List valueVarList, boolean withDeletions, int multOfSeven) + private OpenBitSet indexValues(IndexWriter w, int numValues, ValueType value, + List valueVarList, boolean withDeletions, int multOfSeven) throws CorruptIndexException, IOException { final boolean isNumeric = NUMERICS.contains(value); OpenBitSet deleted = new OpenBitSet(numValues); @@ -550,7 +550,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { if (i % 7 == 0) { if (withDeletions && random.nextBoolean()) { - Type val = valueVarList.get(random.nextInt(1 + valueVarList + ValueType val = valueVarList.get(random.nextInt(1 + valueVarList .indexOf(value))); final int randInt = val == value ? random.nextInt(1 + i) : random .nextInt(numValues); diff --git a/lucene/src/test/org/apache/lucene/search/TestSort.java b/lucene/src/test/org/apache/lucene/search/TestSort.java index 96d909db83d..4c81a18455a 100644 --- a/lucene/src/test/org/apache/lucene/search/TestSort.java +++ b/lucene/src/test/org/apache/lucene/search/TestSort.java @@ -37,7 +37,7 @@ import org.apache.lucene.index.MultiReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.index.codecs.CodecProvider; -import org.apache.lucene.index.values.Type; +import org.apache.lucene.index.values.ValueType; import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.FieldValueHitQueue.Entry; @@ -124,13 +124,13 @@ public class TestSort extends LuceneTestCase { doc.add (new Field ("contents", data[i][1], Field.Store.NO, Field.Index.ANALYZED)); if (data[i][2] != null) { Field f = supportsDocValues ? - DocValuesField.set(new Field ("int", data[i][2], Field.Store.NO, Field.Index.NOT_ANALYZED), Type.INTS) + DocValuesField.set(new Field ("int", data[i][2], Field.Store.NO, Field.Index.NOT_ANALYZED), ValueType.INTS) : new Field ("int", data[i][2], Field.Store.NO, Field.Index.NOT_ANALYZED); doc.add(f); } if (data[i][3] != null) { Field f = supportsDocValues ? - DocValuesField.set(new Field ("float", data[i][3], Field.Store.NO, Field.Index.NOT_ANALYZED), Type.FLOAT_32) + DocValuesField.set(new Field ("float", data[i][3], Field.Store.NO, Field.Index.NOT_ANALYZED), ValueType.FLOAT_32) : new Field ("float", data[i][3], Field.Store.NO, Field.Index.NOT_ANALYZED); doc.add(f); } @@ -140,7 +140,7 @@ public class TestSort extends LuceneTestCase { if (data[i][7] != null) doc.add (new Field ("long", data[i][7], Field.Store.NO, Field.Index.NOT_ANALYZED)); if (data[i][8] != null) { Field f = supportsDocValues ? - DocValuesField.set(new Field ("double", data[i][8], Field.Store.NO, Field.Index.NOT_ANALYZED), Type.FLOAT_64) + DocValuesField.set(new Field ("double", data[i][8], Field.Store.NO, Field.Index.NOT_ANALYZED), ValueType.FLOAT_64) : new Field ("double", data[i][8], Field.Store.NO, Field.Index.NOT_ANALYZED); doc.add(f); } From 6dc4879fec0e664639279cd3c74e8c78a4725ec7 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Thu, 19 May 2011 14:22:54 +0000 Subject: [PATCH 038/116] LUCENE-3108: Enable memory tracking for ByteBlockPool allocations in DocValues writer impls. 
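In essence, every byte[] block handed out or recycled by a ByteBlockPool now goes through an allocator that adjusts a shared AtomicLong, so DocValues memory shows up in the indexer's global RAM accounting. A rough usage sketch of the idea (illustrative only, pieced together from the ByteBlockPool and DirectTrackingAllocator code in this patch, not verbatim commit content):

    // Every block obtained from the pool is added to bytesUsed; recycling
    // subtracts the same amount, so the counter tracks live pool memory.
    final AtomicLong bytesUsed = new AtomicLong(0);
    final ByteBlockPool pool = new ByteBlockPool(
        new ByteBlockPool.DirectTrackingAllocator(bytesUsed));
    pool.nextBuffer();                    // allocates one block: +BYTE_BLOCK_SIZE
    final long ramNow = bytesUsed.get();  // what a flush policy would observe
    pool.dropBuffersAndReset();           // blocks recycled, counter drops back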
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1124825 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/lucene/document/AbstractField.java | 2 +- .../index/DocumentsWriterPerThread.java | 32 ++----------- .../lucene/index/MultiPerDocValues.java | 14 +++++- .../lucene/index/PerFieldCodecWrapper.java | 12 ++--- .../apache/lucene/index/SegmentMerger.java | 12 ++--- .../codecs/DefaultDocValuesProducer.java | 2 +- .../org/apache/lucene/index/values/Bytes.java | 35 ++++----------- .../index/values/FixedDerefBytesImpl.java | 14 +++--- .../index/values/FixedSortedBytesImpl.java | 6 +-- .../index/values/FixedStraightBytesImpl.java | 12 +++-- .../index/values/VarDerefBytesImpl.java | 6 +-- .../index/values/VarSortedBytesImpl.java | 6 +-- .../index/values/VarStraightBytesImpl.java | 2 +- .../org/apache/lucene/util/ByteBlockPool.java | 45 ++++++++++++++++++- .../org/apache/lucene/util/BytesRefHash.java | 2 +- 15 files changed, 112 insertions(+), 90 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/document/AbstractField.java b/lucene/src/java/org/apache/lucene/document/AbstractField.java index 5eac62ab623..344aa9f7481 100755 --- a/lucene/src/java/org/apache/lucene/document/AbstractField.java +++ b/lucene/src/java/org/apache/lucene/document/AbstractField.java @@ -303,7 +303,7 @@ public abstract class AbstractField implements Fieldable { } public boolean hasDocValues() { - return docValues != null; + return docValues != null && docValues.type() != null; } public ValueType docValuesType() { diff --git a/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java b/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java index 4f14fd8f341..7b71ada2357 100644 --- a/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java +++ b/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java @@ -32,6 +32,7 @@ import org.apache.lucene.search.SimilarityProvider; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BitVector; import org.apache.lucene.util.ByteBlockPool.Allocator; +import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator; import org.apache.lucene.util.RamUsageEstimator; public class DocumentsWriterPerThread { @@ -169,6 +170,7 @@ public class DocumentsWriterPerThread { DocumentsWriterDeleteQueue deleteQueue; DeleteSlice deleteSlice; private final NumberFormat nf = NumberFormat.getInstance(); + final Allocator byteBlockAllocator; public DocumentsWriterPerThread(Directory directory, DocumentsWriter parent, @@ -181,9 +183,9 @@ public class DocumentsWriterPerThread { this.docState = new DocState(this); this.docState.similarityProvider = parent.indexWriter.getConfig() .getSimilarityProvider(); - - consumer = indexingChain.getChain(this); bytesUsed = new AtomicLong(0); + byteBlockAllocator = new DirectTrackingAllocator(bytesUsed); + consumer = indexingChain.getChain(this); pendingDeletes = new BufferedDeletes(false); initialize(); } @@ -462,32 +464,6 @@ public class DocumentsWriterPerThread { bytesUsed.addAndGet(-(length *(INT_BLOCK_SIZE*RamUsageEstimator.NUM_BYTES_INT))); } - final Allocator byteBlockAllocator = new DirectTrackingAllocator(); - - - private class DirectTrackingAllocator extends Allocator { - public DirectTrackingAllocator() { - this(BYTE_BLOCK_SIZE); - } - - public DirectTrackingAllocator(int blockSize) { - super(blockSize); - } - - public byte[] getByteBlock() { - bytesUsed.addAndGet(blockSize); - return new byte[blockSize]; - } - @Override - public void 
recycleByteBlocks(byte[][] blocks, int start, int end) { - bytesUsed.addAndGet(-((end-start)* blockSize)); - for (int i = start; i < end; i++) { - blocks[i] = null; - } - } - - }; - PerDocWriteState newPerDocWriteState(int codecId) { assert segment != null; return new PerDocWriteState(infoStream, directory, segment, fieldInfos, bytesUsed, codecId); diff --git a/lucene/src/java/org/apache/lucene/index/MultiPerDocValues.java b/lucene/src/java/org/apache/lucene/index/MultiPerDocValues.java index 181e058e528..60848bb5aa9 100644 --- a/lucene/src/java/org/apache/lucene/index/MultiPerDocValues.java +++ b/lucene/src/java/org/apache/lucene/index/MultiPerDocValues.java @@ -151,9 +151,19 @@ public class MultiPerDocValues extends PerDocValues { } public void close() throws IOException { - PerDocValues[] perDocValues = this.subs; + final PerDocValues[] perDocValues = this.subs; + IOException ex = null; for (PerDocValues values : perDocValues) { - values.close(); + try { + values.close(); + } catch (IOException e) { + if (ex == null) { + ex = e; + } + } + } + if (ex != null) { + throw ex; } } diff --git a/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java b/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java index 06d8a0339ff..fd033febce1 100644 --- a/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java +++ b/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java @@ -24,6 +24,7 @@ import java.util.HashMap; import java.util.Iterator; import java.util.Map; import java.util.Set; +import java.util.TreeMap; import java.util.TreeSet; import org.apache.lucene.index.codecs.Codec; @@ -243,8 +244,7 @@ final class PerFieldCodecWrapper extends Codec { } private final class PerDocProducers extends PerDocValues { - private final Set fields = new TreeSet(); - private final Map codecs = new HashMap(); + private final TreeMap codecs = new TreeMap(); public PerDocProducers(Directory dir, FieldInfos fieldInfos, SegmentInfo si, int readBufferSize, int indexDivisor) throws IOException { @@ -253,7 +253,6 @@ final class PerFieldCodecWrapper extends Codec { try { for (FieldInfo fi : fieldInfos) { if (fi.hasDocValues()) { - fields.add(fi.name); assert fi.getCodecId() != FieldInfo.UNASSIGNED_CODEC_ID; Codec codec = segmentCodecs.codecs[fi.getCodecId()]; if (!producers.containsKey(codec)) { @@ -280,9 +279,10 @@ final class PerFieldCodecWrapper extends Codec { } } } + @Override public Collection fields() { - return fields; + return codecs.keySet(); } @Override public DocValues docValues(String field) throws IOException { @@ -302,11 +302,11 @@ final class PerFieldCodecWrapper extends Codec { if (next != null) { next.close(); } - } catch (IOException ioe) { + } catch (Exception ioe) { // keep first IOException we hit but keep // closing the rest if (err == null) { - err = ioe; + err = new IOException(ioe); } } } diff --git a/lucene/src/java/org/apache/lucene/index/SegmentMerger.java b/lucene/src/java/org/apache/lucene/index/SegmentMerger.java index 4523d821286..cba9bd4329c 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentMerger.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentMerger.java @@ -575,13 +575,15 @@ final class SegmentMerger { mergeState.multiDeletedDocs = new MultiBits(perDocBits, perDocBitsStarts); final PerDocConsumer docsConsumer = codec .docsConsumer(new PerDocWriteState(segmentWriteState)); + MultiPerDocValues multiPerDocValues = null; try { - docsConsumer.merge( - mergeState, - new MultiPerDocValues(perDocProducers - 
.toArray(PerDocValues.EMPTY_ARRAY), perDocSlices - .toArray(ReaderUtil.Slice.EMPTY_ARRAY))); + multiPerDocValues = new MultiPerDocValues(perDocProducers + .toArray(PerDocValues.EMPTY_ARRAY), perDocSlices + .toArray(ReaderUtil.Slice.EMPTY_ARRAY)); + docsConsumer.merge(mergeState, multiPerDocValues); } finally { + if (multiPerDocValues != null) + multiPerDocValues.close(); docsConsumer.close(); } } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesProducer.java b/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesProducer.java index de1dcfd8f8f..70d352f3d07 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesProducer.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesProducer.java @@ -131,7 +131,7 @@ public class DefaultDocValuesProducer extends PerDocValues { } public void close() throws IOException { - Collection values = docValues.values(); + final Collection values = docValues.values(); IOException ex = null; for (DocValues docValues : values) { try { diff --git a/lucene/src/java/org/apache/lucene/index/values/Bytes.java b/lucene/src/java/org/apache/lucene/index/values/Bytes.java index b386b932fb9..501e7458d40 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Bytes.java +++ b/lucene/src/java/org/apache/lucene/index/values/Bytes.java @@ -341,45 +341,31 @@ public final class Bytes { // TODO: open up this API?! static abstract class BytesWriterBase extends Writer { - private final Directory dir; private final String id; protected IndexOutput idxOut; protected IndexOutput datOut; protected BytesRef bytesRef; - private final String codecName; - private final int version; protected final ByteBlockPool pool; protected BytesWriterBase(Directory dir, String id, String codecName, - int version, boolean initIndex, boolean initData, ByteBlockPool pool, + int version, boolean initIndex, ByteBlockPool pool, AtomicLong bytesUsed) throws IOException { super(bytesUsed); - this.dir = dir; this.id = id; - this.codecName = codecName; - this.version = version; this.pool = pool; - if (initData) { - initDataOut(); - } + datOut = dir.createOutput(IndexFileNames.segmentFileName(id, "", + DATA_EXTENSION)); + CodecUtil.writeHeader(datOut, codecName, version); if (initIndex) { - initIndexOut(); + idxOut = dir.createOutput(IndexFileNames.segmentFileName(id, "", + INDEX_EXTENSION)); + CodecUtil.writeHeader(idxOut, codecName, version); + } else { + idxOut = null; } } - private void initDataOut() throws IOException { - datOut = dir.createOutput(IndexFileNames.segmentFileName(id, "", - DATA_EXTENSION)); - CodecUtil.writeHeader(datOut, codecName, version); - } - - private void initIndexOut() throws IOException { - idxOut = dir.createOutput(IndexFileNames.segmentFileName(id, "", - INDEX_EXTENSION)); - CodecUtil.writeHeader(idxOut, codecName, version); - } - /** * Must be called only with increasing docIDs. It's OK for some docIDs to be * skipped; they will be filled with 0 bytes. 
@@ -390,7 +376,6 @@ public final class Bytes { @Override public void finish(int docCount) throws IOException { try { - if (datOut != null) datOut.close(); } finally { try { @@ -483,9 +468,7 @@ public final class Bytes { super.close(); } finally { try { - if (datIn != null) { datIn.close(); - } } finally { if (idxIn != null) { idxIn.close(); diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java index 51afb512e83..6a5fc0b597b 100644 --- a/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java @@ -33,7 +33,7 @@ import org.apache.lucene.util.BytesRefHash; import org.apache.lucene.util.PagedBytes; import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.ByteBlockPool.Allocator; -import org.apache.lucene.util.ByteBlockPool.DirectAllocator; +import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator; import org.apache.lucene.util.BytesRefHash.TrackingDirectBytesStartArray; import org.apache.lucene.util.packed.PackedInts; @@ -54,16 +54,15 @@ class FixedDerefBytesImpl { private final BytesRefHash hash = new BytesRefHash(pool, BytesRefHash.DEFAULT_CAPACITY, new TrackingDirectBytesStartArray( BytesRefHash.DEFAULT_CAPACITY, bytesUsed)); - public Writer(Directory dir, String id, AtomicLong bytesUsed) throws IOException { - this(dir, id, new DirectAllocator(ByteBlockPool.BYTE_BLOCK_SIZE), + this(dir, id, new DirectTrackingAllocator(ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed), bytesUsed); } public Writer(Directory dir, String id, Allocator allocator, AtomicLong bytesUsed) throws IOException { - super(dir, id, CODEC_NAME, VERSION_CURRENT, true, true, + super(dir, id, CODEC_NAME, VERSION_CURRENT, true, new ByteBlockPool(allocator), bytesUsed); docToID = new int[1]; bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT); // TODO BytesRefHash @@ -249,8 +248,11 @@ class FixedDerefBytesImpl { } public void close() throws IOException { - datIn.close(); - idx.close(); + try { + datIn.close(); + } finally { + idx.close(); + } } protected void fill(long address, BytesRef ref) throws IOException { diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java index beedfe2addb..9d9dcffc836 100644 --- a/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java @@ -36,7 +36,7 @@ import org.apache.lucene.util.CodecUtil; import org.apache.lucene.util.PagedBytes; import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.ByteBlockPool.Allocator; -import org.apache.lucene.util.ByteBlockPool.DirectAllocator; +import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator; import org.apache.lucene.util.BytesRefHash.TrackingDirectBytesStartArray; import org.apache.lucene.util.packed.PackedInts; @@ -63,13 +63,13 @@ class FixedSortedBytesImpl { public Writer(Directory dir, String id, Comparator comp, AtomicLong bytesUsed) throws IOException { - this(dir, id, comp, new DirectAllocator(ByteBlockPool.BYTE_BLOCK_SIZE), + this(dir, id, comp, new DirectTrackingAllocator(ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed), bytesUsed); } public Writer(Directory dir, String id, Comparator comp, Allocator allocator, AtomicLong bytesUsed) throws IOException { - super(dir, id, CODEC_NAME, VERSION_CURRENT, true, 
true, + super(dir, id, CODEC_NAME, VERSION_CURRENT, true, new ByteBlockPool(allocator), bytesUsed); docToEntry = new int[1]; // docToEntry[0] = -1; diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java index f718853d2f9..fceafc76d34 100644 --- a/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java @@ -46,7 +46,7 @@ class FixedStraightBytesImpl { private byte[] oneRecord; protected Writer(Directory dir, String id) throws IOException { - super(dir, id, CODEC_NAME, VERSION_CURRENT, false, true, null, null); + super(dir, id, CODEC_NAME, VERSION_CURRENT, false, null, null); } // TODO - impl bulk copy here! @@ -87,7 +87,13 @@ class FixedStraightBytesImpl { } fill(state.docBase); // TODO should we add a transfer to API to each reader? - datOut.copyBytes(reader.cloneData(), size * maxDocs); + final IndexInput cloneData = reader.cloneData(); + try { + datOut.copyBytes(cloneData, size * maxDocs); + } finally { + cloneData.close(); + } + lastDocID += maxDocs - 1; } else super.merge(state); @@ -116,7 +122,7 @@ class FixedStraightBytesImpl { } public long ramBytesUsed() { - return 0; + return oneRecord == null ? 0 : oneRecord.length; } } diff --git a/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java index 8cb5a7374a6..8f29dcaa0ec 100644 --- a/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java @@ -36,7 +36,7 @@ import org.apache.lucene.util.CodecUtil; import org.apache.lucene.util.PagedBytes; import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.ByteBlockPool.Allocator; -import org.apache.lucene.util.ByteBlockPool.DirectAllocator; +import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator; import org.apache.lucene.util.BytesRefHash.TrackingDirectBytesStartArray; import org.apache.lucene.util.packed.PackedInts; @@ -117,13 +117,13 @@ class VarDerefBytesImpl { public Writer(Directory dir, String id, AtomicLong bytesUsed) throws IOException { - this(dir, id, new DirectAllocator(ByteBlockPool.BYTE_BLOCK_SIZE), + this(dir, id, new DirectTrackingAllocator(ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed), bytesUsed); } public Writer(Directory dir, String id, Allocator allocator, AtomicLong bytesUsed) throws IOException { - super(dir, id, CODEC_NAME, VERSION_CURRENT, true, true, + super(dir, id, CODEC_NAME, VERSION_CURRENT, true, new ByteBlockPool(allocator), bytesUsed); docToAddress = new int[1]; bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT); diff --git a/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java index 3764387f8f7..9bfaa809a04 100644 --- a/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java @@ -35,7 +35,7 @@ import org.apache.lucene.util.BytesRefHash; import org.apache.lucene.util.PagedBytes; import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.ByteBlockPool.Allocator; -import org.apache.lucene.util.ByteBlockPool.DirectAllocator; +import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator; import 
org.apache.lucene.util.BytesRefHash.TrackingDirectBytesStartArray; import org.apache.lucene.util.packed.PackedInts; @@ -62,13 +62,13 @@ class VarSortedBytesImpl { public Writer(Directory dir, String id, Comparator comp, AtomicLong bytesUsed) throws IOException { - this(dir, id, comp, new DirectAllocator(ByteBlockPool.BYTE_BLOCK_SIZE), + this(dir, id, comp, new DirectTrackingAllocator(ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed), bytesUsed); } public Writer(Directory dir, String id, Comparator comp, Allocator allocator, AtomicLong bytesUsed) throws IOException { - super(dir, id, CODEC_NAME, VERSION_CURRENT, true, true, + super(dir, id, CODEC_NAME, VERSION_CURRENT, true, new ByteBlockPool(allocator), bytesUsed); this.comp = comp; docToEntry = new int[1]; diff --git a/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java index 1012019cb04..3ac37097338 100644 --- a/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java @@ -51,7 +51,7 @@ class VarStraightBytesImpl { public Writer(Directory dir, String id, AtomicLong bytesUsed) throws IOException { - super(dir, id, CODEC_NAME, VERSION_CURRENT, true, true, null, bytesUsed); + super(dir, id, CODEC_NAME, VERSION_CURRENT, true, null, bytesUsed); docToAddress = new long[1]; bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT); } diff --git a/lucene/src/java/org/apache/lucene/util/ByteBlockPool.java b/lucene/src/java/org/apache/lucene/util/ByteBlockPool.java index 552340eca6f..58e3b93824f 100644 --- a/lucene/src/java/org/apache/lucene/util/ByteBlockPool.java +++ b/lucene/src/java/org/apache/lucene/util/ByteBlockPool.java @@ -18,6 +18,8 @@ package org.apache.lucene.util; */ import java.util.Arrays; import java.util.List; +import java.util.concurrent.atomic.AtomicLong; + import static org.apache.lucene.util.RamUsageEstimator.NUM_BYTES_OBJECT_REF; /** @@ -78,6 +80,33 @@ public final class ByteBlockPool { } } + + public static class DirectTrackingAllocator extends Allocator { + private final AtomicLong bytesUsed; + + public DirectTrackingAllocator(AtomicLong bytesUsed) { + this(BYTE_BLOCK_SIZE, bytesUsed); + } + + public DirectTrackingAllocator(int blockSize, AtomicLong bytesUsed) { + super(blockSize); + this.bytesUsed = bytesUsed; + } + + public byte[] getByteBlock() { + bytesUsed.addAndGet(blockSize); + return new byte[blockSize]; + } + @Override + public void recycleByteBlocks(byte[][] blocks, int start, int end) { + bytesUsed.addAndGet(-((end-start)* blockSize)); + for (int i = start; i < end; i++) { + blocks[i] = null; + } + } + + }; + public byte[][] buffers = new byte[10][]; @@ -92,6 +121,20 @@ public final class ByteBlockPool { public ByteBlockPool(Allocator allocator) { this.allocator = allocator; } + + public void dropBuffersAndReset() { + if (bufferUpto != -1) { + // Recycle all but the first buffer + allocator.recycleByteBlocks(buffers, 0, 1+bufferUpto); + + // Re-use the first buffer + bufferUpto = -1; + byteUpto = BYTE_BLOCK_SIZE; + byteOffset = -BYTE_BLOCK_SIZE; + buffers = new byte[10][]; + buffer = null; + } + } public void reset() { if (bufferUpto != -1) { @@ -115,7 +158,7 @@ public final class ByteBlockPool { buffer = buffers[0]; } } - + public void nextBuffer() { if (1+bufferUpto == buffers.length) { byte[][] newBuffers = new byte[ArrayUtil.oversize(buffers.length+1, diff --git a/lucene/src/java/org/apache/lucene/util/BytesRefHash.java 
b/lucene/src/java/org/apache/lucene/util/BytesRefHash.java
index 038942632b9..2fdb32eaf9f 100644
--- a/lucene/src/java/org/apache/lucene/util/BytesRefHash.java
+++ b/lucene/src/java/org/apache/lucene/util/BytesRefHash.java
@@ -228,7 +228,7 @@ public final class BytesRefHash {
     lastCount = count;
     count = 0;
     if (resetPool) {
-      pool.reset();
+      pool.dropBuffersAndReset();
     }
     bytesStart = bytesStartArray.clear();
     if (lastCount != -1 && shrink(lastCount)) {

From a19d849e2f2c53f1f0b77c3e872575edb70044e7 Mon Sep 17 00:00:00 2001
From: Simon Willnauer
Date: Thu, 19 May 2011 20:56:25 +0000
Subject: [PATCH 039/116] fix java 5 compile error

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1125097 13f79535-47bb-0310-9956-ffa450edef68
---
 .../src/java/org/apache/lucene/index/PerFieldCodecWrapper.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java b/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java
index fd033febce1..69043e7004a 100644
--- a/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java
+++ b/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java
@@ -302,7 +302,7 @@ final class PerFieldCodecWrapper extends Codec {
           if (next != null) {
             next.close();
           }
-        } catch (Exception ioe) {
+        } catch (IOException ioe) {
           // keep first IOException we hit but keep
           // closing the rest
           if (err == null) {

From 5b7793da519e77f4f5e404ec3edc3dfba7ab4d46 Mon Sep 17 00:00:00 2001
From: Simon Willnauer
Date: Thu, 19 May 2011 21:12:54 +0000
Subject: [PATCH 040/116] LUCENE-3125: SegmentMerger was closing in-use DocValues after merge

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1125108 13f79535-47bb-0310-9956-ffa450edef68
---
 lucene/src/java/org/apache/lucene/index/SegmentMerger.java | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/lucene/src/java/org/apache/lucene/index/SegmentMerger.java b/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
index cba9bd4329c..c2f25fb006e 100644
--- a/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
+++ b/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
@@ -575,15 +575,12 @@ final class SegmentMerger {
       mergeState.multiDeletedDocs = new MultiBits(perDocBits, perDocBitsStarts);
       final PerDocConsumer docsConsumer = codec
           .docsConsumer(new PerDocWriteState(segmentWriteState));
-      MultiPerDocValues multiPerDocValues = null;
       try {
-        multiPerDocValues = new MultiPerDocValues(perDocProducers
+        final MultiPerDocValues multiPerDocValues = new MultiPerDocValues(perDocProducers
             .toArray(PerDocValues.EMPTY_ARRAY), perDocSlices
             .toArray(ReaderUtil.Slice.EMPTY_ARRAY));
         docsConsumer.merge(mergeState, multiPerDocValues);
       } finally {
-        if (multiPerDocValues != null)
-          multiPerDocValues.close();
         docsConsumer.close();
       }
     }

From c564b77504076687d5af011b413ca5c93c5a0065 Mon Sep 17 00:00:00 2001
From: Simon Willnauer
Date: Fri, 20 May 2011 06:03:00 +0000
Subject: [PATCH 041/116] remove unnecessary ctor

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1125248 13f79535-47bb-0310-9956-ffa450edef68
---
 .../src/java/org/apache/lucene/index/PerFieldCodecWrapper.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java b/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java
index 69043e7004a..4acb6026ffb 100644
--- a/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java
+++
b/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java @@ -306,7 +306,7 @@ final class PerFieldCodecWrapper extends Codec { // keep first IOException we hit but keep // closing the rest if (err == null) { - err = new IOException(ioe); + err = ioe; } } } From 441588a04f3e473cb18baa3eaa5e3021c05d385a Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Fri, 20 May 2011 12:25:42 +0000 Subject: [PATCH 042/116] LUCENE-3108: Closing previously opened files if not all DocValues can be loaded git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1125348 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene/index/DocFieldProcessor.java | 12 ++++- .../lucene/index/PerFieldCodecWrapper.java | 9 ++-- .../codecs/DefaultDocValuesConsumer.java | 54 +++++++++---------- .../codecs/DefaultDocValuesProducer.java | 47 +++++++++++----- .../org/apache/lucene/index/values/Bytes.java | 24 ++++++--- .../apache/lucene/index/values/Floats.java | 11 +--- .../lucene/index/values/PackedIntsImpl.java | 6 +-- 7 files changed, 95 insertions(+), 68 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java index 1a3b9c5dae7..ac4d8a210d7 100644 --- a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java +++ b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java @@ -307,8 +307,16 @@ final class DocFieldProcessor extends DocConsumer { perDocConsumer = codec.docsConsumer(perDocWriteState); perDocConsumers.put(Integer.valueOf(fieldInfo.getCodecId()), perDocConsumer); } - docValuesConsumer = perDocConsumer.addValuesField(fieldInfo); - fieldInfo.commitDocValues(); + boolean success = false; + try { + docValuesConsumer = perDocConsumer.addValuesField(fieldInfo); + fieldInfo.commitDocValues(); + success = true; + } finally { + if (!success) { + fieldInfo.revertUncommitted(); + } + } docValues.put(fieldInfo.name, docValuesConsumer); return docValuesConsumer; } diff --git a/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java b/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java index 4acb6026ffb..fd4e6f3e795 100644 --- a/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java +++ b/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java @@ -294,13 +294,12 @@ final class PerFieldCodecWrapper extends Codec { } public void close() throws IOException { - final Iterator it = codecs.values().iterator(); + final Collection values = codecs.values(); IOException err = null; - while (it.hasNext()) { + for (PerDocValues perDocValues : values) { try { - PerDocValues next = it.next(); - if (next != null) { - next.close(); + if (perDocValues != null) { + perDocValues.close(); } } catch (IOException ioe) { // keep first IOException we hit but keep diff --git a/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesConsumer.java b/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesConsumer.java index 0c1d1a1da1a..b3c4d8422dd 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesConsumer.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesConsumer.java @@ -61,37 +61,33 @@ public class DefaultDocValuesConsumer extends PerDocConsumer { public static void files(Directory dir, SegmentInfo segmentInfo, int codecId, Set files) throws IOException { FieldInfos fieldInfos = segmentInfo.getFieldInfos(); - boolean indexed = false; for (FieldInfo fieldInfo : fieldInfos) { - if (fieldInfo.getCodecId() == 
codecId) { - indexed |= fieldInfo.isIndexed; - if (fieldInfo.hasDocValues()) { - String filename = docValuesId(segmentInfo.name, codecId, fieldInfo.number); - switch (fieldInfo.getDocValues()) { - case BYTES_FIXED_DEREF: - case BYTES_VAR_DEREF: - case BYTES_VAR_SORTED: - case BYTES_FIXED_SORTED: - case BYTES_VAR_STRAIGHT: - files.add(IndexFileNames.segmentFileName(filename, "", - Writer.INDEX_EXTENSION)); - assert dir.fileExists(IndexFileNames.segmentFileName(filename, "", - Writer.INDEX_EXTENSION)); - // until here all types use an index - case BYTES_FIXED_STRAIGHT: - case FLOAT_32: - case FLOAT_64: - case INTS: - files.add(IndexFileNames.segmentFileName(filename, "", - Writer.DATA_EXTENSION)); - assert dir.fileExists(IndexFileNames.segmentFileName(filename, "", - Writer.DATA_EXTENSION)); - break; - default: - assert false; - } + if (fieldInfo.getCodecId() == codecId && fieldInfo.hasDocValues()) { + String filename = docValuesId(segmentInfo.name, codecId, + fieldInfo.number); + switch (fieldInfo.getDocValues()) { + case BYTES_FIXED_DEREF: + case BYTES_VAR_DEREF: + case BYTES_VAR_SORTED: + case BYTES_FIXED_SORTED: + case BYTES_VAR_STRAIGHT: + files.add(IndexFileNames.segmentFileName(filename, "", + Writer.INDEX_EXTENSION)); + assert dir.fileExists(IndexFileNames.segmentFileName(filename, "", + Writer.INDEX_EXTENSION)); + // until here all types use an index + case BYTES_FIXED_STRAIGHT: + case FLOAT_32: + case FLOAT_64: + case INTS: + files.add(IndexFileNames.segmentFileName(filename, "", + Writer.DATA_EXTENSION)); + assert dir.fileExists(IndexFileNames.segmentFileName(filename, "", + Writer.DATA_EXTENSION)); + break; + default: + assert false; } - } } } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesProducer.java b/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesProducer.java index 70d352f3d07..497a0f815fa 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesProducer.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesProducer.java @@ -39,7 +39,7 @@ import org.apache.lucene.store.Directory; */ public class DefaultDocValuesProducer extends PerDocValues { - protected final TreeMap docValues = new TreeMap(); + protected final TreeMap docValues; /** * Creates a new {@link DefaultDocValuesProducer} instance and loads all @@ -58,7 +58,7 @@ public class DefaultDocValuesProducer extends PerDocValues { */ public DefaultDocValuesProducer(SegmentInfo si, Directory dir, FieldInfos fieldInfo, int codecId) throws IOException { - load(fieldInfo, si.name, si.docCount, dir, codecId); + docValues = load(fieldInfo, si.name, si.docCount, dir, codecId); } /** @@ -66,22 +66,37 @@ public class DefaultDocValuesProducer extends PerDocValues { * null if this field has no {@link DocValues}. */ @Override - public DocValues docValues(String field) throws IOException { + public synchronized DocValues docValues(String field) throws IOException { return docValues.get(field); } // Only opens files... doesn't actually load any values - protected void load(FieldInfos fieldInfos, String segment, int docCount, - Directory dir, int codecId) throws IOException { - for (FieldInfo fieldInfo : fieldInfos) { - if (codecId == fieldInfo.getCodecId() && fieldInfo.hasDocValues()) { - final String field = fieldInfo.name; - // TODO can we have a compound file per segment and codec for docvalues? 
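// The load() hunk here introduces the success-flag idiom this patch applies
// throughout LUCENE-3108: acquire resources inside a try block, flip a local
// flag only after the last acquisition, and release everything acquired so
// far from finally when the flag is still false. A minimal, self-contained
// sketch of the idiom -- the class name is illustrative and FileInputStream
// merely stands in for the index inputs the real code opens:

import java.io.Closeable;
import java.io.FileInputStream;
import java.io.IOException;

final class SuccessFlagSketch {
  private final Closeable dat;
  private final Closeable idx;

  SuccessFlagSketch(String datFile, String idxFile) throws IOException {
    dat = new FileInputStream(datFile);   // first resource is now owned
    boolean success = false;
    try {
      idx = new FileInputStream(idxFile); // second acquisition may throw
      success = true;                     // reached only if nothing threw
    } finally {
      if (!success) {
        dat.close();                      // roll back the partial acquisition
      }
    }
  }
}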
- final String id = DefaultDocValuesConsumer.docValuesId(segment, codecId, fieldInfo.number); - docValues.put(field, loadDocValues(docCount, dir, id, fieldInfo - .getDocValues())); + protected TreeMap load(FieldInfos fieldInfos, + String segment, int docCount, Directory dir, int codecId) + throws IOException { + TreeMap values = new TreeMap(); + boolean success = false; + try { + + for (FieldInfo fieldInfo : fieldInfos) { + if (codecId == fieldInfo.getCodecId() && fieldInfo.hasDocValues()) { + final String field = fieldInfo.name; + // TODO can we have a compound file per segment and codec for + // docvalues? + final String id = DefaultDocValuesConsumer.docValuesId(segment, + codecId, fieldInfo.number); + values.put(field, + loadDocValues(docCount, dir, id, fieldInfo.getDocValues())); + } + } + success = true; + } finally { + if (!success) { + // if we fail we must close all opened resources if there are any + closeDocValues(values.values()); } } + return values; } @@ -130,8 +145,12 @@ public class DefaultDocValuesProducer extends PerDocValues { } } - public void close() throws IOException { - final Collection values = docValues.values(); + public synchronized void close() throws IOException { + closeDocValues(docValues.values()); + } + + private void closeDocValues(final Collection values) + throws IOException { IOException ex = null; for (DocValues docValues : values) { try { diff --git a/lucene/src/java/org/apache/lucene/index/values/Bytes.java b/lucene/src/java/org/apache/lucene/index/values/Bytes.java index 501e7458d40..b254bc887c9 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Bytes.java +++ b/lucene/src/java/org/apache/lucene/index/values/Bytes.java @@ -434,6 +434,8 @@ public final class Bytes { this.id = id; datIn = dir.openInput(IndexFileNames.segmentFileName(id, "", Writer.DATA_EXTENSION)); + boolean success = false; + try { version = CodecUtil.checkHeader(datIn, codecName, maxVersion, maxVersion); if (doIndex) { idxIn = dir.openInput(IndexFileNames.segmentFileName(id, "", @@ -444,6 +446,12 @@ public final class Bytes { } else { idxIn = null; } + success = true; + } finally { + if (!success) { + closeInternal(); + } + } } /** @@ -467,12 +475,16 @@ public final class Bytes { try { super.close(); } finally { - try { - datIn.close(); - } finally { - if (idxIn != null) { - idxIn.close(); - } + closeInternal(); + } + } + + private void closeInternal() throws IOException { + try { + datIn.close(); + } finally { + if (idxIn != null) { + idxIn.close(); } } } diff --git a/lucene/src/java/org/apache/lucene/index/values/Floats.java b/lucene/src/java/org/apache/lucene/index/values/Floats.java index e09beac898b..c47789e9f3b 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Floats.java +++ b/lucene/src/java/org/apache/lucene/index/values/Floats.java @@ -71,7 +71,6 @@ public class Floats { } abstract static class FloatsWriter extends Writer { - private final Directory dir; private final String id; private FloatsRef floatsRef; protected int lastDocId = -1; @@ -81,20 +80,16 @@ public class Floats { protected FloatsWriter(Directory dir, String id, int precision, AtomicLong bytesUsed) throws IOException { super(bytesUsed); - this.dir = dir; this.id = id; this.precision = (byte) precision; - initDatOut(); - } - - private void initDatOut() throws IOException { datOut = dir.createOutput(IndexFileNames.segmentFileName(id, "", Writer.DATA_EXTENSION)); CodecUtil.writeHeader(datOut, CODEC_NAME, VERSION_CURRENT); assert datOut.getFilePointer() == CodecUtil.headerLength(CODEC_NAME); - 
datOut.writeByte(precision); + datOut.writeByte(this.precision); } + public long ramBytesUsed() { return 0; } @@ -125,8 +120,6 @@ public class Floats { assert reader.precisionBytes == (int) precision; if (reader.maxDoc == 0) return; - if (datOut == null) - initDatOut(); final int docBase = state.docBase; if (docBase - lastDocId > 1) { // fill with default values diff --git a/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java b/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java index 987f7486ff0..4c8bd632d4f 100644 --- a/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java @@ -60,13 +60,13 @@ class PackedIntsImpl { protected IntsWriter(Directory dir, String id, AtomicLong bytesUsed) throws IOException { super(bytesUsed); + datOut = dir.createOutput(IndexFileNames.segmentFileName(id, "", + DATA_EXTENSION)); + CodecUtil.writeHeader(datOut, CODEC_NAME, VERSION_CURRENT); this.id = id; docToValue = new long[1]; bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_LONG); // TODO the bitset // needs memory too - datOut = dir.createOutput(IndexFileNames.segmentFileName(id, "", - DATA_EXTENSION)); - CodecUtil.writeHeader(datOut, CODEC_NAME, VERSION_CURRENT); } @Override From 8f13a775b5f237e45ab31721fedc62602700b615 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Fri, 20 May 2011 12:35:46 +0000 Subject: [PATCH 043/116] removed unnecessary synchronization git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1125350 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/lucene/index/codecs/DefaultDocValuesProducer.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesProducer.java b/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesProducer.java index 497a0f815fa..d3d6980d63d 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesProducer.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesProducer.java @@ -66,7 +66,7 @@ public class DefaultDocValuesProducer extends PerDocValues { * null if this field has no {@link DocValues}. 
*/ @Override - public synchronized DocValues docValues(String field) throws IOException { + public DocValues docValues(String field) throws IOException { return docValues.get(field); } @@ -145,7 +145,7 @@ public class DefaultDocValuesProducer extends PerDocValues { } } - public synchronized void close() throws IOException { + public void close() throws IOException { closeDocValues(docValues.values()); } From cb7583bcb9066f9ef78eee701c92175a57260252 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Mon, 23 May 2011 10:58:45 +0000 Subject: [PATCH 044/116] LUCENE-3108: convert Float on load instead of converting for every lookup through FloatsBuffer git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1126430 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/lucene/index/values/Floats.java | 49 ++++++++++--------- .../index/values/TestDocValuesIndexing.java | 6 ++- 2 files changed, 31 insertions(+), 24 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/index/values/Floats.java b/lucene/src/java/org/apache/lucene/index/values/Floats.java index c47789e9f3b..588d1507fa3 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Floats.java +++ b/lucene/src/java/org/apache/lucene/index/values/Floats.java @@ -18,8 +18,6 @@ package org.apache.lucene.index.values; */ import java.io.IOException; import java.nio.ByteBuffer; -import java.nio.DoubleBuffer; -import java.nio.FloatBuffer; import java.util.Collection; import java.util.concurrent.atomic.AtomicLong; @@ -262,33 +260,41 @@ public class Floats { */ @Override public Source load() throws IOException { - /* - * the allocated byteBuffer always uses BIG_ENDIAN here - * and since the writer uses DataOutput#writeInt() / writeLong() - * we can allways assume BIGE_ENDIAN - */ - final ByteBuffer buffer = ByteBuffer.allocate(precisionBytes * maxDoc); - IndexInput indexInput = (IndexInput) datIn.clone(); + /* we always read BIG_ENDIAN here since the writer uses + * DataOutput#writeInt() / writeLong() we can simply read the ints / longs + * back in using readInt / readLong */ + final IndexInput indexInput = (IndexInput) datIn.clone(); indexInput.seek(CodecUtil.headerLength(CODEC_NAME)); // skip precision: indexInput.readByte(); - assert buffer.hasArray() : "Buffer must support Array"; - final byte[] arr = buffer.array(); - indexInput.readBytes(arr, 0, arr.length); - return precisionBytes == 4 ? 
new Source4(buffer) : new Source8(buffer);
+      if (precisionBytes == 4) {
+        final float[] values = new float[(4 * maxDoc) >> 2];
+        assert values.length == maxDoc;
+        for (int i = 0; i < values.length; i++) {
+          values[i] = Float.intBitsToFloat(indexInput.readInt());
+        }
+        return new Source4(values);
+      } else {
+        final double[] values = new double[(8 * maxDoc) >> 3];
+        assert values.length == maxDoc;
+        for (int i = 0; i < values.length; i++) {
+          values[i] = Double.longBitsToDouble(indexInput.readLong());
+        }
+        return new Source8(values);
+      }
     }

     private class Source4 extends Source {
-      private final FloatBuffer values;
+      private final float[] values;

-      Source4(ByteBuffer buffer) {
-        values = buffer.asFloatBuffer();
+      Source4(final float[] values) throws IOException {
+        this.values = values;
         missingValue.doubleValue = Float.NEGATIVE_INFINITY;
       }

       @Override
       public double getFloat(int docID) {
-        return values.get(docID);
+        return values[docID];
       }

       @Override
@@ -318,17 +324,16 @@
     }

     private class Source8 extends Source {
-      private final DoubleBuffer values;
+      private final double[] values;

-      Source8(ByteBuffer buffer) {
-        values = buffer.asDoubleBuffer();
+      Source8(final double[] values) throws IOException {
+        this.values = values;
         missingValue.doubleValue = Double.NEGATIVE_INFINITY;
-
       }

       @Override
       public double getFloat(int docID) {
-        return values.get(docID);
+        return values[docID];
       }

       @Override
diff --git a/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java b/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java
index 514af9fb4ea..644d81b913a 100644
--- a/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java
+++ b/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java
@@ -298,7 +298,7 @@ public class TestDocValuesIndexing extends LuceneTestCase {

       for (int i = 0; i < base; i++) {
         double value = floats.getFloat(i);
-        assertEquals(" floats failed for doc: " + i + " base: " + base,
+        assertEquals(val + " failed for doc: " + i + " base: " + base,
             missing.doubleValue, value, 0.0d);
       }
       DocValuesEnum floatEnum = getValuesEnum(floatReader);
@@ -528,9 +528,11 @@
         valField.setInt(i);
         break;
       case FLOAT_32:
-      case FLOAT_64:
         valField.setFloat(2.0f * i);
         break;
+      case FLOAT_64:
+        valField.setFloat(2.0d * i);
+        break;
       default:
         fail("unexpected value " + value);
       }
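The patch above trades a small one-time decode cost at load() for cheaper lookups: instead of wrapping the raw bytes in a FloatBuffer/DoubleBuffer view that converts on every getFloat(docID), it materializes a plain float[]/double[] once. The round-trip it relies on is just DataOutput's big-endian writeInt/writeLong paired with Float.intBitsToFloat/Double.longBitsToDouble; a self-contained sketch under those assumptions (class and variable names are illustrative, not Lucene APIs):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

final class ConvertOnLoadExample {
  public static void main(String[] args) throws IOException {
    // Write three floats the way the docvalues writer does: as raw int bits
    // through DataOutput#writeInt(), which is big-endian by specification.
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    DataOutputStream out = new DataOutputStream(bytes);
    for (float f : new float[] {1.5f, -2.25f, 0f}) {
      out.writeInt(Float.floatToRawIntBits(f));
    }
    out.close();

    // Load: decode every value once into a float[]; getFloat(docID) then
    // becomes a plain array read instead of a per-call buffer conversion.
    DataInputStream in = new DataInputStream(
        new ByteArrayInputStream(bytes.toByteArray()));
    float[] values = new float[3];
    for (int i = 0; i < values.length; i++) {
      values[i] = Float.intBitsToFloat(in.readInt());
    }
    in.close();
    System.out.println(values[0] + " " + values[1] + " " + values[2]);
  }
}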
From 6aaea0ce54b8004813ea583969f1fe9c3888625c Mon Sep 17 00:00:00 2001
From: Simon Willnauer
Date: Mon, 30 May 2011 02:11:13 +0000
Subject: [PATCH 045/116] fixed dead javadoc links

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1128981 13f79535-47bb-0310-9956-ffa450edef68
---
 .../java/org/apache/lucene/index/values/DocValues.java | 2 --
 .../java/org/apache/lucene/index/values/ValueType.java | 8 ++++----
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/lucene/src/java/org/apache/lucene/index/values/DocValues.java b/lucene/src/java/org/apache/lucene/index/values/DocValues.java
index ef25436cc73..71c08f0464e 100644
--- a/lucene/src/java/org/apache/lucene/index/values/DocValues.java
+++ b/lucene/src/java/org/apache/lucene/index/values/DocValues.java
@@ -29,8 +29,6 @@ import org.apache.lucene.util.BytesRef;
 /**
  * nocommit - javadoc
  *
- * @see FieldsEnum#docValues()
- * @see Fields#docValues(String)
  * @lucene.experimental
  */
 public abstract class DocValues implements Closeable {

diff --git a/lucene/src/java/org/apache/lucene/index/values/ValueType.java b/lucene/src/java/org/apache/lucene/index/values/ValueType.java
index aebbd614788..d1d4cedb75a 100644
--- a/lucene/src/java/org/apache/lucene/index/values/ValueType.java
+++ b/lucene/src/java/org/apache/lucene/index/values/ValueType.java
@@ -18,16 +18,16 @@ package org.apache.lucene.index.values;
  */
 import org.apache.lucene.index.codecs.Codec;
-import org.apache.lucene.index.codecs.FieldsConsumer;
+import org.apache.lucene.index.codecs.PerDocConsumer;
 import org.apache.lucene.index.values.DocValues.SortedSource;

 /**
  * {@link ValueType} specifies the type of the {@link DocValues} for a certain field.
  * A {@link ValueType} only defines the data type for a field while the actual
- * implemenation used to encode and decode the values depends on the field's
+ * implementation used to encode and decode the values depends on the field's
  * {@link Codec}. It is up to the {@link Codec} implementing
- * {@link FieldsConsumer#addValuesField(org.apache.lucene.index.FieldInfo)} and
- * using a different low-level implemenations to write the stored values for a
+ * {@link PerDocConsumer#addValuesField(org.apache.lucene.index.FieldInfo)} and
+ * using different low-level implementations to write the stored values for a
  * field.
  *
  * @lucene.experimental

From 4c4a0297a8fcdd873b53e9fcb1c0d0aab0585f88 Mon Sep 17 00:00:00 2001
From: Michael McCandless
Date: Mon, 30 May 2011 17:38:40 +0000
Subject: [PATCH 046/116] fix javadoc warning

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1129267 13f79535-47bb-0310-9956-ffa450edef68
---
 .../java/org/apache/solr/highlight/DefaultSolrHighlighter.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/solr/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java b/solr/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
index b0be39fd0b1..f95a940ae37 100644
--- a/solr/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
+++ b/solr/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
@@ -284,7 +284,7 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
   /**
    * Return a {@link org.apache.lucene.search.highlight.Fragmenter} appropriate for this field. If a fragmenter
    * has not been configured for this field, fall back to the configured
-   * default or the solr default ({@link org.apache.lucene.search.highlight.GapFragmenter}).
+   * default or the solr default ({@link GapFragmenter}).
    *
    * @param fieldName The name of the field
    * @param params The params controlling Highlighting
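The next patch moves the docvalues writers onto IOUtils.closeSafely(boolean, Closeable...). Judging from the call sites it adds -- true on failure paths that are already propagating a more interesting exception, false on the regular finish() path -- the boolean decides whether close-time failures are suppressed or the first one is rethrown after every resource has been given a chance to close. A sketch of that contract, reconstructed from the usage rather than copied from the Lucene source:

import java.io.Closeable;
import java.io.IOException;

final class CloseSafelySketch {
  static void closeSafely(boolean suppressExceptions, Closeable... objects)
      throws IOException {
    IOException firstException = null;
    for (Closeable object : objects) {
      try {
        if (object != null) {
          object.close();            // always attempt every close
        }
      } catch (IOException ioe) {
        if (firstException == null) {
          firstException = ioe;      // remember only the first failure
        }
      }
    }
    if (!suppressExceptions && firstException != null) {
      throw firstException;          // surface it unless suppressed
    }
  }
}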
From 7190c9ed301b38f3e10955f9484185bf41dab09e Mon Sep 17 00:00:00 2001
From: Simon Willnauer
Date: Tue, 31 May 2011 12:22:18 +0000
Subject: [PATCH 047/116] make sure we close all inputs and outputs in case of an error

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1129640 13f79535-47bb-0310-9956-ffa450edef68
---
 .../org/apache/lucene/index/values/Bytes.java | 34 +++++++++++--------
 .../apache/lucene/index/values/Floats.java    | 15 ++++++--
 .../lucene/index/values/PackedIntsImpl.java   | 31 +++++++++++++----
 3 files changed, 56 insertions(+), 24 deletions(-)

diff --git a/lucene/src/java/org/apache/lucene/index/values/Bytes.java b/lucene/src/java/org/apache/lucene/index/values/Bytes.java
index b254bc887c9..c7bdf3037a6 100644
--- a/lucene/src/java/org/apache/lucene/index/values/Bytes.java
+++ b/lucene/src/java/org/apache/lucene/index/values/Bytes.java
@@ -35,6 +35,7 @@ import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.ByteBlockPool;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CodecUtil;
+import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.PagedBytes;

 /**
@@ -353,16 +354,23 @@ public final class Bytes {
       super(bytesUsed);
       this.id = id;
       this.pool = pool;
-      datOut = dir.createOutput(IndexFileNames.segmentFileName(id, "",
+      datOut = dir.createOutput(IndexFileNames.segmentFileName(id, "",
           DATA_EXTENSION));
+      boolean success = false;
+      try {
         CodecUtil.writeHeader(datOut, codecName, version);
-      if (initIndex) {
-        idxOut = dir.createOutput(IndexFileNames.segmentFileName(id, "",
-            INDEX_EXTENSION));
-        CodecUtil.writeHeader(idxOut, codecName, version);
-      } else {
-        idxOut = null;
+        if (initIndex) {
+          idxOut = dir.createOutput(IndexFileNames.segmentFileName(id, "",
+              INDEX_EXTENSION));
+          CodecUtil.writeHeader(idxOut, codecName, version);
+        } else {
+          idxOut = null;
+        }
+        success = true;
+      } finally {
+        if (!success) {
+          IOUtils.closeSafely(true, datOut, idxOut);
+        }
       }
     }

@@ -376,14 +384,10 @@ public final class Bytes {
     @Override
     public void finish(int docCount) throws IOException {
       try {
-        datOut.close();
+        IOUtils.closeSafely(false, datOut, idxOut);
       } finally {
-        try {
-          if (idxOut != null)
-            idxOut.close();
-        } finally {
-          if (pool != null)
-            pool.reset();
+        if (pool != null) {
+          pool.reset();
         }
       }
     }

diff --git a/lucene/src/java/org/apache/lucene/index/values/Floats.java b/lucene/src/java/org/apache/lucene/index/values/Floats.java
index 588d1507fa3..468c08c4510 100644
--- a/lucene/src/java/org/apache/lucene/index/values/Floats.java
+++ b/lucene/src/java/org/apache/lucene/index/values/Floats.java
@@ -29,6 +29,7 @@ import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.CodecUtil;
 import org.apache.lucene.util.FloatsRef;
+import org.apache.lucene.util.IOUtils;

 /**
  * Exposes {@link Writer} and reader ({@link Source}) for 32 bit and 64 bit
@@ -82,9 +83,17 @@ public class Floats {
       this.precision = (byte) precision;
       datOut = dir.createOutput(IndexFileNames.segmentFileName(id, "",
           Writer.DATA_EXTENSION));
-      CodecUtil.writeHeader(datOut, CODEC_NAME, VERSION_CURRENT);
-      assert datOut.getFilePointer() == CodecUtil.headerLength(CODEC_NAME);
-      datOut.writeByte(this.precision);
+      boolean success = false;
+      try {
+        CodecUtil.writeHeader(datOut, CODEC_NAME, VERSION_CURRENT);
+        assert datOut.getFilePointer() == CodecUtil.headerLength(CODEC_NAME);
+
datOut.writeByte(this.precision); + success = true; + } finally { + if (!success) { + IOUtils.closeSafely(true, datOut); + } + } } diff --git a/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java b/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java index 4c8bd632d4f..5c61a93c5df 100644 --- a/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java @@ -27,6 +27,7 @@ import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.CodecUtil; +import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.LongsRef; import org.apache.lucene.util.OpenBitSet; import org.apache.lucene.util.RamUsageEstimator; @@ -62,11 +63,21 @@ class PackedIntsImpl { super(bytesUsed); datOut = dir.createOutput(IndexFileNames.segmentFileName(id, "", DATA_EXTENSION)); - CodecUtil.writeHeader(datOut, CODEC_NAME, VERSION_CURRENT); - this.id = id; - docToValue = new long[1]; - bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_LONG); // TODO the bitset - // needs memory too + boolean success = false; + try { + CodecUtil.writeHeader(datOut, CODEC_NAME, VERSION_CURRENT); + this.id = id; + docToValue = new long[1]; + bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_LONG); // TODO the + // bitset + // needs memory + // too + success = true; + } finally { + if (!success) { + datOut.close(); + } + } } @Override @@ -168,7 +179,15 @@ class PackedIntsImpl { protected IntsReader(Directory dir, String id) throws IOException { datIn = dir.openInput(IndexFileNames.segmentFileName(id, "", Writer.DATA_EXTENSION)); - CodecUtil.checkHeader(datIn, CODEC_NAME, VERSION_START, VERSION_START); + boolean success = false; + try { + CodecUtil.checkHeader(datIn, CODEC_NAME, VERSION_START, VERSION_START); + success = true; + } finally { + if (!success) { + IOUtils.closeSafely(true, datIn); + } + } } /** From 10ead35ab06709879be3cacb2d38a05860baa2a5 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Fri, 3 Jun 2011 14:33:19 +0000 Subject: [PATCH 048/116] LUCENE-3108: Cut over to consistent default values and added support for entire long range git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1131057 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/lucene/index/values/Bytes.java | 24 +- .../apache/lucene/index/values/DocValues.java | 84 ++----- .../index/values/FixedDerefBytesImpl.java | 3 +- .../index/values/FixedSortedBytesImpl.java | 4 +- .../index/values/FixedStraightBytesImpl.java | 1 - .../apache/lucene/index/values/Floats.java | 34 +-- .../lucene/index/values/MultiDocValues.java | 8 +- .../lucene/index/values/PackedIntsImpl.java | 218 +++++++++++++----- .../index/values/VarDerefBytesImpl.java | 3 +- .../index/values/VarSortedBytesImpl.java | 4 +- .../index/values/VarStraightBytesImpl.java | 1 - .../apache/lucene/search/FieldComparator.java | 11 +- .../org/apache/lucene/util/_TestUtil.java | 53 +++-- .../lucene/index/values/TestDocValues.java | 201 ++++++++-------- .../index/values/TestDocValuesIndexing.java | 53 +++-- 15 files changed, 377 insertions(+), 325 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/index/values/Bytes.java b/lucene/src/java/org/apache/lucene/index/values/Bytes.java index c7bdf3037a6..d94fa17b220 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Bytes.java +++ b/lucene/src/java/org/apache/lucene/index/values/Bytes.java @@ -24,7 +24,6 @@ import 
java.util.Comparator; import java.util.concurrent.atomic.AtomicLong; import org.apache.lucene.index.IndexFileNames; -import org.apache.lucene.index.values.DocValues.MissingValue; import org.apache.lucene.index.values.DocValues.SortedSource; import org.apache.lucene.index.values.DocValues.Source; import org.apache.lucene.index.values.DocValues.SourceEnum; @@ -225,14 +224,13 @@ public final class Bytes { @Override public DocValuesEnum getEnum(AttributeSource attrSource) throws IOException { - final MissingValue missing = getMissing(); return new SourceEnum(attrSource, type(), this, maxDoc()) { @Override public int advance(int target) throws IOException { if (target >= numDocs) { return pos = NO_MORE_DOCS; } - while (source.getBytes(target, bytesRef) == missing.bytesValue) { + while (source.getBytes(target, bytesRef).length == 0) { if (++target >= numDocs) { return pos = NO_MORE_DOCS; } @@ -251,7 +249,6 @@ public final class Bytes { protected final static int PAGED_BYTES_BITS = 15; private final PagedBytes pagedBytes; protected final PagedBytes.Reader data; - protected final LookupResult lookupResult = new LookupResult(); private final Comparator comp; protected BytesBaseSortedSource(IndexInput datIn, IndexInput idxIn, @@ -271,7 +268,8 @@ public final class Bytes { @Override public BytesRef getByOrd(int ord, BytesRef bytesRef) { - return ord == 0 ? null : deref(--ord, bytesRef); + assert ord >= 0; + return deref(ord, bytesRef); } protected void closeIndexInput() throws IOException { @@ -297,10 +295,11 @@ public final class Bytes { */ protected abstract BytesRef deref(int ord, BytesRef bytesRef); - protected LookupResult binarySearch(BytesRef b, BytesRef bytesRef, int low, + protected int binarySearch(BytesRef b, BytesRef bytesRef, int low, int high) { + int mid = 0; while (low <= high) { - int mid = (low + high) >>> 1; + mid = (low + high) >>> 1; deref(mid, bytesRef); final int cmp = comp.compare(bytesRef, b); if (cmp < 0) { @@ -308,20 +307,15 @@ public final class Bytes { } else if (cmp > 0) { high = mid - 1; } else { - lookupResult.ord = mid + 1; - lookupResult.found = true; - return lookupResult; + return mid; } } assert comp.compare(bytesRef, b) != 0; - lookupResult.ord = low; - lookupResult.found = false; - return lookupResult; + return -(low + 1); } @Override public DocValuesEnum getEnum(AttributeSource attrSource) throws IOException { - final MissingValue missing = getMissing(); return new SourceEnum(attrSource, type(), this, maxDoc()) { @Override @@ -329,7 +323,7 @@ public final class Bytes { if (target >= numDocs) { return pos = NO_MORE_DOCS; } - while (source.getBytes(target, bytesRef) == missing.bytesValue) { + while (source.getBytes(target, bytesRef).length == 0) { if (++target >= numDocs) { return pos = NO_MORE_DOCS; } diff --git a/lucene/src/java/org/apache/lucene/index/values/DocValues.java b/lucene/src/java/org/apache/lucene/index/values/DocValues.java index 71c08f0464e..4ad9a07dbc8 100644 --- a/lucene/src/java/org/apache/lucene/index/values/DocValues.java +++ b/lucene/src/java/org/apache/lucene/index/values/DocValues.java @@ -169,8 +169,6 @@ public abstract class DocValues implements Closeable { * {@link Source} defines 3 {@link ValueType} //TODO finish this */ public static abstract class Source { - // TODO we might need a close method here to null out the internal used arrays?! 
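// This commit moves the sorted-bytes lookup onto the same convention
// java.util.Arrays.binarySearch uses: a hit returns the ordinal (>= 0), a
// miss returns -(insertionPoint) - 1, and documents without a value map to
// ord -1 instead of going through a shared MissingValue sentinel. A small
// self-contained sketch of how a caller decodes that negative encoding
// (sorted string array and names are illustrative only):

import java.util.Arrays;

final class OrdLookupExample {
  public static void main(String[] args) {
    String[] sortedValues = {"bar", "baz", "foo"}; // stand-in for sorted ords
    int ord = Arrays.binarySearch(sortedValues, "cat");
    if (ord >= 0) {
      System.out.println("found at ord " + ord);
    } else {
      int insertionPoint = -(ord + 1);             // first element > "cat"
      System.out.println("missing; would insert at ord " + insertionPoint);
    }
  }
}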
- protected final MissingValue missingValue = new MissingValue(); /** * Returns a long for the given document id or throws an @@ -179,8 +177,6 @@ public abstract class DocValues implements Closeable { * * @throws UnsupportedOperationException * if this source doesn't support long values. - * @see MissingValue - * @see #getMissing() */ public long getInt(int docID) { throw new UnsupportedOperationException("ints are not supported"); @@ -193,8 +189,6 @@ public abstract class DocValues implements Closeable { * * @throws UnsupportedOperationException * if this source doesn't support double values. - * @see MissingValue - * @see #getMissing() */ public double getFloat(int docID) { throw new UnsupportedOperationException("floats are not supported"); @@ -207,8 +201,6 @@ public abstract class DocValues implements Closeable { * * @throws UnsupportedOperationException * if this source doesn't support byte[] values. - * @see MissingValue - * @see #getMissing() */ public BytesRef getBytes(int docID, BytesRef ref) { throw new UnsupportedOperationException("bytes are not supported"); @@ -229,17 +221,6 @@ public abstract class DocValues implements Closeable { return getEnum(null); } - /** - * Returns a {@link MissingValue} instance for this {@link Source}. - * Depending on the type of this {@link Source} consumers of the API should - * check if the value returned from on of the getter methods represents a - * value for a missing document or rather a value for a document no value - * was specified during indexing. - */ - public MissingValue getMissing() { - return missingValue; - } - /** * Returns the {@link ValueType} of this source. * @@ -310,7 +291,13 @@ public abstract class DocValues implements Closeable { @Override public BytesRef getBytes(int docID, BytesRef bytesRef) { - return getByOrd(ord(docID), bytesRef); + final int ord = ord(docID); + if (ord < 0) { + bytesRef.length = 0; + } else { + getByOrd(ord , bytesRef); + } + return bytesRef; } /** @@ -323,22 +310,18 @@ public abstract class DocValues implements Closeable { /** Returns value for specified ord. */ public abstract BytesRef getByOrd(int ord, BytesRef bytesRef); - public static class LookupResult { - /** true iff the values was found */ - public boolean found; - /** - * the ordinal of the value if found or the ordinal of the value if it - * would be present in the source - */ - public int ord; - } /** - * Finds the largest ord whose value is less or equal to the requested - * value. If {@link LookupResult#found} is true, then ord is an exact match. - * The returned {@link LookupResult} may be reused across calls. + * Finds the ordinal whose value is greater or equal to the given value. + * + * @return the given values ordinal if found or otherwise + * (-(ord)-1), defined as the ordinal of the first + * element that is greater than the given value. This guarantees + * that the return value will always be >= 0 if the given value + * is found. + * */ - public final LookupResult getByValue(BytesRef value) { + public final int getByValue(BytesRef value) { return getByValue(value, new BytesRef()); } @@ -350,35 +333,12 @@ public abstract class DocValues implements Closeable { * @param tmpRef * a temporary {@link BytesRef} instance used to compare internal * values to the given value. Must not be null - * @return the {@link LookupResult} + * @return the given values ordinal if found or otherwise + * (-(ord)-1), defined as the ordinal of the first + * element that is greater than the given value. 
This guarantees + * that the return value will always be >= 0 if the given value + * is found. */ - public abstract LookupResult getByValue(BytesRef value, BytesRef tmpRef); + public abstract int getByValue(BytesRef value, BytesRef tmpRef); } - - /** - * {@link MissingValue} is used by {@link Source} implementations to define an - * Implementation dependent value for documents that had no value assigned - * during indexing. Its purpose is similar to a default value but since the a - * missing value across {@link ValueType} and its implementations can be highly - * dynamic the actual values are not constant but defined per {@link Source} - * through the {@link MissingValue} struct. The actual value used to indicate - * a missing value can even changed within the same field from one segment to - * another. Certain {@link Ints} implementations for instance use a value - * outside of value set as the missing value. - */ - public final static class MissingValue { - public long longValue; - public double doubleValue; - public BytesRef bytesValue; - - /** - * Copies the values from the given {@link MissingValue}. - */ - public final void copy(MissingValue values) { - longValue = values.longValue; - doubleValue = values.doubleValue; - bytesValue = values.bytesValue; - } - } - } diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java index 6a5fc0b597b..5513e4e06da 100644 --- a/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java @@ -161,7 +161,8 @@ class FixedDerefBytesImpl { public BytesRef getBytes(int docID, BytesRef bytesRef) { final int id = (int) index.get(docID); if (id == 0) { - return null; + bytesRef.length = 0; + return bytesRef; } return data.fillSlice(bytesRef, ((id - 1) * size), size); } diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java index 9d9dcffc836..e1385a41838 100644 --- a/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java @@ -199,11 +199,11 @@ class FixedSortedBytesImpl { @Override public int ord(int docID) { - return (int) index.get(docID); + return (int) index.get(docID) -1; } @Override - public LookupResult getByValue(BytesRef bytes, BytesRef tmpRef) { + public int getByValue(BytesRef bytes, BytesRef tmpRef) { return binarySearch(bytes, tmpRef, 0, numValue - 1); } diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java index fceafc76d34..a28e7d2793d 100644 --- a/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java @@ -155,7 +155,6 @@ class FixedStraightBytesImpl { throws IOException { super(datIn, null, new PagedBytes(PAGED_BYTES_BITS), size * maxDoc); this.size = size; - this.missingValue.bytesValue = new BytesRef(size); this.maxDoc = maxDoc; } diff --git a/lucene/src/java/org/apache/lucene/index/values/Floats.java b/lucene/src/java/org/apache/lucene/index/values/Floats.java index 468c08c4510..72d1ec83eea 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Floats.java +++ b/lucene/src/java/org/apache/lucene/index/values/Floats.java @@ -17,7 +17,6 @@ 
package org.apache.lucene.index.values; * limitations under the License. */ import java.io.IOException; -import java.nio.ByteBuffer; import java.util.Collection; import java.util.concurrent.atomic.AtomicLong; @@ -46,9 +45,9 @@ public class Floats { static final int VERSION_START = 0; static final int VERSION_CURRENT = VERSION_START; private static final int INT_DEFAULT = Float - .floatToRawIntBits(Float.NEGATIVE_INFINITY); + .floatToRawIntBits(0.0f); private static final long LONG_DEFAULT = Double - .doubleToRawLongBits(Double.NEGATIVE_INFINITY); + .doubleToRawLongBits(0.0d); public static Writer getWriter(Directory dir, String id, int precisionBytes, @@ -298,7 +297,6 @@ public class Floats { Source4(final float[] values ) throws IOException { this.values = values; - missingValue.doubleValue = Float.NEGATIVE_INFINITY; } @Override @@ -309,17 +307,11 @@ public class Floats { @Override public DocValuesEnum getEnum(AttributeSource attrSource) throws IOException { - final MissingValue missing = getMissing(); return new SourceEnum(attrSource, ValueType.FLOAT_32, this, maxDoc) { @Override public int advance(int target) throws IOException { if (target >= numDocs) return pos = NO_MORE_DOCS; - while (missing.doubleValue == source.getFloat(target)) { - if (++target >= numDocs) { - return pos = NO_MORE_DOCS; - } - } floatsRef.floats[floatsRef.offset] = source.getFloat(target); return pos = target; } @@ -337,7 +329,6 @@ public class Floats { Source8(final double[] values) throws IOException { this.values = values; - missingValue.doubleValue = Double.NEGATIVE_INFINITY; } @Override @@ -348,17 +339,11 @@ public class Floats { @Override public DocValuesEnum getEnum(AttributeSource attrSource) throws IOException { - final MissingValue missing = getMissing(); return new SourceEnum(attrSource, type(), this, maxDoc) { @Override public int advance(int target) throws IOException { if (target >= numDocs) return pos = NO_MORE_DOCS; - while (missing.doubleValue == source.getFloat(target)) { - if (++target >= numDocs) { - return pos = NO_MORE_DOCS; - } - } floatsRef.floats[floatsRef.offset] = source.getFloat(target); return pos = target; } @@ -406,11 +391,7 @@ public class Floats { if (target >= maxDoc) return pos = NO_MORE_DOCS; dataIn.seek(fp + (target * precision)); - int intBits; - while ((intBits = dataIn.readInt()) == INT_DEFAULT) { - if (++target >= maxDoc) - return pos = NO_MORE_DOCS; - } + final int intBits = dataIn.readInt(); floatsRef.floats[0] = Float.intBitsToFloat(intBits); floatsRef.offset = 0; return pos = target; @@ -443,13 +424,8 @@ public class Floats { return pos = NO_MORE_DOCS; } dataIn.seek(fp + (target * precision)); - long value; - while ((value = dataIn.readLong()) == LONG_DEFAULT) { - if (++target >= maxDoc) - return pos = NO_MORE_DOCS; - } - floatsRef.floats[0] = Double.longBitsToDouble(value); - floatsRef.offset = 0; + final long value = dataIn.readLong(); + floatsRef.floats[floatsRef.offset] = Double.longBitsToDouble(value); return pos = target; } diff --git a/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java b/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java index cd27d4563af..6456f9d178b 100644 --- a/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java +++ b/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java @@ -200,7 +200,6 @@ public class MultiDocValues extends DocValues { assert docValuesIdx[idx] != null; try { current = docValuesIdx[idx].docValues.getSource(); - missingValue.copy(current.getMissing()); } catch 
(IOException e) { throw new RuntimeException("load failed", e); // TODO how should we // handle this @@ -243,18 +242,19 @@ public class MultiDocValues extends DocValues { @Override public BytesRef getBytes(int docID, BytesRef ref) { - return this.missingValue.bytesValue; + ref.length = 0; + return ref; } @Override public double getFloat(int docID) { - return missingValue.doubleValue; + return 0d; } @Override public long getInt(int docID) { - return missingValue.longValue; + return 0; } @Override diff --git a/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java b/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java index 5c61a93c5df..ca5831742e6 100644 --- a/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java @@ -29,7 +29,6 @@ import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.CodecUtil; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.LongsRef; -import org.apache.lucene.util.OpenBitSet; import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.packed.PackedInts; @@ -40,7 +39,9 @@ import org.apache.lucene.util.packed.PackedInts; * */ class PackedIntsImpl { - private static final String CODEC_NAME = "PackedInts"; + private static final String CODEC_NAME = "Ints"; + private static final byte PACKED = 0x00; + private static final byte FIXED = 0x01; static final int VERSION_START = 0; static final int VERSION_CURRENT = VERSION_START; @@ -54,7 +55,6 @@ class PackedIntsImpl { private long maxValue; private boolean started; private final String id; - private final OpenBitSet defaultValues = new OpenBitSet(1); private int lastDocId = -1; private IndexOutput datOut; @@ -93,13 +93,11 @@ class PackedIntsImpl { maxValue = v; } } - defaultValues.set(docID); lastDocId = docID; if (docID >= docToValue.length) { final long len = docToValue.length; docToValue = ArrayUtil.grow(docToValue, 1 + docID); - defaultValues.ensureCapacity(docToValue.length); bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_LONG * ((docToValue.length) - len)); } @@ -112,40 +110,51 @@ class PackedIntsImpl { if (!started) { minValue = maxValue = 0; } - // TODO -- long can't work right since it's signed - datOut.writeLong(minValue); - // write a default value to recognize docs without a value for that - // field - final long defaultValue = ++maxValue - minValue; - datOut.writeLong(defaultValue); - PackedInts.Writer w = PackedInts.getWriter(datOut, docCount, - PackedInts.bitsRequired(maxValue - minValue)); - final int firstDoc = defaultValues.nextSetBit(0); - lastDocId++; - if (firstDoc != -1) { - for (int i = 0; i < firstDoc; i++) { - w.add(defaultValue); // fill with defaults until first bit set - } + // if we exceed the range of positive longs we must switch to fixed ints + if ((maxValue - minValue) < (((long)1) << 63) && (maxValue - minValue) > 0) { + writePackedInts(docCount); + } else { + writeFixedInts(docCount); + } - for (int i = firstDoc; i < lastDocId;) { - w.add(docToValue[i] - minValue); - final int nextValue = defaultValues.nextSetBit(++i); - for (; i < nextValue; i++) { - w.add(defaultValue); // fill all gaps - } - } - } - for (int i = lastDocId; i < docCount; i++) { - w.add(defaultValue); - } - w.finish(); } finally { datOut.close(); bytesUsed .addAndGet(-(RamUsageEstimator.NUM_BYTES_LONG * docToValue.length)); docToValue = null; } + } + private void writeFixedInts(int docCount) throws IOException { + datOut.writeByte(FIXED); + 
datOut.writeInt(docCount); + for (int i = 0; i < docToValue.length; i++) { + datOut.writeLong(docToValue[i]); // write full array - we use 0 as default + } + for (int i = docToValue.length; i < docCount; i++) { + datOut.writeLong(0); // fill with defaults values + } + } + + private void writePackedInts(int docCount) throws IOException { + datOut.writeByte(PACKED); + // TODO -- long can't work right since it's signed + datOut.writeLong(minValue); + // write a default value to recognize docs without a value for that + // field + final long defaultValue = maxValue>= 0 && minValue <=0 ? 0-minValue : ++maxValue-minValue; + datOut.writeLong(defaultValue); + PackedInts.Writer w = PackedInts.getWriter(datOut, docCount, + PackedInts.bitsRequired(maxValue-minValue)); + final int limit = docToValue.length > docCount ? docCount : docToValue.length; + for (int i = 0; i < limit; i++) { + w.add(docToValue[i] == 0 ? defaultValue : docToValue[i] - minValue); + } + for (int i = limit; i < docCount; i++) { + w.add(defaultValue); + } + + w.finish(); } @Override @@ -175,6 +184,7 @@ class PackedIntsImpl { */ static class IntsReader extends DocValues { private final IndexInput datIn; + private final boolean packed; protected IntsReader(Directory dir, String id) throws IOException { datIn = dir.openInput(IndexFileNames.segmentFileName(id, "", @@ -182,6 +192,7 @@ class PackedIntsImpl { boolean success = false; try { CodecUtil.checkHeader(datIn, CODEC_NAME, VERSION_START, VERSION_START); + packed = PACKED == datIn.readByte(); success = true; } finally { if (!success) { @@ -196,20 +207,69 @@ class PackedIntsImpl { */ @Override public Source load() throws IOException { - return new IntsSource((IndexInput) datIn.clone()); + final IndexInput input = (IndexInput) datIn.clone(); + boolean success = false; + try { + final Source source = packed ? 
new PackedIntsSource(input) + : new FixedIntsSource(input); + success = true; + return source; + } finally { + if (!success) { + IOUtils.closeSafely(true, datIn); + } + } + } + + private static class FixedIntsSource extends Source { + private final long[] values; + public FixedIntsSource(IndexInput dataIn) throws IOException { + dataIn.seek(CodecUtil.headerLength(CODEC_NAME) + 1); + final int numDocs = dataIn.readInt(); + values = new long[numDocs]; + for (int i = 0; i < values.length; i++) { + values[i] = dataIn.readLong(); + } + } + + @Override + public long getInt(int docID) { + assert docID >= 0 && docID < values.length; + return values[docID]; + } + + @Override + public ValueType type() { + return ValueType.INTS; + } + + @Override + public DocValuesEnum getEnum(AttributeSource attrSource) + throws IOException { + return new SourceEnum(attrSource, type(), this, values.length) { + + @Override + public int advance(int target) throws IOException { + if (target >= numDocs) + return pos = NO_MORE_DOCS; + intsRef.ints[intsRef.offset] = values[target]; + return pos = target; + } + }; + } + } - private static class IntsSource extends Source { + private static class PackedIntsSource extends Source { private final long minValue; private final long defaultValue; private final PackedInts.Reader values; - public IntsSource(IndexInput dataIn) throws IOException { - dataIn.seek(CodecUtil.headerLength(CODEC_NAME)); + public PackedIntsSource(IndexInput dataIn) throws IOException { + dataIn.seek(CodecUtil.headerLength(CODEC_NAME) + 1); minValue = dataIn.readLong(); defaultValue = dataIn.readLong(); values = PackedInts.getReader(dataIn); - missingValue.longValue = minValue + defaultValue; } @Override @@ -218,23 +278,18 @@ class PackedIntsImpl { // on each get? must push minValue down, and make // PackedInts implement Ints.Source assert docID >= 0; - return minValue + values.get(docID); + final long value = values.get(docID); + return value == defaultValue ? 0 : minValue + value; } @Override public DocValuesEnum getEnum(AttributeSource attrSource) throws IOException { - final MissingValue missing = getMissing(); return new SourceEnum(attrSource, type(), this, values.size()) { @Override public int advance(int target) throws IOException { if (target >= numDocs) return pos = NO_MORE_DOCS; - while (source.getInt(target) == missing.longValue) { - if (++target >= numDocs) { - return pos = NO_MORE_DOCS; - } - } intsRef.ints[intsRef.offset] = source.getInt(target); return pos = target; } @@ -255,7 +310,18 @@ class PackedIntsImpl { @Override public DocValuesEnum getEnum(AttributeSource source) throws IOException { - return new IntsEnumImpl(source, (IndexInput) datIn.clone()); + final IndexInput input = (IndexInput) datIn.clone(); + boolean success = false; + try { + DocValuesEnum inst = packed ? 
new PackedIntsEnumImpl(source, input) + : new FixedIntsEnumImpl(source, input); + success = true; + return inst; + } finally { + if (!success) { + IOUtils.closeSafely(true, input); + } + } } @Override @@ -265,7 +331,7 @@ class PackedIntsImpl { } - private static final class IntsEnumImpl extends DocValuesEnum { + private static final class PackedIntsEnumImpl extends DocValuesEnum { private final PackedInts.ReaderIterator ints; private long minValue; private final IndexInput dataIn; @@ -273,12 +339,12 @@ class PackedIntsImpl { private final int maxDoc; private int pos = -1; - private IntsEnumImpl(AttributeSource source, IndexInput dataIn) + private PackedIntsEnumImpl(AttributeSource source, IndexInput dataIn) throws IOException { super(source, ValueType.INTS); intsRef.offset = 0; this.dataIn = dataIn; - dataIn.seek(CodecUtil.headerLength(CODEC_NAME)); + dataIn.seek(CodecUtil.headerLength(CODEC_NAME) + 1); minValue = dataIn.readLong(); defaultValue = dataIn.readLong(); this.ints = PackedInts.getReaderIterator(dataIn); @@ -296,15 +362,8 @@ class PackedIntsImpl { if (target >= maxDoc) { return pos = NO_MORE_DOCS; } - long val = ints.advance(target); - while (val == defaultValue) { - if (++target >= maxDoc) { - return pos = NO_MORE_DOCS; - } - val = ints.advance(target); - } - intsRef.ints[0] = minValue + val; - intsRef.offset = 0; // can we skip this? + final long val = ints.advance(target); + intsRef.ints[intsRef.offset] = val == defaultValue ? 0 : minValue + val; return pos = target; } @@ -321,4 +380,51 @@ class PackedIntsImpl { return advance(pos + 1); } } + + private static final class FixedIntsEnumImpl extends DocValuesEnum { + private final IndexInput dataIn; + private final int maxDoc; + private int pos = -1; + + private FixedIntsEnumImpl(AttributeSource source, IndexInput dataIn) + throws IOException { + super(source, ValueType.INTS); + intsRef.offset = 0; + this.dataIn = dataIn; + dataIn.seek(CodecUtil.headerLength(CODEC_NAME) + 1); + maxDoc = dataIn.readInt(); + } + + @Override + public void close() throws IOException { + dataIn.close(); + } + + @Override + public int advance(int target) throws IOException { + if (target >= maxDoc) { + return pos = NO_MORE_DOCS; + } + assert target > pos; + if (target > pos+1) { + dataIn.seek(dataIn.getFilePointer() + ((target - pos - 1) * 8)); + } + intsRef.ints[intsRef.offset] = dataIn.readLong(); + return pos = target; + } + + @Override + public int docID() { + return pos; + } + + @Override + public int nextDoc() throws IOException { + if (pos >= maxDoc) { + return pos = NO_MORE_DOCS; + } + return advance(pos + 1); + } + } + } \ No newline at end of file diff --git a/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java index 8f29dcaa0ec..f049119e51a 100644 --- a/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java @@ -227,7 +227,8 @@ class VarDerefBytesImpl { @Override public BytesRef getBytes(int docID, BytesRef bytesRef) { long address = index.get(docID); - return address == 0 ? null : data.fillSliceWithPrefix(bytesRef, + bytesRef.length = 0; + return address == 0 ? 
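/* address == 0 marks a document without a value; the reader now hands
 * back the cleared incoming BytesRef (length 0) instead of null. */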
bytesRef : data.fillSliceWithPrefix(bytesRef, --address); } diff --git a/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java index 9bfaa809a04..c4596922099 100644 --- a/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java @@ -191,11 +191,11 @@ class VarSortedBytesImpl { @Override public int ord(int docID) { - return (int) docToOrdIndex.get(docID); + return (int) docToOrdIndex.get(docID) - 1; } @Override - public LookupResult getByValue(BytesRef bytes, BytesRef tmpRef) { + public int getByValue(BytesRef bytes, BytesRef tmpRef) { return binarySearch(bytes, tmpRef, 0, valueCount - 1); } diff --git a/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java index 3ac37097338..477055dac0e 100644 --- a/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java @@ -137,7 +137,6 @@ class VarStraightBytesImpl { public Source(IndexInput datIn, IndexInput idxIn) throws IOException { super(datIn, idxIn, new PagedBytes(PAGED_BYTES_BITS), idxIn.readVLong()); addresses = PackedInts.getReader(idxIn); - missingValue.bytesValue = new BytesRef(0); // empty } @Override diff --git a/lucene/src/java/org/apache/lucene/search/FieldComparator.java b/lucene/src/java/org/apache/lucene/search/FieldComparator.java index 52c5bdb2ea0..3dbb76ed661 100644 --- a/lucene/src/java/org/apache/lucene/search/FieldComparator.java +++ b/lucene/src/java/org/apache/lucene/search/FieldComparator.java @@ -21,7 +21,6 @@ import java.io.IOException; import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.values.DocValues; -import org.apache.lucene.index.values.DocValues.MissingValue; import org.apache.lucene.index.values.DocValues.Source; import org.apache.lucene.search.FieldCache.DocTerms; import org.apache.lucene.search.FieldCache.DocTermsIndex; @@ -337,7 +336,6 @@ public abstract class FieldComparator { private final String field; private double bottom; private final float missingValue; - private MissingValue missing; FloatDocValuesComparator(int numHits, String field, Float missingValue) { values = new double[numHits]; @@ -372,8 +370,7 @@ public abstract class FieldComparator { @Override public void copy(int slot, int doc) { - final double value = currentReaderValues.getFloat(doc); - values[slot] = value == missing.doubleValue ? missingValue : value; + values[slot] = currentReaderValues.getFloat(doc); } @Override @@ -381,7 +378,6 @@ public abstract class FieldComparator { final DocValues docValues = context.reader.docValues(field); if (docValues != null) { currentReaderValues = docValues.getSource(); - missing = currentReaderValues.getMissing(); } return this; } @@ -612,7 +608,6 @@ public abstract class FieldComparator { private final String field; private long bottom; private int missingValue; - private MissingValue missing; IntDocValuesComparator(int numHits, String field, Integer missingValue) { values = new long[numHits]; @@ -651,8 +646,7 @@ public abstract class FieldComparator { @Override public void copy(int slot, int doc) { - final long value = currentReaderValues.getInt(doc); - values[slot] = value == missing.longValue ? 
missingValue : value; + values[slot] = currentReaderValues.getInt(doc); } @Override @@ -660,7 +654,6 @@ public abstract class FieldComparator { DocValues docValues = context.reader.docValues(field); if (docValues != null) { currentReaderValues = docValues.getSource(); - missing = currentReaderValues.getMissing(); } return this; } diff --git a/lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java b/lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java index 92da41f6fc7..89f1f3f89a6 100644 --- a/lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java +++ b/lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java @@ -287,35 +287,46 @@ public class _TestUtil { return sb.toString(); } - public static String randomUnicodeString(Random r, int minLength, int maxLength) { - if(minLength > maxLength) - throw new IllegalArgumentException("minLength must be >= maxLength"); - final boolean lenEqual = minLength==maxLength; - final int end = lenEqual?minLength:minLength + r.nextInt(maxLength-minLength+1); - if (end == 0) { - // allow 0 length - return ""; - } + /** Returns random string, with a given UTF-8 byte length*/ + public static String randomFixedByteLengthUnicodeString(Random r, int length) { - // TODO(simonw): check this - final int fixedPlane = 5;//minLength % 5; - final char[] buffer = new char[end]; - for (int i = 0; i < end; i++) { - int t = lenEqual? fixedPlane: r.nextInt(5); - //buffer[i] = (char) (97 + r.nextInt(26)); - if (0 == t && i < end - 1 && !lenEqual) { + final char[] buffer = new char[length*3]; + int bytes = length; + int i = 0; + for (; i < buffer.length && bytes != 0; i++) { + int t; + if (bytes >= 4) { + t = r.nextInt(5); + } else if (bytes >= 3) { + t = r.nextInt(4); + } else if (bytes >= 2) { + t = r.nextInt(2); + } else { + t = 0; + } + if (t == 0) { + buffer[i] = (char) r.nextInt(0x80); + bytes--; + } else if (1 == t) { + buffer[i] = (char) nextInt(r, 0x80, 0x7ff); + bytes -= 2; + } else if (2 == t) { + buffer[i] = (char) nextInt(r, 0x800, 0xd7ff); + bytes -= 3; + } else if (3 == t) { + buffer[i] = (char) nextInt(r, 0xe000, 0xffff); + bytes -= 3; + } else if (4 == t) { // Make a surrogate pair // High surrogate buffer[i++] = (char) nextInt(r, 0xd800, 0xdbff); // Low surrogate buffer[i] = (char) nextInt(r, 0xdc00, 0xdfff); + bytes -= 4; } - else if (t <= 1) buffer[i] = (char) r.nextInt(0x80); - else if (2 == t) buffer[i] = (char) nextInt(r, 0x80, 0x800); - else if (3 == t) buffer[i] = (char) nextInt(r, 0x800, 0xd7ff); - else if (4 == t) buffer[i] = (char) nextInt(r, 0xe000, 0xffff); + } - return new String(buffer, 0, end); + return new String(buffer, 0, i); } public static CodecProvider alwaysCodec(final Codec c) { diff --git a/lucene/src/test/org/apache/lucene/index/values/TestDocValues.java b/lucene/src/test/org/apache/lucene/index/values/TestDocValues.java index 694ed68d1a4..5cc14d806d3 100644 --- a/lucene/src/test/org/apache/lucene/index/values/TestDocValues.java +++ b/lucene/src/test/org/apache/lucene/index/values/TestDocValues.java @@ -50,7 +50,7 @@ public class TestDocValues extends LuceneTestCase { runTestBytes(Bytes.Mode.SORTED, true); runTestBytes(Bytes.Mode.SORTED, false); } - + public void runTestBytes(final Bytes.Mode mode, final boolean fixedSize) throws IOException { @@ -64,20 +64,14 @@ public class TestDocValues extends LuceneTestCase { Writer w = Bytes.getWriter(dir, "test", mode, comp, fixedSize, trackBytes); int maxDoc = 220; final String[] values = new String[maxDoc]; - final int lenMin, lenMax; - if (fixedSize) { - 
lenMin = lenMax = 3 + random.nextInt(7); - } else { - lenMin = 1; - lenMax = 15 + random.nextInt(6); - } + final int fixedLength = 3 + random.nextInt(7); for (int i = 0; i < 100; i++) { final String s; if (i > 0 && random.nextInt(5) <= 2) { // use prior value s = values[2 * random.nextInt(i)]; } else { - s = _TestUtil.randomUnicodeString(random, lenMin, lenMax); + s = _TestUtil.randomFixedByteLengthUnicodeString(random, fixedSize? fixedLength : 1 + random.nextInt(39)); } values[2 * i] = s; @@ -89,7 +83,7 @@ public class TestDocValues extends LuceneTestCase { DocValues r = Bytes.getValues(dir, "test", mode, fixedSize, maxDoc); for (int iter = 0; iter < 2; iter++) { - DocValuesEnum bytesEnum = r.getEnum(); + DocValuesEnum bytesEnum = getEnum(r); assertNotNull("enum is null", bytesEnum); BytesRef ref = bytesEnum.bytes(); @@ -125,10 +119,10 @@ public class TestDocValues extends LuceneTestCase { if (ss != null) { assertEquals("doc " + idx, values[idx], ss.getByOrd(ss.ord(idx), bytesRef).utf8ToString()); - DocValues.SortedSource.LookupResult result = ss + int ord = ss .getByValue(new BytesRef(values[idx])); - assertTrue(result.found); - assertEquals(ss.ord(idx), result.ord); + assertTrue(ord >= 0); + assertEquals(ss.ord(idx), ord); } } @@ -136,39 +130,36 @@ public class TestDocValues extends LuceneTestCase { if (mode == Bytes.Mode.SORTED) { final int numValues = ss.getValueCount(); for (int i = 0; i < 1000; i++) { - BytesRef bytesValue = new BytesRef(_TestUtil.randomUnicodeString( - random, lenMin, lenMax)); - SortedSource.LookupResult result = ss.getByValue(bytesValue); - if (result.found) { - assert result.ord > 0; + BytesRef bytesValue = new BytesRef(_TestUtil.randomFixedByteLengthUnicodeString(random, fixedSize? fixedLength : 1 + random.nextInt(39))); + int ord = ss.getByValue(bytesValue); + if (ord >= 0) { assertTrue(bytesValue - .bytesEquals(ss.getByOrd(result.ord, bytesRef))); + .bytesEquals(ss.getByOrd(ord, bytesRef))); int count = 0; for (int k = 0; k < 100; k++) { if (bytesValue.utf8ToString().equals(values[2 * k])) { - assertEquals(ss.ord(2 * k), result.ord); + assertEquals(ss.ord(2 * k), ord); count++; } } assertTrue(count > 0); } else { - assert result.ord >= 0; - if (result.ord == 0) { + assert ord < 0; + int insertIndex = (-ord)-1; + if (insertIndex == 0) { final BytesRef firstRef = ss.getByOrd(1, bytesRef); // random string was before our first assertTrue(firstRef.compareTo(bytesValue) > 0); - } else if (result.ord == numValues) { - final BytesRef lastRef = ss.getByOrd(numValues, bytesRef); + } else if (insertIndex == numValues) { + final BytesRef lastRef = ss.getByOrd(numValues-1, bytesRef); // random string was after our last assertTrue(lastRef.compareTo(bytesValue) < 0); } else { - // random string fell between two of our values - final BytesRef before = (BytesRef) ss.getByOrd(result.ord, - bytesRef).clone(); - final BytesRef after = ss.getByOrd(result.ord + 1, bytesRef); - assertTrue(before.compareTo(bytesValue) < 0); - assertTrue(bytesValue.compareTo(after) < 0); - + final BytesRef before = (BytesRef) ss.getByOrd(insertIndex-1, bytesRef) + .clone(); + BytesRef after = ss.getByOrd(insertIndex, bytesRef); + assertTrue(comp.compare(before, bytesValue) < 0); + assertTrue(comp.compare(bytesValue, after) < 0); } } } @@ -180,67 +171,75 @@ public class TestDocValues extends LuceneTestCase { } public void testInts() throws IOException { - long maxV = 1; - final int NUM_VALUES = 777 + random.nextInt(777); - final long[] values = new long[NUM_VALUES]; - for (int rx = 1; rx < 63; rx++, 
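/* maxV doubles every round so the random values span steadily growing
 * magnitudes, exercising the different packed-ints bit widths. */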
maxV *= 2) { - Directory dir = newDirectory(); - final AtomicLong trackBytes = new AtomicLong(0); - Writer w = Ints.getWriter(dir, "test", false, trackBytes); - for (int i = 0; i < NUM_VALUES; i++) { - final long v = random.nextLong() % (1 + maxV); - values[i] = v; - w.add(i, v); - } - final int additionalDocs = 1 + random.nextInt(9); - w.finish(NUM_VALUES + additionalDocs); - assertEquals(0, trackBytes.get()); - - - DocValues r = Ints.getValues(dir, "test", false); - for (int iter = 0; iter < 2; iter++) { - Source s = getSource(r); - for (int i = 0; i < NUM_VALUES; i++) { - final long v = s.getInt(i); - assertEquals("index " + i, values[i], v); + long[] maxMin = new long[] { + Long.MIN_VALUE, Long.MAX_VALUE, + 1, Long.MAX_VALUE, + 0, Long.MAX_VALUE, + -1, Long.MAX_VALUE, + Long.MIN_VALUE, -1, + random.nextInt(), random.nextInt() }; + for (int j = 0; j < maxMin.length; j+=2) { + long maxV = 1; + final int NUM_VALUES = 777 + random.nextInt(777); + final long[] values = new long[NUM_VALUES]; + for (int rx = 1; rx < 63; rx++, maxV *= 2) { + Directory dir = newDirectory(); + final AtomicLong trackBytes = new AtomicLong(0); + Writer w = Ints.getWriter(dir, "test", false, trackBytes); + values[0] = maxMin[j]; + w.add(0, values[0]); + values[1] = maxMin[j+1]; + w.add(1, values[1]); + for (int i = 2; i < NUM_VALUES; i++) { + final long v = random.nextLong() % (1 + maxV); + values[i] = v; + w.add(i, v); } - } + final int additionalDocs = 1 + random.nextInt(9); + w.finish(NUM_VALUES + additionalDocs); + assertEquals(0, trackBytes.get()); - for (int iter = 0; iter < 2; iter++) { - DocValuesEnum iEnum = r.getEnum(); - LongsRef ints = iEnum.getInt(); - for (int i = 0; i < NUM_VALUES; i++) { - assertEquals(i, iEnum.nextDoc()); - assertEquals(values[i], ints.get()); + DocValues r = Ints.getValues(dir, "test", false); + for (int iter = 0; iter < 2; iter++) { + Source s = getSource(r); + for (int i = 0; i < NUM_VALUES; i++) { + final long v = s.getInt(i); + assertEquals("index " + i, values[i], v); + } } - if (iEnum.docID() < NUM_VALUES - 1) { - assertEquals(NUM_VALUES - 1, iEnum.advance(NUM_VALUES - 1)); - } - for (int i = NUM_VALUES; i < NUM_VALUES + additionalDocs; i++) { + + for (int iter = 0; iter < 2; iter++) { + DocValuesEnum iEnum = getEnum(r); + LongsRef ints = iEnum.getInt(); + for (int i = 0; i < NUM_VALUES + additionalDocs; i++) { + assertEquals(i, iEnum.nextDoc()); + if (i < NUM_VALUES) { + assertEquals(values[i], ints.get()); + } else { + assertEquals(0, ints.get()); + } + } assertEquals(DocValuesEnum.NO_MORE_DOCS, iEnum.nextDoc()); + iEnum.close(); } - iEnum.close(); + for (int iter = 0; iter < 2; iter++) { + DocValuesEnum iEnum = getEnum(r); + LongsRef ints = iEnum.getInt(); + for (int i = 0; i < NUM_VALUES + additionalDocs; i += 1 + random.nextInt(25)) { + assertEquals(i, iEnum.advance(i)); + if (i < NUM_VALUES) { + assertEquals(values[i], ints.get()); + } else { + assertEquals(0, ints.get()); + } + } + assertEquals(DocValuesEnum.NO_MORE_DOCS, iEnum.advance(NUM_VALUES + additionalDocs)); + iEnum.close(); + } + r.close(); + dir.close(); } - - for (int iter = 0; iter < 2; iter++) { - DocValuesEnum iEnum = r.getEnum(); - LongsRef ints = iEnum.getInt(); - for (int i = 0; i < NUM_VALUES; i += 1 + random.nextInt(25)) { - assertEquals(i, iEnum.advance(i)); - assertEquals(values[i], ints.get()); - } - if (iEnum.docID() < NUM_VALUES - 1) { - assertEquals(NUM_VALUES - 1, iEnum.advance(NUM_VALUES - 1)); - } - for (int i = NUM_VALUES; i < NUM_VALUES + additionalDocs; i++) { - 
assertEquals(DocValuesEnum.NO_MORE_DOCS, iEnum.nextDoc()); - } - - iEnum.close(); - } - r.close(); - dir.close(); } } @@ -273,27 +272,31 @@ public class TestDocValues extends LuceneTestCase { } for (int iter = 0; iter < 2; iter++) { - DocValuesEnum fEnum = r.getEnum(); + DocValuesEnum fEnum = getEnum(r); FloatsRef floats = fEnum.getFloat(); - for (int i = 0; i < NUM_VALUES; i++) { + for (int i = 0; i < NUM_VALUES + additionalValues; i++) { assertEquals(i, fEnum.nextDoc()); - assertEquals(values[i], floats.get(), delta); - } - for (int i = NUM_VALUES; i < NUM_VALUES + additionalValues; i++) { - assertEquals(DocValuesEnum.NO_MORE_DOCS, fEnum.nextDoc()); + if (i < NUM_VALUES) { + assertEquals(values[i], floats.get(), delta); + } else { + assertEquals(0.0d, floats.get(), delta); + } } + assertEquals(DocValuesEnum.NO_MORE_DOCS, fEnum.nextDoc()); fEnum.close(); } for (int iter = 0; iter < 2; iter++) { - DocValuesEnum fEnum = r.getEnum(); + DocValuesEnum fEnum = getEnum(r); FloatsRef floats = fEnum.getFloat(); - for (int i = 0; i < NUM_VALUES; i += 1 + random.nextInt(25)) { + for (int i = 0; i < NUM_VALUES + additionalValues; i += 1 + random.nextInt(25)) { assertEquals(i, fEnum.advance(i)); - assertEquals(values[i], floats.get(), delta); - } - for (int i = NUM_VALUES; i < NUM_VALUES + additionalValues; i++) { - assertEquals(DocValuesEnum.NO_MORE_DOCS, fEnum.advance(i)); + if (i < NUM_VALUES) { + assertEquals(values[i], floats.get(), delta); + } else { + assertEquals(0.0d, floats.get(), delta); + } } + assertEquals(DocValuesEnum.NO_MORE_DOCS, fEnum.advance(NUM_VALUES + additionalValues)); fEnum.close(); } @@ -304,6 +307,10 @@ public class TestDocValues extends LuceneTestCase { public void testFloats8() throws IOException { runTestFloats(8, 0.0); } + + private DocValuesEnum getEnum(DocValues values) throws IOException { + return random.nextBoolean() ? 
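/* flip a coin between the reader's direct enum and the enum obtained
 * from a loaded Source, so both code paths get covered. */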
values.getEnum() : getSource(values).getEnum(); + } private Source getSource(DocValues values) throws IOException { // getSource uses cache internally diff --git a/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java b/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java index 644d81b913a..8986f11e3e2 100644 --- a/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java +++ b/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java @@ -41,7 +41,6 @@ import org.apache.lucene.index.MultiPerDocValues; import org.apache.lucene.index.Term; import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.index.codecs.PerDocValues; -import org.apache.lucene.index.values.DocValues.MissingValue; import org.apache.lucene.index.values.DocValues.Source; import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.queryParser.QueryParser; @@ -197,16 +196,22 @@ public class TestDocValuesIndexing extends LuceneTestCase { assertEquals(valuesPerIndex * 2, w.maxDoc()); // check values - + IndexReader merged = IndexReader.open(w, true); DocValuesEnum vE_1 = getValuesEnum(getDocValues(r_1, first.name())); DocValuesEnum vE_2 = getValuesEnum(getDocValues(r_2, second.name())); DocValuesEnum vE_1_merged = getValuesEnum(getDocValues(merged, first.name())); DocValuesEnum vE_2_merged = getValuesEnum(getDocValues(merged, second .name())); - if (second == ValueType.BYTES_VAR_STRAIGHT || second == ValueType.BYTES_FIXED_STRAIGHT) { + switch (second) { // these variants don't advance over missing values + case BYTES_FIXED_STRAIGHT: + case BYTES_VAR_STRAIGHT: + case FLOAT_32: + case FLOAT_64: + case INTS: assertEquals(msg, valuesPerIndex-1, vE_2_merged.advance(valuesPerIndex-1)); } + for (int i = 0; i < valuesPerIndex; i++) { assertEquals(msg, i, vE_1.nextDoc()); assertEquals(msg, i, vE_1_merged.nextDoc()); @@ -263,15 +268,14 @@ public class TestDocValuesIndexing extends LuceneTestCase { assertNotNull(intsReader); Source ints = getSource(intsReader); - MissingValue missing = ints.getMissing(); for (int i = 0; i < base; i++) { long value = ints.getInt(i); - assertEquals("index " + i, missing.longValue, value); + assertEquals("index " + i, 0, value); } DocValuesEnum intsEnum = getValuesEnum(intsReader); - assertTrue(intsEnum.advance(0) >= base); + assertTrue(intsEnum.advance(base) >= base); intsEnum = getValuesEnum(intsReader); LongsRef enumRef = intsEnum.getInt(); @@ -283,8 +287,8 @@ public class TestDocValuesIndexing extends LuceneTestCase { } assertEquals("advance failed at index: " + i + " of " + r.numDocs() + " docs", i, intsEnum.advance(i)); - assertEquals(expected, enumRef.get()); assertEquals(expected, ints.getInt(i)); + assertEquals(expected, enumRef.get()); } } @@ -294,15 +298,13 @@ public class TestDocValuesIndexing extends LuceneTestCase { DocValues floatReader = getDocValues(r, val.name()); assertNotNull(floatReader); Source floats = getSource(floatReader); - MissingValue missing = floats.getMissing(); - for (int i = 0; i < base; i++) { double value = floats.getFloat(i); assertEquals(val + " failed for doc: " + i + " base: " + base, - missing.doubleValue, value, 0.0d); + 0.0d, value, 0.0d); } DocValuesEnum floatEnum = getValuesEnum(floatReader); - assertTrue(floatEnum.advance(0) >= base); + assertTrue(floatEnum.advance(base) >= base); floatEnum = getValuesEnum(floatReader); FloatsRef enumRef = floatEnum.getFloat(); @@ -358,7 +360,6 @@ public class TestDocValuesIndexing extends LuceneTestCase { byte upto = 
0; // test the filled up slots for correctness - MissingValue missing = bytes.getMissing(); for (int i = 0; i < base; i++) { BytesRef br = bytes.getBytes(i, new BytesRef()); @@ -369,18 +370,14 @@ public class TestDocValuesIndexing extends LuceneTestCase { case BYTES_FIXED_STRAIGHT: // fixed straight returns bytesref with zero bytes all of fixed // length - if (missing.bytesValue != null) { - assertNotNull("expected none null - " + msg, br); - if (br.length != 0) { - assertEquals("expected zero bytes of length " + bytesSize + " - " - + msg, bytesSize, br.length); - for (int j = 0; j < br.length; j++) { - assertEquals("Byte at index " + j + " doesn't match - " + msg, - 0, br.bytes[br.offset + j]); - } + assertNotNull("expected none null - " + msg, br); + if (br.length != 0) { + assertEquals("expected zero bytes of length " + bytesSize + " - " + + msg, bytesSize, br.length); + for (int j = 0; j < br.length; j++) { + assertEquals("Byte at index " + j + " doesn't match - " + msg, 0, + br.bytes[br.offset + j]); } - } else { - assertNull("expected null - " + msg + " " + br, br); } break; case BYTES_VAR_SORTED: @@ -388,13 +385,21 @@ public class TestDocValuesIndexing extends LuceneTestCase { case BYTES_VAR_DEREF: case BYTES_FIXED_DEREF: default: - assertNull("expected null - " + msg + " " + br, br); + assertNotNull("expected none null - " + msg, br); + assertEquals(0, br.length); // make sure we advance at least until base DocValuesEnum bytesEnum = getValuesEnum(bytesReader); + try { + final int advancedTo = bytesEnum.advance(0); assertTrue(byteIndexValue.name() + " advanced failed base:" + base + " advancedTo: " + advancedTo, base <= advancedTo); + }catch(Throwable e) { + final int advancedTo = bytesEnum.advance(0); + assertTrue(byteIndexValue.name() + " advanced failed base:" + base + + " advancedTo: " + advancedTo, base <= advancedTo); + } } } From 789a44d7061b47d993d2797ff162c8ffc8a51fbe Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Fri, 3 Jun 2011 15:00:13 +0000 Subject: [PATCH 049/116] LUCENE-3108: Renamed DocValues to IndexDocValues to prevent naming conflict with Function Queries git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1131061 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/lucene/document/Fieldable.java | 4 +- .../org/apache/lucene/index/CheckIndex.java | 4 +- .../org/apache/lucene/index/FieldsEnum.java | 2 +- .../org/apache/lucene/index/IndexReader.java | 4 +- .../org/apache/lucene/index/MultiFields.java | 2 +- .../lucene/index/MultiPerDocValues.java | 10 ++--- .../lucene/index/PerFieldCodecWrapper.java | 4 +- .../apache/lucene/index/SegmentReader.java | 4 +- .../lucene/index/codecs/BlockTermsReader.java | 2 +- .../codecs/DefaultDocValuesProducer.java | 34 +++++++------- .../index/codecs/DocValuesConsumer.java | 10 ++--- .../lucene/index/codecs/PerDocConsumer.java | 4 +- .../lucene/index/codecs/PerDocValues.java | 8 ++-- .../org/apache/lucene/index/values/Bytes.java | 18 ++++---- .../index/values/FixedSortedBytesImpl.java | 2 +- .../apache/lucene/index/values/Floats.java | 6 +-- .../{DocValues.java => IndexDocValues.java} | 38 ++++++++-------- .../org/apache/lucene/index/values/Ints.java | 2 +- .../lucene/index/values/MultiDocValues.java | 10 ++--- .../lucene/index/values/PackedIntsImpl.java | 2 +- .../lucene/index/values/SourceCache.java | 44 +++++++++---------- .../apache/lucene/index/values/ValueType.java | 4 +- .../index/values/VarSortedBytesImpl.java | 2 +- .../apache/lucene/search/FieldComparator.java | 8 ++-- 
.../lucene/index/values/TestDocValues.java | 18 ++++---- .../index/values/TestDocValuesIndexing.java | 18 ++++---- 26 files changed, 132 insertions(+), 132 deletions(-) rename lucene/src/java/org/apache/lucene/index/values/{DocValues.java => IndexDocValues.java} (88%) diff --git a/lucene/src/java/org/apache/lucene/document/Fieldable.java b/lucene/src/java/org/apache/lucene/document/Fieldable.java index 8b9c325d3f8..81529bcb9a7 100755 --- a/lucene/src/java/org/apache/lucene/document/Fieldable.java +++ b/lucene/src/java/org/apache/lucene/document/Fieldable.java @@ -18,7 +18,7 @@ package org.apache.lucene.document; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.index.FieldInvertState; // for javadocs -import org.apache.lucene.index.values.DocValues; +import org.apache.lucene.index.values.IndexDocValues; import org.apache.lucene.index.values.PerDocFieldValues; import org.apache.lucene.index.values.ValueType; import org.apache.lucene.search.PhraseQuery; // for javadocs @@ -219,7 +219,7 @@ public interface Fieldable { * Sets the {@link PerDocFieldValues} for this field. If * {@link PerDocFieldValues} is set this field will store per-document values * - * @see DocValues + * @see IndexDocValues */ public void setDocValues(PerDocFieldValues docValues); diff --git a/lucene/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/src/java/org/apache/lucene/index/CheckIndex.java index 145a43801ae..1da3d4db6ea 100644 --- a/lucene/src/java/org/apache/lucene/index/CheckIndex.java +++ b/lucene/src/java/org/apache/lucene/index/CheckIndex.java @@ -28,7 +28,7 @@ import org.apache.lucene.document.Document; import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.index.codecs.DefaultSegmentInfosWriter; import org.apache.lucene.index.codecs.PerDocValues; -import org.apache.lucene.index.values.DocValues; +import org.apache.lucene.index.values.IndexDocValues; import org.apache.lucene.index.values.DocValuesEnum; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; @@ -952,7 +952,7 @@ public class CheckIndex { if (fieldInfo.hasDocValues()) { status.totalValueFields++; final PerDocValues perDocValues = reader.perDocValues(); - final DocValues docValues = perDocValues.docValues(fieldInfo.name); + final IndexDocValues docValues = perDocValues.docValues(fieldInfo.name); if (docValues == null) { continue; } diff --git a/lucene/src/java/org/apache/lucene/index/FieldsEnum.java b/lucene/src/java/org/apache/lucene/index/FieldsEnum.java index 51ffa5f04b9..5d2d707a32d 100644 --- a/lucene/src/java/org/apache/lucene/index/FieldsEnum.java +++ b/lucene/src/java/org/apache/lucene/index/FieldsEnum.java @@ -19,7 +19,7 @@ package org.apache.lucene.index; import java.io.IOException; -import org.apache.lucene.index.values.DocValues; +import org.apache.lucene.index.values.IndexDocValues; import org.apache.lucene.index.values.DocValuesEnum; import org.apache.lucene.util.AttributeSource; diff --git a/lucene/src/java/org/apache/lucene/index/IndexReader.java b/lucene/src/java/org/apache/lucene/index/IndexReader.java index 2fc8173c06e..0eb25dc1a76 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexReader.java +++ b/lucene/src/java/org/apache/lucene/index/IndexReader.java @@ -24,7 +24,7 @@ import org.apache.lucene.search.Similarity; import org.apache.lucene.index.codecs.Codec; import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.index.codecs.PerDocValues; -import org.apache.lucene.index.values.DocValues; +import 
org.apache.lucene.index.values.IndexDocValues; import org.apache.lucene.store.*; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.Bits; @@ -1585,7 +1585,7 @@ public abstract class IndexReader implements Cloneable,Closeable { throw new UnsupportedOperationException("This reader does not support this method."); } - public DocValues docValues(String field) throws IOException { + public IndexDocValues docValues(String field) throws IOException { final PerDocValues perDoc = perDocValues(); if (perDoc == null) { return null; diff --git a/lucene/src/java/org/apache/lucene/index/MultiFields.java b/lucene/src/java/org/apache/lucene/index/MultiFields.java index 453bce7cbc0..920bcf669a3 100644 --- a/lucene/src/java/org/apache/lucene/index/MultiFields.java +++ b/lucene/src/java/org/apache/lucene/index/MultiFields.java @@ -22,7 +22,7 @@ import java.util.Map; import java.util.List; import java.util.ArrayList; -import org.apache.lucene.index.values.DocValues; +import org.apache.lucene.index.values.IndexDocValues; import org.apache.lucene.index.values.MultiDocValues; import org.apache.lucene.index.values.ValueType; import org.apache.lucene.index.values.MultiDocValues.DocValuesIndex; diff --git a/lucene/src/java/org/apache/lucene/index/MultiPerDocValues.java b/lucene/src/java/org/apache/lucene/index/MultiPerDocValues.java index 60848bb5aa9..d4a5c062534 100644 --- a/lucene/src/java/org/apache/lucene/index/MultiPerDocValues.java +++ b/lucene/src/java/org/apache/lucene/index/MultiPerDocValues.java @@ -24,7 +24,7 @@ import java.util.TreeSet; import java.util.concurrent.ConcurrentHashMap; import org.apache.lucene.index.codecs.PerDocValues; -import org.apache.lucene.index.values.DocValues; +import org.apache.lucene.index.values.IndexDocValues; import org.apache.lucene.index.values.MultiDocValues; import org.apache.lucene.index.values.ValueType; import org.apache.lucene.index.values.MultiDocValues.DocValuesIndex; @@ -47,7 +47,7 @@ import org.apache.lucene.util.ReaderUtil.Gather; public class MultiPerDocValues extends PerDocValues { private final PerDocValues[] subs; private final ReaderUtil.Slice[] subSlices; - private final Map docValues = new ConcurrentHashMap(); + private final Map docValues = new ConcurrentHashMap(); private final TreeSet fields; public MultiPerDocValues(PerDocValues[] subs, ReaderUtil.Slice[] subSlices) { @@ -110,8 +110,8 @@ public class MultiPerDocValues extends PerDocValues { return perDocValues; } - public DocValues docValues(String field) throws IOException { - DocValues result = docValues.get(field); + public IndexDocValues docValues(String field) throws IOException { + IndexDocValues result = docValues.get(field); if (result == null) { // Lazy init: first time this field is requested, we // create & add to docValues: @@ -120,7 +120,7 @@ public class MultiPerDocValues extends PerDocValues { ValueType type = null; // Gather all sub-readers that share this field for (int i = 0; i < subs.length; i++) { - DocValues values = subs[i].docValues(field); + IndexDocValues values = subs[i].docValues(field); final int start = subSlices[i].start; final int length = subSlices[i].length; if (values != null) { diff --git a/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java b/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java index 10aa7c24d97..7e8b73aa060 100644 --- a/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java +++ b/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java @@ -34,7 +34,7 @@ import 
org.apache.lucene.index.codecs.PerDocConsumer; import org.apache.lucene.index.codecs.PerDocValues; import org.apache.lucene.index.codecs.TermsConsumer; import org.apache.lucene.index.codecs.DocValuesConsumer; -import org.apache.lucene.index.values.DocValues; +import org.apache.lucene.index.values.IndexDocValues; import org.apache.lucene.store.Directory; import org.apache.lucene.util.IOUtils; @@ -257,7 +257,7 @@ final class PerFieldCodecWrapper extends Codec { return codecs.keySet(); } @Override - public DocValues docValues(String field) throws IOException { + public IndexDocValues docValues(String field) throws IOException { final PerDocValues perDocProducer = codecs.get(field); if (perDocProducer == null) { return null; diff --git a/lucene/src/java/org/apache/lucene/index/SegmentReader.java b/lucene/src/java/org/apache/lucene/index/SegmentReader.java index 0273fab5ca7..61fe5a6826f 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentReader.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentReader.java @@ -30,7 +30,7 @@ import java.util.concurrent.atomic.AtomicInteger; import org.apache.lucene.document.Document; import org.apache.lucene.document.FieldSelector; import org.apache.lucene.index.codecs.PerDocValues; -import org.apache.lucene.index.values.DocValues; +import org.apache.lucene.index.values.IndexDocValues; import org.apache.lucene.store.BufferedIndexInput; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; @@ -842,7 +842,7 @@ public class SegmentReader extends IndexReader implements Cloneable { @Override - public DocValues docValues(String field) throws IOException { + public IndexDocValues docValues(String field) throws IOException { return core.perDocProducer.docValues(field); } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java b/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java index 8d16a6c33b0..c17f84d569e 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java @@ -35,7 +35,7 @@ import org.apache.lucene.index.TermState; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.codecs.standard.StandardPostingsReader; // javadocs -import org.apache.lucene.index.values.DocValues; +import org.apache.lucene.index.values.IndexDocValues; import org.apache.lucene.store.ByteArrayDataInput; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; diff --git a/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesProducer.java b/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesProducer.java index d3d6980d63d..a509f560334 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesProducer.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesProducer.java @@ -25,7 +25,7 @@ import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.codecs.PerDocValues; import org.apache.lucene.index.values.Bytes; -import org.apache.lucene.index.values.DocValues; +import org.apache.lucene.index.values.IndexDocValues; import org.apache.lucene.index.values.Floats; import org.apache.lucene.index.values.Ints; import org.apache.lucene.index.values.ValueType; @@ -33,22 +33,22 @@ import org.apache.lucene.store.Directory; /** * Abstract base class for FieldsProducer implementations supporting - * {@link DocValues}. 
+ * {@link IndexDocValues}. * * @lucene.experimental */ public class DefaultDocValuesProducer extends PerDocValues { - protected final TreeMap docValues; + protected final TreeMap docValues; /** * Creates a new {@link DefaultDocValuesProducer} instance and loads all - * {@link DocValues} instances for this segment and codec. + * {@link IndexDocValues} instances for this segment and codec. * * @param si - * the segment info to load the {@link DocValues} for. + * the segment info to load the {@link IndexDocValues} for. * @param dir - * the directory to load the {@link DocValues} from. + * the directory to load the {@link IndexDocValues} from. * @param fieldInfo * the {@link FieldInfos} * @param codecId @@ -62,19 +62,19 @@ public class DefaultDocValuesProducer extends PerDocValues { } /** - * Returns a {@link DocValues} instance for the given field name or - * null if this field has no {@link DocValues}. + * Returns a {@link IndexDocValues} instance for the given field name or + * null if this field has no {@link IndexDocValues}. */ @Override - public DocValues docValues(String field) throws IOException { + public IndexDocValues docValues(String field) throws IOException { return docValues.get(field); } // Only opens files... doesn't actually load any values - protected TreeMap load(FieldInfos fieldInfos, + protected TreeMap load(FieldInfos fieldInfos, String segment, int docCount, Directory dir, int codecId) throws IOException { - TreeMap values = new TreeMap(); + TreeMap values = new TreeMap(); boolean success = false; try { @@ -101,25 +101,25 @@ public class DefaultDocValuesProducer extends PerDocValues { /** - * Loads a {@link DocValues} instance depending on the given {@link ValueType}. + * Loads a {@link IndexDocValues} instance depending on the given {@link ValueType}. * Codecs that use different implementations for a certain {@link ValueType} can * simply override this method and return their custom implementations. 
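* <p>
* For illustration, an override could look like this (a sketch only;
* <code>MyIntsReader</code> is a hypothetical custom reader):
* <pre>
* protected IndexDocValues loadDocValues(int docCount, Directory dir,
*     String id, ValueType type) throws IOException {
*   if (type == ValueType.INTS) {
*     return new MyIntsReader(dir, id); // hypothetical implementation
*   }
*   return super.loadDocValues(docCount, dir, id, type);
* }
* </pre>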
* * @param docCount * number of documents in the segment * @param dir - * the {@link Directory} to load the {@link DocValues} from + * the {@link Directory} to load the {@link IndexDocValues} from * @param id * the unique file ID within the segment * @param type * the type to load - * @return a {@link DocValues} instance for the given type + * @return a {@link IndexDocValues} instance for the given type * @throws IOException * if an {@link IOException} occurs * @throws IllegalArgumentException * if the given {@link ValueType} is not supported */ - protected DocValues loadDocValues(int docCount, Directory dir, String id, + protected IndexDocValues loadDocValues(int docCount, Directory dir, String id, ValueType type) throws IOException { switch (type) { case INTS: @@ -149,10 +149,10 @@ public class DefaultDocValuesProducer extends PerDocValues { closeDocValues(docValues.values()); } - private void closeDocValues(final Collection values) + private void closeDocValues(final Collection values) throws IOException { IOException ex = null; - for (DocValues docValues : values) { + for (IndexDocValues docValues : values) { try { docValues.close(); } catch (IOException e) { diff --git a/lucene/src/java/org/apache/lucene/index/codecs/DocValuesConsumer.java b/lucene/src/java/org/apache/lucene/index/codecs/DocValuesConsumer.java index caf98aa8316..98a4d9f58e9 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/DocValuesConsumer.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/DocValuesConsumer.java @@ -21,7 +21,7 @@ import java.util.Collection; import java.util.concurrent.atomic.AtomicLong; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.values.DocValues; +import org.apache.lucene.index.values.IndexDocValues; import org.apache.lucene.index.values.PerDocFieldValues; import org.apache.lucene.index.values.Writer; import org.apache.lucene.util.Bits; @@ -99,7 +99,7 @@ public abstract class DocValuesConsumer { * if an {@link IOException} occurs */ public void merge(org.apache.lucene.index.codecs.MergeState mergeState, - DocValues values) throws IOException { + IndexDocValues values) throws IOException { assert mergeState != null; // TODO we need some kind of compatibility notation for values such // that two slightly different segments can be merged eg. fixed vs. 
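The hunk below rewires the merge loop to the renamed type; for context, a minimal sketch of the docBase bookkeeping it performs (hypothetical driver code; the deleted-docs accessor is assumed):

  int docBase = 0;
  for (final IndexReader reader : mergeState.readers) {
    final IndexDocValues r = reader.docValues(mergeState.fieldInfo.name);
    if (r != null) {
      // values from this reader cover docIDs [docBase, docBase + reader.maxDoc())
      merge(new Writer.MergeState(r, docBase, reader.maxDoc(),
          reader.getDeletedDocs()));
    }
    docBase += reader.maxDoc();
  }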
@@ -112,7 +112,7 @@ public abstract class DocValuesConsumer { * with MultiDocValues the writer can not optimize for bulk-copyable data */ for (final IndexReader reader : mergeState.readers) { - final DocValues r = reader.docValues(mergeState.fieldInfo.name); + final IndexDocValues r = reader.docValues(mergeState.fieldInfo.name); if (r != null) { merged = true; merge(new Writer.MergeState(r, docBase, reader.maxDoc(), reader @@ -148,7 +148,7 @@ public abstract class DocValuesConsumer { * the source reader for this MergeState - merged values should be read from * this instance */ - public final DocValues reader; + public final IndexDocValues reader; /** the absolute docBase for this MergeState within the resulting segment */ public final int docBase; /** the number of documents in this MergeState */ @@ -156,7 +156,7 @@ public abstract class DocValuesConsumer { /** the deleted bits for this MergeState */ public final Bits bits; - public MergeState(DocValues reader, int docBase, int docCount, Bits bits) { + public MergeState(IndexDocValues reader, int docBase, int docCount, Bits bits) { assert reader != null; this.reader = reader; this.docBase = docBase; diff --git a/lucene/src/java/org/apache/lucene/index/codecs/PerDocConsumer.java b/lucene/src/java/org/apache/lucene/index/codecs/PerDocConsumer.java index 1749414a747..7acaef4504e 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/PerDocConsumer.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/PerDocConsumer.java @@ -19,7 +19,7 @@ import java.io.Closeable; import java.io.IOException; import org.apache.lucene.index.FieldInfo; -import org.apache.lucene.index.values.DocValues; +import org.apache.lucene.index.values.IndexDocValues; /** * Abstract API that consumes per document values. Concrete implementations of @@ -48,7 +48,7 @@ public abstract class PerDocConsumer implements Closeable{ assert mergeState.fieldInfo != null : "FieldInfo for field is null: " + field; if (mergeState.fieldInfo.hasDocValues()) { - final DocValues docValues = producer.docValues(field); + final IndexDocValues docValues = producer.docValues(field); if (docValues == null) { /* * It is actually possible that a fieldInfo has a values type but no diff --git a/lucene/src/java/org/apache/lucene/index/codecs/PerDocValues.java b/lucene/src/java/org/apache/lucene/index/codecs/PerDocValues.java index 19b0c4e0728..bc194dfd30d 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/PerDocValues.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/PerDocValues.java @@ -19,7 +19,7 @@ import java.io.Closeable; import java.io.IOException; import java.util.Collection; -import org.apache.lucene.index.values.DocValues; +import org.apache.lucene.index.values.IndexDocValues; /** * Abstract API that provides access to one or more per-document storage @@ -35,15 +35,15 @@ import org.apache.lucene.index.values.DocValues; */ public abstract class PerDocValues implements Closeable { /** - * Returns {@link DocValues} for the current field. + * Returns {@link IndexDocValues} for the current field. * * @param field * the field name - * @return the {@link DocValues} for this field or null if not + * @return the {@link IndexDocValues} for this field or null if not * applicable. 
* @throws IOException */ - public abstract DocValues docValues(String field) throws IOException; + public abstract IndexDocValues docValues(String field) throws IOException; public static final PerDocValues[] EMPTY_ARRAY = new PerDocValues[0]; diff --git a/lucene/src/java/org/apache/lucene/index/values/Bytes.java b/lucene/src/java/org/apache/lucene/index/values/Bytes.java index d94fa17b220..cddc69e449e 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Bytes.java +++ b/lucene/src/java/org/apache/lucene/index/values/Bytes.java @@ -24,9 +24,9 @@ import java.util.Comparator; import java.util.concurrent.atomic.AtomicLong; import org.apache.lucene.index.IndexFileNames; -import org.apache.lucene.index.values.DocValues.SortedSource; -import org.apache.lucene.index.values.DocValues.Source; -import org.apache.lucene.index.values.DocValues.SourceEnum; +import org.apache.lucene.index.values.IndexDocValues.SortedSource; +import org.apache.lucene.index.values.IndexDocValues.Source; +import org.apache.lucene.index.values.IndexDocValues.SourceEnum; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; @@ -136,13 +136,13 @@ public final class Bytes { } /** - * Creates a new {@link DocValues} instance that provides either memory + * Creates a new {@link IndexDocValues} instance that provides either memory * resident or iterative access to a per-document stored byte[] - * value. The returned {@link DocValues} instance will be initialized without + * value. The returned {@link IndexDocValues} instance will be initialized without * consuming a significant amount of memory. * * @param dir - * the directory to load the {@link DocValues} from. + * the directory to load the {@link IndexDocValues} from. * @param id * the file ID in the {@link Directory} to load the values from. * @param mode @@ -152,11 +152,11 @@ public final class Bytes { * otherwise false * @param maxDoc * the number of document values stored for the given ID - * @return an initialized {@link DocValues} instance. + * @return an initialized {@link IndexDocValues} instance. * @throws IOException * if an {@link IOException} occurs */ - public static DocValues getValues(Directory dir, String id, Mode mode, + public static IndexDocValues getValues(Directory dir, String id, Mode mode, boolean fixedSize, int maxDoc) throws IOException { // TODO -- I can peek @ header to determing fixed/mode? if (fixedSize) { @@ -421,7 +421,7 @@ public final class Bytes { * Opens all necessary files, but does not read any data in until you call * {@link #load}. 
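* <p>
* The intended call pattern, sketched (arguments are illustrative):
* <pre>
* IndexDocValues values = Bytes.getValues(dir, id, mode, fixedSize, maxDoc);
* Source source = values.load(); // only now is the data read into memory
* BytesRef bytes = source.getBytes(docID, new BytesRef());
* </pre>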
*/ - static abstract class BytesReaderBase extends DocValues { + static abstract class BytesReaderBase extends IndexDocValues { protected final IndexInput idxIn; protected final IndexInput datIn; protected final int version; diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java index e1385a41838..b5ba7e8b3cc 100644 --- a/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java @@ -166,7 +166,7 @@ class FixedSortedBytesImpl { } @Override - public org.apache.lucene.index.values.DocValues.Source load() + public org.apache.lucene.index.values.IndexDocValues.Source load() throws IOException { return loadSorted(null); } diff --git a/lucene/src/java/org/apache/lucene/index/values/Floats.java b/lucene/src/java/org/apache/lucene/index/values/Floats.java index 72d1ec83eea..52a1f2faa0f 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Floats.java +++ b/lucene/src/java/org/apache/lucene/index/values/Floats.java @@ -21,7 +21,7 @@ import java.util.Collection; import java.util.concurrent.atomic.AtomicLong; import org.apache.lucene.index.IndexFileNames; -import org.apache.lucene.index.values.DocValues.Source; +import org.apache.lucene.index.values.IndexDocValues.Source; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; @@ -63,7 +63,7 @@ public class Floats { } } - public static DocValues getValues(Directory dir, String id, int maxDoc) + public static IndexDocValues getValues(Directory dir, String id, int maxDoc) throws IOException { return new FloatsReader(dir, id, maxDoc); } @@ -232,7 +232,7 @@ public class Floats { * Opens all necessary files, but does not read any data in until you call * {@link #load}. */ - static class FloatsReader extends DocValues { + static class FloatsReader extends IndexDocValues { private final IndexInput datIn; private final int precisionBytes; diff --git a/lucene/src/java/org/apache/lucene/index/values/DocValues.java b/lucene/src/java/org/apache/lucene/index/values/IndexDocValues.java similarity index 88% rename from lucene/src/java/org/apache/lucene/index/values/DocValues.java rename to lucene/src/java/org/apache/lucene/index/values/IndexDocValues.java index 4ad9a07dbc8..e4d68bc3ee0 100644 --- a/lucene/src/java/org/apache/lucene/index/values/DocValues.java +++ b/lucene/src/java/org/apache/lucene/index/values/IndexDocValues.java @@ -31,7 +31,7 @@ import org.apache.lucene.util.BytesRef; * * @lucene.experimental */ -public abstract class DocValues implements Closeable { +public abstract class IndexDocValues implements Closeable { /* * TODO: it might be useful to add another Random Access enum for some * implementations like packed ints and only return such a random access enum @@ -39,13 +39,13 @@ public abstract class DocValues implements Closeable { * useful or even required in certain environements to have disc based random * access */ - public static final DocValues[] EMPTY_ARRAY = new DocValues[0]; + public static final IndexDocValues[] EMPTY_ARRAY = new IndexDocValues[0]; private SourceCache cache = new SourceCache.DirectSourceCache(); /** * Returns an iterator that steps through all documents values for this - * {@link DocValues} field instance. {@link DocValuesEnum} will skip document + * {@link IndexDocValues} field instance. 
{@link DocValuesEnum} will skip document * without a value if applicable. */ public DocValuesEnum getEnum() throws IOException { @@ -54,7 +54,7 @@ public abstract class DocValues implements Closeable { /** * Returns an iterator that steps through all documents values for this - * {@link DocValues} field instance. {@link DocValuesEnum} will skip document + * {@link IndexDocValues} field instance. {@link DocValuesEnum} will skip document * without a value if applicable. *
<p>
    * If an {@link AttributeSource} is supplied to this method the @@ -65,12 +65,12 @@ public abstract class DocValues implements Closeable { throws IOException; /** - * Loads a new {@link Source} instance for this {@link DocValues} field + * Loads a new {@link Source} instance for this {@link IndexDocValues} field * instance. Source instances returned from this method are not cached. It is * the callers responsibility to maintain the instance and release its * resources once the source is not needed anymore. *
<p>
    - * This method will return null iff this {@link DocValues} represent a + * This method will return null iff this {@link IndexDocValues} represent a * {@link SortedSource}. *
<p>
    * For managed {@link Source} instances see {@link #getSource()}. @@ -88,11 +88,11 @@ public abstract class DocValues implements Closeable { * instance unless it is not needed for the rest of its life time. *
<p>
    * {@link Source} instances obtained from this method are closed / released - * from the cache once this {@link DocValues} instance is closed by the + * from the cache once this {@link IndexDocValues} instance is closed by the * {@link IndexReader}, {@link Fields} or {@link FieldsEnum} the - * {@link DocValues} was created from. + * {@link IndexDocValues} was created from. *
<p>
    - * This method will return null iff this {@link DocValues} represent a + * This method will return null iff this {@link IndexDocValues} represent a * {@link SortedSource}. */ public Source getSource() throws IOException { @@ -100,10 +100,10 @@ public abstract class DocValues implements Closeable { } /** - * Returns a {@link SortedSource} instance for this {@link DocValues} field + * Returns a {@link SortedSource} instance for this {@link IndexDocValues} field * instance like {@link #getSource()}. *
<p>
    - * This method will return null iff this {@link DocValues} represent a + * This method will return null iff this {@link IndexDocValues} represent a * {@link Source} instead of a {@link SortedSource}. */ public SortedSource getSortedSorted(Comparator comparator) @@ -113,9 +113,9 @@ public abstract class DocValues implements Closeable { /** * Loads and returns a {@link SortedSource} instance for this - * {@link DocValues} field instance like {@link #load()}. + * {@link IndexDocValues} field instance like {@link #load()}. *
<p>
    - * This method will return null iff this {@link DocValues} represent a + * This method will return null iff this {@link IndexDocValues} represent a * {@link Source} instead of a {@link SortedSource}. */ public SortedSource loadSorted(Comparator comparator) @@ -124,21 +124,21 @@ public abstract class DocValues implements Closeable { } /** - * Returns the {@link ValueType} of this {@link DocValues} instance + * Returns the {@link ValueType} of this {@link IndexDocValues} instance */ public abstract ValueType type(); /** - * Closes this {@link DocValues} instance. This method should only be called - * by the creator of this {@link DocValues} instance. API users should not - * close {@link DocValues} instances. + * Closes this {@link IndexDocValues} instance. This method should only be called + * by the creator of this {@link IndexDocValues} instance. API users should not + * close {@link IndexDocValues} instances. */ public void close() throws IOException { cache.close(this); } /** - * Sets the {@link SourceCache} used by this {@link DocValues} instance. This + * Sets the {@link SourceCache} used by this {@link IndexDocValues} instance. This * method should be called before {@link #load()} or * {@link #loadSorted(Comparator)} is called. All {@link Source} or * {@link SortedSource} instances in the currently used cache will be closed @@ -162,7 +162,7 @@ public abstract class DocValues implements Closeable { /** * Source of per document values like long, double or {@link BytesRef} - * depending on the {@link DocValues} fields {@link ValueType}. Source + * depending on the {@link IndexDocValues} fields {@link ValueType}. Source * implementations provide random access semantics similar to array lookups * and typically are entirely memory resident. *
<p>
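Net effect of the rename on caller code, as a minimal sketch (the field name and docID are illustrative):

  IndexDocValues values = reader.docValues("field");
  Source cached = values.getSource(); // cached; released when the reader is closed
  Source fresh = values.load();       // uncached; the caller manages its lifetime
  long v = cached.getInt(docID);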
    diff --git a/lucene/src/java/org/apache/lucene/index/values/Ints.java b/lucene/src/java/org/apache/lucene/index/values/Ints.java index 52dbcc6f2e5..d11b548761e 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Ints.java +++ b/lucene/src/java/org/apache/lucene/index/values/Ints.java @@ -39,7 +39,7 @@ public class Ints { return new IntsWriter(dir, id, bytesUsed); } - public static DocValues getValues(Directory dir, String id, + public static IndexDocValues getValues(Directory dir, String id, boolean useFixedArray) throws IOException { return new IntsReader(dir, id); } diff --git a/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java b/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java index 6456f9d178b..3187d80a216 100644 --- a/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java +++ b/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java @@ -26,15 +26,15 @@ import org.apache.lucene.util.ReaderUtil; /** * @lucene.experimental */ -public class MultiDocValues extends DocValues { +public class MultiDocValues extends IndexDocValues { public static class DocValuesIndex { public final static DocValuesIndex[] EMPTY_ARRAY = new DocValuesIndex[0]; final int start; final int length; - final DocValues docValues; + final IndexDocValues docValues; - public DocValuesIndex(DocValues docValues, int start, int length) { + public DocValuesIndex(IndexDocValues docValues, int start, int length) { this.docValues = docValues; this.start = start; this.length = length; @@ -67,7 +67,7 @@ public class MultiDocValues extends DocValues { super.close(); } - public DocValues reset(DocValuesIndex[] docValuesIdx) { + public IndexDocValues reset(DocValuesIndex[] docValuesIdx) { int[] start = new int[docValuesIdx.length]; for (int i = 0; i < docValuesIdx.length; i++) { start[i] = docValuesIdx[i].start; @@ -77,7 +77,7 @@ public class MultiDocValues extends DocValues { return this; } - public static class DummyDocValues extends DocValues { + public static class DummyDocValues extends IndexDocValues { final int maxDoc; final Source emptySoruce; diff --git a/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java b/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java index ca5831742e6..4f3f885629a 100644 --- a/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java @@ -182,7 +182,7 @@ class PackedIntsImpl { * Opens all necessary files, but does not read any data in until you call * {@link #load}. 
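* <p>
* Sketched usage (illustrative only):
* <pre>
* IndexDocValues values = Ints.getValues(dir, id, false);
* long value = values.getSource().getInt(docID);
* </pre>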
*/ - static class IntsReader extends DocValues { + static class IntsReader extends IndexDocValues { private final IndexInput datIn; private final boolean packed; diff --git a/lucene/src/java/org/apache/lucene/index/values/SourceCache.java b/lucene/src/java/org/apache/lucene/index/values/SourceCache.java index 565df06eb02..7080006c65e 100644 --- a/lucene/src/java/org/apache/lucene/index/values/SourceCache.java +++ b/lucene/src/java/org/apache/lucene/index/values/SourceCache.java @@ -21,23 +21,23 @@ import java.io.IOException; import java.util.Comparator; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.values.DocValues.SortedSource; -import org.apache.lucene.index.values.DocValues.Source; +import org.apache.lucene.index.values.IndexDocValues.SortedSource; +import org.apache.lucene.index.values.IndexDocValues.Source; import org.apache.lucene.util.BytesRef; /** - * Abstract base class for {@link DocValues} {@link Source} / + * Abstract base class for {@link IndexDocValues} {@link Source} / * {@link SortedSource} cache. *

* {@link Source} and {@link SortedSource} instances loaded via - * {@link DocValues#load()} and {@link DocValues#loadSorted(Comparator)} are + * {@link IndexDocValues#load()} and {@link IndexDocValues#loadSorted(Comparator)} are * entirely memory resident and need to be maintained by the caller. Each call - * to {@link DocValues#load()} or {@link DocValues#loadSorted(Comparator)} will + * to {@link IndexDocValues#load()} or {@link IndexDocValues#loadSorted(Comparator)} will * cause an entire reload of the underlying data. Source and - * {@link SortedSource} instances obtained from {@link DocValues#getSource()} - * and {@link DocValues#getSource()} respectively are maintained by a - * {@link SourceCache} that is closed ({@link #close(DocValues)}) once the - * {@link IndexReader} that created the {@link DocValues} instance is closed. + * {@link SortedSource} instances obtained from {@link IndexDocValues#getSource()} + * and {@link IndexDocValues#getSortedSorted(Comparator)} respectively are maintained by a + * {@link SourceCache} that is closed ({@link #close(IndexDocValues)}) once the + * {@link IndexReader} that created the {@link IndexDocValues} instance is closed. *

    * Unless {@link Source} and {@link SortedSource} instances are managed by * another entity it is recommended to use the cached variants to obtain a @@ -45,9 +45,9 @@ import org.apache.lucene.util.BytesRef; *

    * Implementation of this API must be thread-safe. * - * @see DocValues#setCache(SourceCache) - * @see DocValues#getSource() - * @see DocValues#getSortedSorted(Comparator) + * @see IndexDocValues#setCache(SourceCache) + * @see IndexDocValues#getSource() + * @see IndexDocValues#getSortedSorted(Comparator) * * @lucene.experimental */ @@ -55,38 +55,38 @@ public abstract class SourceCache { /** * Atomically loads a {@link Source} into the cache from the given - * {@link DocValues} and returns it iff no other {@link Source} has already + * {@link IndexDocValues} and returns it iff no other {@link Source} has already * been cached. Otherwise the cached source is returned. *

    * This method will not return null */ - public abstract Source load(DocValues values) throws IOException; + public abstract Source load(IndexDocValues values) throws IOException; /** * Atomically loads a {@link SortedSource} into the cache from the given - * {@link DocValues} and returns it iff no other {@link SortedSource} has + * {@link IndexDocValues} and returns it iff no other {@link SortedSource} has * already been cached. Otherwise the cached source is returned. *

    * This method will not return null */ - public abstract SortedSource loadSorted(DocValues values, + public abstract SortedSource loadSorted(IndexDocValues values, Comparator comp) throws IOException; /** * Atomically invalidates the cached {@link Source} and {@link SortedSource} * instances if any and empties the cache. */ - public abstract void invalidate(DocValues values); + public abstract void invalidate(IndexDocValues values); /** * Atomically closes the cache and frees all resources. */ - public synchronized void close(DocValues values) { + public synchronized void close(IndexDocValues values) { invalidate(values); } /** - * Simple per {@link DocValues} instance cache implementation that holds a + * Simple per {@link IndexDocValues} instance cache implementation that holds a * {@link Source} and {@link SortedSource} reference as a member variable. *

    * If a {@link DirectSourceCache} instance is closed or invalidated the cached @@ -96,14 +96,14 @@ public abstract class SourceCache { private Source ref; private SortedSource sortedRef; - public synchronized Source load(DocValues values) throws IOException { + public synchronized Source load(IndexDocValues values) throws IOException { if (ref == null) { ref = values.load(); } return ref; } - public synchronized SortedSource loadSorted(DocValues values, + public synchronized SortedSource loadSorted(IndexDocValues values, Comparator comp) throws IOException { if (sortedRef == null) { sortedRef = values.loadSorted(comp); @@ -111,7 +111,7 @@ public abstract class SourceCache { return sortedRef; } - public synchronized void invalidate(DocValues values) { + public synchronized void invalidate(IndexDocValues values) { ref = null; sortedRef = null; } diff --git a/lucene/src/java/org/apache/lucene/index/values/ValueType.java b/lucene/src/java/org/apache/lucene/index/values/ValueType.java index d1d4cedb75a..fa8f95a0700 100644 --- a/lucene/src/java/org/apache/lucene/index/values/ValueType.java +++ b/lucene/src/java/org/apache/lucene/index/values/ValueType.java @@ -19,10 +19,10 @@ package org.apache.lucene.index.values; import org.apache.lucene.index.codecs.Codec; import org.apache.lucene.index.codecs.PerDocConsumer; -import org.apache.lucene.index.values.DocValues.SortedSource; +import org.apache.lucene.index.values.IndexDocValues.SortedSource; /** - * {@link ValueType} specifies the type of the {@link DocValues} for a certain field. + * {@link ValueType} specifies the type of the {@link IndexDocValues} for a certain field. * A {@link ValueType} only defines the data type for a field while the actual * Implementation used to encode and decode the values depends on the field's * {@link Codec}. 
It is up to the {@link Codec} implementing diff --git a/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java index c4596922099..ac6bbbe813e 100644 --- a/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java @@ -161,7 +161,7 @@ class VarSortedBytesImpl { } @Override - public org.apache.lucene.index.values.DocValues.Source load() + public org.apache.lucene.index.values.IndexDocValues.Source load() throws IOException { return loadSorted(null); } diff --git a/lucene/src/java/org/apache/lucene/search/FieldComparator.java b/lucene/src/java/org/apache/lucene/search/FieldComparator.java index 3dbb76ed661..7ab0d4cd6c4 100644 --- a/lucene/src/java/org/apache/lucene/search/FieldComparator.java +++ b/lucene/src/java/org/apache/lucene/search/FieldComparator.java @@ -20,8 +20,8 @@ package org.apache.lucene.search; import java.io.IOException; import org.apache.lucene.index.IndexReader.AtomicReaderContext; -import org.apache.lucene.index.values.DocValues; -import org.apache.lucene.index.values.DocValues.Source; +import org.apache.lucene.index.values.IndexDocValues; +import org.apache.lucene.index.values.IndexDocValues.Source; import org.apache.lucene.search.FieldCache.DocTerms; import org.apache.lucene.search.FieldCache.DocTermsIndex; import org.apache.lucene.search.cache.ByteValuesCreator; @@ -375,7 +375,7 @@ public abstract class FieldComparator { @Override public FieldComparator setNextReader(AtomicReaderContext context) throws IOException { - final DocValues docValues = context.reader.docValues(field); + final IndexDocValues docValues = context.reader.docValues(field); if (docValues != null) { currentReaderValues = docValues.getSource(); } @@ -651,7 +651,7 @@ public abstract class FieldComparator { @Override public FieldComparator setNextReader(AtomicReaderContext context) throws IOException { - DocValues docValues = context.reader.docValues(field); + IndexDocValues docValues = context.reader.docValues(field); if (docValues != null) { currentReaderValues = docValues.getSource(); } diff --git a/lucene/src/test/org/apache/lucene/index/values/TestDocValues.java b/lucene/src/test/org/apache/lucene/index/values/TestDocValues.java index 5cc14d806d3..42d32a0ee41 100644 --- a/lucene/src/test/org/apache/lucene/index/values/TestDocValues.java +++ b/lucene/src/test/org/apache/lucene/index/values/TestDocValues.java @@ -21,8 +21,8 @@ import java.io.IOException; import java.util.Comparator; import java.util.concurrent.atomic.AtomicLong; -import org.apache.lucene.index.values.DocValues.SortedSource; -import org.apache.lucene.index.values.DocValues.Source; +import org.apache.lucene.index.values.IndexDocValues.SortedSource; +import org.apache.lucene.index.values.IndexDocValues.Source; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.FloatsRef; @@ -81,7 +81,7 @@ public class TestDocValues extends LuceneTestCase { w.finish(maxDoc); assertEquals(0, trackBytes.get()); - DocValues r = Bytes.getValues(dir, "test", mode, fixedSize, maxDoc); + IndexDocValues r = Bytes.getValues(dir, "test", mode, fixedSize, maxDoc); for (int iter = 0; iter < 2; iter++) { DocValuesEnum bytesEnum = getEnum(r); assertNotNull("enum is null", bytesEnum); @@ -103,7 +103,7 @@ public class TestDocValues extends LuceneTestCase { // Verify we can load source twice: for (int iter = 0; iter < 2; iter++) { 
Source s; - DocValues.SortedSource ss; + IndexDocValues.SortedSource ss; if (mode == Bytes.Mode.SORTED) { s = ss = getSortedSource(r, comp); } else { @@ -199,7 +199,7 @@ public class TestDocValues extends LuceneTestCase { w.finish(NUM_VALUES + additionalDocs); assertEquals(0, trackBytes.get()); - DocValues r = Ints.getValues(dir, "test", false); + IndexDocValues r = Ints.getValues(dir, "test", false); for (int iter = 0; iter < 2; iter++) { Source s = getSource(r); for (int i = 0; i < NUM_VALUES; i++) { @@ -263,7 +263,7 @@ public class TestDocValues extends LuceneTestCase { w.finish(NUM_VALUES + additionalValues); assertEquals(0, trackBytes.get()); - DocValues r = Floats.getValues(dir, "test", NUM_VALUES + additionalValues); + IndexDocValues r = Floats.getValues(dir, "test", NUM_VALUES + additionalValues); for (int iter = 0; iter < 2; iter++) { Source s = getSource(r); for (int i = 0; i < NUM_VALUES; i++) { @@ -308,16 +308,16 @@ public class TestDocValues extends LuceneTestCase { runTestFloats(8, 0.0); } - private DocValuesEnum getEnum(DocValues values) throws IOException { + private DocValuesEnum getEnum(IndexDocValues values) throws IOException { return random.nextBoolean() ? values.getEnum() : getSource(values).getEnum(); } - private Source getSource(DocValues values) throws IOException { + private Source getSource(IndexDocValues values) throws IOException { // getSource uses cache internally return random.nextBoolean() ? values.load() : values.getSource(); } - private SortedSource getSortedSource(DocValues values, + private SortedSource getSortedSource(IndexDocValues values, Comparator comparator) throws IOException { // getSortedSource uses cache internally return random.nextBoolean() ? values.loadSorted(comparator) : values diff --git a/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java b/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java index 8986f11e3e2..910fe386a19 100644 --- a/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java +++ b/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java @@ -41,7 +41,7 @@ import org.apache.lucene.index.MultiPerDocValues; import org.apache.lucene.index.Term; import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.index.codecs.PerDocValues; -import org.apache.lucene.index.values.DocValues.Source; +import org.apache.lucene.index.values.IndexDocValues.Source; import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.IndexSearcher; @@ -107,7 +107,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { TopDocs search = searcher.search(parser.parse("0 OR 1 OR 2 OR 3 OR 4"), 10); assertEquals(5, search.totalHits); ScoreDoc[] scoreDocs = search.scoreDocs; - DocValues docValues = MultiPerDocValues.getPerDocs(reader).docValues("docId"); + IndexDocValues docValues = MultiPerDocValues.getPerDocs(reader).docValues("docId"); Source source = docValues.getSource(); for (int i = 0; i < scoreDocs.length; i++) { assertEquals(i, scoreDocs[i].doc); @@ -264,7 +264,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { final int base = r.numDocs() - numRemainingValues; switch (val) { case INTS: { - DocValues intsReader = getDocValues(r, val.name()); + IndexDocValues intsReader = getDocValues(r, val.name()); assertNotNull(intsReader); Source ints = getSource(intsReader); @@ -295,7 +295,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { break; case 
FLOAT_32: case FLOAT_64: { - DocValues floatReader = getDocValues(r, val.name()); + IndexDocValues floatReader = getDocValues(r, val.name()); assertNotNull(floatReader); Source floats = getSource(floatReader); for (int i = 0; i < base; i++) { @@ -353,7 +353,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { assertEquals(0, r.numDeletedDocs()); final int numRemainingValues = (int) (numValues - deleted.cardinality()); final int base = r.numDocs() - numRemainingValues; - DocValues bytesReader = getDocValues(r, byteIndexValue.name()); + IndexDocValues bytesReader = getDocValues(r, byteIndexValue.name()); assertNotNull("field " + byteIndexValue.name() + " returned null reader - maybe merged failed", bytesReader); Source bytes = getSource(bytesReader); @@ -448,7 +448,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { d.close(); } - private DocValues getDocValues(IndexReader reader, String field) + private IndexDocValues getDocValues(IndexReader reader, String field) throws IOException { boolean optimized = reader.isOptimized(); PerDocValues perDoc = optimized ? reader.getSequentialSubReaders()[0].perDocValues() @@ -457,7 +457,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { case 0: return perDoc.docValues(field); case 1: - DocValues docValues = perDoc.docValues(field); + IndexDocValues docValues = perDoc.docValues(field); if (docValues != null) { return docValues; } @@ -468,7 +468,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { throw new RuntimeException(); } - private Source getSource(DocValues values) throws IOException { + private Source getSource(IndexDocValues values) throws IOException { Source source; if (random.nextInt(10) == 0) { source = values.load(); @@ -480,7 +480,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { return source; } - private DocValuesEnum getValuesEnum(DocValues values) throws IOException { + private DocValuesEnum getValuesEnum(IndexDocValues values) throws IOException { DocValuesEnum valuesEnum; if (!(values instanceof MultiDocValues) && random.nextInt(10) == 0) { // TODO not supported by MultiDocValues yet! From 2e903dd8d37868ef617af5bda30a12b85ddfdf43 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Fri, 3 Jun 2011 15:11:05 +0000 Subject: [PATCH 050/116] LUCENE-3108: Added ValueSource based on IndexDocValues git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1131063 13f79535-47bb-0310-9956-ffa450edef68 --- .../function/NumericIndexDocValueSource.java | 114 ++++++++++++++++++ 1 file changed, 114 insertions(+) create mode 100644 lucene/src/java/org/apache/lucene/search/function/NumericIndexDocValueSource.java diff --git a/lucene/src/java/org/apache/lucene/search/function/NumericIndexDocValueSource.java b/lucene/src/java/org/apache/lucene/search/function/NumericIndexDocValueSource.java new file mode 100644 index 00000000000..8b85a6ae587 --- /dev/null +++ b/lucene/src/java/org/apache/lucene/search/function/NumericIndexDocValueSource.java @@ -0,0 +1,114 @@ +package org.apache.lucene.search.function; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +import java.io.IOException; + +import org.apache.lucene.index.IndexReader.AtomicReaderContext; +import org.apache.lucene.index.values.IndexDocValues; +import org.apache.lucene.index.values.ValueType; + +/** + * Expert: obtains numeric field values from a {@link IndexDocValues} field. + * This {@link ValueSource} is compatible with all numerical + * {@link IndexDocValues}. + * + * @lucene.experimental + * + */ +public class NumericIndexDocValueSource extends ValueSource { + + private final String field; + + public NumericIndexDocValueSource(String field) { + this.field = field; + } + + @Override + public DocValues getValues(AtomicReaderContext context) throws IOException { + final IndexDocValues.Source source = context.reader.docValues(field) + .getSource(); + ValueType type = source.type(); + switch (type) { + case FLOAT_32: + case FLOAT_64: + return new DocValues() { + + @Override + public String toString(int doc) { + return "float: [" + floatVal(doc) + "]"; + } + + @Override + public float floatVal(int doc) { + return (float) source.getFloat(doc); + } + }; + + case INTS: + return new DocValues() { + @Override + public String toString(int doc) { + return "int: [" + floatVal(doc) + "]"; + } + + @Override + public float floatVal(int doc) { + return (float) source.getInt(doc); + } + }; + default: + throw new IOException("Type: " + type + " is not numeric"); + } + + } + + @Override + public String description() { + return toString(); + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((field == null) ?
0 : field.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + NumericIndexDocValueSource other = (NumericIndexDocValueSource) obj; + if (field == null) { + if (other.field != null) + return false; + } else if (!field.equals(other.field)) + return false; + return true; + } + + @Override + public String toString() { + return "DocValues float(" + field + ')'; + } +} From e4beeafa035b452f70523c79ca94457a6ed0d0ac Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Fri, 3 Jun 2011 16:14:18 +0000 Subject: [PATCH 051/116] LUCENE-3108: Renamed parts of the DocValues API according to IndexDocValues and added javadoc comment git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1131089 13f79535-47bb-0310-9956-ffa450edef68 --- ...uesField.java => IndexDocValuesField.java} | 24 ++-- .../org/apache/lucene/index/CheckIndex.java | 6 +- .../apache/lucene/index/DirectoryReader.java | 2 +- .../org/apache/lucene/index/FieldsEnum.java | 2 +- .../org/apache/lucene/index/MultiFields.java | 4 +- .../apache/lucene/index/MultiFieldsEnum.java | 6 +- .../lucene/index/MultiPerDocValues.java | 18 +-- .../org/apache/lucene/index/MultiReader.java | 2 +- .../org/apache/lucene/index/values/Bytes.java | 6 +- .../index/values/FixedDerefBytesImpl.java | 6 +- .../index/values/FixedSortedBytesImpl.java | 2 +- .../index/values/FixedStraightBytesImpl.java | 6 +- .../apache/lucene/index/values/Floats.java | 10 +- .../lucene/index/values/IndexDocValues.java | 46 +++++-- .../org/apache/lucene/index/values/Ints.java | 4 +- .../{PackedIntsImpl.java => IntsImpl.java} | 16 +-- ...ocValues.java => MultiIndexDocValues.java} | 23 ++-- .../index/values/PerDocFieldValues.java | 4 +- .../apache/lucene/index/values/ValueType.java | 121 ++++++++++++++++-- .../{DocValuesEnum.java => ValuesEnum.java} | 20 +-- .../index/values/VarDerefBytesImpl.java | 2 +- .../index/values/VarSortedBytesImpl.java | 4 +- .../index/values/VarStraightBytesImpl.java | 4 +- .../apache/lucene/index/values/Writer.java | 18 +-- .../lucene/index/RandomIndexWriter.java | 4 +- .../lucene/index/values/TestDocValues.java | 24 ++-- .../index/values/TestDocValuesIndexing.java | 42 +++--- .../org/apache/lucene/search/TestSort.java | 8 +- 28 files changed, 273 insertions(+), 161 deletions(-) rename lucene/src/java/org/apache/lucene/document/{DocValuesField.java => IndexDocValuesField.java} (90%) rename lucene/src/java/org/apache/lucene/index/values/{PackedIntsImpl.java => IntsImpl.java} (96%) rename lucene/src/java/org/apache/lucene/index/values/{MultiDocValues.java => MultiIndexDocValues.java} (91%) rename lucene/src/java/org/apache/lucene/index/values/{DocValuesEnum.java => ValuesEnum.java} (87%) diff --git a/lucene/src/java/org/apache/lucene/document/DocValuesField.java b/lucene/src/java/org/apache/lucene/document/IndexDocValuesField.java similarity index 90% rename from lucene/src/java/org/apache/lucene/document/DocValuesField.java rename to lucene/src/java/org/apache/lucene/document/IndexDocValuesField.java index 38d6c3df3a2..997cde24501 100644 --- a/lucene/src/java/org/apache/lucene/document/DocValuesField.java +++ b/lucene/src/java/org/apache/lucene/document/IndexDocValuesField.java @@ -34,14 +34,14 @@ import org.apache.lucene.util.BytesRef; * example usage, adding an int value: * *

    - * document.add(new DocValuesField(name).setInt(value));
    + * document.add(new IndexDocValuesField(name).setInt(value));
      * 
    * * For optimal performance, re-use the DocValuesField and * {@link Document} instance for more than one document: * *
    - *  DocValuesField field = new DocValuesField(name);
    + *  IndexDocValuesField field = new IndexDocValuesField(name);
      *  Document document = new Document();
      *  document.add(field);
      * 
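For illustration only (not part of the patch): the re-use pattern shown above in a complete indexing loop; writer is an assumed IndexWriter, and the field name and values are hypothetical.

    // Sketch: one IndexDocValuesField and one Document re-used for all documents.
    IndexDocValuesField field = new IndexDocValuesField("price");
    Document document = new Document();
    document.add(field);
    for (int value : new int[] { 13, 42, 7 }) {
      field.setInt(value);          // overwrite the re-used field's payload
      writer.addDocument(document); // one document per value
    }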
    @@ -55,11 +55,11 @@ import org.apache.lucene.util.BytesRef;
      * 
      * 

    * If doc values are stored in addition to an indexed ({@link Index}) or stored - * ({@link Store}) value it's recommended to use the {@link DocValuesField}'s + * ({@link Store}) value it's recommended to use the {@link IndexDocValuesField}'s * {@link #set(AbstractField)} API: * *

    - *  DocValuesField field = new DocValuesField(name);
    + *  IndexDocValuesField field = new IndexDocValuesField(name);
      *  Field indexedField = new Field(name, stringValue, Stored.NO, Indexed.ANALYZED);
      *  Document document = new Document();
      *  document.add(indexedField);
    @@ -73,7 +73,7 @@ import org.apache.lucene.util.BytesRef;
      * 
    * * */ -public class DocValuesField extends AbstractField implements PerDocFieldValues { +public class IndexDocValuesField extends AbstractField implements PerDocFieldValues { protected BytesRef bytes; protected double doubleValue; @@ -82,17 +82,17 @@ public class DocValuesField extends AbstractField implements PerDocFieldValues { protected Comparator bytesComparator; /** - * Creates a new {@link DocValuesField} with the given name. + * Creates a new {@link IndexDocValuesField} with the given name. */ - public DocValuesField(String name) { + public IndexDocValuesField(String name) { super(name, Store.NO, Index.NO, TermVector.NO); setDocValues(this); } /** - * Creates a {@link DocValuesField} prototype + * Creates a {@link IndexDocValuesField} prototype */ - DocValuesField() { + IndexDocValuesField() { this(""); } @@ -238,7 +238,7 @@ public class DocValuesField extends AbstractField implements PerDocFieldValues { } /** - * Sets this {@link DocValuesField} to the given {@link AbstractField} and + * Sets this {@link IndexDocValuesField} to the given {@link AbstractField} and * returns the given field. Any modifications to this instance will be visible * to the given field. */ @@ -253,9 +253,9 @@ public class DocValuesField extends AbstractField implements PerDocFieldValues { * */ public static T set(T field, ValueType type) { - if (field instanceof DocValuesField) + if (field instanceof IndexDocValuesField) return field; - final DocValuesField valField = new DocValuesField(); + final IndexDocValuesField valField = new IndexDocValuesField(); switch (type) { case BYTES_FIXED_DEREF: case BYTES_FIXED_SORTED: diff --git a/lucene/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/src/java/org/apache/lucene/index/CheckIndex.java index 1da3d4db6ea..cc5d1d214af 100644 --- a/lucene/src/java/org/apache/lucene/index/CheckIndex.java +++ b/lucene/src/java/org/apache/lucene/index/CheckIndex.java @@ -29,7 +29,7 @@ import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.index.codecs.DefaultSegmentInfosWriter; import org.apache.lucene.index.codecs.PerDocValues; import org.apache.lucene.index.values.IndexDocValues; -import org.apache.lucene.index.values.DocValuesEnum; +import org.apache.lucene.index.values.ValuesEnum; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; @@ -956,8 +956,8 @@ public class CheckIndex { if (docValues == null) { continue; } - final DocValuesEnum values = docValues.getEnum(); - while (values.nextDoc() != DocValuesEnum.NO_MORE_DOCS) { + final ValuesEnum values = docValues.getEnum(); + while (values.nextDoc() != ValuesEnum.NO_MORE_DOCS) { switch (fieldInfo.docValues) { case BYTES_FIXED_DEREF: case BYTES_FIXED_SORTED: diff --git a/lucene/src/java/org/apache/lucene/index/DirectoryReader.java b/lucene/src/java/org/apache/lucene/index/DirectoryReader.java index a5cbbfb1b46..2b05adf7030 100644 --- a/lucene/src/java/org/apache/lucene/index/DirectoryReader.java +++ b/lucene/src/java/org/apache/lucene/index/DirectoryReader.java @@ -1026,6 +1026,6 @@ class DirectoryReader extends IndexReader implements Cloneable { @Override public PerDocValues perDocValues() throws IOException { - throw new UnsupportedOperationException("please use MultiPerDoc#getPerDocs, or wrap your IndexReader with SlowMultiReaderWrapper, if you really need a top level Fields"); + throw new UnsupportedOperationException("please use MultiPerDocValues#getPerDocs, or wrap your IndexReader with SlowMultiReaderWrapper, if you really need a top level Fields"); } } diff --git 
a/lucene/src/java/org/apache/lucene/index/FieldsEnum.java b/lucene/src/java/org/apache/lucene/index/FieldsEnum.java index 5d2d707a32d..3fa3729d45a 100644 --- a/lucene/src/java/org/apache/lucene/index/FieldsEnum.java +++ b/lucene/src/java/org/apache/lucene/index/FieldsEnum.java @@ -20,7 +20,7 @@ package org.apache.lucene.index; import java.io.IOException; import org.apache.lucene.index.values.IndexDocValues; -import org.apache.lucene.index.values.DocValuesEnum; +import org.apache.lucene.index.values.ValuesEnum; import org.apache.lucene.util.AttributeSource; /** Enumerates indexed fields. You must first call {@link diff --git a/lucene/src/java/org/apache/lucene/index/MultiFields.java b/lucene/src/java/org/apache/lucene/index/MultiFields.java index 920bcf669a3..ffe1ac557fe 100644 --- a/lucene/src/java/org/apache/lucene/index/MultiFields.java +++ b/lucene/src/java/org/apache/lucene/index/MultiFields.java @@ -23,9 +23,9 @@ import java.util.List; import java.util.ArrayList; import org.apache.lucene.index.values.IndexDocValues; -import org.apache.lucene.index.values.MultiDocValues; +import org.apache.lucene.index.values.MultiIndexDocValues; import org.apache.lucene.index.values.ValueType; -import org.apache.lucene.index.values.MultiDocValues.DocValuesIndex; +import org.apache.lucene.index.values.MultiIndexDocValues.DocValuesIndex; import java.util.concurrent.ConcurrentHashMap; import org.apache.lucene.util.ReaderUtil; import org.apache.lucene.util.ReaderUtil.Gather; // for javadocs diff --git a/lucene/src/java/org/apache/lucene/index/MultiFieldsEnum.java b/lucene/src/java/org/apache/lucene/index/MultiFieldsEnum.java index 173cd68d6c4..feaa61775ca 100644 --- a/lucene/src/java/org/apache/lucene/index/MultiFieldsEnum.java +++ b/lucene/src/java/org/apache/lucene/index/MultiFieldsEnum.java @@ -17,7 +17,7 @@ package org.apache.lucene.index; * limitations under the License. 
*/ -import org.apache.lucene.index.values.MultiDocValues; +import org.apache.lucene.index.values.MultiIndexDocValues; import org.apache.lucene.util.PriorityQueue; import org.apache.lucene.util.ReaderUtil; @@ -45,7 +45,7 @@ public final class MultiFieldsEnum extends FieldsEnum { // Re-used TermsEnum private final MultiTermsEnum terms; - private final MultiDocValues docValues; + private final MultiIndexDocValues docValues; private String currentField; @@ -55,7 +55,7 @@ public final class MultiFieldsEnum extends FieldsEnum { public MultiFieldsEnum(FieldsEnum[] subs, ReaderUtil.Slice[] subSlices) throws IOException { terms = new MultiTermsEnum(subSlices); queue = new FieldMergeQueue(subs.length); - docValues = new MultiDocValues(); + docValues = new MultiIndexDocValues(); top = new FieldsEnumWithSlice[subs.length]; List enumWithSlices = new ArrayList(); diff --git a/lucene/src/java/org/apache/lucene/index/MultiPerDocValues.java b/lucene/src/java/org/apache/lucene/index/MultiPerDocValues.java index d4a5c062534..34397fbd914 100644 --- a/lucene/src/java/org/apache/lucene/index/MultiPerDocValues.java +++ b/lucene/src/java/org/apache/lucene/index/MultiPerDocValues.java @@ -25,9 +25,9 @@ import java.util.concurrent.ConcurrentHashMap; import org.apache.lucene.index.codecs.PerDocValues; import org.apache.lucene.index.values.IndexDocValues; -import org.apache.lucene.index.values.MultiDocValues; +import org.apache.lucene.index.values.MultiIndexDocValues; import org.apache.lucene.index.values.ValueType; -import org.apache.lucene.index.values.MultiDocValues.DocValuesIndex; +import org.apache.lucene.index.values.MultiIndexDocValues.DocValuesIndex; import org.apache.lucene.util.ReaderUtil; import org.apache.lucene.util.ReaderUtil.Gather; @@ -115,7 +115,7 @@ public class MultiPerDocValues extends PerDocValues { if (result == null) { // Lazy init: first time this field is requested, we // create & add to docValues: - final List docValuesIndex = new ArrayList(); + final List docValuesIndex = new ArrayList(); int docsUpto = 0; ValueType type = null; // Gather all sub-readers that share this field @@ -126,24 +126,24 @@ public class MultiPerDocValues extends PerDocValues { if (values != null) { if (docsUpto != start) { type = values.type(); - docValuesIndex.add(new MultiDocValues.DocValuesIndex( - new MultiDocValues.DummyDocValues(start, type), docsUpto, start + docValuesIndex.add(new MultiIndexDocValues.DocValuesIndex( + new MultiIndexDocValues.DummyDocValues(start, type), docsUpto, start - docsUpto)); } - docValuesIndex.add(new MultiDocValues.DocValuesIndex(values, start, + docValuesIndex.add(new MultiIndexDocValues.DocValuesIndex(values, start, length)); docsUpto = start + length; } else if (i + 1 == subs.length && !docValuesIndex.isEmpty()) { - docValuesIndex.add(new MultiDocValues.DocValuesIndex( - new MultiDocValues.DummyDocValues(start, type), docsUpto, start + docValuesIndex.add(new MultiIndexDocValues.DocValuesIndex( + new MultiIndexDocValues.DummyDocValues(start, type), docsUpto, start - docsUpto)); } } if (docValuesIndex.isEmpty()) { return null; } - result = new MultiDocValues( + result = new MultiIndexDocValues( docValuesIndex.toArray(DocValuesIndex.EMPTY_ARRAY)); docValues.put(field, result); } diff --git a/lucene/src/java/org/apache/lucene/index/MultiReader.java b/lucene/src/java/org/apache/lucene/index/MultiReader.java index 7a943fadcd0..a67470930ac 100644 --- a/lucene/src/java/org/apache/lucene/index/MultiReader.java +++ b/lucene/src/java/org/apache/lucene/index/MultiReader.java @@ -407,6 +407,6 
@@ public class MultiReader extends IndexReader implements Cloneable { @Override public PerDocValues perDocValues() throws IOException { - throw new UnsupportedOperationException("please use MultiPerDoc#getPerDocs, or wrap your IndexReader with SlowMultiReaderWrapper, if you really need a top level Fields"); + throw new UnsupportedOperationException("please use MultiPerDocValues#getPerDocs, or wrap your IndexReader with SlowMultiReaderWrapper, if you really need a top level Fields"); } } diff --git a/lucene/src/java/org/apache/lucene/index/values/Bytes.java b/lucene/src/java/org/apache/lucene/index/values/Bytes.java index cddc69e449e..0944fc9ee4c 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Bytes.java +++ b/lucene/src/java/org/apache/lucene/index/values/Bytes.java @@ -223,7 +223,7 @@ public final class Bytes { protected abstract int maxDoc(); @Override - public DocValuesEnum getEnum(AttributeSource attrSource) throws IOException { + public ValuesEnum getEnum(AttributeSource attrSource) throws IOException { return new SourceEnum(attrSource, type(), this, maxDoc()) { @Override public int advance(int target) throws IOException { @@ -315,7 +315,7 @@ public final class Bytes { } @Override - public DocValuesEnum getEnum(AttributeSource attrSource) throws IOException { + public ValuesEnum getEnum(AttributeSource attrSource) throws IOException { return new SourceEnum(attrSource, type(), this, maxDoc()) { @Override @@ -400,7 +400,7 @@ public final class Bytes { } @Override - protected void setNextEnum(DocValuesEnum valuesEnum) { + protected void setNextEnum(ValuesEnum valuesEnum) { bytesRef = valuesEnum.bytes(); } diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java index 5513e4e06da..20f2e323546 100644 --- a/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java @@ -184,11 +184,11 @@ class FixedDerefBytesImpl { } @Override - public DocValuesEnum getEnum(AttributeSource source) throws IOException { + public ValuesEnum getEnum(AttributeSource source) throws IOException { return new DerefBytesEnum(source, cloneData(), cloneIndex(), size); } - static class DerefBytesEnum extends DocValuesEnum { + static class DerefBytesEnum extends ValuesEnum { protected final IndexInput datIn; private final PackedInts.ReaderIterator idx; protected final long fp; @@ -215,7 +215,7 @@ class FixedDerefBytesImpl { valueCount = idx.size(); } - protected void copyFrom(DocValuesEnum valuesEnum) { + protected void copyFrom(ValuesEnum valuesEnum) { bytesRef = valuesEnum.bytesRef; if (bytesRef.bytes.length < size) { bytesRef.grow(size); diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java index b5ba7e8b3cc..3a32f9892c3 100644 --- a/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java @@ -229,7 +229,7 @@ class FixedSortedBytesImpl { } @Override - public DocValuesEnum getEnum(AttributeSource source) throws IOException { + public ValuesEnum getEnum(AttributeSource source) throws IOException { // do unsorted return new DerefBytesEnum(source, cloneData(), cloneIndex(), size); } diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java 
b/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java index a28e7d2793d..7b1e1673a0b 100644 --- a/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java @@ -180,11 +180,11 @@ class FixedStraightBytesImpl { } @Override - public DocValuesEnum getEnum(AttributeSource source) throws IOException { + public ValuesEnum getEnum(AttributeSource source) throws IOException { return new FixedStraightBytesEnum(source, cloneData(), size, maxDoc); } - private static final class FixedStraightBytesEnum extends DocValuesEnum { + private static final class FixedStraightBytesEnum extends ValuesEnum { private final IndexInput datIn; private final int size; private final int maxDoc; @@ -203,7 +203,7 @@ class FixedStraightBytesImpl { fp = datIn.getFilePointer(); } - protected void copyFrom(DocValuesEnum valuesEnum) { + protected void copyFrom(ValuesEnum valuesEnum) { bytesRef = valuesEnum.bytesRef; if (bytesRef.bytes.length < size) { bytesRef.grow(size); diff --git a/lucene/src/java/org/apache/lucene/index/values/Floats.java b/lucene/src/java/org/apache/lucene/index/values/Floats.java index 52a1f2faa0f..efe26b8d0e5 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Floats.java +++ b/lucene/src/java/org/apache/lucene/index/values/Floats.java @@ -111,7 +111,7 @@ public class Floats { } @Override - protected void setNextEnum(DocValuesEnum valuesEnum) { + protected void setNextEnum(ValuesEnum valuesEnum) { floatsRef = valuesEnum.getFloat(); } @@ -305,7 +305,7 @@ public class Floats { } @Override - public DocValuesEnum getEnum(AttributeSource attrSource) + public ValuesEnum getEnum(AttributeSource attrSource) throws IOException { return new SourceEnum(attrSource, ValueType.FLOAT_32, this, maxDoc) { @Override @@ -337,7 +337,7 @@ public class Floats { } @Override - public DocValuesEnum getEnum(AttributeSource attrSource) + public ValuesEnum getEnum(AttributeSource attrSource) throws IOException { return new SourceEnum(attrSource, type(), this, maxDoc) { @Override @@ -363,7 +363,7 @@ public class Floats { } @Override - public DocValuesEnum getEnum(AttributeSource source) throws IOException { + public ValuesEnum getEnum(AttributeSource source) throws IOException { IndexInput indexInput = (IndexInput) datIn.clone(); indexInput.seek(CodecUtil.headerLength(CODEC_NAME)); // skip precision: @@ -443,7 +443,7 @@ public class Floats { } } - static abstract class FloatsEnumImpl extends DocValuesEnum { + static abstract class FloatsEnumImpl extends ValuesEnum { protected final IndexInput dataIn; protected int pos = -1; protected final int precision; diff --git a/lucene/src/java/org/apache/lucene/index/values/IndexDocValues.java b/lucene/src/java/org/apache/lucene/index/values/IndexDocValues.java index e4d68bc3ee0..32885679f24 100644 --- a/lucene/src/java/org/apache/lucene/index/values/IndexDocValues.java +++ b/lucene/src/java/org/apache/lucene/index/values/IndexDocValues.java @@ -20,15 +20,35 @@ import java.io.Closeable; import java.io.IOException; import java.util.Comparator; +import org.apache.lucene.document.IndexDocValuesField; import org.apache.lucene.index.Fields; import org.apache.lucene.index.FieldsEnum; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.codecs.Codec; +import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.BytesRef; /** - * nocommit - javadoc + * {@link IndexDocValues} provides 
a dense per-document typed storage for fast + * value access based on the Lucene internal document id. {@link IndexDocValues} + * exposes two distinct APIs:
+ * <ul>
+ * <li>via {@link Source} an entirely RAM resident API for random access</li>
+ * <li>via {@link ValuesEnum} a disk resident API for sequential access</li>
+ * </ul>
+ * {@link IndexDocValues} are exposed via + * {@link IndexReader#perDocValues()} on a per-segment basis. For best + * performance {@link IndexDocValues} should be consumed per-segment just like + * IndexReader. + *

    + * {@link IndexDocValues} are fully integrated into the {@link Codec} API. + * Custom implementations can be exposed on a per field basis via + * {@link CodecProvider}. * + * @see ValueType for limitations and default implementation documentation + * @see IndexDocValuesField for adding values to the index + * @see Codec#docsConsumer(org.apache.lucene.index.PerDocWriteState) for + * customization * @lucene.experimental */ public abstract class IndexDocValues implements Closeable { @@ -45,23 +65,23 @@ public abstract class IndexDocValues implements Closeable { /** * Returns an iterator that steps through all documents values for this - * {@link IndexDocValues} field instance. {@link DocValuesEnum} will skip document + * {@link IndexDocValues} field instance. {@link ValuesEnum} will skip document * without a value if applicable. */ - public DocValuesEnum getEnum() throws IOException { + public ValuesEnum getEnum() throws IOException { return getEnum(null); } /** * Returns an iterator that steps through all documents values for this - * {@link IndexDocValues} field instance. {@link DocValuesEnum} will skip document + * {@link IndexDocValues} field instance. {@link ValuesEnum} will skip document * without a value if applicable. *

* If an {@link AttributeSource} is supplied to this method the - * {@link DocValuesEnum} will use the given source to access implementation + * {@link ValuesEnum} will use the given source to access implementation * related attributes. */ - public abstract DocValuesEnum getEnum(AttributeSource attrSource) + public abstract ValuesEnum getEnum(AttributeSource attrSource) throws IOException; /** @@ -215,9 +235,9 @@ public abstract class IndexDocValues implements Closeable { } /** - * Returns a {@link DocValuesEnum} for this source. + * Returns a {@link ValuesEnum} for this source. */ - public DocValuesEnum getEnum() throws IOException { + public ValuesEnum getEnum() throws IOException { return getEnum(null); } @@ -229,18 +249,18 @@ public abstract class IndexDocValues implements Closeable { public abstract ValueType type(); /** - * Returns a {@link DocValuesEnum} for this source which uses the given + * Returns a {@link ValuesEnum} for this source which uses the given * {@link AttributeSource}. */ - public abstract DocValuesEnum getEnum(AttributeSource attrSource) + public abstract ValuesEnum getEnum(AttributeSource attrSource) throws IOException; } /** - * {@link DocValuesEnum} utility for {@link Source} implemenations. + * {@link ValuesEnum} utility for {@link Source} implementations. * */ - public abstract static class SourceEnum extends DocValuesEnum { + public abstract static class SourceEnum extends ValuesEnum { protected final Source source; protected final int numDocs; protected int pos = -1; @@ -284,7 +304,7 @@ public abstract class IndexDocValues implements Closeable { /** * A sorted variant of {@link Source} for byte[] values per document. *

    - * Note: {@link DocValuesEnum} obtained from a {@link SortedSource} will + * Note: {@link ValuesEnum} obtained from a {@link SortedSource} will * enumerate values in document order and not in sorted order. */ public static abstract class SortedSource extends Source { diff --git a/lucene/src/java/org/apache/lucene/index/values/Ints.java b/lucene/src/java/org/apache/lucene/index/values/Ints.java index d11b548761e..d3cf1039538 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Ints.java +++ b/lucene/src/java/org/apache/lucene/index/values/Ints.java @@ -20,8 +20,8 @@ package org.apache.lucene.index.values; import java.io.IOException; import java.util.concurrent.atomic.AtomicLong; -import org.apache.lucene.index.values.PackedIntsImpl.IntsReader; -import org.apache.lucene.index.values.PackedIntsImpl.IntsWriter; +import org.apache.lucene.index.values.IntsImpl.IntsReader; +import org.apache.lucene.index.values.IntsImpl.IntsWriter; import org.apache.lucene.store.Directory; /** diff --git a/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java b/lucene/src/java/org/apache/lucene/index/values/IntsImpl.java similarity index 96% rename from lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java rename to lucene/src/java/org/apache/lucene/index/values/IntsImpl.java index 4f3f885629a..af93a5212cb 100644 --- a/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/IntsImpl.java @@ -37,7 +37,7 @@ import org.apache.lucene.util.packed.PackedInts; * * @lucene.experimental * */ -class PackedIntsImpl { +class IntsImpl { private static final String CODEC_NAME = "Ints"; private static final byte PACKED = 0x00; @@ -163,7 +163,7 @@ class PackedIntsImpl { } @Override - protected void setNextEnum(DocValuesEnum valuesEnum) { + protected void setNextEnum(ValuesEnum valuesEnum) { intsRef = valuesEnum.getInt(); } @@ -244,7 +244,7 @@ class PackedIntsImpl { } @Override - public DocValuesEnum getEnum(AttributeSource attrSource) + public ValuesEnum getEnum(AttributeSource attrSource) throws IOException { return new SourceEnum(attrSource, type(), this, values.length) { @@ -283,7 +283,7 @@ class PackedIntsImpl { } @Override - public DocValuesEnum getEnum(AttributeSource attrSource) + public ValuesEnum getEnum(AttributeSource attrSource) throws IOException { return new SourceEnum(attrSource, type(), this, values.size()) { @Override @@ -309,11 +309,11 @@ class PackedIntsImpl { } @Override - public DocValuesEnum getEnum(AttributeSource source) throws IOException { + public ValuesEnum getEnum(AttributeSource source) throws IOException { final IndexInput input = (IndexInput) datIn.clone(); boolean success = false; try { - DocValuesEnum inst = packed ? new PackedIntsEnumImpl(source, input) + ValuesEnum inst = packed ? 
new PackedIntsEnumImpl(source, input) : new FixedIntsEnumImpl(source, input); success = true; return inst; @@ -331,7 +331,7 @@ class PackedIntsImpl { } - private static final class PackedIntsEnumImpl extends DocValuesEnum { + private static final class PackedIntsEnumImpl extends ValuesEnum { private final PackedInts.ReaderIterator ints; private long minValue; private final IndexInput dataIn; @@ -381,7 +381,7 @@ class PackedIntsImpl { } } - private static final class FixedIntsEnumImpl extends DocValuesEnum { + private static final class FixedIntsEnumImpl extends ValuesEnum { private final IndexInput dataIn; private final int maxDoc; private int pos = -1; diff --git a/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java b/lucene/src/java/org/apache/lucene/index/values/MultiIndexDocValues.java similarity index 91% rename from lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java rename to lucene/src/java/org/apache/lucene/index/values/MultiIndexDocValues.java index 3187d80a216..c6c4c4b2d3b 100644 --- a/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java +++ b/lucene/src/java/org/apache/lucene/index/values/MultiIndexDocValues.java @@ -24,9 +24,12 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.ReaderUtil; /** + * A wrapper for compound IndexReader providing access to per segment + * {@link IndexDocValues} + * * @lucene.experimental */ -public class MultiDocValues extends IndexDocValues { +public class MultiIndexDocValues extends IndexDocValues { public static class DocValuesIndex { public final static DocValuesIndex[] EMPTY_ARRAY = new DocValuesIndex[0]; @@ -44,17 +47,17 @@ public class MultiDocValues extends IndexDocValues { private DocValuesIndex[] docValuesIdx; private int[] starts; - public MultiDocValues() { + public MultiIndexDocValues() { starts = new int[0]; docValuesIdx = new DocValuesIndex[0]; } - public MultiDocValues(DocValuesIndex[] docValuesIdx) { + public MultiIndexDocValues(DocValuesIndex[] docValuesIdx) { reset(docValuesIdx); } @Override - public DocValuesEnum getEnum(AttributeSource source) throws IOException { + public ValuesEnum getEnum(AttributeSource source) throws IOException { return new MultiValuesEnum(docValuesIdx, starts); } @@ -87,7 +90,7 @@ public class MultiDocValues extends IndexDocValues { } @Override - public DocValuesEnum getEnum(AttributeSource attrSource) throws IOException { + public ValuesEnum getEnum(AttributeSource attrSource) throws IOException { return emptySoruce.getEnum(attrSource); } @@ -107,13 +110,13 @@ public class MultiDocValues extends IndexDocValues { } - private static class MultiValuesEnum extends DocValuesEnum { + private static class MultiValuesEnum extends ValuesEnum { private DocValuesIndex[] docValuesIdx; private final int maxDoc; private int currentStart; private int currentMax; private int currentDoc = -1; - private DocValuesEnum currentEnum; + private ValuesEnum currentEnum; private final int[] starts; public MultiValuesEnum(DocValuesIndex[] docValuesIdx, int[] starts) @@ -222,7 +225,7 @@ public class MultiDocValues extends IndexDocValues { } @Override - public DocValuesEnum getEnum(AttributeSource attrSource) throws IOException { + public ValuesEnum getEnum(AttributeSource attrSource) throws IOException { throw new UnsupportedOperationException(); // TODO } @@ -258,8 +261,8 @@ public class MultiDocValues extends IndexDocValues { } @Override - public DocValuesEnum getEnum(AttributeSource attrSource) throws IOException { - return DocValuesEnum.emptyEnum(type); + public 
ValuesEnum getEnum(AttributeSource attrSource) throws IOException { + return ValuesEnum.emptyEnum(type); } @Override diff --git a/lucene/src/java/org/apache/lucene/index/values/PerDocFieldValues.java b/lucene/src/java/org/apache/lucene/index/values/PerDocFieldValues.java index cbd0bb79fe2..130d5b3a507 100644 --- a/lucene/src/java/org/apache/lucene/index/values/PerDocFieldValues.java +++ b/lucene/src/java/org/apache/lucene/index/values/PerDocFieldValues.java @@ -18,14 +18,14 @@ package org.apache.lucene.index.values; */ import java.util.Comparator; -import org.apache.lucene.document.DocValuesField; +import org.apache.lucene.document.IndexDocValuesField; import org.apache.lucene.document.Fieldable; import org.apache.lucene.index.codecs.DocValuesConsumer; import org.apache.lucene.util.BytesRef; /** * Per document and field values consumed by {@link DocValuesConsumer}. - * @see DocValuesField + * @see IndexDocValuesField * @see Fieldable#setDocValues(PerDocFieldValues) * * @lucene.experimental diff --git a/lucene/src/java/org/apache/lucene/index/values/ValueType.java b/lucene/src/java/org/apache/lucene/index/values/ValueType.java index fa8f95a0700..af953f662b3 100644 --- a/lucene/src/java/org/apache/lucene/index/values/ValueType.java +++ b/lucene/src/java/org/apache/lucene/index/values/ValueType.java @@ -20,12 +20,14 @@ package org.apache.lucene.index.values; import org.apache.lucene.index.codecs.Codec; import org.apache.lucene.index.codecs.PerDocConsumer; import org.apache.lucene.index.values.IndexDocValues.SortedSource; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.packed.PackedInts; /** - * {@link ValueType} specifies the type of the {@link IndexDocValues} for a certain field. - * A {@link ValueType} only defines the data type for a field while the actual - * Implementation used to encode and decode the values depends on the field's - * {@link Codec}. It is up to the {@link Codec} implementing + * {@link ValueType} specifies the type of the {@link IndexDocValues} for a + * certain field. A {@link ValueType} only defines the data type for a field + * while the actual implementation used to encode and decode the values depends + * on the field's {@link Codec}. It is up to the {@link Codec} implementing * {@link PerDocConsumer#addValuesField(org.apache.lucene.index.FieldInfo)} and * using a different low-level implementations to write the stored values for a * field. @@ -37,50 +39,141 @@ public enum ValueType { * TODO: Add INT_32 INT_64 INT_16 & INT_8?! */ /** - * Integer values. + * Defines a 64 bit integer value. By default this type uses a simple + * compression technique based on {@link PackedInts}. Internally only the used + * value range is encoded if it fits into 2^63-1. If that range is + * exceeded the default implementation falls back to fixed size 64bit + * integers. + *

    + * NOTE: this type uses 0 as the default value without any + * distinction between provided 0 values during indexing. All + * documents without an explicit value will use 0 instead. In turn, + * {@link ValuesEnum} instances will not skip documents without an explicit + * value assigned. Custom default values must be assigned explicitly. + *

+ */ INTS, - + /** - * 32 bit floating point values. + * Defines a 32 bit floating point value. By default there is no compression + * applied. To fit custom float values into less than 32bit either a custom + * implementation is needed or values must be encoded into a + * {@link #BYTES_FIXED_STRAIGHT} type. + *

    + * NOTE: this type uses 0.0f as the default value without any + * distinction between provided 0.0f values during indexing. All + * documents without an explicit value will use 0.0f instead. In + * turn, {@link ValuesEnum} instances will not skip documents without an + * explicit value assigned. Custom default values must be assigned explicitly. + *

+ */ FLOAT_32, /** - * 64 bit floating point values. + * Defines a 64 bit floating point value. By default there is no compression + * applied. To fit custom float values into less than 64bit either a custom + * implementation is needed or values must be encoded into a + * {@link #BYTES_FIXED_STRAIGHT} type. + *

    + * NOTE: this type uses 0.0d as the default value without any + * distinction between provided 0.0d values during indexing. All + * documents without an explicit value will use 0.0d instead. In + * turn, {@link ValuesEnum} instances will not skip documents without an + * explicit value assigned. Custom default values must be assigned explicitly. + *

    */ FLOAT_64, // TODO(simonw): -- shouldn't lucene decide/detect straight vs // deref, as well fixed vs var? /** - * Fixed length straight stored byte variant + * Defines a fixed length straight stored byte variant. All values added to + * such a field must be of the same length. All bytes are stored sequentially + * for fast offset access. + *

    + * NOTE: this type uses 0-bytes based on the length of the first seen + * values as the default value without any distinction between explicitly + * provided values during indexing. All documents without an explicit value + * will use the default instead. In turn, {@link ValuesEnum} instances will + * not skip documents without an explicit value assigned. Custom default + * values must be assigned explicitly. + *

    */ BYTES_FIXED_STRAIGHT, /** - * Fixed length dereferenced (indexed) byte variant + * Defines a fixed length dereferenced (indexed) byte variant. Fields with + * this type only store distinct byte values and store an additional offset + * pointer per document to dereference the payload. + *

+ * NOTE: Fields of this type will not store values for documents without an + * explicitly provided value. If a document's value is accessed while no + * explicit value is stored the returned {@link BytesRef} will be a 0-length + * reference. In turn, {@link ValuesEnum} instances will skip over documents + * without an explicit value assigned. Custom default values must be assigned + * explicitly. + *

    */ BYTES_FIXED_DEREF, /** - * Fixed length pre-sorted byte variant + * Defines a fixed length pre-sorted byte variant. Fields with this type only + * store distinct byte values and store an additional offset pointer per + * document to dereference the payload. The stored byte payload is presorted + * and allows access via document id, ordinal and by-value. + *

+ * NOTE: Fields of this type will not store values for documents without an + * explicitly provided value. If a document's value is accessed while no + * explicit value is stored the returned {@link BytesRef} will be a 0-length + * reference. In turn, {@link ValuesEnum} instances will skip over documents + * without an explicit value assigned. Custom default values must be assigned + * explicitly. + *

+ * * @see SortedSource */ BYTES_FIXED_SORTED, /** - * Variable length straight stored byte variant + * Defines a variable length straight stored byte variant. All bytes are + * stored sequentially for compactness. Usage of this type via the + * disk-resident API might yield performance degradation since no additional + * index is used to advance by more than one document's value at a time. + *

+ * NOTE: Fields of this type will not store values for documents without an + * explicitly provided value. If a document's value is accessed while no + * explicit value is stored the returned {@link BytesRef} will be a 0-length + * reference. Yet, in contrast to dereferenced variants {@link ValuesEnum} + * instances will not skip over documents without an explicit value + * assigned. Custom default values must be assigned explicitly. + *

    */ BYTES_VAR_STRAIGHT, /** - * Variable length dereferenced (indexed) byte variant + * Defines a variable length dereferenced (indexed) byte variant. Like + * {@link #BYTES_FIXED_DEREF}, but supporting variable length values. + *

    + * NOTE: Fields of this type will not store values for documents without an + * explicitly provided value. If a document's value is accessed while no + * explicit value is stored, the returned {@link BytesRef} will be a 0-length + * reference. In turn, {@link ValuesEnum} instances will skip over documents + * without an explicit value assigned. Custom default values must be assigned + * explicitly. + *

    */ BYTES_VAR_DEREF, /** - * Variable length pre-sorted byte variant + * Defines a variable length pre-sorted byte variant. Like + * {@link #BYTES_FIXED_SORTED}, but supporting variable length values. + *

    + * NOTE: Fields of this type will not store values for documents without an + * explicitly provided value. If a document's value is accessed while no + * explicit value is stored, the returned {@link BytesRef} will be a 0-length + * reference. In turn, {@link ValuesEnum} instances will skip over documents + * without an explicit value assigned. Custom default values must be assigned + * explicitly. + *
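Summing up the NOTEs above: the straight variants enumerate every document and hand back a default for gaps, while the deref and sorted variants skip gaps entirely. A hedged sketch of iteration code tolerating both behaviors, following the advance() pattern in the tests below; 'values' and 'maxDoc' are assumed inputs:

    // Hedged sketch: advance() positions the enum at or after the target document.
    // Straight types land exactly on the target (default-filled); deref/sorted
    // types may land beyond it when the target document had no explicit value.
    ValuesEnum e = values.getEnum();
    for (int target = 0; target < maxDoc; target++) {
      int docID = e.advance(target);
      if (docID == ValuesEnum.NO_MORE_DOCS) {
        break;            // no remaining document carries an explicit value
      }
      if (docID != target) {
        target = docID;   // skipped documents had no value; resume at docID
      }
      // consume e.bytes() / e.getInt() / e.getFloat() depending on the ValueType
    }
    e.close();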

    * * @see SortedSource */ diff --git a/lucene/src/java/org/apache/lucene/index/values/DocValuesEnum.java b/lucene/src/java/org/apache/lucene/index/values/ValuesEnum.java similarity index 87% rename from lucene/src/java/org/apache/lucene/index/values/DocValuesEnum.java rename to lucene/src/java/org/apache/lucene/index/values/ValuesEnum.java index 2c9ef2b8fb9..03512073cde 100644 --- a/lucene/src/java/org/apache/lucene/index/values/DocValuesEnum.java +++ b/lucene/src/java/org/apache/lucene/index/values/ValuesEnum.java @@ -25,7 +25,7 @@ import org.apache.lucene.util.FloatsRef; import org.apache.lucene.util.LongsRef; /** - * {@link DocValuesEnum} is a {@link DocIdSetIterator} iterating byte[] + * {@link ValuesEnum} is a {@link DocIdSetIterator} iterating byte[] * , long and double stored per document. Depending on the * enum's {@link ValueType} ({@link #type()}) the enum might skip over documents that * have no value stored. Types like {@link ValueType#BYTES_VAR_STRAIGHT} might not @@ -41,7 +41,7 @@ import org.apache.lucene.util.LongsRef; * * @lucene.experimental */ -public abstract class DocValuesEnum extends DocIdSetIterator { +public abstract class ValuesEnum extends DocIdSetIterator { private AttributeSource source; private final ValueType enumType; protected BytesRef bytesRef; @@ -49,17 +49,17 @@ public abstract class DocValuesEnum extends DocIdSetIterator { protected LongsRef intsRef; /** - * Creates a new {@link DocValuesEnum} for the given type. The + * Creates a new {@link ValuesEnum} for the given type. The * {@link AttributeSource} for this enum is set to null */ - protected DocValuesEnum(ValueType enumType) { + protected ValuesEnum(ValueType enumType) { this(null, enumType); } /** - * Creates a new {@link DocValuesEnum} for the given type. + * Creates a new {@link ValuesEnum} for the given type. */ - protected DocValuesEnum(AttributeSource source, ValueType enumType) { + protected ValuesEnum(AttributeSource source, ValueType enumType) { this.source = source; this.enumType = enumType; switch (enumType) { @@ -115,7 +115,7 @@ public abstract class DocValuesEnum extends DocIdSetIterator { /** * Copies the internal state from the given enum */ - protected void copyFrom(DocValuesEnum valuesEnum) { + protected void copyFrom(ValuesEnum valuesEnum) { intsRef = valuesEnum.intsRef; floatsRef = valuesEnum.floatsRef; bytesRef = valuesEnum.bytesRef; @@ -144,10 +144,10 @@ public abstract class DocValuesEnum extends DocIdSetIterator { public abstract void close() throws IOException; /** - * Returns an empty {@link DocValuesEnum} for the given {@link ValueType}. + * Returns an empty {@link ValuesEnum} for the given {@link ValueType}. 
*/ - public static DocValuesEnum emptyEnum(ValueType type) { - return new DocValuesEnum(type) { + public static ValuesEnum emptyEnum(ValueType type) { + return new ValuesEnum(type) { @Override public int nextDoc() throws IOException { return NO_MORE_DOCS; diff --git a/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java index f049119e51a..215acd469ea 100644 --- a/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java @@ -249,7 +249,7 @@ class VarDerefBytesImpl { } @Override - public DocValuesEnum getEnum(AttributeSource source) throws IOException { + public ValuesEnum getEnum(AttributeSource source) throws IOException { return new VarDerefBytesEnum(source, cloneData(), cloneIndex()); } diff --git a/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java index ac6bbbe813e..89d4b7b1bf6 100644 --- a/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java @@ -230,11 +230,11 @@ class VarSortedBytesImpl { } @Override - public DocValuesEnum getEnum(AttributeSource source) throws IOException { + public ValuesEnum getEnum(AttributeSource source) throws IOException { return new VarSortedBytesEnum(source, cloneData(), cloneIndex()); } - private static class VarSortedBytesEnum extends DocValuesEnum { + private static class VarSortedBytesEnum extends ValuesEnum { private PackedInts.Reader docToOrdIndex; private PackedInts.Reader ordToOffsetIndex; private IndexInput idxIn; diff --git a/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java index 477055dac0e..7d71643f51a 100644 --- a/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java @@ -164,11 +164,11 @@ class VarStraightBytesImpl { } @Override - public DocValuesEnum getEnum(AttributeSource source) throws IOException { + public ValuesEnum getEnum(AttributeSource source) throws IOException { return new VarStraightBytesEnum(source, cloneData(), cloneIndex()); } - private class VarStraightBytesEnum extends DocValuesEnum { + private class VarStraightBytesEnum extends ValuesEnum { private final PackedInts.Reader addresses; private final IndexInput datIn; private final IndexInput idxIn; diff --git a/lucene/src/java/org/apache/lucene/index/values/Writer.java b/lucene/src/java/org/apache/lucene/index/values/Writer.java index 4c6132424e5..e3444545773 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Writer.java +++ b/lucene/src/java/org/apache/lucene/index/values/Writer.java @@ -99,15 +99,15 @@ public abstract class Writer extends DocValuesConsumer { /** * Records a value from the given document id. The method's implementation - * obtains the value for the document id from the last {@link DocValuesEnum} - * set to {@link #setNextEnum(DocValuesEnum)}. + * obtains the value for the document id from the last {@link ValuesEnum} + * set to {@link #setNextEnum(ValuesEnum)}. *

    * This method is used during merging to provide an implementation-agnostic * default merge implementation. *

    *

    * The given document id must be the same document id returned from - * {@link DocValuesEnum#docID()} when this method is called. All documents IDs + * {@link ValuesEnum#docID()} when this method is called. All document IDs * between the given ID and the previously given ID, or 0 if the * method is called for the first time, are filled with default values depending on * the {@link Writer} implementation. The given document ID must always be * greater than the previous ID. */ protected abstract void add(int docID) throws IOException; /** - * Sets the next {@link DocValuesEnum} to consume values from on calls to + * Sets the next {@link ValuesEnum} to consume values from on calls to * {@link #add(int)} * * @param valuesEnum - * the next {@link DocValuesEnum}, this must not be null + * the next {@link ValuesEnum}, this must not be null */ - protected abstract void setNextEnum(DocValuesEnum valuesEnum); + protected abstract void setNextEnum(ValuesEnum valuesEnum); /** * Finish writing and close any files and resources used by this Writer. @@ -140,7 +140,7 @@ public abstract class Writer extends DocValuesConsumer { // simply override this and decide if they want to merge // segments using this generic implementation or if a bulk merge is possible // / feasible. - final DocValuesEnum valEnum = state.reader.getEnum(); + final ValuesEnum valEnum = state.reader.getEnum(); assert valEnum != null; try { setNextEnum(valEnum); // set the current enum we are working on - the @@ -150,11 +150,11 @@ public abstract class Writer extends DocValuesConsumer { final Bits bits = state.bits; final int docCount = state.docCount; int currentDocId; - if ((currentDocId = valEnum.advance(0)) != DocValuesEnum.NO_MORE_DOCS) { + if ((currentDocId = valEnum.advance(0)) != ValuesEnum.NO_MORE_DOCS) { for (int i = 0; i < docCount; i++) { if (bits == null || !bits.get(i)) { if (currentDocId < i) { - if ((currentDocId = valEnum.advance(i)) == DocValuesEnum.NO_MORE_DOCS) { + if ((currentDocId = valEnum.advance(i)) == ValuesEnum.NO_MORE_DOCS) { break; // advance can jump over default values } } } diff --git a/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java b/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java index 4f6d5759a46..ea1f8911fb3 100644 --- a/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java +++ b/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java @@ -24,7 +24,7 @@ import java.util.Random; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; -import org.apache.lucene.document.DocValuesField; +import org.apache.lucene.document.IndexDocValuesField; import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexWriter; // javadoc import org.apache.lucene.index.codecs.CodecProvider; @@ -173,7 +173,7 @@ public class RandomIndexWriter implements Closeable { String name = "random_" + type.name() + "" + docValuesFieldPrefix; if ("PreFlex".equals(codecProvider.getFieldCodec(name)) || doc.getFieldable(name) != null) return; - DocValuesField docValuesField = new DocValuesField(name); + IndexDocValuesField docValuesField = new IndexDocValuesField(name); switch (type) { case BYTES_FIXED_DEREF: case BYTES_FIXED_SORTED: diff --git a/lucene/src/test/org/apache/lucene/index/values/TestDocValues.java b/lucene/src/test/org/apache/lucene/index/values/TestDocValues.java index 42d32a0ee41..c1365d824ac 100644 ---
a/lucene/src/test/org/apache/lucene/index/values/TestDocValues.java +++ b/lucene/src/test/org/apache/lucene/index/values/TestDocValues.java @@ -83,7 +83,7 @@ public class TestDocValues extends LuceneTestCase { IndexDocValues r = Bytes.getValues(dir, "test", mode, fixedSize, maxDoc); for (int iter = 0; iter < 2; iter++) { - DocValuesEnum bytesEnum = getEnum(r); + ValuesEnum bytesEnum = getEnum(r); assertNotNull("enum is null", bytesEnum); BytesRef ref = bytesEnum.bytes(); @@ -94,8 +94,8 @@ public class TestDocValues extends LuceneTestCase { assertEquals("doc: " + idx + " lenLeft: " + values[idx].length() + " lenRight: " + utf8String.length(), values[idx], utf8String); } - assertEquals(DocValuesEnum.NO_MORE_DOCS, bytesEnum.advance(maxDoc)); - assertEquals(DocValuesEnum.NO_MORE_DOCS, bytesEnum.advance(maxDoc + 1)); + assertEquals(ValuesEnum.NO_MORE_DOCS, bytesEnum.advance(maxDoc)); + assertEquals(ValuesEnum.NO_MORE_DOCS, bytesEnum.advance(maxDoc + 1)); bytesEnum.close(); } @@ -209,7 +209,7 @@ public class TestDocValues extends LuceneTestCase { } for (int iter = 0; iter < 2; iter++) { - DocValuesEnum iEnum = getEnum(r); + ValuesEnum iEnum = getEnum(r); LongsRef ints = iEnum.getInt(); for (int i = 0; i < NUM_VALUES + additionalDocs; i++) { assertEquals(i, iEnum.nextDoc()); @@ -219,12 +219,12 @@ public class TestDocValues extends LuceneTestCase { assertEquals(0, ints.get()); } } - assertEquals(DocValuesEnum.NO_MORE_DOCS, iEnum.nextDoc()); + assertEquals(ValuesEnum.NO_MORE_DOCS, iEnum.nextDoc()); iEnum.close(); } for (int iter = 0; iter < 2; iter++) { - DocValuesEnum iEnum = getEnum(r); + ValuesEnum iEnum = getEnum(r); LongsRef ints = iEnum.getInt(); for (int i = 0; i < NUM_VALUES + additionalDocs; i += 1 + random.nextInt(25)) { assertEquals(i, iEnum.advance(i)); @@ -234,7 +234,7 @@ public class TestDocValues extends LuceneTestCase { assertEquals(0, ints.get()); } } - assertEquals(DocValuesEnum.NO_MORE_DOCS, iEnum.advance(NUM_VALUES + additionalDocs)); + assertEquals(ValuesEnum.NO_MORE_DOCS, iEnum.advance(NUM_VALUES + additionalDocs)); iEnum.close(); } r.close(); @@ -272,7 +272,7 @@ public class TestDocValues extends LuceneTestCase { } for (int iter = 0; iter < 2; iter++) { - DocValuesEnum fEnum = getEnum(r); + ValuesEnum fEnum = getEnum(r); FloatsRef floats = fEnum.getFloat(); for (int i = 0; i < NUM_VALUES + additionalValues; i++) { assertEquals(i, fEnum.nextDoc()); @@ -282,11 +282,11 @@ public class TestDocValues extends LuceneTestCase { assertEquals(0.0d, floats.get(), delta); } } - assertEquals(DocValuesEnum.NO_MORE_DOCS, fEnum.nextDoc()); + assertEquals(ValuesEnum.NO_MORE_DOCS, fEnum.nextDoc()); fEnum.close(); } for (int iter = 0; iter < 2; iter++) { - DocValuesEnum fEnum = getEnum(r); + ValuesEnum fEnum = getEnum(r); FloatsRef floats = fEnum.getFloat(); for (int i = 0; i < NUM_VALUES + additionalValues; i += 1 + random.nextInt(25)) { assertEquals(i, fEnum.advance(i)); @@ -296,7 +296,7 @@ public class TestDocValues extends LuceneTestCase { assertEquals(0.0d, floats.get(), delta); } } - assertEquals(DocValuesEnum.NO_MORE_DOCS, fEnum.advance(NUM_VALUES + additionalValues)); + assertEquals(ValuesEnum.NO_MORE_DOCS, fEnum.advance(NUM_VALUES + additionalValues)); fEnum.close(); } @@ -308,7 +308,7 @@ public class TestDocValues extends LuceneTestCase { runTestFloats(8, 0.0); } - private DocValuesEnum getEnum(IndexDocValues values) throws IOException { + private ValuesEnum getEnum(IndexDocValues values) throws IOException { return random.nextBoolean() ? 
values.getEnum() : getSource(values).getEnum(); } diff --git a/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java b/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java index 910fe386a19..20ffa085e80 100644 --- a/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java +++ b/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java @@ -26,7 +26,7 @@ import java.util.List; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.AbstractField; -import org.apache.lucene.document.DocValuesField; +import org.apache.lucene.document.IndexDocValuesField; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.Field.Index; @@ -64,10 +64,6 @@ import org.junit.Before; */ public class TestDocValuesIndexing extends LuceneTestCase { /* - * TODO: Roadmap to land on trunk - * - * - Add documentation for: - * - DocValues * - add test for unoptimized case with deletes * - add multithreaded tests / integrate into stress indexing? */ @@ -87,7 +83,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { IndexWriter writer = new IndexWriter(dir, writerConfig(false)); for (int i = 0; i < 5; i++) { Document doc = new Document(); - DocValuesField valuesField = new DocValuesField("docId"); + IndexDocValuesField valuesField = new IndexDocValuesField("docId"); valuesField.setInt(i); doc.add(valuesField); doc.add(new Field("docId", "" + i, Store.NO, Index.ANALYZED)); @@ -198,10 +194,10 @@ public class TestDocValuesIndexing extends LuceneTestCase { // check values IndexReader merged = IndexReader.open(w, true); - DocValuesEnum vE_1 = getValuesEnum(getDocValues(r_1, first.name())); - DocValuesEnum vE_2 = getValuesEnum(getDocValues(r_2, second.name())); - DocValuesEnum vE_1_merged = getValuesEnum(getDocValues(merged, first.name())); - DocValuesEnum vE_2_merged = getValuesEnum(getDocValues(merged, second + ValuesEnum vE_1 = getValuesEnum(getDocValues(r_1, first.name())); + ValuesEnum vE_2 = getValuesEnum(getDocValues(r_2, second.name())); + ValuesEnum vE_1_merged = getValuesEnum(getDocValues(merged, first.name())); + ValuesEnum vE_2_merged = getValuesEnum(getDocValues(merged, second .name())); switch (second) { // these variants don't advance over missing values case BYTES_FIXED_STRAIGHT: @@ -219,10 +215,10 @@ public class TestDocValuesIndexing extends LuceneTestCase { assertEquals(msg, i, vE_2.nextDoc()); assertEquals(msg, i + valuesPerIndex, vE_2_merged.nextDoc()); } - assertEquals(msg, DocValuesEnum.NO_MORE_DOCS, vE_1.nextDoc()); - assertEquals(msg, DocValuesEnum.NO_MORE_DOCS, vE_2.nextDoc()); - assertEquals(msg, DocValuesEnum.NO_MORE_DOCS, vE_1_merged.advance(valuesPerIndex*2)); - assertEquals(msg, DocValuesEnum.NO_MORE_DOCS, vE_2_merged.nextDoc()); + assertEquals(msg, ValuesEnum.NO_MORE_DOCS, vE_1.nextDoc()); + assertEquals(msg, ValuesEnum.NO_MORE_DOCS, vE_2.nextDoc()); + assertEquals(msg, ValuesEnum.NO_MORE_DOCS, vE_1_merged.advance(valuesPerIndex*2)); + assertEquals(msg, ValuesEnum.NO_MORE_DOCS, vE_2_merged.nextDoc()); // close resources r_1.close(); @@ -274,7 +270,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { assertEquals("index " + i, 0, value); } - DocValuesEnum intsEnum = getValuesEnum(intsReader); + ValuesEnum intsEnum = getValuesEnum(intsReader); assertTrue(intsEnum.advance(base) >= base); intsEnum = getValuesEnum(intsReader); @@ -303,7 +299,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { 
assertEquals(val + " failed for doc: " + i + " base: " + base, 0.0d, value, 0.0d); } - DocValuesEnum floatEnum = getValuesEnum(floatReader); + ValuesEnum floatEnum = getValuesEnum(floatReader); assertTrue(floatEnum.advance(base) >= base); floatEnum = getValuesEnum(floatReader); @@ -388,7 +384,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { assertNotNull("expected none null - " + msg, br); assertEquals(0, br.length); // make sure we advance at least until base - DocValuesEnum bytesEnum = getValuesEnum(bytesReader); + ValuesEnum bytesEnum = getValuesEnum(bytesReader); try { final int advancedTo = bytesEnum.advance(0); @@ -403,7 +399,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { } } - DocValuesEnum bytesEnum = getValuesEnum(bytesReader); + ValuesEnum bytesEnum = getValuesEnum(bytesReader); final BytesRef enumRef = bytesEnum.bytes(); // test the actual doc values added in this iteration assertEquals(base + numRemainingValues, r.numDocs()); @@ -480,9 +476,9 @@ public class TestDocValuesIndexing extends LuceneTestCase { return source; } - private DocValuesEnum getValuesEnum(IndexDocValues values) throws IOException { - DocValuesEnum valuesEnum; - if (!(values instanceof MultiDocValues) && random.nextInt(10) == 0) { + private ValuesEnum getValuesEnum(IndexDocValues values) throws IOException { + ValuesEnum valuesEnum; + if (!(values instanceof MultiIndexDocValues) && random.nextInt(10) == 0) { // TODO not supported by MultiDocValues yet! valuesEnum = getSource(values).getEnum(); } else { @@ -511,11 +507,11 @@ public class TestDocValuesIndexing extends LuceneTestCase { OpenBitSet deleted = new OpenBitSet(numValues); Document doc = new Document(); Index idx = IDX_VALUES[random.nextInt(IDX_VALUES.length)]; - AbstractField field = random.nextBoolean() ? new DocValuesField(value.name()) + AbstractField field = random.nextBoolean() ? new IndexDocValuesField(value.name()) : newField(value.name(), _TestUtil.randomRealisticUnicodeString(random, 10), idx == Index.NO ? Store.YES : Store.NO, idx); doc.add(field); - DocValuesField valField = new DocValuesField("prototype"); + IndexDocValuesField valField = new IndexDocValuesField("prototype"); final BytesRef bytesRef = new BytesRef(); final String idBase = value.name() + "_"; diff --git a/lucene/src/test/org/apache/lucene/search/TestSort.java b/lucene/src/test/org/apache/lucene/search/TestSort.java index 4c81a18455a..170857b20a8 100644 --- a/lucene/src/test/org/apache/lucene/search/TestSort.java +++ b/lucene/src/test/org/apache/lucene/search/TestSort.java @@ -25,7 +25,7 @@ import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; import org.apache.lucene.analysis.MockAnalyzer; -import org.apache.lucene.document.DocValuesField; +import org.apache.lucene.document.IndexDocValuesField; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.CorruptIndexException; @@ -124,13 +124,13 @@ public class TestSort extends LuceneTestCase { doc.add (new Field ("contents", data[i][1], Field.Store.NO, Field.Index.ANALYZED)); if (data[i][2] != null) { Field f = supportsDocValues ? - DocValuesField.set(new Field ("int", data[i][2], Field.Store.NO, Field.Index.NOT_ANALYZED), ValueType.INTS) + IndexDocValuesField.set(new Field ("int", data[i][2], Field.Store.NO, Field.Index.NOT_ANALYZED), ValueType.INTS) : new Field ("int", data[i][2], Field.Store.NO, Field.Index.NOT_ANALYZED); doc.add(f); } if (data[i][3] != null) { Field f = supportsDocValues ? 
- DocValuesField.set(new Field ("float", data[i][3], Field.Store.NO, Field.Index.NOT_ANALYZED), ValueType.FLOAT_32) + IndexDocValuesField.set(new Field ("float", data[i][3], Field.Store.NO, Field.Index.NOT_ANALYZED), ValueType.FLOAT_32) : new Field ("float", data[i][3], Field.Store.NO, Field.Index.NOT_ANALYZED); doc.add(f); } @@ -140,7 +140,7 @@ public class TestSort extends LuceneTestCase { if (data[i][7] != null) doc.add (new Field ("long", data[i][7], Field.Store.NO, Field.Index.NOT_ANALYZED)); if (data[i][8] != null) { Field f = supportsDocValues ? - DocValuesField.set(new Field ("double", data[i][8], Field.Store.NO, Field.Index.NOT_ANALYZED), ValueType.FLOAT_64) + IndexDocValuesField.set(new Field ("double", data[i][8], Field.Store.NO, Field.Index.NOT_ANALYZED), ValueType.FLOAT_64) : new Field ("double", data[i][8], Field.Store.NO, Field.Index.NOT_ANALYZED); doc.add(f); } From c19a2e84a9bd69009e4837be96befc5b672e8288 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Fri, 3 Jun 2011 16:40:17 +0000 Subject: [PATCH 052/116] remove missed contribs from merge git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1131102 13f79535-47bb-0310-9956-ffa450edef68 --- dev-tools/idea/lucene/contrib/ant/ant.iml | 31 ------------------- .../idea/lucene/contrib/db/bdb-je/bdb-je.iml | 27 ---------------- dev-tools/idea/lucene/contrib/db/bdb/bdb.iml | 27 ---------------- dev-tools/idea/lucene/contrib/lucli/lucli.iml | 28 ----------------- dev-tools/idea/lucene/contrib/swing/swing.iml | 19 ------------ 5 files changed, 132 deletions(-) delete mode 100644 dev-tools/idea/lucene/contrib/ant/ant.iml delete mode 100644 dev-tools/idea/lucene/contrib/db/bdb-je/bdb-je.iml delete mode 100644 dev-tools/idea/lucene/contrib/db/bdb/bdb.iml delete mode 100644 dev-tools/idea/lucene/contrib/lucli/lucli.iml delete mode 100644 dev-tools/idea/lucene/contrib/swing/swing.iml diff --git a/dev-tools/idea/lucene/contrib/ant/ant.iml b/dev-tools/idea/lucene/contrib/ant/ant.iml deleted file mode 100644 index 8987c57a65f..00000000000 --- a/dev-tools/idea/lucene/contrib/ant/ant.iml +++ /dev/null @@ -1,31 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/dev-tools/idea/lucene/contrib/db/bdb-je/bdb-je.iml b/dev-tools/idea/lucene/contrib/db/bdb-je/bdb-je.iml deleted file mode 100644 index af2667fdc24..00000000000 --- a/dev-tools/idea/lucene/contrib/db/bdb-je/bdb-je.iml +++ /dev/null @@ -1,27 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/dev-tools/idea/lucene/contrib/db/bdb/bdb.iml b/dev-tools/idea/lucene/contrib/db/bdb/bdb.iml deleted file mode 100644 index af2667fdc24..00000000000 --- a/dev-tools/idea/lucene/contrib/db/bdb/bdb.iml +++ /dev/null @@ -1,27 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/dev-tools/idea/lucene/contrib/lucli/lucli.iml b/dev-tools/idea/lucene/contrib/lucli/lucli.iml deleted file mode 100644 index 6d61e7ec871..00000000000 --- a/dev-tools/idea/lucene/contrib/lucli/lucli.iml +++ /dev/null @@ -1,28 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/dev-tools/idea/lucene/contrib/swing/swing.iml b/dev-tools/idea/lucene/contrib/swing/swing.iml deleted file mode 100644 index a84cc08be50..00000000000 --- a/dev-tools/idea/lucene/contrib/swing/swing.iml +++ /dev/null @@ -1,19 +0,0 @@ - - - - - - - - - - - - - - - - - - - From a26db5db69dc3716098d0d5c414194c97ef81818 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Fri, 3 Jun 2011 16:41:28 +0000 Subject: [PATCH 053/116] 
LUCENE-3108: remove dead code / comment git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1131103 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/lucene/index/MultiFieldsEnum.java | 33 ------------------- 1 file changed, 33 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/index/MultiFieldsEnum.java b/lucene/src/java/org/apache/lucene/index/MultiFieldsEnum.java index feaa61775ca..02b52d703d1 100644 --- a/lucene/src/java/org/apache/lucene/index/MultiFieldsEnum.java +++ b/lucene/src/java/org/apache/lucene/index/MultiFieldsEnum.java @@ -149,38 +149,5 @@ public final class MultiFieldsEnum extends FieldsEnum { return fieldsA.current.compareTo(fieldsB.current) < 0; } } - -// @Override -// public DocValues docValues() throws IOException { -// final List docValuesIndex = new ArrayList(); -// int docsUpto = 0; -// Type type = null; -// final int numEnums = enumWithSlices.length; -// for (int i = 0; i < numEnums; i++) { -// FieldsEnumWithSlice withSlice = enumWithSlices[i]; -// Slice slice = withSlice.slice; -// final DocValues values = withSlice.fields.docValues(); -// final int start = slice.start; -// final int length = slice.length; -// if (values != null && currentField.equals(withSlice.current)) { -// if (docsUpto != start) { -// type = values.type(); -// docValuesIndex.add(new MultiDocValues.DocValuesIndex( -// new MultiDocValues.DummyDocValues(start, type), docsUpto, start -// - docsUpto)); -// } -// docValuesIndex.add(new MultiDocValues.DocValuesIndex(values, start, -// length)); -// docsUpto = start + length; -// -// } else if (i + 1 == numEnums && !docValuesIndex.isEmpty()) { -// docValuesIndex.add(new MultiDocValues.DocValuesIndex( -// new MultiDocValues.DummyDocValues(start, type), docsUpto, start -// - docsUpto)); -// } -// } -// return docValuesIndex.isEmpty() ? null : docValues.reset(docValuesIndex -// .toArray(MultiDocValues.DocValuesIndex.EMPTY_ARRAY)); -// } } From 794abeff8129ef0a42afbc817be7ef265da4ff57 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Fri, 3 Jun 2011 20:51:31 +0000 Subject: [PATCH 054/116] remove old icu jar git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1131232 13f79535-47bb-0310-9956-ffa450edef68 --- modules/analysis/icu/lib/icu4j-4_6.jar | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 modules/analysis/icu/lib/icu4j-4_6.jar diff --git a/modules/analysis/icu/lib/icu4j-4_6.jar b/modules/analysis/icu/lib/icu4j-4_6.jar deleted file mode 100644 index 918a187f1a7..00000000000 --- a/modules/analysis/icu/lib/icu4j-4_6.jar +++ /dev/null @@ -1,2 +0,0 @@ -AnyObjectId[fad20cf105a9effe8677440bdcc6fcf0a82823b2] was removed in git history. -Apache SVN contains full history. \ No newline at end of file From a3a2899c92500ef4276b356a8b3e4af2fea87457 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Sat, 4 Jun 2011 06:24:17 +0000 Subject: [PATCH 055/116] remove leftover git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1131325 13f79535-47bb-0310-9956-ffa450edef68 --- solr/contrib/analysis-extras/lib/icu4j-4_6.jar | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 solr/contrib/analysis-extras/lib/icu4j-4_6.jar diff --git a/solr/contrib/analysis-extras/lib/icu4j-4_6.jar b/solr/contrib/analysis-extras/lib/icu4j-4_6.jar deleted file mode 100644 index 918a187f1a7..00000000000 --- a/solr/contrib/analysis-extras/lib/icu4j-4_6.jar +++ /dev/null @@ -1,2 +0,0 @@ -AnyObjectId[fad20cf105a9effe8677440bdcc6fcf0a82823b2] was removed in git history. 
-Apache SVN contains full history. \ No newline at end of file From 79e8ecb73fa4cf9d1dea8c2a855ebea3d8dbe723 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Sat, 4 Jun 2011 07:21:31 +0000 Subject: [PATCH 056/116] remove leftover git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1131344 13f79535-47bb-0310-9956-ffa450edef68 --- solr/contrib/extraction/lib/icu4j-4_6.jar | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 solr/contrib/extraction/lib/icu4j-4_6.jar diff --git a/solr/contrib/extraction/lib/icu4j-4_6.jar b/solr/contrib/extraction/lib/icu4j-4_6.jar deleted file mode 100644 index 918a187f1a7..00000000000 --- a/solr/contrib/extraction/lib/icu4j-4_6.jar +++ /dev/null @@ -1,2 +0,0 @@ -AnyObjectId[fad20cf105a9effe8677440bdcc6fcf0a82823b2] was removed in git history. -Apache SVN contains full history. \ No newline at end of file From fa5b875281d6ce3a9eab83374cb883f730cf4c59 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Sat, 4 Jun 2011 10:30:19 +0000 Subject: [PATCH 057/116] LUCENE-3170: Fixed wrong in-memory ValuesEnum on BYTES_VAR_STRAIGHT type git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1131367 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene/index/values/VarStraightBytesImpl.java | 14 ++++++++++++++ .../index/values/TestDocValuesIndexing.java | 15 +++++++-------- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java index 7d71643f51a..8d0bb19c04e 100644 --- a/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java @@ -146,6 +146,20 @@ class VarStraightBytesImpl { : (int) (addresses.get(1 + docID) - address); return data.fillSlice(bytesRef, address, length); } + + @Override + public ValuesEnum getEnum(AttributeSource attrSource) throws IOException { + return new SourceEnum(attrSource, type(), this, maxDoc()) { + @Override + public int advance(int target) throws IOException { + if (target >= numDocs) { + return pos = NO_MORE_DOCS; + } + source.getBytes(target, bytesRef); + return pos = target; + } + }; + } @Override public int getValueCount() { diff --git a/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java b/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java index 20ffa085e80..f76bb40af57 100644 --- a/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java +++ b/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java @@ -207,6 +207,13 @@ public class TestDocValuesIndexing extends LuceneTestCase { case INTS: assertEquals(msg, valuesPerIndex-1, vE_2_merged.advance(valuesPerIndex-1)); } +// switch (first) { // these variants don't advance over missing values +// case BYTES_FIXED_STRAIGHT: +// case FLOAT_32: +// case FLOAT_64: +// case INTS: +// assertEquals(msg, valuesPerIndex-1, vE_1_merged.advance(valuesPerIndex-1)); +// } for (int i = 0; i < valuesPerIndex; i++) { assertEquals(msg, i, vE_1.nextDoc()); @@ -385,17 +392,9 @@ public class TestDocValuesIndexing extends LuceneTestCase { assertEquals(0, br.length); // make sure we advance at least until base ValuesEnum bytesEnum = getValuesEnum(bytesReader); - try { - final int advancedTo = bytesEnum.advance(0); assertTrue(byteIndexValue.name() + " advanced failed base:" + base + " advancedTo: " + advancedTo, base <= advancedTo); - 
}catch(Throwable e) { - final int advancedTo = bytesEnum.advance(0); - assertTrue(byteIndexValue.name() + " advanced failed base:" + base - + " advancedTo: " + advancedTo, base <= advancedTo); - - } } } From 9b1c6a44a2e2457c146791066551e726981e10d5 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Sat, 4 Jun 2011 10:32:08 +0000 Subject: [PATCH 058/116] remove dead code git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1131369 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/lucene/index/values/TestDocValuesIndexing.java | 7 ------- 1 file changed, 7 deletions(-) diff --git a/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java b/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java index f76bb40af57..e31b54dec9d 100644 --- a/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java +++ b/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java @@ -207,13 +207,6 @@ public class TestDocValuesIndexing extends LuceneTestCase { case INTS: assertEquals(msg, valuesPerIndex-1, vE_2_merged.advance(valuesPerIndex-1)); } -// switch (first) { // these variants don't advance over missing values -// case BYTES_FIXED_STRAIGHT: -// case FLOAT_32: -// case FLOAT_64: -// case INTS: -// assertEquals(msg, valuesPerIndex-1, vE_1_merged.advance(valuesPerIndex-1)); -// } for (int i = 0; i < valuesPerIndex; i++) { assertEquals(msg, i, vE_1.nextDoc()); From ff60d299456a43a5227f93f114dd1d9dc8c1eef7 Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Sun, 5 Jun 2011 14:23:17 +0000 Subject: [PATCH 059/116] small cleanups git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1132418 13f79535-47bb-0310-9956-ffa450edef68 --- .../index/codecs/appending/AppendingCodec.java | 7 +++---- .../apache/lucene/document/AbstractField.java | 2 +- .../org/apache/lucene/index/FieldsEnum.java | 2 -- .../apache/lucene/index/IndexFileNames.java | 7 ++++++- .../org/apache/lucene/index/IndexReader.java | 13 +++++++------ .../org/apache/lucene/index/MultiFields.java | 7 +------ .../apache/lucene/index/MultiPerDocValues.java | 8 ++++---- .../apache/lucene/index/PerDocWriteState.java | 4 ---- .../org/apache/lucene/index/SegmentInfo.java | 1 - .../apache/lucene/index/SegmentReadState.java | 4 ---- .../apache/lucene/index/SegmentWriteState.java | 4 ---- .../lucene/index/codecs/BlockTermsReader.java | 5 ++--- .../lucene/index/codecs/BlockTermsWriter.java | 2 +- .../lucene/index/codecs/CodecProvider.java | 4 +++- .../index/codecs/DefaultDocValuesConsumer.java | 1 + .../index/codecs/DefaultDocValuesProducer.java | 1 - .../lucene/index/codecs/FieldsConsumer.java | 3 +-- .../index/codecs/FixedGapTermsIndexReader.java | 4 ++-- .../index/codecs/FixedGapTermsIndexWriter.java | 2 +- .../lucene/index/codecs/PerDocConsumer.java | 2 +- .../lucene/index/codecs/PerDocValues.java | 2 +- .../codecs/VariableGapTermsIndexReader.java | 4 ++-- .../codecs/VariableGapTermsIndexWriter.java | 2 +- .../index/codecs/pulsing/PulsingCodec.java | 7 +++---- .../codecs/sep/SepPostingsReaderImpl.java | 13 ++++++------- .../codecs/sep/SepPostingsWriterImpl.java | 10 +++++----- .../codecs/simpletext/SimpleTextCodec.java | 5 ++--- .../simpletext/SimpleTextFieldsReader.java | 2 +- .../simpletext/SimpleTextFieldsWriter.java | 2 +- .../index/codecs/standard/StandardCodec.java | 9 +++------ .../standard/StandardPostingsReader.java | 6 +++--- .../standard/StandardPostingsWriter.java | 4 ++-- .../index/values/FixedDerefBytesImpl.java | 2 +- 
.../index/values/FixedStraightBytesImpl.java | 4 ++-- .../org/apache/lucene/index/values/Floats.java | 2 -- .../index/values/MultiIndexDocValues.java | 9 --------- .../java/org/apache/lucene/util/FloatsRef.java | 4 ++-- .../util/packed/PackedReaderIterator.java | 6 +++--- .../apache/lucene/index/RandomIndexWriter.java | 8 ++++++++ .../mockintblock/MockFixedIntBlockCodec.java | 8 +++----- .../MockVariableIntBlockCodec.java | 8 +++----- .../codecs/mockrandom/MockRandomCodec.java | 18 ++++++++---------- .../index/codecs/mocksep/MockSepCodec.java | 7 +++---- .../org/apache/lucene/index/TestCodecs.java | 1 - .../apache/lucene/index/TestDocTermOrds.java | 7 +++---- 45 files changed, 100 insertions(+), 133 deletions(-) diff --git a/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingCodec.java b/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingCodec.java index 8e46e0420ce..77b2eaf38eb 100644 --- a/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingCodec.java +++ b/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingCodec.java @@ -135,10 +135,9 @@ public class AppendingCodec extends Codec { @Override public void files(Directory dir, SegmentInfo segmentInfo, int codecId, Set files) throws IOException { - final String codecIdAsString = "" + codecId; - StandardPostingsReader.files(dir, segmentInfo, codecIdAsString, files); - BlockTermsReader.files(dir, segmentInfo, codecIdAsString, files); - FixedGapTermsIndexReader.files(dir, segmentInfo, codecIdAsString, files); + StandardPostingsReader.files(dir, segmentInfo, codecId, files); + BlockTermsReader.files(dir, segmentInfo, codecId, files); + FixedGapTermsIndexReader.files(dir, segmentInfo, codecId, files); DefaultDocValuesConsumer.files(dir, segmentInfo, codecId, files); } diff --git a/lucene/src/java/org/apache/lucene/document/AbstractField.java b/lucene/src/java/org/apache/lucene/document/AbstractField.java index 344aa9f7481..b1144dc3be6 100755 --- a/lucene/src/java/org/apache/lucene/document/AbstractField.java +++ b/lucene/src/java/org/apache/lucene/document/AbstractField.java @@ -18,7 +18,7 @@ package org.apache.lucene.document; import org.apache.lucene.search.PhraseQuery; // for javadocs import org.apache.lucene.search.spans.SpanQuery; // for javadocs import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.index.FieldInvertState; +import org.apache.lucene.index.FieldInvertState; // for javadocs import org.apache.lucene.index.values.PerDocFieldValues; import org.apache.lucene.index.values.ValueType; import org.apache.lucene.util.StringHelper; // for javadocs diff --git a/lucene/src/java/org/apache/lucene/index/FieldsEnum.java b/lucene/src/java/org/apache/lucene/index/FieldsEnum.java index 3fa3729d45a..68e8993a26b 100644 --- a/lucene/src/java/org/apache/lucene/index/FieldsEnum.java +++ b/lucene/src/java/org/apache/lucene/index/FieldsEnum.java @@ -19,8 +19,6 @@ package org.apache.lucene.index; import java.io.IOException; -import org.apache.lucene.index.values.IndexDocValues; -import org.apache.lucene.index.values.ValuesEnum; import org.apache.lucene.util.AttributeSource; /** Enumerates indexed fields. 
You must first call {@link diff --git a/lucene/src/java/org/apache/lucene/index/IndexFileNames.java b/lucene/src/java/org/apache/lucene/index/IndexFileNames.java index 66bebfadc7d..0ba80eede94 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexFileNames.java +++ b/lucene/src/java/org/apache/lucene/index/IndexFileNames.java @@ -207,7 +207,12 @@ public final class IndexFileNames { return segmentName; } } - + + /** Sugar for passing "" + name instead */ + public static String segmentFileName(String segmentName, int name, String ext) { + return segmentFileName(segmentName, ""+name, ext); + } + /** * Returns true if the given filename ends with the given extension. One * should provide a pure extension, without '.'. diff --git a/lucene/src/java/org/apache/lucene/index/IndexReader.java b/lucene/src/java/org/apache/lucene/index/IndexReader.java index 7b091d5ccea..3f766fd2720 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexReader.java +++ b/lucene/src/java/org/apache/lucene/index/IndexReader.java @@ -1056,9 +1056,10 @@ public abstract class IndexReader implements Cloneable,Closeable { protected abstract void doSetNorm(int doc, String field, byte value) throws CorruptIndexException, IOException; - /** Flex API: returns {@link Fields} for this reader. - * This method may return null if the reader has no - * postings. + /** + * Returns {@link Fields} for this reader. + * This method may return null if the reader has no + * postings. * *

    NOTE: if this is a multi reader ({@link * #getSequentialSubReaders} is not null) then this @@ -1071,9 +1072,9 @@ public abstract class IndexReader implements Cloneable,Closeable { public abstract Fields fields() throws IOException; /** - * Flex API: returns {@link PerDocValues} for this reader. - * This method may return null if the reader has no per-document - * values stored. + * Returns {@link PerDocValues} for this reader. + * This method may return null if the reader has no per-document + * values stored. * *

    NOTE: if this is a multi reader ({@link * #getSequentialSubReaders} is not null) then this diff --git a/lucene/src/java/org/apache/lucene/index/MultiFields.java b/lucene/src/java/org/apache/lucene/index/MultiFields.java index ffe1ac557fe..0e973f438a8 100644 --- a/lucene/src/java/org/apache/lucene/index/MultiFields.java +++ b/lucene/src/java/org/apache/lucene/index/MultiFields.java @@ -21,12 +21,8 @@ import java.io.IOException; import java.util.Map; import java.util.List; import java.util.ArrayList; - -import org.apache.lucene.index.values.IndexDocValues; -import org.apache.lucene.index.values.MultiIndexDocValues; -import org.apache.lucene.index.values.ValueType; -import org.apache.lucene.index.values.MultiIndexDocValues.DocValuesIndex; import java.util.concurrent.ConcurrentHashMap; + import org.apache.lucene.util.ReaderUtil; import org.apache.lucene.util.ReaderUtil.Gather; // for javadocs import org.apache.lucene.util.Bits; @@ -275,6 +271,5 @@ public final class MultiFields extends Fields { return result; } - } diff --git a/lucene/src/java/org/apache/lucene/index/MultiPerDocValues.java b/lucene/src/java/org/apache/lucene/index/MultiPerDocValues.java index 34397fbd914..6e6b6d4e3e2 100644 --- a/lucene/src/java/org/apache/lucene/index/MultiPerDocValues.java +++ b/lucene/src/java/org/apache/lucene/index/MultiPerDocValues.java @@ -32,10 +32,10 @@ import org.apache.lucene.util.ReaderUtil; import org.apache.lucene.util.ReaderUtil.Gather; /** - * Exposes per-document flex API, merged from per-document flex API of - * sub-segments. This is useful when you're interacting with an - * {@link IndexReader} implementation that consists of sequential sub-readers - * (eg DirectoryReader or {@link MultiReader}). + * Exposes per-document values, merged from per-document values API of + * sub-segments. This is useful when you're interacting with an {@link IndexReader} + * implementation that consists of sequential sub-readers (eg DirectoryReader + * or {@link MultiReader}). * *

    * NOTE: for multi readers, you'll get better performance by gathering diff --git a/lucene/src/java/org/apache/lucene/index/PerDocWriteState.java b/lucene/src/java/org/apache/lucene/index/PerDocWriteState.java index 9ee8bbcf163..e7b1d9339ed 100644 --- a/lucene/src/java/org/apache/lucene/index/PerDocWriteState.java +++ b/lucene/src/java/org/apache/lucene/index/PerDocWriteState.java @@ -67,8 +67,4 @@ public class PerDocWriteState { this.codecId = codecId; this.bytesUsed = state.bytesUsed; } - - public String codecIdAsString() { - return "" + codecId; - } } diff --git a/lucene/src/java/org/apache/lucene/index/SegmentInfo.java b/lucene/src/java/org/apache/lucene/index/SegmentInfo.java index 90f18bb13d3..0daf3e876ff 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentInfo.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentInfo.java @@ -26,7 +26,6 @@ import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; -import java.util.regex.Pattern; import org.apache.lucene.index.codecs.Codec; import org.apache.lucene.index.codecs.CodecProvider; diff --git a/lucene/src/java/org/apache/lucene/index/SegmentReadState.java b/lucene/src/java/org/apache/lucene/index/SegmentReadState.java index 01d4582ec86..d2159d92295 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentReadState.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentReadState.java @@ -54,8 +54,4 @@ public class SegmentReadState { this.termsIndexDivisor = termsIndexDivisor; this.codecId = codecId; } - - public String codecIdAsString() { - return "" + codecId; - } } \ No newline at end of file diff --git a/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java b/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java index 7e910286bbc..32fab036273 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java @@ -79,8 +79,4 @@ public class SegmentWriteState { this.codecId = codecId; segDeletes = state.segDeletes; } - - public String codecIdAsString() { - return "" + codecId; - } } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java b/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java index c17f84d569e..1552f4fe24f 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java @@ -35,7 +35,6 @@ import org.apache.lucene.index.TermState; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.codecs.standard.StandardPostingsReader; // javadocs -import org.apache.lucene.index.values.IndexDocValues; import org.apache.lucene.store.ByteArrayDataInput; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; @@ -116,7 +115,7 @@ public class BlockTermsReader extends FieldsProducer { termsCache = new DoubleBarrelLRUCache(termsCacheSize); //this.segment = segment; - in = dir.openInput(IndexFileNames.segmentFileName(segment, ""+codecId, BlockTermsWriter.TERMS_EXTENSION), + in = dir.openInput(IndexFileNames.segmentFileName(segment, codecId, BlockTermsWriter.TERMS_EXTENSION), readBufferSize); boolean success = false; @@ -197,7 +196,7 @@ public class BlockTermsReader extends FieldsProducer { } } - public static void files(Directory dir, SegmentInfo segmentInfo, String id, Collection files) { + public static void files(Directory dir, SegmentInfo segmentInfo, int id, Collection 
files) { files.add(IndexFileNames.segmentFileName(segmentInfo.name, id, BlockTermsWriter.TERMS_EXTENSION)); } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsWriter.java b/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsWriter.java index b7a82ec0d66..9cb9d4cbd8d 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsWriter.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsWriter.java @@ -70,7 +70,7 @@ public class BlockTermsWriter extends FieldsConsumer { public BlockTermsWriter(TermsIndexWriterBase termsIndexWriter, SegmentWriteState state, PostingsWriterBase postingsWriter) throws IOException { - final String termsFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecIdAsString(), TERMS_EXTENSION); + final String termsFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, TERMS_EXTENSION); this.termsIndexWriter = termsIndexWriter; out = state.directory.createOutput(termsFileName); boolean success = false; diff --git a/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java b/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java index 1d0a06daf25..04c70bcf0ae 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java @@ -189,8 +189,9 @@ public class CodecProvider { * NOTE: This method will pass any codec from the given codec to * {@link #register(Codec)} and sets field codecs via * {@link #setFieldCodec(String, String)}. + * @return this */ - public void copyFrom(CodecProvider other) { + public CodecProvider copyFrom(CodecProvider other) { final Collection values = other.codecs.values(); for (Codec codec : values) { register(codec); @@ -200,5 +201,6 @@ setFieldCodec(entry.getKey(), entry.getValue()); } setDefaultFieldCodec(other.getDefaultFieldCodec()); + return this; } } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesConsumer.java b/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesConsumer.java index b3c4d8422dd..9edadc467df 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesConsumer.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesConsumer.java @@ -58,6 +58,7 @@ public class DefaultDocValuesConsumer extends PerDocConsumer { directory, comparator, bytesUsed); } + @SuppressWarnings("fallthrough") public static void files(Directory dir, SegmentInfo segmentInfo, int codecId, Set files) throws IOException { FieldInfos fieldInfos = segmentInfo.getFieldInfos(); diff --git a/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesProducer.java b/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesProducer.java index a509f560334..c00f54fbbd2 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesProducer.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesProducer.java @@ -23,7 +23,6 @@ import java.util.TreeMap; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.SegmentInfo; -import org.apache.lucene.index.codecs.PerDocValues; import org.apache.lucene.index.values.Bytes; import org.apache.lucene.index.values.IndexDocValues; import org.apache.lucene.index.values.Floats; diff --git a/lucene/src/java/org/apache/lucene/index/codecs/FieldsConsumer.java b/lucene/src/java/org/apache/lucene/index/codecs/FieldsConsumer.java index
c042b7c99d6..10d39657700 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/FieldsConsumer.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/FieldsConsumer.java @@ -48,11 +48,10 @@ public abstract class FieldsConsumer implements Closeable { mergeState.fieldInfo = mergeState.fieldInfos.fieldInfo(field); assert mergeState.fieldInfo != null : "FieldInfo for field is null: "+ field; TermsEnum terms = fieldsEnum.terms(); - if(terms != null) { + if (terms != null) { final TermsConsumer termsConsumer = addField(mergeState.fieldInfo); termsConsumer.merge(mergeState, terms); } } } - } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexReader.java b/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexReader.java index d45e44446ae..ad48f039020 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexReader.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexReader.java @@ -73,7 +73,7 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase { this.termComp = termComp; - in = dir.openInput(IndexFileNames.segmentFileName(segment, ""+codecId, FixedGapTermsIndexWriter.TERMS_INDEX_EXTENSION)); + in = dir.openInput(IndexFileNames.segmentFileName(segment, codecId, FixedGapTermsIndexWriter.TERMS_INDEX_EXTENSION)); boolean success = false; @@ -406,7 +406,7 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase { } } - public static void files(Directory dir, SegmentInfo info, String id, Collection files) { + public static void files(Directory dir, SegmentInfo info, int id, Collection files) { files.add(IndexFileNames.segmentFileName(info.name, id, FixedGapTermsIndexWriter.TERMS_INDEX_EXTENSION)); } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexWriter.java b/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexWriter.java index a2ef5f28765..38f094d47c0 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexWriter.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexWriter.java @@ -56,7 +56,7 @@ public class FixedGapTermsIndexWriter extends TermsIndexWriterBase { private final FieldInfos fieldInfos; // unread public FixedGapTermsIndexWriter(SegmentWriteState state) throws IOException { - final String indexFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecIdAsString(), TERMS_INDEX_EXTENSION); + final String indexFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, TERMS_INDEX_EXTENSION); termIndexInterval = state.termIndexInterval; out = state.directory.createOutput(indexFileName); boolean success = false; diff --git a/lucene/src/java/org/apache/lucene/index/codecs/PerDocConsumer.java b/lucene/src/java/org/apache/lucene/index/codecs/PerDocConsumer.java index 7acaef4504e..f7656544f4c 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/PerDocConsumer.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/PerDocConsumer.java @@ -25,7 +25,7 @@ import org.apache.lucene.index.values.IndexDocValues; * Abstract API that consumes per document values. Concrete implementations of * this convert field values into a Codec specific format during indexing. *

    - * The {@link PerDocConsumer} API is accessible through flexible indexing / the + * The {@link PerDocConsumer} API is accessible through the * {@link Codec} - API providing per field consumers and producers for inverted * data (terms, postings) as well as per-document data. * diff --git a/lucene/src/java/org/apache/lucene/index/codecs/PerDocValues.java b/lucene/src/java/org/apache/lucene/index/codecs/PerDocValues.java index bc194dfd30d..1b2910e2b50 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/PerDocValues.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/PerDocValues.java @@ -27,7 +27,7 @@ import org.apache.lucene.index.values.IndexDocValues; * storage on a per-document basis corresponding to their actual * {@link PerDocConsumer} counterpart. *

    - * The {@link PerDocValues} API is accessible through flexible indexing / the + * The {@link PerDocValues} API is accessible through the * {@link Codec} - API providing per field consumers and producers for inverted * data (terms, postings) as well as per-document data. * diff --git a/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexReader.java b/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexReader.java index 52798dd89b2..e66f413d54c 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexReader.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexReader.java @@ -60,7 +60,7 @@ public class VariableGapTermsIndexReader extends TermsIndexReaderBase { public VariableGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, int indexDivisor, int codecId) throws IOException { - in = dir.openInput(IndexFileNames.segmentFileName(segment, ""+codecId, VariableGapTermsIndexWriter.TERMS_INDEX_EXTENSION)); + in = dir.openInput(IndexFileNames.segmentFileName(segment, codecId, VariableGapTermsIndexWriter.TERMS_INDEX_EXTENSION)); this.segment = segment; boolean success = false; @@ -236,7 +236,7 @@ public class VariableGapTermsIndexReader extends TermsIndexReaderBase { } } - public static void files(Directory dir, SegmentInfo info, String id, Collection files) { + public static void files(Directory dir, SegmentInfo info, int id, Collection files) { files.add(IndexFileNames.segmentFileName(info.name, id, VariableGapTermsIndexWriter.TERMS_INDEX_EXTENSION)); } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexWriter.java b/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexWriter.java index 3305188506b..d10608879cb 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexWriter.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexWriter.java @@ -158,7 +158,7 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase { // in the extremes. 
public VariableGapTermsIndexWriter(SegmentWriteState state, IndexTermSelector policy) throws IOException { - final String indexFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecIdAsString(), TERMS_INDEX_EXTENSION); + final String indexFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, TERMS_INDEX_EXTENSION); out = state.directory.createOutput(indexFileName); boolean success = false; try { diff --git a/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingCodec.java b/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingCodec.java index 987d9d51d57..3f88575b92f 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingCodec.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingCodec.java @@ -154,10 +154,9 @@ public class PulsingCodec extends Codec { @Override public void files(Directory dir, SegmentInfo segmentInfo, int id, Set files) throws IOException { - final String codecId = "" + id; - StandardPostingsReader.files(dir, segmentInfo, codecId, files); - BlockTermsReader.files(dir, segmentInfo, codecId, files); - VariableGapTermsIndexReader.files(dir, segmentInfo, codecId, files); + StandardPostingsReader.files(dir, segmentInfo, id, files); + BlockTermsReader.files(dir, segmentInfo, id, files); + VariableGapTermsIndexReader.files(dir, segmentInfo, id, files); DefaultDocValuesConsumer.files(dir, segmentInfo, id, files); } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java b/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java index 4d25e7afd5b..a6588d5c217 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java @@ -59,19 +59,18 @@ public class SepPostingsReaderImpl extends PostingsReaderBase { int skipMinimum; public SepPostingsReaderImpl(Directory dir, SegmentInfo segmentInfo, int readBufferSize, IntStreamFactory intFactory, int codecId) throws IOException { - final String codecIdAsString = "" + codecId; boolean success = false; try { - final String docFileName = IndexFileNames.segmentFileName(segmentInfo.name, codecIdAsString, SepPostingsWriterImpl.DOC_EXTENSION); + final String docFileName = IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.DOC_EXTENSION); docIn = intFactory.openInput(dir, docFileName); - skipIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, codecIdAsString, SepPostingsWriterImpl.SKIP_EXTENSION), readBufferSize); + skipIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.SKIP_EXTENSION), readBufferSize); if (segmentInfo.getHasProx()) { - freqIn = intFactory.openInput(dir, IndexFileNames.segmentFileName(segmentInfo.name, codecIdAsString, SepPostingsWriterImpl.FREQ_EXTENSION)); - posIn = intFactory.openInput(dir, IndexFileNames.segmentFileName(segmentInfo.name, codecIdAsString, SepPostingsWriterImpl.POS_EXTENSION), readBufferSize); - payloadIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, codecIdAsString, SepPostingsWriterImpl.PAYLOAD_EXTENSION), readBufferSize); + freqIn = intFactory.openInput(dir, IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.FREQ_EXTENSION)); + posIn = intFactory.openInput(dir, IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.POS_EXTENSION), readBufferSize); + payloadIn = 
dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.PAYLOAD_EXTENSION), readBufferSize); } else { posIn = null; payloadIn = null; @@ -85,7 +84,7 @@ public class SepPostingsReaderImpl extends PostingsReaderBase { } } - public static void files(SegmentInfo segmentInfo, String codecId, Collection files) throws IOException { + public static void files(SegmentInfo segmentInfo, int codecId, Collection files) throws IOException { files.add(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.DOC_EXTENSION)); files.add(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.SKIP_EXTENSION)); diff --git a/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java b/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java index 7a7219a7fee..e4c5a484865 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java @@ -117,25 +117,25 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase { try { this.skipInterval = skipInterval; this.skipMinimum = skipInterval; /* set to the same for now */ - final String docFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecIdAsString(), DOC_EXTENSION); + final String docFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, DOC_EXTENSION); docOut = factory.createOutput(state.directory, docFileName); docIndex = docOut.index(); if (state.fieldInfos.hasProx()) { - final String frqFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecIdAsString(), FREQ_EXTENSION); + final String frqFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, FREQ_EXTENSION); freqOut = factory.createOutput(state.directory, frqFileName); freqIndex = freqOut.index(); - final String posFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecIdAsString(), POS_EXTENSION); + final String posFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, POS_EXTENSION); posOut = factory.createOutput(state.directory, posFileName); posIndex = posOut.index(); // TODO: -- only if at least one field stores payloads? 
- final String payloadFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecIdAsString(), PAYLOAD_EXTENSION); + final String payloadFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, PAYLOAD_EXTENSION); payloadOut = state.directory.createOutput(payloadFileName); } - final String skipFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecIdAsString(), SKIP_EXTENSION); + final String skipFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, SKIP_EXTENSION); skipOut = state.directory.createOutput(skipFileName); totalNumDocs = state.numDocs; diff --git a/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextCodec.java b/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextCodec.java index 89456109500..612b70d1333 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextCodec.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextCodec.java @@ -26,7 +26,6 @@ import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.codecs.Codec; -import org.apache.lucene.index.codecs.DocValuesConsumer; import org.apache.lucene.index.codecs.DefaultDocValuesProducer; import org.apache.lucene.index.codecs.FieldsConsumer; import org.apache.lucene.index.codecs.FieldsProducer; @@ -63,13 +62,13 @@ public class SimpleTextCodec extends Codec { /** Extension of freq postings file */ static final String POSTINGS_EXTENSION = "pst"; - static String getPostingsFileName(String segment, String id) { + static String getPostingsFileName(String segment, int id) { return IndexFileNames.segmentFileName(segment, id, POSTINGS_EXTENSION); } @Override public void files(Directory dir, SegmentInfo segmentInfo, int id, Set files) throws IOException { - files.add(getPostingsFileName(segmentInfo.name, ""+id)); + files.add(getPostingsFileName(segmentInfo.name, id)); DefaultDocValuesConsumer.files(dir, segmentInfo, id, files); } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java b/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java index ab17022d9d3..e92dfbc82b5 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java @@ -58,7 +58,7 @@ class SimpleTextFieldsReader extends FieldsProducer { final static BytesRef PAYLOAD = SimpleTextFieldsWriter.PAYLOAD; public SimpleTextFieldsReader(SegmentReadState state) throws IOException { - in = state.dir.openInput(SimpleTextCodec.getPostingsFileName(state.segmentInfo.name, ""+state.codecId)); + in = state.dir.openInput(SimpleTextCodec.getPostingsFileName(state.segmentInfo.name, state.codecId)); fieldInfos = state.fieldInfos; } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsWriter.java b/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsWriter.java index 9424de39a85..d1d5f333a50 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsWriter.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsWriter.java @@ -45,7 +45,7 @@ class SimpleTextFieldsWriter extends FieldsConsumer { final static BytesRef PAYLOAD = new BytesRef(" payload "); public SimpleTextFieldsWriter(SegmentWriteState state) throws 
IOException { - final String fileName = SimpleTextCodec.getPostingsFileName(state.segmentName, state.codecIdAsString()); + final String fileName = SimpleTextCodec.getPostingsFileName(state.segmentName, state.codecId); out = state.directory.createOutput(fileName); } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java b/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java index 8b61d8fa2cb..9bd8bd58de6 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java @@ -25,7 +25,6 @@ import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.codecs.Codec; -import org.apache.lucene.index.codecs.DocValuesConsumer; import org.apache.lucene.index.codecs.FieldsConsumer; import org.apache.lucene.index.codecs.FieldsProducer; import org.apache.lucene.index.codecs.PerDocConsumer; @@ -40,7 +39,6 @@ import org.apache.lucene.index.codecs.VariableGapTermsIndexReader; import org.apache.lucene.index.codecs.BlockTermsWriter; import org.apache.lucene.index.codecs.BlockTermsReader; import org.apache.lucene.index.codecs.DefaultDocValuesProducer; -import org.apache.lucene.index.values.Writer; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; @@ -139,10 +137,9 @@ public class StandardCodec extends Codec { @Override public void files(Directory dir, SegmentInfo segmentInfo, int id, Set files) throws IOException { - final String codecId = "" + id; - StandardPostingsReader.files(dir, segmentInfo, codecId, files); - BlockTermsReader.files(dir, segmentInfo, codecId, files); - VariableGapTermsIndexReader.files(dir, segmentInfo, codecId, files); + StandardPostingsReader.files(dir, segmentInfo, id, files); + BlockTermsReader.files(dir, segmentInfo, id, files); + VariableGapTermsIndexReader.files(dir, segmentInfo, id, files); DefaultDocValuesConsumer.files(dir, segmentInfo, id, files); } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java b/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java index 0515232a480..3ef44dd33c0 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java @@ -52,13 +52,13 @@ public class StandardPostingsReader extends PostingsReaderBase { //private String segment; public StandardPostingsReader(Directory dir, SegmentInfo segmentInfo, int readBufferSize, int codecId) throws IOException { - freqIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, ""+codecId, StandardCodec.FREQ_EXTENSION), + freqIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, codecId, StandardCodec.FREQ_EXTENSION), readBufferSize); //this.segment = segmentInfo.name; if (segmentInfo.getHasProx()) { boolean success = false; try { - proxIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, ""+codecId, StandardCodec.PROX_EXTENSION), + proxIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, codecId, StandardCodec.PROX_EXTENSION), readBufferSize); success = true; } finally { @@ -71,7 +71,7 @@ public class StandardPostingsReader extends PostingsReaderBase { } } - public static void files(Directory dir, SegmentInfo segmentInfo, String id, Collection files) throws IOException 
{ + public static void files(Directory dir, SegmentInfo segmentInfo, int id, Collection files) throws IOException { files.add(IndexFileNames.segmentFileName(segmentInfo.name, id, StandardCodec.FREQ_EXTENSION)); if (segmentInfo.getHasProx()) { files.add(IndexFileNames.segmentFileName(segmentInfo.name, id, StandardCodec.PROX_EXTENSION)); diff --git a/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java b/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java index 1b7021b2bf5..457e3c24821 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java @@ -91,14 +91,14 @@ public final class StandardPostingsWriter extends PostingsWriterBase { this.skipInterval = skipInterval; this.skipMinimum = skipInterval; /* set to the same for now */ //this.segment = state.segmentName; - String fileName = IndexFileNames.segmentFileName(state.segmentName, state.codecIdAsString(), StandardCodec.FREQ_EXTENSION); + String fileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, StandardCodec.FREQ_EXTENSION); freqOut = state.directory.createOutput(fileName); boolean success = false; try { if (state.fieldInfos.hasProx()) { // At least one field does not omit TF, so create the // prox file - fileName = IndexFileNames.segmentFileName(state.segmentName, state.codecIdAsString(), StandardCodec.PROX_EXTENSION); + fileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, StandardCodec.PROX_EXTENSION); proxOut = state.directory.createOutput(fileName); } else { // Every field omits TF so we will write no prox file diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java index 20f2e323546..56493a20e63 100644 --- a/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java @@ -70,7 +70,7 @@ class FixedDerefBytesImpl { } @Override - public void add(int docID, BytesRef bytes) throws IOException { + public void add(int docID, BytesRef bytes) throws IOException { if (bytes.length == 0) // default value - skip it return; if (size == -1) { diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java index 7b1e1673a0b..55efe460676 100644 --- a/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java @@ -78,8 +78,9 @@ class FixedStraightBytesImpl { if (state.bits == null && state.reader instanceof Reader) { Reader reader = (Reader) state.reader; final int maxDocs = reader.maxDoc; - if (maxDocs == 0) + if (maxDocs == 0) { return; + } if (size == -1) { size = reader.size; datOut.writeInt(size); @@ -124,7 +125,6 @@ class FixedStraightBytesImpl { public long ramBytesUsed() { return oneRecord == null ? 
0 : oneRecord.length; } - } public static class Reader extends BytesReaderBase { diff --git a/lucene/src/java/org/apache/lucene/index/values/Floats.java b/lucene/src/java/org/apache/lucene/index/values/Floats.java index efe26b8d0e5..e4200b6ab3d 100644 --- a/lucene/src/java/org/apache/lucene/index/values/Floats.java +++ b/lucene/src/java/org/apache/lucene/index/values/Floats.java @@ -95,7 +95,6 @@ public class Floats { } } - public long ramBytesUsed() { return 0; } @@ -140,7 +139,6 @@ public class Floats { public void files(Collection files) throws IOException { files.add(IndexFileNames.segmentFileName(id, "", Writer.DATA_EXTENSION)); } - } // Writes 4 bytes (float) per value diff --git a/lucene/src/java/org/apache/lucene/index/values/MultiIndexDocValues.java b/lucene/src/java/org/apache/lucene/index/values/MultiIndexDocValues.java index c6c4c4b2d3b..e0b126d94ff 100644 --- a/lucene/src/java/org/apache/lucene/index/values/MultiIndexDocValues.java +++ b/lucene/src/java/org/apache/lucene/index/values/MultiIndexDocValues.java @@ -66,10 +66,6 @@ public class MultiIndexDocValues extends IndexDocValues { return new MultiSource(docValuesIdx, starts); } - public void close() throws IOException { - super.close(); - } - public IndexDocValues reset(DocValuesIndex[] docValuesIdx) { int[] start = new int[docValuesIdx.length]; for (int i = 0; i < docValuesIdx.length; i++) { @@ -103,11 +99,6 @@ public class MultiIndexDocValues extends IndexDocValues { public ValueType type() { return emptySoruce.type(); } - - public void close() throws IOException { - super.close(); - } - } private static class MultiValuesEnum extends ValuesEnum { diff --git a/lucene/src/java/org/apache/lucene/util/FloatsRef.java b/lucene/src/java/org/apache/lucene/util/FloatsRef.java index e6f8bdd1573..99b0ac0344a 100644 --- a/lucene/src/java/org/apache/lucene/util/FloatsRef.java +++ b/lucene/src/java/org/apache/lucene/util/FloatsRef.java @@ -18,11 +18,11 @@ package org.apache.lucene.util; */ /** - * Represents double[], as a slice (offset + length) into an existing float[]. + * Represents double[], as a slice (offset + length) into an existing double[]. 
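// FloatsRef follows the same slice pattern as BytesRef: the object never owns
// an array, it just points into a shared one via (offset, length) -- and, as
// the corrected javadoc says, despite its name it wraps a double[].
// Minimal usage sketch using only the public fields shown below
// (floats, offset, length); the no-arg constructor is an assumption:
double[] shared = new double[1024];     // one big backing buffer
FloatsRef ref = new FloatsRef();
ref.floats = shared;
ref.offset = 128;                       // where this slice starts
ref.length = 4;                         // how many values it spans
for (int i = ref.offset; i < ref.offset + ref.length; i++) {
  double v = ref.floats[i];             // values are read in place, no copying
}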
* * @lucene.internal */ -public final class FloatsRef implements Cloneable{ +public final class FloatsRef implements Cloneable { public double[] floats; public int offset; public int length; diff --git a/lucene/src/java/org/apache/lucene/util/packed/PackedReaderIterator.java b/lucene/src/java/org/apache/lucene/util/packed/PackedReaderIterator.java index 7843b6c0508..90c67dc8571 100644 --- a/lucene/src/java/org/apache/lucene/util/packed/PackedReaderIterator.java +++ b/lucene/src/java/org/apache/lucene/util/packed/PackedReaderIterator.java @@ -93,12 +93,12 @@ final class PackedReaderIterator implements PackedInts.ReaderIterator { final long bits = (long) bitsPerValue; final int posToSkip = ord - 1 - position; final long bitsToSkip = (bits * (long)posToSkip); - if(bitsToSkip < pendingBitsLeft ){ // enough bits left - no seek required + if (bitsToSkip < pendingBitsLeft) { // enough bits left - no seek required pendingBitsLeft -= bitsToSkip; - }else { + } else { final long skip = bitsToSkip-pendingBitsLeft; final long closestByte = (skip >> 6) << 3; - if(closestByte != 0) { // need to seek + if (closestByte != 0) { // need to seek final long filePointer = in.getFilePointer(); in.seek(filePointer + closestByte); } diff --git a/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java b/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java index ea1f8911fb3..9362e15ed0a 100644 --- a/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java +++ b/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java @@ -65,6 +65,14 @@ public class RandomIndexWriter implements Closeable { // called from different threads; else test failures may // not be reproducible from the original seed this.r = new Random(r.nextInt()); + + if (r.nextBoolean()) { + if (LuceneTestCase.VERBOSE) { + System.out.println("NOTE: RIW ctor is setting new CodecProvider().copyFrom"); + } + // Just to test CP.copyFrom: + conf.setCodecProvider(new CodecProvider().copyFrom(conf.getCodecProvider())); + } } @Override diff --git a/lucene/src/test-framework/org/apache/lucene/index/codecs/mockintblock/MockFixedIntBlockCodec.java b/lucene/src/test-framework/org/apache/lucene/index/codecs/mockintblock/MockFixedIntBlockCodec.java index c5b953899a7..614a972af2a 100644 --- a/lucene/src/test-framework/org/apache/lucene/index/codecs/mockintblock/MockFixedIntBlockCodec.java +++ b/lucene/src/test-framework/org/apache/lucene/index/codecs/mockintblock/MockFixedIntBlockCodec.java @@ -34,7 +34,6 @@ import org.apache.lucene.index.codecs.sep.SepPostingsReaderImpl; import org.apache.lucene.index.codecs.sep.SepPostingsWriterImpl; import org.apache.lucene.index.codecs.intblock.FixedIntBlockIndexInput; import org.apache.lucene.index.codecs.intblock.FixedIntBlockIndexOutput; -import org.apache.lucene.index.codecs.DocValuesConsumer; import org.apache.lucene.index.codecs.DefaultDocValuesProducer; import org.apache.lucene.index.codecs.FixedGapTermsIndexReader; import org.apache.lucene.index.codecs.FixedGapTermsIndexWriter; @@ -204,10 +203,9 @@ public class MockFixedIntBlockCodec extends Codec { @Override public void files(Directory dir, SegmentInfo segmentInfo, int codecId, Set files) throws IOException { - final String codecIdAsString = "" + codecId; - SepPostingsReaderImpl.files(segmentInfo, codecIdAsString, files); - BlockTermsReader.files(dir, segmentInfo, codecIdAsString, files); - FixedGapTermsIndexReader.files(dir, segmentInfo, codecIdAsString, files); + SepPostingsReaderImpl.files(segmentInfo, 
codecId, files); + BlockTermsReader.files(dir, segmentInfo, codecId, files); + FixedGapTermsIndexReader.files(dir, segmentInfo, codecId, files); DefaultDocValuesConsumer.files(dir, segmentInfo, codecId, files); } diff --git a/lucene/src/test-framework/org/apache/lucene/index/codecs/mockintblock/MockVariableIntBlockCodec.java b/lucene/src/test-framework/org/apache/lucene/index/codecs/mockintblock/MockVariableIntBlockCodec.java index 3b76f3f0cf7..f3567f76dd5 100644 --- a/lucene/src/test-framework/org/apache/lucene/index/codecs/mockintblock/MockVariableIntBlockCodec.java +++ b/lucene/src/test-framework/org/apache/lucene/index/codecs/mockintblock/MockVariableIntBlockCodec.java @@ -34,7 +34,6 @@ import org.apache.lucene.index.codecs.sep.SepPostingsReaderImpl; import org.apache.lucene.index.codecs.sep.SepPostingsWriterImpl; import org.apache.lucene.index.codecs.intblock.VariableIntBlockIndexInput; import org.apache.lucene.index.codecs.intblock.VariableIntBlockIndexOutput; -import org.apache.lucene.index.codecs.DocValuesConsumer; import org.apache.lucene.index.codecs.DefaultDocValuesProducer; import org.apache.lucene.index.codecs.FixedGapTermsIndexReader; import org.apache.lucene.index.codecs.FixedGapTermsIndexWriter; @@ -227,10 +226,9 @@ public class MockVariableIntBlockCodec extends Codec { @Override public void files(Directory dir, SegmentInfo segmentInfo, int codecId, Set files) throws IOException { - final String codecIdAsString = "" + codecId; - SepPostingsReaderImpl.files(segmentInfo, codecIdAsString, files); - BlockTermsReader.files(dir, segmentInfo, codecIdAsString, files); - FixedGapTermsIndexReader.files(dir, segmentInfo, codecIdAsString, files); + SepPostingsReaderImpl.files(segmentInfo, codecId, files); + BlockTermsReader.files(dir, segmentInfo, codecId, files); + FixedGapTermsIndexReader.files(dir, segmentInfo, codecId, files); DefaultDocValuesConsumer.files(dir, segmentInfo, codecId, files); } diff --git a/lucene/src/test-framework/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java b/lucene/src/test-framework/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java index 90e4be7ba68..6ad2ad2c896 100644 --- a/lucene/src/test-framework/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java +++ b/lucene/src/test-framework/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java @@ -33,7 +33,6 @@ import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.codecs.BlockTermsReader; import org.apache.lucene.index.codecs.BlockTermsWriter; import org.apache.lucene.index.codecs.Codec; -import org.apache.lucene.index.codecs.DocValuesConsumer; import org.apache.lucene.index.codecs.DefaultDocValuesProducer; import org.apache.lucene.index.codecs.FieldsConsumer; import org.apache.lucene.index.codecs.FieldsProducer; @@ -140,7 +139,7 @@ public class MockRandomCodec extends Codec { System.out.println("MockRandomCodec: writing to seg=" + state.segmentName + " seed=" + seed); } - final String seedFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecIdAsString(), SEED_EXT); + final String seedFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, SEED_EXT); final IndexOutput out = state.directory.createOutput(seedFileName); try { out.writeLong(seed); @@ -241,7 +240,7 @@ public class MockRandomCodec extends Codec { @Override public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException { - final String seedFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, 
state.codecIdAsString(), SEED_EXT); + final String seedFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.codecId, SEED_EXT); final IndexInput in = state.dir.openInput(seedFileName); final long seed = in.readLong(); if (LuceneTestCase.VERBOSE) { @@ -348,14 +347,13 @@ public class MockRandomCodec extends Codec { @Override public void files(Directory dir, SegmentInfo segmentInfo, int codecId, Set files) throws IOException { - final String codecIdAsString = codecId + ""; - final String seedFileName = IndexFileNames.segmentFileName(segmentInfo.name, codecIdAsString, SEED_EXT); + final String seedFileName = IndexFileNames.segmentFileName(segmentInfo.name, codecId, SEED_EXT); files.add(seedFileName); - SepPostingsReaderImpl.files(segmentInfo, codecIdAsString, files); - StandardPostingsReader.files(dir, segmentInfo, codecIdAsString, files); - BlockTermsReader.files(dir, segmentInfo, codecIdAsString, files); - FixedGapTermsIndexReader.files(dir, segmentInfo, codecIdAsString, files); - VariableGapTermsIndexReader.files(dir, segmentInfo, codecIdAsString, files); + SepPostingsReaderImpl.files(segmentInfo, codecId, files); + StandardPostingsReader.files(dir, segmentInfo, codecId, files); + BlockTermsReader.files(dir, segmentInfo, codecId, files); + FixedGapTermsIndexReader.files(dir, segmentInfo, codecId, files); + VariableGapTermsIndexReader.files(dir, segmentInfo, codecId, files); DefaultDocValuesConsumer.files(dir, segmentInfo, codecId, files); // hackish! Iterator it = files.iterator(); diff --git a/lucene/src/test-framework/org/apache/lucene/index/codecs/mocksep/MockSepCodec.java b/lucene/src/test-framework/org/apache/lucene/index/codecs/mocksep/MockSepCodec.java index 4331457bdca..f1c99b620a0 100644 --- a/lucene/src/test-framework/org/apache/lucene/index/codecs/mocksep/MockSepCodec.java +++ b/lucene/src/test-framework/org/apache/lucene/index/codecs/mocksep/MockSepCodec.java @@ -136,10 +136,9 @@ public class MockSepCodec extends Codec { @Override public void files(Directory dir, SegmentInfo segmentInfo, int codecId, Set files) throws IOException { - final String codecIdAsString = "" + codecId; - SepPostingsReaderImpl.files(segmentInfo, codecIdAsString, files); - BlockTermsReader.files(dir, segmentInfo, codecIdAsString, files); - FixedGapTermsIndexReader.files(dir, segmentInfo, codecIdAsString, files); + SepPostingsReaderImpl.files(segmentInfo, codecId, files); + BlockTermsReader.files(dir, segmentInfo, codecId, files); + FixedGapTermsIndexReader.files(dir, segmentInfo, codecId, files); DefaultDocValuesConsumer.files(dir, segmentInfo, codecId, files); } diff --git a/lucene/src/test/org/apache/lucene/index/TestCodecs.java b/lucene/src/test/org/apache/lucene/index/TestCodecs.java index 040e9d035f6..118c25c68cd 100644 --- a/lucene/src/test/org/apache/lucene/index/TestCodecs.java +++ b/lucene/src/test/org/apache/lucene/index/TestCodecs.java @@ -20,7 +20,6 @@ package org.apache.lucene.index; import java.io.IOException; import java.util.Arrays; import java.util.HashSet; -import java.util.concurrent.atomic.AtomicLong; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; diff --git a/lucene/src/test/org/apache/lucene/index/TestDocTermOrds.java b/lucene/src/test/org/apache/lucene/index/TestDocTermOrds.java index 472375454fc..a35b964e114 100644 --- a/lucene/src/test/org/apache/lucene/index/TestDocTermOrds.java +++ b/lucene/src/test/org/apache/lucene/index/TestDocTermOrds.java @@ -33,7 +33,6 @@ import org.apache.lucene.index.codecs.BlockTermsReader; 
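// MockRandomCodec's trick, visible just above: persist the random seed in a
// tiny per-segment file (SEED_EXT) at write time and read it back at open
// time, so the reader re-makes exactly the random choices the writer made.
// The essence, extracted (how the seed is first produced is an assumption):
long seed = random.nextLong();                     // writer side
IndexOutput out = dir.createOutput(seedFileName);  // seedFileName built per codec id
try {
  out.writeLong(seed);                             // remember the dice roll
} finally {
  out.close();
}
// ... reader side, before reconstructing the postings chain:
IndexInput in = dir.openInput(seedFileName);
long sameSeed = in.readLong();                     // re-seed a new Random(sameSeed)
in.close();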
import org.apache.lucene.index.codecs.BlockTermsWriter; import org.apache.lucene.index.codecs.Codec; import org.apache.lucene.index.codecs.CoreCodecProvider; -import org.apache.lucene.index.codecs.DocValuesConsumer; import org.apache.lucene.index.codecs.DefaultDocValuesProducer; import org.apache.lucene.index.codecs.FieldsConsumer; import org.apache.lucene.index.codecs.FieldsProducer; @@ -198,9 +197,9 @@ public class TestDocTermOrds extends LuceneTestCase { @Override public void files(Directory dir, SegmentInfo segmentInfo, int id, Set files) throws IOException { - StandardPostingsReader.files(dir, segmentInfo, ""+id, files); - BlockTermsReader.files(dir, segmentInfo, ""+id, files); - FixedGapTermsIndexReader.files(dir, segmentInfo, ""+id, files); + StandardPostingsReader.files(dir, segmentInfo, id, files); + BlockTermsReader.files(dir, segmentInfo, id, files); + FixedGapTermsIndexReader.files(dir, segmentInfo, id, files); DefaultDocValuesConsumer.files(dir, segmentInfo, id, files); } From e8f9e80b743e31f3c69d643fb24e1455f5f79ff5 Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Sun, 5 Jun 2011 14:34:12 +0000 Subject: [PATCH 060/116] fix ob1 that caused us to use 64 bits not 1 bit, per value git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1132421 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/src/java/org/apache/lucene/index/values/IntsImpl.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lucene/src/java/org/apache/lucene/index/values/IntsImpl.java b/lucene/src/java/org/apache/lucene/index/values/IntsImpl.java index af93a5212cb..29d6c929a53 100644 --- a/lucene/src/java/org/apache/lucene/index/values/IntsImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/IntsImpl.java @@ -111,7 +111,7 @@ class IntsImpl { minValue = maxValue = 0; } // if we exceed the range of positive longs we must switch to fixed ints - if ((maxValue - minValue) < (((long)1) << 63) && (maxValue - minValue) > 0) { + if ((maxValue - minValue) < (((long)1) << 63) && (maxValue - minValue) >= 0) { writePackedInts(docCount); } else { writeFixedInts(docCount); From f4da02857f725330549130147de32e7ec794778b Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Sun, 5 Jun 2011 14:34:20 +0000 Subject: [PATCH 061/116] remove unused method git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1132422 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene/index/codecs/CodecProvider.java | 22 ------------------- .../lucene/index/RandomIndexWriter.java | 8 ------- 2 files changed, 30 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java b/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java index 04c70bcf0ae..cfc9c458311 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java @@ -181,26 +181,4 @@ public class CodecProvider { public synchronized void setDefaultFieldCodec(String codec) { defaultFieldCodec = codec; } - - /** - * Registers all codecs from the given provider including the field to codec - * mapping and the default field codec. - *

    - * NOTE: This method will pass any codec from the given codec to - * {@link #register(Codec)} and sets fiels codecs via - * {@link #setFieldCodec(String, String)}. - * @return this - */ - public CodecProvider copyFrom(CodecProvider other) { - final Collection values = other.codecs.values(); - for (Codec codec : values) { - register(codec); - } - final Set> entrySet = other.perFieldMap.entrySet(); - for (Entry entry : entrySet) { - setFieldCodec(entry.getKey(), entry.getValue()); - } - setDefaultFieldCodec(other.getDefaultFieldCodec()); - return this; - } } diff --git a/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java b/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java index 9362e15ed0a..ea1f8911fb3 100644 --- a/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java +++ b/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java @@ -65,14 +65,6 @@ public class RandomIndexWriter implements Closeable { // called from different threads; else test failures may // not be reproducible from the original seed this.r = new Random(r.nextInt()); - - if (r.nextBoolean()) { - if (LuceneTestCase.VERBOSE) { - System.out.println("NOTE: RIW ctor is setting new CodecProvider().copyFrom"); - } - // Just to test CP.copyFrom: - conf.setCodecProvider(new CodecProvider().copyFrom(conf.getCodecProvider())); - } } @Override From 451cc4079b3cd08a4b6ff01d19ee3163f5922c9f Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Sun, 5 Jun 2011 14:41:17 +0000 Subject: [PATCH 062/116] LUCENE-3108: Removed missing value from FieldComparator and added PerDoc impl to ParallelReader git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1132424 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/lucene/index/ParallelReader.java | 33 +++++++++++++++++-- .../apache/lucene/search/FieldComparator.java | 8 ++--- .../org/apache/lucene/search/SortField.java | 4 +-- 3 files changed, 34 insertions(+), 11 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/index/ParallelReader.java b/lucene/src/java/org/apache/lucene/index/ParallelReader.java index 4b5d78d5682..d587b4a7b51 100644 --- a/lucene/src/java/org/apache/lucene/index/ParallelReader.java +++ b/lucene/src/java/org/apache/lucene/index/ParallelReader.java @@ -22,6 +22,7 @@ import org.apache.lucene.document.FieldSelector; import org.apache.lucene.document.FieldSelectorResult; import org.apache.lucene.document.Fieldable; import org.apache.lucene.index.codecs.PerDocValues; +import org.apache.lucene.index.values.IndexDocValues; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.MapBackedSet; @@ -61,7 +62,8 @@ public class ParallelReader extends IndexReader { private int numDocs; private boolean hasDeletions; - private ParallelFields fields = new ParallelFields(); + private final ParallelFields fields = new ParallelFields(); + private final ParallelPerDocs perDocs = new ParallelPerDocs(); /** Construct a ParallelReader. *

    Note that all subreaders are closed if this ParallelReader is closed.
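// The hunks below route per-document values through ParallelReader: its
// subreaders hold different fields for the same documents, so a lookup can be
// dispatched purely by field name. Usage sketch (directories and field names
// are invented for illustration):
ParallelReader pr = new ParallelReader();       // closes subreaders when closed
pr.add(IndexReader.open(dirWithTitleField));    // this index contributes "title"
pr.add(IndexReader.open(dirWithPriceField));    // this index contributes "price"
// perDocValues() now returns the ParallelPerDocs added below, which remembers
// which subreader owns each field:
IndexDocValues priceValues = pr.perDocValues().docValues("price");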

    @@ -135,6 +137,7 @@ public class ParallelReader extends IndexReader { fieldToReader.put(field, reader); } this.fields.addField(field, reader); + this.perDocs.addField(field, reader); } if (!ignoreStoredFields) @@ -569,8 +572,32 @@ public class ParallelReader extends IndexReader { @Override public PerDocValues perDocValues() throws IOException { - // TODO Auto-generated method stub - return null; + return perDocs; + } + + // Single instance of this, per ParallelReader instance + private static final class ParallelPerDocs extends PerDocValues { + final TreeMap fields = new TreeMap(); + + void addField(String field, IndexReader r) throws IOException { + PerDocValues perDocs = MultiPerDocValues.getPerDocs(r); + fields.put(field, perDocs.docValues(field)); + } + + @Override + public void close() throws IOException { + // nothing to do here + } + + @Override + public IndexDocValues docValues(String field) throws IOException { + return fields.get(field); + } + + @Override + public Collection fields() { + return fields.keySet(); + } } } diff --git a/lucene/src/java/org/apache/lucene/search/FieldComparator.java b/lucene/src/java/org/apache/lucene/search/FieldComparator.java index 7ab0d4cd6c4..d7565317abe 100644 --- a/lucene/src/java/org/apache/lucene/search/FieldComparator.java +++ b/lucene/src/java/org/apache/lucene/search/FieldComparator.java @@ -335,12 +335,10 @@ public abstract class FieldComparator { private Source currentReaderValues; private final String field; private double bottom; - private final float missingValue; - FloatDocValuesComparator(int numHits, String field, Float missingValue) { + FloatDocValuesComparator(int numHits, String field) { values = new double[numHits]; this.field = field; - this.missingValue = missingValue == null ? 0 : missingValue.floatValue(); } @Override @@ -607,12 +605,10 @@ public abstract class FieldComparator { private Source currentReaderValues; private final String field; private long bottom; - private int missingValue; - IntDocValuesComparator(int numHits, String field, Integer missingValue) { + IntDocValuesComparator(int numHits, String field) { values = new long[numHits]; this.field = field; - this.missingValue = missingValue == null ? 
0 : missingValue.intValue(); } @Override diff --git a/lucene/src/java/org/apache/lucene/search/SortField.java b/lucene/src/java/org/apache/lucene/search/SortField.java index 55e8425d600..8fdc66c36e8 100644 --- a/lucene/src/java/org/apache/lucene/search/SortField.java +++ b/lucene/src/java/org/apache/lucene/search/SortField.java @@ -442,14 +442,14 @@ public class SortField { case SortField.INT: if (useIndexValues) { - return new FieldComparator.IntDocValuesComparator(numHits, field, (Integer) missingValue); + return new FieldComparator.IntDocValuesComparator(numHits, field); } else { return new FieldComparator.IntComparator(numHits, (IntValuesCreator)creator, (Integer) missingValue); } case SortField.FLOAT: if (useIndexValues) { - return new FieldComparator.FloatDocValuesComparator(numHits, field, (Float) missingValue); + return new FieldComparator.FloatDocValuesComparator(numHits, field); } else { return new FieldComparator.FloatComparator(numHits, (FloatValuesCreator) creator, (Float) missingValue); } From a9368ea98e809851cc9d9a96ff79f9875c4ba1f1 Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Sun, 5 Jun 2011 16:56:18 +0000 Subject: [PATCH 063/116] fix java 1.6 code git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1132457 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/src/java/org/apache/lucene/index/ParallelReader.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lucene/src/java/org/apache/lucene/index/ParallelReader.java b/lucene/src/java/org/apache/lucene/index/ParallelReader.java index d587b4a7b51..04934e66fd9 100644 --- a/lucene/src/java/org/apache/lucene/index/ParallelReader.java +++ b/lucene/src/java/org/apache/lucene/index/ParallelReader.java @@ -584,7 +584,7 @@ public class ParallelReader extends IndexReader { fields.put(field, perDocs.docValues(field)); } - @Override + //@Override -- not until Java 1.6 public void close() throws IOException { // nothing to do here } From 56179f0fc399220f06983a265dd7c8705dcbd962 Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Mon, 6 Jun 2011 10:36:02 +0000 Subject: [PATCH 064/116] fix jdoc typo git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1132582 13f79535-47bb-0310-9956-ffa450edef68 --- .../src/java/org/apache/lucene/store/NRTCachingDirectory.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lucene/contrib/misc/src/java/org/apache/lucene/store/NRTCachingDirectory.java b/lucene/contrib/misc/src/java/org/apache/lucene/store/NRTCachingDirectory.java index 05ac00fd6a7..86998a0378f 100644 --- a/lucene/contrib/misc/src/java/org/apache/lucene/store/NRTCachingDirectory.java +++ b/lucene/contrib/misc/src/java/org/apache/lucene/store/NRTCachingDirectory.java @@ -217,7 +217,7 @@ public class NRTCachingDirectory extends Directory { delegate.clearLock(name); } - /** Close thius directory, which flushes any cached files + /** Close this directory, which flushes any cached files * to the delegate and then closes the delegate. 
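// The corrected javadoc spells out the close contract: whatever still sits in
// the RAM cache must be flushed down to the delegate before either directory
// is closed. The body of close() is not part of this hunk; a plausible sketch
// of that contract (the cache/unCache/delegate names are assumptions here):
@Override
public void close() throws IOException {
  for (String fileName : cache.listAll()) {
    unCache(fileName);     // copy the cached RAM file onto the delegate
  }
  cache.close();
  delegate.close();
}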
*/ @Override public void close() throws IOException { @@ -277,4 +277,3 @@ public class NRTCachingDirectory extends Directory { } } } - From 205e63ab7dd15b39940d9370089dc1bcfa278095 Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Mon, 6 Jun 2011 10:37:15 +0000 Subject: [PATCH 065/116] fix test bug git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1132583 13f79535-47bb-0310-9956-ffa450edef68 --- .../test/org/apache/lucene/index/TestIndexFileDeleter.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexFileDeleter.java b/lucene/src/test/org/apache/lucene/index/TestIndexFileDeleter.java index 21525d8ed76..f14abca8bd1 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexFileDeleter.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexFileDeleter.java @@ -53,6 +53,8 @@ public class TestIndexFileDeleter extends LuceneTestCase { setMergePolicy(mergePolicy) ); + writer.setInfoStream(VERBOSE ? System.out : null); + int i; for(i=0;i<35;i++) { addDoc(writer, i); @@ -146,7 +148,9 @@ public class TestIndexFileDeleter extends LuceneTestCase { copyFile(dir, "segments_2", "segments_1"); // Create a bogus cfs file shadowing a non-cfs segment: - copyFile(dir, "_1.cfs", "_2.cfs"); + assertTrue(dir.fileExists("_3.fdt")); + assertTrue(!dir.fileExists("_3.cfs")); + copyFile(dir, "_1.cfs", "_3.cfs"); String[] filesPre = dir.listAll(); From a385b7b5c9160381ba50e80ad0b23e33994c3a74 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Mon, 6 Jun 2011 13:01:03 +0000 Subject: [PATCH 066/116] fix test bug git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1132620 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/src/test/org/apache/lucene/index/TestLazyBug.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lucene/src/test/org/apache/lucene/index/TestLazyBug.java b/lucene/src/test/org/apache/lucene/index/TestLazyBug.java index 9aa79fbb608..c16d504b016 100755 --- a/lucene/src/test/org/apache/lucene/index/TestLazyBug.java +++ b/lucene/src/test/org/apache/lucene/index/TestLazyBug.java @@ -126,15 +126,15 @@ public class TestLazyBug extends LuceneTestCase { } public void testLazyWorks() throws Exception { - doTest(new int[] { 399 }); + doTest(new int[] { TEST_NIGHTLY ? 499 : 49 }); } public void testLazyAlsoWorks() throws Exception { - doTest(new int[] { 399, 150 }); + doTest(TEST_NIGHTLY ? new int[] { 499, 150 } : new int[] { 49, 15 }); } public void testLazyBroken() throws Exception { - doTest(new int[] { 150, 399 }); + doTest(TEST_NIGHTLY ? new int[] { 150, 499 } : new int[] { 15, 49 }); } } From 2e57b61092d4332e2bdbcee6f158bd06072fe9f8 Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Mon, 6 Jun 2011 16:06:07 +0000 Subject: [PATCH 067/116] LUCENE-2645: add David to CHANGES git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1132689 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index a95e04a4f7b..f350beb7ba5 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -434,7 +434,7 @@ Bug fixes deletions, provided they share some segments. (yonik) * LUCENE-2645: Fix false assertion error when same token was added one - after another with 0 posIncr. (Kurosaka Teruhiko via Mike + after another with 0 posIncr. 
(David Smiley, Kurosaka Teruhiko via Mike McCandless) ======================= Lucene 3.x (not yet released) ================ From a74ac5b8f77a448e30815834f7077ddd75dba934 Mon Sep 17 00:00:00 2001 From: Steven Rowe Date: Mon, 6 Jun 2011 18:22:18 +0000 Subject: [PATCH 068/116] dropped unnecessary tests.linedocsfile sysprop setting from the lucene module test run configuration git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1132717 13f79535-47bb-0310-9956-ffa450edef68 --- dev-tools/idea/.idea/workspace.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev-tools/idea/.idea/workspace.xml b/dev-tools/idea/.idea/workspace.xml index c4b3cae4bf2..0bca3b21b63 100644 --- a/dev-tools/idea/.idea/workspace.xml +++ b/dev-tools/idea/.idea/workspace.xml @@ -82,7 +82,7 @@ From 63083944b188daac391cceb01061871d7682bbf3 Mon Sep 17 00:00:00 2001 From: Ryan McKinley Date: Mon, 6 Jun 2011 18:37:56 +0000 Subject: [PATCH 069/116] SOLR-2399: fix stopwords and added field sorting git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1132724 13f79535-47bb-0310-9956-ffa450edef68 --- solr/src/webapp/web/css/screen.css | 5 +++++ solr/src/webapp/web/js/script.js | 19 +++++++++++++------ 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/solr/src/webapp/web/css/screen.css b/solr/src/webapp/web/css/screen.css index ae7e13a278e..0c2f0594a15 100644 --- a/solr/src/webapp/web/css/screen.css +++ b/solr/src/webapp/web/css/screen.css @@ -757,6 +757,11 @@ ul padding: 1px 2px; } +#content #analysis .analysis-result .row table td div.empty +{ + color: #f0f0f0; +} + #content #analysis .analysis-result .row table td div.match { background-color: #e9eff7; diff --git a/solr/src/webapp/web/js/script.js b/solr/src/webapp/web/js/script.js index 0c1145fb87a..45d6789aabc 100644 --- a/solr/src/webapp/web/js/script.js +++ b/solr/src/webapp/web/js/script.js @@ -1893,7 +1893,7 @@ var sammy = $.sammy { fields.sort(); related_options += '' + "\n"; - related_options += fields.join( "\n" ) + "\n"; + related_options += fields.sort().join( "\n" ) + "\n"; related_options += '' + "\n"; } @@ -1909,7 +1909,7 @@ var sammy = $.sammy { dynamic_fields.sort(); related_options += '' + "\n"; - related_options += dynamic_fields.join( "\n" ) + "\n"; + related_options += dynamic_fields.sort().join( "\n" ) + "\n"; related_options += '' + "\n"; } @@ -1925,7 +1925,7 @@ var sammy = $.sammy { types.sort(); related_options += '' + "\n"; - related_options += types.join( "\n" ) + "\n"; + related_options += types.sort().join( "\n" ) + "\n"; related_options += '' + "\n"; } @@ -3289,7 +3289,7 @@ var sammy = $.sammy if( 0 !== fields.length ) { content += '' + "\n"; - content += fields.join( "\n" ) + "\n"; + content += fields.sort().join( "\n" ) + "\n"; content += '' + "\n"; } @@ -3304,7 +3304,7 @@ var sammy = $.sammy if( 0 !== types.length ) { content += '' + "\n"; - content += types.join( "\n" ) + "\n"; + content += types.sort().join( "\n" ) + "\n"; content += '' + "\n"; } @@ -3433,7 +3433,14 @@ var sammy = $.sammy var length = raw_parts[key].length; for( var j = 0; j < length; j++ ) { - parts[key].push( '' + raw_parts[key][j].join( "\n" ) + '' ); + if( raw_parts[key][j] ) + { + parts[key].push( '' + raw_parts[key][j].join( "\n" ) + '' ); + } + else + { + parts[key].push( '
    ' ); + } } } From bdee0a976497332dd922510c0ac459ba0dbb0af4 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Mon, 6 Jun 2011 19:14:48 +0000 Subject: [PATCH 070/116] SOLR-2462: use of spellcheck.collate could result in extremely high memory usage git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1132729 13f79535-47bb-0310-9956-ffa450edef68 --- solr/CHANGES.txt | 5 ++ .../solr/common/params/SpellingParams.java | 12 ++++- .../component/SpellCheckComponent.java | 3 +- .../solr/spelling/PossibilityIterator.java | 33 +++++++++---- .../solr/spelling/RankedSpellPossibility.java | 16 +++++- .../solr/spelling/SpellCheckCollator.java | 4 +- .../response/TestSpellCheckResponse.java | 2 +- .../solr/spelling/SpellCheckCollatorTest.java | 12 +++-- .../SpellPossibilityIteratorTest.java | 49 ++++++++++++++++--- 9 files changed, 109 insertions(+), 27 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index a30f7e700ec..97a6b5a1183 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -277,6 +277,11 @@ Bug Fixes English-specific fieldTypes (Jan Høydahl, hossman, Robert Muir, yonik, Mike McCandless) +* SOLR-2462: Fix extremely high memory usage problems with spellcheck.collate. + Separately, an additional spellcheck.maxCollationEvaluations (default=10000) + parameter is added to avoid excessive CPU time in extreme cases (e.g. long + queries with many misspelled words). (James Dyer via rmuir) + ================== 3.2.0 ================== Versions of Major Components --------------------- diff --git a/solr/src/common/org/apache/solr/common/params/SpellingParams.java b/solr/src/common/org/apache/solr/common/params/SpellingParams.java index 71d2aa5f348..50a10a2a577 100644 --- a/solr/src/common/org/apache/solr/common/params/SpellingParams.java +++ b/solr/src/common/org/apache/solr/common/params/SpellingParams.java @@ -95,7 +95,15 @@ public interface SpellingParams { * Default=0. Ignored of "spellcheck.collate" is false. *

    */ - public static final String SPELLCHECK_MAX_COLLATION_TRIES = SPELLCHECK_PREFIX + "maxCollationTries"; + public static final String SPELLCHECK_MAX_COLLATION_TRIES = SPELLCHECK_PREFIX + "maxCollationTries"; + /** + *

+ * The maximum number of word correction combinations to rank and evaluate prior to deciding which collation + * candidates to test against the index. This is a performance safety-net in case a user enters a query with + * many misspelled words. The default is 10,000 combinations. *
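// The javadoc above describes a safety-net; later in this patch,
// PossibilityIterator enforces it with a bounded top-K priority queue that is
// deliberately ordered worst-first (see the inverted compareTo in
// RankedSpellPossibility). Standalone sketch of the idiom, where a lower rank
// means a better suggestion (k, maxEvaluations, candidateRanks are stand-ins):
PriorityQueue<Integer> worstFirst =
    new PriorityQueue<Integer>(k, Collections.reverseOrder());
int evaluated = 0;
for (int rank : candidateRanks) {
  if (++evaluated > maxEvaluations) break;        // the CPU safety-net cap
  if (worstFirst.size() >= k && rank >= worstFirst.peek()) {
    continue;                                     // no better than the worst kept
  }
  worstFirst.offer(rank);
  if (worstFirst.size() > k) {
    worstFirst.poll();                            // evict the worst candidate
  }
}
// worstFirst now holds the k best ranks seen within the evaluation budget.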

    + */ + public static final String SPELLCHECK_MAX_COLLATION_EVALUATIONS = SPELLCHECK_PREFIX + "maxCollationEvaluations"; /** *

    @@ -105,7 +113,7 @@ public interface SpellingParams { *

    */ public static final String SPELLCHECK_COLLATE_EXTENDED_RESULTS = SPELLCHECK_PREFIX + "collateExtendedResults"; - + /** * Certain spelling implementations may allow for an accuracy setting. */ diff --git a/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java b/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java index 897eb87af9b..559bbd81f83 100644 --- a/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java +++ b/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java @@ -172,11 +172,12 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar NamedList response) { int maxCollations = params.getInt(SPELLCHECK_MAX_COLLATIONS, 1); int maxCollationTries = params.getInt(SPELLCHECK_MAX_COLLATION_TRIES, 0); + int maxCollationEvaluations = params.getInt(SPELLCHECK_MAX_COLLATION_EVALUATIONS, 10000); boolean collationExtendedResults = params.getBool(SPELLCHECK_COLLATE_EXTENDED_RESULTS, false); boolean shard = params.getBool(ShardParams.IS_SHARD, false); SpellCheckCollator collator = new SpellCheckCollator(); - List collations = collator.collate(spellingResult, q, rb, maxCollations, maxCollationTries); + List collations = collator.collate(spellingResult, q, rb, maxCollations, maxCollationTries, maxCollationEvaluations); //by sorting here we guarantee a non-distributed request returns all //results in the same order as a distributed request would, //even in cases when the internal rank is the same. diff --git a/solr/src/java/org/apache/solr/spelling/PossibilityIterator.java b/solr/src/java/org/apache/solr/spelling/PossibilityIterator.java index ec3aaa7db94..84e41e27ede 100644 --- a/solr/src/java/org/apache/solr/spelling/PossibilityIterator.java +++ b/solr/src/java/org/apache/solr/spelling/PossibilityIterator.java @@ -17,12 +17,13 @@ package org.apache.solr.spelling; */ import java.util.ArrayList; -import java.util.Collections; +import java.util.Arrays; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.NoSuchElementException; +import java.util.PriorityQueue; import org.apache.lucene.analysis.Token; @@ -38,8 +39,7 @@ import org.apache.lucene.analysis.Token; */ public class PossibilityIterator implements Iterator { private List> possibilityList = new ArrayList>(); - private List rankedPossibilityList = new ArrayList(); - private Iterator rankedPossibilityIterator; + private Iterator rankedPossibilityIterator = null; private int correctionIndex[]; private boolean done = false; @@ -56,7 +56,7 @@ public class PossibilityIterator implements Iterator { * * @param suggestions */ - public PossibilityIterator(Map> suggestions) { + public PossibilityIterator(Map> suggestions, int maximumRequiredSuggestions, int maxEvaluations) { for (Map.Entry> entry : suggestions.entrySet()) { Token token = entry.getKey(); List possibleCorrections = new ArrayList(); @@ -84,12 +84,27 @@ public class PossibilityIterator implements Iterator { correctionIndex[i] = 0; } } - - while (internalHasNext()) { - rankedPossibilityList.add(internalNext()); + + long count = 0; + PriorityQueue rankedPossibilities = new PriorityQueue(); + while (count < maxEvaluations && internalHasNext()) { + RankedSpellPossibility rsp = internalNext(); + count++; + + if(rankedPossibilities.size() >= maximumRequiredSuggestions && rsp.getRank() >= rankedPossibilities.peek().getRank()) { + continue; + } + rankedPossibilities.offer(rsp); + if(rankedPossibilities.size() > 
maximumRequiredSuggestions) { + rankedPossibilities.poll(); + } } - Collections.sort(rankedPossibilityList); - rankedPossibilityIterator = rankedPossibilityList.iterator(); + + RankedSpellPossibility[] rpArr = new RankedSpellPossibility[rankedPossibilities.size()]; + for(int i=rankedPossibilities.size() - 1 ; i>=0 ; i--) { + rpArr[i] = rankedPossibilities.remove(); + } + rankedPossibilityIterator = Arrays.asList(rpArr).iterator(); } private boolean internalHasNext() { diff --git a/solr/src/java/org/apache/solr/spelling/RankedSpellPossibility.java b/solr/src/java/org/apache/solr/spelling/RankedSpellPossibility.java index 02103f9c3f5..61093fc79a9 100644 --- a/solr/src/java/org/apache/solr/spelling/RankedSpellPossibility.java +++ b/solr/src/java/org/apache/solr/spelling/RankedSpellPossibility.java @@ -22,8 +22,9 @@ public class RankedSpellPossibility implements Comparable corrections; private int rank; + //Rank poorer suggestions ahead of better ones for use with a PriorityQueue public int compareTo(RankedSpellPossibility rcl) { - return new Integer(rank).compareTo(rcl.rank); + return new Integer(rcl.rank).compareTo(rank); } public List getCorrections() { @@ -41,4 +42,17 @@ public class RankedSpellPossibility implements Comparable").append(corr.getCorrection()).append(" (").append(corr.getNumberOfOccurences()).append(")"); + } + } + return sb.toString(); + } } diff --git a/solr/src/java/org/apache/solr/spelling/SpellCheckCollator.java b/solr/src/java/org/apache/solr/spelling/SpellCheckCollator.java index 6b5c37b0ef1..5ed1ab3e4c0 100644 --- a/solr/src/java/org/apache/solr/spelling/SpellCheckCollator.java +++ b/solr/src/java/org/apache/solr/spelling/SpellCheckCollator.java @@ -36,7 +36,7 @@ public class SpellCheckCollator { private static final Logger LOG = LoggerFactory.getLogger(SpellCheckCollator.class); public List collate(SpellingResult result, String originalQuery, ResponseBuilder ultimateResponse, - int maxCollations, int maxTries) { + int maxCollations, int maxTries, int maxEvaluations) { List collations = new ArrayList(); QueryComponent queryComponent = null; @@ -62,7 +62,7 @@ public class SpellCheckCollator { int tryNo = 0; int collNo = 0; - PossibilityIterator possibilityIter = new PossibilityIterator(result.getSuggestions()); + PossibilityIterator possibilityIter = new PossibilityIterator(result.getSuggestions(), maxTries, maxEvaluations); while (tryNo < maxTries && collNo < maxCollations && possibilityIter.hasNext()) { RankedSpellPossibility possibility = possibilityIter.next(); diff --git a/solr/src/test/org/apache/solr/client/solrj/response/TestSpellCheckResponse.java b/solr/src/test/org/apache/solr/client/solrj/response/TestSpellCheckResponse.java index 646d8bca727..7b11bf0f4ac 100644 --- a/solr/src/test/org/apache/solr/client/solrj/response/TestSpellCheckResponse.java +++ b/solr/src/test/org/apache/solr/client/solrj/response/TestSpellCheckResponse.java @@ -143,7 +143,7 @@ public class TestSpellCheckResponse extends SolrJettyTestBase { //Test Expanded Collation Results query.set(SpellingParams.SPELLCHECK_COLLATE_EXTENDED_RESULTS, true); - query.set(SpellingParams.SPELLCHECK_MAX_COLLATION_TRIES, 5); + query.set(SpellingParams.SPELLCHECK_MAX_COLLATION_TRIES, 10); query.set(SpellingParams.SPELLCHECK_MAX_COLLATIONS, 2); request = new QueryRequest(query); response = request.process(server).getSpellCheckResponse(); diff --git a/solr/src/test/org/apache/solr/spelling/SpellCheckCollatorTest.java b/solr/src/test/org/apache/solr/spelling/SpellCheckCollatorTest.java index 
669676f6341..c819c9f8a8a 100644 --- a/solr/src/test/org/apache/solr/spelling/SpellCheckCollatorTest.java +++ b/solr/src/test/org/apache/solr/spelling/SpellCheckCollatorTest.java @@ -60,8 +60,8 @@ public class SpellCheckCollatorTest extends SolrTestCaseJ4 { params.add(SpellCheckComponent.SPELLCHECK_BUILD, "true"); params.add(SpellCheckComponent.SPELLCHECK_COUNT, "10"); params.add(SpellCheckComponent.SPELLCHECK_COLLATE, "true"); - params.add(SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES, "5"); - params.add(SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "2"); + params.add(SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES, "10"); + params.add(SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "10"); params.add(CommonParams.Q, "lowerfilt:(+fauth +home +loane)"); params.add(CommonParams.FQ, "NOT(id:1)"); @@ -77,8 +77,10 @@ public class SpellCheckCollatorTest extends SolrTestCaseJ4 { NamedList spellCheck = (NamedList) values.get("spellcheck"); NamedList suggestions = (NamedList) spellCheck.get("suggestions"); List collations = suggestions.getAll("collation"); - assertTrue(collations.size() == 1); - assertTrue(collations.get(0).equals("lowerfilt:(+faith +hope +love)")); + assertTrue(collations.size() > 0); + for(String collation : collations) { + assertTrue(!collation.equals("lowerfilt:(+faith +hope +loaves)")); + } } @Test @@ -180,7 +182,7 @@ public class SpellCheckCollatorTest extends SolrTestCaseJ4 { // combination exists. params.remove(SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES); params.remove(SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS); - params.add(SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES, "5"); + params.add(SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES, "10"); params.add(SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "2"); handler = core.getRequestHandler("spellCheckCompRH"); rsp = new SolrQueryResponse(); diff --git a/solr/src/test/org/apache/solr/spelling/SpellPossibilityIteratorTest.java b/solr/src/test/org/apache/solr/spelling/SpellPossibilityIteratorTest.java index b70ba1fb95a..6cf12d76e06 100644 --- a/solr/src/test/org/apache/solr/spelling/SpellPossibilityIteratorTest.java +++ b/solr/src/test/org/apache/solr/spelling/SpellPossibilityIteratorTest.java @@ -28,6 +28,7 @@ import org.junit.Test; public class SpellPossibilityIteratorTest extends SolrTestCaseJ4 { private static Map> suggestions = new LinkedHashMap>(); + private static Map> lotsaSuggestions = new LinkedHashMap>(); @Override @Before @@ -72,21 +73,57 @@ public class SpellPossibilityIteratorTest extends SolrTestCaseJ4 { suggestions.put(new Token("AYE", 0, 2), AYE); suggestions.put(new Token("BEE", 0, 2), BEE); suggestions.put(new Token("CEE", 0, 2), CEE); + + lotsaSuggestions.put(new Token("AYE", 0, 2), AYE); + lotsaSuggestions.put(new Token("BEE", 0, 2), BEE); + lotsaSuggestions.put(new Token("CEE", 0, 2), CEE); + + lotsaSuggestions.put(new Token("AYE1", 0, 3), AYE); + lotsaSuggestions.put(new Token("BEE1", 0, 3), BEE); + lotsaSuggestions.put(new Token("CEE1", 0, 3), CEE); + + lotsaSuggestions.put(new Token("AYE2", 0, 3), AYE); + lotsaSuggestions.put(new Token("BEE2", 0, 3), BEE); + lotsaSuggestions.put(new Token("CEE2", 0, 3), CEE); + + lotsaSuggestions.put(new Token("AYE3", 0, 3), AYE); + lotsaSuggestions.put(new Token("BEE3", 0, 3), BEE); + lotsaSuggestions.put(new Token("CEE3", 0, 3), CEE); + + lotsaSuggestions.put(new Token("AYE4", 0, 3), AYE); + lotsaSuggestions.put(new Token("BEE4", 0, 3), BEE); + lotsaSuggestions.put(new Token("CEE4", 0, 3), CEE); + } + + @Test + public void testScalability() 
throws Exception { + PossibilityIterator iter = new PossibilityIterator(lotsaSuggestions, 1000, 10000); + int count = 0; + while (iter.hasNext()) { + RankedSpellPossibility rsp = iter.next(); + count++; + } + assertTrue(count==1000); } @Test public void testSpellPossibilityIterator() throws Exception { - PossibilityIterator iter = new PossibilityIterator(suggestions); + PossibilityIterator iter = new PossibilityIterator(suggestions, 1000, 10000); int count = 0; while (iter.hasNext()) { - iter.next(); + RankedSpellPossibility rsp = iter.next(); + if(count==0) { + assertTrue("I".equals(rsp.getCorrections().get(0).getCorrection())); + assertTrue("alpha".equals(rsp.getCorrections().get(1).getCorrection())); + assertTrue("one".equals(rsp.getCorrections().get(2).getCorrection())); + } count++; } assertTrue(("Three maps (8*9*10) should return 720 iterations but instead returned " + count), count == 720); suggestions.remove(new Token("CEE", 0, 2)); - iter = new PossibilityIterator(suggestions); + iter = new PossibilityIterator(suggestions, 100, 10000); count = 0; while (iter.hasNext()) { iter.next(); @@ -95,16 +132,16 @@ public class SpellPossibilityIteratorTest extends SolrTestCaseJ4 { assertTrue(("Two maps (8*9) should return 72 iterations but instead returned " + count), count == 72); suggestions.remove(new Token("BEE", 0, 2)); - iter = new PossibilityIterator(suggestions); + iter = new PossibilityIterator(suggestions, 5, 10000); count = 0; while (iter.hasNext()) { iter.next(); count++; } - assertTrue(("One map of 8 should return 8 iterations but instead returned " + count), count == 8); + assertTrue(("We requested 5 suggestions but got " + count), count == 5); suggestions.remove(new Token("AYE", 0, 2)); - iter = new PossibilityIterator(suggestions); + iter = new PossibilityIterator(suggestions, Integer.MAX_VALUE, 10000); count = 0; while (iter.hasNext()) { iter.next(); From 2d56e6a05c5cb78bb11a53a0ef78bda56a7bb205 Mon Sep 17 00:00:00 2001 From: Steven Rowe Date: Mon, 6 Jun 2011 21:54:00 +0000 Subject: [PATCH 071/116] Inserted space after [-t] in cmdline usage message to make it clear that there should be a space after the "-t" if it is used git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1132795 13f79535-47bb-0310-9956-ffa450edef68 --- .../misc/src/java/org/apache/lucene/misc/HighFreqTerms.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lucene/contrib/misc/src/java/org/apache/lucene/misc/HighFreqTerms.java b/lucene/contrib/misc/src/java/org/apache/lucene/misc/HighFreqTerms.java index cc9ce2635f7..be5e4154e1d 100644 --- a/lucene/contrib/misc/src/java/org/apache/lucene/misc/HighFreqTerms.java +++ b/lucene/contrib/misc/src/java/org/apache/lucene/misc/HighFreqTerms.java @@ -97,7 +97,7 @@ public class HighFreqTerms { private static void usage() { System.out .println("\n\n" - + "java org.apache.lucene.misc.HighFreqTerms [-t][number_terms] [field]\n\t -t: include totalTermFreq\n\n"); + + "java org.apache.lucene.misc.HighFreqTerms [-t] [number_terms] [field]\n\t -t: include totalTermFreq\n\n"); } /** * From 31479ab48ce81ff9c4163bb6229dce764ff10803 Mon Sep 17 00:00:00 2001 From: Steven Rowe Date: Mon, 6 Jun 2011 22:21:27 +0000 Subject: [PATCH 072/116] LUCENE-1736: DateTools.java general improvements git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1132806 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 3 + .../org/apache/lucene/document/DateTools.java | 128 ++++++------------ 2 files changed, 44 insertions(+), 87 deletions(-) diff --git 
a/lucene/CHANGES.txt b/lucene/CHANGES.txt index f350beb7ba5..1b84eb4b5d2 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -423,6 +423,9 @@ Optimizations * LUCENE-2897: Apply deleted terms while flushing a segment. We still buffer deleted terms to later apply to past segments. (Mike McCandless) + +* LUCENE-1736: DateTools.java general improvements. + (David Smiley via Steve Rowe) Bug fixes diff --git a/lucene/src/java/org/apache/lucene/document/DateTools.java b/lucene/src/java/org/apache/lucene/document/DateTools.java index 0e5199c6247..17d42939ac0 100644 --- a/lucene/src/java/org/apache/lucene/document/DateTools.java +++ b/lucene/src/java/org/apache/lucene/document/DateTools.java @@ -17,14 +17,15 @@ package org.apache.lucene.document; * limitations under the License. */ +import org.apache.lucene.search.NumericRangeQuery; // for javadocs +import org.apache.lucene.util.NumericUtils; // for javadocs + import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.Calendar; import java.util.Date; -import java.util.TimeZone; import java.util.Locale; -import org.apache.lucene.search.NumericRangeQuery; // for javadocs -import org.apache.lucene.util.NumericUtils; // for javadocs +import java.util.TimeZone; /** * Provides support for converting dates to strings and vice-versa. @@ -47,38 +48,27 @@ import org.apache.lucene.util.NumericUtils; // for javadocs */ public class DateTools { - private static final class DateFormats { - final static TimeZone GMT = TimeZone.getTimeZone("GMT"); + final static TimeZone GMT = TimeZone.getTimeZone("GMT"); - final SimpleDateFormat YEAR_FORMAT = new SimpleDateFormat("yyyy", Locale.US); - final SimpleDateFormat MONTH_FORMAT = new SimpleDateFormat("yyyyMM", Locale.US); - final SimpleDateFormat DAY_FORMAT = new SimpleDateFormat("yyyyMMdd", Locale.US); - final SimpleDateFormat HOUR_FORMAT = new SimpleDateFormat("yyyyMMddHH", Locale.US); - final SimpleDateFormat MINUTE_FORMAT = new SimpleDateFormat("yyyyMMddHHmm", Locale.US); - final SimpleDateFormat SECOND_FORMAT = new SimpleDateFormat("yyyyMMddHHmmss", Locale.US); - final SimpleDateFormat MILLISECOND_FORMAT = new SimpleDateFormat("yyyyMMddHHmmssSSS", Locale.US); { // times need to be normalized so the value doesn't depend on the // location the index is created/used: YEAR_FORMAT.setTimeZone(GMT); MONTH_FORMAT.setTimeZone(GMT); DAY_FORMAT.setTimeZone(GMT); HOUR_FORMAT.setTimeZone(GMT); MINUTE_FORMAT.setTimeZone(GMT); SECOND_FORMAT.setTimeZone(GMT); MILLISECOND_FORMAT.setTimeZone(GMT); } - final Calendar calInstance = Calendar.getInstance(GMT, Locale.US); - } - - private static final ThreadLocal<DateFormats> FORMATS = new ThreadLocal<DateFormats>() { + private static final ThreadLocal<Calendar> TL_CAL = new ThreadLocal<Calendar>() { @Override - protected DateFormats initialValue() { - return new DateFormats(); + protected Calendar initialValue() { + return Calendar.getInstance(GMT, Locale.US); } }; - + + //indexed by format length + private static final ThreadLocal<SimpleDateFormat[]> TL_FORMATS = new ThreadLocal<SimpleDateFormat[]>() { + @Override + protected SimpleDateFormat[] initialValue() { + SimpleDateFormat[] arr = new SimpleDateFormat[Resolution.MILLISECOND.formatLen+1]; + for (Resolution resolution : Resolution.values()) { + arr[resolution.formatLen] = (SimpleDateFormat)resolution.format.clone(); + } + return arr; + } + }; + // cannot create, the class has static methods only private DateTools() {} @@ -105,22 +95,8 @@ public class DateTools { * depending on resolution; using GMT as timezone */ public static String timeToString(long time,
Resolution resolution) { - final DateFormats formats = FORMATS.get(); - - formats.calInstance.setTimeInMillis(round(time, resolution)); - final Date date = formats.calInstance.getTime(); - - switch (resolution) { - case YEAR: return formats.YEAR_FORMAT.format(date); - case MONTH:return formats.MONTH_FORMAT.format(date); - case DAY: return formats.DAY_FORMAT.format(date); - case HOUR: return formats.HOUR_FORMAT.format(date); - case MINUTE: return formats.MINUTE_FORMAT.format(date); - case SECOND: return formats.SECOND_FORMAT.format(date); - case MILLISECOND: return formats.MILLISECOND_FORMAT.format(date); - } - - throw new IllegalArgumentException("unknown resolution " + resolution); + final Date date = new Date(round(time, resolution)); + return TL_FORMATS.get()[resolution.formatLen].format(date); } /** @@ -148,24 +124,11 @@ public class DateTools { * expected format */ public static Date stringToDate(String dateString) throws ParseException { - final DateFormats formats = FORMATS.get(); - - if (dateString.length() == 4) { - return formats.YEAR_FORMAT.parse(dateString); - } else if (dateString.length() == 6) { - return formats.MONTH_FORMAT.parse(dateString); - } else if (dateString.length() == 8) { - return formats.DAY_FORMAT.parse(dateString); - } else if (dateString.length() == 10) { - return formats.HOUR_FORMAT.parse(dateString); - } else if (dateString.length() == 12) { - return formats.MINUTE_FORMAT.parse(dateString); - } else if (dateString.length() == 14) { - return formats.SECOND_FORMAT.parse(dateString); - } else if (dateString.length() == 17) { - return formats.MILLISECOND_FORMAT.parse(dateString); + try { + return TL_FORMATS.get()[dateString.length()].parse(dateString); + } catch (Exception e) { + throw new ParseException("Input is not a valid date string: " + dateString, 0); } - throw new ParseException("Input is not valid date string: " + dateString, 0); } /** @@ -192,43 +155,23 @@ public class DateTools { * set to 0 or 1, expressed as milliseconds since January 1, 1970, 00:00:00 GMT */ public static long round(long time, Resolution resolution) { - final Calendar calInstance = FORMATS.get().calInstance; + final Calendar calInstance = TL_CAL.get(); calInstance.setTimeInMillis(time); switch (resolution) { + //NOTE: switch statement fall-through is deliberate case YEAR: calInstance.set(Calendar.MONTH, 0); - calInstance.set(Calendar.DAY_OF_MONTH, 1); - calInstance.set(Calendar.HOUR_OF_DAY, 0); - calInstance.set(Calendar.MINUTE, 0); - calInstance.set(Calendar.SECOND, 0); - calInstance.set(Calendar.MILLISECOND, 0); - break; case MONTH: calInstance.set(Calendar.DAY_OF_MONTH, 1); - calInstance.set(Calendar.HOUR_OF_DAY, 0); - calInstance.set(Calendar.MINUTE, 0); - calInstance.set(Calendar.SECOND, 0); - calInstance.set(Calendar.MILLISECOND, 0); - break; case DAY: calInstance.set(Calendar.HOUR_OF_DAY, 0); - calInstance.set(Calendar.MINUTE, 0); - calInstance.set(Calendar.SECOND, 0); - calInstance.set(Calendar.MILLISECOND, 0); - break; case HOUR: calInstance.set(Calendar.MINUTE, 0); - calInstance.set(Calendar.SECOND, 0); - calInstance.set(Calendar.MILLISECOND, 0); - break; case MINUTE: calInstance.set(Calendar.SECOND, 0); - calInstance.set(Calendar.MILLISECOND, 0); - break; case SECOND: calInstance.set(Calendar.MILLISECOND, 0); - break; case MILLISECOND: // don't cut off anything break; @@ -241,7 +184,18 @@ public class DateTools { /** Specifies the time granularity. 
*/ public static enum Resolution { - YEAR, MONTH, DAY, HOUR, MINUTE, SECOND, MILLISECOND; + YEAR(4), MONTH(6), DAY(8), HOUR(10), MINUTE(12), SECOND(14), MILLISECOND(17); + + final int formatLen; + final SimpleDateFormat format;//should be cloned before use, since it's not threadsafe + + Resolution(int formatLen) { + this.formatLen = formatLen; + // formatLen 10's place: 11111111 + // formatLen 1's place: 12345678901234567 + this.format = new SimpleDateFormat("yyyyMMddHHmmssSSS".substring(0,formatLen),Locale.US); + this.format.setTimeZone(GMT); + } /** this method returns the name of the resolution * in lowercase (for backwards compatibility) */ From d280169fefc038ee19fa5f3989766611c948ab26 Mon Sep 17 00:00:00 2001 From: Ryan McKinley Date: Mon, 6 Jun 2011 22:54:05 +0000 Subject: [PATCH 073/116] SOLR-2399: Solr Admin Interface, reworked git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1132826 13f79535-47bb-0310-9956-ffa450edef68 --- solr/src/webapp/web/css/screen.css | 197 ++++++++++++++++++-- solr/src/webapp/web/css/syntax.css | 153 --------------- solr/src/webapp/web/js/script.js | 66 +++++-- solr/src/webapp/web/tpl/cores.html | 2 +- solr/src/webapp/web/tpl/dataimport.html | 1 - solr/src/webapp/web/tpl/schema-browser.html | 2 +- 6 files changed, 235 insertions(+), 186 deletions(-) delete mode 100644 solr/src/webapp/web/css/syntax.css diff --git a/solr/src/webapp/web/css/screen.css b/solr/src/webapp/web/css/screen.css index 0c2f0594a15..5fc779d2540 100644 --- a/solr/src/webapp/web/css/screen.css +++ b/solr/src/webapp/web/css/screen.css @@ -293,7 +293,7 @@ ul display: none; } -#menu .singlecore li:last-child +#menu li.active:last-child { border-bottom: 0; } @@ -310,10 +310,14 @@ ul #menu ul .active ul { - border-bottom: 1px solid #f0f0f0; display: block; } +#menu ul li.active:last-child ul +{ + border-bottom: 1px solid #f0f0f0; +} + #menu ul ul li { border-bottom: 0; @@ -406,6 +410,18 @@ ul #menu .plugins a { background-image: url( ../img/ico/block.png ); } #menu .dataimport a { background-image: url( ../img/ico/document-import.png ); } +#menu .ping.error +{ + background-color: #f7f7e9; +} + +#menu .ping.error a +{ + background-color: transparent; + background-image: url( ../img/ico/system-monitor--exclamation.png ); + cursor: help; +} + #content-wrapper { float: right; @@ -417,17 +433,16 @@ ul padding: 10px; } -#content.single +#content > .loader { - padding: 0; + background-position: 0 50%; + padding-left: 21px; } -#content iframe +#content > pre { - border: 0; - display: block; - min-height: 400px; - width: 100%; + max-height: 600px; + overflow: auto; } #content .block @@ -462,6 +477,155 @@ ul display: none; } +/* syntax */ + +pre.syntax +{ + overflow: auto; +} + +pre.syntax code +{ + display: block; + color: #000; +} + +pre.syntax .comment, +pre.syntax .template_comment, +pre.syntax .diff .header, +pre.syntax .javadoc +{ + color: #998; + font-style: italic; +} + +pre.syntax .keyword, +pre.syntax .css .rule .keyword, +pre.syntax .winutils, +pre.syntax .javascript .title, +pre.syntax .lisp .title, +pre.syntax .subst +{ + color: #000; + font-weight: bold; +} + +pre.syntax .number, +pre.syntax .hexcolor +{ + color: #40a070; +} + +pre.syntax .string, +pre.syntax .tag .value, +pre.syntax .phpdoc, +pre.syntax .tex .formula +{ + color: #d14; +} + +pre.syntax .title, +pre.syntax .id +{ + color: #900; + font-weight: bold; +} + +pre.syntax .javascript .title, +pre.syntax .lisp .title, +pre.syntax .subst +{ + font-weight: normal; +} + +pre.syntax .class .title, +pre.syntax .tex 
.command +{ + color: #458; + font-weight: bold; +} + +pre.syntax .tag, +pre.syntax .css .keyword, +pre.syntax .html .keyword, +pre.syntax .tag .title, +pre.syntax .django .tag .keyword +{ + color: #000080; + font-weight: normal; +} + +pre.syntax .attribute, +pre.syntax .variable, +pre.syntax .instancevar, +pre.syntax .lisp .body +{ + color: #008080; +} + +pre.syntax .regexp +{ + color: #009926; +} + +pre.syntax .class +{ + color: #458; + font-weight: bold; +} + +pre.syntax .symbol, +pre.syntax .ruby .symbol .string, +pre.syntax .ruby .symbol .keyword, +pre.syntax .ruby .symbol .keymethods, +pre.syntax .lisp .keyword, +pre.syntax .tex .special +{ + color: #990073; +} + +pre.syntax .builtin, +pre.syntax .built_in, +pre.syntax .lisp .title +{ + color: #0086b3; +} + +pre.syntax .preprocessor, +pre.syntax .pi, +pre.syntax .doctype, +pre.syntax .shebang, +pre.syntax .cdata +{ + color: #999; + font-weight: bold; +} + +pre.syntax .deletion +{ + background: #fdd; +} + +pre.syntax .addition +{ + background: #dfd; +} + +pre.syntax .diff .change +{ + background: #0086b3; +} + +pre.syntax .chunk +{ + color: #aaa; +} + +pre.syntax .tex .formula +{ + opacity: 0.5; +} + /* dashboard */ #content #dashboard .block @@ -2177,22 +2341,27 @@ ul #content #cores { - background-image: url( ../img/div.gif ); - background-position: 150px 0; - background-repeat: repeat-y; } #content #cores #frame { float: right; - width: 610px; + width: 78%; +} + +#content #cores #navigation +{ + background-image: url( ../img/div.gif ); + background-position: 100% 0; + background-repeat: repeat-y; + width: 20%; } #content #cores #list { float: left; padding-top: 15px; - width: 151px; + width: 100%; } #content #cores #list a diff --git a/solr/src/webapp/web/css/syntax.css b/solr/src/webapp/web/css/syntax.css deleted file mode 100644 index ea21cf9afa2..00000000000 --- a/solr/src/webapp/web/css/syntax.css +++ /dev/null @@ -1,153 +0,0 @@ - -/* - -github.com style (c) Vasily Polovnyov - -*/ - -pre.syntax -{ - overflow: auto; -} - -pre.syntax code -{ - display: block; - color: #000; -} - -pre.syntax .comment, -pre.syntax .template_comment, -pre.syntax .diff .header, -pre.syntax .javadoc -{ - color: #998; - font-style: italic; -} - -pre.syntax .keyword, -pre.syntax .css .rule .keyword, -pre.syntax .winutils, -pre.syntax .javascript .title, -pre.syntax .lisp .title, -pre.syntax .subst -{ - color: #000; - font-weight: bold; -} - -pre.syntax .number, -pre.syntax .hexcolor -{ - color: #40a070; -} - -pre.syntax .string, -pre.syntax .tag .value, -pre.syntax .phpdoc, -pre.syntax .tex .formula -{ - color: #d14; -} - -pre.syntax .title, -pre.syntax .id -{ - color: #900; - font-weight: bold; -} - -pre.syntax .javascript .title, -pre.syntax .lisp .title, -pre.syntax .subst -{ - font-weight: normal; -} - -pre.syntax .class .title, -pre.syntax .tex .command -{ - color: #458; - font-weight: bold; -} - -pre.syntax .tag, -pre.syntax .css .keyword, -pre.syntax .html .keyword, -pre.syntax .tag .title, -pre.syntax .django .tag .keyword -{ - color: #000080; - font-weight: normal; -} - -pre.syntax .attribute, -pre.syntax .variable, -pre.syntax .instancevar, -pre.syntax .lisp .body -{ - color: #008080; -} - -pre.syntax .regexp -{ - color: #009926; -} - -pre.syntax .class -{ - color: #458; - font-weight: bold; -} - -pre.syntax .symbol, -pre.syntax .ruby .symbol .string, -pre.syntax .ruby .symbol .keyword, -pre.syntax .ruby .symbol .keymethods, -pre.syntax .lisp .keyword, -pre.syntax .tex .special -{ - color: #990073; -} - -pre.syntax .builtin, -pre.syntax 
.built_in, -pre.syntax .lisp .title -{ - color: #0086b3; -} - -pre.syntax .preprocessor, -pre.syntax .pi, -pre.syntax .doctype, -pre.syntax .shebang, -pre.syntax .cdata -{ - color: #999; - font-weight: bold; -} - -pre.syntax .deletion -{ - background: #fdd; -} - -pre.syntax .addition -{ - background: #dfd; -} - -pre.syntax .diff .change -{ - background: #0086b3; -} - -pre.syntax .chunk -{ - color: #aaa; -} - -pre.syntax .tex .formula -{ - opacity: 0.5; -} \ No newline at end of file diff --git a/solr/src/webapp/web/js/script.js b/solr/src/webapp/web/js/script.js index 45d6789aabc..acc697ed2c1 100644 --- a/solr/src/webapp/web/js/script.js +++ b/solr/src/webapp/web/js/script.js @@ -36,26 +36,31 @@ var sammy = $.sammy 'ping', function( event ) { - var element = $( this.params.element ); - $.ajax ( { - url : element.attr( 'href' ) + '?wt=json', + url : $( this.params.element ).attr( 'href' ) + '?wt=json', dataType : 'json', + context: this.params.element, beforeSend : function( arr, form, options ) { - loader.show( element ); + loader.show( this ); }, - success : function( response ) + success : function( response, text_status, xhr ) { - var qtime_element = $( '.qtime', element ); + $( this ) + .removeAttr( 'title' ); + + $( this ).parents( 'li' ) + .removeClass( 'error' ); + + var qtime_element = $( '.qtime', this ); if( 0 === qtime_element.size() ) { qtime_element = $( ' ()' ); - element + $( this ) .append ( qtime_element @@ -65,12 +70,17 @@ var sammy = $.sammy $( 'span', qtime_element ) .html( response.responseHeader.QTime + 'ms' ); }, - error : function() + error : function( xhr, text_status, error_thrown ) { + $( this ) + .attr( 'title', '/admin/ping is not configured (' + xhr.status + ': ' + error_thrown + ')' ); + + $( this ).parents( 'li' ) + .addClass( 'error' ); }, - complete : function() + complete : function( xhr, text_status ) { - loader.hide( element ); + loader.hide( this ); } } ); @@ -261,6 +271,8 @@ var sammy = $.sammy /^#\/(cores)$/, function( context ) { + delete app.cores_template; + sammy.trigger ( 'cores_load_data', @@ -1991,6 +2003,8 @@ var sammy = $.sammy .hide(); }; + delete app.schema_browser_data; + sammy.trigger ( 'schema_browser_load', @@ -3510,13 +3524,33 @@ var sammy = $.sammy /^#\/([\w\d]+)\/(schema|config)$/, function( context ) { - var content_element = $( '#content' ); + $.ajax + ( + { + url : $( '.active a', this.active_core ).attr( 'href' ), + dataType : 'xml', + context : $( '#content' ), + beforeSend : function( xhr, settings ) + { + this + .html( '
<div class="loader">Loading ...</div>' ); }, complete : function( xhr, text_status ) { var code = $( '<pre class="syntax"><code>' + xhr.responseText.replace( /\</g, '&lt;' ).replace( /\>/g, '&gt;' ) + '</code></pre>' + ); this.html( code ); - content_element - .html( '<iframe src="' + $( '.active a', this.active_core ).attr( 'href' ) + '"></iframe>' ); - - $( 'iframe', content_element ) - .css( 'height', $( '#main' ).height() ); + if( 'success' === text_status ) { hljs.highlightBlock( code.get(0) ); } } } ); } ); diff --git a/solr/src/webapp/web/tpl/cores.html b/solr/src/webapp/web/tpl/cores.html index 5baf4bb2a86..c5e02a92e07 100644 --- a/solr/src/webapp/web/tpl/cores.html +++ b/solr/src/webapp/web/tpl/cores.html @@ -174,7 +174,7 @@ -
* - * @version 1.0 * @deprecated (3.1) Use {@link StopFilter} instead, which has the same functionality. * This filter will be removed in Lucene 5.0 */ diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java index 13890039d6d..ddbbf18e4d9 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java @@ -52,7 +52,6 @@ import org.apache.lucene.util.AttributeSource; * C4C2, C1C2C3 ... the ChineseTokenizer works, but the * CJKTokenizer will not work. *
</p>
- * @version 1.0 * @deprecated (3.1) Use {@link StandardTokenizer} instead, which has the same functionality. * This filter will be removed in Lucene 5.0 */ diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java index 8383816380f..587e2f3d6fa 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java @@ -32,7 +32,6 @@ import org.apache.lucene.util.Version; /** * {@link Analyzer} for Thai language. It uses {@link java.text.BreakIterator} to break words. - * @version 0.2 * *
<p><b>NOTE</b>: This class uses the same {@link Version} * dependent settings as {@link StandardAnalyzer}.</p>
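The NOTE kept as context in that hunk is the operative documentation: ThaiAnalyzer picks its version-dependent defaults from the {@link Version} passed at construction, just as StandardAnalyzer does. A minimal usage sketch, assuming the Lucene 3.x constructor signature (an illustration only, not part of this patch):

    import org.apache.lucene.analysis.th.ThaiAnalyzer;
    import org.apache.lucene.util.Version;

    // Passing an explicit Version pins version-dependent analysis defaults,
    // so index-time and query-time tokenization stay consistent.
    ThaiAnalyzer analyzer = new ThaiAnalyzer(Version.LUCENE_31);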
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java index 2cfb6bfb112..07fc7ad47f9 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java @@ -26,7 +26,6 @@ import org.apache.lucene.util.Version; /** * Test case for FrenchAnalyzer. * - * @version $version$ */ public class TestFrenchAnalyzer extends BaseTokenStreamTestCase { diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java index 256cbacd1ca..4daa39cf171 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java @@ -26,7 +26,6 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.*; /** - * @version $Id:$ */ public class TestTrimFilter extends BaseTokenStreamTestCase { diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceFilter.java index 12e0982d61e..dd0e10c403a 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceFilter.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceFilter.java @@ -25,7 +25,6 @@ import java.io.StringReader; import java.util.regex.Pattern; /** - * @version $Id:$ */ public class TestPatternReplaceFilter extends BaseTokenStreamTestCase { diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilter.java index 7af49add83b..82c2e1ce6ae 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilter.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilter.java @@ -33,7 +33,6 @@ import org.apache.lucene.analysis.core.WhitespaceTokenizer; import org.apache.lucene.analysis.tokenattributes.*; /** - * @version $Id$ */ public class TestSynonymFilter extends BaseTokenStreamTestCase { diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java index 6247bbf97be..81f2e278204 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java @@ -27,7 +27,6 @@ import org.apache.lucene.util.Version; /** * Test case for ThaiAnalyzer, modified from TestFrenchAnalyzer * - * @version 0.1 */ public class TestThaiAnalyzer extends BaseTokenStreamTestCase { diff --git a/modules/suggest/src/java/org/apache/lucene/search/spell/Dictionary.java b/modules/suggest/src/java/org/apache/lucene/search/spell/Dictionary.java index b54b3122c7b..4dee7146309 100755 --- a/modules/suggest/src/java/org/apache/lucene/search/spell/Dictionary.java +++ b/modules/suggest/src/java/org/apache/lucene/search/spell/Dictionary.java @@ -23,7 +23,6 @@ import java.util.Iterator; * here is 
just a list of words. * * - * @version 1.0 */ public interface Dictionary { diff --git a/modules/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java b/modules/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java index bfeae31581e..a57e552b2bf 100755 --- a/modules/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java +++ b/modules/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java @@ -64,7 +64,6 @@ import org.apache.lucene.util.Version; * * * - * @version 1.0 */ public class SpellChecker implements java.io.Closeable { diff --git a/solr/contrib/dataimporthandler/src/extras/main/java/org/apache/solr/handler/dataimport/MailEntityProcessor.java b/solr/contrib/dataimporthandler/src/extras/main/java/org/apache/solr/handler/dataimport/MailEntityProcessor.java index 0231d1ec7e3..1db93273e87 100644 --- a/solr/contrib/dataimporthandler/src/extras/main/java/org/apache/solr/handler/dataimport/MailEntityProcessor.java +++ b/solr/contrib/dataimporthandler/src/extras/main/java/org/apache/solr/handler/dataimport/MailEntityProcessor.java @@ -42,7 +42,6 @@ import java.util.*; * http://wiki.apache.org/solr/DataImportHandler for more * details. This API is experimental and subject to change * - * @version $Id$ * @since solr 1.4 */ public class MailEntityProcessor extends EntityProcessorBase { diff --git a/solr/contrib/dataimporthandler/src/extras/main/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java b/solr/contrib/dataimporthandler/src/extras/main/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java index 9913c15cf18..b76c03885c2 100644 --- a/solr/contrib/dataimporthandler/src/extras/main/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java +++ b/solr/contrib/dataimporthandler/src/extras/main/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java @@ -52,7 +52,6 @@ import static org.apache.solr.handler.dataimport.XPathEntityProcessor.URL; *
<p>
An implementation of {@link EntityProcessor} which reads data from rich docs * using Apache Tika * - * @version $Id$ * @since solr 3.1 */ public class TikaEntityProcessor extends EntityProcessorBase { diff --git a/solr/contrib/dataimporthandler/src/extras/test/java/org/apache/solr/handler/dataimport/TestMailEntityProcessor.java b/solr/contrib/dataimporthandler/src/extras/test/java/org/apache/solr/handler/dataimport/TestMailEntityProcessor.java index 2ac19b32192..d12496a871a 100644 --- a/solr/contrib/dataimporthandler/src/extras/test/java/org/apache/solr/handler/dataimport/TestMailEntityProcessor.java +++ b/solr/contrib/dataimporthandler/src/extras/test/java/org/apache/solr/handler/dataimport/TestMailEntityProcessor.java @@ -40,7 +40,6 @@ import java.util.Map; * * TODO: Find a way to make the tests actually test code * - * @version $Id$ * @see org.apache.solr.handler.dataimport.MailEntityProcessor * @since solr 1.4 */ diff --git a/solr/contrib/dataimporthandler/src/extras/test/java/org/apache/solr/handler/dataimport/TestTikaEntityProcessor.java b/solr/contrib/dataimporthandler/src/extras/test/java/org/apache/solr/handler/dataimport/TestTikaEntityProcessor.java index 019fa85a0a5..934430db5f0 100644 --- a/solr/contrib/dataimporthandler/src/extras/test/java/org/apache/solr/handler/dataimport/TestTikaEntityProcessor.java +++ b/solr/contrib/dataimporthandler/src/extras/test/java/org/apache/solr/handler/dataimport/TestTikaEntityProcessor.java @@ -20,7 +20,6 @@ import org.junit.BeforeClass; import org.junit.Test; /**Testcase for TikaEntityProcessor - * @version $Id$ * @since solr 1.5 */ public class TestTikaEntityProcessor extends AbstractDataImportHandlerTestCase { diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/BinContentStreamDataSource.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/BinContentStreamDataSource.java index 1187f65e92d..cb6ffe9cdd3 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/BinContentStreamDataSource.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/BinContentStreamDataSource.java @@ -29,7 +29,6 @@ import java.util.Properties; *
<p/>
* This API is experimental and may change in the future. * - * @version $Id$ * @since solr 1.5 */ diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/BinFileDataSource.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/BinFileDataSource.java index e473ab8da32..986404ca8cf 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/BinFileDataSource.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/BinFileDataSource.java @@ -37,7 +37,6 @@ import java.util.Properties; *
<p/>
* This API is experimental and may change in the future. * - * @version $Id$ * @since solr 1.5 */ diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/BinURLDataSource.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/BinURLDataSource.java index 045d6fa3bd0..fb8a47e7fd0 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/BinURLDataSource.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/BinURLDataSource.java @@ -32,7 +32,6 @@ import java.util.Properties; *
<p/>
* This API is experimental and may change in the future. * - * @version $Id$ * @since solr 1.5 */ public class BinURLDataSource extends DataSource{ diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/CachedSqlEntityProcessor.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/CachedSqlEntityProcessor.java index 864e772288e..a2ef3b97dd7 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/CachedSqlEntityProcessor.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/CachedSqlEntityProcessor.java @@ -32,7 +32,6 @@ import java.util.Map; *
<p/>
* This API is experimental and subject to change * - * @version $Id$ * @since solr 1.3 */ public class CachedSqlEntityProcessor extends SqlEntityProcessor { diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ClobTransformer.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ClobTransformer.java index ae970d25fdb..45aff148a11 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ClobTransformer.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ClobTransformer.java @@ -33,7 +33,6 @@ import java.util.Map; *
<p/>
* This API is experimental and subject to change * - * @version $Id$ * @since solr 1.4 */ public class ClobTransformer extends Transformer { diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ContentStreamDataSource.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ContentStreamDataSource.java index 58ed19ed595..a2de2876ee5 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ContentStreamDataSource.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ContentStreamDataSource.java @@ -31,7 +31,6 @@ import java.util.Properties; *
<p/>
* This API is experimental and may change in the future. * - * @version $Id$ * @since solr 1.4 */ public class ContentStreamDataSource extends DataSource { diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/Context.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/Context.java index 290b202b2d5..d629d8f6e4d 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/Context.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/Context.java @@ -35,7 +35,6 @@ import java.util.Map; *
<p/>
* This API is experimental and subject to change * - * @version $Id$ * @since solr 1.3 */ public abstract class Context { diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ContextImpl.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ContextImpl.java index bd726835e2d..87b1ae2be45 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ContextImpl.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ContextImpl.java @@ -30,7 +30,6 @@ import java.util.concurrent.ConcurrentHashMap; *
<p/>
* This API is experimental and subject to change * - * @version $Id$ * @since solr 1.3 */ public class ContextImpl extends Context { diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataConfig.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataConfig.java index f0a9e412427..ab958860acb 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataConfig.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataConfig.java @@ -40,7 +40,6 @@ import java.util.*; *
<p/>
* This API is experimental and subject to change * - * @version $Id$ * @since solr 1.3 */ public class DataConfig { diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataImportHandler.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataImportHandler.java index 83e546d0c32..5da8b133a06 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataImportHandler.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataImportHandler.java @@ -63,7 +63,6 @@ import org.xml.sax.InputSource; *
<p/>
* This API is experimental and subject to change * - * @version $Id$ * @since solr 1.3 */ public class DataImportHandler extends RequestHandlerBase implements diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataImporter.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataImporter.java index b0fb60d6889..7c7bc3beb0d 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataImporter.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataImporter.java @@ -51,7 +51,6 @@ import java.util.concurrent.ConcurrentHashMap; *
<p/>
* This API is experimental and subject to change * - * @version $Id$ * @since solr 1.3 */ public class DataImporter { diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataSource.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataSource.java index 0226f504928..e76e4ee0091 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataSource.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataSource.java @@ -36,7 +36,6 @@ import java.util.Properties; *
<p/>
* This API is experimental and may change in the future. * - * @version $Id$ * @since solr 1.3 */ public abstract class DataSource { diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DateFormatTransformer.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DateFormatTransformer.java index 95c601e8911..f093f973f1b 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DateFormatTransformer.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DateFormatTransformer.java @@ -37,7 +37,6 @@ import org.slf4j.LoggerFactory; *
<p/>
* This API is experimental and subject to change * - * @version $Id$ * @since solr 1.3 */ public class DateFormatTransformer extends Transformer { diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DebugLogger.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DebugLogger.java index 8ee0126dc25..4b2ebafafba 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DebugLogger.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DebugLogger.java @@ -39,7 +39,6 @@ import java.util.Stack; *
<p/>
* This API is experimental and subject to change * - * @version $Id$ * @since solr 1.3 */ class DebugLogger { diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DocBuilder.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DocBuilder.java index 974fe214480..a1091493e58 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DocBuilder.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DocBuilder.java @@ -37,7 +37,6 @@ import java.util.concurrent.*; *
<p/>
* This API is experimental and subject to change * - * @version $Id$ * @since solr 1.3 */ public class DocBuilder { diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessor.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessor.java index be3fe49d33b..f285be31273 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessor.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessor.java @@ -36,7 +36,6 @@ import java.util.Map; *
<p/>
* This API is experimental and may change in the future. * - * @version $Id$ * @since solr 1.3 */ public abstract class EntityProcessor { diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessorBase.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessorBase.java index 10d8c147d6c..9aaa5374841 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessorBase.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessorBase.java @@ -29,7 +29,6 @@ import java.util.*; *
<p/>
* This API is experimental and subject to change * - * @version $Id$ * @since solr 1.3 */ public class EntityProcessorBase extends EntityProcessor { diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessorWrapper.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessorWrapper.java index 92fc9d48b62..c85dec109a8 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessorWrapper.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessorWrapper.java @@ -32,7 +32,6 @@ import java.util.Map; /** * A Wrapper over {@link EntityProcessor} instance which performs transforms and handles multi-row outputs correctly. * - * @version $Id$ * @since solr 1.4 */ public class EntityProcessorWrapper extends EntityProcessor { diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/Evaluator.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/Evaluator.java index 46ed2a33158..3393ad4ace2 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/Evaluator.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/Evaluator.java @@ -30,7 +30,6 @@ package org.apache.solr.handler.dataimport; *
<p/>
* This API is experimental and may change in the future. * - * @version $Id$ * @since solr 1.3 */ public abstract class Evaluator { diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EvaluatorBag.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EvaluatorBag.java index 24e728d82c0..076734e2d95 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EvaluatorBag.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EvaluatorBag.java @@ -41,7 +41,6 @@ import java.util.regex.Pattern; *
<p/>
* This API is experimental and may change in the future. * - * @version $Id$ * @since solr 1.3 */ public class EvaluatorBag { diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EventListener.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EventListener.java index 5ac603cff9d..b3cbee20ae8 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EventListener.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EventListener.java @@ -21,7 +21,6 @@ package org.apache.solr.handler.dataimport; * * This API is experimental and subject to change * - * @version $Id$ * @since solr 1.4 */ public interface EventListener { diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FieldReaderDataSource.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FieldReaderDataSource.java index b9d9ec74ab9..0fa0ddbbf94 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FieldReaderDataSource.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FieldReaderDataSource.java @@ -42,7 +42,6 @@ import java.util.Properties; *
<p/>
* Supports String, BLOB, CLOB data types and there is an extra field (in the entity) 'encoding' for BLOB types * - * @version $Id$ * @since 1.4 */ public class FieldReaderDataSource extends DataSource { diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FieldStreamDataSource.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FieldStreamDataSource.java index 132367cc0fa..252e8f08cc6 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FieldStreamDataSource.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FieldStreamDataSource.java @@ -43,7 +43,6 @@ import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVE * This may be used with any {@link EntityProcessor} which uses a {@link DataSource}<{@link InputStream}> eg: {@link TikaEntityProcessor} *
<p/>
* - * @version $Id$ * @since 3.1 */ public class FieldStreamDataSource extends DataSource { diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FileDataSource.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FileDataSource.java index 2f5a5aa1e2e..ca37c7368bc 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FileDataSource.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FileDataSource.java @@ -41,7 +41,6 @@ import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVE *
<p/>
* This API is experimental and may change in the future. * - * @version $Id$ * @since solr 1.3 */ public class FileDataSource extends DataSource { diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FileListEntityProcessor.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FileListEntityProcessor.java index 7549af7dfbd..520943f4e04 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FileListEntityProcessor.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FileListEntityProcessor.java @@ -50,7 +50,6 @@ import java.util.regex.Pattern; *
<p/>
* This API is experimental and may change in the future. * - * @version $Id$ * @since solr 1.3 * @see Pattern */ diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/HTMLStripTransformer.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/HTMLStripTransformer.java index b8bc4207db9..526976c4cc2 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/HTMLStripTransformer.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/HTMLStripTransformer.java @@ -30,7 +30,6 @@ import java.util.Map; * A {@link Transformer} implementation which strip off HTML tags using {@link HTMLStripCharFilter} This is useful * in case you don't need this HTML anyway. * - * @version $Id$ * @see HTMLStripCharFilter * @since solr 1.4 */ diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/HttpDataSource.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/HttpDataSource.java index 82646534119..a65831056db 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/HttpDataSource.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/HttpDataSource.java @@ -30,7 +30,6 @@ package org.apache.solr.handler.dataimport; *
<p/>
* This API is experimental and may change in the future. * - * @version $Id$ * @since solr 1.3 * @deprecated use {@link org.apache.solr.handler.dataimport.URLDataSource} instead */ diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/JdbcDataSource.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/JdbcDataSource.java index f48ca2cda86..df4b33f06a7 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/JdbcDataSource.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/JdbcDataSource.java @@ -33,7 +33,6 @@ import java.util.concurrent.Callable; *
<p/>
* This API is experimental and may change in the future. * - * @version $Id$ * @since solr 1.3 */ public class JdbcDataSource extends diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/LineEntityProcessor.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/LineEntityProcessor.java index 30e366316f7..e526e83c800 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/LineEntityProcessor.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/LineEntityProcessor.java @@ -52,7 +52,6 @@ import org.apache.commons.io.IOUtils; *
<p/>
* This API is experimental and may change in the future. * - * @version $Id$ * @since solr 1.4 * @see Pattern */ diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/LogTransformer.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/LogTransformer.java index 5a603a74049..704a03cba4d 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/LogTransformer.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/LogTransformer.java @@ -29,7 +29,6 @@ import java.util.Map; *
<p/>
* This API is experimental and may change in the future. * - * @version $Id$ * @since solr 1.4 */ public class LogTransformer extends Transformer { diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/MockDataSource.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/MockDataSource.java index 6fd7213b5a7..32048eaa1f1 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/MockDataSource.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/MockDataSource.java @@ -28,7 +28,6 @@ import java.util.Properties; *
<p/>
* This API is experimental and may change in the future. * - * @version $Id$ * @since solr 1.3 */ public class MockDataSource extends diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/NumberFormatTransformer.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/NumberFormatTransformer.java index 36efecf5320..754e39b198d 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/NumberFormatTransformer.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/NumberFormatTransformer.java @@ -42,7 +42,6 @@ import java.util.regex.Pattern; *
<p/>
* This API is experimental and may change in the future. * - * @version $Id$ * @since solr 1.3 */ public class NumberFormatTransformer extends Transformer { diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/PlainTextEntityProcessor.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/PlainTextEntityProcessor.java index 2d32eee0122..b88c85fc1fa 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/PlainTextEntityProcessor.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/PlainTextEntityProcessor.java @@ -33,7 +33,6 @@ import java.util.Map; *
<p>
An implementation of {@link EntityProcessor} which reads data from a url/file and give out a row which contains one String * value. The name of the field is 'plainText'. * - * @version $Id$ * @since solr 1.4 */ public class PlainTextEntityProcessor extends EntityProcessorBase { diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/RegexTransformer.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/RegexTransformer.java index 429bb0cf2f3..d680c9d0ffb 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/RegexTransformer.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/RegexTransformer.java @@ -36,7 +36,6 @@ import java.util.regex.Pattern; *
<p/>
* This API is experimental and may change in the future. * - * @version $Id$ * @since solr 1.3 * @see Pattern */ diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ScriptTransformer.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ScriptTransformer.java index 547fc66cf2a..a4ea3afad3a 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ScriptTransformer.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ScriptTransformer.java @@ -37,7 +37,6 @@ import java.util.Map; *
<p/>
* This API is experimental and may change in the future. * - * @version $Id$ * @since solr 1.3 */ public class ScriptTransformer extends Transformer { diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/SolrWriter.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/SolrWriter.java index c342427a994..f548316b4f4 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/SolrWriter.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/SolrWriter.java @@ -34,7 +34,6 @@ import java.util.Properties; *
<p/>
* This API is experimental and may change in the future. * - * @version $Id$ * @since solr 1.3 */ public class SolrWriter { diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/SqlEntityProcessor.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/SqlEntityProcessor.java index 1748998720b..5f77063b3cb 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/SqlEntityProcessor.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/SqlEntityProcessor.java @@ -38,7 +38,7 @@ import java.util.regex.Pattern; *
<p/>
* This API is experimental and may change in the future. * - * @version $Id$ + * * @since solr 1.3 */ public class SqlEntityProcessor extends EntityProcessorBase { diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/TemplateString.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/TemplateString.java index d39bbec4fe5..7724de89219 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/TemplateString.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/TemplateString.java @@ -30,7 +30,7 @@ import java.util.regex.Pattern; *
<p/>
* This API is experimental and may change in the future. * - * @version $Id$ + * * @since solr 1.3 */ public class TemplateString { diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/TemplateTransformer.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/TemplateTransformer.java index 6fd0665c700..97a4b11d153 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/TemplateTransformer.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/TemplateTransformer.java @@ -43,7 +43,7 @@ import org.slf4j.LoggerFactory; *
<p/>
* This API is experimental and may change in the future. * - * @version $Id$ + * * @since solr 1.3 */ public class TemplateTransformer extends Transformer { diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/Transformer.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/Transformer.java index 995302884af..758b243adf9 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/Transformer.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/Transformer.java @@ -35,7 +35,7 @@ import java.util.Map; *
<p/>
* This API is experimental and may change in the future. * - * @version $Id$ + * * @since solr 1.3 */ public abstract class Transformer { diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/URLDataSource.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/URLDataSource.java index 234fb56f264..626040b4083 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/URLDataSource.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/URLDataSource.java @@ -35,7 +35,7 @@ import java.util.regex.Pattern; *
<p/>
* This API is experimental and may change in the future. * - * @version $Id$ + * * @since solr 1.4 */ public class URLDataSource extends DataSource { diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/VariableResolver.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/VariableResolver.java index 2ce695550c1..8a91f0f0470 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/VariableResolver.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/VariableResolver.java @@ -30,7 +30,7 @@ package org.apache.solr.handler.dataimport; *
<p/>
* This API is experimental and may change in the future. * - * @version $Id$ + * * @since solr 1.3 */ public abstract class VariableResolver { diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/VariableResolverImpl.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/VariableResolverImpl.java index 8d39dd13bde..10ed0ac14a4 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/VariableResolverImpl.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/VariableResolverImpl.java @@ -28,7 +28,7 @@ import java.util.regex.Pattern; *
<p/>
* This API is experimental and may change in the future. * - * @version $Id$ + * * @see VariableResolver * @since solr 1.3 */ diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/XPathEntityProcessor.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/XPathEntityProcessor.java index 86be8d7fc74..b6500b808d7 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/XPathEntityProcessor.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/XPathEntityProcessor.java @@ -49,7 +49,7 @@ import java.util.concurrent.atomic.AtomicReference; *
<p/>
* This API is experimental and may change in the future. * - * @version $Id$ + * * @see XPathRecordReader * @since solr 1.3 */ diff --git a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/XPathRecordReader.java b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/XPathRecordReader.java index a3c4b168f95..000727840f9 100644 --- a/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/XPathRecordReader.java +++ b/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/XPathRecordReader.java @@ -53,7 +53,7 @@ import org.slf4j.LoggerFactory; *
<p/>
* This API is experimental and may change in the future. *
<p/>
- * @version $Id$ + * * @since solr 1.3 */ public class XPathRecordReader { diff --git a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/AbstractDataImportHandlerTestCase.java b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/AbstractDataImportHandlerTestCase.java index 5df5f3d2df4..f4ebad9e8d3 100644 --- a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/AbstractDataImportHandlerTestCase.java +++ b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/AbstractDataImportHandlerTestCase.java @@ -46,7 +46,7 @@ import java.util.Map; *

* This API is experimental and subject to change * - * @version $Id$ + * * @since solr 1.3 */ public abstract class AbstractDataImportHandlerTestCase extends diff --git a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestCachedSqlEntityProcessor.java b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestCachedSqlEntityProcessor.java index 0037d6796ca..89c79a64a5a 100644 --- a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestCachedSqlEntityProcessor.java +++ b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestCachedSqlEntityProcessor.java @@ -28,7 +28,7 @@ import java.util.Map; * Test for CachedSqlEntityProcessor *

* - * @version $Id$ + * * @since solr 1.3 */ public class TestCachedSqlEntityProcessor extends AbstractDataImportHandlerTestCase { diff --git a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestClobTransformer.java b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestClobTransformer.java index 055a707b1d3..b1eb3947147 100644 --- a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestClobTransformer.java +++ b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestClobTransformer.java @@ -28,7 +28,7 @@ import java.util.*; /** * Test for ClobTransformer * - * @version $Id$ + * * @see org.apache.solr.handler.dataimport.ClobTransformer * @since solr 1.4 */ diff --git a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestContentStreamDataSource.java b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestContentStreamDataSource.java index 692272d4e04..0ea8b1db84c 100644 --- a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestContentStreamDataSource.java +++ b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestContentStreamDataSource.java @@ -35,7 +35,7 @@ import java.util.List; /** * Test for ContentStreamDataSource * - * @version $Id$ + * * @since solr 1.4 */ public class TestContentStreamDataSource extends AbstractDataImportHandlerTestCase { diff --git a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestDataConfig.java b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestDataConfig.java index 39b01ddedce..3efcc1e17db 100644 --- a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestDataConfig.java +++ b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestDataConfig.java @@ -31,7 +31,7 @@ import java.util.List; * Test for DataConfig *

* - * @version $Id$ + * * @since solr 1.3 */ public class TestDataConfig extends AbstractDataImportHandlerTestCase { diff --git a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestDateFormatTransformer.java b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestDateFormatTransformer.java index bd37053f47f..606d5735630 100644 --- a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestDateFormatTransformer.java +++ b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestDateFormatTransformer.java @@ -26,7 +26,7 @@ import java.util.*; * Test for DateFormatTransformer *

* - * @version $Id$ + * * @since solr 1.3 */ public class TestDateFormatTransformer extends AbstractDataImportHandlerTestCase { diff --git a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestDocBuilder.java b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestDocBuilder.java index a16b7017ab4..27ba8b39362 100644 --- a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestDocBuilder.java +++ b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestDocBuilder.java @@ -28,7 +28,7 @@ import java.util.*; * Test for DocBuilder *

* - * @version $Id$ + * * @since solr 1.3 */ public class TestDocBuilder extends AbstractDataImportHandlerTestCase { diff --git a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestDocBuilder2.java b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestDocBuilder2.java index 4632318fa17..daa32cab7bf 100644 --- a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestDocBuilder2.java +++ b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestDocBuilder2.java @@ -32,7 +32,7 @@ import java.io.File; * Test for DocBuilder using the test harness *

* - * @version $Id$ + * * @since solr 1.3 */ public class TestDocBuilder2 extends AbstractDataImportHandlerTestCase { diff --git a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestEntityProcessorBase.java b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestEntityProcessorBase.java index 42b29610666..a3cff08840f 100644 --- a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestEntityProcessorBase.java +++ b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestEntityProcessorBase.java @@ -28,7 +28,7 @@ import java.util.Map; * Test for EntityProcessorBase *

* - * @version $Id$ + * * @since solr 1.3 */ public class TestEntityProcessorBase extends AbstractDataImportHandlerTestCase { diff --git a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestErrorHandling.java b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestErrorHandling.java index 64f58df4ff4..d46635cd194 100644 --- a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestErrorHandling.java +++ b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestErrorHandling.java @@ -29,7 +29,7 @@ import org.junit.BeforeClass; /** * Tests exception handling during imports in DataImportHandler * - * @version $Id$ + * * @since solr 1.4 */ public class TestErrorHandling extends AbstractDataImportHandlerTestCase { diff --git a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestEvaluatorBag.java b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestEvaluatorBag.java index 41ac1dc5d15..502182f069a 100644 --- a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestEvaluatorBag.java +++ b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestEvaluatorBag.java @@ -27,7 +27,7 @@ import java.util.*; /** *

* Test for EvaluatorBag

* - * @version $Id$ + * * @since solr 1.3 */ public class TestEvaluatorBag extends AbstractDataImportHandlerTestCase { diff --git a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestFieldReader.java b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestFieldReader.java index 46f2ab5a4c9..4bf1e97d4c1 100644 --- a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestFieldReader.java +++ b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestFieldReader.java @@ -25,7 +25,7 @@ import java.util.Map; /** * Test for FieldReaderDataSource * - * @version $Id$ + * * @see org.apache.solr.handler.dataimport.FieldReaderDataSource * @since 1.4 */ diff --git a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestFileListEntityProcessor.java b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestFileListEntityProcessor.java index e28ccbde537..cb2bc8ad157 100644 --- a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestFileListEntityProcessor.java +++ b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestFileListEntityProcessor.java @@ -28,7 +28,7 @@ import java.util.*; * Test for FileListEntityProcessor *

* - * @version $Id$ + * * @since solr 1.3 */ public class TestFileListEntityProcessor extends AbstractDataImportHandlerTestCase { diff --git a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestJdbcDataSource.java b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestJdbcDataSource.java index ac6626462ee..ba0642f8a48 100644 --- a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestJdbcDataSource.java +++ b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestJdbcDataSource.java @@ -39,7 +39,7 @@ import org.junit.Test; * Note: The tests are ignored for the lack of DB support for testing *

* - * @version $Id$ + * * @since solr 1.3 */ public class TestJdbcDataSource extends AbstractDataImportHandlerTestCase { diff --git a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestLineEntityProcessor.java b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestLineEntityProcessor.java index 91f8d034cda..5548924641f 100644 --- a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestLineEntityProcessor.java +++ b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestLineEntityProcessor.java @@ -27,7 +27,7 @@ import java.util.*; /** *

* Test for LineEntityProcessor

* - * @version $Id$ + * * @since solr 1.4 */ public class TestLineEntityProcessor extends AbstractDataImportHandlerTestCase { diff --git a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestNumberFormatTransformer.java b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestNumberFormatTransformer.java index 67ba544d682..b799717c13d 100644 --- a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestNumberFormatTransformer.java +++ b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestNumberFormatTransformer.java @@ -29,7 +29,7 @@ import java.util.Map; * Test for NumberFormatTransformer *

* - * @version $Id$ + * * @since solr 1.3 */ public class TestNumberFormatTransformer extends AbstractDataImportHandlerTestCase { diff --git a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestPlainTextEntityProcessor.java b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestPlainTextEntityProcessor.java index e0a5b8bf39c..a67c1fe38cf 100644 --- a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestPlainTextEntityProcessor.java +++ b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestPlainTextEntityProcessor.java @@ -24,7 +24,7 @@ import java.util.Properties; /** * Test for PlainTextEntityProcessor * - * @version $Id$ + * * @see org.apache.solr.handler.dataimport.PlainTextEntityProcessor * @since solr 1.4 */ diff --git a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestRegexTransformer.java b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestRegexTransformer.java index ca3143b511d..50835d73be0 100644 --- a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestRegexTransformer.java +++ b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestRegexTransformer.java @@ -31,7 +31,7 @@ import java.util.Map; /** *

* Test for RegexTransformer

* - * @version $Id$ + * * @since solr 1.3 */ public class TestRegexTransformer extends AbstractDataImportHandlerTestCase { diff --git a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestScriptTransformer.java b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestScriptTransformer.java index 7c1076512a3..25477ef0cb1 100644 --- a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestScriptTransformer.java +++ b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestScriptTransformer.java @@ -37,7 +37,7 @@ import java.util.Map; * All tests in this have been ignored because script support is only available * in Java 1.6+ * - * @version $Id$ + * * @since solr 1.3 */ public class TestScriptTransformer extends AbstractDataImportHandlerTestCase { diff --git a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestSqlEntityProcessor.java b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestSqlEntityProcessor.java index 0fbfb846eae..b9e5588220b 100644 --- a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestSqlEntityProcessor.java +++ b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestSqlEntityProcessor.java @@ -25,7 +25,7 @@ import java.util.*; * Test for SqlEntityProcessor *

* - * @version $Id$ + * * @since solr 1.3 */ public class TestSqlEntityProcessor extends AbstractDataImportHandlerTestCase { diff --git a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestSqlEntityProcessor2.java b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestSqlEntityProcessor2.java index 6c0627e3e07..4429887d8df 100644 --- a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestSqlEntityProcessor2.java +++ b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestSqlEntityProcessor2.java @@ -32,7 +32,7 @@ import java.text.ParseException; * test harness *

* - * @version $Id$ + * * @since solr 1.3 */ public class TestSqlEntityProcessor2 extends AbstractDataImportHandlerTestCase { diff --git a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestSqlEntityProcessorDelta.java b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestSqlEntityProcessorDelta.java index ade265f50f7..60f1ae5ecd0 100644 --- a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestSqlEntityProcessorDelta.java +++ b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestSqlEntityProcessorDelta.java @@ -30,7 +30,7 @@ import java.util.List; *

* * - * @version $Id: TestSqlEntityProcessor2.java 723824 2008-12-05 19:14:11Z shalin $ + * * @since solr 1.3 */ public class TestSqlEntityProcessorDelta extends AbstractDataImportHandlerTestCase { diff --git a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestSqlEntityProcessorDelta2.java b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestSqlEntityProcessorDelta2.java index bab66071d5c..c3ce3824b9f 100644 --- a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestSqlEntityProcessorDelta2.java +++ b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestSqlEntityProcessorDelta2.java @@ -30,7 +30,7 @@ import java.util.List; *

* * - * @version $Id: TestSqlEntityProcessor2.java 723824 2008-12-05 19:14:11Z shalin $ + * * @since solr 1.3 */ public class TestSqlEntityProcessorDelta2 extends AbstractDataImportHandlerTestCase { diff --git a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestSqlEntityProcessorDeltaPrefixedPk.java b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestSqlEntityProcessorDeltaPrefixedPk.java index 51fc50b9f1a..2cc40a4f840 100644 --- a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestSqlEntityProcessorDeltaPrefixedPk.java +++ b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestSqlEntityProcessorDeltaPrefixedPk.java @@ -31,7 +31,7 @@ import java.util.logging.*; *

* * - * @version $Id$ + * * @since solr 3.1 */ public class TestSqlEntityProcessorDeltaPrefixedPk extends AbstractDataImportHandlerTestCase { diff --git a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestTemplateString.java b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestTemplateString.java index 5eea4d72ecd..5334ec18d8f 100644 --- a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestTemplateString.java +++ b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestTemplateString.java @@ -28,7 +28,7 @@ import java.util.regex.Pattern; * Test for TemplateString *

* - * @version $Id$ + * * @since solr 1.3 */ public class TestTemplateString extends AbstractDataImportHandlerTestCase { diff --git a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestTemplateTransformer.java b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestTemplateTransformer.java index e279ab3bdec..846aa9bc04f 100644 --- a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestTemplateTransformer.java +++ b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestTemplateTransformer.java @@ -28,7 +28,7 @@ import java.util.Arrays; * Test for TemplateTransformer *

* - * @version $Id$ + * * @since solr 1.3 */ public class TestTemplateTransformer extends AbstractDataImportHandlerTestCase { diff --git a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestVariableResolver.java b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestVariableResolver.java index 55c4b91ee34..15eff57c3a3 100644 --- a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestVariableResolver.java +++ b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestVariableResolver.java @@ -27,7 +27,7 @@ import java.util.*; * Test for VariableResolver *

* - * @version $Id$ + * * @since solr 1.3 */ public class TestVariableResolver extends AbstractDataImportHandlerTestCase { diff --git a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestXPathEntityProcessor.java b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestXPathEntityProcessor.java index 596fa33b9d9..f6738692ab9 100644 --- a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestXPathEntityProcessor.java +++ b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestXPathEntityProcessor.java @@ -32,7 +32,7 @@ import java.util.concurrent.TimeUnit; * Test for XPathEntityProcessor *

* - * @version $Id$ + * * @since solr 1.3 */ public class TestXPathEntityProcessor extends AbstractDataImportHandlerTestCase { diff --git a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestXPathRecordReader.java b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestXPathRecordReader.java index dad5510610c..619e45b5650 100644 --- a/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestXPathRecordReader.java +++ b/solr/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestXPathRecordReader.java @@ -26,7 +26,7 @@ import java.util.Map; /** *

* Test for XPathRecordReader

* - * @version $Id$ + * * @since solr 1.3 */ public class TestXPathRecordReader extends AbstractDataImportHandlerTestCase { diff --git a/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/SolrUIMAConfiguration.java b/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/SolrUIMAConfiguration.java index 054857ab563..8cbab1bc668 100644 --- a/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/SolrUIMAConfiguration.java +++ b/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/SolrUIMAConfiguration.java @@ -22,7 +22,7 @@ import java.util.Map; /** * Configuration holding all the configurable parameters for calling UIMA inside Solr * - * @version $Id$ + * */ public class SolrUIMAConfiguration { diff --git a/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/SolrUIMAConfigurationReader.java b/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/SolrUIMAConfigurationReader.java index 1792fd7cd29..98e6c88d3a5 100644 --- a/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/SolrUIMAConfigurationReader.java +++ b/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/SolrUIMAConfigurationReader.java @@ -27,7 +27,7 @@ import org.apache.solr.uima.processor.SolrUIMAConfiguration.MapField; /** * Read configuration for Solr-UIMA integration * - * @version $Id$ + * * */ public class SolrUIMAConfigurationReader { diff --git a/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/UIMAToSolrMapper.java b/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/UIMAToSolrMapper.java index 6d8cdc50c0d..dfc531a34e1 100644 --- a/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/UIMAToSolrMapper.java +++ b/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/UIMAToSolrMapper.java @@ -32,7 +32,7 @@ import org.slf4j.LoggerFactory; /** * Map UIMA types and features over fields of a Solr document * - * @version $Id$ + * */ public class UIMAToSolrMapper { diff --git a/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessor.java b/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessor.java index 33154feb933..69d8d934690 100644 --- a/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessor.java +++ b/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessor.java @@ -38,7 +38,7 @@ import org.apache.uima.resource.ResourceInitializationException; /** * Update document(s) to be indexed with UIMA extracted information * - * @version $Id$ + * */ public class UIMAUpdateRequestProcessor extends UpdateRequestProcessor { diff --git a/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessorFactory.java b/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessorFactory.java index 9cb46018ffa..8fab664c58c 100644 --- a/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessorFactory.java +++ b/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessorFactory.java @@ -26,7 +26,7 @@ import org.apache.solr.update.processor.UpdateRequestProcessorFactory; /** * Factory for {@link UIMAUpdateRequestProcessor} * - * @version $Id$ + * */ public class UIMAUpdateRequestProcessorFactory extends UpdateRequestProcessorFactory { diff --git a/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/ae/AEProvider.java 
b/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/ae/AEProvider.java index 89c981ab6e3..2f6ac479eed 100644 --- a/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/ae/AEProvider.java +++ b/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/ae/AEProvider.java @@ -23,7 +23,7 @@ import org.apache.uima.resource.ResourceInitializationException; /** * provide an Apache UIMA {@link AnalysisEngine} * - * @version $Id$ + * */ public interface AEProvider { diff --git a/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/ae/AEProviderFactory.java b/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/ae/AEProviderFactory.java index bc5ad81367e..2bd2417936e 100644 --- a/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/ae/AEProviderFactory.java +++ b/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/ae/AEProviderFactory.java @@ -23,7 +23,7 @@ import java.util.Map; /** * Singleton factory class responsible of {@link AEProvider}s' creation * - * @version $Id$ + * */ public class AEProviderFactory { diff --git a/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/ae/OverridingParamsAEProvider.java b/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/ae/OverridingParamsAEProvider.java index 586c0cd9aab..f3af7c4435e 100644 --- a/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/ae/OverridingParamsAEProvider.java +++ b/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/ae/OverridingParamsAEProvider.java @@ -33,7 +33,7 @@ import org.slf4j.LoggerFactory; * injecting runtime parameters defined in the solrconfig.xml Solr configuration file and assigning * them as overriding parameters in the aggregate AE * - * @version $Id$ + * */ public class OverridingParamsAEProvider implements AEProvider { diff --git a/solr/contrib/uima/src/test/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessorTest.java b/solr/contrib/uima/src/test/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessorTest.java index 2c8a21bdab9..b921d15808b 100644 --- a/solr/contrib/uima/src/test/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessorTest.java +++ b/solr/contrib/uima/src/test/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessorTest.java @@ -43,7 +43,7 @@ import org.junit.Test; /** * TestCase for {@link UIMAUpdateRequestProcessor} * - * @version $Id$ + * */ public class UIMAUpdateRequestProcessorTest extends SolrTestCaseJ4 { diff --git a/solr/src/common/org/apache/solr/common/SolrDocument.java b/solr/src/common/org/apache/solr/common/SolrDocument.java index 1864e09ea2a..54624d1fb03 100644 --- a/solr/src/common/org/apache/solr/common/SolrDocument.java +++ b/solr/src/common/org/apache/solr/common/SolrDocument.java @@ -37,7 +37,7 @@ import org.apache.solr.common.util.NamedList; * For indexing documents, use the SolrInputDocument that contains extra information * for document and field boosting. * - * @version $Id$ + * * @since solr 1.3 */ public class SolrDocument implements Map, Iterable>, Serializable diff --git a/solr/src/common/org/apache/solr/common/SolrDocumentList.java b/solr/src/common/org/apache/solr/common/SolrDocumentList.java index a7f3d4b15b7..b7a4629d01e 100644 --- a/solr/src/common/org/apache/solr/common/SolrDocumentList.java +++ b/solr/src/common/org/apache/solr/common/SolrDocumentList.java @@ -24,7 +24,7 @@ import java.util.ArrayList; * Represent a list of SolrDocuments returned from a search. This includes * position and offset information. 
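The SolrDocumentList javadoc above is terse, so a short usage sketch may help; this is the standard SolrJ API of this code line, with invented values:

import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;

public class DocumentListSketch {
  public static void main(String[] args) {
    SolrDocument doc = new SolrDocument();
    doc.setField("id", "42");
    doc.addField("cat", "electronics"); // addField appends, for multi-valued fields

    SolrDocumentList page = new SolrDocumentList();
    page.add(doc);         // the list itself is an ArrayList of SolrDocument
    page.setNumFound(137); // total matches, typically larger than page.size()
    page.setStart(0);      // the "position and offset information" mentioned above
  }
}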
* - * @version $Id$ + * * @since solr 1.3 */ public class SolrDocumentList extends ArrayList<SolrDocument> diff --git a/solr/src/common/org/apache/solr/common/SolrException.java b/solr/src/common/org/apache/solr/common/SolrException.java index 8df79c4ab28..135e5f384ee 100644 --- a/solr/src/common/org/apache/solr/common/SolrException.java +++ b/solr/src/common/org/apache/solr/common/SolrException.java @@ -26,7 +26,7 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; /** - * @version $Id$ + * */ public class SolrException extends RuntimeException { diff --git a/solr/src/common/org/apache/solr/common/SolrInputDocument.java b/solr/src/common/org/apache/solr/common/SolrInputDocument.java index 089f0e9b0b9..9002de2b877 100644 --- a/solr/src/common/org/apache/solr/common/SolrInputDocument.java +++ b/solr/src/common/org/apache/solr/common/SolrInputDocument.java @@ -29,7 +29,7 @@ import java.util.Set; * a Lucene Document. Like the SolrDocument, the field values should * match those specified in schema.xml * - * @version $Id$ + * * @since solr 1.3 */ public class SolrInputDocument implements Map<String,SolrInputField>, Iterable<SolrInputField>, Serializable diff --git a/solr/src/common/org/apache/solr/common/SolrInputField.java b/solr/src/common/org/apache/solr/common/SolrInputField.java index 5e9102ccdac..02c87f9203d 100644 --- a/solr/src/common/org/apache/solr/common/SolrInputField.java +++ b/solr/src/common/org/apache/solr/common/SolrInputField.java @@ -23,7 +23,7 @@ import java.util.Collection; import java.util.Iterator; /** - * @version $Id$ + * * @since solr 1.3 */ public class SolrInputField implements Iterable<Object>, Serializable diff --git a/solr/src/common/org/apache/solr/common/luke/FieldFlag.java b/solr/src/common/org/apache/solr/common/luke/FieldFlag.java index 6755e13e07d..f3d1eea02bc 100644 --- a/solr/src/common/org/apache/solr/common/luke/FieldFlag.java +++ b/solr/src/common/org/apache/solr/common/luke/FieldFlag.java @@ -18,7 +18,7 @@ package org.apache.solr.common.luke; /** - * @version $Id: AdminHandlers.java 608150 2008-01-02 17:15:30Z ryan $ + * * @since solr 1.3 */ public enum FieldFlag { diff --git a/solr/src/common/org/apache/solr/common/params/AnalysisParams.java b/solr/src/common/org/apache/solr/common/params/AnalysisParams.java index 6e066f2c5d5..c3372b8c26a 100644 --- a/solr/src/common/org/apache/solr/common/params/AnalysisParams.java +++ b/solr/src/common/org/apache/solr/common/params/AnalysisParams.java @@ -20,7 +20,7 @@ package org.apache.solr.common.params; /** * Defines the request parameters used by all analysis request handlers.
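Tying back to the SolrInputDocument hunk a few lines up: the "extra information" it mentions is the index-time boost, settable per field and per document. A minimal sketch, using the standard SolrJ API with invented values:

import org.apache.solr.common.SolrInputDocument;

public class InputDocumentSketch {
  public static void main(String[] args) {
    SolrInputDocument in = new SolrInputDocument();
    in.addField("id", "42");
    in.addField("name", "Memory Module", 2.0f); // per-field index-time boost
    in.setDocumentBoost(1.5f);                  // boost for the whole document
  }
}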
* - * @version $Id$ + * * @since solr 1.4 */ public interface AnalysisParams { diff --git a/solr/src/common/org/apache/solr/common/params/DefaultSolrParams.java b/solr/src/common/org/apache/solr/common/params/DefaultSolrParams.java index b71d9cacba0..edfad64bbe3 100644 --- a/solr/src/common/org/apache/solr/common/params/DefaultSolrParams.java +++ b/solr/src/common/org/apache/solr/common/params/DefaultSolrParams.java @@ -22,7 +22,7 @@ import java.util.Iterator; import org.apache.solr.common.util.IteratorChain; /** - * @version $Id$ + * */ public class DefaultSolrParams extends SolrParams { protected final SolrParams params; diff --git a/solr/src/common/org/apache/solr/common/params/HighlightParams.java b/solr/src/common/org/apache/solr/common/params/HighlightParams.java index d874a2ec7f7..b8af3ad631d 100644 --- a/solr/src/common/org/apache/solr/common/params/HighlightParams.java +++ b/solr/src/common/org/apache/solr/common/params/HighlightParams.java @@ -18,7 +18,7 @@ package org.apache.solr.common.params; /** - * @version $Id$ + * * @since solr 1.3 */ public interface HighlightParams { diff --git a/solr/src/common/org/apache/solr/common/params/MapSolrParams.java b/solr/src/common/org/apache/solr/common/params/MapSolrParams.java index 95c71d2d95d..a2481589676 100644 --- a/solr/src/common/org/apache/solr/common/params/MapSolrParams.java +++ b/solr/src/common/org/apache/solr/common/params/MapSolrParams.java @@ -24,7 +24,7 @@ import java.util.Map; import java.io.IOException; /** - * @version $Id$ + * */ public class MapSolrParams extends SolrParams { protected final Map<String,String> map; diff --git a/solr/src/common/org/apache/solr/common/params/MultiMapSolrParams.java b/solr/src/common/org/apache/solr/common/params/MultiMapSolrParams.java index 781c1dabfe6..68202d5cbc0 100644 --- a/solr/src/common/org/apache/solr/common/params/MultiMapSolrParams.java +++ b/solr/src/common/org/apache/solr/common/params/MultiMapSolrParams.java @@ -24,7 +24,7 @@ import java.util.Map; import java.io.IOException; /** - * @version $Id$ + * */ public class MultiMapSolrParams extends SolrParams { protected final Map<String,String[]> map; diff --git a/solr/src/common/org/apache/solr/common/params/RequiredSolrParams.java b/solr/src/common/org/apache/solr/common/params/RequiredSolrParams.java index 7abf75b4799..16817c2f0f3 100755 --- a/solr/src/common/org/apache/solr/common/params/RequiredSolrParams.java +++ b/solr/src/common/org/apache/solr/common/params/RequiredSolrParams.java @@ -31,7 +31,7 @@ import java.util.Iterator; * (If you pass in null as the default value, you can * get a null return value) * - * @version $Id$ + * * @since solr 1.2 */ public class RequiredSolrParams extends SolrParams { diff --git a/solr/src/common/org/apache/solr/common/params/SolrParams.java b/solr/src/common/org/apache/solr/common/params/SolrParams.java index b0315259f75..ae7b291cf09 100644 --- a/solr/src/common/org/apache/solr/common/params/SolrParams.java +++ b/solr/src/common/org/apache/solr/common/params/SolrParams.java @@ -29,7 +29,7 @@ import org.apache.solr.common.util.StrUtils; /** SolrParams hold request parameters.
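"SolrParams hold request parameters" is the entire description, so a small example of the concrete classes touched above may help; this is the standard API, with invented parameter values:

import java.util.HashMap;
import java.util.Map;
import org.apache.solr.common.params.MapSolrParams;
import org.apache.solr.common.params.SolrParams;

public class ParamsSketch {
  public static void main(String[] args) {
    Map<String, String> m = new HashMap<String, String>();
    m.put("q", "*:*");
    m.put("rows", "10");
    SolrParams params = new MapSolrParams(m); // one value per name
    String q = params.get("q");               // "*:*"
    int rows = params.getInt("rows", 20);     // parsed as int, 20 if absent
  }
}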
* - * @version $Id$ + * */ public abstract class SolrParams implements Serializable { diff --git a/solr/src/common/org/apache/solr/common/params/UpdateParams.java b/solr/src/common/org/apache/solr/common/params/UpdateParams.java index 915ed7582cd..ab9d42ce4e9 100644 --- a/solr/src/common/org/apache/solr/common/params/UpdateParams.java +++ b/solr/src/common/org/apache/solr/common/params/UpdateParams.java @@ -20,7 +20,7 @@ package org.apache.solr.common.params; /** * A collection of standard params used by Update handlers * - * @version $Id$ + * * @since solr 1.2 */ public interface UpdateParams diff --git a/solr/src/common/org/apache/solr/common/util/ConcurrentLRUCache.java b/solr/src/common/org/apache/solr/common/util/ConcurrentLRUCache.java index e3de94f7396..eb9e882fa08 100644 --- a/solr/src/common/org/apache/solr/common/util/ConcurrentLRUCache.java +++ b/solr/src/common/org/apache/solr/common/util/ConcurrentLRUCache.java @@ -40,7 +40,7 @@ import java.lang.ref.WeakReference; * cleanup does not remove enough items to reach the 'acceptableWaterMark' limit, it can * remove more items forcefully regardless of access order. * - * @version $Id$ + * * @since solr 1.4 */ public class ConcurrentLRUCache { diff --git a/solr/src/common/org/apache/solr/common/util/ContentStream.java b/solr/src/common/org/apache/solr/common/util/ContentStream.java index 72494199259..de8d48bcf50 100755 --- a/solr/src/common/org/apache/solr/common/util/ContentStream.java +++ b/solr/src/common/org/apache/solr/common/util/ContentStream.java @@ -22,7 +22,7 @@ import java.io.InputStream; import java.io.Reader; /** - * @version $Id$ + * * @since solr 1.2 */ public interface ContentStream { diff --git a/solr/src/common/org/apache/solr/common/util/ContentStreamBase.java b/solr/src/common/org/apache/solr/common/util/ContentStreamBase.java index 5ba7f440600..9d4861d3627 100755 --- a/solr/src/common/org/apache/solr/common/util/ContentStreamBase.java +++ b/solr/src/common/org/apache/solr/common/util/ContentStreamBase.java @@ -34,7 +34,7 @@ import java.util.Locale; /** * Three concrete implementations for ContentStream - one for File/URL/String * - * @version $Id$ + * * @since solr 1.2 */ public abstract class ContentStreamBase implements ContentStream diff --git a/solr/src/common/org/apache/solr/common/util/DOMUtil.java b/solr/src/common/org/apache/solr/common/util/DOMUtil.java index d02be759bbd..2d937aa447f 100644 --- a/solr/src/common/org/apache/solr/common/util/DOMUtil.java +++ b/solr/src/common/org/apache/solr/common/util/DOMUtil.java @@ -25,7 +25,7 @@ import org.w3c.dom.Node; import org.w3c.dom.NodeList; /** - * @version $Id$ + * */ public class DOMUtil { diff --git a/solr/src/common/org/apache/solr/common/util/FileUtils.java b/solr/src/common/org/apache/solr/common/util/FileUtils.java index 3c9e6fcbe18..94ef65e3a49 100644 --- a/solr/src/common/org/apache/solr/common/util/FileUtils.java +++ b/solr/src/common/org/apache/solr/common/util/FileUtils.java @@ -21,7 +21,7 @@ import java.io.*; import java.nio.channels.FileChannel; /** - * @version $Id$ + * */ public class FileUtils { diff --git a/solr/src/common/org/apache/solr/common/util/NamedList.java b/solr/src/common/org/apache/solr/common/util/NamedList.java index 10b1996e608..45fa7ac5fbf 100644 --- a/solr/src/common/org/apache/solr/common/util/NamedList.java +++ b/solr/src/common/org/apache/solr/common/util/NamedList.java @@ -47,7 +47,7 @@ import java.io.Serializable; * or simply use a regular {@link Map} *
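To make the NamedList javadoc above concrete: unlike a Map it preserves insertion order, allows repeated names, and supports index-based access. A sketch with invented values:

import org.apache.solr.common.util.NamedList;

public class NamedListSketch {
  public static void main(String[] args) {
    NamedList<Object> nl = new NamedList<Object>();
    nl.add("status", "OK");
    nl.add("status", "ALSO-OK"); // repeated names are allowed
    nl.add("QTime", 12);
    Object first = nl.get("status"); // first entry with that name
    String name = nl.getName(2);     // "QTime", by position
    Object value = nl.getVal(2);     // 12, by position
  }
}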

* - * @version $Id$ + * */ public class NamedList<T> implements Cloneable, Serializable, Iterable<Map.Entry<String,T>> { protected final List<Object> nvPairs; diff --git a/solr/src/common/org/apache/solr/common/util/RTimer.java b/solr/src/common/org/apache/solr/common/util/RTimer.java index cd10d23ee1c..db561d9d90c 100644 --- a/solr/src/common/org/apache/solr/common/util/RTimer.java +++ b/solr/src/common/org/apache/solr/common/util/RTimer.java @@ -27,7 +27,7 @@ import java.util.*; * started automatically when created. * * @since solr 1.3 - * @version $Id$ + * */ public class RTimer { diff --git a/solr/src/common/org/apache/solr/common/util/RegexFileFilter.java b/solr/src/common/org/apache/solr/common/util/RegexFileFilter.java index af3af18c5d0..6bba26e5f31 100644 --- a/solr/src/common/org/apache/solr/common/util/RegexFileFilter.java +++ b/solr/src/common/org/apache/solr/common/util/RegexFileFilter.java @@ -23,7 +23,7 @@ import java.util.regex.*; /** * Accepts any file whose name matches the pattern - * @version $Id$ + * */ public final class RegexFileFilter implements FileFilter { diff --git a/solr/src/common/org/apache/solr/common/util/StrUtils.java b/solr/src/common/org/apache/solr/common/util/StrUtils.java index fc2187aa44a..931ef5fd29a 100644 --- a/solr/src/common/org/apache/solr/common/util/StrUtils.java +++ b/solr/src/common/org/apache/solr/common/util/StrUtils.java @@ -26,7 +26,7 @@ import java.io.IOException; import org.apache.solr.common.SolrException; /** - * @version $Id$ + * */ public class StrUtils { public static final char[] HEX_DIGITS = { '0', '1', '2', '3', '4', '5', '6', diff --git a/solr/src/common/org/apache/solr/common/util/XML.java b/solr/src/common/org/apache/solr/common/util/XML.java index 2183311be0f..b8df2a638bc 100644 --- a/solr/src/common/org/apache/solr/common/util/XML.java +++ b/solr/src/common/org/apache/solr/common/util/XML.java @@ -22,7 +22,7 @@ import java.io.IOException; import java.util.Map; /** - * @version $Id$ + * */ public class XML { diff --git a/solr/src/java/org/apache/solr/analysis/ASCIIFoldingFilterFactory.java b/solr/src/java/org/apache/solr/analysis/ASCIIFoldingFilterFactory.java index 5926713b02d..7f336cb3e54 100644 --- a/solr/src/java/org/apache/solr/analysis/ASCIIFoldingFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/ASCIIFoldingFilterFactory.java @@ -30,7 +30,7 @@ import org.apache.lucene.analysis.TokenStream; * &lt;filter class="solr.ASCIIFoldingFilterFactory"/&gt; * &lt;/analyzer&gt; * &lt;/fieldType&gt; - * @version $Id$ + * */ public class ASCIIFoldingFilterFactory extends BaseTokenFilterFactory { public ASCIIFoldingFilter create(TokenStream input) { diff --git a/solr/src/java/org/apache/solr/analysis/ArabicNormalizationFilterFactory.java b/solr/src/java/org/apache/solr/analysis/ArabicNormalizationFilterFactory.java index f777959aa21..d7319428ca3 100644 --- a/solr/src/java/org/apache/solr/analysis/ArabicNormalizationFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/ArabicNormalizationFilterFactory.java @@ -29,7 +29,7 @@ import org.apache.lucene.analysis.ar.ArabicNormalizationFilter; * &lt;filter class="solr.ArabicNormalizationFilterFactory"/&gt; * &lt;/analyzer&gt; * &lt;/fieldType&gt; - * @version $Id$ + * */ public class ArabicNormalizationFilterFactory extends BaseTokenFilterFactory{ diff --git a/solr/src/java/org/apache/solr/analysis/ArabicStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/ArabicStemFilterFactory.java index 0cbb097f31c..0766f74a7f9 100644 --- a/solr/src/java/org/apache/solr/analysis/ArabicStemFilterFactory.java +++
b/solr/src/java/org/apache/solr/analysis/ArabicStemFilterFactory.java @@ -30,7 +30,7 @@ import org.apache.lucene.analysis.ar.ArabicStemFilter; * <filter class="solr.ArabicStemFilterFactory"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class ArabicStemFilterFactory extends BaseTokenFilterFactory{ diff --git a/solr/src/java/org/apache/solr/analysis/BaseCharFilterFactory.java b/solr/src/java/org/apache/solr/analysis/BaseCharFilterFactory.java index 62ff65dd0ef..a7cef830349 100644 --- a/solr/src/java/org/apache/solr/analysis/BaseCharFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/BaseCharFilterFactory.java @@ -24,7 +24,6 @@ import org.slf4j.LoggerFactory; /** * -* @version $Id$ * @since Solr 1.4 * */ diff --git a/solr/src/java/org/apache/solr/analysis/BaseTokenFilterFactory.java b/solr/src/java/org/apache/solr/analysis/BaseTokenFilterFactory.java index 90ad366c33b..0714d4623a6 100644 --- a/solr/src/java/org/apache/solr/analysis/BaseTokenFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/BaseTokenFilterFactory.java @@ -23,7 +23,7 @@ import org.slf4j.LoggerFactory; /** * Simple abstract implementation that handles init arg processing. * - * @version $Id$ + * */ public abstract class BaseTokenFilterFactory extends BaseTokenStreamFactory implements TokenFilterFactory { public static final Logger log = LoggerFactory.getLogger(BaseTokenFilterFactory.class); diff --git a/solr/src/java/org/apache/solr/analysis/BaseTokenStreamFactory.java b/solr/src/java/org/apache/solr/analysis/BaseTokenStreamFactory.java index 632b5dddbc4..bc5adddd0ee 100644 --- a/solr/src/java/org/apache/solr/analysis/BaseTokenStreamFactory.java +++ b/solr/src/java/org/apache/solr/analysis/BaseTokenStreamFactory.java @@ -38,7 +38,7 @@ import org.slf4j.LoggerFactory; * a factory as it implements no interface, but removes code duplication * in its subclasses. * - * @version $Id$ + * */ abstract class BaseTokenStreamFactory { /** The init args */ diff --git a/solr/src/java/org/apache/solr/analysis/BaseTokenizerFactory.java b/solr/src/java/org/apache/solr/analysis/BaseTokenizerFactory.java index 5e4666377a2..8f0b81e5eab 100644 --- a/solr/src/java/org/apache/solr/analysis/BaseTokenizerFactory.java +++ b/solr/src/java/org/apache/solr/analysis/BaseTokenizerFactory.java @@ -23,7 +23,7 @@ import org.slf4j.LoggerFactory; /** * Simple abstract implementation that handles init arg processing. 
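The Base* factories above exist so that a concrete factory only has to implement create(). Below is a hypothetical subclass, not part of this patch; the LowerCaseFilter import path and the protected luceneMatchVersion field are assumptions from the 3.x code line:

import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenStream;

public class MyLowerCaseFilterFactory extends BaseTokenFilterFactory {
  public TokenStream create(TokenStream input) {
    // init(Map<String,String> args) in the base class has already captured
    // any attributes declared on the <filter/> element in schema.xml.
    return new LowerCaseFilter(luceneMatchVersion, input);
  }
}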
* - * @version $Id$ + * */ public abstract class BaseTokenizerFactory extends BaseTokenStreamFactory implements TokenizerFactory { public static final Logger log = LoggerFactory.getLogger(BaseTokenizerFactory.class); diff --git a/solr/src/java/org/apache/solr/analysis/BrazilianStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/BrazilianStemFilterFactory.java index a6af3dbf9ec..e2e8e97207d 100644 --- a/solr/src/java/org/apache/solr/analysis/BrazilianStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/BrazilianStemFilterFactory.java @@ -31,7 +31,7 @@ import org.apache.lucene.analysis.br.BrazilianStemFilter; * <filter class="solr.BrazilianStemFilterFactory"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class BrazilianStemFilterFactory extends BaseTokenFilterFactory { public BrazilianStemFilter create(TokenStream in) { diff --git a/solr/src/java/org/apache/solr/analysis/BulgarianStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/BulgarianStemFilterFactory.java index 13d323089fe..0386351762a 100644 --- a/solr/src/java/org/apache/solr/analysis/BulgarianStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/BulgarianStemFilterFactory.java @@ -30,7 +30,7 @@ import org.apache.lucene.analysis.bg.BulgarianStemFilter; * <filter class="solr.BulgarianStemFilterFactory"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class BulgarianStemFilterFactory extends BaseTokenFilterFactory { public TokenStream create(TokenStream input) { diff --git a/solr/src/java/org/apache/solr/analysis/CJKTokenizerFactory.java b/solr/src/java/org/apache/solr/analysis/CJKTokenizerFactory.java index c3ba41b6be7..d48bc922828 100644 --- a/solr/src/java/org/apache/solr/analysis/CJKTokenizerFactory.java +++ b/solr/src/java/org/apache/solr/analysis/CJKTokenizerFactory.java @@ -30,7 +30,7 @@ import java.io.Reader; * <tokenizer class="solr.CJKTokenizerFactory"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class CJKTokenizerFactory extends BaseTokenizerFactory { public CJKTokenizer create(Reader in) { diff --git a/solr/src/java/org/apache/solr/analysis/CapitalizationFilterFactory.java b/solr/src/java/org/apache/solr/analysis/CapitalizationFilterFactory.java index 81747847333..080b1348961 100644 --- a/solr/src/java/org/apache/solr/analysis/CapitalizationFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/CapitalizationFilterFactory.java @@ -53,7 +53,7 @@ import java.util.StringTokenizer; * </analyzer> * </fieldType> * - * @version $Id$ + * * @since solr 1.3 */ public class CapitalizationFilterFactory extends BaseTokenFilterFactory { diff --git a/solr/src/java/org/apache/solr/analysis/CharFilterFactory.java b/solr/src/java/org/apache/solr/analysis/CharFilterFactory.java index 9f6158e6ced..29e99aacfec 100644 --- a/solr/src/java/org/apache/solr/analysis/CharFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/CharFilterFactory.java @@ -23,7 +23,6 @@ import org.apache.lucene.analysis.CharStream; /** * -* @version $Id$ * @since Solr 1.4 * */ diff --git a/solr/src/java/org/apache/solr/analysis/ClassicFilterFactory.java b/solr/src/java/org/apache/solr/analysis/ClassicFilterFactory.java index 1d8f02c1d11..eba8e056543 100644 --- a/solr/src/java/org/apache/solr/analysis/ClassicFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/ClassicFilterFactory.java @@ -31,7 +31,7 @@ import org.apache.lucene.analysis.standard.ClassicFilter; * </analyzer> * </fieldType> * - * @version $Id$ + * */ public class 
ClassicFilterFactory extends BaseTokenFilterFactory { public TokenFilter create(TokenStream input) { diff --git a/solr/src/java/org/apache/solr/analysis/ClassicTokenizerFactory.java b/solr/src/java/org/apache/solr/analysis/ClassicTokenizerFactory.java index edaf6e2f996..3b8a54b61ad 100644 --- a/solr/src/java/org/apache/solr/analysis/ClassicTokenizerFactory.java +++ b/solr/src/java/org/apache/solr/analysis/ClassicTokenizerFactory.java @@ -32,7 +32,7 @@ import java.util.Map; * </analyzer> * </fieldType> * - * @version $Id$ + * */ public class ClassicTokenizerFactory extends BaseTokenizerFactory { diff --git a/solr/src/java/org/apache/solr/analysis/CommonGramsFilterFactory.java b/solr/src/java/org/apache/solr/analysis/CommonGramsFilterFactory.java index dede00a5e4b..5eef2dd4c01 100644 --- a/solr/src/java/org/apache/solr/analysis/CommonGramsFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/CommonGramsFilterFactory.java @@ -35,7 +35,7 @@ import org.apache.solr.util.plugin.ResourceLoaderAware; * <filter class="solr.CommonGramsFilterFactory" words="commongramsstopwords.txt" ignoreCase="false"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ /* diff --git a/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilterFactory.java b/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilterFactory.java index 8a27adc2df9..e1bb85c350d 100644 --- a/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilterFactory.java @@ -40,7 +40,7 @@ import org.apache.solr.util.plugin.ResourceLoaderAware; * <filter class="solr.CommonGramsQueryFilterFactory" words="commongramsquerystopwords.txt" ignoreCase="false"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class CommonGramsQueryFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware { diff --git a/solr/src/java/org/apache/solr/analysis/DelimitedPayloadTokenFilterFactory.java b/solr/src/java/org/apache/solr/analysis/DelimitedPayloadTokenFilterFactory.java index de1ab11ab15..c970e94fa74 100644 --- a/solr/src/java/org/apache/solr/analysis/DelimitedPayloadTokenFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/DelimitedPayloadTokenFilterFactory.java @@ -39,7 +39,7 @@ import java.util.Map; * <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float" delimiter="|"/> * </analyzer> * </fieldType> - * @version $Id$ + * * */ public class DelimitedPayloadTokenFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware { diff --git a/solr/src/java/org/apache/solr/analysis/DictionaryCompoundWordTokenFilterFactory.java b/solr/src/java/org/apache/solr/analysis/DictionaryCompoundWordTokenFilterFactory.java index e3010ead884..5e79216c4ba 100644 --- a/solr/src/java/org/apache/solr/analysis/DictionaryCompoundWordTokenFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/DictionaryCompoundWordTokenFilterFactory.java @@ -38,7 +38,7 @@ import java.io.IOException; * minWordSize="5" minSubwordSize="2" maxSubwordSize="15" onlyLongestMatch="true"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class DictionaryCompoundWordTokenFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware { private CharArraySet dictionary; diff --git a/solr/src/java/org/apache/solr/analysis/DoubleMetaphoneFilterFactory.java b/solr/src/java/org/apache/solr/analysis/DoubleMetaphoneFilterFactory.java index 2089f4835c1..5d8753136e3 100644 --- 
a/solr/src/java/org/apache/solr/analysis/DoubleMetaphoneFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/DoubleMetaphoneFilterFactory.java @@ -30,7 +30,7 @@ import org.apache.lucene.analysis.phonetic.DoubleMetaphoneFilter; * <filter class="solr.DoubleMetaphoneFilterFactory" inject="true" maxCodeLength="4"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class DoubleMetaphoneFilterFactory extends BaseTokenFilterFactory { diff --git a/solr/src/java/org/apache/solr/analysis/EdgeNGramFilterFactory.java b/solr/src/java/org/apache/solr/analysis/EdgeNGramFilterFactory.java index c7f07796176..623f66a255a 100644 --- a/solr/src/java/org/apache/solr/analysis/EdgeNGramFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/EdgeNGramFilterFactory.java @@ -30,7 +30,7 @@ import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter; * <filter class="solr.EdgeNGramFilterFactory" side="front" minGramSize="1" maxGramSize="1"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class EdgeNGramFilterFactory extends BaseTokenFilterFactory { private int maxGramSize = 0; diff --git a/solr/src/java/org/apache/solr/analysis/EdgeNGramTokenizerFactory.java b/solr/src/java/org/apache/solr/analysis/EdgeNGramTokenizerFactory.java index 9945d5f12a4..f2464437bc9 100755 --- a/solr/src/java/org/apache/solr/analysis/EdgeNGramTokenizerFactory.java +++ b/solr/src/java/org/apache/solr/analysis/EdgeNGramTokenizerFactory.java @@ -30,7 +30,7 @@ import java.util.Map; * <tokenizer class="solr.EdgeNGramTokenizerFactory" side="front" minGramSize="1" maxGramSize="1"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class EdgeNGramTokenizerFactory extends BaseTokenizerFactory { private int maxGramSize = 0; diff --git a/solr/src/java/org/apache/solr/analysis/ElisionFilterFactory.java b/solr/src/java/org/apache/solr/analysis/ElisionFilterFactory.java index 027766843bb..e1a536cd616 100644 --- a/solr/src/java/org/apache/solr/analysis/ElisionFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/ElisionFilterFactory.java @@ -37,7 +37,7 @@ import org.apache.lucene.analysis.TokenStream; * <filter class="solr.ElisionFilterFactory" articles="stopwordarticles.txt"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class ElisionFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware { diff --git a/solr/src/java/org/apache/solr/analysis/EnglishMinimalStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/EnglishMinimalStemFilterFactory.java index 5d2292e9c4a..98cdedeb742 100644 --- a/solr/src/java/org/apache/solr/analysis/EnglishMinimalStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/EnglishMinimalStemFilterFactory.java @@ -30,7 +30,7 @@ import org.apache.lucene.analysis.en.EnglishMinimalStemFilter; * <filter class="solr.EnglishMinimalStemFilterFactory"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class EnglishMinimalStemFilterFactory extends BaseTokenFilterFactory { public TokenStream create(TokenStream input) { diff --git a/solr/src/java/org/apache/solr/analysis/EnglishPossessiveFilterFactory.java b/solr/src/java/org/apache/solr/analysis/EnglishPossessiveFilterFactory.java index 30b9ea5afae..110dd6e2952 100644 --- a/solr/src/java/org/apache/solr/analysis/EnglishPossessiveFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/EnglishPossessiveFilterFactory.java @@ -30,7 +30,7 @@ import org.apache.lucene.analysis.en.EnglishPossessiveFilter; * <filter class="solr.EnglishPossessiveFilterFactory"/> 
* </analyzer> * </fieldType> - * @version $Id$ + * */ public class EnglishPossessiveFilterFactory extends BaseTokenFilterFactory { public TokenStream create(TokenStream input) { diff --git a/solr/src/java/org/apache/solr/analysis/FinnishLightStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/FinnishLightStemFilterFactory.java index 6d4bf82133c..308fe129051 100644 --- a/solr/src/java/org/apache/solr/analysis/FinnishLightStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/FinnishLightStemFilterFactory.java @@ -30,7 +30,7 @@ import org.apache.lucene.analysis.fi.FinnishLightStemFilter; * <filter class="solr.FinnishLightStemFilterFactory"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class FinnishLightStemFilterFactory extends BaseTokenFilterFactory { public TokenStream create(TokenStream input) { diff --git a/solr/src/java/org/apache/solr/analysis/FrenchLightStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/FrenchLightStemFilterFactory.java index e0325465e86..ef74a66eee5 100644 --- a/solr/src/java/org/apache/solr/analysis/FrenchLightStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/FrenchLightStemFilterFactory.java @@ -31,7 +31,7 @@ import org.apache.lucene.analysis.fr.FrenchLightStemFilter; * <filter class="solr.FrenchLightStemFilterFactory"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class FrenchLightStemFilterFactory extends BaseTokenFilterFactory { public TokenStream create(TokenStream input) { diff --git a/solr/src/java/org/apache/solr/analysis/FrenchMinimalStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/FrenchMinimalStemFilterFactory.java index 753984f122e..98bce9a6809 100644 --- a/solr/src/java/org/apache/solr/analysis/FrenchMinimalStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/FrenchMinimalStemFilterFactory.java @@ -31,7 +31,7 @@ import org.apache.lucene.analysis.fr.FrenchMinimalStemFilter; * <filter class="solr.FrenchMinimalStemFilterFactory"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class FrenchMinimalStemFilterFactory extends BaseTokenFilterFactory { public TokenStream create(TokenStream input) { diff --git a/solr/src/java/org/apache/solr/analysis/GalicianStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/GalicianStemFilterFactory.java index bb32d502dbe..e6cf77106d5 100644 --- a/solr/src/java/org/apache/solr/analysis/GalicianStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/GalicianStemFilterFactory.java @@ -30,7 +30,7 @@ import org.apache.lucene.analysis.gl.GalicianStemFilter; * <filter class="solr.GalicianStemFilterFactory"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class GalicianStemFilterFactory extends BaseTokenFilterFactory { public TokenStream create(TokenStream input) { diff --git a/solr/src/java/org/apache/solr/analysis/GermanLightStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/GermanLightStemFilterFactory.java index 601d12e668c..111fff46046 100644 --- a/solr/src/java/org/apache/solr/analysis/GermanLightStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/GermanLightStemFilterFactory.java @@ -30,7 +30,7 @@ import org.apache.lucene.analysis.de.GermanLightStemFilter; * <filter class="solr.GermanLightStemFilterFactory"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class GermanLightStemFilterFactory extends BaseTokenFilterFactory { public TokenStream create(TokenStream input) { diff --git 
a/solr/src/java/org/apache/solr/analysis/GermanMinimalStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/GermanMinimalStemFilterFactory.java index ac145878778..2ccba340e96 100644 --- a/solr/src/java/org/apache/solr/analysis/GermanMinimalStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/GermanMinimalStemFilterFactory.java @@ -30,7 +30,7 @@ import org.apache.lucene.analysis.de.GermanMinimalStemFilter; * <filter class="solr.GermanMinimalStemFilterFactory"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class GermanMinimalStemFilterFactory extends BaseTokenFilterFactory { public TokenStream create(TokenStream input) { diff --git a/solr/src/java/org/apache/solr/analysis/GermanStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/GermanStemFilterFactory.java index c2f3d03dfb8..5e9088c5428 100644 --- a/solr/src/java/org/apache/solr/analysis/GermanStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/GermanStemFilterFactory.java @@ -32,7 +32,7 @@ import org.apache.lucene.analysis.TokenStream; * <filter class="solr.GermanStemFilterFactory"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class GermanStemFilterFactory extends BaseTokenFilterFactory { public GermanStemFilter create(TokenStream in) { diff --git a/solr/src/java/org/apache/solr/analysis/GreekLowerCaseFilterFactory.java b/solr/src/java/org/apache/solr/analysis/GreekLowerCaseFilterFactory.java index ecd02e2f703..2742260ef3e 100644 --- a/solr/src/java/org/apache/solr/analysis/GreekLowerCaseFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/GreekLowerCaseFilterFactory.java @@ -35,7 +35,7 @@ import org.apache.solr.common.SolrException.ErrorCode; * <filter class="solr.GreekLowerCaseFilterFactory"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class GreekLowerCaseFilterFactory extends BaseTokenFilterFactory { diff --git a/solr/src/java/org/apache/solr/analysis/GreekStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/GreekStemFilterFactory.java index 0a12b04f8a1..90720dace21 100644 --- a/solr/src/java/org/apache/solr/analysis/GreekStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/GreekStemFilterFactory.java @@ -30,7 +30,7 @@ import org.apache.lucene.analysis.el.GreekStemFilter; * <filter class="solr.GreekStemFilterFactory"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class GreekStemFilterFactory extends BaseTokenFilterFactory { diff --git a/solr/src/java/org/apache/solr/analysis/HTMLStripCharFilterFactory.java b/solr/src/java/org/apache/solr/analysis/HTMLStripCharFilterFactory.java index 096a07b896c..c318a78aaa7 100644 --- a/solr/src/java/org/apache/solr/analysis/HTMLStripCharFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/HTMLStripCharFilterFactory.java @@ -30,7 +30,7 @@ import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter; * <tokenizer class="solr.WhitespaceTokenizerFactory"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class HindiNormalizationFilterFactory extends BaseTokenFilterFactory { public TokenStream create(TokenStream input) { diff --git a/solr/src/java/org/apache/solr/analysis/HindiStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/HindiStemFilterFactory.java index 7dd3544015c..657f04d936e 100644 --- a/solr/src/java/org/apache/solr/analysis/HindiStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/HindiStemFilterFactory.java @@ -29,7 +29,7 @@ import org.apache.lucene.analysis.hi.HindiStemFilter; * 
<filter class="solr.HindiStemFilterFactory"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class HindiStemFilterFactory extends BaseTokenFilterFactory { public TokenStream create(TokenStream input) { diff --git a/solr/src/java/org/apache/solr/analysis/HungarianLightStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/HungarianLightStemFilterFactory.java index 0b06fa99ed3..c635f42f37d 100644 --- a/solr/src/java/org/apache/solr/analysis/HungarianLightStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/HungarianLightStemFilterFactory.java @@ -30,7 +30,7 @@ import org.apache.lucene.analysis.hu.HungarianLightStemFilter; * <filter class="solr.HungarianLightStemFilterFactory"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class HungarianLightStemFilterFactory extends BaseTokenFilterFactory { public TokenStream create(TokenStream input) { diff --git a/solr/src/java/org/apache/solr/analysis/HyphenatedWordsFilterFactory.java b/solr/src/java/org/apache/solr/analysis/HyphenatedWordsFilterFactory.java index 7e68d4e727f..17a4ad58c39 100755 --- a/solr/src/java/org/apache/solr/analysis/HyphenatedWordsFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/HyphenatedWordsFilterFactory.java @@ -30,7 +30,7 @@ import org.apache.solr.analysis.BaseTokenFilterFactory; * <filter class="solr.HyphenatedWordsFilterFactory"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class HyphenatedWordsFilterFactory extends BaseTokenFilterFactory { public HyphenatedWordsFilter create(TokenStream input) { diff --git a/solr/src/java/org/apache/solr/analysis/HyphenationCompoundWordTokenFilterFactory.java b/solr/src/java/org/apache/solr/analysis/HyphenationCompoundWordTokenFilterFactory.java index 7fca6f2349b..274ac3bb595 100644 --- a/solr/src/java/org/apache/solr/analysis/HyphenationCompoundWordTokenFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/HyphenationCompoundWordTokenFilterFactory.java @@ -56,7 +56,7 @@ import org.xml.sax.InputSource; * dictionary="dictionary.txt" minWordSize="5" minSubwordSize="2" maxSubwordSize="15" onlyLongestMatch="false"/> * </analyzer> * </fieldType> - * @version $Id$ + * * @see HyphenationCompoundWordTokenFilter */ public class HyphenationCompoundWordTokenFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware { diff --git a/solr/src/java/org/apache/solr/analysis/IndicNormalizationFilterFactory.java b/solr/src/java/org/apache/solr/analysis/IndicNormalizationFilterFactory.java index 588946cc15a..ac0d1db0b71 100644 --- a/solr/src/java/org/apache/solr/analysis/IndicNormalizationFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/IndicNormalizationFilterFactory.java @@ -29,7 +29,7 @@ import org.apache.lucene.analysis.in.IndicNormalizationFilter; * <filter class="solr.IndicNormalizationFilterFactory"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class IndicNormalizationFilterFactory extends BaseTokenFilterFactory { public TokenStream create(TokenStream input) { diff --git a/solr/src/java/org/apache/solr/analysis/IndonesianStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/IndonesianStemFilterFactory.java index bf3497a01a3..c2e2bc93f0f 100644 --- a/solr/src/java/org/apache/solr/analysis/IndonesianStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/IndonesianStemFilterFactory.java @@ -32,7 +32,7 @@ import org.apache.lucene.analysis.id.IndonesianStemFilter; * <filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/> * 
</analyzer> * </fieldType> - * @version $Id$ + * */ public class IndonesianStemFilterFactory extends BaseTokenFilterFactory { private boolean stemDerivational = true; diff --git a/solr/src/java/org/apache/solr/analysis/ItalianLightStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/ItalianLightStemFilterFactory.java index 1ca00e3090a..c0d9ede0163 100644 --- a/solr/src/java/org/apache/solr/analysis/ItalianLightStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/ItalianLightStemFilterFactory.java @@ -30,7 +30,7 @@ import org.apache.lucene.analysis.it.ItalianLightStemFilter; * <filter class="solr.ItalianLightStemFilterFactory"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class ItalianLightStemFilterFactory extends BaseTokenFilterFactory { public TokenStream create(TokenStream input) { diff --git a/solr/src/java/org/apache/solr/analysis/KeepWordFilterFactory.java b/solr/src/java/org/apache/solr/analysis/KeepWordFilterFactory.java index ca38a761fb5..e394dcf2380 100644 --- a/solr/src/java/org/apache/solr/analysis/KeepWordFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/KeepWordFilterFactory.java @@ -36,7 +36,7 @@ import java.io.IOException; * <filter class="solr.KeepWordFilterFactory" words="keepwords.txt" ignoreCase="false" enablePositionIncrements="false"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class KeepWordFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware { diff --git a/solr/src/java/org/apache/solr/analysis/KeywordMarkerFilterFactory.java b/solr/src/java/org/apache/solr/analysis/KeywordMarkerFilterFactory.java index 0c47b6314ba..bd96d6cad16 100644 --- a/solr/src/java/org/apache/solr/analysis/KeywordMarkerFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/KeywordMarkerFilterFactory.java @@ -34,7 +34,7 @@ import org.apache.solr.util.plugin.ResourceLoaderAware; * <filter class="solr.KeywordMarkerFilterFactory" protected="protectedkeyword.txt" ignoreCase="false"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class KeywordMarkerFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware { public static final String PROTECTED_TOKENS = "protected"; diff --git a/solr/src/java/org/apache/solr/analysis/KeywordTokenizerFactory.java b/solr/src/java/org/apache/solr/analysis/KeywordTokenizerFactory.java index 7a6d324a717..8f70f9eb530 100644 --- a/solr/src/java/org/apache/solr/analysis/KeywordTokenizerFactory.java +++ b/solr/src/java/org/apache/solr/analysis/KeywordTokenizerFactory.java @@ -29,7 +29,7 @@ import java.io.Reader; * <tokenizer class="solr.KeywordTokenizerFactory"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class KeywordTokenizerFactory extends BaseTokenizerFactory { public KeywordTokenizer create(Reader input) { diff --git a/solr/src/java/org/apache/solr/analysis/LengthFilterFactory.java b/solr/src/java/org/apache/solr/analysis/LengthFilterFactory.java index 97d7703a72f..dc28d672bc5 100644 --- a/solr/src/java/org/apache/solr/analysis/LengthFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/LengthFilterFactory.java @@ -31,7 +31,7 @@ import java.util.Map; * <filter class="solr.LengthFilterFactory" min="0" max="1" enablePositionIncrements="false"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class LengthFilterFactory extends BaseTokenFilterFactory { int min,max; diff --git a/solr/src/java/org/apache/solr/analysis/LetterTokenizerFactory.java 
b/solr/src/java/org/apache/solr/analysis/LetterTokenizerFactory.java index 0c04c0519f9..e067d53c6e6 100644 --- a/solr/src/java/org/apache/solr/analysis/LetterTokenizerFactory.java +++ b/solr/src/java/org/apache/solr/analysis/LetterTokenizerFactory.java @@ -30,7 +30,7 @@ import java.util.Map; * <tokenizer class="solr.LetterTokenizerFactory"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class LetterTokenizerFactory extends BaseTokenizerFactory { diff --git a/solr/src/java/org/apache/solr/analysis/LimitTokenCountFilterFactory.java b/solr/src/java/org/apache/solr/analysis/LimitTokenCountFilterFactory.java index 12da7f321d0..b4c0bce72ce 100644 --- a/solr/src/java/org/apache/solr/analysis/LimitTokenCountFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/LimitTokenCountFilterFactory.java @@ -31,7 +31,7 @@ import org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilter; * <filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="10"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class LimitTokenCountFilterFactory extends BaseTokenFilterFactory { diff --git a/solr/src/java/org/apache/solr/analysis/LowerCaseFilterFactory.java b/solr/src/java/org/apache/solr/analysis/LowerCaseFilterFactory.java index b094eccc2f1..422ba68ad9d 100644 --- a/solr/src/java/org/apache/solr/analysis/LowerCaseFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/LowerCaseFilterFactory.java @@ -31,7 +31,7 @@ import org.apache.lucene.analysis.core.LowerCaseFilter; * <filter class="solr.LowerCaseFilterFactory"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class LowerCaseFilterFactory extends BaseTokenFilterFactory { @Override diff --git a/solr/src/java/org/apache/solr/analysis/LowerCaseTokenizerFactory.java b/solr/src/java/org/apache/solr/analysis/LowerCaseTokenizerFactory.java index 5bd59db62a4..e2980e75b70 100644 --- a/solr/src/java/org/apache/solr/analysis/LowerCaseTokenizerFactory.java +++ b/solr/src/java/org/apache/solr/analysis/LowerCaseTokenizerFactory.java @@ -30,7 +30,7 @@ import java.util.Map; * <tokenizer class="solr.LowerCaseTokenizerFactory"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class LowerCaseTokenizerFactory extends BaseTokenizerFactory { @Override diff --git a/solr/src/java/org/apache/solr/analysis/MappingCharFilterFactory.java b/solr/src/java/org/apache/solr/analysis/MappingCharFilterFactory.java index a401c31aa91..aaba0430f49 100644 --- a/solr/src/java/org/apache/solr/analysis/MappingCharFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/MappingCharFilterFactory.java @@ -41,7 +41,7 @@ import org.apache.solr.util.plugin.ResourceLoaderAware; * </analyzer> * </fieldType> * - * @version $Id$ + * * @since Solr 1.4 * */ diff --git a/solr/src/java/org/apache/solr/analysis/NGramFilterFactory.java b/solr/src/java/org/apache/solr/analysis/NGramFilterFactory.java index 20849c9551a..6dc5eaebad0 100644 --- a/solr/src/java/org/apache/solr/analysis/NGramFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/NGramFilterFactory.java @@ -30,7 +30,7 @@ import org.apache.lucene.analysis.ngram.NGramTokenFilter; * <filter class="solr.NGramFilterFactory" minGramSize="1" maxGramSize="2"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class NGramFilterFactory extends BaseTokenFilterFactory { private int maxGramSize = 0; diff --git a/solr/src/java/org/apache/solr/analysis/NGramTokenizerFactory.java b/solr/src/java/org/apache/solr/analysis/NGramTokenizerFactory.java index 
a9525b2d1a3..1b775230a92 100755 --- a/solr/src/java/org/apache/solr/analysis/NGramTokenizerFactory.java +++ b/solr/src/java/org/apache/solr/analysis/NGramTokenizerFactory.java @@ -31,7 +31,7 @@ import java.util.Map; * <tokenizer class="solr.NGramTokenizerFactory" minGramSize="1" maxGramSize="2"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class NGramTokenizerFactory extends BaseTokenizerFactory { private int maxGramSize = 0; diff --git a/solr/src/java/org/apache/solr/analysis/NumericPayloadTokenFilterFactory.java b/solr/src/java/org/apache/solr/analysis/NumericPayloadTokenFilterFactory.java index 2406da54231..21922e18c84 100644 --- a/solr/src/java/org/apache/solr/analysis/NumericPayloadTokenFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/NumericPayloadTokenFilterFactory.java @@ -32,7 +32,7 @@ import java.util.Map; * <filter class="solr.NumericPayloadTokenFilterFactory" payload="24" typeMatch="word"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class NumericPayloadTokenFilterFactory extends BaseTokenFilterFactory { private float payload; diff --git a/solr/src/java/org/apache/solr/analysis/PathHierarchyTokenizerFactory.java b/solr/src/java/org/apache/solr/analysis/PathHierarchyTokenizerFactory.java index 35ad58d4f9f..2a4b5249bd4 100644 --- a/solr/src/java/org/apache/solr/analysis/PathHierarchyTokenizerFactory.java +++ b/solr/src/java/org/apache/solr/analysis/PathHierarchyTokenizerFactory.java @@ -32,7 +32,7 @@ import org.apache.lucene.analysis.path.ReversePathHierarchyTokenizer; * <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="\" replace="/"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class PathHierarchyTokenizerFactory extends BaseTokenizerFactory { diff --git a/solr/src/java/org/apache/solr/analysis/PatternReplaceCharFilterFactory.java b/solr/src/java/org/apache/solr/analysis/PatternReplaceCharFilterFactory.java index aff0b9892cc..81c77a66830 100644 --- a/solr/src/java/org/apache/solr/analysis/PatternReplaceCharFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/PatternReplaceCharFilterFactory.java @@ -35,7 +35,7 @@ import org.apache.lucene.analysis.pattern.PatternReplaceCharFilter; * </analyzer> * </fieldType> * - * @version $Id$ + * * @since Solr 3.1 */ public class PatternReplaceCharFilterFactory extends BaseCharFilterFactory { diff --git a/solr/src/java/org/apache/solr/analysis/PatternReplaceFilterFactory.java b/solr/src/java/org/apache/solr/analysis/PatternReplaceFilterFactory.java index 039408e208c..55d08f78719 100644 --- a/solr/src/java/org/apache/solr/analysis/PatternReplaceFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/PatternReplaceFilterFactory.java @@ -33,7 +33,7 @@ import java.util.regex.PatternSyntaxException; * replace="all"/> * </analyzer> * </fieldType> - * @version $Id$ + * * @see PatternReplaceFilter */ public class PatternReplaceFilterFactory extends BaseTokenFilterFactory { diff --git a/solr/src/java/org/apache/solr/analysis/PatternTokenizerFactory.java b/solr/src/java/org/apache/solr/analysis/PatternTokenizerFactory.java index 2e748f5fe04..8996bb4b705 100644 --- a/solr/src/java/org/apache/solr/analysis/PatternTokenizerFactory.java +++ b/solr/src/java/org/apache/solr/analysis/PatternTokenizerFactory.java @@ -62,7 +62,7 @@ import org.apache.solr.common.SolrException; * * @see PatternTokenizer * @since solr1.2 - * @version $Id$ + * */ public class PatternTokenizerFactory extends BaseTokenizerFactory { diff --git 
a/solr/src/java/org/apache/solr/analysis/PersianCharFilterFactory.java b/solr/src/java/org/apache/solr/analysis/PersianCharFilterFactory.java index 8afd32c3ebd..f860c1de58e 100644 --- a/solr/src/java/org/apache/solr/analysis/PersianCharFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/PersianCharFilterFactory.java @@ -29,7 +29,7 @@ import org.apache.lucene.analysis.fa.PersianCharFilter; * <tokenizer class="solr.StandardTokenizerFactory"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class PersianCharFilterFactory extends BaseCharFilterFactory { diff --git a/solr/src/java/org/apache/solr/analysis/PersianNormalizationFilterFactory.java b/solr/src/java/org/apache/solr/analysis/PersianNormalizationFilterFactory.java index d6e9733ab75..970d9036bb0 100644 --- a/solr/src/java/org/apache/solr/analysis/PersianNormalizationFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/PersianNormalizationFilterFactory.java @@ -32,7 +32,7 @@ import org.apache.lucene.analysis.TokenStream; * <filter class="solr.PersianNormalizationFilterFactory"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class PersianNormalizationFilterFactory extends BaseTokenFilterFactory { public PersianNormalizationFilter create(TokenStream input) { diff --git a/solr/src/java/org/apache/solr/analysis/PhoneticFilterFactory.java b/solr/src/java/org/apache/solr/analysis/PhoneticFilterFactory.java index 6653ee44b05..624fbbf71b4 100644 --- a/solr/src/java/org/apache/solr/analysis/PhoneticFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/PhoneticFilterFactory.java @@ -52,7 +52,7 @@ import org.apache.solr.common.SolrException; * </analyzer> * </fieldType> * - * @version $Id$ + * * @see PhoneticFilter */ public class PhoneticFilterFactory extends BaseTokenFilterFactory diff --git a/solr/src/java/org/apache/solr/analysis/PorterStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/PorterStemFilterFactory.java index 17e9d6cbd55..64f0afb23dc 100644 --- a/solr/src/java/org/apache/solr/analysis/PorterStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/PorterStemFilterFactory.java @@ -30,7 +30,7 @@ import org.apache.lucene.analysis.en.PorterStemFilter; * <filter class="solr.PorterStemFilterFactory"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class PorterStemFilterFactory extends BaseTokenFilterFactory { public PorterStemFilter create(TokenStream input) { diff --git a/solr/src/java/org/apache/solr/analysis/PortugueseLightStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/PortugueseLightStemFilterFactory.java index 1e91e8804c4..3da115aaaca 100644 --- a/solr/src/java/org/apache/solr/analysis/PortugueseLightStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/PortugueseLightStemFilterFactory.java @@ -30,7 +30,7 @@ import org.apache.lucene.analysis.pt.PortugueseLightStemFilter; * <filter class="solr.PortugueseLightStemFilterFactory"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class PortugueseLightStemFilterFactory extends BaseTokenFilterFactory { public TokenStream create(TokenStream input) { diff --git a/solr/src/java/org/apache/solr/analysis/PortugueseMinimalStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/PortugueseMinimalStemFilterFactory.java index 32d67fafb9d..21aad294d56 100644 --- a/solr/src/java/org/apache/solr/analysis/PortugueseMinimalStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/PortugueseMinimalStemFilterFactory.java @@ -30,7 +30,7 @@ import 
org.apache.lucene.analysis.pt.PortugueseMinimalStemFilter; * <filter class="solr.PortugueseMinimalStemFilterFactory"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class PortugueseMinimalStemFilterFactory extends BaseTokenFilterFactory { public TokenStream create(TokenStream input) { diff --git a/solr/src/java/org/apache/solr/analysis/PortugueseStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/PortugueseStemFilterFactory.java index 2c264a10bed..df0477ba332 100644 --- a/solr/src/java/org/apache/solr/analysis/PortugueseStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/PortugueseStemFilterFactory.java @@ -30,7 +30,7 @@ import org.apache.lucene.analysis.pt.PortugueseStemFilter; * <filter class="solr.PortugueseStemFilterFactory"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class PortugueseStemFilterFactory extends BaseTokenFilterFactory { public TokenStream create(TokenStream input) { diff --git a/solr/src/java/org/apache/solr/analysis/PositionFilterFactory.java b/solr/src/java/org/apache/solr/analysis/PositionFilterFactory.java index 7f6a01fe02e..f30480927b5 100644 --- a/solr/src/java/org/apache/solr/analysis/PositionFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/PositionFilterFactory.java @@ -34,7 +34,7 @@ import java.util.Map; * </analyzer> * </fieldType> * - * @version $Id$ + * * @see org.apache.lucene.analysis.position.PositionFilter * @since solr 1.4 */ diff --git a/solr/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilterFactory.java b/solr/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilterFactory.java index 4b14f33b6fd..e26ae5fe802 100644 --- a/solr/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilterFactory.java @@ -29,7 +29,7 @@ import org.apache.lucene.analysis.miscellaneous.RemoveDuplicatesTokenFilter; * <filter class="solr.RemoveDuplicatesTokenFilterFactory"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class RemoveDuplicatesTokenFilterFactory extends BaseTokenFilterFactory { public RemoveDuplicatesTokenFilter create(TokenStream input) { diff --git a/solr/src/java/org/apache/solr/analysis/ReverseStringFilterFactory.java b/solr/src/java/org/apache/solr/analysis/ReverseStringFilterFactory.java index 103a694b506..65d8f54adc3 100644 --- a/solr/src/java/org/apache/solr/analysis/ReverseStringFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/ReverseStringFilterFactory.java @@ -30,7 +30,7 @@ import org.apache.lucene.analysis.reverse.ReverseStringFilter; * </analyzer> * </fieldType> * - * @version $Id$ + * * @since solr 1.4 */ public class ReverseStringFilterFactory extends BaseTokenFilterFactory { diff --git a/solr/src/java/org/apache/solr/analysis/ReversedWildcardFilterFactory.java b/solr/src/java/org/apache/solr/analysis/ReversedWildcardFilterFactory.java index b4c1b7bb538..22cba337d0b 100644 --- a/solr/src/java/org/apache/solr/analysis/ReversedWildcardFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/ReversedWildcardFilterFactory.java @@ -59,7 +59,7 @@ import org.apache.lucene.analysis.reverse.ReverseStringFilter; * <tokenizer class="solr.WhitespaceTokenizerFactory"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class ReversedWildcardFilterFactory extends BaseTokenFilterFactory { diff --git a/solr/src/java/org/apache/solr/analysis/RussianLightStemFilterFactory.java 
b/solr/src/java/org/apache/solr/analysis/RussianLightStemFilterFactory.java index e3307597a26..6a11a3c2172 100644 --- a/solr/src/java/org/apache/solr/analysis/RussianLightStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/RussianLightStemFilterFactory.java @@ -30,7 +30,7 @@ import org.apache.lucene.analysis.ru.RussianLightStemFilter; * <filter class="solr.RussianLightStemFilterFactory"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class RussianLightStemFilterFactory extends BaseTokenFilterFactory { public TokenStream create(TokenStream input) { diff --git a/solr/src/java/org/apache/solr/analysis/ShingleFilterFactory.java b/solr/src/java/org/apache/solr/analysis/ShingleFilterFactory.java index 913a4882338..de82dfde5f4 100644 --- a/solr/src/java/org/apache/solr/analysis/ShingleFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/ShingleFilterFactory.java @@ -36,7 +36,7 @@ import java.util.Map; * outputUnigrams="true" outputUnigramsIfNoShingles="false" tokenSeparator=" "/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class ShingleFilterFactory extends BaseTokenFilterFactory { private int minShingleSize; diff --git a/solr/src/java/org/apache/solr/analysis/SnowballPorterFilterFactory.java b/solr/src/java/org/apache/solr/analysis/SnowballPorterFilterFactory.java index 2c1f8fb4d1f..d9dd65b0b6e 100644 --- a/solr/src/java/org/apache/solr/analysis/SnowballPorterFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/SnowballPorterFilterFactory.java @@ -41,7 +41,7 @@ import org.tartarus.snowball.SnowballProgram; * </analyzer> * </fieldType> * - * @version $Id$ + * */ public class SnowballPorterFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware { public static final String PROTECTED_TOKENS = "protected"; diff --git a/solr/src/java/org/apache/solr/analysis/SolrAnalyzer.java b/solr/src/java/org/apache/solr/analysis/SolrAnalyzer.java index b0b03cc2b3d..aad0807b260 100644 --- a/solr/src/java/org/apache/solr/analysis/SolrAnalyzer.java +++ b/solr/src/java/org/apache/solr/analysis/SolrAnalyzer.java @@ -23,7 +23,7 @@ import java.io.Reader; import java.io.IOException; /** - * @version $Id$ + * */ public abstract class SolrAnalyzer extends Analyzer { int posIncGap=0; diff --git a/solr/src/java/org/apache/solr/analysis/SpanishLightStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/SpanishLightStemFilterFactory.java index 107679f7622..9e706cdaa66 100644 --- a/solr/src/java/org/apache/solr/analysis/SpanishLightStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/SpanishLightStemFilterFactory.java @@ -30,7 +30,7 @@ import org.apache.lucene.analysis.es.SpanishLightStemFilter; * <filter class="solr.SpanishLightStemFilterFactory"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class SpanishLightStemFilterFactory extends BaseTokenFilterFactory { public TokenStream create(TokenStream input) { diff --git a/solr/src/java/org/apache/solr/analysis/StandardFilterFactory.java b/solr/src/java/org/apache/solr/analysis/StandardFilterFactory.java index 91daca19f20..9c521083d0d 100644 --- a/solr/src/java/org/apache/solr/analysis/StandardFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/StandardFilterFactory.java @@ -31,7 +31,7 @@ import org.apache.lucene.analysis.standard.StandardFilter; * <filter class="solr.StandardFilterFactory"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class StandardFilterFactory extends BaseTokenFilterFactory { @Override diff --git 
a/solr/src/java/org/apache/solr/analysis/StandardTokenizerFactory.java b/solr/src/java/org/apache/solr/analysis/StandardTokenizerFactory.java index 32087dfb465..14a090041e0 100644 --- a/solr/src/java/org/apache/solr/analysis/StandardTokenizerFactory.java +++ b/solr/src/java/org/apache/solr/analysis/StandardTokenizerFactory.java @@ -31,7 +31,7 @@ import java.util.Map; * <tokenizer class="solr.StandardTokenizerFactory" maxTokenLength="255"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class StandardTokenizerFactory extends BaseTokenizerFactory { diff --git a/solr/src/java/org/apache/solr/analysis/StemmerOverrideFilterFactory.java b/solr/src/java/org/apache/solr/analysis/StemmerOverrideFilterFactory.java index 0ac0db28380..976113f040e 100644 --- a/solr/src/java/org/apache/solr/analysis/StemmerOverrideFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/StemmerOverrideFilterFactory.java @@ -36,7 +36,7 @@ import org.apache.solr.util.plugin.ResourceLoaderAware; * <filter class="solr.StemmerOverrideFilterFactory" dictionary="dictionary.txt" ignoreCase="false"/> * </analyzer> * </fieldType> - * @version $Id + * */ public class StemmerOverrideFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware { private CharArrayMap dictionary = null; diff --git a/solr/src/java/org/apache/solr/analysis/StopFilterFactory.java b/solr/src/java/org/apache/solr/analysis/StopFilterFactory.java index 67fa11c17f8..42256d6465c 100644 --- a/solr/src/java/org/apache/solr/analysis/StopFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/StopFilterFactory.java @@ -38,7 +38,7 @@ import java.io.IOException; * words="stopwords.txt" enablePositionIncrements="true"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class StopFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware { diff --git a/solr/src/java/org/apache/solr/analysis/SwedishLightStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/SwedishLightStemFilterFactory.java index 429f7ac1ed6..2e208a43796 100644 --- a/solr/src/java/org/apache/solr/analysis/SwedishLightStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/SwedishLightStemFilterFactory.java @@ -30,7 +30,7 @@ import org.apache.lucene.analysis.sv.SwedishLightStemFilter; * <filter class="solr.SwedishLightStemFilterFactory"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class SwedishLightStemFilterFactory extends BaseTokenFilterFactory { public TokenStream create(TokenStream input) { diff --git a/solr/src/java/org/apache/solr/analysis/SynonymFilterFactory.java b/solr/src/java/org/apache/solr/analysis/SynonymFilterFactory.java index 162913e7899..3b8e4802b7d 100644 --- a/solr/src/java/org/apache/solr/analysis/SynonymFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/SynonymFilterFactory.java @@ -44,7 +44,7 @@ import java.util.Map; * expand="true" tokenizerFactory="solr.WhitespaceTokenizerFactory"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class SynonymFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware { diff --git a/solr/src/java/org/apache/solr/analysis/ThaiWordFilterFactory.java b/solr/src/java/org/apache/solr/analysis/ThaiWordFilterFactory.java index 8ed23ff9973..3f4c366a7ca 100644 --- a/solr/src/java/org/apache/solr/analysis/ThaiWordFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/ThaiWordFilterFactory.java @@ -31,7 +31,7 @@ import org.apache.lucene.analysis.TokenStream; * <filter 
class="solr.ThaiWordFilterFactory"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class ThaiWordFilterFactory extends BaseTokenFilterFactory { public ThaiWordFilter create(TokenStream input) { diff --git a/solr/src/java/org/apache/solr/analysis/TokenFilterFactory.java b/solr/src/java/org/apache/solr/analysis/TokenFilterFactory.java index 937e19f7b6e..1ae90e88c5b 100644 --- a/solr/src/java/org/apache/solr/analysis/TokenFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/TokenFilterFactory.java @@ -44,7 +44,7 @@ import java.util.Map; * A single instance of any registered TokenFilterFactory is created * via the default constructor and is reused for each FieldType. *
<p/>
- * @version $Id$ + * */ public interface TokenFilterFactory { diff --git a/solr/src/java/org/apache/solr/analysis/TokenOffsetPayloadTokenFilterFactory.java b/solr/src/java/org/apache/solr/analysis/TokenOffsetPayloadTokenFilterFactory.java index 981aab415a9..f13dcdeff5d 100644 --- a/solr/src/java/org/apache/solr/analysis/TokenOffsetPayloadTokenFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/TokenOffsetPayloadTokenFilterFactory.java @@ -31,7 +31,7 @@ import org.apache.lucene.analysis.TokenStream; * <filter class="solr.TokenOffsetPayloadTokenFilterFactory"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class TokenOffsetPayloadTokenFilterFactory extends BaseTokenFilterFactory { public TokenOffsetPayloadTokenFilter create(TokenStream input) { diff --git a/solr/src/java/org/apache/solr/analysis/TokenizerChain.java b/solr/src/java/org/apache/solr/analysis/TokenizerChain.java index 3fd48842077..cf04a82c17d 100644 --- a/solr/src/java/org/apache/solr/analysis/TokenizerChain.java +++ b/solr/src/java/org/apache/solr/analysis/TokenizerChain.java @@ -25,7 +25,7 @@ import org.apache.lucene.analysis.Tokenizer; import java.io.Reader; /** - * @version $Id$ + * */ // diff --git a/solr/src/java/org/apache/solr/analysis/TokenizerFactory.java b/solr/src/java/org/apache/solr/analysis/TokenizerFactory.java index fe248566758..a46a8025be8 100644 --- a/solr/src/java/org/apache/solr/analysis/TokenizerFactory.java +++ b/solr/src/java/org/apache/solr/analysis/TokenizerFactory.java @@ -44,7 +44,7 @@ import org.apache.lucene.analysis.*; * A single instance of any registered TokenizerFactory is created * via the default constructor and is reused for each FieldType. *
<p/>
- * @version $Id$ + * */ public interface TokenizerFactory { /** init will be called just once, immediately after creation. diff --git a/solr/src/java/org/apache/solr/analysis/TrieTokenizerFactory.java b/solr/src/java/org/apache/solr/analysis/TrieTokenizerFactory.java index 24b8de7f99d..bf2a3edae6e 100644 --- a/solr/src/java/org/apache/solr/analysis/TrieTokenizerFactory.java +++ b/solr/src/java/org/apache/solr/analysis/TrieTokenizerFactory.java @@ -33,7 +33,7 @@ import java.io.Reader; *
<p/>
* Refer to {@link org.apache.lucene.search.NumericRangeQuery} for more details. * - * @version $Id$ + * * @see org.apache.lucene.search.NumericRangeQuery * @see org.apache.solr.schema.TrieField * @since solr 1.4 diff --git a/solr/src/java/org/apache/solr/analysis/TrimFilterFactory.java b/solr/src/java/org/apache/solr/analysis/TrimFilterFactory.java index 31f85c32ba0..218878543f0 100644 --- a/solr/src/java/org/apache/solr/analysis/TrimFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/TrimFilterFactory.java @@ -32,7 +32,7 @@ import org.apache.solr.common.SolrException; * <filter class="solr.TrimFilterFactory" updateOffsets="false"/> * </analyzer> * </fieldType> - * @version $Id$ + * * @see TrimFilter */ public class TrimFilterFactory extends BaseTokenFilterFactory { diff --git a/solr/src/java/org/apache/solr/analysis/TurkishLowerCaseFilterFactory.java b/solr/src/java/org/apache/solr/analysis/TurkishLowerCaseFilterFactory.java index 660a278fdf4..153ad026b2a 100644 --- a/solr/src/java/org/apache/solr/analysis/TurkishLowerCaseFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/TurkishLowerCaseFilterFactory.java @@ -29,7 +29,7 @@ import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter; * <filter class="solr.TurkishLowerCaseFilterFactory"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class TurkishLowerCaseFilterFactory extends BaseTokenFilterFactory { public TokenStream create(TokenStream input) { diff --git a/solr/src/java/org/apache/solr/analysis/TypeAsPayloadTokenFilterFactory.java b/solr/src/java/org/apache/solr/analysis/TypeAsPayloadTokenFilterFactory.java index c34c40f0358..b06a9a12a8b 100644 --- a/solr/src/java/org/apache/solr/analysis/TypeAsPayloadTokenFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/TypeAsPayloadTokenFilterFactory.java @@ -31,7 +31,7 @@ import org.apache.lucene.analysis.TokenStream; * <filter class="solr.TypeAsPayloadTokenFilterFactory"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class TypeAsPayloadTokenFilterFactory extends BaseTokenFilterFactory { public TypeAsPayloadTokenFilter create(TokenStream input) { diff --git a/solr/src/java/org/apache/solr/analysis/UAX29URLEmailTokenizerFactory.java b/solr/src/java/org/apache/solr/analysis/UAX29URLEmailTokenizerFactory.java index 3b071c7c816..e3dd931ca7f 100644 --- a/solr/src/java/org/apache/solr/analysis/UAX29URLEmailTokenizerFactory.java +++ b/solr/src/java/org/apache/solr/analysis/UAX29URLEmailTokenizerFactory.java @@ -34,7 +34,7 @@ import java.util.Map; * <tokenizer class="solr.UAX29URLEmailTokenizerFactory" maxTokenLength="255"/> * </analyzer> * </fieldType> - * @version $Id$ + * * */ diff --git a/solr/src/java/org/apache/solr/analysis/WhitespaceTokenizerFactory.java b/solr/src/java/org/apache/solr/analysis/WhitespaceTokenizerFactory.java index c94b12a03ea..0946e2a8932 100644 --- a/solr/src/java/org/apache/solr/analysis/WhitespaceTokenizerFactory.java +++ b/solr/src/java/org/apache/solr/analysis/WhitespaceTokenizerFactory.java @@ -30,7 +30,7 @@ import java.util.Map; * <tokenizer class="solr.WhitespaceTokenizerFactory"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class WhitespaceTokenizerFactory extends BaseTokenizerFactory { @Override diff --git a/solr/src/java/org/apache/solr/analysis/WikipediaTokenizerFactory.java b/solr/src/java/org/apache/solr/analysis/WikipediaTokenizerFactory.java index 54a944fa7ac..49df72a9472 100644 --- a/solr/src/java/org/apache/solr/analysis/WikipediaTokenizerFactory.java +++ 
b/solr/src/java/org/apache/solr/analysis/WikipediaTokenizerFactory.java @@ -30,7 +30,7 @@ import org.apache.lucene.analysis.wikipedia.WikipediaTokenizer; * <tokenizer class="solr.WikipediaTokenizerFactory"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class WikipediaTokenizerFactory extends BaseTokenizerFactory { // TODO: add support for WikipediaTokenizer's advanced options. diff --git a/solr/src/java/org/apache/solr/analysis/WordDelimiterFilterFactory.java b/solr/src/java/org/apache/solr/analysis/WordDelimiterFilterFactory.java index 093a5df9ce5..f9737af6fa8 100644 --- a/solr/src/java/org/apache/solr/analysis/WordDelimiterFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/WordDelimiterFilterFactory.java @@ -48,7 +48,7 @@ import java.io.IOException; * generateWordParts="1" generateNumberParts="1" stemEnglishPossessive="1"/> * </analyzer> * </fieldType> - * @version $Id$ + * */ public class WordDelimiterFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware { public static final String PROTECTED_TOKENS = "protected"; diff --git a/solr/src/java/org/apache/solr/core/Config.java b/solr/src/java/org/apache/solr/core/Config.java index f939a0af687..60440e31c36 100644 --- a/solr/src/java/org/apache/solr/core/Config.java +++ b/solr/src/java/org/apache/solr/core/Config.java @@ -46,7 +46,7 @@ import java.util.Locale; import java.util.concurrent.atomic.AtomicBoolean; /** - * @version $Id$ + * */ public class Config { public static final Logger log = LoggerFactory.getLogger(Config.class); diff --git a/solr/src/java/org/apache/solr/core/CoreContainer.java b/solr/src/java/org/apache/solr/core/CoreContainer.java index ef8dc62a4e8..513ae8e2430 100644 --- a/solr/src/java/org/apache/solr/core/CoreContainer.java +++ b/solr/src/java/org/apache/solr/core/CoreContainer.java @@ -54,7 +54,7 @@ import org.xml.sax.InputSource; /** - * @version $Id$ + * * @since solr 1.3 */ public class CoreContainer diff --git a/solr/src/java/org/apache/solr/core/IndexDeletionPolicyWrapper.java b/solr/src/java/org/apache/solr/core/IndexDeletionPolicyWrapper.java index f85c37fc2ff..f6f26c31982 100644 --- a/solr/src/java/org/apache/solr/core/IndexDeletionPolicyWrapper.java +++ b/solr/src/java/org/apache/solr/core/IndexDeletionPolicyWrapper.java @@ -32,7 +32,7 @@ import java.util.concurrent.atomic.AtomicInteger; * commit points for certain amounts of time to support features such as index replication * or snapshooting directly out of a live index directory. * - * @version $Id$ + * * @see org.apache.lucene.index.IndexDeletionPolicy */ public class IndexDeletionPolicyWrapper implements IndexDeletionPolicy { diff --git a/solr/src/java/org/apache/solr/core/JmxMonitoredMap.java b/solr/src/java/org/apache/solr/core/JmxMonitoredMap.java index 15c956ffe30..823dd9c0f2b 100644 --- a/solr/src/java/org/apache/solr/core/JmxMonitoredMap.java +++ b/solr/src/java/org/apache/solr/core/JmxMonitoredMap.java @@ -40,7 +40,7 @@ import org.slf4j.LoggerFactory; * Please see http://wiki.apache.org/solr/SolrJmx for instructions on usage and configuration *
<p/>
* - * @version $Id$ + * * @see org.apache.solr.core.SolrConfig.JmxConfiguration * @since solr 1.3 */ diff --git a/solr/src/java/org/apache/solr/core/PluginInfo.java b/solr/src/java/org/apache/solr/core/PluginInfo.java index be97adf9940..35611b20bdb 100644 --- a/solr/src/java/org/apache/solr/core/PluginInfo.java +++ b/solr/src/java/org/apache/solr/core/PluginInfo.java @@ -27,7 +27,7 @@ import static java.util.Collections.unmodifiableMap; /** * An Object which represents a Plugin of any type - * @version $Id$ + * */ public class PluginInfo { public final String name, className, type; diff --git a/solr/src/java/org/apache/solr/core/QuerySenderListener.java b/solr/src/java/org/apache/solr/core/QuerySenderListener.java index d5485ce3665..12b388d6b3c 100644 --- a/solr/src/java/org/apache/solr/core/QuerySenderListener.java +++ b/solr/src/java/org/apache/solr/core/QuerySenderListener.java @@ -30,7 +30,7 @@ import org.apache.solr.response.SolrQueryResponse; import java.util.List; /** - * @version $Id$ + * */ public class QuerySenderListener extends AbstractSolrEventListener { public QuerySenderListener(SolrCore core) { diff --git a/solr/src/java/org/apache/solr/core/RequestHandlers.java b/solr/src/java/org/apache/solr/core/RequestHandlers.java index 9541dc32043..80c7c04d72f 100644 --- a/solr/src/java/org/apache/solr/core/RequestHandlers.java +++ b/solr/src/java/org/apache/solr/core/RequestHandlers.java @@ -209,7 +209,6 @@ final class RequestHandlers { * This is a private class - if there is a real need for it to be public, it could * move * - * @version $Id$ * @since solr 1.2 */ private static final class LazyRequestHandlerWrapper implements SolrRequestHandler, SolrInfoMBean diff --git a/solr/src/java/org/apache/solr/core/SolrConfig.java b/solr/src/java/org/apache/solr/core/SolrConfig.java index 3bdd551c5a4..7e96557cbf2 100644 --- a/solr/src/java/org/apache/solr/core/SolrConfig.java +++ b/solr/src/java/org/apache/solr/core/SolrConfig.java @@ -66,7 +66,7 @@ import java.io.IOException; * configuration data for a a Solr instance -- typically found in * "solrconfig.xml". * - * @version $Id$ + * */ public class SolrConfig extends Config { diff --git a/solr/src/java/org/apache/solr/core/SolrCore.java b/solr/src/java/org/apache/solr/core/SolrCore.java index 4bf9288999a..1afc3be2269 100644 --- a/solr/src/java/org/apache/solr/core/SolrCore.java +++ b/solr/src/java/org/apache/solr/core/SolrCore.java @@ -71,7 +71,7 @@ import java.lang.reflect.Constructor; /** - * @version $Id$ + * */ public final class SolrCore implements SolrInfoMBean { public static final String version="1.0"; diff --git a/solr/src/java/org/apache/solr/core/SolrDeletionPolicy.java b/solr/src/java/org/apache/solr/core/SolrDeletionPolicy.java index e165484677e..c5e5278d860 100644 --- a/solr/src/java/org/apache/solr/core/SolrDeletionPolicy.java +++ b/solr/src/java/org/apache/solr/core/SolrDeletionPolicy.java @@ -38,7 +38,7 @@ import java.util.Locale; * for certain amounts of time to support features such as index replication * or snapshooting directly out of a live index directory. 
* - * @version $Id$ + * * @see org.apache.lucene.index.IndexDeletionPolicy */ public class SolrDeletionPolicy implements IndexDeletionPolicy, NamedListInitializedPlugin { diff --git a/solr/src/java/org/apache/solr/core/SolrEventListener.java b/solr/src/java/org/apache/solr/core/SolrEventListener.java index f499e85a2c1..62c97ea258b 100644 --- a/solr/src/java/org/apache/solr/core/SolrEventListener.java +++ b/solr/src/java/org/apache/solr/core/SolrEventListener.java @@ -24,7 +24,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** - * @version $Id$ + * */ public interface SolrEventListener extends NamedListInitializedPlugin{ static final Logger log = LoggerFactory.getLogger(SolrCore.class); diff --git a/solr/src/java/org/apache/solr/core/SolrInfoMBean.java b/solr/src/java/org/apache/solr/core/SolrInfoMBean.java index 3e70e165b86..4858559b5c2 100644 --- a/solr/src/java/org/apache/solr/core/SolrInfoMBean.java +++ b/solr/src/java/org/apache/solr/core/SolrInfoMBean.java @@ -26,7 +26,7 @@ import org.apache.solr.common.util.NamedList; * for use by objects which are 'pluggable' to make server administration * easier. * - * @version $Id$ + * */ public interface SolrInfoMBean { diff --git a/solr/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java b/solr/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java index bb0e73d9061..dd83506462e 100644 --- a/solr/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java +++ b/solr/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java @@ -47,7 +47,7 @@ import java.util.*; /** * A base class for all analysis request handlers. * - * @version $Id$ + * * @since solr 1.4 */ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase { diff --git a/solr/src/java/org/apache/solr/handler/BinaryUpdateRequestHandler.java b/solr/src/java/org/apache/solr/handler/BinaryUpdateRequestHandler.java index c9f92857b1c..05c0067bc1b 100644 --- a/solr/src/java/org/apache/solr/handler/BinaryUpdateRequestHandler.java +++ b/solr/src/java/org/apache/solr/handler/BinaryUpdateRequestHandler.java @@ -38,7 +38,7 @@ import java.util.List; /** * Update handler which uses the JavaBin format * - * @version $Id$ + * * @see org.apache.solr.client.solrj.request.JavaBinUpdateRequestCodec * @see org.apache.solr.common.util.JavaBinCodec * @since solr 1.4 diff --git a/solr/src/java/org/apache/solr/handler/CSVRequestHandler.java b/solr/src/java/org/apache/solr/handler/CSVRequestHandler.java index 6ba5ead5f89..f1bfb118bb2 100755 --- a/solr/src/java/org/apache/solr/handler/CSVRequestHandler.java +++ b/solr/src/java/org/apache/solr/handler/CSVRequestHandler.java @@ -37,7 +37,7 @@ import java.util.List; import java.io.*; /** - * @version $Id$ + * */ public class CSVRequestHandler extends ContentStreamHandlerBase { diff --git a/solr/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java b/solr/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java index 0f18012aa14..610a043ca33 100644 --- a/solr/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java +++ b/solr/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java @@ -73,7 +73,7 @@ import java.util.*; * supports the "analysis.showmatch" parameter which when set to {@code true}, all field tokens that match the query * tokens will be marked as a "match". 
* - * @version $Id$ + * * @since solr 1.4 */ public class DocumentAnalysisRequestHandler extends AnalysisRequestHandlerBase { diff --git a/solr/src/java/org/apache/solr/handler/FieldAnalysisRequestHandler.java b/solr/src/java/org/apache/solr/handler/FieldAnalysisRequestHandler.java index b30ae6135ab..d0848272bcf 100644 --- a/solr/src/java/org/apache/solr/handler/FieldAnalysisRequestHandler.java +++ b/solr/src/java/org/apache/solr/handler/FieldAnalysisRequestHandler.java @@ -86,7 +86,7 @@ import java.io.IOException; *
<p/>
Note that if neither analysis.fieldname and analysis.fieldtype is specified, then the default search field's * analyzer is used.
<p/>
* - * @version $Id$ + * * @since solr 1.4 */ public class FieldAnalysisRequestHandler extends AnalysisRequestHandlerBase { diff --git a/solr/src/java/org/apache/solr/handler/ReplicationHandler.java b/solr/src/java/org/apache/solr/handler/ReplicationHandler.java index 71780562890..8fcc7e54fe4 100644 --- a/solr/src/java/org/apache/solr/handler/ReplicationHandler.java +++ b/solr/src/java/org/apache/solr/handler/ReplicationHandler.java @@ -61,7 +61,7 @@ import java.util.zip.DeflaterOutputStream; *
* <li>Abort a snap pull (command=abort)</li>
* <li>Enable/Disable polling the master for new versions (command=enablepoll * or command=disablepoll)</li>
* - * @version $Id$ + * * @since solr 1.4 */ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAware { diff --git a/solr/src/java/org/apache/solr/handler/RequestHandlerUtils.java b/solr/src/java/org/apache/solr/handler/RequestHandlerUtils.java index d7d85767a61..3d23322e723 100755 --- a/solr/src/java/org/apache/solr/handler/RequestHandlerUtils.java +++ b/solr/src/java/org/apache/solr/handler/RequestHandlerUtils.java @@ -32,7 +32,7 @@ import org.apache.solr.update.processor.UpdateRequestProcessor; /** * Common helper functions for RequestHandlers * - * @version $Id$ + * * @since solr 1.2 */ public class RequestHandlerUtils diff --git a/solr/src/java/org/apache/solr/handler/SnapPuller.java b/solr/src/java/org/apache/solr/handler/SnapPuller.java index 20ce25e485d..70d33ab7243 100644 --- a/solr/src/java/org/apache/solr/handler/SnapPuller.java +++ b/solr/src/java/org/apache/solr/handler/SnapPuller.java @@ -57,7 +57,7 @@ import java.util.zip.InflaterInputStream; *
<p/>
    Provides functionality of downloading changed index files as well as config files and a timer for scheduling fetches from the * master.
<p/>
    * - * @version $Id$ + * * @since solr 1.4 */ public class SnapPuller { diff --git a/solr/src/java/org/apache/solr/handler/SnapShooter.java b/solr/src/java/org/apache/solr/handler/SnapShooter.java index b534d04a97e..d0afaa2ece7 100644 --- a/solr/src/java/org/apache/solr/handler/SnapShooter.java +++ b/solr/src/java/org/apache/solr/handler/SnapShooter.java @@ -39,7 +39,7 @@ import org.slf4j.LoggerFactory; /** *
<p/>
    Provides functionality equivalent to the snapshooter script
<p/>
    * - * @version $Id$ + * * @since solr 1.4 */ public class SnapShooter { diff --git a/solr/src/java/org/apache/solr/handler/StandardRequestHandler.java b/solr/src/java/org/apache/solr/handler/StandardRequestHandler.java index 09dcf64e461..2489e5d71c6 100644 --- a/solr/src/java/org/apache/solr/handler/StandardRequestHandler.java +++ b/solr/src/java/org/apache/solr/handler/StandardRequestHandler.java @@ -23,7 +23,7 @@ import java.net.MalformedURLException; import java.net.URL; /** - * @version $Id$ + * * * All of the following options may be configured for this handler * in the solrconfig as defaults, and may be overriden as request parameters. diff --git a/solr/src/java/org/apache/solr/handler/admin/AdminHandlers.java b/solr/src/java/org/apache/solr/handler/admin/AdminHandlers.java index e030fb6db22..0f4a0a98a09 100644 --- a/solr/src/java/org/apache/solr/handler/admin/AdminHandlers.java +++ b/solr/src/java/org/apache/solr/handler/admin/AdminHandlers.java @@ -31,7 +31,7 @@ import org.apache.solr.util.plugin.SolrCoreAware; /** * A special Handler that registers all standard admin handlers * - * @version $Id$ + * * @since solr 1.3 */ public class AdminHandlers implements SolrCoreAware, SolrRequestHandler diff --git a/solr/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java b/solr/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java index 63f72c3ddc8..39b9a048e22 100644 --- a/solr/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java +++ b/solr/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java @@ -45,7 +45,7 @@ import java.io.IOException; import java.util.Date; /** - * @version $Id$ + * * @since solr 1.3 */ public class CoreAdminHandler extends RequestHandlerBase { diff --git a/solr/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java b/solr/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java index 17ff7730984..0127915004a 100644 --- a/solr/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java +++ b/solr/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java @@ -81,7 +81,7 @@ import org.apache.solr.search.SolrIndexSearcher; * For more documentation see: * http://wiki.apache.org/solr/LukeRequestHandler * - * @version $Id$ + * * @since solr 1.2 */ public class LukeRequestHandler extends RequestHandlerBase diff --git a/solr/src/java/org/apache/solr/handler/admin/PluginInfoHandler.java b/solr/src/java/org/apache/solr/handler/admin/PluginInfoHandler.java index 86f98c67c31..c0707dfd49e 100644 --- a/solr/src/java/org/apache/solr/handler/admin/PluginInfoHandler.java +++ b/solr/src/java/org/apache/solr/handler/admin/PluginInfoHandler.java @@ -36,7 +36,7 @@ import org.apache.solr.response.SolrQueryResponse; * that it works nicely with an XSLT transformation. Until we have a nice * XSLT front end for /admin, the format is still open to change. 
* - * @version $Id$ + * * @since solr 1.2 */ public class PluginInfoHandler extends RequestHandlerBase diff --git a/solr/src/java/org/apache/solr/handler/admin/PropertiesRequestHandler.java b/solr/src/java/org/apache/solr/handler/admin/PropertiesRequestHandler.java index 12ee354b5fa..f24ac8d5cb6 100644 --- a/solr/src/java/org/apache/solr/handler/admin/PropertiesRequestHandler.java +++ b/solr/src/java/org/apache/solr/handler/admin/PropertiesRequestHandler.java @@ -25,7 +25,7 @@ import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.response.SolrQueryResponse; /** - * @version $Id$ + * * @since solr 1.2 */ public class PropertiesRequestHandler extends RequestHandlerBase diff --git a/solr/src/java/org/apache/solr/handler/admin/ShowFileRequestHandler.java b/solr/src/java/org/apache/solr/handler/admin/ShowFileRequestHandler.java index cda4b94bd81..2c30558f301 100644 --- a/solr/src/java/org/apache/solr/handler/admin/ShowFileRequestHandler.java +++ b/solr/src/java/org/apache/solr/handler/admin/ShowFileRequestHandler.java @@ -72,7 +72,7 @@ import org.apache.solr.response.SolrQueryResponse; * http://localhost:8983/solr/admin/file?file=schema.xml&contentType=text/plain * * - * @version $Id$ + * * @since solr 1.3 */ public class ShowFileRequestHandler extends RequestHandlerBase diff --git a/solr/src/java/org/apache/solr/handler/admin/SystemInfoHandler.java b/solr/src/java/org/apache/solr/handler/admin/SystemInfoHandler.java index 4a123944624..8dc782424a8 100644 --- a/solr/src/java/org/apache/solr/handler/admin/SystemInfoHandler.java +++ b/solr/src/java/org/apache/solr/handler/admin/SystemInfoHandler.java @@ -50,7 +50,7 @@ import org.slf4j.LoggerFactory; * that it works nicely with an XSLT transformation. Until we have a nice * XSLT front end for /admin, the format is still open to change. * - * @version $Id$ + * * @since solr 1.2 */ public class SystemInfoHandler extends RequestHandlerBase diff --git a/solr/src/java/org/apache/solr/handler/admin/ThreadDumpHandler.java b/solr/src/java/org/apache/solr/handler/admin/ThreadDumpHandler.java index f8850160696..a39d63c568a 100644 --- a/solr/src/java/org/apache/solr/handler/admin/ThreadDumpHandler.java +++ b/solr/src/java/org/apache/solr/handler/admin/ThreadDumpHandler.java @@ -35,7 +35,7 @@ import org.apache.solr.response.SolrQueryResponse; * that it works nicely with an XSLT transformation. Until we have a nice * XSLT front end for /admin, the format is still open to change. * - * @version $Id$ + * * @since solr 1.2 */ public class ThreadDumpHandler extends RequestHandlerBase diff --git a/solr/src/java/org/apache/solr/handler/component/DebugComponent.java b/solr/src/java/org/apache/solr/handler/component/DebugComponent.java index a6717981786..6d133bb6990 100644 --- a/solr/src/java/org/apache/solr/handler/component/DebugComponent.java +++ b/solr/src/java/org/apache/solr/handler/component/DebugComponent.java @@ -34,7 +34,7 @@ import org.apache.solr.util.SolrPluginUtils; /** * Adds debugging information to a request. * - * @version $Id$ + * * @since solr 1.3 */ public class DebugComponent extends SearchComponent diff --git a/solr/src/java/org/apache/solr/handler/component/FacetComponent.java b/solr/src/java/org/apache/solr/handler/component/FacetComponent.java index 3a5ee6149bd..a398967ccdf 100644 --- a/solr/src/java/org/apache/solr/handler/component/FacetComponent.java +++ b/solr/src/java/org/apache/solr/handler/component/FacetComponent.java @@ -38,7 +38,7 @@ import java.util.*; /** * TODO! 
* - * @version $Id$ + * * @since solr 1.3 */ public class FacetComponent extends SearchComponent diff --git a/solr/src/java/org/apache/solr/handler/component/HighlightComponent.java b/solr/src/java/org/apache/solr/handler/component/HighlightComponent.java index 137f30b52c3..f055e29faf3 100644 --- a/solr/src/java/org/apache/solr/handler/component/HighlightComponent.java +++ b/solr/src/java/org/apache/solr/handler/component/HighlightComponent.java @@ -41,7 +41,7 @@ import java.util.List; /** * TODO! * - * @version $Id$ + * * @since solr 1.3 */ public class HighlightComponent extends SearchComponent implements PluginInfoInitialized, SolrCoreAware diff --git a/solr/src/java/org/apache/solr/handler/component/MoreLikeThisComponent.java b/solr/src/java/org/apache/solr/handler/component/MoreLikeThisComponent.java index 8851ff7761d..33251f04361 100644 --- a/solr/src/java/org/apache/solr/handler/component/MoreLikeThisComponent.java +++ b/solr/src/java/org/apache/solr/handler/component/MoreLikeThisComponent.java @@ -34,7 +34,7 @@ import org.apache.solr.search.SolrIndexSearcher; /** * TODO! * - * @version $Id$ + * * @since solr 1.3 */ public class MoreLikeThisComponent extends SearchComponent diff --git a/solr/src/java/org/apache/solr/handler/component/QueryComponent.java b/solr/src/java/org/apache/solr/handler/component/QueryComponent.java index 1d26b374d06..98602d280a6 100644 --- a/solr/src/java/org/apache/solr/handler/component/QueryComponent.java +++ b/solr/src/java/org/apache/solr/handler/component/QueryComponent.java @@ -56,7 +56,7 @@ import java.util.*; /** * TODO! * - * @version $Id$ + * * @since solr 1.3 */ public class QueryComponent extends SearchComponent diff --git a/solr/src/java/org/apache/solr/handler/component/QueryElevationComponent.java b/solr/src/java/org/apache/solr/handler/component/QueryElevationComponent.java index 743cca317c2..008bc91ca17 100644 --- a/solr/src/java/org/apache/solr/handler/component/QueryElevationComponent.java +++ b/solr/src/java/org/apache/solr/handler/component/QueryElevationComponent.java @@ -72,7 +72,7 @@ import org.xml.sax.InputSource; /** * A component to elevate some documents to the top of the result set. * - * @version $Id$ + * * @since solr 1.3 */ public class QueryElevationComponent extends SearchComponent implements SolrCoreAware diff --git a/solr/src/java/org/apache/solr/handler/component/ResponseBuilder.java b/solr/src/java/org/apache/solr/handler/component/ResponseBuilder.java index 5bcb5a967d2..47ef191c962 100644 --- a/solr/src/java/org/apache/solr/handler/component/ResponseBuilder.java +++ b/solr/src/java/org/apache/solr/handler/component/ResponseBuilder.java @@ -36,7 +36,7 @@ import java.util.Map; /** * This class is experimental and will be changing in the future. * - * @version $Id$ + * * @since solr 1.3 */ public class ResponseBuilder diff --git a/solr/src/java/org/apache/solr/handler/component/SearchComponent.java b/solr/src/java/org/apache/solr/handler/component/SearchComponent.java index 2908fbc033d..425b7d8c341 100644 --- a/solr/src/java/org/apache/solr/handler/component/SearchComponent.java +++ b/solr/src/java/org/apache/solr/handler/component/SearchComponent.java @@ -28,7 +28,7 @@ import org.apache.solr.util.plugin.NamedListInitializedPlugin; /** * TODO! 
* - * @version $Id$ + * * @since solr 1.3 */ public abstract class SearchComponent implements SolrInfoMBean, NamedListInitializedPlugin diff --git a/solr/src/java/org/apache/solr/handler/component/StatsComponent.java b/solr/src/java/org/apache/solr/handler/component/StatsComponent.java index a8b0c4a65d4..805ad10c668 100644 --- a/solr/src/java/org/apache/solr/handler/component/StatsComponent.java +++ b/solr/src/java/org/apache/solr/handler/component/StatsComponent.java @@ -42,7 +42,7 @@ import org.apache.solr.request.UnInvertedField; /** * Stats component calculates simple statistics on numeric field values * - * @version $Id$ + * * @since solr 1.4 */ public class StatsComponent extends SearchComponent { diff --git a/solr/src/java/org/apache/solr/handler/component/TermVectorComponent.java b/solr/src/java/org/apache/solr/handler/component/TermVectorComponent.java index 24c5256576b..7b78735447e 100644 --- a/solr/src/java/org/apache/solr/handler/component/TermVectorComponent.java +++ b/solr/src/java/org/apache/solr/handler/component/TermVectorComponent.java @@ -74,7 +74,7 @@ import java.util.Map; * </arr> * </requestHandler> * - * @version $Id$ + * */ public class TermVectorComponent extends SearchComponent implements SolrCoreAware { diff --git a/solr/src/java/org/apache/solr/handler/component/TermsComponent.java b/solr/src/java/org/apache/solr/handler/component/TermsComponent.java index 4ce9f9f507a..7158b733e2c 100644 --- a/solr/src/java/org/apache/solr/handler/component/TermsComponent.java +++ b/solr/src/java/org/apache/solr/handler/component/TermsComponent.java @@ -56,7 +56,7 @@ import java.util.regex.Pattern; * * @see org.apache.solr.common.params.TermsParams * See Lucene's TermEnum class - * @version $Id$ + * */ public class TermsComponent extends SearchComponent { public static final int UNLIMITED_MAX_COUNT = -1; diff --git a/solr/src/java/org/apache/solr/request/LocalSolrQueryRequest.java b/solr/src/java/org/apache/solr/request/LocalSolrQueryRequest.java index 427cf0d1c5d..900cb860fe9 100644 --- a/solr/src/java/org/apache/solr/request/LocalSolrQueryRequest.java +++ b/solr/src/java/org/apache/solr/request/LocalSolrQueryRequest.java @@ -31,7 +31,7 @@ import java.util.Iterator; // retained more for backward compatibility. /** - * @version $Id$ + * */ public class LocalSolrQueryRequest extends SolrQueryRequestBase { public final static Map emptyArgs = new HashMap(0,1); diff --git a/solr/src/java/org/apache/solr/request/ServletSolrParams.java b/solr/src/java/org/apache/solr/request/ServletSolrParams.java index 49922c81332..18dc7559b21 100644 --- a/solr/src/java/org/apache/solr/request/ServletSolrParams.java +++ b/solr/src/java/org/apache/solr/request/ServletSolrParams.java @@ -22,7 +22,7 @@ import javax.servlet.ServletRequest; import org.apache.solr.common.params.MultiMapSolrParams; /** - * @version $Id$ + * */ public class ServletSolrParams extends MultiMapSolrParams { public ServletSolrParams(ServletRequest req) { diff --git a/solr/src/java/org/apache/solr/request/SolrQueryRequest.java b/solr/src/java/org/apache/solr/request/SolrQueryRequest.java index eeb3c60408f..931acbc123d 100644 --- a/solr/src/java/org/apache/solr/request/SolrQueryRequest.java +++ b/solr/src/java/org/apache/solr/request/SolrQueryRequest.java @@ -29,7 +29,7 @@ import java.util.Map; *

    Container for a request to execute a query. * SolrQueryRequest is not thread safe.
    * - * @version $Id$ + * */ public interface SolrQueryRequest { diff --git a/solr/src/java/org/apache/solr/request/SolrQueryRequestBase.java b/solr/src/java/org/apache/solr/request/SolrQueryRequestBase.java index 699cbe7c74d..82d28728d34 100644 --- a/solr/src/java/org/apache/solr/request/SolrQueryRequestBase.java +++ b/solr/src/java/org/apache/solr/request/SolrQueryRequestBase.java @@ -38,7 +38,7 @@ import java.util.HashMap; *

    * * - * @version $Id$ + * */ public abstract class SolrQueryRequestBase implements SolrQueryRequest { protected final SolrCore core; diff --git a/solr/src/java/org/apache/solr/request/SolrRequestHandler.java b/solr/src/java/org/apache/solr/request/SolrRequestHandler.java index 45a80fda622..50adac068f9 100644 --- a/solr/src/java/org/apache/solr/request/SolrRequestHandler.java +++ b/solr/src/java/org/apache/solr/request/SolrRequestHandler.java @@ -37,7 +37,7 @@ import org.apache.solr.response.SolrQueryResponse; * A single instance of any registered SolrRequestHandler is created * via the default constructor and is reused for all relevant queries. * - * @version $Id$ + * */ public interface SolrRequestHandler extends SolrInfoMBean { diff --git a/solr/src/java/org/apache/solr/response/CSVResponseWriter.java b/solr/src/java/org/apache/solr/response/CSVResponseWriter.java index 3116467a5c7..d35ad3c2bd6 100755 --- a/solr/src/java/org/apache/solr/response/CSVResponseWriter.java +++ b/solr/src/java/org/apache/solr/response/CSVResponseWriter.java @@ -43,7 +43,7 @@ import java.io.Writer; import java.util.*; /** - * @version $Id$ + * */ public class CSVResponseWriter implements QueryResponseWriter { diff --git a/solr/src/java/org/apache/solr/response/JSONResponseWriter.java b/solr/src/java/org/apache/solr/response/JSONResponseWriter.java index 7a515788f62..0e328fcca41 100644 --- a/solr/src/java/org/apache/solr/response/JSONResponseWriter.java +++ b/solr/src/java/org/apache/solr/response/JSONResponseWriter.java @@ -37,7 +37,7 @@ import org.apache.solr.schema.SchemaField; import org.apache.solr.search.ReturnFields; /** - * @version $Id$ + * */ public class JSONResponseWriter implements QueryResponseWriter { diff --git a/solr/src/java/org/apache/solr/response/QueryResponseWriter.java b/solr/src/java/org/apache/solr/response/QueryResponseWriter.java index eeae6c7c5c1..81e19b7b63c 100644 --- a/solr/src/java/org/apache/solr/response/QueryResponseWriter.java +++ b/solr/src/java/org/apache/solr/response/QueryResponseWriter.java @@ -40,7 +40,7 @@ import org.apache.solr.util.plugin.NamedListInitializedPlugin; * A single instance of any registered QueryResponseWriter is created * via the default constructor and is reused for all relevant queries. * - * @version $Id$ + * */ public interface QueryResponseWriter extends NamedListInitializedPlugin { public static String CONTENT_TYPE_XML_UTF8="application/xml; charset=UTF-8"; diff --git a/solr/src/java/org/apache/solr/response/RawResponseWriter.java b/solr/src/java/org/apache/solr/response/RawResponseWriter.java index e34691c192d..dba1eaca790 100644 --- a/solr/src/java/org/apache/solr/response/RawResponseWriter.java +++ b/solr/src/java/org/apache/solr/response/RawResponseWriter.java @@ -42,7 +42,7 @@ import org.apache.solr.request.SolrQueryRequest; * defaults to the "standard" writer. *
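Several hunks above touch QueryResponseWriter implementations; since one instance is created via the default constructor and reused for all queries, implementations must be stateless or thread safe. A minimal sketch of a custom writer, assuming the write/getContentType/init shape shown in the QueryResponseWriter hunk (PlainTextResponseWriter is an invented name):

    import java.io.IOException;
    import java.io.Writer;
    import org.apache.solr.common.util.NamedList;
    import org.apache.solr.request.SolrQueryRequest;
    import org.apache.solr.response.QueryResponseWriter;
    import org.apache.solr.response.SolrQueryResponse;

    // Hypothetical writer: dumps the response NamedList as plain text.
    public class PlainTextResponseWriter implements QueryResponseWriter {
      public void init(NamedList args) {
        // One instance is reused for all requests, so anything stored here
        // must be safe for concurrent use.
      }

      public void write(Writer writer, SolrQueryRequest request, SolrQueryResponse response)
          throws IOException {
        writer.write(response.getValues().toString());
      }

      public String getContentType(SolrQueryRequest request, SolrQueryResponse response) {
        return "text/plain; charset=UTF-8";
      }
    }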

    * - * @version $Id$ + * * @since solr 1.3 */ public class RawResponseWriter implements BinaryQueryResponseWriter diff --git a/solr/src/java/org/apache/solr/response/ResultContext.java b/solr/src/java/org/apache/solr/response/ResultContext.java index f1da1d6c73a..e218df5e1f7 100644 --- a/solr/src/java/org/apache/solr/response/ResultContext.java +++ b/solr/src/java/org/apache/solr/response/ResultContext.java @@ -23,7 +23,7 @@ import org.apache.solr.search.DocList; /** * A class to hold the QueryResult and the Query * - * @version $Id$ + * */ public class ResultContext { public Query query; diff --git a/solr/src/java/org/apache/solr/response/SolrQueryResponse.java b/solr/src/java/org/apache/solr/response/SolrQueryResponse.java index a0ca1c4bfff..0afec1ee02b 100644 --- a/solr/src/java/org/apache/solr/response/SolrQueryResponse.java +++ b/solr/src/java/org/apache/solr/response/SolrQueryResponse.java @@ -55,7 +55,7 @@ import java.util.*; * that QueryResponseWriters will be able to deal with unexpected types. *

    * - * @version $Id$ + * * @since solr 0.9 */ public class SolrQueryResponse { diff --git a/solr/src/java/org/apache/solr/response/TextResponseWriter.java b/solr/src/java/org/apache/solr/response/TextResponseWriter.java index fae3e6b7da4..9a7d9cf5b0f 100644 --- a/solr/src/java/org/apache/solr/response/TextResponseWriter.java +++ b/solr/src/java/org/apache/solr/response/TextResponseWriter.java @@ -37,7 +37,7 @@ import org.apache.solr.search.ReturnFields; /** Base class for text-oriented response writers. * - * @version $Id$ + * */ public abstract class TextResponseWriter { diff --git a/solr/src/java/org/apache/solr/response/XMLResponseWriter.java b/solr/src/java/org/apache/solr/response/XMLResponseWriter.java index a883e9d82cc..95d46544434 100644 --- a/solr/src/java/org/apache/solr/response/XMLResponseWriter.java +++ b/solr/src/java/org/apache/solr/response/XMLResponseWriter.java @@ -24,7 +24,7 @@ import org.apache.solr.common.util.NamedList; import org.apache.solr.request.SolrQueryRequest; /** - * @version $Id$ + * */ public class XMLResponseWriter implements QueryResponseWriter { public void init(NamedList n) { diff --git a/solr/src/java/org/apache/solr/response/transform/DocIdAugmenterFactory.java b/solr/src/java/org/apache/solr/response/transform/DocIdAugmenterFactory.java index c4e19a1d171..8c23d31f348 100644 --- a/solr/src/java/org/apache/solr/response/transform/DocIdAugmenterFactory.java +++ b/solr/src/java/org/apache/solr/response/transform/DocIdAugmenterFactory.java @@ -21,7 +21,7 @@ import org.apache.solr.common.params.SolrParams; import org.apache.solr.request.SolrQueryRequest; /** - * @version $Id$ + * * @since solr 4.0 */ public class DocIdAugmenterFactory extends TransformerFactory diff --git a/solr/src/java/org/apache/solr/response/transform/DocTransformer.java b/solr/src/java/org/apache/solr/response/transform/DocTransformer.java index a882238446c..bcc827f6f55 100644 --- a/solr/src/java/org/apache/solr/response/transform/DocTransformer.java +++ b/solr/src/java/org/apache/solr/response/transform/DocTransformer.java @@ -24,7 +24,7 @@ import org.apache.solr.common.SolrDocument; /** * New instance for each request * - * @version $Id$ + * */ public abstract class DocTransformer { diff --git a/solr/src/java/org/apache/solr/response/transform/DocTransformers.java b/solr/src/java/org/apache/solr/response/transform/DocTransformers.java index 3aef76eda7f..6906f2f3ea9 100644 --- a/solr/src/java/org/apache/solr/response/transform/DocTransformers.java +++ b/solr/src/java/org/apache/solr/response/transform/DocTransformers.java @@ -27,7 +27,7 @@ import org.apache.solr.common.SolrDocument; /** * Transform a document before it gets sent out * - * @version $Id$ + * */ public class DocTransformers extends DocTransformer { diff --git a/solr/src/java/org/apache/solr/response/transform/ExplainAugmenterFactory.java b/solr/src/java/org/apache/solr/response/transform/ExplainAugmenterFactory.java index a65b0daed69..3b0d4297406 100644 --- a/solr/src/java/org/apache/solr/response/transform/ExplainAugmenterFactory.java +++ b/solr/src/java/org/apache/solr/response/transform/ExplainAugmenterFactory.java @@ -28,7 +28,7 @@ import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.util.SolrPluginUtils; /** - * @version $Id$ + * * @since solr 4.0 */ public class ExplainAugmenterFactory extends TransformerFactory diff --git a/solr/src/java/org/apache/solr/response/transform/RenameFieldsTransformer.java b/solr/src/java/org/apache/solr/response/transform/RenameFieldsTransformer.java index 
76a40f4397b..1c09b653e6f 100644 --- a/solr/src/java/org/apache/solr/response/transform/RenameFieldsTransformer.java +++ b/solr/src/java/org/apache/solr/response/transform/RenameFieldsTransformer.java @@ -22,7 +22,7 @@ import org.apache.solr.common.util.NamedList; /** * Return a field with a name that is different than what is indexed * - * @version $Id$ + * * @since solr 4.0 */ public class RenameFieldsTransformer extends DocTransformer diff --git a/solr/src/java/org/apache/solr/response/transform/ScoreAugmenter.java b/solr/src/java/org/apache/solr/response/transform/ScoreAugmenter.java index f4a1effc1ef..69cae827fd4 100644 --- a/solr/src/java/org/apache/solr/response/transform/ScoreAugmenter.java +++ b/solr/src/java/org/apache/solr/response/transform/ScoreAugmenter.java @@ -21,7 +21,7 @@ import org.apache.solr.common.SolrDocument; /** * Simple Augmenter that adds the score * - * @version $Id$ + * * @since solr 4.0 */ public class ScoreAugmenter extends TransformerWithContext diff --git a/solr/src/java/org/apache/solr/response/transform/ShardAugmenterFactory.java b/solr/src/java/org/apache/solr/response/transform/ShardAugmenterFactory.java index 670bbbea158..42979b60fe8 100644 --- a/solr/src/java/org/apache/solr/response/transform/ShardAugmenterFactory.java +++ b/solr/src/java/org/apache/solr/response/transform/ShardAugmenterFactory.java @@ -22,7 +22,7 @@ import org.apache.solr.request.SolrQueryRequest; /** - * @version $Id$ + * * @since solr 4.0 */ public class ShardAugmenterFactory extends TransformerFactory diff --git a/solr/src/java/org/apache/solr/response/transform/TransformContext.java b/solr/src/java/org/apache/solr/response/transform/TransformContext.java index 3bd8ad3cba0..dc71e58b691 100644 --- a/solr/src/java/org/apache/solr/response/transform/TransformContext.java +++ b/solr/src/java/org/apache/solr/response/transform/TransformContext.java @@ -23,7 +23,7 @@ import org.apache.solr.search.SolrIndexSearcher; /** * Environment variables for the transformed documents * - * @version $Id$ + * * @since solr 4.0 */ public class TransformContext diff --git a/solr/src/java/org/apache/solr/response/transform/TransformerFactory.java b/solr/src/java/org/apache/solr/response/transform/TransformerFactory.java index 832d009ceb7..9b316d67217 100644 --- a/solr/src/java/org/apache/solr/response/transform/TransformerFactory.java +++ b/solr/src/java/org/apache/solr/response/transform/TransformerFactory.java @@ -28,7 +28,7 @@ import org.apache.solr.util.plugin.NamedListInitializedPlugin; /** * New instance for each request * - * @version $Id$ + * */ public abstract class TransformerFactory implements NamedListInitializedPlugin { diff --git a/solr/src/java/org/apache/solr/response/transform/TransformerWithContext.java b/solr/src/java/org/apache/solr/response/transform/TransformerWithContext.java index 3b1ad181f69..060bfd55748 100644 --- a/solr/src/java/org/apache/solr/response/transform/TransformerWithContext.java +++ b/solr/src/java/org/apache/solr/response/transform/TransformerWithContext.java @@ -18,7 +18,7 @@ package org.apache.solr.response.transform; /** - * @version $Id$ + * * @since solr 4.0 */ public abstract class TransformerWithContext extends DocTransformer diff --git a/solr/src/java/org/apache/solr/response/transform/ValueAugmenterFactory.java b/solr/src/java/org/apache/solr/response/transform/ValueAugmenterFactory.java index f319f71d4da..a62e709f2d1 100644 --- a/solr/src/java/org/apache/solr/response/transform/ValueAugmenterFactory.java +++
b/solr/src/java/org/apache/solr/response/transform/ValueAugmenterFactory.java @@ -25,7 +25,7 @@ import org.apache.solr.common.util.NamedList; import org.apache.solr.request.SolrQueryRequest; /** - * @version $Id$ + * * @since solr 4.0 */ public class ValueAugmenterFactory extends TransformerFactory diff --git a/solr/src/java/org/apache/solr/response/transform/ValueSourceAugmenter.java b/solr/src/java/org/apache/solr/response/transform/ValueSourceAugmenter.java index 51d40dba98e..89d9b975d06 100644 --- a/solr/src/java/org/apache/solr/response/transform/ValueSourceAugmenter.java +++ b/solr/src/java/org/apache/solr/response/transform/ValueSourceAugmenter.java @@ -33,7 +33,7 @@ import org.apache.solr.search.function.ValueSource; * * NOT really sure how or if this could work... * - * @version $Id$ + * * @since solr 4.0 */ public class ValueSourceAugmenter extends DocTransformer diff --git a/solr/src/java/org/apache/solr/schema/BCDIntField.java b/solr/src/java/org/apache/solr/schema/BCDIntField.java index 534a7dcf1b0..2568db97491 100644 --- a/solr/src/java/org/apache/solr/schema/BCDIntField.java +++ b/solr/src/java/org/apache/solr/schema/BCDIntField.java @@ -27,7 +27,7 @@ import org.apache.solr.response.TextResponseWriter; import java.util.Map; import java.io.IOException; /** - * @version $Id$ + * */ public class BCDIntField extends FieldType { @Override diff --git a/solr/src/java/org/apache/solr/schema/BCDLongField.java b/solr/src/java/org/apache/solr/schema/BCDLongField.java index 5a4c4f01264..fa78ba2b190 100644 --- a/solr/src/java/org/apache/solr/schema/BCDLongField.java +++ b/solr/src/java/org/apache/solr/schema/BCDLongField.java @@ -19,7 +19,7 @@ package org.apache.solr.schema; import org.apache.lucene.document.Fieldable; /** - * @version $Id$ + * */ public class BCDLongField extends BCDIntField { @Override diff --git a/solr/src/java/org/apache/solr/schema/BCDStrField.java b/solr/src/java/org/apache/solr/schema/BCDStrField.java index ab27f81d7c1..a7bf0828688 100644 --- a/solr/src/java/org/apache/solr/schema/BCDStrField.java +++ b/solr/src/java/org/apache/solr/schema/BCDStrField.java @@ -19,7 +19,7 @@ package org.apache.solr.schema; import org.apache.lucene.document.Fieldable; /** - * @version $Id$ + * */ public class BCDStrField extends BCDIntField { /** diff --git a/solr/src/java/org/apache/solr/schema/BoolField.java b/solr/src/java/org/apache/solr/schema/BoolField.java index 3cd50247abc..10e1c862be7 100644 --- a/solr/src/java/org/apache/solr/schema/BoolField.java +++ b/solr/src/java/org/apache/solr/schema/BoolField.java @@ -38,7 +38,7 @@ import java.util.Map; import java.io.Reader; import java.io.IOException; /** - * @version $Id$ + * */ public class BoolField extends FieldType { @Override diff --git a/solr/src/java/org/apache/solr/schema/ByteField.java b/solr/src/java/org/apache/solr/schema/ByteField.java index 091f1e9fca8..ce5901045c7 100644 --- a/solr/src/java/org/apache/solr/schema/ByteField.java +++ b/solr/src/java/org/apache/solr/schema/ByteField.java @@ -30,7 +30,7 @@ import java.io.IOException; import java.util.Map; /** - * @version $Id$ + * */ public class ByteField extends FieldType { @Override diff --git a/solr/src/java/org/apache/solr/schema/DateField.java b/solr/src/java/org/apache/solr/schema/DateField.java index d7bcef0ca0a..f7e16bd6708 100644 --- a/solr/src/java/org/apache/solr/schema/DateField.java +++ b/solr/src/java/org/apache/solr/schema/DateField.java @@ -93,7 +93,7 @@ import java.util.*; * acronym UTC was chosen as a compromise." 
* * - * @version $Id$ + * * @see XML schema part 2 * */ diff --git a/solr/src/java/org/apache/solr/schema/DoubleField.java b/solr/src/java/org/apache/solr/schema/DoubleField.java index 80f5e988f62..c07f43d4703 100644 --- a/solr/src/java/org/apache/solr/schema/DoubleField.java +++ b/solr/src/java/org/apache/solr/schema/DoubleField.java @@ -30,7 +30,7 @@ import java.io.IOException; import java.util.Map; /** - * @version $Id$ + * */ public class DoubleField extends FieldType { @Override diff --git a/solr/src/java/org/apache/solr/schema/ExternalFileField.java b/solr/src/java/org/apache/solr/schema/ExternalFileField.java index 2eb539426b8..90ce98d4ca1 100755 --- a/solr/src/java/org/apache/solr/schema/ExternalFileField.java +++ b/solr/src/java/org/apache/solr/schema/ExternalFileField.java @@ -51,7 +51,7 @@ import java.io.IOException; *

    The external file may be sorted or unsorted by the key field, but it will be substantially slower (untested) if it isn't sorted. *

    Fields of this type may currently only be used as a ValueSource in a FunctionQuery. * - * @version $Id$ + * */ public class ExternalFileField extends FieldType { private FieldType ftype; diff --git a/solr/src/java/org/apache/solr/schema/FieldProperties.java b/solr/src/java/org/apache/solr/schema/FieldProperties.java index c1cc519b1f8..07cbd148314 100644 --- a/solr/src/java/org/apache/solr/schema/FieldProperties.java +++ b/solr/src/java/org/apache/solr/schema/FieldProperties.java @@ -21,7 +21,7 @@ import java.util.Map; import java.util.HashMap; /** - * @version $Id$ + * * * @lucene.internal */ diff --git a/solr/src/java/org/apache/solr/schema/FieldType.java b/solr/src/java/org/apache/solr/schema/FieldType.java index 325064bf9b7..43daec385bf 100644 --- a/solr/src/java/org/apache/solr/schema/FieldType.java +++ b/solr/src/java/org/apache/solr/schema/FieldType.java @@ -50,7 +50,7 @@ import java.util.Map; /** * Base class for all field types used by an index schema. * - * @version $Id$ + * */ public abstract class FieldType extends FieldProperties { public static final Logger log = LoggerFactory.getLogger(FieldType.class); diff --git a/solr/src/java/org/apache/solr/schema/FloatField.java b/solr/src/java/org/apache/solr/schema/FloatField.java index b7c7fbfbb96..4c9c46af0f1 100644 --- a/solr/src/java/org/apache/solr/schema/FloatField.java +++ b/solr/src/java/org/apache/solr/schema/FloatField.java @@ -29,7 +29,7 @@ import org.apache.solr.response.TextResponseWriter; import java.util.Map; import java.io.IOException; /** - * @version $Id$ + * */ public class FloatField extends FieldType { @Override diff --git a/solr/src/java/org/apache/solr/schema/IndexSchema.java b/solr/src/java/org/apache/solr/schema/IndexSchema.java index 818f8d85a56..4fd106d25e0 100644 --- a/solr/src/java/org/apache/solr/schema/IndexSchema.java +++ b/solr/src/java/org/apache/solr/schema/IndexSchema.java @@ -62,7 +62,7 @@ import org.slf4j.LoggerFactory; * IndexSchema contains information about the valid fields in an index * and the types of those fields. 
* - * @version $Id$ + * */ public final class IndexSchema { public static final String DEFAULT_SCHEMA_FILE = "schema.xml"; diff --git a/solr/src/java/org/apache/solr/schema/IntField.java b/solr/src/java/org/apache/solr/schema/IntField.java index 7b681e59dfc..3199ff8e1fd 100644 --- a/solr/src/java/org/apache/solr/schema/IntField.java +++ b/solr/src/java/org/apache/solr/schema/IntField.java @@ -29,7 +29,7 @@ import org.apache.solr.response.TextResponseWriter; import java.util.Map; import java.io.IOException; /** - * @version $Id$ + * */ public class IntField extends FieldType { @Override diff --git a/solr/src/java/org/apache/solr/schema/LongField.java b/solr/src/java/org/apache/solr/schema/LongField.java index 239dc1f2a70..7c0d2ac8f29 100644 --- a/solr/src/java/org/apache/solr/schema/LongField.java +++ b/solr/src/java/org/apache/solr/schema/LongField.java @@ -29,7 +29,7 @@ import org.apache.solr.search.function.LongFieldSource; import java.io.IOException; import java.util.Map; /** - * @version $Id$ + * */ public class LongField extends FieldType { @Override diff --git a/solr/src/java/org/apache/solr/schema/RandomSortField.java b/solr/src/java/org/apache/solr/schema/RandomSortField.java index f5b1ee0a042..80353fd567c 100644 --- a/solr/src/java/org/apache/solr/schema/RandomSortField.java +++ b/solr/src/java/org/apache/solr/schema/RandomSortField.java @@ -59,7 +59,7 @@ import org.apache.solr.search.function.ValueSource; * * Note that multiple calls to the same URL will return the same sorting order. * - * @version $Id$ + * * @since solr 1.3 */ public class RandomSortField extends FieldType { diff --git a/solr/src/java/org/apache/solr/schema/SchemaField.java b/solr/src/java/org/apache/solr/schema/SchemaField.java index bb2d3e7a523..4425753f19d 100644 --- a/solr/src/java/org/apache/solr/schema/SchemaField.java +++ b/solr/src/java/org/apache/solr/schema/SchemaField.java @@ -31,7 +31,7 @@ import java.io.IOException; /** * Encapsulates all information about a Field in a Solr Schema * - * @version $Id$ + * */ public final class SchemaField extends FieldProperties { final String name; diff --git a/solr/src/java/org/apache/solr/schema/SortableDoubleField.java b/solr/src/java/org/apache/solr/schema/SortableDoubleField.java index 0a0d53a6fe1..e31a6719820 100644 --- a/solr/src/java/org/apache/solr/schema/SortableDoubleField.java +++ b/solr/src/java/org/apache/solr/schema/SortableDoubleField.java @@ -35,7 +35,7 @@ import org.apache.solr.response.TextResponseWriter; import java.util.Map; import java.io.IOException; /** - * @version $Id$ + * * * @deprecated use {@link DoubleField} or {@link TrieDoubleField} - will be removed in 5.x */ diff --git a/solr/src/java/org/apache/solr/schema/SortableFloatField.java b/solr/src/java/org/apache/solr/schema/SortableFloatField.java index 876a5f9eede..d5a5315b1aa 100644 --- a/solr/src/java/org/apache/solr/schema/SortableFloatField.java +++ b/solr/src/java/org/apache/solr/schema/SortableFloatField.java @@ -35,7 +35,7 @@ import org.apache.solr.response.TextResponseWriter; import java.util.Map; import java.io.IOException; /** - * @version $Id$ + * * * @deprecated use {@link FloatField} or {@link TrieFloatField} - will be removed in 5.x */ diff --git a/solr/src/java/org/apache/solr/schema/SortableIntField.java b/solr/src/java/org/apache/solr/schema/SortableIntField.java index 74f52f80b91..e13c2c3af07 100644 --- a/solr/src/java/org/apache/solr/schema/SortableIntField.java +++ b/solr/src/java/org/apache/solr/schema/SortableIntField.java @@ -35,7 +35,7 @@ import 
org.apache.solr.response.TextResponseWriter; import java.util.Map; import java.io.IOException; /** - * @version $Id$ + * * * @deprecated use {@link IntField} or {@link TrieIntField} - will be removed in 5.x */ diff --git a/solr/src/java/org/apache/solr/schema/SortableLongField.java b/solr/src/java/org/apache/solr/schema/SortableLongField.java index 6a6e8b49ee2..78344312463 100644 --- a/solr/src/java/org/apache/solr/schema/SortableLongField.java +++ b/solr/src/java/org/apache/solr/schema/SortableLongField.java @@ -35,7 +35,7 @@ import org.apache.solr.response.TextResponseWriter; import java.util.Map; import java.io.IOException; /** - * @version $Id$ + * * * @deprecated use {@link LongField} or {@link TrieLongField} - will be removed in 5.x */ diff --git a/solr/src/java/org/apache/solr/schema/StrField.java b/solr/src/java/org/apache/solr/schema/StrField.java index ae07be17121..c8d15aa3765 100644 --- a/solr/src/java/org/apache/solr/schema/StrField.java +++ b/solr/src/java/org/apache/solr/schema/StrField.java @@ -27,7 +27,7 @@ import org.apache.solr.search.QParser; import java.util.Map; import java.io.IOException; /** - * @version $Id$ + * */ public class StrField extends FieldType { @Override diff --git a/solr/src/java/org/apache/solr/schema/TextField.java b/solr/src/java/org/apache/solr/schema/TextField.java index 72782252dac..4fab9466972 100644 --- a/solr/src/java/org/apache/solr/schema/TextField.java +++ b/solr/src/java/org/apache/solr/schema/TextField.java @@ -43,7 +43,7 @@ import java.io.StringReader; /** TextField is the basic type for configurable text analysis. * Analyzers for field types using this implementation should be defined in the schema. - * @version $Id$ + * */ public class TextField extends FieldType { protected boolean autoGeneratePhraseQueries; diff --git a/solr/src/java/org/apache/solr/schema/TrieField.java b/solr/src/java/org/apache/solr/schema/TrieField.java index 608596a8dc7..8fbde52d84c 100644 --- a/solr/src/java/org/apache/solr/schema/TrieField.java +++ b/solr/src/java/org/apache/solr/schema/TrieField.java @@ -55,7 +55,7 @@ import java.util.Date; * Note that if you use a precisionStep of 32 for int/float and 64 for long/double/date, then multiple terms will not be * generated, range search will be no faster than any other number field, but sorting will still be possible. * - * @version $Id$ + * * @see org.apache.lucene.search.NumericRangeQuery * @since solr 1.4 */ diff --git a/solr/src/java/org/apache/solr/schema/UUIDField.java b/solr/src/java/org/apache/solr/schema/UUIDField.java index f1bfa424bff..71da5615f34 100644 --- a/solr/src/java/org/apache/solr/schema/UUIDField.java +++ b/solr/src/java/org/apache/solr/schema/UUIDField.java @@ -33,7 +33,7 @@ import org.apache.solr.response.TextResponseWriter; * * @see UUID#toString * @see UUID#randomUUID - * @version $Id$ + * */ public class UUIDField extends StrField { private static final String NEW = "NEW"; diff --git a/solr/src/java/org/apache/solr/search/BitDocSet.java b/solr/src/java/org/apache/solr/search/BitDocSet.java index d5cd85d6d2e..2ffe6f086dc 100644 --- a/solr/src/java/org/apache/solr/search/BitDocSet.java +++ b/solr/src/java/org/apache/solr/search/BitDocSet.java @@ -25,7 +25,7 @@ import org.apache.lucene.search.DocIdSetIterator; * BitDocSet represents an unordered set of Lucene Document Ids * using a BitSet. A set bit represents inclusion in the set for that document. 
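The BitDocSet hunk above describes a bit-per-document set. A rough sketch of how such a set is built and queried, assuming the OpenBitSet-backed constructor of this era (the document ids are made up):

    import org.apache.lucene.util.OpenBitSet;
    import org.apache.solr.search.BitDocSet;
    import org.apache.solr.search.DocSet;

    public class BitDocSetExample {
      public static void main(String[] args) {
        OpenBitSet bits = new OpenBitSet(1000);   // one bit per internal document id
        bits.set(3);
        bits.set(42);

        DocSet docs = new BitDocSet(bits);
        System.out.println(docs.size());          // 2
        System.out.println(docs.exists(42));      // true: bit 42 is set
      }
    }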
* - * @version $Id$ + * * @since solr 0.9 */ public class BitDocSet extends DocSetBase { diff --git a/solr/src/java/org/apache/solr/search/CacheConfig.java b/solr/src/java/org/apache/solr/search/CacheConfig.java index 736b2d98724..f4dc05480c5 100644 --- a/solr/src/java/org/apache/solr/search/CacheConfig.java +++ b/solr/src/java/org/apache/solr/search/CacheConfig.java @@ -34,7 +34,7 @@ import javax.xml.xpath.XPathConstants; * stored in the solrconfig.xml file, and implements a * factory to create caches. * - * @version $Id$ + * */ public class CacheConfig { private String nodeName; diff --git a/solr/src/java/org/apache/solr/search/CacheRegenerator.java b/solr/src/java/org/apache/solr/search/CacheRegenerator.java index a72cab1ac5d..9c96c57d519 100644 --- a/solr/src/java/org/apache/solr/search/CacheRegenerator.java +++ b/solr/src/java/org/apache/solr/search/CacheRegenerator.java @@ -26,7 +26,7 @@ import java.io.IOException; * Implementations should have a noarg constructor and be thread safe (a single instance will be * used for all cache autowarmings). * - * @version $Id$ + * */ public interface CacheRegenerator { /** diff --git a/solr/src/java/org/apache/solr/search/DisMaxQParser.java b/solr/src/java/org/apache/solr/search/DisMaxQParser.java index f8398a1a63f..483340509d3 100644 --- a/solr/src/java/org/apache/solr/search/DisMaxQParser.java +++ b/solr/src/java/org/apache/solr/search/DisMaxQParser.java @@ -39,7 +39,7 @@ import java.util.Map; *

    * Note: This API is experimental and may change in non backward-compatible ways in the future * - * @version $Id$ + * */ public class DisMaxQParser extends QParser { diff --git a/solr/src/java/org/apache/solr/search/DocIterator.java b/solr/src/java/org/apache/solr/search/DocIterator.java index 0481562cf7b..299a6261116 100644 --- a/solr/src/java/org/apache/solr/search/DocIterator.java +++ b/solr/src/java/org/apache/solr/search/DocIterator.java @@ -27,7 +27,7 @@ import java.util.Iterator; * DocIterator instance was retrieved. *

    * - * @version $Id$ + * */ public interface DocIterator extends Iterator { // already declared in superclass, redeclaring prevents javadoc inheritance diff --git a/solr/src/java/org/apache/solr/search/DocList.java b/solr/src/java/org/apache/solr/search/DocList.java index eb70a3f34ad..b344e5f6cd0 100644 --- a/solr/src/java/org/apache/solr/search/DocList.java +++ b/solr/src/java/org/apache/solr/search/DocList.java @@ -23,7 +23,7 @@ package org.apache.solr.search; * This list contains a subset of the complete list of documents actually matched: size() * document ids starting at offset(). * - * @version $Id$ + * * @since solr 0.9 */ public interface DocList extends DocSet { diff --git a/solr/src/java/org/apache/solr/search/DocListAndSet.java b/solr/src/java/org/apache/solr/search/DocListAndSet.java index 7549a325289..c304c7732e6 100644 --- a/solr/src/java/org/apache/solr/search/DocListAndSet.java +++ b/solr/src/java/org/apache/solr/search/DocListAndSet.java @@ -28,7 +28,7 @@ package org.apache.solr.search; * Oh, if only java had "out" parameters or multiple return args... *
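Given the DocList/DocIterator contract spelled out above (size() document ids starting at offset(), iterated within the scope the list was retrieved in), typical consumption looks like this sketch; the DocList is assumed to come from a search:

    import org.apache.solr.search.DocIterator;
    import org.apache.solr.search.DocList;

    public class DocListExample {
      // 'docs' would typically come from SolrIndexSearcher.getDocList(...).
      static void dump(DocList docs) {
        DocIterator iter = docs.iterator();
        while (iter.hasNext()) {
          int docId = iter.nextDoc();                     // internal Lucene doc id
          float score = docs.hasScores() ? iter.score()   // valid only if scores were requested
                                         : 0f;
          System.out.println(docId + " " + score);
        }
      }
    }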

    * - * @version $Id$ + * * @since solr 0.9 */ public final class DocListAndSet { diff --git a/solr/src/java/org/apache/solr/search/DocSet.java b/solr/src/java/org/apache/solr/search/DocSet.java index fcf8785cce4..dd69c976e7c 100644 --- a/solr/src/java/org/apache/solr/search/DocSet.java +++ b/solr/src/java/org/apache/solr/search/DocSet.java @@ -35,7 +35,7 @@ import java.io.IOException; * a cache and could be shared. *

    * - * @version $Id$ + * * @since solr 0.9 */ public interface DocSet /* extends Collection */ { diff --git a/solr/src/java/org/apache/solr/search/DocSetHitCollector.java b/solr/src/java/org/apache/solr/search/DocSetHitCollector.java index 3caafa99727..3557eff71d4 100644 --- a/solr/src/java/org/apache/solr/search/DocSetHitCollector.java +++ b/solr/src/java/org/apache/solr/search/DocSetHitCollector.java @@ -25,7 +25,7 @@ import org.apache.lucene.index.IndexReader.AtomicReaderContext; import java.io.IOException; /** - * @version $Id$ + * */ class DocSetCollector extends Collector { diff --git a/solr/src/java/org/apache/solr/search/DocSlice.java b/solr/src/java/org/apache/solr/search/DocSlice.java index 51fc6f12c3a..148ea6a7b64 100644 --- a/solr/src/java/org/apache/solr/search/DocSlice.java +++ b/solr/src/java/org/apache/solr/search/DocSlice.java @@ -22,7 +22,7 @@ import java.util.Arrays; /** * DocSlice implements DocList as an array of docids and optional scores. * - * @version $Id$ + * * @since solr 0.9 */ public class DocSlice extends DocSetBase implements DocList { diff --git a/solr/src/java/org/apache/solr/search/FastLRUCache.java b/solr/src/java/org/apache/solr/search/FastLRUCache.java index 594f37f2b56..fabf07f7899 100644 --- a/solr/src/java/org/apache/solr/search/FastLRUCache.java +++ b/solr/src/java/org/apache/solr/search/FastLRUCache.java @@ -38,7 +38,7 @@ import java.util.concurrent.CopyOnWriteArrayList; *

    * Also see SolrCaching * - * @version $Id$ + * * @see org.apache.solr.common.util.ConcurrentLRUCache * @see org.apache.solr.search.SolrCache * @since solr 1.4 diff --git a/solr/src/java/org/apache/solr/search/HashDocSet.java b/solr/src/java/org/apache/solr/search/HashDocSet.java index d5dd5023261..691211c0895 100644 --- a/solr/src/java/org/apache/solr/search/HashDocSet.java +++ b/solr/src/java/org/apache/solr/search/HashDocSet.java @@ -26,7 +26,7 @@ import org.apache.lucene.util.BitUtil; * in the set because it takes up less memory and is faster to iterate and take * set intersections. * - * @version $Id$ + * * @since solr 0.9 */ public final class HashDocSet extends DocSetBase { diff --git a/solr/src/java/org/apache/solr/search/LRUCache.java b/solr/src/java/org/apache/solr/search/LRUCache.java index 00f8379f7be..ca28444fe5d 100644 --- a/solr/src/java/org/apache/solr/search/LRUCache.java +++ b/solr/src/java/org/apache/solr/search/LRUCache.java @@ -29,7 +29,7 @@ import java.net.URL; /** - * @version $Id$ + * */ public class LRUCache extends SolrCacheBase implements SolrCache { diff --git a/solr/src/java/org/apache/solr/search/QParser.java b/solr/src/java/org/apache/solr/search/QParser.java index 9b845e65d7e..9ae8f4dbd93 100755 --- a/solr/src/java/org/apache/solr/search/QParser.java +++ b/solr/src/java/org/apache/solr/search/QParser.java @@ -31,7 +31,7 @@ import java.util.*; /** * Note: This API is experimental and may change in non backward-compatible ways in the future * - * @version $Id$ + * */ public abstract class QParser { protected String qstr; diff --git a/solr/src/java/org/apache/solr/search/QueryParsing.java b/solr/src/java/org/apache/solr/search/QueryParsing.java index 4ee880c4f5b..66c5da7e278 100644 --- a/solr/src/java/org/apache/solr/search/QueryParsing.java +++ b/solr/src/java/org/apache/solr/search/QueryParsing.java @@ -54,7 +54,7 @@ import java.util.Map; /** * Collection of static utilities useful for query parsing. 
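The HashDocSet entry above notes that a hash of ids beats a bitset when the set is sparse: less memory, faster iteration and intersection. A sketch of constructing one from a small id array, assuming the (int[], offset, len) constructor (the ids are invented):

    import org.apache.solr.search.DocSet;
    import org.apache.solr.search.HashDocSet;

    public class HashDocSetExample {
      public static void main(String[] args) {
        int[] ids = { 7, 2048, 999999 };                // a sparse handful of doc ids
        DocSet sparse = new HashDocSet(ids, 0, ids.length);
        System.out.println(sparse.exists(2048));        // true
        System.out.println(sparse.size());              // 3
      }
    }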
* - * @version $Id$ + * */ public class QueryParsing { public static final String OP = "q.op"; // the SolrParam used to override the QueryParser "default operator" diff --git a/solr/src/java/org/apache/solr/search/QueryResultKey.java b/solr/src/java/org/apache/solr/search/QueryResultKey.java index 7c1e080d7a4..679d3902d73 100644 --- a/solr/src/java/org/apache/solr/search/QueryResultKey.java +++ b/solr/src/java/org/apache/solr/search/QueryResultKey.java @@ -24,7 +24,7 @@ import java.io.IOException; import java.util.List; /** A hash key encapsulating a query, a list of filters, and a sort - * @version $Id$ + * */ public final class QueryResultKey { final Query query; diff --git a/solr/src/java/org/apache/solr/search/QueryUtils.java b/solr/src/java/org/apache/solr/search/QueryUtils.java index 931776cf491..f8cd6e9e4d6 100755 --- a/solr/src/java/org/apache/solr/search/QueryUtils.java +++ b/solr/src/java/org/apache/solr/search/QueryUtils.java @@ -25,7 +25,7 @@ import org.apache.lucene.search.MatchAllDocsQuery; import java.util.List; /** - * @version $Id$ + * */ public class QueryUtils { diff --git a/solr/src/java/org/apache/solr/search/ReturnFields.java b/solr/src/java/org/apache/solr/search/ReturnFields.java index 741c8d8b700..3b66b126aa6 100644 --- a/solr/src/java/org/apache/solr/search/ReturnFields.java +++ b/solr/src/java/org/apache/solr/search/ReturnFields.java @@ -42,7 +42,7 @@ import org.slf4j.LoggerFactory; /** * A class representing the return fields * - * @version $Id$ + * * @since solr 4.0 */ public class ReturnFields diff --git a/solr/src/java/org/apache/solr/search/SolrCache.java b/solr/src/java/org/apache/solr/search/SolrCache.java index a9154360cc2..2a870436b9c 100644 --- a/solr/src/java/org/apache/solr/search/SolrCache.java +++ b/solr/src/java/org/apache/solr/search/SolrCache.java @@ -28,7 +28,7 @@ import java.io.IOException; /** * Primary API for dealing with Solr's internal caches. * - * @version $Id$ + * */ public interface SolrCache extends SolrInfoMBean { public final static Logger log = LoggerFactory.getLogger(SolrCache.class); diff --git a/solr/src/java/org/apache/solr/search/SolrFieldCacheMBean.java b/solr/src/java/org/apache/solr/search/SolrFieldCacheMBean.java index aa81b7b7205..82ef1f043c3 100644 --- a/solr/src/java/org/apache/solr/search/SolrFieldCacheMBean.java +++ b/solr/src/java/org/apache/solr/search/SolrFieldCacheMBean.java @@ -33,7 +33,7 @@ import org.apache.lucene.util.FieldCacheSanityChecker.Insanity; /** * A SolrInfoMBean that provides introspection of the Lucene FieldCache; this is NOT a cache that is managed by Solr. * - * @version $Id$ + * */ public class SolrFieldCacheMBean implements SolrInfoMBean { diff --git a/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java b/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java index ce9f31abb1c..f9f355b83e4 100644 --- a/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java +++ b/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java @@ -54,7 +54,7 @@ import java.util.concurrent.atomic.AtomicLong; * SolrIndexSearcher adds schema awareness and caching functionality * over the lucene IndexSearcher.
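SolrIndexSearcher, per the hunk just above, layers schema awareness and caching over the plain IndexSearcher; the usual entry point is getDocList. A sketch, assuming the (Query, List&lt;Query&gt;, Sort, offset, len, flags) overload of this era:

    import java.io.IOException;
    import java.util.List;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.Sort;
    import org.apache.solr.search.DocList;
    import org.apache.solr.search.SolrIndexSearcher;

    public class SearcherExample {
      // Fetch the first page of matches with scores; 'filters' may be null.
      static DocList firstPage(SolrIndexSearcher searcher, Query q, List<Query> filters)
          throws IOException {
        return searcher.getDocList(q, filters, Sort.RELEVANCE, 0, 10,
                                   SolrIndexSearcher.GET_SCORES);
      }
    }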
* - * @version $Id$ + * * @since solr 0.9 */ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean { diff --git a/solr/src/java/org/apache/solr/search/Sorting.java b/solr/src/java/org/apache/solr/search/Sorting.java index e116b816778..325871c8d7a 100644 --- a/solr/src/java/org/apache/solr/search/Sorting.java +++ b/solr/src/java/org/apache/solr/search/Sorting.java @@ -22,7 +22,7 @@ import org.apache.lucene.search.*; /** * Extra lucene sorting utilities & convenience methods * - * @version $Id$ + * * */ diff --git a/solr/src/java/org/apache/solr/search/function/ByteFieldSource.java b/solr/src/java/org/apache/solr/search/function/ByteFieldSource.java index 08ec2129a35..6362463d796 100644 --- a/solr/src/java/org/apache/solr/search/function/ByteFieldSource.java +++ b/solr/src/java/org/apache/solr/search/function/ByteFieldSource.java @@ -28,7 +28,7 @@ import java.util.Map; * using getInts() * and makes those values available as other numeric types, casting as needed. * * - * @version $Id$ + * */ public class ByteFieldSource extends NumericFieldCacheSource { diff --git a/solr/src/java/org/apache/solr/search/function/DocValues.java b/solr/src/java/org/apache/solr/search/function/DocValues.java index fec021934a8..af71b15d3fb 100644 --- a/solr/src/java/org/apache/solr/search/function/DocValues.java +++ b/solr/src/java/org/apache/solr/search/function/DocValues.java @@ -27,7 +27,7 @@ import org.apache.solr.search.MutableValueFloat; * Represents field values as different types. * Normally created via a {@link ValueSource} for a particular field and reader. * - * @version $Id$ + * */ // DocValues is distinct from ValueSource because diff --git a/solr/src/java/org/apache/solr/search/function/DoubleFieldSource.java b/solr/src/java/org/apache/solr/search/function/DoubleFieldSource.java index 789af74c6f4..0d206179ccb 100644 --- a/solr/src/java/org/apache/solr/search/function/DoubleFieldSource.java +++ b/solr/src/java/org/apache/solr/search/function/DoubleFieldSource.java @@ -33,7 +33,7 @@ import java.util.Map; * using getFloats() * and makes those values available as other numeric types, casting as needed. * - * @version $Id$ + * */ public class DoubleFieldSource extends NumericFieldCacheSource { diff --git a/solr/src/java/org/apache/solr/search/function/FieldCacheSource.java b/solr/src/java/org/apache/solr/search/function/FieldCacheSource.java index df80cecae03..2dd1d8703ef 100644 --- a/solr/src/java/org/apache/solr/search/function/FieldCacheSource.java +++ b/solr/src/java/org/apache/solr/search/function/FieldCacheSource.java @@ -23,7 +23,7 @@ import org.apache.lucene.search.FieldCache; * A base class for ValueSource implementations that retrieve values for * a single field from the {@link org.apache.lucene.search.FieldCache}. * - * @version $Id$ + * */ public abstract class FieldCacheSource extends ValueSource { protected String field; diff --git a/solr/src/java/org/apache/solr/search/function/FileFloatSource.java b/solr/src/java/org/apache/solr/search/function/FileFloatSource.java index 3e25cd301c7..0a12d778c84 100755 --- a/solr/src/java/org/apache/solr/search/function/FileFloatSource.java +++ b/solr/src/java/org/apache/solr/search/function/FileFloatSource.java @@ -51,7 +51,7 @@ import org.slf4j.LoggerFactory; /** * Obtains float field values from an external file. 
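FileFloatSource, patched above, pulls per-document floats from a keyed text file rather than from the index; the same mechanism backs the ExternalFileField entry further up. The layout is one key=value pair per line, keyed on the configured keyField. A made-up sample of such a file (typically named external_&lt;fieldname&gt; under the data directory) for a popularity field:

    doc1=1.5
    doc2=0.25
    doc42=3.75

Keys that match no document are skipped, which is what lets the file be updated independently of the index.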
- * @version $Id$ + * */ public class FileFloatSource extends ValueSource { diff --git a/solr/src/java/org/apache/solr/search/function/FloatFieldSource.java b/solr/src/java/org/apache/solr/search/function/FloatFieldSource.java index 6158420616a..abcd6342f15 100644 --- a/solr/src/java/org/apache/solr/search/function/FloatFieldSource.java +++ b/solr/src/java/org/apache/solr/search/function/FloatFieldSource.java @@ -32,7 +32,7 @@ import org.apache.solr.search.MutableValueFloat; * using getFloats() * and makes those values available as other numeric types, casting as needed. * - * @version $Id$ + * */ public class FloatFieldSource extends NumericFieldCacheSource { diff --git a/solr/src/java/org/apache/solr/search/function/FunctionQuery.java b/solr/src/java/org/apache/solr/search/function/FunctionQuery.java index beb43bc22ac..e7d4be011cb 100644 --- a/solr/src/java/org/apache/solr/search/function/FunctionQuery.java +++ b/solr/src/java/org/apache/solr/search/function/FunctionQuery.java @@ -34,7 +34,7 @@ import java.util.Map; * * Note: This API is experimental and may change in non backward-compatible ways in the future * - * @version $Id$ + * */ public class FunctionQuery extends Query { ValueSource func; diff --git a/solr/src/java/org/apache/solr/search/function/IntFieldSource.java b/solr/src/java/org/apache/solr/search/function/IntFieldSource.java index e2c94f95804..8f7a58039ea 100644 --- a/solr/src/java/org/apache/solr/search/function/IntFieldSource.java +++ b/solr/src/java/org/apache/solr/search/function/IntFieldSource.java @@ -32,7 +32,7 @@ import java.util.Map; * Obtains int field values from the {@link org.apache.lucene.search.FieldCache} * using getInts() * and makes those values available as other numeric types, casting as needed. * - * @version $Id$ + * */ public class IntFieldSource extends NumericFieldCacheSource { diff --git a/solr/src/java/org/apache/solr/search/function/LinearFloatFunction.java b/solr/src/java/org/apache/solr/search/function/LinearFloatFunction.java index 58204c0f7cd..cbfafbd8f6e 100644 --- a/solr/src/java/org/apache/solr/search/function/LinearFloatFunction.java +++ b/solr/src/java/org/apache/solr/search/function/LinearFloatFunction.java @@ -29,7 +29,7 @@ import java.util.Map; *
    * Normally Used as an argument to a {@link FunctionQuery} * - * @version $Id$ + * */ public class LinearFloatFunction extends ValueSource { protected final ValueSource source; diff --git a/solr/src/java/org/apache/solr/search/function/LongFieldSource.java b/solr/src/java/org/apache/solr/search/function/LongFieldSource.java index 8e75f924512..03ff11e23b3 100644 --- a/solr/src/java/org/apache/solr/search/function/LongFieldSource.java +++ b/solr/src/java/org/apache/solr/search/function/LongFieldSource.java @@ -34,7 +34,7 @@ import java.util.Map; * using getFloats() * and makes those values available as other numeric types, casting as needed. * - * @version $Id$ + * */ public class LongFieldSource extends NumericFieldCacheSource { diff --git a/solr/src/java/org/apache/solr/search/function/NumericFieldCacheSource.java b/solr/src/java/org/apache/solr/search/function/NumericFieldCacheSource.java index 710a00973a5..f7fde5cdd8b 100644 --- a/solr/src/java/org/apache/solr/search/function/NumericFieldCacheSource.java +++ b/solr/src/java/org/apache/solr/search/function/NumericFieldCacheSource.java @@ -22,7 +22,7 @@ import org.apache.lucene.search.cache.CachedArrayCreator; /** * - * @version $Id$ + * */ public abstract class NumericFieldCacheSource extends FieldCacheSource { protected final CachedArrayCreator creator; diff --git a/solr/src/java/org/apache/solr/search/function/OrdFieldSource.java b/solr/src/java/org/apache/solr/search/function/OrdFieldSource.java index 8e188983d1d..f4ac6133597 100644 --- a/solr/src/java/org/apache/solr/search/function/OrdFieldSource.java +++ b/solr/src/java/org/apache/solr/search/function/OrdFieldSource.java @@ -42,7 +42,7 @@ import java.util.Map; *
    WARNING: as of Solr 1.4, ord() and rord() can cause excess memory use since they must use a FieldCache entry * at the top level reader, while sorting and function queries now use entries at the segment level. Hence sorting * or using a different function query, in addition to ord()/rord() will double memory use. - * @version $Id$ + * */ public class OrdFieldSource extends ValueSource { diff --git a/solr/src/java/org/apache/solr/search/function/RangeMapFloatFunction.java b/solr/src/java/org/apache/solr/search/function/RangeMapFloatFunction.java index 1703c2f3dac..403f1b3d578 100755 --- a/solr/src/java/org/apache/solr/search/function/RangeMapFloatFunction.java +++ b/solr/src/java/org/apache/solr/search/function/RangeMapFloatFunction.java @@ -29,7 +29,7 @@ import java.util.Map; *
    * Normally Used as an argument to a {@link org.apache.solr.search.function.FunctionQuery} * - * @version $Id$ + * */ public class RangeMapFloatFunction extends ValueSource { protected final ValueSource source; diff --git a/solr/src/java/org/apache/solr/search/function/ReciprocalFloatFunction.java b/solr/src/java/org/apache/solr/search/function/ReciprocalFloatFunction.java index 430549c0e1c..b28ed88a7e6 100644 --- a/solr/src/java/org/apache/solr/search/function/ReciprocalFloatFunction.java +++ b/solr/src/java/org/apache/solr/search/function/ReciprocalFloatFunction.java @@ -39,7 +39,7 @@ import java.util.Map; * * @see FunctionQuery * - * @version $Id$ + * */ public class ReciprocalFloatFunction extends ValueSource { protected final ValueSource source; diff --git a/solr/src/java/org/apache/solr/search/function/ReverseOrdFieldSource.java b/solr/src/java/org/apache/solr/search/function/ReverseOrdFieldSource.java index a7508ce8a35..3eba69a4359 100644 --- a/solr/src/java/org/apache/solr/search/function/ReverseOrdFieldSource.java +++ b/solr/src/java/org/apache/solr/search/function/ReverseOrdFieldSource.java @@ -43,7 +43,7 @@ import java.util.Map; * at the top level reader, while sorting and function queries now use entries at the segment level. Hence sorting * or using a different function query, in addition to ord()/rord() will double memory use. * - * @version $Id$ + * */ public class ReverseOrdFieldSource extends ValueSource { diff --git a/solr/src/java/org/apache/solr/search/function/ValueSource.java b/solr/src/java/org/apache/solr/search/function/ValueSource.java index 5b4bd29c9d7..52189c05bf1 100644 --- a/solr/src/java/org/apache/solr/search/function/ValueSource.java +++ b/solr/src/java/org/apache/solr/search/function/ValueSource.java @@ -39,7 +39,7 @@ import java.util.Map; *
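ReciprocalFloatFunction, touched above, computes a/(m*x + b) over another ValueSource, so when x is an age the result is large for fresh documents and decays toward zero for old ones. A sketch, assuming the (source, m, a, b) constructor order; the 3.16e-11 constant is roughly 1/(one year in milliseconds):

    import org.apache.solr.search.function.ReciprocalFloatFunction;
    import org.apache.solr.search.function.ValueSource;

    public class RecipExample {
      // f(x) = a/(m*x + b); with a=b=1 the value is 1 at x=0 and falls off smoothly.
      static ValueSource freshness(ValueSource ageMillis) {
        return new ReciprocalFloatFunction(ageMillis, 3.16e-11f, 1f, 1f);
      }
    }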
    * Often used when creating a {@link FunctionQuery}. * - * @version $Id$ + * */ public abstract class ValueSource implements Serializable { diff --git a/solr/src/java/org/apache/solr/update/AddUpdateCommand.java b/solr/src/java/org/apache/solr/update/AddUpdateCommand.java index 6a02010bf43..a8599b47f5f 100644 --- a/solr/src/java/org/apache/solr/update/AddUpdateCommand.java +++ b/solr/src/java/org/apache/solr/update/AddUpdateCommand.java @@ -27,7 +27,7 @@ import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.SchemaField; /** - * @version $Id$ + * */ public class AddUpdateCommand extends UpdateCommand { // optional id in "internal" indexed form... if it is needed and not supplied, diff --git a/solr/src/java/org/apache/solr/update/CommitUpdateCommand.java b/solr/src/java/org/apache/solr/update/CommitUpdateCommand.java index a6bdb39cd67..439bddb4d3a 100644 --- a/solr/src/java/org/apache/solr/update/CommitUpdateCommand.java +++ b/solr/src/java/org/apache/solr/update/CommitUpdateCommand.java @@ -20,7 +20,7 @@ package org.apache.solr.update; import org.apache.solr.request.SolrQueryRequest; /** - * @version $Id$ + * */ public class CommitUpdateCommand extends UpdateCommand { public boolean optimize; diff --git a/solr/src/java/org/apache/solr/update/DeleteUpdateCommand.java b/solr/src/java/org/apache/solr/update/DeleteUpdateCommand.java index 79b865a62ea..adfd5ca2578 100644 --- a/solr/src/java/org/apache/solr/update/DeleteUpdateCommand.java +++ b/solr/src/java/org/apache/solr/update/DeleteUpdateCommand.java @@ -20,7 +20,7 @@ package org.apache.solr.update; import org.apache.solr.request.SolrQueryRequest; /** - * @version $Id$ + * */ public class DeleteUpdateCommand extends UpdateCommand { public String id; // external (printable) id, for delete-by-id diff --git a/solr/src/java/org/apache/solr/update/DocumentBuilder.java b/solr/src/java/org/apache/solr/update/DocumentBuilder.java index a7b80e0ded8..3194dd7c72e 100644 --- a/solr/src/java/org/apache/solr/update/DocumentBuilder.java +++ b/solr/src/java/org/apache/solr/update/DocumentBuilder.java @@ -30,7 +30,7 @@ import org.apache.solr.common.SolrInputField; import org.apache.solr.schema.*; /** - * @version $Id$ + * */ diff --git a/solr/src/java/org/apache/solr/update/MergeIndexesCommand.java b/solr/src/java/org/apache/solr/update/MergeIndexesCommand.java index 0eba124012f..412d2887083 100644 --- a/solr/src/java/org/apache/solr/update/MergeIndexesCommand.java +++ b/solr/src/java/org/apache/solr/update/MergeIndexesCommand.java @@ -24,7 +24,7 @@ import org.apache.solr.request.SolrQueryRequest; * A merge indexes command encapsulated in an object. 
* * @since solr 1.4 - * @version $Id$ + * */ public class MergeIndexesCommand extends UpdateCommand { public Directory[] dirs; diff --git a/solr/src/java/org/apache/solr/update/RollbackUpdateCommand.java b/solr/src/java/org/apache/solr/update/RollbackUpdateCommand.java index 1dd12ae8bf0..1308c53fc1d 100644 --- a/solr/src/java/org/apache/solr/update/RollbackUpdateCommand.java +++ b/solr/src/java/org/apache/solr/update/RollbackUpdateCommand.java @@ -20,7 +20,7 @@ package org.apache.solr.update; import org.apache.solr.request.SolrQueryRequest; /** - * @version $Id$ + * * @since Solr 1.4 */ public class RollbackUpdateCommand extends UpdateCommand { diff --git a/solr/src/java/org/apache/solr/update/SolrIndexConfig.java b/solr/src/java/org/apache/solr/update/SolrIndexConfig.java index b65c500ac8e..d51e7053b81 100644 --- a/solr/src/java/org/apache/solr/update/SolrIndexConfig.java +++ b/solr/src/java/org/apache/solr/update/SolrIndexConfig.java @@ -36,7 +36,7 @@ import java.util.HashMap; // This config object encapsulates IndexWriter config params. // /** - * @version $Id$ + * */ public class SolrIndexConfig { public static final Logger log = LoggerFactory.getLogger(SolrIndexConfig.class); diff --git a/solr/src/java/org/apache/solr/update/SolrIndexWriter.java b/solr/src/java/org/apache/solr/update/SolrIndexWriter.java index 5ecb4998e2d..64ef16748d9 100644 --- a/solr/src/java/org/apache/solr/update/SolrIndexWriter.java +++ b/solr/src/java/org/apache/solr/update/SolrIndexWriter.java @@ -18,10 +18,8 @@ package org.apache.solr.update; import org.apache.lucene.index.*; -import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.store.*; -import org.apache.lucene.util.Version; import org.apache.solr.common.SolrException; import org.apache.solr.core.DirectoryFactory; import org.apache.solr.schema.IndexSchema; @@ -30,7 +28,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.File; -import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; @@ -42,9 +39,8 @@ import java.util.Locale; /** * An IndexWriter that is configured via Solr config mechanisms. * -* @version $Id$ -* @since solr 0.9 -*/ + * @since solr 0.9 + */ public class SolrIndexWriter extends IndexWriter { private static Logger log = LoggerFactory.getLogger(SolrIndexWriter.class); diff --git a/solr/src/java/org/apache/solr/update/UpdateCommand.java b/solr/src/java/org/apache/solr/update/UpdateCommand.java index 0e8f8bc1e07..215f0e2bc69 100644 --- a/solr/src/java/org/apache/solr/update/UpdateCommand.java +++ b/solr/src/java/org/apache/solr/update/UpdateCommand.java @@ -22,7 +22,7 @@ import org.apache.solr.request.SolrQueryRequest; /** An index update command encapsulated in an object (Command pattern) * - * @version $Id$ + * */ public class UpdateCommand { protected final SolrQueryRequest req; diff --git a/solr/src/java/org/apache/solr/update/UpdateHandler.java b/solr/src/java/org/apache/solr/update/UpdateHandler.java index b90b80e11bd..a4f7eb071cc 100644 --- a/solr/src/java/org/apache/solr/update/UpdateHandler.java +++ b/solr/src/java/org/apache/solr/update/UpdateHandler.java @@ -42,7 +42,7 @@ import org.apache.solr.core.*; * UpdateHandler handles requests to change the index * (adds, deletes, commits, optimizes, etc). 
* - * @version $Id$ + * * @since solr 0.9 */ diff --git a/solr/src/java/org/apache/solr/util/BCDUtils.java b/solr/src/java/org/apache/solr/util/BCDUtils.java index 2cd20cf2307..66c52d32430 100644 --- a/solr/src/java/org/apache/solr/util/BCDUtils.java +++ b/solr/src/java/org/apache/solr/util/BCDUtils.java @@ -19,7 +19,7 @@ package org.apache.solr.util; /** - * @version $Id$ + * */ public class BCDUtils { // idiv is expensive... diff --git a/solr/src/java/org/apache/solr/util/DateMathParser.java b/solr/src/java/org/apache/solr/util/DateMathParser.java index bb62e589115..81152139354 100644 --- a/solr/src/java/org/apache/solr/util/DateMathParser.java +++ b/solr/src/java/org/apache/solr/util/DateMathParser.java @@ -75,7 +75,7 @@ import java.util.regex.Pattern; * inspecting the keySet of CALENDAR_UNITS. *
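The DateMathParser hunk above cuts off mid-javadoc; for reference, typical usage of the class looks like this sketch (constructor arguments per this era's API, the math string is only an example):

    import java.text.ParseException;
    import java.util.Date;
    import java.util.Locale;
    import java.util.TimeZone;
    import org.apache.solr.util.DateMathParser;

    public class DateMathExample {
      public static void main(String[] args) throws ParseException {
        DateMathParser p = new DateMathParser(TimeZone.getTimeZone("UTC"), Locale.US);
        p.setNow(new Date());                          // the instant "NOW" refers to
        Date d = p.parseMath("+6MONTHS+3DAYS/DAY");    // add, then round down to the day
        System.out.println(d);
      }
    }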

    * - * @version $Id$ + * */ public class DateMathParser { diff --git a/solr/src/java/org/apache/solr/util/NumberUtils.java b/solr/src/java/org/apache/solr/util/NumberUtils.java index 2954a367eac..79c83b83926 100644 --- a/solr/src/java/org/apache/solr/util/NumberUtils.java +++ b/solr/src/java/org/apache/solr/util/NumberUtils.java @@ -20,7 +20,7 @@ package org.apache.solr.util; import org.apache.lucene.util.BytesRef; /** - * @version $Id$ + * */ public class NumberUtils { diff --git a/solr/src/java/org/apache/solr/util/RefCounted.java b/solr/src/java/org/apache/solr/util/RefCounted.java index 508fb01df90..6cd5abc610b 100644 --- a/solr/src/java/org/apache/solr/util/RefCounted.java +++ b/solr/src/java/org/apache/solr/util/RefCounted.java @@ -28,7 +28,7 @@ import java.util.concurrent.atomic.AtomicInteger; * is safe since the count can only hit zero if it's unregistered (and * hence incref() will not be called again on it). * - * @version $Id$ + * */ public abstract class RefCounted { diff --git a/solr/src/java/org/apache/solr/util/SuggestMissingFactories.java b/solr/src/java/org/apache/solr/util/SuggestMissingFactories.java index b8d06259594..013f6bc483c 100644 --- a/solr/src/java/org/apache/solr/util/SuggestMissingFactories.java +++ b/solr/src/java/org/apache/solr/util/SuggestMissingFactories.java @@ -35,7 +35,7 @@ import java.util.jar.*; /** * Given a list of Jar files, suggest missing analysis factories. * - * @version $Id$ + * */ public class SuggestMissingFactories { diff --git a/solr/src/java/org/apache/solr/util/plugin/AbstractPluginLoader.java b/solr/src/java/org/apache/solr/util/plugin/AbstractPluginLoader.java index f3488b448d9..635960cfe2e 100644 --- a/solr/src/java/org/apache/solr/util/plugin/AbstractPluginLoader.java +++ b/solr/src/java/org/apache/solr/util/plugin/AbstractPluginLoader.java @@ -33,7 +33,7 @@ import org.w3c.dom.NodeList; /** * An abstract super class that manages standard solr-style plugin configuration. 
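The plugin interfaces patched in this stretch all follow the same init-with-config shape; the simplest is NamedListInitializedPlugin. A minimal sketch (MyPlugin and its "threshold" parameter are invented):

    import org.apache.solr.common.util.NamedList;
    import org.apache.solr.util.plugin.NamedListInitializedPlugin;

    public class MyPlugin implements NamedListInitializedPlugin {
      private int threshold = 10;

      // Called once with the <lst> of arguments from solrconfig.xml.
      public void init(NamedList args) {
        Integer t = (Integer) args.get("threshold");
        if (t != null) threshold = t.intValue();
      }
    }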
* - * @version $Id$ + * * @since solr 1.3 */ public abstract class AbstractPluginLoader diff --git a/solr/src/java/org/apache/solr/util/plugin/MapInitializedPlugin.java b/solr/src/java/org/apache/solr/util/plugin/MapInitializedPlugin.java index 933db38682d..c499f7033de 100644 --- a/solr/src/java/org/apache/solr/util/plugin/MapInitializedPlugin.java +++ b/solr/src/java/org/apache/solr/util/plugin/MapInitializedPlugin.java @@ -22,7 +22,7 @@ import java.util.Map; /** * A plugin that can be initialized with a Map * - * @version $Id$ + * * @since solr 1.3 */ public interface MapInitializedPlugin { diff --git a/solr/src/java/org/apache/solr/util/plugin/MapPluginLoader.java b/solr/src/java/org/apache/solr/util/plugin/MapPluginLoader.java index 6a7f55bbdea..3f284944ea8 100644 --- a/solr/src/java/org/apache/solr/util/plugin/MapPluginLoader.java +++ b/solr/src/java/org/apache/solr/util/plugin/MapPluginLoader.java @@ -24,7 +24,7 @@ import org.w3c.dom.Node; /** * - * @version $Id$ + * * @since solr 1.3 */ public class MapPluginLoader extends AbstractPluginLoader diff --git a/solr/src/java/org/apache/solr/util/plugin/NamedListInitializedPlugin.java b/solr/src/java/org/apache/solr/util/plugin/NamedListInitializedPlugin.java index a3c6d3cdd39..ad8a533fd62 100644 --- a/solr/src/java/org/apache/solr/util/plugin/NamedListInitializedPlugin.java +++ b/solr/src/java/org/apache/solr/util/plugin/NamedListInitializedPlugin.java @@ -22,7 +22,7 @@ import org.apache.solr.common.util.NamedList; /** * A plugin that can be initialized with a NamedList * - * @version $Id$ + * * @since solr 1.3 */ public interface NamedListInitializedPlugin { diff --git a/solr/src/java/org/apache/solr/util/plugin/NamedListPluginLoader.java b/solr/src/java/org/apache/solr/util/plugin/NamedListPluginLoader.java index 2e064ca661f..13ad5fefaab 100644 --- a/solr/src/java/org/apache/solr/util/plugin/NamedListPluginLoader.java +++ b/solr/src/java/org/apache/solr/util/plugin/NamedListPluginLoader.java @@ -23,7 +23,7 @@ import org.apache.solr.common.util.DOMUtil; import org.w3c.dom.Node; /** - * @version $Id$ + * * @since solr 1.3 */ public class NamedListPluginLoader extends AbstractPluginLoader diff --git a/solr/src/java/org/apache/solr/util/plugin/PluginInfoInitialized.java b/solr/src/java/org/apache/solr/util/plugin/PluginInfoInitialized.java index 36e6e7568b3..2b2b020bc3f 100644 --- a/solr/src/java/org/apache/solr/util/plugin/PluginInfoInitialized.java +++ b/solr/src/java/org/apache/solr/util/plugin/PluginInfoInitialized.java @@ -21,7 +21,7 @@ import org.apache.solr.core.PluginInfo; /** * A plugin that can be initialized with a PluginInfo * - * @version $Id$ + * * @since solr 1.4 */ public interface PluginInfoInitialized { diff --git a/solr/src/solrj/org/apache/solr/client/solrj/ResponseParser.java b/solr/src/solrj/org/apache/solr/client/solrj/ResponseParser.java index 5c028be83b1..90cd47db5dd 100644 --- a/solr/src/solrj/org/apache/solr/client/solrj/ResponseParser.java +++ b/solr/src/solrj/org/apache/solr/client/solrj/ResponseParser.java @@ -23,7 +23,7 @@ import org.apache.solr.common.util.NamedList; /** * - * @version $Id$ + * * @since solr 1.3 */ public abstract class ResponseParser diff --git a/solr/src/solrj/org/apache/solr/client/solrj/SolrQuery.java b/solr/src/solrj/org/apache/solr/client/solrj/SolrQuery.java index 20739fe4161..a7228f0da9d 100644 --- a/solr/src/solrj/org/apache/solr/client/solrj/SolrQuery.java +++ b/solr/src/solrj/org/apache/solr/client/solrj/SolrQuery.java @@ -31,7 +31,7 @@ import java.util.regex.Pattern; * This is an 
augmented SolrParams with get/set/add fields for common fields used * in the Standard and Dismax request handlers * - * @version $Id$ + * * @since solr 1.3 */ public class SolrQuery extends ModifiableSolrParams diff --git a/solr/src/solrj/org/apache/solr/client/solrj/SolrRequest.java b/solr/src/solrj/org/apache/solr/client/solrj/SolrRequest.java index e60df31a48b..8a6942c13f2 100644 --- a/solr/src/solrj/org/apache/solr/client/solrj/SolrRequest.java +++ b/solr/src/solrj/org/apache/solr/client/solrj/SolrRequest.java @@ -26,7 +26,7 @@ import org.apache.solr.common.util.ContentStream; /** * - * @version $Id$ + * * @since solr 1.3 */ public abstract class SolrRequest implements Serializable diff --git a/solr/src/solrj/org/apache/solr/client/solrj/SolrResponse.java b/solr/src/solrj/org/apache/solr/client/solrj/SolrResponse.java index aa89c741cf5..c6cb357e4f9 100644 --- a/solr/src/solrj/org/apache/solr/client/solrj/SolrResponse.java +++ b/solr/src/solrj/org/apache/solr/client/solrj/SolrResponse.java @@ -24,7 +24,7 @@ import org.apache.solr.common.util.NamedList; /** * - * @version $Id$ + * * @since solr 1.3 */ public abstract class SolrResponse implements Serializable diff --git a/solr/src/solrj/org/apache/solr/client/solrj/SolrServer.java b/solr/src/solrj/org/apache/solr/client/solrj/SolrServer.java index 72c6160af68..4f5f86fec1a 100644 --- a/solr/src/solrj/org/apache/solr/client/solrj/SolrServer.java +++ b/solr/src/solrj/org/apache/solr/client/solrj/SolrServer.java @@ -37,7 +37,7 @@ import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.NamedList; /** - * @version $Id$ + * * @since solr 1.3 */ public abstract class SolrServer implements Serializable diff --git a/solr/src/solrj/org/apache/solr/client/solrj/SolrServerException.java b/solr/src/solrj/org/apache/solr/client/solrj/SolrServerException.java index 67c46d92ddf..b8815096a02 100644 --- a/solr/src/solrj/org/apache/solr/client/solrj/SolrServerException.java +++ b/solr/src/solrj/org/apache/solr/client/solrj/SolrServerException.java @@ -19,7 +19,7 @@ package org.apache.solr.client.solrj; /** Exception to catch all types of communication / parsing issues associated with talking to SOLR * - * @version $Id$ + * * @since solr 1.3 */ public class SolrServerException extends Exception { diff --git a/solr/src/solrj/org/apache/solr/client/solrj/beans/DocumentObjectBinder.java b/solr/src/solrj/org/apache/solr/client/solrj/beans/DocumentObjectBinder.java index f01cb9dc17d..10096b52aea 100644 --- a/solr/src/solrj/org/apache/solr/client/solrj/beans/DocumentObjectBinder.java +++ b/solr/src/solrj/org/apache/solr/client/solrj/beans/DocumentObjectBinder.java @@ -29,7 +29,7 @@ import java.nio.ByteBuffer; /** * A class to map objects to and from solr documents. 
* - * @version $Id$ + * * @since solr 1.3 */ public class DocumentObjectBinder { diff --git a/solr/src/solrj/org/apache/solr/client/solrj/beans/Field.java b/solr/src/solrj/org/apache/solr/client/solrj/beans/Field.java index 156fcef8723..b4beb68a896 100644 --- a/solr/src/solrj/org/apache/solr/client/solrj/beans/Field.java +++ b/solr/src/solrj/org/apache/solr/client/solrj/beans/Field.java @@ -24,7 +24,7 @@ import java.lang.annotation.Retention; /** - * @version $Id$ + * * @since solr 1.3 */ @Target({FIELD, METHOD}) diff --git a/solr/src/solrj/org/apache/solr/client/solrj/impl/BinaryRequestWriter.java b/solr/src/solrj/org/apache/solr/client/solrj/impl/BinaryRequestWriter.java index 37bf449a76c..0a5a01255ff 100644 --- a/solr/src/solrj/org/apache/solr/client/solrj/impl/BinaryRequestWriter.java +++ b/solr/src/solrj/org/apache/solr/client/solrj/impl/BinaryRequestWriter.java @@ -30,7 +30,7 @@ import java.util.List; /** * A RequestWriter which writes requests in the javabin format * - * @version $Id$ + * * @see org.apache.solr.client.solrj.request.RequestWriter * @since solr 1.4 */ diff --git a/solr/src/solrj/org/apache/solr/client/solrj/impl/BinaryResponseParser.java b/solr/src/solrj/org/apache/solr/client/solrj/impl/BinaryResponseParser.java index 13794601adb..7ac9294ff7a 100755 --- a/solr/src/solrj/org/apache/solr/client/solrj/impl/BinaryResponseParser.java +++ b/solr/src/solrj/org/apache/solr/client/solrj/impl/BinaryResponseParser.java @@ -26,7 +26,7 @@ import java.io.InputStream; import java.io.Reader; /** - * @version $Id$ + * * @since solr 1.3 */ public class BinaryResponseParser extends ResponseParser { diff --git a/solr/src/solrj/org/apache/solr/client/solrj/impl/CommonsHttpSolrServer.java b/solr/src/solrj/org/apache/solr/client/solrj/impl/CommonsHttpSolrServer.java index e9f795b0f8e..2f1c19a6c5b 100644 --- a/solr/src/solrj/org/apache/solr/client/solrj/impl/CommonsHttpSolrServer.java +++ b/solr/src/solrj/org/apache/solr/client/solrj/impl/CommonsHttpSolrServer.java @@ -57,7 +57,7 @@ import org.slf4j.LoggerFactory; * The {@link CommonsHttpSolrServer} uses the Apache Commons HTTP Client to connect to solr. *
    SolrServer server = new CommonsHttpSolrServer( url );
    * - * @version $Id$ + * * @since solr 1.3 */ public class CommonsHttpSolrServer extends SolrServer diff --git a/solr/src/solrj/org/apache/solr/client/solrj/impl/StreamingBinaryResponseParser.java b/solr/src/solrj/org/apache/solr/client/solrj/impl/StreamingBinaryResponseParser.java index b0b90864296..89708d7a342 100644 --- a/solr/src/solrj/org/apache/solr/client/solrj/impl/StreamingBinaryResponseParser.java +++ b/solr/src/solrj/org/apache/solr/client/solrj/impl/StreamingBinaryResponseParser.java @@ -32,7 +32,7 @@ import java.util.List; * A BinaryResponseParser that sends callback events rather then build * a large response * - * @version $Id: StreamingBinaryResponseParser.java 990180 2010-08-27 15:40:17Z rmuir $ + * * @since solr 4.0 */ public class StreamingBinaryResponseParser extends BinaryResponseParser { diff --git a/solr/src/solrj/org/apache/solr/client/solrj/impl/StreamingUpdateSolrServer.java b/solr/src/solrj/org/apache/solr/client/solrj/impl/StreamingUpdateSolrServer.java index 607480ed69f..206dd501b81 100644 --- a/solr/src/solrj/org/apache/solr/client/solrj/impl/StreamingUpdateSolrServer.java +++ b/solr/src/solrj/org/apache/solr/client/solrj/impl/StreamingUpdateSolrServer.java @@ -49,7 +49,7 @@ import org.slf4j.LoggerFactory; * it is only recommended to use the {@link StreamingUpdateSolrServer} with * /update requests. The query interface is better suited for * - * @version $Id: CommonsHttpSolrServer.java 724175 2008-12-07 19:07:11Z ryan $ + * * @since solr 1.4 */ public class StreamingUpdateSolrServer extends CommonsHttpSolrServer diff --git a/solr/src/solrj/org/apache/solr/client/solrj/impl/XMLResponseParser.java b/solr/src/solrj/org/apache/solr/client/solrj/impl/XMLResponseParser.java index f75681f0118..d17b3e647fe 100644 --- a/solr/src/solrj/org/apache/solr/client/solrj/impl/XMLResponseParser.java +++ b/solr/src/solrj/org/apache/solr/client/solrj/impl/XMLResponseParser.java @@ -44,7 +44,7 @@ import org.apache.solr.common.util.XMLErrorLogger; /** * - * @version $Id$ + * * @since solr 1.3 */ public class XMLResponseParser extends ResponseParser diff --git a/solr/src/solrj/org/apache/solr/client/solrj/request/CoreAdminRequest.java b/solr/src/solrj/org/apache/solr/client/solrj/request/CoreAdminRequest.java index d1fe433dea3..2e8bfafbdf1 100644 --- a/solr/src/solrj/org/apache/solr/client/solrj/request/CoreAdminRequest.java +++ b/solr/src/solrj/org/apache/solr/client/solrj/request/CoreAdminRequest.java @@ -34,7 +34,7 @@ import org.apache.solr.common.util.ContentStream; /** * This class is experimental and subject to change. 
- * @version $Id: CoreAdminRequest.java 606335 2007-12-21 22:23:39Z ryan $ + * * @since solr 1.3 */ public class CoreAdminRequest extends SolrRequest diff --git a/solr/src/solrj/org/apache/solr/client/solrj/request/DirectXmlRequest.java b/solr/src/solrj/org/apache/solr/client/solrj/request/DirectXmlRequest.java index 23964b65d50..8d0fdba49dd 100644 --- a/solr/src/solrj/org/apache/solr/client/solrj/request/DirectXmlRequest.java +++ b/solr/src/solrj/org/apache/solr/client/solrj/request/DirectXmlRequest.java @@ -31,7 +31,7 @@ import org.apache.solr.common.util.ContentStream; /** * Send arbitrary XML to a request handler * - * @version $Id$ + * * @since solr 1.3 */ public class DirectXmlRequest extends SolrRequest diff --git a/solr/src/solrj/org/apache/solr/client/solrj/request/DocumentAnalysisRequest.java b/solr/src/solrj/org/apache/solr/client/solrj/request/DocumentAnalysisRequest.java index 50f937c9191..56ba9466ddb 100644 --- a/solr/src/solrj/org/apache/solr/client/solrj/request/DocumentAnalysisRequest.java +++ b/solr/src/solrj/org/apache/solr/client/solrj/request/DocumentAnalysisRequest.java @@ -36,7 +36,7 @@ import java.util.List; /** * A request for the org.apache.solr.handler.DocumentAnalysisRequestHandler. * - * @version $Id$ + * * @since solr 1.4 */ public class DocumentAnalysisRequest extends SolrRequest { diff --git a/solr/src/solrj/org/apache/solr/client/solrj/request/FieldAnalysisRequest.java b/solr/src/solrj/org/apache/solr/client/solrj/request/FieldAnalysisRequest.java index edffe7e2d70..f17efa57321 100644 --- a/solr/src/solrj/org/apache/solr/client/solrj/request/FieldAnalysisRequest.java +++ b/solr/src/solrj/org/apache/solr/client/solrj/request/FieldAnalysisRequest.java @@ -34,7 +34,7 @@ import java.util.List; /** * A request for the org.apache.solr.handler.FieldAnalysisRequestHandler. * - * @version $Id$ + * * @since solr 1.4 */ public class FieldAnalysisRequest extends SolrRequest { diff --git a/solr/src/solrj/org/apache/solr/client/solrj/request/JavaBinUpdateRequestCodec.java b/solr/src/solrj/org/apache/solr/client/solrj/request/JavaBinUpdateRequestCodec.java index 0f5842e632e..cbb8b81c033 100644 --- a/solr/src/solrj/org/apache/solr/client/solrj/request/JavaBinUpdateRequestCodec.java +++ b/solr/src/solrj/org/apache/solr/client/solrj/request/JavaBinUpdateRequestCodec.java @@ -33,7 +33,7 @@ import java.util.*; * Provides methods for marshalling an UpdateRequest to a NamedList which can be serialized in the javabin format and * vice versa.
* - * @version $Id$ + * * @see org.apache.solr.common.util.JavaBinCodec * @since solr 1.4 */ diff --git a/solr/src/solrj/org/apache/solr/client/solrj/request/LukeRequest.java b/solr/src/solrj/org/apache/solr/client/solrj/request/LukeRequest.java index 31092039f44..1b575a0cb3c 100644 --- a/solr/src/solrj/org/apache/solr/client/solrj/request/LukeRequest.java +++ b/solr/src/solrj/org/apache/solr/client/solrj/request/LukeRequest.java @@ -33,7 +33,7 @@ import org.apache.solr.common.util.ContentStream; /** * - * @version $Id$ + * * @since solr 1.3 */ public class LukeRequest extends SolrRequest diff --git a/solr/src/solrj/org/apache/solr/client/solrj/request/QueryRequest.java b/solr/src/solrj/org/apache/solr/client/solrj/request/QueryRequest.java index 48ce71ab8de..ed58c9f9e6e 100644 --- a/solr/src/solrj/org/apache/solr/client/solrj/request/QueryRequest.java +++ b/solr/src/solrj/org/apache/solr/client/solrj/request/QueryRequest.java @@ -29,7 +29,7 @@ import org.apache.solr.common.util.ContentStream; /** * - * @version $Id$ + * * @since solr 1.3 */ public class QueryRequest extends SolrRequest diff --git a/solr/src/solrj/org/apache/solr/client/solrj/request/RequestWriter.java b/solr/src/solrj/org/apache/solr/client/solrj/request/RequestWriter.java index 63cd0c008ad..70d10d0a40c 100644 --- a/solr/src/solrj/org/apache/solr/client/solrj/request/RequestWriter.java +++ b/solr/src/solrj/org/apache/solr/client/solrj/request/RequestWriter.java @@ -33,7 +33,7 @@ import java.nio.charset.Charset; *

    * A subclass can override the methods in this class to supply a custom format in which a request can be sent. * - * @version $Id$ + * * @since solr 1.4 */ public class RequestWriter { diff --git a/solr/src/solrj/org/apache/solr/client/solrj/request/SolrPing.java b/solr/src/solrj/org/apache/solr/client/solrj/request/SolrPing.java index 16c7e172c48..41062dcf205 100644 --- a/solr/src/solrj/org/apache/solr/client/solrj/request/SolrPing.java +++ b/solr/src/solrj/org/apache/solr/client/solrj/request/SolrPing.java @@ -29,7 +29,7 @@ import org.apache.solr.common.util.ContentStream; /** * - * @version $Id$ + * * @since solr 1.3 */ public class SolrPing extends SolrRequest diff --git a/solr/src/solrj/org/apache/solr/client/solrj/request/UpdateRequest.java b/solr/src/solrj/org/apache/solr/client/solrj/request/UpdateRequest.java index 8038dcc28a6..8edef78b0df 100644 --- a/solr/src/solrj/org/apache/solr/client/solrj/request/UpdateRequest.java +++ b/solr/src/solrj/org/apache/solr/client/solrj/request/UpdateRequest.java @@ -34,7 +34,7 @@ import org.apache.solr.common.util.XML; /** * - * @version $Id$ + * * @since solr 1.3 */ public class UpdateRequest extends AbstractUpdateRequest { diff --git a/solr/src/solrj/org/apache/solr/client/solrj/response/AnalysisResponseBase.java b/solr/src/solrj/org/apache/solr/client/solrj/response/AnalysisResponseBase.java index 55d87d4fbe8..097b0debf45 100644 --- a/solr/src/solrj/org/apache/solr/client/solrj/response/AnalysisResponseBase.java +++ b/solr/src/solrj/org/apache/solr/client/solrj/response/AnalysisResponseBase.java @@ -26,7 +26,7 @@ import java.util.Map; /** * A base class for all analysis responses. * - * @version $Id$ + * * @since solr 1.4 */ public class AnalysisResponseBase extends SolrResponseBase { diff --git a/solr/src/solrj/org/apache/solr/client/solrj/response/CoreAdminResponse.java b/solr/src/solrj/org/apache/solr/client/solrj/response/CoreAdminResponse.java index 7034402c43d..639aa82918c 100644 --- a/solr/src/solrj/org/apache/solr/client/solrj/response/CoreAdminResponse.java +++ b/solr/src/solrj/org/apache/solr/client/solrj/response/CoreAdminResponse.java @@ -22,7 +22,7 @@ import org.apache.solr.common.util.NamedList; /** * - * @version $Id$ + * * @since solr 1.3 */ public class CoreAdminResponse extends SolrResponseBase diff --git a/solr/src/solrj/org/apache/solr/client/solrj/response/DocumentAnalysisResponse.java b/solr/src/solrj/org/apache/solr/client/solrj/response/DocumentAnalysisResponse.java index f0d5dff787e..7f4bb4d574c 100644 --- a/solr/src/solrj/org/apache/solr/client/solrj/response/DocumentAnalysisResponse.java +++ b/solr/src/solrj/org/apache/solr/client/solrj/response/DocumentAnalysisResponse.java @@ -28,7 +28,7 @@ import java.util.Map; * A response that is returned by processing the {@link org.apache.solr.client.solrj.request.DocumentAnalysisRequest}. * Holds a map of {@link DocumentAnalysis} objects by a document id (unique key). * - * @version $Id$ + * * @since solr 1.4 */ public class DocumentAnalysisResponse extends AnalysisResponseBase implements Iterable> { diff --git a/solr/src/solrj/org/apache/solr/client/solrj/response/FacetField.java b/solr/src/solrj/org/apache/solr/client/solrj/response/FacetField.java index bbfaa479e2a..6b653487344 100644 --- a/solr/src/solrj/org/apache/solr/client/solrj/response/FacetField.java +++ b/solr/src/solrj/org/apache/solr/client/solrj/response/FacetField.java @@ -28,7 +28,6 @@ import org.apache.solr.client.solrj.util.ClientUtils; * A utility class to hold the facet response. 
It could use the NamedList container, * but for JSTL, it is nice to have something that implements List so it can be iterated * - * @version $Id$ * @since solr 1.3 */ public class FacetField implements Serializable diff --git a/solr/src/solrj/org/apache/solr/client/solrj/response/FieldAnalysisResponse.java b/solr/src/solrj/org/apache/solr/client/solrj/response/FieldAnalysisResponse.java index e7343647db8..81259d74153 100644 --- a/solr/src/solrj/org/apache/solr/client/solrj/response/FieldAnalysisResponse.java +++ b/solr/src/solrj/org/apache/solr/client/solrj/response/FieldAnalysisResponse.java @@ -27,7 +27,7 @@ import java.util.Map; * A response that is returned by processing the {@link org.apache.solr.client.solrj.request.FieldAnalysisRequest}. * Holds a map of {@link Analysis} objects per field name as well as a map of {@link Analysis} objects per field type. * - * @version $Id$ + * * @since solr 1.4 */ public class FieldAnalysisResponse extends AnalysisResponseBase { diff --git a/solr/src/solrj/org/apache/solr/client/solrj/response/FieldStatsInfo.java b/solr/src/solrj/org/apache/solr/client/solrj/response/FieldStatsInfo.java index d0154c29698..c32cd1cdd37 100644 --- a/solr/src/solrj/org/apache/solr/client/solrj/response/FieldStatsInfo.java +++ b/solr/src/solrj/org/apache/solr/client/solrj/response/FieldStatsInfo.java @@ -27,7 +27,7 @@ import java.util.Map; /** * Holds stats info * - * @version $Id: SpellCheckResponse.java 693622 2008-09-09 21:21:06Z gsingers $ + * * @since solr 1.4 */ public class FieldStatsInfo implements Serializable { diff --git a/solr/src/solrj/org/apache/solr/client/solrj/response/LukeResponse.java b/solr/src/solrj/org/apache/solr/client/solrj/response/LukeResponse.java index 4e4877c0e1d..68f3be69ee5 100644 --- a/solr/src/solrj/org/apache/solr/client/solrj/response/LukeResponse.java +++ b/solr/src/solrj/org/apache/solr/client/solrj/response/LukeResponse.java @@ -27,7 +27,7 @@ import java.util.*; /** * This is an incomplete representation of the data returned from Luke * - * @version $Id$ + * * @since solr 1.3 */ public class LukeResponse extends SolrResponseBase { diff --git a/solr/src/solrj/org/apache/solr/client/solrj/response/QueryResponse.java b/solr/src/solrj/org/apache/solr/client/solrj/response/QueryResponse.java index f1259d1ccbc..78883284ba6 100644 --- a/solr/src/solrj/org/apache/solr/client/solrj/response/QueryResponse.java +++ b/solr/src/solrj/org/apache/solr/client/solrj/response/QueryResponse.java @@ -31,7 +31,7 @@ import org.apache.solr.client.solrj.beans.DocumentObjectBinder; /** * - * @version $Id$ + * * @since solr 1.3 */ @SuppressWarnings("unchecked") diff --git a/solr/src/solrj/org/apache/solr/client/solrj/response/SolrPingResponse.java b/solr/src/solrj/org/apache/solr/client/solrj/response/SolrPingResponse.java index ad58878f835..4cdfc8d1bc2 100644 --- a/solr/src/solrj/org/apache/solr/client/solrj/response/SolrPingResponse.java +++ b/solr/src/solrj/org/apache/solr/client/solrj/response/SolrPingResponse.java @@ -19,7 +19,7 @@ package org.apache.solr.client.solrj.response; /** * - * @version $Id$ + * * @since solr 1.3 */ public class SolrPingResponse extends SolrResponseBase diff --git a/solr/src/solrj/org/apache/solr/client/solrj/response/SolrResponseBase.java b/solr/src/solrj/org/apache/solr/client/solrj/response/SolrResponseBase.java index ed0908f7ddb..16f2cdc83cf 100644 --- a/solr/src/solrj/org/apache/solr/client/solrj/response/SolrResponseBase.java +++ b/solr/src/solrj/org/apache/solr/client/solrj/response/SolrResponseBase.java @@ -22,7 +22,7 
@@ import org.apache.solr.common.util.NamedList; /** * - * @version $Id$ + * * @since solr 1.3 */ public class SolrResponseBase extends SolrResponse diff --git a/solr/src/solrj/org/apache/solr/client/solrj/response/SpellCheckResponse.java b/solr/src/solrj/org/apache/solr/client/solrj/response/SpellCheckResponse.java index c5bb419ee87..238e3a74a0c 100644 --- a/solr/src/solrj/org/apache/solr/client/solrj/response/SpellCheckResponse.java +++ b/solr/src/solrj/org/apache/solr/client/solrj/response/SpellCheckResponse.java @@ -26,7 +26,7 @@ import java.util.Map; /** * Encapsulates responses from SpellCheckComponent * - * @version $Id$ + * * @since solr 1.3 */ public class SpellCheckResponse { diff --git a/solr/src/solrj/org/apache/solr/client/solrj/response/UpdateResponse.java b/solr/src/solrj/org/apache/solr/client/solrj/response/UpdateResponse.java index 028b313ad01..3f56a7dfbd4 100644 --- a/solr/src/solrj/org/apache/solr/client/solrj/response/UpdateResponse.java +++ b/solr/src/solrj/org/apache/solr/client/solrj/response/UpdateResponse.java @@ -21,7 +21,7 @@ package org.apache.solr.client.solrj.response; /** * TODO -- mostly a stub until we have a defined output format * - * @version $Id$ + * * @since solr 1.3 */ public class UpdateResponse extends SolrResponseBase diff --git a/solr/src/solrj/org/apache/solr/client/solrj/util/ClientUtils.java b/solr/src/solrj/org/apache/solr/client/solrj/util/ClientUtils.java index 50225340f4a..038d8a7a1ff 100644 --- a/solr/src/solrj/org/apache/solr/client/solrj/util/ClientUtils.java +++ b/solr/src/solrj/org/apache/solr/client/solrj/util/ClientUtils.java @@ -42,7 +42,7 @@ import org.apache.solr.common.util.*; /** * TODO? should this go in common? * - * @version $Id$ + * * @since solr 1.3 */ public class ClientUtils diff --git a/solr/src/test-framework/org/apache/solr/util/TestHarness.java b/solr/src/test-framework/org/apache/solr/util/TestHarness.java index 4eee02b88cc..59f0f40082c 100644 --- a/solr/src/test-framework/org/apache/solr/util/TestHarness.java +++ b/solr/src/test-framework/org/apache/solr/util/TestHarness.java @@ -66,7 +66,7 @@ import java.util.Map; * distribution, in order to encourage plugin writers to create unit * tests for their plugins. * - * @version $Id$ + * */ public class TestHarness { protected CoreContainer container; diff --git a/solr/src/test/org/apache/solr/TestDistributedSearch.java b/solr/src/test/org/apache/solr/TestDistributedSearch.java index 9b31d391232..5ec3f352748 100755 --- a/solr/src/test/org/apache/solr/TestDistributedSearch.java +++ b/solr/src/test/org/apache/solr/TestDistributedSearch.java @@ -24,7 +24,7 @@ import org.apache.solr.common.params.CommonParams; * http://docs.codehaus.org/display/JETTY/ServletTester * rather then open a real connection? * - * @version $Id$ + * * @since solr 1.3 */ public class TestDistributedSearch extends BaseDistributedSearchTestCase { diff --git a/solr/src/test/org/apache/solr/TestPluginEnable.java b/solr/src/test/org/apache/solr/TestPluginEnable.java index b390ddf37bb..454302f285e 100644 --- a/solr/src/test/org/apache/solr/TestPluginEnable.java +++ b/solr/src/test/org/apache/solr/TestPluginEnable.java @@ -23,7 +23,7 @@ import org.junit.Test; /** *

    Test disabling components

    * - * @version $Id$ + * * @since solr 1.4 */ public class TestPluginEnable extends SolrTestCaseJ4 { diff --git a/solr/src/test/org/apache/solr/TestSolrCoreProperties.java b/solr/src/test/org/apache/solr/TestSolrCoreProperties.java index 28f6198420d..d85add266aa 100644 --- a/solr/src/test/org/apache/solr/TestSolrCoreProperties.java +++ b/solr/src/test/org/apache/solr/TestSolrCoreProperties.java @@ -33,7 +33,7 @@ import java.util.Properties; /** *

    Test for Loading core properties from a properties file

    * - * @version $Id$ + * * @since solr 1.4 */ public class TestSolrCoreProperties extends LuceneTestCase { diff --git a/solr/src/test/org/apache/solr/TestTrie.java b/solr/src/test/org/apache/solr/TestTrie.java index 861c349b5fa..4637e031be2 100644 --- a/solr/src/test/org/apache/solr/TestTrie.java +++ b/solr/src/test/org/apache/solr/TestTrie.java @@ -32,7 +32,7 @@ import java.util.TimeZone; /** * Tests for TrieField functionality * - * @version $Id$ + * * @since solr 1.4 */ public class TestTrie extends SolrTestCaseJ4 { diff --git a/solr/src/test/org/apache/solr/analysis/TestPhoneticFilterFactory.java b/solr/src/test/org/apache/solr/analysis/TestPhoneticFilterFactory.java index f9f8cca3f2a..409ab0ac8d6 100644 --- a/solr/src/test/org/apache/solr/analysis/TestPhoneticFilterFactory.java +++ b/solr/src/test/org/apache/solr/analysis/TestPhoneticFilterFactory.java @@ -28,7 +28,7 @@ import org.apache.lucene.analysis.core.WhitespaceTokenizer; /** - * @version $Id$ + * */ public class TestPhoneticFilterFactory extends BaseTokenTestCase { diff --git a/solr/src/test/org/apache/solr/client/solrj/LargeVolumeTestBase.java b/solr/src/test/org/apache/solr/client/solrj/LargeVolumeTestBase.java index 955ed59feef..8e4f96f72f1 100644 --- a/solr/src/test/org/apache/solr/client/solrj/LargeVolumeTestBase.java +++ b/solr/src/test/org/apache/solr/client/solrj/LargeVolumeTestBase.java @@ -27,7 +27,7 @@ import org.apache.solr.common.SolrInputDocument; import org.junit.Test; /** - * @version $Id$ + * * @since solr 1.3 */ public abstract class LargeVolumeTestBase extends SolrJettyTestBase diff --git a/solr/src/test/org/apache/solr/client/solrj/MergeIndexesExampleTestBase.java b/solr/src/test/org/apache/solr/client/solrj/MergeIndexesExampleTestBase.java index 3c6b2d13c45..b3361030009 100644 --- a/solr/src/test/org/apache/solr/client/solrj/MergeIndexesExampleTestBase.java +++ b/solr/src/test/org/apache/solr/client/solrj/MergeIndexesExampleTestBase.java @@ -30,7 +30,7 @@ import org.apache.solr.util.ExternalPaths; * Abstract base class for testing merge indexes command * * @since solr 1.4 - * @version $Id$ + * */ public abstract class MergeIndexesExampleTestBase extends SolrExampleTestBase { // protected static final CoreContainer cores = new CoreContainer(); diff --git a/solr/src/test/org/apache/solr/client/solrj/MultiCoreExampleTestBase.java b/solr/src/test/org/apache/solr/client/solrj/MultiCoreExampleTestBase.java index 5a435d75acc..a25676a6d47 100644 --- a/solr/src/test/org/apache/solr/client/solrj/MultiCoreExampleTestBase.java +++ b/solr/src/test/org/apache/solr/client/solrj/MultiCoreExampleTestBase.java @@ -30,7 +30,7 @@ import org.junit.Test; /** - * @version $Id$ + * * @since solr 1.3 */ public abstract class MultiCoreExampleTestBase extends SolrExampleTestBase diff --git a/solr/src/test/org/apache/solr/client/solrj/SolrExampleTestBase.java b/solr/src/test/org/apache/solr/client/solrj/SolrExampleTestBase.java index 60fc20123e3..bba195fdc5d 100644 --- a/solr/src/test/org/apache/solr/client/solrj/SolrExampleTestBase.java +++ b/solr/src/test/org/apache/solr/client/solrj/SolrExampleTestBase.java @@ -25,7 +25,7 @@ import org.apache.solr.util.AbstractSolrTestCase; * * This lets us try various SolrServer implementations with the same tests. 
* - * @version $Id$ + * * @since solr 1.3 */ abstract public class SolrExampleTestBase extends AbstractSolrTestCase diff --git a/solr/src/test/org/apache/solr/client/solrj/SolrExampleTests.java b/solr/src/test/org/apache/solr/client/solrj/SolrExampleTests.java index fe398730835..fc295b1d95f 100644 --- a/solr/src/test/org/apache/solr/client/solrj/SolrExampleTests.java +++ b/solr/src/test/org/apache/solr/client/solrj/SolrExampleTests.java @@ -58,7 +58,7 @@ import org.junit.Test; * * This lets us try various SolrServer implementations with the same tests. * - * @version $Id$ + * * @since solr 1.3 */ abstract public class SolrExampleTests extends SolrJettyTestBase diff --git a/solr/src/test/org/apache/solr/client/solrj/SolrExceptionTest.java b/solr/src/test/org/apache/solr/client/solrj/SolrExceptionTest.java index 25197812150..f7dc9400d77 100644 --- a/solr/src/test/org/apache/solr/client/solrj/SolrExceptionTest.java +++ b/solr/src/test/org/apache/solr/client/solrj/SolrExceptionTest.java @@ -25,7 +25,7 @@ import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer; /** * - * @version $Id$ + * * @since solr 1.3 */ public class SolrExceptionTest extends LuceneTestCase { diff --git a/solr/src/test/org/apache/solr/client/solrj/SolrQueryTest.java b/solr/src/test/org/apache/solr/client/solrj/SolrQueryTest.java index 69d7d37bb75..9c24c4d9c0c 100644 --- a/solr/src/test/org/apache/solr/client/solrj/SolrQueryTest.java +++ b/solr/src/test/org/apache/solr/client/solrj/SolrQueryTest.java @@ -24,7 +24,7 @@ import junit.framework.Assert; /** * - * @version $Id$ + * * @since solr 1.3 */ public class SolrQueryTest extends LuceneTestCase { diff --git a/solr/src/test/org/apache/solr/client/solrj/TestBatchUpdate.java b/solr/src/test/org/apache/solr/client/solrj/TestBatchUpdate.java index 69fb792a8e5..c4212df9639 100644 --- a/solr/src/test/org/apache/solr/client/solrj/TestBatchUpdate.java +++ b/solr/src/test/org/apache/solr/client/solrj/TestBatchUpdate.java @@ -33,7 +33,7 @@ import java.util.Iterator; * Test for SOLR-1038 * * @since solr 1.4 - * @version $Id$ + * */ public class TestBatchUpdate extends SolrJettyTestBase { diff --git a/solr/src/test/org/apache/solr/client/solrj/TestLBHttpSolrServer.java b/solr/src/test/org/apache/solr/client/solrj/TestLBHttpSolrServer.java index 75f4e341860..fed8e9d2d5b 100644 --- a/solr/src/test/org/apache/solr/client/solrj/TestLBHttpSolrServer.java +++ b/solr/src/test/org/apache/solr/client/solrj/TestLBHttpSolrServer.java @@ -41,7 +41,7 @@ import java.util.Set; /** * Test for LBHttpSolrServer * - * @version $Id$ + * * @since solr 1.4 */ public class TestLBHttpSolrServer extends LuceneTestCase { diff --git a/solr/src/test/org/apache/solr/client/solrj/embedded/JettyWebappTest.java b/solr/src/test/org/apache/solr/client/solrj/embedded/JettyWebappTest.java index 161da30bd8a..dd8c47304dd 100644 --- a/solr/src/test/org/apache/solr/client/solrj/embedded/JettyWebappTest.java +++ b/solr/src/test/org/apache/solr/client/solrj/embedded/JettyWebappTest.java @@ -34,7 +34,7 @@ import org.mortbay.jetty.servlet.HashSessionIdManager; import org.mortbay.jetty.webapp.WebAppContext; /** - * @version $Id$ + * * @since solr 1.3 */ public class JettyWebappTest extends LuceneTestCase diff --git a/solr/src/test/org/apache/solr/client/solrj/embedded/MergeIndexesEmbeddedTest.java b/solr/src/test/org/apache/solr/client/solrj/embedded/MergeIndexesEmbeddedTest.java index 59bf574b607..e0732fb0e8c 100644 --- a/solr/src/test/org/apache/solr/client/solrj/embedded/MergeIndexesEmbeddedTest.java +++ 
b/solr/src/test/org/apache/solr/client/solrj/embedded/MergeIndexesEmbeddedTest.java @@ -27,7 +27,7 @@ import org.apache.solr.core.SolrCore; * Test for merge indexes command * * @since solr 1.4 - * @version $Id$ + * */ public class MergeIndexesEmbeddedTest extends MergeIndexesExampleTestBase { diff --git a/solr/src/test/org/apache/solr/client/solrj/embedded/MultiCoreEmbeddedTest.java b/solr/src/test/org/apache/solr/client/solrj/embedded/MultiCoreEmbeddedTest.java index ede29c58afd..5c7952884f2 100644 --- a/solr/src/test/org/apache/solr/client/solrj/embedded/MultiCoreEmbeddedTest.java +++ b/solr/src/test/org/apache/solr/client/solrj/embedded/MultiCoreEmbeddedTest.java @@ -25,7 +25,7 @@ import org.apache.solr.client.solrj.SolrServer; /** * This runs SolrServer test using * - * @version $Id$ + * * @since solr 1.3 */ public class MultiCoreEmbeddedTest extends MultiCoreExampleTestBase { diff --git a/solr/src/test/org/apache/solr/client/solrj/embedded/MultiCoreExampleJettyTest.java b/solr/src/test/org/apache/solr/client/solrj/embedded/MultiCoreExampleJettyTest.java index c15226c5f0f..10757670945 100644 --- a/solr/src/test/org/apache/solr/client/solrj/embedded/MultiCoreExampleJettyTest.java +++ b/solr/src/test/org/apache/solr/client/solrj/embedded/MultiCoreExampleJettyTest.java @@ -37,7 +37,7 @@ import org.junit.Test; * http://docs.codehaus.org/display/JETTY/ServletTester * rather then open a real connection? * - * @version $Id$ + * * @since solr 1.3 */ public class MultiCoreExampleJettyTest extends MultiCoreExampleTestBase { diff --git a/solr/src/test/org/apache/solr/client/solrj/embedded/SolrExampleEmbeddedTest.java b/solr/src/test/org/apache/solr/client/solrj/embedded/SolrExampleEmbeddedTest.java index 6a569bc44d6..ab7d798be80 100644 --- a/solr/src/test/org/apache/solr/client/solrj/embedded/SolrExampleEmbeddedTest.java +++ b/solr/src/test/org/apache/solr/client/solrj/embedded/SolrExampleEmbeddedTest.java @@ -24,7 +24,7 @@ import org.junit.BeforeClass; /** * This runs SolrServer test using * - * @version $Id$ + * * @since solr 1.3 */ public class SolrExampleEmbeddedTest extends SolrExampleTests { diff --git a/solr/src/test/org/apache/solr/client/solrj/embedded/SolrExampleStreamingTest.java b/solr/src/test/org/apache/solr/client/solrj/embedded/SolrExampleStreamingTest.java index a7747d09be8..511db090a5d 100644 --- a/solr/src/test/org/apache/solr/client/solrj/embedded/SolrExampleStreamingTest.java +++ b/solr/src/test/org/apache/solr/client/solrj/embedded/SolrExampleStreamingTest.java @@ -27,7 +27,7 @@ import org.junit.BeforeClass; /** * - * @version $Id: SolrExampleJettyTest.java 724175 2008-12-07 19:07:11Z ryan $ + * * @since solr 1.3 */ public class SolrExampleStreamingTest extends SolrExampleTests { diff --git a/solr/src/test/org/apache/solr/client/solrj/embedded/TestSolrProperties.java b/solr/src/test/org/apache/solr/client/solrj/embedded/TestSolrProperties.java index 2f08b1de0f7..4df93f2d120 100644 --- a/solr/src/test/org/apache/solr/client/solrj/embedded/TestSolrProperties.java +++ b/solr/src/test/org/apache/solr/client/solrj/embedded/TestSolrProperties.java @@ -49,7 +49,7 @@ import org.w3c.dom.Document; import org.w3c.dom.Node; /** - * @version $Id$ + * * @since solr 1.3 */ public class TestSolrProperties extends LuceneTestCase { diff --git a/solr/src/test/org/apache/solr/client/solrj/request/TestUpdateRequestCodec.java b/solr/src/test/org/apache/solr/client/solrj/request/TestUpdateRequestCodec.java index 8059f5f36de..5c8a2df9947 100644 --- 
a/solr/src/test/org/apache/solr/client/solrj/request/TestUpdateRequestCodec.java +++ b/solr/src/test/org/apache/solr/client/solrj/request/TestUpdateRequestCodec.java @@ -37,7 +37,7 @@ import java.util.ArrayList; * Test for UpdateRequestCodec * * @since solr 1.4 - * @version $Id$ + * * @see org.apache.solr.client.solrj.request.UpdateRequest */ public class TestUpdateRequestCodec extends LuceneTestCase { diff --git a/solr/src/test/org/apache/solr/client/solrj/response/AnlysisResponseBaseTest.java b/solr/src/test/org/apache/solr/client/solrj/response/AnlysisResponseBaseTest.java index e1856505e0c..b42adeee19c 100644 --- a/solr/src/test/org/apache/solr/client/solrj/response/AnlysisResponseBaseTest.java +++ b/solr/src/test/org/apache/solr/client/solrj/response/AnlysisResponseBaseTest.java @@ -27,7 +27,7 @@ import java.util.List; /** * A Test case for the {@link AnalysisResponseBase} class. * - * @version $Id$ + * * @since solr 1.4 */ @SuppressWarnings("unchecked") diff --git a/solr/src/test/org/apache/solr/client/solrj/response/DocumentAnalysisResponseTest.java b/solr/src/test/org/apache/solr/client/solrj/response/DocumentAnalysisResponseTest.java index 5cbf67f0865..1c5d47e521a 100644 --- a/solr/src/test/org/apache/solr/client/solrj/response/DocumentAnalysisResponseTest.java +++ b/solr/src/test/org/apache/solr/client/solrj/response/DocumentAnalysisResponseTest.java @@ -27,7 +27,7 @@ import java.util.List; /** * A test for the {@link DocumentAnalysisResponse} class. * - * @version $Id$ + * * @since solr 1.4 */ public class DocumentAnalysisResponseTest extends LuceneTestCase { diff --git a/solr/src/test/org/apache/solr/client/solrj/response/FieldAnalysisResponseTest.java b/solr/src/test/org/apache/solr/client/solrj/response/FieldAnalysisResponseTest.java index f6f62aaa2f9..12f61002104 100644 --- a/solr/src/test/org/apache/solr/client/solrj/response/FieldAnalysisResponseTest.java +++ b/solr/src/test/org/apache/solr/client/solrj/response/FieldAnalysisResponseTest.java @@ -28,7 +28,7 @@ import java.util.List; /** * A test case for the {@link FieldAnalysisResponse} class. 
* - * @version $Id$ + * * @since solr 1.4 */ @SuppressWarnings("unchecked") diff --git a/solr/src/test/org/apache/solr/client/solrj/response/TestSpellCheckResponse.java b/solr/src/test/org/apache/solr/client/solrj/response/TestSpellCheckResponse.java index 7b11bf0f4ac..64e35ff2dad 100644 --- a/solr/src/test/org/apache/solr/client/solrj/response/TestSpellCheckResponse.java +++ b/solr/src/test/org/apache/solr/client/solrj/response/TestSpellCheckResponse.java @@ -34,7 +34,7 @@ import java.util.List; /** * Test for SpellCheckComponent's response in Solrj * - * @version $Id$ + * * @since solr 1.3 */ public class TestSpellCheckResponse extends SolrJettyTestBase { diff --git a/solr/src/test/org/apache/solr/client/solrj/util/ClientUtilsTest.java b/solr/src/test/org/apache/solr/client/solrj/util/ClientUtilsTest.java index 7d2c297c8de..6b9c5e65665 100644 --- a/solr/src/test/org/apache/solr/client/solrj/util/ClientUtilsTest.java +++ b/solr/src/test/org/apache/solr/client/solrj/util/ClientUtilsTest.java @@ -21,7 +21,7 @@ import org.apache.lucene.util.LuceneTestCase; /** * - * @version $Id$ + * * @since solr 1.3 */ public class ClientUtilsTest extends LuceneTestCase { diff --git a/solr/src/test/org/apache/solr/common/util/TestFastInputStream.java b/solr/src/test/org/apache/solr/common/util/TestFastInputStream.java index 68744e85c63..74c43b12ffa 100644 --- a/solr/src/test/org/apache/solr/common/util/TestFastInputStream.java +++ b/solr/src/test/org/apache/solr/common/util/TestFastInputStream.java @@ -26,7 +26,7 @@ import java.util.zip.GZIPOutputStream; /** * Test for FastInputStream. * - * @version $Id$ + * * @see org.apache.solr.common.util.FastInputStream */ public class TestFastInputStream extends LuceneTestCase { diff --git a/solr/src/test/org/apache/solr/core/FakeDeletionPolicy.java b/solr/src/test/org/apache/solr/core/FakeDeletionPolicy.java index 54a76ca854b..721b55f566c 100644 --- a/solr/src/test/org/apache/solr/core/FakeDeletionPolicy.java +++ b/solr/src/test/org/apache/solr/core/FakeDeletionPolicy.java @@ -24,7 +24,7 @@ import java.io.IOException; import java.util.List; /** - * @version $Id$ + * */ public class FakeDeletionPolicy implements IndexDeletionPolicy, NamedListInitializedPlugin { diff --git a/solr/src/test/org/apache/solr/core/TestArbitraryIndexDir.java b/solr/src/test/org/apache/solr/core/TestArbitraryIndexDir.java index fe5557347a7..104c5edeedd 100644 --- a/solr/src/test/org/apache/solr/core/TestArbitraryIndexDir.java +++ b/solr/src/test/org/apache/solr/core/TestArbitraryIndexDir.java @@ -39,7 +39,7 @@ import org.junit.Test; import org.xml.sax.SAXException; /** - * @version $Id$ + * */ public class TestArbitraryIndexDir extends AbstractSolrTestCase{ diff --git a/solr/src/test/org/apache/solr/core/TestJmxIntegration.java b/solr/src/test/org/apache/solr/core/TestJmxIntegration.java index 3977c4a1e94..9e27ddc1370 100644 --- a/solr/src/test/org/apache/solr/core/TestJmxIntegration.java +++ b/solr/src/test/org/apache/solr/core/TestJmxIntegration.java @@ -31,7 +31,7 @@ import java.util.Hashtable; /** * Test for JMX Integration * - * @version $Id$ + * * @since solr 1.3 */ public class TestJmxIntegration extends AbstractSolrTestCase { diff --git a/solr/src/test/org/apache/solr/core/TestJmxMonitoredMap.java b/solr/src/test/org/apache/solr/core/TestJmxMonitoredMap.java index ff91adeb41b..e69e074fbbc 100644 --- a/solr/src/test/org/apache/solr/core/TestJmxMonitoredMap.java +++ b/solr/src/test/org/apache/solr/core/TestJmxMonitoredMap.java @@ -38,7 +38,7 @@ import java.util.Set; /** * Test for 
JmxMonitoredMap * - * @version $Id$ + * * @since solr 1.3 */ public class TestJmxMonitoredMap extends LuceneTestCase { diff --git a/solr/src/test/org/apache/solr/core/TestSolrDeletionPolicy1.java b/solr/src/test/org/apache/solr/core/TestSolrDeletionPolicy1.java index 29f2f5d8cf2..cd02ca298df 100644 --- a/solr/src/test/org/apache/solr/core/TestSolrDeletionPolicy1.java +++ b/solr/src/test/org/apache/solr/core/TestSolrDeletionPolicy1.java @@ -25,7 +25,7 @@ import org.junit.Test; import java.util.Map; /** - * @version $Id$ + * */ public class TestSolrDeletionPolicy1 extends SolrTestCaseJ4 { diff --git a/solr/src/test/org/apache/solr/core/TestSolrDeletionPolicy2.java b/solr/src/test/org/apache/solr/core/TestSolrDeletionPolicy2.java index 61ac85ae1be..cadf084773e 100644 --- a/solr/src/test/org/apache/solr/core/TestSolrDeletionPolicy2.java +++ b/solr/src/test/org/apache/solr/core/TestSolrDeletionPolicy2.java @@ -21,7 +21,7 @@ import org.junit.BeforeClass; import org.junit.Test; /** - * @version $Id$ + * */ public class TestSolrDeletionPolicy2 extends SolrTestCaseJ4 { @BeforeClass diff --git a/solr/src/test/org/apache/solr/handler/AnalysisRequestHandlerTestBase.java b/solr/src/test/org/apache/solr/handler/AnalysisRequestHandlerTestBase.java index c46c2751134..feb827e0a34 100644 --- a/solr/src/test/org/apache/solr/handler/AnalysisRequestHandlerTestBase.java +++ b/solr/src/test/org/apache/solr/handler/AnalysisRequestHandlerTestBase.java @@ -23,7 +23,7 @@ import org.apache.solr.common.util.NamedList; /** * A base class for all analysis request handler tests. * - * @version $Id$ + * * @since solr 1.4 */ public abstract class AnalysisRequestHandlerTestBase extends SolrTestCaseJ4 { diff --git a/solr/src/test/org/apache/solr/handler/DocumentAnalysisRequestHandlerTest.java b/solr/src/test/org/apache/solr/handler/DocumentAnalysisRequestHandlerTest.java index 1753d77cb84..cddf4946ec0 100644 --- a/solr/src/test/org/apache/solr/handler/DocumentAnalysisRequestHandlerTest.java +++ b/solr/src/test/org/apache/solr/handler/DocumentAnalysisRequestHandlerTest.java @@ -40,7 +40,7 @@ import java.io.Reader; /** * A test for {@link DocumentAnalysisRequestHandler}. * - * @version $Id$ + * * @since solr 1.4 */ public class DocumentAnalysisRequestHandlerTest extends AnalysisRequestHandlerTestBase { diff --git a/solr/src/test/org/apache/solr/handler/FieldAnalysisRequestHandlerTest.java b/solr/src/test/org/apache/solr/handler/FieldAnalysisRequestHandlerTest.java index 2443a9b1489..da11b5f1d6a 100644 --- a/solr/src/test/org/apache/solr/handler/FieldAnalysisRequestHandlerTest.java +++ b/solr/src/test/org/apache/solr/handler/FieldAnalysisRequestHandlerTest.java @@ -35,7 +35,7 @@ import java.util.List; /** * A test for {@link FieldAnalysisRequestHandler}. 
* - * @version $Id$ + * * @since solr 1.4 */ public class FieldAnalysisRequestHandlerTest extends AnalysisRequestHandlerTestBase { diff --git a/solr/src/test/org/apache/solr/handler/TestReplicationHandler.java b/solr/src/test/org/apache/solr/handler/TestReplicationHandler.java index ca75ff98c9f..c0df70e7659 100644 --- a/solr/src/test/org/apache/solr/handler/TestReplicationHandler.java +++ b/solr/src/test/org/apache/solr/handler/TestReplicationHandler.java @@ -50,7 +50,7 @@ import java.util.HashMap; /** * Test for ReplicationHandler * - * @version $Id$ + * * @since 1.4 */ public class TestReplicationHandler extends SolrTestCaseJ4 { diff --git a/solr/src/test/org/apache/solr/handler/component/DistributedSpellCheckComponentTest.java b/solr/src/test/org/apache/solr/handler/component/DistributedSpellCheckComponentTest.java index ed0edbb97b6..73efb4b908e 100644 --- a/solr/src/test/org/apache/solr/handler/component/DistributedSpellCheckComponentTest.java +++ b/solr/src/test/org/apache/solr/handler/component/DistributedSpellCheckComponentTest.java @@ -25,7 +25,7 @@ import org.apache.solr.common.params.ModifiableSolrParams; * Test for SpellCheckComponent's distributed querying * * @since solr 1.5 - * @version $Id$ + * * @see org.apache.solr.handler.component.SpellCheckComponent */ public class DistributedSpellCheckComponentTest extends BaseDistributedSearchTestCase { diff --git a/solr/src/test/org/apache/solr/handler/component/DistributedTermsComponentTest.java b/solr/src/test/org/apache/solr/handler/component/DistributedTermsComponentTest.java index bcf91c268d3..89f3b660a0f 100644 --- a/solr/src/test/org/apache/solr/handler/component/DistributedTermsComponentTest.java +++ b/solr/src/test/org/apache/solr/handler/component/DistributedTermsComponentTest.java @@ -22,7 +22,7 @@ import org.apache.solr.BaseDistributedSearchTestCase; /** * Test for TermsComponent distributed querying * - * @version $Id$ + * * @since solr 1.5 */ public class DistributedTermsComponentTest extends BaseDistributedSearchTestCase { diff --git a/solr/src/test/org/apache/solr/request/TestBinaryResponseWriter.java b/solr/src/test/org/apache/solr/request/TestBinaryResponseWriter.java index 703a7e121b5..2215501a2ca 100644 --- a/solr/src/test/org/apache/solr/request/TestBinaryResponseWriter.java +++ b/solr/src/test/org/apache/solr/request/TestBinaryResponseWriter.java @@ -33,7 +33,7 @@ import java.util.UUID; /** * Test for BinaryResponseWriter * - * @version $Id$ + * * @since solr 1.4 */ public class TestBinaryResponseWriter extends AbstractSolrTestCase { diff --git a/solr/src/test/org/apache/solr/request/TestFaceting.java b/solr/src/test/org/apache/solr/request/TestFaceting.java index 140de82e14a..89acdaf18a1 100755 --- a/solr/src/test/org/apache/solr/request/TestFaceting.java +++ b/solr/src/test/org/apache/solr/request/TestFaceting.java @@ -30,7 +30,7 @@ import org.junit.BeforeClass; import org.junit.Test; /** - * @version $Id$ + * */ public class TestFaceting extends SolrTestCaseJ4 { @BeforeClass diff --git a/solr/src/test/org/apache/solr/search/TestDocSet.java b/solr/src/test/org/apache/solr/search/TestDocSet.java index 1e8d1377ee8..e643b8b1cf4 100644 --- a/solr/src/test/org/apache/solr/search/TestDocSet.java +++ b/solr/src/test/org/apache/solr/search/TestDocSet.java @@ -35,7 +35,7 @@ import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.DocIdSetIterator; /** - * @version $Id$ + * */ public class TestDocSet extends LuceneTestCase { Random rand = random; diff --git 
a/solr/src/test/org/apache/solr/search/TestFastLRUCache.java b/solr/src/test/org/apache/solr/search/TestFastLRUCache.java index 2d226fbab0f..0637c29c650 100644 --- a/solr/src/test/org/apache/solr/search/TestFastLRUCache.java +++ b/solr/src/test/org/apache/solr/search/TestFastLRUCache.java @@ -31,7 +31,7 @@ import java.util.concurrent.atomic.AtomicInteger; /** * Test for FastLRUCache * - * @version $Id$ + * * @see org.apache.solr.search.FastLRUCache * @since solr 1.4 */ diff --git a/solr/src/test/org/apache/solr/search/TestQueryUtils.java b/solr/src/test/org/apache/solr/search/TestQueryUtils.java index 3143431cc5e..4904d9ba3e7 100755 --- a/solr/src/test/org/apache/solr/search/TestQueryUtils.java +++ b/solr/src/test/org/apache/solr/search/TestQueryUtils.java @@ -27,7 +27,7 @@ import org.apache.solr.util.AbstractSolrTestCase; import java.util.List; /** - * @version $Id$ + * */ public class TestQueryUtils extends AbstractSolrTestCase { diff --git a/solr/src/test/org/apache/solr/search/TestSearchPerf.java b/solr/src/test/org/apache/solr/search/TestSearchPerf.java index 57e0d86d5f8..36afad063b1 100755 --- a/solr/src/test/org/apache/solr/search/TestSearchPerf.java +++ b/solr/src/test/org/apache/solr/search/TestSearchPerf.java @@ -31,7 +31,7 @@ import java.util.*; import java.io.IOException; /** - * @version $Id$ + * */ public class TestSearchPerf extends AbstractSolrTestCase { diff --git a/solr/src/test/org/apache/solr/spelling/SpellingQueryConverterTest.java b/solr/src/test/org/apache/solr/spelling/SpellingQueryConverterTest.java index b6efa9fb8e6..4cbebf91b1c 100644 --- a/solr/src/test/org/apache/solr/spelling/SpellingQueryConverterTest.java +++ b/solr/src/test/org/apache/solr/spelling/SpellingQueryConverterTest.java @@ -29,7 +29,7 @@ import java.util.Collection; /** * Test for SpellingQueryConverter * - * @version $Id$ + * * @since solr 1.3 */ public class SpellingQueryConverterTest extends LuceneTestCase { diff --git a/solr/src/test/org/apache/solr/util/BitSetPerf.java b/solr/src/test/org/apache/solr/util/BitSetPerf.java index 2836ccc0057..f7324f4dbf7 100644 --- a/solr/src/test/org/apache/solr/util/BitSetPerf.java +++ b/solr/src/test/org/apache/solr/util/BitSetPerf.java @@ -28,7 +28,7 @@ import org.apache.lucene.util.OpenBitSetIterator; * is at least 10 seconds for better accuracy. Close browsers on your system (javascript * or flash may be running and cause more erratic results). * - * @version $Id$ + * */ public class BitSetPerf { static Random rand = new Random(0); diff --git a/solr/src/test/org/apache/solr/util/TestUtils.java b/solr/src/test/org/apache/solr/util/TestUtils.java index e1279e572e1..cf33955266f 100755 --- a/solr/src/test/org/apache/solr/util/TestUtils.java +++ b/solr/src/test/org/apache/solr/util/TestUtils.java @@ -28,7 +28,7 @@ import org.apache.solr.common.util.StrUtils; import org.junit.Assert; /** - * @version $Id$ + * */ public class TestUtils extends LuceneTestCase { public void testSplitEscaping() { diff --git a/solr/src/webapp/src/org/apache/solr/client/solrj/embedded/EmbeddedSolrServer.java b/solr/src/webapp/src/org/apache/solr/client/solrj/embedded/EmbeddedSolrServer.java index 95ce0fc44d7..dbae78cbdc6 100644 --- a/solr/src/webapp/src/org/apache/solr/client/solrj/embedded/EmbeddedSolrServer.java +++ b/solr/src/webapp/src/org/apache/solr/client/solrj/embedded/EmbeddedSolrServer.java @@ -56,7 +56,7 @@ import org.apache.solr.servlet.SolrRequestParsers; * TODO -- this implementation sends the response to XML and then parses it. 
* It *should* be able to convert the response directly into a named list. * - * @version $Id$ + * * @since solr 1.3 */ public class EmbeddedSolrServer extends SolrServer diff --git a/solr/src/webapp/src/org/apache/solr/servlet/DirectSolrConnection.java b/solr/src/webapp/src/org/apache/solr/servlet/DirectSolrConnection.java index 28943a9df97..0ec4a902dda 100644 --- a/solr/src/webapp/src/org/apache/solr/servlet/DirectSolrConnection.java +++ b/solr/src/webapp/src/org/apache/solr/servlet/DirectSolrConnection.java @@ -48,7 +48,7 @@ import org.apache.solr.schema.IndexSchema; * This class is designed to be as simple as possible and allow for more flexibility * in how you interface to Solr. * - * @version $Id$ + * * @since solr 1.2 */ public class DirectSolrConnection diff --git a/solr/src/webapp/src/org/apache/solr/servlet/LogLevelSelection.java b/solr/src/webapp/src/org/apache/solr/servlet/LogLevelSelection.java index e8996e26346..01e58eb9066 100644 --- a/solr/src/webapp/src/org/apache/solr/servlet/LogLevelSelection.java +++ b/solr/src/webapp/src/org/apache/solr/servlet/LogLevelSelection.java @@ -32,7 +32,7 @@ import java.util.logging.Logger; /** * Admin JDK Logger level report and selection servlet. * - * @version $Id$ + * * @since solr 1.3 */ public final class LogLevelSelection extends HttpServlet { From b9455527c5f056fdfd315f4d1d8c3f7e71e9fa51 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Thu, 9 Jun 2011 12:25:00 +0000 Subject: [PATCH 107/116] LUCENE-3108: make IR#docValues final in IR git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1133823 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/src/java/org/apache/lucene/index/IndexReader.java | 2 +- lucene/src/java/org/apache/lucene/index/SegmentReader.java | 6 ------ .../src/java/org/apache/lucene/index/values/IntsImpl.java | 1 - 3 files changed, 1 insertion(+), 8 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/index/IndexReader.java b/lucene/src/java/org/apache/lucene/index/IndexReader.java index 3f766fd2720..9d229dfcf0e 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexReader.java +++ b/lucene/src/java/org/apache/lucene/index/IndexReader.java @@ -1587,7 +1587,7 @@ public abstract class IndexReader implements Cloneable,Closeable { throw new UnsupportedOperationException("This reader does not support this method."); } - public IndexDocValues docValues(String field) throws IOException { + public final IndexDocValues docValues(String field) throws IOException { final PerDocValues perDoc = perDocValues(); if (perDoc == null) { return null; diff --git a/lucene/src/java/org/apache/lucene/index/SegmentReader.java b/lucene/src/java/org/apache/lucene/index/SegmentReader.java index e2047214ad1..451f6d4c530 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentReader.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentReader.java @@ -841,13 +841,7 @@ public class SegmentReader extends IndexReader implements Cloneable { // longer used (all SegmentReaders sharing it have been // closed). 
} - - @Override - public IndexDocValues docValues(String field) throws IOException { - return core.perDocProducer.docValues(field); - } - @Override public PerDocValues perDocValues() throws IOException { return core.perDocProducer; diff --git a/lucene/src/java/org/apache/lucene/index/values/IntsImpl.java b/lucene/src/java/org/apache/lucene/index/values/IntsImpl.java index 29d6c929a53..dc626e6fa84 100644 --- a/lucene/src/java/org/apache/lucene/index/values/IntsImpl.java +++ b/lucene/src/java/org/apache/lucene/index/values/IntsImpl.java @@ -138,7 +138,6 @@ class IntsImpl { private void writePackedInts(int docCount) throws IOException { datOut.writeByte(PACKED); - // TODO -- long can't work right since it's signed datOut.writeLong(minValue); // write a default value to recognize docs without a value for that // field From e0c2a0186bbc45130627bc68f4b4a0ae8b3ce177 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Thu, 9 Jun 2011 12:31:48 +0000 Subject: [PATCH 108/116] merged with trunk git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1133828 13f79535-47bb-0310-9956-ffa450edef68 From b662041cbc6b321d318d16b2f92f1da2d3abe6c2 Mon Sep 17 00:00:00 2001 From: Uwe Schindler Date: Thu, 9 Jun 2011 12:54:47 +0000 Subject: [PATCH 109/116] fix small issue in docs git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1133839 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/docs/fileformats.html | 4 ++-- lucene/docs/fileformats.pdf | Bin 68949 -> 68985 bytes .../content/xdocs/fileformats.xml | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lucene/docs/fileformats.html b/lucene/docs/fileformats.html index 0b2b610ff7a..6c627fbdd4e 100644 --- a/lucene/docs/fileformats.html +++ b/lucene/docs/fileformats.html @@ -1859,7 +1859,7 @@ document.write("Last Published: " + document.lastModified); (if compression is enabled, the algorithm used is ZLIB), only available for indexes until Lucene version 2.9.x -
  • 4th to 6th bits (mask: 0x7<<3) define the type of a
+ • 4th to 6th bit (mask: 0x7<<3) define the type of a numeric field:
    • all bits in mask are cleared if no numeric field at all
    • 1<<3: Value is Int
    • 2<<3: Value is Long
-   • 3<<3: Value is Int as Float (as of Integer.intBitsToFloat)
+   • 3<<3: Value is Int as Float (as of Float.intBitsToFloat)
    • 4<<3: Value is Long as Double (as of Double.longBitsToDouble)
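The list above (and its XML twin below) documents how the flag byte of a stored field encodes numeric types; the hunk's only substantive change is the decoding method named in the 3<<3 case, since Integer has no intBitsToFloat method (the real one is Float.intBitsToFloat). A minimal sketch of the decoding the corrected text describes; the helper name and signature are illustrative, not the actual Lucene FieldsReader code:

// Hypothetical helper, not Lucene source: decode the numeric-field type
// encoded in the 4th to 6th bit (mask 0x7<<3) of a stored field's flag byte.
static Object decodeNumericValue(byte flags, long raw) {
  switch ((flags >>> 3) & 0x7) {
    case 0: return null;                             // no numeric field at all
    case 1: return (int) raw;                        // 1<<3: Int
    case 2: return raw;                              // 2<<3: Long
    case 3: return Float.intBitsToFloat((int) raw);  // 3<<3: Int as Float
    case 4: return Double.longBitsToDouble(raw);     // 4<<3: Long as Double
    default: throw new IllegalArgumentException("unknown numeric type");
  }
}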
  • diff --git a/lucene/docs/fileformats.pdf b/lucene/docs/fileformats.pdf index 98b1746c4f34865586bb77841d643feb8337c319..99f8a8fba8d22d75fc1ba76a7493fdfce44af028 100644
GIT binary patch delta 5586 [unreadable base85 payload omitted] delta 5531 [unreadable base85 payload omitted]
[hunk from .../content/xdocs/fileformats.xml:] third bit is one for fields with compression option enabled (if compression is enabled, the algorithm used is ZLIB), only available for indexes until Lucene version 2.9.x -
    • - 4th to 6th bits (mask: 0x7<<3) define the type of a numeric field:
    • + 4th to 6th bit (mask: 0x7<<3) define the type of a numeric field:
      • all bits in mask are cleared if no numeric field at all
      • 1<<3: Value is Int
      • 2<<3: Value is Long
      • - 3<<3: Value is Int as Float (as of Integer.intBitsToFloat)
      • + 3<<3: Value is Int as Float (as of Float.intBitsToFloat)
      • 4<<3: Value is Long as Double (as of Double.longBitsToDouble)
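As a reader aid, the encoding above amounts to a 3-bit selector packed into the field-flags byte, with the float and double variants round-tripping through raw-bits conversions in java.lang. The sketch below is illustrative only: the class name, method, and the raw parameter are hypothetical, and only the 0x7<<3 mask plus the Float.intBitsToFloat and Double.longBitsToDouble calls come from the documented format.

    // Hypothetical decoder for the numeric-type bits documented above.
    // Only the mask (0x7 << 3) and the java.lang bit conversions come from
    // the format description; all names here are illustrative.
    public final class NumericTypeBits {
      private static final int NUMERIC_MASK = 0x7 << 3;

      public static Number decode(byte fieldBits, long raw) {
        switch ((fieldBits & NUMERIC_MASK) >>> 3) {
          case 0: return null;                             // all mask bits cleared: not a numeric field
          case 1: return (int) raw;                        // Value is Int
          case 2: return raw;                              // Value is Long
          case 3: return Float.intBitsToFloat((int) raw);  // Int bits read back as Float
          case 4: return Double.longBitsToDouble(raw);     // Long bits read back as Double
          default: throw new IllegalArgumentException("unknown numeric type bits");
        }
      }
    }

Storing floats and doubles as their raw integer bits keeps the on-disk value a plain fixed-width integer while still reconstructing the exact floating-point value on read.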
    From 8a9d2ce4fa315efdf78e66c6571b3dc3b60a9740 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Thu, 9 Jun 2011 13:11:28 +0000 Subject: [PATCH 110/116] LUCENE-3108: manually merged with trunk git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1133855 13f79535-47bb-0310-9956-ffa450edef68 --- .../maven/lucene/contrib/ant/pom.xml.template | 114 ------------------ .../lucene/contrib/db/bdb-je/pom.xml.template | 75 ------------ .../lucene/contrib/db/bdb/pom.xml.template | 86 ------------- .../maven/lucene/contrib/db/pom.xml.template | 49 -------- .../lucene/contrib/lucli/pom.xml.template | 107 ---------------- .../lucene/contrib/swing/pom.xml.template | 100 --------------- .../icu/lib/lucene-icu4j-pom.xml.template | 36 ------ .../org/apache/solr/SolrInfoMBeanTest.java | 5 +- 8 files changed, 1 insertion(+), 571 deletions(-) delete mode 100644 dev-tools/maven/lucene/contrib/ant/pom.xml.template delete mode 100644 dev-tools/maven/lucene/contrib/db/bdb-je/pom.xml.template delete mode 100644 dev-tools/maven/lucene/contrib/db/bdb/pom.xml.template delete mode 100644 dev-tools/maven/lucene/contrib/db/pom.xml.template delete mode 100644 dev-tools/maven/lucene/contrib/lucli/pom.xml.template delete mode 100644 dev-tools/maven/lucene/contrib/swing/pom.xml.template delete mode 100644 modules/analysis/icu/lib/lucene-icu4j-pom.xml.template diff --git a/dev-tools/maven/lucene/contrib/ant/pom.xml.template b/dev-tools/maven/lucene/contrib/ant/pom.xml.template deleted file mode 100644 index e300d0f8ada..00000000000 --- a/dev-tools/maven/lucene/contrib/ant/pom.xml.template +++ /dev/null @@ -1,114 +0,0 @@ - - - 4.0.0 - - org.apache.lucene - lucene-parent - @version@ - ../../pom.xml - - org.apache.lucene - lucene-ant - jar - Lucene Contrib Ant - Ant task to create Lucene indexes - - lucene/contrib/ant - ../../build/contrib/ant - - - - ${project.groupId} - lucene-core - ${project.version} - - - ${project.groupId} - lucene-test-framework - ${project.version} - test - - - ${project.groupId} - lucene-analyzers-common - ${project.version} - - - jtidy - jtidy - - - org.apache.ant - ant - - - org.apache.ant - ant-junit - test - - - junit - junit - test - - - - ${build-directory} - ${build-directory}/classes/java - ${build-directory}/classes/test - src/java - src/test - - - src/resources - - - - - ${project.build.testSourceDirectory} - - **/*.java - - - - - - org.codehaus.mojo - appassembler-maven-plugin - - -Xmx128M - flat - - windows - unix - - - - org.apache.lucene.ant.HtmlDocument - HtmlDocument - - - - - - - diff --git a/dev-tools/maven/lucene/contrib/db/bdb-je/pom.xml.template b/dev-tools/maven/lucene/contrib/db/bdb-je/pom.xml.template deleted file mode 100644 index a5d7647b526..00000000000 --- a/dev-tools/maven/lucene/contrib/db/bdb-je/pom.xml.template +++ /dev/null @@ -1,75 +0,0 @@ - - - 4.0.0 - - org.apache.lucene - lucene-parent - @version@ - ../../../pom.xml - - org.apache.lucene - lucene-bdb-je - jar - Lucene Contrib bdb-je - Berkeley DB based Directory implementation - - lucene/contrib/db/bdb-je - ../../../build/contrib/db/bdb-je - - - - ${project.groupId} - lucene-core - ${project.version} - - - ${project.groupId} - lucene-test-framework - ${project.version} - test - - - com.sleepycat - berkeleydb-je - - - junit - junit - test - - - - ${build-directory} - ${build-directory}/classes/java - ${build-directory}/classes/test - src/java - src/test - - - ${project.build.testSourceDirectory} - - **/*.java - - - - - diff --git a/dev-tools/maven/lucene/contrib/db/bdb/pom.xml.template 
b/dev-tools/maven/lucene/contrib/db/bdb/pom.xml.template deleted file mode 100644 index d5d5123ab5f..00000000000 --- a/dev-tools/maven/lucene/contrib/db/bdb/pom.xml.template +++ /dev/null @@ -1,86 +0,0 @@ - - - 4.0.0 - - org.apache.lucene - lucene-parent - @version@ - ../../../pom.xml - - org.apache.lucene - lucene-bdb - jar - Lucene Contrib bdb - Berkeley DB based Directory implementation - - lucene/contrib/db/bdb - ../../../build/contrib/db/bdb - - - - ${project.groupId} - lucene-core - ${project.version} - - - ${project.groupId} - lucene-test-framework - ${project.version} - test - - - com.sleepycat - berkeleydb - - - junit - junit - test - - - - ${build-directory} - ${build-directory}/classes/java - ${build-directory}/classes/test - src/java - src/test - - - ${project.build.testSourceDirectory} - - **/*.java - - - - - - org.apache.maven.plugins - maven-surefire-plugin - - - - true - - - - - diff --git a/dev-tools/maven/lucene/contrib/db/pom.xml.template b/dev-tools/maven/lucene/contrib/db/pom.xml.template deleted file mode 100644 index aa3b0daa77f..00000000000 --- a/dev-tools/maven/lucene/contrib/db/pom.xml.template +++ /dev/null @@ -1,49 +0,0 @@ - - - 4.0.0 - - org.apache.lucene - lucene-parent - @version@ - ../../pom.xml - - org.apache.lucene - lucene-db-aggregator - Lucene Database aggregator POM - pom - - bdb - bdb-je - - - ../../build/contrib/db/lucene-db-aggregator - - - org.apache.maven.plugins - maven-deploy-plugin - - true - - - - - diff --git a/dev-tools/maven/lucene/contrib/lucli/pom.xml.template b/dev-tools/maven/lucene/contrib/lucli/pom.xml.template deleted file mode 100644 index efa4c53a882..00000000000 --- a/dev-tools/maven/lucene/contrib/lucli/pom.xml.template +++ /dev/null @@ -1,107 +0,0 @@ - - - 4.0.0 - - org.apache.lucene - lucene-parent - @version@ - ../../pom.xml - - org.apache.lucene - lucene-lucli - jar - Lucene Lucli - Lucene Command Line Interface - - lucene/contrib/lucli - ../../build/contrib/lucli - - - - ${project.groupId} - lucene-core - ${project.version} - - - ${project.groupId} - lucene-test-framework - ${project.version} - test - - - ${project.groupId} - lucene-analyzers-common - ${project.version} - - - jline - jline - - - junit - junit - test - - - - ${build-directory} - ${build-directory}/classes/java - ${build-directory}/classes/test - src/java - src/test - - - ${project.build.testSourceDirectory} - - **/*.java - - - - - - org.apache.maven.plugins - maven-javadoc-plugin - - true - - - - org.codehaus.mojo - appassembler-maven-plugin - - -Xmx128M - flat - - windows - unix - - - - lucli.Lucli - lucli - - - - - - - diff --git a/dev-tools/maven/lucene/contrib/swing/pom.xml.template b/dev-tools/maven/lucene/contrib/swing/pom.xml.template deleted file mode 100644 index bb5178d1b60..00000000000 --- a/dev-tools/maven/lucene/contrib/swing/pom.xml.template +++ /dev/null @@ -1,100 +0,0 @@ - - - 4.0.0 - - org.apache.lucene - lucene-parent - @version@ - ../../pom.xml - - org.apache.lucene - lucene-swing - jar - Lucene Swing - Swing Models - - lucene/contrib/swing - ../../build/contrib/swing - - - - ${project.groupId} - lucene-core - ${project.version} - - - ${project.groupId} - lucene-test-framework - ${project.version} - test - - - ${project.groupId} - lucene-analyzers-common - ${project.version} - - - junit - junit - test - - - - ${build-directory} - ${build-directory}/classes/java - ${build-directory}/classes/test - src/java - src/test - - - ${project.build.testSourceDirectory} - - **/*.java - - - - - - org.codehaus.mojo - appassembler-maven-plugin - - 
-Xmx128M - flat - - windows - unix - - - - org.apache.lucene.swing.models.ListSearcherSimulator - ListSearchSimulator - - - org.apache.lucene.swing.models.TableSearcherSimulator - TableSearchSimulator - - - - - - - diff --git a/modules/analysis/icu/lib/lucene-icu4j-pom.xml.template b/modules/analysis/icu/lib/lucene-icu4j-pom.xml.template deleted file mode 100644 index 2b481863de8..00000000000 --- a/modules/analysis/icu/lib/lucene-icu4j-pom.xml.template +++ /dev/null @@ -1,36 +0,0 @@ - - - - - - org.apache.lucene - lucene-parent - @version@ - - 4.0.0 - org.apache.lucene - lucene-icu4j - Lucene Specific ICU4J - @version@ - Lucene Specific ICU4J v4.6 - jar - diff --git a/solr/src/test/org/apache/solr/SolrInfoMBeanTest.java b/solr/src/test/org/apache/solr/SolrInfoMBeanTest.java index 8d561c961c7..ed83863f926 100644 --- a/solr/src/test/org/apache/solr/SolrInfoMBeanTest.java +++ b/solr/src/test/org/apache/solr/SolrInfoMBeanTest.java @@ -24,7 +24,6 @@ import org.apache.solr.handler.component.SearchHandler; import org.apache.solr.highlight.DefaultSolrHighlighter; import org.apache.solr.search.LRUCache; import org.junit.BeforeClass; -import org.junit.Ignore; import java.io.File; import java.net.URL; import java.util.ArrayList; @@ -45,9 +44,7 @@ public class SolrInfoMBeanTest extends SolrTestCaseJ4 * Gets a list of everything we can find in the classpath and makes sure it has * a name, description, etc... */ - @Ignore // TODO: reenable once SOLR-2160 is fixed public void testCallMBeanInfo() throws Exception { -// Object[] init = org.apache.solr.search.QParserPlugin.standardPlugins; List classes = new ArrayList(); classes.addAll(getClassesForPackage(StandardRequestHandler.class.getPackage().getName())); classes.addAll(getClassesForPackage(SearchHandler.class.getPackage().getName())); @@ -88,7 +85,7 @@ public class SolrInfoMBeanTest extends SolrTestCaseJ4 } } assertTrue( "there are at least 10 SolrInfoMBean that should be found in the classpath, found " + checked, checked > 10 ); - } + } private static List getClassesForPackage(String pckgname) throws Exception { ArrayList directories = new ArrayList(); From a54dbc985c6bb94f9769a0a2e7a80a969e59aec1 Mon Sep 17 00:00:00 2001 From: Yonik Seeley Date: Thu, 9 Jun 2011 13:48:11 +0000 Subject: [PATCH 111/116] debugging: show when coord is disabled git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1133872 13f79535-47bb-0310-9956-ffa450edef68 --- solr/src/java/org/apache/solr/search/QueryParsing.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/solr/src/java/org/apache/solr/search/QueryParsing.java b/solr/src/java/org/apache/solr/search/QueryParsing.java index 66c5da7e278..9644625737b 100644 --- a/solr/src/java/org/apache/solr/search/QueryParsing.java +++ b/solr/src/java/org/apache/solr/search/QueryParsing.java @@ -465,7 +465,7 @@ public class QueryParsing { BooleanQuery q = (BooleanQuery) query; boolean needParens = false; - if (q.getBoost() != 1.0 || q.getMinimumNumberShouldMatch() != 0) { + if (q.getBoost() != 1.0 || q.getMinimumNumberShouldMatch() != 0 || q.isCoordDisabled()) { needParens = true; } if (needParens) { @@ -511,6 +511,9 @@ public class QueryParsing { out.append('~'); out.append(Integer.toString(q.getMinimumNumberShouldMatch())); } + if (q.isCoordDisabled()) { + out.append("/no_coord"); + } } else if (query instanceof PrefixQuery) { PrefixQuery q = (PrefixQuery) query; From 3ad6ba55d521946fc614ddd3436ef2428b57f8be Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Thu, 9 Jun 2011 15:32:17 +0000 Subject: 
[PATCH 112/116] LUCENE-3183: fix corner case seeking to Term(, ) git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1133937 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 5 +++++ .../index/codecs/preflex/SegmentTermEnum.java | 8 ++++++-- .../index/codecs/preflex/TermInfosReader.java | 9 ++++++++- .../org/apache/lucene/index/TestIndexWriter.java | 15 +++++++++++++++ 4 files changed, 34 insertions(+), 3 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 1b84eb4b5d2..eef81536a9f 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -468,6 +468,11 @@ Bug fixes * LUCENE-3102: CachingCollector.replay was failing to call setScorer per-segment (Martijn van Groningen via Mike McCandless) +* LUCENE-3183: Fix rare corner case where seeking to empty term + (field="", term="") with terms index interval 1 could hit + ArrayIndexOutOfBoundsException (selckin, Robert Muir, Mike + McCandless) + New Features * LUCENE-3140: Added experimental FST implementation to Lucene. diff --git a/lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermEnum.java b/lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermEnum.java index fb7c8ceec46..6c6681d6a1d 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermEnum.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermEnum.java @@ -153,8 +153,12 @@ public final class SegmentTermEnum implements Cloneable { return true; } - /** Optimized scan, without allocating new terms. - * Return number of invocations to next(). */ + /* Optimized scan, without allocating new terms. + * Return number of invocations to next(). + * + * NOTE: LUCENE-3183: if you pass Term("", "") here then this + * will incorrectly return before positioning the enum, + * and position will be -1; caller must detect this. 
*/ final int scanTo(Term term) throws IOException { scanBuffer.set(term); int count = 0; diff --git a/lucene/src/java/org/apache/lucene/index/codecs/preflex/TermInfosReader.java b/lucene/src/java/org/apache/lucene/index/codecs/preflex/TermInfosReader.java index c5fd09b7741..8178bd33cf6 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/preflex/TermInfosReader.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/preflex/TermInfosReader.java @@ -57,6 +57,7 @@ public final class TermInfosReader { final long termOrd; public TermInfoAndOrd(TermInfo ti, long termOrd) { super(ti); + assert termOrd >= 0; this.termOrd = termOrd; } } @@ -306,7 +307,13 @@ public final class TermInfosReader { ti = enumerator.termInfo; if (tiOrd == null) { if (useCache) { - termsCache.put(new CloneableTerm(term), new TermInfoAndOrd(ti, enumerator.position)); + // LUCENE-3183: it's possible, if term is Term("", + // ""), for the STE to be incorrectly un-positioned + // after scan-to; work around this by not caching in + // this case: + if (enumerator.position >= 0) { + termsCache.put(new CloneableTerm(term), new TermInfoAndOrd(ti, enumerator.position)); + } } } else { assert sameTermInfo(ti, tiOrd, enumerator); diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java b/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java index 536a7749a22..ecceb81e2fa 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java @@ -73,6 +73,7 @@ import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.ThreadInterruptedException; import org.apache.lucene.util.UnicodeUtil; import org.apache.lucene.util._TestUtil; +import org.apache.lucene.index.codecs.preflexrw.PreFlexRWCodec; public class TestIndexWriter extends LuceneTestCase { @@ -1763,4 +1764,18 @@ public class TestIndexWriter extends LuceneTestCase { reader.close(); dir.close(); } + + // LUCENE-3183 + public void testEmptyFieldNameTIIOne() throws IOException { + Directory dir = newDirectory(); + IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)); + iwc.setTermIndexInterval(1); + iwc.setReaderTermsIndexDivisor(1); + IndexWriter writer = new IndexWriter(dir, iwc); + Document doc = new Document(); + doc.add(newField("", "a b c", Field.Store.NO, Field.Index.ANALYZED)); + writer.addDocument(doc); + writer.close(); + dir.close(); + } } From 739efd591378090757b684ccbcd51b442daef2fb Mon Sep 17 00:00:00 2001 From: Koji Sekiguchi Date: Fri, 10 Jun 2011 01:16:08 +0000 Subject: [PATCH 113/116] SOLR-2579: UIMAUpdateRequestProcessor ignore error fails if text.length() < 100 git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1134163 13f79535-47bb-0310-9956-ffa450edef68 --- solr/contrib/uima/CHANGES.txt | 6 ++++- .../processor/UIMAUpdateRequestProcessor.java | 9 ++++--- .../UIMAUpdateRequestProcessorTest.java | 25 +++++++++++++++++++ 3 files changed, 35 insertions(+), 5 deletions(-) diff --git a/solr/contrib/uima/CHANGES.txt b/solr/contrib/uima/CHANGES.txt index 72b497236ee..80b3c500fc5 100644 --- a/solr/contrib/uima/CHANGES.txt +++ b/solr/contrib/uima/CHANGES.txt @@ -24,8 +24,12 @@ $Id$ (No Changes) ================== 3.3.0-dev ============== + +Bug Fixes +---------------------- -(No Changes) +* SOLR-2579: UIMAUpdateRequestProcessor ignore error fails if text.length() < 100. 
+ (Elmer Garduno via koji) ================== 3.2.0 ================== diff --git a/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessor.java b/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessor.java index 69d8d934690..f0aa2b2e89c 100644 --- a/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessor.java +++ b/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessor.java @@ -89,15 +89,16 @@ public class UIMAUpdateRequestProcessor extends UpdateRequestProcessor { new StringBuilder(". ").append(logField).append("=") .append((String)cmd.getSolrInputDocument().getField(logField).getValue()) .append(", ").toString(); - if (solrUIMAConfiguration.isIgnoreErrors()) + int len = Math.min(text.length(), 100); + if (solrUIMAConfiguration.isIgnoreErrors()) { log.warn(new StringBuilder("skip the text processing due to ") .append(e.getLocalizedMessage()).append(optionalFieldInfo) - .append(" text=\"").append(text.substring(0, 100)).append("...\"").toString()); - else{ + .append(" text=\"").append(text.substring(0, len)).append("...\"").toString()); + } else { throw new SolrException(ErrorCode.SERVER_ERROR, new StringBuilder("processing error: ") .append(e.getLocalizedMessage()).append(optionalFieldInfo) - .append(" text=\"").append(text.substring(0, 100)).append("...\"").toString(), e); + .append(" text=\"").append(text.substring(0, len)).append("...\"").toString(), e); } } super.processAdd(cmd); diff --git a/solr/contrib/uima/src/test/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessorTest.java b/solr/contrib/uima/src/test/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessorTest.java index b921d15808b..74164a54aef 100644 --- a/solr/contrib/uima/src/test/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessorTest.java +++ b/solr/contrib/uima/src/test/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessorTest.java @@ -24,6 +24,7 @@ import java.util.HashMap; import java.util.Map; import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.common.SolrException; import org.apache.solr.common.params.MultiMapSolrParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.params.UpdateParams; @@ -158,6 +159,30 @@ public class UIMAUpdateRequestProcessorTest extends SolrTestCaseJ4 { + " Last Lucene European Conference has been held in Prague.")); assertU(commit()); assertQ(req("*:*"), "//*[@numFound='1']"); + + try{ + addDoc("uima-not-ignoreErrors", adoc( + "id", + "2312312321312", + "text", + "SpellCheckComponent got improvement related to recent Lucene changes.")); + fail("exception shouldn't be ignored"); + } + catch(StringIndexOutOfBoundsException e){ // SOLR-2579 + fail("exception shouldn't be raised"); + } + catch(SolrException expected){} + + try{ + addDoc("uima-ignoreErrors", adoc( + "id", + "2312312321312", + "text", + "SpellCheckComponent got improvement related to recent Lucene changes.")); + } + catch(StringIndexOutOfBoundsException e){ // SOLR-2579 + fail("exception shouldn't be raised"); + } } private void addDoc(String chain, String doc) throws Exception { From 6f8344a2f764047a0c1e2c0fc8e75f07fbe96301 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Fri, 10 Jun 2011 13:49:57 +0000 Subject: [PATCH 114/116] cut over to IOUtils#closeSafely git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1134326 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene/index/PerFieldCodecWrapper.java | 
50 ++----------------- 1 file changed, 3 insertions(+), 47 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java b/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java index 7e8b73aa060..0a9a18718b1 100644 --- a/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java +++ b/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java @@ -237,17 +237,7 @@ final class PerFieldCodecWrapper extends Codec { success = true; } finally { if (!success) { - // If we hit exception (eg, IOE because writer was - // committing, or, for any other reason) we must - // go back and close all FieldsProducers we opened: - for(PerDocValues producer : producers.values()) { - try { - producer.close(); - } catch (Throwable t) { - // Suppress all exceptions here so we continue - // to throw the original one - } - } + IOUtils.closeSafely(true, producers.values()); } } } @@ -266,24 +256,7 @@ final class PerFieldCodecWrapper extends Codec { } public void close() throws IOException { - final Collection values = codecs.values(); - IOException err = null; - for (PerDocValues perDocValues : values) { - try { - if (perDocValues != null) { - perDocValues.close(); - } - } catch (IOException ioe) { - // keep first IOException we hit but keep - // closing the rest - if (err == null) { - err = ioe; - } - } - } - if (err != null) { - throw err; - } + IOUtils.closeSafely(false, codecs.values()); } } @@ -300,24 +273,7 @@ final class PerFieldCodecWrapper extends Codec { } public void close() throws IOException { - IOException err = null; - for (int i = 0; i < consumers.length; i++) { - try { - final PerDocConsumer next = consumers[i]; - if (next != null) { - next.close(); - } - } catch (IOException ioe) { - // keep first IOException we hit but keep - // closing the rest - if (err == null) { - err = ioe; - } - } - } - if (err != null) { - throw err; - } + IOUtils.closeSafely(false, consumers); } @Override From eca56e0564d6b1feea54a51e0f7bf09a90533fd3 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Fri, 10 Jun 2011 14:00:32 +0000 Subject: [PATCH 115/116] LUCENE-152: minor optimization to avoid some char[]/String creation git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1134328 13f79535-47bb-0310-9956-ffa450edef68 --- .../src/java/org/apache/lucene/analysis/en/KStemFilter.java | 3 +-- .../src/java/org/apache/lucene/analysis/en/KStemmer.java | 4 ++++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/en/KStemFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/en/KStemFilter.java index 859180a1987..6e88ab9446d 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/en/KStemFilter.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/en/KStemFilter.java @@ -55,8 +55,7 @@ public final class KStemFilter extends TokenFilter { char[] term = termAttribute.buffer(); int len = termAttribute.length(); if ((!keywordAtt.isKeyword()) && stemmer.stem(term, len)) { - char[] chars = stemmer.asString().toCharArray(); - termAttribute.copyBuffer(chars, 0, chars.length); + termAttribute.setEmpty().append(stemmer.asCharSequence()); } return true; diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/en/KStemmer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/en/KStemmer.java index 4e6cba3cc3d..3cfd81319af 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/en/KStemmer.java +++ 
b/modules/analysis/common/src/java/org/apache/lucene/analysis/en/KStemmer.java @@ -1373,6 +1373,10 @@ public class KStemmer { return word.toString(); } + CharSequence asCharSequence() { + return result != null ? result : word; + } + String getString() { return result; } From cf94c3bd4968512d182b5979b1bac7eae6d94f7b Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Fri, 10 Jun 2011 15:22:01 +0000 Subject: [PATCH 116/116] removed unused imports git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1134340 13f79535-47bb-0310-9956-ffa450edef68 --- .../java/org/apache/solr/handler/component/StatsComponent.java | 1 - solr/src/java/org/apache/solr/search/function/StrDocValues.java | 1 - 2 files changed, 2 deletions(-) diff --git a/solr/src/java/org/apache/solr/handler/component/StatsComponent.java b/solr/src/java/org/apache/solr/handler/component/StatsComponent.java index 805ad10c668..33dd18b075f 100644 --- a/solr/src/java/org/apache/solr/handler/component/StatsComponent.java +++ b/solr/src/java/org/apache/solr/handler/component/StatsComponent.java @@ -24,7 +24,6 @@ import java.util.Map; import org.apache.lucene.search.FieldCache; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRef; -import org.apache.noggit.CharArr; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.params.StatsParams; import org.apache.solr.common.params.ShardParams; diff --git a/solr/src/java/org/apache/solr/search/function/StrDocValues.java b/solr/src/java/org/apache/solr/search/function/StrDocValues.java index 5726824388c..40d398eb8d8 100644 --- a/solr/src/java/org/apache/solr/search/function/StrDocValues.java +++ b/solr/src/java/org/apache/solr/search/function/StrDocValues.java @@ -1,7 +1,6 @@ package org.apache.solr.search.function; import org.apache.lucene.util.BytesRef; -import org.apache.noggit.CharArr; import org.apache.solr.search.MutableValue; import org.apache.solr.search.MutableValueFloat; import org.apache.solr.search.MutableValueStr;
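A closing note on the IOUtils#closeSafely cut-over in PATCH 114: each of the hand-rolled loops it deletes implements the same contract, namely close every resource, remember only the first IOException, and either rethrow it afterwards or, when invoked with true from a failure path, suppress it so the original exception stays visible. The helper below is a minimal re-implementation of that contract for illustration; Lucene's actual IOUtils.closeSafely is the authoritative version.

    import java.io.Closeable;
    import java.io.IOException;

    final class CloseSafelySketch {
      // Mirrors the contract of the loops deleted in PerFieldCodecWrapper:
      // close everything, keep only the first IOException, and optionally
      // suppress it when another exception is already propagating.
      static void closeSafely(boolean suppressExceptions,
                              Iterable<? extends Closeable> objects) throws IOException {
        IOException firstError = null;
        for (Closeable c : objects) {
          try {
            if (c != null) {
              c.close();
            }
          } catch (IOException e) {
            if (firstError == null) {
              firstError = e;  // remember the first failure, keep closing the rest
            }
          }
        }
        if (!suppressExceptions && firstError != null) {
          throw firstError;
        }
      }
    }

That is why the !success path in the producer-opening code passes true: a secondary failure during cleanup must not mask the exception that triggered the cleanup in the first place.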