From e4cb8e3a176243bae67da03365c96cd6d49a0eeb Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Thu, 5 Jan 2012 16:21:17 +0000 Subject: [PATCH] LUCENE-3628: Cut over Norms to DocValues git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1227676 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 3 + .../lucene/index/memory/MemoryIndex.java | 57 ++-- .../memory/MemoryIndexNormDocValues.java | 78 +++++ .../lucene/codecs/BlockTreeTermsWriter.java | 5 +- .../lucene/codecs/DocValuesConsumer.java | 117 +++++++- .../lucene/codecs/DocValuesReaderBase.java | 18 +- .../lucene/codecs/DocValuesWriterBase.java | 3 - .../org/apache/lucene/codecs/NormsFormat.java | 12 +- .../org/apache/lucene/codecs/NormsWriter.java | 70 ----- .../apache/lucene/codecs/PerDocConsumer.java | 55 +++- .../lucene/codecs/lucene3x/Lucene3xCodec.java | 3 +- .../codecs/lucene3x/Lucene3xNormsFormat.java | 65 ++++ .../Lucene3xNormsProducer.java} | 141 ++++++--- .../lucene40/Lucene40DocValuesConsumer.java | 22 +- .../lucene40/Lucene40DocValuesFormat.java | 4 +- .../lucene40/Lucene40DocValuesProducer.java | 7 +- .../codecs/lucene40/Lucene40NormsFormat.java | 108 ++++++- .../codecs/lucene40/Lucene40NormsWriter.java | 130 -------- .../lucene40/Lucene40StoredFieldsWriter.java | 11 +- .../lucene40/Lucene40TermVectorsWriter.java | 15 +- .../lucene/codecs/lucene40/values/Bytes.java | 8 +- .../values/FixedStraightBytesImpl.java | 2 +- .../lucene/codecs/lucene40/values/Floats.java | 5 +- .../lucene/codecs/lucene40/values/Ints.java | 5 +- .../lucene40/values/PackedIntValues.java | 2 +- .../lucene40/values/VarStraightBytesImpl.java | 2 +- .../lucene/codecs/lucene40/values/Writer.java | 101 +------ .../codecs/sep/SepDocValuesConsumer.java | 31 +- .../simpletext/SimpleTextNormsConsumer.java | 276 +++++++++++++++++ .../simpletext/SimpleTextNormsFormat.java | 32 +- ...ader.java => SimpleTextNormsProducer.java} | 85 +++++- .../simpletext/SimpleTextNormsWriter.java | 114 ------- .../SimpleTextStoredFieldsWriter.java | 4 +- .../SimpleTextTermVectorsWriter.java | 5 +- .../apache/lucene/index/BaseMultiReader.java | 10 +- .../lucene/index/DocFieldProcessor.java | 12 +- .../lucene/index/DocInverterPerField.java | 3 - .../org/apache/lucene/index/DocValue.java | 47 --- .../lucene/index/FilterIndexReader.java | 12 +- .../org/apache/lucene/index/IndexReader.java | 15 +- .../index/InvertedDocEndConsumerPerField.java | 4 +- .../apache/lucene/index/MultiDocValues.java | 112 ++++++- .../apache/lucene/index/NormsConsumer.java | 88 +++--- .../lucene/index/NormsConsumerPerField.java | 98 ++++--- .../apache/lucene/index/ParallelReader.java | 35 +-- .../lucene/index/SegmentCoreReaders.java | 5 +- .../apache/lucene/index/SegmentMerger.java | 24 +- .../apache/lucene/index/SegmentReader.java | 20 +- .../lucene/index/SlowMultiReaderWrapper.java | 32 +- .../lucene/store/CompoundFileWriter.java | 34 ++- .../java/org/apache/lucene/util/IOUtils.java | 12 + .../preflexrw/PreFlexNormsConsumer.java | 277 ++++++++++++++++++ .../codecs/preflexrw/PreFlexRWCodec.java | 13 +- .../preflexrw/PreFlexRWNormsFormat.java} | 19 +- .../org/apache/lucene/index/MultiNorms.java | 0 .../lucene/codecs/lucene40/TestDocValues.java | 97 ++++-- .../lucene/index/TestDocValuesIndexing.java | 119 +++++++- .../lucene/search/TestDocValuesScoring.java | 2 - 58 files changed, 1819 insertions(+), 867 deletions(-) create mode 100644 lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndexNormDocValues.java delete mode 100644 
lucene/src/java/org/apache/lucene/codecs/NormsWriter.java create mode 100644 lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsFormat.java rename lucene/src/java/org/apache/lucene/codecs/{lucene40/Lucene40NormsReader.java => lucene3x/Lucene3xNormsProducer.java} (69%) delete mode 100644 lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsWriter.java create mode 100644 lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsConsumer.java rename lucene/src/java/org/apache/lucene/codecs/simpletext/{SimpleTextNormsReader.java => SimpleTextNormsProducer.java} (62%) delete mode 100644 lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsWriter.java create mode 100644 lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexNormsConsumer.java rename lucene/src/{java/org/apache/lucene/codecs/NormsReader.java => test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWNormsFormat.java} (64%) rename lucene/src/{ => test-framework}/java/org/apache/lucene/index/MultiNorms.java (100%) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 921fdd012a7..17cf81cde67 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -619,6 +619,9 @@ New features * LUCENE-3638: Added sugar methods to IndexReader and IndexSearcher to load only certain fields when loading a document. (Peter Chang via Mike McCandless) + +* LUCENE-3628: Norms are represented as DocValues. IndexReader exposes + a #normValues(String) method to obtain norms per field. (Simon Willnauer) Optimizations diff --git a/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java b/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java index 0a48a8685de..27a7261a098 100644 --- a/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java +++ b/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java @@ -48,6 +48,7 @@ import org.apache.lucene.index.Term; import org.apache.lucene.index.TermState; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.memory.MemoryIndexNormDocValues.SingleByteSource; import org.apache.lucene.search.Collector; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; @@ -1082,34 +1083,6 @@ public class MemoryIndex { private void setSearcher(IndexSearcher searcher) { this.searcher = searcher; } - - /** performance hack: cache norms to avoid repeated expensive calculations */ - private byte[] cachedNorms; - private String cachedFieldName; - private SimilarityProvider cachedSimilarity; - - @Override - public byte[] norms(String fieldName) { - byte[] norms = cachedNorms; - SimilarityProvider sim = getSimilarityProvider(); - if (!fieldName.equals(cachedFieldName) || sim != cachedSimilarity) { // not cached? - Info info = getInfo(fieldName); - Similarity fieldSim = sim.get(fieldName); - int numTokens = info != null ? info.numTokens : 0; - int numOverlapTokens = info != null ? info.numOverlapTokens : 0; - float boost = info != null ? 
info.getBoost() : 1.0f; - FieldInvertState invertState = new FieldInvertState(0, numTokens, numOverlapTokens, 0, boost); - byte norm = fieldSim.computeNorm(invertState); - norms = new byte[] {norm}; - - // cache it for future reuse - cachedNorms = norms; - cachedFieldName = fieldName; - cachedSimilarity = sim; - if (DEBUG) System.err.println("MemoryIndexReader.norms: " + fieldName + ":" + norm + ":" + numTokens); - } - return norms; - } @Override public int numDocs() { @@ -1160,6 +1133,34 @@ public class MemoryIndex { public DocValues docValues(String field) throws IOException { return null; } + + /** performance hack: cache norms to avoid repeated expensive calculations */ + private DocValues cachedNormValues; + private String cachedFieldName; + private SimilarityProvider cachedSimilarity; + + @Override + public DocValues normValues(String field) throws IOException { + DocValues norms = cachedNormValues; + SimilarityProvider sim = getSimilarityProvider(); + if (!field.equals(cachedFieldName) || sim != cachedSimilarity) { // not cached? + Info info = getInfo(field); + Similarity fieldSim = sim.get(field); + int numTokens = info != null ? info.numTokens : 0; + int numOverlapTokens = info != null ? info.numOverlapTokens : 0; + float boost = info != null ? info.getBoost() : 1.0f; + FieldInvertState invertState = new FieldInvertState(0, numTokens, numOverlapTokens, 0, boost); + byte norm = fieldSim.computeNorm(invertState); + SingleByteSource singleByteSource = new SingleByteSource(new byte[] {norm}); + norms = new MemoryIndexNormDocValues(singleByteSource); + // cache it for future reuse + cachedNormValues = norms; + cachedFieldName = field; + cachedSimilarity = sim; + if (DEBUG) System.err.println("MemoryIndexReader.norms: " + field + ":" + norm + ":" + numTokens); + } + return norms; + } } diff --git a/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndexNormDocValues.java b/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndexNormDocValues.java new file mode 100644 index 00000000000..7c359a5c84a --- /dev/null +++ b/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndexNormDocValues.java @@ -0,0 +1,78 @@ +package org.apache.lucene.index.memory; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +import java.io.IOException; + +import org.apache.lucene.index.DocValues; +import org.apache.lucene.util.BytesRef; + +/** + * + * @lucene.internal + */ +class MemoryIndexNormDocValues extends DocValues { + + private final Source source; + + MemoryIndexNormDocValues(Source source) { + this.source = source; + } + @Override + public Source load() throws IOException { + return source; + } + + @Override + public Source getDirectSource() throws IOException { + return source; + } + + @Override + public Type type() { + return source.type(); + } + + public static class SingleByteSource extends Source { + + private final byte[] bytes; + + protected SingleByteSource(byte[] bytes) { + super(Type.BYTES_FIXED_STRAIGHT); + this.bytes = bytes; + } + + @Override + public BytesRef getBytes(int docID, BytesRef ref) { + ref.bytes = bytes; + ref.offset = docID; + ref.length = 1; + return ref; + } + + @Override + public boolean hasArray() { + return true; + } + + @Override + public Object getArray() { + return bytes; + } + + } + +} diff --git a/lucene/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java b/lucene/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java index d93ba1bab81..e9190545ae4 100644 --- a/lucene/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java +++ b/lucene/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java @@ -17,10 +17,7 @@ package org.apache.lucene.codecs; * limitations under the License. */ -import java.io.FileOutputStream; import java.io.IOException; -import java.io.OutputStreamWriter; -import java.io.Writer; import java.util.ArrayList; import java.util.Comparator; import java.util.List; @@ -42,7 +39,6 @@ import org.apache.lucene.util.fst.ByteSequenceOutputs; import org.apache.lucene.util.fst.BytesRefFSTEnum; import org.apache.lucene.util.fst.FST; import org.apache.lucene.util.fst.NoOutputs; -import org.apache.lucene.util.fst.Util; /* TODO: @@ -641,6 +637,7 @@ public class BlockTreeTermsWriter extends FieldsConsumer { } // for debugging + @SuppressWarnings("unused") private String toString(BytesRef b) { try { return b.utf8ToString() + " " + b; diff --git a/lucene/src/java/org/apache/lucene/codecs/DocValuesConsumer.java b/lucene/src/java/org/apache/lucene/codecs/DocValuesConsumer.java index 3644add33ca..cf69388f332 100644 --- a/lucene/src/java/org/apache/lucene/codecs/DocValuesConsumer.java +++ b/lucene/src/java/org/apache/lucene/codecs/DocValuesConsumer.java @@ -18,11 +18,14 @@ package org.apache.lucene.codecs; */ import java.io.IOException; +import org.apache.lucene.codecs.lucene40.values.Writer; import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.DocValues.Source; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.MergeState; import org.apache.lucene.index.DocValue; import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; /** * Abstract API that consumes {@link DocValue}s. @@ -35,6 +38,9 @@ import org.apache.lucene.util.Bits; */ public abstract class DocValuesConsumer { + protected Source currentMergeSource; + protected final BytesRef spare = new BytesRef(); + /** * Adds the given {@link DocValue} instance to this * {@link DocValuesConsumer} @@ -83,6 +89,7 @@ public abstract class DocValuesConsumer { hasMerged = true; merge(new SingleSubMergeState(docValues[readerIDX], mergeState.docBase[readerIDX], reader.reader.maxDoc(), reader.liveDocs)); + mergeState.checkAbort.work(reader.reader.maxDoc()); } } // only finish if no exception is thrown! 
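A quick aside on the API surface this patch introduces: per-field norms are now obtained through IndexReader#normValues(String), implemented by MemoryIndex above, and are exposed as BYTES_FIXED_STRAIGHT DocValues with one byte per document. A minimal, hedged usage sketch (class, reader and field names are placeholders, not part of the patch):

import java.io.IOException;

import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.BytesRef;

// Hedged sketch: reads a single norm byte through the new DocValues-based API.
class NormPeek {
  static byte readNorm(IndexReader reader, String field, int docID) throws IOException {
    DocValues norms = reader.normValues(field); // new API from this patch
    if (norms == null) {
      return 0; // field is not indexed, or it omits norms
    }
    DocValues.Source source = norms.getSource(); // cached, random-access view
    BytesRef scratch = new BytesRef();
    // norms are BYTES_FIXED_STRAIGHT: one byte per document
    return source.getBytes(docID, scratch).bytes[scratch.offset];
  }
}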
@@ -99,10 +106,112 @@
    * @throws IOException
    *           if an {@link IOException} occurs
    */
-  // TODO: can't we have a default implementation here that merges naively with our apis?
-  // this is how stored fields and term vectors work. its a pain to have to impl merging
-  // (should be an optimization to override it)
-  protected abstract void merge(SingleSubMergeState mergeState) throws IOException;
+  protected void merge(SingleSubMergeState state) throws IOException {
+    // This generic implementation merges value by value; subclasses can simply
+    // override this method and decide per MergeState whether a bulk merge is
+    // possible / feasible instead.
+    final Source source = state.reader.getDirectSource();
+    assert source != null;
+    setNextMergeSource(source); // set the current source we are working on - the
+                                // impl. will get the correct reference for the type
+                                // it supports
+    int docID = state.docBase;
+    final Bits liveDocs = state.liveDocs;
+    final int docCount = state.docCount;
+    for (int i = 0; i < docCount; i++) {
+      if (liveDocs == null || liveDocs.get(i)) {
+        mergeDoc(docID++, i);
+      }
+    }
+  }
+
+  /**
+   * Records the specified long value for the docID or throws an
+   * {@link UnsupportedOperationException} if this {@link Writer} doesn't record
+   * long values.
+   *
+   * @throws UnsupportedOperationException
+   *           if this writer doesn't record long values
+   */
+  protected void add(int docID, long value) throws IOException {
+    throw new UnsupportedOperationException("override this method to support integer types");
+  }
+
+  /**
+   * Records the specified double value for the docID or throws an
+   * {@link UnsupportedOperationException} if this {@link Writer} doesn't record
+   * double values.
+   *
+   * @throws UnsupportedOperationException
+   *           if this writer doesn't record double values
+   */
+  protected void add(int docID, double value) throws IOException {
+    throw new UnsupportedOperationException("override this method to support floating point types");
+  }
+
+  /**
+   * Records the specified {@link BytesRef} value for the docID or throws an
+   * {@link UnsupportedOperationException} if this {@link Writer} doesn't record
+   * {@link BytesRef} values.
+   *
+   * @throws UnsupportedOperationException
+   *           if this writer doesn't record {@link BytesRef} values
+   */
+  protected void add(int docID, BytesRef value) throws IOException {
+    throw new UnsupportedOperationException("override this method to support byte types");
+  }
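To make the intended extension pattern concrete: a concrete consumer overrides only the typed add(...) variant for the value type it records, leaving the other variants at their UnsupportedOperationException defaults above, and the generic merge(SingleSubMergeState) walks live documents and routes each value here through the mergeDoc method that follows. A hedged sketch (hypothetical class, declared abstract so the class' remaining abstract hooks stay untouched):

import java.io.IOException;

import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;

// Hedged sketch, not part of this patch: records one byte per document,
// which is exactly the shape norms take after this change.
abstract class SingleByteConsumer extends DocValuesConsumer {
  private byte[] bytes = new byte[1];

  @Override
  protected void add(int docID, BytesRef value) throws IOException {
    bytes = ArrayUtil.grow(bytes, docID + 1); // unseen docIDs keep the 0 default
    bytes[docID] = value.bytes[value.offset]; // a single byte per document
  }
  // the long/double variants intentionally keep the
  // UnsupportedOperationException defaults declared above
}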
+
+  /**
+   * Merges a document with the given docID. The method's
+   * implementation obtains the value for the sourceDoc id from the
+   * current {@link Source} set via setNextMergeSource(Source).
+   * <p>
+   * This method is used during merging to provide an implementation-agnostic
+   * default merge implementation.
+   * </p>
+   * <p>
+   * All document IDs between the given ID and the previously given ID, or
+   * 0 if the method is called for the first time, are filled with default
+   * values depending on the {@link Writer} implementation. The given document
+   * ID must always be greater than the previous ID, or 0 if called for the
+   * first time.
+   */
+  protected void mergeDoc(int docID, int sourceDoc) throws IOException {
+    switch(currentMergeSource.type()) {
+    case BYTES_FIXED_DEREF:
+    case BYTES_FIXED_SORTED:
+    case BYTES_FIXED_STRAIGHT:
+    case BYTES_VAR_DEREF:
+    case BYTES_VAR_SORTED:
+    case BYTES_VAR_STRAIGHT:
+      add(docID, currentMergeSource.getBytes(sourceDoc, spare));
+      break;
+    case FIXED_INTS_16:
+    case FIXED_INTS_32:
+    case FIXED_INTS_64:
+    case FIXED_INTS_8:
+    case VAR_INTS:
+      add(docID, currentMergeSource.getInt(sourceDoc));
+      break;
+    case FLOAT_32:
+    case FLOAT_64:
+      add(docID, currentMergeSource.getFloat(sourceDoc));
+      break;
+    }
+  }
+
+  /**
+   * Sets the next {@link Source} to consume values from on calls to
+   * {@link #mergeDoc(int, int)}.
+   *
+   * @param mergeSource
+   *          the next {@link Source}; must not be null
+   */
+  protected final void setNextMergeSource(Source mergeSource) {
+    currentMergeSource = mergeSource;
+  }

  /**
   * Specialized auxiliary MergeState is necessary since we don't want to
diff --git a/lucene/src/java/org/apache/lucene/codecs/DocValuesReaderBase.java b/lucene/src/java/org/apache/lucene/codecs/DocValuesReaderBase.java
index 7053e0ce273..40239a97c6f 100644
--- a/lucene/src/java/org/apache/lucene/codecs/DocValuesReaderBase.java
+++ b/lucene/src/java/org/apache/lucene/codecs/DocValuesReaderBase.java
@@ -41,7 +41,7 @@ import org.apache.lucene.util.BytesRef;
  */
 // TODO: this needs to go under lucene40 codec (its specific to its impl)
 public abstract class DocValuesReaderBase extends PerDocProducer {
-  
+
   protected abstract void closeInternal(Collection closeables) throws IOException;
   protected abstract Map docValues();
 
@@ -68,14 +68,14 @@ public abstract class DocValuesReaderBase extends PerDocProducer {
     try {
       for (FieldInfo fieldInfo : fieldInfos) {
-        if (fieldInfo.hasDocValues()) {
+        if (canLoad(fieldInfo)) {
           final String field = fieldInfo.name;
           // TODO can we have a compound file per segment and codec for
           // docvalues?
           final String id = DocValuesWriterBase.docValuesId(segment, fieldInfo.number);
           values.put(field,
-              loadDocValues(docCount, dir, id, fieldInfo.getDocValuesType(), context));
+              loadDocValues(docCount, dir, id, getDocValuesType(fieldInfo), context));
         }
       }
       success = true;
@@ -88,6 +88,18 @@ public abstract class DocValuesReaderBase extends PerDocProducer {
     return values;
   }
 
+  protected boolean canLoad(FieldInfo info) {
+    return info.hasDocValues();
+  }
+
+  protected Type getDocValuesType(FieldInfo info) {
+    return info.getDocValuesType();
+  }
+
+  protected boolean anyDocValuesFields(FieldInfos infos) {
+    return infos.anyDocValuesFields();
+  }
+
   /**
    * Loads a {@link DocValues} instance depending on the given {@link Type}.
* Codecs that use different implementations for a certain {@link Type} can diff --git a/lucene/src/java/org/apache/lucene/codecs/DocValuesWriterBase.java b/lucene/src/java/org/apache/lucene/codecs/DocValuesWriterBase.java index b3e5013e0a7..d6fae8b0aa0 100644 --- a/lucene/src/java/org/apache/lucene/codecs/DocValuesWriterBase.java +++ b/lucene/src/java/org/apache/lucene/codecs/DocValuesWriterBase.java @@ -23,7 +23,6 @@ import java.util.Comparator; import org.apache.lucene.codecs.lucene40.values.Writer; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.PerDocWriteState; -import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValues.Type; // javadoc import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; @@ -38,7 +37,6 @@ import org.apache.lucene.util.Counter; //TODO: this needs to go under lucene40 codec (its specific to its impl) public abstract class DocValuesWriterBase extends PerDocConsumer { protected final String segmentName; - protected final String segmentSuffix; private final Counter bytesUsed; protected final IOContext context; private final boolean fasterButMoreRam; @@ -58,7 +56,6 @@ public abstract class DocValuesWriterBase extends PerDocConsumer { */ protected DocValuesWriterBase(PerDocWriteState state, boolean fasterButMoreRam) { this.segmentName = state.segmentName; - this.segmentSuffix = state.segmentSuffix; this.bytesUsed = state.bytesUsed; this.context = state.context; this.fasterButMoreRam = fasterButMoreRam; diff --git a/lucene/src/java/org/apache/lucene/codecs/NormsFormat.java b/lucene/src/java/org/apache/lucene/codecs/NormsFormat.java index d829e8bbadf..d7745bab90b 100644 --- a/lucene/src/java/org/apache/lucene/codecs/NormsFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/NormsFormat.java @@ -20,19 +20,19 @@ package org.apache.lucene.codecs; import java.io.IOException; import java.util.Set; -import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.PerDocWriteState; import org.apache.lucene.index.SegmentInfo; -import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.store.Directory; -import org.apache.lucene.store.IOContext; /** * format for normalization factors */ public abstract class NormsFormat { - /** Note: separateNormsDir should not be used! */ - public abstract NormsReader normsReader(Directory dir, SegmentInfo info, FieldInfos fields, IOContext context, Directory separateNormsDir) throws IOException; - public abstract NormsWriter normsWriter(SegmentWriteState state) throws IOException; + public abstract PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException; + @Deprecated + public abstract PerDocProducer docsProducer(SegmentReadState state, Directory separateNormsDir) throws IOException; + public abstract PerDocProducer docsProducer(SegmentReadState state) throws IOException; public abstract void files(Directory dir, SegmentInfo info, Set files) throws IOException; /** diff --git a/lucene/src/java/org/apache/lucene/codecs/NormsWriter.java b/lucene/src/java/org/apache/lucene/codecs/NormsWriter.java deleted file mode 100644 index 17949bc8d8b..00000000000 --- a/lucene/src/java/org/apache/lucene/codecs/NormsWriter.java +++ /dev/null @@ -1,70 +0,0 @@ -package org.apache.lucene.codecs; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with this - * work for additional information regarding copyright ownership. The ASF - * licenses this file to You under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ - -import java.io.Closeable; -import java.io.IOException; -import java.util.Arrays; - -import org.apache.lucene.index.FieldInfo; -import org.apache.lucene.index.MergeState; -import org.apache.lucene.util.Bits; - -// simple api just for now before switching to docvalues apis -public abstract class NormsWriter implements Closeable { - - // TODO: I think IW should set info.normValueType from Similarity, - // and then this method just returns DocValuesConsumer - public abstract void startField(FieldInfo info) throws IOException; - public abstract void writeNorm(byte norm) throws IOException; - public abstract void finish(int numDocs) throws IOException; - - public int merge(MergeState mergeState) throws IOException { - int numMergedDocs = 0; - for (FieldInfo fi : mergeState.fieldInfos) { - if (fi.isIndexed && !fi.omitNorms) { - startField(fi); - int numMergedDocsForField = 0; - for (MergeState.IndexReaderAndLiveDocs reader : mergeState.readers) { - final int maxDoc = reader.reader.maxDoc(); - byte normBuffer[] = reader.reader.norms(fi.name); - if (normBuffer == null) { - // Can be null if this segment doesn't have - // any docs with this field - normBuffer = new byte[maxDoc]; - Arrays.fill(normBuffer, (byte)0); - } - // this segment has deleted docs, so we have to - // check for every doc if it is deleted or not - final Bits liveDocs = reader.liveDocs; - for (int k = 0; k < maxDoc; k++) { - if (liveDocs == null || liveDocs.get(k)) { - writeNorm(normBuffer[k]); - numMergedDocsForField++; - } - } - mergeState.checkAbort.work(maxDoc); - } - assert numMergedDocs == 0 || numMergedDocs == numMergedDocsForField; - numMergedDocs = numMergedDocsForField; - } - } - finish(numMergedDocs); - return numMergedDocs; - } -} diff --git a/lucene/src/java/org/apache/lucene/codecs/PerDocConsumer.java b/lucene/src/java/org/apache/lucene/codecs/PerDocConsumer.java index fd2f2da3e51..4228cc70ec6 100644 --- a/lucene/src/java/org/apache/lucene/codecs/PerDocConsumer.java +++ b/lucene/src/java/org/apache/lucene/codecs/PerDocConsumer.java @@ -20,7 +20,9 @@ import java.io.IOException; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.MergeState; +import org.apache.lucene.index.DocValues.Type; /** * Abstract API that consumes per document values. 
Concrete implementations of
@@ -32,7 +34,7 @@ import org.apache.lucene.index.MergeState;
  *
  * @lucene.experimental
  */
-public abstract class PerDocConsumer implements Closeable{
+public abstract class PerDocConsumer implements Closeable {
   /** Adds a new DocValuesField */
   public abstract DocValuesConsumer addValuesField(DocValues.Type type, FieldInfo field) throws IOException;
@@ -46,14 +48,57 @@
     for (FieldInfo fieldInfo : mergeState.fieldInfos) {
       mergeState.fieldInfo = fieldInfo; // set the field we are merging
-      if (fieldInfo.hasDocValues()) {
+      if (canMerge(fieldInfo)) {
         for (int i = 0; i < docValues.length; i++) {
-          docValues[i] = mergeState.readers.get(i).reader.docValues(fieldInfo.name);
+          docValues[i] = getDocValuesForMerge(mergeState.readers.get(i).reader, fieldInfo);
         }
-        final DocValuesConsumer docValuesConsumer = addValuesField(fieldInfo.getDocValuesType(), fieldInfo);
+        final DocValuesConsumer docValuesConsumer = addValuesField(getDocValuesType(fieldInfo), fieldInfo);
         assert docValuesConsumer != null;
         docValuesConsumer.merge(mergeState, docValues);
       }
     }
-  }
+  }
+
+  /**
+   * Returns a {@link DocValues} instance for merging from the given reader for the given
+   * {@link FieldInfo}. This method is used for merging and uses
+   * {@link IndexReader#docValues(String)} by default.
+   * <p>
+   * To enable merging for {@link DocValues} other than the default,
+   * override this method accordingly.
+   * </p>
+   */
+  protected DocValues getDocValuesForMerge(IndexReader reader, FieldInfo info) throws IOException {
+    return reader.docValues(info.name);
+  }
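The getDocValuesForMerge hook above, together with the canMerge and getDocValuesType hooks that follow, is what lets subclasses reroute the docvalues merge machinery, e.g. to norms. A hedged sketch of the pattern (the class name is hypothetical; the overrides mirror what Lucene40NormsFormat does further down in this patch):

import java.io.IOException;

import org.apache.lucene.codecs.PerDocConsumer;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexReader;

// Hedged sketch - declared abstract; only the merge hooks are shown.
abstract class NormsMergingPerDocConsumer extends PerDocConsumer {
  @Override
  protected DocValues getDocValuesForMerge(IndexReader reader, FieldInfo info) throws IOException {
    return reader.normValues(info.name); // merge norms instead of regular docvalues
  }

  @Override
  protected boolean canMerge(FieldInfo info) {
    return info.isIndexed && !info.omitNorms; // every indexed field that keeps norms
  }
}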
+
+  /**
+   * Returns true iff the given field can be merged, i.e. has {@link DocValues}.
+   * By default this method uses {@link FieldInfo#hasDocValues()}.
+   * <p>
+   * To enable merging for {@link DocValues} other than the default,
+   * override this method accordingly.
+   * </p>
+   */
+  protected boolean canMerge(FieldInfo info) {
+    return info.hasDocValues();
+  }
+
+  /**
+   * Returns the {@link DocValues} {@link Type} for the given {@link FieldInfo}.
+   * By default this method uses {@link FieldInfo#getDocValuesType()}.
+   * <p>
+   * To enable merging for {@link DocValues} other than the default,
+   * override this method accordingly.
+   * </p>
+   */
+  protected Type getDocValuesType(FieldInfo info) {
+    return info.getDocValuesType();
+  }
+
+  /**
+   * Called during indexing if the indexing session is aborted due to an
+   * unrecoverable exception. This method should clean up all resources.
+   */
+  public abstract void abort();
 }
diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java
index b24b0a2d06e..0fa6e7f56d0 100644
--- a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java
+++ b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java
@@ -31,7 +31,6 @@ import org.apache.lucene.codecs.SegmentInfosFormat;
 import org.apache.lucene.codecs.StoredFieldsFormat;
 import org.apache.lucene.codecs.TermVectorsFormat;
 import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosFormat;
-import org.apache.lucene.codecs.lucene40.Lucene40NormsFormat;
 import org.apache.lucene.codecs.lucene40.Lucene40SegmentInfosFormat;
 import org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsFormat;
 import org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat;
@@ -65,7 +64,7 @@ public class Lucene3xCodec extends Codec {
   private final SegmentInfosFormat infosFormat = new Lucene40SegmentInfosFormat();
 
   // TODO: this should really be a different impl
-  private final NormsFormat normsFormat = new Lucene40NormsFormat();
+  private final NormsFormat normsFormat = new Lucene3xNormsFormat();
 
   // 3.x doesn't support docvalues
   private final DocValuesFormat docValuesFormat = new DocValuesFormat() {
diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsFormat.java b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsFormat.java
new file mode 100644
index 00000000000..4cd90ada711
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsFormat.java
@@ -0,0 +1,65 @@
+package org.apache.lucene.codecs.lucene3x;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +import java.io.IOException; +import java.util.Set; + +import org.apache.lucene.codecs.NormsFormat; +import org.apache.lucene.codecs.PerDocConsumer; +import org.apache.lucene.codecs.PerDocProducer; +import org.apache.lucene.index.PerDocWriteState; +import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.store.Directory; + +/** + * Read-Only Lucene 3.x Norms Format + * + * @lucene.experimental + */ +public class Lucene3xNormsFormat extends NormsFormat { + + + @Override + public void files(Directory dir, SegmentInfo info, Set files) throws IOException { + Lucene3xNormsProducer.files(dir, info, files); + } + + @Override + public void separateFiles(Directory dir, SegmentInfo info, Set files) throws IOException { + Lucene3xNormsProducer.separateFiles(dir, info, files); + } + + + @Override + public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException { + throw new IllegalArgumentException("this codec can only be used for reading"); + } + + @Override + public PerDocProducer docsProducer(SegmentReadState state) throws IOException { + return docsProducer(state, null); + } + + @Override + public PerDocProducer docsProducer(SegmentReadState state, + Directory separateNormsDir) throws IOException { + return new Lucene3xNormsProducer(state.dir, state.segmentInfo, state.fieldInfos, state.context, separateNormsDir); + } +} diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsReader.java b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsProducer.java similarity index 69% rename from lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsReader.java rename to lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsProducer.java index 289ee6e8f7d..7995cb31074 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsReader.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsProducer.java @@ -1,4 +1,4 @@ -package org.apache.lucene.codecs.lucene40; +package org.apache.lucene.codecs.lucene3x; /** * Licensed to the Apache Software Foundation (ASF) under one or more @@ -24,7 +24,10 @@ import java.util.Map; import java.util.Set; import java.util.Map.Entry; -import org.apache.lucene.codecs.NormsReader; +import org.apache.lucene.codecs.PerDocProducer; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.DocValues.Source; +import org.apache.lucene.index.DocValues.Type; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.IndexFileNames; @@ -32,14 +35,29 @@ import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.MapBackedSet; import org.apache.lucene.util.StringHelper; -public class Lucene40NormsReader extends NormsReader { - // this would be replaced by Source/SourceCache in a dv impl. 
- // for now we have our own mini-version - final Map norms = new HashMap(); +/** + * Reads Lucene 3.x norms format and exposes it via DocValues API + * @lucene.experimental + */ +class Lucene3xNormsProducer extends PerDocProducer { + + /** norms header placeholder */ + static final byte[] NORMS_HEADER = new byte[]{'N','R','M',-1}; + + /** Extension of norms file */ + static final String NORMS_EXTENSION = "nrm"; + + /** Extension of separate norms file + * @deprecated */ + @Deprecated + static final String SEPARATE_NORMS_EXTENSION = "s"; + + final Map norms = new HashMap(); // any .nrm or .sNN files we have open at any time. // TODO: just a list, and double-close() separate norms files? final Set openFiles = new MapBackedSet(new IdentityHashMap()); @@ -49,20 +67,20 @@ public class Lucene40NormsReader extends NormsReader { // note: just like segmentreader in 3.x, we open up all the files here (including separate norms) up front. // but we just don't do any seeks or reading yet. - public Lucene40NormsReader(Directory dir, SegmentInfo info, FieldInfos fields, IOContext context, Directory separateNormsDir) throws IOException { + public Lucene3xNormsProducer(Directory dir, SegmentInfo info, FieldInfos fields, IOContext context, Directory separateNormsDir) throws IOException { maxdoc = info.docCount; String segmentName = info.name; Map normGen = info.getNormGen(); boolean success = false; try { - long nextNormSeek = Lucene40NormsWriter.NORMS_HEADER.length; //skip header (header unused for now) + long nextNormSeek = NORMS_HEADER.length; //skip header (header unused for now) for (FieldInfo fi : fields) { if (fi.isIndexed && !fi.omitNorms) { String fileName = getNormFilename(segmentName, normGen, fi.number); Directory d = hasSeparateNorms(normGen, fi.number) ? separateNormsDir : dir; // singleNormFile means multiple norms share this file - boolean singleNormFile = IndexFileNames.matchesExtension(fileName, Lucene40NormsWriter.NORMS_EXTENSION); + boolean singleNormFile = IndexFileNames.matchesExtension(fileName, NORMS_EXTENSION); IndexInput normInput = null; long normSeek; @@ -90,19 +108,16 @@ public class Lucene40NormsReader extends NormsReader { if (isUnversioned) { normSeek = 0; } else { - normSeek = Lucene40NormsWriter.NORMS_HEADER.length; + normSeek = NORMS_HEADER.length; } } - - Norm norm = new Norm(); - norm.file = normInput; - norm.offset = normSeek; + NormsDocValues norm = new NormsDocValues(normInput, normSeek); norms.put(fi.name, norm); nextNormSeek += maxdoc; // increment also if some norms are separate } } // TODO: change to a real check? see LUCENE-3619 - assert singleNormStream == null || nextNormSeek == singleNormStream.length(); + assert singleNormStream == null || nextNormSeek == singleNormStream.length() : singleNormStream != null ? "len: " + singleNormStream.length() + " expected: " + nextNormSeek : "null"; success = true; } finally { if (!success) { @@ -112,12 +127,10 @@ public class Lucene40NormsReader extends NormsReader { } @Override - public byte[] norms(String name) throws IOException { - Norm norm = norms.get(name); - return norm == null ? 
null : norm.bytes(); + public DocValues docValues(String field) throws IOException { + return norms.get(field); } - @Override public void close() throws IOException { try { @@ -130,10 +143,10 @@ public class Lucene40NormsReader extends NormsReader { private static String getNormFilename(String segmentName, Map normGen, int number) { if (hasSeparateNorms(normGen, number)) { - return IndexFileNames.fileNameFromGeneration(segmentName, Lucene40NormsWriter.SEPARATE_NORMS_EXTENSION + number, normGen.get(number)); + return IndexFileNames.fileNameFromGeneration(segmentName, SEPARATE_NORMS_EXTENSION + number, normGen.get(number)); } else { // single file for all norms - return IndexFileNames.fileNameFromGeneration(segmentName, Lucene40NormsWriter.NORMS_EXTENSION, SegmentInfo.WITHOUT_GEN); + return IndexFileNames.fileNameFromGeneration(segmentName, NORMS_EXTENSION, SegmentInfo.WITHOUT_GEN); } } @@ -146,34 +159,38 @@ public class Lucene40NormsReader extends NormsReader { return gen != null && gen.longValue() != SegmentInfo.NO; } - class Norm { - IndexInput file; - long offset; - byte bytes[]; + static final class NormSource extends Source { + protected NormSource(byte[] bytes) { + super(Type.BYTES_FIXED_STRAIGHT); + this.bytes = bytes; + } + + final byte bytes[]; - synchronized byte[] bytes() throws IOException { - if (bytes == null) { - bytes = new byte[maxdoc]; - // some norms share fds - synchronized(file) { - file.seek(offset); - file.readBytes(bytes, 0, bytes.length, false); - } - // we are done with this file - if (file != singleNormStream) { - openFiles.remove(file); - file.close(); - file = null; - } - } + @Override + public BytesRef getBytes(int docID, BytesRef ref) { + ref.bytes = bytes; + ref.offset = docID; + ref.length = 1; + return ref; + } + + @Override + public boolean hasArray() { + return true; + } + + @Override + public Object getArray() { return bytes; } + } static void files(Directory dir, SegmentInfo info, Set files) throws IOException { // TODO: This is what SI always did... but we can do this cleaner? // like first FI that has norms but doesn't have separate norms? 
- final String normsFileName = IndexFileNames.segmentFileName(info.name, "", Lucene40NormsWriter.NORMS_EXTENSION); + final String normsFileName = IndexFileNames.segmentFileName(info.name, "", NORMS_EXTENSION); if (dir.fileExists(normsFileName)) { files.add(normsFileName); } @@ -188,9 +205,49 @@ public class Lucene40NormsReader extends NormsReader { long gen = entry.getValue(); if (gen >= SegmentInfo.YES) { // Definitely a separate norm file, with generation: - files.add(IndexFileNames.fileNameFromGeneration(info.name, Lucene40NormsWriter.SEPARATE_NORMS_EXTENSION + entry.getKey(), gen)); + files.add(IndexFileNames.fileNameFromGeneration(info.name, SEPARATE_NORMS_EXTENSION + entry.getKey(), gen)); } } } } + + private class NormsDocValues extends DocValues { + private final IndexInput file; + private final long offset; + public NormsDocValues(IndexInput normInput, long normSeek) { + this.file = normInput; + this.offset = normSeek; + } + + @Override + public Source load() throws IOException { + return new NormSource(bytes()); + } + + @Override + public Source getDirectSource() throws IOException { + return getSource(); + } + + @Override + public Type type() { + return Type.BYTES_FIXED_STRAIGHT; + } + + byte[] bytes() throws IOException { + byte[] bytes = new byte[maxdoc]; + // some norms share fds + synchronized(file) { + file.seek(offset); + file.readBytes(bytes, 0, bytes.length, false); + } + // we are done with this file + if (file != singleNormStream) { + openFiles.remove(file); + file.close(); + } + return bytes; + } + + } } diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesConsumer.java b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesConsumer.java index a58e60d360b..5dc92fb1225 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesConsumer.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesConsumer.java @@ -28,6 +28,7 @@ import org.apache.lucene.index.PerDocWriteState; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.store.CompoundFileDirectory; import org.apache.lucene.store.Directory; +import org.apache.lucene.util.IOUtils; /** * Default PerDocConsumer implementation that uses compound file. @@ -36,11 +37,13 @@ import org.apache.lucene.store.Directory; public class Lucene40DocValuesConsumer extends DocValuesWriterBase { private final Directory mainDirectory; private Directory directory; + private final String segmentSuffix; + public final static String DOC_VALUES_SEGMENT_SUFFIX = "dv"; - final static String DOC_VALUES_SEGMENT_SUFFIX = "dv"; - - public Lucene40DocValuesConsumer(PerDocWriteState state) throws IOException { + + public Lucene40DocValuesConsumer(PerDocWriteState state, String segmentSuffix) throws IOException { super(state); + this.segmentSuffix = segmentSuffix; mainDirectory = state.directory; //TODO maybe we should enable a global CFS that all codecs can pull on demand to further reduce the number of files? 
} @@ -50,7 +53,7 @@ public class Lucene40DocValuesConsumer extends DocValuesWriterBase { // lazy init if (directory == null) { directory = new CompoundFileDirectory(mainDirectory, - IndexFileNames.segmentFileName(segmentName, DOC_VALUES_SEGMENT_SUFFIX, + IndexFileNames.segmentFileName(segmentName, segmentSuffix, IndexFileNames.COMPOUND_FILE_EXTENSION), context, true); } return directory; @@ -75,4 +78,15 @@ public class Lucene40DocValuesConsumer extends DocValuesWriterBase { } } } + + @Override + public void abort() { + try { + close(); + } catch (IOException ignored) {} + IOUtils.deleteFilesIgnoringExceptions(mainDirectory, IndexFileNames.segmentFileName( + segmentName, segmentSuffix, IndexFileNames.COMPOUND_FILE_EXTENSION), + IndexFileNames.segmentFileName(segmentName, segmentSuffix, + IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION)); + } } diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesFormat.java b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesFormat.java index 8bb5ab7a3fb..90ff33494af 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesFormat.java @@ -32,12 +32,12 @@ public class Lucene40DocValuesFormat extends DocValuesFormat { @Override public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException { - return new Lucene40DocValuesConsumer(state); + return new Lucene40DocValuesConsumer(state, Lucene40DocValuesConsumer.DOC_VALUES_SEGMENT_SUFFIX); } @Override public PerDocProducer docsProducer(SegmentReadState state) throws IOException { - return new Lucene40DocValuesProducer(state); + return new Lucene40DocValuesProducer(state, Lucene40DocValuesConsumer.DOC_VALUES_SEGMENT_SUFFIX); } @Override diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesProducer.java b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesProducer.java index 7cdabe6bda1..2162095fd35 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesProducer.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesProducer.java @@ -39,16 +39,15 @@ import org.apache.lucene.util.IOUtils; public class Lucene40DocValuesProducer extends DocValuesReaderBase { protected final TreeMap docValues; private final Directory cfs; - /** * Creates a new {@link Lucene40DocValuesProducer} instance and loads all * {@link DocValues} instances for this segment and codec. 
*/ - public Lucene40DocValuesProducer(SegmentReadState state) throws IOException { - if (state.fieldInfos.anyDocValuesFields()) { + public Lucene40DocValuesProducer(SegmentReadState state, String segmentSuffix) throws IOException { + if (anyDocValuesFields(state.fieldInfos)) { cfs = new CompoundFileDirectory(state.dir, IndexFileNames.segmentFileName(state.segmentInfo.name, - Lucene40DocValuesConsumer.DOC_VALUES_SEGMENT_SUFFIX, IndexFileNames.COMPOUND_FILE_EXTENSION), + segmentSuffix, IndexFileNames.COMPOUND_FILE_EXTENSION), state.context, false); docValues = load(state.fieldInfos, state.segmentInfo.name, state.segmentInfo.docCount, cfs, state.context); } else { diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsFormat.java b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsFormat.java index 776c0e45589..e235db22c2d 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsFormat.java @@ -1,5 +1,4 @@ package org.apache.lucene.codecs.lucene40; - /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -16,38 +15,117 @@ package org.apache.lucene.codecs.lucene40; * See the License for the specific language governing permissions and * limitations under the License. */ - import java.io.IOException; import java.util.Set; import org.apache.lucene.codecs.NormsFormat; -import org.apache.lucene.codecs.NormsReader; -import org.apache.lucene.codecs.NormsWriter; +import org.apache.lucene.codecs.PerDocConsumer; +import org.apache.lucene.codecs.PerDocProducer; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.DocValues.Type; +import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.PerDocWriteState; import org.apache.lucene.index.SegmentInfo; -import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.store.Directory; -import org.apache.lucene.store.IOContext; +/** + * Norms Format for the default codec. 
+ * @lucene.experimental + */ public class Lucene40NormsFormat extends NormsFormat { - + private final static String NORMS_SEGMENT_SUFFIX = "nrm"; + @Override - public NormsReader normsReader(Directory dir, SegmentInfo info, FieldInfos fields, IOContext context, Directory separateNormsDir) throws IOException { - return new Lucene40NormsReader(dir, info, fields, context, separateNormsDir); + public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException { + return new Lucene40NormsDocValuesConsumer(state, NORMS_SEGMENT_SUFFIX); } @Override - public NormsWriter normsWriter(SegmentWriteState state) throws IOException { - return new Lucene40NormsWriter(state.directory, state.segmentName, state.context); + public PerDocProducer docsProducer(SegmentReadState state) throws IOException { + return new Lucene40NormsDocValuesProducer(state, NORMS_SEGMENT_SUFFIX); } @Override - public void files(Directory dir, SegmentInfo info, Set files) throws IOException { - Lucene40NormsReader.files(dir, info, files); + public void files(Directory dir, SegmentInfo info, Set files) + throws IOException { + Lucene40NormsDocValuesConsumer.files(dir, info, files); + } @Override - public void separateFiles(Directory dir, SegmentInfo info, Set files) throws IOException { - Lucene40NormsReader.separateFiles(dir, info, files); + public PerDocProducer docsProducer(SegmentReadState state, + Directory separateNormsDir) throws IOException { + return docsProducer(state); } + + + public static class Lucene40NormsDocValuesProducer extends Lucene40DocValuesProducer { + + public Lucene40NormsDocValuesProducer(SegmentReadState state, + String segmentSuffix) throws IOException { + super(state, segmentSuffix); + } + + @Override + protected boolean canLoad(FieldInfo info) { + return !info.omitNorms && info.isIndexed; + } + + @Override + protected Type getDocValuesType(FieldInfo info) { + return Type.BYTES_FIXED_STRAIGHT; + } + + @Override + protected boolean anyDocValuesFields(FieldInfos infos) { + return infos.hasNorms(); + } + + } + + public static class Lucene40NormsDocValuesConsumer extends Lucene40DocValuesConsumer { + + public Lucene40NormsDocValuesConsumer(PerDocWriteState state, + String segmentSuffix) throws IOException { + super(state, segmentSuffix); + } + + @Override + protected DocValues getDocValuesForMerge(IndexReader reader, FieldInfo info) + throws IOException { + return reader.normValues(info.name); + } + + @Override + protected boolean canMerge(FieldInfo info) { + return !info.omitNorms && info.isIndexed; + } + + @Override + protected Type getDocValuesType(FieldInfo info) { + return Type.BYTES_FIXED_STRAIGHT; + } + + public static void files(Directory dir, SegmentInfo segmentInfo, Set files) throws IOException { + FieldInfos fieldInfos = segmentInfo.getFieldInfos(); + for (FieldInfo fieldInfo : fieldInfos) { + if (!fieldInfo.omitNorms && fieldInfo.isIndexed) { + files.add(IndexFileNames.segmentFileName(segmentInfo.name, NORMS_SEGMENT_SUFFIX, IndexFileNames.COMPOUND_FILE_EXTENSION)); + files.add(IndexFileNames.segmentFileName(segmentInfo.name, NORMS_SEGMENT_SUFFIX, IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION)); + assert dir.fileExists(IndexFileNames.segmentFileName(segmentInfo.name, NORMS_SEGMENT_SUFFIX, IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION)); + assert dir.fileExists(IndexFileNames.segmentFileName(segmentInfo.name, NORMS_SEGMENT_SUFFIX, IndexFileNames.COMPOUND_FILE_EXTENSION)); + break; + } + } + } + + } + + + + } diff --git 
a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsWriter.java b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsWriter.java deleted file mode 100644 index 0903d9088ba..00000000000 --- a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsWriter.java +++ /dev/null @@ -1,130 +0,0 @@ -package org.apache.lucene.codecs.lucene40; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; -import java.util.Arrays; - -import org.apache.lucene.codecs.NormsWriter; -import org.apache.lucene.index.FieldInfo; -import org.apache.lucene.index.IndexFileNames; -import org.apache.lucene.index.MergeState; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.IOContext; -import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.util.Bits; -import org.apache.lucene.util.IOUtils; - -public class Lucene40NormsWriter extends NormsWriter { - private IndexOutput out; - private int normCount = 0; - - /** norms header placeholder */ - static final byte[] NORMS_HEADER = new byte[]{'N','R','M',-1}; - - /** Extension of norms file */ - static final String NORMS_EXTENSION = "nrm"; - - /** Extension of separate norms file - * @deprecated */ - @Deprecated - static final String SEPARATE_NORMS_EXTENSION = "s"; - - public Lucene40NormsWriter(Directory directory, String segment, IOContext context) throws IOException { - final String normsFileName = IndexFileNames.segmentFileName(segment, "", NORMS_EXTENSION); - boolean success = false; - try { - out = directory.createOutput(normsFileName, context); - out.writeBytes(NORMS_HEADER, 0, NORMS_HEADER.length); - success = true; - } finally { - if (!success) { - IOUtils.closeWhileHandlingException(out); - } - } - } - - @Override - public void startField(FieldInfo info) throws IOException { - assert info.omitNorms == false; - normCount++; - } - - @Override - public void writeNorm(byte norm) throws IOException { - out.writeByte(norm); - } - - @Override - public void finish(int numDocs) throws IOException { - if (4+normCount*(long)numDocs != out.getFilePointer()) { - throw new RuntimeException(".nrm file size mismatch: expected=" + (4+normCount*(long)numDocs) + " actual=" + out.getFilePointer()); - } - } - - /** we override merge and bulk-merge norms when there are no deletions */ - @Override - public int merge(MergeState mergeState) throws IOException { - int numMergedDocs = 0; - for (FieldInfo fi : mergeState.fieldInfos) { - if (fi.isIndexed && !fi.omitNorms) { - startField(fi); - int numMergedDocsForField = 0; - for (MergeState.IndexReaderAndLiveDocs reader : mergeState.readers) { - final int maxDoc = reader.reader.maxDoc(); - byte normBuffer[] = reader.reader.norms(fi.name); - if (normBuffer == null) { - // Can be null if this segment 
doesn't have - // any docs with this field - normBuffer = new byte[maxDoc]; - Arrays.fill(normBuffer, (byte)0); - } - if (reader.liveDocs == null) { - //optimized case for segments without deleted docs - out.writeBytes(normBuffer, maxDoc); - numMergedDocsForField += maxDoc; - } else { - // this segment has deleted docs, so we have to - // check for every doc if it is deleted or not - final Bits liveDocs = reader.liveDocs; - for (int k = 0; k < maxDoc; k++) { - if (liveDocs.get(k)) { - numMergedDocsForField++; - out.writeByte(normBuffer[k]); - } - } - } - mergeState.checkAbort.work(maxDoc); - } - assert numMergedDocs == 0 || numMergedDocs == numMergedDocsForField; - numMergedDocs = numMergedDocsForField; - } - } - finish(numMergedDocs); - return numMergedDocs; - } - - @Override - public void close() throws IOException { - try { - IOUtils.close(out); - } finally { - out = null; - } - } -} diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsWriter.java b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsWriter.java index f161012a5f1..05282c4613c 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsWriter.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsWriter.java @@ -122,14 +122,9 @@ public final class Lucene40StoredFieldsWriter extends StoredFieldsWriter { try { close(); } catch (IOException ignored) {} - - try { - directory.deleteFile(IndexFileNames.segmentFileName(segment, "", FIELDS_EXTENSION)); - } catch (IOException ignored) {} - - try { - directory.deleteFile(IndexFileNames.segmentFileName(segment, "", FIELDS_INDEX_EXTENSION)); - } catch (IOException ignored) {} + IOUtils.deleteFilesIgnoringExceptions(directory, + IndexFileNames.segmentFileName(segment, "", FIELDS_EXTENSION), + IndexFileNames.segmentFileName(segment, "", FIELDS_INDEX_EXTENSION)); } public final void writeField(FieldInfo info, IndexableField field) throws IOException { diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java index 09ba8b06a4e..0ccb62efac5 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java @@ -202,18 +202,9 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter { try { close(); } catch (IOException ignored) {} - - try { - directory.deleteFile(IndexFileNames.segmentFileName(segment, "", Lucene40TermVectorsReader.VECTORS_INDEX_EXTENSION)); - } catch (IOException ignored) {} - - try { - directory.deleteFile(IndexFileNames.segmentFileName(segment, "", Lucene40TermVectorsReader.VECTORS_DOCUMENTS_EXTENSION)); - } catch (IOException ignored) {} - - try { - directory.deleteFile(IndexFileNames.segmentFileName(segment, "", Lucene40TermVectorsReader.VECTORS_FIELDS_EXTENSION)); - } catch (IOException ignored) {} + IOUtils.deleteFilesIgnoringExceptions(directory, IndexFileNames.segmentFileName(segment, "", Lucene40TermVectorsReader.VECTORS_INDEX_EXTENSION), + IndexFileNames.segmentFileName(segment, "", Lucene40TermVectorsReader.VECTORS_DOCUMENTS_EXTENSION), + IndexFileNames.segmentFileName(segment, "", Lucene40TermVectorsReader.VECTORS_FIELDS_EXTENSION)); } /** diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene40/values/Bytes.java b/lucene/src/java/org/apache/lucene/codecs/lucene40/values/Bytes.java index 53fccd68f2a..5a3fcad2f84 
100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene40/values/Bytes.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/values/Bytes.java @@ -22,6 +22,7 @@ import java.io.IOException; import java.util.Comparator; import java.util.concurrent.atomic.AtomicLong; +import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.index.DocValues; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.DocValue; @@ -122,7 +123,7 @@ public final class Bytes { * @throws IOException * if the files for the writer can not be created. */ - public static Writer getWriter(Directory dir, String id, Mode mode, + public static DocValuesConsumer getWriter(Directory dir, String id, Mode mode, boolean fixedSize, Comparator sortComparator, Counter bytesUsed, IOContext context, boolean fasterButMoreRam) throws IOException { @@ -295,7 +296,8 @@ public final class Bytes { * skipped; they will be filled with 0 bytes. */ @Override - public abstract void add(int docID, BytesRef bytes) throws IOException; + protected + abstract void add(int docID, BytesRef bytes) throws IOException; @Override public abstract void finish(int docCount) throws IOException; @@ -431,7 +433,7 @@ public final class Bytes { } @Override - public void add(int docID, BytesRef bytes) throws IOException { + protected void add(int docID, BytesRef bytes) throws IOException { if (bytes.length == 0) { // default value - skip it return; } diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene40/values/FixedStraightBytesImpl.java b/lucene/src/java/org/apache/lucene/codecs/lucene40/values/FixedStraightBytesImpl.java index 63d8f77c2a1..042250db837 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene40/values/FixedStraightBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/values/FixedStraightBytesImpl.java @@ -64,7 +64,7 @@ class FixedStraightBytesImpl { } @Override - public void add(int docID, BytesRef bytes) throws IOException { + protected void add(int docID, BytesRef bytes) throws IOException { assert lastDocID < docID; if (size == -1) { diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene40/values/Floats.java b/lucene/src/java/org/apache/lucene/codecs/lucene40/values/Floats.java index 644b28b8d98..321c82d5186 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene40/values/Floats.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/values/Floats.java @@ -18,6 +18,7 @@ package org.apache.lucene.codecs.lucene40.values; */ import java.io.IOException; +import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValue; import org.apache.lucene.index.DocValues.Source; @@ -44,7 +45,7 @@ public class Floats { protected static final int VERSION_START = 0; protected static final int VERSION_CURRENT = VERSION_START; - public static Writer getWriter(Directory dir, String id, Counter bytesUsed, + public static DocValuesConsumer getWriter(Directory dir, String id, Counter bytesUsed, IOContext context, Type type) throws IOException { return new FloatsWriter(dir, id, bytesUsed, context, type); } @@ -79,7 +80,7 @@ public class Floats { assert template != null; } - public void add(int docID, double v) throws IOException { + protected void add(int docID, double v) throws IOException { template.toBytes(v, bytesRef); add(docID, bytesRef); } diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene40/values/Ints.java b/lucene/src/java/org/apache/lucene/codecs/lucene40/values/Ints.java index 
131e5f0a063..f22e8e1a341 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene40/values/Ints.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/values/Ints.java @@ -19,6 +19,7 @@ package org.apache.lucene.codecs.lucene40.values; import java.io.IOException; +import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValues.Type; import org.apache.lucene.index.DocValue; @@ -42,7 +43,7 @@ public final class Ints { private Ints() { } - public static Writer getWriter(Directory dir, String id, Counter bytesUsed, + public static DocValuesConsumer getWriter(Directory dir, String id, Counter bytesUsed, Type type, IOContext context) throws IOException { return type == Type.VAR_INTS ? new PackedIntValues.PackedIntsWriter(dir, id, bytesUsed, context) : new IntsWriter(dir, id, bytesUsed, context, type); @@ -103,7 +104,7 @@ public final class Ints { } @Override - public void add(int docID, long v) throws IOException { + protected void add(int docID, long v) throws IOException { template.toBytes(v, bytesRef); add(docID, bytesRef); } diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene40/values/PackedIntValues.java b/lucene/src/java/org/apache/lucene/codecs/lucene40/values/PackedIntValues.java index 7c8c87cc28d..e6a1c6a223d 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene40/values/PackedIntValues.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/values/PackedIntValues.java @@ -63,7 +63,7 @@ class PackedIntValues { } @Override - public void add(int docID, long v) throws IOException { + protected void add(int docID, long v) throws IOException { assert lastDocId < docID; if (!started) { started = true; diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene40/values/VarStraightBytesImpl.java b/lucene/src/java/org/apache/lucene/codecs/lucene40/values/VarStraightBytesImpl.java index cf716c735d1..80924f21b6b 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene40/values/VarStraightBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/values/VarStraightBytesImpl.java @@ -80,7 +80,7 @@ class VarStraightBytesImpl { } @Override - public void add(int docID, BytesRef bytes) throws IOException { + protected void add(int docID, BytesRef bytes) throws IOException { assert !merge; if (bytes.length == 0) { return; // default diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene40/values/Writer.java b/lucene/src/java/org/apache/lucene/codecs/lucene40/values/Writer.java index b5e97e11c32..a7f621b0d57 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene40/values/Writer.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/values/Writer.java @@ -20,11 +20,9 @@ import java.io.IOException; import java.util.Comparator; import org.apache.lucene.codecs.DocValuesConsumer; -import org.apache.lucene.index.DocValues.Source; import org.apache.lucene.index.DocValues.Type; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; -import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.Counter; @@ -41,7 +39,6 @@ import org.apache.lucene.util.Counter; * @lucene.experimental */ public abstract class Writer extends DocValuesConsumer { - protected Source currentMergeSource; protected final Counter bytesUsed; /** @@ -66,102 +63,6 @@ public abstract class Writer extends DocValuesConsumer { */ public static final String DATA_EXTENSION = "dat"; - /** - * Records the specified long value for the docID or 
throws an - * {@link UnsupportedOperationException} if this {@link Writer} doesn't record - * long values. - * - * @throws UnsupportedOperationException - * if this writer doesn't record long values - */ - public void add(int docID, long value) throws IOException { - throw new UnsupportedOperationException(); - } - - /** - * Records the specified double value for the docID or throws an - * {@link UnsupportedOperationException} if this {@link Writer} doesn't record - * double values. - * - * @throws UnsupportedOperationException - * if this writer doesn't record double values - */ - public void add(int docID, double value) throws IOException { - throw new UnsupportedOperationException(); - } - - /** - * Records the specified {@link BytesRef} value for the docID or throws an - * {@link UnsupportedOperationException} if this {@link Writer} doesn't record - * {@link BytesRef} values. - * - * @throws UnsupportedOperationException - * if this writer doesn't record {@link BytesRef} values - */ - public void add(int docID, BytesRef value) throws IOException { - throw new UnsupportedOperationException(); - } - - /** - * Merges a document with the given docID. The methods - * implementation obtains the value for the sourceDoc id from the - * current {@link Source} set to setNextMergeSource(Source). - *
<p> - * This method is used during merging to provide implementation agnostic - * default merge implementation. - * </p> - * <p>
- * All documents IDs between the given ID and the previously given ID or - * 0 if the method is call the first time are filled with default - * values depending on the {@link Writer} implementation. The given document - * ID must always be greater than the previous ID or 0 if called the - * first time. - */ - protected abstract void mergeDoc(int docID, int sourceDoc) throws IOException; - - /** - * Sets the next {@link Source} to consume values from on calls to - * {@link #mergeDoc(int, int)} - * - * @param mergeSource - * the next {@link Source}, this must not be null - */ - protected void setNextMergeSource(Source mergeSource) { - currentMergeSource = mergeSource; - } - - /** - * Finish writing and close any files and resources used by this Writer. - * - * @param docCount - * the total number of documents for this writer. This must be - * greater that or equal to the largest document id passed to one of - * the add methods after the {@link Writer} was created. - */ - public abstract void finish(int docCount) throws IOException; - - @Override - protected void merge(SingleSubMergeState state) throws IOException { - // This enables bulk copies in subclasses per MergeState, subclasses can - // simply override this and decide if they want to merge - // segments using this generic implementation or if a bulk merge is possible - // / feasible. - final Source source = state.reader.getDirectSource(); - assert source != null; - setNextMergeSource(source); // set the current enum we are working on - the - // impl. will get the correct reference for the type - // it supports - int docID = state.docBase; - final Bits liveDocs = state.liveDocs; - final int docCount = state.docCount; - for (int i = 0; i < docCount; i++) { - if (liveDocs == null || liveDocs.get(i)) { - mergeDoc(docID++, i); - } - } - - } - /** * Factory method to create a {@link Writer} instance for a given type. This * method returns default implementations for each of the different types @@ -181,7 +82,7 @@ public abstract class Writer extends DocValuesConsumer { * @return a new {@link Writer} instance for the given {@link Type} * @throws IOException */ - public static Writer create(Type type, String id, Directory directory, + public static DocValuesConsumer create(Type type, String id, Directory directory, Comparator comp, Counter bytesUsed, IOContext context, boolean fasterButMoreRam) throws IOException { if (comp == null) { comp = BytesRef.getUTF8SortedAsUnicodeComparator(); diff --git a/lucene/src/java/org/apache/lucene/codecs/sep/SepDocValuesConsumer.java b/lucene/src/java/org/apache/lucene/codecs/sep/SepDocValuesConsumer.java index 45f05b43531..8c9a854b495 100644 --- a/lucene/src/java/org/apache/lucene/codecs/sep/SepDocValuesConsumer.java +++ b/lucene/src/java/org/apache/lucene/codecs/sep/SepDocValuesConsumer.java @@ -18,6 +18,7 @@ package org.apache.lucene.codecs.sep; */ import java.io.IOException; +import java.util.HashSet; import java.util.Set; import org.apache.lucene.codecs.DocValuesWriterBase; @@ -28,6 +29,7 @@ import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.PerDocWriteState; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.store.Directory; +import org.apache.lucene.util.IOUtils; /** * Implementation of PerDocConsumer that uses separate files. 
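With the factory change above, call sites receive the abstract DocValuesConsumer rather than the concrete Writer. A compile-level sketch of the new entry point follows; the Directory and the docvalues id "_0_1" are hypothetical, not taken from this patch:

    import java.io.IOException;
    import org.apache.lucene.codecs.DocValuesConsumer;
    import org.apache.lucene.codecs.lucene40.values.Writer;
    import org.apache.lucene.index.DocValues;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.IOContext;
    import org.apache.lucene.util.Counter;

    // a sketch: obtain a consumer through the type-based factory;
    // "_0_1" is a hypothetical per-field docvalues id
    static DocValuesConsumer openConsumer(Directory dir) throws IOException {
      return Writer.create(DocValues.Type.BYTES_FIXED_STRAIGHT, "_0_1", dir,
          null /* comparator: create() falls back to the UTF-8 sorted default */,
          Counter.newCounter(), IOContext.DEFAULT, false /* fasterButMoreRam */);
    }
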
@@ -35,10 +37,11 @@ import org.apache.lucene.store.Directory; */ public class SepDocValuesConsumer extends DocValuesWriterBase { private final Directory directory; - + private final FieldInfos fieldInfos; public SepDocValuesConsumer(PerDocWriteState state) throws IOException { super(state); this.directory = state.directory; + fieldInfos = state.fieldInfos; } @Override @@ -46,13 +49,16 @@ public class SepDocValuesConsumer extends DocValuesWriterBase { return directory; } - @SuppressWarnings("fallthrough") public static void files(Directory dir, SegmentInfo segmentInfo, Set files) throws IOException { - FieldInfos fieldInfos = segmentInfo.getFieldInfos(); + files(dir, segmentInfo.getFieldInfos(), segmentInfo.name, files); + } + + @SuppressWarnings("fallthrough") + private static void files(Directory dir,FieldInfos fieldInfos, String segmentName, Set files) { for (FieldInfo fieldInfo : fieldInfos) { if (fieldInfo.hasDocValues()) { - String filename = docValuesId(segmentInfo.name, fieldInfo.number); + String filename = docValuesId(segmentName, fieldInfo.number); switch (fieldInfo.getDocValuesType()) { case BYTES_FIXED_DEREF: case BYTES_VAR_DEREF: @@ -61,8 +67,13 @@ public class SepDocValuesConsumer extends DocValuesWriterBase { case BYTES_VAR_SORTED: files.add(IndexFileNames.segmentFileName(filename, "", Writer.INDEX_EXTENSION)); + try { assert dir.fileExists(IndexFileNames.segmentFileName(filename, "", Writer.INDEX_EXTENSION)); + } catch (IOException e) { + // don't throw checked exception - dir is only used in assert + throw new RuntimeException(e); + } // until here all types use an index case BYTES_FIXED_STRAIGHT: case FLOAT_32: @@ -74,8 +85,13 @@ public class SepDocValuesConsumer extends DocValuesWriterBase { case FIXED_INTS_8: files.add(IndexFileNames.segmentFileName(filename, "", Writer.DATA_EXTENSION)); + try { assert dir.fileExists(IndexFileNames.segmentFileName(filename, "", Writer.DATA_EXTENSION)); + } catch (IOException e) { + // don't throw checked exception - dir is only used in assert + throw new RuntimeException(e); + } break; default: assert false; @@ -83,4 +99,11 @@ public class SepDocValuesConsumer extends DocValuesWriterBase { } } } + + @Override + public void abort() { + Set files = new HashSet(); + files(directory, fieldInfos, segmentName, files); + IOUtils.deleteFilesIgnoringExceptions(directory, files.toArray(new String[0])); + } } diff --git a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsConsumer.java b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsConsumer.java new file mode 100644 index 00000000000..65e77dcb544 --- /dev/null +++ b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsConsumer.java @@ -0,0 +1,276 @@ +package org.apache.lucene.codecs.simpletext; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.Set; + +import org.apache.lucene.codecs.DocValuesConsumer; +import org.apache.lucene.codecs.PerDocConsumer; +import org.apache.lucene.index.DocValue; +import org.apache.lucene.index.DocValues.Type; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.IOUtils; + +/** + * Writes plain-text norms + *
<p>
+ * FOR RECREATIONAL USE ONLY + * + * @lucene.experimental + */ +public class SimpleTextNormsConsumer extends PerDocConsumer { + + /** Extension of norms file */ + static final String NORMS_EXTENSION = "len"; + final static BytesRef END = new BytesRef("END"); + final static BytesRef FIELD = new BytesRef("field "); + final static BytesRef DOC = new BytesRef(" doc "); + final static BytesRef NORM = new BytesRef(" norm "); + + private NormsWriter writer; + + private final Directory directory; + + private final String segment; + + private final IOContext context; + + public SimpleTextNormsConsumer(Directory directory, String segment, + IOContext context) throws IOException { + this.directory = directory; + this.segment = segment; + this.context = context; + } + + @Override + public void close() throws IOException { + if (writer != null) { + writer.finish(); + } + } + + @Override + protected DocValues getDocValuesForMerge(IndexReader reader, FieldInfo info) + throws IOException { + return reader.normValues(info.name); + } + + @Override + protected boolean canMerge(FieldInfo info) { + return !info.omitNorms && info.isIndexed; + } + + @Override + protected Type getDocValuesType(FieldInfo info) { + return Type.BYTES_FIXED_STRAIGHT; + } + + @Override + public DocValuesConsumer addValuesField(Type type, FieldInfo fieldInfo) + throws IOException { + return new SimpleTextNormsDocValuesConsumer(fieldInfo); + } + + @Override + public void abort() { + if (writer != null) { + try { + writer.abort(); + } catch (IOException e) { + } + } + } + + private class SimpleTextNormsDocValuesConsumer extends DocValuesConsumer { + // Holds all docID/norm pairs we've seen + int[] docIDs = new int[1]; + byte[] norms = new byte[1]; + int upto; + private final FieldInfo fi; + + public SimpleTextNormsDocValuesConsumer(FieldInfo fieldInfo) { + fi = fieldInfo; + } + + @Override + public void add(int docID, DocValue docValue) throws IOException { + add(docID, docValue.getBytes()); + } + + protected void add(int docID, BytesRef value) throws IOException { + if (docIDs.length <= upto) { + assert docIDs.length == upto; + docIDs = ArrayUtil.grow(docIDs, 1 + upto); + } + if (norms.length <= upto) { + assert norms.length == upto; + norms = ArrayUtil.grow(norms, 1 + upto); + } + assert value.length == 1; + norms[upto] = value.bytes[value.offset]; + docIDs[upto] = docID; + upto++; + } + + @Override + public void finish(int docCount) throws IOException { + final NormsWriter normsWriter = getNormsWriter(); + boolean success = false; + try { + int uptoDoc = 0; + normsWriter.setNumTotalDocs(docCount); + if (upto > 0) { + normsWriter.startField(fi); + int docID = 0; + for (; docID < docCount; docID++) { + if (uptoDoc < upto && docIDs[uptoDoc] == docID) { + normsWriter.writeNorm(norms[uptoDoc]); + uptoDoc++; + } else { + normsWriter.writeNorm((byte) 0); + } + } + // we should have consumed every norm + assert uptoDoc == upto; + + } else { + // Fill entire field with default norm: + normsWriter.startField(fi); + for (; upto < docCount; upto++) + normsWriter.writeNorm((byte) 0); + } + success = true; + } finally { + if (!success) { + normsWriter.abort(); + } + } + } + } + + public NormsWriter getNormsWriter() throws IOException { + if (writer == null) { + writer = new NormsWriter(directory, segment, context); + } + return writer; + } + + private static class NormsWriter { + + private final IndexOutput output; + private int numTotalDocs = 0; + private int docid = 0; + + private final BytesRef scratch = new BytesRef(); + + + public 
NormsWriter(Directory directory, String segment, IOContext context) + throws IOException { + final String normsFileName = IndexFileNames.segmentFileName(segment, "", + NORMS_EXTENSION); + output = directory.createOutput(normsFileName, context); + + } + + public void startField(FieldInfo info) throws IOException { + assert info.omitNorms == false; + docid = 0; + write(FIELD); + write(info.name); + newLine(); + } + + public void writeNorm(byte norm) throws IOException { + write(DOC); + write(Integer.toString(docid)); + newLine(); + + write(NORM); + write(norm); + newLine(); + docid++; + } + + public void finish(int numDocs) throws IOException { + if (docid != numDocs) { + throw new RuntimeException( + "mergeNorms produced an invalid result: docCount is " + numDocs + + " but only saw " + docid + " file=" + output.toString() + + "; now aborting this merge to prevent index corruption"); + } + write(END); + newLine(); + } + + private void write(String s) throws IOException { + SimpleTextUtil.write(output, s, scratch); + } + + private void write(BytesRef bytes) throws IOException { + SimpleTextUtil.write(output, bytes); + } + + private void write(byte b) throws IOException { + scratch.grow(1); + scratch.bytes[scratch.offset] = b; + scratch.length = 1; + SimpleTextUtil.write(output, scratch); + } + + private void newLine() throws IOException { + SimpleTextUtil.writeNewline(output); + } + + public void setNumTotalDocs(int numTotalDocs) { + assert this.numTotalDocs == 0 || numTotalDocs == this.numTotalDocs; + this.numTotalDocs = numTotalDocs; + } + + public void abort() throws IOException { + IOUtils.close(output); + } + + public void finish() throws IOException { + finish(numTotalDocs); + IOUtils.close(output); + } + } + + public static void files(Directory dir, SegmentInfo info, Set files) throws IOException { + FieldInfos fieldInfos = info.getFieldInfos(); + + for (FieldInfo fieldInfo : fieldInfos) { + if (!fieldInfo.omitNorms && fieldInfo.isIndexed) { + files.add(IndexFileNames.segmentFileName(info.name, "", + NORMS_EXTENSION)); + break; + } + } + } +} diff --git a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsFormat.java b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsFormat.java index a0aa6506619..7ddacf98bd1 100644 --- a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsFormat.java @@ -21,13 +21,12 @@ import java.io.IOException; import java.util.Set; import org.apache.lucene.codecs.NormsFormat; -import org.apache.lucene.codecs.NormsReader; -import org.apache.lucene.codecs.NormsWriter; -import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.codecs.PerDocConsumer; +import org.apache.lucene.codecs.PerDocProducer; +import org.apache.lucene.index.PerDocWriteState; import org.apache.lucene.index.SegmentInfo; -import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.store.Directory; -import org.apache.lucene.store.IOContext; /** * plain-text norms format @@ -36,19 +35,28 @@ import org.apache.lucene.store.IOContext; * @lucene.experimental */ public class SimpleTextNormsFormat extends NormsFormat { - + @Override - public NormsReader normsReader(Directory dir, SegmentInfo info, FieldInfos fields, IOContext context, Directory separateNormsDir) throws IOException { - return new SimpleTextNormsReader(dir, info, fields, context); + public PerDocConsumer docsConsumer(PerDocWriteState 
state) throws IOException { + return new SimpleTextNormsConsumer(state.directory, state.segmentName, state.context); } @Override - public NormsWriter normsWriter(SegmentWriteState state) throws IOException { - return new SimpleTextNormsWriter(state.directory, state.segmentName, state.context); + public PerDocProducer docsProducer(SegmentReadState state) throws IOException { + return new SimpleTextNormsProducer(state.dir, state.segmentInfo, state.fieldInfos, state.context); } @Override - public void files(Directory dir, SegmentInfo info, Set files) throws IOException { - SimpleTextNormsReader.files(dir, info, files); + public void files(Directory dir, SegmentInfo info, Set files) + throws IOException { + SimpleTextNormsConsumer.files(dir, info, files); + } + + @Override + public PerDocProducer docsProducer(SegmentReadState state, + Directory separateNormsDir) throws IOException { + return docsProducer(state); + } + } diff --git a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsReader.java b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsProducer.java similarity index 62% rename from lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsReader.java rename to lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsProducer.java index 93c058bb0c5..bf40d06de63 100644 --- a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsReader.java +++ b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsProducer.java @@ -17,12 +17,21 @@ package org.apache.lucene.codecs.simpletext; * limitations under the License. */ +import static org.apache.lucene.codecs.simpletext.SimpleTextNormsConsumer.DOC; +import static org.apache.lucene.codecs.simpletext.SimpleTextNormsConsumer.END; +import static org.apache.lucene.codecs.simpletext.SimpleTextNormsConsumer.FIELD; +import static org.apache.lucene.codecs.simpletext.SimpleTextNormsConsumer.NORM; +import static org.apache.lucene.codecs.simpletext.SimpleTextNormsConsumer.NORMS_EXTENSION; + import java.io.IOException; import java.util.HashMap; import java.util.Map; import java.util.Set; -import org.apache.lucene.codecs.NormsReader; +import org.apache.lucene.codecs.PerDocProducer; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.DocValues.Source; +import org.apache.lucene.index.DocValues.Type; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.SegmentInfo; @@ -33,18 +42,17 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.StringHelper; -import static org.apache.lucene.codecs.simpletext.SimpleTextNormsWriter.*; - /** * Reads plain-text norms *
<p>
* FOR RECREATIONAL USE ONLY * @lucene.experimental */ -public class SimpleTextNormsReader extends NormsReader { - private Map norms = new HashMap(); +public class SimpleTextNormsProducer extends PerDocProducer { - public SimpleTextNormsReader(Directory directory, SegmentInfo si, FieldInfos fields, IOContext context) throws IOException { + Map norms = new HashMap(); + + public SimpleTextNormsProducer(Directory directory, SegmentInfo si, FieldInfos fields, IOContext context) throws IOException { if (fields.hasNorms()) { readNorms(directory.openInput(IndexFileNames.segmentFileName(si.name, "", NORMS_EXTENSION), context), si.docCount); } @@ -58,7 +66,7 @@ public class SimpleTextNormsReader extends NormsReader { SimpleTextUtil.readLine(in, scratch); while (!scratch.equals(END)) { assert StringHelper.startsWith(scratch, FIELD); - String fieldName = readString(FIELD.length, scratch); + final String fieldName = readString(FIELD.length, scratch); byte bytes[] = new byte[maxDoc]; for (int i = 0; i < bytes.length; i++) { SimpleTextUtil.readLine(in, scratch); @@ -67,7 +75,7 @@ public class SimpleTextNormsReader extends NormsReader { assert StringHelper.startsWith(scratch, NORM); bytes[i] = scratch.bytes[scratch.offset + NORM.length]; } - norms.put(fieldName, bytes); + norms.put(fieldName, new NormsDocValues(new Norm(bytes))); SimpleTextUtil.readLine(in, scratch); assert StringHelper.startsWith(scratch, FIELD) || scratch.equals(END); } @@ -81,11 +89,6 @@ public class SimpleTextNormsReader extends NormsReader { } } - @Override - public byte[] norms(String name) throws IOException { - return norms.get(name); - } - @Override public void close() throws IOException { norms = null; @@ -94,7 +97,7 @@ public class SimpleTextNormsReader extends NormsReader { static void files(Directory dir, SegmentInfo info, Set files) throws IOException { // TODO: This is what SI always did... but we can do this cleaner? // like first FI that has norms but doesn't have separate norms? 
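As a reading aid for the producer above: the plain-text .len layout it parses, and that SimpleTextNormsConsumer writes, looks like this sketch (the field name "body" and the raw norm byte, shown as x, are illustrative):

    field body
     doc 0
     norm x
     doc 1
     norm x
    END
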
- final String normsFileName = IndexFileNames.segmentFileName(info.name, "", SimpleTextNormsWriter.NORMS_EXTENSION); + final String normsFileName = IndexFileNames.segmentFileName(info.name, "", SimpleTextNormsConsumer.NORMS_EXTENSION); if (dir.fileExists(normsFileName)) { files.add(normsFileName); } @@ -103,4 +106,58 @@ public class SimpleTextNormsReader extends NormsReader { private String readString(int offset, BytesRef scratch) { return new String(scratch.bytes, scratch.offset+offset, scratch.length-offset, IOUtils.CHARSET_UTF_8); } + + @Override + public DocValues docValues(String field) throws IOException { + return norms.get(field); + } + + private class NormsDocValues extends DocValues { + private final Source source; + public NormsDocValues(Source source) { + this.source = source; + } + + @Override + public Source load() throws IOException { + return source; + } + + @Override + public Source getDirectSource() throws IOException { + return getSource(); + } + + @Override + public Type type() { + return Type.BYTES_FIXED_STRAIGHT; + } + } + + static final class Norm extends Source { + protected Norm(byte[] bytes) { + super(Type.BYTES_FIXED_STRAIGHT); + this.bytes = bytes; + } + final byte bytes[]; + + @Override + public BytesRef getBytes(int docID, BytesRef ref) { + ref.bytes = bytes; + ref.offset = docID; + ref.length = 1; + return ref; + } + + @Override + public boolean hasArray() { + return true; + } + + @Override + public Object getArray() { + return bytes; + } + + } } diff --git a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsWriter.java b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsWriter.java deleted file mode 100644 index 7b3729f2df2..00000000000 --- a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsWriter.java +++ /dev/null @@ -1,114 +0,0 @@ -package org.apache.lucene.codecs.simpletext; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; - -import org.apache.lucene.codecs.NormsWriter; -import org.apache.lucene.index.FieldInfo; -import org.apache.lucene.index.IndexFileNames; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.IOContext; -import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.IOUtils; - -/** - * Writes plain-text norms - *
<p>
- * FOR RECREATIONAL USE ONLY - * @lucene.experimental - */ -public class SimpleTextNormsWriter extends NormsWriter { - private IndexOutput out; - private int docid = 0; - - /** Extension of norms file */ - static final String NORMS_EXTENSION = "len"; - - private final BytesRef scratch = new BytesRef(); - - final static BytesRef END = new BytesRef("END"); - final static BytesRef FIELD = new BytesRef("field "); - final static BytesRef DOC = new BytesRef(" doc "); - final static BytesRef NORM = new BytesRef(" norm "); - - public SimpleTextNormsWriter(Directory directory, String segment, IOContext context) throws IOException { - final String normsFileName = IndexFileNames.segmentFileName(segment, "", NORMS_EXTENSION); - out = directory.createOutput(normsFileName, context); - } - - @Override - public void startField(FieldInfo info) throws IOException { - assert info.omitNorms == false; - docid = 0; - write(FIELD); - write(info.name); - newLine(); - } - - @Override - public void writeNorm(byte norm) throws IOException { - write(DOC); - write(Integer.toString(docid)); - newLine(); - - write(NORM); - write(norm); - newLine(); - docid++; - } - - @Override - public void finish(int numDocs) throws IOException { - if (docid != numDocs) { - throw new RuntimeException("mergeNorms produced an invalid result: docCount is " + numDocs - + " but only saw " + docid + " file=" + out.toString() + "; now aborting this merge to prevent index corruption"); - } - write(END); - newLine(); - } - - @Override - public void close() throws IOException { - try { - IOUtils.close(out); - } finally { - out = null; - } - } - - private void write(String s) throws IOException { - SimpleTextUtil.write(out, s, scratch); - } - - private void write(BytesRef bytes) throws IOException { - SimpleTextUtil.write(out, bytes); - } - - private void write(byte b) throws IOException { - scratch.grow(1); - scratch.bytes[scratch.offset] = b; - scratch.length = 1; - SimpleTextUtil.write(out, scratch); - } - - private void newLine() throws IOException { - SimpleTextUtil.writeNewline(out); - } -} diff --git a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsWriter.java b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsWriter.java index 9341ddc3174..82480a68dce 100644 --- a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsWriter.java +++ b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsWriter.java @@ -166,9 +166,7 @@ public class SimpleTextStoredFieldsWriter extends StoredFieldsWriter { try { close(); } catch (IOException ignored) {} - try { - directory.deleteFile(IndexFileNames.segmentFileName(segment, "", FIELDS_EXTENSION)); - } catch (IOException ignored) {} + IOUtils.deleteFilesIgnoringExceptions(directory, IndexFileNames.segmentFileName(segment, "", FIELDS_EXTENSION)); } @Override diff --git a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsWriter.java b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsWriter.java index 0a1836fe27a..7257ac1622d 100644 --- a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsWriter.java +++ b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsWriter.java @@ -149,10 +149,7 @@ public class SimpleTextTermVectorsWriter extends TermVectorsWriter { try { close(); } catch (IOException ignored) {} - - try { - directory.deleteFile(IndexFileNames.segmentFileName(segment, "", VECTORS_EXTENSION)); - } catch (IOException ignored) 
{} + IOUtils.deleteFilesIgnoringExceptions(directory, IndexFileNames.segmentFileName(segment, "", VECTORS_EXTENSION)); } @Override diff --git a/lucene/src/java/org/apache/lucene/index/BaseMultiReader.java b/lucene/src/java/org/apache/lucene/index/BaseMultiReader.java index ffb2c9182d1..592569b8e2f 100644 --- a/lucene/src/java/org/apache/lucene/index/BaseMultiReader.java +++ b/lucene/src/java/org/apache/lucene/index/BaseMultiReader.java @@ -117,11 +117,6 @@ abstract class BaseMultiReader extends IndexReader { return false; } - @Override - public synchronized byte[] norms(String field) throws IOException { - throw new UnsupportedOperationException("please use MultiNorms.norms, or wrap your IndexReader with SlowMultiReaderWrapper, if you really need a top level norms"); - } - @Override public int docFreq(String field, BytesRef t) throws IOException { ensureOpen(); @@ -157,4 +152,9 @@ abstract class BaseMultiReader extends IndexReader { public DocValues docValues(String field) throws IOException { throw new UnsupportedOperationException("please use MultiDocValues#getDocValues, or wrap your IndexReader with SlowMultiReaderWrapper, if you really need a top level DocValues"); } + + @Override + public DocValues normValues(String field) throws IOException { + throw new UnsupportedOperationException("please use MultiDocValues#getNormValues, or wrap your IndexReader with SlowMultiReaderWrapper, if you really need a top level Norm DocValues "); + } } diff --git a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java index ffb9869b4df..50b4808f88d 100644 --- a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java +++ b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java @@ -131,6 +131,17 @@ final class DocFieldProcessor extends DocConsumer { } } + try { + PerDocConsumer perDocConsumer = perDocConsumers.get(0); + if (perDocConsumer != null) { + perDocConsumer.abort(); + } + } catch (Throwable t) { + if (th == null) { + th = t; + } + } + // If any errors occured, throw it. 
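The abort path added above follows Lucene's usual collect-first-throwable idiom: keep aborting everything, remember only the first failure, rethrow at the end. A standalone sketch of that idiom under hypothetical types (a list of Closeable consumers):

    import java.io.Closeable;
    import java.util.List;

    static void abortAll(List<Closeable> consumers) {
      Throwable th = null;
      for (Closeable c : consumers) {
        try {
          c.close();
        } catch (Throwable t) {
          if (th == null) th = t;   // remember only the first cause
        }
      }
      if (th != null) {
        if (th instanceof RuntimeException) throw (RuntimeException) th;
        if (th instanceof Error) throw (Error) th;
        throw new RuntimeException(th);   // wrap checked causes
      }
    }
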
if (th != null) { if (th instanceof RuntimeException) throw (RuntimeException) th; @@ -329,7 +340,6 @@ final class DocFieldProcessor extends DocConsumer { perDocConsumer = dvFormat.docsConsumer(perDocWriteState); perDocConsumers.put(0, perDocConsumer); } - DocValuesConsumer docValuesConsumer = perDocConsumer.addValuesField(valueType, fieldInfo); fieldInfo.setDocValuesType(valueType); diff --git a/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java b/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java index fa3087cbb17..5d6c4bf78fe 100644 --- a/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java +++ b/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java @@ -18,7 +18,6 @@ package org.apache.lucene.index; */ import java.io.IOException; -import java.io.Reader; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; @@ -34,7 +33,6 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; final class DocInverterPerField extends DocFieldConsumerPerField { - final private DocInverter parent; final FieldInfo fieldInfo; final InvertedDocConsumerPerField consumer; final InvertedDocEndConsumerPerField endConsumer; @@ -42,7 +40,6 @@ final class DocInverterPerField extends DocFieldConsumerPerField { final FieldInvertState fieldState; public DocInverterPerField(DocInverter parent, FieldInfo fieldInfo) { - this.parent = parent; this.fieldInfo = fieldInfo; docState = parent.docState; fieldState = parent.fieldState; diff --git a/lucene/src/java/org/apache/lucene/index/DocValue.java b/lucene/src/java/org/apache/lucene/index/DocValue.java index 4526197b674..63b69cd86e7 100644 --- a/lucene/src/java/org/apache/lucene/index/DocValue.java +++ b/lucene/src/java/org/apache/lucene/index/DocValue.java @@ -20,8 +20,6 @@ import java.util.Comparator; import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.document.DocValuesField; -import org.apache.lucene.index.DocValues; -import org.apache.lucene.index.DocValues.Type; // javadocs import org.apache.lucene.util.BytesRef; /** @@ -32,36 +30,6 @@ import org.apache.lucene.util.BytesRef; */ public interface DocValue { - /** - * Sets the given long value. - */ - public void setInt(long value); - - /** - * Sets the given float value. - */ - public void setFloat(float value); - - /** - * Sets the given double value. - */ - public void setFloat(double value); - - /** - * Sets the given {@link BytesRef} value and the field's {@link Type}. The - * comparator for this field is set to null. If a - * null comparator is set the default comparator for the given - * {@link Type} is used. - */ - public void setBytes(BytesRef value, DocValues.Type type); - - /** - * Sets the given {@link BytesRef} value, the field's {@link Type} and the - * field's comparator. If the {@link Comparator} is set to null - * the default for the given {@link Type} is used instead. - */ - public void setBytes(BytesRef value, DocValues.Type type, Comparator comp); - /** * Returns the set {@link BytesRef} or null if not set. */ @@ -82,19 +50,4 @@ public interface DocValue { */ public long getInt(); - /** - * Sets the {@link BytesRef} comparator for this field. If the field has a - * numeric {@link Type} the comparator will be ignored. 
- */ - public void setBytesComparator(Comparator comp); - - /** - * Sets the {@link Type} - */ - public void setDocValuesType(DocValues.Type type); - - /** - * Returns the {@link Type} - */ - public DocValues.Type docValueType(); } diff --git a/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java b/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java index a365e80ee57..d019b481135 100644 --- a/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java +++ b/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java @@ -331,12 +331,6 @@ public class FilterIndexReader extends IndexReader { return in.hasNorms(field); } - @Override - public byte[] norms(String f) throws IOException { - ensureOpen(); - return in.norms(f); - } - @Override public int docFreq(String field, BytesRef t) throws IOException { ensureOpen(); @@ -419,6 +413,12 @@ public class FilterIndexReader extends IndexReader { ensureOpen(); return in.docValues(field); } + + @Override + public DocValues normValues(String field) throws IOException { + ensureOpen(); + return in.normValues(field); + } @Override public IndexCommit getIndexCommit() throws IOException { diff --git a/lucene/src/java/org/apache/lucene/index/IndexReader.java b/lucene/src/java/org/apache/lucene/index/IndexReader.java index 2589c982c32..ceedd5598e3 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexReader.java +++ b/lucene/src/java/org/apache/lucene/index/IndexReader.java @@ -29,6 +29,7 @@ import java.util.concurrent.atomic.AtomicInteger; import org.apache.lucene.document.Document; import org.apache.lucene.document.DocumentStoredFieldVisitor; +import org.apache.lucene.index.DocValues.Source; import org.apache.lucene.search.SearcherManager; // javadocs import org.apache.lucene.store.*; import org.apache.lucene.util.Bits; @@ -735,7 +736,17 @@ public abstract class IndexReader implements Closeable { * * @see org.apache.lucene.document.Field#setBoost(float) */ - public abstract byte[] norms(String field) throws IOException; + // TODO: cut over to source once we support other formats like float + public final byte[] norms(String field) throws IOException { + DocValues docValues = normValues(field); + if (docValues != null) { + Source source = docValues.getSource(); + assert source.hasArray(); // TODO cut over to source + return (byte[])source.getArray(); + } + return null; + } + /** * Returns {@link Fields} for this reader. @@ -1056,6 +1067,8 @@ public abstract class IndexReader implements Closeable { * using {@link ReaderUtil#gatherSubReaders} and iterate * through them yourself. */ public abstract DocValues docValues(String field) throws IOException; + + public abstract DocValues normValues(String field) throws IOException; private volatile Fields fields; diff --git a/lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumerPerField.java b/lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumerPerField.java index b9e2df606da..e46ae2506d4 100644 --- a/lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumerPerField.java +++ b/lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumerPerField.java @@ -1,5 +1,7 @@ package org.apache.lucene.index; +import java.io.IOException; + /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. 
See the NOTICE file distributed with @@ -18,6 +20,6 @@ package org.apache.lucene.index; */ abstract class InvertedDocEndConsumerPerField { - abstract void finish(); + abstract void finish() throws IOException; abstract void abort(); } diff --git a/lucene/src/java/org/apache/lucene/index/MultiDocValues.java b/lucene/src/java/org/apache/lucene/index/MultiDocValues.java index bea1efeb8d0..2df3046e1a5 100644 --- a/lucene/src/java/org/apache/lucene/index/MultiDocValues.java +++ b/lucene/src/java/org/apache/lucene/index/MultiDocValues.java @@ -17,6 +17,7 @@ package org.apache.lucene.index; * limitations under the License. */ import java.io.IOException; +import java.lang.reflect.Array; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -33,6 +34,13 @@ import org.apache.lucene.util.ReaderUtil.Gather; * @lucene.internal */ public class MultiDocValues extends DocValues { + + private static DocValuesPuller DEFAULT_PULLER = new DocValuesPuller(); + private static final DocValuesPuller NORMS_PULLER = new DocValuesPuller() { + public DocValues pull(IndexReader reader, String field) throws IOException { + return reader.normValues(field); + } + }; public static class DocValuesSlice { public final static DocValuesSlice[] EMPTY_ARRAY = new DocValuesSlice[0]; @@ -46,6 +54,12 @@ public class MultiDocValues extends DocValues { this.length = length; } } + + private static class DocValuesPuller { + public DocValues pull(IndexReader reader, String field) throws IOException { + return reader.docValues(field); + } + } private DocValuesSlice[] slices; private int[] starts; @@ -58,7 +72,6 @@ public class MultiDocValues extends DocValues { this.type = promotedType.type(); this.valueSize = promotedType.getValueSize(); } - /** * Returns a single {@link DocValues} instance for this field, merging * their values on the fly. @@ -68,15 +81,32 @@ public class MultiDocValues extends DocValues { * sub-readers (using {@link Gather}) and iterate through them yourself. */ public static DocValues getDocValues(IndexReader r, final String field) throws IOException { + return getDocValues(r, field, DEFAULT_PULLER); + } + + /** + * Returns a single {@link DocValues} instance for this norms field, merging + * their values on the fly. + * + *
<p>
+ * NOTE: this is a slow way to access DocValues. It's better to get the + * sub-readers (using {@link Gather}) and iterate through them yourself. + */ + public static DocValues getNormDocValues(IndexReader r, final String field) throws IOException { + return getDocValues(r, field, NORMS_PULLER); + } + + + private static DocValues getDocValues(IndexReader r, final String field, final DocValuesPuller puller) throws IOException { final IndexReader[] subs = r.getSequentialSubReaders(); if (subs == null) { // already an atomic reader - return r.docValues(field); + return puller.pull(r, field); } else if (subs.length == 0) { // no fields return null; } else if (subs.length == 1) { - return getDocValues(subs[0], field); + return getDocValues(subs[0], field, puller); } else { final List slices = new ArrayList(); @@ -89,7 +119,7 @@ public class MultiDocValues extends DocValues { new ReaderUtil.Gather(r) { @Override protected void add(int base, IndexReader r) throws IOException { - final DocValues d = r.docValues(field); + final DocValues d = puller.pull(r, field); if (d != null) { TypePromoter incoming = TypePromoter.create(d.type(), d.getValueSize()); promotedType[0] = promotedType[0].promote(incoming); @@ -195,6 +225,7 @@ public class MultiDocValues extends DocValues { private final int[] starts; private final DocValuesSlice[] slices; private boolean direct; + private Object cachedArray; // cached array if supported public MultiSource(DocValuesSlice[] slices, int[] starts, boolean direct, Type type) { super(type); @@ -243,6 +274,77 @@ public class MultiDocValues extends DocValues { final int doc = ensureSource(docID); return current.getBytes(doc, bytesRef); } + + @Override + public boolean hasArray() { + boolean oneRealSource = false; + for (DocValuesSlice slice : slices) { + try { + Source source = slice.docValues.getSource(); + if (source instanceof EmptySource) { + /* + * empty source marks a gap in the array skip if we encounter one + */ + continue; + } + oneRealSource = true; + if (!source.hasArray()) { + return false; + } + } catch (IOException e) { + throw new RuntimeException("load failed", e); + } + } + return oneRealSource; + } + + @Override + public Object getArray() { + if (!hasArray()) { + return null; + } + try { + Class componentType = null; + Object[] arrays = new Object[slices.length]; + int numDocs = 0; + for (int i = 0; i < slices.length; i++) { + DocValuesSlice slice = slices[i]; + Source source = slice.docValues.getSource(); + Object array = null; + if (!(source instanceof EmptySource)) { + // EmptySource is skipped - marks a gap in the array + array = source.getArray(); + } + numDocs += slice.length; + if (array != null) { + if (componentType == null) { + componentType = array.getClass().getComponentType(); + } + assert componentType == array.getClass().getComponentType(); + } + arrays[i] = array; + } + assert componentType != null; + synchronized (this) { + if (cachedArray != null) { + return cachedArray; + } + final Object globalArray = Array.newInstance(componentType, numDocs); + + for (int i = 0; i < slices.length; i++) { + DocValuesSlice slice = slices[i]; + if (arrays[i] != null) { + assert slice.length == Array.getLength(arrays[i]); + System.arraycopy(arrays[i], 0, globalArray, slice.start, + slice.length); + } + } + return cachedArray = globalArray; + } + } catch (IOException e) { + throw new RuntimeException("load failed", e); + } + } } // TODO: this is dup of DocValues.getDefaultSource()? 
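To see the norms puller above end to end: a composite reader's per-segment norms can be pulled as one merged array (a minimal sketch; the reader and field name are whatever was indexed with norms enabled):

    import java.io.IOException;
    import org.apache.lucene.index.DocValues;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.MultiDocValues;

    static byte[] mergedNorms(IndexReader reader, String field) throws IOException {
      DocValues norms = MultiDocValues.getNormDocValues(reader, field);
      if (norms == null) {
        return null;   // unindexed field, or norms omitted
      }
      DocValues.Source source = norms.getSource();
      // MultiSource.getArray() stitches the per-slice arrays at their docBase offsets
      return source.hasArray() ? (byte[]) source.getArray() : null;
    }
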
@@ -269,7 +371,7 @@ public class MultiDocValues extends DocValues { } } - private static class EmptyFixedSource extends Source { + private static class EmptyFixedSource extends EmptySource { private final int valueSize; public EmptyFixedSource(Type type, int valueSize) { diff --git a/lucene/src/java/org/apache/lucene/index/NormsConsumer.java b/lucene/src/java/org/apache/lucene/index/NormsConsumer.java index ea2013798df..3e68e04c838 100644 --- a/lucene/src/java/org/apache/lucene/index/NormsConsumer.java +++ b/lucene/src/java/org/apache/lucene/index/NormsConsumer.java @@ -21,8 +21,12 @@ import java.io.IOException; import java.util.Collection; import java.util.Map; +import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.codecs.NormsFormat; -import org.apache.lucene.codecs.NormsWriter; +import org.apache.lucene.codecs.PerDocConsumer; +import org.apache.lucene.document.DocValuesField; +import org.apache.lucene.index.DocValues.Type; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; // TODO FI: norms could actually be stored as doc store @@ -33,14 +37,21 @@ import org.apache.lucene.util.IOUtils; */ final class NormsConsumer extends InvertedDocEndConsumer { - final NormsFormat normsFormat; + private final NormsFormat normsFormat; + private PerDocConsumer consumer; + private final DocumentsWriterPerThread dwpt; public NormsConsumer(DocumentsWriterPerThread dwpt) { normsFormat = dwpt.codec.normsFormat(); + this.dwpt = dwpt; } @Override - public void abort() {} + public void abort(){ + if (consumer != null) { + consumer.abort(); + } + } // We only write the _X.nrm file at flush void files(Collection files) {} @@ -49,50 +60,39 @@ final class NormsConsumer extends InvertedDocEndConsumer { * not disabled */ @Override public void flush(Map fieldsToFlush, SegmentWriteState state) throws IOException { - if (!state.fieldInfos.hasNorms()) { - return; - } - - NormsWriter normsOut = null; boolean success = false; + boolean anythingFlushed = false; try { - normsOut = normsFormat.normsWriter(state); - - for (FieldInfo fi : state.fieldInfos) { - final NormsConsumerPerField toWrite = (NormsConsumerPerField) fieldsToFlush.get(fi); - int upto = 0; - // we must check the final value of omitNorms for the fieldinfo, it could have - // changed for this field since the first time we added it. - if (!fi.omitNorms && toWrite != null && toWrite.upto > 0) { - normsOut.startField(fi); - int docID = 0; - for (; docID < state.numDocs; docID++) { - if (upto < toWrite.upto && toWrite.docIDs[upto] == docID) { - normsOut.writeNorm(toWrite.norms[upto]); - upto++; - } else { - normsOut.writeNorm((byte) 0); + if (state.fieldInfos.hasNorms()) { + for (FieldInfo fi : state.fieldInfos) { + final NormsConsumerPerField toWrite = (NormsConsumerPerField) fieldsToFlush.get(fi); + if (!fi.omitNorms) { + if (toWrite != null) { + anythingFlushed = true; + toWrite.flush(state.numDocs); + } else if (fi.isIndexed) { + anythingFlushed = true; + // we must check the final value of omitNorms for the fieldinfo, it could have + // changed for this field since the first time we added it. 
+ final DocValuesConsumer valuesConsumer = newConsumer(new PerDocWriteState(state), fi); + final DocValuesField value = new DocValuesField(""); + value.setBytes(new BytesRef(new byte[] {0x00}), Type.BYTES_FIXED_STRAIGHT); + valuesConsumer.add(state.numDocs-1, value); + valuesConsumer.finish(state.numDocs); } } - - // we should have consumed every norm - assert upto == toWrite.upto; - - toWrite.reset(); - } else if (fi.isIndexed && !fi.omitNorms) { - // Fill entire field with default norm: - normsOut.startField(fi); - for(;upto { - - final FieldInfo fieldInfo; - final DocumentsWriterPerThread.DocState docState; - final Similarity similarity; +public class NormsConsumerPerField extends InvertedDocEndConsumerPerField implements Comparable { + private final FieldInfo fieldInfo; + private final DocumentsWriterPerThread.DocState docState; + private final Similarity similarity; + private final FieldInvertState fieldState; + private DocValuesConsumer consumer; + private final DocValuesField value = new DocValuesField(""); + private final BytesRef spare = new BytesRef(1); + private final NormsConsumer parent; - // Holds all docID/norm pairs we've seen - int[] docIDs = new int[1]; - byte[] norms = new byte[1]; - int upto; - - final FieldInvertState fieldState; - - public void reset() { - // Shrink back if we are overallocated now: - docIDs = ArrayUtil.shrink(docIDs, upto); - norms = ArrayUtil.shrink(norms, upto); - upto = 0; - } - - public NormsConsumerPerField(final DocInverterPerField docInverterPerField, final FieldInfo fieldInfo) { + public NormsConsumerPerField(final DocInverterPerField docInverterPerField, final FieldInfo fieldInfo, NormsConsumer parent) { this.fieldInfo = fieldInfo; + this.parent = parent; docState = docInverterPerField.docState; fieldState = docInverterPerField.fieldState; similarity = docState.similarityProvider.get(fieldInfo.name); - } + spare.length = 1; + spare.offset = 0; + } @Override - void abort() { - upto = 0; - } - public int compareTo(NormsConsumerPerField other) { return fieldInfo.name.compareTo(other.fieldInfo.name); } - + @Override - void finish() { + void finish() throws IOException { if (fieldInfo.isIndexed && !fieldInfo.omitNorms) { - if (docIDs.length <= upto) { - assert docIDs.length == upto; - docIDs = ArrayUtil.grow(docIDs, 1+upto); - } - if (norms.length <= upto) { - assert norms.length == upto; - norms = ArrayUtil.grow(norms, 1+upto); - } - norms[upto] = similarity.computeNorm(fieldState); - docIDs[upto] = docState.docID; - upto++; + DocValuesConsumer consumer = getConsumer(); + spare.bytes[0] = similarity.computeNorm(fieldState); + value.setBytes(spare, Type.BYTES_FIXED_STRAIGHT); + consumer.add(docState.docID, value); + + } + } + + void flush(int docCount) throws IOException { + DocValuesConsumer consumer = this.consumer; + if (consumer == null && fieldInfo.isIndexed) { + consumer = getConsumer(); + spare.bytes[0] = 0x00; + value.setBytes(spare, Type.BYTES_FIXED_STRAIGHT); + consumer.add(docCount-1, value); + } + if (consumer != null) { + consumer.finish(docCount); } } + + private DocValuesConsumer getConsumer() throws IOException { + if (consumer == null) { + consumer = parent.newConsumer(docState.docWriter.newPerDocWriteState(""), fieldInfo); + } + return consumer; + } + + + @Override + void abort() { + // + } + } diff --git a/lucene/src/java/org/apache/lucene/index/ParallelReader.java b/lucene/src/java/org/apache/lucene/index/ParallelReader.java index 0448277bb1d..245c5a54e53 100644 --- a/lucene/src/java/org/apache/lucene/index/ParallelReader.java 
+++ b/lucene/src/java/org/apache/lucene/index/ParallelReader.java @@ -48,7 +48,7 @@ public class ParallelReader extends IndexReader { private SortedMap fieldToReader = new TreeMap(); private Map> readerToFields = new HashMap>(); private List storedFieldReaders = new ArrayList(); - private Map normsCache = new HashMap(); + private Map normsCache = new HashMap(); private final ReaderContext topLevelReaderContext = new AtomicReaderContext(this); private int maxDoc; private int numDocs; @@ -336,27 +336,6 @@ public class ParallelReader extends IndexReader { return reader==null ? false : reader.hasNorms(field); } - @Override - public synchronized byte[] norms(String field) throws IOException { - ensureOpen(); - IndexReader reader = fieldToReader.get(field); - - if (reader==null) - return null; - - byte[] bytes = normsCache.get(field); - if (bytes != null) - return bytes; - if (!hasNorms(field)) - return null; - if (normsCache.containsKey(field)) // cached omitNorms, not missing key - return null; - - bytes = MultiNorms.norms(reader, field); - normsCache.put(field, bytes); - return bytes; - } - @Override public int docFreq(String field, BytesRef term) throws IOException { ensureOpen(); @@ -427,4 +406,16 @@ public class ParallelReader extends IndexReader { IndexReader reader = fieldToReader.get(field); return reader == null ? null : MultiDocValues.getDocValues(reader, field); } + + // TODO: I suspect this is completely untested!!!!! + @Override + public synchronized DocValues normValues(String field) throws IOException { + DocValues values = normsCache.get(field); + if (values == null) { + IndexReader reader = fieldToReader.get(field); + values = reader == null ? null : MultiDocValues.getNormDocValues(reader, field); + normsCache.put(field, values); + } + return values; + } } diff --git a/lucene/src/java/org/apache/lucene/index/SegmentCoreReaders.java b/lucene/src/java/org/apache/lucene/index/SegmentCoreReaders.java index 2d7e865a6d0..ac67c3d9747 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentCoreReaders.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentCoreReaders.java @@ -25,7 +25,6 @@ import java.util.concurrent.atomic.AtomicInteger; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.FieldsProducer; -import org.apache.lucene.codecs.NormsReader; import org.apache.lucene.codecs.PerDocProducer; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.StoredFieldsReader; @@ -54,7 +53,7 @@ final class SegmentCoreReaders { final FieldsProducer fields; final PerDocProducer perDocProducer; - final NormsReader norms; + final PerDocProducer norms; final Directory dir; final Directory cfsDir; @@ -120,7 +119,7 @@ final class SegmentCoreReaders { // ask codec for its Norms: // TODO: since we don't write any norms file if there are no norms, // kinda jaky to assume the codec handles the case of no norms file at all gracefully?! 
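Since the segment core now wires norms through the codec's PerDocProducer, the legacy byte[] view survives only as sugar over the DocValues path. A sketch of the equivalence on an atomic reader (the field "body" is hypothetical):

    import java.io.IOException;
    import java.util.Arrays;
    import org.apache.lucene.index.DocValues;
    import org.apache.lucene.index.IndexReader;

    static void normsEquivalence(IndexReader atomicReader) throws IOException {
      byte[] viaLegacy = atomicReader.norms("body");      // unchanged caller-facing API
      DocValues dv = atomicReader.normValues("body");     // the new per-field entry point
      byte[] viaDocValues = (dv == null) ? null
          : (byte[]) dv.getSource().getArray();           // one norm byte per document
      assert Arrays.equals(viaLegacy, viaDocValues);
    }
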
diff --git a/lucene/src/java/org/apache/lucene/index/SegmentCoreReaders.java b/lucene/src/java/org/apache/lucene/index/SegmentCoreReaders.java index 2d7e865a6d0..ac67c3d9747 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentCoreReaders.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentCoreReaders.java @@ -25,7 +25,6 @@ import java.util.concurrent.atomic.AtomicInteger; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.FieldsProducer; -import org.apache.lucene.codecs.NormsReader; import org.apache.lucene.codecs.PerDocProducer; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.StoredFieldsReader; @@ -54,7 +53,7 @@ final class SegmentCoreReaders { final FieldsProducer fields; final PerDocProducer perDocProducer; - final NormsReader norms; + final PerDocProducer norms; final Directory dir; final Directory cfsDir; @@ -120,7 +119,7 @@ final class SegmentCoreReaders { // ask codec for its Norms: // TODO: since we don't write any norms file if there are no norms, // kinda janky to assume the codec handles the case of no norms file at all gracefully?! - norms = codec.normsFormat().normsReader(cfsDir, si, fieldInfos, context, dir); + norms = codec.normsFormat().docsProducer(segmentReadState, dir); perDocProducer = codec.docValuesFormat().docsProducer(segmentReadState); final Directory storeDir; diff --git a/lucene/src/java/org/apache/lucene/index/SegmentMerger.java b/lucene/src/java/org/apache/lucene/index/SegmentMerger.java index 76f705b0d14..ea32beaf0c6 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentMerger.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentMerger.java @@ -27,7 +27,6 @@ import java.util.Map; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.FieldInfosWriter; import org.apache.lucene.codecs.FieldsConsumer; -import org.apache.lucene.codecs.NormsWriter; import org.apache.lucene.codecs.PerDocConsumer; import org.apache.lucene.codecs.StoredFieldsWriter; import org.apache.lucene.codecs.TermVectorsWriter; @@ -125,8 +124,7 @@ final class SegmentMerger { mergePerDoc(segmentWriteState); if (mergeState.fieldInfos.hasNorms()) { - int numMerged = mergeNorms(segmentWriteState); - assert numMerged == mergeState.mergedDocCount; + mergeNorms(segmentWriteState); } if (mergeState.fieldInfos.hasVectors()) { @@ -379,20 +377,24 @@ final class SegmentMerger { } } } - - private int mergeNorms(SegmentWriteState segmentWriteState) throws IOException { - final NormsWriter writer = codec.normsFormat().normsWriter(segmentWriteState); - + + private void mergeNorms(SegmentWriteState segmentWriteState) throws IOException { + final PerDocConsumer docsConsumer = codec.normsFormat() + .docsConsumer(new PerDocWriteState(segmentWriteState)); + // TODO: remove this check when 3.x indexes are no longer supported + // (3.x indexes don't have docvalues) + if (docsConsumer == null) { + return; + } boolean success = false; try { - int numMerged = writer.merge(mergeState); + docsConsumer.merge(mergeState); success = true; - return numMerged; } finally { if (success) { - IOUtils.close(docsConsumer); + IOUtils.close(docsConsumer); } else { - IOUtils.closeWhileHandlingException(writer); + IOUtils.closeWhileHandlingException(docsConsumer); } } }
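mergeNorms() follows Lucene's standard success-flag idiom: if the merge body throws, a failure during close must not mask the primary exception. A generic sketch of the idiom (open() and doWork() are placeholders, not patch code):

    // Only let close() propagate an exception when the body itself succeeded.
    Closeable resource = open();
    boolean success = false;
    try {
      doWork(resource);
      success = true;
    } finally {
      if (success) {
        IOUtils.close(resource);                       // a close failure is the primary error
      } else {
        IOUtils.closeWhileHandlingException(resource); // body failed; swallow secondary errors
      }
    }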
diff --git a/lucene/src/java/org/apache/lucene/index/SegmentReader.java b/lucene/src/java/org/apache/lucene/index/SegmentReader.java index bd57b7d51f9..4e310f04389 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentReader.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentReader.java @@ -18,16 +18,15 @@ package org.apache.lucene.index; */ import java.io.IOException; -import java.util.ArrayList; import java.util.Collection; import java.util.HashSet; -import java.util.List; import java.util.Set; import org.apache.lucene.store.Directory; import org.apache.lucene.codecs.PerDocProducer; import org.apache.lucene.codecs.StoredFieldsReader; import org.apache.lucene.codecs.TermVectorsReader; +import org.apache.lucene.index.DocValues.Source; import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.search.FieldCache; // javadocs import org.apache.lucene.store.IOContext; @@ -262,12 +261,6 @@ public final class SegmentReader extends IndexReader { return fi != null && fi.isIndexed && !fi.omitNorms; } - @Override - public byte[] norms(String field) throws IOException { - ensureOpen(); - return core.norms.norms(field); - } - /** @lucene.internal */ public TermVectorsReader getTermVectorsReader() { ensureOpen(); @@ -352,6 +345,17 @@ } return perDoc.docValues(field); } + + @Override + public DocValues normValues(String field) throws IOException { + ensureOpen(); + final PerDocProducer perDoc = core.norms; + if (perDoc == null) { + return null; + } + return perDoc.docValues(field); + } + /** * Called when the shared core for this SegmentReader diff --git a/lucene/src/java/org/apache/lucene/index/SlowMultiReaderWrapper.java b/lucene/src/java/org/apache/lucene/index/SlowMultiReaderWrapper.java index 1d021db3579..ec66cf20ece 100644 --- a/lucene/src/java/org/apache/lucene/index/SlowMultiReaderWrapper.java +++ b/lucene/src/java/org/apache/lucene/index/SlowMultiReaderWrapper.java @@ -34,7 +34,7 @@ import org.apache.lucene.index.MultiReader; // javadoc * IndexReader#getSequentialSubReaders}) to emulate an * atomic reader. This requires implementing the postings * APIs on-the-fly, using the static methods in {@link - * MultiFields}, {@link MultiNorms}, {@link MultiDocValues}, + * MultiFields}, {@link MultiDocValues}, * by stepping through the sub-readers to merge fields/terms, * appending docs, etc. * @@ -53,7 +53,7 @@ import org.apache.lucene.index.MultiReader; // javadoc public final class SlowMultiReaderWrapper extends FilterIndexReader { private final ReaderContext readerContext; - private final Map<String,byte[]> normsCache = new HashMap<String,byte[]>(); + private final Map<String,DocValues> normsCache = new HashMap<String,DocValues>(); public SlowMultiReaderWrapper(IndexReader other) { super(other); @@ -76,7 +76,17 @@ public final class SlowMultiReaderWrapper extends FilterIndexReader { ensureOpen(); return MultiDocValues.getDocValues(in, field); } - + + @Override + public synchronized DocValues normValues(String field) throws IOException { + ensureOpen(); + DocValues values = normsCache.get(field); + if (values == null) { + values = MultiDocValues.getNormDocValues(in, field); + normsCache.put(field, values); + } + return values; + } @Override public Bits getLiveDocs() { ensureOpen(); @@ -87,22 +97,6 @@ public final class SlowMultiReaderWrapper extends FilterIndexReader { public IndexReader[] getSequentialSubReaders() { return null; } - - @Override - public synchronized byte[] norms(String field) throws IOException { - ensureOpen(); - byte[] bytes = normsCache.get(field); - if (bytes != null) - return bytes; - if (!hasNorms(field)) - return null; - if (normsCache.containsKey(field)) // cached omitNorms, not missing key - return null; - - bytes = MultiNorms.norms(in, field); - normsCache.put(field, bytes); - return bytes; - } @Override public ReaderContext getTopReaderContext() {
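SlowMultiReaderWrapper memoizes the per-field DocValues because each MultiDocValues.getNormDocValues call rebuilds the merged view over the sub-readers. A hedged usage sketch (directory and field name are illustrative):

    // Emulating an atomic reader over a composite one, including the new norms API.
    IndexReader composite = IndexReader.open(dir);
    IndexReader atomic = new SlowMultiReaderWrapper(composite);
    DocValues norms = atomic.normValues("body"); // built lazily, then cached per field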
diff --git a/lucene/src/java/org/apache/lucene/store/CompoundFileWriter.java b/lucene/src/java/org/apache/lucene/store/CompoundFileWriter.java index 37ae0a538c6..d24f1b94276 100644 --- a/lucene/src/java/org/apache/lucene/store/CompoundFileWriter.java +++ b/lucene/src/java/org/apache/lucene/store/CompoundFileWriter.java @@ -91,7 +91,7 @@ final class CompoundFileWriter implements Closeable { // all entries that are written to a sep. file but not yet moved into CFS private final Queue<FileEntry> pendingEntries = new LinkedList<FileEntry>(); private boolean closed = false; - private volatile IndexOutput dataOut; + private IndexOutput dataOut; private final AtomicBoolean outputTaken = new AtomicBoolean(false); final String entryTableName; final String dataFileName; @@ -113,16 +113,25 @@ final class CompoundFileWriter implements Closeable { IndexFileNames.stripExtension(name), "", IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION); dataFileName = name; - boolean success = false; - try { - dataOut = directory.createOutput(dataFileName, IOContext.DEFAULT); - dataOut.writeVInt(FORMAT_CURRENT); - success = true; - } finally { - if (!success) { - IOUtils.closeWhileHandlingException(dataOut); + + } + + private synchronized IndexOutput getOutput() throws IOException { + if (dataOut == null) { + IndexOutput dataOutput = null; + boolean success = false; + try { + dataOutput = directory.createOutput(dataFileName, IOContext.DEFAULT); + dataOutput.writeVInt(FORMAT_CURRENT); + dataOut = dataOutput; + success = true; + } finally { + if (!success) { + IOUtils.closeWhileHandlingException(dataOutput); + } } - } + } + return dataOut; } /** Returns the directory of the compound file. */ @@ -154,6 +163,7 @@ final class CompoundFileWriter implements Closeable { } closed = true; // open the compound stream + getOutput(); assert dataOut != null; long finalLength = dataOut.getFilePointer(); assert assertFileLength(finalLength, dataOut); @@ -246,7 +256,7 @@ final class CompoundFileWriter implements Closeable { seenIDs.add(id); final DirectCFSIndexOutput out; if (outputTaken.compareAndSet(false, true)) { - out = new DirectCFSIndexOutput(dataOut, entry, false); + out = new DirectCFSIndexOutput(getOutput(), entry, false); outputLocked = true; success = true; } else { @@ -280,7 +290,7 @@ final class CompoundFileWriter implements Closeable { try { while (!pendingEntries.isEmpty()) { FileEntry entry = pendingEntries.poll(); - copyFileEntry(dataOut, entry); + copyFileEntry(getOutput(), entry); entries.put(entry.file, entry); } } finally { diff --git a/lucene/src/java/org/apache/lucene/util/IOUtils.java b/lucene/src/java/org/apache/lucene/util/IOUtils.java index be196cb4d8d..dcd5e6bb5af 100644 --- a/lucene/src/java/org/apache/lucene/util/IOUtils.java +++ b/lucene/src/java/org/apache/lucene/util/IOUtils.java @@ -30,6 +30,8 @@ import java.nio.charset.Charset; import java.nio.charset.CharsetDecoder; import java.nio.charset.CodingErrorAction; +import org.apache.lucene.store.Directory; + /** This class emulates the new Java 7 "Try-With-Resources" statement. * Remove once Lucene is on Java 7. * @lucene.internal */ @@ -318,6 +320,16 @@ public final class IOUtils { } } } + + public static void deleteFilesIgnoringExceptions(Directory dir, String... files) { + for (String name : files) { + try { + dir.deleteFile(name); + } catch (IOException ignored) { + // ignore + } + } + } }
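CompoundFileWriter now creates its data output lazily via getOutput(), so a writer that never receives a file never opens one, and the new IOUtils.deleteFilesIgnoringExceptions helper supports best-effort cleanup on abort paths. A hedged sketch of the intended usage (file name and variables are illustrative):

    // Typical abort-path cleanup: suppress close errors, then best-effort delete.
    IOUtils.closeWhileHandlingException(out);             // don't mask the original failure
    IOUtils.deleteFilesIgnoringExceptions(dir, "_1.nrm"); // leftover file, if any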
diff --git a/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexNormsConsumer.java b/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexNormsConsumer.java new file mode 100644 index 00000000000..0ec0cdef938 --- /dev/null +++ b/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexNormsConsumer.java @@ -0,0 +1,277 @@ +package org.apache.lucene.codecs.preflexrw; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.Arrays; + +import org.apache.lucene.codecs.DocValuesConsumer; +import org.apache.lucene.codecs.PerDocConsumer; +import org.apache.lucene.index.DocValue; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.DocValues.Source; +import org.apache.lucene.index.DocValues.Type; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.MergeState; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.IOUtils; + +/** + * Writes and merges the Lucene 3.x norms format + * @lucene.experimental + */ +class PreFlexNormsConsumer extends PerDocConsumer { + + /** norms header placeholder */ + private static final byte[] NORMS_HEADER = new byte[]{'N','R','M',-1}; + + /** Extension of norms file */ + private static final String NORMS_EXTENSION = "nrm"; + + /** Extension of separate norms file + * @deprecated */ + @Deprecated + private static final String SEPARATE_NORMS_EXTENSION = "s"; + + private final Directory directory; + + private final String segment; + + private final IOContext context; + + private NormsWriter writer; + + public PreFlexNormsConsumer(Directory directory, String segment, IOContext context) { + this.directory = directory; + this.segment = segment; + this.context = context; + } + + @Override + public void merge(MergeState mergeState) throws IOException { + getNormsWriter().merge(mergeState); + } + + @Override + public void close() throws IOException { + if (writer != null) { + writer.finish(); + } + } + + @Override + public DocValuesConsumer addValuesField(Type type, FieldInfo fieldInfo) + throws IOException { + return new Lucene3xNormsDocValuesConsumer(fieldInfo); + } + + class Lucene3xNormsDocValuesConsumer extends DocValuesConsumer { + // Holds all docID/norm pairs we've seen + private int[] docIDs = new int[1]; + private byte[] norms = new byte[1]; + private int upto; + private final FieldInfo fi; + + Lucene3xNormsDocValuesConsumer(FieldInfo fieldInfo) { + fi = fieldInfo; + } + + @Override + public void finish(int docCount) throws IOException { + final NormsWriter normsWriter = getNormsWriter(); + boolean success = false; + try { + int uptoDoc = 0; + normsWriter.setNumTotalDocs(docCount); + if (upto > 0) { + normsWriter.startField(fi); + int docID = 0; + for (; docID < docCount; docID++) { + if (uptoDoc < upto && docIDs[uptoDoc] == docID) { + normsWriter.writeNorm(norms[uptoDoc]); + uptoDoc++; + } else { + normsWriter.writeNorm((byte) 0); + } + } + // we should have consumed every norm + assert uptoDoc == upto; + + } else { + // Fill entire
field with default norm: + normsWriter.startField(fi); + for (; upto < docCount; upto++) + normsWriter.writeNorm((byte) 0); + } + success = true; + } finally { + if (!success) { + normsWriter.abort(); + } + } + } + + @Override + public void add(int docID, DocValue docValue) throws IOException { + add(docID, docValue.getBytes()); + } + + protected void add(int docID, BytesRef value) throws IOException { + if (docIDs.length <= upto) { + assert docIDs.length == upto; + docIDs = ArrayUtil.grow(docIDs, 1 + upto); + } + if (norms.length <= upto) { + assert norms.length == upto; + norms = ArrayUtil.grow(norms, 1 + upto); + } + assert value.length == 1; + norms[upto] = value.bytes[value.offset]; + docIDs[upto] = docID; + upto++; + } + } + + public NormsWriter getNormsWriter() throws IOException { + if (writer == null) { + writer = new NormsWriter(directory, segment, context); + } + return writer; + } + + private static class NormsWriter { + + private final IndexOutput output; + private int normCount = 0; + private int numTotalDocs = 0; + + public NormsWriter(Directory directory, String segment, IOContext context) throws IOException { + final String normsFileName = IndexFileNames.segmentFileName(segment, "", NORMS_EXTENSION); + boolean success = false; + IndexOutput out = null; + try { + out = directory.createOutput(normsFileName, context); + output = out; + output.writeBytes(NORMS_HEADER, 0, NORMS_HEADER.length); + success = true; + } finally { + if (!success) { + IOUtils.closeWhileHandlingException(out); + } + } + } + + public void setNumTotalDocs(int numTotalDocs) { + assert this.numTotalDocs == 0 || numTotalDocs == this.numTotalDocs; + this.numTotalDocs = numTotalDocs; + } + + public void startField(FieldInfo info) throws IOException { + assert info.omitNorms == false; + normCount++; + } + + public void writeNorm(byte norm) throws IOException { + output.writeByte(norm); + } + + public void abort() throws IOException { + IOUtils.close(output); + } + + public void finish() throws IOException { + final long expectedLength = 4+normCount*(long)numTotalDocs; + final long actualLength = output.getFilePointer(); + IOUtils.close(output); + if (expectedLength != actualLength) { + throw new IOException(".nrm file size mismatch: expected=" + expectedLength + " actual=" + actualLength); + } + } + // TODO: we can actually use the default DV merge here and drop this specific stuff entirely + /** we override merge and bulk-merge norms when there are no deletions */ + public void merge(MergeState mergeState) throws IOException { + int numMergedDocs = 0; + for (FieldInfo fi : mergeState.fieldInfos) { + if (fi.isIndexed && !fi.omitNorms) { + startField(fi); + int numMergedDocsForField = 0; + for (MergeState.IndexReaderAndLiveDocs reader : mergeState.readers) { + final int maxDoc = reader.reader.maxDoc(); + byte[] normBuffer; + DocValues normValues = reader.reader.normValues(fi.name); + if (normValues == null) { + // Can be null if this segment doesn't have + // any docs with this field + normBuffer = new byte[maxDoc]; + Arrays.fill(normBuffer, (byte)0); + } else { + Source directSource = normValues.getDirectSource(); + assert directSource.hasArray(); + normBuffer = (byte[]) directSource.getArray(); + } + if (reader.liveDocs == null) { + //optimized case for segments without deleted docs + output.writeBytes(normBuffer, maxDoc); + numMergedDocsForField += maxDoc; + } else { + // this segment has deleted docs, so we have to + // check for every doc if it is deleted or not + final Bits liveDocs = reader.liveDocs; + for (int k = 0; k < maxDoc; k++) { + if
(liveDocs.get(k)) { + numMergedDocsForField++; + output.writeByte(normBuffer[k]); + } + } + } + mergeState.checkAbort.work(maxDoc); + } + assert numMergedDocs == 0 || numMergedDocs == numMergedDocsForField; + numMergedDocs = numMergedDocsForField; + } + } + this.numTotalDocs = numMergedDocs; + } + } + + @Override + public void abort() { + try { + try { + if (writer != null) { + writer.abort(); + } + } finally { + directory.deleteFile(IndexFileNames.segmentFileName(segment, "", + NORMS_EXTENSION)); + } + } catch (IOException e) { + // ignore + } + } +} diff --git a/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWCodec.java b/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWCodec.java index df48c5c3a42..78d43cbe30f 100644 --- a/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWCodec.java +++ b/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWCodec.java @@ -17,6 +17,7 @@ package org.apache.lucene.codecs.preflexrw; * limitations under the License. */ +import org.apache.lucene.codecs.NormsFormat; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.lucene3x.Lucene3xCodec; import org.apache.lucene.util.LuceneTestCase; @@ -27,7 +28,8 @@ import org.apache.lucene.util.LuceneTestCase; */ public class PreFlexRWCodec extends Lucene3xCodec { private final PostingsFormat postings = new PreFlexRWPostingsFormat(); - + private final NormsFormat norms = new PreFlexRWNormsFormat(); + @Override public PostingsFormat postingsFormat() { if (LuceneTestCase.PREFLEX_IMPERSONATION_IS_ACTIVE) { @@ -36,4 +38,13 @@ public class PreFlexRWCodec extends Lucene3xCodec { return super.postingsFormat(); } } + + @Override + public NormsFormat normsFormat() { + if (LuceneTestCase.PREFLEX_IMPERSONATION_IS_ACTIVE) { + return norms; + } else { + return super.normsFormat(); + } + } } diff --git a/lucene/src/java/org/apache/lucene/codecs/NormsReader.java b/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWNormsFormat.java similarity index 64% rename from lucene/src/java/org/apache/lucene/codecs/NormsReader.java rename to lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWNormsFormat.java index ca741d4269f..4f54ba8862a 100644 --- a/lucene/src/java/org/apache/lucene/codecs/NormsReader.java +++ b/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWNormsFormat.java @@ -1,5 +1,4 @@ -package org.apache.lucene.codecs; - +package org.apache.lucene.codecs.preflexrw; /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -16,11 +15,17 @@ package org.apache.lucene.codecs; * See the License for the specific language governing permissions and * limitations under the License. 
*/ - -import java.io.Closeable; import java.io.IOException; -//simple api just for now before switching to docvalues apis -public abstract class NormsReader implements Closeable { - public abstract byte[] norms(String name) throws IOException; +import org.apache.lucene.codecs.PerDocConsumer; +import org.apache.lucene.codecs.lucene3x.Lucene3xNormsFormat; +import org.apache.lucene.index.PerDocWriteState; + +public class PreFlexRWNormsFormat extends Lucene3xNormsFormat { + + @Override + public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException { + return new PreFlexNormsConsumer(state.directory, state.segmentName, state.context); + } + } diff --git a/lucene/src/java/org/apache/lucene/index/MultiNorms.java b/lucene/src/test-framework/java/org/apache/lucene/index/MultiNorms.java similarity index 100% rename from lucene/src/java/org/apache/lucene/index/MultiNorms.java rename to lucene/src/test-framework/java/org/apache/lucene/index/MultiNorms.java diff --git a/lucene/src/test/org/apache/lucene/codecs/lucene40/TestDocValues.java b/lucene/src/test/org/apache/lucene/codecs/lucene40/TestDocValues.java index 4c00a3b448a..1d2f97c7eae 100644 --- a/lucene/src/test/org/apache/lucene/codecs/lucene40/TestDocValues.java +++ b/lucene/src/test/org/apache/lucene/codecs/lucene40/TestDocValues.java @@ -20,10 +20,11 @@ package org.apache.lucene.codecs.lucene40; import java.io.IOException; import java.util.Comparator; +import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.codecs.lucene40.values.Bytes; import org.apache.lucene.codecs.lucene40.values.Floats; import org.apache.lucene.codecs.lucene40.values.Ints; -import org.apache.lucene.codecs.lucene40.values.Writer; +import org.apache.lucene.index.DocValue; import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValues.SortedSource; import org.apache.lucene.index.DocValues.Source; @@ -58,12 +59,13 @@ public class TestDocValues extends LuceneTestCase { public void runTestBytes(final Bytes.Mode mode, final boolean fixedSize) throws IOException { - + DocValueHolder valueHolder = new DocValueHolder(); + valueHolder.comp = COMP; final BytesRef bytesRef = new BytesRef(); Directory dir = newDirectory(); final Counter trackBytes = Counter.newCounter(); - Writer w = Bytes.getWriter(dir, "test", mode, fixedSize, COMP, trackBytes, newIOContext(random), + DocValuesConsumer w = Bytes.getWriter(dir, "test", mode, fixedSize, COMP, trackBytes, newIOContext(random), random.nextBoolean()); int maxDoc = 220; final String[] values = new String[maxDoc]; @@ -79,7 +81,8 @@ public class TestDocValues extends LuceneTestCase { values[2 * i] = s; UnicodeUtil.UTF16toUTF8(s, 0, s.length(), bytesRef); - w.add(2 * i, bytesRef); + valueHolder.bytes = bytesRef; + w.add(2 * i, valueHolder); } w.finish(maxDoc); assertEquals(0, trackBytes.get()); @@ -167,12 +170,15 @@ public class TestDocValues extends LuceneTestCase { Type.FIXED_INTS_64, Type.FIXED_INTS_64, Type.FIXED_INTS_64, Type.VAR_INTS, Type.VAR_INTS, Type.VAR_INTS, }; + DocValueHolder valueHolder = new DocValueHolder(); for (int i = 0; i < minMax.length; i++) { Directory dir = newDirectory(); final Counter trackBytes = Counter.newCounter(); - Writer w = Ints.getWriter(dir, "test", trackBytes, Type.VAR_INTS, newIOContext(random)); - w.add(0, minMax[i][0]); - w.add(1, minMax[i][1]); + DocValuesConsumer w = Ints.getWriter(dir, "test", trackBytes, Type.VAR_INTS, newIOContext(random)); + valueHolder.intValue = minMax[i][0]; + w.add(0, valueHolder); + valueHolder.intValue = minMax[i][1]; 
+ w.add(1, valueHolder); w.finish(2); assertEquals(0, trackBytes.get()); DocValues r = Ints.getValues(dir, "test", 2, Type.VAR_INTS, newIOContext(random)); @@ -200,12 +206,14 @@ public class TestDocValues extends LuceneTestCase { } public void testGetInt8Array() throws IOException { + DocValueHolder valueHolder = new DocValueHolder(); byte[] sourceArray = new byte[] {1,2,3}; Directory dir = newDirectory(); final Counter trackBytes = Counter.newCounter(); - Writer w = Ints.getWriter(dir, "test", trackBytes, Type.FIXED_INTS_8, newIOContext(random)); + DocValuesConsumer w = Ints.getWriter(dir, "test", trackBytes, Type.FIXED_INTS_8, newIOContext(random)); for (int i = 0; i < sourceArray.length; i++) { - w.add(i, (long) sourceArray[i]); + valueHolder.intValue = (long) sourceArray[i]; + w.add(i, valueHolder); } w.finish(sourceArray.length); DocValues r = Ints.getValues(dir, "test", sourceArray.length, Type.FIXED_INTS_8, newIOContext(random)); @@ -221,12 +229,14 @@ public class TestDocValues extends LuceneTestCase { } public void testGetInt16Array() throws IOException { + DocValueHolder valueHolder = new DocValueHolder(); short[] sourceArray = new short[] {1,2,3}; Directory dir = newDirectory(); final Counter trackBytes = Counter.newCounter(); - Writer w = Ints.getWriter(dir, "test", trackBytes, Type.FIXED_INTS_16, newIOContext(random)); + DocValuesConsumer w = Ints.getWriter(dir, "test", trackBytes, Type.FIXED_INTS_16, newIOContext(random)); for (int i = 0; i < sourceArray.length; i++) { - w.add(i, (long) sourceArray[i]); + valueHolder.intValue = (long) sourceArray[i]; + w.add(i, valueHolder); } w.finish(sourceArray.length); DocValues r = Ints.getValues(dir, "test", sourceArray.length, Type.FIXED_INTS_16, newIOContext(random)); @@ -242,12 +252,14 @@ public class TestDocValues extends LuceneTestCase { } public void testGetInt64Array() throws IOException { + DocValueHolder valueHolder = new DocValueHolder(); long[] sourceArray = new long[] {1,2,3}; Directory dir = newDirectory(); final Counter trackBytes = Counter.newCounter(); - Writer w = Ints.getWriter(dir, "test", trackBytes, Type.FIXED_INTS_64, newIOContext(random)); + DocValuesConsumer w = Ints.getWriter(dir, "test", trackBytes, Type.FIXED_INTS_64, newIOContext(random)); for (int i = 0; i < sourceArray.length; i++) { - w.add(i, sourceArray[i]); + valueHolder.intValue = sourceArray[i]; + w.add(i, valueHolder); } w.finish(sourceArray.length); DocValues r = Ints.getValues(dir, "test", sourceArray.length, Type.FIXED_INTS_64, newIOContext(random)); @@ -263,12 +275,14 @@ public class TestDocValues extends LuceneTestCase { } public void testGetInt32Array() throws IOException { + DocValueHolder valueHolder = new DocValueHolder(); int[] sourceArray = new int[] {1,2,3}; Directory dir = newDirectory(); final Counter trackBytes = Counter.newCounter(); - Writer w = Ints.getWriter(dir, "test", trackBytes, Type.FIXED_INTS_32, newIOContext(random)); + DocValuesConsumer w = Ints.getWriter(dir, "test", trackBytes, Type.FIXED_INTS_32, newIOContext(random)); for (int i = 0; i < sourceArray.length; i++) { - w.add(i, (long) sourceArray[i]); + valueHolder.intValue = (long) sourceArray[i]; + w.add(i, valueHolder); } w.finish(sourceArray.length); DocValues r = Ints.getValues(dir, "test", sourceArray.length, Type.FIXED_INTS_32, newIOContext(random)); @@ -284,12 +298,14 @@ public class TestDocValues extends LuceneTestCase { } public void testGetFloat32Array() throws IOException { + DocValueHolder valueHolder = new DocValueHolder(); float[] sourceArray = new float[] 
{1,2,3}; Directory dir = newDirectory(); final Counter trackBytes = Counter.newCounter(); - Writer w = Floats.getWriter(dir, "test", trackBytes, newIOContext(random), Type.FLOAT_32); + DocValuesConsumer w = Floats.getWriter(dir, "test", trackBytes, newIOContext(random), Type.FLOAT_32); for (int i = 0; i < sourceArray.length; i++) { - w.add(i, sourceArray[i]); + valueHolder.floatValue = sourceArray[i]; + w.add(i, valueHolder); } w.finish(sourceArray.length); DocValues r = Floats.getValues(dir, "test", 3, newIOContext(random), Type.FLOAT_32); @@ -305,12 +321,14 @@ public class TestDocValues extends LuceneTestCase { } public void testGetFloat64Array() throws IOException { + DocValueHolder valueHolder = new DocValueHolder(); double[] sourceArray = new double[] {1,2,3}; Directory dir = newDirectory(); final Counter trackBytes = Counter.newCounter(); - Writer w = Floats.getWriter(dir, "test", trackBytes, newIOContext(random), Type.FLOAT_64); + DocValuesConsumer w = Floats.getWriter(dir, "test", trackBytes, newIOContext(random), Type.FLOAT_64); for (int i = 0; i < sourceArray.length; i++) { - w.add(i, sourceArray[i]); + valueHolder.floatValue = sourceArray[i]; + w.add(i, valueHolder); } w.finish(sourceArray.length); DocValues r = Floats.getValues(dir, "test", 3, newIOContext(random), Type.FLOAT_64); @@ -326,17 +344,18 @@ public class TestDocValues extends LuceneTestCase { } private void testInts(Type type, int maxBit) throws IOException { + DocValueHolder valueHolder = new DocValueHolder(); long maxV = 1; final int NUM_VALUES = 333 + random.nextInt(333); final long[] values = new long[NUM_VALUES]; for (int rx = 1; rx < maxBit; rx++, maxV *= 2) { Directory dir = newDirectory(); final Counter trackBytes = Counter.newCounter(); - Writer w = Ints.getWriter(dir, "test", trackBytes, type, newIOContext(random)); + DocValuesConsumer w = Ints.getWriter(dir, "test", trackBytes, type, newIOContext(random)); for (int i = 0; i < NUM_VALUES; i++) { final long v = random.nextLong() % (1 + maxV); - values[i] = v; - w.add(i, v); + valueHolder.intValue = values[i] = v; + w.add(i, valueHolder); } final int additionalDocs = 1 + random.nextInt(9); w.finish(NUM_VALUES + additionalDocs); @@ -362,16 +381,17 @@ public class TestDocValues extends LuceneTestCase { } private void runTestFloats(Type type, double delta) throws IOException { + DocValueHolder valueHolder = new DocValueHolder(); Directory dir = newDirectory(); final Counter trackBytes = Counter.newCounter(); - Writer w = Floats.getWriter(dir, "test", trackBytes, newIOContext(random), type); + DocValuesConsumer w = Floats.getWriter(dir, "test", trackBytes, newIOContext(random), type); final int NUM_VALUES = 777 + random.nextInt(777); final double[] values = new double[NUM_VALUES]; for (int i = 0; i < NUM_VALUES; i++) { final double v = type == Type.FLOAT_32 ?
random.nextFloat() : random.nextDouble(); - values[i] = v; - w.add(i, v); + valueHolder.floatValue = values[i] = v; + w.add(i, valueHolder); } final int additionalValues = 1 + random.nextInt(10); w.finish(NUM_VALUES + additionalValues); @@ -411,4 +431,31 @@ public class TestDocValues extends LuceneTestCase { return getSource(values).asSortedSource(); } + public static class DocValueHolder implements DocValue { + BytesRef bytes; + long intValue; + double floatValue; + Comparator<BytesRef> comp; + @Override + public BytesRef getBytes() { + return bytes; + } + + @Override + public Comparator<BytesRef> bytesComparator() { + return comp; + } + + @Override + public double getFloat() { + return floatValue; + } + + @Override + public long getInt() { + return intValue; + } + + } + }
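With the typed Writer.add overloads gone, the tests wrap every primitive in a DocValue before handing it to the consumer. The calling convention, sketched (docID and value are illustrative):

    // One add(int, DocValue) entry point now covers ints, floats, and bytes.
    DocValueHolder holder = new DocValueHolder();
    holder.intValue = 42L;   // previously: w.add(docID, 42L)
    w.add(docID, holder);    // the consumer reads the field matching its Type
    w.finish(maxDoc);        // docs never added are backfilled with defaults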
diff --git a/lucene/src/test/org/apache/lucene/index/TestDocValuesIndexing.java b/lucene/src/test/org/apache/lucene/index/TestDocValuesIndexing.java index 30bd0272389..fbd1c3b4dcd 100644 --- a/lucene/src/test/org/apache/lucene/index/TestDocValuesIndexing.java +++ b/lucene/src/test/org/apache/lucene/index/TestDocValuesIndexing.java @@ -26,7 +26,6 @@ import java.util.List; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.codecs.Codec; -import org.apache.lucene.codecs.PerDocProducer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.DocValuesField; @@ -417,6 +416,124 @@ public class TestDocValuesIndexing extends LuceneTestCase { w.close(); d.close(); } + + public void testGetArrayNumerics() throws CorruptIndexException, IOException { + Directory d = newDirectory(); + IndexWriterConfig cfg = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)); + IndexWriter w = new IndexWriter(d, cfg); + final int numValues = 50 + atLeast(10); + final List<Type> numVariantList = new ArrayList<Type>(NUMERICS); + Collections.shuffle(numVariantList, random); + for (Type val : numVariantList) { + indexValues(w, numValues, val, numVariantList, + false, 7); + IndexReader r = IndexReader.open(w, true); + DocValues docValues = getDocValues(r, val.name()); + assertNotNull(docValues); + // make sure we don't get a direct source since they don't support getArray() + Source source = docValues.getSource(); + + switch (source.type()) { + case FIXED_INTS_8: + { + assertTrue(source.hasArray()); + byte[] values = (byte[]) source.getArray(); + for (int i = 0; i < numValues; i++) { + assertEquals((long)values[i], source.getInt(i)); + } + } + break; + case FIXED_INTS_16: + { + assertTrue(source.hasArray()); + short[] values = (short[]) source.getArray(); + for (int i = 0; i < numValues; i++) { + assertEquals((long)values[i], source.getInt(i)); + } + } + break; + case FIXED_INTS_32: + { + assertTrue(source.hasArray()); + int[] values = (int[]) source.getArray(); + for (int i = 0; i < numValues; i++) { + assertEquals((long)values[i], source.getInt(i)); + } + } + break; + case FIXED_INTS_64: + { + assertTrue(source.hasArray()); + long[] values = (long[]) source.getArray(); + for (int i = 0; i < numValues; i++) { + assertEquals(values[i], source.getInt(i)); + } + } + break; + case VAR_INTS: + assertFalse(source.hasArray()); + break; + case FLOAT_32: + { + assertTrue(source.hasArray()); + float[] values = (float[]) source.getArray(); + for (int i = 0; i < numValues; i++) { + assertEquals((double)values[i], source.getFloat(i), 0.0d); + } + } + break; + case FLOAT_64: + { + assertTrue(source.hasArray()); + double[] values = (double[]) source.getArray(); + for (int i = 0; i < numValues; i++) { + assertEquals(values[i], source.getFloat(i), 0.0d); + } + } + break; + default: + fail("unexpected value " + source.type()); + } + r.close(); + } + w.close(); + d.close(); + } + + public void testGetArrayBytes() throws CorruptIndexException, IOException { + Directory d = newDirectory(); + IndexWriterConfig cfg = newIndexWriterConfig(TEST_VERSION_CURRENT, + new MockAnalyzer(random)); + IndexWriter w = new IndexWriter(d, cfg); + final int numValues = 50 + atLeast(10); + // only single byte fixed straight supports getArray() + indexValues(w, numValues, Type.BYTES_FIXED_STRAIGHT, null, false, 1); + IndexReader r = IndexReader.open(w, true); + DocValues docValues = getDocValues(r, Type.BYTES_FIXED_STRAIGHT.name()); + assertNotNull(docValues); + // make sure we don't get a direct source since they don't support + // getArray() + Source source = docValues.getSource(); + + switch (source.type()) { + case BYTES_FIXED_STRAIGHT: { + BytesRef ref = new BytesRef(); + assertTrue(source.hasArray()); + byte[] values = (byte[]) source.getArray(); + for (int i = 0; i < numValues; i++) { + source.getBytes(i, ref); + assertEquals(1, ref.length); + assertEquals(values[i], ref.bytes[ref.offset]); + } + } + break; + default: + fail("unexpected value " + source.type()); + } + r.close(); + w.close(); + d.close(); + } private DocValues getDocValues(IndexReader reader, String field) throws IOException { return MultiDocValues.getDocValues(reader, field); diff --git a/lucene/src/test/org/apache/lucene/search/TestDocValuesScoring.java b/lucene/src/test/org/apache/lucene/search/TestDocValuesScoring.java index 03dfef0d471..1a20de526c9 100644 --- a/lucene/src/test/org/apache/lucene/search/TestDocValuesScoring.java +++ b/lucene/src/test/org/apache/lucene/search/TestDocValuesScoring.java @@ -30,13 +30,11 @@ import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.index.DocValues.Source; import org.apache.lucene.index.IndexReader.AtomicReaderContext; -import org.apache.lucene.search.similarities.DefaultSimilarityProvider; import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarities.SimilarityProvider; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.util.TermContext; /** * Tests the use of indexdocvalues in scoring.