mirror of https://github.com/apache/lucene.git
LUCENE-3606: move norms reading to codec
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3606@1210268 13f79535-47bb-0310-9956-ffa450edef68
parent 92c0119b37
commit b0d50dcc81
@@ -61,6 +61,8 @@ final class NormsConsumer extends InvertedDocEndConsumer {
     for (FieldInfo fi : state.fieldInfos) {
       final NormsConsumerPerField toWrite = (NormsConsumerPerField) fieldsToFlush.get(fi);
       int upto = 0;
+      // we must check the final value of omitNorms for the fieldinfo, it could have
+      // changed for this field since the first time we added it.
       if (!fi.omitNorms && toWrite != null && toWrite.upto > 0) {
         normsOut.startField(fi);
         int docID = 0;
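Note: the added comment is load-bearing. omitNorms is sticky per field within a segment: once any document indexes the field with norms omitted, the field's FieldInfo records omitNorms=true for all documents, so the flag seen at flush time can differ from the one seen when the per-field consumer was created. A hedged 3.x-style sketch (IndexWriter setup assumed):

    Document d1 = new Document();
    d1.add(new Field("body", "first doc", Field.Store.NO, Field.Index.ANALYZED));
    writer.addDocument(d1);                 // "body" starts out with norms

    Field f = new Field("body", "second doc", Field.Store.NO, Field.Index.ANALYZED);
    f.setOmitNorms(true);                   // flips omitNorms for "body" in this segment
    Document d2 = new Document();
    d2.add(f);
    writer.addDocument(d2);                 // flush must re-check fi.omitNorms, as above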
@@ -21,6 +21,7 @@ import java.io.IOException;
 import java.util.concurrent.atomic.AtomicInteger;
 
 import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.codecs.NormsReader;
 import org.apache.lucene.index.codecs.PostingsFormat;
 import org.apache.lucene.index.codecs.FieldsProducer;
 import org.apache.lucene.index.codecs.StoredFieldsReader;
@@ -48,6 +49,7 @@ final class SegmentCoreReaders {
 
   final FieldsProducer fields;
   final PerDocValues perDocProducer;
+  final NormsReader norms;
 
   final Directory dir;
   final Directory cfsDir;
@@ -92,6 +94,8 @@ final class SegmentCoreReaders {
       // Ask codec for its Fields
       fields = format.fieldsProducer(segmentReadState);
       assert fields != null;
+      // ask codec for its Norms
+      norms = codec.normsFormat().normsReader(cfsDir, si, fieldInfos, context, dir);
       perDocProducer = codec.docValuesFormat().docsProducer(segmentReadState);
       success = true;
     } finally {
@@ -126,7 +130,7 @@ final class SegmentCoreReaders {
   synchronized void decRef() throws IOException {
     if (ref.decrementAndGet() == 0) {
       IOUtils.close(fields, perDocProducer, termVectorsReaderOrig,
-                    fieldsReaderOrig, cfsReader, storeCFSReader);
+                    fieldsReaderOrig, cfsReader, storeCFSReader, norms);
       // Now, notify any ReaderFinished listeners:
       if (owner != null) {
         owner.notifyReaderFinishedListeners();
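These four hunks make the codec-provided NormsReader a first-class member of the shared segment core: it is opened right after the postings producer and closed by the same IOUtils.close(...) call when the last reference is dropped. A hedged usage sketch of that shared lifecycle (the incRef/decRef names follow this class; the reader wiring is assumed):

    core.incRef();                          // a reopened reader shares the same NormsReader
    try {
      byte[] b = core.norms.norms("body");  // may lazily load from the codec's files
    } finally {
      core.decRef();                        // last decRef closes fields, norms, etc. together
    }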
@@ -1,187 +0,0 @@
-package org.apache.lucene.index;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-import java.util.concurrent.atomic.AtomicInteger;
-
-import org.apache.lucene.store.FlushInfo;
-import org.apache.lucene.store.IOContext;
-import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.store.IndexOutput;
-/**
- * Byte[] referencing is used because a new norm object needs
- * to be created for each clone, and the byte array is all
- * that is needed for sharing between cloned readers.  The
- * current norm referencing is for sharing between readers
- * whereas the byte[] referencing is for copy on write which
- * is independent of reader references (i.e. incRef, decRef).
- */
-
-final class SegmentNorms implements Cloneable {
-
-  int refCount = 1;
-
-  // If this instance is a clone, the originalNorm
-  // references the Norm that has a real open IndexInput:
-  private SegmentNorms origNorm;
-
-  private IndexInput in;
-  private long normSeek;
-
-  // null until bytes is set
-  private AtomicInteger bytesRef;
-  private byte[] bytes;
-  private int number;
-
-  private final SegmentReader owner;
-
-  public SegmentNorms(IndexInput in, int number, long normSeek, SegmentReader owner) {
-    this.in = in;
-    this.number = number;
-    this.normSeek = normSeek;
-    this.owner = owner;
-  }
-
-  public synchronized void incRef() {
-    assert refCount > 0 && (origNorm == null || origNorm.refCount > 0);
-    refCount++;
-  }
-
-  private void closeInput() throws IOException {
-    if (in != null) {
-      if (in != owner.singleNormStream) {
-        // It's private to us -- just close it
-        in.close();
-      } else {
-        // We are sharing this with others -- decRef and
-        // maybe close the shared norm stream
-        if (owner.singleNormRef.decrementAndGet() == 0) {
-          owner.singleNormStream.close();
-          owner.singleNormStream = null;
-        }
-      }
-
-      in = null;
-    }
-  }
-
-  public synchronized void decRef() throws IOException {
-    assert refCount > 0 && (origNorm == null || origNorm.refCount > 0);
-
-    if (--refCount == 0) {
-      if (origNorm != null) {
-        origNorm.decRef();
-        origNorm = null;
-      } else {
-        closeInput();
-      }
-
-      if (bytes != null) {
-        assert bytesRef != null;
-        bytesRef.decrementAndGet();
-        bytes = null;
-        bytesRef = null;
-      } else {
-        assert bytesRef == null;
-      }
-    }
-  }
-
-  // Load & cache full bytes array.  Returns bytes.
-  public synchronized byte[] bytes() throws IOException {
-    assert refCount > 0 && (origNorm == null || origNorm.refCount > 0);
-    if (bytes == null) {                     // value not yet read
-      assert bytesRef == null;
-      if (origNorm != null) {
-        // Ask origNorm to load so that for a series of
-        // reopened readers we share a single read-only
-        // byte[]
-        bytes = origNorm.bytes();
-        bytesRef = origNorm.bytesRef;
-        bytesRef.incrementAndGet();
-
-        // Once we've loaded the bytes we no longer need
-        // origNorm:
-        origNorm.decRef();
-        origNorm = null;
-
-      } else {
-        // We are the origNorm, so load the bytes for real
-        // ourself:
-        final int count = owner.maxDoc();
-        bytes = new byte[count];
-
-        // Since we are orig, in must not be null
-        assert in != null;
-
-        // Read from disk.
-        synchronized(in) {
-          in.seek(normSeek);
-          in.readBytes(bytes, 0, count, false);
-        }
-
-        bytesRef = new AtomicInteger(1);
-        closeInput();
-      }
-    }
-
-    return bytes;
-  }
-
-  // Only for testing
-  AtomicInteger bytesRef() {
-    return bytesRef;
-  }
-
-  // Returns a copy of this Norm instance that shares
-  // IndexInput & bytes with the original one
-  @Override
-  public synchronized Object clone() {
-    assert refCount > 0 && (origNorm == null || origNorm.refCount > 0);
-
-    SegmentNorms clone;
-    try {
-      clone = (SegmentNorms) super.clone();
-    } catch (CloneNotSupportedException cnse) {
-      // Cannot happen
-      throw new RuntimeException("unexpected CloneNotSupportedException", cnse);
-    }
-    clone.refCount = 1;
-
-    if (bytes != null) {
-      assert bytesRef != null;
-      assert origNorm == null;
-
-      // Clone holds a reference to my bytes:
-      clone.bytesRef.incrementAndGet();
-    } else {
-      assert bytesRef == null;
-      if (origNorm == null) {
-        // I become the origNorm for the clone:
-        clone.origNorm = this;
-      }
-      clone.origNorm.incRef();
-    }
-
-    // Only the origNorm will actually readBytes from in:
-    clone.in = null;
-
-    return clone;
-  }
-}
@@ -20,25 +20,20 @@ package org.apache.lucene.index;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
-import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
-import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.atomic.AtomicInteger;
 
 import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.index.codecs.StoredFieldsReader;
 import org.apache.lucene.index.codecs.PerDocValues;
 import org.apache.lucene.index.codecs.TermVectorsReader;
-import org.apache.lucene.index.codecs.lucene40.Lucene40NormsWriter;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.util.BitVector;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.CloseableThreadLocal;
-import org.apache.lucene.util.StringHelper;
 
 /**
  * @lucene.experimental
@@ -65,10 +60,6 @@ public class SegmentReader extends IndexReader implements Cloneable {
   private SegmentInfo rollbackSegmentInfo;
   private int rollbackPendingDeleteCount;
 
-  // optionally used for the .nrm file shared by multiple norms
-  IndexInput singleNormStream;
-  AtomicInteger singleNormRef;
-
   SegmentCoreReaders core;
 
   /**
@@ -80,8 +71,6 @@ public class SegmentReader extends IndexReader implements Cloneable {
       return core.getFieldsReaderOrig().clone();
     }
   }
 
-  Map<String,SegmentNorms> norms = new HashMap<String,SegmentNorms>();
-
   /**
    * @throws CorruptIndexException if the index is corrupt
@@ -114,7 +103,6 @@ public class SegmentReader extends IndexReader implements Cloneable {
         instance.core.openDocStores(si);
       }
       instance.loadLiveDocs(context);
-      instance.openNorms(instance.core.cfsDir, context);
       success = true;
     } finally {
 
@@ -278,24 +266,6 @@ public class SegmentReader extends IndexReader implements Cloneable {
           clone.liveDocsRef = liveDocsRef;
         }
       }
-
-      clone.norms = new HashMap<String,SegmentNorms>();
-
-      // Clone norms
-      for (FieldInfo fi : core.fieldInfos) {
-        // Clone unchanged norms to the cloned reader
-        if (doClone || !fieldNormsChanged.contains(fi.number)) {
-          final String curField = fi.name;
-          SegmentNorms norm = this.norms.get(curField);
-          if (norm != null)
-            clone.norms.put(curField, (SegmentNorms) norm.clone());
-        }
-      }
-
-      // If we are not cloning, then this will open anew
-      // any norms that have changed:
-      clone.openNorms(si.getUseCompoundFile() ? core.getCFSReader() : directory(), IOContext.DEFAULT);
-
       success = true;
     } finally {
       if (!success) {
@@ -395,9 +365,6 @@ public class SegmentReader extends IndexReader implements Cloneable {
       liveDocs = null;
     }
 
-    for (final SegmentNorms norm : norms.values()) {
-      norm.decRef();
-    }
     if (core != null) {
       core.decRef();
     }
@@ -545,95 +512,14 @@ public class SegmentReader extends IndexReader implements Cloneable {
   @Override
   public boolean hasNorms(String field) {
     ensureOpen();
-    return norms.containsKey(field);
+    FieldInfo fi = core.fieldInfos.fieldInfo(field);
+    return fi != null && fi.isIndexed && !fi.omitNorms;
   }
 
   @Override
   public byte[] norms(String field) throws IOException {
     ensureOpen();
-    final SegmentNorms norm = norms.get(field);
-    if (norm == null) {
-      // not indexed, or norms not stored
-      return null;
-    }
-    return norm.bytes();
-  }
-
-  private void openNorms(Directory cfsDir, IOContext context) throws IOException {
-    boolean normsInitiallyEmpty = norms.isEmpty(); // only used for assert
-    long nextNormSeek = Lucene40NormsWriter.NORMS_HEADER.length; //skip header (header unused for now)
-    int maxDoc = maxDoc();
-    for (FieldInfo fi : core.fieldInfos) {
-      if (norms.containsKey(fi.name)) {
-        // in case this SegmentReader is being re-opened, we might be able to
-        // reuse some norm instances and skip loading them here
-        continue;
-      }
-      if (fi.isIndexed && !fi.omitNorms) {
-        Directory d = directory();
-        String fileName = si.getNormFileName(fi.number);
-        if (!si.hasSeparateNorms(fi.number)) {
-          d = cfsDir;
-        }
-
-        // singleNormFile means multiple norms share this file
-        boolean singleNormFile = IndexFileNames.matchesExtension(fileName, IndexFileNames.NORMS_EXTENSION);
-        IndexInput normInput = null;
-        long normSeek;
-
-        if (singleNormFile) {
-          normSeek = nextNormSeek;
-          if (singleNormStream == null) {
-            singleNormStream = d.openInput(fileName, context);
-            singleNormRef = new AtomicInteger(1);
-          } else {
-            singleNormRef.incrementAndGet();
-          }
-          // All norms in the .nrm file can share a single IndexInput since
-          // they are only used in a synchronized context.
-          // If this were to change in the future, a clone could be done here.
-          normInput = singleNormStream;
-        } else {
-          normInput = d.openInput(fileName, context);
-          // if the segment was created in 3.2 or after, we wrote the header for sure,
-          // and don't need to do the sketchy file size check. otherwise, we check
-          // if the size is exactly equal to maxDoc to detect a headerless file.
-          // NOTE: remove this check in Lucene 5.0!
-          String version = si.getVersion();
-          final boolean isUnversioned =
-            (version == null || StringHelper.getVersionComparator().compare(version, "3.2") < 0)
-            && normInput.length() == maxDoc();
-          if (isUnversioned) {
-            normSeek = 0;
-          } else {
-            normSeek = Lucene40NormsWriter.NORMS_HEADER.length;
-          }
-        }
-
-        norms.put(fi.name, new SegmentNorms(normInput, fi.number, normSeek, this));
-        nextNormSeek += maxDoc; // increment also if some norms are separate
-      }
-    }
-    // nocommit: change to a real check? see LUCENE-3619
-    assert singleNormStream == null || !normsInitiallyEmpty || nextNormSeek == singleNormStream.length();
-  }
-
-  // for testing only
-  boolean normsClosed() {
-    if (singleNormStream != null) {
-      return false;
-    }
-    for (final SegmentNorms norm : norms.values()) {
-      if (norm.refCount > 0) {
-        return false;
-      }
-    }
-    return true;
-  }
-
-  // for testing only
-  boolean normsClosed(String field) {
-    return norms.get(field).refCount == 0;
+    return core.norms.norms(field);
   }
 
 /**
@@ -20,14 +20,18 @@ package org.apache.lucene.index.codecs;
 import java.io.IOException;
 import java.util.Set;
 
+import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
 
 /**
  * format for normalization factors
  */
 public abstract class NormsFormat {
+  /** Note: separateNormsDir should not be used! */
+  public abstract NormsReader normsReader(Directory dir, SegmentInfo info, FieldInfos fields, IOContext context, Directory separateNormsDir) throws IOException;
   public abstract NormsWriter normsWriter(SegmentWriteState state) throws IOException;
   public abstract void files(Directory dir, SegmentInfo info, Set<String> files) throws IOException;
 }
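NormsFormat is now the single codec hook for both directions of norms I/O. As an illustration of the contract (not part of this patch), a minimal read-only format that reports no norms might look like the sketch below, using only the types imported above; the writer is stubbed rather than guessing at the NormsWriter API:

    public class NoNormsFormat extends NormsFormat {
      @Override
      public NormsReader normsReader(Directory dir, SegmentInfo info, FieldInfos fields,
                                     IOContext context, Directory separateNormsDir) {
        return new NormsReader() {
          @Override
          public byte[] norms(String name) {
            return null;                    // behaves as if every field has omitNorms=true
          }
          @Override
          public void close() {}
        };
      }

      @Override
      public NormsWriter normsWriter(SegmentWriteState state) {
        throw new UnsupportedOperationException("read-only sketch");
      }

      @Override
      public void files(Directory dir, SegmentInfo info, Set<String> files) {
        // contributes no files to the segment
      }
    }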
@@ -0,0 +1,26 @@
+package org.apache.lucene.index.codecs;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Closeable;
+import java.io.IOException;
+
+//simple api just for now before switching to docvalues apis
+public abstract class NormsReader implements Closeable {
+  public abstract byte[] norms(String name) throws IOException;
+}
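For orientation, a hedged sketch of how a caller might decode what this contract returns. normBoost is a hypothetical helper, and the byte315 decoding (org.apache.lucene.util.SmallFloat) is Lucene's default norm encoding, not something this interface mandates:

    static float normBoost(NormsReader norms, String field, int docId) throws IOException {
      byte[] b = norms.norms(field);        // null when the field stores no norms
      return b == null ? 1f : SmallFloat.byte315ToFloat(b[docId]);
    }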
@@ -20,14 +20,22 @@ package org.apache.lucene.index.codecs.lucene40;
 import java.io.IOException;
 import java.util.Set;
 
+import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.index.codecs.NormsFormat;
+import org.apache.lucene.index.codecs.NormsReader;
 import org.apache.lucene.index.codecs.NormsWriter;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
 
 public class Lucene40NormsFormat extends NormsFormat {
 
+  @Override
+  public NormsReader normsReader(Directory dir, SegmentInfo info, FieldInfos fields, IOContext context, Directory separateNormsDir) throws IOException {
+    return new Lucene40NormsReader(dir, info, fields, context, separateNormsDir);
+  }
+
   @Override
   public NormsWriter normsWriter(SegmentWriteState state) throws IOException {
     return new Lucene40NormsWriter(state.directory, state.segmentName, state.context);
@@ -0,0 +1,153 @@
+package org.apache.lucene.index.codecs.lucene40;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.IdentityHashMap;
+import java.util.Map;
+
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.codecs.NormsReader;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.StringHelper;
+
+public class Lucene40NormsReader extends NormsReader {
+  // this would be replaced by Source/SourceCache in a dv impl.
+  // for now we have our own mini-version
+  Map<String,Norm> norms = new HashMap<String,Norm>();
+  // any .nrm or .sNN files we have open at any time.
+  // TODO: just a list, and double-close() separate norms files?
+  Map<IndexInput,Boolean> openFiles = new IdentityHashMap<IndexInput,Boolean>();
+  // points to a singleNormFile
+  IndexInput singleNormStream;
+  final int maxdoc;
+
+  // note: just like segmentreader in 3.x, we open up all the files here (including separate norms) up front.
+  // but we just don't do any seeks or reading yet.
+  public Lucene40NormsReader(Directory dir, SegmentInfo info, FieldInfos fields, IOContext context, Directory separateNormsDir) throws IOException {
+    maxdoc = info.docCount;
+    boolean success = false;
+    try {
+      long nextNormSeek = Lucene40NormsWriter.NORMS_HEADER.length; //skip header (header unused for now)
+      for (FieldInfo fi : fields) {
+        if (fi.isIndexed && !fi.omitNorms) {
+          String fileName = info.getNormFileName(fi.number);
+          Directory d = info.hasSeparateNorms(fi.number) ? separateNormsDir : dir;
+
+          // singleNormFile means multiple norms share this file
+          boolean singleNormFile = IndexFileNames.matchesExtension(fileName, IndexFileNames.NORMS_EXTENSION);
+          IndexInput normInput = null;
+          long normSeek;
+
+          if (singleNormFile) {
+            normSeek = nextNormSeek;
+            if (singleNormStream == null) {
+              singleNormStream = d.openInput(fileName, context);
+              openFiles.put(singleNormStream, Boolean.TRUE);
+            }
+            // All norms in the .nrm file can share a single IndexInput since
+            // they are only used in a synchronized context.
+            // If this were to change in the future, a clone could be done here.
+            normInput = singleNormStream;
+          } else {
+            normInput = d.openInput(fileName, context);
+            openFiles.put(normInput, Boolean.TRUE);
+            // if the segment was created in 3.2 or after, we wrote the header for sure,
+            // and don't need to do the sketchy file size check. otherwise, we check
+            // if the size is exactly equal to maxDoc to detect a headerless file.
+            // NOTE: remove this check in Lucene 5.0!
+            String version = info.getVersion();
+            final boolean isUnversioned =
+              (version == null || StringHelper.getVersionComparator().compare(version, "3.2") < 0)
+              && normInput.length() == maxdoc;
+            if (isUnversioned) {
+              normSeek = 0;
+            } else {
+              normSeek = Lucene40NormsWriter.NORMS_HEADER.length;
+            }
+          }
+
+          Norm norm = new Norm();
+          norm.file = normInput;
+          norm.offset = normSeek;
+          norms.put(fi.name, norm);
+          nextNormSeek += maxdoc; // increment also if some norms are separate
+        }
+      }
+      // nocommit: change to a real check? see LUCENE-3619
+      assert singleNormStream == null || nextNormSeek == singleNormStream.length();
+      success = true;
+    } finally {
+      if (!success) {
+        if (openFiles != null) {
+          IOUtils.closeWhileHandlingException(openFiles.keySet());
+        }
+      }
+    }
+  }
+
+  @Override
+  public byte[] norms(String name) throws IOException {
+    Norm norm = norms.get(name);
+    return norm == null ? null : norm.bytes();
+  }
+
+  @Override
+  public void close() throws IOException {
+    try {
+      if (openFiles != null) {
+        IOUtils.close(openFiles.keySet());
+      }
+    } finally {
+      norms = null;
+      openFiles = null;
+    }
+  }
+
+  class Norm {
+    IndexInput file;
+    long offset;
+    byte bytes[];
+
+    synchronized byte[] bytes() throws IOException {
+      if (bytes == null) {
+        bytes = new byte[maxdoc];
+        // some norms share fds
+        synchronized(file) {
+          file.seek(offset);
+          file.readBytes(bytes, 0, bytes.length, false);
+        }
+        // we are done with this file
+        if (file != singleNormStream) {
+          openFiles.remove(file);
+          file.close();
+          file = null;
+        }
+      }
+      return bytes;
+    }
+  }
+}
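Note the design carried over from the old SegmentReader path: every norms file is opened up front, but bytes are read lazily in Norm.bytes(), which also closes a non-shared per-field file after its single read. A caller that prefers to pay the I/O cost eagerly can simply touch every field once; a hedged sketch (the fieldInfos and normsReader instances are assumed from the surrounding open code):

    for (FieldInfo fi : fieldInfos) {
      if (fi.isIndexed && !fi.omitNorms) {
        normsReader.norms(fi.name);         // triggers Norm.bytes(): seek, read, close private file
      }
    }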
@@ -62,8 +62,8 @@ public class Lucene40NormsWriter extends NormsWriter {
 
   @Override
   public void finish(int numDocs) throws IOException {
-    if (4+normCount*numDocs != out.getFilePointer()) {
-      throw new RuntimeException(".nrm file size mismatch: expected=" + (4+normCount*numDocs) + " actual=" + out.getFilePointer());
+    if (4+normCount*(long)numDocs != out.getFilePointer()) {
+      throw new RuntimeException(".nrm file size mismatch: expected=" + (4+normCount*(long)numDocs) + " actual=" + out.getFilePointer());
     }
   }
 
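The (long) cast fixes a real overflow: normCount*numDocs was computed in 32-bit arithmetic before being compared against the 64-bit file pointer, so sufficiently large segments could trip the size check spuriously. A self-contained illustration:

    int normCount = 40, numDocs = 60000000;
    System.out.println(4 + normCount * numDocs);        // -1894967292: int product wrapped
    System.out.println(4 + normCount * (long) numDocs); // 2400000004: correct expected size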
@@ -761,7 +761,8 @@ public class TestIndexReaderReopen extends LuceneTestCase {
     assertEquals(0, reader.getRefCount());
 
     if (checkNormsClosed && reader instanceof SegmentReader) {
-      assertTrue(((SegmentReader) reader).normsClosed());
+      // TODO: should we really assert something here? we check for open files and this is obsolete...
+      // assertTrue(((SegmentReader) reader).normsClosed());
     }
 
     if (checkSubReaders) {