LUCENE-3606: move norms reading to codec

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3606@1210268 13f79535-47bb-0310-9956-ffa450edef68
Robert Muir 2011-12-04 22:59:27 +00:00
parent 92c0119b37
commit b0d50dcc81
10 changed files with 205 additions and 308 deletions
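
In outline: SegmentReader stops opening and ref-counting .nrm files itself; SegmentCoreReaders now asks the codec for a NormsReader and closes it together with the other per-segment readers. A rough sketch of the new read path, pieced together from the hunks below:

    // in SegmentCoreReaders: ask the codec, just like fields and doc values
    norms = codec.normsFormat().normsReader(cfsDir, si, fieldInfos, context, dir);

    // in SegmentReader.norms(String): delegate instead of managing SegmentNorms
    byte[] b = core.norms.norms(field); // null if not indexed or norms omitted

    // on the last decRef, the NormsReader is closed with everything else:
    IOUtils.close(fields, perDocProducer, termVectorsReaderOrig,
        fieldsReaderOrig, cfsReader, storeCFSReader, norms);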

View File

@@ -61,6 +61,8 @@ final class NormsConsumer extends InvertedDocEndConsumer {
for (FieldInfo fi : state.fieldInfos) {
final NormsConsumerPerField toWrite = (NormsConsumerPerField) fieldsToFlush.get(fi);
int upto = 0;
// we must check the final value of omitNorms for the fieldinfo, it could have
// changed for this field since the first time we added it.
if (!fi.omitNorms && toWrite != null && toWrite.upto > 0) {
normsOut.startField(fi);
int docID = 0;

View File

@@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.NormsReader;
import org.apache.lucene.index.codecs.PostingsFormat;
import org.apache.lucene.index.codecs.FieldsProducer;
import org.apache.lucene.index.codecs.StoredFieldsReader;
@@ -48,6 +49,7 @@ final class SegmentCoreReaders {
final FieldsProducer fields;
final PerDocValues perDocProducer;
final NormsReader norms;
final Directory dir;
final Directory cfsDir;
@@ -92,6 +94,8 @@ final class SegmentCoreReaders {
// Ask codec for its Fields
fields = format.fieldsProducer(segmentReadState);
assert fields != null;
// ask codec for its Norms
norms = codec.normsFormat().normsReader(cfsDir, si, fieldInfos, context, dir);
perDocProducer = codec.docValuesFormat().docsProducer(segmentReadState);
success = true;
} finally {
@@ -126,7 +130,7 @@ final class SegmentCoreReaders {
synchronized void decRef() throws IOException {
if (ref.decrementAndGet() == 0) {
IOUtils.close(fields, perDocProducer, termVectorsReaderOrig,
- fieldsReaderOrig, cfsReader, storeCFSReader);
+ fieldsReaderOrig, cfsReader, storeCFSReader, norms);
// Now, notify any ReaderFinished listeners:
if (owner != null) {
owner.notifyReaderFinishedListeners();

View File

@@ -1,187 +0,0 @@
package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.store.FlushInfo;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
/**
* Byte[] referencing is used because a new norm object needs
* to be created for each clone, and the byte array is all
* that is needed for sharing between cloned readers. The
* current norm referencing is for sharing between readers
* whereas the byte[] referencing is for copy on write which
* is independent of reader references (i.e. incRef, decRef).
*/
final class SegmentNorms implements Cloneable {
int refCount = 1;
// If this instance is a clone, the originalNorm
// references the Norm that has a real open IndexInput:
private SegmentNorms origNorm;
private IndexInput in;
private long normSeek;
// null until bytes is set
private AtomicInteger bytesRef;
private byte[] bytes;
private int number;
private final SegmentReader owner;
public SegmentNorms(IndexInput in, int number, long normSeek, SegmentReader owner) {
this.in = in;
this.number = number;
this.normSeek = normSeek;
this.owner = owner;
}
public synchronized void incRef() {
assert refCount > 0 && (origNorm == null || origNorm.refCount > 0);
refCount++;
}
private void closeInput() throws IOException {
if (in != null) {
if (in != owner.singleNormStream) {
// It's private to us -- just close it
in.close();
} else {
// We are sharing this with others -- decRef and
// maybe close the shared norm stream
if (owner.singleNormRef.decrementAndGet() == 0) {
owner.singleNormStream.close();
owner.singleNormStream = null;
}
}
in = null;
}
}
public synchronized void decRef() throws IOException {
assert refCount > 0 && (origNorm == null || origNorm.refCount > 0);
if (--refCount == 0) {
if (origNorm != null) {
origNorm.decRef();
origNorm = null;
} else {
closeInput();
}
if (bytes != null) {
assert bytesRef != null;
bytesRef.decrementAndGet();
bytes = null;
bytesRef = null;
} else {
assert bytesRef == null;
}
}
}
// Load & cache full bytes array. Returns bytes.
public synchronized byte[] bytes() throws IOException {
assert refCount > 0 && (origNorm == null || origNorm.refCount > 0);
if (bytes == null) { // value not yet read
assert bytesRef == null;
if (origNorm != null) {
// Ask origNorm to load so that for a series of
// reopened readers we share a single read-only
// byte[]
bytes = origNorm.bytes();
bytesRef = origNorm.bytesRef;
bytesRef.incrementAndGet();
// Once we've loaded the bytes we no longer need
// origNorm:
origNorm.decRef();
origNorm = null;
} else {
// We are the origNorm, so load the bytes for real
// ourself:
final int count = owner.maxDoc();
bytes = new byte[count];
// Since we are orig, in must not be null
assert in != null;
// Read from disk.
synchronized(in) {
in.seek(normSeek);
in.readBytes(bytes, 0, count, false);
}
bytesRef = new AtomicInteger(1);
closeInput();
}
}
return bytes;
}
// Only for testing
AtomicInteger bytesRef() {
return bytesRef;
}
// Returns a copy of this Norm instance that shares
// IndexInput & bytes with the original one
@Override
public synchronized Object clone() {
assert refCount > 0 && (origNorm == null || origNorm.refCount > 0);
SegmentNorms clone;
try {
clone = (SegmentNorms) super.clone();
} catch (CloneNotSupportedException cnse) {
// Cannot happen
throw new RuntimeException("unexpected CloneNotSupportedException", cnse);
}
clone.refCount = 1;
if (bytes != null) {
assert bytesRef != null;
assert origNorm == null;
// Clone holds a reference to my bytes:
clone.bytesRef.incrementAndGet();
} else {
assert bytesRef == null;
if (origNorm == null) {
// I become the origNorm for the clone:
clone.origNorm = this;
}
clone.origNorm.incRef();
}
// Only the origNorm will actually readBytes from in:
clone.in = null;
return clone;
}
}
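
For context on the deletion: SegmentNorms shared norms across cloned/reopened readers in two phases, chaining to the instance that owns the IndexInput before the bytes are loaded, then sharing a single byte[] guarded by bytesRef afterwards. A stripped-down sketch of that transition, using only the methods above:

    SegmentNorms clone = (SegmentNorms) orig.clone(); // bytes not loaded yet:
                                                      // clone chains to orig via origNorm and incRefs it
    byte[] b = clone.bytes();                         // loads through orig, then drops the chain;
                                                      // clone and orig now share one byte[] and one bytesRef
    assert clone.bytesRef() == orig.bytesRef();
    clone.decRef();                                   // releases only the shared-bytes ref;
                                                      // orig keeps its own refCount and byte[]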

View File

@@ -20,25 +20,20 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.codecs.StoredFieldsReader;
import org.apache.lucene.index.codecs.PerDocValues;
import org.apache.lucene.index.codecs.TermVectorsReader;
import org.apache.lucene.index.codecs.lucene40.Lucene40NormsWriter;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.BitVector;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.CloseableThreadLocal;
import org.apache.lucene.util.StringHelper;
/**
* @lucene.experimental
@@ -65,10 +60,6 @@ public class SegmentReader extends IndexReader implements Cloneable {
private SegmentInfo rollbackSegmentInfo;
private int rollbackPendingDeleteCount;
// optionally used for the .nrm file shared by multiple norms
IndexInput singleNormStream;
AtomicInteger singleNormRef;
SegmentCoreReaders core;
/**
@@ -80,8 +71,6 @@ public class SegmentReader extends IndexReader implements Cloneable {
return core.getFieldsReaderOrig().clone();
}
}
Map<String,SegmentNorms> norms = new HashMap<String,SegmentNorms>();
/**
* @throws CorruptIndexException if the index is corrupt
@@ -114,7 +103,6 @@ public class SegmentReader extends IndexReader implements Cloneable {
instance.core.openDocStores(si);
}
instance.loadLiveDocs(context);
instance.openNorms(instance.core.cfsDir, context);
success = true;
} finally {
@@ -278,24 +266,6 @@ public class SegmentReader extends IndexReader implements Cloneable {
clone.liveDocsRef = liveDocsRef;
}
}
clone.norms = new HashMap<String,SegmentNorms>();
// Clone norms
for (FieldInfo fi : core.fieldInfos) {
// Clone unchanged norms to the cloned reader
if (doClone || !fieldNormsChanged.contains(fi.number)) {
final String curField = fi.name;
SegmentNorms norm = this.norms.get(curField);
if (norm != null)
clone.norms.put(curField, (SegmentNorms) norm.clone());
}
}
// If we are not cloning, then this will open anew
// any norms that have changed:
clone.openNorms(si.getUseCompoundFile() ? core.getCFSReader() : directory(), IOContext.DEFAULT);
success = true;
} finally {
if (!success) {
@@ -395,9 +365,6 @@ public class SegmentReader extends IndexReader implements Cloneable {
liveDocs = null;
}
for (final SegmentNorms norm : norms.values()) {
norm.decRef();
}
if (core != null) {
core.decRef();
}
@@ -545,95 +512,14 @@
@Override
public boolean hasNorms(String field) {
ensureOpen();
- return norms.containsKey(field);
+ FieldInfo fi = core.fieldInfos.fieldInfo(field);
+ return fi != null && fi.isIndexed && !fi.omitNorms;
}
@Override
public byte[] norms(String field) throws IOException {
ensureOpen();
- final SegmentNorms norm = norms.get(field);
- if (norm == null) {
- // not indexed, or norms not stored
- return null;
- }
- return norm.bytes();
- }
- private void openNorms(Directory cfsDir, IOContext context) throws IOException {
- boolean normsInitiallyEmpty = norms.isEmpty(); // only used for assert
- long nextNormSeek = Lucene40NormsWriter.NORMS_HEADER.length; //skip header (header unused for now)
- int maxDoc = maxDoc();
- for (FieldInfo fi : core.fieldInfos) {
- if (norms.containsKey(fi.name)) {
- // in case this SegmentReader is being re-opened, we might be able to
- // reuse some norm instances and skip loading them here
- continue;
- }
- if (fi.isIndexed && !fi.omitNorms) {
- Directory d = directory();
- String fileName = si.getNormFileName(fi.number);
- if (!si.hasSeparateNorms(fi.number)) {
- d = cfsDir;
- }
- // singleNormFile means multiple norms share this file
- boolean singleNormFile = IndexFileNames.matchesExtension(fileName, IndexFileNames.NORMS_EXTENSION);
- IndexInput normInput = null;
- long normSeek;
- if (singleNormFile) {
- normSeek = nextNormSeek;
- if (singleNormStream == null) {
- singleNormStream = d.openInput(fileName, context);
- singleNormRef = new AtomicInteger(1);
- } else {
- singleNormRef.incrementAndGet();
- }
- // All norms in the .nrm file can share a single IndexInput since
- // they are only used in a synchronized context.
- // If this were to change in the future, a clone could be done here.
- normInput = singleNormStream;
- } else {
- normInput = d.openInput(fileName, context);
- // if the segment was created in 3.2 or after, we wrote the header for sure,
- // and don't need to do the sketchy file size check. otherwise, we check
- // if the size is exactly equal to maxDoc to detect a headerless file.
- // NOTE: remove this check in Lucene 5.0!
- String version = si.getVersion();
- final boolean isUnversioned =
- (version == null || StringHelper.getVersionComparator().compare(version, "3.2") < 0)
- && normInput.length() == maxDoc();
- if (isUnversioned) {
- normSeek = 0;
- } else {
- normSeek = Lucene40NormsWriter.NORMS_HEADER.length;
- }
- }
- norms.put(fi.name, new SegmentNorms(normInput, fi.number, normSeek, this));
- nextNormSeek += maxDoc; // increment also if some norms are separate
- }
- }
- // nocommit: change to a real check? see LUCENE-3619
- assert singleNormStream == null || !normsInitiallyEmpty || nextNormSeek == singleNormStream.length();
- }
- // for testing only
- boolean normsClosed() {
- if (singleNormStream != null) {
- return false;
- }
- for (final SegmentNorms norm : norms.values()) {
- if (norm.refCount > 0) {
- return false;
- }
- }
- return true;
- }
- // for testing only
- boolean normsClosed(String field) {
- return norms.get(field).refCount == 0;
+ return core.norms.norms(field);
}
/**

View File

@@ -20,14 +20,18 @@ package org.apache.lucene.index.codecs;
import java.io.IOException;
import java.util.Set;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
/**
* format for normalization factors
*/
public abstract class NormsFormat {
/** Note: separateNormsDir should not be used! */
public abstract NormsReader normsReader(Directory dir, SegmentInfo info, FieldInfos fields, IOContext context, Directory separateNormsDir) throws IOException;
public abstract NormsWriter normsWriter(SegmentWriteState state) throws IOException;
public abstract void files(Directory dir, SegmentInfo info, Set<String> files) throws IOException;
}
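
The surface area a codec has to implement is small. A hypothetical minimal format, just to illustrate the contract (MyNormsReader, MyNormsWriter and the ".mynorms" extension are made up, not part of this commit):

    public class MyNormsFormat extends NormsFormat {
      @Override
      public NormsReader normsReader(Directory dir, SegmentInfo info, FieldInfos fields,
                                     IOContext context, Directory separateNormsDir) throws IOException {
        // per the note above, avoid separateNormsDir (3.x separate-norms back compat)
        return new MyNormsReader(dir, info, fields, context);
      }

      @Override
      public NormsWriter normsWriter(SegmentWriteState state) throws IOException {
        return new MyNormsWriter(state.directory, state.segmentName, state.context);
      }

      @Override
      public void files(Directory dir, SegmentInfo info, Set<String> files) throws IOException {
        files.add(info.name + ".mynorms"); // list every file this segment's norms live in
      }
    }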

View File

@@ -0,0 +1,26 @@
package org.apache.lucene.index.codecs;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.Closeable;
import java.io.IOException;
//simple api just for now before switching to docvalues apis
public abstract class NormsReader implements Closeable {
public abstract byte[] norms(String name) throws IOException;
}
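
Callers get the whole per-field array, one byte per document, or null when the field has no norms, matching the new SegmentReader.norms(String) above. For example (field name hypothetical):

    byte[] b = core.norms.norms("body");
    if (b != null) {
      assert b.length == maxDoc(); // true at least for the Lucene40 implementation below
    }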

View File

@@ -20,14 +20,22 @@ package org.apache.lucene.index.codecs.lucene40;
import java.io.IOException;
import java.util.Set;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.codecs.NormsFormat;
import org.apache.lucene.index.codecs.NormsReader;
import org.apache.lucene.index.codecs.NormsWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
public class Lucene40NormsFormat extends NormsFormat {
@Override
public NormsReader normsReader(Directory dir, SegmentInfo info, FieldInfos fields, IOContext context, Directory separateNormsDir) throws IOException {
return new Lucene40NormsReader(dir, info, fields, context, separateNormsDir);
}
@Override
public NormsWriter normsWriter(SegmentWriteState state) throws IOException {
return new Lucene40NormsWriter(state.directory, state.segmentName, state.context);

View File

@@ -0,0 +1,153 @@
package org.apache.lucene.index.codecs.lucene40;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.HashMap;
import java.util.IdentityHashMap;
import java.util.Map;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.codecs.NormsReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.StringHelper;
public class Lucene40NormsReader extends NormsReader {
// this would be replaced by Source/SourceCache in a dv impl.
// for now we have our own mini-version
Map<String,Norm> norms = new HashMap<String,Norm>();
// any .nrm or .sNN files we have open at any time.
// TODO: just a list, and double-close() separate norms files?
Map<IndexInput,Boolean> openFiles = new IdentityHashMap<IndexInput,Boolean>();
// points to a singleNormFile
IndexInput singleNormStream;
final int maxdoc;
// note: just like segmentreader in 3.x, we open up all the files here (including separate norms) up front.
// but we just don't do any seeks or reading yet.
public Lucene40NormsReader(Directory dir, SegmentInfo info, FieldInfos fields, IOContext context, Directory separateNormsDir) throws IOException {
maxdoc = info.docCount;
boolean success = false;
try {
long nextNormSeek = Lucene40NormsWriter.NORMS_HEADER.length; //skip header (header unused for now)
for (FieldInfo fi : fields) {
if (fi.isIndexed && !fi.omitNorms) {
String fileName = info.getNormFileName(fi.number);
Directory d = info.hasSeparateNorms(fi.number) ? separateNormsDir : dir;
// singleNormFile means multiple norms share this file
boolean singleNormFile = IndexFileNames.matchesExtension(fileName, IndexFileNames.NORMS_EXTENSION);
IndexInput normInput = null;
long normSeek;
if (singleNormFile) {
normSeek = nextNormSeek;
if (singleNormStream == null) {
singleNormStream = d.openInput(fileName, context);
openFiles.put(singleNormStream, Boolean.TRUE);
}
// All norms in the .nrm file can share a single IndexInput since
// they are only used in a synchronized context.
// If this were to change in the future, a clone could be done here.
normInput = singleNormStream;
} else {
normInput = d.openInput(fileName, context);
openFiles.put(normInput, Boolean.TRUE);
// if the segment was created in 3.2 or after, we wrote the header for sure,
// and don't need to do the sketchy file size check. otherwise, we check
// if the size is exactly equal to maxDoc to detect a headerless file.
// NOTE: remove this check in Lucene 5.0!
String version = info.getVersion();
final boolean isUnversioned =
(version == null || StringHelper.getVersionComparator().compare(version, "3.2") < 0)
&& normInput.length() == maxdoc;
if (isUnversioned) {
normSeek = 0;
} else {
normSeek = Lucene40NormsWriter.NORMS_HEADER.length;
}
}
Norm norm = new Norm();
norm.file = normInput;
norm.offset = normSeek;
norms.put(fi.name, norm);
nextNormSeek += maxdoc; // increment also if some norms are separate
}
}
// nocommit: change to a real check? see LUCENE-3619
assert singleNormStream == null || nextNormSeek == singleNormStream.length();
success = true;
} finally {
if (!success) {
if (openFiles != null) {
IOUtils.closeWhileHandlingException(openFiles.keySet());
}
}
}
}
@Override
public byte[] norms(String name) throws IOException {
Norm norm = norms.get(name);
return norm == null ? null : norm.bytes();
}
@Override
public void close() throws IOException {
try {
if (openFiles != null) {
IOUtils.close(openFiles.keySet());
}
} finally {
norms = null;
openFiles = null;
}
}
class Norm {
IndexInput file;
long offset;
byte bytes[];
synchronized byte[] bytes() throws IOException {
if (bytes == null) {
bytes = new byte[maxdoc];
// some norms share fds
synchronized(file) {
file.seek(offset);
file.readBytes(bytes, 0, bytes.length, false);
}
// we are done with this file
if (file != singleNormStream) {
openFiles.remove(file);
file.close();
file = null;
}
}
return bytes;
}
}
}
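
Note the lifecycle: the constructor opens every file (including separate norms) but reads nothing; the first norms(name) call seeks, reads maxdoc bytes and caches them, after which per-field inputs are closed eagerly while the shared .nrm stream stays in openFiles until close(). A usage sketch (the directory, info and field name are stand-ins):

    NormsReader r = new Lucene40NormsReader(dir, info, fieldInfos, IOContext.DEFAULT, dir);
    byte[] first = r.norms("title");  // reads and caches the array
    byte[] again = r.norms("title");  // cache hit: same array, no I/O
    assert first == again;
    r.close();                        // closes whatever is still in openFiles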

View File

@@ -62,8 +62,8 @@ public class Lucene40NormsWriter extends NormsWriter {
@Override
public void finish(int numDocs) throws IOException {
- if (4+normCount*numDocs != out.getFilePointer()) {
- throw new RuntimeException(".nrm file size mismatch: expected=" + (4+normCount*numDocs) + " actual=" + out.getFilePointer());
+ if (4+normCount*(long)numDocs != out.getFilePointer()) {
+ throw new RuntimeException(".nrm file size mismatch: expected=" + (4+normCount*(long)numDocs) + " actual=" + out.getFilePointer());
}
}
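
The (long) cast fixes a real bug: normCount*numDocs was evaluated in 32-bit int arithmetic before the comparison with the long file pointer, so a large enough segment made the sanity check misfire. A minimal demonstration:

    int normCount = 3;
    int numDocs = 900000000;
    System.out.println(4 + normCount * numDocs);         // -1594967292: int overflow
    System.out.println(4 + normCount * (long) numDocs);  // 2700000004: intended value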

View File

@@ -761,7 +761,8 @@ public class TestIndexReaderReopen extends LuceneTestCase {
assertEquals(0, reader.getRefCount());
if (checkNormsClosed && reader instanceof SegmentReader) {
- assertTrue(((SegmentReader) reader).normsClosed());
+ // TODO: should we really assert something here? we check for open files and this is obsolete...
+ // assertTrue(((SegmentReader) reader).normsClosed());
}
if (checkSubReaders) {