LUCENE-3117: pull SegmentReader.Norms out of SegmentReader.java

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1125376 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2011-05-20 13:33:21 +00:00
parent ba7845ae74
commit cee538500e
4 changed files with 262 additions and 233 deletions

View File

@ -0,0 +1,240 @@
package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
/**
 * Reference-counted holder for the norms of a single field, used by
 * {@link SegmentReader} (which keeps one instance per field name).
 *
 * <p>Byte[] referencing is used because a new norm object needs
 * to be created for each clone, and the byte array is all
 * that is needed for sharing between cloned readers.  The
 * current norm referencing is for sharing between readers
 * whereas the byte[] referencing is for copy on write which
 * is independent of reader references (i.e. incRef, decRef).
 *
 * <p>Two independent counters are therefore maintained:
 * <ul>
 *   <li>{@code refCount} counts the holders of this instance
 *       (see {@link #incRef()} / {@link #decRef()});</li>
 *   <li>{@code bytesRef} counts the instances sharing the loaded
 *       {@code byte[]} and drives {@link #copyOnWrite()}.</li>
 * </ul>
 */
final class SegmentNorms implements Cloneable {

  // Package-private: SegmentReader reads this directly.
  int refCount = 1;

  // If this instance is a clone made before the bytes were
  // loaded, origNorm references the SegmentNorms that has a
  // real open IndexInput:
  private SegmentNorms origNorm;

  // Only the instance that actually reads from disk holds an
  // input; clones always have in == null.
  private IndexInput in;
  private final long normSeek;

  // null until bytes is set
  private AtomicInteger bytesRef;
  private byte[] bytes;
  private final int number;

  // Package-private: SegmentReader reads and writes these
  // directly (commit / rollback bookkeeping).
  boolean dirty;
  boolean rollbackDirty;

  private final SegmentReader owner;

  /**
   * @param in       input to read the norms from
   * @param number   number handed to {@link SegmentInfo} when the
   *                 norms are re-written (the caller passes the
   *                 field number)
   * @param normSeek position in {@code in} where the norm bytes start
   * @param owner    reader supplying maxDoc, the directory and the
   *                 shared norm stream
   */
  public SegmentNorms(IndexInput in, int number, long normSeek, SegmentReader owner) {
    this.in = in;
    this.number = number;
    this.normSeek = normSeek;
    this.owner = owner;
  }

  /** Adds one reference to this instance. */
  public synchronized void incRef() {
    assert refCount > 0 && (origNorm == null || origNorm.refCount > 0);
    refCount++;
  }

  /**
   * Releases {@code in}, if it is still held: a private input is
   * closed directly, whereas the owner's shared norm stream is
   * only closed once its last user lets go of it.
   */
  private void closeInput() throws IOException {
    if (in != null) {
      if (in != owner.singleNormStream) {
        // It's private to us -- just close it
        in.close();
      } else {
        // We are sharing this with others -- decRef and
        // maybe close the shared norm stream
        if (owner.singleNormRef.decrementAndGet() == 0) {
          owner.singleNormStream.close();
          owner.singleNormStream = null;
        }
      }
      in = null;
    }
  }

  /**
   * Drops one reference.  When the last reference goes away the
   * instance lets go of its origNorm (or closes its input) and
   * gives up its share of the byte[].
   *
   * @throws IOException if releasing origNorm or the input fails
   */
  public synchronized void decRef() throws IOException {
    assert refCount > 0 && (origNorm == null || origNorm.refCount > 0);

    if (--refCount == 0) {
      try {
        if (origNorm != null) {
          origNorm.decRef();
          origNorm = null;
        } else {
          closeInput();
        }
      } finally {
        // Give up our share of the byte[] even if releasing the
        // input failed; otherwise the shared count would never
        // drop and other holders would copy on write forever.
        if (bytes != null) {
          assert bytesRef != null;
          bytesRef.decrementAndGet();
          bytes = null;
          bytesRef = null;
        } else {
          assert bytesRef == null;
        }
      }
    }
  }

  /**
   * Load &amp; cache full bytes array.
   *
   * @return the cached norm bytes (the internal array, not a copy)
   * @throws IOException if reading the norms fails; in that case
   *         nothing is cached and a later call reads again
   */
  public synchronized byte[] bytes() throws IOException {
    assert refCount > 0 && (origNorm == null || origNorm.refCount > 0);
    if (bytes == null) {                     // value not yet read
      assert bytesRef == null;
      if (origNorm != null) {
        // Ask origNorm to load so that for a series of
        // reopened readers we share a single read-only
        // byte[]
        bytes = origNorm.bytes();
        bytesRef = origNorm.bytesRef;
        bytesRef.incrementAndGet();

        // Once we've loaded the bytes we no longer need
        // origNorm:
        origNorm.decRef();
        origNorm = null;

      } else {
        // We are the origNorm, so load the bytes for real
        // ourself:
        final int count = owner.maxDoc();
        final byte[] loaded = new byte[count];

        // Since we are orig, in must not be null
        assert in != null;

        // Read from disk into a local buffer first: if the read
        // throws, the bytes field must stay null so that we never
        // hand out a half-filled array with a null bytesRef.
        synchronized (in) {
          in.seek(normSeek);
          in.readBytes(loaded, 0, count, false);
        }

        bytes = loaded;
        bytesRef = new AtomicInteger(1);
        closeInput();
      }
    }

    return bytes;
  }

  // Only for testing
  AtomicInteger bytesRef() {
    return bytesRef;
  }

  /**
   * Called if we intend to change a norm value.  We make a
   * private copy of bytes if it's shared with others, and mark
   * this instance dirty.
   *
   * @return the byte[] that may now be modified
   * @throws IOException if the bytes had to be loaded and that failed
   */
  public synchronized byte[] copyOnWrite() throws IOException {
    assert refCount > 0 && (origNorm == null || origNorm.refCount > 0);
    bytes();
    assert bytes != null;
    assert bytesRef != null;
    if (bytesRef.get() > 1) {
      // I cannot be the origNorm for another norm
      // instance if I'm being changed.  Ie, only the
      // "head Norm" can be changed:
      assert refCount == 1;
      final AtomicInteger oldRef = bytesRef;
      bytes = owner.cloneNormBytes(bytes);
      bytesRef = new AtomicInteger(1);
      oldRef.decrementAndGet();
    }
    dirty = true;
    return bytes;
  }

  /**
   * Returns a copy of this instance with its own refCount of 1.
   * If the bytes are already loaded the copy shares them;
   * otherwise it is linked to the origNorm so that the bytes are
   * read once and shared.  The copy never holds an IndexInput.
   */
  @Override
  public synchronized Object clone() {
    assert refCount > 0 && (origNorm == null || origNorm.refCount > 0);

    SegmentNorms clone;
    try {
      clone = (SegmentNorms) super.clone();
    } catch (CloneNotSupportedException cnse) {
      // Cannot happen
      throw new RuntimeException("unexpected CloneNotSupportedException", cnse);
    }
    clone.refCount = 1;

    if (bytes != null) {
      assert bytesRef != null;
      assert origNorm == null;

      // Clone holds a reference to my bytes:
      clone.bytesRef.incrementAndGet();
    } else {
      assert bytesRef == null;
      if (origNorm == null) {
        // I become the origNorm for the clone:
        clone.origNorm = this;
      }
      clone.origNorm.incRef();
    }

    // Only the origNorm will actually readBytes from in:
    clone.in = null;

    return clone;
  }

  /**
   * Flush all pending changes to the next generation
   * separate norms file, then clear the dirty flag.  If writing
   * fails, the partially written file is deleted (best effort)
   * and the original exception propagates.
   *
   * @param si segment info whose norm generation is advanced and
   *           which names the file to write
   * @throws IOException if creating or writing the file fails
   */
  public void reWrite(SegmentInfo si) throws IOException {
    assert refCount > 0 && (origNorm == null || origNorm.refCount > 0): "refCount=" + refCount + " origNorm=" + origNorm;
    // Fail before touching the directory if there is nothing to write.
    assert bytes != null : "reWrite called before norms were loaded";

    // NOTE: norms are re-written in regular directory, not cfs
    si.advanceNormGen(this.number);
    final String normFileName = si.getNormFileName(this.number);
    IndexOutput out = owner.directory().createOutput(normFileName);
    boolean success = false;
    try {
      try {
        out.writeBytes(SegmentMerger.NORMS_HEADER, 0, SegmentMerger.NORMS_HEADER.length);
        out.writeBytes(bytes, owner.maxDoc());
      } finally {
        out.close();
      }
      success = true;
    } finally {
      if (!success) {
        try {
          owner.directory().deleteFile(normFileName);
        } catch (Throwable t) {
          // suppress this so we keep throwing the
          // original exception
        }
      }
    }
    this.dirty = false;
  }
}

View File

@ -33,7 +33,6 @@ import org.apache.lucene.index.codecs.FieldsProducer;
import org.apache.lucene.store.BufferedIndexInput; import org.apache.lucene.store.BufferedIndexInput;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BitVector; import org.apache.lucene.util.BitVector;
import org.apache.lucene.util.Bits; import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
@ -68,8 +67,8 @@ public class SegmentReader extends IndexReader implements Cloneable {
private int rollbackPendingDeleteCount; private int rollbackPendingDeleteCount;
// optionally used for the .nrm file shared by multiple norms // optionally used for the .nrm file shared by multiple norms
private IndexInput singleNormStream; IndexInput singleNormStream;
private AtomicInteger singleNormRef; AtomicInteger singleNormRef;
CoreReaders core; CoreReaders core;
@ -250,219 +249,9 @@ public class SegmentReader extends IndexReader implements Cloneable {
} }
} }
/**
* Per-field norms holder (inner class of the enclosing reader),
* as it existed before being pulled out into SegmentNorms.
*
* Byte[] referencing is used because a new norm object needs
* to be created for each clone, and the byte array is all
* that is needed for sharing between cloned readers. The
* current norm referencing is for sharing between readers
* whereas the byte[] referencing is for copy on write which
* is independent of reader references (i.e. incRef, decRef).
*/
final class Norm implements Cloneable {
// Number of holders of this instance; starts at 1.
private int refCount = 1;
// If this instance is a clone, the originalNorm Map<String,SegmentNorms> norms = new HashMap<String,SegmentNorms>();
// references the Norm that has a real open IndexInput:
private Norm origNorm;
// Input the norm bytes are read from; clone() sets it to null on the copy.
private IndexInput in;
// Position in "in" where the norm bytes start.
private long normSeek;
// null until bytes is set
private AtomicInteger bytesRef;
private byte[] bytes;
// Set by copyOnWrite(), cleared by reWrite().
private boolean dirty;
// Passed to SegmentInfo when the norms are re-written.
private int number;
private boolean rollbackDirty;
public Norm(IndexInput in, int number, long normSeek) {
this.in = in;
this.number = number;
this.normSeek = normSeek;
}
// Adds one reference to this instance.
public synchronized void incRef() {
assert refCount > 0 && (origNorm == null || origNorm.refCount > 0);
refCount++;
}
// Releases "in" if it is still held: a private input is closed
// directly; the shared singleNormStream is only closed when its
// reference count reaches zero.
private void closeInput() throws IOException {
if (in != null) {
if (in != singleNormStream) {
// It's private to us -- just close it
in.close();
} else {
// We are sharing this with others -- decRef and
// maybe close the shared norm stream
if (singleNormRef.decrementAndGet() == 0) {
singleNormStream.close();
singleNormStream = null;
}
}
in = null;
}
}
// Drops one reference. On the last one: releases origNorm (or
// closes the input) and gives up this instance's share of the byte[].
public synchronized void decRef() throws IOException {
assert refCount > 0 && (origNorm == null || origNorm.refCount > 0);
if (--refCount == 0) {
if (origNorm != null) {
origNorm.decRef();
origNorm = null;
} else {
closeInput();
}
// NOTE: not reached if the release above throws.
if (bytes != null) {
assert bytesRef != null;
bytesRef.decrementAndGet();
bytes = null;
bytesRef = null;
} else {
assert bytesRef == null;
}
}
}
// Load & cache full bytes array. Returns bytes.
public synchronized byte[] bytes() throws IOException {
assert refCount > 0 && (origNorm == null || origNorm.refCount > 0);
if (bytes == null) { // value not yet read
assert bytesRef == null;
if (origNorm != null) {
// Ask origNorm to load so that for a series of
// reopened readers we share a single read-only
// byte[]
bytes = origNorm.bytes();
bytesRef = origNorm.bytesRef;
bytesRef.incrementAndGet();
// Once we've loaded the bytes we no longer need
// origNorm:
origNorm.decRef();
origNorm = null;
} else {
// We are the origNorm, so load the bytes for real
// ourself:
final int count = maxDoc();
// NOTE: the field is assigned before the read below; if seek or
// readBytes throws, bytes stays non-null while bytesRef is still null.
bytes = new byte[count];
// Since we are orig, in must not be null
assert in != null;
// Read from disk.
synchronized(in) {
in.seek(normSeek);
in.readBytes(bytes, 0, count, false);
}
bytesRef = new AtomicInteger(1);
closeInput();
}
}
return bytes;
}
// Only for testing
AtomicInteger bytesRef() {
return bytesRef;
}
// Called if we intend to change a norm value. We make a
// private copy of bytes if it's shared with others:
public synchronized byte[] copyOnWrite() throws IOException {
assert refCount > 0 && (origNorm == null || origNorm.refCount > 0);
// Make sure the bytes are loaded before deciding whether to copy.
bytes();
assert bytes != null;
assert bytesRef != null;
if (bytesRef.get() > 1) {
// I cannot be the origNorm for another norm
// instance if I'm being changed. Ie, only the
// "head Norm" can be changed:
assert refCount == 1;
final AtomicInteger oldRef = bytesRef;
bytes = cloneNormBytes(bytes);
bytesRef = new AtomicInteger(1);
// Give up our share of the previously shared array.
oldRef.decrementAndGet();
}
dirty = true;
return bytes;
}
// Returns a copy of this Norm instance that shares
// IndexInput & bytes with the original one
@Override
public synchronized Object clone() {
assert refCount > 0 && (origNorm == null || origNorm.refCount > 0);
Norm clone;
try {
clone = (Norm) super.clone();
} catch (CloneNotSupportedException cnse) {
// Cannot happen
throw new RuntimeException("unexpected CloneNotSupportedException", cnse);
}
// The copy starts with its own single reference.
clone.refCount = 1;
if (bytes != null) {
assert bytesRef != null;
assert origNorm == null;
// Clone holds a reference to my bytes:
clone.bytesRef.incrementAndGet();
} else {
assert bytesRef == null;
if (origNorm == null) {
// I become the origNorm for the clone:
clone.origNorm = this;
}
// Otherwise the clone inherited my origNorm via super.clone();
// either way the clone now holds one reference to it.
clone.origNorm.incRef();
}
// Only the origNorm will actually readBytes from in:
clone.in = null;
return clone;
}
// Flush all pending changes to the next generation
// separate norms file.
public void reWrite(SegmentInfo si) throws IOException {
assert refCount > 0 && (origNorm == null || origNorm.refCount > 0): "refCount=" + refCount + " origNorm=" + origNorm;
// NOTE: norms are re-written in regular directory, not cfs
si.advanceNormGen(this.number);
final String normFileName = si.getNormFileName(this.number);
IndexOutput out = directory().createOutput(normFileName);
// Stays false unless both writes and the close complete.
boolean success = false;
try {
try {
out.writeBytes(SegmentMerger.NORMS_HEADER, 0, SegmentMerger.NORMS_HEADER.length);
out.writeBytes(bytes, maxDoc());
} finally {
out.close();
}
success = true;
} finally {
if (!success) {
// Best-effort removal of the partially written file.
try {
directory().deleteFile(normFileName);
} catch (Throwable t) {
// suppress this so we keep throwing the
// original exception
}
}
}
this.dirty = false;
}
}
Map<String,Norm> norms = new HashMap<String,Norm>();
/** /**
* @throws CorruptIndexException if the index is corrupt * @throws CorruptIndexException if the index is corrupt
@ -660,16 +449,16 @@ public class SegmentReader extends IndexReader implements Cloneable {
} }
} }
clone.norms = new HashMap<String,Norm>(); clone.norms = new HashMap<String,SegmentNorms>();
// Clone norms // Clone norms
for (FieldInfo fi : core.fieldInfos) { for (FieldInfo fi : core.fieldInfos) {
// Clone unchanged norms to the cloned reader // Clone unchanged norms to the cloned reader
if (doClone || !fieldNormsChanged.contains(fi.number)) { if (doClone || !fieldNormsChanged.contains(fi.number)) {
final String curField = fi.name; final String curField = fi.name;
Norm norm = this.norms.get(curField); SegmentNorms norm = this.norms.get(curField);
if (norm != null) if (norm != null)
clone.norms.put(curField, (Norm) norm.clone()); clone.norms.put(curField, (SegmentNorms) norm.clone());
} }
} }
@ -739,7 +528,7 @@ public class SegmentReader extends IndexReader implements Cloneable {
if (normsDirty) { // re-write norms if (normsDirty) { // re-write norms
si.initNormGen(); si.initNormGen();
for (final Norm norm : norms.values()) { for (final SegmentNorms norm : norms.values()) {
if (norm.dirty) { if (norm.dirty) {
norm.reWrite(si); norm.reWrite(si);
} }
@ -765,7 +554,7 @@ public class SegmentReader extends IndexReader implements Cloneable {
deletedDocs = null; deletedDocs = null;
} }
for (final Norm norm : norms.values()) { for (final SegmentNorms norm : norms.values()) {
norm.decRef(); norm.decRef();
} }
if (core != null) { if (core != null) {
@ -935,7 +724,7 @@ public class SegmentReader extends IndexReader implements Cloneable {
@Override @Override
public byte[] norms(String field) throws IOException { public byte[] norms(String field) throws IOException {
ensureOpen(); ensureOpen();
final Norm norm = norms.get(field); final SegmentNorms norm = norms.get(field);
if (norm == null) { if (norm == null) {
// not indexed, or norms not stored // not indexed, or norms not stored
return null; return null;
@ -946,7 +735,7 @@ public class SegmentReader extends IndexReader implements Cloneable {
@Override @Override
protected void doSetNorm(int doc, String field, byte value) protected void doSetNorm(int doc, String field, byte value)
throws IOException { throws IOException {
Norm norm = norms.get(field); SegmentNorms norm = norms.get(field);
if (norm == null) // not an indexed field if (norm == null) // not an indexed field
return; return;
@ -1004,7 +793,7 @@ public class SegmentReader extends IndexReader implements Cloneable {
} }
} }
norms.put(fi.name, new Norm(normInput, fi.number, normSeek)); norms.put(fi.name, new SegmentNorms(normInput, fi.number, normSeek, this));
nextNormSeek += maxDoc; // increment also if some norms are separate nextNormSeek += maxDoc; // increment also if some norms are separate
} }
} }
@ -1024,7 +813,7 @@ public class SegmentReader extends IndexReader implements Cloneable {
if (singleNormStream != null) { if (singleNormStream != null) {
return false; return false;
} }
for (final Norm norm : norms.values()) { for (final SegmentNorms norm : norms.values()) {
if (norm.refCount > 0) { if (norm.refCount > 0) {
return false; return false;
} }
@ -1171,7 +960,7 @@ public class SegmentReader extends IndexReader implements Cloneable {
rollbackDeletedDocsDirty = deletedDocsDirty; rollbackDeletedDocsDirty = deletedDocsDirty;
rollbackNormsDirty = normsDirty; rollbackNormsDirty = normsDirty;
rollbackPendingDeleteCount = pendingDeleteCount; rollbackPendingDeleteCount = pendingDeleteCount;
for (Norm norm : norms.values()) { for (SegmentNorms norm : norms.values()) {
norm.rollbackDirty = norm.dirty; norm.rollbackDirty = norm.dirty;
} }
} }
@ -1182,7 +971,7 @@ public class SegmentReader extends IndexReader implements Cloneable {
deletedDocsDirty = rollbackDeletedDocsDirty; deletedDocsDirty = rollbackDeletedDocsDirty;
normsDirty = rollbackNormsDirty; normsDirty = rollbackNormsDirty;
pendingDeleteCount = rollbackPendingDeleteCount; pendingDeleteCount = rollbackPendingDeleteCount;
for (Norm norm : norms.values()) { for (SegmentNorms norm : norms.values()) {
norm.dirty = norm.rollbackDirty; norm.dirty = norm.rollbackDirty;
} }
} }

View File

@ -17,7 +17,7 @@ package org.apache.lucene.index;
* limitations under the License. * limitations under the License.
*/ */
import org.apache.lucene.index.SegmentReader.Norm; import org.apache.lucene.index.SegmentNorms;
import org.apache.lucene.search.DefaultSimilarity; import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.Similarity; import org.apache.lucene.search.Similarity;
import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockAnalyzer;
@ -338,7 +338,7 @@ public class TestIndexReaderClone extends LuceneTestCase {
origSegmentReader.close(); origSegmentReader.close();
assertDelDocsRefCountEquals(1, origSegmentReader); assertDelDocsRefCountEquals(1, origSegmentReader);
// check the norm refs // check the norm refs
Norm norm = clonedSegmentReader.norms.get("field1"); SegmentNorms norm = clonedSegmentReader.norms.get("field1");
assertEquals(1, norm.bytesRef().get()); assertEquals(1, norm.bytesRef().get());
clonedSegmentReader.close(); clonedSegmentReader.close();
dir1.close(); dir1.close();

View File

@ -29,7 +29,7 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index; import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.SegmentReader.Norm; import org.apache.lucene.index.SegmentNorms;
import org.apache.lucene.search.DefaultSimilarity; import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.DefaultSimilarityProvider; import org.apache.lucene.search.DefaultSimilarityProvider;
import org.apache.lucene.search.Similarity; import org.apache.lucene.search.Similarity;
@ -184,7 +184,7 @@ public class TestIndexReaderCloneNorms extends LuceneTestCase {
TestIndexReaderReopen.createIndex(random, dir1, false); TestIndexReaderReopen.createIndex(random, dir1, false);
SegmentReader reader1 = getOnlySegmentReader(IndexReader.open(dir1, false)); SegmentReader reader1 = getOnlySegmentReader(IndexReader.open(dir1, false));
reader1.norms("field1"); reader1.norms("field1");
Norm r1norm = reader1.norms.get("field1"); SegmentNorms r1norm = reader1.norms.get("field1");
AtomicInteger r1BytesRef = r1norm.bytesRef(); AtomicInteger r1BytesRef = r1norm.bytesRef();
SegmentReader reader2 = (SegmentReader)reader1.clone(); SegmentReader reader2 = (SegmentReader)reader1.clone();
assertEquals(2, r1norm.bytesRef().get()); assertEquals(2, r1norm.bytesRef().get());
@ -203,14 +203,14 @@ public class TestIndexReaderCloneNorms extends LuceneTestCase {
IndexReader reader2C = (IndexReader) reader1.clone(); IndexReader reader2C = (IndexReader) reader1.clone();
SegmentReader segmentReader2C = getOnlySegmentReader(reader2C); SegmentReader segmentReader2C = getOnlySegmentReader(reader2C);
segmentReader2C.norms("field1"); // load the norms for the field segmentReader2C.norms("field1"); // load the norms for the field
Norm reader2CNorm = segmentReader2C.norms.get("field1"); SegmentNorms reader2CNorm = segmentReader2C.norms.get("field1");
assertTrue("reader2CNorm.bytesRef()=" + reader2CNorm.bytesRef(), reader2CNorm.bytesRef().get() == 2); assertTrue("reader2CNorm.bytesRef()=" + reader2CNorm.bytesRef(), reader2CNorm.bytesRef().get() == 2);
IndexReader reader3C = (IndexReader) reader2C.clone(); IndexReader reader3C = (IndexReader) reader2C.clone();
SegmentReader segmentReader3C = getOnlySegmentReader(reader3C); SegmentReader segmentReader3C = getOnlySegmentReader(reader3C);
Norm reader3CCNorm = segmentReader3C.norms.get("field1"); SegmentNorms reader3CCNorm = segmentReader3C.norms.get("field1");
assertEquals(3, reader3CCNorm.bytesRef().get()); assertEquals(3, reader3CCNorm.bytesRef().get());
// edit a norm and the refcount should be 1 // edit a norm and the refcount should be 1
@ -231,13 +231,13 @@ public class TestIndexReaderCloneNorms extends LuceneTestCase {
// norm values should be different // norm values should be different
assertTrue(sim.decodeNormValue(segmentReader3C.norms("field1")[5]) assertTrue(sim.decodeNormValue(segmentReader3C.norms("field1")[5])
!= sim.decodeNormValue(segmentReader4C.norms("field1")[5])); != sim.decodeNormValue(segmentReader4C.norms("field1")[5]));
Norm reader4CCNorm = segmentReader4C.norms.get("field1"); SegmentNorms reader4CCNorm = segmentReader4C.norms.get("field1");
assertEquals(3, reader3CCNorm.bytesRef().get()); assertEquals(3, reader3CCNorm.bytesRef().get());
assertEquals(1, reader4CCNorm.bytesRef().get()); assertEquals(1, reader4CCNorm.bytesRef().get());
IndexReader reader5C = (IndexReader) reader4C.clone(); IndexReader reader5C = (IndexReader) reader4C.clone();
SegmentReader segmentReader5C = getOnlySegmentReader(reader5C); SegmentReader segmentReader5C = getOnlySegmentReader(reader5C);
Norm reader5CCNorm = segmentReader5C.norms.get("field1"); SegmentNorms reader5CCNorm = segmentReader5C.norms.get("field1");
reader5C.setNorm(5, "field1", sim.encodeNormValue(0.7f)); reader5C.setNorm(5, "field1", sim.encodeNormValue(0.7f));
assertEquals(1, reader5CCNorm.bytesRef().get()); assertEquals(1, reader5CCNorm.bytesRef().get());