LUCENE-3108: reverted README.TXT

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1133785 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Simon Willnauer 2011-06-09 10:44:25 +00:00
parent 2a6315db4a
commit 622564d474
1 changed file with 6 additions and 214 deletions

View File

@ -1,215 +1,7 @@
package org.apache.lucene.index.codecs.preflexrw;
Although Solr strives to be agnostic of the Locale where the server is
running, some code paths in DataImportHandler are known to depend on the
System default Locale, Timezone, or Charset. It is recommended that when
running Solr you set the following system properties:
-Duser.language=xx -Duser.country=YY -Duser.timezone=ZZZ
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.index.codecs.FieldsConsumer;
import org.apache.lucene.index.codecs.TermsConsumer;
import org.apache.lucene.index.codecs.PostingsConsumer;
import org.apache.lucene.index.codecs.standard.DefaultSkipListWriter;
import org.apache.lucene.index.codecs.preflex.PreFlexCodec;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.codecs.preflex.TermInfo;
import org.apache.lucene.index.codecs.docvalues.DocValuesConsumer;
import org.apache.lucene.store.IndexOutput;
import java.io.IOException;
import java.util.Comparator;
class PreFlexFieldsWriter extends FieldsConsumer {
private final TermInfosWriter termsOut;
private final IndexOutput freqOut;
private final IndexOutput proxOut;
private final DefaultSkipListWriter skipListWriter;
private final int totalNumDocs;
public PreFlexFieldsWriter(SegmentWriteState state) throws IOException {
termsOut = new TermInfosWriter(state.directory,
state.segmentName,
state.fieldInfos,
state.termIndexInterval);
final String freqFile = IndexFileNames.segmentFileName(state.segmentName, "", PreFlexCodec.FREQ_EXTENSION);
freqOut = state.directory.createOutput(freqFile);
totalNumDocs = state.numDocs;
if (state.fieldInfos.hasProx()) {
final String proxFile = IndexFileNames.segmentFileName(state.segmentName, "", PreFlexCodec.PROX_EXTENSION);
proxOut = state.directory.createOutput(proxFile);
} else {
proxOut = null;
}
skipListWriter = new DefaultSkipListWriter(termsOut.skipInterval,
termsOut.maxSkipLevels,
totalNumDocs,
freqOut,
proxOut);
//System.out.println("\nw start seg=" + segment);
}
@Override
public TermsConsumer addField(FieldInfo field) throws IOException {
assert field.number != -1;
//System.out.println("w field=" + field.name + " storePayload=" + field.storePayloads + " number=" + field.number);
return new PreFlexTermsWriter(field);
}
@Override
public void close() throws IOException {
termsOut.close();
freqOut.close();
if (proxOut != null) {
proxOut.close();
}
}
private class PreFlexTermsWriter extends TermsConsumer {
private final FieldInfo fieldInfo;
private final boolean omitTF;
private final boolean storePayloads;
private final TermInfo termInfo = new TermInfo();
private final PostingsWriter postingsWriter = new PostingsWriter();
public PreFlexTermsWriter(FieldInfo fieldInfo) {
this.fieldInfo = fieldInfo;
omitTF = fieldInfo.omitTermFreqAndPositions;
storePayloads = fieldInfo.storePayloads;
}
private class PostingsWriter extends PostingsConsumer {
private int lastDocID;
private int lastPayloadLength = -1;
private int lastPosition;
private int df;
public PostingsWriter reset() {
df = 0;
lastDocID = 0;
lastPayloadLength = -1;
return this;
}
@Override
public void startDoc(int docID, int termDocFreq) throws IOException {
//System.out.println(" w doc=" + docID);
final int delta = docID - lastDocID;
if (docID < 0 || (df > 0 && delta <= 0)) {
throw new CorruptIndexException("docs out of order (" + docID + " <= " + lastDocID + " )");
}
if ((++df % termsOut.skipInterval) == 0) {
skipListWriter.setSkipData(lastDocID, storePayloads, lastPayloadLength);
skipListWriter.bufferSkip(df);
}
lastDocID = docID;
assert docID < totalNumDocs: "docID=" + docID + " totalNumDocs=" + totalNumDocs;
if (omitTF) {
freqOut.writeVInt(delta);
} else {
final int code = delta << 1;
if (termDocFreq == 1) {
freqOut.writeVInt(code|1);
} else {
freqOut.writeVInt(code);
freqOut.writeVInt(termDocFreq);
}
}
lastPosition = 0;
}
@Override
public void addPosition(int position, BytesRef payload) throws IOException {
assert proxOut != null;
//System.out.println(" w pos=" + position + " payl=" + payload);
final int delta = position - lastPosition;
lastPosition = position;
if (storePayloads) {
final int payloadLength = payload == null ? 0 : payload.length;
if (payloadLength != lastPayloadLength) {
//System.out.println(" write payload len=" + payloadLength);
lastPayloadLength = payloadLength;
proxOut.writeVInt((delta<<1)|1);
proxOut.writeVInt(payloadLength);
} else {
proxOut.writeVInt(delta << 1);
}
if (payloadLength > 0) {
proxOut.writeBytes(payload.bytes, payload.offset, payload.length);
}
} else {
proxOut.writeVInt(delta);
}
}
@Override
public void finishDoc() throws IOException {
}
}
@Override
public PostingsConsumer startTerm(BytesRef text) throws IOException {
//System.out.println(" w term=" + text.utf8ToString());
skipListWriter.resetSkip();
termInfo.freqPointer = freqOut.getFilePointer();
if (proxOut != null) {
termInfo.proxPointer = proxOut.getFilePointer();
}
return postingsWriter.reset();
}
@Override
public void finishTerm(BytesRef text, int numDocs) throws IOException {
if (numDocs > 0) {
long skipPointer = skipListWriter.writeSkip(freqOut);
termInfo.docFreq = numDocs;
termInfo.skipOffset = (int) (skipPointer - termInfo.freqPointer);
//System.out.println(" w finish term=" + text.utf8ToString() + " fnum=" + fieldInfo.number);
termsOut.add(fieldInfo.number,
text,
termInfo);
}
}
@Override
public void finish() throws IOException {
}
@Override
public Comparator<BytesRef> getComparator() throws IOException {
return BytesRef.getUTF8SortedAsUTF16Comparator();
}
}
@Override
public DocValuesConsumer addValuesField(FieldInfo field) throws IOException {
//TODO(simonw): can we fix this easily?
throw new UnsupportedOperationException("no implemented");
}
}
where xx, YY, and ZZZ are consistent with any database server's configuration.