diff --git a/solr/contrib/dataimporthandler/README.txt b/solr/contrib/dataimporthandler/README.txt
index 21b4b5d97a5..5bc66ac3201 100644
--- a/solr/contrib/dataimporthandler/README.txt
+++ b/solr/contrib/dataimporthandler/README.txt
@@ -1,215 +1,7 @@
-package org.apache.lucene.index.codecs.preflexrw;
+Although Solr strives to be agnostic of the Locale where the server is
+running, some code paths in DataImportHandler are known to depend on the
+System default Locale, Timezone, or Charset. It is recommended that when
+running Solr you set the following system properties:
+  -Duser.language=xx -Duser.country=YY -Duser.timezone=ZZZ
 
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.index.codecs.FieldsConsumer;
-import org.apache.lucene.index.codecs.TermsConsumer;
-import org.apache.lucene.index.codecs.PostingsConsumer;
-import org.apache.lucene.index.codecs.standard.DefaultSkipListWriter;
-import org.apache.lucene.index.codecs.preflex.PreFlexCodec;
-import org.apache.lucene.index.CorruptIndexException;
-import org.apache.lucene.index.IndexFileNames;
-import org.apache.lucene.index.SegmentWriteState;
-import org.apache.lucene.index.FieldInfo;
-import org.apache.lucene.index.codecs.preflex.TermInfo;
-import org.apache.lucene.index.codecs.docvalues.DocValuesConsumer;
-import org.apache.lucene.store.IndexOutput;
-
-import java.io.IOException;
-import java.util.Comparator;
-
-class PreFlexFieldsWriter extends FieldsConsumer {
-
-  private final TermInfosWriter termsOut;
-  private final IndexOutput freqOut;
-  private final IndexOutput proxOut;
-  private final DefaultSkipListWriter skipListWriter;
-  private final int totalNumDocs;
-
-  public PreFlexFieldsWriter(SegmentWriteState state) throws IOException {
-    termsOut = new TermInfosWriter(state.directory,
-                                   state.segmentName,
-                                   state.fieldInfos,
-                                   state.termIndexInterval);
-
-    final String freqFile = IndexFileNames.segmentFileName(state.segmentName, "", PreFlexCodec.FREQ_EXTENSION);
-    freqOut = state.directory.createOutput(freqFile);
-    totalNumDocs = state.numDocs;
-
-    if (state.fieldInfos.hasProx()) {
-      final String proxFile = IndexFileNames.segmentFileName(state.segmentName, "", PreFlexCodec.PROX_EXTENSION);
-      proxOut = state.directory.createOutput(proxFile);
-    } else {
-      proxOut = null;
-    }
-
-    skipListWriter = new DefaultSkipListWriter(termsOut.skipInterval,
-                                               termsOut.maxSkipLevels,
-                                               totalNumDocs,
-                                               freqOut,
-                                               proxOut);
-    //System.out.println("\nw start seg=" + segment);
-  }
-
-  @Override
-  public TermsConsumer addField(FieldInfo field) throws IOException {
-    assert field.number != -1;
-    //System.out.println("w field=" + field.name + " storePayload=" + field.storePayloads + " number=" + field.number);
-    return new PreFlexTermsWriter(field);
-  }
-
-  @Override
-  public void close() throws IOException {
-    termsOut.close();
-    freqOut.close();
-    if (proxOut != null) {
-      proxOut.close();
-    }
-  }
-
-  private class PreFlexTermsWriter extends TermsConsumer {
-    private final FieldInfo fieldInfo;
-    private final boolean omitTF;
-    private final boolean storePayloads;
-
-    private final TermInfo termInfo = new TermInfo();
-    private final PostingsWriter postingsWriter = new PostingsWriter();
-
-    public PreFlexTermsWriter(FieldInfo fieldInfo) {
-      this.fieldInfo = fieldInfo;
-      omitTF = fieldInfo.omitTermFreqAndPositions;
-      storePayloads = fieldInfo.storePayloads;
-    }
-
-    private class PostingsWriter extends PostingsConsumer {
-      private int lastDocID;
-      private int lastPayloadLength = -1;
-      private int lastPosition;
-      private int df;
-
-      public PostingsWriter reset() {
-        df = 0;
-        lastDocID = 0;
-        lastPayloadLength = -1;
-        return this;
-      }
-
-      @Override
-      public void startDoc(int docID, int termDocFreq) throws IOException {
-        //System.out.println(" w doc=" + docID);
-
-        final int delta = docID - lastDocID;
-        if (docID < 0 || (df > 0 && delta <= 0)) {
-          throw new CorruptIndexException("docs out of order (" + docID + " <= " + lastDocID + " )");
-        }
-
-        if ((++df % termsOut.skipInterval) == 0) {
-          skipListWriter.setSkipData(lastDocID, storePayloads, lastPayloadLength);
-          skipListWriter.bufferSkip(df);
-        }
-
-        lastDocID = docID;
-
-        assert docID < totalNumDocs: "docID=" + docID + " totalNumDocs=" + totalNumDocs;
-
-        if (omitTF) {
-          freqOut.writeVInt(delta);
-        } else {
-          final int code = delta << 1;
-          if (termDocFreq == 1) {
-            freqOut.writeVInt(code|1);
-          } else {
-            freqOut.writeVInt(code);
-            freqOut.writeVInt(termDocFreq);
-          }
-        }
-        lastPosition = 0;
-      }
-
-      @Override
-      public void addPosition(int position, BytesRef payload) throws IOException {
-        assert proxOut != null;
-
-        //System.out.println(" w pos=" + position + " payl=" + payload);
-        final int delta = position - lastPosition;
-        lastPosition = position;
-
-        if (storePayloads) {
-          final int payloadLength = payload == null ? 0 : payload.length;
-          if (payloadLength != lastPayloadLength) {
-            //System.out.println(" write payload len=" + payloadLength);
-            lastPayloadLength = payloadLength;
-            proxOut.writeVInt((delta<<1)|1);
-            proxOut.writeVInt(payloadLength);
-          } else {
-            proxOut.writeVInt(delta << 1);
-          }
-          if (payloadLength > 0) {
-            proxOut.writeBytes(payload.bytes, payload.offset, payload.length);
-          }
-        } else {
-          proxOut.writeVInt(delta);
-        }
-      }
-
-      @Override
-      public void finishDoc() throws IOException {
-      }
-    }
-
-    @Override
-    public PostingsConsumer startTerm(BytesRef text) throws IOException {
-      //System.out.println(" w term=" + text.utf8ToString());
-      skipListWriter.resetSkip();
-      termInfo.freqPointer = freqOut.getFilePointer();
-      if (proxOut != null) {
-        termInfo.proxPointer = proxOut.getFilePointer();
-      }
-      return postingsWriter.reset();
-    }
-
-    @Override
-    public void finishTerm(BytesRef text, int numDocs) throws IOException {
-      if (numDocs > 0) {
-        long skipPointer = skipListWriter.writeSkip(freqOut);
-        termInfo.docFreq = numDocs;
-        termInfo.skipOffset = (int) (skipPointer - termInfo.freqPointer);
-        //System.out.println(" w finish term=" + text.utf8ToString() + " fnum=" + fieldInfo.number);
-        termsOut.add(fieldInfo.number,
-                     text,
-                     termInfo);
-      }
-    }
-
-    @Override
-    public void finish() throws IOException {
-    }
-
-    @Override
-    public Comparator<BytesRef> getComparator() throws IOException {
-      return BytesRef.getUTF8SortedAsUTF16Comparator();
-    }
-  }
-
-  @Override
-  public DocValuesConsumer addValuesField(FieldInfo field) throws IOException {
-    //TODO(simonw): can we fix this easily?
-    throw new UnsupportedOperationException("no implemented");
-  }
-}
\ No newline at end of file
+where xx, YY, and ZZZ are consistent with any database server's configuration.
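
Note (not part of the patch above): the new README guidance exists because
classes such as java.text.SimpleDateFormat silently pick up the JVM's default
Locale and TimeZone unless they are set explicitly. The following is a minimal,
self-contained sketch of that sensitivity; the class name and date pattern are
illustrative assumptions, not Solr or DataImportHandler code.

import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Locale;
import java.util.TimeZone;

// Illustrative only: shows why pinning -Duser.language, -Duser.country, and
// -Duser.timezone makes date handling reproducible across differently
// configured hosts.
public class DefaultLocaleDemo {
  public static void main(String[] args) {
    Date now = new Date();

    // Uses the JVM's default Locale and TimeZone, so the same code prints
    // differently depending on the host's settings (or any -Duser.* flags
    // passed at startup):
    SimpleDateFormat implicit = new SimpleDateFormat("EEE dd MMM yyyy HH:mm:ss z");
    System.out.println("default: " + implicit.format(now));

    // Fixing Locale and TimeZone explicitly gives stable output; the
    // recommended system properties achieve the same effect process-wide:
    SimpleDateFormat pinned = new SimpleDateFormat("EEE dd MMM yyyy HH:mm:ss z", Locale.US);
    pinned.setTimeZone(TimeZone.getTimeZone("UTC"));
    System.out.println("pinned : " + pinned.format(now));
  }
}

Launching the JVM with, for example, -Duser.language=en -Duser.country=US
-Duser.timezone=UTC would make the "default" line above match the "pinned"
one regardless of the host operating system's configuration.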