mirror of https://github.com/apache/lucene.git
LUCENE-3576: add surrogates shuffle for 3.x term vectors, test surrogates in TestBackwardsCompatibility
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1232974 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
3d66fa922a
commit
98e5328845
|
@ -19,6 +19,7 @@ package org.apache.lucene.codecs;
|
||||||
|
|
||||||
import java.io.Closeable;
|
import java.io.Closeable;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Comparator;
|
||||||
|
|
||||||
import org.apache.lucene.index.DocsAndPositionsEnum;
|
import org.apache.lucene.index.DocsAndPositionsEnum;
|
||||||
import org.apache.lucene.index.DocsEnum;
|
import org.apache.lucene.index.DocsEnum;
|
||||||
|
@ -273,4 +274,8 @@ public abstract class TermVectorsWriter implements Closeable {
|
||||||
assert termCount == numTerms;
|
assert termCount == numTerms;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Return the BytesRef Comparator used to sort terms
|
||||||
|
* before feeding to this API. */
|
||||||
|
public abstract Comparator<BytesRef> getComparator() throws IOException;
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.codecs.lucene3x;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Arrays;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
@ -265,11 +266,13 @@ public class Lucene3xTermVectorsReader extends TermVectorsReader {
|
||||||
private class TVTerms extends Terms {
|
private class TVTerms extends Terms {
|
||||||
private final int numTerms;
|
private final int numTerms;
|
||||||
private final long tvfFPStart;
|
private final long tvfFPStart;
|
||||||
|
private final boolean unicodeSortOrder;
|
||||||
|
|
||||||
public TVTerms(long tvfFP) throws IOException {
|
public TVTerms(long tvfFP) throws IOException {
|
||||||
tvf.seek(tvfFP);
|
tvf.seek(tvfFP);
|
||||||
numTerms = tvf.readVInt();
|
numTerms = tvf.readVInt();
|
||||||
tvfFPStart = tvf.getFilePointer();
|
tvfFPStart = tvf.getFilePointer();
|
||||||
|
unicodeSortOrder = sortTermsByUnicode();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -283,7 +286,7 @@ public class Lucene3xTermVectorsReader extends TermVectorsReader {
|
||||||
} else {
|
} else {
|
||||||
termsEnum = new TVTermsEnum();
|
termsEnum = new TVTermsEnum();
|
||||||
}
|
}
|
||||||
termsEnum.reset(numTerms, tvfFPStart);
|
termsEnum.reset(numTerms, tvfFPStart, unicodeSortOrder);
|
||||||
return termsEnum;
|
return termsEnum;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -310,27 +313,32 @@ public class Lucene3xTermVectorsReader extends TermVectorsReader {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Comparator<BytesRef> getComparator() {
|
public Comparator<BytesRef> getComparator() {
|
||||||
// TODO: really indexer hardwires
|
if (unicodeSortOrder) {
|
||||||
// this...? I guess codec could buffer and re-sort...
|
|
||||||
return BytesRef.getUTF8SortedAsUnicodeComparator();
|
return BytesRef.getUTF8SortedAsUnicodeComparator();
|
||||||
|
} else {
|
||||||
|
return BytesRef.getUTF8SortedAsUTF16Comparator();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static class TermAndPostings {
|
||||||
|
BytesRef term;
|
||||||
|
int freq;
|
||||||
|
int[] positions;
|
||||||
|
int[] startOffsets;
|
||||||
|
int[] endOffsets;
|
||||||
|
}
|
||||||
|
|
||||||
private class TVTermsEnum extends TermsEnum {
|
private class TVTermsEnum extends TermsEnum {
|
||||||
|
private boolean unicodeSortOrder;
|
||||||
private final IndexInput origTVF;
|
private final IndexInput origTVF;
|
||||||
private final IndexInput tvf;
|
private final IndexInput tvf;
|
||||||
private int numTerms;
|
private int numTerms;
|
||||||
private int nextTerm;
|
private int currentTerm;
|
||||||
private int freq;
|
|
||||||
private BytesRef lastTerm = new BytesRef();
|
|
||||||
private BytesRef term = new BytesRef();
|
|
||||||
private boolean storePositions;
|
private boolean storePositions;
|
||||||
private boolean storeOffsets;
|
private boolean storeOffsets;
|
||||||
private long tvfFP;
|
|
||||||
|
|
||||||
private int[] positions;
|
private TermAndPostings[] termAndPostings;
|
||||||
private int[] startOffsets;
|
|
||||||
private int[] endOffsets;
|
|
||||||
|
|
||||||
// NOTE: tvf is pre-positioned by caller
|
// NOTE: tvf is pre-positioned by caller
|
||||||
public TVTermsEnum() throws IOException {
|
public TVTermsEnum() throws IOException {
|
||||||
|
@ -342,37 +350,81 @@ public class Lucene3xTermVectorsReader extends TermVectorsReader {
|
||||||
return tvf == origTVF;
|
return tvf == origTVF;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void reset(int numTerms, long tvfFPStart) throws IOException {
|
public void reset(int numTerms, long tvfFPStart, boolean unicodeSortOrder) throws IOException {
|
||||||
this.numTerms = numTerms;
|
this.numTerms = numTerms;
|
||||||
nextTerm = 0;
|
currentTerm = -1;
|
||||||
tvf.seek(tvfFPStart);
|
tvf.seek(tvfFPStart);
|
||||||
final byte bits = tvf.readByte();
|
final byte bits = tvf.readByte();
|
||||||
storePositions = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
|
storePositions = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
|
||||||
storeOffsets = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
|
storeOffsets = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
|
||||||
tvfFP = 1+tvfFPStart;
|
this.unicodeSortOrder = unicodeSortOrder;
|
||||||
positions = null;
|
readVectors();
|
||||||
startOffsets = null;
|
if (unicodeSortOrder) {
|
||||||
endOffsets = null;
|
Arrays.sort(termAndPostings, new Comparator<TermAndPostings>() {
|
||||||
|
public int compare(TermAndPostings left, TermAndPostings right) {
|
||||||
|
return left.term.compareTo(right.term);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void readVectors() throws IOException {
|
||||||
|
termAndPostings = new TermAndPostings[numTerms];
|
||||||
|
BytesRef lastTerm = new BytesRef();
|
||||||
|
for (int i = 0; i < numTerms; i++) {
|
||||||
|
TermAndPostings t = new TermAndPostings();
|
||||||
|
BytesRef term = new BytesRef();
|
||||||
|
term.copyBytes(lastTerm);
|
||||||
|
final int start = tvf.readVInt();
|
||||||
|
final int deltaLen = tvf.readVInt();
|
||||||
|
term.length = start + deltaLen;
|
||||||
|
term.grow(term.length);
|
||||||
|
tvf.readBytes(term.bytes, start, deltaLen);
|
||||||
|
t.term = term;
|
||||||
|
int freq = tvf.readVInt();
|
||||||
|
t.freq = freq;
|
||||||
|
|
||||||
|
if (storePositions) {
|
||||||
|
int positions[] = new int[freq];
|
||||||
|
int pos = 0;
|
||||||
|
for(int posUpto=0;posUpto<freq;posUpto++) {
|
||||||
|
pos += tvf.readVInt();
|
||||||
|
positions[posUpto] = pos;
|
||||||
|
}
|
||||||
|
t.positions = positions;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (storeOffsets) {
|
||||||
|
int startOffsets[] = new int[freq];
|
||||||
|
int endOffsets[] = new int[freq];
|
||||||
|
int offset = 0;
|
||||||
|
for(int posUpto=0;posUpto<freq;posUpto++) {
|
||||||
|
startOffsets[posUpto] = offset + tvf.readVInt();
|
||||||
|
offset = endOffsets[posUpto] = startOffsets[posUpto] + tvf.readVInt();
|
||||||
|
}
|
||||||
|
t.startOffsets = startOffsets;
|
||||||
|
t.endOffsets = endOffsets;
|
||||||
|
}
|
||||||
|
lastTerm.copyBytes(term);
|
||||||
|
termAndPostings[i] = t;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// NOTE: slow! (linear scan)
|
// NOTE: slow! (linear scan)
|
||||||
@Override
|
@Override
|
||||||
public SeekStatus seekCeil(BytesRef text, boolean useCache)
|
public SeekStatus seekCeil(BytesRef text, boolean useCache) throws IOException {
|
||||||
throws IOException {
|
Comparator<BytesRef> comparator = getComparator();
|
||||||
if (nextTerm != 0 && text.compareTo(term) < 0) {
|
for (int i = 0; i < numTerms; i++) {
|
||||||
nextTerm = 0;
|
int cmp = comparator.compare(text, termAndPostings[i].term);
|
||||||
tvf.seek(tvfFP);
|
|
||||||
}
|
|
||||||
|
|
||||||
while (next() != null) {
|
|
||||||
final int cmp = text.compareTo(term);
|
|
||||||
if (cmp < 0) {
|
if (cmp < 0) {
|
||||||
|
currentTerm = i;
|
||||||
return SeekStatus.NOT_FOUND;
|
return SeekStatus.NOT_FOUND;
|
||||||
} else if (cmp == 0) {
|
} else if (cmp == 0) {
|
||||||
|
currentTerm = i;
|
||||||
return SeekStatus.FOUND;
|
return SeekStatus.FOUND;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
currentTerm = termAndPostings.length;
|
||||||
return SeekStatus.END;
|
return SeekStatus.END;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -383,47 +435,15 @@ public class Lucene3xTermVectorsReader extends TermVectorsReader {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public BytesRef next() throws IOException {
|
public BytesRef next() throws IOException {
|
||||||
if (nextTerm >= numTerms) {
|
if (++currentTerm >= numTerms) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
term.copyBytes(lastTerm);
|
return term();
|
||||||
final int start = tvf.readVInt();
|
|
||||||
final int deltaLen = tvf.readVInt();
|
|
||||||
term.length = start + deltaLen;
|
|
||||||
term.grow(term.length);
|
|
||||||
tvf.readBytes(term.bytes, start, deltaLen);
|
|
||||||
freq = tvf.readVInt();
|
|
||||||
|
|
||||||
if (storePositions) {
|
|
||||||
// TODO: we could maybe reuse last array, if we can
|
|
||||||
// somehow be careful about consumer never using two
|
|
||||||
// D&PEnums at once...
|
|
||||||
positions = new int[freq];
|
|
||||||
int pos = 0;
|
|
||||||
for(int posUpto=0;posUpto<freq;posUpto++) {
|
|
||||||
pos += tvf.readVInt();
|
|
||||||
positions[posUpto] = pos;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (storeOffsets) {
|
|
||||||
startOffsets = new int[freq];
|
|
||||||
endOffsets = new int[freq];
|
|
||||||
int offset = 0;
|
|
||||||
for(int posUpto=0;posUpto<freq;posUpto++) {
|
|
||||||
startOffsets[posUpto] = offset + tvf.readVInt();
|
|
||||||
offset = endOffsets[posUpto] = startOffsets[posUpto] + tvf.readVInt();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
lastTerm.copyBytes(term);
|
|
||||||
nextTerm++;
|
|
||||||
return term;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public BytesRef term() {
|
public BytesRef term() {
|
||||||
return term;
|
return termAndPostings[currentTerm].term;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -438,7 +458,7 @@ public class Lucene3xTermVectorsReader extends TermVectorsReader {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long totalTermFreq() {
|
public long totalTermFreq() {
|
||||||
return freq;
|
return termAndPostings[currentTerm].freq;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -449,7 +469,7 @@ public class Lucene3xTermVectorsReader extends TermVectorsReader {
|
||||||
} else {
|
} else {
|
||||||
docsEnum = new TVDocsEnum();
|
docsEnum = new TVDocsEnum();
|
||||||
}
|
}
|
||||||
docsEnum.reset(liveDocs, freq);
|
docsEnum.reset(liveDocs, termAndPostings[currentTerm]);
|
||||||
return docsEnum;
|
return docsEnum;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -469,15 +489,17 @@ public class Lucene3xTermVectorsReader extends TermVectorsReader {
|
||||||
} else {
|
} else {
|
||||||
docsAndPositionsEnum = new TVDocsAndPositionsEnum();
|
docsAndPositionsEnum = new TVDocsAndPositionsEnum();
|
||||||
}
|
}
|
||||||
docsAndPositionsEnum.reset(liveDocs, positions, startOffsets, endOffsets);
|
docsAndPositionsEnum.reset(liveDocs, termAndPostings[currentTerm]);
|
||||||
return docsAndPositionsEnum;
|
return docsAndPositionsEnum;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Comparator<BytesRef> getComparator() {
|
public Comparator<BytesRef> getComparator() {
|
||||||
// TODO: really indexer hardwires
|
if (unicodeSortOrder) {
|
||||||
// this...? I guess codec could buffer and re-sort...
|
|
||||||
return BytesRef.getUTF8SortedAsUnicodeComparator();
|
return BytesRef.getUTF8SortedAsUnicodeComparator();
|
||||||
|
} else {
|
||||||
|
return BytesRef.getUTF8SortedAsUTF16Comparator();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -518,9 +540,9 @@ public class Lucene3xTermVectorsReader extends TermVectorsReader {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void reset(Bits liveDocs, int freq) {
|
public void reset(Bits liveDocs, TermAndPostings termAndPostings) {
|
||||||
this.liveDocs = liveDocs;
|
this.liveDocs = liveDocs;
|
||||||
this.freq = freq;
|
this.freq = termAndPostings.freq;
|
||||||
this.doc = -1;
|
this.doc = -1;
|
||||||
didNext = false;
|
didNext = false;
|
||||||
}
|
}
|
||||||
|
@ -569,11 +591,11 @@ public class Lucene3xTermVectorsReader extends TermVectorsReader {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void reset(Bits liveDocs, int[] positions, int[] startOffsets, int[] endOffsets) {
|
public void reset(Bits liveDocs, TermAndPostings termAndPostings) {
|
||||||
this.liveDocs = liveDocs;
|
this.liveDocs = liveDocs;
|
||||||
this.positions = positions;
|
this.positions = termAndPostings.positions;
|
||||||
this.startOffsets = startOffsets;
|
this.startOffsets = termAndPostings.startOffsets;
|
||||||
this.endOffsets = endOffsets;
|
this.endOffsets = termAndPostings.endOffsets;
|
||||||
this.doc = -1;
|
this.doc = -1;
|
||||||
didNext = false;
|
didNext = false;
|
||||||
nextPos = 0;
|
nextPos = 0;
|
||||||
|
@ -668,5 +690,14 @@ public class Lucene3xTermVectorsReader extends TermVectorsReader {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If this returns true, we do the surrogates shuffle so that the
|
||||||
|
// terms are sorted by unicode sort order. This should be
|
||||||
|
// true when segments are used for "normal" searching;
|
||||||
|
// it's only false during testing, to create a pre-flex
|
||||||
|
// index, using the test-only PreFlexRW.
|
||||||
|
protected boolean sortTermsByUnicode() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.codecs.lucene40;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Comparator;
|
||||||
|
|
||||||
import org.apache.lucene.codecs.TermVectorsReader;
|
import org.apache.lucene.codecs.TermVectorsReader;
|
||||||
import org.apache.lucene.codecs.TermVectorsWriter;
|
import org.apache.lucene.codecs.TermVectorsWriter;
|
||||||
|
@ -365,4 +366,9 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
|
||||||
IOUtils.close(tvx, tvd, tvf);
|
IOUtils.close(tvx, tvd, tvf);
|
||||||
tvx = tvd = tvf = null;
|
tvx = tvd = tvf = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Comparator<BytesRef> getComparator() throws IOException {
|
||||||
|
return BytesRef.getUTF8SortedAsUnicodeComparator();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.codecs.simpletext;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Comparator;
|
||||||
|
|
||||||
import org.apache.lucene.codecs.TermVectorsWriter;
|
import org.apache.lucene.codecs.TermVectorsWriter;
|
||||||
import org.apache.lucene.index.FieldInfo;
|
import org.apache.lucene.index.FieldInfo;
|
||||||
|
@ -170,6 +171,11 @@ public class SimpleTextTermVectorsWriter extends TermVectorsWriter {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Comparator<BytesRef> getComparator() throws IOException {
|
||||||
|
return BytesRef.getUTF8SortedAsUnicodeComparator();
|
||||||
|
}
|
||||||
|
|
||||||
private void write(String s) throws IOException {
|
private void write(String s) throws IOException {
|
||||||
SimpleTextUtil.write(out, s, scratch);
|
SimpleTextUtil.write(out, s, scratch);
|
||||||
}
|
}
|
||||||
|
|
|
@ -118,9 +118,7 @@ final class TermVectorsConsumerPerField extends TermsHashConsumerPerField {
|
||||||
TermVectorsPostingsArray postings = (TermVectorsPostingsArray) termsHashPerField.postingsArray;
|
TermVectorsPostingsArray postings = (TermVectorsPostingsArray) termsHashPerField.postingsArray;
|
||||||
final TermVectorsWriter tv = termsWriter.writer;
|
final TermVectorsWriter tv = termsWriter.writer;
|
||||||
|
|
||||||
// TODO: we may want to make this sort in same order
|
final int[] termIDs = termsHashPerField.sortPostings(tv.getComparator());
|
||||||
// as Codec's terms dict?
|
|
||||||
final int[] termIDs = termsHashPerField.sortPostings(BytesRef.getUTF8SortedAsUnicodeComparator());
|
|
||||||
|
|
||||||
tv.startField(fieldInfo, numPostings, doVectorPositions, doVectorOffsets);
|
tv.startField(fieldInfo, numPostings, doVectorPositions, doVectorOffsets);
|
||||||
|
|
||||||
|
|
|
@ -19,10 +19,15 @@ package org.apache.lucene.codecs.preflexrw;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.codecs.TermVectorsReader;
|
||||||
import org.apache.lucene.codecs.TermVectorsWriter;
|
import org.apache.lucene.codecs.TermVectorsWriter;
|
||||||
import org.apache.lucene.codecs.lucene3x.Lucene3xTermVectorsFormat;
|
import org.apache.lucene.codecs.lucene3x.Lucene3xTermVectorsFormat;
|
||||||
|
import org.apache.lucene.codecs.lucene3x.Lucene3xTermVectorsReader;
|
||||||
|
import org.apache.lucene.index.FieldInfos;
|
||||||
|
import org.apache.lucene.index.SegmentInfo;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.store.IOContext;
|
import org.apache.lucene.store.IOContext;
|
||||||
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
|
||||||
public class PreFlexRWTermVectorsFormat extends Lucene3xTermVectorsFormat {
|
public class PreFlexRWTermVectorsFormat extends Lucene3xTermVectorsFormat {
|
||||||
|
|
||||||
|
@ -30,4 +35,30 @@ public class PreFlexRWTermVectorsFormat extends Lucene3xTermVectorsFormat {
|
||||||
public TermVectorsWriter vectorsWriter(Directory directory, String segment, IOContext context) throws IOException {
|
public TermVectorsWriter vectorsWriter(Directory directory, String segment, IOContext context) throws IOException {
|
||||||
return new PreFlexRWTermVectorsWriter(directory, segment, context);
|
return new PreFlexRWTermVectorsWriter(directory, segment, context);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public TermVectorsReader vectorsReader(Directory directory, SegmentInfo segmentInfo, FieldInfos fieldInfos, IOContext context) throws IOException {
|
||||||
|
return new Lucene3xTermVectorsReader(directory, segmentInfo, fieldInfos, context) {
|
||||||
|
@Override
|
||||||
|
protected boolean sortTermsByUnicode() {
|
||||||
|
// We carefully peek into the stack trace above us: if
|
||||||
|
// we are part of a "merge", we must sort by UTF16:
|
||||||
|
boolean unicodeSortOrder = true;
|
||||||
|
|
||||||
|
StackTraceElement[] trace = new Exception().getStackTrace();
|
||||||
|
for (int i = 0; i < trace.length; i++) {
|
||||||
|
//System.out.println(trace[i].getClassName());
|
||||||
|
if ("merge".equals(trace[i].getMethodName())) {
|
||||||
|
unicodeSortOrder = false;
|
||||||
|
if (LuceneTestCase.VERBOSE) {
|
||||||
|
System.out.println("NOTE: PreFlexRW codec: forcing legacy UTF16 vector term sort order");
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return unicodeSortOrder;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.codecs.preflexrw;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Comparator;
|
||||||
|
|
||||||
import org.apache.lucene.codecs.TermVectorsWriter;
|
import org.apache.lucene.codecs.TermVectorsWriter;
|
||||||
import org.apache.lucene.codecs.lucene3x.Lucene3xTermVectorsReader;
|
import org.apache.lucene.codecs.lucene3x.Lucene3xTermVectorsReader;
|
||||||
|
@ -32,7 +33,6 @@ import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.IOUtils;
|
import org.apache.lucene.util.IOUtils;
|
||||||
import org.apache.lucene.util.StringHelper;
|
import org.apache.lucene.util.StringHelper;
|
||||||
|
|
||||||
// TODO: surrogates dance!
|
|
||||||
public final class PreFlexRWTermVectorsWriter extends TermVectorsWriter {
|
public final class PreFlexRWTermVectorsWriter extends TermVectorsWriter {
|
||||||
private final Directory directory;
|
private final Directory directory;
|
||||||
private final String segment;
|
private final String segment;
|
||||||
|
@ -213,4 +213,9 @@ public final class PreFlexRWTermVectorsWriter extends TermVectorsWriter {
|
||||||
IOUtils.close(tvx, tvd, tvf);
|
IOUtils.close(tvx, tvd, tvf);
|
||||||
tvx = tvd = tvf = null;
|
tvx = tvd = tvf = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Comparator<BytesRef> getComparator() throws IOException {
|
||||||
|
return BytesRef.getUTF8SortedAsUTF16Comparator();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -735,4 +735,15 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static final String surrogatesIndexName = "index.36.surrogates.zip";
|
||||||
|
|
||||||
|
public void testSurrogates() throws Exception {
|
||||||
|
File oldIndexDir = _TestUtil.getTempDir("surrogates");
|
||||||
|
_TestUtil.unzip(getDataFile(surrogatesIndexName), oldIndexDir);
|
||||||
|
Directory dir = newFSDirectory(oldIndexDir);
|
||||||
|
// TODO: more tests
|
||||||
|
_TestUtil.checkIndex(dir);
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Binary file not shown.
Loading…
Reference in New Issue