();
for (Fieldable field : fields) {
@@ -230,6 +242,8 @@ public final class Document {
* Returns an array of values of the field specified as the method parameter.
* This method returns an empty array when there are no
* matching fields. It never returns null.
+ * For {@link NumericField}s it returns the string value of the number. If you want
+ * the actual {@code NumericField} instances back, use {@link #getFieldables}.
* @param name the name of the field
* @return a String[] of field values
*/
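To illustrate the distinction the updated getValues() javadoc draws, here is a minimal sketch (not part of the patch) of reading a stored numeric field back from a search hit; the helper class and field name are made up for illustration:

```java
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.NumericField;

public final class NumericStoredFieldExample {
  /** Prints both views of a stored numeric field from a search hit. */
  static void show(Document doc, String fieldName) {
    // get()/getValues() return the number converted to a String.
    String asText = doc.get(fieldName);

    // getFieldable()/getFieldables() return the NumericField instance itself,
    // so the original numeric value is available via getNumericValue().
    Fieldable f = doc.getFieldable(fieldName);
    Number asNumber = (f instanceof NumericField) ? ((NumericField) f).getNumericValue() : null;

    System.out.println(fieldName + ": text=" + asText + ", number=" + asNumber);
  }
}
```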
diff --git a/lucene/src/java/org/apache/lucene/document/NumericField.java b/lucene/src/java/org/apache/lucene/document/NumericField.java
index 6cae722a1d9..3bd46cf0e2f 100644
--- a/lucene/src/java/org/apache/lucene/document/NumericField.java
+++ b/lucene/src/java/org/apache/lucene/document/NumericField.java
@@ -127,18 +127,18 @@ import org.apache.lucene.search.FieldCache; // javadocs
* class is a wrapper around this token stream type for
* easier, more intuitive usage.
*
- * NOTE: This class is only used during
- * indexing. When retrieving the stored field value from a
- * {@link Document} instance after search, you will get a
- * conventional {@link Fieldable} instance where the numeric
- * values are returned as {@link String}s (according to
- * toString(value) of the used data type).
- *
* @since 2.9
*/
public final class NumericField extends AbstractField {
- private final NumericTokenStream numericTS;
+ /** Data type of the value in {@link NumericField}.
+ * @since 3.2
+ */
+ public static enum DataType { INT, LONG, FLOAT, DOUBLE }
+
+ private transient NumericTokenStream numericTS;
+ private DataType type;
+ private final int precisionStep;
/**
* Creates a field for numeric values using the default precisionStep
@@ -158,8 +158,8 @@ public final class NumericField extends AbstractField {
* a numeric value, before indexing a document containing this field,
* set a value using the various set???Value() methods.
* @param name the field name
- * @param store if the field should be stored in plain text form
- * (according to toString(value) of the used data type)
+ * @param store if the field should be stored, {@link Document#getFieldable}
+ * then returns {@code NumericField} instances on search results.
* @param index if the field should be indexed using {@link NumericTokenStream}
*/
public NumericField(String name, Field.Store store, boolean index) {
@@ -186,19 +186,43 @@ public final class NumericField extends AbstractField {
* set a value using the various set???Value() methods.
* @param name the field name
* @param precisionStep the used precision step
- * @param store if the field should be stored in plain text form
- * (according to toString(value) of the used data type)
+ * @param store if the field should be stored, {@link Document#getFieldable}
+ * then returns {@code NumericField} instances on search results.
* @param index if the field should be indexed using {@link NumericTokenStream}
*/
public NumericField(String name, int precisionStep, Field.Store store, boolean index) {
super(name, store, index ? Field.Index.ANALYZED_NO_NORMS : Field.Index.NO, Field.TermVector.NO);
+ this.precisionStep = precisionStep;
setOmitTermFreqAndPositions(true);
- numericTS = new NumericTokenStream(precisionStep);
}
/** Returns a {@link NumericTokenStream} for indexing the numeric value. */
public TokenStream tokenStreamValue() {
- return isIndexed() ? numericTS : null;
+ if (!isIndexed())
+ return null;
+ if (numericTS == null) {
+ // lazy init the TokenStream as it is heavy to instantiate (attributes,...),
+ // if not needed (stored field loading)
+ numericTS = new NumericTokenStream(precisionStep);
+ // initialize value in TokenStream
+ if (fieldsData != null) {
+ assert type != null;
+ final Number val = (Number) fieldsData;
+ switch (type) {
+ case INT:
+ numericTS.setIntValue(val.intValue()); break;
+ case LONG:
+ numericTS.setLongValue(val.longValue()); break;
+ case FLOAT:
+ numericTS.setFloatValue(val.floatValue()); break;
+ case DOUBLE:
+ numericTS.setDoubleValue(val.doubleValue()); break;
+ default:
+ assert false : "Should never get here";
+ }
+ }
+ }
+ return numericTS;
}
/** Returns always null for numeric fields */
@@ -212,7 +236,10 @@ public final class NumericField extends AbstractField {
return null;
}
- /** Returns the numeric value as a string (how it is stored, when {@link Field.Store#YES} is chosen). */
+ /** Returns the numeric value as a string. This format is also returned if you call {@link Document#get(String)}
+ * on search results. It is recommended to use {@link Document#getFieldable} instead,
+ * which returns {@code NumericField} instances. You can then use {@link #getNumericValue}
+ * to retrieve the stored value. */
public String stringValue() {
return (fieldsData == null) ? null : fieldsData.toString();
}
@@ -224,7 +251,14 @@ public final class NumericField extends AbstractField {
/** Returns the precision step. */
public int getPrecisionStep() {
- return numericTS.getPrecisionStep();
+ return precisionStep;
+ }
+
+ /** Returns the data type of the current value, {@code null} if not yet set.
+ * @since 3.2
+ */
+ public DataType getDataType() {
+ return type;
}
/**
@@ -234,8 +268,9 @@ public final class NumericField extends AbstractField {
* document.add(new NumericField(name, precisionStep).setLongValue(value))
*/
public NumericField setLongValue(final long value) {
- numericTS.setLongValue(value);
+ if (numericTS != null) numericTS.setLongValue(value);
fieldsData = Long.valueOf(value);
+ type = DataType.LONG;
return this;
}
@@ -246,8 +281,9 @@ public final class NumericField extends AbstractField {
* document.add(new NumericField(name, precisionStep).setIntValue(value))
*/
public NumericField setIntValue(final int value) {
- numericTS.setIntValue(value);
+ if (numericTS != null) numericTS.setIntValue(value);
fieldsData = Integer.valueOf(value);
+ type = DataType.INT;
return this;
}
@@ -258,8 +294,9 @@ public final class NumericField extends AbstractField {
* document.add(new NumericField(name, precisionStep).setDoubleValue(value))
*/
public NumericField setDoubleValue(final double value) {
- numericTS.setDoubleValue(value);
+ if (numericTS != null) numericTS.setDoubleValue(value);
fieldsData = Double.valueOf(value);
+ type = DataType.DOUBLE;
return this;
}
@@ -270,8 +307,9 @@ public final class NumericField extends AbstractField {
* document.add(new NumericField(name, precisionStep).setFloatValue(value))
*/
public NumericField setFloatValue(final float value) {
- numericTS.setFloatValue(value);
+ if (numericTS != null) numericTS.setFloatValue(value);
fieldsData = Float.valueOf(value);
+ type = DataType.FLOAT;
return this;
}
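A small usage sketch consistent with the NumericField changes above (lazy NumericTokenStream creation, DataType tracking); the field name and value are illustrative, not part of the patch:

```java
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;

public final class NumericFieldUsage {
  static Document priceDoc(long price) {
    Document doc = new Document();
    // Store.YES: the value is persisted in the new binary stored-field format and
    // comes back as a NumericField instance on search results.
    NumericField f = new NumericField("price", Field.Store.YES, true).setLongValue(price);
    // setLongValue() records the value and its DataType; the NumericTokenStream is
    // only instantiated when tokenStreamValue() is first called during indexing.
    assert f.getDataType() == NumericField.DataType.LONG;
    doc.add(f);
    return doc;
  }
}
```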
diff --git a/lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java b/lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java
index 11e55734046..745117daec0 100644
--- a/lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java
+++ b/lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java
@@ -132,9 +132,9 @@ class BufferedDeletesStream {
public final long gen;
// If non-null, contains segments that are 100% deleted
- public final SegmentInfos allDeleted;
+ public final List<SegmentInfo> allDeleted;
- ApplyDeletesResult(boolean anyDeletes, long gen, SegmentInfos allDeleted) {
+ ApplyDeletesResult(boolean anyDeletes, long gen, List<SegmentInfo> allDeleted) {
this.anyDeletes = anyDeletes;
this.gen = gen;
this.allDeleted = allDeleted;
@@ -164,7 +164,7 @@ class BufferedDeletesStream {
/** Resolves the buffered deleted Term/Query/docIDs, into
* actual deleted docIDs in the deletedDocs BitVector for
* each SegmentReader. */
- public synchronized ApplyDeletesResult applyDeletes(IndexWriter.ReaderPool readerPool, SegmentInfos infos) throws IOException {
+ public synchronized ApplyDeletesResult applyDeletes(IndexWriter.ReaderPool readerPool, List<SegmentInfo> infos) throws IOException {
final long t0 = System.currentTimeMillis();
if (infos.size() == 0) {
@@ -182,7 +182,7 @@ class BufferedDeletesStream {
message("applyDeletes: infos=" + infos + " packetCount=" + deletes.size());
}
- SegmentInfos infos2 = new SegmentInfos();
+ List<SegmentInfo> infos2 = new ArrayList<SegmentInfo>();
infos2.addAll(infos);
Collections.sort(infos2, sortSegInfoByDelGen);
@@ -192,7 +192,7 @@ class BufferedDeletesStream {
int infosIDX = infos2.size()-1;
int delIDX = deletes.size()-1;
- SegmentInfos allDeleted = null;
+ List<SegmentInfo> allDeleted = null;
while (infosIDX >= 0) {
//System.out.println("BD: cycle delIDX=" + delIDX + " infoIDX=" + infosIDX);
@@ -245,7 +245,7 @@ class BufferedDeletesStream {
if (segAllDeletes) {
if (allDeleted == null) {
- allDeleted = new SegmentInfos();
+ allDeleted = new ArrayList<SegmentInfo>();
}
allDeleted.add(info);
}
@@ -287,7 +287,7 @@ class BufferedDeletesStream {
if (segAllDeletes) {
if (allDeleted == null) {
- allDeleted = new SegmentInfos();
+ allDeleted = new ArrayList<SegmentInfo>();
}
allDeleted.add(info);
}
diff --git a/lucene/src/java/org/apache/lucene/index/CompoundFileWriter.java b/lucene/src/java/org/apache/lucene/index/CompoundFileWriter.java
index c80a8343b16..f08da5c0b01 100644
--- a/lucene/src/java/org/apache/lucene/index/CompoundFileWriter.java
+++ b/lucene/src/java/org/apache/lucene/index/CompoundFileWriter.java
@@ -46,8 +46,10 @@ import org.apache.lucene.util.IOUtils;
* file. The {directory} that follows has that many entries. Each directory entry
* contains a long pointer to the start of this file's data section, and a String
* with that file's name.
+ *
+ * @lucene.internal
*/
-final class CompoundFileWriter {
+public final class CompoundFileWriter {
static final class FileEntry {
@@ -137,8 +139,7 @@ final class CompoundFileWriter {
/** Merge files with the extensions added up to now.
* All files with these extensions are combined sequentially into the
- * compound stream. After successful merge, the source files
- * are deleted.
+ * compound stream.
* @throws IllegalStateException if close() had been called before or
* if no file has been added to this object
*/
diff --git a/lucene/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java b/lucene/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java
index b9cafc7c5c2..bc29b35c241 100644
--- a/lucene/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java
+++ b/lucene/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java
@@ -135,8 +135,8 @@ public class ConcurrentMergeScheduler extends MergeScheduler {
final MergePolicy.OneMerge m1 = t1.getCurrentMerge();
final MergePolicy.OneMerge m2 = t2.getCurrentMerge();
- final int c1 = m1 == null ? Integer.MAX_VALUE : m1.segments.totalDocCount();
- final int c2 = m2 == null ? Integer.MAX_VALUE : m2.segments.totalDocCount();
+ final int c1 = m1 == null ? Integer.MAX_VALUE : m1.totalDocCount;
+ final int c2 = m2 == null ? Integer.MAX_VALUE : m2.totalDocCount;
return c2 - c1;
}
diff --git a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java
index 154b4884b8d..53765f84f1d 100644
--- a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java
+++ b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java
@@ -263,9 +263,10 @@ final class DocFieldProcessor extends DocConsumer {
// enabled; we could save [small amount of] CPU
// here.
ArrayUtil.quickSort(fields, 0, fieldCount, fieldsComp);
-
- for(int i=0;i it = perThreadPool.getAllPerThreadsIterator();
while (it.hasNext()) {
- it.next().perThread.docState.infoStream = infoStream;
+ it.next().perThread.setInfoStream(infoStream);
}
}
diff --git a/lucene/src/java/org/apache/lucene/index/DocumentsWriterDeleteQueue.java b/lucene/src/java/org/apache/lucene/index/DocumentsWriterDeleteQueue.java
index 486c12659f7..85d2550a066 100644
--- a/lucene/src/java/org/apache/lucene/index/DocumentsWriterDeleteQueue.java
+++ b/lucene/src/java/org/apache/lucene/index/DocumentsWriterDeleteQueue.java
@@ -63,9 +63,10 @@ import org.apache.lucene.search.Query;
*/
final class DocumentsWriterDeleteQueue {
- private volatile Node tail;
+ private volatile Node<?> tail;
- private static final AtomicReferenceFieldUpdater<DocumentsWriterDeleteQueue,Node> tailUpdater = AtomicReferenceFieldUpdater
+ @SuppressWarnings("rawtypes")
+ private static final AtomicReferenceFieldUpdater<DocumentsWriterDeleteQueue,Node> tailUpdater = AtomicReferenceFieldUpdater
.newUpdater(DocumentsWriterDeleteQueue.class, Node.class, "tail");
private final DeleteSlice globalSlice;
@@ -90,7 +91,7 @@ final class DocumentsWriterDeleteQueue {
* we use a sentinel instance as our initial tail. No slice will ever try to
* apply this tail since the head is always omitted.
*/
- tail = new Node(null); // sentinel
+ tail = new Node<Object>(null); // sentinel
globalSlice = new DeleteSlice(tail);
}
@@ -126,14 +127,14 @@ final class DocumentsWriterDeleteQueue {
// we can do it just every n times or so?
}
- void add(Node item) {
+ void add(Node<?> item) {
/*
* this non-blocking / 'wait-free' linked list add was inspired by Apache
* Harmony's ConcurrentLinkedQueue Implementation.
*/
while (true) {
- final Node currentTail = this.tail;
- final Node tailNext = currentTail.next;
+ final Node<?> currentTail = this.tail;
+ final Node<?> tailNext = currentTail.next;
if (tail == currentTail) {
if (tailNext != null) {
/*
@@ -196,7 +197,7 @@ final class DocumentsWriterDeleteQueue {
* deletes in the queue and reset the global slice to let the GC prune the
* queue.
*/
- final Node currentTail = tail; // take the current tail make this local any
+ final Node<?> currentTail = tail; // take the current tail make this local any
// Changes after this call are applied later
// and not relevant here
if (callerSlice != null) {
@@ -232,10 +233,10 @@ final class DocumentsWriterDeleteQueue {
static class DeleteSlice {
// No need to be volatile, slices are thread captive (only accessed by one thread)!
- Node sliceHead; // we don't apply this one
- Node sliceTail;
+ Node<?> sliceHead; // we don't apply this one
+ Node<?> sliceTail;
- DeleteSlice(Node currentTail) {
+ DeleteSlice(Node<?> currentTail) {
assert currentTail != null;
/*
* Initially this is a 0 length slice pointing to the 'current' tail of
@@ -256,7 +257,7 @@ final class DocumentsWriterDeleteQueue {
* tail in this slice are not equal then there will be at least one more
* non-null node in the slice!
*/
- Node current = sliceHead;
+ Node<?> current = sliceHead;
do {
current = current.next;
assert current != null : "slice property violated between the head on the tail must not be a null node";
@@ -290,7 +291,7 @@ final class DocumentsWriterDeleteQueue {
void clear() {
globalBufferLock.lock();
try {
- final Node currentTail = tail;
+ final Node<?> currentTail = tail;
globalSlice.sliceHead = globalSlice.sliceTail = currentTail;
globalBufferedDeletes.clear();
} finally {
@@ -298,27 +299,28 @@ final class DocumentsWriterDeleteQueue {
}
}
- private static class Node {
- volatile Node next;
- final Object item;
+ private static class Node<T> {
+ volatile Node<?> next;
+ final T item;
- private Node(Object item) {
+ Node(T item) {
this.item = item;
}
- static final AtomicReferenceFieldUpdater<Node,Node> nextUpdater = AtomicReferenceFieldUpdater
+ @SuppressWarnings("rawtypes")
+ static final AtomicReferenceFieldUpdater<Node,Node> nextUpdater = AtomicReferenceFieldUpdater
.newUpdater(Node.class, Node.class, "next");
void apply(BufferedDeletes bufferedDeletes, int docIDUpto) {
assert false : "sentinel item must never be applied";
}
- boolean casNext(Node cmp, Node val) {
+ boolean casNext(Node<?> cmp, Node<?> val) {
return nextUpdater.compareAndSet(this, cmp, val);
}
}
- private static final class TermNode extends Node {
+ private static final class TermNode extends Node<Term> {
TermNode(Term term) {
super(term);
@@ -326,33 +328,31 @@ final class DocumentsWriterDeleteQueue {
@Override
void apply(BufferedDeletes bufferedDeletes, int docIDUpto) {
- bufferedDeletes.addTerm((Term) item, docIDUpto);
+ bufferedDeletes.addTerm(item, docIDUpto);
}
}
- private static final class QueryArrayNode extends Node {
+ private static final class QueryArrayNode extends Node<Query[]> {
QueryArrayNode(Query[] query) {
super(query);
}
@Override
void apply(BufferedDeletes bufferedDeletes, int docIDUpto) {
- final Query[] queries = (Query[]) item;
- for (Query query : queries) {
+ for (Query query : item) {
bufferedDeletes.addQuery(query, docIDUpto);
}
}
}
- private static final class TermArrayNode extends Node {
+ private static final class TermArrayNode extends Node<Term[]> {
TermArrayNode(Term[] term) {
super(term);
}
@Override
void apply(BufferedDeletes bufferedDeletes, int docIDUpto) {
- final Term[] terms = (Term[]) item;
- for (Term term : terms) {
+ for (Term term : item) {
bufferedDeletes.addTerm(term, docIDUpto);
}
}
@@ -361,7 +361,7 @@ final class DocumentsWriterDeleteQueue {
private boolean forceApplyGlobalSlice() {
globalBufferLock.lock();
- final Node currentTail = tail;
+ final Node<?> currentTail = tail;
try {
if (globalSlice.sliceTail != currentTail) {
globalSlice.sliceTail = currentTail;
diff --git a/lucene/src/java/org/apache/lucene/index/DocumentsWriterFlushControl.java b/lucene/src/java/org/apache/lucene/index/DocumentsWriterFlushControl.java
index 443df5139ca..932b3d29b36 100644
--- a/lucene/src/java/org/apache/lucene/index/DocumentsWriterFlushControl.java
+++ b/lucene/src/java/org/apache/lucene/index/DocumentsWriterFlushControl.java
@@ -122,13 +122,13 @@ public final class DocumentsWriterFlushControl {
// is super important since we can not address more than 2048 MB per DWPT
setFlushPending(perThread);
if (fullFlush) {
- DocumentsWriterPerThread toBlock = internalTryCheckOutForFlush(perThread, false);
+ DocumentsWriterPerThread toBlock = internalTryCheckOutForFlush(perThread);
assert toBlock != null;
blockedFlushes.add(toBlock);
}
}
}
- final DocumentsWriterPerThread flushingDWPT = tryCheckoutForFlush(perThread, false);
+ final DocumentsWriterPerThread flushingDWPT = tryCheckoutForFlush(perThread);
healthiness.updateStalled(this);
return flushingDWPT;
}
@@ -189,18 +189,15 @@ public final class DocumentsWriterFlushControl {
}
synchronized DocumentsWriterPerThread tryCheckoutForFlush(
- ThreadState perThread, boolean setPending) {
+ ThreadState perThread) {
if (fullFlush) {
return null;
}
- return internalTryCheckOutForFlush(perThread, setPending);
+ return internalTryCheckOutForFlush(perThread);
}
private DocumentsWriterPerThread internalTryCheckOutForFlush(
- ThreadState perThread, boolean setPending) {
- if (setPending && !perThread.flushPending) {
- setFlushPending(perThread);
- }
+ ThreadState perThread) {
if (perThread.flushPending) {
// We are pending so all memory is already moved to flushBytes
if (perThread.tryLock()) {
@@ -245,7 +242,7 @@ public final class DocumentsWriterFlushControl {
while (allActiveThreads.hasNext() && numPending > 0) {
ThreadState next = allActiveThreads.next();
if (next.flushPending) {
- final DocumentsWriterPerThread dwpt = tryCheckoutForFlush(next, false);
+ final DocumentsWriterPerThread dwpt = tryCheckoutForFlush(next);
if (dwpt != null) {
return dwpt;
}
@@ -330,7 +327,12 @@ public final class DocumentsWriterFlushControl {
}
if (next.perThread.getNumDocsInRAM() > 0 ) {
final DocumentsWriterPerThread dwpt = next.perThread; // just for assert
- final DocumentsWriterPerThread flushingDWPT = internalTryCheckOutForFlush(next, true);
+ synchronized (this) {
+ if (!next.flushPending) {
+ setFlushPending(next);
+ }
+ }
+ final DocumentsWriterPerThread flushingDWPT = internalTryCheckOutForFlush(next);
assert flushingDWPT != null : "DWPT must never be null here since we hold the lock and it holds documents";
assert dwpt == flushingDWPT : "flushControl returned different DWPT";
toFlush.add(flushingDWPT);
diff --git a/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java b/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java
index e943055bc37..4f14fd8f341 100644
--- a/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java
+++ b/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java
@@ -163,7 +163,7 @@ public class DocumentsWriterPerThread {
boolean hasAborted = false; // True if the last exception thrown by #updateDocument was aborting
private FieldInfos fieldInfos;
- private final PrintStream infoStream;
+ private PrintStream infoStream;
private int numDocsInRAM;
private int flushedDocCount;
DocumentsWriterDeleteQueue deleteQueue;
@@ -235,6 +235,7 @@ public class DocumentsWriterPerThread {
// mark document as deleted
deleteDocID(docState.docID);
numDocsInRAM++;
+ fieldInfos.revertUncommitted();
} else {
abort();
}
@@ -377,15 +378,12 @@ public class DocumentsWriterPerThread {
boolean success = false;
try {
-
- SegmentInfo newSegment = new SegmentInfo(segment, flushState.numDocs, directory, false, fieldInfos.hasProx(), flushState.segmentCodecs, false, fieldInfos);
consumer.flush(flushState);
pendingDeletes.terms.clear();
- newSegment.setHasVectors(flushState.hasVectors);
-
+ final SegmentInfo newSegment = new SegmentInfo(segment, flushState.numDocs, directory, false, flushState.segmentCodecs, fieldInfos.asReadOnly());
if (infoStream != null) {
message("new segment has " + (flushState.deletedDocs == null ? 0 : flushState.deletedDocs.count()) + " deleted docs");
- message("new segment has " + (flushState.hasVectors ? "vectors" : "no vectors"));
+ message("new segment has " + (newSegment.getHasVectors() ? "vectors" : "no vectors"));
message("flushedFiles=" + newSegment.files());
message("flushed codecs=" + newSegment.getSegmentCodecs());
}
@@ -435,10 +433,6 @@ public class DocumentsWriterPerThread {
return bytesUsed.get() + pendingDeletes.bytesUsed.get();
}
- FieldInfos getFieldInfos() {
- return fieldInfos;
- }
-
void message(String message) {
writer.message("DWPT: " + message);
}
@@ -498,4 +492,9 @@ public class DocumentsWriterPerThread {
assert segment != null;
return new PerDocWriteState(infoStream, directory, segment, fieldInfos, bytesUsed, codecId);
}
+
+ void setInfoStream(PrintStream infoStream) {
+ this.infoStream = infoStream;
+ docState.infoStream = infoStream;
+ }
}
diff --git a/lucene/src/java/org/apache/lucene/index/FieldInfo.java b/lucene/src/java/org/apache/lucene/index/FieldInfo.java
index 3aba2850b42..26b8d30a3ea 100644
--- a/lucene/src/java/org/apache/lucene/index/FieldInfo.java
+++ b/lucene/src/java/org/apache/lucene/index/FieldInfo.java
@@ -22,7 +22,6 @@ import org.apache.lucene.index.values.Type;
/** @lucene.experimental */
public final class FieldInfo {
public static final int UNASSIGNED_CODEC_ID = -1;
-
public final String name;
public final int number;
@@ -113,7 +112,6 @@ public final class FieldInfo {
}
assert !this.omitTermFreqAndPositions || !this.storePayloads;
}
-
void setDocValues(Type v) {
if (docValues == null) {
docValues = v;
@@ -127,4 +125,29 @@ public final class FieldInfo {
public Type getDocValues() {
return docValues;
}
+
+ private boolean vectorsCommitted;
+
+ /**
+ * Reverts all uncommitted changes on this {@link FieldInfo}
+ * @see #commitVectors()
+ */
+ void revertUncommitted() {
+ if (storeTermVector && !vectorsCommitted) {
+ storeOffsetWithTermVector = false;
+ storePositionWithTermVector = false;
+ storeTermVector = false;
+ }
+ }
+
+ /**
+ * Commits term vector modifications. Changes to term-vectors must be
+ * explicitly committed once the necessary files are created. If those changes
+ * are not committed subsequent {@link #revertUncommitted()} will reset the
+ * all term-vector flags before the next document.
+ */
+ void commitVectors() {
+ assert storeTermVector;
+ vectorsCommitted = true;
+ }
}
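The new commitVectors()/revertUncommitted() pair follows a simple commit-or-revert flag pattern; this standalone sketch (not Lucene code) shows the intent under that assumption:

```java
final class CommitRevertFlagSketch {
  private boolean storeTermVector;
  private boolean vectorsCommitted;

  void enableVectors() { storeTermVector = true; }

  /** Called once the term-vector files for the document were successfully created. */
  void commitVectors() {
    assert storeTermVector;
    vectorsCommitted = true;
  }

  /** Called on a non-aborting document exception: undo any uncommitted flag change. */
  void revertUncommitted() {
    if (storeTermVector && !vectorsCommitted) {
      storeTermVector = false;
    }
  }
}
```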
diff --git a/lucene/src/java/org/apache/lucene/index/FieldInfos.java b/lucene/src/java/org/apache/lucene/index/FieldInfos.java
index c62649a6bf1..422560ea057 100644
--- a/lucene/src/java/org/apache/lucene/index/FieldInfos.java
+++ b/lucene/src/java/org/apache/lucene/index/FieldInfos.java
@@ -220,6 +220,10 @@ public final class FieldInfos implements Iterable<FieldInfo> {
static final byte OMIT_TERM_FREQ_AND_POSITIONS = 0x40;
private int format;
+ private boolean hasProx; // only set if readonly
+ private boolean hasVectors; // only set if readonly
+ private long version; // internal use to track changes
+
/**
* Creates a new {@link FieldInfos} instance with a private
@@ -267,7 +271,7 @@ public final class FieldInfos implements Iterable<FieldInfo> {
*/
public FieldInfos(Directory d, String name) throws IOException {
this((FieldNumberBiMap)null, null); // use null here to make this FIs Read-Only
- IndexInput input = d.openInput(name);
+ final IndexInput input = d.openInput(name);
try {
read(input, name);
} finally {
@@ -303,6 +307,9 @@ public final class FieldInfos implements Iterable<FieldInfo> {
@Override
synchronized public Object clone() {
FieldInfos fis = new FieldInfos(globalFieldNumbers, segmentCodecsBuilder);
+ fis.format = format;
+ fis.hasProx = hasProx;
+ fis.hasVectors = hasVectors;
for (FieldInfo fi : this) {
FieldInfo clone = (FieldInfo) (fi).clone();
fis.putInternal(clone);
@@ -312,6 +319,10 @@ public final class FieldInfos implements Iterable<FieldInfo> {
/** Returns true if any fields do not omitTermFreqAndPositions */
public boolean hasProx() {
+ if (isReadOnly()) {
+ return hasProx;
+ }
+ // mutable FIs must check!
for (FieldInfo fi : this) {
if (fi.isIndexed && !fi.omitTermFreqAndPositions) {
return true;
@@ -445,6 +456,7 @@ public final class FieldInfos implements Iterable<FieldInfo> {
if ((fi.isIndexed || fi.hasDocValues()) && fi.getCodecId() == FieldInfo.UNASSIGNED_CODEC_ID) {
segmentCodecsBuilder.tryAddAndSet(fi);
}
+ version++;
return fi;
}
@@ -514,6 +526,10 @@ public final class FieldInfos implements Iterable<FieldInfo> {
}
public boolean hasVectors() {
+ if (isReadOnly()) {
+ return hasVectors;
+ }
+ // mutable FIs must check
for (FieldInfo fi : this) {
if (fi.storeTermVector) {
return true;
@@ -566,6 +582,10 @@ public final class FieldInfos implements Iterable<FieldInfo> {
public final boolean isReadOnly() {
return globalFieldNumbers == null;
}
+
+ synchronized final long getVersion() {
+ return version;
+ }
public void write(IndexOutput output) throws IOException {
output.writeVInt(FORMAT_CURRENT);
@@ -658,7 +678,8 @@ public final class FieldInfos implements Iterable<FieldInfo> {
if (omitTermFreqAndPositions) {
storePayloads = false;
}
-
+ hasVectors |= storeTermVector;
+ hasProx |= isIndexed && !omitTermFreqAndPositions;
Type docValuesType = null;
if (format <= FORMAT_INDEX_VALUES) {
final byte b = input.readByte();
@@ -705,5 +726,29 @@ public final class FieldInfos implements Iterable<FieldInfo> {
throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length());
}
}
+
+ /**
+ * Reverts all uncommitted changes
+ * @see FieldInfo#revertUncommitted()
+ */
+ void revertUncommitted() {
+ for (FieldInfo fieldInfo : this) {
+ fieldInfo.revertUncommitted();
+ }
+ }
+
+ final FieldInfos asReadOnly() {
+ if (isReadOnly()) {
+ return this;
+ }
+ final FieldInfos roFis = new FieldInfos((FieldNumberBiMap)null, null);
+ for (FieldInfo fieldInfo : this) {
+ FieldInfo clone = (FieldInfo) (fieldInfo).clone();
+ roFis.putInternal(clone);
+ roFis.hasVectors |= clone.storeTermVector;
+ roFis.hasProx |= clone.isIndexed && !clone.omitTermFreqAndPositions;
+ }
+ return roFis;
+ }
}
diff --git a/lucene/src/java/org/apache/lucene/index/FieldsReader.java b/lucene/src/java/org/apache/lucene/index/FieldsReader.java
index 76c0ed23552..e135d6d2870 100644
--- a/lucene/src/java/org/apache/lucene/index/FieldsReader.java
+++ b/lucene/src/java/org/apache/lucene/index/FieldsReader.java
@@ -24,10 +24,11 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.document.FieldSelectorResult;
import org.apache.lucene.document.Fieldable;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.document.NumericField;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.BufferedIndexInput;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.CloseableThreadLocal;
import java.io.IOException;
@@ -212,40 +213,39 @@ public final class FieldsReader implements Cloneable {
Document doc = new Document();
int numFields = fieldsStream.readVInt();
- for (int i = 0; i < numFields; i++) {
+ out: for (int i = 0; i < numFields; i++) {
int fieldNumber = fieldsStream.readVInt();
FieldInfo fi = fieldInfos.fieldInfo(fieldNumber);
FieldSelectorResult acceptField = fieldSelector == null ? FieldSelectorResult.LOAD : fieldSelector.accept(fi.name);
- byte bits = fieldsStream.readByte();
- assert bits <= FieldsWriter.FIELD_IS_TOKENIZED + FieldsWriter.FIELD_IS_BINARY;
+ int bits = fieldsStream.readByte() & 0xFF;
+ assert bits <= (FieldsWriter.FIELD_IS_NUMERIC_MASK | FieldsWriter.FIELD_IS_TOKENIZED | FieldsWriter.FIELD_IS_BINARY): "bits=" + Integer.toHexString(bits);
boolean tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;
boolean binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0;
- //TODO: Find an alternative approach here if this list continues to grow beyond the
- //list of 5 or 6 currently here. See Lucene 762 for discussion
- if (acceptField.equals(FieldSelectorResult.LOAD)) {
- addField(doc, fi, binary, tokenize);
- }
- else if (acceptField.equals(FieldSelectorResult.LOAD_AND_BREAK)){
- addField(doc, fi, binary, tokenize);
- break;//Get out of this loop
- }
- else if (acceptField.equals(FieldSelectorResult.LAZY_LOAD)) {
- addFieldLazy(doc, fi, binary, tokenize, true);
- }
- else if (acceptField.equals(FieldSelectorResult.LATENT)) {
- addFieldLazy(doc, fi, binary, tokenize, false);
- }
- else if (acceptField.equals(FieldSelectorResult.SIZE)){
- skipField(addFieldSize(doc, fi, binary));
- }
- else if (acceptField.equals(FieldSelectorResult.SIZE_AND_BREAK)){
- addFieldSize(doc, fi, binary);
- break;
- }
- else {
- skipField();
+ final int numeric = bits & FieldsWriter.FIELD_IS_NUMERIC_MASK;
+
+ switch (acceptField) {
+ case LOAD:
+ addField(doc, fi, binary, tokenize, numeric);
+ break;
+ case LOAD_AND_BREAK:
+ addField(doc, fi, binary, tokenize, numeric);
+ break out; //Get out of this loop
+ case LAZY_LOAD:
+ addFieldLazy(doc, fi, binary, tokenize, true, numeric);
+ break;
+ case LATENT:
+ addFieldLazy(doc, fi, binary, tokenize, false, numeric);
+ break;
+ case SIZE:
+ skipFieldBytes(addFieldSize(doc, fi, binary, numeric));
+ break;
+ case SIZE_AND_BREAK:
+ addFieldSize(doc, fi, binary, numeric);
+ break out; //Get out of this loop
+ default:
+ skipField(numeric);
}
}
@@ -282,72 +282,121 @@ public final class FieldsReader implements Cloneable {
* Skip the field. We still have to read some of the information about the field, but can skip past the actual content.
* This will have the most payoff on large fields.
*/
- private void skipField() throws IOException {
- skipField(fieldsStream.readVInt());
+ private void skipField(int numeric) throws IOException {
+ final int numBytes;
+ switch(numeric) {
+ case 0:
+ numBytes = fieldsStream.readVInt();
+ break;
+ case FieldsWriter.FIELD_IS_NUMERIC_INT:
+ case FieldsWriter.FIELD_IS_NUMERIC_FLOAT:
+ numBytes = 4;
+ break;
+ case FieldsWriter.FIELD_IS_NUMERIC_LONG:
+ case FieldsWriter.FIELD_IS_NUMERIC_DOUBLE:
+ numBytes = 8;
+ break;
+ default:
+ throw new FieldReaderException("Invalid numeric type: " + Integer.toHexString(numeric));
+ }
+
+ skipFieldBytes(numBytes);
}
- private void skipField(int toRead) throws IOException {
+ private void skipFieldBytes(int toRead) throws IOException {
fieldsStream.seek(fieldsStream.getFilePointer() + toRead);
}
- private void addFieldLazy(Document doc, FieldInfo fi, boolean binary, boolean tokenize, boolean cacheResult) throws IOException {
+ private NumericField loadNumericField(FieldInfo fi, int numeric) throws IOException {
+ assert numeric != 0;
+ switch(numeric) {
+ case FieldsWriter.FIELD_IS_NUMERIC_INT:
+ return new NumericField(fi.name, Field.Store.YES, fi.isIndexed).setIntValue(fieldsStream.readInt());
+ case FieldsWriter.FIELD_IS_NUMERIC_LONG:
+ return new NumericField(fi.name, Field.Store.YES, fi.isIndexed).setLongValue(fieldsStream.readLong());
+ case FieldsWriter.FIELD_IS_NUMERIC_FLOAT:
+ return new NumericField(fi.name, Field.Store.YES, fi.isIndexed).setFloatValue(Float.intBitsToFloat(fieldsStream.readInt()));
+ case FieldsWriter.FIELD_IS_NUMERIC_DOUBLE:
+ return new NumericField(fi.name, Field.Store.YES, fi.isIndexed).setDoubleValue(Double.longBitsToDouble(fieldsStream.readLong()));
+ default:
+ throw new FieldReaderException("Invalid numeric type: " + Integer.toHexString(numeric));
+ }
+ }
+
+ private void addFieldLazy(Document doc, FieldInfo fi, boolean binary, boolean tokenize, boolean cacheResult, int numeric) throws IOException {
+ final AbstractField f;
if (binary) {
int toRead = fieldsStream.readVInt();
long pointer = fieldsStream.getFilePointer();
- //was: doc.add(new Fieldable(fi.name, b, Fieldable.Store.YES));
- doc.add(new LazyField(fi.name, Field.Store.YES, toRead, pointer, binary, cacheResult));
+ f = new LazyField(fi.name, Field.Store.YES, toRead, pointer, binary, cacheResult);
//Need to move the pointer ahead by toRead positions
fieldsStream.seek(pointer + toRead);
+ } else if (numeric != 0) {
+ f = loadNumericField(fi, numeric);
} else {
Field.Store store = Field.Store.YES;
Field.Index index = Field.Index.toIndex(fi.isIndexed, tokenize);
Field.TermVector termVector = Field.TermVector.toTermVector(fi.storeTermVector, fi.storeOffsetWithTermVector, fi.storePositionWithTermVector);
- AbstractField f;
int length = fieldsStream.readVInt();
long pointer = fieldsStream.getFilePointer();
//Skip ahead of where we are by the length of what is stored
fieldsStream.seek(pointer+length);
f = new LazyField(fi.name, store, index, termVector, length, pointer, binary, cacheResult);
- f.setOmitNorms(fi.omitNorms);
- f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
-
- doc.add(f);
}
-
+
+ f.setOmitNorms(fi.omitNorms);
+ f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
+ doc.add(f);
}
- private void addField(Document doc, FieldInfo fi, boolean binary, boolean tokenize) throws CorruptIndexException, IOException {
+ private void addField(Document doc, FieldInfo fi, boolean binary, boolean tokenize, int numeric) throws CorruptIndexException, IOException {
+ final AbstractField f;
if (binary) {
int toRead = fieldsStream.readVInt();
final byte[] b = new byte[toRead];
fieldsStream.readBytes(b, 0, b.length);
- doc.add(new Field(fi.name, b));
+ f = new Field(fi.name, b);
+ } else if (numeric != 0) {
+ f = loadNumericField(fi, numeric);
} else {
- Field.Store store = Field.Store.YES;
Field.Index index = Field.Index.toIndex(fi.isIndexed, tokenize);
Field.TermVector termVector = Field.TermVector.toTermVector(fi.storeTermVector, fi.storeOffsetWithTermVector, fi.storePositionWithTermVector);
-
- AbstractField f;
f = new Field(fi.name, // name
- false,
- fieldsStream.readString(), // read value
- store,
- index,
- termVector);
- f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
- f.setOmitNorms(fi.omitNorms);
-
- doc.add(f);
+ false,
+ fieldsStream.readString(), // read value
+ Field.Store.YES,
+ index,
+ termVector);
}
+
+ f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
+ f.setOmitNorms(fi.omitNorms);
+ doc.add(f);
}
// Add the size of field as a byte[] containing the 4 bytes of the integer byte size (high order byte first; char = 2 bytes)
// Read just the size -- caller must skip the field content to continue reading fields
// Return the size in bytes or chars, depending on field type
- private int addFieldSize(Document doc, FieldInfo fi, boolean binary) throws IOException {
- int size = fieldsStream.readVInt(), bytesize = binary ? size : 2*size;
+ private int addFieldSize(Document doc, FieldInfo fi, boolean binary, int numeric) throws IOException {
+ final int bytesize, size;
+ switch(numeric) {
+ case 0:
+ size = fieldsStream.readVInt();
+ bytesize = binary ? size : 2*size;
+ break;
+ case FieldsWriter.FIELD_IS_NUMERIC_INT:
+ case FieldsWriter.FIELD_IS_NUMERIC_FLOAT:
+ size = bytesize = 4;
+ break;
+ case FieldsWriter.FIELD_IS_NUMERIC_LONG:
+ case FieldsWriter.FIELD_IS_NUMERIC_DOUBLE:
+ size = bytesize = 8;
+ break;
+ default:
+ throw new FieldReaderException("Invalid numeric type: " + Integer.toHexString(numeric));
+ }
byte[] sizebytes = new byte[4];
sizebytes[0] = (byte) (bytesize>>>24);
sizebytes[1] = (byte) (bytesize>>>16);
@@ -358,7 +407,7 @@ public final class FieldsReader implements Cloneable {
}
/**
- * A Lazy implementation of Fieldable that differs loading of fields until asked for, instead of when the Document is
+ * A Lazy implementation of Fieldable that defers loading of fields until asked for, instead of when the Document is
* loaded.
*/
private class LazyField extends AbstractField implements Fieldable {
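A sketch of how the stored-fields bits byte is interpreted after this change; the constants mirror the FieldsWriter values below, and the helper itself is purely illustrative:

```java
final class StoredFieldBitsSketch {
  static final int FIELD_IS_TOKENIZED = 1 << 0;
  static final int FIELD_IS_BINARY = 1 << 1;
  static final int NUMERIC_BIT_SHIFT = 3;
  static final int FIELD_IS_NUMERIC_MASK = 0x07 << NUMERIC_BIT_SHIFT;

  static String describe(byte bitsByte) {
    final int bits = bitsByte & 0xFF; // read unsigned, as the patched FieldsReader does
    final boolean tokenize = (bits & FIELD_IS_TOKENIZED) != 0;
    final boolean binary = (bits & FIELD_IS_BINARY) != 0;
    final int numeric = bits & FIELD_IS_NUMERIC_MASK; // 0 means "not a numeric field"
    return "tokenized=" + tokenize + " binary=" + binary
        + " numericType=" + (numeric >>> NUMERIC_BIT_SHIFT);
  }
}
```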
diff --git a/lucene/src/java/org/apache/lucene/index/FieldsWriter.java b/lucene/src/java/org/apache/lucene/index/FieldsWriter.java
index 303aa912bc3..9efd909574e 100644
--- a/lucene/src/java/org/apache/lucene/index/FieldsWriter.java
+++ b/lucene/src/java/org/apache/lucene/index/FieldsWriter.java
@@ -21,22 +21,40 @@ import java.util.List;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.document.NumericField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.IOUtils;
final class FieldsWriter {
- static final byte FIELD_IS_TOKENIZED = 0x1;
- static final byte FIELD_IS_BINARY = 0x2;
+ static final int FIELD_IS_TOKENIZED = 1 << 0;
+ static final int FIELD_IS_BINARY = 1 << 1;
+ // the old bit 1 << 2 was compressed, is now left out
+
+ private static final int _NUMERIC_BIT_SHIFT = 3;
+ static final int FIELD_IS_NUMERIC_MASK = 0x07 << _NUMERIC_BIT_SHIFT;
+
+ static final int FIELD_IS_NUMERIC_INT = 1 << _NUMERIC_BIT_SHIFT;
+ static final int FIELD_IS_NUMERIC_LONG = 2 << _NUMERIC_BIT_SHIFT;
+ static final int FIELD_IS_NUMERIC_FLOAT = 3 << _NUMERIC_BIT_SHIFT;
+ static final int FIELD_IS_NUMERIC_DOUBLE = 4 << _NUMERIC_BIT_SHIFT;
+ // currently unused: static final int FIELD_IS_NUMERIC_SHORT = 5 << _NUMERIC_BIT_SHIFT;
+ // currently unused: static final int FIELD_IS_NUMERIC_BYTE = 6 << _NUMERIC_BIT_SHIFT;
+
+ // the next possible bits are: 1 << 6; 1 << 7
+
// Lucene 3.0: Removal of compressed fields
static final int FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS = 2;
+ // Lucene 3.2: NumericFields are stored in binary format
+ static final int FORMAT_LUCENE_3_2_NUMERIC_FIELDS = 3;
+
// NOTE: if you introduce a new format, make it 1 higher
// than the current one, and always change this if you
// switch to a new format!
- static final int FORMAT_CURRENT = FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS;
+ static final int FORMAT_CURRENT = FORMAT_LUCENE_3_2_NUMERIC_FIELDS;
// when removing support for old versions, leave the last supported version here
static final int FORMAT_MINIMUM = FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS;
@@ -121,13 +139,26 @@ final class FieldsWriter {
final void writeField(int fieldNumber, Fieldable field) throws IOException {
fieldsStream.writeVInt(fieldNumber);
- byte bits = 0;
+ int bits = 0;
if (field.isTokenized())
- bits |= FieldsWriter.FIELD_IS_TOKENIZED;
+ bits |= FIELD_IS_TOKENIZED;
if (field.isBinary())
- bits |= FieldsWriter.FIELD_IS_BINARY;
-
- fieldsStream.writeByte(bits);
+ bits |= FIELD_IS_BINARY;
+ if (field instanceof NumericField) {
+ switch (((NumericField) field).getDataType()) {
+ case INT:
+ bits |= FIELD_IS_NUMERIC_INT; break;
+ case LONG:
+ bits |= FIELD_IS_NUMERIC_LONG; break;
+ case FLOAT:
+ bits |= FIELD_IS_NUMERIC_FLOAT; break;
+ case DOUBLE:
+ bits |= FIELD_IS_NUMERIC_DOUBLE; break;
+ default:
+ assert false : "Should never get here";
+ }
+ }
+ fieldsStream.writeByte((byte) bits);
if (field.isBinary()) {
final byte[] data;
@@ -139,8 +170,22 @@ final class FieldsWriter {
fieldsStream.writeVInt(len);
fieldsStream.writeBytes(data, offset, len);
- }
- else {
+ } else if (field instanceof NumericField) {
+ final NumericField nf = (NumericField) field;
+ final Number n = nf.getNumericValue();
+ switch (nf.getDataType()) {
+ case INT:
+ fieldsStream.writeInt(n.intValue()); break;
+ case LONG:
+ fieldsStream.writeLong(n.longValue()); break;
+ case FLOAT:
+ fieldsStream.writeInt(Float.floatToIntBits(n.floatValue())); break;
+ case DOUBLE:
+ fieldsStream.writeLong(Double.doubleToLongBits(n.doubleValue())); break;
+ default:
+ assert false : "Should never get here";
+ }
+ } else {
fieldsStream.writeString(field.stringValue());
}
}
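For reference, a hedged sketch of the fixed-width encoding FORMAT_LUCENE_3_2_NUMERIC_FIELDS uses for numeric stored values (int/float written as 4 raw-bit bytes, long/double as 8); DataOutputStream stands in for Lucene's IndexOutput here:

```java
import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;

final class NumericEncodingSketch {

  /** Floats are written as their raw int bits: always 4 bytes (FIELD_IS_NUMERIC_FLOAT). */
  static byte[] encodeFloat(float value) throws IOException {
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    DataOutputStream out = new DataOutputStream(bytes);
    out.writeInt(Float.floatToIntBits(value));
    out.close();
    return bytes.toByteArray();
  }

  /** Doubles are written as their raw long bits: always 8 bytes (FIELD_IS_NUMERIC_DOUBLE). */
  static byte[] encodeDouble(double value) throws IOException {
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    DataOutputStream out = new DataOutputStream(bytes);
    out.writeLong(Double.doubleToLongBits(value));
    out.close();
    return bytes.toByteArray();
  }
}
```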
diff --git a/lucene/src/java/org/apache/lucene/index/IndexFileDeleter.java b/lucene/src/java/org/apache/lucene/index/IndexFileDeleter.java
index ecf41bacabc..c4559870cfb 100644
--- a/lucene/src/java/org/apache/lucene/index/IndexFileDeleter.java
+++ b/lucene/src/java/org/apache/lucene/index/IndexFileDeleter.java
@@ -22,6 +22,7 @@ import java.io.FilenameFilter;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
@@ -196,7 +197,31 @@ final class IndexFileDeleter {
}
}
if (sis != null) {
- CommitPoint commitPoint = new CommitPoint(commitsToDelete, directory, sis);
+ final SegmentInfos infos = sis;
+ for (SegmentInfo segmentInfo : infos) {
+ try {
+ /*
+ * Force FI to load for each segment since we could see a
+ * segments file and load successfully above if the files are
+ * still referenced when they are deleted and the os doesn't let
+ * you delete them. Yet it's likely that fnm files are removed
+ * while the seg file is still around. Since LUCENE-2984 we need FI
+ * to find out if a seg has vectors and prox, so we need those
+ * files to be opened for a commit point.
+ */
+ segmentInfo.getFieldInfos();
+ } catch (FileNotFoundException e) {
+ refresh(segmentInfo.name);
+ sis = null;
+ if (infoStream != null) {
+ message("init: hit FileNotFoundException when loading commit \"" + fileName + "\"; skipping this commit point");
+ }
+ }
+ }
+
+ }
+ if (sis != null) {
+ final CommitPoint commitPoint = new CommitPoint(commitsToDelete, directory, sis);
if (sis.getGeneration() == segmentInfos.getGeneration()) {
currentCommitPoint = commitPoint;
}
diff --git a/lucene/src/java/org/apache/lucene/index/IndexReader.java b/lucene/src/java/org/apache/lucene/index/IndexReader.java
index 984f77b7117..ed7d472c33f 100644
--- a/lucene/src/java/org/apache/lucene/index/IndexReader.java
+++ b/lucene/src/java/org/apache/lucene/index/IndexReader.java
@@ -1428,7 +1428,7 @@ public abstract class IndexReader implements Cloneable,Closeable {
cfr = new CompoundFileReader(dir, filename);
String [] files = cfr.listAll();
- ArrayUtil.quickSort(files); // sort the array of filename so that the output is more readable
+ ArrayUtil.mergeSort(files); // sort the array of filename so that the output is more readable
for (int i = 0; i < files.length; ++i) {
long len = cfr.fileLength(files[i]);
diff --git a/lucene/src/java/org/apache/lucene/index/IndexUpgrader.java b/lucene/src/java/org/apache/lucene/index/IndexUpgrader.java
new file mode 100644
index 00000000000..e53dae99a2c
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/index/IndexUpgrader.java
@@ -0,0 +1,129 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.util.Constants;
+import org.apache.lucene.util.Version;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.Collection;
+
+/**
+ * This is an easy-to-use tool that upgrades all segments of an index from previous Lucene versions
+ * to the current segment file format. It can be used from command line:
+ *
+ * java -cp lucene-core.jar org.apache.lucene.index.IndexUpgrader [-delete-prior-commits] [-verbose] indexDir
+ *
+ * Alternatively this class can be instantiated and {@link #upgrade} invoked. It uses {@link UpgradeIndexMergePolicy}
+ * and triggers the upgrade via an optimize request to {@link IndexWriter}.
+ * This tool keeps only the last commit in an index; for this
+ * reason, if the incoming index has more than one commit, the tool
+ * refuses to run by default. Specify {@code -delete-prior-commits}
+ * to override this, allowing the tool to delete all but the last commit.
+ * From Java code this can be enabled by passing {@code true} to
+ * {@link #IndexUpgrader(Directory,PrintStream,boolean)}.
+ */
+public final class IndexUpgrader {
+
+ private static void printUsage() {
+ System.err.println("Upgrades an index so all segments created with a previous Lucene version are rewritten.");
+ System.err.println("Usage:");
+ System.err.println(" java " + IndexUpgrader.class.getName() + " [-delete-prior-commits] [-verbose] indexDir");
+ System.err.println("This tool keeps only the last commit in an index; for this");
+ System.err.println("reason, if the incoming index has more than one commit, the tool");
+ System.err.println("refuses to run by default. Specify -delete-prior-commits to override");
+ System.err.println("this, allowing the tool to delete all but the last commit.");
+ System.exit(1);
+ }
+
+ public static void main(String[] args) throws IOException {
+ String dir = null;
+ boolean deletePriorCommits = false;
+ PrintStream out = null;
+ for (String arg : args) {
+ if ("-delete-prior-commits".equals(arg)) {
+ deletePriorCommits = true;
+ } else if ("-verbose".equals(arg)) {
+ out = System.out;
+ } else if (dir == null) {
+ dir = arg;
+ } else {
+ printUsage();
+ }
+ }
+ if (dir == null) {
+ printUsage();
+ }
+
+ new IndexUpgrader(FSDirectory.open(new File(dir)), out, deletePriorCommits).upgrade();
+ }
+
+ private final Directory dir;
+ private final PrintStream infoStream;
+ private final IndexWriterConfig iwc;
+ private final boolean deletePriorCommits;
+
+ @SuppressWarnings("deprecation")
+ public IndexUpgrader(Directory dir) {
+ this(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, null), null, false);
+ }
+
+ @SuppressWarnings("deprecation")
+ public IndexUpgrader(Directory dir, PrintStream infoStream, boolean deletePriorCommits) {
+ this(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, null), infoStream, deletePriorCommits);
+ }
+
+ public IndexUpgrader(Directory dir, IndexWriterConfig iwc, PrintStream infoStream, boolean deletePriorCommits) {
+ this.dir = dir;
+ this.iwc = iwc;
+ this.infoStream = infoStream;
+ this.deletePriorCommits = deletePriorCommits;
+ }
+
+ public void upgrade() throws IOException {
+ if (!IndexReader.indexExists(dir)) {
+ throw new IndexNotFoundException(dir.toString());
+ }
+
+ if (!deletePriorCommits) {
+ final Collection<IndexCommit> commits = IndexReader.listCommits(dir);
+ if (commits.size() > 1) {
+ throw new IllegalArgumentException("This tool was invoked to not delete prior commit points, but the following commits were found: " + commits);
+ }
+ }
+
+ final IndexWriterConfig c = (IndexWriterConfig) iwc.clone();
+ c.setMergePolicy(new UpgradeIndexMergePolicy(c.getMergePolicy()));
+ c.setIndexDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());
+
+ final IndexWriter w = new IndexWriter(dir, c);
+ try {
+ w.setInfoStream(infoStream);
+ w.message("Upgrading all pre-" + Constants.LUCENE_MAIN_VERSION + " segments of index directory '" + dir + "' to version " + Constants.LUCENE_MAIN_VERSION + "...");
+ w.optimize();
+ w.message("All segments upgraded to version " + Constants.LUCENE_MAIN_VERSION);
+ } finally {
+ w.close();
+ }
+ }
+
+}
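A usage sketch for the new IndexUpgrader from Java code, equivalent to the command-line invocation shown in its javadoc; the index path is a placeholder:

```java
import java.io.File;
import java.io.IOException;

import org.apache.lucene.index.IndexUpgrader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public final class UpgradeExample {
  public static void main(String[] args) throws IOException {
    // Equivalent to:
    //   java -cp lucene-core.jar org.apache.lucene.index.IndexUpgrader -verbose /path/to/index
    Directory dir = FSDirectory.open(new File("/path/to/index"));
    try {
      new IndexUpgrader(dir, System.out, false).upgrade();
    } finally {
      dir.close();
    }
  }
}
```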
diff --git a/lucene/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/src/java/org/apache/lucene/index/IndexWriter.java
index 826049c997f..2a6d4ae9519 100644
--- a/lucene/src/java/org/apache/lucene/index/IndexWriter.java
+++ b/lucene/src/java/org/apache/lucene/index/IndexWriter.java
@@ -421,7 +421,7 @@ public class IndexWriter implements Closeable {
private final Map<SegmentInfo,SegmentReader> readerMap = new HashMap<SegmentInfo,SegmentReader>();
/** Forcefully clear changes for the specified segments. This is called on successful merge. */
- synchronized void clear(SegmentInfos infos) throws IOException {
+ synchronized void clear(List<SegmentInfo> infos) throws IOException {
if (infos == null) {
for (Map.Entry<SegmentInfo,SegmentReader> ent: readerMap.entrySet()) {
ent.getValue().hasChanges = false;
@@ -511,7 +511,7 @@ public class IndexWriter implements Closeable {
return false;
}
- public synchronized void drop(SegmentInfos infos) throws IOException {
+ public synchronized void drop(List<SegmentInfo> infos) throws IOException {
for(SegmentInfo info : infos) {
drop(info);
}
@@ -2355,7 +2355,7 @@ public class IndexWriter implements Closeable {
String mergedName = newSegmentName();
SegmentMerger merger = new SegmentMerger(directory, config.getTermIndexInterval(),
- mergedName, null, codecs, payloadProcessorProvider,
+ mergedName, null, payloadProcessorProvider,
globalFieldNumberMap.newFieldInfos(SegmentCodecsBuilder.create(codecs)));
for (IndexReader reader : readers) // add new indexes
@@ -2365,8 +2365,7 @@ public class IndexWriter implements Closeable {
final FieldInfos fieldInfos = merger.fieldInfos();
SegmentInfo info = new SegmentInfo(mergedName, docCount, directory,
- false, fieldInfos.hasProx(), merger.getSegmentCodecs(),
- fieldInfos.hasVectors(),
+ false, merger.getSegmentCodecs(),
fieldInfos);
setDiagnostics(info, "addIndexes(IndexReader...)");
@@ -2729,7 +2728,7 @@ public class IndexWriter implements Closeable {
assert testPoint("startCommitMergeDeletes");
- final SegmentInfos sourceSegments = merge.segments;
+ final List<SegmentInfo> sourceSegments = merge.segments;
if (infoStream != null)
message("commitMergeDeletes " + merge.segString(directory));
@@ -2741,7 +2740,7 @@ public class IndexWriter implements Closeable {
long minGen = Long.MAX_VALUE;
for(int i=0; i < sourceSegments.size(); i++) {
- SegmentInfo info = sourceSegments.info(i);
+ SegmentInfo info = sourceSegments.get(i);
minGen = Math.min(info.getBufferedDeletesGen(), minGen);
int docCount = info.docCount;
final SegmentReader previousReader = merge.readerClones.get(i);
@@ -3041,7 +3040,16 @@ public class IndexWriter implements Closeable {
// is running (while synchronized) to avoid race
// condition where two conflicting merges from different
// threads, start
- message("registerMerge merging=" + mergingSegments);
+ if (infoStream != null) {
+ StringBuilder builder = new StringBuilder("registerMerge merging= [");
+ for (SegmentInfo info : mergingSegments) {
+ builder.append(info.name).append(", ");
+ }
+ builder.append("]");
+ // don't call mergingSegments.toString() here; it could lead to a
+ // ConcurrentModificationException since merge updates the segments' FieldInfos
+ message(builder.toString());
+ }
for(SegmentInfo info : merge.segments) {
message("registerMerge info=" + info);
mergingSegments.add(info);
@@ -3094,7 +3102,7 @@ public class IndexWriter implements Closeable {
// Bind a new segment name here so even with
// ConcurrentMergePolicy we keep deterministic segment
// names.
- merge.info = new SegmentInfo(newSegmentName(), 0, directory, false, false, null, false, globalFieldNumberMap.newFieldInfos(SegmentCodecsBuilder.create(codecs)));
+ merge.info = new SegmentInfo(newSegmentName(), 0, directory, false, null, globalFieldNumberMap.newFieldInfos(SegmentCodecsBuilder.create(codecs)));
// Lock order: IW -> BD
final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream.applyDeletes(readerPool, merge.segments);
@@ -3133,6 +3141,16 @@ public class IndexWriter implements Closeable {
message("merge seg=" + merge.info.name);
}
+ assert merge.estimatedMergeBytes == 0;
+ for(SegmentInfo info : merge.segments) {
+ if (info.docCount > 0) {
+ final int delCount = numDeletedDocs(info);
+ assert delCount <= info.docCount;
+ final double delRatio = ((double) delCount)/info.docCount;
+ merge.estimatedMergeBytes += info.sizeInBytes(true) * (1.0 - delRatio);
+ }
+ }
+
// TODO: I think this should no longer be needed (we
// now build CFS before adding segment to the infos);
// however, on removing it, tests fail for some reason!
@@ -3174,7 +3192,7 @@ public class IndexWriter implements Closeable {
// It's possible we are called twice, eg if there was an
// exception inside mergeInit
if (merge.registerDone) {
- final SegmentInfos sourceSegments = merge.segments;
+ final List<SegmentInfo> sourceSegments = merge.segments;
for(SegmentInfo info : sourceSegments) {
mergingSegments.remove(info);
}
@@ -3245,21 +3263,17 @@ public class IndexWriter implements Closeable {
int mergedDocCount = 0;
- SegmentInfos sourceSegments = merge.segments;
+ List<SegmentInfo> sourceSegments = merge.segments;
SegmentMerger merger = new SegmentMerger(directory, config.getTermIndexInterval(), mergedName, merge,
- codecs, payloadProcessorProvider,
- merge.info.getFieldInfos());
+ payloadProcessorProvider, merge.info.getFieldInfos());
if (infoStream != null) {
- message("merging " + merge.segString(directory) + " mergeVectors=" + merger.fieldInfos().hasVectors());
+ message("merging " + merge.segString(directory) + " mergeVectors=" + merge.info.getFieldInfos().hasVectors());
}
merge.readers = new ArrayList<SegmentReader>();
merge.readerClones = new ArrayList<SegmentReader>();
-
- merge.estimatedMergeBytes = 0;
-
// This is try/finally to make sure merger's readers are
// closed:
boolean success = false;
@@ -3268,7 +3282,7 @@ public class IndexWriter implements Closeable {
int segUpto = 0;
while(segUpto < sourceSegments.size()) {
- final SegmentInfo info = sourceSegments.info(segUpto);
+ final SegmentInfo info = sourceSegments.get(segUpto);
// Hold onto the "live" reader; we will use this to
// commit merged deletes
@@ -3277,13 +3291,6 @@ public class IndexWriter implements Closeable {
-config.getReaderTermsIndexDivisor());
merge.readers.add(reader);
- final int readerMaxDoc = reader.maxDoc();
- if (readerMaxDoc > 0) {
- final int delCount = reader.numDeletedDocs();
- final double delRatio = ((double) delCount)/readerMaxDoc;
- merge.estimatedMergeBytes += info.sizeInBytes(true) * (1.0 - delRatio);
- }
-
// We clone the segment readers because other
// deletes may come in while we're merging so we
// need readers that will not change
@@ -3308,8 +3315,6 @@ public class IndexWriter implements Closeable {
// Record which codec was used to write the segment
merge.info.setSegmentCodecs(merger.getSegmentCodecs());
- // Record if we have merged vectors
- merge.info.setHasVectors(merger.fieldInfos().hasVectors());
if (infoStream != null) {
message("merge segmentCodecs=" + merger.getSegmentCodecs());
@@ -3323,13 +3328,11 @@ public class IndexWriter implements Closeable {
// because codec must know if prox was written for
// this segment:
//System.out.println("merger set hasProx=" + merger.hasProx() + " seg=" + merge.info.name);
- merge.info.setHasProx(merger.fieldInfos().hasProx());
-
boolean useCompoundFile;
synchronized (this) { // Guard segmentInfos
useCompoundFile = mergePolicy.useCompoundFile(segmentInfos, merge.info);
}
-
+
if (useCompoundFile) {
success = false;
final String compoundFileName = IndexFileNames.segmentFileName(mergedName, "", IndexFileNames.COMPOUND_FILE_EXTENSION);
@@ -3469,14 +3472,14 @@ public class IndexWriter implements Closeable {
}
/** @lucene.internal */
- public synchronized String segString(SegmentInfos infos) throws IOException {
+ public synchronized String segString(List<SegmentInfo> infos) throws IOException {
StringBuilder buffer = new StringBuilder();
final int count = infos.size();
for(int i = 0; i < count; i++) {
if (i > 0) {
buffer.append(' ');
}
- buffer.append(segString(infos.info(i)));
+ buffer.append(segString(infos.get(i)));
}
return buffer.toString();
@@ -3531,6 +3534,7 @@ public class IndexWriter implements Closeable {
// called only from assert
private boolean filesExist(SegmentInfos toSync) throws IOException {
+
Collection<String> files = toSync.files(directory, false);
for(final String fileName: files) {
assert directory.fileExists(fileName): "file " + fileName + " does not exist";
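The estimatedMergeBytes computation that moved into mergeInit() weights each segment's size by its fraction of live documents; a minimal sketch of that arithmetic, with plain arrays standing in for SegmentInfo:

```java
final class MergeSizeEstimateSketch {
  /** Approximates live bytes per segment by scaling its size by the non-deleted fraction. */
  static long estimateMergeBytes(long[] sizeInBytes, int[] docCount, int[] delCount) {
    long estimated = 0;
    for (int i = 0; i < sizeInBytes.length; i++) {
      if (docCount[i] > 0) {
        final double delRatio = ((double) delCount[i]) / docCount[i];
        estimated += (long) (sizeInBytes[i] * (1.0 - delRatio));
      }
    }
    return estimated; // e.g. a 100 MB segment with 25% deleted docs contributes ~75 MB
  }
}
```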
diff --git a/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java b/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java
index 1be4f26b77f..fc419bd2f7c 100644
--- a/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java
+++ b/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java
@@ -20,7 +20,6 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
-import java.util.Comparator;
import java.util.List;
import java.util.Set;
@@ -595,7 +594,7 @@ public abstract class LogMergePolicy extends MergePolicy {
} else if (!anyTooLarge) {
if (spec == null)
spec = new MergeSpecification();
- final SegmentInfos mergeInfos = new SegmentInfos();
+ final List mergeInfos = new ArrayList();
for(int i=start;i 0;
// Make sure it all adds up:
- assert docShift == maxDocID - (newStarts[docMaps.length-1] + merge.segments.info(docMaps.length-1).docCount - delCounts[docMaps.length-1]);
+ assert docShift == maxDocID - (newStarts[docMaps.length-1] + merge.segments.get(docMaps.length-1).docCount - delCounts[docMaps.length-1]);
}
public int remap(int oldDocID) {
diff --git a/lucene/src/java/org/apache/lucene/index/MergePolicy.java b/lucene/src/java/org/apache/lucene/index/MergePolicy.java
index 31289bd18d6..bbced4e9cef 100644
--- a/lucene/src/java/org/apache/lucene/index/MergePolicy.java
+++ b/lucene/src/java/org/apache/lucene/index/MergePolicy.java
@@ -75,15 +75,21 @@ public abstract class MergePolicy implements java.io.Closeable {
long estimatedMergeBytes; // used by IndexWriter
List readers; // used by IndexWriter
List readerClones; // used by IndexWriter
- public final SegmentInfos segments;
+ public final List segments;
+ public final int totalDocCount;
boolean aborted;
Throwable error;
boolean paused;
- public OneMerge(SegmentInfos segments) {
+ public OneMerge(List segments) {
if (0 == segments.size())
throw new RuntimeException("segments must include at least one segment");
this.segments = segments;
+ int count = 0;
+ for(SegmentInfo info : segments) {
+ count += info.docCount;
+ }
+ totalDocCount = count;
}
/** Record that an exception occurred while executing
@@ -147,7 +153,7 @@ public abstract class MergePolicy implements java.io.Closeable {
final int numSegments = segments.size();
for(int i=0;i 0) b.append(' ');
- b.append(segments.info(i).toString(dir, 0));
+ b.append(segments.get(i).toString(dir, 0));
}
if (info != null)
b.append(" into ").append(info.name);
diff --git a/lucene/src/java/org/apache/lucene/index/SegmentInfo.java b/lucene/src/java/org/apache/lucene/index/SegmentInfo.java
index f7999da4219..15c400e6c87 100644
--- a/lucene/src/java/org/apache/lucene/index/SegmentInfo.java
+++ b/lucene/src/java/org/apache/lucene/index/SegmentInfo.java
@@ -43,7 +43,8 @@ import org.apache.lucene.util.Constants;
* @lucene.experimental
*/
public final class SegmentInfo {
-
+ // TODO: remove with hasVector and hasProx
+ private static final int CHECK_FIELDINFO = -2;
static final int NO = -1; // e.g. no norms; no deletes;
static final int YES = 1; // e.g. have norms; have deletes;
static final int WITHOUT_GEN = 0; // a file name that has no GEN in it.
@@ -85,10 +86,12 @@ public final class SegmentInfo {
private boolean docStoreIsCompoundFile; // whether doc store files are stored in compound file (*.cfx)
private int delCount; // How many deleted docs in this segment
+
+ //TODO: remove when we don't have to support old indexes anymore that had this field
+ private int hasVectors = CHECK_FIELDINFO;
+ //TODO: remove when we don't have to support old indexes anymore that had this field
+ private int hasProx = CHECK_FIELDINFO; // True if this segment has any fields with omitTermFreqAndPositions==false
- private boolean hasProx; // True if this segment has any fields with omitTermFreqAndPositions==false
-
- private boolean hasVectors; // True if this segment wrote term vectors
private FieldInfos fieldInfos;
@@ -106,9 +109,12 @@ public final class SegmentInfo {
// NOTE: only used in-RAM by IW to track buffered deletes;
// this is never written to/read from the Directory
private long bufferedDeletesGen;
-
+
+ // holds the fieldInfos Version to refresh files() cache if FI has changed
+ private long fieldInfosVersion;
+
public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile,
- boolean hasProx, SegmentCodecs segmentCodecs, boolean hasVectors, FieldInfos fieldInfos) {
+ SegmentCodecs segmentCodecs, FieldInfos fieldInfos) {
this.name = name;
this.docCount = docCount;
this.dir = dir;
@@ -116,9 +122,7 @@ public final class SegmentInfo {
this.isCompoundFile = isCompoundFile;
this.docStoreOffset = -1;
this.docStoreSegment = name;
- this.hasProx = hasProx;
this.segmentCodecs = segmentCodecs;
- this.hasVectors = hasVectors;
delCount = 0;
version = Constants.LUCENE_MAIN_VERSION;
this.fieldInfos = fieldInfos;
@@ -213,7 +217,7 @@ public final class SegmentInfo {
delCount = input.readInt();
assert delCount <= docCount;
- hasProx = input.readByte() == YES;
+ hasProx = input.readByte();
// System.out.println(Thread.currentThread().getName() + ": si.read hasProx=" + hasProx + " seg=" + name);
if (format <= DefaultSegmentInfosWriter.FORMAT_4_0) {
@@ -226,7 +230,7 @@ public final class SegmentInfo {
diagnostics = input.readStringStringMap();
if (format <= DefaultSegmentInfosWriter.FORMAT_HAS_VECTORS) {
- hasVectors = input.readByte() == 1;
+ hasVectors = input.readByte();
} else {
final String storesSegment;
final String ext;
@@ -247,7 +251,7 @@ public final class SegmentInfo {
dirToTest = dir;
}
try {
- hasVectors = dirToTest.fileExists(IndexFileNames.segmentFileName(storesSegment, "", IndexFileNames.VECTORS_INDEX_EXTENSION));
+ hasVectors = dirToTest.fileExists(IndexFileNames.segmentFileName(storesSegment, "", IndexFileNames.VECTORS_INDEX_EXTENSION)) ? YES : NO;
} finally {
if (isCompoundFile) {
dirToTest.close();
@@ -311,14 +315,9 @@ public final class SegmentInfo {
}
public boolean getHasVectors() throws IOException {
- return hasVectors;
+ return hasVectors == CHECK_FIELDINFO ? getFieldInfos().hasVectors() : hasVectors == YES;
}
-
- public void setHasVectors(boolean v) {
- hasVectors = v;
- clearFilesCache();
- }
-
+
public FieldInfos getFieldInfos() throws IOException {
loadFieldInfos(dir, true);
return fieldInfos;
@@ -349,7 +348,7 @@ public final class SegmentInfo {
@Override
public Object clone() {
- final SegmentInfo si = new SegmentInfo(name, docCount, dir, isCompoundFile, hasProx, segmentCodecs, hasVectors,
+ final SegmentInfo si = new SegmentInfo(name, docCount, dir, isCompoundFile, segmentCodecs,
fieldInfos == null ? null : (FieldInfos) fieldInfos.clone());
si.docStoreOffset = docStoreOffset;
si.docStoreSegment = docStoreSegment;
@@ -364,6 +363,8 @@ public final class SegmentInfo {
}
}
si.version = version;
+ si.hasProx = hasProx;
+ si.hasVectors = hasVectors;
return si;
}
@@ -569,19 +570,14 @@ public final class SegmentInfo {
output.writeByte((byte) (isCompoundFile ? YES : NO));
output.writeInt(delCount);
- output.writeByte((byte) (hasProx ? 1:0));
+ output.writeByte((byte) (hasProx));
segmentCodecs.write(output);
output.writeStringStringMap(diagnostics);
- output.writeByte((byte) (hasVectors ? 1 : 0));
+ output.writeByte((byte) (hasVectors));
}
- void setHasProx(boolean hasProx) {
- this.hasProx = hasProx;
- clearFilesCache();
- }
-
- public boolean getHasProx() {
- return hasProx;
+ public boolean getHasProx() throws IOException {
+ return hasProx == CHECK_FIELDINFO ? getFieldInfos().hasProx() : hasProx == YES;
}
/** Can only be called once. */
@@ -609,13 +605,14 @@ public final class SegmentInfo {
*/
public List files() throws IOException {
-
- if (files != null) {
+ final long fisVersion = fieldInfosVersion;
+ if (fisVersion != (fieldInfosVersion = getFieldInfos().getVersion())) {
+ clearFilesCache(); // FIS has modifications - need to recompute
+ } else if (files != null) {
// Already cached:
return files;
}
-
- Set fileSet = new HashSet();
+ final Set fileSet = new HashSet();
boolean useCompoundFile = getUseCompoundFile();
@@ -637,7 +634,7 @@ public final class SegmentInfo {
} else {
fileSet.add(IndexFileNames.segmentFileName(docStoreSegment, "", IndexFileNames.FIELDS_INDEX_EXTENSION));
fileSet.add(IndexFileNames.segmentFileName(docStoreSegment, "", IndexFileNames.FIELDS_EXTENSION));
- if (hasVectors) {
+ if (getHasVectors()) {
fileSet.add(IndexFileNames.segmentFileName(docStoreSegment, "", IndexFileNames.VECTORS_INDEX_EXTENSION));
fileSet.add(IndexFileNames.segmentFileName(docStoreSegment, "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
fileSet.add(IndexFileNames.segmentFileName(docStoreSegment, "", IndexFileNames.VECTORS_FIELDS_EXTENSION));
@@ -646,7 +643,7 @@ public final class SegmentInfo {
} else if (!useCompoundFile) {
fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.FIELDS_INDEX_EXTENSION));
fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.FIELDS_EXTENSION));
- if (hasVectors) {
+ if (getHasVectors()) {
fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.VECTORS_INDEX_EXTENSION));
fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.VECTORS_FIELDS_EXTENSION));
@@ -709,8 +706,12 @@ public final class SegmentInfo {
if (this.dir != dir) {
s.append('x');
}
- if (hasVectors) {
- s.append('v');
+ try {
+ if (getHasVectors()) {
+ s.append('v');
+ }
+ } catch (IOException e) {
+ throw new RuntimeException(e);
}
s.append(docCount);
diff --git a/lucene/src/java/org/apache/lucene/index/SegmentMerger.java b/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
index 46c050e3588..4523d821286 100644
--- a/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
+++ b/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
@@ -72,7 +72,7 @@ final class SegmentMerger {
private PayloadProcessorProvider payloadProcessorProvider;
- SegmentMerger(Directory dir, int termIndexInterval, String name, MergePolicy.OneMerge merge, CodecProvider codecs, PayloadProcessorProvider payloadProcessorProvider, FieldInfos fieldInfos) {
+ SegmentMerger(Directory dir, int termIndexInterval, String name, MergePolicy.OneMerge merge, PayloadProcessorProvider payloadProcessorProvider, FieldInfos fieldInfos) {
this.payloadProcessorProvider = payloadProcessorProvider;
directory = dir;
segment = name;
diff --git a/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java b/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java
index c29add9bd93..79c2638add4 100644
--- a/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java
+++ b/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java
@@ -32,7 +32,6 @@ public class SegmentWriteState {
public final String segmentName;
public final FieldInfos fieldInfos;
public final int numDocs;
- public boolean hasVectors;
// Deletes to apply while we are flushing the segment. A
// Term is enrolled in here if it was deleted at one
diff --git a/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java b/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java
index da43f3ad311..fa956dda190 100644
--- a/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java
+++ b/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java
@@ -63,7 +63,6 @@ final class TermVectorsTermsWriter extends TermsHashConsumer {
}
lastDocID = 0;
- state.hasVectors = hasVectors;
hasVectors = false;
}
@@ -121,8 +120,7 @@ final class TermVectorsTermsWriter extends TermsHashConsumer {
fill(docState.docID);
// Append term vectors to the real outputs:
- long pointer = tvd.getFilePointer();
- tvx.writeLong(pointer);
+ tvx.writeLong(tvd.getFilePointer());
tvx.writeLong(tvf.getFilePointer());
tvd.writeVInt(numVectorFields);
if (numVectorFields > 0) {
@@ -136,6 +134,8 @@ final class TermVectorsTermsWriter extends TermsHashConsumer {
tvd.writeVLong(pos-lastPos);
lastPos = pos;
perFields[i].finishDocument();
+ // commit the termVectors once successful - FI will otherwise reset them
+ perFields[i].fieldInfo.commitVectors();
}
}
diff --git a/lucene/src/java/org/apache/lucene/index/TieredMergePolicy.java b/lucene/src/java/org/apache/lucene/index/TieredMergePolicy.java
index a070ce0f8c4..e69f612553d 100644
--- a/lucene/src/java/org/apache/lucene/index/TieredMergePolicy.java
+++ b/lucene/src/java/org/apache/lucene/index/TieredMergePolicy.java
@@ -23,6 +23,8 @@ import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Comparator;
+import java.util.List;
+import java.util.ArrayList;
/**
* Merges segments of approximately equal size, subject to
@@ -249,7 +251,7 @@ public class TieredMergePolicy extends MergePolicy {
final Collection merging = writer.get().getMergingSegments();
final Collection toBeMerged = new HashSet();
- final SegmentInfos infosSorted = new SegmentInfos();
+ final List infosSorted = new ArrayList();
infosSorted.addAll(infos);
Collections.sort(infosSorted, segmentByteSizeDescending);
@@ -277,7 +279,7 @@ public class TieredMergePolicy extends MergePolicy {
// If we have too-large segments, grace them out
// of the maxSegmentCount:
int tooBigCount = 0;
- while (tooBigCount < infosSorted.size() && size(infosSorted.info(tooBigCount)) >= maxMergedSegmentBytes/2.0) {
+ while (tooBigCount < infosSorted.size() && size(infosSorted.get(tooBigCount)) >= maxMergedSegmentBytes/2.0) {
totIndexBytes -= size(infosSorted.get(tooBigCount));
tooBigCount++;
}
@@ -310,7 +312,7 @@ public class TieredMergePolicy extends MergePolicy {
// Gather eligible segments for merging, ie segments
// not already being merged and not already picked (by
// prior iteration of this loop) for merging:
- final SegmentInfos eligible = new SegmentInfos();
+ final List eligible = new ArrayList();
for(int idx = tooBigCount; idx best = null;
boolean bestTooLarge = false;
long bestMergeBytes = 0;
@@ -341,10 +343,10 @@ public class TieredMergePolicy extends MergePolicy {
long totAfterMergeBytes = 0;
- final SegmentInfos candidate = new SegmentInfos();
+ final List candidate = new ArrayList();
boolean hitTooLarge = false;
for(int idx = startIdx;idx maxMergedSegmentBytes) {
@@ -398,7 +400,7 @@ public class TieredMergePolicy extends MergePolicy {
}
/** Expert: scores one merge; subclasses can override. */
- protected MergeScore score(SegmentInfos candidate, boolean hitTooLarge, long mergingBytes) throws IOException {
+ protected MergeScore score(List candidate, boolean hitTooLarge, long mergingBytes) throws IOException {
long totBeforeMergeBytes = 0;
long totAfterMergeBytes = 0;
long totAfterMergeBytesFloored = 0;
@@ -420,7 +422,7 @@ public class TieredMergePolicy extends MergePolicy {
// over time:
skew = 1.0/maxMergeAtOnce;
} else {
- skew = ((double) floorSize(size(candidate.info(0))))/totAfterMergeBytesFloored;
+ skew = ((double) floorSize(size(candidate.get(0))))/totAfterMergeBytesFloored;
}
// Strongly favor merges with less skew (smaller
@@ -458,7 +460,8 @@ public class TieredMergePolicy extends MergePolicy {
if (verbose()) {
message("findMergesForOptimize maxSegmentCount=" + maxSegmentCount + " infos=" + writer.get().segString(infos) + " segmentsToOptimize=" + segmentsToOptimize);
}
- SegmentInfos eligible = new SegmentInfos();
+
+ List eligible = new ArrayList();
boolean optimizeMergeRunning = false;
final Collection merging = writer.get().getMergingSegments();
for(SegmentInfo info : infos) {
@@ -499,7 +502,7 @@ public class TieredMergePolicy extends MergePolicy {
if (spec == null) {
spec = new MergeSpecification();
}
- final OneMerge merge = new OneMerge(eligible.range(end-maxMergeAtOnceExplicit, end));
+ final OneMerge merge = new OneMerge(eligible.subList(end-maxMergeAtOnceExplicit, end));
if (verbose()) {
message("add merge=" + writer.get().segString(merge.segments));
}
@@ -510,7 +513,7 @@ public class TieredMergePolicy extends MergePolicy {
if (spec == null && !optimizeMergeRunning) {
// Do final merge
final int numToMerge = end - maxSegmentCount + 1;
- final OneMerge merge = new OneMerge(eligible.range(end-numToMerge, end));
+ final OneMerge merge = new OneMerge(eligible.subList(end-numToMerge, end));
if (verbose()) {
message("add final merge=" + merge.segString(writer.get().getDirectory()));
}
@@ -527,7 +530,7 @@ public class TieredMergePolicy extends MergePolicy {
if (verbose()) {
message("findMergesToExpungeDeletes infos=" + writer.get().segString(infos) + " expungeDeletesPctAllowed=" + expungeDeletesPctAllowed);
}
- final SegmentInfos eligible = new SegmentInfos();
+ final List eligible = new ArrayList();
final Collection merging = writer.get().getMergingSegments();
for(SegmentInfo info : infos) {
double pctDeletes = 100.*((double) writer.get().numDeletedDocs(info))/info.docCount;
@@ -580,7 +583,7 @@ public class TieredMergePolicy extends MergePolicy {
spec = new MergeSpecification();
}
- final OneMerge merge = new OneMerge(eligible.range(start, upto));
+ final OneMerge merge = new OneMerge(eligible.subList(start, upto));
if (verbose()) {
message("add merge=" + writer.get().segString(merge.segments));
}
diff --git a/lucene/src/java/org/apache/lucene/index/UpgradeIndexMergePolicy.java b/lucene/src/java/org/apache/lucene/index/UpgradeIndexMergePolicy.java
new file mode 100644
index 00000000000..7e57888461d
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/index/UpgradeIndexMergePolicy.java
@@ -0,0 +1,152 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.Constants;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+/** This {@link MergePolicy} is used for upgrading all existing segments of
+ * an index when calling {@link IndexWriter#optimize()}.
+ * All other methods delegate to the base {@code MergePolicy} given to the constructor.
+ * This allows for an as-cheap-as-possible upgrade of an older index by only upgrading segments that
+ * were created by previous Lucene versions. Optimize no longer really optimizes;
+ * it is just used to "optimize" older segment versions away.
+ * In general one would use {@link IndexUpgrader}, but for a fully customizable upgrade,
+ * you can use this like any other {@code MergePolicy} and call {@link IndexWriter#optimize()}:
+ *
+ * IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_XX, new KeywordAnalyzer());
+ * iwc.setMergePolicy(new UpgradeIndexMergePolicy(iwc.getMergePolicy()));
+ * IndexWriter w = new IndexWriter(dir, iwc);
+ * w.optimize();
+ * w.close();
+ *
+ * @lucene.experimental
+ * @see IndexUpgrader
+ */
+public class UpgradeIndexMergePolicy extends MergePolicy {
+
+ protected final MergePolicy base;
+
+ /** Wrap the given {@link MergePolicy} and intercept optimize requests to
+ * only upgrade segments written with previous Lucene versions. */
+ public UpgradeIndexMergePolicy(MergePolicy base) {
+ this.base = base;
+ }
+
+ /** Returns true if the given segment should be upgraded. The default implementation
+ * will return {@code !Constants.LUCENE_MAIN_VERSION.equals(si.getVersion())},
+ * so all segments created with a different version number than this Lucene version will
+ * get upgraded.
+ */
+ protected boolean shouldUpgradeSegment(SegmentInfo si) {
+ return !Constants.LUCENE_MAIN_VERSION.equals(si.getVersion());
+ }
+
+ @Override
+ public void setIndexWriter(IndexWriter writer) {
+ super.setIndexWriter(writer);
+ base.setIndexWriter(writer);
+ }
+
+ @Override
+ public MergeSpecification findMerges(SegmentInfos segmentInfos) throws CorruptIndexException, IOException {
+ return base.findMerges(segmentInfos);
+ }
+
+ @Override
+ public MergeSpecification findMergesForOptimize(SegmentInfos segmentInfos, int maxSegmentCount, Set segmentsToOptimize) throws CorruptIndexException, IOException {
+ // first find all old segments
+ final HashSet oldSegments = new HashSet();
+ for (final SegmentInfo si : segmentInfos) {
+ if (segmentsToOptimize.contains(si) && shouldUpgradeSegment(si)) {
+ oldSegments.add(si);
+ }
+ }
+
+ if (verbose()) message("findMergesForOptimize: segmentsToUpgrade=" + oldSegments);
+
+ if (oldSegments.isEmpty())
+ return null;
+
+ MergeSpecification spec = base.findMergesForOptimize(segmentInfos, maxSegmentCount, oldSegments);
+
+ if (spec != null) {
+ // remove all segments that are in merge specification from oldSegments,
+ // the resulting set contains all segments that are left over
+ // and will be merged to one additional segment:
+ for (final OneMerge om : spec.merges) {
+ oldSegments.removeAll(om.segments);
+ }
+ }
+
+ if (!oldSegments.isEmpty()) {
+ if (verbose())
+ message("findMergesForOptimize: " + base.getClass().getSimpleName() +
+ " does not want to merge all old segments, merge remaining ones into new segment: " + oldSegments);
+ final List newInfos = new ArrayList();
+ for (final SegmentInfo si : segmentInfos) {
+ if (oldSegments.contains(si)) {
+ newInfos.add(si);
+ }
+ }
+ // add the final merge
+ if (spec == null) {
+ spec = new MergeSpecification();
+ }
+ spec.add(new OneMerge(newInfos));
+ }
+
+ return spec;
+ }
+
+ @Override
+ public MergeSpecification findMergesToExpungeDeletes(SegmentInfos segmentInfos) throws CorruptIndexException, IOException {
+ return base.findMergesToExpungeDeletes(segmentInfos);
+ }
+
+ @Override
+ public boolean useCompoundFile(SegmentInfos segments, SegmentInfo newSegment) throws IOException {
+ return base.useCompoundFile(segments, newSegment);
+ }
+
+ @Override
+ public void close() {
+ base.close();
+ }
+
+ @Override
+ public String toString() {
+ return "[" + getClass().getSimpleName() + "->" + base + "]";
+ }
+
+ private boolean verbose() {
+ IndexWriter w = writer.get();
+ return w != null && w.verbose();
+ }
+
+ private void message(String message) {
+ if (verbose())
+ writer.get().message("UPGMP: " + message);
+ }
+
+}
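The javadoc above already shows the basic wiring; for a custom upgrade criterion the protected shouldUpgradeSegment hook can be overridden. A minimal sketch (not part of this patch; the base policy and the "3.0" version-prefix check are only illustrative):

    MergePolicy upgradeOldOnly = new UpgradeIndexMergePolicy(new LogByteSizeMergePolicy()) {
      @Override
      protected boolean shouldUpgradeSegment(SegmentInfo si) {
        // assumption: pre-3.1 segments report a null or "3.0"-prefixed version string
        final String v = si.getVersion();
        return v == null || v.startsWith("3.0");
      }
    };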
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java b/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java
index bd9046bf69a..590ef0eadeb 100644
--- a/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java
+++ b/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java
@@ -72,6 +72,11 @@ public class CodecProvider {
}
}
}
+
+ /** @lucene.internal */
+ public synchronized Set listAll() {
+ return codecs.keySet();
+ }
public Collection getAllExtensions() {
return knownExtensions;
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java b/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java
index 4b42caa244b..9acb75e1d85 100644
--- a/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java
+++ b/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java
@@ -68,15 +68,8 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
@Override
public Object clone() {
- PulsingTermState clone;
- clone = (PulsingTermState) super.clone();
- if (postingsSize != -1) {
- clone.postings = new byte[postingsSize];
- System.arraycopy(postings, 0, clone.postings, 0, postingsSize);
- } else {
- assert wrappedTermState != null;
- clone.wrappedTermState = (BlockTermState) wrappedTermState.clone();
- }
+ PulsingTermState clone = new PulsingTermState();
+ clone.copyFrom(this);
return clone;
}
@@ -90,8 +83,10 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
postings = new byte[ArrayUtil.oversize(other.postingsSize, 1)];
}
System.arraycopy(other.postings, 0, postings, 0, other.postingsSize);
- } else {
+ } else if (wrappedTermState != null) {
wrappedTermState.copyFrom(other.wrappedTermState);
+ } else {
+ wrappedTermState = (BlockTermState) other.wrappedTermState.clone();
}
// NOTE: we do not copy the
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java b/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java
index 289df17ac9d..4d25e7afd5b 100644
--- a/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java
+++ b/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java
@@ -85,7 +85,7 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
}
}
- public static void files(SegmentInfo segmentInfo, String codecId, Collection files) {
+ public static void files(SegmentInfo segmentInfo, String codecId, Collection files) throws IOException {
files.add(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.DOC_EXTENSION));
files.add(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.SKIP_EXTENSION));
@@ -151,14 +151,8 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
@Override
public Object clone() {
- SepTermState other = (SepTermState) super.clone();
- other.docIndex = (IntIndexInput.Index) docIndex.clone();
- if (freqIndex != null) {
- other.freqIndex = (IntIndexInput.Index) freqIndex.clone();
- }
- if (posIndex != null) {
- other.posIndex = (IntIndexInput.Index) posIndex.clone();
- }
+ SepTermState other = new SepTermState();
+ other.copyFrom(this);
return other;
}
@@ -166,12 +160,28 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
public void copyFrom(TermState _other) {
super.copyFrom(_other);
SepTermState other = (SepTermState) _other;
- docIndex.set(other.docIndex);
- if (freqIndex != null && other.freqIndex != null) {
- freqIndex.set(other.freqIndex);
+ if (docIndex == null) {
+ docIndex = (IntIndexInput.Index) other.docIndex.clone();
+ } else {
+ docIndex.set(other.docIndex);
}
- if (posIndex != null && other.posIndex != null) {
- posIndex.set(other.posIndex);
+ if (other.freqIndex != null) {
+ if (freqIndex == null) {
+ freqIndex = (IntIndexInput.Index) other.freqIndex.clone();
+ } else {
+ freqIndex.set(other.freqIndex);
+ }
+ } else {
+ freqIndex = null;
+ }
+ if (other.posIndex != null) {
+ if (posIndex == null) {
+ posIndex = (IntIndexInput.Index) other.posIndex.clone();
+ } else {
+ posIndex.set(other.posIndex);
+ }
+ } else {
+ posIndex = null;
}
payloadFP = other.payloadFP;
skipFP = other.skipFP;
diff --git a/lucene/src/java/org/apache/lucene/queryParser/QueryParserBase.java b/lucene/src/java/org/apache/lucene/queryParser/QueryParserBase.java
index eaf6d3f0126..58c77fd2897 100644
--- a/lucene/src/java/org/apache/lucene/queryParser/QueryParserBase.java
+++ b/lucene/src/java/org/apache/lucene/queryParser/QueryParserBase.java
@@ -806,6 +806,7 @@ public abstract class QueryParserBase {
}
try {
+ source.end();
source.close();
} catch (IOException ignored) {}
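Calling end() before close() matches the full consumer workflow that the MockTokenizer state checks later in this patch enforce: reset, consume, end, close. A minimal sketch of that workflow (the analyzer, field name and text are placeholders):

    TokenStream ts = analyzer.tokenStream("field", new StringReader(text));
    ts.reset();                      // move the stream to its start state
    while (ts.incrementToken()) {
      // read attributes for the current token here
    }
    ts.end();                        // record end-of-stream state (e.g. final offset)
    ts.close();                      // release resources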
diff --git a/lucene/src/java/org/apache/lucene/search/HitQueue.java b/lucene/src/java/org/apache/lucene/search/HitQueue.java
index 15e2052568c..e5ce5bcbc7a 100644
--- a/lucene/src/java/org/apache/lucene/search/HitQueue.java
+++ b/lucene/src/java/org/apache/lucene/search/HitQueue.java
@@ -21,8 +21,6 @@ import org.apache.lucene.util.PriorityQueue;
final class HitQueue extends PriorityQueue {
- private boolean prePopulate;
-
/**
* Creates a new instance with <code>size</code> elements. If
* <code>prePopulate</code> is set to true, the queue will pre-populate itself
diff --git a/lucene/src/java/org/apache/lucene/search/IndexSearcher.java b/lucene/src/java/org/apache/lucene/search/IndexSearcher.java
index f199edc92c6..8429ec0c9af 100644
--- a/lucene/src/java/org/apache/lucene/search/IndexSearcher.java
+++ b/lucene/src/java/org/apache/lucene/search/IndexSearcher.java
@@ -46,8 +46,18 @@ import org.apache.lucene.util.ThreadInterruptedException;
*
* Applications usually need only call the inherited
* {@link #search(Query,int)}
- * or {@link #search(Query,Filter,int)} methods. For performance reasons it is
- * recommended to open only one IndexSearcher and use it for all of your searches.
+ * or {@link #search(Query,Filter,int)} methods. For
+ * performance reasons, if your index is unchanging, you
+ * should share a single IndexSearcher instance across
+ * multiple searches instead of creating a new one
+ * per-search. If your index has changed and you wish to
+ * see the changes reflected in searching, you should
+ * use {@link IndexReader#reopen} to obtain a new reader and
+ * then create a new IndexSearcher from that. Also, for
+ * low-latency turnaround it's best to use a near-real-time
+ * reader ({@link IndexReader#open(IndexWriter,boolean)}).
+ * Once you have a new {@link IndexReader}, it's relatively
+ * cheap to create a new IndexSearcher from it.
*
*
* <p><b>NOTE</b>: <code>{@link
* IndexSearcher}</code> instances are completely
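A minimal sketch of the reopen pattern recommended above (variable names are illustrative; the old reader should only be closed once no running search still uses it):

    IndexReader newReader = currentReader.reopen();
    if (newReader != currentReader) {
      IndexSearcher newSearcher = new IndexSearcher(newReader);
      // switch searches over to newSearcher, then release the old reader
      currentReader.close();
      currentReader = newReader;
    }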
diff --git a/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java b/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java
index 7cb6994ccaa..fc9598078d8 100644
--- a/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java
+++ b/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java
@@ -214,12 +214,12 @@ public class MultiPhraseQuery extends Query {
docFreq = reader.docFreq(term.field(), term.bytes());
}
- postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, docFreq, positions.get(pos).intValue());
+ postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, docFreq, positions.get(pos).intValue(), terms[0]);
}
// sort by increasing docFreq order
if (slop == 0) {
- ArrayUtil.quickSort(postingsFreqs);
+ ArrayUtil.mergeSort(postingsFreqs);
}
if (slop == 0) {
diff --git a/lucene/src/java/org/apache/lucene/search/PhrasePositions.java b/lucene/src/java/org/apache/lucene/search/PhrasePositions.java
index 303cbd166b1..00c638965cc 100644
--- a/lucene/src/java/org/apache/lucene/search/PhrasePositions.java
+++ b/lucene/src/java/org/apache/lucene/search/PhrasePositions.java
@@ -28,13 +28,15 @@ final class PhrasePositions {
int position; // position in doc
int count; // remaining pos in this doc
int offset; // position in phrase
+ final int ord; // unique across all PhrasePositions instances
final DocsAndPositionsEnum postings; // stream of docs & positions
PhrasePositions next; // used to make lists
boolean repeats; // there's other pp for same term (e.g. query="1st word 2nd word"~1)
- PhrasePositions(DocsAndPositionsEnum postings, int o) {
+ PhrasePositions(DocsAndPositionsEnum postings, int o, int ord) {
this.postings = postings;
offset = o;
+ this.ord = ord;
}
final boolean next() throws IOException { // increments to next doc
diff --git a/lucene/src/java/org/apache/lucene/search/PhraseQuery.java b/lucene/src/java/org/apache/lucene/search/PhraseQuery.java
index 2c8d977fa82..70adec70f7f 100644
--- a/lucene/src/java/org/apache/lucene/search/PhraseQuery.java
+++ b/lucene/src/java/org/apache/lucene/search/PhraseQuery.java
@@ -124,16 +124,48 @@ public class PhraseQuery extends Query {
final DocsAndPositionsEnum postings;
final int docFreq;
final int position;
+ final Term term;
- public PostingsAndFreq(DocsAndPositionsEnum postings, int docFreq, int position) {
+ public PostingsAndFreq(DocsAndPositionsEnum postings, int docFreq, int position, Term term) {
this.postings = postings;
this.docFreq = docFreq;
this.position = position;
+ this.term = term;
}
public int compareTo(PostingsAndFreq other) {
+ if (docFreq == other.docFreq) {
+ if (position == other.position) {
+ return term.compareTo(other.term);
+ }
+ return position - other.position;
+ }
return docFreq - other.docFreq;
}
+
+ @Override
+ public int hashCode() {
+ final int prime = 31;
+ int result = 1;
+ result = prime * result + docFreq;
+ result = prime * result + position;
+ result = prime * result + ((term == null) ? 0 : term.hashCode());
+ return result;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj) return true;
+ if (obj == null) return false;
+ if (getClass() != obj.getClass()) return false;
+ PostingsAndFreq other = (PostingsAndFreq) obj;
+ if (docFreq != other.docFreq) return false;
+ if (position != other.position) return false;
+ if (term == null) {
+ if (other.term != null) return false;
+ } else if (!term.equals(other.term)) return false;
+ return true;
+ }
}
private class PhraseWeight extends Weight {
@@ -197,12 +229,12 @@ public class PhraseQuery extends Query {
return null;
}
}
- postingsFreqs[i] = new PostingsAndFreq(postingsEnum, reader.docFreq(t.field(), t.bytes()), positions.get(i).intValue());
+ postingsFreqs[i] = new PostingsAndFreq(postingsEnum, reader.docFreq(t.field(), t.bytes()), positions.get(i).intValue(), t);
}
// sort by increasing docFreq order
if (slop == 0) {
- ArrayUtil.quickSort(postingsFreqs);
+ ArrayUtil.mergeSort(postingsFreqs);
}
if (slop == 0) { // optimize exact case
diff --git a/lucene/src/java/org/apache/lucene/search/PhraseQueue.java b/lucene/src/java/org/apache/lucene/search/PhraseQueue.java
index 5b19567c59c..bac0a971d7d 100644
--- a/lucene/src/java/org/apache/lucene/search/PhraseQueue.java
+++ b/lucene/src/java/org/apache/lucene/search/PhraseQueue.java
@@ -30,10 +30,16 @@ final class PhraseQueue extends PriorityQueue {
if (pp1.position == pp2.position)
// same doc and pp.position, so decide by actual term positions.
// rely on: pp.position == tp.position - offset.
- return pp1.offset < pp2.offset;
- else
+ if (pp1.offset == pp2.offset) {
+ return pp1.ord < pp2.ord;
+ } else {
+ return pp1.offset < pp2.offset;
+ }
+ else {
return pp1.position < pp2.position;
- else
+ }
+ else {
return pp1.doc < pp2.doc;
+ }
}
}
diff --git a/lucene/src/java/org/apache/lucene/search/PhraseScorer.java b/lucene/src/java/org/apache/lucene/search/PhraseScorer.java
index 1fedc2eb3ee..da84dbcca42 100644
--- a/lucene/src/java/org/apache/lucene/search/PhraseScorer.java
+++ b/lucene/src/java/org/apache/lucene/search/PhraseScorer.java
@@ -55,7 +55,7 @@ abstract class PhraseScorer extends Scorer {
// this makes it easy to identify a matching (exact) phrase
// when all PhrasePositions have exactly the same position.
for (int i = 0; i < postings.length; i++) {
- PhrasePositions pp = new PhrasePositions(postings[i].postings, postings[i].position);
+ PhrasePositions pp = new PhrasePositions(postings[i].postings, postings[i].position, i);
if (last != null) { // add next to end of list
last.next = pp;
} else {
diff --git a/lucene/src/java/org/apache/lucene/search/TopTermsRewrite.java b/lucene/src/java/org/apache/lucene/search/TopTermsRewrite.java
index 472e99de705..24356e27bcf 100644
--- a/lucene/src/java/org/apache/lucene/search/TopTermsRewrite.java
+++ b/lucene/src/java/org/apache/lucene/search/TopTermsRewrite.java
@@ -134,7 +134,7 @@ public abstract class TopTermsRewrite extends TermCollectingRew
final Term placeholderTerm = new Term(query.field);
final Q q = getTopLevelQuery();
final ScoreTerm[] scoreTerms = stQueue.toArray(new ScoreTerm[stQueue.size()]);
- ArrayUtil.quickSort(scoreTerms, scoreTermSortByTermComp);
+ ArrayUtil.mergeSort(scoreTerms, scoreTermSortByTermComp);
for (final ScoreTerm st : scoreTerms) {
final Term term = placeholderTerm.createTerm(st.bytes);
assert reader.docFreq(term) == st.termState.docFreq() : "reader DF is " + reader.docFreq(term) + " vs " + st.termState.docFreq();
diff --git a/lucene/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java b/lucene/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java
index 2bc9f87d27f..0eae1582573 100644
--- a/lucene/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java
+++ b/lucene/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java
@@ -190,7 +190,7 @@ public class NearSpansOrdered extends Spans {
/** Advance the subSpans to the same document */
private boolean toSameDoc() throws IOException {
- ArrayUtil.quickSort(subSpansByDoc, spanDocComparator);
+ ArrayUtil.mergeSort(subSpansByDoc, spanDocComparator);
int firstIndex = 0;
int maxDoc = subSpansByDoc[subSpansByDoc.length - 1].doc();
while (subSpansByDoc[firstIndex].doc() != maxDoc) {
diff --git a/lucene/src/java/org/apache/lucene/util/SorterTemplate.java b/lucene/src/java/org/apache/lucene/util/SorterTemplate.java
index b0e558c1c20..1ce4619984f 100644
--- a/lucene/src/java/org/apache/lucene/util/SorterTemplate.java
+++ b/lucene/src/java/org/apache/lucene/util/SorterTemplate.java
@@ -62,13 +62,26 @@ public abstract class SorterTemplate {
/** Sorts via in-place, but unstable, QuickSort algorithm.
* For small collections falls back to {@link #insertionSort(int,int)}. */
- public final void quickSort(int lo, int hi) {
+ public final void quickSort(final int lo, final int hi) {
+ if (hi <= lo) return;
+ // from Integer's Javadocs: ceil(log2(x)) = 32 - numberOfLeadingZeros(x - 1)
+ quickSort(lo, hi, (Integer.SIZE - Integer.numberOfLeadingZeros(hi - lo)) << 1);
+ }
+
+ private void quickSort(int lo, int hi, int maxDepth) {
+ // fall back to insertion when array has short length
final int diff = hi - lo;
if (diff <= QUICKSORT_THRESHOLD) {
insertionSort(lo, hi);
return;
}
+ // fall back to merge sort when recursion depth gets too big
+ if (--maxDepth == 0) {
+ mergeSort(lo, hi);
+ return;
+ }
+
final int mid = lo + (diff >>> 1);
if (compare(lo, mid) > 0) {
@@ -101,8 +114,8 @@ public abstract class SorterTemplate {
}
}
- quickSort(lo, left);
- quickSort(left + 1, hi);
+ quickSort(lo, left, maxDepth);
+ quickSort(left + 1, hi, maxDepth);
}
/** Sorts via stable in-place MergeSort algorithm
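quickSort is now depth-limited in the style of introsort: once a recursion budget of roughly twice the binary logarithm of the range length is used up, it falls back to the stable mergeSort, which bounds the worst case. A small sketch of how that budget grows (purely illustrative):

    for (int n : new int[] {16, 1000, 1 << 20}) {
      int maxDepth = (Integer.SIZE - Integer.numberOfLeadingZeros(n)) << 1;
      System.out.println(n + " elements -> recursion budget " + maxDepth); // ~2 * log2(n)
    }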
diff --git a/lucene/src/java/org/apache/lucene/util/automaton/fst/Builder.java b/lucene/src/java/org/apache/lucene/util/automaton/fst/Builder.java
index fed8cd21098..19949170936 100644
--- a/lucene/src/java/org/apache/lucene/util/automaton/fst/Builder.java
+++ b/lucene/src/java/org/apache/lucene/util/automaton/fst/Builder.java
@@ -261,9 +261,12 @@ public class Builder {
add(scratchIntsRef, output);
}
+ /** It's OK to add the same input twice in a row with
+ * different outputs, as long as the outputs implementation
+ * supports the merge method. */
public void add(IntsRef input, T output) throws IOException {
//System.out.println("\nFST ADD: input=" + input + " output=" + fst.outputs.outputToString(output));
- assert lastInput.length == 0 || input.compareTo(lastInput) > 0: "inputs are added out of order lastInput=" + lastInput + " vs input=" + input;
+ assert lastInput.length == 0 || input.compareTo(lastInput) >= 0: "inputs are added out of order lastInput=" + lastInput + " vs input=" + input;
assert validOutput(output);
//System.out.println("\nadd: " + input);
@@ -347,8 +350,15 @@ public class Builder {
assert validOutput(output);
}
- // push remaining output:
- frontier[prefixLenPlus1-1].setLastOutput(input.ints[input.offset + prefixLenPlus1-1], output);
+ if (lastInput.length == input.length && prefixLenPlus1 == 1+input.length) {
+ // same input more than 1 time in a row, mapping to
+ // multiple outputs
+ lastNode.output = fst.outputs.merge(lastNode.output, output);
+ } else {
+ // this new arc is private to this new input; set its
+ // arc output to the leftover output:
+ frontier[prefixLenPlus1-1].setLastOutput(input.ints[input.offset + prefixLenPlus1-1], output);
+ }
// save last input
lastInput.copy(input);
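With the ordering assertion relaxed to >=, the same input may now be added twice in a row, and the two outputs are combined via Outputs.merge. A sketch of what that merge produces, using the UpToTwoPositiveIntOutputs class added later in this patch:

    UpToTwoPositiveIntOutputs outputs = UpToTwoPositiveIntOutputs.getSingleton(true);
    Object first = outputs.get(42);                        // plain Long output
    Object merged = outputs.merge(first, outputs.get(9));
    // merged is a TwoLongs holding 42 and 9, in insertion order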
diff --git a/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java b/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java
index dde66270873..dbce4c011c3 100644
--- a/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java
+++ b/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java
@@ -231,10 +231,13 @@ public class FST {
}
void setEmptyOutput(T v) throws IOException {
- if (emptyOutput != null && !emptyOutput.equals(v)) {
- throw new IllegalStateException("empty output is already set: " + outputs.outputToString(emptyOutput) + " vs " + outputs.outputToString(v));
+ if (emptyOutput != null) {
+ if (!emptyOutput.equals(v)) {
+ emptyOutput = outputs.merge(emptyOutput, v);
+ }
+ } else {
+ emptyOutput = v;
}
- emptyOutput = v;
// TODO: this is messy -- replace with sillyBytesWriter; maybe make
// bytes private
@@ -446,25 +449,17 @@ public class FST {
// reverse bytes in-place; we do this so that the
// "BIT_TARGET_NEXT" opto can work, ie, it reads the
// node just before the current one
- final int endAddress = writer.posWrite;
- final int stopAt = (endAddress - startAddress)/2;
- int upto = 0;
- while (upto < stopAt) {
- final byte b = bytes[startAddress+upto];
- bytes[startAddress+upto] = bytes[endAddress-upto-1];
- bytes[endAddress-upto-1] = b;
- upto++;
+ final int endAddress = lastFrozenNode = writer.posWrite - 1;
+
+ int left = startAddress;
+ int right = endAddress;
+ while (left < right) {
+ final byte b = bytes[left];
+ bytes[left++] = bytes[right];
+ bytes[right--] = b;
}
- lastFrozenNode = endAddress - 1;
- /*
- System.out.println(" return node addr=" + (endAddress-1));
- for(int i=endAddress-1;i>=startAddress;i--) {
- System.out.println(" bytes[" + i + "]=" + bytes[i]);
- }
- */
-
- return endAddress-1;
+ return endAddress;
}
/** Fills virtual 'start' arc, ie, an empty incoming arc to
diff --git a/lucene/src/java/org/apache/lucene/util/automaton/fst/FSTEnum.java b/lucene/src/java/org/apache/lucene/util/automaton/fst/FSTEnum.java
index 77484164c01..db1b7ddee12 100644
--- a/lucene/src/java/org/apache/lucene/util/automaton/fst/FSTEnum.java
+++ b/lucene/src/java/org/apache/lucene/util/automaton/fst/FSTEnum.java
@@ -140,7 +140,7 @@ abstract class FSTEnum {
// Arcs are fixed array -- use binary search to find
// the target.
- final FST.BytesReader in = fst.getBytesReader(0);
+ final FST.BytesReader in = fst.getBytesReader(0);
int low = arc.arcIdx;
int high = arc.numArcs-1;
int mid = 0;
@@ -278,7 +278,7 @@ abstract class FSTEnum {
// Arcs are fixed array -- use binary search to find
// the target.
- final FST.BytesReader in = fst.getBytesReader(0);
+ final FST.BytesReader in = fst.getBytesReader(0);
int low = arc.arcIdx;
int high = arc.numArcs-1;
int mid = 0;
diff --git a/lucene/src/java/org/apache/lucene/util/automaton/fst/NodeHash.java b/lucene/src/java/org/apache/lucene/util/automaton/fst/NodeHash.java
index 02719d81a6e..dde6409fc9a 100644
--- a/lucene/src/java/org/apache/lucene/util/automaton/fst/NodeHash.java
+++ b/lucene/src/java/org/apache/lucene/util/automaton/fst/NodeHash.java
@@ -40,7 +40,7 @@ final class NodeHash {
return false;
}
for(int arcUpto=0;arcUpto arc = node.arcs[arcUpto];
if (arc.label != scratchArc.label ||
!arc.output.equals(scratchArc.output) ||
((Builder.CompiledNode) arc.target).address != scratchArc.target ||
diff --git a/lucene/src/java/org/apache/lucene/util/automaton/fst/Outputs.java b/lucene/src/java/org/apache/lucene/util/automaton/fst/Outputs.java
index 18f4dc29432..66efc3ff008 100644
--- a/lucene/src/java/org/apache/lucene/util/automaton/fst/Outputs.java
+++ b/lucene/src/java/org/apache/lucene/util/automaton/fst/Outputs.java
@@ -54,4 +54,8 @@ public abstract class Outputs {
public abstract T getNoOutput();
public abstract String outputToString(T output);
+
+ public T merge(T first, T second) {
+ throw new UnsupportedOperationException();
+ }
}
diff --git a/lucene/src/java/org/apache/lucene/util/automaton/fst/PairOutputs.java b/lucene/src/java/org/apache/lucene/util/automaton/fst/PairOutputs.java
index fc8aa6691f3..7b6ead92a91 100644
--- a/lucene/src/java/org/apache/lucene/util/automaton/fst/PairOutputs.java
+++ b/lucene/src/java/org/apache/lucene/util/automaton/fst/PairOutputs.java
@@ -43,7 +43,7 @@ public class PairOutputs<A,B> extends Outputs<PairOutputs.Pair<A,B>> {
this.output2 = output2;
}
- @Override @SuppressWarnings("unchecked")
+ @Override @SuppressWarnings("rawtypes")
public boolean equals(Object other) {
if (other == this) {
return true;
diff --git a/lucene/src/java/org/apache/lucene/util/automaton/fst/PositiveIntOutputs.java b/lucene/src/java/org/apache/lucene/util/automaton/fst/PositiveIntOutputs.java
index ba17fe99dee..984324e07ce 100644
--- a/lucene/src/java/org/apache/lucene/util/automaton/fst/PositiveIntOutputs.java
+++ b/lucene/src/java/org/apache/lucene/util/automaton/fst/PositiveIntOutputs.java
@@ -22,14 +22,11 @@ import java.io.IOException;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
-// TODO: make a sharing and non-sharing variant; eg if you
-// output docFreq per term the FST will be smaller if you
-// don't share since they are not "well shared"
-
/**
* Output is a long, for each input term. NOTE: the
* resulting FST is not guaranteed to be minimal! See
- * {@link Builder}.
+ * {@link Builder}. You cannot store 0 output with this
+ * (that's reserved to mean "no output")!
* @lucene.experimental
*/
diff --git a/lucene/src/java/org/apache/lucene/util/automaton/fst/UpToTwoPositiveIntOutputs.java b/lucene/src/java/org/apache/lucene/util/automaton/fst/UpToTwoPositiveIntOutputs.java
new file mode 100644
index 00000000000..0c388d28710
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/util/automaton/fst/UpToTwoPositiveIntOutputs.java
@@ -0,0 +1,224 @@
+package org.apache.lucene.util.automaton.fst;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.DataOutput;
+
+/**
+ * Holds one or two longs for each input term. If it's a
+ * single output, Long is returned; else, TwoLongs. Order
+ * is preserved in the TwoLongs case, ie .first is the first
+ * input/output added to Builder, and .second is the
+ * second. You cannot store 0 output with this (that's
+ * reserved to mean "no output")!
+ *
+ * NOTE: the resulting FST is not guaranteed to be minimal!
+ * See {@link Builder}.
+ *
+ * @lucene.experimental
+ */
+
+public final class UpToTwoPositiveIntOutputs extends Outputs<Object> {
+
+ public final static class TwoLongs {
+ final long first;
+ final long second;
+
+ public TwoLongs(long first, long second) {
+ this.first = first;
+ this.second = second;
+ assert first >= 0;
+ assert second >= 0;
+ }
+
+ @Override
+ public String toString() {
+ return "TwoLongs:" + first + "," + second;
+ }
+
+ @Override
+ public boolean equals(Object _other) {
+ if (_other instanceof TwoLongs) {
+ final TwoLongs other = (TwoLongs) _other;
+ return first == other.first && second == other.second;
+ } else {
+ return false;
+ }
+ }
+
+ @Override
+ public int hashCode() {
+ return (int) ((first^(first>>>32)) ^ (second^(second>>32)));
+ }
+ }
+
+ private final static Long NO_OUTPUT = new Long(0);
+
+ private final boolean doShare;
+
+ private final static UpToTwoPositiveIntOutputs singletonShare = new UpToTwoPositiveIntOutputs(true);
+ private final static UpToTwoPositiveIntOutputs singletonNoShare = new UpToTwoPositiveIntOutputs(false);
+
+ private UpToTwoPositiveIntOutputs(boolean doShare) {
+ this.doShare = doShare;
+ }
+
+ public static UpToTwoPositiveIntOutputs getSingleton(boolean doShare) {
+ return doShare ? singletonShare : singletonNoShare;
+ }
+
+ public Long get(long v) {
+ if (v == 0) {
+ return NO_OUTPUT;
+ } else {
+ return Long.valueOf(v);
+ }
+ }
+
+ public TwoLongs get(long first, long second) {
+ return new TwoLongs(first, second);
+ }
+
+ @Override
+ public Long common(Object _output1, Object _output2) {
+ assert valid(_output1, false);
+ assert valid(_output2, false);
+ final Long output1 = (Long) _output1;
+ final Long output2 = (Long) _output2;
+ if (output1 == NO_OUTPUT || output2 == NO_OUTPUT) {
+ return NO_OUTPUT;
+ } else if (doShare) {
+ assert output1 > 0;
+ assert output2 > 0;
+ return Math.min(output1, output2);
+ } else if (output1.equals(output2)) {
+ return output1;
+ } else {
+ return NO_OUTPUT;
+ }
+ }
+
+ @Override
+ public Long subtract(Object _output, Object _inc) {
+ assert valid(_output, false);
+ assert valid(_inc, false);
+ final Long output = (Long) _output;
+ final Long inc = (Long) _inc;
+ assert output >= inc;
+
+ if (inc == NO_OUTPUT) {
+ return output;
+ } else if (output.equals(inc)) {
+ return NO_OUTPUT;
+ } else {
+ return output - inc;
+ }
+ }
+
+ @Override
+ public Object add(Object _prefix, Object _output) {
+ assert valid(_prefix, false);
+ assert valid(_output, true);
+ final Long prefix = (Long) _prefix;
+ if (_output instanceof Long) {
+ final Long output = (Long) _output;
+ if (prefix == NO_OUTPUT) {
+ return output;
+ } else if (output == NO_OUTPUT) {
+ return prefix;
+ } else {
+ return prefix + output;
+ }
+ } else {
+ final TwoLongs output = (TwoLongs) _output;
+ final long v = prefix;
+ return new TwoLongs(output.first + v, output.second + v);
+ }
+ }
+
+ @Override
+ public void write(Object _output, DataOutput out) throws IOException {
+ assert valid(_output, true);
+ if (_output instanceof Long) {
+ final Long output = (Long) _output;
+ out.writeVLong(output<<1);
+ } else {
+ final TwoLongs output = (TwoLongs) _output;
+ out.writeVLong((output.first<<1) | 1);
+ out.writeVLong(output.second);
+ }
+ }
+
+ @Override
+ public Object read(DataInput in) throws IOException {
+ final long code = in.readVLong();
+ if ((code & 1) == 0) {
+ // single long
+ final long v = code >>> 1;
+ if (v == 0) {
+ return NO_OUTPUT;
+ } else {
+ return Long.valueOf(v);
+ }
+ } else {
+ // two longs
+ final long first = code >>> 1;
+ final long second = in.readVLong();
+ return new TwoLongs(first, second);
+ }
+ }
+
+ private boolean valid(Long o) {
+ assert o != null;
+ assert o instanceof Long;
+ assert o == NO_OUTPUT || o > 0;
+ return true;
+ }
+
+ // Used only by assert
+ private boolean valid(Object _o, boolean allowDouble) {
+ if (!allowDouble) {
+ assert _o instanceof Long;
+ return valid((Long) _o);
+ } else if (_o instanceof TwoLongs) {
+ return true;
+ } else {
+ return valid((Long) _o);
+ }
+ }
+
+ @Override
+ public Object getNoOutput() {
+ return NO_OUTPUT;
+ }
+
+ @Override
+ public String outputToString(Object output) {
+ return output.toString();
+ }
+
+ @Override
+ public Object merge(Object first, Object second) {
+ assert valid(first, false);
+ assert valid(second, false);
+ return new TwoLongs((Long) first, (Long) second);
+ }
+}
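The write()/read() methods above tag the vLong-encoded value with its low bit to distinguish the two cases; a tiny sketch of that tagging (plain arithmetic, no Lucene APIs):

    long single = 42L;
    long taggedSingle = single << 1;      // even code -> a single Long output
    long first = 7L, second = 9L;
    long taggedPair = (first << 1) | 1;   // odd code -> TwoLongs; 'second' follows as another vLong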
diff --git a/lucene/src/site/src/documentation/content/xdocs/fileformats.xml b/lucene/src/site/src/documentation/content/xdocs/fileformats.xml
index eacbc16c3e8..228e18a2b62 100644
--- a/lucene/src/site/src/documentation/content/xdocs/fileformats.xml
+++ b/lucene/src/site/src/documentation/content/xdocs/fileformats.xml
@@ -90,6 +90,14 @@
In version 3.1, segments records the code version
that created them. See LUCENE-2720 for details.
+
+ Additionally, segments now track explicitly whether or
+ not they have term vectors. See LUCENE-2811 for details.
+
+
+ In version 3.2, numeric fields are written natively to
+ the stored fields file; previously they were stored in
+ text format only.
@@ -935,7 +943,7 @@
3.1
Segments --> Format, Version, NameCounter, SegCount, <SegVersion, SegName, SegSize, DelGen, DocStoreOffset, [DocStoreSegment, DocStoreIsCompoundFile], HasSingleNormFile, NumField,
NormGen^NumField,
- IsCompoundFile, DeletionCount, HasProx, Diagnostics>^SegCount, CommitUserData, Checksum
+ IsCompoundFile, DeletionCount, HasProx, Diagnostics, HasVectors>^SegCount, CommitUserData, Checksum
@@ -957,7 +965,7 @@
IsCompoundFile, HasSingleNormFile,
- DocStoreIsCompoundFile, HasProx --> Int8
+ DocStoreIsCompoundFile, HasProx, HasVectors --> Int8
@@ -1083,6 +1091,10 @@
Lucene version, OS, Java version, why the segment
was created (merge, flush, addIndexes), etc.
+
+ HasVectors is 1 if this segment stores term vectors,
+ else it's 0.
+
@@ -1293,10 +1305,18 @@
third bit is one for fields with compression option enabled
(if compression is enabled, the algorithm used is ZLIB),
only available for indexes until Lucene version 2.9.x
+ 4th to 6th bits (mask: 0x7<<3) define the type of a
+ numeric field:
+ all bits in mask are cleared if no numeric field at all
+ 1<<3: Value is Int
+ 2<<3: Value is Long
+ 3<<3: Value is Int as Float (as of Float.intBitsToFloat)
+ 4<<3: Value is Long as Double (as of Double.longBitsToDouble)
+
Value -->
- String | BinaryValue (depending on Bits)
+ String | BinaryValue | Int | Long (depending on Bits)
BinaryValue -->
ValueSize, <Byte>^ValueSize
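A sketch of decoding the numeric-type bits described above (helper names are illustrative; 'bits' stands for the per-field status byte):

    static int numericKind(int bits) {
      // 0 = not numeric, 1 = int, 2 = long, 3 = int bits of a float, 4 = long bits of a double
      return (bits >>> 3) & 0x7;
    }
    static boolean isNumeric(int bits) {
      return numericKind(bits) != 0;
    }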
diff --git a/lucene/src/site/src/documentation/content/xdocs/gettingstarted.xml b/lucene/src/site/src/documentation/content/xdocs/gettingstarted.xml
index 4dde0f34ecb..7ab6441214e 100644
--- a/lucene/src/site/src/documentation/content/xdocs/gettingstarted.xml
+++ b/lucene/src/site/src/documentation/content/xdocs/gettingstarted.xml
@@ -28,11 +28,11 @@ may wish to skip sections.
diff --git a/lucene/src/test-framework/org/apache/lucene/analysis/BaseTokenStreamTestCase.java b/lucene/src/test-framework/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
index 4d4141c6ab3..c5bb9f26448 100644
--- a/lucene/src/test-framework/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
+++ b/lucene/src/test-framework/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
@@ -262,6 +262,7 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
tokens.add(termAtt.toString());
// TODO: we could collect offsets etc here for better checking that reset() really works.
}
+ ts.end();
ts.close();
// verify reusing is "reproducible" and also get the normal tokenstream sanity checks
if (!tokens.isEmpty())
diff --git a/lucene/src/test-framework/org/apache/lucene/analysis/MockAnalyzer.java b/lucene/src/test-framework/org/apache/lucene/analysis/MockAnalyzer.java
index ae889c1c3b1..3818d071f99 100644
--- a/lucene/src/test-framework/org/apache/lucene/analysis/MockAnalyzer.java
+++ b/lucene/src/test-framework/org/apache/lucene/analysis/MockAnalyzer.java
@@ -36,6 +36,7 @@ public final class MockAnalyzer extends Analyzer {
private int positionIncrementGap;
private final Random random;
private Map previousMappings = new HashMap();
+ private boolean enableChecks = true;
/**
* Creates a new MockAnalyzer.
@@ -75,6 +76,7 @@ public final class MockAnalyzer extends Analyzer {
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
MockTokenizer tokenizer = new MockTokenizer(reader, runAutomaton, lowerCase);
+ tokenizer.setEnableChecks(enableChecks);
TokenFilter filt = new MockTokenFilter(tokenizer, filter, enablePositionIncrements);
filt = maybePayload(filt, fieldName);
return filt;
@@ -98,13 +100,13 @@ public final class MockAnalyzer extends Analyzer {
if (saved == null) {
saved = new SavedStreams();
saved.tokenizer = new MockTokenizer(reader, runAutomaton, lowerCase);
+ saved.tokenizer.setEnableChecks(enableChecks);
saved.filter = new MockTokenFilter(saved.tokenizer, filter, enablePositionIncrements);
saved.filter = maybePayload(saved.filter, fieldName);
map.put(fieldName, saved);
return saved.filter;
} else {
saved.tokenizer.reset(reader);
- saved.filter.reset();
return saved.filter;
}
}
@@ -139,4 +141,12 @@ public final class MockAnalyzer extends Analyzer {
public int getPositionIncrementGap(String fieldName){
return positionIncrementGap;
}
+
+ /**
+ * Toggle consumer workflow checking: if your test consumes tokenstreams normally you
+ * should leave this enabled.
+ */
+ public void setEnableChecks(boolean enableChecks) {
+ this.enableChecks = enableChecks;
+ }
}
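A sketch of the new toggle in a test that deliberately consumes a TokenStream outside the normal workflow (assuming the usual Random-based convenience constructor):

    MockAnalyzer analyzer = new MockAnalyzer(random);
    analyzer.setEnableChecks(false);   // this test skips reset()/end(), so disable the lifecycle asserts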
diff --git a/lucene/src/test-framework/org/apache/lucene/analysis/MockPayloadAnalyzer.java b/lucene/src/test-framework/org/apache/lucene/analysis/MockPayloadAnalyzer.java
index 63d99af28c6..fe64ad8884e 100644
--- a/lucene/src/test-framework/org/apache/lucene/analysis/MockPayloadAnalyzer.java
+++ b/lucene/src/test-framework/org/apache/lucene/analysis/MockPayloadAnalyzer.java
@@ -86,6 +86,7 @@ final class MockPayloadFilter extends TokenFilter {
@Override
public void reset() throws IOException {
+ super.reset();
i = 0;
pos = 0;
}
diff --git a/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java b/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java
index 6e4f30b3968..15e501f0f41 100644
--- a/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java
+++ b/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java
@@ -20,14 +20,15 @@ package org.apache.lucene.analysis;
import java.io.IOException;
import java.io.Reader;
-import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.apache.lucene.util.automaton.RegExp;
/**
* Automaton-based tokenizer for testing. Optionally lowercases.
*/
-public class MockTokenizer extends CharTokenizer {
+public class MockTokenizer extends Tokenizer {
/** Acts Similar to WhitespaceTokenizer */
public static final CharacterRunAutomaton WHITESPACE =
new CharacterRunAutomaton(new RegExp("[^ \t\r\n]+").toAutomaton());
@@ -45,21 +46,88 @@ public class MockTokenizer extends CharTokenizer {
private final boolean lowerCase;
private int state;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+ int off = 0;
+
+ // TODO: "register" with LuceneTestCase to ensure all streams are closed() ?
+ // currently, we can only check that the lifecycle is correct if someone is reusing,
+ // but not for "one-offs".
+ private static enum State {
+ SETREADER, // consumer set a reader input either via ctor or via reset(Reader)
+ RESET, // consumer has called reset()
+ INCREMENT, // consumer is consuming, has called incrementToken() == true
+ INCREMENT_FALSE, // consumer has called incrementToken() which returned false
+ END, // consumer has called end() to perform end of stream operations
+ CLOSE // consumer has called close() to release any resources
+ };
+
+ private State streamState = State.CLOSE;
+ private boolean enableChecks = true;
+
public MockTokenizer(AttributeFactory factory, Reader input, CharacterRunAutomaton runAutomaton, boolean lowerCase) {
- super(LuceneTestCase.TEST_VERSION_CURRENT, factory, input);
+ super(factory, input);
this.runAutomaton = runAutomaton;
this.lowerCase = lowerCase;
this.state = runAutomaton.getInitialState();
+ this.streamState = State.SETREADER;
}
public MockTokenizer(Reader input, CharacterRunAutomaton runAutomaton, boolean lowerCase) {
- super(LuceneTestCase.TEST_VERSION_CURRENT, input);
+ super(input);
this.runAutomaton = runAutomaton;
this.lowerCase = lowerCase;
this.state = runAutomaton.getInitialState();
+ this.streamState = State.SETREADER;
}
@Override
+ public final boolean incrementToken() throws IOException {
+ assert !enableChecks || (streamState == State.RESET || streamState == State.INCREMENT)
+ : "incrementToken() called while in wrong state: " + streamState;
+ clearAttributes();
+ for (;;) {
+ int startOffset = off;
+ int cp = readCodePoint();
+ if (cp < 0) {
+ break;
+ } else if (isTokenChar(cp)) {
+ int endOffset;
+ do {
+ char chars[] = Character.toChars(normalize(cp));
+ for (int i = 0; i < chars.length; i++)
+ termAtt.append(chars[i]);
+ endOffset = off;
+ cp = readCodePoint();
+ } while (cp >= 0 && isTokenChar(cp));
+ offsetAtt.setOffset(startOffset, endOffset);
+ streamState = State.INCREMENT;
+ return true;
+ }
+ }
+ streamState = State.INCREMENT_FALSE;
+ return false;
+ }
+
+ protected int readCodePoint() throws IOException {
+ int ch = input.read();
+ if (ch < 0) {
+ return ch;
+ } else {
+ assert !Character.isLowSurrogate((char) ch);
+ off++;
+ if (Character.isHighSurrogate((char) ch)) {
+ int ch2 = input.read();
+ if (ch2 >= 0) {
+ off++;
+ assert Character.isLowSurrogate((char) ch2);
+ return Character.toCodePoint((char) ch, (char) ch2);
+ }
+ }
+ return ch;
+ }
+ }
+
protected boolean isTokenChar(int c) {
state = runAutomaton.step(state, c);
if (state < 0) {
@@ -70,7 +138,6 @@ public class MockTokenizer extends CharTokenizer {
}
}
- @Override
protected int normalize(int c) {
return lowerCase ? Character.toLowerCase(c) : c;
}
@@ -79,5 +146,43 @@ public class MockTokenizer extends CharTokenizer {
public void reset() throws IOException {
super.reset();
state = runAutomaton.getInitialState();
+ off = 0;
+ assert !enableChecks || streamState != State.RESET : "double reset()";
+ streamState = State.RESET;
+ }
+
+ @Override
+ public void close() throws IOException {
+ super.close();
+ // in some exceptional cases (e.g. TestIndexWriterExceptions) a test can prematurely close()
+ // these tests should disable this check, by default we check the normal workflow.
+ // TODO: investigate the CachingTokenFilter "double-close"... for now we ignore this
+ assert !enableChecks || streamState == State.END || streamState == State.CLOSE : "close() called in wrong state: " + streamState;
+ streamState = State.CLOSE;
+ }
+
+ @Override
+ public void reset(Reader input) throws IOException {
+ super.reset(input);
+ assert !enableChecks || streamState == State.CLOSE : "setReader() called in wrong state: " + streamState;
+ streamState = State.SETREADER;
+ }
+
+ @Override
+ public void end() throws IOException {
+ int finalOffset = correctOffset(off);
+ offsetAtt.setOffset(finalOffset, finalOffset);
+ // some tokenizers, such as limiting tokenizers, call end() before incrementToken() returns false.
+ // these tests should disable this check (in general you should consume the entire stream)
+ assert !enableChecks || streamState == State.INCREMENT_FALSE : "end() called before incrementToken() returned false!";
+ streamState = State.END;
+ }
+
+ /**
+ * Toggle consumer workflow checking: if your test consumes tokenstreams normally you
+ * should leave this enabled.
+ */
+ public void setEnableChecks(boolean enableChecks) {
+ this.enableChecks = enableChecks;
}
}
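
The state machine above (SETREADER, RESET, INCREMENT, INCREMENT_FALSE, END, CLOSE) encodes the consumer contract that MockTokenizer now asserts. A minimal sketch of a consumer that passes these checks, assuming the TokenStream and CharTermAttribute APIs of this revision (the helper class name is illustrative, not part of the patch):

    import java.io.IOException;

    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    // Illustrative consumer: reset(), then incrementToken() until it returns
    // false, then end(), then close() -- the exact order the asserts expect.
    public final class WellBehavedConsumer {
      public static void consume(TokenStream ts) throws IOException {
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        ts.reset();                     // State.RESET
        while (ts.incrementToken()) {   // State.INCREMENT
          System.out.println(termAtt.toString());
        }                               // false return -> State.INCREMENT_FALSE
        ts.end();                       // State.END: final offset bookkeeping
        ts.close();                     // State.CLOSE: resources released
      }
    }
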
diff --git a/lucene/src/test-framework/org/apache/lucene/index/MockRandomMergePolicy.java b/lucene/src/test-framework/org/apache/lucene/index/MockRandomMergePolicy.java
index e8bc977931b..0cc621aff6d 100644
--- a/lucene/src/test-framework/org/apache/lucene/index/MockRandomMergePolicy.java
+++ b/lucene/src/test-framework/org/apache/lucene/index/MockRandomMergePolicy.java
@@ -18,7 +18,9 @@ package org.apache.lucene.index;
*/
import java.io.IOException;
+import java.util.ArrayList;
import java.util.Collections;
+import java.util.List;
import java.util.Random;
import java.util.Set;
@@ -58,21 +60,36 @@ public class MockRandomMergePolicy extends MergePolicy {
 SegmentInfos segmentInfos, int maxSegmentCount, Set<SegmentInfo> segmentsToOptimize)
throws CorruptIndexException, IOException {
- //System.out.println("MRMP: findMergesForOptimize sis=" + segmentInfos);
+ final List<SegmentInfo> eligibleSegments = new ArrayList<SegmentInfo>();
+ for(SegmentInfo info : segmentInfos) {
+ if (segmentsToOptimize.contains(info)) {
+ eligibleSegments.add(info);
+ }
+ }
+
+ //System.out.println("MRMP: findMergesForOptimize sis=" + segmentInfos + " eligible=" + eligibleSegments);
MergeSpecification mergeSpec = null;
- if (segmentInfos.size() > 1 || (segmentInfos.size() == 1 && segmentInfos.info(0).hasDeletions())) {
+ if (eligibleSegments.size() > 1 || (eligibleSegments.size() == 1 && eligibleSegments.get(0).hasDeletions())) {
mergeSpec = new MergeSpecification();
- SegmentInfos segmentInfos2 = new SegmentInfos();
- segmentInfos2.addAll(segmentInfos);
- Collections.shuffle(segmentInfos2, random);
+ // Already shuffled having come out of a set but
+ // shuffle again for good measure:
+ Collections.shuffle(eligibleSegments, random);
int upto = 0;
- while(upto < segmentInfos.size()) {
- int max = Math.min(10, segmentInfos.size()-upto);
+ while(upto < eligibleSegments.size()) {
+ int max = Math.min(10, eligibleSegments.size()-upto);
int inc = max <= 2 ? max : _TestUtil.nextInt(random, 2, max);
- mergeSpec.add(new OneMerge(segmentInfos2.range(upto, upto+inc)));
+ mergeSpec.add(new OneMerge(eligibleSegments.subList(upto, upto+inc)));
upto += inc;
}
}
+
+ if (mergeSpec != null) {
+ for(OneMerge merge : mergeSpec.merges) {
+ for(SegmentInfo info : merge.segments) {
+ assert segmentsToOptimize.contains(info);
+ }
+ }
+ }
return mergeSpec;
}
diff --git a/lucene/src/test-framework/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java b/lucene/src/test-framework/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java
index c09a48a6e3b..68ecf499740 100644
--- a/lucene/src/test-framework/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java
+++ b/lucene/src/test-framework/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java
@@ -146,6 +146,9 @@ public class MockRandomCodec extends Codec {
out.close();
final Random random = new Random(seed);
+
+ random.nextInt(); // consume a random for buffersize
+
PostingsWriterBase postingsWriter;
if (random.nextBoolean()) {
@@ -244,16 +247,22 @@ public class MockRandomCodec extends Codec {
in.close();
final Random random = new Random(seed);
+
+ int readBufferSize = _TestUtil.nextInt(random, 1, 4096);
+ if (LuceneTestCase.VERBOSE) {
+ System.out.println("MockRandomCodec: readBufferSize=" + readBufferSize);
+ }
+
PostingsReaderBase postingsReader;
if (random.nextBoolean()) {
postingsReader = new SepPostingsReaderImpl(state.dir, state.segmentInfo,
- state.readBufferSize, new MockIntStreamFactory(random), state.codecId);
+ readBufferSize, new MockIntStreamFactory(random), state.codecId);
} else {
if (LuceneTestCase.VERBOSE) {
System.out.println("MockRandomCodec: reading Standard postings");
}
- postingsReader = new StandardPostingsReader(state.dir, state.segmentInfo, state.readBufferSize, state.codecId);
+ postingsReader = new StandardPostingsReader(state.dir, state.segmentInfo, readBufferSize, state.codecId);
}
if (random.nextBoolean()) {
@@ -318,7 +327,7 @@ public class MockRandomCodec extends Codec {
state.fieldInfos,
state.segmentInfo.name,
postingsReader,
- state.readBufferSize,
+ readBufferSize,
termsCacheSize,
state.codecId);
success = true;
diff --git a/lucene/src/test-framework/org/apache/lucene/index/codecs/mocksep/MockSepCodec.java b/lucene/src/test-framework/org/apache/lucene/index/codecs/mocksep/MockSepCodec.java
index ca08b6e84ba..4331457bdca 100644
--- a/lucene/src/test-framework/org/apache/lucene/index/codecs/mocksep/MockSepCodec.java
+++ b/lucene/src/test-framework/org/apache/lucene/index/codecs/mocksep/MockSepCodec.java
@@ -25,7 +25,6 @@ import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.codecs.Codec;
-import org.apache.lucene.index.codecs.DocValuesConsumer;
import org.apache.lucene.index.codecs.DefaultDocValuesProducer;
import org.apache.lucene.index.codecs.FieldsConsumer;
import org.apache.lucene.index.codecs.FieldsProducer;
diff --git a/lucene/src/test-framework/org/apache/lucene/store/MockDirectoryWrapper.java b/lucene/src/test-framework/org/apache/lucene/store/MockDirectoryWrapper.java
index 17b62a6f94d..c7b0d036dd1 100644
--- a/lucene/src/test-framework/org/apache/lucene/store/MockDirectoryWrapper.java
+++ b/lucene/src/test-framework/org/apache/lucene/store/MockDirectoryWrapper.java
@@ -71,6 +71,7 @@ public class MockDirectoryWrapper extends Directory {
 Set<String> openFilesForWrite = new HashSet<String>();
volatile boolean crashed;
private ThrottledIndexOutput throttledOutput;
+ private Throttling throttling = Throttling.SOMETIMES;
// use this for tracking files for crash.
// additionally: provides debugging information in case you leave one open
@@ -104,6 +105,8 @@ public class MockDirectoryWrapper extends Directory {
// called from different threads; else test failures may
// not be reproducible from the original seed
this.randomState = new Random(random.nextInt());
+ this.throttledOutput = new ThrottledIndexOutput(ThrottledIndexOutput
+ .mBitsToBytes(40 + randomState.nextInt(10)), 5 + randomState.nextInt(5), null);
init();
}
@@ -117,8 +120,17 @@ public class MockDirectoryWrapper extends Directory {
preventDoubleWrite = value;
}
- public void setThrottledIndexOutput(ThrottledIndexOutput throttledOutput) {
- this.throttledOutput = throttledOutput;
+ public static enum Throttling {
+ /** always emulate a slow hard disk. could be very slow! */
+ ALWAYS,
+ /** sometimes (2% of the time) emulate a slow hard disk. */
+ SOMETIMES,
+ /** never throttle output */
+ NEVER
+ };
+
+ public void setThrottling(Throttling throttling) {
+ this.throttling = throttling;
}
@Override
@@ -354,7 +366,17 @@ public class MockDirectoryWrapper extends Directory {
IndexOutput io = new MockIndexOutputWrapper(this, delegate.createOutput(name), name);
openFileHandles.put(io, new RuntimeException("unclosed IndexOutput"));
openFilesForWrite.add(name);
- return throttledOutput == null ? io : throttledOutput.newFromDelegate(io);
+
+ // throttling REALLY slows down tests, so don't do it very often for SOMETIMES.
+ if (throttling == Throttling.ALWAYS ||
+ (throttling == Throttling.SOMETIMES && randomState.nextInt(50) == 0)) {
+ if (LuceneTestCase.VERBOSE) {
+ System.out.println("MockDirectoryWrapper: throttling indexOutput");
+ }
+ return throttledOutput.newFromDelegate(io);
+ } else {
+ return io;
+ }
}
@Override
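
Throttling.SOMETIMES is now the default, so roughly 2% of created outputs are wrapped in the ThrottledIndexOutput. A hedged sketch of how a long-running test might opt out, mirroring the Test2BTerms change further down (the directory name is illustrative):

    // Inside a LuceneTestCase subclass: disable the simulated slow disk
    // for tests that write very large amounts of data.
    MockDirectoryWrapper dir = newFSDirectory(_TestUtil.getTempDir("bigIndex"));
    dir.setThrottling(MockDirectoryWrapper.Throttling.NEVER);
    // ... index as usual, then:
    dir.close();
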
diff --git a/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java b/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java
index 5888a1c008c..7e95cffacd9 100644
--- a/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java
+++ b/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java
@@ -137,6 +137,8 @@ public abstract class LuceneTestCase extends Assert {
// tests)
/** Gets the codec to run tests with. */
public static final String TEST_CODEC = System.getProperty("tests.codec", "randomPerField");
+ /** Gets the codecprovider to run tests with */
+ public static final String TEST_CODECPROVIDER = System.getProperty("tests.codecprovider", "random");
/** Gets the locale to run tests with */
public static final String TEST_LOCALE = System.getProperty("tests.locale", "random");
/** Gets the timezone to run tests with */
@@ -329,15 +331,38 @@ public abstract class LuceneTestCase extends Assert {
tempDirs.clear();
stores = Collections.synchronizedMap(new IdentityHashMap());
savedCodecProvider = CodecProvider.getDefault();
- if ("randomPerField".equals(TEST_CODEC)) {
- if (random.nextInt(4) == 0) { // preflex-only setup
- codec = installTestCodecs("PreFlex", CodecProvider.getDefault());
- } else { // per-field setup
- CodecProvider.setDefault(new RandomCodecProvider(random));
+ if ("random".equals(TEST_CODECPROVIDER)) {
+ if ("randomPerField".equals(TEST_CODEC)) {
+ if (random.nextInt(4) == 0) { // preflex-only setup
+ codec = installTestCodecs("PreFlex", CodecProvider.getDefault());
+ } else { // per-field setup
+ CodecProvider.setDefault(new RandomCodecProvider(random));
+ codec = installTestCodecs(TEST_CODEC, CodecProvider.getDefault());
+ }
+ } else { // ordinary setup
codec = installTestCodecs(TEST_CODEC, CodecProvider.getDefault());
}
- } else { // ordinary setup
- codec = installTestCodecs(TEST_CODEC, CodecProvider.getDefault());
+ } else {
+ // someone specified their own codecprovider by class
+ try {
+ Class<? extends CodecProvider> cpClazz = Class.forName(TEST_CODECPROVIDER).asSubclass(CodecProvider.class);
+ CodecProvider cp = cpClazz.newInstance();
+ String codecName;
+ if (TEST_CODEC.startsWith("random")) { // TODO: somehow do random per-field?!
+ Set<String> codecSet = cp.listAll();
+ String availableCodecs[] = codecSet.toArray(new String[codecSet.size()]);
+ codecName = availableCodecs[random.nextInt(availableCodecs.length)];
+ } else {
+ codecName = TEST_CODEC;
+ }
+
+ codec = cp.lookup(codecName);
+ cp.setDefaultFieldCodec(codecName);
+ CodecProvider.setDefault(cp);
+ } catch (Exception e) {
+ System.err.println("Could not instantiate CodecProvider: " + TEST_CODECPROVIDER);
+ throw new RuntimeException(e);
+ }
}
savedLocale = Locale.getDefault();
locale = TEST_LOCALE.equals("random") ? randomLocale(random) : localeForName(TEST_LOCALE);
@@ -360,16 +385,13 @@ public abstract class LuceneTestCase extends Assert {
String codecDescription;
CodecProvider cp = CodecProvider.getDefault();
- if ("randomPerField".equals(TEST_CODEC)) {
- if (cp instanceof RandomCodecProvider)
- codecDescription = cp.toString();
- else
- codecDescription = "PreFlex";
+ if ("randomPerField".equals(TEST_CODEC) && cp instanceof RandomCodecProvider) {
+ codecDescription = cp.toString();
} else {
codecDescription = codec.toString();
}
- if (CodecProvider.getDefault() == savedCodecProvider)
+ if ("random".equals(TEST_CODECPROVIDER) && CodecProvider.getDefault() == savedCodecProvider)
removeTestCodecs(codec, CodecProvider.getDefault());
CodecProvider.setDefault(savedCodecProvider);
Locale.setDefault(savedLocale);
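
With the new tests.codecprovider property a run can supply its own CodecProvider class; it must have a no-arg constructor, since it is created via newInstance(). A hedged sketch of such a provider (class and package names are illustrative, and the register() call is assumed to behave as in the core providers; lookup(), listAll() and setDefaultFieldCodec() are the calls LuceneTestCase uses above):

    package org.example;

    import org.apache.lucene.index.codecs.CodecProvider;
    import org.apache.lucene.index.codecs.pulsing.PulsingCodec;
    import org.apache.lucene.index.codecs.standard.StandardCodec;

    // Registers two codecs; the test runner picks one of them per run,
    // either randomly or via -Dtests.codec=<name>.
    public class MyCodecProvider extends CodecProvider {
      public MyCodecProvider() {
        register(new StandardCodec());
        register(new PulsingCodec(1));
        setDefaultFieldCodec("Standard");
      }
    }

It would then be selected with, for example, -Dtests.codecprovider=org.example.MyCodecProvider, optionally combined with -Dtests.codec=Pulsing to pin a specific codec instead of a random one.
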
diff --git a/lucene/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java b/lucene/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java
index e5ec6fad862..4df9f1f456f 100644
--- a/lucene/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java
+++ b/lucene/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java
@@ -107,6 +107,7 @@ public class TestMockAnalyzer extends BaseTokenStreamTestCase {
// consume
}
stream.end();
+ stream.close();
assertAnalyzesToReuse(analyzer, testString, new String[] { "t" });
}
diff --git a/lucene/src/test/org/apache/lucene/index/Test2BTerms.java b/lucene/src/test/org/apache/lucene/index/Test2BTerms.java
index 25cf0c4d987..6fffc48664a 100644
--- a/lucene/src/test/org/apache/lucene/index/Test2BTerms.java
+++ b/lucene/src/test/org/apache/lucene/index/Test2BTerms.java
@@ -153,7 +153,8 @@ public class Test2BTerms extends LuceneTestCase {
 List<BytesRef> savedTerms = null;
- Directory dir = newFSDirectory(_TestUtil.getTempDir("2BTerms"));
+ MockDirectoryWrapper dir = newFSDirectory(_TestUtil.getTempDir("2BTerms"));
+ dir.setThrottling(MockDirectoryWrapper.Throttling.NEVER);
//Directory dir = newFSDirectory(new File("/p/lucene/indices/2bindex"));
if (true) {
diff --git a/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java b/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
index efee37fce32..b83f7369e50 100644
--- a/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
+++ b/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
@@ -22,6 +22,7 @@ import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.util.Arrays;
+import java.util.ArrayList;
import java.util.List;
import java.util.Random;
@@ -41,10 +42,12 @@ import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.SimilarityProvider;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
+import org.apache.lucene.util.Constants;
/*
Verify we can read the pre-4.0 file format, do searches
@@ -63,26 +66,27 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
// oldNames array.
/*
- public void testCreatePreLocklessCFS() throws IOException {
- createIndex("index.cfs", true);
- }
-
- public void testCreatePreLocklessNoCFS() throws IOException {
- createIndex("index.nocfs", false);
- }
- */
-
-/*
public void testCreateCFS() throws IOException {
- String dirName = "testindex.cfs";
- createIndex(dirName, true);
- rmDir(dirName);
+ createIndex("index.cfs", true, false);
}
public void testCreateNoCFS() throws IOException {
- String dirName = "testindex.nocfs";
- createIndex(dirName, true);
- rmDir(dirName);
+ createIndex("index.nocfs", false, false);
+ }
+ */
+
+/*
+ // These are only needed for the special upgrade test to verify
+ // that optimized indexes are also correctly upgraded by IndexUpgrader.
+ // You don't need to build them for non-3.1 (the test is happy with just one
+ // "old" segment format, version is unimportant:
+
+ public void testCreateOptimizedCFS() throws IOException {
+ createIndex("index.optimized.cfs", true, true);
+ }
+
+ public void testCreateOptimizedNoCFS() throws IOException {
+ createIndex("index.optimized.nocfs", false, true);
}
*/
@@ -90,6 +94,8 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
"30.nocfs",
"31.cfs",
"31.nocfs",
+ "32.cfs",
+ "32.nocfs",
};
final String[] unsupportedNames = {"19.cfs",
@@ -108,6 +114,10 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
"29.nocfs",
};
+ final String[] oldOptimizedNames = {"31.optimized.cfs",
+ "31.optimized.nocfs",
+ };
+
 /** This test checks that *only* IndexFormatTooOldExceptions are thrown when you open and operate on too old indexes! */
public void testUnsupportedOldIndexes() throws Exception {
for(int i=0;i names = new ArrayList(oldNames.length + oldOptimizedNames.length);
+ names.addAll(Arrays.asList(oldNames));
+ names.addAll(Arrays.asList(oldOptimizedNames));
+ for(String name : names) {
+ if (VERBOSE) {
+ System.out.println("testUpgradeOldIndex: index=" +name);
+ }
+ File oldIndxeDir = _TestUtil.getTempDir(name);
+ _TestUtil.unzip(getDataFile("index." + name + ".zip"), oldIndxeDir);
+ Directory dir = newFSDirectory(oldIndxeDir);
+
+ new IndexUpgrader(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, null), VERBOSE ? System.out : null, false)
+ .upgrade();
+
+ checkAllSegmentsUpgraded(dir);
+
+ _TestUtil.checkIndex(dir);
+
+ dir.close();
+ _TestUtil.rmDir(oldIndxeDir);
+ }
+ }
+
+ public void testUpgradeOldOptimizedIndexWithAdditions() throws Exception {
+ for (String name : oldOptimizedNames) {
+ if (VERBOSE) {
+ System.out.println("testUpgradeOldOptimizedIndexWithAdditions: index=" +name);
+ }
+ File oldIndxeDir = _TestUtil.getTempDir(name);
+ _TestUtil.unzip(getDataFile("index." + name + ".zip"), oldIndxeDir);
+ Directory dir = newFSDirectory(oldIndxeDir);
+
+ assertEquals("Original index must be optimized", 1, getNumberOfSegments(dir));
+
+ // create a bunch of dummy segments
+ int id = 40;
+ RAMDirectory ramDir = new RAMDirectory();
+ for (int i = 0; i < 3; i++) {
+ // only use Log- or TieredMergePolicy, to make document addition predictable and not suddenly merge:
+ MergePolicy mp = random.nextBoolean() ? newLogMergePolicy() : newTieredMergePolicy();
+ IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
+ .setMergePolicy(mp);
+ IndexWriter w = new IndexWriter(ramDir, iwc);
+ // add few more docs:
+ for(int j = 0; j < RANDOM_MULTIPLIER * random.nextInt(30); j++) {
+ addDoc(w, id++);
+ }
+ w.close(false);
+ }
+
+ // add dummy segments (which are all in current version) to optimized index
+ MergePolicy mp = random.nextBoolean() ? newLogMergePolicy() : newTieredMergePolicy();
+ IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, null)
+ .setMergePolicy(mp);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ w.setInfoStream(VERBOSE ? System.out : null);
+ w.addIndexes(ramDir);
+ w.close(false);
+
+ // determine count of segments in modified index
+ final int origSegCount = getNumberOfSegments(dir);
+
+ new IndexUpgrader(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, null), VERBOSE ? System.out : null, false)
+ .upgrade();
+
+ final int segCount = checkAllSegmentsUpgraded(dir);
+ assertEquals("Index must still contain the same number of segments, as only one segment was upgraded and nothing else merged",
+ origSegCount, segCount);
+
+ dir.close();
+ _TestUtil.rmDir(oldIndxeDir);
+ }
+ }
}
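
Both new tests drive the same IndexUpgrader entry point. A hedged standalone sketch of that call outside the test framework; the index path is illustrative and the constructor arguments mirror the ones used above (config, an optional info stream, and a final boolean flag passed as false in the tests):

    import java.io.File;

    import org.apache.lucene.index.IndexUpgrader;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.util.Version;

    // Rewrites every segment of an existing index into the current format,
    // which is what the tests above verify via checkAllSegmentsUpgraded().
    public class UpgradeExistingIndex {
      public static void main(String[] args) throws Exception {
        Directory dir = FSDirectory.open(new File(args[0]));
        new IndexUpgrader(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, null),
            System.out, false).upgrade();
        dir.close();
      }
    }
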
diff --git a/lucene/src/test/org/apache/lucene/index/TestCodecs.java b/lucene/src/test/org/apache/lucene/index/TestCodecs.java
index 44b84b504ad..040e9d035f6 100644
--- a/lucene/src/test/org/apache/lucene/index/TestCodecs.java
+++ b/lucene/src/test/org/apache/lucene/index/TestCodecs.java
@@ -241,8 +241,7 @@ public class TestCodecs extends LuceneTestCase {
final Directory dir = newDirectory();
FieldInfos clonedFieldInfos = (FieldInfos) fieldInfos.clone();
this.write(fieldInfos, dir, fields, true);
- final SegmentInfo si = new SegmentInfo(SEGMENT, 10000, dir, false, true, clonedFieldInfos.buildSegmentCodecs(false), clonedFieldInfos.hasVectors(), clonedFieldInfos);
- si.setHasProx(false);
+ final SegmentInfo si = new SegmentInfo(SEGMENT, 10000, dir, false, clonedFieldInfos.buildSegmentCodecs(false), clonedFieldInfos);
final FieldsProducer reader = si.getSegmentCodecs().codec().fieldsProducer(new SegmentReadState(dir, si, fieldInfos, 64, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR));
@@ -294,7 +293,7 @@ public class TestCodecs extends LuceneTestCase {
FieldInfos clonedFieldInfos = (FieldInfos) fieldInfos.clone();
this.write(fieldInfos, dir, fields, false);
- final SegmentInfo si = new SegmentInfo(SEGMENT, 10000, dir, false, true, clonedFieldInfos.buildSegmentCodecs(false), clonedFieldInfos.hasVectors(), clonedFieldInfos);
+ final SegmentInfo si = new SegmentInfo(SEGMENT, 10000, dir, false, clonedFieldInfos.buildSegmentCodecs(false), clonedFieldInfos);
if (VERBOSE) {
System.out.println("TEST: now read postings");
diff --git a/lucene/src/test/org/apache/lucene/index/TestDoc.java b/lucene/src/test/org/apache/lucene/index/TestDoc.java
index 874df62c91a..9352f9174c9 100644
--- a/lucene/src/test/org/apache/lucene/index/TestDoc.java
+++ b/lucene/src/test/org/apache/lucene/index/TestDoc.java
@@ -196,7 +196,7 @@ public class TestDoc extends LuceneTestCase {
SegmentReader r1 = SegmentReader.get(true, si1, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);
SegmentReader r2 = SegmentReader.get(true, si2, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);
- SegmentMerger merger = new SegmentMerger(si1.dir, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL, merged, null, CodecProvider.getDefault(), null, new FieldInfos());
+ SegmentMerger merger = new SegmentMerger(si1.dir, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL, merged, null, null, new FieldInfos());
merger.add(r1);
merger.add(r2);
@@ -205,8 +205,7 @@ public class TestDoc extends LuceneTestCase {
r2.close();
final FieldInfos fieldInfos = merger.fieldInfos();
final SegmentInfo info = new SegmentInfo(merged, si1.docCount + si2.docCount, si1.dir,
- false, fieldInfos.hasProx(), merger.getSegmentCodecs(),
- fieldInfos.hasVectors(), fieldInfos);
+ false, merger.getSegmentCodecs(), fieldInfos);
if (useCompoundFile) {
 Collection<String> filesToDelete = merger.createCompoundFile(merged + ".cfs", info);
diff --git a/lucene/src/test/org/apache/lucene/index/TestFieldsReader.java b/lucene/src/test/org/apache/lucene/index/TestFieldsReader.java
index 26b1717072f..75a9be9cc0e 100644
--- a/lucene/src/test/org/apache/lucene/index/TestFieldsReader.java
+++ b/lucene/src/test/org/apache/lucene/index/TestFieldsReader.java
@@ -24,12 +24,14 @@ import java.util.*;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
+import org.apache.lucene.document.NumericField;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.document.FieldSelectorResult;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.LoadFirstFieldSelector;
import org.apache.lucene.document.SetBasedFieldSelector;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.search.FieldCache;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.BufferedIndexInput;
import org.apache.lucene.store.Directory;
@@ -511,4 +513,69 @@ public class TestFieldsReader extends LuceneTestCase {
}
}
+
+ public void testNumericField() throws Exception {
+ Directory dir = newDirectory();
+ RandomIndexWriter w = new RandomIndexWriter(random, dir);
+ final int numDocs = _TestUtil.nextInt(random, 500, 1000) * RANDOM_MULTIPLIER;
+ final Number[] answers = new Number[numDocs];
+ final NumericField.DataType[] typeAnswers = new NumericField.DataType[numDocs];
+ for(int id=0;id 0);
+ reader.close();
+ SegmentInfos sis = new SegmentInfos();
+ sis.read(dir);
+ for (SegmentInfo segmentInfo : sis) {
+ assertFalse(segmentInfo.getHasVectors());
+ }
+ dir.close();
+
+ }
+ }
+ }
+
+ private static class FailOnTermVectors extends MockDirectoryWrapper.Failure {
+
+ private static final String INIT_STAGE = "initTermVectorsWriter";
+ private static final String AFTER_INIT_STAGE = "finishDocument";
+ private static final String EXC_MSG = "FOTV";
+ private final String stage;
+
+ public FailOnTermVectors(String stage) {
+ this.stage = stage;
+ }
+
+ @Override
+ public void eval(MockDirectoryWrapper dir) throws IOException {
+ StackTraceElement[] trace = new Exception().getStackTrace();
+ boolean failOnInit = false;
+ boolean failOnfinish = false;
+ for (int i = 0; i < trace.length; i++) {
+ if ("org.apache.lucene.index.TermVectorsTermsWriter".equals(trace[i].getClassName()) && stage.equals(trace[i].getMethodName()))
+ failOnInit = true;
+ if ("org.apache.lucene.index.TermVectorsTermsWriter".equals(trace[i].getClassName()) && stage.equals(trace[i].getMethodName()))
+ failOnfinish = true;
+ }
+
+ if (failOnInit) {
+ throw new RuntimeException(EXC_MSG + " fail on init");
+ } else if (failOnfinish) {
+ throw new RuntimeException(EXC_MSG + " fail on finishDoc");
+ }
+ }
+ }
}
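
The testNumericField test added to TestFieldsReader above checks that stored numeric fields come back from the index as NumericField instances rather than plain string fields. A hedged sketch of that round trip inside a LuceneTestCase-style test; the field name and value are illustrative:

    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random, dir);
    Document doc = new Document();
    doc.add(new NumericField("answer", Field.Store.YES, true).setLongValue(42L));
    w.addDocument(doc);
    IndexReader r = w.getReader();
    w.close();

    // The stored field is materialized as a NumericField again:
    Fieldable f = r.document(0).getFieldable("answer");
    assertTrue(f instanceof NumericField);
    assertEquals(42L, ((NumericField) f).getNumericValue().longValue());

    r.close();
    dir.close();
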
diff --git a/lucene/src/test/org/apache/lucene/index/TestLongPostings.java b/lucene/src/test/org/apache/lucene/index/TestLongPostings.java
index fbe6fa1e0f4..1d745d7d8f0 100644
--- a/lucene/src/test/org/apache/lucene/index/TestLongPostings.java
+++ b/lucene/src/test/org/apache/lucene/index/TestLongPostings.java
@@ -49,6 +49,7 @@ public class TestLongPostings extends LuceneTestCase {
final TermToBytesRefAttribute termAtt = ts.getAttribute(TermToBytesRefAttribute.class);
final BytesRef termBytes = termAtt.getBytesRef();
int count = 0;
+ ts.reset();
while(ts.incrementToken()) {
termAtt.fillBytesRef();
if (count == 0 && !termBytes.utf8ToString().equals(s)) {
diff --git a/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java b/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java
index d161e130ccb..7961601c013 100644
--- a/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java
+++ b/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java
@@ -73,15 +73,15 @@ public class TestSegmentMerger extends LuceneTestCase {
}
public void testMerge() throws IOException {
- SegmentMerger merger = new SegmentMerger(mergedDir, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL, mergedSegment, null, CodecProvider.getDefault(), null, new FieldInfos());
+ SegmentMerger merger = new SegmentMerger(mergedDir, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL, mergedSegment, null, null, new FieldInfos());
merger.add(reader1);
merger.add(reader2);
int docsMerged = merger.merge();
assertTrue(docsMerged == 2);
final FieldInfos fieldInfos = merger.fieldInfos();
//Should be able to open a new SegmentReader against the new directory
- SegmentReader mergedReader = SegmentReader.get(false, mergedDir, new SegmentInfo(mergedSegment, docsMerged, mergedDir, false, fieldInfos.hasProx(),
- merger.getSegmentCodecs(), fieldInfos.hasVectors(), fieldInfos),
+ SegmentReader mergedReader = SegmentReader.get(false, mergedDir, new SegmentInfo(mergedSegment, docsMerged, mergedDir, false,
+ merger.getSegmentCodecs(), fieldInfos),
BufferedIndexInput.BUFFER_SIZE, true, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);
assertTrue(mergedReader != null);
assertTrue(mergedReader.numDocs() == 2);
diff --git a/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java b/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java
index a0fbe6d1f55..8eb6224805f 100644
--- a/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java
+++ b/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java
@@ -616,7 +616,7 @@ public class TestStressIndexing2 extends LuceneTestCase {
}
for(int i=start;i lastDoc = docs.get(r.nextInt(docs.size()));
diff --git a/lucene/src/test/org/apache/lucene/search/TestTimeLimitingCollector.java b/lucene/src/test/org/apache/lucene/search/TestTimeLimitingCollector.java
index 5be8753df0b..67d9333d6e1 100644
--- a/lucene/src/test/org/apache/lucene/search/TestTimeLimitingCollector.java
+++ b/lucene/src/test/org/apache/lucene/search/TestTimeLimitingCollector.java
@@ -75,7 +75,7 @@ public class TestTimeLimitingCollector extends LuceneTestCase {
"blueberry pizza",
};
directory = newDirectory();
- RandomIndexWriter iw = new RandomIndexWriter(random, directory);
+ RandomIndexWriter iw = new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
for (int i=0; i(random, dir, inputMode, pairs, outputs).doTest();
}
+
+ // Up to two positive ints, shared, generally but not
+ // monotonically increasing
+ {
+ if (VERBOSE) {
+ System.out.println("TEST: now test UpToTwoPositiveIntOutputs");
+ }
+ final UpToTwoPositiveIntOutputs outputs = UpToTwoPositiveIntOutputs.getSingleton(true);
+ final List<FSTTester.InputOutput<Object>> pairs = new ArrayList<FSTTester.InputOutput<Object>>(terms.length);
+ long lastOutput = 0;
+ for(int idx=0;idx(terms[idx], output));
+ }
+ new FSTTester<Object>(random, dir, inputMode, pairs, outputs).doTest();
+ }
}
private static class FSTTester {
@@ -328,11 +358,13 @@ public class TestFSTs extends LuceneTestCase {
// no pruning
doTest(0, 0);
- // simple pruning
- doTest(_TestUtil.nextInt(random, 1, 1+pairs.size()), 0);
-
- // leafy pruning
- doTest(0, _TestUtil.nextInt(random, 1, 1+pairs.size()));
+ if (!(outputs instanceof UpToTwoPositiveIntOutputs)) {
+ // simple pruning
+ doTest(_TestUtil.nextInt(random, 1, 1+pairs.size()), 0);
+
+ // leafy pruning
+ doTest(0, _TestUtil.nextInt(random, 1, 1+pairs.size()));
+ }
}
// runs the term, returning the output, or null if term
@@ -421,7 +453,14 @@ public class TestFSTs extends LuceneTestCase {
prune1==0 && prune2==0, outputs);
for(InputOutput pair : pairs) {
- builder.add(pair.input, pair.output);
+ if (pair.output instanceof UpToTwoPositiveIntOutputs.TwoLongs) {
+ final UpToTwoPositiveIntOutputs _outputs = (UpToTwoPositiveIntOutputs) outputs;
+ final UpToTwoPositiveIntOutputs.TwoLongs twoLongs = (UpToTwoPositiveIntOutputs.TwoLongs) pair.output;
+ ((Builder<Object>) builder).add(pair.input, (Object) _outputs.get(twoLongs.first));
+ ((Builder<Object>) builder).add(pair.input, (Object) _outputs.get(twoLongs.second));
+ } else {
+ builder.add(pair.input, pair.output);
+ }
}
 FST<T> fst = builder.finish();
diff --git a/modules/analysis/CHANGES.txt b/modules/analysis/CHANGES.txt
index b636dc7cc20..e2b616e110f 100644
--- a/modules/analysis/CHANGES.txt
+++ b/modules/analysis/CHANGES.txt
@@ -83,6 +83,8 @@ New Features
- o.a.l.analysis.ReusableAnalyzerBase -> o.a.l.analysis.util.ReusableAnalyzerBase
- o.a.l.analysis.StopwordAnalyzerBase -> o.a.l.analysis.util.StopwordAnalyzerBase
- o.a.l.analysis.WordListLoader -> o.a.l.analysis.util.WordListLoader
+ - o.a.l.analysis.CharTokenizer -> o.a.l.analysis.util.CharTokenizer
+ - o.a.l.util.CharacterUtils -> o.a.l.analysis.util.CharacterUtils
* SOLR-1057: Add PathHierarchyTokenizer that represents file path hierarchies as synonyms of
/something, /something/something, /something/something/else. (Ryan McKinley, Koji Sekiguchi)
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicLetterTokenizer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicLetterTokenizer.java
index 26f06d3ffa0..fff6148d19a 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicLetterTokenizer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicLetterTokenizer.java
@@ -18,8 +18,8 @@ package org.apache.lucene.analysis.ar;
import java.io.Reader;
-import org.apache.lucene.analysis.CharTokenizer;
import org.apache.lucene.analysis.core.LetterTokenizer;
+import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.analysis.standard.StandardTokenizer; // javadoc @link
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Version;
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java
index a9853386d53..3bf349719d7 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java
@@ -19,8 +19,8 @@ package org.apache.lucene.analysis.core;
import java.io.Reader;
-import org.apache.lucene.analysis.CharTokenizer;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Version;
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java
index c10972b701b..0e1c7e616fb 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java
@@ -22,7 +22,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.util.CharacterUtils;
+import org.apache.lucene.analysis.util.CharacterUtils;
import org.apache.lucene.util.Version;
/**
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java
index a65d90b4ffa..ecdf550eb85 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java
@@ -19,8 +19,8 @@ package org.apache.lucene.analysis.core;
import java.io.Reader;
-import org.apache.lucene.analysis.CharTokenizer;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Version;
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java
index ce2bc6abd7c..31cfa1e00dc 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java
@@ -20,7 +20,7 @@ package org.apache.lucene.analysis.core;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.CharTokenizer;
+import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import org.apache.lucene.util.Version;
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceAnalyzer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceAnalyzer.java
index 85ce28efc99..357350cef38 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceAnalyzer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceAnalyzer.java
@@ -19,7 +19,7 @@ package org.apache.lucene.analysis.core;
import java.io.Reader;
-import org.apache.lucene.analysis.CharTokenizer;
+import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import org.apache.lucene.util.Version;
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizer.java
index 4bf4f049dee..01004c68ca5 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizer.java
@@ -19,8 +19,8 @@ package org.apache.lucene.analysis.core;
import java.io.Reader;
-import org.apache.lucene.analysis.CharTokenizer;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Version;
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java
index 1fed10384da..01c537b85cb 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java
@@ -21,7 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.util.CharacterUtils;
+import org.apache.lucene.analysis.util.CharacterUtils;
import org.apache.lucene.util.Version;
/**
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java
index b43a5c3b0dc..507a114336a 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java
@@ -31,8 +31,6 @@ import org.apache.lucene.util.Version;
/**
* Removes elisions from a {@link TokenStream}. For example, "l'avion" (the plane) will be
* tokenized as "avion" (plane).
- *
- * Note that {@link StandardTokenizer} sees " ' " as a space, and cuts it out.
*
* @see Elision in Wikipedia
*/
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/in/IndicTokenizer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/in/IndicTokenizer.java
index f89b07a3cbc..2e4c6e43e3f 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/in/IndicTokenizer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/in/IndicTokenizer.java
@@ -19,7 +19,7 @@ package org.apache.lucene.analysis.in;
import java.io.Reader;
-import org.apache.lucene.analysis.CharTokenizer;
+import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Version;
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java
index adb51f29d44..bd8cc47a40f 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java
@@ -19,11 +19,13 @@ package org.apache.lucene.analysis.it;
import java.io.IOException;
import java.io.Reader;
+import java.util.Arrays;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopFilter;
+import org.apache.lucene.analysis.fr.ElisionFilter;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
@@ -38,6 +40,14 @@ import org.tartarus.snowball.ext.ItalianStemmer;
/**
* {@link Analyzer} for Italian.
+ * <p>
+ * You must specify the required {@link Version}
+ * compatibility when creating ItalianAnalyzer:
+ * <ul>
+ *   <li> As of 3.2, ElisionFilter with a set of Italian
+ *        contractions is used by default.
+ * </ul>
+ *
*/
public final class ItalianAnalyzer extends StopwordAnalyzerBase {
 private final Set<?> stemExclusionSet;
@@ -45,6 +55,13 @@ public final class ItalianAnalyzer extends StopwordAnalyzerBase {
/** File containing default Italian stopwords. */
public final static String DEFAULT_STOPWORD_FILE = "italian_stop.txt";
+ private static final CharArraySet DEFAULT_ARTICLES = CharArraySet.unmodifiableSet(
+ new CharArraySet(Version.LUCENE_CURRENT,
+ Arrays.asList(
+ "c", "l", "all", "dall", "dell", "nell", "sull", "coll", "pell",
+ "gl", "agl", "dagl", "degl", "negl", "sugl", "un", "m", "t", "s", "v", "d"
+ ), true));
+
/**
* Returns an unmodifiable instance of the default stop words set.
* @return default stop words set.
@@ -112,7 +129,7 @@ public final class ItalianAnalyzer extends StopwordAnalyzerBase {
* @return A
* {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
- * {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+ * {@link StandardFilter}, {@link ElisionFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerFilter} if a stem exclusion set is
* provided and {@link SnowballFilter}.
*/
@@ -121,6 +138,9 @@ public final class ItalianAnalyzer extends StopwordAnalyzerBase {
Reader reader) {
final Tokenizer source = new StandardTokenizer(matchVersion, reader);
TokenStream result = new StandardFilter(matchVersion, source);
+ if (matchVersion.onOrAfter(Version.LUCENE_32)) {
+ result = new ElisionFilter(matchVersion, result, DEFAULT_ARTICLES);
+ }
result = new LowerCaseFilter(matchVersion, result);
result = new StopFilter(matchVersion, result, stopwords);
if(!stemExclusionSet.isEmpty())
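
A hedged sketch of the version-dependent behavior introduced above: from matchVersion LUCENE_32 on, ItalianAnalyzer runs ElisionFilter first, so the article in a contraction such as "dell'Italia" is dropped before lower-casing, stop filtering and stemming; with an earlier matchVersion (e.g. LUCENE_31) the elision step is skipped, per the onOrAfter(Version.LUCENE_32) check. The exact stemmed token is not asserted here.

    import java.io.StringReader;

    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.it.ItalianAnalyzer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.util.Version;

    // Prints the tokens produced for a contraction; with LUCENE_32 the
    // leading article "dell'" is removed before stemming.
    public class ItalianElisionDemo {
      public static void main(String[] args) throws Exception {
        ItalianAnalyzer analyzer = new ItalianAnalyzer(Version.LUCENE_32);
        TokenStream ts = analyzer.tokenStream("f", new StringReader("dell'Italia"));
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
          System.out.println(term.toString());
        }
        ts.end();
        ts.close();
      }
    }
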
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java
index b0cd8d60cfc..608c386625d 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java
@@ -25,57 +25,71 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
/**
- *
+ *
* Take something like:
- *
+ *
*
- * /soemthing/something/else
+ * /something/something/else
*
- *
+ *
* and make:
- *
+ *
*
- * /soemthing
- * /soemthing/something
- * /soemthing/something/else
+ * /something
+ * /something/something
+ * /something/something/else
*
- *
*/
public class PathHierarchyTokenizer extends Tokenizer {
public PathHierarchyTokenizer(Reader input) {
- this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER);
+ this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER, DEFAULT_DELIMITER, DEFAULT_SKIP);
+ }
+
+ public PathHierarchyTokenizer(Reader input, int skip) {
+ this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER, DEFAULT_DELIMITER, skip);
}
public PathHierarchyTokenizer(Reader input, int bufferSize, char delimiter) {
- this(input, bufferSize, delimiter, delimiter);
+ this(input, bufferSize, delimiter, delimiter, DEFAULT_SKIP);
}
public PathHierarchyTokenizer(Reader input, char delimiter, char replacement) {
- this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement);
+ this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement, DEFAULT_SKIP);
}
- public PathHierarchyTokenizer(Reader input, int bufferSize, char delimiter, char replacement) {
+ public PathHierarchyTokenizer(Reader input, char delimiter, char replacement, int skip) {
+ this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement, skip);
+ }
+
+ public PathHierarchyTokenizer(Reader input, int bufferSize, char delimiter, char replacement, int skip) {
super(input);
termAtt.resizeBuffer(bufferSize);
+
this.delimiter = delimiter;
this.replacement = replacement;
- endDelimiter = false;
+ this.skip = skip;
resultToken = new StringBuilder(bufferSize);
}
-
+
private static final int DEFAULT_BUFFER_SIZE = 1024;
public static final char DEFAULT_DELIMITER = '/';
+ public static final int DEFAULT_SKIP = 0;
+
private final char delimiter;
private final char replacement;
-
+ private final int skip;
+
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class);
+ private int startPosition = 0;
private int finalOffset = 0;
- private boolean endDelimiter;
+ private int skipped = 0;
+ private boolean endDelimiter = false;
private StringBuilder resultToken;
+
@Override
public final boolean incrementToken() throws IOException {
clearAttributes();
@@ -97,43 +111,69 @@ public class PathHierarchyTokenizer extends Tokenizer {
while (true) {
int c = input.read();
- if( c < 0 ) {
- length += resultToken.length();
- termAtt.setLength(length);
- finalOffset = correctOffset(length);
- offsetAtt.setOffset(correctOffset(0), finalOffset);
- if( added ){
- resultToken.setLength(0);
- resultToken.append(termAtt.buffer(), 0, length);
- }
- return added;
- }
- added = true;
- if( c == delimiter ) {
- if( length > 0 ){
- endDelimiter = true;
- break;
+ if( c < 0 ){
+ if( skipped > skip ) {
+ length += resultToken.length();
+ termAtt.setLength(length);
+ finalOffset = correctOffset(startPosition + length);
+ offsetAtt.setOffset(correctOffset(startPosition), finalOffset);
+ if( added ){
+ resultToken.setLength(0);
+ resultToken.append(termAtt.buffer(), 0, length);
+ }
+ return added;
}
else{
- termAtt.append(replacement);
+ finalOffset = correctOffset(startPosition + length);
+ return false;
+ }
+ }
+ if( !added ){
+ added = true;
+ skipped++;
+ if( skipped > skip ){
+ termAtt.append(c == delimiter ? replacement : (char)c);
length++;
}
+ else {
+ startPosition++;
+ }
}
else {
- termAtt.append((char)c);
- length++;
+ if( c == delimiter ){
+ if( skipped > skip ){
+ endDelimiter = true;
+ break;
+ }
+ skipped++;
+ if( skipped > skip ){
+ termAtt.append(replacement);
+ length++;
+ }
+ else {
+ startPosition++;
+ }
+ }
+ else {
+ if( skipped > skip ){
+ termAtt.append((char)c);
+ length++;
+ }
+ else {
+ startPosition++;
+ }
+ }
}
}
-
length += resultToken.length();
termAtt.setLength(length);
- finalOffset = correctOffset(length);
- offsetAtt.setOffset(correctOffset(0), finalOffset);
+ finalOffset = correctOffset(startPosition + length);
+ offsetAtt.setOffset(correctOffset(startPosition), finalOffset);
resultToken.setLength(0);
resultToken.append(termAtt.buffer(), 0, length);
return true;
}
-
+
@Override
public final void end() {
// set final offset
@@ -146,5 +186,6 @@ public class PathHierarchyTokenizer extends Tokenizer {
resultToken.setLength(0);
finalOffset = 0;
endDelimiter = false;
+ skipped = 0;
}
}
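
The new skip parameter makes the tokenizer consume, but not emit, that many leading path elements. A hedged illustration of what the constructor semantics above imply for skip=1 (input string is illustrative):

    import java.io.StringReader;

    import org.apache.lucene.analysis.path.PathHierarchyTokenizer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    // With skip=1 the first path element is skipped, so "/one/two/three"
    // should yield "/two" and "/two/three"; offsets start after the
    // skipped prefix (startPosition above).
    public class PathSkipDemo {
      public static void main(String[] args) throws Exception {
        PathHierarchyTokenizer t =
            new PathHierarchyTokenizer(new StringReader("/one/two/three"), 1);
        CharTermAttribute term = t.addAttribute(CharTermAttribute.class);
        t.reset();
        while (t.incrementToken()) {
          System.out.println(term.toString());
        }
        t.end();
        t.close();
      }
    }
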
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/path/ReversePathHierarchyTokenizer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/path/ReversePathHierarchyTokenizer.java
new file mode 100644
index 00000000000..07aa11fbbaf
--- /dev/null
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/path/ReversePathHierarchyTokenizer.java
@@ -0,0 +1,173 @@
+package org.apache.lucene.analysis.path;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Reader;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+
+/**
+ *
+ * Take something like:
+ *
+ *
+ * www.site.co.uk
+ *
+ *
+ * and make:
+ *
+ *
+ * www.site.co.uk
+ * site.co.uk
+ * co.uk
+ * uk
+ *
+ *
+ */
+public class ReversePathHierarchyTokenizer extends Tokenizer {
+
+ public ReversePathHierarchyTokenizer(Reader input) {
+ this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER, DEFAULT_DELIMITER, DEFAULT_SKIP);
+ }
+
+ public ReversePathHierarchyTokenizer(Reader input, int skip) {
+ this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER, DEFAULT_DELIMITER, skip);
+ }
+
+ public ReversePathHierarchyTokenizer(Reader input, int bufferSize, char delimiter) {
+ this(input, bufferSize, delimiter, delimiter, DEFAULT_SKIP);
+ }
+
+ public ReversePathHierarchyTokenizer(Reader input, char delimiter, char replacement) {
+ this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement, DEFAULT_SKIP);
+ }
+
+ public ReversePathHierarchyTokenizer(Reader input, int bufferSize, char delimiter, char replacement) {
+ this(input, bufferSize, delimiter, replacement, DEFAULT_SKIP);
+ }
+
+ public ReversePathHierarchyTokenizer(Reader input, char delimiter, int skip) {
+ this(input, DEFAULT_BUFFER_SIZE, delimiter, delimiter, skip);
+ }
+
+ public ReversePathHierarchyTokenizer(Reader input, char delimiter, char replacement, int skip) {
+ this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement, skip);
+ }
+
+ public ReversePathHierarchyTokenizer(Reader input, int bufferSize, char delimiter, char replacement, int skip) {
+ super(input);
+ termAtt.resizeBuffer(bufferSize);
+ this.delimiter = delimiter;
+ this.replacement = replacement;
+ this.skip = skip;
+ resultToken = new StringBuilder(bufferSize);
+ resultTokenBuffer = new char[bufferSize];
+ delimiterPositions = new ArrayList<Integer>(bufferSize/10);
+ }
+
+ private static final int DEFAULT_BUFFER_SIZE = 1024;
+ public static final char DEFAULT_DELIMITER = '/';
+ public static final int DEFAULT_SKIP = 0;
+
+ private final char delimiter;
+ private final char replacement;
+ private final int skip;
+
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+ private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class);
+
+ private int endPosition = 0;
+ private int finalOffset = 0;
+ private int skipped = 0;
+ private StringBuilder resultToken;
+
+ private List<Integer> delimiterPositions;
+ private int delimitersCount = -1;
+ private char[] resultTokenBuffer;
+
+ @Override
+ public final boolean incrementToken() throws IOException {
+ clearAttributes();
+ if(delimitersCount == -1){
+ int length = 0;
+ delimiterPositions.add(0);
+ while (true) {
+ int c = input.read();
+ if( c < 0 ) {
+ break;
+ }
+ length++;
+ if( c == delimiter ) {
+ delimiterPositions.add(length);
+ resultToken.append(replacement);
+ }
+ else{
+ resultToken.append((char)c);
+ }
+ }
+ delimitersCount = delimiterPositions.size();
+ if( delimiterPositions.get(delimitersCount-1) < length ){
+ delimiterPositions.add(length);
+ delimitersCount++;
+ }
+ if( resultTokenBuffer.length < resultToken.length() ){
+ resultTokenBuffer = new char[resultToken.length()];
+ }
+ resultToken.getChars(0, resultToken.length(), resultTokenBuffer, 0);
+ resultToken.setLength(0);
+ endPosition = delimiterPositions.get(delimitersCount-1 - skip);
+ finalOffset = correctOffset(length);
+ posAtt.setPositionIncrement(1);
+ }
+ else{
+ posAtt.setPositionIncrement(0);
+ }
+
+ while( skipped < delimitersCount-skip-1 ){
+ int start = delimiterPositions.get(skipped);
+ termAtt.copyBuffer(resultTokenBuffer, start, endPosition - start);
+ offsetAtt.setOffset(correctOffset(start), correctOffset(endPosition));
+ skipped++;
+ return true;
+ }
+
+ return false;
+ }
+
+ @Override
+ public final void end() {
+ // set final offset
+ offsetAtt.setOffset(finalOffset, finalOffset);
+ }
+
+ @Override
+ public void reset(Reader input) throws IOException {
+ super.reset(input);
+ resultToken.setLength(0);
+ finalOffset = 0;
+ skipped = 0;
+ delimitersCount = -1;
+ delimiterPositions.clear();
+ }
+}
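
A hedged usage sketch of the new reverse tokenizer with a domain-style input, using '.' as both delimiter and replacement as the class javadoc suggests:

    import java.io.StringReader;

    import org.apache.lucene.analysis.path.ReversePathHierarchyTokenizer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    // "www.site.co.uk" should come out as the suffix chain
    // www.site.co.uk, site.co.uk, co.uk, uk (see the class javadoc above).
    public class ReverseHierarchyDemo {
      public static void main(String[] args) throws Exception {
        ReversePathHierarchyTokenizer t = new ReversePathHierarchyTokenizer(
            new StringReader("www.site.co.uk"), '.', '.');
        CharTermAttribute term = t.addAttribute(CharTermAttribute.class);
        t.reset();
        while (t.incrementToken()) {
          System.out.println(term.toString());
        }
        t.end();
        t.close();
      }
    }
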
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLetterTokenizer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLetterTokenizer.java
index e5426d775b2..088b8025064 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLetterTokenizer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLetterTokenizer.java
@@ -18,8 +18,8 @@ package org.apache.lucene.analysis.ru;
*/
import java.io.Reader;
-import org.apache.lucene.analysis.CharTokenizer;
import org.apache.lucene.analysis.Tokenizer; // for javadocs
+import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.analysis.core.LetterTokenizer;
import org.apache.lucene.analysis.standard.StandardTokenizer; // for javadocs
import org.apache.lucene.util.AttributeSource;
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java
index cd52e392070..552ea3fd3dd 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java
@@ -24,7 +24,7 @@ import java.util.Iterator;
import java.util.Map;
import java.util.Set;
-import org.apache.lucene.util.CharacterUtils;
+import org.apache.lucene.analysis.util.CharacterUtils;
import org.apache.lucene.util.Version;
diff --git a/lucene/src/java/org/apache/lucene/analysis/CharTokenizer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
similarity index 97%
rename from lucene/src/java/org/apache/lucene/analysis/CharTokenizer.java
rename to modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
index 3055d19e5b2..5d91a3a3fe1 100644
--- a/lucene/src/java/org/apache/lucene/analysis/CharTokenizer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
@@ -1,4 +1,4 @@
-package org.apache.lucene.analysis;
+package org.apache.lucene.analysis.util;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -20,12 +20,13 @@ package org.apache.lucene.analysis;
import java.io.IOException;
import java.io.Reader;
+import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.AttributeSource;
-import org.apache.lucene.util.CharacterUtils;
+import org.apache.lucene.analysis.util.CharacterUtils;
import org.apache.lucene.util.Version;
-import org.apache.lucene.util.CharacterUtils.CharacterBuffer;
+import org.apache.lucene.analysis.util.CharacterUtils.CharacterBuffer;
/**
* An abstract base class for simple, character-oriented tokenizers.
diff --git a/lucene/src/java/org/apache/lucene/util/CharacterUtils.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java
similarity index 99%
rename from lucene/src/java/org/apache/lucene/util/CharacterUtils.java
rename to modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java
index 8f5a8af9ede..fe622788198 100644
--- a/lucene/src/java/org/apache/lucene/util/CharacterUtils.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java
@@ -1,8 +1,10 @@
-package org.apache.lucene.util;
+package org.apache.lucene.analysis.util;
import java.io.IOException;
import java.io.Reader;
+import org.apache.lucene.util.Version;
+
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java
index ae4bf2f2d24..83d7a863b35 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java
@@ -23,6 +23,7 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.util.Version;
public class TestItalianAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
@@ -55,4 +56,18 @@ public class TestItalianAnalyzer extends BaseTokenStreamTestCase {
public void testRandomStrings() throws Exception {
checkRandomData(random, new ItalianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
}
+
+ /** test that the elision filter is working */
+ public void testContractions() throws IOException {
+ Analyzer a = new ItalianAnalyzer(TEST_VERSION_CURRENT);
+ assertAnalyzesTo(a, "dell'Italia", new String[] { "ital" });
+ assertAnalyzesTo(a, "l'Italiano", new String[] { "ital" });
+ }
+
+ /** test that we don't enable this before 3.2 */
+ public void testContractionsBackwards() throws IOException {
+ Analyzer a = new ItalianAnalyzer(Version.LUCENE_31);
+ assertAnalyzesTo(a, "dell'Italia", new String[] { "dell'ital" });
+ assertAnalyzesTo(a, "l'Italiano", new String[] { "l'ital" });
+ }
}
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/path/TestPathHierarchyTokenizer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/path/TestPathHierarchyTokenizer.java
index cb0adc9e474..9cc50735965 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/path/TestPathHierarchyTokenizer.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/path/TestPathHierarchyTokenizer.java
@@ -127,4 +127,70 @@ public class TestPathHierarchyTokenizer extends BaseTokenStreamTestCase {
new int[]{1, 0, 0, 0},
path.length());
}
+
+ public void testBasicSkip() throws Exception {
+ String path = "/a/b/c";
+ PathHierarchyTokenizer t = new PathHierarchyTokenizer( new StringReader(path), 1 );
+ assertTokenStreamContents(t,
+ new String[]{"/b", "/b/c"},
+ new int[]{2, 2},
+ new int[]{4, 6},
+ new int[]{1, 0},
+ path.length());
+ }
+
+ public void testEndOfDelimiterSkip() throws Exception {
+ String path = "/a/b/c/";
+ PathHierarchyTokenizer t = new PathHierarchyTokenizer( new StringReader(path), 1 );
+ assertTokenStreamContents(t,
+ new String[]{"/b", "/b/c", "/b/c/"},
+ new int[]{2, 2, 2},
+ new int[]{4, 6, 7},
+ new int[]{1, 0, 0},
+ path.length());
+ }
+
+ public void testStartOfCharSkip() throws Exception {
+ String path = "a/b/c";
+ PathHierarchyTokenizer t = new PathHierarchyTokenizer( new StringReader(path), 1 );
+ assertTokenStreamContents(t,
+ new String[]{"/b", "/b/c"},
+ new int[]{1, 1},
+ new int[]{3, 5},
+ new int[]{1, 0},
+ path.length());
+ }
+
+ public void testStartOfCharEndOfDelimiterSkip() throws Exception {
+ String path = "a/b/c/";
+ PathHierarchyTokenizer t = new PathHierarchyTokenizer( new StringReader(path), 1 );
+ assertTokenStreamContents(t,
+ new String[]{"/b", "/b/c", "/b/c/"},
+ new int[]{1, 1, 1},
+ new int[]{3, 5, 6},
+ new int[]{1, 0, 0},
+ path.length());
+ }
+
+ public void testOnlyDelimiterSkip() throws Exception {
+ String path = "/";
+ PathHierarchyTokenizer t = new PathHierarchyTokenizer( new StringReader(path), 1 );
+ assertTokenStreamContents(t,
+ new String[]{},
+ new int[]{},
+ new int[]{},
+ new int[]{},
+ path.length());
+ }
+
+ public void testOnlyDelimitersSkip() throws Exception {
+ String path = "//";
+ PathHierarchyTokenizer t = new PathHierarchyTokenizer( new StringReader(path), 1 );
+ assertTokenStreamContents(t,
+ new String[]{"/"},
+ new int[]{1},
+ new int[]{2},
+ new int[]{1},
+ path.length());
+ }
}
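
For comparison with the reverse variant, a minimal sketch (not part of the patch) of the forward tokenizer with skip, mirroring testBasicSkip above:

import java.io.StringReader;

import org.apache.lucene.analysis.path.PathHierarchyTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

public class PathHierarchySkipDemo {
  public static void main(String[] args) throws Exception {
    // skip=1 drops the first path element, so "/a/b/c" yields "/b" and "/b/c"
    PathHierarchyTokenizer tokenizer =
        new PathHierarchyTokenizer(new StringReader("/a/b/c"), 1);
    CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
    OffsetAttribute offset = tokenizer.addAttribute(OffsetAttribute.class);
    while (tokenizer.incrementToken()) {
      // expected: "/b" [2,4) then "/b/c" [2,6), matching the test above
      System.out.println(term.toString() + " [" + offset.startOffset() + "," + offset.endOffset() + ")");
    }
    tokenizer.end();
    tokenizer.close();
  }
}
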
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/path/TestReversePathHierarchyTokenizer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/path/TestReversePathHierarchyTokenizer.java
new file mode 100644
index 00000000000..a881be03ea3
--- /dev/null
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/path/TestReversePathHierarchyTokenizer.java
@@ -0,0 +1,157 @@
+package org.apache.lucene.analysis.path;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+
+public class TestReversePathHierarchyTokenizer extends BaseTokenStreamTestCase {
+
+ public void testBasicReverse() throws Exception {
+ String path = "/a/b/c";
+ ReversePathHierarchyTokenizer t = new ReversePathHierarchyTokenizer( new StringReader(path) );
+ assertTokenStreamContents(t,
+ new String[]{"/a/b/c", "a/b/c", "b/c", "c"},
+ new int[]{0, 1, 3, 5},
+ new int[]{6, 6, 6, 6},
+ new int[]{1, 0, 0, 0},
+ path.length());
+ }
+
+ public void testEndOfDelimiterReverse() throws Exception {
+ String path = "/a/b/c/";
+ ReversePathHierarchyTokenizer t = new ReversePathHierarchyTokenizer( new StringReader(path) );
+ assertTokenStreamContents(t,
+ new String[]{"/a/b/c/", "a/b/c/", "b/c/", "c/"},
+ new int[]{0, 1, 3, 5},
+ new int[]{7, 7, 7, 7},
+ new int[]{1, 0, 0, 0},
+ path.length());
+ }
+
+ public void testStartOfCharReverse() throws Exception {
+ String path = "a/b/c";
+ ReversePathHierarchyTokenizer t = new ReversePathHierarchyTokenizer( new StringReader(path) );
+ assertTokenStreamContents(t,
+ new String[]{"a/b/c", "b/c", "c"},
+ new int[]{0, 2, 4},
+ new int[]{5, 5, 5},
+ new int[]{1, 0, 0},
+ path.length());
+ }
+
+ public void testStartOfCharEndOfDelimiterReverse() throws Exception {
+ String path = "a/b/c/";
+ ReversePathHierarchyTokenizer t = new ReversePathHierarchyTokenizer( new StringReader(path) );
+ assertTokenStreamContents(t,
+ new String[]{"a/b/c/", "b/c/", "c/"},
+ new int[]{0, 2, 4},
+ new int[]{6, 6, 6},
+ new int[]{1, 0, 0},
+ path.length());
+ }
+
+ public void testOnlyDelimiterReverse() throws Exception {
+ String path = "/";
+ ReversePathHierarchyTokenizer t = new ReversePathHierarchyTokenizer( new StringReader(path) );
+ assertTokenStreamContents(t,
+ new String[]{"/"},
+ new int[]{0},
+ new int[]{1},
+ new int[]{1},
+ path.length());
+ }
+
+ public void testOnlyDelimitersReverse() throws Exception {
+ String path = "//";
+ ReversePathHierarchyTokenizer t = new ReversePathHierarchyTokenizer( new StringReader(path) );
+ assertTokenStreamContents(t,
+ new String[]{"//", "/"},
+ new int[]{0, 1},
+ new int[]{2, 2},
+ new int[]{1, 0},
+ path.length());
+ }
+
+ public void testEndOfDelimiterReverseSkip() throws Exception {
+ String path = "/a/b/c/";
+ ReversePathHierarchyTokenizer t = new ReversePathHierarchyTokenizer( new StringReader(path), 1 );
+ assertTokenStreamContents(t,
+ new String[]{"/a/b/", "a/b/", "b/"},
+ new int[]{0, 1, 3},
+ new int[]{5, 5, 5},
+ new int[]{1, 0, 0},
+ path.length());
+ }
+
+ public void testStartOfCharReverseSkip() throws Exception {
+ String path = "a/b/c";
+ ReversePathHierarchyTokenizer t = new ReversePathHierarchyTokenizer( new StringReader(path), 1 );
+ assertTokenStreamContents(t,
+ new String[]{"a/b/", "b/"},
+ new int[]{0, 2},
+ new int[]{4, 4},
+ new int[]{1, 0},
+ path.length());
+ }
+
+ public void testStartOfCharEndOfDelimiterReverseSkip() throws Exception {
+ String path = "a/b/c/";
+ ReversePathHierarchyTokenizer t = new ReversePathHierarchyTokenizer( new StringReader(path), 1 );
+ assertTokenStreamContents(t,
+ new String[]{"a/b/", "b/"},
+ new int[]{0, 2},
+ new int[]{4, 4},
+ new int[]{1, 0},
+ path.length());
+ }
+
+ public void testOnlyDelimiterReverseSkip() throws Exception {
+ String path = "/";
+ ReversePathHierarchyTokenizer t = new ReversePathHierarchyTokenizer( new StringReader(path), 1 );
+ assertTokenStreamContents(t,
+ new String[]{},
+ new int[]{},
+ new int[]{},
+ new int[]{},
+ path.length());
+ }
+
+ public void testOnlyDelimitersReverseSkip() throws Exception {
+ String path = "//";
+ ReversePathHierarchyTokenizer t = new ReversePathHierarchyTokenizer( new StringReader(path), 1 );
+ assertTokenStreamContents(t,
+ new String[]{"/"},
+ new int[]{0},
+ new int[]{1},
+ new int[]{1},
+ path.length());
+ }
+
+ public void testReverseSkip2() throws Exception {
+ String path = "/a/b/c/";
+ ReversePathHierarchyTokenizer t = new ReversePathHierarchyTokenizer( new StringReader(path), 2 );
+ assertTokenStreamContents(t,
+ new String[]{"/a/", "a/"},
+ new int[]{0, 1},
+ new int[]{3, 3},
+ new int[]{1, 0},
+ path.length());
+ }
+}
diff --git a/lucene/src/test/org/apache/lucene/analysis/TestCharTokenizers.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java
similarity index 82%
rename from lucene/src/test/org/apache/lucene/analysis/TestCharTokenizers.java
rename to modules/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java
index adb902d95f6..f129596df92 100644
--- a/lucene/src/test/org/apache/lucene/analysis/TestCharTokenizers.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java
@@ -1,4 +1,4 @@
-package org.apache.lucene.analysis;
+package org.apache.lucene.analysis.util;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -20,6 +20,10 @@ package org.apache.lucene.analysis;
import java.io.IOException;
import java.io.StringReader;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.LowerCaseTokenizer;
+
/**
* Testcase for {@link CharTokenizer} subclasses
@@ -42,7 +46,7 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
}
// internal buffer size is 1024 make sure we have a surrogate pair right at the border
builder.insert(1023, "\ud801\udc1c");
- MockTokenizer tokenizer = new MockTokenizer(new StringReader(builder.toString()), MockTokenizer.SIMPLE, true);
+ Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(builder.toString()));
assertTokenStreamContents(tokenizer, builder.toString().toLowerCase().split(" "));
}
@@ -59,7 +63,7 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
builder.append("a");
}
builder.append("\ud801\udc1cabc");
- MockTokenizer tokenizer = new MockTokenizer(new StringReader(builder.toString()), MockTokenizer.SIMPLE, true);
+ Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(builder.toString()));
assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase()});
}
}
@@ -73,7 +77,7 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
for (int i = 0; i < 255; i++) {
builder.append("A");
}
- MockTokenizer tokenizer = new MockTokenizer(new StringReader(builder.toString() + builder.toString()), MockTokenizer.SIMPLE, true);
+ Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(builder.toString() + builder.toString()));
assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(), builder.toString().toLowerCase()});
}
@@ -87,7 +91,7 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
builder.append("A");
}
builder.append("\ud801\udc1c");
- MockTokenizer tokenizer = new MockTokenizer(new StringReader(builder.toString() + builder.toString()), MockTokenizer.SIMPLE, true);
+ Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(builder.toString() + builder.toString()));
assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(), builder.toString().toLowerCase()});
}
}
diff --git a/lucene/src/test/org/apache/lucene/util/TestCharacterUtils.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharacterUtils.java
similarity index 97%
rename from lucene/src/test/org/apache/lucene/util/TestCharacterUtils.java
rename to modules/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharacterUtils.java
index 69393bca871..4e9fdbf6c24 100644
--- a/lucene/src/test/org/apache/lucene/util/TestCharacterUtils.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharacterUtils.java
@@ -1,4 +1,4 @@
-package org.apache.lucene.util;
+package org.apache.lucene.analysis.util;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -21,7 +21,9 @@ import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
-import org.apache.lucene.util.CharacterUtils.CharacterBuffer;
+import org.apache.lucene.analysis.util.CharacterUtils.CharacterBuffer;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.Version;
import org.junit.Test;
/**
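
With this move, code that used org.apache.lucene.util.CharacterUtils must switch to the org.apache.lucene.analysis.util package. A small hedged sketch of the buffer API as CharTokenizer uses it; the method names are carried over unchanged by this patch, but treat the exact signatures as an assumption:

import java.io.Reader;
import java.io.StringReader;

import org.apache.lucene.analysis.util.CharacterUtils;
import org.apache.lucene.analysis.util.CharacterUtils.CharacterBuffer;
import org.apache.lucene.util.Version;

public class CharacterUtilsDemo {
  public static void main(String[] args) throws Exception {
    // getInstance returns a supplementary-character-aware implementation for 3.1+
    CharacterUtils charUtils = CharacterUtils.getInstance(Version.LUCENE_31);
    CharacterBuffer buffer = CharacterUtils.newCharacterBuffer(1024);
    Reader reader = new StringReader("a\uD801\uDC1Cb");
    if (charUtils.fill(buffer, reader)) {
      // codePointAt treats the surrogate pair at offset 1 as a single code point
      int cp = charUtils.codePointAt(buffer.getBuffer(), buffer.getOffset() + 1);
      System.out.println(Integer.toHexString(cp)); // expected "1041c" under these assumptions
    }
  }
}
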
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 0ed4698c902..42555a66b8a 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -198,6 +198,9 @@ Bug Fixes
initialization if the schema.xml contains an analyzer configuration
for a fieldType that does not use TextField. (hossman)
+* SOLR-2467: Fix analyzer initialization so any errors
+  are logged properly. (hossman)
+
Other Changes
----------------------
@@ -267,6 +270,12 @@ Detailed Change List
New Features
----------------------
+* SOLR-2496: Add ability to specify overwrite and commitWithin as request
+ parameters (e.g. specified in the URL) when using the JSON update format,
+ and added a simplified format for specifying multiple documents.
+ Example: [{"id":"doc1"},{"id":"doc2"}]
+ (yonik)
+
Optimizations
----------------------
@@ -309,6 +318,13 @@ Bug Fixes
did not clear all attributes so they displayed incorrect attribute values for tokens
in later filter stages. (uschindler, rmuir, yonik)
+* SOLR-2493: SolrQueryParser was fixed to not parse the SolrConfig DOM tree on each
+  instantiation, which was a huge slowdown. (Stephane Bailliez via uschindler)
+
+* SOLR-2495: The JSON parser could hang on corrupted input and could fail
+ to detect numbers that were too large to fit in a long. (yonik)
+
+
Other Changes
----------------------
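
To make the SOLR-2496 entry above concrete, here is a hedged client-side sketch using only the JDK. The parameter names match what JsonLoader reads later in this patch; the /solr/update/json path is the stock example configuration and is an assumption about the deployment:

import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;

public class JsonUpdateDemo {
  public static void main(String[] args) throws Exception {
    // commitWithin and overwrite ride along as request parameters instead of per-document JSON keys
    URL url = new URL("http://localhost:8983/solr/update/json?commitWithin=10000&overwrite=false");
    HttpURLConnection con = (HttpURLConnection) url.openConnection();
    con.setRequestMethod("POST");
    con.setDoOutput(true);
    con.setRequestProperty("Content-Type", "application/json");
    // simplified multi-document format: a bare JSON array of documents
    String body = "[{\"id\":\"doc1\"},{\"id\":\"doc2\"}]";
    OutputStream out = con.getOutputStream();
    out.write(body.getBytes("UTF-8"));
    out.close();
    System.out.println("HTTP " + con.getResponseCode());
  }
}
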
diff --git a/solr/README.txt b/solr/README.txt
index 0a9e939128d..f9a72c0f187 100644
--- a/solr/README.txt
+++ b/solr/README.txt
@@ -64,18 +64,18 @@ docs/api/index.html
Instructions for Building Apache Solr from Source
-------------------------------------------------
-1. Download the Java SE 6 JDK (Java Development Kit) or later from http://java.sun.com.
- You will need the JDK installed, and the %JAVA_HOME%\bin directory included
- on your command path. To test this, issue a "java -version" command from your
- shell and verify that the Java version is 1.6 or later.
+1. Download the Java SE 6 JDK (Java Development Kit) or later from http://java.sun.com/
+ You will need the JDK installed, and the $JAVA_HOME/bin (Windows: %JAVA_HOME%\bin)
+ folder included on your command path. To test this, issue a "java -version" command
+ from your shell (command prompt) and verify that the Java version is 1.6 or later.
-2. Download the Apache Ant binary distribution (1.7.0 or greater) from http://ant.apache.org.
- You will need Ant installed and the %ANT_HOME%\bin directory included on your
- command path. To test this, issue a "ant -version" command from your
- shell and verify that Ant is available.
+2. Download the Apache Ant binary distribution (1.7.0 or greater) from http://ant.apache.org/
+ You will need Ant installed and the $ANT_HOME/bin (Windows: %ANT_HOME%\bin) folder
+ included on your command path. To test this, issue an "ant -version" command from your
+ shell (command prompt) and verify that Ant is available.
-3. Download the Apache Solr distribution, linked from the above
- web site. Expand the distribution to a folder of your choice, e.g. c:\solr.
+3. Download the Apache Solr distribution, linked from the above web site.
+ Unzip the distribution to a folder of your choice, e.g. C:\solr or ~/solr
Alternately, you can obtain a copy of the latest Apache Solr source code
directly from the Subversion repository:
diff --git a/solr/build.xml b/solr/build.xml
index 8c68ca3464e..de7ef217bee 100644
--- a/solr/build.xml
+++ b/solr/build.xml
@@ -450,6 +450,7 @@
>
+
@@ -1020,7 +1021,7 @@
jar.file="lib/commons-csv-1.0-SNAPSHOT-r966014.jar" />
+ jar.file="lib/apache-solr-noggit-r1099557.jar" />
diff --git a/solr/common-build.xml b/solr/common-build.xml
index 861ff237062..a57b4074e25 100644
--- a/solr/common-build.xml
+++ b/solr/common-build.xml
@@ -61,6 +61,7 @@
+
diff --git a/solr/contrib/analysis-extras/build.xml b/solr/contrib/analysis-extras/build.xml
index 9cc5aa217bc..6ec8ecdbaa8 100644
--- a/solr/contrib/analysis-extras/build.xml
+++ b/solr/contrib/analysis-extras/build.xml
@@ -146,6 +146,7 @@
>
+
diff --git a/solr/contrib/clustering/build.xml b/solr/contrib/clustering/build.xml
index aee297e3b8f..9a0c67eaa2f 100644
--- a/solr/contrib/clustering/build.xml
+++ b/solr/contrib/clustering/build.xml
@@ -118,6 +118,7 @@
>
+
diff --git a/solr/contrib/dataimporthandler/build.xml b/solr/contrib/dataimporthandler/build.xml
index bd6ea50a2e2..79a0524fcc7 100644
--- a/solr/contrib/dataimporthandler/build.xml
+++ b/solr/contrib/dataimporthandler/build.xml
@@ -171,6 +171,7 @@
+
@@ -231,6 +232,7 @@
>
+
diff --git a/solr/contrib/extraction/build.xml b/solr/contrib/extraction/build.xml
index 01aa60e7485..50dcb4983d5 100644
--- a/solr/contrib/extraction/build.xml
+++ b/solr/contrib/extraction/build.xml
@@ -115,6 +115,7 @@
>
+
diff --git a/solr/contrib/uima/CHANGES.txt b/solr/contrib/uima/CHANGES.txt
index a31054a05b5..6e97c775acb 100644
--- a/solr/contrib/uima/CHANGES.txt
+++ b/solr/contrib/uima/CHANGES.txt
@@ -28,6 +28,11 @@ Upgrading from Solr 3.1
It should move to UIMAUpdateRequestProcessorFactory setting.
See contrib/uima/README.txt for more details. (SOLR-2436)
+New Features
+----------------------
+
+* SOLR-2503: extend mapping function to map feature value to dynamicField. (koji)
+
Test Cases:
----------------------
diff --git a/solr/contrib/uima/README.txt b/solr/contrib/uima/README.txt
index a8ef9cd5598..2e21536d3a5 100644
--- a/solr/contrib/uima/README.txt
+++ b/solr/contrib/uima/README.txt
@@ -37,20 +37,26 @@ To start using Solr UIMA Metadata Extraction Library you should go through the f
-
- org.apache.uima.alchemy.ts.concept.ConceptFS
- text
- concept
+
+ org.apache.uima.alchemy.ts.concept.ConceptFS
+
+ text
+ concept
+
-
- org.apache.uima.alchemy.ts.language.LanguageFS
- language
- language
+
+ org.apache.uima.alchemy.ts.language.LanguageFS
+
+ language
+ language
+
-
- org.apache.uima.SentenceAnnotation
- coveredText
- sentence
+
+ org.apache.uima.SentenceAnnotation
+
+ coveredText
+ sentence
+
diff --git a/solr/contrib/uima/build.xml b/solr/contrib/uima/build.xml
index 631f8a8c413..16c7de67844 100644
--- a/solr/contrib/uima/build.xml
+++ b/solr/contrib/uima/build.xml
@@ -114,6 +114,7 @@
>
+
diff --git a/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/SolrUIMAConfiguration.java b/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/SolrUIMAConfiguration.java
index 22357262ba3..68c9e1bac0a 100644
--- a/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/SolrUIMAConfiguration.java
+++ b/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/SolrUIMAConfiguration.java
@@ -30,14 +30,14 @@ public class SolrUIMAConfiguration {
private boolean fieldsMerging;
- private Map<String, Map<String, String>> typesFeaturesFieldsMapping;
+ private Map<String, Map<String, MapField>> typesFeaturesFieldsMapping;
private String aePath;
private Map<String, Object> runtimeParameters;
public SolrUIMAConfiguration(String aePath, String[] fieldsToAnalyze, boolean fieldsMerging,
- Map<String, Map<String, String>> typesFeaturesFieldsMapping,
+ Map<String, Map<String, MapField>> typesFeaturesFieldsMapping,
Map<String, Object> runtimeParameters) {
this.aePath = aePath;
this.fieldsToAnalyze = fieldsToAnalyze;
@@ -54,7 +54,7 @@ public class SolrUIMAConfiguration {
return fieldsMerging;
}
- public Map<String, Map<String, String>> getTypesFeaturesFieldsMapping() {
+ public Map<String, Map<String, MapField>> getTypesFeaturesFieldsMapping() {
return typesFeaturesFieldsMapping;
}
@@ -65,4 +65,39 @@ public class SolrUIMAConfiguration {
public Map<String, Object> getRuntimeParameters() {
return runtimeParameters;
}
+
+ static final class MapField {
+
+ private String fieldName, fieldNameFeature;
+ private boolean prefix; // only meaningful for dynamicField mappings:
+ // false: "*_s" (feature value + suffix), true: "s_*" (prefix + feature value)
+
+ MapField(String fieldName, String fieldNameFeature){
+ this.fieldName = fieldName;
+ this.fieldNameFeature = fieldNameFeature;
+ if(fieldNameFeature != null){
+ if(fieldName.startsWith("*")){
+ prefix = false;
+ this.fieldName = fieldName.substring(1);
+ }
+ else if(fieldName.endsWith("*")){
+ prefix = true;
+ this.fieldName = fieldName.substring(0, fieldName.length() - 1);
+ }
+ else
+ throw new RuntimeException("static field name cannot be used for dynamicField");
+ }
+ }
+
+ String getFieldNameFeature(){
+ return fieldNameFeature;
+ }
+
+ String getFieldName(String featureValue){
+ if(fieldNameFeature != null){
+ return prefix ? fieldName + featureValue : featureValue + fieldName;
+ }
+ return fieldName;
+ }
+ }
}
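
To make the dynamicField mapping concrete, a short hedged sketch of how MapField resolves field names; since MapField is package-private, this illustration sits in the same package, and the pattern/feature values are only examples:

package org.apache.solr.uima.processor;

import org.apache.solr.uima.processor.SolrUIMAConfiguration.MapField;

public class MapFieldDemo {
  public static void main(String[] args) {
    // dynamicField="*_sm" with fieldNameFeature="name": the feature value fills the '*' side
    MapField dynamic = new MapField("*_sm", "name");
    System.out.println(dynamic.getFieldName("ORGANIZATION")); // ORGANIZATION_sm

    // plain mapping: no fieldNameFeature, the configured field name is used as-is
    MapField plain = new MapField("sentence", null);
    System.out.println(plain.getFieldName(null));             // sentence
  }
}
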
diff --git a/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/SolrUIMAConfigurationReader.java b/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/SolrUIMAConfigurationReader.java
index 00e6aca3288..fc225d1deba 100644
--- a/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/SolrUIMAConfigurationReader.java
+++ b/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/SolrUIMAConfigurationReader.java
@@ -22,6 +22,7 @@ import java.util.List;
import java.util.Map;
import org.apache.solr.common.util.NamedList;
+import org.apache.solr.uima.processor.SolrUIMAConfiguration.MapField;
/**
* Read configuration for Solr-UIMA integration
@@ -62,18 +63,31 @@ public class SolrUIMAConfigurationReader {
}
@SuppressWarnings("rawtypes")
- private Map<String, Map<String, String>> readTypesFeaturesFieldsMapping() {
- Map<String, Map<String, String>> map = new HashMap<String, Map<String, String>>();
+ private Map<String, Map<String, MapField>> readTypesFeaturesFieldsMapping() {
+ Map<String, Map<String, MapField>> map = new HashMap<String, Map<String, MapField>>();
NamedList fieldMappings = (NamedList) args.get("fieldMappings");
/* iterate over UIMA types */
for (int i = 0; i < fieldMappings.size(); i++) {
- NamedList mapping = (NamedList) fieldMappings.get("mapping", i);
- String typeName = (String) mapping.get("type");
- String featureName = (String) mapping.get("feature");
- String mappedFieldName = (String) mapping.get("field");
- Map<String, String> subMap = new HashMap<String, String>();
- subMap.put(featureName, mappedFieldName);
+ NamedList type = (NamedList) fieldMappings.get("type", i);
+ String typeName = (String)type.get("name");
+
+ Map<String, MapField> subMap = new HashMap<String, MapField>();
+ /* iterate over mapping definitions */
+ for(int j = 0; j < type.size() - 1; j++){
+ NamedList mapping = (NamedList) type.get("mapping", j + 1);
+ String featureName = (String) mapping.get("feature");
+ String fieldNameFeature = null;
+ String mappedFieldName = (String) mapping.get("field");
+ if(mappedFieldName == null){
+ fieldNameFeature = (String) mapping.get("fieldNameFeature");
+ mappedFieldName = (String) mapping.get("dynamicField");
+ }
+ if(mappedFieldName == null)
+ throw new RuntimeException("either field or dynamicField must be defined for feature " + featureName);
+ MapField mapField = new MapField(mappedFieldName, fieldNameFeature);
+ subMap.put(featureName, mapField);
+ }
map.put(typeName, subMap);
}
return map;
diff --git a/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/UIMAToSolrMapper.java b/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/UIMAToSolrMapper.java
index 29e7b5c2926..6d8cdc50c0d 100644
--- a/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/UIMAToSolrMapper.java
+++ b/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/UIMAToSolrMapper.java
@@ -20,6 +20,7 @@ package org.apache.solr.uima.processor;
import java.util.Map;
import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.uima.processor.SolrUIMAConfiguration.MapField;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.cas.Type;
@@ -53,7 +54,7 @@ public class UIMAToSolrMapper {
* name of UIMA type to map
* @param featureFieldsmapping
*/
- public void map(String typeName, Map<String, String> featureFieldsmapping) {
+ public void map(String typeName, Map<String, MapField> featureFieldsmapping) {
try {
FeatureStructure fsMock = (FeatureStructure) Class.forName(typeName).getConstructor(
JCas.class).newInstance(cas);
@@ -62,7 +63,11 @@ public class UIMAToSolrMapper {
.hasNext();) {
FeatureStructure fs = iterator.next();
for (String featureName : featureFieldsmapping.keySet()) {
- String fieldName = featureFieldsmapping.get(featureName);
+ MapField mapField = featureFieldsmapping.get(featureName);
+ String fieldNameFeature = mapField.getFieldNameFeature();
+ String fieldNameFeatureValue = fieldNameFeature == null ? null :
+ fs.getFeatureValueAsString(type.getFeatureByBaseName(fieldNameFeature));
+ String fieldName = mapField.getFieldName(fieldNameFeatureValue);
log.info(new StringBuffer("mapping ").append(typeName).append("@").append(featureName)
.append(" to ").append(fieldName).toString());
String featureValue = null;
diff --git a/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessor.java b/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessor.java
index 8b3cb547d67..9950838569c 100644
--- a/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessor.java
+++ b/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessor.java
@@ -22,6 +22,7 @@ import java.util.Map;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.core.SolrCore;
+import org.apache.solr.uima.processor.SolrUIMAConfiguration.MapField;
import org.apache.solr.uima.processor.ae.AEProvider;
import org.apache.solr.uima.processor.ae.AEProviderFactory;
import org.apache.solr.update.AddUpdateCommand;
@@ -39,7 +40,7 @@ import org.apache.uima.resource.ResourceInitializationException;
*/
public class UIMAUpdateRequestProcessor extends UpdateRequestProcessor {
- private SolrUIMAConfiguration solrUIMAConfiguration;
+ SolrUIMAConfiguration solrUIMAConfiguration;
private AEProvider aeProvider;
@@ -69,7 +70,7 @@ public class UIMAUpdateRequestProcessor extends UpdateRequestProcessor {
UIMAToSolrMapper uimaToSolrMapper = new UIMAToSolrMapper(solrInputDocument, jcas);
/* get field mapping from config */
- Map<String, Map<String, String>> typesAndFeaturesFieldsMap = solrUIMAConfiguration
+ Map<String, Map<String, MapField>> typesAndFeaturesFieldsMap = solrUIMAConfiguration
.getTypesFeaturesFieldsMapping();
/* map type features on fields */
for (String typeFQN : typesAndFeaturesFieldsMap.keySet()) {
diff --git a/solr/contrib/uima/src/test/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessorTest.java b/solr/contrib/uima/src/test/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessorTest.java
index 392afcf1ffc..c7275829171 100644
--- a/solr/contrib/uima/src/test/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessorTest.java
+++ b/solr/contrib/uima/src/test/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessorTest.java
@@ -33,6 +33,8 @@ import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.XmlUpdateRequestHandler;
import org.apache.solr.request.SolrQueryRequestBase;
import org.apache.solr.response.SolrQueryResponse;
+import org.apache.solr.uima.processor.SolrUIMAConfiguration.MapField;
+import org.apache.solr.update.processor.UpdateRequestProcessor;
import org.apache.solr.update.processor.UpdateRequestProcessorChain;
import org.junit.Before;
import org.junit.BeforeClass;
@@ -66,6 +68,26 @@ public class UIMAUpdateRequestProcessorTest extends SolrTestCaseJ4 {
UIMAUpdateRequestProcessorFactory factory = (UIMAUpdateRequestProcessorFactory) chained
.getFactories()[0];
assertNotNull(factory);
+ UpdateRequestProcessor processor = factory.getInstance(req(), null, null);
+ assertTrue(processor instanceof UIMAUpdateRequestProcessor);
+ }
+
+ @Test
+ public void testMultiMap() {
+ SolrCore core = h.getCore();
+ UpdateRequestProcessorChain chained = core.getUpdateProcessingChain("uima-multi-map");
+ assertNotNull(chained);
+ UIMAUpdateRequestProcessorFactory factory = (UIMAUpdateRequestProcessorFactory) chained
+ .getFactories()[0];
+ assertNotNull(factory);
+ UpdateRequestProcessor processor = factory.getInstance(req(), null, null);
+ assertTrue(processor instanceof UIMAUpdateRequestProcessor);
+ SolrUIMAConfiguration conf = ((UIMAUpdateRequestProcessor)processor).solrUIMAConfiguration;
+ Map<String, Map<String, MapField>> map = conf.getTypesFeaturesFieldsMapping();
+ Map<String, MapField> subMap = map.get("a-type-which-can-have-multiple-features");
+ assertEquals(2, subMap.size());
+ assertEquals("1", subMap.get("A").getFieldName(null));
+ assertEquals("2", subMap.get("B").getFieldName(null));
}
@Test
@@ -83,7 +105,7 @@ public class UIMAUpdateRequestProcessorTest extends SolrTestCaseJ4 {
assertU(commit());
assertQ(req("sentence:*"), "//*[@numFound='1']");
assertQ(req("sentiment:*"), "//*[@numFound='0']");
- assertQ(req("entity:Prague"), "//*[@numFound='1']");
+ assertQ(req("OTHER_sm:Prague"), "//*[@numFound='1']");
}
@Test
@@ -103,7 +125,7 @@ public class UIMAUpdateRequestProcessorTest extends SolrTestCaseJ4 {
assertQ(req("sentence:*"), "//*[@numFound='2']");
assertQ(req("sentiment:positive"), "//*[@numFound='1']");
- assertQ(req("entity:Apache"), "//*[@numFound='2']");
+ assertQ(req("ORGANIZATION_sm:Apache"), "//*[@numFound='2']");
}
private void addDoc(String doc) throws Exception {
diff --git a/solr/contrib/uima/src/test/java/org/apache/solr/uima/processor/an/DummyEntityAnnotator.java b/solr/contrib/uima/src/test/java/org/apache/solr/uima/processor/an/DummyEntityAnnotator.java
index 6c3941ac49e..e59da1228fe 100644
--- a/solr/contrib/uima/src/test/java/org/apache/solr/uima/processor/an/DummyEntityAnnotator.java
+++ b/solr/contrib/uima/src/test/java/org/apache/solr/uima/processor/an/DummyEntityAnnotator.java
@@ -34,6 +34,12 @@ public class DummyEntityAnnotator extends JCasAnnotator_ImplBase{
EntityAnnotation entityAnnotation = new EntityAnnotation(jcas);
entityAnnotation.setBegin(annotation.getBegin());
entityAnnotation.setEnd(annotation.getEnd());
+ String entityString = annotation.getCoveredText();
+ entityAnnotation.setEntity(entityString);
+ String name = "OTHER"; // "OTHER" makes no sense. In practice, "PERSON", "COUNTRY", "E-MAIL", etc.
+ if(entityString.equals("Apache"))
+ name = "ORGANIZATION";
+ entityAnnotation.setName(name);
entityAnnotation.addToIndexes();
}
}
diff --git a/solr/contrib/uima/src/test/java/org/apache/solr/uima/ts/EntityAnnotation.java b/solr/contrib/uima/src/test/java/org/apache/solr/uima/ts/EntityAnnotation.java
index f48e5bc0912..ed597514a71 100644
--- a/solr/contrib/uima/src/test/java/org/apache/solr/uima/ts/EntityAnnotation.java
+++ b/solr/contrib/uima/src/test/java/org/apache/solr/uima/ts/EntityAnnotation.java
@@ -1,6 +1,6 @@
-/* First created by JCasGen Fri Mar 04 12:48:08 CET 2011 */
+/* First created by JCasGen Sat May 07 22:33:38 JST 2011 */
package org.apache.solr.uima.ts;
import org.apache.uima.jcas.JCas;
@@ -11,8 +11,8 @@ import org.apache.uima.jcas.tcas.Annotation;
/**
- * Updated by JCasGen Fri Mar 04 12:50:14 CET 2011
- * XML source: /Users/tommasoteofili/Documents/workspaces/lucene_workspace/lucene_dev/solr/contrib/uima/src/test/resources/DummyEntityAEDescriptor.xml
+ * Updated by JCasGen Sat May 07 22:33:38 JST 2011
+ * XML source: /Users/koji/Documents/workspace/DummyEntityAnnotator/desc/DummyEntityAEDescriptor.xml
* @generated */
public class EntityAnnotation extends Annotation {
/** @generated
@@ -57,6 +57,42 @@ public class EntityAnnotation extends Annotation {
@generated modifiable */
private void readObject() {}
-}
+
+
+ //*--------------*
+ //* Feature: name
+
+ /** getter for name - gets
+ * @generated */
+ public String getName() {
+ if (EntityAnnotation_Type.featOkTst && ((EntityAnnotation_Type)jcasType).casFeat_name == null)
+ jcasType.jcas.throwFeatMissing("name", "org.apache.solr.uima.ts.EntityAnnotation");
+ return jcasType.ll_cas.ll_getStringValue(addr, ((EntityAnnotation_Type)jcasType).casFeatCode_name);}
+
+ /** setter for name - sets
+ * @generated */
+ public void setName(String v) {
+ if (EntityAnnotation_Type.featOkTst && ((EntityAnnotation_Type)jcasType).casFeat_name == null)
+ jcasType.jcas.throwFeatMissing("name", "org.apache.solr.uima.ts.EntityAnnotation");
+ jcasType.ll_cas.ll_setStringValue(addr, ((EntityAnnotation_Type)jcasType).casFeatCode_name, v);}
+
+
+ //*--------------*
+ //* Feature: entity
+
+ /** getter for entity - gets
+ * @generated */
+ public String getEntity() {
+ if (EntityAnnotation_Type.featOkTst && ((EntityAnnotation_Type)jcasType).casFeat_entity == null)
+ jcasType.jcas.throwFeatMissing("entity", "org.apache.solr.uima.ts.EntityAnnotation");
+ return jcasType.ll_cas.ll_getStringValue(addr, ((EntityAnnotation_Type)jcasType).casFeatCode_entity);}
+
+ /** setter for entity - sets
+ * @generated */
+ public void setEntity(String v) {
+ if (EntityAnnotation_Type.featOkTst && ((EntityAnnotation_Type)jcasType).casFeat_entity == null)
+ jcasType.jcas.throwFeatMissing("entity", "org.apache.solr.uima.ts.EntityAnnotation");
+ jcasType.ll_cas.ll_setStringValue(addr, ((EntityAnnotation_Type)jcasType).casFeatCode_entity, v);}
+ }
\ No newline at end of file
diff --git a/solr/contrib/uima/src/test/java/org/apache/solr/uima/ts/EntityAnnotation_Type.java b/solr/contrib/uima/src/test/java/org/apache/solr/uima/ts/EntityAnnotation_Type.java
index f7bb572f7aa..5be6a1a6020 100644
--- a/solr/contrib/uima/src/test/java/org/apache/solr/uima/ts/EntityAnnotation_Type.java
+++ b/solr/contrib/uima/src/test/java/org/apache/solr/uima/ts/EntityAnnotation_Type.java
@@ -1,5 +1,5 @@
-/* First created by JCasGen Fri Mar 04 12:48:08 CET 2011 */
+/* First created by JCasGen Sat May 07 22:33:38 JST 2011 */
package org.apache.solr.uima.ts;
import org.apache.uima.jcas.JCas;
@@ -9,10 +9,12 @@ import org.apache.uima.cas.impl.FSGenerator;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.cas.impl.TypeImpl;
import org.apache.uima.cas.Type;
+import org.apache.uima.cas.impl.FeatureImpl;
+import org.apache.uima.cas.Feature;
import org.apache.uima.jcas.tcas.Annotation_Type;
/**
- * Updated by JCasGen Fri Mar 04 12:50:14 CET 2011
+ * Updated by JCasGen Sat May 07 22:33:38 JST 2011
* @generated */
public class EntityAnnotation_Type extends Annotation_Type {
/** @generated */
@@ -38,6 +40,42 @@ public class EntityAnnotation_Type extends Annotation_Type {
/** @generated
@modifiable */
public final static boolean featOkTst = JCasRegistry.getFeatOkTst("org.apache.solr.uima.ts.EntityAnnotation");
+
+ /** @generated */
+ final Feature casFeat_name;
+ /** @generated */
+ final int casFeatCode_name;
+ /** @generated */
+ public String getName(int addr) {
+ if (featOkTst && casFeat_name == null)
+ jcas.throwFeatMissing("name", "org.apache.solr.uima.ts.EntityAnnotation");
+ return ll_cas.ll_getStringValue(addr, casFeatCode_name);
+ }
+ /** @generated */
+ public void setName(int addr, String v) {
+ if (featOkTst && casFeat_name == null)
+ jcas.throwFeatMissing("name", "org.apache.solr.uima.ts.EntityAnnotation");
+ ll_cas.ll_setStringValue(addr, casFeatCode_name, v);}
+
+
+
+ /** @generated */
+ final Feature casFeat_entity;
+ /** @generated */
+ final int casFeatCode_entity;
+ /** @generated */
+ public String getEntity(int addr) {
+ if (featOkTst && casFeat_entity == null)
+ jcas.throwFeatMissing("entity", "org.apache.solr.uima.ts.EntityAnnotation");
+ return ll_cas.ll_getStringValue(addr, casFeatCode_entity);
+ }
+ /** @generated */
+ public void setEntity(int addr, String v) {
+ if (featOkTst && casFeat_entity == null)
+ jcas.throwFeatMissing("entity", "org.apache.solr.uima.ts.EntityAnnotation");
+ ll_cas.ll_setStringValue(addr, casFeatCode_entity, v);}
+
+
@@ -47,6 +85,14 @@ public class EntityAnnotation_Type extends Annotation_Type {
super(jcas, casType);
casImpl.getFSClassRegistry().addGeneratorForType((TypeImpl)this.casType, getFSGenerator());
+
+ casFeat_name = jcas.getRequiredFeatureDE(casType, "name", "uima.cas.String", featOkTst);
+ casFeatCode_name = (null == casFeat_name) ? JCas.INVALID_FEATURE_CODE : ((FeatureImpl)casFeat_name).getCode();
+
+
+ casFeat_entity = jcas.getRequiredFeatureDE(casType, "entity", "uima.cas.String", featOkTst);
+ casFeatCode_entity = (null == casFeat_entity) ? JCas.INVALID_FEATURE_CODE : ((FeatureImpl)casFeat_entity).getCode();
+
}
}
diff --git a/solr/contrib/uima/src/test/resources/DummyEntityAEDescriptor.xml b/solr/contrib/uima/src/test/resources/DummyEntityAEDescriptor.xml
index 61f1d8c8046..33f05e50e39 100644
--- a/solr/contrib/uima/src/test/resources/DummyEntityAEDescriptor.xml
+++ b/solr/contrib/uima/src/test/resources/DummyEntityAEDescriptor.xml
@@ -32,6 +32,18 @@
org.apache.solr.uima.ts.EntityAnnotation
uima.tcas.Annotation
+
+
+ name
+
+ uima.cas.String
+
+
+ entity
+
+ uima.cas.String
+
+
diff --git a/solr/contrib/uima/src/test/resources/solr-uima/conf/schema.xml b/solr/contrib/uima/src/test/resources/solr-uima/conf/schema.xml
index 6df09b51320..85d15ef77f1 100644
--- a/solr/contrib/uima/src/test/resources/solr-uima/conf/schema.xml
+++ b/solr/contrib/uima/src/test/resources/solr-uima/conf/schema.xml
@@ -597,6 +597,7 @@
stored="true" multiValued="true"/>
-->
+
+ samsung
electronics
hard drive
7200RPM, 8MB cache, IDE Ultra ATA-133
@@ -36,6 +38,8 @@
6H500F0
Maxtor DiamondMax 11 - hard drive - 500 GB - SATA-300
Maxtor Corp.
+
+ maxtor
electronics
hard drive
SATA 3.0Gb/s, NCQ
diff --git a/solr/example/exampledocs/ipod_other.xml b/solr/example/exampledocs/ipod_other.xml
index f259e9e7b43..7756c9fc805 100644
--- a/solr/example/exampledocs/ipod_other.xml
+++ b/solr/example/exampledocs/ipod_other.xml
@@ -21,6 +21,8 @@
F8V7067-APL-KIT
Belkin Mobile Power Cord for iPod w/ Dock
Belkin
+
+ belkin
electronics
connector
car power adapter, white
@@ -37,6 +39,8 @@
IW-02
iPod & iPod Mini USB 2.0 Cable
Belkin
+
+ belkin
electronics
connector
car power adapter for iPod, white
diff --git a/solr/example/exampledocs/ipod_video.xml b/solr/example/exampledocs/ipod_video.xml
index 7895860ea19..1ca5f6f5c21 100644
--- a/solr/example/exampledocs/ipod_video.xml
+++ b/solr/example/exampledocs/ipod_video.xml
@@ -19,6 +19,8 @@
MA147LL/A
Apple 60 GB iPod with Video Playback Black
Apple Computer Inc.
+
+ apple
electronics
music
iTunes, Podcasts, Audiobooks
diff --git a/solr/example/exampledocs/manufacturers.xml b/solr/example/exampledocs/manufacturers.xml
new file mode 100644
index 00000000000..e3121d5db1f
--- /dev/null
+++ b/solr/example/exampledocs/manufacturers.xml
@@ -0,0 +1,75 @@
+
+
+
+
+ adata
+ A-Data Technology
+ 46221 Landing Parkway Fremont, CA 94538
+
+
+ apple
+ Apple
+ 1 Infinite Way, Cupertino CA
+
+
+ asus
+ ASUS Computer
+ 800 Corporate Way Fremont, CA 94539
+
+
+ ati
+ ATI Technologies
+ 33 Commerce Valley Drive East Thornhill, ON L3T 7N6 Canada
+
+
+ belkin
+ Belkin
+ 12045 E. Waterfront Drive Playa Vista, CA 90094
+
+
+ canon
+ Canon, Inc.
+ One Canon Plaza Lake Success, NY 11042
+
+
+ corsair
+ Corsair Microsystems
+ 46221 Landing Parkway Fremont, CA 94538
+
+
+ dell
+ Dell, Inc.
+ One Dell Way Round Rock, Texas 78682
+
+
+ maxtor
+ Maxtor Corporation
+ 920 Disc Drive Scotts Valley, CA 95066
+
+
+ samsung
+ Samsung Electronics Co. Ltd.
+ 105 Challenger Rd. Ridgefield Park, NJ 07660-0511
+
+
+ viewsonic
+ ViewSonic Corp
+ 381 Brea Canyon Road Walnut, CA 91789-0708
+
+
+
diff --git a/solr/example/exampledocs/mem.xml b/solr/example/exampledocs/mem.xml
index 1ca858d4a6b..0b89d6785c2 100644
--- a/solr/example/exampledocs/mem.xml
+++ b/solr/example/exampledocs/mem.xml
@@ -20,6 +20,8 @@
TWINX2048-3200PRO
CORSAIR XMS 2GB (2 x 1GB) 184-Pin DDR SDRAM Unbuffered DDR 400 (PC 3200) Dual Channel Kit System Memory - Retail
Corsair Microsystems Inc.
+
+ corsair
electronics
memory
CAS latency 2, 2-3-3-6 timing, 2.75v, unbuffered, heat-spreader
@@ -38,6 +40,8 @@
VS1GB400C3
CORSAIR ValueSelect 1GB 184-Pin DDR SDRAM Unbuffered DDR 400 (PC 3200) System Memory - Retail
Corsair Microsystems Inc.
+
+ corsair
electronics
memory
74.99
@@ -54,6 +58,8 @@
VDBDB1A16
A-DATA V-Series 1GB 184-Pin DDR SDRAM Unbuffered DDR 400 (PC 3200) System Memory - OEM
A-DATA Technology Inc.
+
+ corsair
electronics
memory
CAS latency 3, 2.7v
diff --git a/solr/example/exampledocs/monitor.xml b/solr/example/exampledocs/monitor.xml
index 035f61891da..db986fa0b7f 100644
--- a/solr/example/exampledocs/monitor.xml
+++ b/solr/example/exampledocs/monitor.xml
@@ -19,6 +19,8 @@
3007WFP
Dell Widescreen UltraSharp 3007WFP
Dell, Inc.
+
+ dell
electronics
monitor
30" TFT active matrix LCD, 2560 x 1600, .25mm dot pitch, 700:1 contrast
diff --git a/solr/example/exampledocs/monitor2.xml b/solr/example/exampledocs/monitor2.xml
index 09cc778c3da..79b99494319 100644
--- a/solr/example/exampledocs/monitor2.xml
+++ b/solr/example/exampledocs/monitor2.xml
@@ -19,6 +19,8 @@
VA902B
ViewSonic VA902B - flat panel display - TFT - 19"
ViewSonic Corp.
+
+ viewsonic
electronics
monitor
19" TFT active matrix LCD, 8ms response time, 1280 x 1024 native resolution
diff --git a/solr/example/exampledocs/mp500.xml b/solr/example/exampledocs/mp500.xml
index 890cd4aadfb..bab401a289b 100644
--- a/solr/example/exampledocs/mp500.xml
+++ b/solr/example/exampledocs/mp500.xml
@@ -19,6 +19,8 @@
0579B002
Canon PIXMA MP500 All-In-One Photo Printer
Canon Inc.
+
+ canon
electronics
multifunction printer
printer
diff --git a/solr/example/exampledocs/sd500.xml b/solr/example/exampledocs/sd500.xml
index ff700025da8..145c6fd5de6 100644
--- a/solr/example/exampledocs/sd500.xml
+++ b/solr/example/exampledocs/sd500.xml
@@ -19,6 +19,8 @@
9885A004
Canon PowerShot SD500
Canon Inc.
+
+ canon
electronics
camera
3x zoop, 7.1 megapixel Digital ELPH
diff --git a/solr/example/exampledocs/vidcard.xml b/solr/example/exampledocs/vidcard.xml
index 9cd3fd1c79c..10b8121fdb1 100644
--- a/solr/example/exampledocs/vidcard.xml
+++ b/solr/example/exampledocs/vidcard.xml
@@ -19,7 +19,10 @@
EN7800GTX/2DHTV/256M
ASUS Extreme N7800GTX/2DHTV (256 MB)
+
ASUS Computer Inc.
+
+ asus
electronics
graphics card
NVIDIA GeForce 7800 GTX GPU/VPU clocked at 486MHz
@@ -39,6 +42,8 @@
100-435805
ATI Radeon X1900 XTX 512 MB PCIE Video Card
ATI Technologies
+
+ ati
electronics
graphics card
ATI RADEON X1900 GPU/VPU clocked at 650MHz
diff --git a/solr/example/solr/conf/velocity/doc.vm b/solr/example/solr/conf/velocity/doc.vm
index de3ad49aae2..91246389177 100644
--- a/solr/example/solr/conf/velocity/doc.vm
+++ b/solr/example/solr/conf/velocity/doc.vm
@@ -26,4 +26,17 @@
#if($params.getBool("debugQuery",false))
toggle explain
$response.getExplainMap().get($doc.getFirstValue('id'))
+ toggle all fields
+
+ #foreach($fieldname in $doc.fieldNames)
+
+ $fieldname :
+
+ #foreach($value in $doc.getFieldValues($fieldname))
+ $value
+ #end
+
+ #end
+
+
#end
\ No newline at end of file
diff --git a/solr/example/solr/conf/velocity/footer.vm b/solr/example/solr/conf/velocity/footer.vm
index 79c8f820afc..b55e8a5a618 100644
--- a/solr/example/solr/conf/velocity/footer.vm
+++ b/solr/example/solr/conf/velocity/footer.vm
@@ -4,7 +4,7 @@
#if($request.params.get('debugQuery'))
disable debug
#else
- enable debug
+ enable debug
#end
#if($annotate)
disable annotation
diff --git a/solr/lib/apache-solr-noggit-pom.xml.template b/solr/lib/apache-solr-noggit-pom.xml.template
index 1596274c115..85b85a4e7d1 100644
--- a/solr/lib/apache-solr-noggit-pom.xml.template
+++ b/solr/lib/apache-solr-noggit-pom.xml.template
@@ -31,6 +31,6 @@
solr-noggit
Solr Specific Noggit
@version@
- Solr Specific Noggit r944541
+ Solr Specific Noggit r1099557
jar
diff --git a/solr/lib/apache-solr-noggit-r1099557.jar b/solr/lib/apache-solr-noggit-r1099557.jar
new file mode 100644
index 00000000000..9fb87b9f301
--- /dev/null
+++ b/solr/lib/apache-solr-noggit-r1099557.jar
@@ -0,0 +1,2 @@
+AnyObjectId[5c4007c7e74af85d823243153d308f80e084eff0] was removed in git history.
+Apache SVN contains full history.
\ No newline at end of file
diff --git a/solr/lib/apache-solr-noggit-r944541.jar b/solr/lib/apache-solr-noggit-r944541.jar
deleted file mode 100755
index e0624dd525f..00000000000
--- a/solr/lib/apache-solr-noggit-r944541.jar
+++ /dev/null
@@ -1,2 +0,0 @@
-AnyObjectId[9b434f5760dd0d78350bdf8237273c0d5db0174e] was removed in git history.
-Apache SVN contains full history.
\ No newline at end of file
diff --git a/solr/src/java/org/apache/solr/analysis/PathHierarchyTokenizerFactory.java b/solr/src/java/org/apache/solr/analysis/PathHierarchyTokenizerFactory.java
index c93db874ec7..35ad58d4f9f 100644
--- a/solr/src/java/org/apache/solr/analysis/PathHierarchyTokenizerFactory.java
+++ b/solr/src/java/org/apache/solr/analysis/PathHierarchyTokenizerFactory.java
@@ -21,6 +21,7 @@ import java.util.Map;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.path.PathHierarchyTokenizer;
+import org.apache.lucene.analysis.path.ReversePathHierarchyTokenizer;
/**
@@ -37,6 +38,8 @@ public class PathHierarchyTokenizerFactory extends BaseTokenizerFactory {
private char delimiter;
private char replacement;
+ private boolean reverse = false;
+ private int skip = PathHierarchyTokenizer.DEFAULT_SKIP;
/**
* Require a configured pattern
@@ -70,10 +73,23 @@ public class PathHierarchyTokenizerFactory extends BaseTokenizerFactory {
else{
replacement = delimiter;
}
+
+ v = args.get( "reverse" );
+ if( v != null ){
+ reverse = "true".equals( v );
+ }
+
+ v = args.get( "skip" );
+ if( v != null ){
+ skip = Integer.parseInt( v );
+ }
}
public Tokenizer create(Reader input) {
- return new PathHierarchyTokenizer(input, delimiter, replacement);
+ if( reverse ) {
+ return new ReversePathHierarchyTokenizer(input, delimiter, replacement, skip);
+ }
+ return new PathHierarchyTokenizer(input, delimiter, replacement, skip);
}
}
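
A hedged sketch of exercising the factory's new reverse/skip arguments programmatically; in a schema these would simply be attributes on the tokenizer element. The init(Map) call follows the usual Solr 3.x TokenizerFactory contract, which is an assumption here since it is not shown in this hunk:

import java.io.StringReader;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.analysis.Tokenizer;
import org.apache.solr.analysis.PathHierarchyTokenizerFactory;

public class PathHierarchyFactoryDemo {
  public static void main(String[] args) throws Exception {
    Map<String, String> params = new HashMap<String, String>();
    params.put("delimiter", "/");
    params.put("reverse", "true"); // selects ReversePathHierarchyTokenizer
    params.put("skip", "1");       // drop one path element from the kept side

    PathHierarchyTokenizerFactory factory = new PathHierarchyTokenizerFactory();
    factory.init(params);
    Tokenizer tokenizer = factory.create(new StringReader("/usr/local/lib"));
    // consume the tokenizer as usual via incrementToken()
  }
}
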
diff --git a/solr/src/java/org/apache/solr/analysis/SynonymFilterFactory.java b/solr/src/java/org/apache/solr/analysis/SynonymFilterFactory.java
index 571bdcf7f8d..162913e7899 100644
--- a/solr/src/java/org/apache/solr/analysis/SynonymFilterFactory.java
+++ b/solr/src/java/org/apache/solr/analysis/SynonymFilterFactory.java
@@ -61,6 +61,16 @@ public class SynonymFilterFactory extends BaseTokenFilterFactory implements Reso
tokFactory = loadTokenizerFactory( loader, tf, args );
}
+ Iterable wlist=loadRules( synonyms, loader );
+
+ synMap = new SynonymMap(ignoreCase);
+ parseRules(wlist, synMap, "=>", ",", expand,tokFactory);
+ }
+
+ /**
+ * @return a list of all rules
+ */
+ protected Iterable loadRules( String synonyms, ResourceLoader loader ) {
List wlist=null;
try {
File synonymFile = new File(synonyms);
@@ -77,13 +87,12 @@ public class SynonymFilterFactory extends BaseTokenFilterFactory implements Reso
} catch (IOException e) {
throw new RuntimeException(e);
}
- synMap = new SynonymMap(ignoreCase);
- parseRules(wlist, synMap, "=>", ",", expand,tokFactory);
+ return wlist;
}
private SynonymMap synMap;
- static void parseRules(List rules, SynonymMap map, String mappingSep,
+ static void parseRules(Iterable rules, SynonymMap map, String mappingSep,
String synSep, boolean expansion, TokenizerFactory tokFactory) {
int count=0;
for (String rule : rules) {
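
The point of extracting loadRules into a protected method is to let subclasses supply synonym rules from somewhere other than the configured file. A hedged sketch of such a subclass; the rule syntax is whatever parseRules already consumes, and the factory's init/inform lifecycle is unchanged:

import java.util.Arrays;

import org.apache.solr.analysis.SynonymFilterFactory;
import org.apache.solr.common.ResourceLoader;

// sketch: a subclass that ignores the synonyms file and returns in-memory rules
public class InMemorySynonymFilterFactory extends SynonymFilterFactory {
  @Override
  protected Iterable<String> loadRules(String synonyms, ResourceLoader loader) {
    return Arrays.asList(
        "i-pod, ipod => ipod",   // mapping form
        "tv, television"         // expansion form
    );
  }
}
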
diff --git a/solr/src/java/org/apache/solr/handler/JsonLoader.java b/solr/src/java/org/apache/solr/handler/JsonLoader.java
index c233ce634e4..34118a07402 100644
--- a/solr/src/java/org/apache/solr/handler/JsonLoader.java
+++ b/solr/src/java/org/apache/solr/handler/JsonLoader.java
@@ -23,6 +23,7 @@ import java.util.Stack;
import org.apache.commons.io.IOUtils;
import org.apache.noggit.JSONParser;
+import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField;
import org.apache.solr.common.util.ContentStream;
@@ -43,10 +44,18 @@ import org.slf4j.LoggerFactory;
class JsonLoader extends ContentStreamLoader {
final static Logger log = LoggerFactory.getLogger( JsonLoader.class );
- protected UpdateRequestProcessor processor;
+ protected final UpdateRequestProcessor processor;
+ protected final SolrQueryRequest req;
+ protected JSONParser parser;
+ protected final int commitWithin;
+ protected final boolean overwrite;
- public JsonLoader(UpdateRequestProcessor processor) {
+ public JsonLoader(SolrQueryRequest req, UpdateRequestProcessor processor) {
this.processor = processor;
+ this.req = req;
+
+ commitWithin = req.getParams().getInt(XmlUpdateRequestHandler.COMMIT_WITHIN, -1);
+ overwrite = req.getParams().getBool(XmlUpdateRequestHandler.OVERWRITE, true);
}
@Override
@@ -55,14 +64,14 @@ class JsonLoader extends ContentStreamLoader {
Reader reader = null;
try {
reader = stream.getReader();
- if (XmlUpdateRequestHandler.log.isTraceEnabled()) {
+ if (log.isTraceEnabled()) {
String body = IOUtils.toString(reader);
- XmlUpdateRequestHandler.log.trace("body", body);
+ log.trace("body", body);
reader = new StringReader(body);
}
- JSONParser parser = new JSONParser(reader);
- this.processUpdate(req, processor, parser);
+ parser = new JSONParser(reader);
+ this.processUpdate();
}
finally {
IOUtils.closeQuietly(reader);
@@ -70,39 +79,50 @@ class JsonLoader extends ContentStreamLoader {
}
@SuppressWarnings("fallthrough")
- void processUpdate(SolrQueryRequest req, UpdateRequestProcessor processor, JSONParser parser) throws IOException
+ void processUpdate() throws IOException
{
int ev = parser.nextEvent();
while( ev != JSONParser.EOF ) {
switch( ev )
{
+ case JSONParser.ARRAY_START:
+ handleAdds();
+ break;
+
case JSONParser.STRING:
if( parser.wasKey() ) {
String v = parser.getString();
if( v.equals( XmlUpdateRequestHandler.ADD ) ) {
- processor.processAdd( parseAdd(req, parser ) );
+ int ev2 = parser.nextEvent();
+ if (ev2 == JSONParser.OBJECT_START) {
+ processor.processAdd( parseAdd() );
+ } else if (ev2 == JSONParser.ARRAY_START) {
+ handleAdds();
+ } else {
+ assertEvent(ev2, JSONParser.OBJECT_START);
+ }
}
else if( v.equals( XmlUpdateRequestHandler.COMMIT ) ) {
CommitUpdateCommand cmd = new CommitUpdateCommand(req, false );
cmd.waitFlush = cmd.waitSearcher = true;
- parseCommitOptions( parser, cmd );
+ parseCommitOptions( cmd );
processor.processCommit( cmd );
}
else if( v.equals( XmlUpdateRequestHandler.OPTIMIZE ) ) {
CommitUpdateCommand cmd = new CommitUpdateCommand(req, true );
cmd.waitFlush = cmd.waitSearcher = true;
- parseCommitOptions( parser, cmd );
+ parseCommitOptions( cmd );
processor.processCommit( cmd );
}
else if( v.equals( XmlUpdateRequestHandler.DELETE ) ) {
- processor.processDelete( parseDelete(req, parser ) );
+ processor.processDelete( parseDelete() );
}
else if( v.equals( XmlUpdateRequestHandler.ROLLBACK ) ) {
- processor.processRollback( parseRollback(req, parser ) );
+ processor.processRollback( parseRollback() );
}
else {
- throw new IOException( "Unknown command: "+v+" ["+parser.getPosition()+"]" );
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unknown command: "+v+" ["+parser.getPosition()+"]" );
}
break;
}
@@ -117,12 +137,11 @@ class JsonLoader extends ContentStreamLoader {
case JSONParser.OBJECT_START:
case JSONParser.OBJECT_END:
- case JSONParser.ARRAY_START:
case JSONParser.ARRAY_END:
break;
default:
- System.out.println("UNKNOWN_EVENT_ID:"+ev);
+ log.info("Noggit UNKNOWN_EVENT_ID:"+ev);
break;
}
// read the next event
@@ -130,187 +149,211 @@ class JsonLoader extends ContentStreamLoader {
}
}
- DeleteUpdateCommand parseDelete(SolrQueryRequest req, JSONParser js) throws IOException {
- assertNextEvent( js, JSONParser.OBJECT_START );
+ DeleteUpdateCommand parseDelete() throws IOException {
+ assertNextEvent( JSONParser.OBJECT_START );
DeleteUpdateCommand cmd = new DeleteUpdateCommand(req);
-
+
while( true ) {
- int ev = js.nextEvent();
+ int ev = parser.nextEvent();
if( ev == JSONParser.STRING ) {
- String key = js.getString();
- if( js.wasKey() ) {
+ String key = parser.getString();
+ if( parser.wasKey() ) {
if( "id".equals( key ) ) {
- cmd.id = js.getString();
+ cmd.id = parser.getString();
}
else if( "query".equals(key) ) {
- cmd.query = js.getString();
+ cmd.query = parser.getString();
}
else {
- throw new IOException( "Unknown key: "+key+" ["+js.getPosition()+"]" );
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unknown key: "+key+" ["+parser.getPosition()+"]" );
}
}
else {
- throw new IOException(
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"invalid string: " + key
- +" at ["+js.getPosition()+"]" );
+ +" at ["+parser.getPosition()+"]" );
}
}
else if( ev == JSONParser.OBJECT_END ) {
if( cmd.id == null && cmd.query == null ) {
- throw new IOException( "Missing id or query for delete ["+js.getPosition()+"]" );
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Missing id or query for delete ["+parser.getPosition()+"]" );
}
return cmd;
}
else {
- throw new IOException(
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"Got: "+JSONParser.getEventString( ev )
- +" at ["+js.getPosition()+"]" );
+ +" at ["+parser.getPosition()+"]" );
}
}
}
- RollbackUpdateCommand parseRollback(SolrQueryRequest req, JSONParser js) throws IOException {
- assertNextEvent( js, JSONParser.OBJECT_START );
- assertNextEvent( js, JSONParser.OBJECT_END );
+ RollbackUpdateCommand parseRollback() throws IOException {
+ assertNextEvent( JSONParser.OBJECT_START );
+ assertNextEvent( JSONParser.OBJECT_END );
return new RollbackUpdateCommand(req);
}
- void parseCommitOptions( JSONParser js, CommitUpdateCommand cmd ) throws IOException
+ void parseCommitOptions(CommitUpdateCommand cmd ) throws IOException
{
- assertNextEvent( js, JSONParser.OBJECT_START );
+ assertNextEvent( JSONParser.OBJECT_START );
while( true ) {
- int ev = js.nextEvent();
+ int ev = parser.nextEvent();
if( ev == JSONParser.STRING ) {
- String key = js.getString();
- if( js.wasKey() ) {
+ String key = parser.getString();
+ if( parser.wasKey() ) {
if( XmlUpdateRequestHandler.WAIT_SEARCHER.equals( key ) ) {
- cmd.waitSearcher = js.getBoolean();
+ cmd.waitSearcher = parser.getBoolean();
}
else if( XmlUpdateRequestHandler.WAIT_FLUSH.equals( key ) ) {
- cmd.waitFlush = js.getBoolean();
+ cmd.waitFlush = parser.getBoolean();
}
else {
- throw new IOException( "Unknown key: "+key+" ["+js.getPosition()+"]" );
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unknown key: "+key+" ["+parser.getPosition()+"]" );
}
}
else {
- throw new IOException(
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"invalid string: " + key
- +" at ["+js.getPosition()+"]" );
+ +" at ["+parser.getPosition()+"]" );
}
}
else if( ev == JSONParser.OBJECT_END ) {
return;
}
else {
- throw new IOException(
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"Got: "+JSONParser.getEventString( ev )
- +" at ["+js.getPosition()+"]" );
+ +" at ["+parser.getPosition()+"]" );
}
}
}
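
A minimal sketch of the options parseCommitOptions() allows, assuming WAIT_SEARCHER and WAIT_FLUSH resolve to "waitSearcher" and "waitFlush"; both flags default to true in the dispatch code above, so they only need to be sent to turn waiting off:

    {"commit": {"waitSearcher": false, "waitFlush": false}}
    {"optimize": {}}
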
- AddUpdateCommand parseAdd(SolrQueryRequest req, JSONParser js ) throws IOException
+ AddUpdateCommand parseAdd() throws IOException
{
- assertNextEvent( js, JSONParser.OBJECT_START );
AddUpdateCommand cmd = new AddUpdateCommand(req);
+ cmd.commitWithin = commitWithin;
+ cmd.overwrite = overwrite;
+
float boost = 1.0f;
while( true ) {
- int ev = js.nextEvent();
+ int ev = parser.nextEvent();
if( ev == JSONParser.STRING ) {
- if( js.wasKey() ) {
- String key = js.getString();
+ if( parser.wasKey() ) {
+ String key = parser.getString();
if( "doc".equals( key ) ) {
if( cmd.solrDoc != null ) {
- throw new IOException( "multiple docs in same add command" );
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "multiple docs in same add command" );
}
- ev = assertNextEvent( js, JSONParser.OBJECT_START );
- cmd.solrDoc = parseDoc( ev, js );
+ ev = assertNextEvent( JSONParser.OBJECT_START );
+ cmd.solrDoc = parseDoc( ev );
}
else if( XmlUpdateRequestHandler.OVERWRITE.equals( key ) ) {
- cmd.overwrite = js.getBoolean(); // reads next boolean
+ cmd.overwrite = parser.getBoolean(); // reads next boolean
}
else if( XmlUpdateRequestHandler.COMMIT_WITHIN.equals( key ) ) {
- cmd.commitWithin = (int)js.getLong();
+ cmd.commitWithin = (int)parser.getLong();
}
else if( "boost".equals( key ) ) {
- boost = Float.parseFloat( js.getNumberChars().toString() );
+ boost = Float.parseFloat( parser.getNumberChars().toString() );
}
else {
- throw new IOException( "Unknown key: "+key+" ["+js.getPosition()+"]" );
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unknown key: "+key+" ["+parser.getPosition()+"]" );
}
}
else {
- throw new IOException(
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"Should be a key "
- +" at ["+js.getPosition()+"]" );
+ +" at ["+parser.getPosition()+"]" );
}
}
else if( ev == JSONParser.OBJECT_END ) {
if( cmd.solrDoc == null ) {
- throw new IOException("missing solr document. "+js.getPosition() );
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,"missing solr document. "+parser.getPosition() );
}
cmd.solrDoc.setDocumentBoost( boost );
return cmd;
}
else {
- throw new IOException(
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"Got: "+JSONParser.getEventString( ev )
- +" at ["+js.getPosition()+"]" );
+ +" at ["+parser.getPosition()+"]" );
}
}
}
-
- int assertNextEvent( JSONParser parser, int ev ) throws IOException
+
+
+ void handleAdds() throws IOException
+ {
+ while( true ) {
+ AddUpdateCommand cmd = new AddUpdateCommand(req);
+ cmd.commitWithin = commitWithin;
+ cmd.overwrite = overwrite;
+
+ int ev = parser.nextEvent();
+ if (ev == JSONParser.ARRAY_END) break;
+
+ assertEvent(ev, JSONParser.OBJECT_START);
+ cmd.solrDoc = parseDoc(ev);
+ processor.processAdd(cmd);
+ }
+ }
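
Taken together, parseAdd() and handleAdds() accept two shapes for the add command; a minimal sketch with hypothetical field values, assuming the usual lowercase command and option names: either a single wrapped document with per-command options, or a bare array of documents that pick up the request-level overwrite/commitWithin defaults:

    {"add": {"doc": {"id": "1", "title": "one document"}, "boost": 2.0, "overwrite": true, "commitWithin": 5000}}
    {"add": [{"id": "2"}, {"id": "3"}]}
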
+
+
+ int assertNextEvent(int expected ) throws IOException
{
int got = parser.nextEvent();
- if( ev != got ) {
- throw new IOException(
- "Expected: "+JSONParser.getEventString( ev )
- +" but got "+JSONParser.getEventString( got )
- +" at ["+parser.getPosition()+"]" );
- }
+ assertEvent(got, expected);
return got;
}
+
+ void assertEvent(int ev, int expected) {
+ if( ev != expected ) {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+ "Expected: "+JSONParser.getEventString( expected )
+ +" but got "+JSONParser.getEventString( ev )
+ +" at ["+parser.getPosition()+"]" );
+ }
+ }
- SolrInputDocument parseDoc( int ev, JSONParser js ) throws IOException
+ SolrInputDocument parseDoc(int ev) throws IOException
{
Stack stack = new Stack();
Object obj = null;
boolean inArray = false;
if( ev != JSONParser.OBJECT_START ) {
- throw new IOException( "object should already be started" );
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "object should already be started" );
}
while( true ) {
- //System.out.println( ev + "["+JSONParser.getEventString(ev)+"] "+js.wasKey() ); //+ js.getString() );
+ //System.out.println( ev + "["+JSONParser.getEventString(ev)+"] "+parser.wasKey() ); //+ parser.getString() );
switch (ev) {
case JSONParser.STRING:
- if( js.wasKey() ) {
+ if( parser.wasKey() ) {
obj = stack.peek();
- String v = js.getString();
+ String v = parser.getString();
if( obj instanceof SolrInputField ) {
SolrInputField field = (SolrInputField)obj;
if( "boost".equals( v ) ) {
- ev = js.nextEvent();
+ ev = parser.nextEvent();
if( ev != JSONParser.NUMBER &&
ev != JSONParser.LONG &&
ev != JSONParser.BIGNUMBER ) {
- throw new IOException( "boost should have number! "+JSONParser.getEventString(ev) );
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "boost should have number! "+JSONParser.getEventString(ev) );
}
- field.setBoost( Float.valueOf( js.getNumberChars().toString() ) );
+ field.setBoost( Float.valueOf( parser.getNumberChars().toString() ) );
}
else if( "value".equals( v ) ) {
// nothing special...
stack.push( field ); // so it can be popped
}
else {
- throw new IOException( "invalid key: "+v + " ["+js.getPosition()+"]" );
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "invalid key: "+v + " ["+ parser.getPosition()+"]" );
}
}
else if( obj instanceof SolrInputDocument ) {
@@ -323,22 +366,22 @@ class JsonLoader extends ContentStreamLoader {
stack.push( f );
}
else {
- throw new IOException( "hymmm ["+js.getPosition()+"]" );
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "hymmm ["+ parser.getPosition()+"]" );
}
}
else {
- addValToField(stack, js.getString(), inArray, js);
+ addValToField(stack, parser.getString(), inArray, parser);
}
break;
case JSONParser.LONG:
case JSONParser.NUMBER:
case JSONParser.BIGNUMBER:
- addValToField(stack, js.getNumberChars().toString(), inArray, js);
+ addValToField(stack, parser.getNumberChars().toString(), inArray, parser);
break;
case JSONParser.BOOLEAN:
- addValToField(stack, js.getBoolean(),inArray, js);
+ addValToField(stack, parser.getBoolean(),inArray, parser);
break;
case JSONParser.OBJECT_START:
@@ -351,7 +394,7 @@ class JsonLoader extends ContentStreamLoader {
// should already be pushed...
}
else {
- throw new IOException( "should not start new object with: "+obj + " ["+js.getPosition()+"]" );
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "should not start new object with: "+obj + " ["+ parser.getPosition()+"]" );
}
}
break;
@@ -365,7 +408,7 @@ class JsonLoader extends ContentStreamLoader {
// should already be pushed...
}
else {
- throw new IOException( "should not start new object with: "+obj + " ["+js.getPosition()+"]" );
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "should not start new object with: "+obj + " ["+ parser.getPosition()+"]" );
}
break;
@@ -383,18 +426,18 @@ class JsonLoader extends ContentStreamLoader {
break;
}
- ev = js.nextEvent();
+ ev = parser.nextEvent();
if( ev == JSONParser.EOF ) {
- throw new IOException( "should finish doc first!" );
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "should finish doc first!" );
}
}
}
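
parseDoc() also lets an individual field value be an object carrying "value" and an optional "boost", and arrays produce multivalued fields; a minimal sketch with hypothetical field names:

    {"id": "1", "title": {"value": "boosted title", "boost": 2.0}, "tags": ["a", "b"]}
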
- static void addValToField( Stack stack, Object val, boolean inArray, JSONParser js ) throws IOException
+ static void addValToField( Stack stack, Object val, boolean inArray, JSONParser parser ) throws IOException
{
Object obj = stack.peek();
if( !(obj instanceof SolrInputField) ) {
- throw new IOException( "hymmm ["+js.getPosition()+"]" );
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "hymmm ["+parser.getPosition()+"]" );
}
SolrInputField f = inArray
diff --git a/solr/src/java/org/apache/solr/handler/JsonUpdateRequestHandler.java b/solr/src/java/org/apache/solr/handler/JsonUpdateRequestHandler.java
index 9f36c37d785..213089d0943 100644
--- a/solr/src/java/org/apache/solr/handler/JsonUpdateRequestHandler.java
+++ b/solr/src/java/org/apache/solr/handler/JsonUpdateRequestHandler.java
@@ -37,7 +37,7 @@ public class JsonUpdateRequestHandler extends ContentStreamHandlerBase {
@Override
protected ContentStreamLoader newLoader(SolrQueryRequest req, UpdateRequestProcessor processor) {
- return new JsonLoader(processor);
+ return new JsonLoader(req, processor);
}
//////////////////////// SolrInfoMBeans methods //////////////////////
diff --git a/solr/src/java/org/apache/solr/handler/component/TermVectorComponent.java b/solr/src/java/org/apache/solr/handler/component/TermVectorComponent.java
index 56b9d4826f2..24c5256576b 100644
--- a/solr/src/java/org/apache/solr/handler/component/TermVectorComponent.java
+++ b/solr/src/java/org/apache/solr/handler/component/TermVectorComponent.java
@@ -208,7 +208,7 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
if (keyField != null) {
Document document = reader.document(docId, fieldSelector);
- Fieldable uniqId = document.getField(uniqFieldName);
+ Fieldable uniqId = document.getFieldable(uniqFieldName);
String uniqVal = null;
if (uniqId != null) {
uniqVal = keyField.getType().storedToReadable(uniqId);
diff --git a/solr/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java b/solr/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
index fbb1489dba4..b0be39fd0b1 100644
--- a/solr/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
+++ b/solr/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
@@ -401,13 +401,24 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
private void doHighlightingByHighlighter( Query query, SolrQueryRequest req, NamedList docSummaries,
int docId, Document doc, String fieldName ) throws IOException {
+ final SolrIndexSearcher searcher = req.getSearcher();
+ final IndexSchema schema = searcher.getSchema();
+
+ // TODO: Currently in trunk highlighting numeric fields is broken (Lucene) -
+ // so we disable them until fixed (see LUCENE-3080)!
+ // BEGIN: Hack
+ final SchemaField schemaField = schema.getFieldOrNull(fieldName);
+ if (schemaField != null && (
+ (schemaField.getType() instanceof org.apache.solr.schema.TrieField) ||
+ (schemaField.getType() instanceof org.apache.solr.schema.TrieDateField)
+ )) return;
+ // END: Hack
+
SolrParams params = req.getParams();
String[] docTexts = doc.getValues(fieldName);
// according to Document javadoc, doc.getValues() never returns null. check empty instead of null
if (docTexts.length == 0) return;
- SolrIndexSearcher searcher = req.getSearcher();
- IndexSchema schema = searcher.getSchema();
TokenStream tstream = null;
int numFragments = getMaxSnippets(fieldName, params);
boolean mergeContiguousFragments = isMergeContiguousFragments(fieldName, params);
diff --git a/solr/src/java/org/apache/solr/response/BaseResponseWriter.java b/solr/src/java/org/apache/solr/response/BaseResponseWriter.java
deleted file mode 100644
index 696903b198a..00000000000
--- a/solr/src/java/org/apache/solr/response/BaseResponseWriter.java
+++ /dev/null
@@ -1,319 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.solr.response;
-
-import org.apache.solr.common.util.NamedList;
-import org.apache.solr.common.SolrDocumentList;
-import org.apache.solr.common.SolrDocument;
-import org.apache.solr.common.SolrInputDocument;
-import org.apache.solr.common.params.CommonParams;
-import org.apache.solr.request.SolrQueryRequest;
-import org.apache.solr.search.DocList;
-import org.apache.solr.search.ReturnFields;
-import org.apache.solr.search.SolrIndexSearcher;
-import org.apache.solr.search.DocIterator;
-import org.apache.solr.schema.FieldType;
-import org.apache.solr.schema.IndexSchema;
-import org.apache.solr.schema.SchemaField;
-
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Fieldable;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.IOException;
-import java.util.List;
-import java.util.ArrayList;
-
-/**
- * THIS HAS NO TESTS and is not used anywhere.... no idea how or if it should work...
- *
- * I think we should drop it - along with {@link GenericBinaryResponseWriter} and {@link GenericTextResponseWriter}
- *
- * unless I'm missing something (ryan, March 2011)
- *
- *
- * This class serves as a basis from which {@link QueryResponseWriter}s can be
- * developed. The class provides a single method
- * {@link #write(SingleResponseWriter, SolrQueryRequest, SolrQueryResponse)}
- * that allows users to implement a {@link SingleResponseWriter} sub-class which
- * defines how to output {@link SolrInputDocument}s or a
- * {@link SolrDocumentList}.
- *
- * @version $Id$
- * @since 1.5
- *
- */
-public abstract class BaseResponseWriter {
-
- private static final Logger LOG = LoggerFactory
- .getLogger(BaseResponseWriter.class);
-
-
- /**
- *
- * The main method that allows users to write {@link SingleResponseWriter}s
- * and provide them as the initial parameter responseWriter to
- * this method which defines how output should be generated.
- *
- * @param responseWriter
- * The user-provided {@link SingleResponseWriter} implementation.
- * @param request
- * The provided {@link SolrQueryRequest}.
- * @param response
- * The provided {@link SolrQueryResponse}.
- * @throws IOException
- * If any error occurs.
- */
- public void write(SingleResponseWriter responseWriter,
- SolrQueryRequest request, SolrQueryResponse response) throws IOException {
- responseWriter.start();
- NamedList nl = response.getValues();
- for (int i = 0; i < nl.size(); i++) {
- String name = nl.getName(i);
- Object val = nl.getVal(i);
- if ("responseHeader".equals(name)) {
- Boolean omitHeader = request.getParams().getBool(CommonParams.OMIT_HEADER);
- if (omitHeader == null || !omitHeader) responseWriter.writeResponseHeader((NamedList) val);
- } else if (val instanceof SolrDocumentList) {
- SolrDocumentList list = (SolrDocumentList) val;
- DocListInfo info = new DocListInfo((int)list.getNumFound(), list.size(), (int)list.getStart(), list.getMaxScore());
- if (responseWriter.isStreamingDocs()) {
- responseWriter.startDocumentList(name,info);
- for (SolrDocument solrDocument : list)
- responseWriter.writeDoc(solrDocument);
- responseWriter.endDocumentList();
- } else {
- responseWriter.writeAllDocs(info, list);
- }
- } else if (val instanceof DocList) {
- DocList docList = (DocList) val;
- int sz = docList.size();
- IdxInfo idxInfo = new IdxInfo(request.getSchema(), request
- .getSearcher(), response.getReturnFields());
- DocListInfo info = new DocListInfo(docList.matches(), docList.size(),docList.offset(),
- docList.maxScore());
- DocIterator iterator = docList.iterator();
- if (responseWriter.isStreamingDocs()) {
- responseWriter.startDocumentList(name,info);
- for (int j = 0; j < sz; j++) {
- SolrDocument sdoc = getDoc(iterator.nextDoc(), idxInfo);
- responseWriter.writeDoc(sdoc);
- }
- } else {
- ArrayList<SolrDocument> list = new ArrayList<SolrDocument>(docList
- .size());
- for (int j = 0; j < sz; j++) {
- SolrDocument sdoc = getDoc(iterator.nextDoc(), idxInfo);
- list.add(sdoc);
- }
- responseWriter.writeAllDocs(info, list);
- }
-
- } else {
- responseWriter.writeOther(name, val);
-
- }
- }
- responseWriter.end();
-
- }
-
- /** No-op implementation so that the implementing classes do not have to do it
- */
- public void init(NamedList args){}
-
- private static class IdxInfo {
- IndexSchema schema;
- SolrIndexSearcher searcher;
- ReturnFields returnFields;
-
- private IdxInfo(IndexSchema schema, SolrIndexSearcher searcher,
- ReturnFields returnFields) {
- this.schema = schema;
- this.searcher = searcher;
- this.returnFields = returnFields;
- }
- }
-
- private static SolrDocument getDoc(int id, IdxInfo info) throws IOException {
- Document doc = info.searcher.doc(id);
- SolrDocument solrDoc = new SolrDocument();
- for (Fieldable f : doc.getFields()) {
- String fieldName = f.name();
- if (info.returnFields != null && !info.returnFields.wantsField(fieldName))
- continue;
- SchemaField sf = info.schema.getFieldOrNull(fieldName);
- FieldType ft = null;
- if (sf != null) ft = sf.getType();
- Object val = null;
- if (ft == null) { // handle fields not in the schema
- if (f.isBinary())
- val = f.getBinaryValue();
- else
- val = f.stringValue();
- } else {
- try {
- if (BinaryResponseWriter.KNOWN_TYPES.contains(ft.getClass())) {
- val = ft.toObject(f);
- } else {
- val = ft.toExternal(f);
- }
- } catch (Exception e) {
- // There is a chance of the underlying field not really matching the
- // actual field type, so it can throw an exception
- LOG.warn("Error reading a field from document : " + solrDoc, e);
- // if it happens log it and continue
- continue;
- }
- }
- if (sf != null && sf.multiValued() && !solrDoc.containsKey(fieldName)) {
- ArrayList l = new ArrayList();
- l.add(val);
- solrDoc.addField(fieldName, l);
- } else {
- solrDoc.addField(fieldName, val);
- }
- }
-
- return solrDoc;
- }
-
- public static class DocListInfo {
- public final int numFound;
- public final int start ;
- public Float maxScore = null;
- public final int size;
-
- public DocListInfo(int numFound, int sz,int start, Float maxScore) {
- this.numFound = numFound;
- size = sz;
- this.start = start;
- this.maxScore = maxScore;
- }
- }
-
- /**
- *
- * Users wanting to define custom {@link QueryResponseWriter}s that deal with
- * {@link SolrInputDocument}s and {@link SolrDocumentList} should override the
- * methods of this class. All the methods have empty bodies because the user is left
- * to choose which methods are required for their purpose
- */
- public static abstract class SingleResponseWriter {
-
- /**
- * This method is called at the start of the {@link QueryResponseWriter}
- * output. Override this method if you want to provide a header for your
- * output, e.g., XML headers, etc.
- *
- * @throws IOException
- * if any error occurs.
- */
- public void start() throws IOException { }
-
- /**
- * This method is called at the start of processing a
- * {@link SolrDocumentList}. Those that override this method are provided
- * with {@link DocListInfo} object to use to inspect the output
- * {@link SolrDocumentList}.
- *
- * @param info Information about the {@link SolrDocumentList} to output.
- */
- public void startDocumentList(String name, DocListInfo info) throws IOException { }
-
- /**
- * This method writes out a {@link SolrDocument}, on a doc-by-doc basis.
- * This method is only called when {@link #isStreamingDocs()} returns true.
- *
- * @param solrDocument
- * The doc-by-doc {@link SolrDocument} to transform into output as
- * part of this {@link QueryResponseWriter}.
- */
- public void writeDoc(SolrDocument solrDocument) throws IOException { }
-
- /**
- * This method is called at the end of outputting a {@link SolrDocumentList}
- * or on a doc-by-doc {@link SolrDocument} basis.
- */
- public void endDocumentList() throws IOException { }
- /**
- * This method defines how to output the {@link SolrQueryResponse} header
- * which is provided as a {@link NamedList} parameter.
- *
- * @param responseHeader
- * The response header to output.
- */
- public void writeResponseHeader(NamedList responseHeader) throws IOException { }
-
- /**
- * This method is called at the end of the {@link QueryResponseWriter}
- * lifecycle. Implement this method to add a footer to your output, e.g., in
- * the case of XML, the outer tag for your tag set, etc.
- *
- * @throws IOException
- * If any error occurs.
- */
- public void end() throws IOException { }
-
- /**
- * Define this method to control how output is written by this
- * {@link QueryResponseWriter} if the output is not a
- * {@link SolrInputDocument} or a {@link SolrDocumentList}.
- *
- * @param name
- * The name of the object to output.
- * @param other
- * The object to output.
- * @throws IOException
- * If any error occurs.
- */
- public void writeOther(String name, Object other) throws IOException { }
-
- /**
- * Overriding this method to return false forces all
- * {@link SolrInputDocument}s to be spit out as a {@link SolrDocumentList}
- * so they can be processed as a whole, rather than on a doc-by-doc basis.
- * If set to false, this method calls
- * {@link #writeAllDocs(BaseResponseWriter.DocListInfo, List)}, else if set to true, then this
- * method forces calling {@link #writeDoc(SolrDocument)} on a doc-by-doc
- * basis.
- *
- * @return True to force {@link #writeDoc(SolrDocument)} to be called, False
- * to force {@link #writeAllDocs(BaseResponseWriter.DocListInfo, List)} to be called.
- */
- public boolean isStreamingDocs() { return true; }
-
- /**
- * Writes out all {@link SolrInputDocument}s . This is invoked only if
- * {@link #isStreamingDocs()} returns false.
- *
- * @param info
- * Information about the {@link List} of {@link SolrDocument}s to
- * output.
- * @param allDocs
- * A {@link List} of {@link SolrDocument}s to output.
- * @throws IOException
- * If any error occurs.
- */
- public void writeAllDocs(DocListInfo info, List allDocs) throws IOException { }
-
- }
-
-}
diff --git a/solr/src/java/org/apache/solr/response/GenericBinaryResponseWriter.java b/solr/src/java/org/apache/solr/response/GenericBinaryResponseWriter.java
deleted file mode 100644
index 1ce707ce4e8..00000000000
--- a/solr/src/java/org/apache/solr/response/GenericBinaryResponseWriter.java
+++ /dev/null
@@ -1,88 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.solr.response;
-
-import java.io.OutputStream;
-import java.io.IOException;
-import java.io.Writer;
-
-import org.apache.solr.common.SolrDocumentList;
-import org.apache.solr.common.SolrInputDocument;
-import org.apache.solr.request.SolrQueryRequest;
-
-import org.apache.solr.response.BaseResponseWriter.SingleResponseWriter; // javadocs
-
-/**
- *
- *
- * A generic {@link QueryResponseWriter} implementation that requires a user to
- * implement the
- * {@link #getSingleResponseWriter(OutputStream, SolrQueryRequest, SolrQueryResponse)}
- * that defines a {@link SingleResponseWriter} to handle the binary output.
- *
- * @since 1.5
- * @version $Id$
- *
- */
-public abstract class GenericBinaryResponseWriter extends BaseResponseWriter
- implements BinaryQueryResponseWriter {
-
- /**
- *
- * Writes the binary output data using the {@link SingleResponseWriter}
- * provided by a call to
- * {@link #getSingleResponseWriter(OutputStream, SolrQueryRequest, SolrQueryResponse)}
- * .
- *
- * @param out
- * The {@link OutputStream} to write the binary data to.
- * @param request
- * The provided {@link SolrQueryRequest}.
- * @param response
- * The provided {@link SolrQueryResponse}.
- */
- public void write(OutputStream out, SolrQueryRequest request,
- SolrQueryResponse response) throws IOException {
- super.write(getSingleResponseWriter(out, request, response), request,
- response);
- }
-
- /**
- * Users of this class should implement this method to define a
- * {@link SingleResponseWriter} responsible for writing the binary output
- * given a {@link SolrDocumentList} or doc-by-doc, given a
- * {@link SolrInputDocument}.
- *
- * @param out
- * The {@link OutputStream} to write the binary data response to.
- * @param request
- * The provided {@link SolrQueryRequest}.
- * @param response
- * The provided {@link SolrQueryResponse}.
- * @return A {@link SingleResponseWriter} that will be used to generate the
- * response output from this {@link QueryResponseWriter}.
- */
- public abstract SingleResponseWriter getSingleResponseWriter(
- OutputStream out, SolrQueryRequest request, SolrQueryResponse response);
-
- /** Just to throw an Exception so that the implementing classes do not have to do the same
- */
- public void write(Writer writer, SolrQueryRequest request, SolrQueryResponse response) throws IOException {
- throw new RuntimeException("This is a binary writer, cannot write to a character stream");
- }
-}
diff --git a/solr/src/java/org/apache/solr/response/GenericTextResponseWriter.java b/solr/src/java/org/apache/solr/response/GenericTextResponseWriter.java
deleted file mode 100644
index 0b911b9f0af..00000000000
--- a/solr/src/java/org/apache/solr/response/GenericTextResponseWriter.java
+++ /dev/null
@@ -1,80 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.solr.response;
-
-import java.io.Writer;
-import java.io.IOException;
-
-import org.apache.solr.common.SolrDocumentList;
-import org.apache.solr.common.SolrInputDocument;
-import org.apache.solr.request.SolrQueryRequest;
-
-import org.apache.solr.response.BaseResponseWriter.SingleResponseWriter; // javadocs
-
-/**
- *
- *
- * A generic {@link QueryResponseWriter} implementation that requires a user to
- * implement the
- * {@link #getSingleResponseWriter(Writer, SolrQueryRequest, SolrQueryResponse)}
- * that defines a {@link SingleResponseWriter} to handle plain ol' text output.
- *
- * @since 1.5
- * @version $Id$
- *
- */
-public abstract class GenericTextResponseWriter extends BaseResponseWriter
- implements QueryResponseWriter {
-
- /**
- *
- * Writes text output using the {@link SingleResponseWriter} provided by a
- * call to
- * {@link #getSingleResponseWriter(Writer, SolrQueryRequest, SolrQueryResponse)}
- * .
- *
- * @param writer
- * The {@link Writer} to write the text output to.
- * @param request
- * The provided {@link SolrQueryRequest}.
- * @param response
- * The provided {@link SolrQueryResponse}.
- */
- public void write(Writer writer, SolrQueryRequest request,
- SolrQueryResponse response) throws IOException {
- super.write(getSingleResponseWriter(writer, request, response), request,
- response);
- }
-
- /**
- * Users of this class should implement this method to define a
- * {@link SingleResponseWriter} responsible for writing text output given a
- * {@link SolrDocumentList} or doc-by-doc, given a {@link SolrInputDocument}.
- *
- * @param writer
- * The {@link Writer} to write the text data response to.
- * @param request
- * The provided {@link SolrQueryRequest}.
- * @param response
- * The provided {@link SolrQueryResponse}.
- * @return A {@link SingleResponseWriter} that will be used to generate the
- * response output from this {@link QueryResponseWriter}.
- */
- protected abstract SingleResponseWriter getSingleResponseWriter(
- Writer writer, SolrQueryRequest request, SolrQueryResponse response);
-}
diff --git a/solr/src/java/org/apache/solr/schema/IndexSchema.java b/solr/src/java/org/apache/solr/schema/IndexSchema.java
index b6b7b6768f5..818f8d85a56 100644
--- a/solr/src/java/org/apache/solr/schema/IndexSchema.java
+++ b/solr/src/java/org/apache/solr/schema/IndexSchema.java
@@ -797,19 +797,23 @@ public final class IndexSchema {
NamedNodeMap attrs = node.getAttributes();
String analyzerName = DOMUtil.getAttr(attrs,"class");
if (analyzerName != null) {
- // No need to be core-aware as Analyzers are not in the core-aware list
- final Class<? extends Analyzer> clazz = loader.findClass(analyzerName).asSubclass(Analyzer.class);
try {
+ // No need to be core-aware as Analyzers are not in the core-aware list
+ final Class<? extends Analyzer> clazz = loader.findClass
+ (analyzerName).asSubclass(Analyzer.class);
+
try {
- // first try to use a ctor with version parameter (needed for many new Analyzers that have no default one anymore)
+ // first try to use a ctor with version parameter
+ // (needed for many new Analyzers that have no default one anymore)
Constructor<? extends Analyzer> cnstr = clazz.getConstructor(Version.class);
final String matchVersionStr = DOMUtil.getAttr(attrs, LUCENE_MATCH_VERSION_PARAM);
final Version luceneMatchVersion = (matchVersionStr == null) ?
solrConfig.luceneMatchVersion : Config.parseLuceneVersionString(matchVersionStr);
if (luceneMatchVersion == null) {
- throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,
- "Configuration Error: Analyzer '" + clazz.getName() +
- "' needs a 'luceneMatchVersion' parameter");
+ throw new SolrException
+ ( SolrException.ErrorCode.SERVER_ERROR,
+ "Configuration Error: Analyzer '" + clazz.getName() +
+ "' needs a 'luceneMatchVersion' parameter");
}
return cnstr.newInstance(luceneMatchVersion);
} catch (NoSuchMethodException nsme) {
@@ -817,8 +821,9 @@ public final class IndexSchema {
return clazz.newInstance();
}
} catch (Exception e) {
+ log.error("Cannot load analyzer: "+analyzerName, e);
throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,
- "Cannot load analyzer: "+analyzerName );
+ "Cannot load analyzer: "+analyzerName, e );
}
}
diff --git a/solr/src/java/org/apache/solr/schema/SchemaField.java b/solr/src/java/org/apache/solr/schema/SchemaField.java
index 41ad8e051c1..bb2d3e7a523 100644
--- a/solr/src/java/org/apache/solr/schema/SchemaField.java
+++ b/solr/src/java/org/apache/solr/schema/SchemaField.java
@@ -19,7 +19,6 @@ package org.apache.solr.schema;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
-import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.search.SortField;
import org.apache.solr.search.QParser;
diff --git a/solr/src/java/org/apache/solr/schema/TrieDateField.java b/solr/src/java/org/apache/solr/schema/TrieDateField.java
index 7e3b30d240f..8d58fa55213 100755
--- a/solr/src/java/org/apache/solr/schema/TrieDateField.java
+++ b/solr/src/java/org/apache/solr/schema/TrieDateField.java
@@ -18,210 +18,125 @@
package org.apache.solr.schema;
import org.apache.noggit.CharArr;
-import org.apache.solr.common.SolrException;
-import org.apache.solr.analysis.CharFilterFactory;
-import org.apache.solr.analysis.TokenFilterFactory;
-import org.apache.solr.analysis.TokenizerChain;
-import org.apache.solr.analysis.TrieTokenizerFactory;
-import org.apache.solr.search.function.*;
+import org.apache.solr.search.function.ValueSource;
import org.apache.solr.search.QParser;
import org.apache.solr.response.TextResponseWriter;
import org.apache.lucene.document.Fieldable;
-import org.apache.lucene.document.Field;
import org.apache.lucene.search.SortField;
-import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.NumericRangeQuery;
-import org.apache.lucene.search.cache.CachedArrayCreator;
-import org.apache.lucene.search.cache.LongValuesCreator;
import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.NumericUtils;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.NumericTokenStream;
import java.util.Map;
import java.util.Date;
import java.io.IOException;
public class TrieDateField extends DateField {
- protected int precisionStepArg = TrieField.DEFAULT_PRECISION_STEP; // the one passed in or defaulted
- protected int precisionStep = precisionStepArg; // normalized
+
+ final TrieField wrappedField = new TrieField() {{
+ type = TrieTypes.DATE;
+ }};
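
The double-brace form above is just an anonymous TrieField subclass whose instance initializer sets the inherited type field at construction time; a more explicit equivalent, using a hypothetical named class, would be:

    // hypothetical expansion of the anonymous subclass above
    class DateTrieField extends TrieField {
      DateTrieField() { type = TrieTypes.DATE; }
    }
    final TrieField wrappedField = new DateTrieField();
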
@Override
protected void init(IndexSchema schema, Map args) {
- String p = args.remove("precisionStep");
- if (p != null) {
- precisionStepArg = Integer.parseInt(p);
- }
- // normalize the precisionStep
- precisionStep = precisionStepArg;
- if (precisionStep<=0 || precisionStep>=64) precisionStep=Integer.MAX_VALUE;
-
- CharFilterFactory[] filterFactories = new CharFilterFactory[0];
- TokenFilterFactory[] tokenFilterFactories = new TokenFilterFactory[0];
- analyzer = new TokenizerChain(filterFactories, new TrieTokenizerFactory(TrieField.TrieTypes.DATE, precisionStep), tokenFilterFactories);
- // for query time we only need one token, so we use the biggest possible precisionStep:
- queryAnalyzer = new TokenizerChain(filterFactories, new TrieTokenizerFactory(TrieField.TrieTypes.DATE, Integer.MAX_VALUE), tokenFilterFactories);
+ wrappedField.init(schema, args);
+ analyzer = wrappedField.analyzer;
+ queryAnalyzer = wrappedField.queryAnalyzer;
}
@Override
public Date toObject(Fieldable f) {
- byte[] arr = f.getBinaryValue();
- if (arr==null) throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,TrieField.badFieldString(f));
- return new Date(TrieFieldHelper.toLong(arr));
+ return (Date) wrappedField.toObject(f);
}
@Override
public Object toObject(SchemaField sf, BytesRef term) {
- return new Date(NumericUtils.prefixCodedToLong(term));
+ return wrappedField.toObject(sf, term);
}
@Override
public SortField getSortField(SchemaField field, boolean top) {
- field.checkSortability();
-
- int flags = CachedArrayCreator.CACHE_VALUES_AND_BITS;
- boolean sortMissingLast = field.sortMissingLast();
- boolean sortMissingFirst = field.sortMissingFirst();
-
- Object missingValue = null;
- if( sortMissingLast ) {
- missingValue = top ? Long.MIN_VALUE : Long.MAX_VALUE;
- } else if( sortMissingFirst ) {
- missingValue = top ? Long.MAX_VALUE : Long.MIN_VALUE;
- }
- return new SortField(new LongValuesCreator(field.getName(), FieldCache.NUMERIC_UTILS_LONG_PARSER, flags), top).setMissingValue(missingValue);
+ return wrappedField.getSortField(field, top);
}
@Override
public ValueSource getValueSource(SchemaField field, QParser parser) {
- field.checkFieldCacheSource(parser);
- return new TrieDateFieldSource( new LongValuesCreator( field.getName(), FieldCache.NUMERIC_UTILS_LONG_PARSER, CachedArrayCreator.CACHE_VALUES_AND_BITS ));
- }
-
- @Override
- public void write(TextResponseWriter writer, String name, Fieldable f) throws IOException {
- byte[] arr = f.getBinaryValue();
- if (arr==null) {
- writer.writeStr(name, TrieField.badFieldString(f),true);
- return;
- }
-
- writer.writeDate(name,new Date(TrieFieldHelper.toLong(arr)));
- }
-
- @Override
- public boolean isTokenized() {
- return true;
+ return wrappedField.getValueSource(field, parser);
}
/**
* @return the precisionStep used to index values into the field
*/
public int getPrecisionStep() {
- return precisionStepArg;
+ return wrappedField.getPrecisionStep();
}
+ @Override
+ public void write(TextResponseWriter writer, String name, Fieldable f) throws IOException {
+ wrappedField.write(writer, name, f);
+ }
+
+ @Override
+ public boolean isTokenized() {
+ return wrappedField.isTokenized();
+ }
+
+ @Override
+ public boolean multiValuedFieldCache() {
+ return wrappedField.multiValuedFieldCache();
+ }
@Override
public String storedToReadable(Fieldable f) {
- return toExternal(f);
+ return wrappedField.storedToReadable(f);
}
@Override
public String readableToIndexed(String val) {
- // TODO: Numeric should never be handled as String, that may break in future lucene versions! Change to use BytesRef for term texts!
- BytesRef bytes = new BytesRef(NumericUtils.BUF_SIZE_LONG);
- NumericUtils.longToPrefixCoded(super.parseMath(null, val).getTime(), 0, bytes);
- return bytes.utf8ToString();
+ return wrappedField.readableToIndexed(val);
}
@Override
public String toInternal(String val) {
- return readableToIndexed(val);
+ return wrappedField.toInternal(val);
}
@Override
public String toExternal(Fieldable f) {
- byte[] arr = f.getBinaryValue();
- if (arr==null) return TrieField.badFieldString(f);
- return super.toExternal(new Date(TrieFieldHelper.toLong(arr)));
+ return wrappedField.toExternal(f);
}
@Override
public String indexedToReadable(String _indexedForm) {
- final BytesRef indexedForm = new BytesRef(_indexedForm);
- return super.toExternal( new Date(NumericUtils.prefixCodedToLong(indexedForm)) );
+ return wrappedField.indexedToReadable(_indexedForm);
}
@Override
public void indexedToReadable(BytesRef input, CharArr out) {
- String ext = super.toExternal( new Date(NumericUtils.prefixCodedToLong(input)) );
- out.write(ext);
+ wrappedField.indexedToReadable(input, out);
}
@Override
public String storedToIndexed(Fieldable f) {
- // TODO: optimize to remove redundant string conversion
- return readableToIndexed(storedToReadable(f));
+ return wrappedField.storedToIndexed(f);
}
@Override
public Fieldable createField(SchemaField field, Object value, float boost) {
- boolean indexed = field.indexed();
- boolean stored = field.stored();
-
- if (!indexed && !stored) {
- if (log.isTraceEnabled())
- log.trace("Ignoring unindexed/unstored field: " + field);
- return null;
- }
-
- int ps = precisionStep;
-
- byte[] arr=null;
- TokenStream ts=null;
-
- long time = (value instanceof Date)
- ? ((Date)value).getTime()
- : super.parseMath(null, value.toString()).getTime();
-
- if (stored) arr = TrieFieldHelper.toArr(time);
- if (indexed) ts = new NumericTokenStream(ps).setLongValue(time);
-
- Field f;
- if (stored) {
- f = new Field(field.getName(), arr);
- if (indexed) f.setTokenStream(ts);
- } else {
- f = new Field(field.getName(), ts);
- }
-
- // term vectors aren't supported
-
- f.setOmitNorms(field.omitNorms());
- f.setOmitTermFreqAndPositions(field.omitTf());
- f.setBoost(boost);
- return f;
+ return wrappedField.createField(field, value, boost);
}
@Override
public Query getRangeQuery(QParser parser, SchemaField field, String min, String max, boolean minInclusive, boolean maxInclusive) {
- return getRangeQuery(parser, field,
- min==null ? null : super.parseMath(null,min),
- max==null ? null : super.parseMath(null,max),
- minInclusive, maxInclusive);
+ return wrappedField.getRangeQuery(parser, field, min, max, minInclusive, maxInclusive);
}
@Override
public Query getRangeQuery(QParser parser, SchemaField sf, Date min, Date max, boolean minInclusive, boolean maxInclusive) {
- int ps = precisionStep;
- Query query = NumericRangeQuery.newLongRange(sf.getName(), ps,
+ return NumericRangeQuery.newLongRange(sf.getName(), wrappedField.precisionStep,
min == null ? null : min.getTime(),
max == null ? null : max.getTime(),
minInclusive, maxInclusive);
-
- return query;
}
}
diff --git a/solr/src/java/org/apache/solr/schema/TrieField.java b/solr/src/java/org/apache/solr/schema/TrieField.java
index e670ba0e338..eb78e1bbfd8 100644
--- a/solr/src/java/org/apache/solr/schema/TrieField.java
+++ b/solr/src/java/org/apache/solr/schema/TrieField.java
@@ -17,6 +17,8 @@
package org.apache.solr.schema;
import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.NumericField;
import org.apache.lucene.search.*;
import org.apache.lucene.search.cache.CachedArrayCreator;
import org.apache.lucene.search.cache.DoubleValuesCreator;
@@ -40,17 +42,17 @@ import java.util.Map;
import java.util.Date;
/**
- * Provides field types to support for Lucene's Trie Range Queries.
+ * Provides field types to support Lucene's {@link NumericField}.
* See {@link org.apache.lucene.search.NumericRangeQuery} for more details.
* It supports integer, float, long, double and date types.
*
* For each number being added to this field, multiple terms are generated as per the algorithm described in the above
- * link. The possible number of terms increases dramatically with higher precision steps (factor 2^precisionStep). For
+ * link. The possible number of terms increases dramatically with lower precision steps. For
* the fast range search to work, trie fields must be indexed.
*
* Trie fields are sortable in numerical order and can be used in function queries.
*
- * Note that if you use a precisionStep of 32 for int/float and 64 for long/double, then multiple terms will not be
+ * Note that if you use a precisionStep of 32 for int/float and 64 for long/double/date, then multiple terms will not be
* generated, range search will be no faster than any other number field, but sorting will still be possible.
*
* @version $Id$
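
As a usage sketch of the range queries this field type exists for (the field name, precision step variable and bounds are illustrative; NumericRangeQuery.newLongRange is the same factory used for TrieDateField earlier in this patch):

    // matches indexed long values in [10, 100] for a hypothetical field
    Query q = NumericRangeQuery.newLongRange("price_l", precisionStep, 10L, 100L, true, true);
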
@@ -101,21 +103,28 @@ public class TrieField extends FieldType {
@Override
public Object toObject(Fieldable f) {
- byte[] arr = f.getBinaryValue();
- if (arr==null) return badFieldString(f);
- switch (type) {
- case INTEGER:
- return TrieFieldHelper.toInt(arr);
- case FLOAT:
- return TrieFieldHelper.toFloat(arr);
- case LONG:
- return TrieFieldHelper.toLong(arr);
- case DOUBLE:
- return TrieFieldHelper.toDouble(arr);
- case DATE:
- return new Date(TrieFieldHelper.toLong(arr));
- default:
- throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + f.name());
+ if (f instanceof NumericField) {
+ final Number val = ((NumericField) f).getNumericValue();
+ if (val==null) return badFieldString(f);
+ return (type == TrieTypes.DATE) ? new Date(val.longValue()) : val;
+ } else {
+ // the following code is "deprecated" and only to support pre-3.2 indexes using the old BinaryField encoding:
+ final byte[] arr = f.getBinaryValue();
+ if (arr==null) return badFieldString(f);
+ switch (type) {
+ case INTEGER:
+ return toInt(arr);
+ case FLOAT:
+ return Float.intBitsToFloat(toInt(arr));
+ case LONG:
+ return toLong(arr);
+ case DOUBLE:
+ return Double.longBitsToDouble(toLong(arr));
+ case DATE:
+ return new Date(toLong(arr));
+ default:
+ throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + f.name());
+ }
}
}
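
With this change, a stored numeric field read back from a search result may be a NumericField carrying the typed value, while pre-3.2 indexes still surface the old binary encoding; a rough caller-side sketch (doc, fieldType and the field name are assumed to be in scope):

    Fieldable f = doc.getFieldable("price_l");   // a NumericField when the field was stored by 3.2+
    Object val = fieldType.toObject(f);          // Integer/Float/Long/Double/Date depending on the trie type
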
@@ -198,30 +207,7 @@ public class TrieField extends FieldType {
@Override
public void write(TextResponseWriter writer, String name, Fieldable f) throws IOException {
- byte[] arr = f.getBinaryValue();
- if (arr==null) {
- writer.writeStr(name, badFieldString(f),true);
- return;
- }
- switch (type) {
- case INTEGER:
- writer.writeInt(name,TrieFieldHelper.toInt(arr));
- break;
- case FLOAT:
- writer.writeFloat(name,TrieFieldHelper.toFloat(arr));
- break;
- case LONG:
- writer.writeLong(name,TrieFieldHelper.toLong(arr));
- break;
- case DOUBLE:
- writer.writeDouble(name,TrieFieldHelper.toDouble(arr));
- break;
- case DATE:
- writer.writeDate(name,new Date(TrieFieldHelper.toLong(arr)));
- break;
- default:
- throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + f.name());
- }
+ writer.writeVal(name, toObject(f));
}
@Override
@@ -290,6 +276,17 @@ public class TrieField extends FieldType {
return query;
}
+ @Deprecated
+ static int toInt(byte[] arr) {
+ return (arr[0]<<24) | ((arr[1]&0xff)<<16) | ((arr[2]&0xff)<<8) | (arr[3]&0xff);
+ }
+
+ @Deprecated
+ static long toLong(byte[] arr) {
+ int high = (arr[0]<<24) | ((arr[1]&0xff)<<16) | ((arr[2]&0xff)<<8) | (arr[3]&0xff);
+ int low = (arr[4]<<24) | ((arr[5]&0xff)<<16) | ((arr[6]&0xff)<<8) | (arr[7]&0xff);
+ return (((long)high)<<32) | (low&0x0ffffffffL);
+ }
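
These deprecated helpers decode the legacy big-endian layout used by pre-3.2 stored values: as a quick worked check, toInt(new byte[]{0, 0, 1, 2}) is (1 << 8) | 2 = 258, and toLong() combines two such ints into the high and low 32 bits of the long.
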
@Override
public String storedToReadable(Fieldable f) {
@@ -341,22 +338,9 @@ public class TrieField extends FieldType {
@Override
public String toExternal(Fieldable f) {
- byte[] arr = f.getBinaryValue();
- if (arr==null) return badFieldString(f);
- switch (type) {
- case INTEGER:
- return Integer.toString(TrieFieldHelper.toInt(arr));
- case FLOAT:
- return Float.toString(TrieFieldHelper.toFloat(arr));
- case LONG:
- return Long.toString(TrieFieldHelper.toLong(arr));
- case DOUBLE:
- return Double.toString(TrieFieldHelper.toDouble(arr));
- case DATE:
- return dateField.formatDate(new Date(TrieFieldHelper.toLong(arr)));
- default:
- throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + f.name());
- }
+ return (type == TrieTypes.DATE)
+ ? dateField.toExternal((Date) toObject(f))
+ : toObject(f).toString();
}
@Override
@@ -372,7 +356,7 @@ public class TrieField extends FieldType {
case DOUBLE:
return Double.toString( NumericUtils.sortableLongToDouble(NumericUtils.prefixCodedToLong(indexedForm)) );
case DATE:
- return dateField.formatDate( new Date(NumericUtils.prefixCodedToLong(indexedForm)) );
+ return dateField.toExternal( new Date(NumericUtils.prefixCodedToLong(indexedForm)) );
default:
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + type);
}
@@ -397,7 +381,7 @@ public class TrieField extends FieldType {
s = Double.toString( NumericUtils.sortableLongToDouble(NumericUtils.prefixCodedToLong(indexedForm)) );
break;
case DATE:
- s = dateField.formatDate( new Date(NumericUtils.prefixCodedToLong(indexedForm)) );
+ s = dateField.toExternal( new Date(NumericUtils.prefixCodedToLong(indexedForm)) );
break;
default:
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + type);
@@ -426,59 +410,117 @@ public class TrieField extends FieldType {
@Override
public String storedToIndexed(Fieldable f) {
- // TODO: optimize to remove redundant string conversion
- return readableToIndexed(storedToReadable(f));
+ final BytesRef bytes = new BytesRef(NumericUtils.BUF_SIZE_LONG);
+ if (f instanceof NumericField) {
+ final Number val = ((NumericField) f).getNumericValue();
+ if (val==null)
+ throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Invalid field contents: "+f.name());
+ switch (type) {
+ case INTEGER:
+ NumericUtils.intToPrefixCoded(val.intValue(), 0, bytes);
+ break;
+ case FLOAT:
+ NumericUtils.intToPrefixCoded(NumericUtils.floatToSortableInt(val.floatValue()), 0, bytes);
+ break;
+ case LONG: //fallthrough!
+ case DATE:
+ NumericUtils.longToPrefixCoded(val.longValue(), 0, bytes);
+ break;
+ case DOUBLE:
+ NumericUtils.longToPrefixCoded(NumericUtils.doubleToSortableLong(val.doubleValue()), 0, bytes);
+ break;
+ default:
+ throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + f.name());
+ }
+ } else {
+ // the following code is "deprecated" and only to support pre-3.2 indexes using the old BinaryField encoding:
+ final byte[] arr = f.getBinaryValue();
+ if (arr==null)
+ throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Invalid field contents: "+f.name());
+ switch (type) {
+ case INTEGER:
+ NumericUtils.intToPrefixCoded(toInt(arr), 0, bytes);
+ break;
+ case FLOAT: {
+ // WARNING: Code Duplication! Keep in sync with o.a.l.util.NumericUtils!
+ // copied from NumericUtils to not convert to/from float two times
+ // code in next 2 lines is identical to: int v = NumericUtils.floatToSortableInt(Float.intBitsToFloat(toInt(arr)));
+ int v = toInt(arr);
+ if (v<0) v ^= 0x7fffffff;
+ NumericUtils.intToPrefixCoded(v, 0, bytes);
+ break;
+ }
+ case LONG: //fallthrough!
+ case DATE:
+ NumericUtils.longToPrefixCoded(toLong(arr), 0, bytes);
+ break;
+ case DOUBLE: {
+ // WARNING: Code Duplication! Keep in sync with o.a.l.util.NumericUtils!
+ // copied from NumericUtils to not convert to/from double two times
+ // code in next 2 lines is identical to: long v = NumericUtils.doubleToSortableLong(Double.longBitsToDouble(toLong(arr)));
+ long v = toLong(arr);
+ if (v<0) v ^= 0x7fffffffffffffffL;
+ NumericUtils.longToPrefixCoded(v, 0, bytes);
+ break;
+ }
+ default:
+ throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + f.name());
+ }
+ }
+ return bytes.utf8ToString();
}
@Override
public Fieldable createField(SchemaField field, Object value, float boost) {
- TrieFieldHelper.FieldInfo info = new TrieFieldHelper.FieldInfo();
- info.index = field.indexed();
- info.store = field.stored();
- info.precisionStep = precisionStep;
- info.omitNorms = field.omitNorms();
- info.omitTF = field.omitTf();
-
- if (!info.index && !info.store) {
+ boolean indexed = field.indexed();
+ boolean stored = field.stored();
+
+ if (!indexed && !stored) {
if (log.isTraceEnabled())
log.trace("Ignoring unindexed/unstored field: " + field);
return null;
}
+ final NumericField f = new NumericField(field.getName(), precisionStep, stored ? Field.Store.YES : Field.Store.NO, indexed);
switch (type) {
case INTEGER:
int i = (value instanceof Number)
? ((Number)value).intValue()
: Integer.parseInt(value.toString());
- return TrieFieldHelper.createIntField(field.getName(), i, info, boost);
-
+ f.setIntValue(i);
+ break;
case FLOAT:
- float f = (value instanceof Number)
+ float fl = (value instanceof Number)
? ((Number)value).floatValue()
: Float.parseFloat(value.toString());
- return TrieFieldHelper.createFloatField(field.getName(), f, info, boost);
-
+ f.setFloatValue(fl);
+ break;
case LONG:
long l = (value instanceof Number)
? ((Number)value).longValue()
: Long.parseLong(value.toString());
- return TrieFieldHelper.createLongField(field.getName(), l, info, boost);
-
+ f.setLongValue(l);
+ break;
case DOUBLE:
double d = (value instanceof Number)
? ((Number)value).doubleValue()
: Double.parseDouble(value.toString());
- return TrieFieldHelper.createDoubleField(field.getName(), d, info, boost);
-
+ f.setDoubleValue(d);
+ break;
case DATE:
Date date = (value instanceof Date)
? ((Date)value)
: dateField.parseMath(null, value.toString());
- return TrieFieldHelper.createDateField(field.getName(), date, info, boost);
-
+ f.setLongValue(date.getTime());
+ break;
default:
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + type);
}
+
+ f.setOmitNorms(field.omitNorms());
+ f.setOmitTermFreqAndPositions(field.omitTf());
+ f.setBoost(boost);
+ return f;
}
public enum TrieTypes {
@@ -498,14 +540,12 @@ public class TrieField extends FieldType {
* that indexes multiple precisions per value.
*/
public static String getMainValuePrefix(FieldType ft) {
- if (ft instanceof TrieDateField) {
- int step = ((TrieDateField)ft).getPrecisionStep();
- if (step <= 0 || step >=64) return null;
- return LONG_PREFIX;
- } else if (ft instanceof TrieField) {
- TrieField trie = (TrieField)ft;
- if (trie.precisionStep == Integer.MAX_VALUE) return null;
-
+ if (ft instanceof TrieDateField)
+ ft = ((TrieDateField) ft).wrappedField;
+ if (ft instanceof TrieField) {
+ final TrieField trie = (TrieField)ft;
+ if (trie.precisionStep == Integer.MAX_VALUE)
+ return null;
switch (trie.type) {
case INTEGER:
case FLOAT:
diff --git a/solr/src/java/org/apache/solr/schema/TrieFieldHelper.java b/solr/src/java/org/apache/solr/schema/TrieFieldHelper.java
deleted file mode 100644
index c40ecd87a78..00000000000
--- a/solr/src/java/org/apache/solr/schema/TrieFieldHelper.java
+++ /dev/null
@@ -1,166 +0,0 @@
-/**
- * Copyright 2005 The Apache Software Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.solr.schema;
-
-import java.util.Date;
-
-import org.apache.lucene.analysis.NumericTokenStream;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.Fieldable;
-
-/**
- * Helper class to make TrieFields compatible with ones written in solr
- *
- * TODO -- Something like this should be in in lucene
- * see: LUCENE-3001
- */
-public class TrieFieldHelper {
-
- private TrieFieldHelper() {}
-
- public static class FieldInfo {
- public int precisionStep = 8; // same as solr default
- public boolean store = true;
- public boolean index = true;
- public boolean omitNorms = true;
- public boolean omitTF = true;
- }
-
- //----------------------------------------------
- // Create Field
- //----------------------------------------------
-
- private static Fieldable createField(String name, byte[] arr, TokenStream ts, FieldInfo info, float boost) {
-
- Field f;
- if (info.store) {
- f = new Field(name, arr);
- if (info.index) f.setTokenStream(ts);
- } else {
- f = new Field(name, ts);
- }
-
- // term vectors aren't supported
- f.setOmitNorms(info.omitNorms);
- f.setOmitTermFreqAndPositions(info.omitTF);
- f.setBoost(boost);
- return f;
- }
-
- public static Fieldable createIntField(String name, int value, FieldInfo info, float boost) {
-
- byte[] arr=null;
- TokenStream ts=null;
-
- if (info.store) arr = TrieFieldHelper.toArr(value);
- if (info.index) ts = new NumericTokenStream(info.precisionStep).setIntValue(value);
-
- return createField(name, arr, ts, info, boost);
- }
-
- public static Fieldable createFloatField(String name, float value, FieldInfo info, float boost) {
-
- byte[] arr=null;
- TokenStream ts=null;
-
- if (info.store) arr = TrieFieldHelper.toArr(value);
- if (info.index) ts = new NumericTokenStream(info.precisionStep).setFloatValue(value);
-
- return createField(name, arr, ts, info, boost);
- }
-
- public static Fieldable createLongField(String name, long value, FieldInfo info, float boost) {
-
- byte[] arr=null;
- TokenStream ts=null;
-
- if (info.store) arr = TrieFieldHelper.toArr(value);
- if (info.index) ts = new NumericTokenStream(info.precisionStep).setLongValue(value);
-
- return createField(name, arr, ts, info, boost);
- }
-
- public static Fieldable createDoubleField(String name, double value, FieldInfo info, float boost) {
-
- byte[] arr=null;
- TokenStream ts=null;
-
- if (info.store) arr = TrieFieldHelper.toArr(value);
- if (info.index) ts = new NumericTokenStream(info.precisionStep).setDoubleValue(value);
-
- return createField(name, arr, ts, info, boost);
- }
-
- public static Fieldable createDateField(String name, Date value, FieldInfo info, float boost) {
- // TODO, make sure the date is within long range!
- return createLongField(name, value.getTime(), info, boost);
- }
-
-
- //----------------------------------------------
- // number <=> byte[]
- //----------------------------------------------
-
- public static int toInt(byte[] arr) {
- return (arr[0]<<24) | ((arr[1]&0xff)<<16) | ((arr[2]&0xff)<<8) | (arr[3]&0xff);
- }
-
- public static long toLong(byte[] arr) {
- int high = (arr[0]<<24) | ((arr[1]&0xff)<<16) | ((arr[2]&0xff)<<8) | (arr[3]&0xff);
- int low = (arr[4]<<24) | ((arr[5]&0xff)<<16) | ((arr[6]&0xff)<<8) | (arr[7]&0xff);
- return (((long)high)<<32) | (low&0x0ffffffffL);
- }
-
- public static float toFloat(byte[] arr) {
- return Float.intBitsToFloat(toInt(arr));
- }
-
- public static double toDouble(byte[] arr) {
- return Double.longBitsToDouble(toLong(arr));
- }
-
- public static byte[] toArr(int val) {
- byte[] arr = new byte[4];
- arr[0] = (byte)(val>>>24);
- arr[1] = (byte)(val>>>16);
- arr[2] = (byte)(val>>>8);
- arr[3] = (byte)(val);
- return arr;
- }
-
- public static byte[] toArr(long val) {
- byte[] arr = new byte[8];
- arr[0] = (byte)(val>>>56);
- arr[1] = (byte)(val>>>48);
- arr[2] = (byte)(val>>>40);
- arr[3] = (byte)(val>>>32);
- arr[4] = (byte)(val>>>24);
- arr[5] = (byte)(val>>>16);
- arr[6] = (byte)(val>>>8);
- arr[7] = (byte)(val);
- return arr;
- }
-
- public static byte[] toArr(float val) {
- return toArr(Float.floatToRawIntBits(val));
- }
-
- public static byte[] toArr(double val) {
- return toArr(Double.doubleToRawLongBits(val));
- }
-}
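
For context on the removal above: TrieFieldHelper stored numeric values as fixed-width big-endian byte arrays (4 bytes for int/float, 8 for long/double), with float and double going through their raw IEEE-754 bit patterns. A minimal, self-contained sketch of that round trip; the class and method names here are illustrative only, not part of the patch:

    import java.util.Arrays;

    public class TrieBytesRoundTripSketch {
        // big-endian encoding of an int, mirroring the removed TrieFieldHelper.toArr(int)
        static byte[] toArr(int val) {
            return new byte[] {
                (byte) (val >>> 24), (byte) (val >>> 16), (byte) (val >>> 8), (byte) val
            };
        }

        // inverse of toArr(int), mirroring the removed TrieFieldHelper.toInt(byte[])
        static int toInt(byte[] arr) {
            return (arr[0] << 24) | ((arr[1] & 0xff) << 16) | ((arr[2] & 0xff) << 8) | (arr[3] & 0xff);
        }

        public static void main(String[] args) {
            float value = 3.25f;
            // floats travel through their raw bit pattern, so the round trip is exact
            byte[] stored = toArr(Float.floatToRawIntBits(value));
            float restored = Float.intBitsToFloat(toInt(stored));
            System.out.println(Arrays.toString(stored) + " -> " + restored); // [64, 80, 0, 0] -> 3.25
        }
    }
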
diff --git a/solr/src/java/org/apache/solr/search/SolrQueryParser.java b/solr/src/java/org/apache/solr/search/SolrQueryParser.java
index 34192b8fd8b..80db3314c7d 100644
--- a/solr/src/java/org/apache/solr/search/SolrQueryParser.java
+++ b/solr/src/java/org/apache/solr/search/SolrQueryParser.java
@@ -67,7 +67,7 @@ public class SolrQueryParser extends QueryParser {
}
public SolrQueryParser(QParser parser, String defaultField, Analyzer analyzer) {
- super(parser.getReq().getCore().getSolrConfig().getLuceneVersion("luceneMatchVersion", Version.LUCENE_30), defaultField, analyzer);
+ super(parser.getReq().getCore().getSolrConfig().luceneMatchVersion, defaultField, analyzer);
this.schema = parser.getReq().getSchema();
this.parser = parser;
this.defaultField = defaultField;
diff --git a/solr/src/java/org/apache/solr/update/AddUpdateCommand.java b/solr/src/java/org/apache/solr/update/AddUpdateCommand.java
index 84632ee0b39..6a02010bf43 100644
--- a/solr/src/java/org/apache/solr/update/AddUpdateCommand.java
+++ b/solr/src/java/org/apache/solr/update/AddUpdateCommand.java
@@ -18,7 +18,7 @@
package org.apache.solr.update;
import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.Term;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField;
@@ -74,7 +74,7 @@ public class AddUpdateCommand extends UpdateCommand {
if (sf != null) {
if (doc != null) {
schema.getUniqueKeyField();
- Field storedId = doc.getField(sf.getName());
+ Fieldable storedId = doc.getFieldable(sf.getName());
indexedId = sf.getType().storedToIndexed(storedId);
}
if (solrDoc != null) {
diff --git a/solr/src/java/org/apache/solr/update/DocumentBuilder.java b/solr/src/java/org/apache/solr/update/DocumentBuilder.java
index e78e18d3340..a7b80e0ded8 100644
--- a/solr/src/java/org/apache/solr/update/DocumentBuilder.java
+++ b/solr/src/java/org/apache/solr/update/DocumentBuilder.java
@@ -159,7 +159,7 @@ public class DocumentBuilder {
// default value are defacto 'required' fields.
List<String> missingFields = null;
for (SchemaField field : schema.getRequiredFields()) {
- if (doc.getField(field.getName() ) == null) {
+ if (doc.getFieldable(field.getName() ) == null) {
if (field.getDefaultValue() != null) {
addField(doc, field, field.getDefaultValue(), 1.0f);
} else {
@@ -313,7 +313,7 @@ public class DocumentBuilder {
// Now validate required fields or add default values
// fields with default values are defacto 'required'
for (SchemaField field : schema.getRequiredFields()) {
- if (out.getField(field.getName() ) == null) {
+ if (out.getFieldable(field.getName() ) == null) {
if (field.getDefaultValue() != null) {
addField(out, field, field.getDefaultValue(), 1.0f);
}
@@ -339,8 +339,7 @@ public class DocumentBuilder {
*/
public SolrDocument loadStoredFields( SolrDocument doc, Document luceneDoc )
{
- for( Object f : luceneDoc.getFields() ) {
- Fieldable field = (Fieldable)f;
+ for( Fieldable field : luceneDoc.getFields() ) {
if( field.isStored() ) {
SchemaField sf = schema.getField( field.name() );
if( !schema.isCopyFieldTarget( sf ) ) {
diff --git a/solr/src/java/org/apache/solr/update/UpdateHandler.java b/solr/src/java/org/apache/solr/update/UpdateHandler.java
index e7332349dfd..cd13a4935ab 100644
--- a/solr/src/java/org/apache/solr/update/UpdateHandler.java
+++ b/solr/src/java/org/apache/solr/update/UpdateHandler.java
@@ -21,7 +21,6 @@ package org.apache.solr.update;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.Scorer;
@@ -125,7 +124,7 @@ public abstract class UpdateHandler implements SolrInfoMBean {
protected final String getIndexedIdOptional(Document doc) {
if (idField == null) return null;
- Field f = doc.getField(idField.getName());
+ Fieldable f = doc.getFieldable(idField.getName());
if (f == null) return null;
return idFieldType.storedToIndexed(f);
}
diff --git a/solr/src/test/org/apache/solr/BasicFunctionalityTest.java b/solr/src/test/org/apache/solr/BasicFunctionalityTest.java
index f19d9b2b8c0..3b12f7978e4 100644
--- a/solr/src/test/org/apache/solr/BasicFunctionalityTest.java
+++ b/solr/src/test/org/apache/solr/BasicFunctionalityTest.java
@@ -561,7 +561,7 @@ public class BasicFunctionalityTest extends SolrTestCaseJ4 {
DocList dl = ((ResultContext) rsp.getValues().get("response")).docs;
org.apache.lucene.document.Document d = req.getSearcher().doc(dl.iterator().nextDoc());
- // ensure field is not lazy
+ // ensure field is not lazy; this currently only works for non-numeric fields (if the schema behind the test changes, this may fail)
assertTrue( d.getFieldable("test_hlt") instanceof Field );
assertTrue( d.getFieldable("title") instanceof Field );
req.close();
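
The assertion change above reflects the pattern running through this patch: callers that previously used Document.getField(String) (typed as Field) now go through Document.getFieldable(String), because a stored trie/numeric value is no longer guaranteed to come back as a plain Field instance. A hedged sketch of how the distinction shows up at search time; "title" is taken from the test above, while "weight_f" is a hypothetical stored numeric field assumed for illustration:

    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.Fieldable;

    class FieldableCheckSketch {
        // doc is a stored document retrieved from a searcher, e.g. searcher.doc(docId)
        static void describe(Document doc) {
            Fieldable text = doc.getFieldable("title");    // stored text field: still a Field
            Fieldable num  = doc.getFieldable("weight_f"); // hypothetical stored numeric field
            System.out.println("text is Field: " + (text instanceof Field)); // expected true
            System.out.println("num  is Field: " + (num  instanceof Field)); // expected false after this patch
        }
    }
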
diff --git a/solr/src/test/org/apache/solr/analysis/TestSynonymMap.java b/solr/src/test/org/apache/solr/analysis/TestSynonymMap.java
index e00cd35c426..d3a6ee77873 100644
--- a/solr/src/test/org/apache/solr/analysis/TestSynonymMap.java
+++ b/solr/src/test/org/apache/solr/analysis/TestSynonymMap.java
@@ -17,6 +17,8 @@
package org.apache.solr.analysis;
+import java.io.IOException;
+import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
@@ -25,6 +27,8 @@ import java.util.Map;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.synonym.SynonymMap;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.solr.common.ResourceLoader;
+
public class TestSynonymMap extends LuceneTestCase {
@@ -257,6 +261,43 @@ public class TestSynonymMap extends LuceneTestCase {
assertTokIncludes( getSubSynonymMap( getSubSynonymMap( synMap, "ab" ), "bc" ), "cd", "gh" );
}
+
+ public void testLoadRules() throws Exception {
+ Map<String, String> args = new HashMap<String, String>();
+ args.put( "synonyms", "something.txt" );
+ SynonymFilterFactory ff = new SynonymFilterFactory();
+ ff.init(args);
+ ff.inform( new ResourceLoader() {
+ @Override
+ public List<String> getLines(String resource) throws IOException {
+ if( !"something.txt".equals(resource) ) {
+ throw new RuntimeException( "should not get a differnt resource" );
+ }
+ List<String> rules = new ArrayList<String>();
+ rules.add( "a,b" );
+ return rules;
+ }
+
+ @Override
+ public Object newInstance(String cname, String... subpackages) {
+ throw new RuntimeException("stub");
+ }
+
+ @Override
+ public InputStream openResource(String resource) throws IOException {
+ throw new RuntimeException("stub");
+ }
+ });
+
+ SynonymMap synMap = ff.getSynonymMap();
+ assertEquals( 2, synMap.submap.size() );
+ assertTokIncludes( synMap, "a", "a" );
+ assertTokIncludes( synMap, "a", "b" );
+ assertTokIncludes( synMap, "b", "a" );
+ assertTokIncludes( synMap, "b", "b" );
+ }
+
+
private void assertTokIncludes( SynonymMap map, String src, String exp ) throws Exception {
Token[] tokens = map.submap.get( src ).synonyms;
boolean inc = false;
diff --git a/solr/src/test/org/apache/solr/handler/JsonLoaderTest.java b/solr/src/test/org/apache/solr/handler/JsonLoaderTest.java
index e6635475356..5deec94f01b 100644
--- a/solr/src/test/org/apache/solr/handler/JsonLoaderTest.java
+++ b/solr/src/test/org/apache/solr/handler/JsonLoaderTest.java
@@ -26,7 +26,9 @@ import org.apache.lucene.util.LuceneTestCase;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField;
+import org.apache.solr.common.util.ContentStreamBase;
import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.update.AddUpdateCommand;
import org.apache.solr.update.CommitUpdateCommand;
import org.apache.solr.update.DeleteUpdateCommand;
@@ -81,13 +83,11 @@ public class JsonLoaderTest extends SolrTestCaseJ4 {
public void testParsing() throws Exception
{
SolrQueryRequest req = req();
- Reader reader = new StringReader(input);
-
+ SolrQueryResponse rsp = new SolrQueryResponse();
BufferingRequestProcessor p = new BufferingRequestProcessor(null);
- JsonLoader loader = new JsonLoader( p );
-
- loader.processUpdate(req, p, new JSONParser(reader) );
-
+ JsonLoader loader = new JsonLoader( req, p );
+ loader.load(req, rsp, new ContentStreamBase.StringStream(input));
+
assertEquals( 2, p.addCommands.size() );
AddUpdateCommand add = p.addCommands.get(0);
@@ -133,8 +133,67 @@ public class JsonLoaderTest extends SolrTestCaseJ4 {
req.close();
}
+
+
+ public void testSimpleFormat() throws Exception
+ {
+ String str = "[{'id':'1'},{'id':'2'}]".replace('\'', '"');
+ SolrQueryRequest req = req("commitWithin","100", "overwrite","false");
+ SolrQueryResponse rsp = new SolrQueryResponse();
+ BufferingRequestProcessor p = new BufferingRequestProcessor(null);
+ JsonLoader loader = new JsonLoader( req, p );
+ loader.load(req, rsp, new ContentStreamBase.StringStream(str));
+
+ assertEquals( 2, p.addCommands.size() );
+
+ AddUpdateCommand add = p.addCommands.get(0);
+ SolrInputDocument d = add.solrDoc;
+ SolrInputField f = d.getField( "id" );
+ assertEquals("1", f.getValue());
+ assertEquals(add.commitWithin, 100);
+ assertEquals(add.overwrite, false);
+
+ add = p.addCommands.get(1);
+ d = add.solrDoc;
+ f = d.getField( "id" );
+ assertEquals("2", f.getValue());
+ assertEquals(add.commitWithin, 100);
+ assertEquals(add.overwrite, false);
+
+ req.close();
+ }
+
+ public void testSimpleFormatInAdd() throws Exception
+ {
+ String str = "{'add':[{'id':'1'},{'id':'2'}]}".replace('\'', '"');
+ SolrQueryRequest req = req();
+ SolrQueryResponse rsp = new SolrQueryResponse();
+ BufferingRequestProcessor p = new BufferingRequestProcessor(null);
+ JsonLoader loader = new JsonLoader( req, p );
+ loader.load(req, rsp, new ContentStreamBase.StringStream(str));
+
+ assertEquals( 2, p.addCommands.size() );
+
+ AddUpdateCommand add = p.addCommands.get(0);
+ SolrInputDocument d = add.solrDoc;
+ SolrInputField f = d.getField( "id" );
+ assertEquals("1", f.getValue());
+ assertEquals(add.commitWithin, -1);
+ assertEquals(add.overwrite, true);
+
+ add = p.addCommands.get(1);
+ d = add.solrDoc;
+ f = d.getField( "id" );
+ assertEquals("2", f.getValue());
+ assertEquals(add.commitWithin, -1);
+ assertEquals(add.overwrite, true);
+
+ req.close();
+ }
+
}
+
class BufferingRequestProcessor extends UpdateRequestProcessor
{
List<AddUpdateCommand> addCommands = new ArrayList<AddUpdateCommand>();
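
The two tests added above feed the loader the same documents in two payload shapes. A compact summary of the behavior the assertions pin down (payloads copied from the tests, defaults taken from the asserted values):

    // bare top-level array: commitWithin/overwrite are read from the request parameters
    String bare    = "[{\"id\":\"1\"},{\"id\":\"2\"}]";           // with commitWithin=100&overwrite=false
    // documents wrapped in an "add" array: the per-add defaults apply
    String wrapped = "{\"add\":[{\"id\":\"1\"},{\"id\":\"2\"}]}"; // commitWithin=-1, overwrite=true
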
diff --git a/solr/src/test/org/apache/solr/handler/MoreLikeThisHandlerTest.java b/solr/src/test/org/apache/solr/handler/MoreLikeThisHandlerTest.java
index 6dbae21f244..c7d8a392201 100644
--- a/solr/src/test/org/apache/solr/handler/MoreLikeThisHandlerTest.java
+++ b/solr/src/test/org/apache/solr/handler/MoreLikeThisHandlerTest.java
@@ -79,7 +79,7 @@ public class MoreLikeThisHandlerTest extends SolrTestCaseJ4 {
params.set(CommonParams.Q, "id:42");
params.set(MoreLikeThisParams.MLT, "true");
- params.set(MoreLikeThisParams.SIMILARITY_FIELDS, "name,subword,foo_ti");
+ params.set(MoreLikeThisParams.SIMILARITY_FIELDS, "name,subword");
params.set(MoreLikeThisParams.INTERESTING_TERMS, "details");
params.set(MoreLikeThisParams.MIN_TERM_FREQ,"1");
params.set(MoreLikeThisParams.MIN_DOC_FREQ,"1");
diff --git a/solr/src/test/org/apache/solr/update/DocumentBuilderTest.java b/solr/src/test/org/apache/solr/update/DocumentBuilderTest.java
index 4a4df13b0f7..991295d1339 100644
--- a/solr/src/test/org/apache/solr/update/DocumentBuilderTest.java
+++ b/solr/src/test/org/apache/solr/update/DocumentBuilderTest.java
@@ -109,8 +109,8 @@ public class DocumentBuilderTest extends SolrTestCaseJ4 {
doc.addField( "home", "2.2,3.3", 1.0f );
Document out = DocumentBuilder.toDocument( doc, core.getSchema() );
assertNotNull( out.get( "home" ) );//contains the stored value and term vector, if there is one
- assertNotNull( out.getField( "home_0" + FieldType.POLY_FIELD_SEPARATOR + "double" ) );
- assertNotNull( out.getField( "home_1" + FieldType.POLY_FIELD_SEPARATOR + "double" ) );
+ assertNotNull( out.getFieldable( "home_0" + FieldType.POLY_FIELD_SEPARATOR + "double" ) );
+ assertNotNull( out.getFieldable( "home_1" + FieldType.POLY_FIELD_SEPARATOR + "double" ) );
}
}