merge trunk (1237250:1237842)
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1237850 13f79535-47bb-0310-9956-ffa450edef68
commit 8501d55c07
@@ -748,6 +748,10 @@ Changes in backwards compatibility policy

* LUCENE-3712: Removed unused and untested ReaderUtil#subReader methods.
  (Uwe Schindler)

* LUCENE-3672: Deprecate Directory.fileModified and
  IndexCommit.getTimestamp and .getVersion. (Andrzej Bialecki, Robert
  Muir, Mike McCandless)

Security fixes

@@ -802,6 +806,9 @@ New Features

* LUCENE-3690: Added HTMLStripCharFilter, a CharFilter that strips HTML
  markup. (Steve Rowe)

* LUCENE-3725: Added optional packing to FST building; this uses extra
  RAM during building but results in a smaller FST. (Mike McCandless)

Bug fixes

@@ -845,6 +852,12 @@ Bug fixes
  TermAllGroupsCollector or TermAllGroupHeadsCollector if instantiated with a
  non default small size. (Martijn van Groningen, yonik)

* LUCENE-3727: When writing stored fields and term vectors, Lucene
  checks file sizes to detect a bug in some Sun JREs (LUCENE-1282),
  however, on some NFS filesystems File.length() could be stale,
  resulting in false errors like "fdx size mismatch while indexing".
  These checks now use getFilePointer instead to avoid this.
  (Jamir Shaikh, Mike McCandless, Robert Muir)
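The LUCENE-3727 fix is easy to see in a small sketch. This is an illustration only, not the committed Lucene code; the index path, file name, and the fdxOut stream are hypothetical:

    File indexDir = new File("/path/to/index");               // hypothetical location
    // Stale-prone: asks the filesystem for metadata, which NFS may serve stale.
    long sizeOnDisk = new File(indexDir, "_0.fdx").length();
    // What the fix switches to: ask the output stream how many bytes it has
    // written. No filesystem metadata round-trip, so it cannot be stale.
    long sizeWritten = fdxOut.getFilePointer();               // fdxOut: a hypothetical IndexOutput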
Optimizations

@@ -62,6 +62,14 @@ New Features

* LUCENE-3602: Added query time joining under the join module. (Martijn van Groningen, Michael McCandless)

* LUCENE-2795: Generified DirectIOLinuxDirectory to work across any
  unix supporting the O_DIRECT flag when opening a file (tested on
  Linux and OS X but likely other Unixes will work), and improved it
  so it can be used for indexing and searching. The directory uses
  direct IO when doing large merges to avoid unnecessarily evicting
  cached IO pages due to large merges. (Varun Thacker, Mike
  McCandless)

API Changes

* LUCENE-2606: Changed RegexCapabilities interface to fix thread
@@ -192,6 +200,9 @@ Bug Fixes
* LUCENE-3697: SimpleBoundaryScanner does not work well when highlighting
  at the beginning of the text. (Shay Banon via Koji Sekiguchi)

* LUCENE-3719: FVH: slow performance on very large queries.
  (Igor Motov via Koji Sekiguchi)

Documentation

* LUCENE-3599: Javadocs for DistanceUtils.haversine() were incorrectly

@@ -17,11 +17,11 @@ package org.apache.lucene.search.vectorhighlight;
*/

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
@@ -60,7 +60,7 @@ public class FieldQuery {

FieldQuery( Query query, IndexReader reader, boolean phraseHighlight, boolean fieldMatch ) throws IOException {
this.fieldMatch = fieldMatch;
List<Query> flatQueries = new ArrayList<Query>();
Set<Query> flatQueries = new LinkedHashSet<Query>();
flatten( query, reader, flatQueries );
saveTerms( flatQueries, reader );
Collection<Query> expandQueries = expand( flatQueries );
@@ -133,7 +133,7 @@ public class FieldQuery {
* => expandQueries={a,"b c","c d","b c d"}
*/
Collection<Query> expand( Collection<Query> flatQueries ){
List<Query> expandQueries = new ArrayList<Query>();
Set<Query> expandQueries = new LinkedHashSet<Query>();
for( Iterator<Query> i = flatQueries.iterator(); i.hasNext(); ){
Query query = i.next();
i.remove();
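Both FieldQuery hunks above make the same change: ArrayList is swapped for LinkedHashSet, so duplicate flattened queries are dropped while their first-seen order is kept. A standalone illustration of that property (plain Java, not Lucene code):

    Set<String> flat = new LinkedHashSet<String>();  // java.util.LinkedHashSet, java.util.Set
    flat.add("a");
    flat.add("b c");
    flat.add("a");   // duplicate: silently ignored
    // Iteration yields "a", "b c" -- insertion order preserved (unlike HashSet),
    // and without the duplicate that a List would have kept.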
@@ -40,11 +40,13 @@
<fileset file="${src.dir}/org/apache/lucene/store/NativePosixUtil.cpp" />
<includepath>
<pathelement location="${java.home}/../include"/>
<pathelement location="${java.home}/include"/>
<pathelement location="${java.home}/../include/linux"/>
<pathelement location="${java.home}/../include/solaris"/>
</includepath>

<compilerarg value="-fPIC" />
<linkerarg value="-lstdc++" />
</cc>
</target>

@@ -15,6 +15,16 @@
* the License.
*/

#ifdef LINUX
#define DIRECT_FLAG O_DIRECT | O_NOATIME
#define LINUX
#elif __APPLE__
#define DIRECT_FLAG 0
#define OSX
#else
#define DIRECT_FLAG O_DIRECT // __unix__ is not used as even Linux falls under it.
#endif

#include <jni.h>
#include <fcntl.h> // posix_fadvise, constants for open
#include <string.h> // strerror
@@ -26,6 +36,7 @@

// java -cp .:lib/junit-4.7.jar:./build/classes/test:./build/classes/java:./build/classes/demo -Dlucene.version=2.9-dev -DtempDir=build -ea org.junit.runner.JUnitCore org.apache.lucene.index.TestDoc

#ifdef LINUX
/*
* Class: org_apache_lucene_store_NativePosixUtil
* Method: posix_fadvise
@@ -89,7 +100,7 @@ JNIEXPORT jint JNICALL Java_org_apache_lucene_store_NativePosixUtil_posix_1fadvi

return 0;
}

#endif

/*
* Class: org_apache_lucene_store_NativePosixUtil
@@ -107,16 +118,26 @@ JNIEXPORT jobject JNICALL Java_org_apache_lucene_store_NativePosixUtil_open_1dir
char *fname;

class_ioex = env->FindClass("java/io/IOException");
if (class_ioex == NULL) return NULL;
if (class_ioex == NULL) {
return NULL;
}
class_fdesc = env->FindClass("java/io/FileDescriptor");
if (class_fdesc == NULL) return NULL;
if (class_fdesc == NULL) {
return NULL;
}

fname = (char *) env->GetStringUTFChars(filename, NULL);

if (readOnly) {
fd = open(fname, O_RDONLY | O_DIRECT | O_NOATIME);
fd = open(fname, O_RDONLY | DIRECT_FLAG);
#ifdef OSX
fcntl(fd, F_NOCACHE, 1);
#endif
} else {
fd = open(fname, O_RDWR | O_CREAT | O_DIRECT | O_NOATIME, 0666);
fd = open(fname, O_RDWR | O_CREAT | DIRECT_FLAG, 0666);
#ifdef OSX
fcntl(fd, F_NOCACHE, 1);
#endif
}

//printf("open %s -> %d; ro %d\n", fname, fd, readOnly); fflush(stdout);
@@ -131,19 +152,22 @@ JNIEXPORT jobject JNICALL Java_org_apache_lucene_store_NativePosixUtil_open_1dir

// construct a new FileDescriptor
const_fdesc = env->GetMethodID(class_fdesc, "<init>", "()V");
if (const_fdesc == NULL) return NULL;
if (const_fdesc == NULL) {
return NULL;
}
ret = env->NewObject(class_fdesc, const_fdesc);

// poke the "fd" field with the file descriptor
field_fd = env->GetFieldID(class_fdesc, "fd", "I");
if (field_fd == NULL) return NULL;
if (field_fd == NULL) {
return NULL;
}
env->SetIntField(ret, field_fd, fd);

// and return it
return ret;
}

/*
* Class: org_apache_lucene_store_NativePosixUtil
* Method: pread
@@ -27,68 +27,120 @@ import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;

import org.apache.lucene.store.Directory; // javadoc
import org.apache.lucene.store.NativeFSLockFactory; // javadoc
import org.apache.lucene.store.IOContext.Context;

// TODO
// - newer Linux kernel versions (after 2.6.29) have
// improved MADV_SEQUENTIAL (and hopefully also
// FADV_SEQUENTIAL) interaction with the buffer
// cache; we should explore using that instead of direct
// IO when context is merge

/**
* An {@link Directory} implementation that uses the
* Linux-specific O_DIRECT flag to bypass all OS level
* caching. To use this you must compile
* A {@link Directory} implementation for all Unixes that uses
* DIRECT I/O to bypass OS level IO caching during
* merging. For all other cases (searching, writing) we delegate
* to the provided Directory instance.
*
* <p>See <a
* href="../../../../../contrib-misc/overview-summary.html#NativeUnixDirectory">Overview</a>
* for more details.
*
* <p>To use this you must compile
* NativePosixUtil.cpp (exposes Linux-specific APIs through
* JNI) for your platform.
* JNI) for your platform, by running <code>ant
* build-native-unix</code>, and then putting the resulting
* <code>libNativePosixUtil.so</code> (from
* <code>lucene/build/native</code>) onto your dynamic
* linker search path.
*
* <p><b>WARNING</b>: this code is very new and quite easily
* could contain horrible bugs. For example, here's one
* known issue: if you use seek in IndexOutput, and then
* known issue: if you use seek in <code>IndexOutput</code>, and then
* write more than one buffer's worth of bytes, then the
* file will be wrong. Lucene does not do this (only writes
* small number of bytes after seek).

* file will be wrong. Lucene does not do this today (only writes
* small number of bytes after seek), but that may change.
*
* <p>This directory passes Solr and Lucene tests on Linux
* and OS X; other Unixes should work but have not been
* tested! Use at your own risk.
*
* @lucene.experimental
*/
public class DirectIOLinuxDirectory extends FSDirectory {
public class NativeUnixDirectory extends FSDirectory {

// TODO: this is OS dependent, but likely 512 is the LCD
private final static long ALIGN = 512;
private final static long ALIGN_NOT_MASK = ~(ALIGN-1);

/** Default buffer size before writing to disk (256 MB);
* larger means less IO load but more RAM and direct
* buffer storage space consumed during merging. */

private final int forcedBufferSize;
public final static int DEFAULT_MERGE_BUFFER_SIZE = 262144;

/** Default min expected merge size before direct IO is
* used (10 MB): */
public final static long DEFAULT_MIN_BYTES_DIRECT = 10*1024*1024;

private final int mergeBufferSize;
private final long minBytesDirect;
private final Directory delegate;

/** Create a new NIOFSDirectory for the named location.
*
* @param path the path of the directory
* @param lockFactory the lock factory to use, or null for the default
* ({@link NativeFSLockFactory});
* @param forcedBufferSize if this is 0, just use Lucene's
* default buffer size; else, force this buffer size.
* For best performance, force the buffer size to
* something fairly large (eg 1 MB), but note that this
* will eat up the JRE's direct buffer storage space
* @param mergeBufferSize Size of buffer to use for
* merging. See {@link #DEFAULT_MERGE_BUFFER_SIZE}.
* @param minBytesDirect Merges, or files to be opened for
* reading, smaller than this will
* not use direct IO. See {@link
* #DEFAULT_MIN_BYTES_DIRECT}
* @param delegate fallback Directory for non-merges
* @throws IOException
*/
public DirectIOLinuxDirectory(File path, LockFactory lockFactory, int forcedBufferSize) throws IOException {
super(path, lockFactory);
this.forcedBufferSize = forcedBufferSize;
public NativeUnixDirectory(File path, int mergeBufferSize, long minBytesDirect, Directory delegate) throws IOException {
super(path, delegate.getLockFactory());
if ((mergeBufferSize & ALIGN) != 0) {
throw new IllegalArgumentException("mergeBufferSize must be 0 mod " + ALIGN + " (got: " + mergeBufferSize + ")");
}
this.mergeBufferSize = mergeBufferSize;
this.minBytesDirect = minBytesDirect;
this.delegate = delegate;
}

/** Create a new NIOFSDirectory for the named location.
*
* @param path the path of the directory
* @param delegate fallback Directory for non-merges
* @throws IOException
*/
public NativeUnixDirectory(File path, Directory delegate) throws IOException {
this(path, DEFAULT_MERGE_BUFFER_SIZE, DEFAULT_MIN_BYTES_DIRECT, delegate);
}

@Override
public IndexInput openInput(String name, IOContext context) throws IOException {
ensureOpen();
return new DirectIOLinuxIndexInput(new File(getDirectory(), name),
bufferSize(context));
if (context.context != Context.MERGE || context.mergeInfo.estimatedMergeBytes < minBytesDirect || fileLength(name) < minBytesDirect) {
return delegate.openInput(name, context);
} else {
return new NativeUnixIndexInput(new File(getDirectory(), name), mergeBufferSize);
}
}

@Override
public IndexOutput createOutput(String name, IOContext context) throws IOException {
ensureOpen();
ensureCanWrite(name);
return new DirectIOLinuxIndexOutput(new File(getDirectory(), name), bufferSize(context));
}

private int bufferSize(IOContext context) {
return forcedBufferSize != 0 ? forcedBufferSize : BufferedIndexInput
.bufferSize(context);
if (context.context != Context.MERGE || context.mergeInfo.estimatedMergeBytes < minBytesDirect) {
return delegate.createOutput(name, context);
} else {
ensureCanWrite(name);
return new NativeUnixIndexOutput(new File(getDirectory(), name), mergeBufferSize);
}
}

private final static class DirectIOLinuxIndexOutput extends IndexOutput {
private final static class NativeUnixIndexOutput extends IndexOutput {
private final ByteBuffer buffer;
private final FileOutputStream fos;
private final FileChannel channel;
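With the hunk above, NativeUnixDirectory only opens direct-IO streams for sufficiently large merges and routes everything else to a delegate Directory. A minimal usage sketch, assuming only the constructors shown in this diff; the delegate choice and path are illustrative, not taken from the commit:

    File path = new File("/path/to/index");       // hypothetical location
    Directory delegate = FSDirectory.open(path);  // any ordinary Directory as fallback
    Directory dir = new NativeUnixDirectory(path, delegate);
    // Merges whose estimated size exceeds DEFAULT_MIN_BYTES_DIRECT (10 MB) now
    // bypass the OS buffer cache; searching and small files use the delegate.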
@@ -101,9 +153,9 @@ public class DirectIOLinuxDirectory extends FSDirectory {
private long fileLength;
private boolean isOpen;

public DirectIOLinuxIndexOutput(File path, int bufferSize) throws IOException {
public NativeUnixIndexOutput(File path, int bufferSize) throws IOException {
//this.path = path;
FileDescriptor fd = NativePosixUtil.open_direct(path.toString(), false);
final FileDescriptor fd = NativePosixUtil.open_direct(path.toString(), false);
fos = new FileOutputStream(fd);
//fos = new FileOutputStream(path);
channel = fos.getChannel();
@@ -206,7 +258,7 @@ public class DirectIOLinuxDirectory extends FSDirectory {

@Override
public long length() throws IOException {
return fileLength;
return fileLength + bufferPos;
}

@Override
@@ -233,7 +285,7 @@ public class DirectIOLinuxDirectory extends FSDirectory {
}
}

private final static class DirectIOLinuxIndexInput extends IndexInput {
private final static class NativeUnixIndexInput extends IndexInput {
private final ByteBuffer buffer;
private final FileInputStream fis;
private final FileChannel channel;
@@ -244,10 +296,9 @@ public class DirectIOLinuxDirectory extends FSDirectory {
private long filePos;
private int bufferPos;

public DirectIOLinuxIndexInput(File path, int bufferSize) throws IOException {
// TODO make use of IOContext
super("DirectIOLinuxIndexInput(path=\"" + path.getPath() + "\")");
FileDescriptor fd = NativePosixUtil.open_direct(path.toString(), true);
public NativeUnixIndexInput(File path, int bufferSize) throws IOException {
super("NativeUnixIndexInput(path=\"" + path.getPath() + "\")");
final FileDescriptor fd = NativePosixUtil.open_direct(path.toString(), true);
fis = new FileInputStream(fd);
channel = fis.getChannel();
this.bufferSize = bufferSize;
@@ -260,7 +311,7 @@ public class DirectIOLinuxDirectory extends FSDirectory {
}

// for clone
public DirectIOLinuxIndexInput(DirectIOLinuxIndexInput other) throws IOException {
public NativeUnixIndexInput(NativeUnixIndexInput other) throws IOException {
super(other.toString());
this.fis = null;
channel = other.channel;
@@ -296,13 +347,17 @@ public class DirectIOLinuxDirectory extends FSDirectory {
public void seek(long pos) throws IOException {
if (pos != getFilePointer()) {
final long alignedPos = pos & ALIGN_NOT_MASK;
//System.out.println("seek pos=" + pos + " aligned=" + alignedPos + " bufferSize=" + bufferSize + " this=" + this);
filePos = alignedPos-bufferSize;
refill();

final int delta = (int) (pos - alignedPos);
buffer.position(delta);
bufferPos = delta;
if (delta != 0) {
refill();
buffer.position(delta);
bufferPos = delta;
} else {
// force refill on next read
bufferPos = bufferSize;
}
}
}
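The reworked seek above relies on the ALIGN arithmetic declared at the top of the class (ALIGN = 512, ALIGN_NOT_MASK = ~(ALIGN-1)). A worked example of that masking, independent of the Lucene code:

    long pos = 1300;                       // requested file position
    long alignedPos = pos & ~(512L - 1);   // 1024: rounded down to a 512-byte boundary
    int delta = (int) (pos - alignedPos);  // 276: offset of pos within the aligned block
    // delta != 0: refill the buffer from alignedPos and position it at delta.
    // delta == 0: pos is already on a boundary, so just force a refill on the next read.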
@@ -371,7 +426,7 @@ public class DirectIOLinuxDirectory extends FSDirectory {
@Override
public Object clone() {
try {
return new DirectIOLinuxIndexInput(this);
return new NativeUnixIndexInput(this);
} catch (IOException ioe) {
throw new RuntimeException("IOException during clone: " + this, ioe);
}
@@ -27,33 +27,29 @@
The misc package has various tools for splitting/merging indices,
changing norms, finding high freq terms, and others.

<h2>DirectIOLinuxDirectory</h2>
<a name="NativeUnixDirectory"></a>
<h2>NativeUnixDirectory</h2>

<p>
<b>NOTE</b>: This uses C++ sources (accessible via JNI), which you'll
have to compile on your platform. Further, this is a very
platform-specific extensions (runs only on Linux, and likely only on
2.6.x kernels).
have to compile on your platform.

<p>
DirectIOLinuxDirectory is a Directory implementation that bypasses the
OS's buffer cache for any IndexInput and IndexOutput opened through it
(using the linux-specific O_DIRECT flag).
{@link org.apache.lucene.store.NativeUnixDirectory} is a Directory implementation that bypasses the
OS's buffer cache (using direct IO) for any IndexInput and IndexOutput
used during merging of segments larger than a specified size (default
10 MB). This avoids evicting hot pages that are still in-use for
searching, keeping search more responsive while large merges run.

<p>
Note that doing so typically results in bad performance loss! You
should not use this for searching, but rather for indexing (or maybe
just merging during indexing), to avoid evicting useful pages from the
buffer cache.

See <a target=_top href="http://chbits.blogspot.com/2010/06/lucene-and-fadvisemadvise.html">here</a>
See <a target=_top href="http://blog.mikemccandless.com/2010/06/lucene-and-fadvisemadvise.html">this blog post</a>
for details.

Steps to build:
<ul>
<li> <tt>cd lucene/contrib/misc/</tt>

<li> To compile NativePosixUtil.cpp -> libNativePosixUtil.so on Linux run<tt> ant build-native-unix</tt>.
<li> To compile NativePosixUtil.cpp -> libNativePosixUtil.so, run<tt> ant build-native-unix</tt>.

<li><tt>libNativePosixUtil.so</tt> will be located in the <tt>lucene/build/native/</tt> folder

@@ -62,13 +58,6 @@ Steps to build:
<li> <tt>ant jar</tt> to compile the java source and put that JAR on your CLASSPATH
</ul>

<p>
To use this, you'll likely want to make a custom subclass of
FSDirectory that only opens direct IndexInput/Output for merging. One
hackish way to do this is to check if the current thread's name starts
with "Lucene Merge Thread". Alternatively, you could use this Dir as
is for all indexing ops, but not for searching.

<p>
NativePosixUtil.cpp/java also expose access to the posix_madvise,
madvise, posix_fadvise functions, which are somewhat more cross
@@ -398,7 +398,7 @@ public class BlockTreeTermsReader extends FieldsProducer {
final long indexStartFP;
final long rootBlockFP;
final BytesRef rootCode;
private FST<BytesRef> index;
private final FST<BytesRef> index;

//private boolean DEBUG;

@@ -433,6 +433,8 @@ public class BlockTreeTermsReader extends FieldsProducer {
w.close();
}
*/
} else {
index = null;
}
}

@@ -495,6 +497,8 @@ public class BlockTreeTermsReader extends FieldsProducer {

private final BytesRef term = new BytesRef();

private final FST.BytesReader fstReader;

// TODO: can we share this with the frame in STE?
private final class Frame {
final int ord;
@@ -755,6 +759,12 @@ public class BlockTreeTermsReader extends FieldsProducer {
arcs[arcIdx] = new FST.Arc<BytesRef>();
}

if (index == null) {
fstReader = null;
} else {
fstReader = index.getBytesReader(0);
}

// TODO: if the automaton is "smallish" we really
// should use the terms index to seek at least to
// the initial term and likely to subsequent terms
@@ -842,7 +852,7 @@ public class BlockTreeTermsReader extends FieldsProducer {
// TODO: we could be more efficient for the next()
// case by using current arc as starting point,
// passed to findTargetArc
arc = index.findTargetArc(target, arc, getArc(1+idx));
arc = index.findTargetArc(target, arc, getArc(1+idx), fstReader);
assert arc != null;
output = fstOutputs.add(output, arc.output);
idx++;
@@ -1186,6 +1196,7 @@ public class BlockTreeTermsReader extends FieldsProducer {
private boolean eof;

final BytesRef term = new BytesRef();
private final FST.BytesReader fstReader;

@SuppressWarnings("unchecked") private FST.Arc<BytesRef>[] arcs = new FST.Arc[1];

@@ -1196,6 +1207,12 @@ public class BlockTreeTermsReader extends FieldsProducer {
// Used to hold seek by TermState, or cached seek
staticFrame = new Frame(-1);

if (index == null) {
fstReader = null;
} else {
fstReader = index.getBytesReader(0);
}

// Init w/ root block; don't use index since it may
// not (and need not) have been loaded
for(int arcIdx=0;arcIdx<arcs.length;arcIdx++) {
@@ -1581,7 +1598,7 @@ public class BlockTreeTermsReader extends FieldsProducer {

final int targetLabel = target.bytes[target.offset + targetUpto] & 0xFF;

final FST.Arc<BytesRef> nextArc = index.findTargetArc(targetLabel, arc, getArc(1+targetUpto));
final FST.Arc<BytesRef> nextArc = index.findTargetArc(targetLabel, arc, getArc(1+targetUpto), fstReader);

if (nextArc == null) {

@@ -1838,7 +1855,7 @@ public class BlockTreeTermsReader extends FieldsProducer {

final int targetLabel = target.bytes[target.offset + targetUpto] & 0xFF;

final FST.Arc<BytesRef> nextArc = index.findTargetArc(targetLabel, arc, getArc(1+targetUpto));
final FST.Arc<BytesRef> nextArc = index.findTargetArc(targetLabel, arc, getArc(1+targetUpto), fstReader);

if (nextArc == null) {

@@ -288,7 +288,7 @@ public class BlockTreeTermsWriter extends FieldsConsumer {
final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
final Builder<BytesRef> indexBuilder = new Builder<BytesRef>(FST.INPUT_TYPE.BYTE1,
0, 0, true, false, Integer.MAX_VALUE,
outputs, null);
outputs, null, false);
//if (DEBUG) {
//  System.out.println("  compile index for prefix=" + prefix);
//}
@@ -831,7 +831,7 @@ public class BlockTreeTermsWriter extends FieldsConsumer {
0, 0, true,
true, Integer.MAX_VALUE,
noOutputs,
new FindBlocks());
new FindBlocks(), false);

postingsWriter.setField(fieldInfo);
}

@@ -229,7 +229,7 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase {
////System.out.println("VGW: field=" + fieldInfo.name);

// Always put empty string in
fstBuilder.add(new IntsRef(), fstOutputs.get(termsFilePointer));
fstBuilder.add(new IntsRef(), termsFilePointer);
startTermsFilePointer = termsFilePointer;
}

@@ -260,7 +260,7 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase {
final int lengthSave = text.length;
text.length = indexedTermPrefixLength(lastTerm, text);
try {
fstBuilder.add(Util.toIntsRef(text, scratchIntsRef), fstOutputs.get(termsFilePointer));
fstBuilder.add(Util.toIntsRef(text, scratchIntsRef), termsFilePointer);
} finally {
text.length = lengthSave;
}

@@ -521,9 +521,10 @@ class SimpleTextFieldsReader extends FieldsProducer {
private void loadTerms() throws IOException {
PositiveIntOutputs posIntOutputs = PositiveIntOutputs.getSingleton(false);
final Builder<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> b;
b = new Builder<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>>(FST.INPUT_TYPE.BYTE1,
new PairOutputs<Long,PairOutputs.Pair<Long,Long>>(posIntOutputs,
new PairOutputs<Long,Long>(posIntOutputs, posIntOutputs)));
final PairOutputs<Long,Long> outputsInner = new PairOutputs<Long,Long>(posIntOutputs, posIntOutputs);
final PairOutputs<Long,PairOutputs.Pair<Long,Long>> outputs = new PairOutputs<Long,PairOutputs.Pair<Long,Long>>(posIntOutputs,
outputsInner);
b = new Builder<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>>(FST.INPUT_TYPE.BYTE1, outputs);
IndexInput in = (IndexInput) SimpleTextFieldsReader.this.in.clone();
in.seek(termsStart);
final BytesRef lastTerm = new BytesRef(10);
@@ -536,9 +537,9 @@ class SimpleTextFieldsReader extends FieldsProducer {
SimpleTextUtil.readLine(in, scratch);
if (scratch.equals(END) || StringHelper.startsWith(scratch, FIELD)) {
if (lastDocsStart != -1) {
b.add(Util.toIntsRef(lastTerm, scratchIntsRef), new PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>(lastDocsStart,
new PairOutputs.Pair<Long,Long>((long) docFreq,
posIntOutputs.get(totalTermFreq))));
b.add(Util.toIntsRef(lastTerm, scratchIntsRef),
outputs.newPair(lastDocsStart,
outputsInner.newPair((long) docFreq, totalTermFreq)));
sumTotalTermFreq += totalTermFreq;
}
break;
@@ -553,9 +554,8 @@ class SimpleTextFieldsReader extends FieldsProducer {
totalTermFreq += ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
} else if (StringHelper.startsWith(scratch, TERM)) {
if (lastDocsStart != -1) {
b.add(Util.toIntsRef(lastTerm, scratchIntsRef), new PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>(lastDocsStart,
new PairOutputs.Pair<Long,Long>((long) docFreq,
posIntOutputs.get(totalTermFreq))));
b.add(Util.toIntsRef(lastTerm, scratchIntsRef), outputs.newPair(lastDocsStart,
outputsInner.newPair((long) docFreq, totalTermFreq)));
}
lastDocsStart = in.getFilePointer();
final int len = scratch.length - TERM.length;

@@ -425,7 +425,6 @@ final class DirectoryReader extends BaseMultiReader<SegmentReader> {
Collection<String> files;
Directory dir;
long generation;
long version;
final Map<String,String> userData;
private final int segmentCount;

@@ -434,7 +433,6 @@ final class DirectoryReader extends BaseMultiReader<SegmentReader> {
this.dir = dir;
userData = infos.getUserData();
files = Collections.unmodifiableCollection(infos.files(dir, true));
version = infos.getVersion();
generation = infos.getGeneration();
segmentCount = infos.size();
}
@@ -464,11 +462,6 @@ final class DirectoryReader extends BaseMultiReader<SegmentReader> {
return dir;
}

@Override
public long getVersion() {
return version;
}

@Override
public long getGeneration() {
return generation;

@@ -83,39 +83,31 @@ public abstract class IndexCommit implements Comparable<IndexCommit> {
public boolean equals(Object other) {
if (other instanceof IndexCommit) {
IndexCommit otherCommit = (IndexCommit) other;
return otherCommit.getDirectory().equals(getDirectory()) && otherCommit.getVersion() == getVersion();
} else
return otherCommit.getDirectory().equals(getDirectory()) && otherCommit.getGeneration() == getGeneration();
} else {
return false;
}
}

@Override
public int hashCode() {
return (int) (getDirectory().hashCode() + getVersion());
return getDirectory().hashCode() + Long.valueOf(getGeneration()).hashCode();
}

/** Returns the version for this IndexCommit. This is the
* same value that {@link IndexReader#getVersion} would
* return if it were opened on this commit. */
public abstract long getVersion();

/** Returns the generation (the _N in segments_N) for this
* IndexCommit */
public abstract long getGeneration();

/** Convenience method that returns the last modified time
* of the segments_N file corresponding to this index
* commit, equivalent to
* getDirectory().fileModified(getSegmentsFileName()). */
public long getTimestamp() throws IOException {
return getDirectory().fileModified(getSegmentsFileName());
}

/** Returns userData, previously passed to {@link
* IndexWriter#commit(Map)} for this commit. Map is
* String -> String. */
public abstract Map<String,String> getUserData() throws IOException;

public int compareTo(IndexCommit commit) {
if (getDirectory() != commit.getDirectory()) {
throw new UnsupportedOperationException("cannot compare IndexCommits from different Directory instances");
}

long gen = getGeneration();
long comgen = commit.getGeneration();
if (gen < comgen) {
@@ -126,5 +118,4 @@ public abstract class IndexCommit implements Comparable<IndexCommit> {
return 0;
}
}

}
@@ -655,7 +655,6 @@ final class IndexFileDeleter {
boolean deleted;
Directory directory;
Collection<CommitPoint> commitsToDelete;
long version;
long generation;
final Map<String,String> userData;
private final int segmentCount;
@@ -665,7 +664,6 @@ final class IndexFileDeleter {
this.commitsToDelete = commitsToDelete;
userData = segmentInfos.getUserData();
segmentsFileName = segmentInfos.getCurrentSegmentFileName();
version = segmentInfos.getVersion();
generation = segmentInfos.getGeneration();
files = Collections.unmodifiableCollection(segmentInfos.files(directory, true));
segmentCount = segmentInfos.size();
@@ -696,11 +694,6 @@ final class IndexFileDeleter {
return directory;
}

@Override
public long getVersion() {
return version;
}

@Override
public long getGeneration() {
return generation;

@@ -468,36 +468,6 @@ public abstract class IndexReader implements Closeable {
throw new UnsupportedOperationException("This reader does not support this method.");
}

/**
* Returns the time the index in the named directory was last modified.
* Do not use this to check whether the reader is still up-to-date, use
* {@link #isCurrent()} instead.
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
*/
public static long lastModified(final Directory directory2) throws CorruptIndexException, IOException {
return ((Long) new SegmentInfos.FindSegmentsFile(directory2) {
@Override
public Object doBody(String segmentFileName) throws IOException {
return Long.valueOf(directory2.fileModified(segmentFileName));
}
}.run()).longValue();
}

/**
* Reads version number from segments files. The version number is
* initialized with a timestamp and then increased by one for each change of
* the index.
*
* @param directory where the index resides.
* @return version number.
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
*/
public static long getCurrentVersion(Directory directory) throws CorruptIndexException, IOException {
return SegmentInfos.readCurrentVersion(directory);
}

/**
* Reads commitUserData, previously passed to {@link
* IndexWriter#commit(Map)}, from current index
@@ -525,18 +495,7 @@ public abstract class IndexReader implements Closeable {
* a reader based on a Directory), then this method
* returns the version recorded in the commit that the
* reader opened. This version is advanced every time
* {@link IndexWriter#commit} is called.</p>
*
* <p>If instead this reader is a near real-time reader
* (ie, obtained by a call to {@link
* IndexWriter#getReader}, or by calling {@link #openIfChanged}
* on a near real-time reader), then this method returns
* the version of the last commit done by the writer.
* Note that even as further changes are made with the
* writer, the version will not changed until a commit is
* completed. Thus, you should not rely on this method to
* determine when a near real-time reader should be
* opened. Use {@link #isCurrent} instead.</p>
* a change is made with {@link IndexWriter}.</p>
*
* @throws UnsupportedOperationException unless overridden in subclass
*/

@@ -94,16 +94,15 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentInfo> {
* Whenever you add a new format, make it 1 smaller (negative version logic)! */
public static final int FORMAT_SEGMENTS_GEN_CURRENT = -2;

public int counter = 0; // used to name new segments
public int counter; // used to name new segments

/**
* counts how often the index has been changed by adding or deleting docs.
* starting with the current time in milliseconds forces to create unique version numbers.
* counts how often the index has been changed
*/
public long version = System.currentTimeMillis();
public long version;

private long generation = 0; // generation of the "segments_N" for the next commit
private long lastGeneration = 0; // generation of the "segments_N" file we last successfully read
private long generation; // generation of the "segments_N" for the next commit
private long lastGeneration; // generation of the "segments_N" file we last successfully read
// or wrote; this is normally the same as generation except if
// there was an IOException that had interrupted a commit

@@ -125,11 +125,6 @@ public class SnapshotDeletionPolicy implements IndexDeletionPolicy {
return cp.getUserData();
}

@Override
public long getVersion() {
return cp.getVersion();
}

@Override
public boolean isDeleted() {
return cp.isDeleted();

@@ -249,14 +249,6 @@ public final class CompoundFileDirectory extends Directory {
return entries.containsKey(IndexFileNames.stripSegmentName(name));
}

/** Returns the time the compound file was last modified. */
@Override
public long fileModified(String name) throws IOException {
ensureOpen();
return directory.fileModified(fileName);
}

/** Not implemented
* @throws UnsupportedOperationException */
@Override

@@ -62,10 +62,6 @@ public abstract class Directory implements Closeable {
public abstract boolean fileExists(String name)
throws IOException;

/** Returns the time the named file was last modified. */
public abstract long fileModified(String name)
throws IOException;

/** Removes an existing file in the directory. */
public abstract void deleteFile(String name)
throws IOException;

@@ -250,14 +250,6 @@ public abstract class FSDirectory extends Directory {
return file.exists();
}

/** Returns the time the named file was last modified. */
@Override
public long fileModified(String name) {
ensureOpen();
File file = new File(directory, name);
return file.lastModified();
}

/** Returns the time the named file was last modified. */
public static long fileModified(File directory, String name) {
File file = new File(directory, name);

@@ -137,11 +137,6 @@ public class FileSwitchDirectory extends Directory {
return getDirectory(name).fileExists(name);
}

@Override
public long fileModified(String name) throws IOException {
return getDirectory(name).fileModified(name);
}

@Override
public void deleteFile(String name) throws IOException {
getDirectory(name).deleteFile(name);

@@ -152,15 +152,6 @@ public class NRTCachingDirectory extends Directory {
return cache.fileExists(name) || delegate.fileExists(name);
}

@Override
public synchronized long fileModified(String name) throws IOException {
if (cache.fileExists(name)) {
return cache.fileModified(name);
} else {
return delegate.fileModified(name);
}
}

@Override
public synchronized void deleteFile(String name) throws IOException {
if (VERBOSE) {

@@ -98,19 +98,6 @@ public class RAMDirectory extends Directory {
return fileMap.containsKey(name);
}

/** Returns the time the named file was last modified.
* @throws IOException if the file does not exist
*/
@Override
public final long fileModified(String name) throws IOException {
ensureOpen();
RAMFile file = fileMap.get(name);
if (file == null) {
throw new FileNotFoundException(name);
}
return file.getLastModified();
}

/** Returns the length in bytes of a file in the directory.
* @throws IOException if the file does not exist
*/

@@ -26,8 +26,6 @@ public class RAMFile {
RAMDirectory directory;
protected long sizeInBytes;

private long lastModified = System.currentTimeMillis();

// File used as buffer, in no RAMDirectory
public RAMFile() {}

@@ -44,15 +42,6 @@ public class RAMFile {
this.length = length;
}

// For non-stream access from thread that might be concurrent with writing
public synchronized long getLastModified() {
return lastModified;
}

protected synchronized void setLastModified(long lastModified) {
this.lastModified = lastModified;
}

protected final byte[] addBuffer(int size) {
byte[] buffer = newBuffer(size);
synchronized(this) {

@@ -167,7 +167,6 @@ public class RAMOutputStream extends IndexOutput {

@Override
public void flush() throws IOException {
file.setLastModified(System.currentTimeMillis());
setFileLength();
}

@@ -95,7 +95,7 @@ public final class FixedBitSet extends DocIdSet implements Bits {
}

public boolean get(int index) {
assert index >= 0 && index < numBits;
assert index >= 0 && index < numBits: "index=" + index;
int i = index >> 6; // div 64
// signed shift will keep a negative index and force an
// array-index-out-of-bounds-exception, removing the need for an explicit check.

@@ -588,7 +588,7 @@ public final class UnicodeUtil {
out[out_offset++] = (char)(((b&0xf)<<12) + ((utf8[offset]&0x3f)<<6) + (utf8[offset+1]&0x3f));
offset += 2;
} else {
assert b < 0xf8;
assert b < 0xf8: "b=" + b;
int ch = ((b&0x7)<<18) + ((utf8[offset]&0x3f)<<12) + ((utf8[offset+1]&0x3f)<<6) + (utf8[offset+2]&0x3f);
offset += 3;
if (ch < UNI_MAX_BMP) {
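The four-byte branch in the UnicodeUtil hunk above can be verified by hand. A self-contained rendering of the same bit arithmetic (an illustration, not the Lucene method itself), decoding U+1F600:

    byte[] utf8 = {(byte) 0xF0, (byte) 0x9F, (byte) 0x98, (byte) 0x80};
    int b = utf8[0] & 0xFF;           // 0xF0, which satisfies the asserted bound b < 0xf8
    int ch = ((b & 0x7) << 18)        // low 3 bits of the 4-byte lead byte
           + ((utf8[1] & 0x3F) << 12)
           + ((utf8[2] & 0x3F) << 6)
           + (utf8[3] & 0x3F);        // ch == 0x1F600, a supplementary (non-BMP) code point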
@@ -17,15 +17,15 @@ package org.apache.lucene.util.fst;
* limitations under the License.
*/

import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.fst.FST.INPUT_TYPE; // javadoc

import java.io.IOException;

import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.fst.FST.INPUT_TYPE; // javadoc

/**
* Builds a compact FST (maps an IntsRef term to an arbitrary
* Builds a minimal FST (maps an IntsRef term to an arbitrary
* output) from pre-sorted terms with outputs (the FST
* becomes an FSA if you use NoOutputs). The FST is written
* on-the-fly into a compact serialized format byte array, which can
@@ -35,12 +35,6 @@ import java.io.IOException;
* <p>NOTE: The algorithm is described at
* http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.24.3698</p>
*
* If your outputs are ByteSequenceOutput then the final FST
* will be minimal, but if you use PositiveIntOutput then
* it's only "near minimal". For example, aa/0, aab/1, bbb/2
* will produce 6 states when a 5 state fst is also
* possible.
*
* The parameterized type T is the output type. See the
* subclasses of {@link Outputs}.
*
@@ -52,7 +46,7 @@ public class Builder<T> {
private final FST<T> fst;
private final T NO_OUTPUT;

// private static final boolean DEBUG = false;
// private static final boolean DEBUG = true;

// simplistic pruning: we prune node (and all following
// nodes) if less than this number of terms go through it:
@@ -84,11 +78,12 @@ public class Builder<T> {

/**
* Instantiates an FST/FSA builder without any pruning. A shortcut
* to {@link #Builder(FST.INPUT_TYPE, int, int, boolean, boolean, int, Outputs, FreezeTail)} with
* to {@link #Builder(FST.INPUT_TYPE, int, int, boolean,
* boolean, int, Outputs, FreezeTail, boolean)} with
* pruning options turned off.
*/
public Builder(FST.INPUT_TYPE inputType, Outputs<T> outputs) {
this(inputType, 0, 0, true, true, Integer.MAX_VALUE, outputs, null);
this(inputType, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, false);
}

/**
@@ -127,16 +122,21 @@ public class Builder<T> {
* @param outputs The output type for each input sequence. Applies only if building an FST. For
* FSA, use {@link NoOutputs#getSingleton()} and {@link NoOutputs#getNoOutput()} as the
* singleton output object.
*
* @param willPackFST Pass true if you will pack the FST before saving. This
* causes the FST to create additional data structures internally to facilitate packing, but
* it means the resulting FST cannot be saved: it must
* first be packed using {@link FST#pack(int, int)}}.
*/
public Builder(FST.INPUT_TYPE inputType, int minSuffixCount1, int minSuffixCount2, boolean doShareSuffix,
boolean doShareNonSingletonNodes, int shareMaxTailLength, Outputs<T> outputs,
FreezeTail<T> freezeTail) {
FreezeTail<T> freezeTail, boolean willPackFST) {
this.minSuffixCount1 = minSuffixCount1;
this.minSuffixCount2 = minSuffixCount2;
this.freezeTail = freezeTail;
this.doShareNonSingletonNodes = doShareNonSingletonNodes;
this.shareMaxTailLength = shareMaxTailLength;
fst = new FST<T>(inputType, outputs);
fst = new FST<T>(inputType, outputs, willPackFST);
if (doShareSuffix) {
dedupHash = new NodeHash<T>(fst);
} else {
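The hunk above ends mid-constructor, but the javadoc it adds explains the new willPackFST flag. A sketch of the intended usage, based on that javadoc; the outputs choice and the pack() arguments are assumptions for illustration, not values taken from this commit:

    Outputs<BytesRef> outputs = ByteSequenceOutputs.getSingleton();
    Builder<BytesRef> builder = new Builder<BytesRef>(FST.INPUT_TYPE.BYTE1,
        0, 0, true, true, Integer.MAX_VALUE, outputs, null, true);  // willPackFST = true
    // ... builder.add(...) each pre-sorted term ...
    FST<BytesRef> fst = builder.finish();
    // In this mode the FST cannot be saved as-is; it must be packed first:
    FST<BytesRef> packed = fst.pack(1, 1000);  // hypothetical tuning arguments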
@@ -170,23 +170,23 @@ public class Builder<T> {
fst.setAllowArrayArcs(b);
}

private CompiledNode compileNode(UnCompiledNode<T> n, int tailLength) throws IOException {
final int address;
if (dedupHash != null && (doShareNonSingletonNodes || n.numArcs <= 1) && tailLength <= shareMaxTailLength) {
if (n.numArcs == 0) {
address = fst.addNode(n);
private CompiledNode compileNode(UnCompiledNode<T> nodeIn, int tailLength) throws IOException {
final int node;
if (dedupHash != null && (doShareNonSingletonNodes || nodeIn.numArcs <= 1) && tailLength <= shareMaxTailLength) {
if (nodeIn.numArcs == 0) {
node = fst.addNode(nodeIn);
} else {
address = dedupHash.add(n);
node = dedupHash.add(nodeIn);
}
} else {
address = fst.addNode(n);
node = fst.addNode(nodeIn);
}
assert address != -2;
assert node != -2;

n.clear();
nodeIn.clear();

final CompiledNode fn = new CompiledNode();
fn.address = address;
fn.node = node;
return fn;
}

@@ -319,6 +319,11 @@ public class Builder<T> {
}
*/

// De-dup NO_OUTPUT since it must be a singleton:
if (output.equals(NO_OUTPUT)) {
output = NO_OUTPUT;
}

assert lastInput.length == 0 || input.compareTo(lastInput) >= 0: "inputs are added out of order lastInput=" + lastInput + " vs input=" + input;
assert validOutput(output);

@@ -443,7 +448,7 @@ public class Builder<T> {
}
}
//if (DEBUG) System.out.println("  builder.finish root.isFinal=" + root.isFinal + " root.output=" + root.output);
fst.finish(compileNode(root, lastInput.length).address);
fst.finish(compileNode(root, lastInput.length).node);

return fst;
}
@@ -480,7 +485,7 @@ public class Builder<T> {
}

static final class CompiledNode implements Node {
int address;
int node;
public boolean isCompiled() {
return true;
}
@@ -560,7 +565,7 @@ public class Builder<T> {
final Arc<T> arc = arcs[numArcs-1];
assert arc.label == labelToMatch: "arc.label=" + arc.label + " vs " + labelToMatch;
arc.target = target;
//assert target.address != -2;
//assert target.node != -2;
arc.nextFinalOutput = nextFinalOutput;
arc.isFinal = isFinal;
}

File diff suppressed because it is too large
@@ -151,7 +151,8 @@ abstract class FSTEnum<T> {
boolean found = false;
while (low <= high) {
mid = (low + high) >>> 1;
in.pos = arc.posArcsStart - arc.bytesPerArc*mid - 1;
in.pos = arc.posArcsStart;
in.skip(arc.bytesPerArc*mid+1);
final int midLabel = fst.readLabel(in);
final int cmp = midLabel - targetLabel;
//System.out.println("  cycle low=" + low + " high=" + high + " mid=" + mid + " midLabel=" + midLabel + " cmp=" + cmp);
@@ -275,7 +276,7 @@ abstract class FSTEnum<T> {

// Now scan forward, matching the new suffix of the target
while(true) {
//System.out.println("  cycle upto=" + upto + " arc.label=" + arc.label + " (" + (char) arc.label + ") targetLabel=" + targetLabel + " isLast?=" + arc.isLast());
//System.out.println("  cycle upto=" + upto + " arc.label=" + arc.label + " (" + (char) arc.label + ") targetLabel=" + targetLabel + " isLast?=" + arc.isLast() + " bba=" + arc.bytesPerArc);

if (arc.bytesPerArc != 0 && arc.label != FST.END_LABEL) {
// Arcs are fixed array -- use binary search to find
@@ -289,15 +290,16 @@ abstract class FSTEnum<T> {
boolean found = false;
while (low <= high) {
mid = (low + high) >>> 1;
in.pos = arc.posArcsStart - arc.bytesPerArc*mid - 1;
in.pos = arc.posArcsStart;
in.skip(arc.bytesPerArc*mid+1);
final int midLabel = fst.readLabel(in);
final int cmp = midLabel - targetLabel;
//System.out.println("  cycle low=" + low + " high=" + high + " mid=" + mid + " midLabel=" + midLabel + " cmp=" + cmp);
if (cmp < 0)
if (cmp < 0) {
low = mid + 1;
else if (cmp > 0)
} else if (cmp > 0) {
high = mid - 1;
else {
} else {
found = true;
break;
}
@@ -430,9 +432,11 @@ abstract class FSTEnum<T> {
FST.Arc<T> arc = getArc(upto-1);
int targetLabel = getTargetLabel();

final FST.BytesReader fstReader = fst.getBytesReader(0);

while(true) {
//System.out.println("  cycle target=" + (targetLabel == -1 ? "-1" : (char) targetLabel));
final FST.Arc<T> nextArc = fst.findTargetArc(targetLabel, arc, getArc(upto));
final FST.Arc<T> nextArc = fst.findTargetArc(targetLabel, arc, getArc(upto), fstReader);
if (nextArc == null) {
// short circuit
//upto--;

@@ -35,7 +35,7 @@ final class NodeHash<T> {
}

private boolean nodesEqual(Builder.UnCompiledNode<T> node, int address, FST.BytesReader in) throws IOException {
fst.readFirstRealArc(address, scratchArc, in);
fst.readFirstRealTargetArc(address, scratchArc, in);
if (scratchArc.bytesPerArc != 0 && node.numArcs != scratchArc.numArcs) {
return false;
}
@@ -43,7 +43,7 @@ final class NodeHash<T> {
final Builder.Arc<T> arc = node.arcs[arcUpto];
if (arc.label != scratchArc.label ||
!arc.output.equals(scratchArc.output) ||
((Builder.CompiledNode) arc.target).address != scratchArc.target ||
((Builder.CompiledNode) arc.target).node != scratchArc.target ||
!arc.nextFinalOutput.equals(scratchArc.nextFinalOutput) ||
arc.isFinal != scratchArc.isFinal()) {
return false;
@@ -71,9 +71,9 @@ final class NodeHash<T> {
// TODO: maybe if number of arcs is high we can safely subsample?
for(int arcIdx=0;arcIdx<node.numArcs;arcIdx++) {
final Builder.Arc<T> arc = node.arcs[arcIdx];
//System.out.println("    label=" + arc.label + " target=" + ((Builder.CompiledNode) arc.target).address + " h=" + h + " output=" + fst.outputs.outputToString(arc.output) + " isFinal?=" + arc.isFinal);
//System.out.println("    label=" + arc.label + " target=" + ((Builder.CompiledNode) arc.target).node + " h=" + h + " output=" + fst.outputs.outputToString(arc.output) + " isFinal?=" + arc.isFinal);
h = PRIME * h + arc.label;
h = PRIME * h + ((Builder.CompiledNode) arc.target).address;
h = PRIME * h + ((Builder.CompiledNode) arc.target).node;
h = PRIME * h + arc.output.hashCode();
h = PRIME * h + arc.nextFinalOutput.hashCode();
if (arc.isFinal) {
@@ -88,9 +88,9 @@ final class NodeHash<T> {
private int hash(int node) throws IOException {
final int PRIME = 31;
final FST.BytesReader in = fst.getBytesReader(0);
//System.out.println("hash frozen");
//System.out.println("hash frozen node=" + node);
int h = 0;
fst.readFirstRealArc(node, scratchArc, in);
fst.readFirstRealTargetArc(node, scratchArc, in);
while(true) {
//System.out.println("  label=" + scratchArc.label + " target=" + scratchArc.target + " h=" + h + " output=" + fst.outputs.outputToString(scratchArc.output) + " next?=" + scratchArc.flag(4) + " final?=" + scratchArc.isFinal());
h = PRIME * h + scratchArc.label;
@@ -109,26 +109,26 @@ final class NodeHash<T> {
return h & Integer.MAX_VALUE;
}

public int add(Builder.UnCompiledNode<T> node) throws IOException {
public int add(Builder.UnCompiledNode<T> nodeIn) throws IOException {
// System.out.println("hash: add count=" + count + " vs " + table.length);
final FST.BytesReader in = fst.getBytesReader(0);
final int h = hash(node);
final int h = hash(nodeIn);
int pos = h & mask;
int c = 0;
while(true) {
final int v = table[pos];
if (v == 0) {
// freeze & add
final int address = fst.addNode(node);
//System.out.println("  now freeze addr=" + address);
assert hash(address) == h : "frozenHash=" + hash(address) + " vs h=" + h;
final int node = fst.addNode(nodeIn);
//System.out.println("  now freeze node=" + node);
assert hash(node) == h : "frozenHash=" + hash(node) + " vs h=" + h;
count++;
table[pos] = address;
table[pos] = node;
if (table.length < 2*count) {
rehash();
}
return address;
} else if (nodesEqual(node, v, in)) {
return node;
} else if (nodesEqual(nodeIn, v, in)) {
// same node is already here
return v;
}

@@ -26,6 +26,10 @@ import org.apache.lucene.store.DataOutput;
* Represents the outputs for an FST, providing the basic
* algebra needed for the FST.
*
* <p>Note that any operation that returns NO_OUTPUT must
* return the same singleton object from {@link
* #getNoOutput}.</p>
*
* @lucene.experimental
*/

@@ -56,6 +60,8 @@ public abstract class Outputs<T> {

public abstract String outputToString(T output);

// TODO: maybe make valid(T output) public...? for asserts

public T merge(T first, T second) {
throw new UnsupportedOperationException();
}
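The note added to Outputs above pins down an identity contract: anything that returns NO_OUTPUT must hand back the one object from getNoOutput(), because the FST algebra compares against it by reference. A minimal illustration of the invariant, assuming a PositiveIntOutputs-style implementation:

    Long no = outputs.getNoOutput();   // the singleton no-output object
    Long alsoNo = outputs.common(no, no);
    assert alsoNo == no;               // same instance, not merely equals()
    // PairOutputs.newPair (next hunk) keeps this promise by rewriting any
    // argument that equals() a no-output into the singleton before building the Pair.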
@ -38,7 +38,8 @@ public class PairOutputs<A,B> extends Outputs<PairOutputs.Pair<A,B>> {
public final A output1;
public final B output2;

public Pair(A output1, B output2) {
// use newPair
private Pair(A output1, B output2) {
this.output1 = output1;
this.output2 = output2;
}
@ -66,35 +67,79 @@ public class PairOutputs<A,B> extends Outputs<PairOutputs.Pair<A,B>> {
this.outputs2 = outputs2;
NO_OUTPUT = new Pair<A,B>(outputs1.getNoOutput(), outputs2.getNoOutput());
}

public Pair<A,B> get(A output1, B output2) {
if (output1 == outputs1.getNoOutput() && output2 == outputs2.getNoOutput()) {

/** Create a new Pair */
public Pair<A,B> newPair(A a, B b) {
if (a.equals(outputs1.getNoOutput())) {
a = outputs1.getNoOutput();
}
if (b.equals(outputs2.getNoOutput())) {
b = outputs2.getNoOutput();
}

if (a == outputs1.getNoOutput() && b == outputs2.getNoOutput()) {
return NO_OUTPUT;
} else {
return new Pair<A,B>(output1, output2);
final Pair<A,B> p = new Pair<A,B>(a, b);
assert valid(p);
return p;
}
}


// for assert
private boolean valid(Pair<A,B> pair) {
final boolean noOutput1 = pair.output1.equals(outputs1.getNoOutput());
final boolean noOutput2 = pair.output2.equals(outputs2.getNoOutput());

if (noOutput1 && pair.output1 != outputs1.getNoOutput()) {
System.out.println("invalid0");
return false;
}

if (noOutput2 && pair.output2 != outputs2.getNoOutput()) {
System.out.println("invalid1");
return false;
}

if (noOutput1 && noOutput2) {
if (pair != NO_OUTPUT) {
System.out.println("invalid2");
return false;
} else {
return true;
}
} else {
return true;
}
}

@Override
public Pair<A,B> common(Pair<A,B> pair1, Pair<A,B> pair2) {
return get(outputs1.common(pair1.output1, pair2.output1),
outputs2.common(pair1.output2, pair2.output2));
assert valid(pair1);
assert valid(pair2);
return newPair(outputs1.common(pair1.output1, pair2.output1),
outputs2.common(pair1.output2, pair2.output2));
}

@Override
public Pair<A,B> subtract(Pair<A,B> output, Pair<A,B> inc) {
return get(outputs1.subtract(output.output1, inc.output1),
outputs2.subtract(output.output2, inc.output2));
assert valid(output);
assert valid(inc);
return newPair(outputs1.subtract(output.output1, inc.output1),
outputs2.subtract(output.output2, inc.output2));
}

@Override
public Pair<A,B> add(Pair<A,B> prefix, Pair<A,B> output) {
return get(outputs1.add(prefix.output1, output.output1),
outputs2.add(prefix.output2, output.output2));
assert valid(prefix);
assert valid(output);
return newPair(outputs1.add(prefix.output1, output.output1),
outputs2.add(prefix.output2, output.output2));
}

@Override
public void write(Pair<A,B> output, DataOutput writer) throws IOException {
assert valid(output);
outputs1.write(output.output1, writer);
outputs2.write(output.output2, writer);
}
@ -103,7 +148,7 @@ public class PairOutputs<A,B> extends Outputs<PairOutputs.Pair<A,B>> {
public Pair<A,B> read(DataInput in) throws IOException {
A output1 = outputs1.read(in);
B output2 = outputs2.read(in);
return get(output1, output2);
return newPair(output1, output2);
}

@Override
@ -113,6 +158,12 @@ public class PairOutputs<A,B> extends Outputs<PairOutputs.Pair<A,B>> {

@Override
public String outputToString(Pair<A,B> output) {
assert valid(output);
return "<pair:" + outputs1.outputToString(output.output1) + "," + outputs2.outputToString(output.output2) + ">";
}

@Override
public String toString() {
return "PairOutputs<" + outputs1 + "," + outputs2 + ">";
}
}
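The Pair constructor goes private and get() becomes newPair(), which first canonicalizes each component that equals its no-output value to the corresponding singleton, then collapses a fully-empty pair to NO_OUTPUT; valid() asserts exactly that invariant. A usage sketch assuming the Lucene 4.0-era API as it appears in this commit (getSingleton and newPair are the calls shown in these hunks, and getNoOutput() is assumed to return the NO_OUTPUT singleton, as valid() implies):

    PositiveIntOutputs o1 = PositiveIntOutputs.getSingleton(true);
    PositiveIntOutputs o2 = PositiveIntOutputs.getSingleton(false);
    PairOutputs<Long,Long> outputs = new PairOutputs<Long,Long>(o1, o2);

    PairOutputs.Pair<Long,Long> p = outputs.newPair(7L, 42L);
    // A pair of two no-outputs is canonicalized to the NO_OUTPUT singleton:
    PairOutputs.Pair<Long,Long> none =
        outputs.newPair(o1.getNoOutput(), o2.getNoOutput());
    assert none == outputs.getNoOutput();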
@ -25,10 +25,7 @@ import org.apache.lucene.store.DataOutput;
/**
* Output is a long, for each input term. NOTE: the
* resulting FST is not guaranteed to be minimal! See
* {@link Builder}. You must use {@link #get} to obtain the
* output for a given long value -- do not use autoboxing
* nor create your own Long instance (the value 0
* must map to the {@link #getNoOutput} singleton).
* {@link Builder}.
*
* @lucene.experimental
*/
@ -50,14 +47,6 @@ public final class PositiveIntOutputs extends Outputs<Long> {
return doShare ? singletonShare : singletonNoShare;
}

public Long get(long v) {
if (v == 0) {
return NO_OUTPUT;
} else {
return Long.valueOf(v);
}
}

@Override
public Long common(Long output1, Long output2) {
assert valid(output1);
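With get() removed, callers pass plain Long values and the outputs code canonicalizes equality to NO_OUTPUT itself; that is why call sites later in this commit change from outputs.get(idx) to (long) idx. The old contract existed because identity checks against the singleton fail for a distinct boxed zero, in plain Java:

    Long noOutput = Long.valueOf(0); // the singleton
    Long boxed = new Long(0);        // a distinct instance
    System.out.println(boxed.equals(noOutput)); // true
    System.out.println(boxed == noOutput);      // false -- what used to break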
@ -37,23 +37,21 @@ public final class Util {
// TODO: would be nice not to alloc this on every lookup
final FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>());

final FST.BytesReader fstReader = fst.getBytesReader(0);

// Accumulate output as we go
final T NO_OUTPUT = fst.outputs.getNoOutput();
T output = NO_OUTPUT;
T output = fst.outputs.getNoOutput();
for(int i=0;i<input.length;i++) {
if (fst.findTargetArc(input.ints[input.offset + i], arc, arc) == null) {
if (fst.findTargetArc(input.ints[input.offset + i], arc, arc, fstReader) == null) {
return null;
} else if (arc.output != NO_OUTPUT) {
output = fst.outputs.add(output, arc.output);
}
output = fst.outputs.add(output, arc.output);
}

if (fst.findTargetArc(FST.END_LABEL, arc, arc) == null) {
return null;
} else if (arc.output != NO_OUTPUT) {
return fst.outputs.add(output, arc.output);
if (arc.isFinal()) {
return fst.outputs.add(output, arc.nextFinalOutput);
} else {
return output;
return null;
}
}
@ -64,26 +62,24 @@ public final class Util {
public static<T> T get(FST<T> fst, BytesRef input) throws IOException {
assert fst.inputType == FST.INPUT_TYPE.BYTE1;

final FST.BytesReader fstReader = fst.getBytesReader(0);

// TODO: would be nice not to alloc this on every lookup
final FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>());

// Accumulate output as we go
final T NO_OUTPUT = fst.outputs.getNoOutput();
T output = NO_OUTPUT;
T output = fst.outputs.getNoOutput();
for(int i=0;i<input.length;i++) {
if (fst.findTargetArc(input.bytes[i+input.offset] & 0xFF, arc, arc) == null) {
if (fst.findTargetArc(input.bytes[i+input.offset] & 0xFF, arc, arc, fstReader) == null) {
return null;
} else if (arc.output != NO_OUTPUT) {
output = fst.outputs.add(output, arc.output);
}
output = fst.outputs.add(output, arc.output);
}

if (fst.findTargetArc(FST.END_LABEL, arc, arc) == null) {
return null;
} else if (arc.output != NO_OUTPUT) {
return fst.outputs.add(output, arc.output);
if (arc.isFinal()) {
return fst.outputs.add(output, arc.nextFinalOutput);
} else {
return output;
return null;
}
}
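Both lookup variants now obtain a BytesReader up front and thread it through findTargetArc, and they decide acceptance from arc.isFinal() and arc.nextFinalOutput instead of probing a trailing END_LABEL arc. Calling them stays a one-liner; a sketch assuming an FST<Long> named fst built elsewhere:

    // Returns the term's output, or null if the FST does not accept it.
    Long output = Util.get(fst, new BytesRef("station"));
    if (output == null) {
      System.out.println("not in FST");
    }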
@ -142,7 +138,7 @@ public final class Util {
result.grow(1+upto);
}

fst.readFirstRealArc(arc.target, arc, in);
fst.readFirstRealTargetArc(arc.target, arc, in);

FST.Arc<Long> prevArc = null;

@ -238,6 +234,7 @@ public final class Util {
// A queue of transitions to consider when processing the next level.
final List<FST.Arc<T>> nextLevelQueue = new ArrayList<FST.Arc<T>>();
nextLevelQueue.add(startArc);
//System.out.println("toDot: startArc: " + startArc);

// A list of states on the same level (for ranking).
final List<Integer> sameLevelStates = new ArrayList<Integer>();
@ -289,8 +286,11 @@ public final class Util {

int level = 0;

final FST.BytesReader r = fst.getBytesReader(0);

while (!nextLevelQueue.isEmpty()) {
// we could double buffer here, but it doesn't matter probably.
//System.out.println("next level=" + level);
thisLevelQueue.addAll(nextLevelQueue);
nextLevelQueue.clear();

@ -298,19 +298,19 @@ public final class Util {
out.write("\n  // Transitions and states at level: " + level + "\n");
while (!thisLevelQueue.isEmpty()) {
final FST.Arc<T> arc = thisLevelQueue.remove(thisLevelQueue.size() - 1);
//System.out.println("  pop: " + arc);
if (fst.targetHasArcs(arc)) {
// scan all arcs
// scan all target arcs
//System.out.println("  readFirstTarget...");
final int node = arc.target;
fst.readFirstTargetArc(arc, arc);

if (arc.label == FST.END_LABEL) {
// Skip it -- prior recursion took this into account already
assert !arc.isLast();
fst.readNextArc(arc);
}
fst.readFirstRealTargetArc(arc.target, arc, r);

//System.out.println("    firstTarget: " + arc);

while (true) {

//System.out.println("  cycle arc=" + arc);
// Emit the unseen state and add it to the queue for the next level.
if (arc.target >= 0 && !seen.get(arc.target)) {

@ -329,7 +329,7 @@ public final class Util {
if (fst.isExpandedTarget(arc)) {
stateColor = expandedNodeColor;
} else {
stateColor = null;
stateColor = null;
}

final String finalOutput;
@ -339,7 +339,9 @@ public final class Util {
finalOutput = "";
}

emitDotState(out, Integer.toString(arc.target), arc.isFinal() ? finalStateShape : stateShape, stateColor, finalOutput);
emitDotState(out, Integer.toString(arc.target), stateShape, stateColor, finalOutput);
// To see the node address, use this instead:
//emitDotState(out, Integer.toString(arc.target), stateShape, stateColor, String.valueOf(arc.target));
seen.set(arc.target);
nextLevelQueue.add(new FST.Arc<T>().copyFrom(arc));
sameLevelStates.add(arc.target);
@ -362,14 +364,22 @@ public final class Util {
outs = outs + "/[" + fst.outputs.outputToString(arc.nextFinalOutput) + "]";
}

final String arcColor;
if (arc.flag(FST.BIT_TARGET_NEXT)) {
arcColor = "red";
} else {
arcColor = "black";
}

assert arc.label != FST.END_LABEL;
out.write("  " + node + " -> " + arc.target + " [label=\"" + printableLabel(arc.label) + outs + "\"]\n");
out.write("  " + node + " -> " + arc.target + " [label=\"" + printableLabel(arc.label) + outs + "\"" + (arc.isFinal() ? " style=\"bold\"" : "" ) + " color=\"" + arcColor + "\"]\n");

// Break the loop if we're on the last arc of this state.
if (arc.isLast()) {
//System.out.println("    break");
break;
}
fst.readNextArc(arc);
fst.readNextRealArc(arc, r);
}
}
}
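toDot() now resolves arcs with an explicit BytesReader, draws final arcs bold, and colors an arc red when BIT_TARGET_NEXT is set (the target node immediately follows the arc in the FST's byte array). Rendering an FST mirrors the commented-out usage that appears elsewhere in this commit:

    Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), "UTF-8");
    Util.toDot(fst, w, false, false);
    w.close();
    // then, e.g.: dot -Tpng out.dot -o out.png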
@ -702,12 +702,6 @@ public class MockDirectoryWrapper extends Directory {
return delegate.fileExists(name);
}

@Override
public synchronized long fileModified(String name) throws IOException {
maybeYield();
return delegate.fileModified(name);
}

@Override
public synchronized long fileLength(String name) throws IOException {
maybeYield();
@ -224,14 +224,6 @@ public class TestCrashCausesCorruptIndex extends LuceneTestCase {
return realDirectory.fileLength(name);
}

/**
* {@inheritDoc}
*/
@Override
public long fileModified(String name) throws IOException {
return realDirectory.fileModified(name);
}

/**
* {@inheritDoc}
*/
@ -18,10 +18,12 @@ package org.apache.lucene.index;
*/

import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Collection;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
@ -32,7 +34,6 @@ import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;

@ -47,20 +48,12 @@ public class TestDeletionPolicy extends LuceneTestCase {
final IndexCommit firstCommit = commits.get(0);
long last = SegmentInfos.generationFromSegmentsFileName(firstCommit.getSegmentsFileName());
assertEquals(last, firstCommit.getGeneration());
long lastVersion = firstCommit.getVersion();
long lastTimestamp = firstCommit.getTimestamp();
for(int i=1;i<commits.size();i++) {
final IndexCommit commit = commits.get(i);
long now = SegmentInfos.generationFromSegmentsFileName(commit.getSegmentsFileName());
long nowVersion = commit.getVersion();
long nowTimestamp = commit.getTimestamp();
assertTrue("SegmentInfos commits are out-of-order", now > last);
assertTrue("SegmentInfos versions are out-of-order", nowVersion > lastVersion);
assertTrue("SegmentInfos timestamps are out-of-order: now=" + nowTimestamp + " vs last=" + lastTimestamp, nowTimestamp >= lastTimestamp);
assertEquals(now, commit.getGeneration());
last = now;
lastVersion = nowVersion;
lastTimestamp = nowTimestamp;
}
}

@ -158,6 +151,10 @@ public class TestDeletionPolicy extends LuceneTestCase {
}
}

static long getCommitTime(IndexCommit commit) throws IOException {
return Long.parseLong(commit.getUserData().get("commitTime"));
}

/*
* Delete a commit only when it has been obsoleted by N
* seconds.
@ -184,10 +181,10 @@ public class TestDeletionPolicy extends LuceneTestCase {
IndexCommit lastCommit = commits.get(commits.size()-1);

// Any commit older than expireTime should be deleted:
double expireTime = dir.fileModified(lastCommit.getSegmentsFileName())/1000.0 - expirationTimeSeconds;
double expireTime = getCommitTime(lastCommit)/1000.0 - expirationTimeSeconds;

for (final IndexCommit commit : commits) {
double modTime = dir.fileModified(commit.getSegmentsFileName())/1000.0;
double modTime = getCommitTime(commit)/1000.0;
if (commit != lastCommit && modTime < expireTime) {
commit.delete();
numDelete += 1;
@ -213,6 +210,9 @@ public class TestDeletionPolicy extends LuceneTestCase {
((LogMergePolicy) mp).setUseCompoundFile(true);
}
IndexWriter writer = new IndexWriter(dir, conf);
Map<String,String> commitData = new HashMap<String,String>();
commitData.put("commitTime", String.valueOf(System.currentTimeMillis()));
writer.commit(commitData);
writer.close();

final int ITER = 9;
@ -233,6 +233,9 @@ public class TestDeletionPolicy extends LuceneTestCase {
for(int j=0;j<17;j++) {
addDoc(writer);
}
commitData = new HashMap<String,String>();
commitData.put("commitTime", String.valueOf(System.currentTimeMillis()));
writer.commit(commitData);
writer.close();

if (i < ITER-1) {
@ -269,7 +272,9 @@ public class TestDeletionPolicy extends LuceneTestCase {
// if we are on a filesystem that seems to have only
// 1 second resolution, allow +1 second in commit
// age tolerance:
long modTime = dir.fileModified(fileName);
SegmentInfos sis = new SegmentInfos();
sis.read(dir, fileName);
long modTime = Long.parseLong(sis.getUserData().get("commitTime"));
oneSecondResolution &= (modTime % 1000) == 0;
final long leeway = (long) ((SECONDS + (oneSecondResolution ? 1.0:0.0))*1000);
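Because Directory.fileModified is deprecated by this commit, the test now stamps every commit with a wall-clock value in the commit user data and reads it back instead of trusting filesystem timestamps. The round trip, exactly as the hunks above do it:

    Map<String,String> commitData = new HashMap<String,String>();
    commitData.put("commitTime", String.valueOf(System.currentTimeMillis()));
    writer.commit(commitData);

    // later, for any IndexCommit:
    long commitTime = Long.parseLong(commit.getUserData().get("commitTime"));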
@ -126,10 +126,6 @@ public class TestFieldsReader extends LuceneTestCase {
return fsDir.fileExists(name);
}
@Override
public long fileModified(String name) throws IOException {
return fsDir.fileModified(name);
}
@Override
public void deleteFile(String name) throws IOException {
fsDir.deleteFile(name);
}
@ -34,12 +34,10 @@ public class TestIndexCommit extends LuceneTestCase {

IndexCommit ic1 = new IndexCommit() {
@Override public String getSegmentsFileName() { return "a"; }
@Override public long getVersion() { return 12; }
@Override public Directory getDirectory() { return dir; }
@Override public Collection<String> getFileNames() throws IOException { return null; }
@Override public void delete() {}
@Override public long getGeneration() { return 0; }
@Override public long getTimestamp() throws IOException { return 1;}
@Override public Map<String, String> getUserData() throws IOException { return null; }
@Override public boolean isDeleted() { return false; }
@Override public int getSegmentCount() { return 2; }
@ -47,12 +45,10 @@ public class TestIndexCommit extends LuceneTestCase {

IndexCommit ic2 = new IndexCommit() {
@Override public String getSegmentsFileName() { return "b"; }
@Override public long getVersion() { return 12; }
@Override public Directory getDirectory() { return dir; }
@Override public Collection<String> getFileNames() throws IOException { return null; }
@Override public void delete() {}
@Override public long getGeneration() { return 0; }
@Override public long getTimestamp() throws IOException { return 1;}
@Override public Map<String, String> getUserData() throws IOException { return null; }
@Override public boolean isDeleted() { return false; }
@Override public int getSegmentCount() { return 2; }
@ -381,60 +381,6 @@ public class TestIndexReader extends LuceneTestCase {
_TestUtil.rmDir(dirFile);
}

public void testLastModified() throws Exception {
for(int i=0;i<2;i++) {
final Directory dir = newDirectory();
assertFalse(IndexReader.indexExists(dir));
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE));
addDocumentWithFields(writer);
assertTrue(IndexWriter.isLocked(dir)); // writer open, so dir is locked
writer.close();
assertTrue(IndexReader.indexExists(dir));
IndexReader reader = IndexReader.open(dir);
assertFalse(IndexWriter.isLocked(dir)); // reader only, no lock
long version = IndexReader.lastModified(dir);
if (i == 1) {
long version2 = IndexReader.lastModified(dir);
assertEquals(version, version2);
}
reader.close();
// modify index and check version has been
// incremented:
Thread.sleep(1000);

writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE));
addDocumentWithFields(writer);
writer.close();
reader = IndexReader.open(dir);
assertTrue("old lastModified is " + version + "; new lastModified is " + IndexReader.lastModified(dir), version <= IndexReader.lastModified(dir));
reader.close();
dir.close();
}
}

public void testVersion() throws IOException {
Directory dir = newDirectory();
assertFalse(IndexReader.indexExists(dir));
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
addDocumentWithFields(writer);
assertTrue(IndexWriter.isLocked(dir)); // writer open, so dir is locked
writer.close();
assertTrue(IndexReader.indexExists(dir));
IndexReader reader = IndexReader.open(dir);
assertFalse(IndexWriter.isLocked(dir)); // reader only, no lock
long version = IndexReader.getCurrentVersion(dir);
reader.close();
// modify index and check version has been
// incremented:
writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE));
addDocumentWithFields(writer);
writer.close();
reader = IndexReader.open(dir);
assertTrue("old version is " + version + "; new version is " + IndexReader.getCurrentVersion(dir), version < IndexReader.getCurrentVersion(dir));
reader.close();
dir.close();
}

public void testOpenReaderAfterDelete() throws IOException {
File dirFile = _TestUtil.getTempDir("deletetest");
Directory dir = newFSDirectory(dirFile);
@ -348,12 +348,6 @@ public class TestBufferedIndexInput extends LuceneTestCase {
dir.deleteFile(name);
}
@Override
public long fileModified(String name)
throws IOException
{
return dir.fileModified(name);
}
@Override
public boolean fileExists(String name)
throws IOException
{
@ -89,11 +89,11 @@ public class TestFSTs extends LuceneTestCase {
return br;
}

private static IntsRef toIntsRef(String s, int inputMode) {
static IntsRef toIntsRef(String s, int inputMode) {
return toIntsRef(s, inputMode, new IntsRef(10));
}

private static IntsRef toIntsRef(String s, int inputMode, IntsRef ir) {
static IntsRef toIntsRef(String s, int inputMode, IntsRef ir) {
if (inputMode == 0) {
// utf8
return toIntsRef(new BytesRef(s), ir);
@ -103,7 +103,7 @@ public class TestFSTs extends LuceneTestCase {
}
}

private static IntsRef toIntsRefUTF32(String s, IntsRef ir) {
static IntsRef toIntsRefUTF32(String s, IntsRef ir) {
final int charLength = s.length();
int charIdx = 0;
int intIdx = 0;
@ -120,7 +120,7 @@ public class TestFSTs extends LuceneTestCase {
return ir;
}

private static IntsRef toIntsRef(BytesRef br, IntsRef ir) {
static IntsRef toIntsRef(BytesRef br, IntsRef ir) {
if (br.length > ir.ints.length) {
ir.grow(br.length);
}
@ -172,7 +172,7 @@ public class TestFSTs extends LuceneTestCase {
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
final List<FSTTester.InputOutput<Long>> pairs = new ArrayList<FSTTester.InputOutput<Long>>(terms2.length);
for(int idx=0;idx<terms2.length;idx++) {
pairs.add(new FSTTester.InputOutput<Long>(terms2[idx], outputs.get(idx)));
pairs.add(new FSTTester.InputOutput<Long>(terms2[idx], (long) idx));
}
final FST<Long> fst = new FSTTester<Long>(random, dir, inputMode, pairs, outputs, true).doTest(0, 0, false);
assertNotNull(fst);
@ -230,7 +230,7 @@ public class TestFSTs extends LuceneTestCase {
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
final List<FSTTester.InputOutput<Long>> pairs = new ArrayList<FSTTester.InputOutput<Long>>(terms.length);
for(int idx=0;idx<terms.length;idx++) {
pairs.add(new FSTTester.InputOutput<Long>(terms[idx], outputs.get(idx)));
pairs.add(new FSTTester.InputOutput<Long>(terms[idx], (long) idx));
}
new FSTTester<Long>(random, dir, inputMode, pairs, outputs, true).doTest();
}
@ -244,7 +244,7 @@ public class TestFSTs extends LuceneTestCase {
for(int idx=0;idx<terms.length;idx++) {
final long value = lastOutput + _TestUtil.nextInt(random, 1, 1000);
lastOutput = value;
pairs.add(new FSTTester.InputOutput<Long>(terms[idx], outputs.get(value)));
pairs.add(new FSTTester.InputOutput<Long>(terms[idx], value));
}
new FSTTester<Long>(random, dir, inputMode, pairs, outputs, doShare).doTest();
}
@ -254,7 +254,7 @@ public class TestFSTs extends LuceneTestCase {
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(random.nextBoolean());
final List<FSTTester.InputOutput<Long>> pairs = new ArrayList<FSTTester.InputOutput<Long>>(terms.length);
for(int idx=0;idx<terms.length;idx++) {
pairs.add(new FSTTester.InputOutput<Long>(terms[idx], outputs.get(random.nextLong()) & Long.MAX_VALUE));
pairs.add(new FSTTester.InputOutput<Long>(terms[idx], random.nextLong() & Long.MAX_VALUE));
}
new FSTTester<Long>(random, dir, inputMode, pairs, outputs, false).doTest();
}
@ -270,8 +270,7 @@ public class TestFSTs extends LuceneTestCase {
final long value = lastOutput + _TestUtil.nextInt(random, 1, 1000);
lastOutput = value;
pairs.add(new FSTTester.InputOutput<PairOutputs.Pair<Long,Long>>(terms[idx],
outputs.get(o1.get(idx),
o2.get(value))));
outputs.newPair((long) idx, value)));
}
new FSTTester<PairOutputs.Pair<Long,Long>>(random, dir, inputMode, pairs, outputs, false).doTest();
}
@ -393,6 +392,7 @@ public class TestFSTs extends LuceneTestCase {
final FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>());
final T NO_OUTPUT = fst.outputs.getNoOutput();
T output = NO_OUTPUT;
final FST.BytesReader fstReader = fst.getBytesReader(0);

for(int i=0;i<=term.length;i++) {
final int label;
@ -401,8 +401,9 @@ public class TestFSTs extends LuceneTestCase {
} else {
label = term.ints[term.offset+i];
}
//System.out.println("   loop i=" + i + " label=" + label + " output=" + fst.outputs.outputToString(output) + " curArc: target=" + arc.target + " isFinal?=" + arc.isFinal());
if (fst.findTargetArc(label, arc, arc) == null) {
// System.out.println("   loop i=" + i + " label=" + label + " output=" + fst.outputs.outputToString(output) + " curArc: target=" + arc.target + " isFinal?=" + arc.isFinal());
if (fst.findTargetArc(label, arc, arc, fstReader) == null) {
// System.out.println("     not found");
if (prefixLength != null) {
prefixLength[0] = i;
return output;
@ -462,16 +463,19 @@ public class TestFSTs extends LuceneTestCase {

FST<T> doTest(int prune1, int prune2, boolean allowRandomSuffixSharing) throws IOException {
if (VERBOSE) {
System.out.println("TEST: prune1=" + prune1 + " prune2=" + prune2);
System.out.println("\nTEST: prune1=" + prune1 + " prune2=" + prune2);
}

final boolean willRewrite = random.nextBoolean();

final Builder<T> builder = new Builder<T>(inputMode == 0 ? FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4,
prune1, prune2,
prune1==0 && prune2==0,
allowRandomSuffixSharing ? random.nextBoolean() : true,
allowRandomSuffixSharing ? _TestUtil.nextInt(random, 1, 10) : Integer.MAX_VALUE,
outputs,
null);
null,
willRewrite);

for(InputOutput<T> pair : pairs) {
if (pair.output instanceof UpToTwoPositiveIntOutputs.TwoLongs) {
@ -486,7 +490,7 @@ public class TestFSTs extends LuceneTestCase {
}
FST<T> fst = builder.finish();

if (random.nextBoolean() && fst != null) {
if (random.nextBoolean() && fst != null && !willRewrite) {
TestFSTs t = new TestFSTs();
IOContext context = t.newIOContext(random);
IndexOutput out = dir.createOutput("fst.bin", context);
@ -522,6 +526,21 @@ public class TestFSTs extends LuceneTestCase {
verifyPruned(inputMode, fst, prune1, prune2);
}

if (willRewrite && fst != null) {
if (VERBOSE) {
System.out.println("TEST: now rewrite");
}
final FST<T> packed = fst.pack(_TestUtil.nextInt(random, 1, 10), _TestUtil.nextInt(random, 0, 10000000));
if (VERBOSE) {
System.out.println("TEST: now verify packed FST");
}
if (prune1 == 0 && prune2 == 0) {
verifyUnPruned(inputMode, packed);
} else {
verifyPruned(inputMode, packed, prune1, prune2);
}
}

return fst;
}
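The new willRewrite path exercises packing: after finish(), the FST can be rewritten into a more compact form, spending extra RAM during the rewrite for a smaller result. The two int arguments appear in this commit only as tuning knobs (this test randomizes them; the Kuromoji dictionary builder below uses pack(2, 100000)), so treat concrete values as illustrative:

    FST<Long> packed = fst.pack(4, 100000000);
    System.out.println("packed size=" + packed.sizeInBytes() + " bytes");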
@ -638,7 +657,7 @@ public class TestFSTs extends LuceneTestCase {
num = atLeast(100);
for(int iter=0;iter<num;iter++) {
if (VERBOSE) {
System.out.println("TEST: iter=" + iter);
System.out.println("  iter=" + iter);
}
if (random.nextBoolean()) {
// seek to term that doesn't exist:
@ -866,7 +885,15 @@ public class TestFSTs extends LuceneTestCase {
prefixes.put(IntsRef.deepCopyOf(scratch), cmo);
} else {
cmo.count++;
cmo.output = outputs.common(cmo.output, pair.output);
T output1 = cmo.output;
if (output1.equals(outputs.getNoOutput())) {
output1 = outputs.getNoOutput();
}
T output2 = pair.output;
if (output2.equals(outputs.getNoOutput())) {
output2 = outputs.getNoOutput();
}
cmo.output = outputs.common(output1, output2);
}
if (idx == pair.input.length) {
cmo.isFinal = true;
@ -992,7 +1019,7 @@ public class TestFSTs extends LuceneTestCase {

public void testRandomWords() throws IOException {
testRandomWords(1000, atLeast(2));
//testRandomWords(20, 100);
//testRandomWords(100, 1);
}

String inputModeToString(int mode) {
@ -1055,50 +1082,6 @@ public class TestFSTs extends LuceneTestCase {
}
}

// NOTE: this test shows a case where our current builder
// fails to produce minimal FST:
/*
public void test3() throws Exception {
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
IntsRef scratchIntsRef = new IntsRef();
builder.add(Util.toIntsRef(new BytesRef("aa$"), scratchIntsRef), outputs.get(0));
builder.add(Util.toIntsRef(new BytesRef("aab$"), scratchIntsRef), 1L);
builder.add(Util.toIntsRef(new BytesRef("bbb$"), scratchIntsRef), 2L);
final FST<Long> fst = builder.finish();
//System.out.println("NODES " + fst.getNodeCount() + " ARCS " + fst.getArcCount());
// NOTE: we produce 7 nodes today
assertEquals(6, fst.getNodeCount());
// NOTE: we produce 8 arcs today
assertEquals(7, fst.getArcCount());
//Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), "UTF-8");
//Util.toDot(fst, w, false, false);
//w.close();
}
*/

// NOTE: this test shows a case where our current builder
// fails to produce minimal FST:
/*
public void test4() throws Exception {
final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
Builder<BytesRef> builder = new Builder<BytesRef>(FST.INPUT_TYPE.BYTE1, outputs);
IntsRef scratchIntsRef = new IntsRef();
builder.add(Util.toIntsRef(new BytesRef("aa$"), scratchIntsRef), outputs.getNoOutput());
builder.add(Util.toIntsRef(new BytesRef("aab$"), scratchIntsRef), new BytesRef("1"));
builder.add(Util.toIntsRef(new BytesRef("bbb$"), scratchIntsRef), new BytesRef("11"));
final FST<BytesRef> fst = builder.finish();
//System.out.println("NODES " + fst.getNodeCount() + " ARCS " + fst.getArcCount());
// NOTE: we produce 7 nodes today
assertEquals(6, fst.getNodeCount());
// NOTE: we produce 8 arcs today
assertEquals(7, fst.getArcCount());
//Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), "UTF-8");
//Util.toDot(fst, w, false, false);
//w.close();
}
*/

// Build FST for all unique terms in the test line docs
// file, up until a time limit
public void testRealTerms() throws Exception {
@ -1126,7 +1109,10 @@ public class TestFSTs extends LuceneTestCase {
IndexReader r = IndexReader.open(writer, true);
writer.close();
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(random.nextBoolean());
Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);

final boolean doRewrite = random.nextBoolean();

Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, doRewrite);

boolean storeOrd = random.nextBoolean();
if (VERBOSE) {
@ -1162,59 +1148,69 @@ public class TestFSTs extends LuceneTestCase {
} else {
output = termsEnum.docFreq();
}
builder.add(Util.toIntsRef(term, scratchIntsRef), outputs.get(output));
builder.add(Util.toIntsRef(term, scratchIntsRef), (long) output);
ord++;
if (VERBOSE && ord % 100000 == 0 && LuceneTestCase.TEST_NIGHTLY) {
System.out.println(ord + " terms...");
}
}
final FST<Long> fst = builder.finish();
FST<Long> fst = builder.finish();
if (VERBOSE) {
System.out.println("FST: " + docCount + " docs; " + ord + " terms; " + fst.getNodeCount() + " nodes; " + fst.getArcCount() + " arcs;" + " " + fst.sizeInBytes() + " bytes");
}

if (ord > 0) {
// Now confirm BytesRefFSTEnum and TermsEnum act the
// same:
final BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<Long>(fst);
int num = atLeast(1000);
for(int iter=0;iter<num;iter++) {
final BytesRef randomTerm = new BytesRef(getRandomString());

if (VERBOSE) {
System.out.println("TEST: seek non-exist " + randomTerm.utf8ToString() + " " + randomTerm);
for(int rewriteIter=0;rewriteIter<2;rewriteIter++) {
if (rewriteIter == 1) {
if (doRewrite) {
// Verify again, with packed FST:
fst = fst.pack(_TestUtil.nextInt(random, 1, 10), _TestUtil.nextInt(random, 0, 10000000));
} else {
break;
}
}
// Now confirm BytesRefFSTEnum and TermsEnum act the
// same:
final BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<Long>(fst);
int num = atLeast(1000);
for(int iter=0;iter<num;iter++) {
final BytesRef randomTerm = new BytesRef(getRandomString());

if (VERBOSE) {
System.out.println("TEST: seek non-exist " + randomTerm.utf8ToString() + " " + randomTerm);
}

final TermsEnum.SeekStatus seekResult = termsEnum.seekCeil(randomTerm);
final BytesRefFSTEnum.InputOutput fstSeekResult = fstEnum.seekCeil(randomTerm);
final TermsEnum.SeekStatus seekResult = termsEnum.seekCeil(randomTerm);
final BytesRefFSTEnum.InputOutput fstSeekResult = fstEnum.seekCeil(randomTerm);

if (seekResult == TermsEnum.SeekStatus.END) {
assertNull("got " + (fstSeekResult == null ? "null" : fstSeekResult.input.utf8ToString()) + " but expected null", fstSeekResult);
} else {
assertSame(termsEnum, fstEnum, storeOrd);
for(int nextIter=0;nextIter<10;nextIter++) {
if (VERBOSE) {
System.out.println("TEST: next");
if (storeOrd) {
System.out.println("  ord=" + termsEnum.ord());
}
}
if (termsEnum.next() != null) {
if (seekResult == TermsEnum.SeekStatus.END) {
assertNull("got " + (fstSeekResult == null ? "null" : fstSeekResult.input.utf8ToString()) + " but expected null", fstSeekResult);
} else {
assertSame(termsEnum, fstEnum, storeOrd);
for(int nextIter=0;nextIter<10;nextIter++) {
if (VERBOSE) {
System.out.println("  term=" + termsEnum.term().utf8ToString());
System.out.println("TEST: next");
if (storeOrd) {
System.out.println("  ord=" + termsEnum.ord());
}
}
assertNotNull(fstEnum.next());
assertSame(termsEnum, fstEnum, storeOrd);
} else {
if (VERBOSE) {
System.out.println("  end!");
if (termsEnum.next() != null) {
if (VERBOSE) {
System.out.println("  term=" + termsEnum.term().utf8ToString());
}
assertNotNull(fstEnum.next());
assertSame(termsEnum, fstEnum, storeOrd);
} else {
if (VERBOSE) {
System.out.println("  end!");
}
BytesRefFSTEnum.InputOutput<Long> nextResult = fstEnum.next();
if (nextResult != null) {
System.out.println("expected null but got: input=" + nextResult.input.utf8ToString() + " output=" + outputs.outputToString(nextResult.output));
fail();
}
break;
}
BytesRefFSTEnum.InputOutput<Long> nextResult = fstEnum.next();
if (nextResult != null) {
System.out.println("expected null but got: input=" + nextResult.input.utf8ToString() + " output=" + outputs.outputToString(nextResult.output));
fail();
}
break;
}
}
}
@ -1248,14 +1244,17 @@ public class TestFSTs extends LuceneTestCase {
private int inputMode;
private final Outputs<T> outputs;
private final Builder<T> builder;
private final boolean doPack;

public VisitTerms(String dirOut, String wordsFileIn, int inputMode, int prune, Outputs<T> outputs) {
public VisitTerms(String dirOut, String wordsFileIn, int inputMode, int prune, Outputs<T> outputs, boolean doPack, boolean noArcArrays) {
this.dirOut = dirOut;
this.wordsFileIn = wordsFileIn;
this.inputMode = inputMode;
this.outputs = outputs;

builder = new Builder<T>(inputMode == 0 ? FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4, 0, prune, prune == 0, true, Integer.MAX_VALUE, outputs, null);
this.doPack = doPack;

builder = new Builder<T>(inputMode == 0 ? FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4, 0, prune, prune == 0, true, Integer.MAX_VALUE, outputs, null, doPack);
builder.setAllowArrayArcs(!noArcArrays);
}

protected abstract T getOutput(IntsRef input, int ord) throws IOException;
@ -1287,14 +1286,15 @@ public class TestFSTs extends LuceneTestCase {
}

assert builder.getTermCount() == ord;
final FST<T> fst = builder.finish();
FST<T> fst = builder.finish();
if (fst == null) {
System.out.println("FST was fully pruned!");
System.exit(0);
}

if (dirOut == null)
if (dirOut == null) {
return;
}

System.out.println(ord + " terms; " + fst.getNodeCount() + " nodes; " + fst.getArcCount() + " arcs; " + fst.getArcWithOutputCount() + " arcs w/ output; tot size " + fst.sizeInBytes());
if (fst.getNodeCount() < 100) {
@ -1304,11 +1304,16 @@ public class TestFSTs extends LuceneTestCase {
System.out.println("Wrote FST to out.dot");
}

if (doPack) {
System.out.println("Pack...");
fst = fst.pack(4, 100000000);
System.out.println("New size " + fst.sizeInBytes() + " bytes");
}

Directory dir = FSDirectory.open(new File(dirOut));
IndexOutput out = dir.createOutput("fst.bin", IOContext.DEFAULT);
fst.save(out);
out.close();

System.out.println("Saved FST to fst.bin.");

if (!verify) {
@ -1317,45 +1322,50 @@ public class TestFSTs extends LuceneTestCase {

System.out.println("\nNow verify...");

is.close();
is = new BufferedReader(new InputStreamReader(new FileInputStream(wordsFileIn), "UTF-8"), 65536);

ord = 0;
tStart = System.currentTimeMillis();
while(true) {
String w = is.readLine();
if (w == null) {
break;
}
toIntsRef(w, inputMode, intsRef);
T expected = getOutput(intsRef, ord);
T actual = Util.get(fst, intsRef);
if (actual == null) {
throw new RuntimeException("unexpected null output on input=" + w);
}
if (!actual.equals(expected)) {
throw new RuntimeException("wrong output (got " + outputs.outputToString(actual) + " but expected " + outputs.outputToString(expected) + ") on input=" + w);
is.close();
is = new BufferedReader(new InputStreamReader(new FileInputStream(wordsFileIn), "UTF-8"), 65536);

ord = 0;
tStart = System.currentTimeMillis();
while(true) {
String w = is.readLine();
if (w == null) {
break;
}
toIntsRef(w, inputMode, intsRef);
T expected = getOutput(intsRef, ord);
T actual = Util.get(fst, intsRef);
if (actual == null) {
throw new RuntimeException("unexpected null output on input=" + w);
}
if (!actual.equals(expected)) {
throw new RuntimeException("wrong output (got " + outputs.outputToString(actual) + " but expected " + outputs.outputToString(expected) + ") on input=" + w);
}

ord++;
if (ord % 500000 == 0) {
System.out.println(((System.currentTimeMillis()-tStart)/1000.0) + "s: " + ord + "...");
}
if (ord >= limit) {
break;
}
}

ord++;
if (ord % 500000 == 0) {
System.out.println(((System.currentTimeMillis()-tStart)/1000.0) + "s: " + ord + "...");
}
if (ord >= limit) {
break;
}
double totSec = ((System.currentTimeMillis() - tStart)/1000.0);
System.out.println("Verify took " + totSec + " sec + (" + (int) ((totSec*1000000000/ord)) + " nsec per lookup)");

// NOTE: comment out to profile lookup...
break;
}

double totSec = ((System.currentTimeMillis() - tStart)/1000.0);
System.out.println("Verify took " + totSec + " sec + (" + (int) ((totSec*1000000000/ord)) + " nsec per lookup)");

} finally {
is.close();
}
}
}

// java -cp build/classes/test:build/classes/test-framework:build/classes/java:lib/junit-4.7.jar org.apache.lucene.util.automaton.fst.TestFSTs /x/tmp/allTerms3.txt out
// java -cp build/classes/test:build/classes/test-framework:build/classes/java:lib/junit-4.7.jar org.apache.lucene.util.fst.TestFSTs /x/tmp/allTerms3.txt out
public static void main(String[] args) throws IOException {
int prune = 0;
int limit = Integer.MAX_VALUE;
@ -1363,7 +1373,8 @@ public class TestFSTs extends LuceneTestCase {
boolean storeOrds = false;
boolean storeDocFreqs = false;
boolean verify = true;

boolean doPack = false;
boolean noArcArrays = false;
String wordsFileIn = null;
String dirOut = null;

@ -1381,10 +1392,14 @@ public class TestFSTs extends LuceneTestCase {
inputMode = 1;
} else if (args[idx].equals("-docFreq")) {
storeDocFreqs = true;
} else if (args[idx].equals("-noArcArrays")) {
noArcArrays = true;
} else if (args[idx].equals("-ords")) {
storeOrds = true;
} else if (args[idx].equals("-noverify")) {
verify = false;
} else if (args[idx].equals("-pack")) {
doPack = true;
} else if (args[idx].startsWith("-")) {
System.err.println("Unrecognized option: " + args[idx]);
System.exit(-1);
@ -1413,44 +1428,44 @@ public class TestFSTs extends LuceneTestCase {
final PositiveIntOutputs o1 = PositiveIntOutputs.getSingleton(true);
final PositiveIntOutputs o2 = PositiveIntOutputs.getSingleton(false);
final PairOutputs<Long,Long> outputs = new PairOutputs<Long,Long>(o1, o2);
new VisitTerms<PairOutputs.Pair<Long,Long>>(dirOut, wordsFileIn, inputMode, prune, outputs) {
new VisitTerms<PairOutputs.Pair<Long,Long>>(dirOut, wordsFileIn, inputMode, prune, outputs, doPack, noArcArrays) {
Random rand;
@Override
public PairOutputs.Pair<Long,Long> getOutput(IntsRef input, int ord) {
if (ord == 0) {
rand = new Random(17);
}
return new PairOutputs.Pair<Long,Long>(o1.get(ord),
o2.get(_TestUtil.nextInt(rand, 1, 5000)));
return outputs.newPair((long) ord,
(long) _TestUtil.nextInt(rand, 1, 5000));
}
}.run(limit, verify);
} else if (storeOrds) {
// Store only ords
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
new VisitTerms<Long>(dirOut, wordsFileIn, inputMode, prune, outputs) {
new VisitTerms<Long>(dirOut, wordsFileIn, inputMode, prune, outputs, doPack, noArcArrays) {
@Override
public Long getOutput(IntsRef input, int ord) {
return outputs.get(ord);
return (long) ord;
}
}.run(limit, verify);
} else if (storeDocFreqs) {
// Store only docFreq
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(false);
new VisitTerms<Long>(dirOut, wordsFileIn, inputMode, prune, outputs) {
new VisitTerms<Long>(dirOut, wordsFileIn, inputMode, prune, outputs, doPack, noArcArrays) {
Random rand;
@Override
public Long getOutput(IntsRef input, int ord) {
if (ord == 0) {
rand = new Random(17);
}
return outputs.get(_TestUtil.nextInt(rand, 1, 5000));
return (long) _TestUtil.nextInt(rand, 1, 5000);
}
}.run(limit, verify);
} else {
// Store nothing
final NoOutputs outputs = NoOutputs.getSingleton();
final Object NO_OUTPUT = outputs.getNoOutput();
new VisitTerms<Object>(dirOut, wordsFileIn, inputMode, prune, outputs) {
new VisitTerms<Object>(dirOut, wordsFileIn, inputMode, prune, outputs, doPack, noArcArrays) {
@Override
public Object getOutput(IntsRef input, int ord) {
return NO_OUTPUT;
@ -1468,6 +1483,46 @@ public class TestFSTs extends LuceneTestCase {
assertNull(fstEnum.seekCeil(new BytesRef("foobaz")));
}

/*
public void testTrivial() throws Exception {

// Get outputs -- passing true means FST will share
// (delta code) the outputs.  This should result in
// smaller FST if the outputs grow monotonically.  But
// if numbers are "random", false should give smaller
// final size:
final NoOutputs outputs = NoOutputs.getSingleton();

String[] strings = new String[] {"station", "commotion", "elation", "elastic", "plastic", "stop", "ftop", "ftation", "stat"};

final Builder<Object> builder = new Builder<Object>(FST.INPUT_TYPE.BYTE1,
0, 0,
true,
true,
Integer.MAX_VALUE,
outputs,
null,
true);
Arrays.sort(strings);
final IntsRef scratch = new IntsRef();
for(String s : strings) {
builder.add(Util.toIntsRef(new BytesRef(s), scratch), outputs.getNoOutput());
}
final FST<Object> fst = builder.finish();
System.out.println("DOT before rewrite");
Writer w = new OutputStreamWriter(new FileOutputStream("/mnt/scratch/before.dot"));
Util.toDot(fst, w, false, false);
w.close();

final FST<Object> rewrite = new FST<Object>(fst, 1, 100);

System.out.println("DOT after rewrite");
w = new OutputStreamWriter(new FileOutputStream("/mnt/scratch/after.dot"));
Util.toDot(rewrite, w, false, false);
w.close();
}
*/

public void testSimple() throws Exception {

// Get outputs -- passing true means FST will share
@ -1484,9 +1539,9 @@ public class TestFSTs extends LuceneTestCase {
final BytesRef b = new BytesRef("b");
final BytesRef c = new BytesRef("c");

builder.add(Util.toIntsRef(a, new IntsRef()), outputs.get(17));
builder.add(Util.toIntsRef(b, new IntsRef()), outputs.get(42));
builder.add(Util.toIntsRef(c, new IntsRef()), outputs.get(13824324872317238L));
builder.add(Util.toIntsRef(a, new IntsRef()), 17L);
builder.add(Util.toIntsRef(b, new IntsRef()), 42L);
builder.add(Util.toIntsRef(c, new IntsRef()), 13824324872317238L);

final FST<Long> fst = builder.finish();

@ -1795,11 +1850,11 @@ public class TestFSTs extends LuceneTestCase {
public void testFinalOutputOnEndState() throws Exception {
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);

final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE4, 2, 0, true, true, Integer.MAX_VALUE, outputs, null);
builder.add(Util.toUTF32("stat", new IntsRef()), outputs.get(17));
builder.add(Util.toUTF32("station", new IntsRef()), outputs.get(10));
final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE4, 2, 0, true, true, Integer.MAX_VALUE, outputs, null, random.nextBoolean());
builder.add(Util.toUTF32("stat", new IntsRef()), 17L);
builder.add(Util.toUTF32("station", new IntsRef()), 10L);
final FST<Long> fst = builder.finish();
//Writer w = new OutputStreamWriter(new FileOutputStream("/x/tmp/out.dot"));
//Writer w = new OutputStreamWriter(new FileOutputStream("/x/tmp3/out.dot"));
StringWriter w = new StringWriter();
Util.toDot(fst, w, false, false);
w.close();
@ -1809,8 +1864,8 @@ public class TestFSTs extends LuceneTestCase {

public void testInternalFinalState() throws Exception {
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);

final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, null);
final boolean willRewrite = random.nextBoolean();
final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, willRewrite);
builder.add(Util.toIntsRef(new BytesRef("stat"), new IntsRef()), outputs.getNoOutput());
builder.add(Util.toIntsRef(new BytesRef("station"), new IntsRef()), outputs.getNoOutput());
final FST<Long> fst = builder.finish();
@ -1819,17 +1874,23 @@ public class TestFSTs extends LuceneTestCase {
Util.toDot(fst, w, false, false);
w.close();
//System.out.println(w.toString());
assertTrue(w.toString().indexOf("6 [shape=doublecircle") != -1);
final String expected;
if (willRewrite) {
expected = "4 -> 3 [label=\"t\" style=\"bold\"";
} else {
expected = "8 -> 6 [label=\"t\" style=\"bold\"";
}
assertTrue(w.toString().indexOf(expected) != -1);
}

// Make sure raw FST can differentiate between final vs
// non-final end nodes
public void testNonFinalStopNodes() throws Exception {
public void testNonFinalStopNode() throws Exception {
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
final Long nothing = outputs.getNoOutput();
final Builder<Long> b = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);

final FST<Long> fst = new FST<Long>(FST.INPUT_TYPE.BYTE1, outputs);
final FST<Long> fst = new FST<Long>(FST.INPUT_TYPE.BYTE1, outputs, false);

final Builder.UnCompiledNode<Long> rootNode = new Builder.UnCompiledNode<Long>(b, 0);

@ -1839,8 +1900,8 @@ public class TestFSTs extends LuceneTestCase {
node.isFinal = true;
rootNode.addArc('a', node);
final Builder.CompiledNode frozen = new Builder.CompiledNode();
frozen.address = fst.addNode(node);
rootNode.arcs[0].nextFinalOutput = outputs.get(17);
frozen.node = fst.addNode(node);
rootNode.arcs[0].nextFinalOutput = 17L;
rootNode.arcs[0].isFinal = true;
rootNode.arcs[0].output = nothing;
rootNode.arcs[0].target = frozen;
@ -1851,13 +1912,18 @@ public class TestFSTs extends LuceneTestCase {
final Builder.UnCompiledNode<Long> node = new Builder.UnCompiledNode<Long>(b, 0);
rootNode.addArc('b', node);
final Builder.CompiledNode frozen = new Builder.CompiledNode();
frozen.address = fst.addNode(node);
frozen.node = fst.addNode(node);
rootNode.arcs[1].nextFinalOutput = nothing;
rootNode.arcs[1].output = outputs.get(42);
rootNode.arcs[1].output = 42L;
rootNode.arcs[1].target = frozen;
}

fst.finish(fst.addNode(rootNode));

StringWriter w = new StringWriter();
//Writer w = new OutputStreamWriter(new FileOutputStream("/x/tmp3/out.dot"));
Util.toDot(fst, w, false, false);
w.close();

checkStopNodes(fst, outputs);

@ -226,6 +226,9 @@ public final class SynonymFilter extends TokenFilter {

private final FST<BytesRef> fst;

private final FST.BytesReader fstReader;


private final BytesRef scratchBytes = new BytesRef();
private final CharsRef scratchChars = new CharsRef();

@ -241,7 +244,7 @@ public final class SynonymFilter extends TokenFilter {
this.synonyms = synonyms;
this.ignoreCase = ignoreCase;
this.fst = synonyms.fst;

this.fstReader = fst.getBytesReader(0);
if (fst == null) {
throw new IllegalArgumentException("fst must be non-null");
}
@ -366,7 +369,7 @@ public final class SynonymFilter extends TokenFilter {
int bufUpto = 0;
while(bufUpto < bufferLen) {
final int codePoint = Character.codePointAt(buffer, bufUpto, bufferLen);
if (fst.findTargetArc(ignoreCase ? Character.toLowerCase(codePoint) : codePoint, scratchArc, scratchArc) == null) {
if (fst.findTargetArc(ignoreCase ? Character.toLowerCase(codePoint) : codePoint, scratchArc, scratchArc, fstReader) == null) {
//System.out.println("    stop");
break byToken;
}
@ -388,7 +391,7 @@ public final class SynonymFilter extends TokenFilter {

// See if the FST wants to continue matching (ie, needs to
// see the next input token):
if (fst.findTargetArc(SynonymMap.WORD_SEPARATOR, scratchArc, scratchArc) == null) {
if (fst.findTargetArc(SynonymMap.WORD_SEPARATOR, scratchArc, scratchArc, fstReader) == null) {
// No further rules can match here; we're done
// searching for matching rules starting at the
// current input position.
@ -47,16 +47,17 @@ public final class TokenInfoFST {
FST.Arc<Long> firstArc = new FST.Arc<Long>();
fst.getFirstArc(firstArc);
FST.Arc<Long> arc = new FST.Arc<Long>();
final FST.BytesReader fstReader = fst.getBytesReader(0);
// TODO: jump to 3040, readNextRealArc to ceiling? (just be careful we don't add bugs)
for (int i = 0; i < rootCache.length; i++) {
if (fst.findTargetArc(0x3040 + i, firstArc, arc) != null) {
if (fst.findTargetArc(0x3040 + i, firstArc, arc, fstReader) != null) {
rootCache[i] = new FST.Arc<Long>().copyFrom(arc);
}
}
return rootCache;
}

public FST.Arc<Long> findTargetArc(int ch, FST.Arc<Long> follow, FST.Arc<Long> arc, boolean useCache) throws IOException {
public FST.Arc<Long> findTargetArc(int ch, FST.Arc<Long> follow, FST.Arc<Long> arc, boolean useCache, FST.BytesReader fstReader) throws IOException {
if (useCache && ch >= 0x3040 && ch <= cacheCeiling) {
assert ch != FST.END_LABEL;
final Arc<Long> result = rootCache[ch - 0x3040];
@ -67,13 +68,17 @@ public final class TokenInfoFST {
return arc;
}
} else {
return fst.findTargetArc(ch, follow, arc);
return fst.findTargetArc(ch, follow, arc, fstReader);
}
}

public Arc<Long> getFirstArc(FST.Arc<Long> arc) {
return fst.getFirstArc(arc);
}

public FST.BytesReader getBytesReader(int pos) {
return fst.getBytesReader(pos);
}

/** @lucene.internal for testing only */
FST<Long> getInternalFST() {
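TokenInfoFST fronts the dictionary FST with a root-arc cache for the dense kana range starting at U+3040, and after this change the caller supplies the BytesReader. The call pattern, using only signatures from this hunk (tokenInfoFST is an assumed instance):

    FST.BytesReader reader = tokenInfoFST.getBytesReader(0);
    FST.Arc<Long> follow = tokenInfoFST.getFirstArc(new FST.Arc<Long>());
    FST.Arc<Long> arc = new FST.Arc<Long>();
    // 0x3042 is hiragana 'a', inside the cached range, so this hits rootCache.
    if (tokenInfoFST.findTargetArc(0x3042, follow, arc, true, reader) != null) {
      Long output = arc.output;
    }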
@ -113,7 +113,7 @@ public final class UserDictionary implements Dictionary {
for (int i = 0; i < token.length(); i++) {
scratch.ints[i] = (int) token.charAt(i);
}
fstBuilder.add(scratch, fstOutput.get(ord));
fstBuilder.add(scratch, ord);
segmentations.add(wordIdAndLength);
ord++;
}
@ -134,6 +134,8 @@ public final class UserDictionary implements Dictionary {
TreeMap<Integer, int[]> result = new TreeMap<Integer, int[]>(); // index, [length, length...]
boolean found = false; // true if we found any results

final FST.BytesReader fstReader = fst.getBytesReader(0);

FST.Arc<Long> arc = new FST.Arc<Long>();
int end = off + len;
for (int startOffset = off; startOffset < end; startOffset++) {
@ -142,7 +144,7 @@ public final class UserDictionary implements Dictionary {
int remaining = end - startOffset;
for (int i = 0; i < remaining; i++) {
int ch = chars[startOffset+i];
if (fst.findTargetArc(ch, arc, arc, i == 0) == null) {
if (fst.findTargetArc(ch, arc, arc, i == 0, fstReader) == null) {
break; // continue to next position
}
output += arc.output.intValue();
@ -35,7 +35,7 @@ import org.apache.lucene.util.fst.FST;
public class Viterbi {

private final TokenInfoFST fst;


private final TokenInfoDictionary dictionary;

private final UnknownDictionary unkDictionary;
@ -214,6 +214,8 @@ public class Viterbi {
ViterbiNode bosNode = new ViterbiNode(-1, BOS, 0, BOS.length, 0, 0, 0, -1, Type.KNOWN);
addToArrays(bosNode, 0, 1, startIndexArr, endIndexArr, startSizeArr, endSizeArr);

final FST.BytesReader fstReader = fst.getBytesReader(0);

// Process user dictionary;
if (useUserDictionary) {
processUserDictionary(text, offset, length, startIndexArr, endIndexArr, startSizeArr, endSizeArr);
@ -238,7 +240,7 @@ public class Viterbi {
for (int endIndex = 1; endIndex < suffixLength + 1; endIndex++) {
int ch = text[suffixStart + endIndex - 1];

if (fst.findTargetArc(ch, arc, arc, endIndex == 1) == null) {
if (fst.findTargetArc(ch, arc, arc, endIndex == 1, fstReader) == null) {
break; // continue to next position
}
output += arc.output.intValue();
Binary file not shown.
@ -131,7 +131,7 @@ public class TokenInfoDictionaryBuilder {
System.out.println(" encode...");

PositiveIntOutputs fstOutput = PositiveIntOutputs.getSingleton(true);
- Builder<Long> fstBuilder = new Builder<Long>(FST.INPUT_TYPE.BYTE2, fstOutput);
+ Builder<Long> fstBuilder = new Builder<Long>(FST.INPUT_TYPE.BYTE2, 0, 0, true, true, Integer.MAX_VALUE, fstOutput, null, true);
IntsRef scratch = new IntsRef();
long ord = -1; // first ord will be 0
String lastValue = null;
@ -155,13 +155,14 @@ public class TokenInfoDictionaryBuilder {
for (int i = 0; i < token.length(); i++) {
scratch.ints[i] = (int) token.charAt(i);
}
- fstBuilder.add(scratch, fstOutput.get(ord));
+ fstBuilder.add(scratch, ord);
}
dictionary.addMapping((int)ord, offset);
offset = next;
}

- FST<Long> fst = fstBuilder.finish();
+ final FST<Long> fst = fstBuilder.finish().pack(2, 100000);

System.out.print(" " + fst.getNodeCount() + " nodes, " + fst.getArcCount() + " arcs, " + fst.sizeInBytes() + " bytes... ");
dictionary.setFST(fst);
System.out.println(" done");
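Together, the widened Builder constructor and the pack() call are the opt-in packing path: the final constructor argument requests a packable FST, and finish().pack(...) then trades extra build-time RAM for a smaller structure. A self-contained sketch under those assumptions — the token list and class name are invented for illustration, and the two pack() arguments are simply the values used in the hunk above, treated as opaque tuning knobs here:

import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.PositiveIntOutputs;

public class PackedFstDemo {
  public static void main(String[] args) throws Exception {
    PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
    // Trailing 'true' asks for a packable FST, mirroring the builder call above.
    Builder<Long> builder = new Builder<Long>(
        FST.INPUT_TYPE.BYTE2, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, true);

    String[] tokens = {"foo", "foobar"};          // inputs must be added in sorted order
    IntsRef scratch = new IntsRef(new int[16], 0, 0);
    for (long ord = 0; ord < tokens.length; ord++) {
      String token = tokens[(int) ord];
      scratch.length = token.length();
      for (int i = 0; i < token.length(); i++) {
        scratch.ints[i] = (int) token.charAt(i);
      }
      builder.add(scratch, ord);                  // ord is the output, as above
    }

    // pack() spends extra RAM during the build to shrink the final FST.
    FST<Long> fst = builder.finish().pack(2, 100000);
    System.out.println(fst.sizeInBytes() + " bytes");
  }
}

The FSTCompletionBuilder hunk below passes false for the same trailing argument, leaving its automaton unpacked.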
@ -329,8 +329,11 @@ public class FSTCompletion {
private boolean descendWithPrefix(Arc<Object> arc, BytesRef utf8)
throws IOException {
final int max = utf8.offset + utf8.length;
+ // Cannot save as instance var since multiple threads
+ // can use FSTCompletion at once...
+ final FST.BytesReader fstReader = automaton.getBytesReader(0);
for (int i = utf8.offset; i < max; i++) {
- if (automaton.findTargetArc(utf8.bytes[i] & 0xff, arc, arc) == null) {
+ if (automaton.findTargetArc(utf8.bytes[i] & 0xff, arc, arc, fstReader) == null) {
// No matching prefixes, return an empty result.
return false;
}
@ -234,7 +234,7 @@ public class FSTCompletionBuilder {
final Object empty = outputs.getNoOutput();
final Builder<Object> builder = new Builder<Object>(
FST.INPUT_TYPE.BYTE1, 0, 0, true, true,
- shareMaxTailLength, outputs, null);
+ shareMaxTailLength, outputs, null, false);

BytesRef scratch = new BytesRef();
final IntsRef scratchIntsRef = new IntsRef();
@ -176,20 +176,24 @@ public class Overseer implements NodeStateChangeListener, ShardLeaderListener {
return false;
}
/**
- * Try to assign core to the cluster
+ * Try to assign core to the cluster.
* @throws KeeperException
* @throws InterruptedException
*/
private CloudState updateState(CloudState state, String nodeName, CoreState coreState) throws KeeperException, InterruptedException {
String collection = coreState.getCollectionName();
String zkCoreNodeName = coreState.getCoreNodeName();

- String shardId;
- if (coreState.getProperties().get(ZkStateReader.SHARD_ID_PROP) == null) {
-   shardId = AssignShard.assignShard(collection, state);
- } else {
-   shardId = coreState.getProperties().get(ZkStateReader.SHARD_ID_PROP);
- }
+ // use the provided non null shardId
+ String shardId = coreState.getProperties().get(ZkStateReader.SHARD_ID_PROP);
+ if(shardId==null) {
+   //use shardId from CloudState
+   shardId = getAssignedId(state, nodeName, coreState);
+ }
+ if(shardId==null) {
+   //request new shardId
+   shardId = AssignShard.assignShard(collection, state);
+ }

Map<String,String> props = new HashMap<String,String>();
for (Entry<String,String> entry : coreState.getProperties().entrySet()) {
@ -209,6 +213,23 @@ public class Overseer implements NodeStateChangeListener, ShardLeaderListener {
CloudState newCloudState = updateSlice(state, collection, slice);
return newCloudState;
}

+ /*
+  * Return an already assigned id or null if not assigned
+  */
+ private String getAssignedId(final CloudState state, final String nodeName,
+     final CoreState coreState) {
+   final String key = coreState.getProperties().get(ZkStateReader.NODE_NAME_PROP) + "_" + coreState.getProperties().get(ZkStateReader.CORE_NAME_PROP);
+   Map<String, Slice> slices = state.getSlices(coreState.getCollectionName());
+   if (slices != null) {
+     for (Slice slice : slices.values()) {
+       if (slice.getShards().get(key) != null) {
+         return slice.getName();
+       }
+     }
+   }
+   return null;
+ }

private CloudState updateSlice(CloudState state, String collection, Slice slice) {

@ -480,6 +501,7 @@ public class Overseer implements NodeStateChangeListener, ShardLeaderListener {
Set<String> downNodes = complement(oldLiveNodes, liveNodes);
for(String node: downNodes) {
NodeStateWatcher watcher = nodeStateWatches.remove(node);
+ log.debug("Removed NodeStateWatcher for node:" + node);
}
}

@ -491,6 +513,7 @@ public class Overseer implements NodeStateChangeListener, ShardLeaderListener {
if (!nodeStateWatches.containsKey(nodeName)) {
zkCmdExecutor.ensureExists(path, zkClient);
nodeStateWatches.put(nodeName, new NodeStateWatcher(zkClient, nodeName, path, this));
+ log.debug("Added NodeStateWatcher for node " + nodeName);
} else {
log.debug("watch already added");
}
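The updateState() rewrite turns shard assignment into a three-step fallback. A hypothetical condensation of that order — the helper name resolveShardId is mine, while getAssignedId and AssignShard.assignShard are the ones from the hunks above, and this assumes the surrounding Overseer members:

// 1) shardId explicitly carried in the core's properties,
// 2) otherwise an id already recorded for this core in CloudState,
// 3) otherwise a freshly assigned shard.
private String resolveShardId(CloudState state, String nodeName, CoreState coreState) {
  String shardId = coreState.getProperties().get(ZkStateReader.SHARD_ID_PROP);
  if (shardId == null) {
    shardId = getAssignedId(state, nodeName, coreState);   // reuse prior assignment
  }
  if (shardId == null) {
    shardId = AssignShard.assignShard(coreState.getCollectionName(), state); // brand new
  }
  return shardId;
}

This ordering is what keeps a core from being assigned a second shard id when it re-publishes its state, which the new testDoubleAssignment test below exercises.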
@ -19,6 +19,7 @@ package org.apache.solr.core;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexDeletionPolicy;
import org.apache.lucene.store.Directory;
+ import org.apache.solr.update.SolrIndexWriter;

import java.io.IOException;
import java.util.*;
@ -65,13 +66,13 @@ public class IndexDeletionPolicyWrapper implements IndexDeletionPolicy {
/**
* Set the duration for which commit point is to be reserved by the deletion policy.
*
- * @param indexVersion version of the commit point to be reserved
+ * @param indexGen gen of the commit point to be reserved
* @param reserveTime time in milliseconds for which the commit point is to be reserved
*/
- public void setReserveDuration(Long indexVersion, long reserveTime) {
+ public void setReserveDuration(Long indexGen, long reserveTime) {
long timeToSet = System.currentTimeMillis() + reserveTime;
for(;;) {
- Long previousTime = reserves.put(indexVersion, timeToSet);
+ Long previousTime = reserves.put(indexGen, timeToSet);

// this is the common success case: the older time didn't exist, or
// came before the new time.
@ -103,19 +104,19 @@ public class IndexDeletionPolicyWrapper implements IndexDeletionPolicy {
/** Permanently prevent this commit point from being deleted.
* A counter is used to allow a commit point to be correctly saved and released
* multiple times. */
- public synchronized void saveCommitPoint(Long indexCommitVersion) {
-   AtomicInteger reserveCount = savedCommits.get(indexCommitVersion);
+ public synchronized void saveCommitPoint(Long indexCommitGen) {
+   AtomicInteger reserveCount = savedCommits.get(indexCommitGen);
if (reserveCount == null) reserveCount = new AtomicInteger();
reserveCount.incrementAndGet();
- savedCommits.put(indexCommitVersion, reserveCount);
+ savedCommits.put(indexCommitGen, reserveCount);
}

/** Release a previously saved commit point */
- public synchronized void releaseCommitPoint(Long indexCommitVersion) {
-   AtomicInteger reserveCount = savedCommits.get(indexCommitVersion);
+ public synchronized void releaseCommitPoint(Long indexCommitGen) {
+   AtomicInteger reserveCount = savedCommits.get(indexCommitGen);
if (reserveCount == null) return;// this should not happen
if (reserveCount.decrementAndGet() <= 0) {
- savedCommits.remove(indexCommitVersion);
+ savedCommits.remove(indexCommitGen);
}
}

@ -165,10 +166,10 @@ public class IndexDeletionPolicyWrapper implements IndexDeletionPolicy {

@Override
public void delete() {
- Long version = delegate.getVersion();
- Long reserve = reserves.get(version);
+ Long gen = delegate.getGeneration();
+ Long reserve = reserves.get(gen);
if (reserve != null && System.currentTimeMillis() < reserve) return;
- if(savedCommits.containsKey(version)) return;
+ if(savedCommits.containsKey(gen)) return;
delegate.delete();
}

@ -187,11 +188,6 @@ public class IndexDeletionPolicyWrapper implements IndexDeletionPolicy {
return delegate.hashCode();
}

- @Override
- public long getVersion() {
-   return delegate.getVersion();
- }
-
@Override
public long getGeneration() {
return delegate.getGeneration();
@ -202,11 +198,6 @@ public class IndexDeletionPolicyWrapper implements IndexDeletionPolicy {
return delegate.isDeleted();
}

- @Override
- public long getTimestamp() throws IOException {
-   return delegate.getTimestamp();
- }
-
@Override
public Map getUserData() throws IOException {
return delegate.getUserData();
@ -214,11 +205,11 @@ public class IndexDeletionPolicyWrapper implements IndexDeletionPolicy {
}

/**
- * @param version the version of the commit point
+ * @param gen the gen of the commit point
* @return a commit point corresponding to the given generation
*/
- public IndexCommit getCommitPoint(Long version) {
-   return solrVersionVsCommits.get(version);
+ public IndexCommit getCommitPoint(Long gen) {
+   return solrVersionVsCommits.get(gen);
}

/**
@ -236,10 +227,20 @@ public class IndexDeletionPolicyWrapper implements IndexDeletionPolicy {
Map<Long, IndexCommit> map = new ConcurrentHashMap<Long, IndexCommit>();
for (IndexCommitWrapper wrapper : list) {
if (!wrapper.isDeleted())
- map.put(wrapper.getVersion(), wrapper.delegate);
+ map.put(wrapper.delegate.getGeneration(), wrapper.delegate);
}
solrVersionVsCommits = map;
latestCommit = ((list.get(list.size() - 1)).delegate);
}

+ public static long getCommitTimestamp(IndexCommit commit) throws IOException {
+   final Map<String,String> commitData = commit.getUserData();
+   String commitTime = commitData.get(SolrIndexWriter.COMMIT_TIME_MSEC_KEY);
+   if (commitTime != null) {
+     return Long.parseLong(commitTime);
+   } else {
+     return 0;
+   }
+ }
}
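With getVersion() and getTimestamp() dropped from the wrapper, a commit's wall-clock time now lives in commit user data that Solr stamps itself; the getCommitTimestamp() helper above reads it back. A minimal round-trip sketch under that scheme — the class and method names are mine, and the key literal matches the SolrIndexWriter constant added further down:

import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexWriter;

public class CommitTimestampRoundTrip {
  static final String COMMIT_TIME_MSEC_KEY = "commitTimeMSec"; // same key as SolrIndexWriter

  // Write side: stamp the commit with the current time, as
  // DirectUpdateHandler2 does below.
  static void commitWithTimestamp(IndexWriter writer) throws Exception {
    Map<String,String> commitData = new HashMap<String,String>();
    commitData.put(COMMIT_TIME_MSEC_KEY, String.valueOf(System.currentTimeMillis()));
    writer.commit(commitData); // the commit(Map) overload used in this patch
  }

  // Read side: recover the timestamp from any commit point, 0 if absent.
  static long getCommitTimestamp(IndexCommit commit) throws Exception {
    String commitTime = commit.getUserData().get(COMMIT_TIME_MSEC_KEY);
    return commitTime != null ? Long.parseLong(commitTime) : 0;
  }
}

The design choice here is that the timestamp becomes pure payload: deletion policy, replication, and cache headers all key their bookkeeping on the stable generation number and only report the timestamp for humans and HTTP.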
@ -87,7 +87,6 @@ public class SolrDeletionPolicy implements IndexDeletionPolicy, NamedListInitial
}

sb.append(",segFN=").append(commit.getSegmentsFileName());
- sb.append(",version=").append(commit.getVersion());
sb.append(",generation=").append(commit.getGeneration());
sb.append(",filenames=").append(commit.getFileNames());
} catch (Exception e) {
@ -133,7 +132,7 @@ public class SolrDeletionPolicy implements IndexDeletionPolicy, NamedListInitial
synchronized (this) {
long maxCommitAgeTimeStamp = -1L;
IndexCommit newest = commits.get(commits.size() - 1);
- log.info("newest commit = " + newest.getVersion());
+ log.info("newest commit = " + newest.getGeneration());

int singleSegKept = (newest.getSegmentCount() == 1) ? 1 : 0;
int totalKept = 1;
@ -149,7 +148,7 @@ public class SolrDeletionPolicy implements IndexDeletionPolicy, NamedListInitial
DateMathParser dmp = new DateMathParser(DateField.UTC, Locale.US);
maxCommitAgeTimeStamp = dmp.parseMath(maxCommitAge).getTime();
}
- if (commit.getTimestamp() < maxCommitAgeTimeStamp) {
+ if (IndexDeletionPolicyWrapper.getCommitTimestamp(commit) < maxCommitAgeTimeStamp) {
commit.delete();
continue;
}
@ -191,8 +190,6 @@ public class SolrDeletionPolicy implements IndexDeletionPolicy, NamedListInitial

sb.append('/');
sb.append(commit.getGeneration());
- sb.append('_');
- sb.append(commit.getVersion());
return sb.toString();
}
@ -60,6 +60,7 @@ import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.BinaryQueryResponseWriter;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.search.SolrIndexSearcher;
+ import org.apache.solr.update.SolrIndexWriter;
import org.apache.solr.util.NumberUtils;
import org.apache.solr.util.RefCounted;
import org.apache.solr.util.plugin.SolrCoreAware;
@ -139,8 +140,8 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
// in a catastrophic failure, but will result in the client getting an empty file list for
// the CMD_GET_FILE_LIST command.
//
- core.getDeletionPolicy().setReserveDuration(commitPoint.getVersion(), reserveCommitDuration);
- rsp.add(CMD_INDEX_VERSION, commitPoint.getVersion());
+ core.getDeletionPolicy().setReserveDuration(commitPoint.getGeneration(), reserveCommitDuration);
+ rsp.add(CMD_INDEX_VERSION, core.getDeletionPolicy().getCommitTimestamp(commitPoint));
rsp.add(GENERATION, commitPoint.getGeneration());
} else {
// This happens when replication is not configured to happen after startup and no commit/optimize
@ -219,7 +220,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
for (IndexCommit c : commits.values()) {
try {
NamedList<Object> nl = new NamedList<Object>();
- nl.add("indexVersion", c.getVersion());
+ nl.add("indexVersion", core.getDeletionPolicy().getCommitTimestamp(c));
nl.add(GENERATION, c.getGeneration());
nl.add(CMD_GET_FILE_LIST, c.getFileNames());
l.add(nl);
@ -332,21 +333,21 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw

@SuppressWarnings("unchecked")
private void getFileList(SolrParams solrParams, SolrQueryResponse rsp) {
- String v = solrParams.get(CMD_INDEX_VERSION);
+ String v = solrParams.get(GENERATION);
if (v == null) {
- rsp.add("status", "no indexversion specified");
+ rsp.add("status", "no index generation specified");
return;
}
- long version = Long.parseLong(v);
- IndexCommit commit = core.getDeletionPolicy().getCommitPoint(version);
+ long gen = Long.parseLong(v);
+ IndexCommit commit = core.getDeletionPolicy().getCommitPoint(gen);

+ //System.out.println("ask for files for gen:" + commit.getGeneration() + core.getCoreDescriptor().getCoreContainer().getZkController().getNodeName());
if (commit == null) {
- rsp.add("status", "invalid indexversion");
+ rsp.add("status", "invalid index generation");
return;
}
// reserve the indexcommit for sometime
- core.getDeletionPolicy().setReserveDuration(version, reserveCommitDuration);
+ core.getDeletionPolicy().setReserveDuration(gen, reserveCommitDuration);
List<Map<String, Object>> result = new ArrayList<Map<String, Object>>();
try {
//get all the files in the commit
@ -359,10 +360,10 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
result.add(fileMeta);
}
} catch (IOException e) {
- rsp.add("status", "unable to get file names for given indexversion");
+ rsp.add("status", "unable to get file names for given index generation");
rsp.add("exception", e);
- LOG.warn("Unable to get file names for indexCommit version: "
-   + version, e);
+ LOG.warn("Unable to get file names for indexCommit generation: "
+   + gen, e);
}
rsp.add(CMD_GET_FILE_LIST, result);
if (confFileNameAlias.size() < 1)
@ -487,8 +488,13 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
long version[] = new long[2];
RefCounted<SolrIndexSearcher> searcher = core.getSearcher();
try {
- version[0] = searcher.get().getIndexReader().getIndexCommit().getVersion();
- version[1] = searcher.get().getIndexReader().getIndexCommit().getGeneration();
+ final IndexCommit commit = searcher.get().getIndexReader().getIndexCommit();
+ final Map<String,String> commitData = commit.getUserData();
+ String commitTime = commitData.get(SolrIndexWriter.COMMIT_TIME_MSEC_KEY);
+ if (commitTime != null) {
+   version[0] = Long.parseLong(commitTime);
+ }
+ version[1] = commit.getGeneration();
} catch (IOException e) {
LOG.warn("Unable to get index version : ", e);
} finally {
@ -574,7 +580,6 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
}

if (isMaster && commit != null) {
- master.add("replicatableIndexVersion", commit.getVersion());
master.add("replicatableGeneration", commit.getGeneration());
}

@ -846,7 +851,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
Collection<IndexCommit> commits = IndexReader.listCommits(reader.directory());
for (IndexCommit ic : commits) {
if(ic.getSegmentCount() == 1){
- if(indexCommitPoint == null || indexCommitPoint.getVersion() < ic.getVersion()) indexCommitPoint = ic;
+ if(indexCommitPoint == null || indexCommitPoint.getGeneration() < ic.getGeneration()) indexCommitPoint = ic;
}
}
} else{
@ -857,7 +862,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
// always saves the last commit point (and the last optimized commit point, if needed)
/***
if(indexCommitPoint != null){
- core.getDeletionPolicy().saveCommitPoint(indexCommitPoint.getVersion());
+ core.getDeletionPolicy().saveCommitPoint(indexCommitPoint.getGeneration());
}
***/
}
@ -958,10 +963,10 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
// always saves the last commit point (and the last optimized commit point, if needed)
/***
if (indexCommitPoint != null) {
- core.getDeletionPolicy().saveCommitPoint(indexCommitPoint.getVersion());
+ core.getDeletionPolicy().saveCommitPoint(indexCommitPoint.getGeneration());
}
if(oldCommitPoint != null){
- core.getDeletionPolicy().releaseCommitPoint(oldCommitPoint.getVersion());
+ core.getDeletionPolicy().releaseCommitPoint(oldCommitPoint.getGeneration());
}
***/
}
@ -989,7 +994,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw

private FastOutputStream fos;

- private Long indexVersion;
+ private Long indexGen;
private IndexDeletionPolicyWrapper delPolicy;

public FileStream(SolrParams solrParams) {
@ -1004,8 +1009,8 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
String sLen = params.get(LEN);
String compress = params.get(COMPRESSION);
String sChecksum = params.get(CHECKSUM);
- String sindexVersion = params.get(CMD_INDEX_VERSION);
- if (sindexVersion != null) indexVersion = Long.parseLong(sindexVersion);
+ String sGen = params.get(GENERATION);
+ if (sGen != null) indexGen = Long.parseLong(sGen);
if (Boolean.parseBoolean(compress)) {
fos = new FastOutputStream(new DeflaterOutputStream(out));
} else {
@ -1063,9 +1068,9 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
}
fos.write(buf, 0, (int) bytesRead);
fos.flush();
- if (indexVersion != null && (packetsWritten % 5 == 0)) {
+ if (indexGen != null && (packetsWritten % 5 == 0)) {
//after every 5 packets reserve the commitpoint for some time
- delPolicy.setReserveDuration(indexVersion, reserveCommitDuration);
+ delPolicy.setReserveDuration(indexGen, reserveCommitDuration);
}
packetsWritten++;
}
@ -28,6 +28,7 @@ import org.apache.solr.common.util.FileUtils;
import org.apache.solr.common.util.JavaBinCodec;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
+ import org.apache.solr.core.IndexDeletionPolicyWrapper;
import static org.apache.solr.handler.ReplicationHandler.*;

import org.apache.solr.request.LocalSolrQueryRequest;
@ -210,10 +211,10 @@ public class SnapPuller {
/**
* Fetches the list of files in a given index commit point
*/
- void fetchFileList(long version) throws IOException {
+ void fetchFileList(long gen) throws IOException {
PostMethod post = new PostMethod(masterUrl);
post.addParameter(COMMAND, CMD_GET_FILE_LIST);
- post.addParameter(CMD_INDEX_VERSION, String.valueOf(version));
+ post.addParameter(GENERATION, String.valueOf(gen));
post.addParameter("wt", "javabin");

@SuppressWarnings("unchecked")
@ -225,7 +226,7 @@ public class SnapPuller {
filesToDownload = Collections.synchronizedList(f);
else {
filesToDownload = Collections.emptyList();
- LOG.error("No files to download for indexversion: "+ version);
+ LOG.error("No files to download for index generation: "+ gen);
}

f = nl.get(CONF_FILES);
@ -274,7 +275,7 @@ public class SnapPuller {
}

if (latestVersion == 0L) {
- if (force && commit.getVersion() != 0) {
+ if (force && commit.getGeneration() != 0) {
// since we won't get the files for an empty index,
// we just clear ours and commit
core.getUpdateHandler().getSolrCoreState().getIndexWriter(core).deleteAll();
@ -288,17 +289,17 @@ public class SnapPuller {
return true;
}

- if (!force && commit.getVersion() == latestVersion && commit.getGeneration() == latestGeneration) {
+ if (!force && IndexDeletionPolicyWrapper.getCommitTimestamp(commit) == latestVersion) {
//master and slave are already in sync just return
LOG.info("Slave in sync with master.");
successfulInstall = true;
return true;
}
- LOG.info("Master's version: " + latestVersion + ", generation: " + latestGeneration);
- LOG.info("Slave's version: " + commit.getVersion() + ", generation: " + commit.getGeneration());
+ LOG.info("Master's generation: " + latestGeneration);
+ LOG.info("Slave's generation: " + commit.getGeneration());
LOG.info("Starting replication process");
// get the list of files first
- fetchFileList(latestVersion);
+ fetchFileList(latestGeneration);
// this can happen if the commit point is deleted before we fetch the file list.
if(filesToDownload.isEmpty()) return false;
LOG.info("Number of files in latest index in master: " + filesToDownload.size());
@ -309,7 +310,7 @@ public class SnapPuller {
filesDownloaded = Collections.synchronizedList(new ArrayList<Map<String, Object>>());
// if the generation of master is older than that of the slave, it means they are not compatible to be copied
// then a new index directory is to be created and all the files need to be copied
- boolean isFullCopyNeeded = commit.getVersion() >= latestVersion || force;
+ boolean isFullCopyNeeded = IndexDeletionPolicyWrapper.getCommitTimestamp(commit) >= latestVersion || force;
File tmpIndexDir = createTempindexDir(core);
if (isIndexStale())
isFullCopyNeeded = true;
@ -318,11 +319,11 @@ public class SnapPuller {
File indexDir = null ;
try {
indexDir = new File(core.getIndexDir());
- downloadIndexFiles(isFullCopyNeeded, tmpIndexDir, latestVersion);
+ downloadIndexFiles(isFullCopyNeeded, tmpIndexDir, latestGeneration);
LOG.info("Total time taken for download : " + ((System.currentTimeMillis() - replicationStartTime) / 1000) + " secs");
Collection<Map<String, Object>> modifiedConfFiles = getModifiedConfFiles(confFilesToDownload);
if (!modifiedConfFiles.isEmpty()) {
- downloadConfFiles(confFilesToDownload, latestVersion);
+ downloadConfFiles(confFilesToDownload, latestGeneration);
if (isFullCopyNeeded) {
successfulInstall = modifyIndexProps(tmpIndexDir.getName());
deleteTmpIdxDir = false;
@ -530,7 +531,7 @@ public class SnapPuller {
}.start();
}

- private void downloadConfFiles(List<Map<String, Object>> confFilesToDownload, long latestVersion) throws Exception {
+ private void downloadConfFiles(List<Map<String, Object>> confFilesToDownload, long latestGeneration) throws Exception {
LOG.info("Starting download of configuration files from master: " + confFilesToDownload);
confFilesDownloaded = Collections.synchronizedList(new ArrayList<Map<String, Object>>());
File tmpconfDir = new File(solrCore.getResourceLoader().getConfigDir(), "conf." + getDateAsStr(new Date()));
@ -542,7 +543,7 @@ public class SnapPuller {
}
for (Map<String, Object> file : confFilesToDownload) {
String saveAs = (String) (file.get(ALIAS) == null ? file.get(NAME) : file.get(ALIAS));
- fileFetcher = new FileFetcher(tmpconfDir, file, saveAs, true, latestVersion);
+ fileFetcher = new FileFetcher(tmpconfDir, file, saveAs, true, latestGeneration);
currentFile = file;
fileFetcher.fetchFile();
confFilesDownloaded.add(new HashMap<String, Object>(file));
@ -561,13 +562,13 @@ public class SnapPuller {
*
* @param downloadCompleteIndex is it a fresh index copy
* @param tmpIdxDir the directory to which files need to be downloaded
- * @param latestVersion the version number
+ * @param latestGeneration the generation number
*/
- private void downloadIndexFiles(boolean downloadCompleteIndex, File tmpIdxDir, long latestVersion) throws Exception {
+ private void downloadIndexFiles(boolean downloadCompleteIndex, File tmpIdxDir, long latestGeneration) throws Exception {
for (Map<String, Object> file : filesToDownload) {
File localIndexFile = new File(solrCore.getIndexDir(), (String) file.get(NAME));
if (!localIndexFile.exists() || downloadCompleteIndex) {
- fileFetcher = new FileFetcher(tmpIdxDir, file, (String) file.get(NAME), false, latestVersion);
+ fileFetcher = new FileFetcher(tmpIdxDir, file, (String) file.get(NAME), false, latestGeneration);
currentFile = file;
fileFetcher.fetchFile();
filesDownloaded.add(new HashMap<String, Object>(file));
@ -578,7 +579,7 @@ public class SnapPuller {
}

/**
- * All the files which are common between master and slave must have same timestamp and size else we assume they are
+ * All the files which are common between master and slave must have same size else we assume they are
* not compatible (stale).
*
* @return true if the index stale and we need to download a fresh copy, false otherwise.
@ -896,10 +897,10 @@ public class SnapPuller {

private boolean aborted = false;

- private Long indexVersion;
+ private Long indexGen;

FileFetcher(File dir, Map<String, Object> fileDetails, String saveAs,
- boolean isConf, long latestVersion) throws IOException {
+ boolean isConf, long latestGen) throws IOException {
this.copy2Dir = dir;
this.fileName = (String) fileDetails.get(NAME);
this.size = (Long) fileDetails.get(SIZE);
@ -908,7 +909,7 @@ public class SnapPuller {
if(fileDetails.get(LAST_MODIFIED) != null){
lastmodified = (Long)fileDetails.get(LAST_MODIFIED);
}
- indexVersion = latestVersion;
+ indexGen = latestGen;

this.file = new File(copy2Dir, saveAs);

@ -1077,7 +1078,7 @@ public class SnapPuller {
//the method is command=filecontent
post.addParameter(COMMAND, CMD_GET_FILE);
//add the generation to download. This is used to reserve the download
- post.addParameter(CMD_INDEX_VERSION, indexVersion.toString());
+ post.addParameter(GENERATION, indexGen.toString());
if (isConf) {
//set cf instead of file for config file
post.addParameter(CONF_FILE_SHORT, fileName);
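The sync and full-copy decisions now compare the slave commit's user-data timestamp against the timestamp the master reports, while all file fetching keys on the generation. A hedged condensation of those two predicates — the helper class and method names are mine, not SnapPuller's actual structure:

import java.io.IOException;
import org.apache.lucene.index.IndexCommit;
import org.apache.solr.core.IndexDeletionPolicyWrapper;

final class ReplicationSyncCheck {
  // The slave is in sync when its commit carries the same user-data
  // timestamp that the master reported as its index version.
  static boolean inSync(IndexCommit slaveCommit, long masterTimestamp, boolean force) throws IOException {
    return !force && IndexDeletionPolicyWrapper.getCommitTimestamp(slaveCommit) == masterTimestamp;
  }

  // A full copy is needed when the slave looks as new as or newer than
  // the master, or when the caller forces it.
  static boolean fullCopyNeeded(IndexCommit slaveCommit, long masterTimestamp, boolean force) throws IOException {
    return IndexDeletionPolicyWrapper.getCommitTimestamp(slaveCommit) >= masterTimestamp || force;
  }
}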
@ -70,7 +70,7 @@ public class SnapShooter {
}

void createSnapAsync(final IndexCommit indexCommit, final int numberToKeep, final ReplicationHandler replicationHandler) {
- replicationHandler.core.getDeletionPolicy().saveCommitPoint(indexCommit.getVersion());
+ replicationHandler.core.getDeletionPolicy().saveCommitPoint(indexCommit.getGeneration());

new Thread() {
@Override
@ -112,7 +112,7 @@ public class SnapShooter {
LOG.error("Exception while creating snapshot", e);
details.add("snapShootException", e.getMessage());
} finally {
- replicationHandler.core.getDeletionPolicy().releaseCommitPoint(indexCommit.getVersion());
+ replicationHandler.core.getDeletionPolicy().releaseCommitPoint(indexCommit.getGeneration());
replicationHandler.snapShootDetails = details;
if (lock != null) {
try {
@ -48,6 +48,7 @@ import org.apache.solr.handler.RequestHandlerBase;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.FieldType;
+ import org.apache.solr.update.SolrIndexWriter;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.SolrIndexSearcher;
@ -562,7 +563,10 @@ public class LukeRequestHandler extends RequestHandlerBase
indexInfo.add("current", reader.isCurrent() );
indexInfo.add("hasDeletions", reader.hasDeletions() );
indexInfo.add("directory", dir );
- indexInfo.add("lastModified", new Date(IndexReader.lastModified(dir)) );
+ String s = reader.getIndexCommit().getUserData().get(SolrIndexWriter.COMMIT_TIME_MSEC_KEY);
+ if (s != null) {
+   indexInfo.add("lastModified", new Date(Long.parseLong(s)));
+ }
return indexInfo;
}
//////////////////////// SolrInfoMBeans methods //////////////////////
@ -29,6 +29,7 @@ import javax.servlet.http.HttpServletResponse;

import org.apache.lucene.index.IndexReader;

+ import org.apache.solr.core.IndexDeletionPolicyWrapper;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrConfig;
import org.apache.solr.core.SolrConfig.HttpCachingConfig.LastModFrom;
@ -157,7 +158,7 @@ public final class HttpCacheHeaderUtil {
// assume default, change if needed (getOpenTime() should be fast)
lastMod =
LastModFrom.DIRLASTMOD == lastModFrom
- ? IndexReader.lastModified(searcher.getIndexReader().directory())
+ ? IndexDeletionPolicyWrapper.getCommitTimestamp(searcher.getIndexReader().getIndexCommit())
: searcher.getOpenTime();
} catch (IOException e) {
// we're pretty freaking screwed if this happens
@ -22,6 +22,8 @@ package org.apache.solr.update;

import java.io.IOException;
import java.net.URL;
+ import java.util.HashMap;
+ import java.util.Map;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicLong;
@ -387,7 +389,9 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
}

// SolrCore.verbose("writer.commit() start writer=",writer);
- writer.commit();
+ final Map<String,String> commitData = new HashMap<String,String>();
+ commitData.put(SolrIndexWriter.COMMIT_TIME_MSEC_KEY, String.valueOf(System.currentTimeMillis()));
+ writer.commit(commitData);
// SolrCore.verbose("writer.commit() end");
numDocsPending.set(0);
callPostCommitCallbacks();
@ -50,6 +50,11 @@ public class SolrIndexWriter extends IndexWriter {
public static final AtomicLong numOpens = new AtomicLong();
public static final AtomicLong numCloses = new AtomicLong();

+ /** Stored into each Lucene commit to record the
+  *  System.currentTimeMillis() when commit was called. */
+ public static final String COMMIT_TIME_MSEC_KEY = "commitTimeMSec";
+
String name;
private DirectoryFactory directoryFactory;
@ -19,7 +19,6 @@ package org.apache.solr.cloud;

import java.net.BindException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Random;
@ -297,13 +296,7 @@ public class ChaosMonkey {
JettySolrRunner jetty;
if (chance <= 5 && aggressivelyKillLeaders) {
// if killLeader, really aggressively go after leaders
- Collection<CloudJettyRunner> leaders = shardToLeaderJetty.values();
- List<CloudJettyRunner> leadersList = new ArrayList<CloudJettyRunner>(leaders.size());
-
- leadersList.addAll(leaders);
-
- int index = random.nextInt(leadersList.size());
- jetty = leadersList.get(index).jetty;
+ jetty = shardToLeaderJetty.get(slice).jetty;
} else {
// get random shard
List<CloudJettyRunner> jetties = shardToJetty.get(slice);
@ -59,6 +59,7 @@ import org.junit.Ignore;
* what we test now - the default update chain
*
*/
+ @Ignore
public class FullSolrCloudTest extends AbstractDistributedZkTestCase {

private static final String SHARD2 = "shard2";
@ -40,6 +40,7 @@ import org.apache.solr.core.CoreDescriptor;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.KeeperException.Code;
+ import org.apache.zookeeper.data.Stat;
import org.junit.BeforeClass;
import org.junit.Test;

@ -49,6 +50,54 @@ public class OverseerTest extends SolrTestCaseJ4 {
private static final boolean DEBUG = false;

+ private static class MockZKController{
+
+   private final SolrZkClient zkClient;
+   private final String nodeName;
+
+   public MockZKController(String zkAddress, String nodeName) throws InterruptedException, TimeoutException, IOException, KeeperException {
+     this.nodeName = nodeName;
+     zkClient = new SolrZkClient(zkAddress, TIMEOUT);
+     Overseer.createClientNodes(zkClient, nodeName);
+
+     // live node
+     final String nodePath = ZkStateReader.LIVE_NODES_ZKNODE + "/" + "node1";
+     zkClient.makePath(nodePath, CreateMode.EPHEMERAL, true);
+   }
+
+   private void deleteNode(final String path) {
+     try {
+       Stat stat = zkClient.exists(path, null, false);
+       zkClient.delete(path, stat.getVersion(), false);
+     } catch (KeeperException e) {
+       fail("Unexpected KeeperException!" + e);
+     } catch (InterruptedException e) {
+       fail("Unexpected InterruptedException!" + e);
+     }
+   }
+
+   public void close(){
+     try {
+       deleteNode(ZkStateReader.LIVE_NODES_ZKNODE + "/" + "node1");
+       zkClient.close();
+     } catch (InterruptedException e) {
+       // TODO Auto-generated catch block
+       e.printStackTrace();
+     }
+   }
+
+   public void publishState(String coreName, String stateName) throws KeeperException, InterruptedException{
+     HashMap<String,String> coreProps = new HashMap<String,String>();
+     coreProps.put(ZkStateReader.STATE_PROP, stateName);
+     coreProps.put(ZkStateReader.NODE_NAME_PROP, nodeName);
+     coreProps.put(ZkStateReader.CORE_NAME_PROP, coreName);
+     CoreState state = new CoreState(coreName, "collection1", coreProps);
+     final String statePath = Overseer.STATES_NODE + "/" + nodeName;
+     zkClient.setData(statePath, ZkStateReader.toJSON(new CoreState[] {state}), true);
+   }
+ }

@BeforeClass
public static void beforeClass() throws Exception {
initCore();
@ -438,11 +487,11 @@ public class OverseerTest extends SolrTestCaseJ4 {
SolrZkClient controllerClient = null;
SolrZkClient overseerClient = null;
ZkStateReader reader = null;
+ MockZKController mockController = null;

try {
server.run();
controllerClient = new SolrZkClient(server.getZkAddress(), TIMEOUT);

AbstractZkTestCase.tryCleanSolrZkNode(server.getZkHost());
AbstractZkTestCase.makeSolrZkNode(server.getZkHost());
controllerClient.makePath(ZkStateReader.LIVE_NODES_ZKNODE, true);
@ -450,45 +499,35 @@ public class OverseerTest extends SolrTestCaseJ4 {
reader = new ZkStateReader(controllerClient);
reader.createClusterStateWatchersAndUpdate();

- Overseer.createClientNodes(controllerClient, "node1");
+ mockController = new MockZKController(server.getZkAddress(), "node1");

overseerClient = electNewOverseer(server.getZkAddress());

- // live node
- final String nodePath = ZkStateReader.LIVE_NODES_ZKNODE + "/" + "node1";
- controllerClient.makePath(nodePath, CreateMode.EPHEMERAL, true);
-
- HashMap<String,String> coreProps = new HashMap<String,String>();
- coreProps.put(ZkStateReader.STATE_PROP, ZkStateReader.RECOVERING);
- coreProps.put(ZkStateReader.NODE_NAME_PROP, "node1");
- CoreState state = new CoreState("core1", "collection1", coreProps);
-
- final String statePath = Overseer.STATES_NODE + "/node1";
- // publish node state (recovering)
- controllerClient.setData(statePath, ZkStateReader.toJSON(new CoreState[] {state}), true);
+ mockController.publishState("core1", ZkStateReader.RECOVERING);

// wait overseer assignment
waitForSliceCount(reader, "collection1", 1);

verifyStatus(reader, ZkStateReader.RECOVERING);

- // publish node state (active)
- coreProps.put(ZkStateReader.STATE_PROP, ZkStateReader.ACTIVE);
- coreProps.put(ZkStateReader.SHARD_ID_PROP, "shard1");
- state = new CoreState("core1", "collection1", coreProps);
- controllerClient.setData(statePath,
-     ZkStateReader.toJSON(new CoreState[] {state}), true);
+ int version = getCloudStateVersion(controllerClient);
+
+ mockController.publishState("core1", ZkStateReader.ACTIVE);
+
+ while(version == getCloudStateVersion(controllerClient));

verifyStatus(reader, ZkStateReader.ACTIVE);
+ version = getCloudStateVersion(controllerClient);
overseerClient.close();

- coreProps.put(ZkStateReader.STATE_PROP, ZkStateReader.RECOVERING);
- state = new CoreState("core1", "collection1", coreProps);
-
- controllerClient.setData(statePath,
-     ZkStateReader.toJSON(new CoreState[] {state}), true);
- Thread.sleep(1000); //wait for overseer to get killed
-
- overseerClient = electNewOverseer(server.getZkAddress());
+ mockController.publishState("core1", ZkStateReader.RECOVERING);
+ version = getCloudStateVersion(controllerClient);

overseerClient = electNewOverseer(server.getZkAddress());

+ while(version == getCloudStateVersion(controllerClient));

verifyStatus(reader, ZkStateReader.RECOVERING);

assertEquals("Live nodes count does not match", 1, reader.getCloudState()
@ -497,6 +536,10 @@ public class OverseerTest extends SolrTestCaseJ4 {
.getSlice("collection1", "shard1").getShards().size());
} finally {

+ if (mockController != null) {
+   mockController.close();
+ }

if (overseerClient != null) {
overseerClient.close();
}
@ -509,6 +552,80 @@ public class OverseerTest extends SolrTestCaseJ4 {
server.shutdown();
}
}

+ @Test
+ public void testDoubleAssignment() throws Exception {
+   String zkDir = dataDir.getAbsolutePath() + File.separator
+       + "zookeeper/server1/data";
+
+   System.setProperty(ZkStateReader.NUM_SHARDS_PROP, "2");
+
+   ZkTestServer server = new ZkTestServer(zkDir);
+
+   SolrZkClient controllerClient = null;
+   SolrZkClient overseerClient = null;
+   ZkStateReader reader = null;
+   MockZKController mockController = null;
+
+   try {
+     server.run();
+     controllerClient = new SolrZkClient(server.getZkAddress(), TIMEOUT);
+
+     AbstractZkTestCase.tryCleanSolrZkNode(server.getZkHost());
+     AbstractZkTestCase.makeSolrZkNode(server.getZkHost());
+     controllerClient.makePath(ZkStateReader.LIVE_NODES_ZKNODE, true);
+
+     reader = new ZkStateReader(controllerClient);
+     reader.createClusterStateWatchersAndUpdate();
+
+     mockController = new MockZKController(server.getZkAddress(), "node1");
+
+     overseerClient = electNewOverseer(server.getZkAddress());
+
+     mockController.publishState("core1", ZkStateReader.RECOVERING);
+
+     // wait overseer assignment
+     waitForSliceCount(reader, "collection1", 1);
+
+     verifyStatus(reader, ZkStateReader.RECOVERING);
+
+     mockController.close();
+
+     int version = getCloudStateVersion(controllerClient);
+
+     mockController = new MockZKController(server.getZkAddress(), "node1");
+     mockController.publishState("core1", ZkStateReader.RECOVERING);
+
+     while (version == getCloudStateVersion(controllerClient));
+
+     reader.updateCloudState(true);
+     CloudState state = reader.getCloudState();
+     assertEquals("more than 1 shard id was assigned to same core", 1, state.getSlices("collection1").size());
+
+   } finally {
+     System.clearProperty(ZkStateReader.NUM_SHARDS_PROP);
+     if (overseerClient != null) {
+       overseerClient.close();
+     }
+     if (mockController != null) {
+       mockController.close();
+     }
+
+     if (controllerClient != null) {
+       controllerClient.close();
+     }
+     if (reader != null) {
+       reader.close();
+     }
+     server.shutdown();
+   }
+ }

+ private int getCloudStateVersion(SolrZkClient controllerClient)
+     throws KeeperException, InterruptedException {
+   return controllerClient.exists(ZkStateReader.CLUSTER_STATE, null, false).getVersion();
+ }

private SolrZkClient electNewOverseer(String address) throws InterruptedException,
    TimeoutException, IOException, KeeperException {
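Both tests detect a cluster-state change by polling the version of the ZooKeeper cluster-state znode, via the getCloudStateVersion helper above. A small sketch of that wait factored out — the helper class is mine, and unlike the test's bare busy-wait it sleeps between polls:

import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.solr.common.cloud.ZkStateReader;

final class ClusterStateWait {
  // Blocks until the cluster-state znode's version moves past `before`.
  static void awaitChange(SolrZkClient client, int before) throws Exception {
    while (before == client.exists(ZkStateReader.CLUSTER_STATE, null, false).getVersion()) {
      Thread.sleep(50); // be polite to ZooKeeper; the test itself spins without sleeping
    }
  }
}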
@ -92,10 +92,10 @@ public class TestSolrDeletionPolicy1 extends SolrTestCaseJ4 {
addDocs();
Map<Long, IndexCommit> commits = delPolicy.getCommits();
IndexCommit latest = delPolicy.getLatestCommit();
- for (Long version : commits.keySet()) {
-   if (commits.get(version) == latest)
+ for (Long gen : commits.keySet()) {
+   if (commits.get(gen) == latest)
continue;
- assertEquals(1, commits.get(version).getSegmentCount());
+ assertEquals(1, commits.get(gen).getSegmentCount());
}
}

@ -126,7 +126,7 @@ public class TestSolrDeletionPolicy1 extends SolrTestCaseJ4 {
);

commits = delPolicy.getCommits();
- assertTrue(!commits.containsKey(ic.getVersion()));
+ assertTrue(!commits.containsKey(ic.getGeneration()));
}

}
@ -132,7 +132,7 @@ public abstract class SolrTestCaseJ4 extends LuceneTestCase {
// wait a bit in case any ending threads have anything to release
int retries = 0;
while (endNumOpens - numOpens != endNumCloses - numCloses) {
- if (retries++ > 60) {
+ if (retries++ > 120) {
break;
}
try {