";
+
+ if (argv.length == 0) {
+ System.err.println("Usage: " + usage);
+ return;
+ }
+
+ for (int i = 0; i < argv.length; i++) {
+ if (argv[i].equals("-index")) { // parse -index option
+ index = argv[++i];
+ } else if (argv[i].equals("-create")) { // parse -create option
+ create = true;
+ } else if (i != argv.length-1) {
+ System.err.println("Usage: " + usage);
+ return;
+ } else
+ root = new File(argv[i]);
+ }
+
+ Date start = new Date();
+
+ if (!create) { // delete stale docs
+ deleting = true;
+ indexDocs(root, index, create);
+ }
+
+ writer = new IndexWriter(index, new StopAnalyzer(), create);
+ writer.mergeFactor = 20;
+ writer.maxFieldLength = 1000000;
+
+ indexDocs(root, index, create); // add new docs
+
+ System.out.println("Optimizing index...");
+ writer.optimize();
+ writer.close();
+
+ Date end = new Date();
+
+ System.out.print(end.getTime() - start.getTime());
+ System.out.println(" total milliseconds");
+
+ } catch (Exception e) {
+ System.out.println(" caught a " + e.getClass() +
+ "\n with message: " + e.getMessage());
+ }
+ }
+
+ /** Walks the directory hierarchy in uid order while keeping the uid iterator of
+  *  the existing index in sync. Mismatches indicate one of: (a) old documents to
+  *  be deleted; (b) unchanged documents, to be left alone; or (c) new documents,
+  *  to be indexed. When create is set, no existing index is opened.
+  */
+ private static void indexDocs(File file, String index, boolean create)
+     throws Exception {
+   if (create) {                                   // no existing index to sync with
+     indexDocs(file);
+     return;
+   }
+   reader = IndexReader.open(index);               // open existing index
+   try {
+     uidIter = reader.terms(new Term("uid", ""));  // init uid iterator
+     try {
+       indexDocs(file);
+       if (deleting) {                             // delete rest of stale docs
+         while (uidIter.term() != null && "uid".equals(uidIter.term().field())) {
+           System.out.println("deleting " + HTMLDocument.uid2url(uidIter.term().text()));
+           reader.delete(uidIter.term());
+           uidIter.next();
+         }
+         deleting = false;
+       }
+     } finally {
+       uidIter.close();                            // close uid iterator, even on failure
+     }
+   } finally {
+     reader.close();                               // close existing index, even on failure
+   }
+ }
+
+ /** Recursively walks file: directory entries are visited in sorted name order;
+  *  .html, .htm and .txt files are checked against uidIter when it is non-null,
+  *  or added unconditionally when it is null. Other files are skipped. */
+ private static void indexDocs(File file) throws Exception {
+   if (file.isDirectory()) {                       // if a directory
+     String[] files = file.list();                 // list its files
+     if (files == null)                            // list() gives null when it fails
+       throw new java.io.IOException("cannot list directory " + file);
+     Arrays.sort(files);                           // sort the files
+     for (int i = 0; i < files.length; i++)        // recursively index them
+       indexDocs(new File(file, files[i]));
+   } else if (file.getPath().endsWith(".html") || file.getPath().endsWith(".htm")
+              || file.getPath().endsWith(".txt")) { // index .html, .htm and .txt files
+     if (uidIter != null) {
+       String uid = HTMLDocument.uid(file);        // construct uid for doc
+       while (uidIter.term() != null && "uid".equals(uidIter.term().field()) &&
+              uidIter.term().text().compareTo(uid) < 0) {
+         if (deleting) {                           // delete stale docs
+           System.out.println("deleting " + HTMLDocument.uid2url(uidIter.term().text()));
+           reader.delete(uidIter.term());
+         }
+         uidIter.next();
+       }
+       if (uidIter.term() != null && "uid".equals(uidIter.term().field()) &&
+           uidIter.term().text().compareTo(uid) == 0) {
+         uidIter.next();                           // keep matching docs
+       } else if (!deleting) {                     // add new docs
+         Document doc = HTMLDocument.Document(file);
+         System.out.println("adding " + doc.get("url"));
+         writer.addDocument(doc);
+       }
+     } else {                                      // creating a new index
+       Document doc = HTMLDocument.Document(file);
+       System.out.println("adding " + doc.get("url"));
+       writer.addDocument(doc);                    // add docs unconditionally
+     }
+   }
+ }
+}
diff --git a/src/demo/org/apache/lucene/Makefile b/src/demo/org/apache/lucene/Makefile
new file mode 100644
index 00000000000..4b7b53fc8d4
--- /dev/null
+++ b/src/demo/org/apache/lucene/Makefile
@@ -0,0 +1,3 @@
+# sub-directory makefile for lucene
+ROOT = ..
+include ../com/lucene/rules.mk
diff --git a/src/demo/org/apache/lucene/Search.html b/src/demo/org/apache/lucene/Search.html
new file mode 100644
index 00000000000..58980a641d0
--- /dev/null
+++ b/src/demo/org/apache/lucene/Search.html
@@ -0,0 +1,17 @@
+
+
+Lucene Search Demo
+
+
+
+
+
+Lucene Search Demo
+
+
+
+
+
+
+
diff --git a/src/demo/org/apache/lucene/Search.jhtml b/src/demo/org/apache/lucene/Search.jhtml
new file mode 100644
index 00000000000..2de6b93b531
--- /dev/null
+++ b/src/demo/org/apache/lucene/Search.jhtml
@@ -0,0 +1,166 @@
+
+
+
+
+
+ javax.servlet.*
+ javax.servlet.http.*
+ java.io.*
+ com.lucene.analysis.*
+ com.lucene.document.*
+ com.lucene.index.*
+ com.lucene.search.*
+ com.lucene.queryParser.*
+ demo.HTMLParser.Entities
+
+
+
+ // get index from request
+ String indexName = request.getParameter("index");
+ if (indexName == null) // default to "index"
+ indexName = "index";
+ Searcher searcher = // make searcher
+ new IndexSearcher(getReader(indexName));
+
+ // get query from request
+ String queryString = request.getParameter("query");
+ if (queryString == null)
+ throw new ServletException("no query specified");
+
+ int start = 0; // first hit to display
+ String startString = request.getParameter("start");
+ if (startString != null)
+ start = Integer.parseInt(startString);
+
+ int hitsPerPage = 10; // number of hits to display
+ String hitsString = request.getParameter("hitsPerPage");
+ if (hitsString != null)
+ hitsPerPage = Integer.parseInt(hitsString);
+
+ boolean showSummaries = true; // show summaries?
+ if ("false".equals(request.getParameter("showSummaries")))
+ showSummaries = false;
+
+ Query query = null;
+ try { // parse query
+ query = QueryParser.parse(queryString, "contents", analyzer);
+ } catch (ParseException e) { // error parsing query
+
+ Error Parsing Query
+ While parsing `queryString`: `e.getMessage()`
+
+ return;
+ }
+
+ String servletPath = request.getRequestURI(); // getServletPath should work
+ int j = servletPath.indexOf('?'); // here but doesn't, so we
+ if (j != -1) // remove query by hand...
+ servletPath = servletPath.substring(0, j);
+
+
+
+
Lucene Search Results
+
+
+
+
+
+ Hits hits = searcher.search(query); // perform query
+ int end = Math.min(hits.length(), start + hitsPerPage);
+
+
+Hits start+1-end
+(out of hits.length() total matching documents):
+
+
+
+ for (int i = start; i < end; i++) { // display the hits
+ Document doc = hits.doc(i);
+ String title = doc.get("title");
+ if (title.equals("")) // use url for docs w/o title
+ title = doc.get("url");
+
+ (int)(hits.score(i) * 100.0f)%
+
+ Entities.encode(title)
+
+
+ if (showSummaries) { // maybe show summary
+
+
Summary:
+ Entities.encode(doc.get("summary"))
+
+
+ }
+ }
+
+
+
+
+ if (end < hits.length()) { // insert next page button
+
+
+
+
+
+ }
+
+
+
+
+
+
+ Analyzer analyzer = new StopAnalyzer(); // used to tokenize queries
+
+ /** Keep a cache of open IndexReaders, so that an index does not have to be
+ opened for each query. The cache re-opens an index when it has changed
+ so that additions and deletions are visible ASAP. */
+
+ static Hashtable indexCache = new Hashtable(); // name->CachedIndex
+
+ /** A cache entry: an open reader plus the modified date read just before opening it. */
+ class CachedIndex {
+   long modified;                                  // modified date of the named index
+   IndexReader reader;                             // reader opened on the named index
+   CachedIndex(String name) throws IOException {
+     this.modified = IndexReader.lastModified(name);
+     this.reader = IndexReader.open(name);
+   }
+ }
+
+ /** Returns an open reader for the named index. A cached reader is reused only
+  *  while the index's modified date still equals the one recorded in the cache. */
+ IndexReader getReader(String name) throws ServletException {
+   CachedIndex index =                             // look in cache
+     (CachedIndex)indexCache.get(name);
+
+   try {
+     if (index != null &&                          // check up-to-date
+         (index.modified == IndexReader.lastModified(name)))
+       return index.reader;                        // cache hit
+     else {
+       index = new CachedIndex(name);              // cache miss
+     }
+   } catch (IOException e) {
+     throw new ServletException("Could not open index " + name + ": " +
+                                e.getClass().getName() + "--" +
+                                e.getMessage());
+   }
+   // NOTE(review): a reader replaced by the put below is never closed here
+   indexCache.put(name, index);                    // add to cache
+   return index.reader;
+ }
+
diff --git a/src/demo/org/apache/lucene/SearchFiles.java b/src/demo/org/apache/lucene/SearchFiles.java
new file mode 100644
index 00000000000..4aa3bf45919
--- /dev/null
+++ b/src/demo/org/apache/lucene/SearchFiles.java
@@ -0,0 +1,110 @@
+package org.apache.lucene;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+
+import com.lucene.analysis.Analyzer;
+import com.lucene.analysis.StopAnalyzer;
+import com.lucene.document.Document;
+import com.lucene.search.Searcher;
+import com.lucene.search.IndexSearcher;
+import com.lucene.search.Query;
+import com.lucene.search.Hits;
+import com.lucene.queryParser.QueryParser;
+
+/** Command-line search demo: reads queries from standard input, searches the
+ *  "contents" field of the index named "index", and prints each hit's "path". */
+class SearchFiles {
+  public static void main(String[] args) {
+    try {
+      Searcher searcher = new IndexSearcher("index");
+      try {
+        Analyzer analyzer = new StopAnalyzer();
+        BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
+        while (true) {
+          System.out.print("Query: ");
+          String line = in.readLine();
+          if (line == null || line.length() == 0)  // end of input or empty query
+            break;
+
+          Query query = QueryParser.parse(line, "contents", analyzer);
+          System.out.println("Searching for: " + query.toString("contents"));
+          Hits hits = searcher.search(query);
+          System.out.println(hits.length() + " total matching documents");
+          final int HITS_PER_PAGE = 10;
+          for (int start = 0; start < hits.length(); start += HITS_PER_PAGE) {
+            int end = Math.min(hits.length(), start + HITS_PER_PAGE);
+            for (int i = start; i < end; i++)
+              System.out.println(i + ". " + hits.doc(i).get("path"));
+            if (hits.length() > end) {
+              System.out.print("more (y/n) ? ");
+              line = in.readLine();                // null when input has ended
+              if (line == null || line.length() == 0 || line.charAt(0) == 'n')
+                break;
+            }
+          }
+        }
+      } finally {
+        searcher.close();                          // release the searcher, even on error
+      }
+    } catch (Exception e) {
+      System.out.println(" caught a " + e.getClass() +
+                         "\n with message: " + e.getMessage());
+    }
+  }
+}
diff --git a/src/java/org/apache/lucene/Makefile b/src/java/org/apache/lucene/Makefile
new file mode 100644
index 00000000000..e0c941efbeb
--- /dev/null
+++ b/src/java/org/apache/lucene/Makefile
@@ -0,0 +1,9 @@
+# top-level makefile for lucene
+
+all: jar doc
+
+# root is two levels up
+ROOT = ../..
+
+include rules.mk
+
diff --git a/src/java/org/apache/lucene/analysis/Analyzer.java b/src/java/org/apache/lucene/analysis/Analyzer.java
new file mode 100644
index 00000000000..7c09b231799
--- /dev/null
+++ b/src/java/org/apache/lucene/analysis/Analyzer.java
@@ -0,0 +1,91 @@
+package org.apache.lucene.analysis;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.Reader;
+
+/** An Analyzer builds TokenStreams, which analyze text. It thus represents a
+ * policy for extracting index terms from text.
+ *
+ * Typical implementations first build a Tokenizer, which breaks the stream of
+ * characters from the Reader into raw Tokens. One or more TokenFilters may
+ * then be applied to the output of the Tokenizer.
+ *
+ * WARNING: You must override one of the methods defined by this class in your
+ * subclass or the Analyzer will enter an infinite loop.
+ */
+public abstract class Analyzer {
+  /** Creates a TokenStream which tokenizes all the text in the provided Reader.
+   *  This default implementation ignores fieldName and forwards to
+   *  {@link #tokenStream(Reader)}, for compatibility with older versions.
+   *  Override it to let the Analyzer choose a strategy based on document and/or
+   *  field. Must be able to handle a null field name for backward compatibility.
+   */
+  public TokenStream tokenStream(String fieldName, Reader reader) {
+    // The one-argument form forwards back here by default, so a subclass has to
+    // override at least one of the two methods.
+    return tokenStream(reader);
+  }
+
+  /** Creates a TokenStream which tokenizes all the text in the provided Reader.
+   *  Provided for backward compatibility only; forwards with a null field name.
+   *  @deprecated use {@link #tokenStream(String, Reader)} instead.
+   *  @see #tokenStream(String, Reader)
+   */
+  public TokenStream tokenStream(Reader reader) {
+    return tokenStream(null, reader);
+  }
+}
+
diff --git a/src/java/org/apache/lucene/analysis/LetterTokenizer.java b/src/java/org/apache/lucene/analysis/LetterTokenizer.java
new file mode 100644
index 00000000000..28f9562218a
--- /dev/null
+++ b/src/java/org/apache/lucene/analysis/LetterTokenizer.java
@@ -0,0 +1,114 @@
+package org.apache.lucene.analysis;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.Reader;
+
+/** A LetterTokenizer is a tokenizer that divides text at non-letters. That's
+ to say, it defines tokens as maximal strings of adjacent letters, as defined
+ by java.lang.Character.isLetter() predicate.
+
+ Note: this does a decent job for most European languages, but does a terrible
+ job for some Asian languages, where words are not separated by spaces. */
+
+public final class LetterTokenizer extends Tokenizer {
+  private final static int MAX_WORD_LEN = 255;     // longest token emitted
+  private final static int IO_BUFFER_SIZE = 1024;  // capacity of ioBuffer
+
+  private final char[] buffer = new char[MAX_WORD_LEN];      // current token text
+  private final char[] ioBuffer = new char[IO_BUFFER_SIZE];  // chars read from input
+  private int offset = 0;       // bumped once per loop pass in next()
+  private int bufferIndex = 0;  // next unread position in ioBuffer
+  private int dataLen = 0;      // result of the last input.read(); -1 at end
+
+  /** Creates a tokenizer that reads from the given Reader. */
+  public LetterTokenizer(Reader in) {
+    input = in;
+  }
+
+  /** Returns the next run of adjacent letters as a Token, cut off at
+   *  MAX_WORD_LEN chars, or null once the input has ended with no letters
+   *  pending. The Token is built with start and start+length as its offsets. */
+  public final Token next() throws java.io.IOException {
+    int length = 0;
+    int start = offset;
+    for (;;) {
+      offset++;
+      if (bufferIndex >= dataLen) {               // ioBuffer used up: refill
+        dataLen = input.read(ioBuffer);
+        bufferIndex = 0;
+      }
+      if (dataLen == -1) {                        // end of input
+        if (length == 0)
+          return null;
+        break;                                    // flush the pending token
+      }
+      char c = ioBuffer[bufferIndex++];
+      if (!Character.isLetter(c)) {
+        if (length > 0)                           // at non-letter w/ chars
+          break;                                  // return 'em
+        continue;                                 // non-letter before any token char
+      }
+
+      if (length == 0)                            // start of token
+        start = offset - 1;
+      buffer[length++] = c;                       // buffer it
+      if (length == MAX_WORD_LEN)                 // buffer full
+        break;
+    }
+
+    return new Token(new String(buffer, 0, length), start, start + length);
+  }
+}
diff --git a/src/java/org/apache/lucene/analysis/LowerCaseFilter.java b/src/java/org/apache/lucene/analysis/LowerCaseFilter.java
new file mode 100644
index 00000000000..a215cb7a94d
--- /dev/null
+++ b/src/java/org/apache/lucene/analysis/LowerCaseFilter.java
@@ -0,0 +1,74 @@
+package org.apache.lucene.analysis;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+/** Normalizes token text to lower case. */
+
+public final class LowerCaseFilter extends TokenFilter {
+  /** Wraps the given TokenStream as the source of the tokens to lower-case. */
+  public LowerCaseFilter(TokenStream in) {
+    input = in;
+  }
+
+  /** Returns the next token from the wrapped stream with its termText
+   *  lower-cased in place, or null when the wrapped stream returns null. */
+  public final Token next() throws java.io.IOException {
+    Token token = input.next();
+    // String.toLowerCase() with no argument applies the default locale's rules
+    if (token != null)
+      token.termText = token.termText.toLowerCase();
+    return token;
+  }
+}
diff --git a/src/java/org/apache/lucene/analysis/LowerCaseTokenizer.java b/src/java/org/apache/lucene/analysis/LowerCaseTokenizer.java
new file mode 100644
index 00000000000..b25b3789e47
--- /dev/null
+++ b/src/java/org/apache/lucene/analysis/LowerCaseTokenizer.java
@@ -0,0 +1,116 @@
+package org.apache.lucene.analysis;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.Reader;
+
+/** LowerCaseTokenizer performs the function of LetterTokenizer
+ and LowerCaseFilter together. It divides text at non-letters and converts
+ them to lower case. While it is functionally equivalent to the combination
+ of LetterTokenizer and LowerCaseFilter, there is a performance advantage
+ to doing the two tasks at once, hence this (redundant) implementation.
+
+ Note: this does a decent job for most European languages, but does a terrible
+ job for some Asian languages, where words are not separated by spaces. */
+
+public final class LowerCaseTokenizer extends Tokenizer {
+  private final static int MAX_WORD_LEN = 255;     // longest token emitted
+  private final static int IO_BUFFER_SIZE = 1024;  // capacity of ioBuffer
+
+  private final char[] buffer = new char[MAX_WORD_LEN];      // current token text
+  private final char[] ioBuffer = new char[IO_BUFFER_SIZE];  // chars read from input
+  private int offset = 0;       // bumped once per loop pass in next()
+  private int bufferIndex = 0;  // next unread position in ioBuffer
+  private int dataLen = 0;      // result of the last input.read(); -1 at end
+
+  /** Creates a tokenizer that reads from the given Reader. */
+  public LowerCaseTokenizer(Reader in) {
+    input = in;
+  }
+
+  /** Returns the next run of adjacent letters as a Token, each char passed
+   *  through Character.toLowerCase and the run cut off at MAX_WORD_LEN chars,
+   *  or null once the input has ended with no letters pending. */
+  public final Token next() throws java.io.IOException {
+    int length = 0;
+    int start = offset;
+    for (;;) {
+      offset++;
+      if (bufferIndex >= dataLen) {               // ioBuffer used up: refill
+        dataLen = input.read(ioBuffer);
+        bufferIndex = 0;
+      }
+      if (dataLen == -1) {                        // end of input
+        if (length == 0)
+          return null;
+        break;                                    // flush the pending token
+      }
+      char c = ioBuffer[bufferIndex++];
+      if (!Character.isLetter(c)) {
+        if (length > 0)                           // at non-letter w/ chars
+          break;                                  // return 'em
+        continue;                                 // non-letter before any token char
+      }
+
+      if (length == 0)                            // start of token
+        start = offset - 1;
+      buffer[length++] = Character.toLowerCase(c); // buffer it, lower-cased
+      if (length == MAX_WORD_LEN)                 // buffer full
+        break;
+    }
+
+    return new Token(new String(buffer, 0, length), start, start + length);
+  }
+}
diff --git a/src/java/org/apache/lucene/analysis/Makefile b/src/java/org/apache/lucene/analysis/Makefile
new file mode 100644
index 00000000000..09c091d1839
--- /dev/null
+++ b/src/java/org/apache/lucene/analysis/Makefile
@@ -0,0 +1,2 @@
+# sub-directory makefile for lucene
+include ../rules.mk
diff --git a/src/java/org/apache/lucene/analysis/PorterStemFilter.java b/src/java/org/apache/lucene/analysis/PorterStemFilter.java
new file mode 100644
index 00000000000..938c72d0720
--- /dev/null
+++ b/src/java/org/apache/lucene/analysis/PorterStemFilter.java
@@ -0,0 +1,98 @@
+package org.apache.lucene.analysis;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import java.util.Hashtable;
+
+/** Transforms the token stream as per the Porter stemming algorithm.
+ Note: the input to the stemming filter must already be in lower case,
+ so you will need to use LowerCaseFilter or LowerCaseTokenizer farther
+ down the Tokenizer chain in order for this to work properly!
+
+ To use this filter with other analyzers, you'll want to write an
+ Analyzer class that sets up the TokenStream chain as you want it.
+ To use this with LowerCaseTokenizer, for example, you'd write an
+ analyzer like this:
+
+ class MyAnalyzer extends Analyzer {
+ public final TokenStream tokenStream(String fieldName, Reader reader) {
+ return new PorterStemFilter(new LowerCaseTokenizer(reader));
+ }
+ }
+
+*/
+
+public final class PorterStemFilter extends TokenFilter {
+  /** Stemmer reused for every token that passes through this filter. */
+  private PorterStemmer stemmer;
+
+  /** Creates a filter that stems each token produced by <code>in</code>. */
+  public PorterStemFilter(TokenStream in) {
+    input = in;
+    stemmer = new PorterStemmer();
+  }
+
+  /** Returns the next input Token with its term text replaced by its stem,
+      or null once the wrapped stream is exhausted. */
+  public final Token next() throws IOException {
+    Token current = input.next();
+    if (current != null) {
+      String stemmed = stemmer.stem(current.termText);
+      // Identity comparison is deliberate: PorterStemmer.stem(String) hands
+      // back the very same String object when the word was left unchanged.
+      if (stemmed != current.termText)
+        current.termText = stemmed;
+    }
+    return current;
+  }
+}
diff --git a/src/java/org/apache/lucene/analysis/PorterStemmer.java b/src/java/org/apache/lucene/analysis/PorterStemmer.java
new file mode 100644
index 00000000000..120cd93a26a
--- /dev/null
+++ b/src/java/org/apache/lucene/analysis/PorterStemmer.java
@@ -0,0 +1,584 @@
+package org.apache.lucene.analysis;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+/*
+
+ Porter stemmer in Java. The original paper is in
+
+ Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14,
+ no. 3, pp 130-137,
+
+ See also http://www.muscat.com/~martin/stem.html
+
+ Bug 1 (reported by Gonzalo Parra 16/10/99) fixed as marked below.
+ The words 'aed', 'eed', 'oed' leave k at 'a' for step 3, and b[k-1]
+ is then outside the bounds of b.
+
+ Similarly,
+
+ Bug 2 (reported by Steve Dyrdahl 22/2/00) fixed as marked below.
+ 'ion' by itself leaves j = -1 in the test for 'ion' in step 5, and
+ b[j] is then outside the bounds of b.
+
+ Release 3.
+
+ [ This version is derived from Release 3, modified by Brian Goetz to
+ optimize for fewer object creations. ]
+
+*/
+
+
+import java.io.*;
+
+/**
+ *
+ * Stemmer, implementing the Porter Stemming Algorithm
+ *
+ * The Stemmer class transforms a word into its root form. The input
+ * word can be provided a character at a time (by calling add()), or at once
+ * by calling one of the various stem(something) methods.
+ */
+
+class PorterStemmer
+{
+  /** Working buffer holding the word being stemmed; replaced by a larger
+      array whenever a word does not fit. */
+  private char[] b;
+  private int i,    /* number of chars held in b; also the next write offset */
+    j,              /* end of the stem region used by m() and vowelinstem();
+                       set by ends() and by step6() */
+    k,              /* index of the last char of the word as stemmed so far */
+    k0;             /* index of the first char of the word */
+  /** True once stemming has altered the word since the last reset(). */
+  private boolean dirty = false;
+  private static final int INC = 50; /* unit of size whereby b is increased */
+  private static final int EXTRA = 1; /* spare room kept when sizing b */
+
+  /** Creates a stemmer with an empty word buffer of INC characters. */
+  public PorterStemmer() {
+    b = new char[INC];
+    i = 0;
+  }
+
+  /**
+   * reset() resets the stemmer so it can stem another word.  If you invoke
+   * the stemmer by calling add(char) and then stem(), you must call reset()
+   * before starting another word.
+   */
+  public void reset() { i = 0; dirty = false; }
+
+  /**
+   * Add a character to the word being stemmed.  When you are finished
+   * adding characters, you can call stem(void) to process the word.
+   */
+  public void add(char ch) {
+    if (b.length <= i + EXTRA) {
+      // grow by INC, keeping everything buffered so far
+      char[] new_b = new char[b.length+INC];
+      System.arraycopy(b, 0, new_b, 0, b.length);
+      b = new_b;
+    }
+    b[i++] = ch;
+  }
+
+  /**
+   * After a word has been stemmed, it can be retrieved by toString(),
+   * or a reference to the internal buffer can be retrieved by getResultBuffer
+   * and getResultLength (which is generally more efficient.)
+   */
+  public String toString() { return new String(b,0,i); }
+
+  /**
+   * Returns the length of the word resulting from the stemming process.
+   */
+  public int getResultLength() { return i; }
+
+  /**
+   * Returns a reference to a character buffer containing the results of
+   * the stemming process.  You also need to consult getResultLength()
+   * to determine the length of the result.
+   */
+  public char[] getResultBuffer() { return b; }
+
+  /* cons(i) is true <=> b[i] is a consonant.  'y' counts as a consonant at
+     the start of the word or when it follows a vowel. */
+
+  private final boolean cons(int i) {
+    switch (b[i]) {
+    case 'a': case 'e': case 'i': case 'o': case 'u':
+      return false;
+    case 'y':
+      return (i==k0) ? true : !cons(i-1);
+    default:
+      return true;
+    }
+  }
+
+  /* m() measures the number of consonant sequences between k0 and j. if c is
+     a consonant sequence and v a vowel sequence, and <..> indicates arbitrary
+     presence,
+
+          <c><v>       gives 0
+          <c>vc<v>     gives 1
+          <c>vcvc<v>   gives 2
+          <c>vcvcvc<v> gives 3
+          ....
+  */
+
+  private final int m() {
+    int n = 0;
+    int i = k0;
+    // skip the optional leading consonant sequence
+    while(true) {
+      if (i > j)
+        return n;
+      if (! cons(i))
+        break;
+      i++;
+    }
+    i++;
+    // each pass consumes one vowel sequence followed by one consonant sequence
+    while(true) {
+      while(true) {
+        if (i > j)
+          return n;
+        if (cons(i))
+          break;
+        i++;
+      }
+      i++;
+      n++;
+      while(true) {
+        if (i > j)
+          return n;
+        if (! cons(i))
+          break;
+        i++;
+      }
+      i++;
+    }
+  }
+
+  /* vowelinstem() is true <=> k0,...j contains a vowel */
+
+  private final boolean vowelinstem() {
+    int i;
+    for (i = k0; i <= j; i++)
+      if (! cons(i))
+        return true;
+    return false;
+  }
+
+  /* doublec(j) is true <=> j,(j-1) contain a double consonant. */
+
+  private final boolean doublec(int j) {
+    if (j < k0+1)
+      return false;
+    if (b[j] != b[j-1])
+      return false;
+    return cons(j);
+  }
+
+  /* cvc(i) is true <=> i-2,i-1,i has the form consonant - vowel - consonant
+     and also if the second c is not w,x or y. this is used when trying to
+     restore an e at the end of a short word. e.g.
+
+          cav(e), lov(e), hop(e), crim(e), but
+          snow, box, tray.
+
+  */
+
+  private final boolean cvc(int i) {
+    if (i < k0+2 || !cons(i) || cons(i-1) || !cons(i-2))
+      return false;
+    else {
+      int ch = b[i];
+      if (ch == 'w' || ch == 'x' || ch == 'y') return false;
+    }
+    return true;
+  }
+
+  /* ends(s) is true <=> k0,...k ends with the string s.  On a match j is
+     moved to the last character before the suffix. */
+
+  private final boolean ends(String s) {
+    int l = s.length();
+    int o = k-l+1;
+    if (o < k0)
+      return false;
+    for (int i = 0; i < l; i++)
+      if (b[o+i] != s.charAt(i))
+        return false;
+    j = k-l;
+    return true;
+  }
+
+  /* setto(s) sets (j+1),...k to the characters in the string s, readjusting
+     k.  The word is marked dirty. */
+
+  void setto(String s) {
+    int l = s.length();
+    int o = j+1;
+    for (int i = 0; i < l; i++)
+      b[o+i] = s.charAt(i);
+    k = j+l;
+    dirty = true;
+  }
+
+  /* r(s) is used further down: it applies setto(s) only when m() > 0. */
+
+  void r(String s) { if (m() > 0) setto(s); }
+
+  /* step1() gets rid of plurals and -ed or -ing. e.g.
+
+           caresses  ->  caress
+           ponies    ->  poni
+           ties      ->  ti
+           caress    ->  caress
+           cats      ->  cat
+
+           feed      ->  feed
+           agreed    ->  agree
+           disabled  ->  disable
+
+           matting   ->  mat
+           mating    ->  mate
+           meeting   ->  meet
+           milling   ->  mill
+           messing   ->  mess
+
+           meetings  ->  meet
+
+  */
+
+  private final void step1() {
+    if (b[k] == 's') {
+      if (ends("sses")) k -= 2;
+      else if (ends("ies")) setto("i");
+      else if (b[k-1] != 's') k--;
+    }
+    if (ends("eed")) {
+      if (m() > 0)
+        k--;
+    }
+    else if ((ends("ed") || ends("ing")) && vowelinstem()) {
+      k = j;
+      if (ends("at")) setto("ate");
+      else if (ends("bl")) setto("ble");
+      else if (ends("iz")) setto("ize");
+      else if (doublec(k)) {
+        int ch = b[k--];
+        if (ch == 'l' || ch == 's' || ch == 'z')
+          k++;
+      }
+      else if (m() == 1 && cvc(k))
+        setto("e");
+    }
+  }
+
+  /* step2() turns terminal y to i when there is another vowel in the stem. */
+
+  private final void step2() {
+    if (ends("y") && vowelinstem()) {
+      b[k] = 'i';
+      dirty = true;
+    }
+  }
+
+  /* step3() maps double suffices to single ones. so -ization ( = -ize plus
+     -ation) maps to -ize etc. note that the string before the suffix must give
+     m() > 0. */
+
+  private final void step3() {
+    if (k == k0) return; /* For Bug 1 */
+    switch (b[k-1]) {
+    case 'a':
+      if (ends("ational")) { r("ate"); break; }
+      if (ends("tional")) { r("tion"); break; }
+      break;
+    case 'c':
+      if (ends("enci")) { r("ence"); break; }
+      if (ends("anci")) { r("ance"); break; }
+      break;
+    case 'e':
+      if (ends("izer")) { r("ize"); break; }
+      break;
+    case 'l':
+      if (ends("bli")) { r("ble"); break; }
+      if (ends("alli")) { r("al"); break; }
+      if (ends("entli")) { r("ent"); break; }
+      if (ends("eli")) { r("e"); break; }
+      if (ends("ousli")) { r("ous"); break; }
+      break;
+    case 'o':
+      if (ends("ization")) { r("ize"); break; }
+      if (ends("ation")) { r("ate"); break; }
+      if (ends("ator")) { r("ate"); break; }
+      break;
+    case 's':
+      if (ends("alism")) { r("al"); break; }
+      if (ends("iveness")) { r("ive"); break; }
+      if (ends("fulness")) { r("ful"); break; }
+      if (ends("ousness")) { r("ous"); break; }
+      break;
+    case 't':
+      if (ends("aliti")) { r("al"); break; }
+      if (ends("iviti")) { r("ive"); break; }
+      if (ends("biliti")) { r("ble"); break; }
+      break;
+    case 'g':
+      if (ends("logi")) { r("log"); break; }
+    }
+  }
+
+  /* step4() deals with -ic-, -full, -ness etc. similar strategy to step3. */
+
+  private final void step4() {
+    switch (b[k]) {
+    case 'e':
+      if (ends("icate")) { r("ic"); break; }
+      if (ends("ative")) { r(""); break; }
+      if (ends("alize")) { r("al"); break; }
+      break;
+    case 'i':
+      if (ends("iciti")) { r("ic"); break; }
+      break;
+    case 'l':
+      if (ends("ical")) { r("ic"); break; }
+      if (ends("ful")) { r(""); break; }
+      break;
+    case 's':
+      if (ends("ness")) { r(""); break; }
+      break;
+    }
+  }
+
+  /* step5() takes off -ant, -ence etc., in context <c>vcvc<v>. */
+
+  private final void step5() {
+    if (k == k0) return; /* for Bug 1 */
+    switch (b[k-1]) {
+    case 'a':
+      if (ends("al")) break;
+      return;
+    case 'c':
+      if (ends("ance")) break;
+      if (ends("ence")) break;
+      return;
+    case 'e':
+      if (ends("er")) break; return;
+    case 'i':
+      if (ends("ic")) break; return;
+    case 'l':
+      if (ends("able")) break;
+      if (ends("ible")) break; return;
+    case 'n':
+      if (ends("ant")) break;
+      if (ends("ement")) break;
+      if (ends("ment")) break;
+      /* element etc. not stripped before the m */
+      if (ends("ent")) break;
+      return;
+    case 'o':
+      if (ends("ion") && j >= 0 && (b[j] == 's' || b[j] == 't')) break;
+      /* j >= 0 fixes Bug 2 */
+      if (ends("ou")) break;
+      return;
+      /* takes care of -ous */
+    case 's':
+      if (ends("ism")) break;
+      return;
+    case 't':
+      if (ends("ate")) break;
+      if (ends("iti")) break;
+      return;
+    case 'u':
+      if (ends("ous")) break;
+      return;
+    case 'v':
+      if (ends("ive")) break;
+      return;
+    case 'z':
+      if (ends("ize")) break;
+      return;
+    default:
+      return;
+    }
+    // a suffix matched: strip it only when the remaining stem has m() > 1
+    if (m() > 1)
+      k = j;
+  }
+
+  /* step6() removes a final -e if m() > 1. */
+
+  private final void step6() {
+    j = k;
+    if (b[k] == 'e') {
+      int a = m();
+      if (a > 1 || a == 1 && !cvc(k-1))
+        k--;
+    }
+    if (b[k] == 'l' && doublec(k) && m() > 1)
+      k--;
+  }
+
+
+  /**
+   * Stem a word provided as a String.  Returns the result as a String.
+   * When stemming leaves the word unchanged, the argument itself is
+   * returned rather than a copy.
+   */
+  public String stem(String s) {
+    if (stem(s.toCharArray(), s.length()))
+      return toString();
+    else
+      return s;
+  }
+
+  /** Stem a word contained in a char[].  Returns true if the stemming process
+   * resulted in a word different from the input.  You can retrieve the
+   * result with getResultLength()/getResultBuffer() or toString().
+   */
+  public boolean stem(char[] word) {
+    return stem(word, word.length);
+  }
+
+  /** Stem a word contained in a portion of a char[] array.  Returns
+   * true if the stemming process resulted in a word different from
+   * the input.  You can retrieve the result with
+   * getResultLength()/getResultBuffer() or toString().
+   */
+  public boolean stem(char[] wordBuffer, int offset, int wordLen) {
+    reset();
+    if (b.length < wordLen) {
+      // previous contents are not needed, so allocate without copying
+      char[] new_b = new char[wordLen + EXTRA];
+      b = new_b;
+    }
+    for (int j=0; j<wordLen; j++)
+      b[j] = wordBuffer[offset+j];
+    i = wordLen;
+    return stem(0);
+  }
+
+  /** Stem a word contained in a leading portion of a char[] array.
+   * Returns true if the stemming process resulted in a word different
+   * from the input.  You can retrieve the result with
+   * getResultLength()/getResultBuffer() or toString().
+   */
+  public boolean stem(char[] word, int wordLen) {
+    return stem(word, 0, wordLen);
+  }
+
+  /** Stem the word placed into the Stemmer buffer through calls to add().
+   * Returns true if the stemming process resulted in a word different
+   * from the input.  You can retrieve the result with
+   * getResultLength()/getResultBuffer() or toString().
+   */
+  public boolean stem() {
+    return stem(0);
+  }
+
+  /** Stem the word currently held in the buffer, treating index i0 as its
+   * first character.  Words of fewer than three characters are left
+   * untouched.  Returns true if the stemming process resulted in a word
+   * different from the input.
+   */
+  public boolean stem(int i0) {
+    k = i - 1;
+    k0 = i0;
+    if (k > k0+1) {
+      step1(); step2(); step3(); step4(); step5(); step6();
+    }
+    // Also, a word is considered dirty if we lopped off letters
+    // Thanks to Ifigenia Vairelles for pointing this out.
+    if (i != k+1)
+      dirty = true;
+    i = k+1;
+    return dirty;
+  }
+
+  /** Test program for demonstrating the Stemmer.  It reads a file and
+   * stems each word, writing the result to standard out.
+   * Usage: Stemmer file-name
+   */
+  public static void main(String[] args) {
+    PorterStemmer s = new PorterStemmer();
+
+    for (int i = 0; i < args.length; i++) {
+      try {
+        InputStream in = new FileInputStream(args[i]);
+        try {
+          byte[] buffer = new byte[1024];
+          int bufferLen, offset, ch;
+
+          bufferLen = in.read(buffer);
+          offset = 0;
+          s.reset();
+
+          while(true) {
+            // Bytes are masked to 0..255 so that values >= 0x80 are not
+            // sign-extended to negatives and mistaken for the -1
+            // end-of-file marker used below.
+            if (offset < bufferLen)
+              ch = buffer[offset++] & 0xff;
+            else {
+              bufferLen = in.read(buffer);
+              offset = 0;
+              if (bufferLen < 0)
+                ch = -1;
+              else
+                ch = buffer[offset++] & 0xff;
+            }
+
+            if (Character.isLetter((char) ch)) {
+              s.add(Character.toLowerCase((char) ch));
+            }
+            else {
+              // a non-letter (or end of file) terminates the current word
+              s.stem();
+              System.out.print(s.toString());
+              s.reset();
+              if (ch < 0)
+                break;
+              else {
+                System.out.print((char) ch);
+              }
+            }
+          }
+        }
+        finally {
+          // release the file even when a read fails part-way through
+          in.close();
+        }
+      }
+      catch (IOException e) {
+        System.out.println("error reading " + args[i]);
+      }
+    }
+  }
+}
+
diff --git a/src/java/org/apache/lucene/analysis/SimpleAnalyzer.java b/src/java/org/apache/lucene/analysis/SimpleAnalyzer.java
new file mode 100644
index 00000000000..df40d4bbf3c
--- /dev/null
+++ b/src/java/org/apache/lucene/analysis/SimpleAnalyzer.java
@@ -0,0 +1,65 @@
+package org.apache.lucene.analysis;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.Reader;
+
+/** An Analyzer that filters LetterTokenizer with LowerCaseFilter. */
+
+public final class SimpleAnalyzer extends Analyzer {
+ public final TokenStream tokenStream(String fieldName, Reader reader) {
+ return new LowerCaseTokenizer(reader);
+ }
+}
diff --git a/src/java/org/apache/lucene/analysis/StopAnalyzer.java b/src/java/org/apache/lucene/analysis/StopAnalyzer.java
new file mode 100644
index 00000000000..af285d2205f
--- /dev/null
+++ b/src/java/org/apache/lucene/analysis/StopAnalyzer.java
@@ -0,0 +1,90 @@
+package org.apache.lucene.analysis;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.Reader;
+import java.util.Hashtable;
+
+/** Filters LetterTokenizer with LowerCaseFilter and StopFilter. */
+
+public final class StopAnalyzer extends Analyzer {
+ private Hashtable stopTable;
+
+ /** An array containing some common English words that are not usually useful
+ for searching. */
+ public static final String[] ENGLISH_STOP_WORDS = {
+ "a", "and", "are", "as", "at", "be", "but", "by",
+ "for", "if", "in", "into", "is", "it",
+ "no", "not", "of", "on", "or", "s", "such",
+ "t", "that", "the", "their", "then", "there", "these",
+ "they", "this", "to", "was", "will", "with"
+ };
+
+ /** Builds an analyzer which removes words in ENGLISH_STOP_WORDS. */
+ public StopAnalyzer() {
+ stopTable = StopFilter.makeStopTable(ENGLISH_STOP_WORDS);
+ }
+
+ /** Builds an analyzer which removes words in the provided array. */
+ public StopAnalyzer(String[] stopWords) {
+ stopTable = StopFilter.makeStopTable(stopWords);
+ }
+
+ /** Filters LowerCaseTokenizer with StopFilter. */
+ public final TokenStream tokenStream(String fieldName, Reader reader) {
+ return new StopFilter(new LowerCaseTokenizer(reader), stopTable);
+ }
+}
+
diff --git a/src/java/org/apache/lucene/analysis/StopFilter.java b/src/java/org/apache/lucene/analysis/StopFilter.java
new file mode 100644
index 00000000000..fbc75d58c7a
--- /dev/null
+++ b/src/java/org/apache/lucene/analysis/StopFilter.java
@@ -0,0 +1,99 @@
+package org.apache.lucene.analysis;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import java.util.Hashtable;
+
+/** Removes stop words from a token stream. */
+
+public final class StopFilter extends TokenFilter {
+
+ private Hashtable table;
+
+ /** Constructs a filter which removes words from the input
+ TokenStream that are named in the array of words. */
+ public StopFilter(TokenStream in, String[] stopWords) {
+ input = in;
+ table = makeStopTable(stopWords);
+ }
+
+ /** Constructs a filter which removes words from the input
+ TokenStream that are named in the Hashtable. */
+ public StopFilter(TokenStream in, Hashtable stopTable) {
+ input = in;
+ table = stopTable;
+ }
+
+ /** Builds a Hashtable from an array of stop words, appropriate for passing
+ into the StopFilter constructor. This permits this table construction to
+ be cached once when an Analyzer is constructed. */
+ public final static Hashtable makeStopTable(String[] stopWords) {
+ Hashtable stopTable = new Hashtable(stopWords.length);
+ for (int i = 0; i < stopWords.length; i++)
+ stopTable.put(stopWords[i], stopWords[i]);
+ return stopTable;
+ }
+
+ /** Returns the next input Token whose termText() is not a stop word. */
+ public final Token next() throws IOException {
+ // return the first non-stop word found
+ for (Token token = input.next(); token != null; token = input.next())
+ if (table.get(token.termText) == null)
+ return token;
+ // reached EOS -- return null
+ return null;
+ }
+}
diff --git a/src/java/org/apache/lucene/analysis/Token.java b/src/java/org/apache/lucene/analysis/Token.java
new file mode 100644
index 00000000000..d41738f7370
--- /dev/null
+++ b/src/java/org/apache/lucene/analysis/Token.java
@@ -0,0 +1,111 @@
+package org.apache.lucene.analysis;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+/** A Token is an occurrence of a term from the text of a field. It consists of
+ a term's text, the start and end offset of the term in the text of the field,
+ and a type string.
+ <p>
+ The start and end offsets permit applications to re-associate a token with
+ its source text, e.g., to display highlighted query terms in a document
+ browser, or to show matching text fragments in a KWIC (KeyWord In Context)
+ display, etc.
+ <p>
+ The type is an interned string, assigned by a lexical analyzer
+ (a.k.a. tokenizer), naming the lexical or syntactic class that the token
+ belongs to. For example an end of sentence marker token might be implemented
+ with type "eos". The default token type is "word".
+ <p>
+ NOTE(review): the constructors store the type reference exactly as passed
+ and do not call intern() themselves, so interning is up to the caller. */
+
+public final class Token {
+ String termText; // the text of the term (package-private; filters read it directly)
+ int startOffset; // start in source text
+ int endOffset; // end in source text (one past the last character)
+ String type = "word"; // lexical type
+
+ /** Constructs a Token with the given term text, and start & end offsets.
+ The type defaults to "word."
+ @param text the term text
+ @param start offset of the token's first character in the source text
+ @param end offset one past the token's last character in the source text */
+ public Token(String text, int start, int end) {
+ termText = text;
+ startOffset = start;
+ endOffset = end;
+ }
+
+ /** Constructs a Token with the given text, start and end offsets, & type.
+ @param text the term text
+ @param start offset of the token's first character in the source text
+ @param end offset one past the token's last character in the source text
+ @param typ the lexical type; stored as given */
+ public Token(String text, int start, int end, String typ) {
+ termText = text;
+ startOffset = start;
+ endOffset = end;
+ type = typ;
+ }
+
+ /** Returns the Token's term text. */
+ public final String termText() { return termText; }
+
+ /** Returns this Token's starting offset, the position of the first character
+ corresponding to this token in the source text.
+ <p>
+ Note that the difference between endOffset() and startOffset() may not be
+ equal to termText.length(), as the term text may have been altered by a
+ stemmer or some other filter. */
+ public final int startOffset() { return startOffset; }
+
+ /** Returns this Token's ending offset, one greater than the position of the
+ last character corresponding to this token in the source text. */
+ public final int endOffset() { return endOffset; }
+
+ /** Returns this Token's lexical type. Defaults to "word". */
+ public final String type() { return type; }
+
+}
diff --git a/src/java/org/apache/lucene/analysis/TokenFilter.java b/src/java/org/apache/lucene/analysis/TokenFilter.java
new file mode 100644
index 00000000000..20bfa39da3e
--- /dev/null
+++ b/src/java/org/apache/lucene/analysis/TokenFilter.java
@@ -0,0 +1,74 @@
+package org.apache.lucene.analysis;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+
+/** A TokenFilter is a TokenStream whose input is another token stream.
+ <p>
+ This is an abstract class. It declares no constructor, so each subclass
+ must assign {@link #input} itself, and must implement next().
+ */
+
+abstract public class TokenFilter extends TokenStream {
+ /** The source of tokens for this filter. Assigned by subclasses. */
+ protected TokenStream input;
+
+ /** Close the input TokenStream.
+ @throws IOException if closing the input stream fails */
+ public void close() throws IOException {
+ input.close();
+ }
+
+}
+
diff --git a/src/java/org/apache/lucene/analysis/TokenStream.java b/src/java/org/apache/lucene/analysis/TokenStream.java
new file mode 100644
index 00000000000..feaa229bf08
--- /dev/null
+++ b/src/java/org/apache/lucene/analysis/TokenStream.java
@@ -0,0 +1,77 @@
+package org.apache.lucene.analysis;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+
+/** A TokenStream enumerates the sequence of tokens, either from
+ fields of a document or from query text.
+ <p>
+ This is an abstract class. Its direct subclasses, both themselves
+ abstract, are:
+ <ul>
+ <li>{@link Tokenizer}, a TokenStream
+ whose input is a Reader; and
+ <li>{@link TokenFilter}, a TokenStream
+ whose input is another TokenStream.
+ </ul>
+ */
+
+abstract public class TokenStream {
+ /** Returns the next token in the stream, or null at EOS.
+ @throws IOException if the underlying input cannot be read */
+ abstract public Token next() throws IOException;
+
+ /** Releases resources associated with this stream. This default
+ implementation does nothing; subclasses that hold an input override it.
+ @throws IOException declared so that overriding methods may throw it */
+ public void close() throws IOException {}
+}
diff --git a/src/java/org/apache/lucene/analysis/Tokenizer.java b/src/java/org/apache/lucene/analysis/Tokenizer.java
new file mode 100644
index 00000000000..2cc3f037f58
--- /dev/null
+++ b/src/java/org/apache/lucene/analysis/Tokenizer.java
@@ -0,0 +1,74 @@
+package org.apache.lucene.analysis;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.Reader;
+import java.io.IOException;
+
+/** A Tokenizer is a TokenStream whose input is a Reader.
+ <p>
+ This is an abstract class. It declares no constructor, so each subclass
+ must assign {@link #input} itself, and must implement next().
+ */
+
+abstract public class Tokenizer extends TokenStream {
+ /** The text source for this Tokenizer. Assigned by subclasses. */
+ protected Reader input;
+
+ /** By default, closes the input Reader.
+ @throws IOException if closing the Reader fails */
+ public void close() throws IOException {
+ input.close();
+ }
+}
+
diff --git a/src/java/org/apache/lucene/analysis/package.html b/src/java/org/apache/lucene/analysis/package.html
new file mode 100644
index 00000000000..6b8ebf93b31
--- /dev/null
+++ b/src/java/org/apache/lucene/analysis/package.html
@@ -0,0 +1,10 @@
+
+
+
+
+
+
+
+API and code to convert text into indexable tokens.
+
+
diff --git a/src/java/org/apache/lucene/analysis/standard/.cvsignore b/src/java/org/apache/lucene/analysis/standard/.cvsignore
new file mode 100644
index 00000000000..bf0d3397846
--- /dev/null
+++ b/src/java/org/apache/lucene/analysis/standard/.cvsignore
@@ -0,0 +1,6 @@
+Token.java
+StandardTokenizer.java
+StandardTokenizerTokenManager.java
+TokenMgrError.java
+CharStream.java
+StandardTokenizerConstants.java
diff --git a/src/java/org/apache/lucene/analysis/standard/FastCharStream.java b/src/java/org/apache/lucene/analysis/standard/FastCharStream.java
new file mode 100644
index 00000000000..f9bcc717223
--- /dev/null
+++ b/src/java/org/apache/lucene/analysis/standard/FastCharStream.java
@@ -0,0 +1,159 @@
+// FastCharStream.java
+package org.apache.lucene.analysis.standard;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.*;
+
+/** An efficient implementation of JavaCC's CharStream interface. Note that
+ * this does not do line-number counting, but instead keeps track of the
+ * character position of the token in the input, as required by Lucene's {@link
+ * org.apache.lucene.analysis.Token} API.
+ *
+ * <p>Characters are read from the Reader in blocks into one char buffer.
+ * The characters of the token being scanned (everything from tokenStart
+ * on) are always kept in the buffer: when more input is needed, refill()
+ * either moves them to the front of the buffer or, if the token already
+ * starts at index 0 and the buffer is full, doubles the buffer. The buffer
+ * is first allocated with 2048 chars.
+ *
+ * <p>All the line methods return 1. The column methods return character
+ * offsets counted from the start of the input: bufferStart plus an index
+ * into the buffer.
+ *
+ * <p>At end of input, readChar() and BeginToken() throw an IOException
+ * with the message "read past eof". */
+public final class FastCharStream implements CharStream {
+ char[] buffer = null; // allocated lazily by the first refill()
+
+ int bufferLength = 0; // end of valid chars
+ int bufferPosition = 0; // next char to read
+
+ int tokenStart = 0; // offset in buffer
+ int bufferStart = 0; // position in file of buffer
+
+ Reader input; // source of chars
+
+ /** Constructs from a Reader. Nothing is read until the first character is
+ * requested. */
+ public FastCharStream(Reader r) {
+ input = r;
+ }
+
+ public final char readChar() throws IOException { // next char; refills the buffer when it is used up
+ if (bufferPosition >= bufferLength)
+ refill();
+ return buffer[bufferPosition++];
+ }
+
+ private final void refill() throws IOException { // keeps chars from tokenStart on, then reads more input
+ int newPosition = bufferLength - tokenStart; // number of chars to keep
+
+ if (tokenStart == 0) { // token won't fit in buffer
+ if (buffer == null) { // first time: alloc buffer
+ buffer = new char[2048];
+ } else if (bufferLength == buffer.length) { // grow buffer
+ char[] newBuffer = new char[buffer.length*2];
+ System.arraycopy(buffer, 0, newBuffer, 0, bufferLength);
+ buffer = newBuffer;
+ }
+ } else { // shift token to front
+ System.arraycopy(buffer, tokenStart, buffer, 0, newPosition);
+ }
+
+ bufferLength = newPosition; // update state
+ bufferPosition = newPosition;
+ bufferStart += tokenStart; // chars before tokenStart were dropped from the buffer
+ tokenStart = 0;
+
+ int charsRead = // fill space in buffer
+ input.read(buffer, newPosition, buffer.length-newPosition);
+ if (charsRead == -1) // Reader reported end of input
+ throw new IOException("read past eof");
+ else
+ bufferLength += charsRead;
+ }
+
+ public final char BeginToken() throws IOException { // marks the token start, returns its first char
+ tokenStart = bufferPosition;
+ return readChar();
+ }
+
+ public final void backup(int amount) { // un-reads the last `amount` chars
+ bufferPosition -= amount;
+ }
+
+ public final String GetImage() { // text from the token start up to the read position
+ return new String(buffer, tokenStart, bufferPosition - tokenStart);
+ }
+
+ public final char[] GetSuffix(int len) { // copy of the last `len` chars read
+ char[] value = new char[len];
+ System.arraycopy(buffer, bufferPosition - len, value, 0, len);
+ return value;
+ }
+
+ public final void Done() { // closes the Reader; a failure is reported, not thrown
+ try {
+ input.close();
+ } catch (IOException e) {
+ System.err.println("Caught: " + e + "; ignoring.");
+ }
+ }
+
+ public final int getColumn() { // offset in the input of the next char to read
+ return bufferStart + bufferPosition;
+ }
+ public final int getLine() { // lines are not counted: always 1
+ return 1;
+ }
+ public final int getEndColumn() { // same value as getColumn()
+ return bufferStart + bufferPosition;
+ }
+ public final int getEndLine() { // lines are not counted: always 1
+ return 1;
+ }
+ public final int getBeginColumn() { // offset in the input of the token's first char
+ return bufferStart + tokenStart;
+ }
+ public final int getBeginLine() { // lines are not counted: always 1
+ return 1;
+ }
+}
diff --git a/src/java/org/apache/lucene/analysis/standard/Makefile b/src/java/org/apache/lucene/analysis/standard/Makefile
new file mode 100644
index 00000000000..5a3ad759de2
--- /dev/null
+++ b/src/java/org/apache/lucene/analysis/standard/Makefile
@@ -0,0 +1,7 @@
+ROOT = ../../../..
+
+include ../../rules.mk
+
+# Don't delete ParseException.java -- we've changed it by hand.
+DIRT := $(patsubst ParseException.java,,${DIRT})
+
diff --git a/src/java/org/apache/lucene/analysis/standard/ParseException.java b/src/java/org/apache/lucene/analysis/standard/ParseException.java
new file mode 100644
index 00000000000..856fe93656a
--- /dev/null
+++ b/src/java/org/apache/lucene/analysis/standard/ParseException.java
@@ -0,0 +1,191 @@
+/* Generated By:JavaCC: Do not edit this line. ParseException.java Version 0.7pre6 */
+package org.apache.lucene.analysis.standard;
+
+/**
+ * This exception is thrown when parse errors are encountered.
+ * You can explicitly create objects of this exception type by
+ * calling the method generateParseException in the generated
+ * parser.
+ *
+ * You can modify this class to customize your error reporting
+ * mechanisms so long as you retain the public fields.
+ *
+ * This class extends java.io.IOException, so code that already
+ * handles IOException also catches parse errors.
+ */
+public class ParseException extends java.io.IOException {
+
+ /**
+ * This constructor is used by the method "generateParseException"
+ * in the generated parser. Calling this constructor generates
+ * a new object of this type with the fields "currentToken",
+ * "expectedTokenSequences", and "tokenImage" set. The boolean
+ * flag "specialConstructor" is also set to true to indicate that
+ * this constructor was used to create this object.
+ * This constructor calls its super class with the empty string
+ * to force the "toString" method of parent class "Throwable" to
+ * print the error message in the form:
+ * ParseException: &lt;result of getMessage&gt;
+ *
+ * @param currentTokenVal the last token consumed successfully
+ * @param expectedTokenSequencesVal the token sequences (as arrays of
+ * token kinds) that were expected at the point of the error
+ * @param tokenImageVal the parser's table of token images, indexed
+ * by token kind
+ */
+ public ParseException(Token currentTokenVal,
+ int[][] expectedTokenSequencesVal,
+ String[] tokenImageVal
+ )
+ {
+ super("");
+ specialConstructor = true;
+ currentToken = currentTokenVal;
+ expectedTokenSequences = expectedTokenSequencesVal;
+ tokenImage = tokenImageVal;
+ }
+
+ /**
+ * The following constructors are for use by you for whatever
+ * purpose you can think of. Constructing the exception in this
+ * manner makes the exception behave in the normal way - i.e., as
+ * documented in the class "Throwable". The fields "currentToken",
+ * "expectedTokenSequences", and "tokenImage" do not contain
+ * relevant information. The JavaCC generated code does not use
+ * these constructors.
+ */
+
+ public ParseException() {
+ super();
+ specialConstructor = false;
+ }
+
+ public ParseException(String message) { // ordinary exception carrying the given message
+ super(message);
+ specialConstructor = false;
+ }
+
+ /**
+ * This variable determines which constructor was used to create
+ * this object and thereby affects the semantics of the
+ * "getMessage" method (see below).
+ */
+ protected boolean specialConstructor;
+
+ /**
+ * This is the last token that has been consumed successfully. If
+ * this object has been created due to a parse error, the token
+ * following this token will (therefore) be the first error token.
+ */
+ public Token currentToken;
+
+ /**
+ * Each entry in this array is an array of integers. Each array
+ * of integers represents a sequence of tokens (by their ordinal
+ * values) that is expected at this point of the parse.
+ */
+ public int[][] expectedTokenSequences;
+
+ /**
+ * This is a reference to the "tokenImage" array of the generated
+ * parser within which the parse error occurred. This array is
+ * defined in the generated ...Constants interface.
+ */
+ public String[] tokenImage;
+
+ /**
+ * This method has the standard behavior when this object has been
+ * created using the standard constructors. Otherwise, it uses
+ * "currentToken" and "expectedTokenSequences" to generate a parse
+ * error message and returns it. If this object has been created
+ * due to a parse error, and you do not catch it (it gets thrown
+ * from the parser), then this method is called during the printing
+ * of the final stack trace, and hence the correct error message
+ * gets displayed.
+ *
+ * The generated message names the offending tokens, gives the line
+ * and column recorded in the first of them, and then lists every
+ * expected token sequence on a line of its own.
+ *
+ * @return the message passed to the constructor, or the generated
+ * parse error message
+ */
+ public String getMessage() {
+ if (!specialConstructor) {
+ return super.getMessage();
+ }
+ String expected = ""; // one line of text per expected token sequence
+ int maxSize = 0; // length of the longest expected sequence
+ for (int i = 0; i < expectedTokenSequences.length; i++) {
+ if (maxSize < expectedTokenSequences[i].length) {
+ maxSize = expectedTokenSequences[i].length;
+ }
+ for (int j = 0; j < expectedTokenSequences[i].length; j++) {
+ expected += tokenImage[expectedTokenSequences[i][j]] + " ";
+ }
+ if (expectedTokenSequences[i][expectedTokenSequences[i].length - 1] != 0) { // sequence does not end in token kind 0
+ expected += "...";
+ }
+ expected += eol + " ";
+ }
+ String retval = "Encountered \"";
+ Token tok = currentToken.next; // first token that could not be consumed
+ for (int i = 0; i < maxSize; i++) { // show at most maxSize offending tokens
+ if (i != 0) retval += " ";
+ if (tok.kind == 0) { // kind 0 (presumably end of input -- confirm in the generated constants): print its image and stop
+ retval += tokenImage[0];
+ break;
+ }
+ retval += add_escapes(tok.image);
+ tok = tok.next;
+ }
+ retval += "\" at line " + currentToken.next.beginLine + ", column " + currentToken.next.beginColumn + "." + eol;
+ if (expectedTokenSequences.length == 1) {
+ retval += "Was expecting:" + eol + " ";
+ } else {
+ retval += "Was expecting one of:" + eol + " ";
+ }
+ retval += expected;
+ return retval;
+ }
+
+ /**
+ * The end of line string for this machine.
+ */
+ protected String eol = System.getProperty("line.separator", "\n");
+
+ /**
+ * Used to convert raw characters to their escaped version
+ * when these raw version cannot be used as part of an ASCII
+ * string literal.
+ *
+ * Characters with value 0 are dropped. Backspace, tab, newline,
+ * form feed, carriage return, quotes and backslash become their
+ * two-character escapes. Any other character below 0x20 or above
+ * 0x7e becomes a backslash-u escape with four hex digits. All
+ * remaining characters are copied unchanged.
+ *
+ * @param str the text to escape
+ * @return the escaped text
+ */
+ protected String add_escapes(String str) {
+ StringBuffer retval = new StringBuffer();
+ char ch;
+ for (int i = 0; i < str.length(); i++) {
+ switch (str.charAt(i))
+ {
+ case 0 : // NUL characters are left out of the result
+ continue;
+ case '\b':
+ retval.append("\\b");
+ continue;
+ case '\t':
+ retval.append("\\t");
+ continue;
+ case '\n':
+ retval.append("\\n");
+ continue;
+ case '\f':
+ retval.append("\\f");
+ continue;
+ case '\r':
+ retval.append("\\r");
+ continue;
+ case '\"':
+ retval.append("\\\"");
+ continue;
+ case '\'':
+ retval.append("\\\'");
+ continue;
+ case '\\':
+ retval.append("\\\\");
+ continue;
+ default:
+ if ((ch = str.charAt(i)) < 0x20 || ch > 0x7e) { // outside printable ASCII
+ String s = "0000" + Integer.toString(ch, 16); // left-pad so at least four hex digits exist
+ retval.append("\\u" + s.substring(s.length() - 4, s.length()));
+ } else {
+ retval.append(ch);
+ }
+ continue;
+ }
+ }
+ return retval.toString();
+ }
+
+}
diff --git a/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java b/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
new file mode 100644
index 00000000000..19ab618d574
--- /dev/null
+++ b/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
@@ -0,0 +1,95 @@
+package org.apache.lucene.analysis.standard;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import org.apache.lucene.analysis.*;
+import java.io.Reader;
+import java.util.Hashtable;
+
+/** Filters {@link StandardTokenizer} with {@link StandardFilter}, {@link
+ * LowerCaseFilter} and {@link StopFilter}.
+ *
+ * The stop-word table is built once, when the analyzer is constructed, and
+ * that same table is handed to every StopFilter this analyzer creates. */
+public final class StandardAnalyzer extends Analyzer {
+ private Hashtable stopTable; // built once in the constructor, shared by all streams
+
+ /** An array containing some common English words that are not usually useful
+ for searching. This is the stop list used by the no-argument
+ constructor. */
+ public static final String[] STOP_WORDS = {
+ "a", "and", "are", "as", "at", "be", "but", "by",
+ "for", "if", "in", "into", "is", "it",
+ "no", "not", "of", "on", "or", "s", "such",
+ "t", "that", "the", "their", "then", "there", "these",
+ "they", "this", "to", "was", "will", "with"
+ };
+
+ /** Builds an analyzer that uses {@link #STOP_WORDS} as its stop words. */
+ public StandardAnalyzer() {
+ this(STOP_WORDS);
+ }
+
+ /** Builds an analyzer with the given stop words.
+ @param stopWords the words to remove from token streams; must not be
+ null. StopFilter matches term text exactly and runs after
+ LowerCaseFilter, so the words are presumably expected in lower case
+ -- TODO confirm against LowerCaseFilter. */
+ public StandardAnalyzer(String[] stopWords) {
+ stopTable = StopFilter.makeStopTable(stopWords);
+ }
+
+ /** Constructs a {@link StandardTokenizer} filtered by a {@link
+ * StandardFilter}, a {@link LowerCaseFilter} and a {@link StopFilter}.
+ * The filters are applied in that order.
+ *
+ * @param fieldName not used by this implementation
+ * @param reader the text to tokenize
+ * @return the StopFilter at the end of the filter chain */
+ public final TokenStream tokenStream(String fieldName, Reader reader) {
+ TokenStream result = new StandardTokenizer(reader);
+ result = new StandardFilter(result);
+ result = new LowerCaseFilter(result);
+ result = new StopFilter(result, stopTable);
+ return result;
+ }
+}
diff --git a/src/java/org/apache/lucene/analysis/standard/StandardFilter.java b/src/java/org/apache/lucene/analysis/standard/StandardFilter.java
new file mode 100644
index 00000000000..16b5e69f249
--- /dev/null
+++ b/src/java/org/apache/lucene/analysis/standard/StandardFilter.java
@@ -0,0 +1,106 @@
+package org.apache.lucene.analysis.standard;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import org.apache.lucene.analysis.*;
+
+/** Normalizes tokens extracted with {@link StandardTokenizer}: strips a
+ * trailing possessive from apostrophe tokens and the dots from acronyms. */
+
+public final class StandardFilter extends TokenFilter
+  implements StandardTokenizerConstants {
+
+
+  /** Constructs a filter that reads its tokens from <code>in</code>. */
+  public StandardFilter(TokenStream in) {
+    input = in;
+  }
+
+  /** Type name carried by tokens that contain internal apostrophes. */
+  private static final String APOSTROPHE_TYPE = tokenImage[APOSTROPHE];
+  /** Type name carried by acronym tokens. */
+  private static final String ACRONYM_TYPE = tokenImage[ACRONYM];
+
+  /** Returns the next token in the stream, or null at EOS.
+   * <p>Removes <tt>'s</tt> from the end of words.
+   * <p>Removes dots from acronyms.
+   * <p>Tokens of any other type are returned unchanged.
+   *
+   * @return the (possibly rewritten) next token, or null at end of stream
+   * @throws java.io.IOException if the underlying stream fails
+   */
+  public final org.apache.lucene.analysis.Token next() throws java.io.IOException {
+    org.apache.lucene.analysis.Token t = input.next();
+
+    if (t == null)
+      return null;
+
+    String text = t.termText();
+    String type = t.type();
+
+    // Type names are compared by value, not identity, so that a token whose
+    // type string is equal to (but not the same instance as) the tokenImage
+    // entry is still normalized.
+    if (APOSTROPHE_TYPE.equals(type) &&           // remove 's
+        (text.endsWith("'s") || text.endsWith("'S"))) {
+      // Offsets are kept from the original token; only the text shrinks.
+      return new org.apache.lucene.analysis.Token
+        (text.substring(0,text.length()-2),
+         t.startOffset(), t.endOffset(), type);
+
+    } else if (ACRONYM_TYPE.equals(type)) {       // remove dots
+      StringBuffer trimmed = new StringBuffer();
+      for (int i = 0; i < text.length(); i++) {
+        char c = text.charAt(i);
+        if (c != '.')
+          trimmed.append(c);
+      }
+      return new org.apache.lucene.analysis.Token
+        (trimmed.toString(), t.startOffset(), t.endOffset(), type);
+
+    } else {
+      return t;
+    }
+  }
+}
diff --git a/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.jj b/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.jj
new file mode 100644
index 00000000000..6abdc673bc7
--- /dev/null
+++ b/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.jj
@@ -0,0 +1,197 @@
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+// JavaCC generator options for the StandardTokenizer grammar.  Lines that are
+// commented out leave the corresponding option at its JavaCC default.
+options {
+ // NOTE(review): presumably non-static so that each tokenizer instance has
+ // its own state; the class below is constructed once per Reader.
+ STATIC = false;
+//IGNORE_CASE = true;
+//BUILD_PARSER = false;
+//UNICODE_INPUT = true;
+ // The character stream is supplied by this package rather than generated;
+ // the constructor below passes in a FastCharStream.
+ USER_CHAR_STREAM = true;
+ OPTIMIZE_TOKEN_MANAGER = true;
+//DEBUG_TOKEN_MANAGER = true;
+}
+PARSER_BEGIN(StandardTokenizer)
+
+package org.apache.lucene.analysis.standard;
+
+import java.io.*;
+
+/** A grammar-based tokenizer constructed with JavaCC.
+ *
+ * <p>This should be a good tokenizer for most European-language documents.
+ *
+ * <p>Many applications have specific tokenizer needs. If this tokenizer does
+ * not suit your application, please consider copying this source code
+ * directory to your project and maintaining your own grammar-based tokenizer.
+ */
+public class StandardTokenizer extends org.apache.lucene.analysis.Tokenizer {
+
+ /** Constructs a tokenizer for this Reader.  The reader is wrapped in a
+  * FastCharStream for the tokenizer and is also stored in the
+  * <code>input</code> field.
+  * NOTE(review): the constructor delegated to is presumably the
+  * JavaCC-generated one taking a character stream -- confirm. */
+ public StandardTokenizer(Reader reader) {
+ this(new FastCharStream(reader));
+ this.input = reader;
+ }
+}
+
+PARSER_END(StandardTokenizer)
+
+// Lexical specification.  Public tokens (no '#') are returned by next() and
+// their names become token types; '#' tokens are private building blocks
+// used only inside other patterns.
+TOKEN : {                                         // token patterns
+
+  // basic word: a sequence of digits & letters
+  <ALPHANUM: (<LETTER>|<DIGIT>)+ >
+
+  // internal apostrophes: O'Reilly, you're, O'Reilly's
+  // use a post-filter to remove possessives
+| <APOSTROPHE: <ALPHA> ("'" <ALPHA>)+ >
+
+  // acronyms: U.S.A., I.B.M., etc.
+  // use a post-filter to remove dots
+| <ACRONYM: <ALPHA> "." (<ALPHA> ".")+ >
+
+  // company names like AT&T and Excite@Home.
+| <COMPANY: <ALPHA> ("&"|"@") <ALPHA> >
+
+  // email addresses
+| <EMAIL: <ALPHANUM> "@" <ALPHANUM> ("." <ALPHANUM>)+ >
+
+  // hostname
+| <HOST: <ALPHANUM> ("." <ALPHANUM>)+ >
+
+  // floating point, serial, model numbers, ip addresses, etc.
+  // every other segment must have at least one digit
+| <NUM: (<ALPHANUM> <P> <HAS_DIGIT>
+       | <HAS_DIGIT> <P> <ALPHANUM>
+       | <ALPHANUM> (<P> <HAS_DIGIT> <P> <ALPHANUM>)+
+       | <HAS_DIGIT> (<P> <ALPHANUM> <P> <HAS_DIGIT>)+
+       | <ALPHANUM> <P> <HAS_DIGIT> (<P> <ALPHANUM> <P> <HAS_DIGIT>)+
+       | <HAS_DIGIT> <P> <ALPHANUM> (<P> <HAS_DIGIT> <P> <ALPHANUM>)+
+        )
+  >
+  // punctuation allowed between the segments of a NUM token
+| <#P: ("_"|"-"|"/"|"."|",") >
+| <#HAS_DIGIT:                                    // at least one digit
+    (<LETTER>|<DIGIT>)*
+    <DIGIT>
+    (<LETTER>|<DIGIT>)*
+  >
+
+  // one or more letters, no digits
+| < #ALPHA: (<LETTER>)+>
+| < #LETTER:                                      // unicode letters
+      [
+       "\u0041"-"\u005a",
+       "\u0061"-"\u007a",
+       "\u00c0"-"\u00d6",
+       "\u00d8"-"\u00f6",
+       "\u00f8"-"\u00ff",
+       "\u0100"-"\u1fff",
+       "\u3040"-"\u318f",
+       "\u3300"-"\u337f",
+       "\u3400"-"\u3d2d",
+       "\u4e00"-"\u9fff",
+       "\uf900"-"\ufaff"
+      ]
+  >
+| < #DIGIT:                                       // unicode digits
+      [
+       "\u0030"-"\u0039",
+       "\u0660"-"\u0669",
+       "\u06f0"-"\u06f9",
+       "\u0966"-"\u096f",
+       "\u09e6"-"\u09ef",
+       "\u0a66"-"\u0a6f",
+       "\u0ae6"-"\u0aef",
+       "\u0b66"-"\u0b6f",
+       "\u0be7"-"\u0bef",
+       "\u0c66"-"\u0c6f",
+       "\u0ce6"-"\u0cef",
+       "\u0d66"-"\u0d6f",
+       "\u0e50"-"\u0e59",
+       "\u0ed0"-"\u0ed9",
+       "\u1040"-"\u1049"
+      ]
+  >
+}
+
+// Any single character that starts no TOKEN pattern is consumed and dropped.
+SKIP : {                                          // skip unrecognized chars
+ <NOISE: ~[] >
+}
+
+/** Returns the next token in the stream, or null at EOS.
+ * <p>The returned token's type is set to an element of {@link
+ * StandardTokenizerConstants#tokenImage}.
+ */
+org.apache.lucene.analysis.Token next() throws IOException :
+{
+  Token token = null;
+}
+{
+  // Accept exactly one public token kind, or end of input.
+  ( token = <ALPHANUM> |
+    token = <APOSTROPHE> |
+    token = <ACRONYM> |
+    token = <COMPANY> |
+    token = <EMAIL> |
+    token = <HOST> |
+    token = <NUM> |
+    token = <EOF>
+   )
+    {
+      if (token.kind == EOF) {
+        return null;
+      } else {
+        // The JavaCC token's begin/end columns are passed as the Lucene
+        // token's start/end offsets.
+        // NOTE(review): presumably FastCharStream reports character offsets
+        // as columns -- confirm.
+        return
+          new org.apache.lucene.analysis.Token(token.image,
+                                        token.beginColumn,token.endColumn,
+                                        tokenImage[token.kind]);
+      }
+    }
+}
diff --git a/src/java/org/apache/lucene/analysis/standard/package.html b/src/java/org/apache/lucene/analysis/standard/package.html
new file mode 100644
index 00000000000..007e4a8d5b6
--- /dev/null
+++ b/src/java/org/apache/lucene/analysis/standard/package.html
@@ -0,0 +1,15 @@
+
+
+
+
+
+
+
+A grammar-based tokenizer constructed with JavaCC.
+<p>Note that JavaCC defines lots of public classes, methods and fields
+that do not need to be public. These clutter the documentation.
+Sorry.
+<p>Note that because JavaCC defines a class named <tt>Token</tt>,
+<tt>org.apache.lucene.analysis.Token</tt> must always be fully qualified in
+source code in this package.
+
+
diff --git a/src/java/org/apache/lucene/document/DateField.java b/src/java/org/apache/lucene/document/DateField.java
new file mode 100644
index 00000000000..873f80114cb
--- /dev/null
+++ b/src/java/org/apache/lucene/document/DateField.java
@@ -0,0 +1,109 @@
+package org.apache.lucene.document;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.util.Date;
+
+/** Converts dates to fixed-width strings and back.  Every encoded string has
+ * the same length and is left-padded with zeros, so sorting the strings
+ * lexicographically sorts them by date.  This makes them suitable for use as
+ * field values and search terms. */
+public class DateField {
+  private DateField() {};
+
+  // make date strings long enough to last a millennium
+  private static int DATE_LEN =
+    Long.toString(1000L*365*24*60*60*1000, Character.MAX_RADIX).length();
+
+  /** Returns the encoding of time zero, the smallest date string. */
+  public static String MIN_DATE_STRING() {
+    return timeToString(0);
+  }
+
+  /** Returns the largest date string: the highest digit of the radix,
+   * repeated to the full width. */
+  public static String MAX_DATE_STRING() {
+    char topDigit =
+      Character.forDigit(Character.MAX_RADIX-1, Character.MAX_RADIX);
+    StringBuffer widest = new StringBuffer(DATE_LEN);
+    for (int remaining = DATE_LEN; remaining > 0; remaining--)
+      widest.append(topDigit);
+    return widest.toString();
+  }
+
+  /** Converts a Date to a string suitable for indexing. */
+  public static String dateToString(Date date) {
+    long millis = date.getTime();
+    return timeToString(millis);
+  }
+
+  /** Converts a millisecond time to a string suitable for indexing.
+   * Throws a RuntimeException if the time is negative or needs more digits
+   * than the fixed width allows. */
+  public static String timeToString(long time) {
+    if (time < 0)
+      throw new RuntimeException("time too early");
+
+    String digits = Long.toString(time, Character.MAX_RADIX);
+    int missing = DATE_LEN - digits.length();
+
+    if (missing < 0)
+      throw new RuntimeException("time too late");
+
+    // pad with leading zeros up to the fixed width
+    StringBuffer padded = new StringBuffer(DATE_LEN);
+    for (int i = 0; i < missing; i++)
+      padded.append('0');
+    padded.append(digits);
+    return padded.toString();
+  }
+
+  /** Converts a string-encoded date into a millisecond time. */
+  public static long stringToTime(String s) {
+    return Long.parseLong(s, Character.MAX_RADIX);
+  }
+
+  /** Converts a string-encoded date into a Date object. */
+  public static Date stringToDate(String s) {
+    long millis = stringToTime(s);
+    return new Date(millis);
+  }
+}
diff --git a/src/java/org/apache/lucene/document/Document.java b/src/java/org/apache/lucene/document/Document.java
new file mode 100644
index 00000000000..dc908fe2e27
--- /dev/null
+++ b/src/java/org/apache/lucene/document/Document.java
@@ -0,0 +1,145 @@
+package org.apache.lucene.document;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.util.Enumeration;
+
+/** Documents are the unit of indexing and search.
+ *
+ * A Document is a set of fields.  Each field has a name and a textual value.
+ * A field may be stored with the document, in which case it is returned with
+ * search hits on the document.  Thus each document should typically contain
+ * stored fields which uniquely identify it.
+ * */
+
+public final class Document {
+  /** Head of the linked list of fields; the most recently added field is first. */
+  DocumentFieldList fieldList = null;
+
+  /** Constructs a new document with no fields. */
+  public Document() {}
+
+  /** Adds a field to a document.  Several fields may be added with
+   * the same name.  In this case, if the fields are indexed, their text is
+   * treated as though appended for the purposes of search. */
+  public final void add(Field field) {
+    // New fields are pushed onto the front of the list.
+    fieldList = new DocumentFieldList(field, fieldList);
+  }
+
+  /** Returns a field with the given name if any exist in this document, or
+    null.  If several fields share the name, the one added most recently is
+    returned. */
+  public final Field getField(String name) {
+    DocumentFieldList node = fieldList;
+    while (node != null) {
+      if (node.field.name().equals(name))
+        return node.field;
+      node = node.next;
+    }
+    return null;
+  }
+
+  /** Returns the string value of the field with the given name if any exist in
+    this document, or null.  If several fields share the name, the value of the
+    one added most recently is returned. */
+  public final String get(String name) {
+    Field match = getField(name);
+    return match == null ? null : match.stringValue();
+  }
+
+  /** Returns an Enumeration of all the fields in a document. */
+  public final Enumeration fields() {
+    return new DocumentFieldEnumeration(this);
+  }
+
+  /** Prints the fields of a document for human consumption. */
+  public final String toString() {
+    StringBuffer out = new StringBuffer("Document<");
+    DocumentFieldList node = fieldList;
+    while (node != null) {
+      out.append(node.field.toString());
+      node = node.next;
+      if (node != null)                           // separator between fields only
+        out.append(" ");
+    }
+    return out.append(">").toString();
+  }
+
+}
+
+/** A node in the singly linked list that holds a document's fields. */
+final class DocumentFieldList {
+  /** The field held by this node. */
+  Field field;
+  /** The rest of the list, or null if this is the last node. */
+  DocumentFieldList next;
+
+  DocumentFieldList(Field f, DocumentFieldList n) {
+    this.field = f;
+    this.next = n;
+  }
+}
+
+/** Enumerates the fields of a {@link Document} by walking its linked field
+ * list from the head. */
+final class DocumentFieldEnumeration implements Enumeration {
+  /** The node that nextElement() will return next, or null when exhausted. */
+  DocumentFieldList fields;
+
+  DocumentFieldEnumeration(Document d) {
+    fields = d.fieldList;
+  }
+
+  /** Returns true while at least one field remains to be enumerated. */
+  public final boolean hasMoreElements() {
+    return fields != null;
+  }
+
+  /** Returns the next field and advances past it.
+   *
+   * @throws java.util.NoSuchElementException if no fields remain, as the
+   *         Enumeration contract requires
+   */
+  public final Object nextElement() {
+    if (fields == null)
+      throw new java.util.NoSuchElementException("no more fields");
+    Field result = fields.field;
+    fields = fields.next;
+    return result;
+  }
+}
diff --git a/src/java/org/apache/lucene/document/Field.java b/src/java/org/apache/lucene/document/Field.java
new file mode 100644
index 00000000000..ce598f951ea
--- /dev/null
+++ b/src/java/org/apache/lucene/document/Field.java
@@ -0,0 +1,169 @@
+package org.apache.lucene.document;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.Reader;
+
+/**
+  A field is a section of a Document.  Each field has two parts, a name and a
+  value.  Values may be free text, provided as a String or as a Reader, or they
+  may be atomic keywords, which are not further processed.  Such keywords may
+  be used to represent dates, urls, etc.  Fields are optionally stored in the
+  index, so that they may be returned with hits on the document.
+  */
+
+public final class Field {
+  private String name = "body";
+  private String stringValue = null;
+  private Reader readerValue = null;
+  private boolean isStored = false;
+  private boolean isIndexed = true;
+  private boolean isTokenized = true;
+
+  /** Constructs a String-valued Field that is not tokenized, but is indexed
+    and stored.  Useful for non-text fields, e.g. date or url.  */
+  public static final Field Keyword(String name, String value) {
+    return new Field(name, value, true, true, false);
+  }
+
+  /** Constructs a String-valued Field that is not tokenized or indexed,
+    but is stored in the index, for return with hits. */
+  public static final Field UnIndexed(String name, String value) {
+    return new Field(name, value, true, false, false);
+  }
+
+  /** Constructs a String-valued Field that is tokenized and indexed,
+    and is stored in the index, for return with hits.  Useful for short text
+    fields, like "title" or "subject". */
+  public static final Field Text(String name, String value) {
+    return new Field(name, value, true, true, true);
+  }
+
+  /** Constructs a String-valued Field that is tokenized and indexed,
+    but that is not stored in the index. */
+  public static final Field UnStored(String name, String value) {
+    return new Field(name, value, false, true, true);
+  }
+
+  /** Constructs a Reader-valued Field that is tokenized and indexed, but is
+    not stored in the index verbatim.  Useful for longer text fields, like
+    "body". */
+  public static final Field Text(String name, Reader value) {
+    return new Field(name, value);
+  }
+
+  /** The name of the field (e.g., "date", "subject", "title", "body", etc.)
+    as an interned string. */
+  public String name()         { return name; }
+
+  /** The value of the field as a String, or null.  If null, the Reader value
+    is used.  Exactly one of stringValue() and readerValue() must be set. */
+  public String stringValue()  { return stringValue; }
+  /** The value of the field as a Reader, or null.  If null, the String value
+    is used.  Exactly one of stringValue() and readerValue() must be set. */
+  public Reader readerValue()  { return readerValue; }
+
+  /** Constructs a String-valued Field with explicit flags.
+    @param name   the field name; interned before being kept
+    @param string the field value
+    @param store  true if the value is to be stored in the index
+    @param index  true if the value is to be indexed
+    @param token  true if the value is to be tokenized before indexing
+    @throws IllegalArgumentException if name or string is null */
+  public Field(String name, String string,
+               boolean store, boolean index, boolean token) {
+    if (name == null)
+      throw new IllegalArgumentException("name cannot be null");
+    if (string == null)
+      throw new IllegalArgumentException("value cannot be null");
+
+    this.name = name.intern();                    // field names are interned
+    this.stringValue = string;
+    this.isStored = store;
+    this.isIndexed = index;
+    this.isTokenized = token;
+  }
+
+  /** Constructs a Reader-valued Field.  The flags keep their defaults: not
+    stored, indexed and tokenized.
+    @throws IllegalArgumentException if name or reader is null */
+  Field(String name, Reader reader) {
+    if (name == null)
+      throw new IllegalArgumentException("name cannot be null");
+    if (reader == null)
+      throw new IllegalArgumentException("value cannot be null");
+
+    this.name = name.intern();                    // field names are interned
+    this.readerValue = reader;
+  }
+
+  /** True iff the value of the field is to be stored in the index for return
+    with search hits.  It is an error for this to be true if a field is
+    Reader-valued. */
+  public final boolean isStored()      { return isStored; }
+
+  /** True iff the value of the field is to be indexed, so that it may be
+    searched on. */
+  public final boolean isIndexed()     { return isIndexed; }
+
+  /** True iff the value of the field should be tokenized as text prior to
+    indexing.  Un-tokenized fields are indexed as a single word and may not be
+    Reader-valued. */
+  public final boolean isTokenized()   { return isTokenized; }
+
+  /** Prints a Field for human consumption.  The prefix names the factory
+    method whose flag combination the field matches; any other combination
+    falls back to the default Object representation. */
+  public final String toString() {
+    if (isStored && isIndexed && !isTokenized)
+      return "Keyword<" + name + ":" + stringValue + ">";
+    else if (isStored && !isIndexed && !isTokenized)
+      return "Unindexed<" + name + ":" + stringValue + ">";
+    else if (isStored && isIndexed && isTokenized && stringValue!=null)
+      return "Text<" + name + ":" + stringValue + ">";
+    else if (!isStored && isIndexed && isTokenized && readerValue!=null)
+      return "Text<" + name + ":" + readerValue + ">";
+    // String-valued UnStored fields share the Reader-valued flags, so they
+    // are told apart by which value is set.
+    else if (!isStored && isIndexed && isTokenized && stringValue!=null)
+      return "UnStored<" + name + ":" + stringValue + ">";
+    else
+      return super.toString();
+  }
+
+}
diff --git a/src/java/org/apache/lucene/document/Makefile b/src/java/org/apache/lucene/document/Makefile
new file mode 100644
index 00000000000..09c091d1839
--- /dev/null
+++ b/src/java/org/apache/lucene/document/Makefile
@@ -0,0 +1,2 @@
+# sub-directory makefile for lucene
+include ../rules.mk
diff --git a/src/java/org/apache/lucene/document/package.html b/src/java/org/apache/lucene/document/package.html
new file mode 100644
index 00000000000..9b5f3702b00
--- /dev/null
+++ b/src/java/org/apache/lucene/document/package.html
@@ -0,0 +1,10 @@
+
+
+
+
+
+
+
+The Document abstraction.
+
+
diff --git a/src/java/org/apache/lucene/index/DocumentWriter.java b/src/java/org/apache/lucene/index/DocumentWriter.java
new file mode 100644
index 00000000000..a893ae6ca27
--- /dev/null
+++ b/src/java/org/apache/lucene/index/DocumentWriter.java
@@ -0,0 +1,336 @@
+package org.apache.lucene.index;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.Hashtable;
+import java.util.Enumeration;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.OutputStream;
+import org.apache.lucene.search.Similarity;
+
+/**
+ * Writes one Document as a complete single-document segment: the field names
+ * file (.fnm), the stored field values (through FieldsWriter), the postings
+ * (.frq, .prx and the term dictionary through TermInfosWriter) and one norm
+ * file per indexed field (.f followed by the field number).
+ */
+final class DocumentWriter {
+  private Analyzer analyzer;
+  private Directory directory;
+  private FieldInfos fieldInfos;
+  private int maxFieldLength;
+
+  /**
+   * @param d   directory in which the segment files are created
+   * @param a   analyzer used to tokenize tokenized fields
+   * @param mfl upper bound on the number of token positions recorded for the
+   *            tokenized fields of one name within a document
+   */
+  DocumentWriter(Directory d, Analyzer a, int mfl) {
+    directory = d;
+    analyzer = a;
+    maxFieldLength = mfl;
+  }
+
+  /**
+   * Writes <code>doc</code> into the files of the segment named
+   * <code>segment</code>: field names first, then stored values, then the
+   * inverted postings and finally the norms.
+   *
+   * @throws IOException if any of the segment files cannot be written
+   */
+  final void addDocument(String segment, Document doc)
+       throws IOException {
+    // write field names
+    fieldInfos = new FieldInfos();
+    fieldInfos.add(doc);
+    fieldInfos.write(directory, segment + ".fnm");
+
+    // write field values
+    FieldsWriter fieldsWriter =
+      new FieldsWriter(directory, segment, fieldInfos);
+    try {
+      fieldsWriter.addDocument(doc);
+    } finally {
+      fieldsWriter.close();
+    }
+
+    // invert doc into postingTable
+    postingTable.clear();			  // clear postingTable
+    fieldLengths = new int[fieldInfos.size()];	  // init fieldLengths
+    invertDocument(doc);
+
+    // sort postingTable into an array
+    Posting[] postings = sortPostingTable();
+
+    /*
+    for (int i = 0; i < postings.length; i++) {
+      Posting posting = postings[i];
+      System.out.print(posting.term);
+      System.out.print(" freq=" + posting.freq);
+      System.out.print(" pos=");
+      System.out.print(posting.positions[0]);
+      for (int j = 1; j < posting.freq; j++)
+	System.out.print("," + posting.positions[j]);
+      System.out.println("");
+    }
+    */
+
+    // write postings
+    writePostings(postings, segment);
+
+    // write norms of indexed fields
+    writeNorms(doc, segment);
+
+  }
+
+  // Keys are Terms, values are Postings.
+  // Used to buffer a document before it is written to the index.
+  private final Hashtable postingTable = new Hashtable();
+  private int[] fieldLengths;
+
+  /**
+   * Tokenizes the indexed fields of a document into Postings held in
+   * postingTable, and records the resulting length of each field in
+   * fieldLengths.
+   */
+  private final void invertDocument(Document doc)
+       throws IOException {
+    Enumeration fields = doc.fields();
+    while (fields.hasMoreElements()) {
+      Field field = (Field)fields.nextElement();
+      String fieldName = field.name();
+      int fieldNumber = fieldInfos.fieldNumber(fieldName);
+
+      // Continue numbering where an earlier field of the same name stopped.
+      int position = fieldLengths[fieldNumber];	  // position in field
+
+      if (field.isIndexed()) {
+	if (!field.isTokenized()) {		  // un-tokenized field
+	  addPosition(fieldName, field.stringValue(), position++);
+	} else {
+	  Reader reader;			  // find or make Reader
+	  if (field.readerValue() != null)
+	    reader = field.readerValue();
+	  else if (field.stringValue() != null)
+	    reader = new StringReader(field.stringValue());
+	  else
+	    throw new IllegalArgumentException
+	      ("field must have either String or Reader value");
+
+	  // Tokenize field and add to postingTable
+	  TokenStream stream = analyzer.tokenStream(fieldName, reader);
+	  try {
+	    // Test the limit BEFORE pulling a token so that at most
+	    // maxFieldLength positions are ever recorded.  Checking after
+	    // the add (position > maxFieldLength) let one extra token in.
+	    while (position < maxFieldLength) {
+	      Token t = stream.next();
+	      if (t == null)
+		break;
+	      addPosition(fieldName, t.termText(), position++);
+	    }
+	  } finally {
+	    stream.close();
+	  }
+	}
+
+	fieldLengths[fieldNumber] = position;	  // save field length
+      }
+    }
+  }
+
+  private final Term termBuffer = new Term("", ""); // avoid consing
+
+  /**
+   * Records one occurrence of the term (field, text) at the given position,
+   * creating the Posting on first sight and growing its positions array by
+   * doubling when it is full.
+   */
+  private final void addPosition(String field, String text, int position) {
+    termBuffer.set(field, text);
+    Posting ti = (Posting)postingTable.get(termBuffer);
+    if (ti != null) {				  // word seen before
+      int freq = ti.freq;
+      if (ti.positions.length == freq) {	  // positions array is full
+	int[] newPositions = new int[freq * 2];	  // double size
+	System.arraycopy(ti.positions, 0, newPositions, 0, freq);
+	ti.positions = newPositions;
+      }
+      ti.positions[freq] = position;		  // add new position
+      ti.freq = freq + 1;			  // update frequency
+    }
+    else {					  // word not seen before
+      // termBuffer is reused on every call, so the table needs its own Term.
+      Term term = new Term(field, text, false);
+      postingTable.put(term, new Posting(term, position));
+    }
+  }
+
+  /** Returns the buffered Postings as an array sorted by Term. */
+  private final Posting[] sortPostingTable() {
+    // copy postingTable into an array
+    Posting[] array = new Posting[postingTable.size()];
+    Enumeration postings = postingTable.elements();
+    for (int i = 0; postings.hasMoreElements(); i++)
+      array[i] = (Posting)postings.nextElement();
+
+    // sort the array
+    quickSort(array, 0, array.length - 1);
+
+    return array;
+  }
+
+  /**
+   * Sorts postings[lo..hi] (both inclusive) in place by Term, using a
+   * median-of-three quicksort.
+   */
+  static private final void quickSort(Posting[] postings, int lo, int hi) {
+    if(lo >= hi)
+      return;
+
+    int mid = (lo + hi) >>> 1;			  // overflow-safe midpoint
+
+    // order the three samples so that postings[mid] holds their median
+    if(postings[lo].term.compareTo(postings[mid].term) > 0) {
+      Posting tmp = postings[lo];
+      postings[lo] = postings[mid];
+      postings[mid] = tmp;
+    }
+
+    if(postings[mid].term.compareTo(postings[hi].term) > 0) {
+      Posting tmp = postings[mid];
+      postings[mid] = postings[hi];
+      postings[hi] = tmp;
+
+      if(postings[lo].term.compareTo(postings[mid].term) > 0) {
+	Posting tmp2 = postings[lo];
+	postings[lo] = postings[mid];
+	postings[mid] = tmp2;
+      }
+    }
+
+    int left = lo + 1;
+    int right = hi - 1;
+
+    // three or fewer elements are already in order after the swaps above
+    if (left >= right)
+      return;
+
+    Term partition = postings[mid].term;
+
+    for( ;; ) {
+      while(postings[right].term.compareTo(partition) > 0)
+	--right;
+
+      while(left < right && postings[left].term.compareTo(partition) <= 0)
+	++left;
+
+      if(left < right) {
+	Posting tmp = postings[left];
+	postings[left] = postings[right];
+	postings[right] = tmp;
+	--right;
+      } else {
+	break;
+      }
+    }
+
+    quickSort(postings, lo, left);
+    quickSort(postings, left + 1, hi);
+  }
+
+  /**
+   * Writes the sorted postings of the single buffered document to the .frq
+   * and .prx files and adds one dictionary entry per term.
+   */
+  private final void writePostings(Posting[] postings, String segment)
+       throws IOException {
+    OutputStream freq = null, prox = null;
+    TermInfosWriter tis = null;
+
+    try {
+      freq = directory.createFile(segment + ".frq");
+      prox = directory.createFile(segment + ".prx");
+      tis = new TermInfosWriter(directory, segment, fieldInfos);
+      TermInfo ti = new TermInfo();
+
+      for (int i = 0; i < postings.length; i++) {
+	Posting posting = postings[i];
+
+	// add an entry to the dictionary with pointers to prox and freq files
+	ti.set(1, freq.getFilePointer(), prox.getFilePointer());
+	tis.add(posting.term, ti);
+
+	// add an entry to the freq file
+	int f = posting.freq;
+	if (f == 1)				  // optimize freq=1
+	  freq.writeVInt(1);			  // set low bit of doc num.
+	else {
+	  freq.writeVInt(0);			  // the document number
+	  freq.writeVInt(f);			  // frequency in doc
+	}
+
+	int lastPosition = 0;			  // write positions
+	int[] positions = posting.positions;
+	for (int j = 0; j < f; j++) {		  // use delta-encoding
+	  int position = positions[j];
+	  prox.writeVInt(position - lastPosition);
+	  lastPosition = position;
+	}
+      }
+    }
+    finally {
+      // Nested so that a failing close() cannot prevent the remaining
+      // outputs from being closed.
+      try {
+	if (freq != null) freq.close();
+      } finally {
+	try {
+	  if (prox != null) prox.close();
+	} finally {
+	  if (tis != null) tis.close();
+	}
+      }
+    }
+  }
+
+  /**
+   * Writes, for every indexed field of the document, a one-byte norm file
+   * computed from the field length recorded by invertDocument.
+   */
+  private final void writeNorms(Document doc, String segment)
+       throws IOException {
+    Enumeration fields = doc.fields();
+    while (fields.hasMoreElements()) {
+      Field field = (Field)fields.nextElement();
+      if (field.isIndexed()) {
+	int fieldNumber = fieldInfos.fieldNumber(field.name());
+	OutputStream norm = directory.createFile(segment + ".f" + fieldNumber);
+	try {
+	  norm.writeByte(Similarity.norm(fieldLengths[fieldNumber]));
+	} finally {
+	  norm.close();
+	}
+      }
+    }
+  }
+}
+
+/** Occurrence data for one Term within the document being inverted. */
+final class Posting {
+  Term term;		// the Term this posting describes
+  int freq;		// how many times the Term occurs in the doc
+  int[] positions;	// the positions at which it occurs
+
+  /** Creates a posting for a term seen for the first time, at one position. */
+  Posting(Term t, int position) {
+    this.term = t;
+    this.freq = 1;
+    this.positions = new int[] { position };
+  }
+}
diff --git a/src/java/org/apache/lucene/index/FieldInfo.java b/src/java/org/apache/lucene/index/FieldInfo.java
new file mode 100644
index 00000000000..a0a93017c6f
--- /dev/null
+++ b/src/java/org/apache/lucene/index/FieldInfo.java
@@ -0,0 +1,67 @@
+package org.apache.lucene.index;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+/** Plain record describing one field: its name, indexed flag and number. */
+final class FieldInfo {
+  String name;
+  boolean isIndexed;
+  int number;
+
+  /**
+   * @param na the field name
+   * @param tk value for the isIndexed flag
+   * @param nu the field number
+   */
+  FieldInfo(String na, boolean tk, int nu) {
+    this.name = na;
+    this.isIndexed = tk;
+    this.number = nu;
+  }
+}
diff --git a/src/java/org/apache/lucene/index/FieldInfos.java b/src/java/org/apache/lucene/index/FieldInfos.java
new file mode 100644
index 00000000000..59f9c1a6a9d
--- /dev/null
+++ b/src/java/org/apache/lucene/index/FieldInfos.java
@@ -0,0 +1,167 @@
+package org.apache.lucene.index;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.util.Hashtable;
+import java.util.Vector;
+import java.util.Enumeration;
+import java.io.IOException;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.OutputStream;
+import org.apache.lucene.store.InputStream;
+
+final class FieldInfos {
+ private Vector byNumber = new Vector();
+ private Hashtable byName = new Hashtable();
+
+ FieldInfos() {
+ add("", false);
+ }
+
+ FieldInfos(Directory d, String name) throws IOException {
+ InputStream input = d.openFile(name);
+ try {
+ read(input);
+ } finally {
+ input.close();
+ }
+ }
+
+ /** Adds field info for a Document. */
+ final void add(Document doc) {
+ Enumeration fields = doc.fields();
+ while (fields.hasMoreElements()) {
+ Field field = (Field)fields.nextElement();
+ add(field.name(), field.isIndexed());
+ }
+ }
+
+ /** Merges in information from another FieldInfos. */
+ final void add(FieldInfos other) {
+ for (int i = 0; i < other.size(); i++) {
+ FieldInfo fi = other.fieldInfo(i);
+ add(fi.name, fi.isIndexed);
+ }
+ }
+
+ private final void add(String name, boolean isIndexed) {
+ FieldInfo fi = fieldInfo(name);
+ if (fi == null)
+ addInternal(name, isIndexed);
+ else if (fi.isIndexed != isIndexed)
+ throw new IllegalStateException("field " + name +
+ (fi.isIndexed ? " must" : " cannot") +
+ " be an indexed field.");
+ }
+
+ private final void addInternal(String name, boolean isIndexed) {
+ FieldInfo fi = new FieldInfo(name, isIndexed, byNumber.size());
+ byNumber.addElement(fi);
+ byName.put(name, fi);
+ }
+
+ final int fieldNumber(String fieldName) {
+ FieldInfo fi = fieldInfo(fieldName);
+ if (fi != null)
+ return fi.number;
+ else
+ return -1;
+ }
+
+ final FieldInfo fieldInfo(String fieldName) {
+ return (FieldInfo)byName.get(fieldName);
+ }
+
+ final String fieldName(int fieldNumber) {
+ return fieldInfo(fieldNumber).name;
+ }
+
+ final FieldInfo fieldInfo(int fieldNumber) {
+ return (FieldInfo)byNumber.elementAt(fieldNumber);
+ }
+
+ final int size() {
+ return byNumber.size();
+ }
+
+ final void write(Directory d, String name) throws IOException {
+ OutputStream output = d.createFile(name);
+ try {
+ write(output);
+ } finally {
+ output.close();
+ }
+ }
+
+ final void write(OutputStream output) throws IOException {
+ output.writeVInt(size());
+ for (int i = 0; i < size(); i++) {
+ FieldInfo fi = fieldInfo(i);
+ output.writeString(fi.name);
+ output.writeByte((byte)(fi.isIndexed ? 1 : 0));
+ }
+ }
+
+ private final void read(InputStream input) throws IOException {
+ int size = input.readVInt();
+ for (int i = 0; i < size; i++)
+ addInternal(input.readString().intern(),
+ input.readByte() != 0);
+ }
+}
diff --git a/src/java/org/apache/lucene/index/FieldsReader.java b/src/java/org/apache/lucene/index/FieldsReader.java
new file mode 100644
index 00000000000..ff4cfac4f92
--- /dev/null
+++ b/src/java/org/apache/lucene/index/FieldsReader.java
@@ -0,0 +1,113 @@
+package org.apache.lucene.index;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.util.Enumeration;
+import java.util.Hashtable;
+import java.io.IOException;
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.InputStream;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+
+/**
+ * Reads the stored fields of a segment.  The .fdx file holds one 8-byte
+ * pointer per document into the .fdt file, which holds the field values.
+ */
+final class FieldsReader {
+  private FieldInfos fieldInfos;
+  private InputStream fieldsStream;
+  private InputStream indexStream;
+  private int size;
+
+  /**
+   * Opens the .fdt and .fdx files of <code>segment</code> in directory
+   * <code>d</code>.
+   *
+   * @param fn field infos used to map stored field numbers back to names
+   * @throws IOException if either file cannot be opened
+   */
+  FieldsReader(Directory d, String segment, FieldInfos fn)
+       throws IOException {
+    fieldInfos = fn;
+
+    fieldsStream = d.openFile(segment + ".fdt");
+    boolean opened = false;
+    try {
+      indexStream = d.openFile(segment + ".fdx");
+      opened = true;
+    } finally {
+      if (!opened)				  // don't leak the .fdt stream
+	fieldsStream.close();
+    }
+
+    // Divide before narrowing: a cast binds tighter than '/', so the old
+    // "(int)indexStream.length() / 8" truncated the long length first.
+    size = (int)(indexStream.length() / 8);
+  }
+
+  /** Closes both streams, even if closing the first one fails. */
+  final void close() throws IOException {
+    try {
+      fieldsStream.close();
+    } finally {
+      indexStream.close();
+    }
+  }
+
+  /** Returns the number of documents, i.e. the number of .fdx entries. */
+  final int size() {
+    return size;
+  }
+
+  /**
+   * Returns the stored fields of the n-th document of the segment.
+   *
+   * @throws IOException if the field data cannot be read
+   */
+  final Document doc(int n) throws IOException {
+    indexStream.seek(n * 8L);			  // 8 bytes per index entry
+    long position = indexStream.readLong();
+    fieldsStream.seek(position);
+
+    Document doc = new Document();
+    int numFields = fieldsStream.readVInt();
+    for (int i = 0; i < numFields; i++) {
+      int fieldNumber = fieldsStream.readVInt();
+      FieldInfo fi = fieldInfos.fieldInfo(fieldNumber);
+
+      byte bits = fieldsStream.readByte();	  // bit 0: tokenized
+
+      doc.add(new Field(fi.name,		  // name
+			fieldsStream.readString(), // read value
+			true,			  // stored
+			fi.isIndexed,		  // indexed
+			(bits & 1) != 0));	  // tokenized
+    }
+
+    return doc;
+  }
+}
diff --git a/src/java/org/apache/lucene/index/FieldsWriter.java b/src/java/org/apache/lucene/index/FieldsWriter.java
new file mode 100644
index 00000000000..b2ea31de18b
--- /dev/null
+++ b/src/java/org/apache/lucene/index/FieldsWriter.java
@@ -0,0 +1,110 @@
+package org.apache.lucene.index;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.util.Enumeration;
+import java.util.Hashtable;
+import java.io.IOException;
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.OutputStream;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+
+/**
+ * Writes the stored fields of a segment: field values go to the .fdt file,
+ * and for each document an 8-byte pointer to its values goes to the .fdx
+ * file.
+ */
+final class FieldsWriter {
+  private FieldInfos fieldInfos;
+  private OutputStream fieldsStream;
+  private OutputStream indexStream;
+
+  /**
+   * Creates the .fdt and .fdx files of <code>segment</code> in directory
+   * <code>d</code>.
+   *
+   * @param fn field infos used to map field names to field numbers
+   * @throws IOException if either file cannot be created
+   */
+  FieldsWriter(Directory d, String segment, FieldInfos fn)
+       throws IOException {
+    fieldInfos = fn;
+    fieldsStream = d.createFile(segment + ".fdt");
+    boolean created = false;
+    try {
+      indexStream = d.createFile(segment + ".fdx");
+      created = true;
+    } finally {
+      if (!created)				  // don't leak the .fdt stream
+	fieldsStream.close();
+    }
+  }
+
+  /** Closes both streams, even if closing the first one fails. */
+  final void close() throws IOException {
+    try {
+      fieldsStream.close();
+    } finally {
+      indexStream.close();
+    }
+  }
+
+  /**
+   * Appends the stored fields of <code>doc</code>: a pointer in the index
+   * file, then the count of stored fields, then for each stored field its
+   * number, a flag byte (bit 0 set when tokenized) and its string value.
+   *
+   * @throws IOException if the field data cannot be written
+   */
+  final void addDocument(Document doc) throws IOException {
+    indexStream.writeLong(fieldsStream.getFilePointer());
+
+    // first pass: the stored-field count precedes the field data
+    int storedCount = 0;
+    Enumeration fields = doc.fields();
+    while (fields.hasMoreElements()) {
+      Field field = (Field)fields.nextElement();
+      if (field.isStored())
+	storedCount++;
+    }
+    fieldsStream.writeVInt(storedCount);
+
+    // second pass: write each stored field
+    fields = doc.fields();
+    while (fields.hasMoreElements()) {
+      Field field = (Field)fields.nextElement();
+      if (field.isStored()) {
+	fieldsStream.writeVInt(fieldInfos.fieldNumber(field.name()));
+
+	byte bits = 0;
+	if (field.isTokenized())
+	  bits |= 1;
+	fieldsStream.writeByte(bits);
+
+	fieldsStream.writeString(field.stringValue());
+      }
+    }
+  }
+}
diff --git a/src/java/org/apache/lucene/index/IndexReader.java b/src/java/org/apache/lucene/index/IndexReader.java
new file mode 100644
index 00000000000..6aeca66ad9c
--- /dev/null
+++ b/src/java/org/apache/lucene/index/IndexReader.java
@@ -0,0 +1,215 @@
+package org.apache.lucene.index;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import java.io.File;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.document.Document;
+
+/** IndexReader is an abstract class that provides the interface through which
+  an index is accessed.  Searching is done entirely through this interface, so
+  any subclass which implements it is searchable.
+
+  <p>Concrete subclasses of IndexReader are usually obtained by calling the
+  static method {@link #open}.
+
+  <p>For efficiency, this API often refers to documents by <i>document
+  number</i>: a non-negative integer which names a unique document in the
+  index.  Document numbers are ephemeral--they may change as documents are
+  added to and deleted from an index.  Clients should thus not rely on a given
+  document having the same number between sessions. */
+
+public abstract class IndexReader {
+  protected IndexReader() {}
+
+  /** Returns an IndexReader reading the index in an FSDirectory in the named
+    path. */
+  public static IndexReader open(String path) throws IOException {
+    Directory dir = FSDirectory.getDirectory(path, false);
+    return open(dir);
+  }
+
+  /** Returns an IndexReader reading the index in an FSDirectory in the named
+    path. */
+  public static IndexReader open(File path) throws IOException {
+    Directory dir = FSDirectory.getDirectory(path, false);
+    return open(dir);
+  }
+
+  /** Returns an IndexReader reading the index in the given Directory.  The
+    segment list is read, and the segment readers are constructed, while
+    holding the lock on <code>directory</code>. */
+  public static IndexReader open(Directory directory) throws IOException {
+    synchronized (directory) {
+      SegmentInfos infos = new SegmentInfos();
+      infos.read(directory);
+
+      final int segmentCount = infos.size();
+      if (segmentCount == 1) {                    // index is optimized
+        return new SegmentReader(infos.info(0), true);
+      }
+
+      // one reader per segment; only the last is constructed with 'true'
+      final int last = segmentCount - 1;
+      SegmentReader[] readers = new SegmentReader[segmentCount];
+      for (int i = 0; i <= last; i++) {
+        readers[i] = new SegmentReader(infos.info(i), i == last);
+      }
+      return new SegmentsReader(readers);
+    }
+  }
+
+  /** Returns the time the index in the named directory was last modified. */
+  public static long lastModified(String directory) throws IOException {
+    File dir = new File(directory);
+    return lastModified(dir);
+  }
+
+  /** Returns the time the index in the named directory was last modified. */
+  public static long lastModified(File directory) throws IOException {
+    return FSDirectory.fileModified(directory, "segments");
+  }
+
+  /** Returns the time the index in this directory was last modified. */
+  public static long lastModified(Directory directory) throws IOException {
+    return directory.fileModified("segments");
+  }
+
+  /** Returns the number of documents in this index. */
+  public abstract int numDocs();
+
+  /** Returns one greater than the largest possible document number.
+    This may be used to, e.g., determine how big to allocate an array which
+    will have an element for every document number in an index. */
+  public abstract int maxDoc();
+
+  /** Returns the stored fields of the <code>n</code><sup>th</sup>
+    <code>Document</code> in this index. */
+  public abstract Document document(int n) throws IOException;
+
+  /** Returns true if document <i>n</i> has been deleted. */
+  public abstract boolean isDeleted(int n);
+
+  /** Returns the byte-encoded normalization factor for the named field of
+    every document.  This is used by the search code to score documents.
+    @see org.apache.lucene.search.Similarity#norm */
+  public abstract byte[] norms(String field) throws IOException;
+
+  /** Returns an enumeration of all the terms in the index.
+    The enumeration is ordered by Term.compareTo().  Each term
+    is greater than all that precede it in the enumeration. */
+  public abstract TermEnum terms() throws IOException;
+
+  /** Returns an enumeration of all terms after a given term.
+    The enumeration is ordered by Term.compareTo().  Each term
+    is greater than all that precede it in the enumeration. */
+  public abstract TermEnum terms(Term t) throws IOException;
+
+  /** Returns the number of documents containing the term <code>t</code>. */
+  public abstract int docFreq(Term t) throws IOException;
+
+  /** Returns an enumeration of all the documents which contain the term
+    <code>t</code>.  For each document, the document number and the frequency
+    of the term in that document are provided, for use in search scoring.
+    Thus, this method implements the mapping:
+    <p><ul>
+    Term &nbsp;&nbsp; =&gt; &nbsp;&nbsp; &lt;docNum, freq&gt;<sup>*</sup>
+    </ul>
+    <p>The enumeration is ordered by document number.  Each document number
+    is greater than all that precede it in the enumeration. */
+  public abstract TermDocs termDocs(Term t) throws IOException;
+
+  /** Returns an enumeration of all the documents which contain the term
+    <code>t</code>.  For each document, in addition to the document number
+    and frequency of the term in that document, a list of all of the ordinal
+    positions of the term in the document is available.  Thus, this method
+    implements the mapping:
+    <p><ul>
+    Term &nbsp;&nbsp; =&gt; &nbsp;&nbsp; &lt;docNum, freq,
+    &lt;pos<sub>1</sub>, pos<sub>2</sub>, ...
+    pos<sub>freq-1</sub>&gt;
+    &gt;<sup>*</sup>
+    </ul>
+    <p>This positional information facilitates phrase and proximity searching.
+    <p>The enumeration is ordered by document number.  Each document number is
+    greater than all that precede it in the enumeration. */
+  public abstract TermPositions termPositions(Term t) throws IOException;
+
+  /** Deletes the document numbered <code>docNum</code>.  Once a document is
+    deleted it will not appear in TermDocs or TermPositions enumerations.
+    Attempts to read its fields with the {@link #document}
+    method will result in an error.  The presence of this document may still
+    be reflected in the {@link #docFreq} statistic, though
+    this will be corrected eventually as the index is further modified. */
+  public abstract void delete(int docNum) throws IOException;
+
+  /** Deletes all documents containing <code>term</code>.
+    This is useful if one uses a document field to hold a unique ID string for
+    the document.  Then to delete such a document, one merely constructs a
+    term with the appropriate field and the unique ID string as its text and
+    passes it to this method.  Returns the number of documents deleted. */
+  public final int delete(Term term) throws IOException {
+    TermDocs matches = termDocs(term);
+    if (matches == null) {
+      return 0;
+    }
+
+    int deleted = 0;
+    try {
+      for (; matches.next(); deleted++) {
+        delete(matches.doc());
+      }
+    } finally {
+      matches.close();
+    }
+    return deleted;
+  }
+
+  /** Closes files associated with this index.
+    Also saves any new deletions to disk.
+    No other methods should be called after this has been called. */
+  public abstract void close() throws IOException;
+}
diff --git a/src/java/org/apache/lucene/index/IndexWriter.java b/src/java/org/apache/lucene/index/IndexWriter.java
new file mode 100644
index 00000000000..5136d64abf8
--- /dev/null
+++ b/src/java/org/apache/lucene/index/IndexWriter.java
@@ -0,0 +1,385 @@
+package org.apache.lucene.index;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import java.io.File;
+import java.io.PrintStream;
+import java.util.Vector;
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.InputStream;
+import org.apache.lucene.store.OutputStream;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.analysis.Analyzer;
+
+/**
+ An IndexWriter creates and maintains an index.
+
+ <p>The third argument to the constructor
+ determines whether a new index is created, or whether an existing index is
+ opened for the addition of new documents.
+
+ <p>In either case, documents are added with the {@link #addDocument} method.
+ When finished adding documents, {@link #close} should be called.
+
+ <p>If an index will not have more documents added for a while and optimal search
+ performance is desired, then the {@link #optimize}
+ method should be called before the index is closed.
+ */
+
+public final class IndexWriter {
+ private Directory directory; // where this index resides
+ private Analyzer analyzer; // how to analyze text
+
+ private SegmentInfos segmentInfos = new SegmentInfos(); // the segments
+ private final Directory ramDirectory = new RAMDirectory(); // for temp segs
+
+ /** Constructs an IndexWriter for the index in <code>path</code>. Text will
+ be analyzed with <code>a</code>. If <code>create</code> is true, then a
+ new, empty index will be created in <code>path</code>, replacing the index
+ already there, if any. */
+ public IndexWriter(String path, Analyzer a, boolean create)
+ throws IOException {
+ this(FSDirectory.getDirectory(path, create), a, create);
+ }
+
+ /** Constructs an IndexWriter for the index in <code>path</code>. Text will
+ be analyzed with <code>a</code>. If <code>create</code> is true, then a
+ new, empty index will be created in <code>path</code>, replacing the index
+ already there, if any. */
+ public IndexWriter(File path, Analyzer a, boolean create)
+ throws IOException {
+ this(FSDirectory.getDirectory(path, create), a, create);
+ }
+
+ /** Constructs an IndexWriter for the index in <code>d</code>. Text will be
+ analyzed with <code>a</code>. If <code>create</code> is true, then a new,
+ empty index will be created in <code>d</code>, replacing the index already
+ there, if any. */
+ public IndexWriter(Directory d, Analyzer a, boolean create)
+ throws IOException {
+ directory = d;
+ analyzer = a;
+
+ // While holding the directory's lock, either write out the (still empty)
+ // segment list or load the existing one.
+ synchronized (directory) {
+ if (create)
+ segmentInfos.write(directory);
+ else
+ segmentInfos.read(directory);
+ }
+ }
+
+ /** Flushes all changes to an index, closes all associated files, and closes
+ the directory that the index is stored in. */
+ public final synchronized void close() throws IOException {
+ flushRamSegments();
+ ramDirectory.close();
+ directory.close();
+ }
+
+ /** Returns the number of documents currently in this index. This is the sum
+ of the document counts recorded for each segment. */
+ public final synchronized int docCount() {
+ int count = 0;
+ for (int i = 0; i < segmentInfos.size(); i++) {
+ SegmentInfo si = segmentInfos.info(i);
+ count += si.docCount;
+ }
+ return count;
+ }
+
+ /** The maximum number of terms that will be indexed for a single field in a
+ document. This limits the amount of memory required for indexing, so that
+ collections with very large files will not crash the indexing process by
+ running out of memory.
+
+ <p>By default, no more than 10,000 terms will be indexed for a field. */
+ public int maxFieldLength = 10000;
+
+ /** Adds a document to this index. The document is first written as a new
+ single-document segment in the RAM directory; the segment is then
+ registered, and segments are merged if needed, while holding this writer's
+ lock. */
+ public final void addDocument(Document doc) throws IOException {
+ DocumentWriter dw =
+ new DocumentWriter(ramDirectory, analyzer, maxFieldLength);
+ String segmentName = newSegmentName();
+ dw.addDocument(segmentName, doc);
+ synchronized (this) {
+ segmentInfos.addElement(new SegmentInfo(segmentName, 1, ramDirectory));
+ maybeMergeSegments();
+ }
+ }
+
+ /** Returns a new segment name: an underscore followed by the current value
+ of the segment counter, written in radix Character.MAX_RADIX. The counter
+ is incremented as a side effect. */
+ private final synchronized String newSegmentName() {
+ return "_" + Integer.toString(segmentInfos.counter++, Character.MAX_RADIX);
+ }
+
+ /** Determines how often segment indexes are merged by addDocument(). With
+ * smaller values, less RAM is used while indexing, and searches on
+ * unoptimized indexes are faster, but indexing speed is slower. With larger
+ * values more RAM is used while indexing and searches on unoptimized indexes
+ * are slower, but indexing is faster. Thus larger values (&gt; 10) are best
+ * for batched index creation, and smaller values (&lt; 10) for indexes that are
+ * interactively maintained.
+ *
+ * <p>This must never be less than 2. The default value is 10.*/
+ public int mergeFactor = 10;
+
+ /** Determines the largest number of documents ever merged by addDocument().
+ * Small values (e.g., less than 10,000) are best for interactive indexing,
+ * as this limits the length of pauses while indexing to a few seconds.
+ * Larger values are best for batched indexing and speedier searches.
+ *
+ * <p>The default value is {@link Integer#MAX_VALUE}. */
+ public int maxMergeDocs = Integer.MAX_VALUE;
+
+ /** If non-null, information about merges will be printed to this. */
+ public PrintStream infoStream = null;
+
+ /** Merges all segments together into a single segment, optimizing an index
+ for search. After flushing RAM segments, merging repeats for as long as
+ more than one segment remains, or the single remaining segment has
+ deletions; each pass merges at most the last <code>mergeFactor</code>
+ segments. */
+ public final synchronized void optimize() throws IOException {
+ flushRamSegments();
+ while (segmentInfos.size() > 1 ||
+ (segmentInfos.size() == 1 &&
+ SegmentReader.hasDeletions(segmentInfos.info(0)))){
+ int minSegment = segmentInfos.size() - mergeFactor;
+ mergeSegments(minSegment < 0 ? 0 : minSegment);
+ }
+ }
+
+ /** Merges all segments from an array of indexes into this index.
+ *
+ * <p>This may be used to parallelize batch indexing. A large document
+ * collection can be broken into sub-collections. Each sub-collection can be
+ * indexed in parallel, on a different thread, process or machine. The
+ * complete index can then be created by merging sub-collection indexes
+ * with this method.
+ *
+ * <p>After this completes, the index is optimized. */
+ public final synchronized void addIndexes(Directory[] dirs)
+ throws IOException {
+ optimize(); // start with zero or 1 seg
+ int minSegment = segmentInfos.size();
+ int segmentsAddedSinceMerge = 0;
+ for (int i = 0; i < dirs.length; i++) {
+ SegmentInfos sis = new SegmentInfos(); // read infos from dir
+ sis.read(dirs[i]);
+ for (int j = 0; j < sis.size(); j++) {
+ segmentInfos.addElement(sis.info(j)); // add each info
+
+ // merge whenever mergeFactor segments have been added
+ // (delete == false: the merged segments are not queued for deletion)
+ if (++segmentsAddedSinceMerge == mergeFactor) {
+ mergeSegments(minSegment++, false);
+ segmentsAddedSinceMerge = 0;
+ }
+ }
+ }
+ optimize(); // final cleanup
+ }
+
+ /** Merges all RAM-resident segments. Walks back from the last segment over
+ the trailing run of segments that reside in the RAM directory. The segment
+ just before that run is merged with it, unless there is no such segment,
+ the combined document count would exceed <code>mergeFactor</code>, or the
+ last segment is not RAM-resident. Does nothing if no segments are
+ selected. */
+ private final void flushRamSegments() throws IOException {
+ int minSegment = segmentInfos.size()-1;
+ int docCount = 0;
+ while (minSegment >= 0 &&
+ (segmentInfos.info(minSegment)).dir == ramDirectory) {
+ docCount += segmentInfos.info(minSegment).docCount;
+ minSegment--;
+ }
+ if (minSegment < 0 || // add one FS segment?
+ (docCount + segmentInfos.info(minSegment).docCount) > mergeFactor ||
+ !(segmentInfos.info(segmentInfos.size()-1).dir == ramDirectory))
+ minSegment++;
+ if (minSegment >= segmentInfos.size())
+ return; // none to merge
+ mergeSegments(minSegment);
+ }
+
+ /** Incremental segment merger. Starting with a target size of
+ <code>mergeFactor</code> documents, scans back from the newest segment
+ over those smaller than the target. If together they hold at least the
+ target number of documents they are merged, the target is multiplied by
+ <code>mergeFactor</code>, and the scan repeats. Stops when no merge is
+ found or the target exceeds <code>maxMergeDocs</code>. */
+ private final void maybeMergeSegments() throws IOException {
+ long targetMergeDocs = mergeFactor;
+ while (targetMergeDocs <= maxMergeDocs) {
+ // find segments smaller than current target size
+ int minSegment = segmentInfos.size();
+ int mergeDocs = 0;
+ while (--minSegment >= 0) {
+ SegmentInfo si = segmentInfos.info(minSegment);
+ if (si.docCount >= targetMergeDocs)
+ break;
+ mergeDocs += si.docCount;
+ }
+
+ if (mergeDocs >= targetMergeDocs) // found a merge to do
+ mergeSegments(minSegment+1);
+ else
+ break;
+
+ targetMergeDocs *= mergeFactor; // increase target size
+ }
+ }
+
+ /** Pops segments off of segmentInfos stack down to minSegment, merges them,
+ and pushes the merged index onto the top of the segmentInfos stack. The
+ merged segments are queued for deletion. */
+ private final void mergeSegments(int minSegment) throws IOException {
+ mergeSegments(minSegment, true);
+ }
+
+ /** Pops segments off of segmentInfos stack down to minSegment, merges them,
+ and pushes the merged index onto the top of the segmentInfos stack. If
+ <code>delete</code> is true, the merged segments are queued for deletion
+ once the new segment list has been written. */
+ private final void mergeSegments(int minSegment, boolean delete)
+ throws IOException {
+ String mergedName = newSegmentName();
+ int mergedDocCount = 0;
+ if (infoStream != null) infoStream.print("merging segments");
+ SegmentMerger merger = new SegmentMerger(directory, mergedName);
+ Vector segmentsToDelete = new Vector();
+ for (int i = minSegment; i < segmentInfos.size(); i++) {
+ SegmentInfo si = segmentInfos.info(i);
+ if (infoStream != null)
+ infoStream.print(" " + si.name + " (" + si.docCount + " docs)");
+ SegmentReader reader = new SegmentReader(si);
+ merger.add(reader);
+ if (delete)
+ segmentsToDelete.addElement(reader); // queue for deletion
+ mergedDocCount += si.docCount;
+ }
+ if (infoStream != null) {
+ infoStream.println();
+ infoStream.println(" into "+mergedName+" ("+mergedDocCount+" docs)");
+ }
+ merger.merge();
+
+ segmentInfos.setSize(minSegment); // pop old infos & add new
+ segmentInfos.addElement(new SegmentInfo(mergedName, mergedDocCount,
+ directory));
+
+ synchronized (directory) {
+ segmentInfos.write(directory); // commit before deleting
+ deleteSegments(segmentsToDelete); // delete now-unused segments
+ }
+ }
+
+ /* Some operating systems (e.g. Windows) don't permit a file to be deleted
+ while it is opened for read (e.g. by another process or thread). So we
+ assume that when a delete fails it is because the file is open in another
+ process, and queue the file for subsequent deletion. */
+
+ /** Deletes the files of the given SegmentReaders. Files queued by an earlier
+ call are retried first. Files in this index's directory that cannot be
+ deleted are recorded for a later attempt; files of readers in any other
+ directory are deleted directly. */
+ private final void deleteSegments(Vector segments) throws IOException {
+ Vector deletable = new Vector();
+
+ deleteFiles(readDeleteableFiles(), deletable); // try to delete deleteable
+
+ for (int i = 0; i < segments.size(); i++) {
+ SegmentReader reader = (SegmentReader)segments.elementAt(i);
+ if (reader.directory == this.directory)
+ deleteFiles(reader.files(), deletable); // try to delete our files
+ else
+ deleteFiles(reader.files(), reader.directory); // delete, eg, RAM files
+ }
+
+ writeDeleteableFiles(deletable); // note files we can't delete
+ }
+
+ /** Deletes each of the named files from the given directory. Any
+ IOException thrown by the directory propagates to the caller. */
+ private final void deleteFiles(Vector files, Directory directory)
+ throws IOException {
+ for (int i = 0; i < files.size(); i++)
+ directory.deleteFile((String)files.elementAt(i));
+ }
+
+ /** Tries to delete each of the named files from this index's directory. When
+ a delete fails with an IOException and the file still exists, its name is
+ appended to <code>deletable</code> so that it can be retried later. */
+ private final void deleteFiles(Vector files, Vector deletable)
+ throws IOException {
+ for (int i = 0; i < files.size(); i++) {
+ String file = (String)files.elementAt(i);
+ try {
+ directory.deleteFile(file); // try to delete each file
+ } catch (IOException e) { // if delete fails
+ if (directory.fileExists(file)) {
+ if (infoStream != null)
+ infoStream.println(e.getMessage() + "; Will re-try later.");
+ deletable.addElement(file); // add to deletable
+ }
+ }
+ }
+ }
+
+ /** Returns the file names recorded in this directory's "deletable" file, or
+ an empty Vector when that file does not exist. The file holds a count
+ followed by that many names. */
+ private final Vector readDeleteableFiles() throws IOException {
+ Vector result = new Vector();
+ if (!directory.fileExists("deletable"))
+ return result;
+
+ InputStream input = directory.openFile("deletable");
+ try {
+ for (int i = input.readInt(); i > 0; i--) // read file names
+ result.addElement(input.readString());
+ } finally {
+ input.close();
+ }
+ return result;
+ }
+
+ /** Records the given file names, preceded by their count, as the new
+ contents of the "deletable" file. The data is written to a temporary file
+ which is then renamed into place. */
+ private final void writeDeleteableFiles(Vector files) throws IOException {
+ // NOTE: the temporary name is spelled "deleteable.new" while the final name
+ // is "deletable"; the rename below uses the same two spellings.
+ OutputStream output = directory.createFile("deleteable.new");
+ try {
+ output.writeInt(files.size());
+ for (int i = 0; i < files.size(); i++)
+ output.writeString((String)files.elementAt(i));
+ } finally {
+ output.close();
+ }
+ directory.renameFile("deleteable.new", "deletable");
+ }
+}
diff --git a/src/java/org/apache/lucene/index/Makefile b/src/java/org/apache/lucene/index/Makefile
new file mode 100644
index 00000000000..09c091d1839
--- /dev/null
+++ b/src/java/org/apache/lucene/index/Makefile
@@ -0,0 +1,2 @@
+# sub-directory makefile for lucene
+include ../rules.mk
diff --git a/src/java/org/apache/lucene/index/SegmentInfo.java b/src/java/org/apache/lucene/index/SegmentInfo.java
new file mode 100644
index 00000000000..b3f488aeb31
--- /dev/null
+++ b/src/java/org/apache/lucene/index/SegmentInfo.java
@@ -0,0 +1,69 @@
+package org.apache.lucene.index;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import org.apache.lucene.store.Directory;
+
+/** Describes one segment of an index: its name, the number of documents it
+  holds, and the directory in which it resides. */
+final class SegmentInfo {
+  /** Unique name of the segment within its directory. */
+  public String name;
+  /** Number of documents in the segment. */
+  public int docCount;
+  /** Directory where the segment resides. */
+  public Directory dir;
+
+  /** Creates a descriptor holding the given name, document count and
+    directory. */
+  public SegmentInfo(String name, int docCount, Directory dir) {
+    this.dir = dir;
+    this.docCount = docCount;
+    this.name = name;
+  }
+}
diff --git a/src/java/org/apache/lucene/index/SegmentInfos.java b/src/java/org/apache/lucene/index/SegmentInfos.java
new file mode 100644
index 00000000000..cc29225c25c
--- /dev/null
+++ b/src/java/org/apache/lucene/index/SegmentInfos.java
@@ -0,0 +1,101 @@
+package org.apache.lucene.index;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.util.Vector;
+import java.io.IOException;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.InputStream;
+import org.apache.lucene.store.OutputStream;
+
+final class SegmentInfos extends Vector {
+ public int counter = 0; // used to name new segments
+
+ public final SegmentInfo info(int i) {
+ return (SegmentInfo)elementAt(i);
+ }
+
+ public final void read(Directory directory) throws IOException {
+ InputStream input = directory.openFile("segments");
+ try {
+ counter = input.readInt(); // read counter
+ for (int i = input.readInt(); i > 0; i--) { // read segmentInfos
+ SegmentInfo si = new SegmentInfo(input.readString(), input.readInt(),
+ directory);
+ addElement(si);
+ }
+ } finally {
+ input.close();
+ }
+ }
+
+ public final void write(Directory directory) throws IOException {
+ OutputStream output = directory.createFile("segments.new");
+ try {
+ output.writeInt(counter); // write counter
+ output.writeInt(size()); // write infos
+ for (int i = 0; i < size(); i++) {
+ SegmentInfo si = info(i);
+ output.writeString(si.name);
+ output.writeInt(si.docCount);
+ }
+ } finally {
+ output.close();
+ }
+
+ // install new segment info
+ directory.renameFile("segments.new", "segments");
+ }
+}
diff --git a/src/java/org/apache/lucene/index/SegmentMergeInfo.java b/src/java/org/apache/lucene/index/SegmentMergeInfo.java
new file mode 100644
index 00000000000..b3d581cd71c
--- /dev/null
+++ b/src/java/org/apache/lucene/index/SegmentMergeInfo.java
@@ -0,0 +1,106 @@
+package org.apache.lucene.index;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import org.apache.lucene.util.BitVector;
+
+final class SegmentMergeInfo {
+  Term term;                       // term the enumeration is currently positioned at
+  int base;                        // offset the merger adds to this segment's doc numbers
+  SegmentTermEnum termEnum;
+  SegmentReader reader;
+  SegmentTermPositions postings;
+  int[] docMap = null;             // old doc number -> new doc number, -1 when deleted
+
+  /**
+   * Captures the merge state of one segment. When the reader has deletions,
+   * docMap is filled so that the surviving documents are numbered consecutively.
+   */
+  SegmentMergeInfo(int b, SegmentTermEnum te, SegmentReader r)
+    throws IOException {
+    base = b;
+    reader = r;
+    termEnum = te;
+    term = te.term();
+    postings = new SegmentTermPositions(r);
+
+    BitVector deleted = reader.deletedDocs;
+    if (deleted == null)
+      return;                      // no deletions: docMap stays null
+    // number the surviving documents consecutively, marking deleted ones with -1
+    int limit = reader.maxDoc();
+    docMap = new int[limit];
+    int live = 0;
+    for (int old = 0; old < limit; old++)
+      docMap[old] = deleted.get(old) ? -1 : live++;
+  }
+
+  /**
+   * Advances the term enumeration. On success term is refreshed from it;
+   * once the enumeration is exhausted term is set to null.
+   */
+  final boolean next() throws IOException {
+    boolean advanced = termEnum.next();
+    term = advanced ? termEnum.term() : null;
+    return advanced;
+  }
+  /** Closes the term enumeration, then the postings. */
+  final void close() throws IOException {
+    termEnum.close();
+    postings.close();
+  }
+}
+
diff --git a/src/java/org/apache/lucene/index/SegmentMergeQueue.java b/src/java/org/apache/lucene/index/SegmentMergeQueue.java
new file mode 100644
index 00000000000..044e10beb39
--- /dev/null
+++ b/src/java/org/apache/lucene/index/SegmentMergeQueue.java
@@ -0,0 +1,80 @@
+package org.apache.lucene.index;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import org.apache.lucene.util.PriorityQueue;
+
+final class SegmentMergeQueue extends PriorityQueue {
+  SegmentMergeQueue(int size) {
+    initialize(size);
+  }
+
+  /** Orders entries by term; entries whose terms compare equal are ordered by base. */
+  protected final boolean lessThan(Object a, Object b) {
+    SegmentMergeInfo left = (SegmentMergeInfo)a;
+    SegmentMergeInfo right = (SegmentMergeInfo)b;
+    int order = left.term.compareTo(right.term);
+    if (order != 0)
+      return order < 0;
+    return left.base < right.base;
+  }
+
+  /** Pops and closes entries until top() reports null. */
+  final void close() throws IOException {
+    for (Object head = top(); head != null; head = top())
+      ((SegmentMergeInfo)pop()).close();
+  }
+}
diff --git a/src/java/org/apache/lucene/index/SegmentMerger.java b/src/java/org/apache/lucene/index/SegmentMerger.java
new file mode 100644
index 00000000000..d0fd86acc9d
--- /dev/null
+++ b/src/java/org/apache/lucene/index/SegmentMerger.java
@@ -0,0 +1,275 @@
+package org.apache.lucene.index;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.util.Vector;
+import java.io.IOException;
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.OutputStream;
+import org.apache.lucene.store.InputStream;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.util.PriorityQueue;
+import org.apache.lucene.util.BitVector;
+
+final class SegmentMerger { // writes the merged contents of the added readers as files named segment + extension
+ private Directory directory; // where the merged segment's files are created
+ private String segment; // name of the merged segment; prefix of its file names
+
+ private Vector readers = new Vector(); // SegmentReaders registered through add()
+ private FieldInfos fieldInfos; // combined field infos of all readers, built by mergeFields()
+
+ SegmentMerger(Directory dir, String name) {
+ directory = dir;
+ segment = name;
+ }
+ /** Registers a reader whose segment takes part in the merge. */
+ final void add(SegmentReader reader) {
+ readers.addElement(reader);
+ }
+ /** Returns the i-th registered reader. */
+ final SegmentReader segmentReader(int i) {
+ return (SegmentReader)readers.elementAt(i);
+ }
+ /** Merges fields, terms and norms in that order, then closes the registered readers in a finally block. */
+ final void merge() throws IOException {
+ try {
+ mergeFields();
+ mergeTerms();
+ mergeNorms();
+
+ } finally {
+ for (int i = 0; i < readers.size(); i++) { // close readers
+ SegmentReader reader = (SegmentReader)readers.elementAt(i);
+ reader.close();
+ }
+ }
+ }
+ /** Writes the merged field infos to segment + ".fnm" and copies the stored fields of every non-deleted document. */
+ private final void mergeFields() throws IOException {
+ fieldInfos = new FieldInfos(); // merge field names
+ for (int i = 0; i < readers.size(); i++) {
+ SegmentReader reader = (SegmentReader)readers.elementAt(i);
+ fieldInfos.add(reader.fieldInfos);
+ }
+ fieldInfos.write(directory, segment + ".fnm");
+
+ FieldsWriter fieldsWriter = // merge field values
+ new FieldsWriter(directory, segment, fieldInfos);
+ try {
+ for (int i = 0; i < readers.size(); i++) {
+ SegmentReader reader = (SegmentReader)readers.elementAt(i);
+ BitVector deletedDocs = reader.deletedDocs;
+ int maxDoc = reader.maxDoc();
+ for (int j = 0; j < maxDoc; j++)
+ if (deletedDocs == null || !deletedDocs.get(j)) // skip deleted docs
+ fieldsWriter.addDocument(reader.document(j));
+ }
+ } finally {
+ fieldsWriter.close();
+ }
+ }
+
+ private OutputStream freqOutput = null; // segment + ".frq", open only during mergeTerms()
+ private OutputStream proxOutput = null; // segment + ".prx", open only during mergeTerms()
+ private TermInfosWriter termInfosWriter = null; // term dictionary writer of the merged segment
+ private SegmentMergeQueue queue = null; // one SegmentMergeInfo per reader that still has terms
+ /** Creates the ".frq"/".prx" outputs and the term infos writer, merges all terms, and closes everything in a finally block. */
+ private final void mergeTerms() throws IOException {
+ try {
+ freqOutput = directory.createFile(segment + ".frq");
+ proxOutput = directory.createFile(segment + ".prx");
+ termInfosWriter =
+ new TermInfosWriter(directory, segment, fieldInfos);
+
+ mergeTermInfos();
+
+ } finally {
+ if (freqOutput != null) freqOutput.close();
+ if (proxOutput != null) proxOutput.close();
+ if (termInfosWriter != null) termInfosWriter.close();
+ if (queue != null) queue.close();
+ }
+ }
+ /** Merges the term enumerations of all readers through the queue; entries positioned at the same term are handled together. */
+ private final void mergeTermInfos() throws IOException {
+ queue = new SegmentMergeQueue(readers.size());
+ int base = 0;
+ for (int i = 0; i < readers.size(); i++) {
+ SegmentReader reader = (SegmentReader)readers.elementAt(i);
+ SegmentTermEnum termEnum = (SegmentTermEnum)reader.terms();
+ SegmentMergeInfo smi = new SegmentMergeInfo(base, termEnum, reader);
+ base += reader.numDocs(); // the next reader's base follows this reader's non-deleted docs
+ if (smi.next())
+ queue.put(smi); // initialize queue
+ else
+ smi.close(); // this reader's enumeration had nothing to advance to
+ }
+
+ SegmentMergeInfo[] match = new SegmentMergeInfo[readers.size()]; // entries positioned at the current term
+
+ while (queue.size() > 0) {
+ int matchSize = 0; // pop matching terms
+ match[matchSize++] = (SegmentMergeInfo)queue.pop();
+ Term term = match[0].term;
+ SegmentMergeInfo top = (SegmentMergeInfo)queue.top();
+
+ while (top != null && term.compareTo(top.term) == 0) {
+ match[matchSize++] = (SegmentMergeInfo)queue.pop();
+ top = (SegmentMergeInfo)queue.top();
+ }
+
+ mergeTermInfo(match, matchSize); // add new TermInfo
+
+ while (matchSize > 0) {
+ SegmentMergeInfo smi = match[--matchSize];
+ if (smi.next())
+ queue.put(smi); // restore queue
+ else
+ smi.close(); // done with a segment
+ }
+ }
+ }
+
+ private final TermInfo termInfo = new TermInfo(); // minimize consing
+ /** Appends the postings of one term and, if at least one document was written, adds the term to the dictionary. */
+ private final void mergeTermInfo(SegmentMergeInfo[] smis, int n)
+ throws IOException {
+ long freqPointer = freqOutput.getFilePointer(); // output positions before this term's postings are appended
+ long proxPointer = proxOutput.getFilePointer();
+
+ int df = appendPostings(smis, n); // append posting data
+
+ if (df > 0) {
+ // add an entry to the dictionary with pointers to prox and freq files
+ termInfo.set(df, freqPointer, proxPointer);
+ termInfosWriter.add(smis[0].term, termInfo);
+ }
+ }
+ /** Writes the postings of the first n entries of smis to the freq/prox outputs; returns the number of documents written. */
+ private final int appendPostings(SegmentMergeInfo[] smis, int n)
+ throws IOException {
+ int lastDoc = 0; // previous doc number written; doc numbers are stored as differences from it
+ int df = 0; // number of docs w/ term
+ for (int i = 0; i < n; i++) {
+ SegmentMergeInfo smi = smis[i];
+ SegmentTermPositions postings = smi.postings;
+ int base = smi.base;
+ int[] docMap = smi.docMap;
+ smi.termEnum.termInfo(termInfo); // presumably fills the shared termInfo for the enum's current term -- confirm in SegmentTermEnum
+ postings.seek(termInfo);
+ while (postings.next()) {
+ int doc;
+ if (docMap == null)
+ doc = base + postings.doc; // no deletions
+ else
+ doc = base + docMap[postings.doc]; // re-map around deletions
+
+ if (doc < lastDoc) // the difference written below must not be negative
+ throw new IllegalStateException("docs out of order");
+
+ int docCode = (doc - lastDoc) << 1; // use low bit to flag freq=1
+ lastDoc = doc;
+
+ int freq = postings.freq;
+ if (freq == 1) {
+ freqOutput.writeVInt(docCode | 1); // write doc & freq=1
+ } else {
+ freqOutput.writeVInt(docCode); // write doc
+ freqOutput.writeVInt(freq); // write frequency in doc
+ }
+
+ int lastPosition = 0; // write position deltas
+ for (int j = 0; j < freq; j++) {
+ int position = postings.nextPosition();
+ proxOutput.writeVInt(position - lastPosition);
+ lastPosition = position;
+ }
+
+ df++;
+ }
+ }
+ return df;
+ }
+ /** For every indexed field i, creates segment + ".f" + i holding one norm byte per non-deleted document of each reader. */
+ private final void mergeNorms() throws IOException {
+ for (int i = 0; i < fieldInfos.size(); i++) {
+ FieldInfo fi = fieldInfos.fieldInfo(i);
+ if (fi.isIndexed) {
+ OutputStream output = directory.createFile(segment + ".f" + i);
+ try {
+ for (int j = 0; j < readers.size(); j++) {
+ SegmentReader reader = (SegmentReader)readers.elementAt(j);
+ BitVector deletedDocs = reader.deletedDocs;
+ InputStream input = reader.normStream(fi.name); // null when this reader has no norms for the field
+ int maxDoc = reader.maxDoc();
+ try {
+ for (int k = 0; k < maxDoc; k++) {
+ byte norm = input != null ? input.readByte() : (byte)0; // read one byte per doc, deleted or not; 0 without a stream
+ if (deletedDocs == null || !deletedDocs.get(k))
+ output.writeByte(norm);
+ }
+ } finally {
+ if (input != null)
+ input.close();
+ }
+ }
+ } finally {
+ output.close();
+ }
+ }
+ }
+ }
+}
diff --git a/src/java/org/apache/lucene/index/SegmentReader.java b/src/java/org/apache/lucene/index/SegmentReader.java
new file mode 100644
index 00000000000..67292205c7a
--- /dev/null
+++ b/src/java/org/apache/lucene/index/SegmentReader.java
@@ -0,0 +1,284 @@
+package org.apache.lucene.index;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import java.util.Hashtable;
+import java.util.Enumeration;
+import java.util.Vector;
+
+import org.apache.lucene.util.BitVector;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.InputStream;
+import org.apache.lucene.document.Document;
+
+final class SegmentReader extends IndexReader {
+  Directory directory;
+  private boolean closeDirectory = false;    // when true, close() also closes directory
+  private String segment;                    // segment name; prefix of all its file names
+
+  FieldInfos fieldInfos;
+  private FieldsReader fieldsReader;
+
+  TermInfosReader tis;
+
+  BitVector deletedDocs = null;              // stays null while the segment has no deletions
+  private boolean deletedDocsDirty = false;  // set by delete(), flushed to disk by close()
+
+  private InputStream freqStream;
+  private InputStream proxStream;
+
+  /** The open norms stream of one field plus its bytes, which norms(String) caches on first use. */
+  private static class Norm {
+    public Norm(InputStream in) { this.in = in; }
+    public InputStream in;
+    public byte[] bytes;
+  }
+  private Hashtable norms = new Hashtable();  // field name -> Norm
+  /** Opens the segment described by si; closeDir decides whether close() also closes the directory. */
+  SegmentReader(SegmentInfo si, boolean closeDir)
+    throws IOException {
+    this(si);
+    closeDirectory = closeDir;
+  }
+  /** Opens the field, term, deletion, frequency, position and norm files of segment si.name in si.dir. */
+  SegmentReader(SegmentInfo si)
+    throws IOException {
+    directory = si.dir;
+    segment = si.name;
+
+    fieldInfos = new FieldInfos(directory, segment + ".fnm");
+    fieldsReader = new FieldsReader(directory, segment, fieldInfos);
+
+    tis = new TermInfosReader(directory, segment, fieldInfos);
+
+    if (hasDeletions(si))
+      deletedDocs = new BitVector(directory, segment + ".del");
+
+    // make sure that all index files have been read or are kept open
+    // so that if an index update removes them we'll still have them
+    freqStream = directory.openFile(segment + ".frq");
+    proxStream = directory.openFile(segment + ".prx");
+    openNorms();
+  }
+  /** Writes pending deletions (to ".tmp", then renamed to ".del") and closes every stream this reader holds. */
+  public final synchronized void close() throws IOException {
+    if (deletedDocsDirty) {
+      synchronized (directory) {
+        deletedDocs.write(directory, segment + ".tmp");
+        directory.renameFile(segment + ".tmp", segment + ".del");
+      }
+      deletedDocsDirty = false;
+    }
+
+    fieldsReader.close();
+    tis.close();
+
+    if (freqStream != null)
+      freqStream.close();
+    if (proxStream != null)
+      proxStream.close();
+
+    closeNorms();
+
+    if (closeDirectory)
+      directory.close();
+  }
+  /** Returns true when the segment's ".del" file exists in its directory. */
+  final static boolean hasDeletions(SegmentInfo si) throws IOException {
+    return si.dir.fileExists(si.name + ".del");
+  }
+  /** Marks docNum as deleted in memory; the change reaches the ".del" file when close() runs. */
+  public final synchronized void delete(int docNum) throws IOException {
+    if (deletedDocs == null)
+      deletedDocs = new BitVector(maxDoc());
+    deletedDocsDirty = true;
+    deletedDocs.set(docNum);
+  }
+  /** Lists the segment's file names: the seven fixed files, ".del" if present, and one ".f<i>" per indexed field. */
+  final Vector files() throws IOException {
+    Vector files = new Vector(16);
+    files.addElement(segment + ".fnm");
+    files.addElement(segment + ".fdx");
+    files.addElement(segment + ".fdt");
+    files.addElement(segment + ".tii");
+    files.addElement(segment + ".tis");
+    files.addElement(segment + ".frq");
+    files.addElement(segment + ".prx");
+
+    if (directory.fileExists(segment + ".del"))
+      files.addElement(segment + ".del");
+
+    for (int i = 0; i < fieldInfos.size(); i++) {
+      FieldInfo fi = fieldInfos.fieldInfo(i);
+      if (fi.isIndexed)
+        files.addElement(segment + ".f" + i);  // NOTE(review): openNorms() names these by fi.number; assumes number == i
+    }
+    return files;
+  }
+
+  public final TermEnum terms() throws IOException {
+    return tis.terms();
+  }
+
+  public final TermEnum terms(Term t) throws IOException {
+    return tis.terms(t);
+  }
+  /** Returns the stored fields of document n; throws IllegalArgumentException when n is deleted. */
+  public final synchronized Document document(int n) throws IOException {
+    if (isDeleted(n))
+      throw new IllegalArgumentException
+        ("attempt to access a deleted document");
+    return fieldsReader.doc(n);
+  }
+
+  public final synchronized boolean isDeleted(int n) {
+    return (deletedDocs != null && deletedDocs.get(n));
+  }
+  /** Returns the documents containing t, or null when the term dictionary has no entry for t. */
+  public final TermDocs termDocs(Term t) throws IOException {
+    TermInfo ti = tis.get(t);
+    if (ti != null)
+      return new SegmentTermDocs(this, ti);
+    else
+      return null;
+  }
+  /** Returns a clone of the frequency stream, so each caller gets a stream object of its own. */
+  final InputStream getFreqStream () {
+    return (InputStream)freqStream.clone();
+  }
+  /** Returns the positions of t, or null when the term dictionary has no entry for t. */
+  public final TermPositions termPositions(Term t) throws IOException {
+    TermInfo ti = tis.get(t);
+    if (ti != null)
+      return new SegmentTermPositions(this, ti);
+    else
+      return null;
+  }
+  /** Returns a clone of the positions stream, so each caller gets a stream object of its own. */
+  final InputStream getProxStream () {
+    return (InputStream)proxStream.clone();
+  }
+  /** Returns the docFreq recorded for t in the term dictionary, or 0 when t has no entry. */
+  public final int docFreq(Term t) throws IOException {
+    TermInfo ti = tis.get(t);
+    if (ti != null)
+      return ti.docFreq;
+    else
+      return 0;
+  }
+  /** Returns maxDoc() minus the number of deleted documents. */
+  public final int numDocs() {
+    int n = maxDoc();
+    if (deletedDocs != null)
+      n -= deletedDocs.count();
+    return n;
+  }
+  /** Returns the size reported by the fields reader. */
+  public final int maxDoc() {
+    return fieldsReader.size();
+  }
+  /** Returns the norm bytes of field, loading and caching them on first use; null when the field has no norms. */
+  public final byte[] norms(String field) throws IOException {
+    Norm norm = (Norm)norms.get(field);
+    if (norm == null)
+      return null;
+    if (norm.bytes == null) {
+      byte[] bytes = new byte[maxDoc()];
+      norms(field, bytes, 0);
+      norm.bytes = bytes;
+    }
+    return norm.bytes;
+  }
+  /** Reads maxDoc() norm bytes of field into bytes starting at offset; bytes is left untouched when the field has no norms. */
+  final void norms(String field, byte[] bytes, int offset) throws IOException {
+    InputStream normStream = normStream(field);
+    if (normStream == null)
+      return;                                   // use zeros in array
+    try {
+      normStream.readBytes(bytes, offset, maxDoc());
+    } finally {
+      normStream.close();
+    }
+  }
+  /** Returns a clone of the field's norms stream positioned at 0, or null when the field has no norms. */
+  final InputStream normStream(String field) throws IOException {
+    Norm norm = (Norm)norms.get(field);
+    if (norm == null)
+      return null;
+    InputStream result = (InputStream)norm.in.clone();
+    result.seek(0);
+    return result;
+  }
+  /** Opens segment + ".f" + fi.number for every indexed field and registers it under the field name. */
+  private final void openNorms() throws IOException {
+    for (int i = 0; i < fieldInfos.size(); i++) {
+      FieldInfo fi = fieldInfos.fieldInfo(i);
+      if (fi.isIndexed)
+        norms.put(fi.name,
+                  new Norm(directory.openFile(segment + ".f" + fi.number)));
+    }
+  }
+  /** Closes the norms stream of every registered field. */
+  private final void closeNorms() throws IOException {
+    synchronized (norms) {
+      Enumeration normEntries = norms.elements();  // was named "enum", a reserved keyword since Java 5
+      while (normEntries.hasMoreElements()) {
+        Norm norm = (Norm)normEntries.nextElement();
+        norm.in.close();
+      }
+    }
+  }
+}
diff --git a/src/java/org/apache/lucene/index/SegmentTermDocs.java b/src/java/org/apache/lucene/index/SegmentTermDocs.java
new file mode 100644
index 00000000000..3a48d8aa045
--- /dev/null
+++ b/src/java/org/apache/lucene/index/SegmentTermDocs.java
@@ -0,0 +1,150 @@
+package org.apache.lucene.index;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import org.apache.lucene.util.BitVector;
+import org.apache.lucene.store.InputStream;
+
+class SegmentTermDocs implements TermDocs {
+  protected SegmentReader parent;
+  private InputStream freqStream;      // obtained from parent.getFreqStream()
+  private int freqCount;               // entries of the current term not yet read
+  private BitVector deletedDocs;       // the parent's deletions, or null
+  int doc = 0;
+  int freq;
+
+  SegmentTermDocs(SegmentReader p) throws IOException {
+    parent = p;
+    freqStream = p.getFreqStream();
+    deletedDocs = p.deletedDocs;
+  }
+
+  SegmentTermDocs(SegmentReader p, TermInfo ti) throws IOException {
+    this(p);
+    seek(ti);
+  }
+  /** Resets doc to 0 and positions the frequency stream at the postings described by ti. */
+  void seek(TermInfo ti) throws IOException {
+    doc = 0;
+    freqCount = ti.docFreq;
+    freqStream.seek(ti.freqPointer);
+  }
+  /** Closes this enumeration's frequency stream. */
+  public void close() throws IOException {
+    freqStream.close();
+  }
+
+  public final int doc() { return doc; }
+  public final int freq() { return freq; }
+  /** Hook invoked by next() after it reads an entry for a deleted document; does nothing here. */
+  protected void skippingDoc() throws IOException {
+  }
+
+  /**
+   * Moves to the next document that is not deleted, updating doc and freq.
+   * Returns false once no entries of the current term remain.
+   */
+  public boolean next() throws IOException {
+    while (freqCount != 0) {
+      int code = freqStream.readVInt();
+      doc += code >>> 1;                  // upper bits: amount added to the previous doc
+      freq = (code & 1) != 0              // a set low bit means freq is one
+          ? 1
+          : freqStream.readVInt();        // otherwise freq is stored explicitly
+      freqCount--;
+
+      boolean deleted = deletedDocs != null && deletedDocs.get(doc);
+      if (!deleted)
+        return true;
+      skippingDoc();                      // notify the hook before reading on
+    }
+    return false;
+  }
+
+  /** Bulk variant of next(): fills docs and freqs with non-deleted entries and returns how many were stored. */
+  public int read(final int[] docs, final int[] freqs)
+    throws IOException {
+    final int capacity = docs.length;
+    int filled = 0;
+
+    // the decoding of next() is repeated here instead of being called, for speed
+    while (filled < capacity && freqCount > 0) {
+      final int code = freqStream.readVInt();
+      doc += code >>> 1;
+      if ((code & 1) == 0)
+        freq = freqStream.readVInt();
+      else
+        freq = 1;
+      freqCount--;
+
+      if (deletedDocs != null && deletedDocs.get(doc))
+        continue;                         // deleted: not reported, and skippingDoc() is not called here
+      docs[filled] = doc;
+      freqs[filled] = freq;
+      filled++;
+    }
+    return filled;
+  }
+
+  /** As yet unoptimized: steps with next() at least once and until doc is no longer below target.
+   *  Returns false when the entries run out first. */
+  public boolean skipTo(int target) throws IOException {
+    boolean found = next();
+    while (found && doc < target)
+      found = next();
+    return found;
+  }
+}
diff --git a/src/java/org/apache/lucene/index/SegmentTermEnum.java b/src/java/org/apache/lucene/index/SegmentTermEnum.java
new file mode 100644
index 00000000000..e7722da0f17
--- /dev/null
+++ b/src/java/org/apache/lucene/index/SegmentTermEnum.java
@@ -0,0 +1,184 @@
+package org.apache.lucene.index;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import org.apache.lucene.store.InputStream;
+
+final class SegmentTermEnum extends TermEnum implements Cloneable {
+  private InputStream input;              // stream the term entries are decoded from
+  private FieldInfos fieldInfos;          // resolves stored field numbers to field names
+  int size;                               // entry count, read by the constructor
+  int position = -1;                      // index of the current entry; -1 before the first next()
+
+  private Term term = new Term("", "");   // current term; set to null once exhausted
+  private TermInfo termInfo = new TermInfo();
+
+  boolean isIndex = false;                // when true, each entry also carries an index pointer
+  long indexPointer = 0;                  // running sum of the index-pointer deltas read so far
+  Term prev;                              // term that preceded the current one; null after seek()
+
+  private char[] buffer = {};             // characters of the current term, reused by readTerm()
+
+  SegmentTermEnum(InputStream i, FieldInfos fis, boolean isi)
+       throws IOException {
+    input = i;
+    fieldInfos = fis;
+    size = input.readInt();               // next() compares position against this count
+    isIndex = isi;
+  }
+
+  /** Copies this enum; the copy gets its own stream and TermInfo, and its own buffer if a term is current. */
+  protected Object clone() {
+    SegmentTermEnum clone;
+    try {
+      clone = (SegmentTermEnum)super.clone();
+    } catch (CloneNotSupportedException e) {   // cannot happen: this class is Cloneable
+      throw new RuntimeException(e.toString());  // fail loudly instead of a later null dereference
+    }
+    clone.input = (InputStream)input.clone();
+    clone.termInfo = new TermInfo(termInfo);
+    if (term != null) clone.growBuffer(term.text.length());  // term is null once exhausted
+    return clone;
+  }
+
+  final void seek(long pointer, int p, Term t, TermInfo ti) throws IOException {
+    input.seek(pointer);
+    position = p;
+    term = t;
+    prev = null;
+    termInfo.set(ti);
+    growBuffer(term.text.length());       // copy term text into buffer
+  }
+
+  /** Increments the enumeration to the next element.  True if one exists.*/
+  public final boolean next() throws IOException {
+    if (position++ >= size-1) {
+      term = null;
+      return false;
+    }
+
+    prev = term;
+    term = readTerm();
+
+    termInfo.docFreq = input.readVInt();          // read doc freq
+    termInfo.freqPointer += input.readVLong();    // read freq pointer
+    termInfo.proxPointer += input.readVLong();    // read prox pointer
+
+    if (isIndex)
+      indexPointer += input.readVLong();          // read index pointer
+
+    return true;
+  }
+
+  private final Term readTerm() throws IOException {
+    int start = input.readVInt();         // leading chars kept from the previous term
+    int length = input.readVInt();        // chars that follow in the stream
+    int totalLength = start + length;
+    if (buffer.length < totalLength)
+      growBuffer(totalLength);
+
+    input.readChars(buffer, start, length);
+    return new Term(fieldInfos.fieldName(input.readVInt()),
+                    new String(buffer, 0, totalLength), false);
+  }
+
+  private final void growBuffer(int length) {
+    buffer = new char[length];
+    // keep the current term's text: readTerm() reuses its first `start` chars
+    term.text.getChars(0, term.text.length(), buffer, 0);
+  }
+
+  /** Returns the current Term in the enumeration.
+    Initially invalid, valid after next() called for the first time.*/
+  public final Term term() {
+    return term;
+  }
+
+  /** Returns the current TermInfo in the enumeration.
+    Initially invalid, valid after next() called for the first time.*/
+  final TermInfo termInfo() {
+    return new TermInfo(termInfo);
+  }
+
+  /** Sets the argument to the current TermInfo in the enumeration.
+    Initially invalid, valid after next() called for the first time.*/
+  final void termInfo(TermInfo ti) {
+    ti.set(termInfo);
+  }
+
+  /** Returns the docFreq from the current TermInfo in the enumeration.
+    Initially invalid, valid after next() called for the first time.*/
+  public final int docFreq() {
+    return termInfo.docFreq;
+  }
+
+  /** Returns the freqPointer from the current TermInfo in the enumeration.
+    Initially invalid, valid after next() called for the first time.*/
+  final long freqPointer() {
+    return termInfo.freqPointer;
+  }
+
+  /** Returns the proxPointer from the current TermInfo in the enumeration.
+    Initially invalid, valid after next() called for the first time.*/
+  final long proxPointer() {
+    return termInfo.proxPointer;
+  }
+
+  /** Closes the enumeration to further activity, freeing resources. */
+  public final void close() throws IOException {
+    input.close();
+  }
+}
diff --git a/src/java/org/apache/lucene/index/SegmentTermPositions.java b/src/java/org/apache/lucene/index/SegmentTermPositions.java
new file mode 100644
index 00000000000..d7b10c8eac0
--- /dev/null
+++ b/src/java/org/apache/lucene/index/SegmentTermPositions.java
@@ -0,0 +1,114 @@
+package org.apache.lucene.index;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.store.InputStream;
+
+final class SegmentTermPositions extends SegmentTermDocs implements TermPositions {
+  private InputStream proxStream;         // positions stream obtained from the parent reader
+  private int proxCount;                  // positions of the current doc not yet returned
+  private int position;                   // last position returned for the current doc
+
+  SegmentTermPositions(SegmentReader p) throws IOException {
+    super(p);
+    proxStream = parent.getProxStream();
+  }
+
+  SegmentTermPositions(SegmentReader p, TermInfo ti) throws IOException {
+    this(p);
+    seek(ti);
+  }
+
+  final void seek(TermInfo ti) throws IOException {
+    super.seek(ti);
+    proxStream.seek(ti.proxPointer);
+    proxCount = 0;                        // a stale count would make next() eat the new term's positions
+  }
+
+  public final void close() throws IOException {
+    try {
+      super.close();
+    } finally {                           // release proxStream even if the call above failed
+      proxStream.close();
+    }
+  }
+
+  public final int nextPosition() throws IOException {
+    proxCount--;
+    return position += proxStream.readVInt();     // positions are stored as deltas
+  }
+
+  protected final void skippingDoc() throws IOException {
+    for (int f = freq; f > 0; f--)                // skip all positions
+      proxStream.readVInt();
+  }
+
+  public final boolean next() throws IOException {
+    for (int f = proxCount; f > 0; f--)           // skip unread positions
+      proxStream.readVInt();
+    if (super.next()) {                           // run super
+      proxCount = freq;                           // note frequency
+      position = 0;                               // reset position
+      return true;
+    }
+    return false;
+  }
+
+  public final int read(final int[] docs, final int[] freqs) throws IOException {
+    throw new UnsupportedOperationException("TermPositions cannot bulk-read; use TermDocs");
+  }
+}
diff --git a/src/java/org/apache/lucene/index/SegmentsReader.java b/src/java/org/apache/lucene/index/SegmentsReader.java
new file mode 100644
index 00000000000..4ab2197dc04
--- /dev/null
+++ b/src/java/org/apache/lucene/index/SegmentsReader.java
@@ -0,0 +1,329 @@
+package org.apache.lucene.index;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import java.util.Hashtable;
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.document.Document;
+
+final class SegmentsReader extends IndexReader {  // presents several segment readers as one doc-number space
+  protected SegmentReader[] readers;
+  protected int[] starts;                         // 1st docno for each segment
+  private Hashtable normsCache = new Hashtable();  // field name -> merged norms array
+  private int maxDoc = 0;                         // sum of the segments' maxDoc() values
+  private int numDocs = -1;                       // cached numDocs(); -1 means not computed
+
+  SegmentsReader(SegmentReader[] r) throws IOException {
+    readers = r;
+    starts = new int[readers.length + 1];         // build starts array
+    for (int i = 0; i < readers.length; i++) {
+      starts[i] = maxDoc;
+      maxDoc += readers[i].maxDoc();              // compute maxDocs
+    }
+    starts[readers.length] = maxDoc;
+  }
+
+  public final int numDocs() {
+    if (numDocs == -1) {                          // check cache
+      int n = 0;                                  // cache miss--recompute
+      for (int i = 0; i < readers.length; i++)
+        n += readers[i].numDocs();                // sum from readers
+      numDocs = n;
+    }
+    return numDocs;
+  }
+
+  public final int maxDoc() {
+    return maxDoc;
+  }
+
+  public final Document document(int n) throws IOException {
+    int i = readerIndex(n);                       // find segment num
+    return readers[i].document(n - starts[i]);    // dispatch to segment reader
+  }
+
+  public final boolean isDeleted(int n) {
+    int i = readerIndex(n);                       // find segment num
+    return readers[i].isDeleted(n - starts[i]);   // dispatch to segment reader
+  }
+
+  public final void delete(int n) throws IOException {
+    numDocs = -1;                                 // invalidate cache
+    int i = readerIndex(n);                       // find segment num
+    readers[i].delete(n - starts[i]);             // dispatch to segment reader
+  }
+
+  private final int readerIndex(int n) {          // find reader for doc n:
+    int lo = 0;                                   // search starts array for the
+    int hi = readers.length - 1;                  // last element not greater than n
+    while (hi >= lo) {
+      int mid = (lo + hi) >>> 1;
+      int midValue = starts[mid];
+      if (n < midValue) hi = mid - 1;
+      else if (n > midValue) lo = mid + 1;
+      else {                                      // exact hit on a segment start
+        while (mid+1 < readers.length && starts[mid+1] == midValue)
+          mid++;                                  // empty segments share a start: take the last
+        return mid;
+      }
+    }
+    return hi;
+  }
+
+  public final synchronized byte[] norms(String field) throws IOException {
+    byte[] bytes = (byte[])normsCache.get(field);
+    if (bytes != null)
+      return bytes;                               // cache hit
+
+    bytes = new byte[maxDoc()];
+    for (int i = 0; i < readers.length; i++)
+      readers[i].norms(field, bytes, starts[i]);  // each segment fills its own slice
+    normsCache.put(field, bytes);                 // update cache
+    return bytes;
+  }
+
+  public final TermEnum terms() throws IOException {
+    return new SegmentsTermEnum(readers, starts, null);
+  }
+
+  public final TermEnum terms(Term term) throws IOException {
+    return new SegmentsTermEnum(readers, starts, term);
+  }
+
+  public final int docFreq(Term t) throws IOException {
+    int total = 0;                                // sum freqs in segments
+    for (int i = 0; i < readers.length; i++)
+      total += readers[i].docFreq(t);
+    return total;
+  }
+
+  public final TermDocs termDocs(Term term) throws IOException {
+    return new SegmentsTermDocs(readers, starts, term);
+  }
+
+  public final TermPositions termPositions(Term term) throws IOException {
+    return new SegmentsTermPositions(readers, starts, term);
+  }
+
+  public final void close() throws IOException {
+    for (int i = 0; i < readers.length; i++)
+      readers[i].close();
+  }
+}
+
+class SegmentsTermEnum extends TermEnum {         // merges the term enumerations of several segments
+  private SegmentMergeQueue queue;                // one entry per segment that still has terms
+
+  private Term term;                              // current term; null when exhausted
+  private int docFreq;                            // doc frequency reported for the current term
+
+  SegmentsTermEnum(SegmentReader[] readers, int[] starts, Term t)
+       throws IOException {
+    final boolean positioned = (t != null);       // a start term was supplied
+    queue = new SegmentMergeQueue(readers.length);
+    for (int seg = 0; seg < readers.length; seg++) {
+      final SegmentReader segment = readers[seg];
+      final SegmentTermEnum segTerms = positioned
+        ? (SegmentTermEnum)segment.terms(t)
+        : (SegmentTermEnum)segment.terms();
+      final SegmentMergeInfo info = new SegmentMergeInfo(starts[seg], segTerms, segment);
+
+      // no start term: advance once; otherwise keep the enum only if it has a current term
+      final boolean hasTerm = positioned ? (segTerms.term() != null) : info.next();
+      if (hasTerm)
+        queue.put(info);                          // initialize queue
+      else
+        info.close();
+    }
+
+    if (positioned && queue.size() > 0) {         // report the queue's top entry right away
+      SegmentMergeInfo first = (SegmentMergeInfo)queue.top();
+      term = first.termEnum.term();
+      docFreq = first.termEnum.docFreq();
+    }
+  }
+
+  public final boolean next() throws IOException {
+    SegmentMergeInfo head = (SegmentMergeInfo)queue.top();
+    if (head == null) {                           // queue is empty: nothing left
+      term = null;
+      return false;
+    }
+
+    term = head.term;
+    docFreq = 0;
+    // consume every queued segment whose term compares equal to this one
+    for (; head != null && term.compareTo(head.term) == 0;
+         head = (SegmentMergeInfo)queue.top()) {
+      queue.pop();
+      docFreq += head.termEnum.docFreq();         // increment freq
+      if (head.next())
+        queue.put(head);                          // restore queue
+      else
+        head.close();                             // done with a segment
+    }
+    return true;
+  }
+
+  public final Term term() {
+    return term;
+  }
+
+  public final int docFreq() {
+    return docFreq;
+  }
+
+  public final void close() throws IOException {
+    queue.close();
+  }
+}
+
+class SegmentsTermDocs implements TermDocs {
+ protected SegmentReader[] readers;
+ protected int[] starts;
+ protected Term term;
+
+ protected int base = 0;
+ protected int pointer = 0;
+
+ SegmentsTermDocs(SegmentReader[] r, int[] s, Term t) {
+ readers = r;
+ starts = s;
+ term = t;
+ }
+
+ protected SegmentTermDocs current;
+
+ public final int doc() {
+ return base + current.doc;
+ }
+ public final int freq() {
+ return current.freq;
+ }
+
+ public final boolean next() throws IOException {
+ if (current != null && current.next()) {
+ return true;
+ } else if (pointer < readers.length) {
+ if (current != null)
+ current.close();
+ base = starts[pointer];
+ current = termDocs(readers[pointer++]);
+ return next();
+ } else
+ return false;
+ }
+
+ /** Optimized implementation. */
+ public final int read(final int[] docs, final int[] freqs)
+ throws IOException {
+ while (true) {
+ while (current == null) {
+ if (pointer < readers.length) { // try next segment
+ base = starts[pointer];
+ current = termDocs(readers[pointer++]);
+ } else {
+ return 0;
+ }
+ }
+ int end = current.read(docs, freqs);
+ if (end == 0) { // none left in segment
+ current.close();
+ current = null;
+ } else { // got some
+ final int b = base; // adjust doc numbers
+ for (int i = 0; i < end; i++)
+ docs[i] += b;
+ return end;
+ }
+ }
+ }
+
+ /** As yet unoptimized implementation. */
+ public boolean skipTo(int target) throws IOException {
+ do {
+ if (!next())
+ return false;
+ } while (target > doc());
+ return true;
+ }
+
+ protected SegmentTermDocs termDocs(SegmentReader reader)
+ throws IOException {
+ return (SegmentTermDocs)reader.termDocs(term);
+ }
+
+ public final void close() throws IOException {
+ if (current != null)
+ current.close();
+ }
+}
+
+class SegmentsTermPositions extends SegmentsTermDocs implements TermPositions {  // adds positions to the chained enumeration
+ SegmentsTermPositions(SegmentReader[] r, int[] s, Term t) {
+ super(r,s,t);
+ }
+
+ protected final SegmentTermDocs termDocs(SegmentReader reader)   // overrides the per-segment factory
+ throws IOException {
+ return (SegmentTermDocs)reader.termPositions(term);   // open a positions enumerator instead of a plain one
+ }
+
+ public final int nextPosition() throws IOException {
+ return ((SegmentTermPositions)current).nextPosition();   // cast assumes current was created by termDocs() above
+ }
+}
diff --git a/src/java/org/apache/lucene/index/Term.java b/src/java/org/apache/lucene/index/Term.java
new file mode 100644
index 00000000000..2ec6592e72e
--- /dev/null
+++ b/src/java/org/apache/lucene/index/Term.java
@@ -0,0 +1,122 @@
+package org.apache.lucene.index;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+/**
+  A Term represents a word from text.  This is the unit of search.  It is
+  composed of two elements, the text of the word, as a string, and the name of
+  the field that the text occurred in, an interned string.
+
+  Note that terms may represent more than words from text fields, but also
+  things like dates, email addresses, urls, etc.  */
+
+public final class Term {
+  String field;
+  String text;
+
+  /** Constructs a Term with the given field and text. */
+  public Term(String fld, String txt) {
+    this(fld, txt, true);
+  }
+  Term(String fld, String txt, boolean intern) {
+    field = intern ? fld.intern() : fld;          // field names are interned
+    text = txt;                                   // unless already known to be
+  }
+
+  /** Returns the field of this term, an interned string.   The field indicates
+    the part of a document which this term came from. */
+  public final String field() { return field; }
+
+  /** Returns the text of this term.  In the case of words, this is simply the
+    text of the word.  In the case of dates and other types, this is an
+    encoding of the object as a string.  */
+  public final String text() { return text; }
+
+  /** Compares two terms, returning true iff they have the same
+      field and text.  Null and non-Term arguments are never equal. */
+  public final boolean equals(Object o) {
+    if (!(o instanceof Term))                     // also covers null; avoids ClassCastException
+      return false;
+    Term other = (Term)o;
+    return field == other.field && text.equals(other.text);  // fields are interned
+  }
+
+  /** Combines the hashCode() of the field and the text. */
+  public final int hashCode() {
+    return field.hashCode() + text.hashCode();
+  }
+
+  /** Compares two terms, returning an integer which is less than zero iff this
+    term belongs before the argument, equal zero iff this term is equal to the
+    argument, and greater than zero iff this term belongs after the argument.
+
+    The ordering of terms is first by field, then by text.*/
+  public final int compareTo(Term other) {
+    if (field == other.field)                     // fields are interned
+      return text.compareTo(other.text);
+    else
+      return field.compareTo(other.field);
+  }
+
+  /** Resets the field and text of a Term. */
+  final void set(String fld, String txt) {
+    field = fld;
+    text = txt;
+  }
+
+  public final String toString() {
+    return "Term<" + field + ":" + text + ">";
+  }
+}
diff --git a/src/java/org/apache/lucene/index/TermDocs.java b/src/java/org/apache/lucene/index/TermDocs.java
new file mode 100644
index 00000000000..cb510688024
--- /dev/null
+++ b/src/java/org/apache/lucene/index/TermDocs.java
@@ -0,0 +1,110 @@
+package org.apache.lucene.index;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import org.apache.lucene.document.Document;
+
+/** TermDocs provides an interface for enumerating &lt;document, frequency&gt;
+ pairs for a term.  <p> The document portion names each document containing
+ the term. Documents are indicated by number. The frequency portion gives
+ the number of times the term occurred in each document.  <p> The pairs are
+ ordered by document number.
+
+ @see IndexReader#termDocs
+ */
+
+public interface TermDocs {
+ /** Returns the current document number.  <p> This is invalid until {@link
+ #next()} is called for the first time.*/
+ public int doc();
+
+ /** Returns the frequency of the term within the current document.  <p> This
+ is invalid until {@link #next()} is called for the first time.*/
+ public int freq();
+
+ /** Moves to the next pair in the enumeration.  <p> Returns true iff there is
+ such a next pair in the enumeration. */
+ public boolean next() throws IOException;
+
+ /** Attempts to read multiple entries from the enumeration, up to length of
+ * <i>docs</i>. Document numbers are stored in <i>docs</i>, and term
+ * frequencies are stored in <i>freqs</i>. The <i>freqs</i> array must be as
+ * long as the <i>docs</i> array.
+ *
+ * <p>Returns the number of entries read. Zero is only returned when the
+ * stream has been exhausted. */
+ public int read(int[] docs, int[] freqs) throws IOException;
+
+ /** Skips entries to the first beyond the current whose document number is
+ * greater than or equal to <i>target</i>. <p>Returns true iff there is such
+ * an entry. <p>Behaves as if written: <pre>
+ *   public boolean skipTo(int target) {
+ *     do {
+ *       if (!next())
+ *         return false;
+ *     } while (target > doc());
+ *     return true;
+ *   }
+ * </pre>
+ * Some implementations are considerably more efficient than that.
+ */
+ public boolean skipTo(int target) throws IOException;
+
+ /** Frees associated resources. */
+ public void close() throws IOException;
+}
+
+
diff --git a/src/java/org/apache/lucene/index/TermEnum.java b/src/java/org/apache/lucene/index/TermEnum.java
new file mode 100644
index 00000000000..0f75a01e7d3
--- /dev/null
+++ b/src/java/org/apache/lucene/index/TermEnum.java
@@ -0,0 +1,78 @@
+package org.apache.lucene.index;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+
+/** Base class for enumerations over terms.
+
+  <p>Terms are always produced in the order defined by Term.compareTo(), so
+  every term in the enumeration is greater than all the terms that precede
+  it. */
+
+public abstract class TermEnum {
+  /** Advances the enumeration to the next element.
+    @return true if such an element exists */
+  public abstract boolean next() throws IOException;
+
+  /** Returns the Term the enumeration is currently positioned on.
+    The result is invalid until next() has been called for the first time. */
+  public abstract Term term();
+
+  /** Returns the docFreq of the Term the enumeration is currently positioned
+    on.  The result is invalid until next() has been called for the first
+    time. */
+  public abstract int docFreq();
+
+  /** Closes the enumeration to further activity, freeing resources. */
+  public abstract void close() throws IOException;
+}
diff --git a/src/java/org/apache/lucene/index/TermInfo.java b/src/java/org/apache/lucene/index/TermInfo.java
new file mode 100644
index 00000000000..91c974aa6d4
--- /dev/null
+++ b/src/java/org/apache/lucene/index/TermInfo.java
@@ -0,0 +1,91 @@
+package org.apache.lucene.index;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+/** The record of information kept for a single term: its document frequency
+  plus two pointers. */
+
+final class TermInfo {
+  /** The number of documents which contain the term. */
+  int docFreq = 0;
+
+  // NOTE(review): presumably the positions of this term's data in the
+  // segment's frequency and proximity files -- confirm against the code that
+  // fills them in.
+  long freqPointer = 0;
+  long proxPointer = 0;
+
+  /** Creates a record with all three values zero. */
+  TermInfo() {}
+
+  /** Creates a record holding the given values.
+    @param df document frequency
+    @param fp frequency pointer
+    @param pp proximity pointer */
+  TermInfo(int df, long fp, long pp) {
+    set(df, fp, pp);
+  }
+
+  /** Creates a copy of another record.
+    @param ti the record whose values are copied */
+  TermInfo(TermInfo ti) {
+    this(ti.docFreq, ti.freqPointer, ti.proxPointer);
+  }
+
+  /** Overwrites all three values of this record. */
+  final void set(int df, long fp, long pp) {
+    this.docFreq = df;
+    this.freqPointer = fp;
+    this.proxPointer = pp;
+  }
+
+  /** Overwrites this record with the values of another one. */
+  final void set(TermInfo ti) {
+    set(ti.docFreq, ti.freqPointer, ti.proxPointer);
+  }
+}
diff --git a/src/java/org/apache/lucene/index/TermInfosReader.java b/src/java/org/apache/lucene/index/TermInfosReader.java
new file mode 100644
index 00000000000..c4e767136c3
--- /dev/null
+++ b/src/java/org/apache/lucene/index/TermInfosReader.java
@@ -0,0 +1,222 @@
+package org.apache.lucene.index;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.InputStream;
+
+/** This stores a monotonically increasing set of pairs in a
+ * Directory. Pairs are accessed either by Term or by ordinal position the
+ * set. */
+
+final class TermInfosReader {
+ private Directory directory;
+ private String segment;
+ private FieldInfos fieldInfos;
+
+ private SegmentTermEnum enum;
+ private int size;
+
+ TermInfosReader(Directory dir, String seg, FieldInfos fis)
+ throws IOException {
+ directory = dir;
+ segment = seg;
+ fieldInfos = fis;
+
+ enum = new SegmentTermEnum(directory.openFile(segment + ".tis"),
+ fieldInfos, false);
+ size = enum.size;
+ readIndex();
+ }
+
+ final void close() throws IOException {
+ if (enum != null)
+ enum.close();
+ }
+
+ /** Returns the number of term/value pairs in the set. */
+ final int size() {
+ return size;
+ }
+
+ Term[] indexTerms = null;
+ TermInfo[] indexInfos;
+ long[] indexPointers;
+
+ private final void readIndex() throws IOException {
+ SegmentTermEnum indexEnum =
+ new SegmentTermEnum(directory.openFile(segment + ".tii"),
+ fieldInfos, true);
+ try {
+ int indexSize = indexEnum.size;
+
+ indexTerms = new Term[indexSize];
+ indexInfos = new TermInfo[indexSize];
+ indexPointers = new long[indexSize];
+
+ for (int i = 0; indexEnum.next(); i++) {
+ indexTerms[i] = indexEnum.term();
+ indexInfos[i] = indexEnum.termInfo();
+ indexPointers[i] = indexEnum.indexPointer;
+ }
+ } finally {
+ indexEnum.close();
+ }
+ }
+
+ /** Returns the offset of the greatest index entry which is less than term.*/
+ private final int getIndexOffset(Term term) throws IOException {
+ int lo = 0; // binary search indexTerms[]
+ int hi = indexTerms.length - 1;
+
+ while (hi >= lo) {
+ int mid = (lo + hi) >> 1;
+ int delta = term.compareTo(indexTerms[mid]);
+ if (delta < 0)
+ hi = mid - 1;
+ else if (delta > 0)
+ lo = mid + 1;
+ else
+ return mid;
+ }
+ return hi;
+ }
+
+ private final void seekEnum(int indexOffset) throws IOException {
+ enum.seek(indexPointers[indexOffset],
+ (indexOffset * TermInfosWriter.INDEX_INTERVAL) - 1,
+ indexTerms[indexOffset], indexInfos[indexOffset]);
+ }
+
+ /** Returns the TermInfo for a Term in the set, or null. */
+ final synchronized TermInfo get(Term term) throws IOException {
+ if (size == 0) return null;
+
+ // optimize sequential access: first try scanning cached enum w/o seeking
+ if (enum.term() != null // term is at or past current
+ && ((enum.prev != null && term.compareTo(enum.prev) > 0)
+ || term.compareTo(enum.term()) >= 0)) {
+ int enumOffset = (enum.position/TermInfosWriter.INDEX_INTERVAL)+1;
+ if (indexTerms.length == enumOffset // but before end of block
+ || term.compareTo(indexTerms[enumOffset]) < 0)
+ return scanEnum(term); // no need to seek
+ }
+
+ // random-access: must seek
+ seekEnum(getIndexOffset(term));
+ return scanEnum(term);
+ }
+
+ /** Scans within block for matching term. */
+ private final TermInfo scanEnum(Term term) throws IOException {
+ while (term.compareTo(enum.term()) > 0 && enum.next()) {}
+ if (enum.term() != null && term.compareTo(enum.term()) == 0)
+ return enum.termInfo();
+ else
+ return null;
+ }
+
+ /** Returns the nth term in the set. */
+ final synchronized Term get(int position) throws IOException {
+ if (size == 0) return null;
+
+ if (enum != null && enum.term() != null && position >= enum.position &&
+ position < (enum.position + TermInfosWriter.INDEX_INTERVAL))
+ return scanEnum(position); // can avoid seek
+
+ seekEnum(position / TermInfosWriter.INDEX_INTERVAL); // must seek
+ return scanEnum(position);
+ }
+
+ private final Term scanEnum(int position) throws IOException {
+ while(enum.position < position)
+ if (!enum.next())
+ return null;
+
+ return enum.term();
+ }
+
+ /** Returns the position of a Term in the set or -1. */
+ final synchronized int getPosition(Term term) throws IOException {
+ if (size == 0) return -1;
+
+ int indexOffset = getIndexOffset(term);
+ seekEnum(indexOffset);
+
+ while(term.compareTo(enum.term()) > 0 && enum.next()) {}
+
+ if (term.compareTo(enum.term()) == 0)
+ return enum.position;
+ else
+ return -1;
+ }
+
+ /** Returns an enumeration of all the Terms and TermInfos in the set. */
+ final synchronized SegmentTermEnum terms() throws IOException {
+ if (enum.position != -1) // if not at start
+ seekEnum(0); // reset to start
+ return (SegmentTermEnum)enum.clone();
+ }
+
+ /** Returns an enumeration of terms starting at or after the named term. */
+ final synchronized SegmentTermEnum terms(Term term) throws IOException {
+ get(term); // seek enum to term
+ return (SegmentTermEnum)enum.clone();
+ }
+
+
+}
diff --git a/src/java/org/apache/lucene/index/TermInfosWriter.java b/src/java/org/apache/lucene/index/TermInfosWriter.java
new file mode 100644
index 00000000000..5053072f898
--- /dev/null
+++ b/src/java/org/apache/lucene/index/TermInfosWriter.java
@@ -0,0 +1,159 @@
+package org.apache.lucene.index;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import org.apache.lucene.store.OutputStream;
+import org.apache.lucene.store.Directory;
+
+/** Writes a monotonically increasing set of &lt;Term, TermInfo&gt; pairs to a
+  Directory.  A TermInfos can be written once, in order.
+
+  <p>Two files are produced per segment: ".tis" receives every pair, and
+  ".tii" receives an index entry each time INDEX_INTERVAL further pairs have
+  been added.  Each file is written by its own TermInfosWriter instance; the
+  two instances reference each other through <code>other</code>. */
+
+final class TermInfosWriter {
+  private FieldInfos fieldInfos;
+  private OutputStream output;
+  private Term lastTerm = new Term("", "");       // previous term written
+  private TermInfo lastTi = new TermInfo();       // previous info written
+  private int size = 0;                           // number of pairs added
+
+  static final int INDEX_INTERVAL = 128;
+  private long lastIndexPointer = 0;
+  private boolean isIndex = false;                // true for the ".tii" writer
+
+  private TermInfosWriter other = null;           // the companion writer
+
+  /** Creates the ".tis" writer for a segment together with its companion
+    ".tii" writer.
+    @param directory directory the files are created in
+    @param segment segment name, used as the file name prefix
+    @param fis field infos used to map field names to field numbers */
+  TermInfosWriter(Directory directory, String segment, FieldInfos fis)
+       throws IOException, SecurityException {
+    initialize(directory, segment, fis, false);
+    other = new TermInfosWriter(directory, segment, fis, true);
+    other.other = this;
+  }
+
+  /** Creates a single writer; used for the companion index writer. */
+  private TermInfosWriter(Directory directory, String segment, FieldInfos fis,
+                          boolean isIndex) throws IOException {
+    initialize(directory, segment, fis, isIndex);
+  }
+
+  /** Creates the output file and reserves room for the size written by
+    close(). */
+  private void initialize(Directory directory, String segment, FieldInfos fis,
+                          boolean isi) throws IOException {
+    fieldInfos = fis;
+    isIndex = isi;
+    output = directory.createFile(segment + (isIndex ? ".tii" : ".tis"));
+    output.writeInt(0);                           // leave space for size
+  }
+
+  /** Adds a new &lt;Term, TermInfo&gt; pair to the set.
+    Term must be lexicographically greater than all previous Terms added.
+    TermInfo pointers must not be less than those of the previous pair.
+    @throws IOException if the term or a pointer is out of order, or if
+      writing fails */
+  final void add(Term term, TermInfo ti)
+       throws IOException, SecurityException {
+    if (!isIndex && term.compareTo(lastTerm) <= 0)
+      throw new IOException("term out of order");
+    if (ti.freqPointer < lastTi.freqPointer)
+      throw new IOException("freqPointer out of order");
+    if (ti.proxPointer < lastTi.proxPointer)
+      throw new IOException("proxPointer out of order");
+
+    // The index entry is added before the new pair is written, so it carries
+    // the previous term and the file pointer reached so far.
+    if (!isIndex && size % INDEX_INTERVAL == 0)
+      other.add(lastTerm, lastTi);                // add an index term
+
+    writeTerm(term);                              // write term
+    output.writeVInt(ti.docFreq);                 // write doc freq
+    output.writeVLong(ti.freqPointer - lastTi.freqPointer); // write pointers
+    output.writeVLong(ti.proxPointer - lastTi.proxPointer);
+
+    if (isIndex) {
+      // For the index writer, "other" is the ".tis" writer: record how far
+      // its file pointer has advanced since the previous index entry.
+      long pointer = other.output.getFilePointer();
+      output.writeVLong(pointer - lastIndexPointer); // write pointer
+      lastIndexPointer = pointer;
+    }
+
+    lastTi.set(ti);
+    size++;
+  }
+
+  /** Writes a term as the length of the prefix it shares with the previous
+    term's text, the remaining characters, and its field number. */
+  private final void writeTerm(Term term)
+       throws IOException {
+    int start = stringDifference(lastTerm.text, term.text);
+    int length = term.text.length() - start;
+
+    output.writeVInt(start);                      // write shared prefix length
+    output.writeVInt(length);                     // write delta length
+    output.writeChars(term.text, start, length);  // write delta chars
+
+    output.writeVInt(fieldInfos.fieldNumber(term.field)); // write field num
+
+    lastTerm = term;
+  }
+
+  /** Returns the index of the first character at which the two strings
+    differ, or the length of the shorter one if it is a prefix of the
+    other. */
+  private static final int stringDifference(String s1, String s2) {
+    int len = Math.min(s1.length(), s2.length());
+    for (int i = 0; i < len; i++)
+      if (s1.charAt(i) != s2.charAt(i))
+        return i;
+    return len;
+  }
+
+  /** Called to complete TermInfos creation.  Writes the number of pairs at
+    the start of the file and closes it; the ".tis" writer also closes its
+    companion ".tii" writer.  Both streams are closed even if an earlier step
+    fails. */
+  final void close() throws IOException, SecurityException {
+    try {
+      try {
+        output.seek(0);                           // write size at start
+        output.writeInt(size);
+      } finally {
+        output.close();                           // release even if patch fails
+      }
+    } finally {
+      if (!isIndex)
+        other.close();                            // never leak the companion
+    }
+  }
+}
diff --git a/src/java/org/apache/lucene/index/TermPositions.java b/src/java/org/apache/lucene/index/TermPositions.java
new file mode 100644
index 00000000000..834f8f7b215
--- /dev/null
+++ b/src/java/org/apache/lucene/index/TermPositions.java
@@ -0,0 +1,75 @@
+package org.apache.lucene.index;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import org.apache.lucene.document.Document;
+
+
+/** Enumerates the &lt;document, frequency, &lt;position&gt;* &gt; tuples
+  recorded for a term.
+
+  <p>The document and frequency parts are as for a TermDocs.  The positions
+  part lists the ordinal positions of each occurrence of the term in a
+  document.
+
+  @see IndexReader#termPositions
+  */
+
+public interface TermPositions extends TermDocs {
+  /** Returns the next position in the current document.
+    <p>It is an error to call this more than {@link #freq()} times without
+    calling {@link #next()}.
+    <p>This is invalid until {@link #next()} is called for the first time. */
+  int nextPosition() throws IOException;
+}
diff --git a/src/java/org/apache/lucene/index/package.html b/src/java/org/apache/lucene/index/package.html
new file mode 100644
index 00000000000..25947337a7e
--- /dev/null
+++ b/src/java/org/apache/lucene/index/package.html
@@ -0,0 +1,10 @@
+
+
+
+
+
+
+
+Code to maintain and access indices.
+
+
diff --git a/src/java/org/apache/lucene/manifest b/src/java/org/apache/lucene/manifest
new file mode 100644
index 00000000000..9421cc6f3b9
--- /dev/null
+++ b/src/java/org/apache/lucene/manifest
@@ -0,0 +1,8 @@
+
+Name: com/lucene
+Specification-Title: Lucene Search Engine
+Specification-Version: $Name$
+Specification-Vendor: Lucene
+Implementation-Title: com.lucene
+Implementation-Version: $Name$ $Date$
+Implementation-Vendor: Lucene
diff --git a/src/java/org/apache/lucene/queryParser/.cvsignore b/src/java/org/apache/lucene/queryParser/.cvsignore
new file mode 100644
index 00000000000..e966a9fa735
--- /dev/null
+++ b/src/java/org/apache/lucene/queryParser/.cvsignore
@@ -0,0 +1,6 @@
+QueryParser.java
+TokenMgrError.java
+ParseException.java
+Token.java
+TokenManager.java
+QueryParserConstants.java
diff --git a/src/java/org/apache/lucene/queryParser/Makefile b/src/java/org/apache/lucene/queryParser/Makefile
new file mode 100644
index 00000000000..09c091d1839
--- /dev/null
+++ b/src/java/org/apache/lucene/queryParser/Makefile
@@ -0,0 +1,2 @@
+# sub-directory makefile for lucene
+include ../rules.mk
diff --git a/src/java/org/apache/lucene/queryParser/QueryParser.jj b/src/java/org/apache/lucene/queryParser/QueryParser.jj
new file mode 100644
index 00000000000..6eab8f0b9ad
--- /dev/null
+++ b/src/java/org/apache/lucene/queryParser/QueryParser.jj
@@ -0,0 +1,366 @@
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+
+options {
+ STATIC= false;
+}
+
+PARSER_BEGIN(QueryParser)
+
+package org.apache.lucene.queryParser;
+
+import java.util.Vector;
+import java.io.*;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.analysis.*;
+import org.apache.lucene.search.*;
+
+/**
+ * This class is generated by JavaCC.  The only method that clients should need
+ * to call is <code>parse()</code>.
+ *
+ * <p>The syntax for query strings is as follows:
+ * A Query is a series of clauses.
+ * A clause may be prefixed by:
+ * <ul>
+ * <li> a plus (<code>+</code>) or a minus (<code>-</code>) sign, indicating
+ * that the clause is required or prohibited respectively; or
+ * <li> a term followed by a colon, indicating the field to be searched.
+ * This enables one to construct queries which search multiple fields.
+ * </ul>
+ *
+ * <p>A clause may be either:
+ * <ul>
+ * <li> a term, indicating all the documents that contain this term; or
+ * <li> a nested query, enclosed in parentheses.  Note that this may be used
+ * with a <code>+</code>/<code>-</code> prefix to require any of a set of
+ * terms.
+ * </ul>
+ *
+ * <p>Thus, in BNF, the query grammar is:
+ * <pre>
+ *   Query  ::= ( Clause )*
+ *   Clause ::= ["+", "-"] [&lt;TERM&gt; ":"] ( &lt;TERM&gt; | "(" Query ")" )
+ * </pre>
+ */
+
+public class QueryParser {
+ /** Convenience entry point: builds a one-shot parser for the given default
+  *  field and analyzer and uses it to parse a query string, returning a
+  *  <code>Query</code>.
+  *  @param query  the query string to be parsed.
+  *  @param field  the default field for query terms.
+  *  @param analyzer   used to find terms in the query text.
+  */
+ static public Query parse(String query, String field, Analyzer analyzer)
+     throws ParseException {
+   return new QueryParser(field, analyzer).parse(query);
+ }
+
+ Analyzer analyzer;
+ String field;
+ int phraseSlop = 0;
+
+ /** Constructs a query parser.
+  *  @param f  the default field for query terms.
+  *  @param a  used to find terms in the query text.
+  */
+ public QueryParser(String f, Analyzer a) {
+   // Start on an empty input; parse(String) re-initialises the stream.
+   this(new StringReader(""));
+   this.field = f;
+   this.analyzer = a;
+ }
+
+ /** Parses a query string, returning a
+ * Query.
+ * @param query the query string to be parsed.
+ */
+ public Query parse(String query) throws ParseException {
+ ReInit(new StringReader(query));
+ return Query(field);
+ }
+
+ /** Sets the default slop for phrases. If zero, then exact phrase matches
+ are required. Zero by default. */
+ public void setPhraseSlop(int s) { phraseSlop = s; }
+ /** Gets the default slop for phrases. */
+ public int getPhraseSlop() { return phraseSlop; }
+
+ /** Appends a clause for <code>q</code> to <code>clauses</code>, deriving its
+  *  required/prohibited flags from the conjunction and modifier that
+  *  introduced it.
+  *  @param clauses  the BooleanClauses collected so far; modified in place.
+  *  @param conj  the conjunction preceding the clause (e.g. CONJ_AND).
+  *  @param mods  the modifier preceding the clause (MOD_REQ, MOD_NOT, ...).
+  *  @param q  the clause's query, or null if the analyzer removed the term.
+  */
+ private void addClause(Vector clauses, int conj, int mods,
+                        Query q) {
+   // If this term is introduced by AND, make the preceding term required,
+   // unless it's already prohibited.  There may be no preceding clause at
+   // all (every earlier term may have been filtered away by the analyzer),
+   // so only look at the last element when the vector is non-empty.
+   if (conj == CONJ_AND && clauses.size() > 0) {
+     BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1);
+     if (!c.prohibited)
+       c.required = true;
+   }
+
+   // We might have been passed a null query; the term might have been
+   // filtered away by the analyzer.
+   if (q == null)
+     return;
+
+   // We set REQUIRED if we're introduced by AND or +; PROHIBITED if
+   // introduced by NOT or -; make sure not to set both.
+   boolean prohibited = (mods == MOD_NOT);
+   boolean required = (mods == MOD_REQ);
+   if (conj == CONJ_AND && !prohibited)
+     required = true;
+   clauses.addElement(new BooleanClause(q, required, prohibited));
+ }
+
+ private Query getFieldQuery(String field, Analyzer analyzer, String queryText) {
+ // Use the analyzer to get all the tokens, and then build a TermQuery,
+ // PhraseQuery, or nothing based on the term count
+
+ TokenStream source = analyzer.tokenStream(field, new StringReader(queryText));
+ Vector v = new Vector();
+ org.apache.lucene.analysis.Token t;
+
+ while (true) {
+ try {
+ t = source.next();
+ }
+ catch (IOException e) {
+ t = null;
+ }
+ if (t == null)
+ break;
+ v.addElement(t.termText());
+ }
+ if (v.size() == 0)
+ return null;
+ else if (v.size() == 1)
+ return new TermQuery(new Term(field, (String) v.elementAt(0)));
+ else {
+ PhraseQuery q = new PhraseQuery();
+ q.setSlop(phraseSlop);
+ for (int i=0; i TOKEN : {
+ <#_ALPHA_CHAR: ["a"-"z", "A"-"Z"] >
+| <#_NUM_CHAR: ["0"-"9"] >
+| <#_ALPHANUM_CHAR: [ "a"-"z", "A"-"Z", "0"-"9" ] >
+| <#_IDENTIFIER_CHAR: [ "a"-"z", "A"-"Z", "0"-"9", "_" ] >
+| <#_IDENTIFIER: <_ALPHA_CHAR> (<_IDENTIFIER_CHAR>)* >
+| <#_NEWLINE: ( "\r\n" | "\r" | "\n" ) >
+| <#_WHITESPACE: ( " " | "\t" ) >
+| <#_QCHAR: ( "\\" (<_NEWLINE> | ~["a"-"z", "A"-"Z", "0"-"9"] ) ) >
+| <#_RESTOFLINE: (~["\r", "\n"])* >
+}
+
+ TOKEN : {
+
+|
+|
+|
+|
+|
+|
+|
+|
+|
+|
+| )+ "." (<_NUM_CHAR>)+ >
+|
+ ( ~["\"", " ", "\t", "(", ")", ":", "&", "|", "^", "*", "?", "~" ] )* >
+|
+|
+ ( ~["\"", " ", "\t", "(", ")", ":", "&", "|", "^", "~" ] )* <_IDENTIFIER_CHAR>>
+}
+
+ SKIP : {
+ <<_WHITESPACE>>
+}
+
+// *   Query  ::= ( Clause )*
+// *   Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )
+
+int Conjunction() : {
+ int ret = CONJ_NONE;
+}
+{
+ [
+ { ret = CONJ_AND; }
+ | { ret = CONJ_OR; }
+ ]
+ { return ret; }
+}
+
+int Modifiers() : {
+ int ret = MOD_NONE;
+}
+{
+ [
+ { ret = MOD_REQ; }
+ | { ret = MOD_NOT; }
+ | { ret = MOD_NOT; }
+ ]
+ { return ret; }
+}
+
+Query Query(String field) :
+{
+ Vector clauses = new Vector();
+ Query q;
+ int conj, mods;
+}
+{
+ mods=Modifiers() q=Clause(field)
+ { addClause(clauses, CONJ_NONE, mods, q); }
+ (
+ conj=Conjunction() mods=Modifiers() q=Clause(field)
+ { addClause(clauses, conj, mods, q); }
+ )*
+ {
+ BooleanQuery query = new BooleanQuery();
+ for (int i = 0; i < clauses.size(); i++)
+ query.add((BooleanClause)clauses.elementAt(i));
+ return query;
+ }
+}
+
+Query Clause(String field) : {
+ Query q;
+ Token fieldToken=null;
+}
+{
+ [
+ LOOKAHEAD(2)
+ fieldToken= { field = fieldToken.image; }
+ ]
+
+ (
+ q=Term(field)
+ | q=Query(field)
+ )
+ {
+ return q;
+ }
+}
+
+
+Query Term(String field) : {
+ Token term, boost=null;
+ boolean prefix = false;
+ boolean wildcard = false;
+ boolean fuzzy = false;
+ Query q;
+}
+{
+ (
+ (term=|term={wildcard=true;}|term=)[{prefix=true;}|{fuzzy=true;}][ boost=]
+ { if (wildcard)
+ q = new WildcardQuery(new Term(field, term.image));
+ else if (prefix)
+ q = new PrefixQuery(new Term(field, term.image));
+ else if (fuzzy)
+ q = new FuzzyQuery(new Term(field, term.image));
+ else
+ q = getFieldQuery(field, analyzer, term.image); }
+ | term=
+ { q = getFieldQuery(field, analyzer,
+ term.image.substring(1, term.image.length()-1)); }
+ )
+ {
+ if (boost != null) {
+ float f = (float) 1.0;
+ try {
+ f = Float.valueOf(boost.image).floatValue();
+ }
+ catch (Exception ignored) { }
+
+ if (q instanceof TermQuery)
+ ((TermQuery) q).setBoost(f);
+ else if (q instanceof PhraseQuery)
+ ((PhraseQuery) q).setBoost(f);
+ else if (q instanceof MultiTermQuery)
+ ((MultiTermQuery) q).setBoost(f);
+ }
+ return q;
+ }
+}
+
+
diff --git a/src/java/org/apache/lucene/queryParser/package.html b/src/java/org/apache/lucene/queryParser/package.html
new file mode 100644
index 00000000000..5a3b8368794
--- /dev/null
+++ b/src/java/org/apache/lucene/queryParser/package.html
@@ -0,0 +1,15 @@
+
+
+
+
+
+
+
+A simple query parser implemented with JavaCC.
+Note that JavaCC defines lots of public classes, methods and fields
+that do not need to be public.  These clutter the documentation.
+Sorry.
+<p>
+Note that because JavaCC defines a class named Token, org.apache.lucene.analysis.Token
+must always be fully qualified in source code in this package.
+
+
diff --git a/src/java/org/apache/lucene/rootrules.mk b/src/java/org/apache/lucene/rootrules.mk
new file mode 100644
index 00000000000..04b788f7287
--- /dev/null
+++ b/src/java/org/apache/lucene/rootrules.mk
@@ -0,0 +1,58 @@
+# rules to enable the running of "make jar" and the like from any dir..
+
+# directories containing java source code
+DIRS = store util document analysis analysis/standard index search queryParser
+PACKAGES = $(subst /,.,$(patsubst %,com.lucene.%,$(DIRS)))
+
+ifeq ($(JAVALINK),)
+ JAVALINK = http://java.sun.com/products/jdk/1.3/docs/api/
+endif
+
+# OLDJAVA does not have a -link option
+ifeq ($(OLDJAVA),)
+ JLINK_OPT = -link $(JAVALINK)
+ JAR_CMD = $(JAR) -cvfm lucene.jar com/lucene/manifest
+else
+ JAR_CMD = $(JAR) -cvf lucene.jar
+endif
+
+.PHONY: jar doc demo release
+
+jar: all_classes
+ cd $(ROOT) && $(JAR_CMD) \
+ `ls com/lucene/*/*.class` `ls com/lucene/*/*/*.class`
+
+doc: all_classes
+ if [ -d $(ROOT)/doc/api ]; then rm -rf $(ROOT)/doc/api ;fi
+ mkdir $(ROOT)/doc/api
+ $(JAVADOC) -classpath '$(CLASSPATH)' -author -version \
+ -d $(ROOT)/doc/api $(JLINK_OPT) $(PACKAGES)
+
+demo: all_classes
+ $(MAKE) -C $(ROOT)/demo/HTMLParser -w
+ $(MAKE) -C $(ROOT)/demo -w CLASSPATH=..
+
+release: jar demo doc
+ cd $(ROOT) && tar cvf lucene.tar lucene.jar doc/*.html doc/api \
+ demo/*.java demo/*.class demo/*.html demo/*.jhtml \
+ demo/HTMLParser/*.class demo/HTMLParser/*.jj \
+ demo/HTMLParser/*.java
+
+# make all the Lucene classes
+all_classes : TARGET = classes
+all_classes : $(DIRS)
+
+.PHONY: $(DIRS)
+$(DIRS):
+ $(MAKE) -C $(ROOT)/com/lucene/$@ -w $(TARGET)
+
+# Removes all generated files from src directories.
+src_clean: TARGET = clean
+src_clean: $(DIRS) clean
+
+# Removes all generated files.
+real_clean: DIRS += demo
+real_clean: DIRS += demo/HTMLParser
+real_clean: TARGET = clean
+real_clean: $(DIRS) clean
+ cd $(ROOT) && rm -rf lucene.jar lucene.tar doc/api
diff --git a/src/java/org/apache/lucene/rules.mk b/src/java/org/apache/lucene/rules.mk
new file mode 100644
index 00000000000..9222636619c
--- /dev/null
+++ b/src/java/org/apache/lucene/rules.mk
@@ -0,0 +1,128 @@
+# GNU make rules for lucene
+
+# determine whether we're on Win32 or Unix
+ifeq ($(findstring CYGWIN,$(shell uname)),CYGWIN)
+ OS = win32
+else
+ OS = unix
+endif
+
+# DOS compatibility:
+# These should be used in variables that end up in CLASSPATH.
+ifeq ($(OS),win32)
+ SLASH=\\
+ COLON=;
+else
+ SLASH=/
+ COLON=:
+endif
+
+# ROOT should be set to the root directory of the Lucene package
+# hierarchy.  This is typically ../../.., as most packages are of the
+# form com.lucene.<package>.
+ifeq ($(ROOT),)
+ ROOT = ..$(SLASH)..$(SLASH)..
+else
+ ROOT := $(subst /,$(SLASH),$(ROOT))
+endif
+
+#include all the relevant variables
+include $(subst $(SLASH),/,$(ROOT))/com/lucene/variables.mk
+
+# directories containing java source code
+DIRS = store util document analysis analysis/standard index search queryParser
+PACKAGES = $(subst /,.,$(patsubst %,com.lucene.%,$(DIRS)))
+
+ifeq ($(JDK_HOME),)
+ ifneq ($(JAVA_HOME),)
+ JDK_HOME=$(JAVA_HOME)
+ else
+ ifeq ($(OS),win32)
+ JDK_HOME = C:/jdk1.3.1
+ else
+ JDK_HOME = /usr/local/java/jdk1.3.1
+ endif
+ endif
+endif
+
+# Location of JavaCC
+ifeq ($(JAVACC),)
+ ifeq ($(OS),win32)
+ JAVACC = C:/javacc2_0/bin/lib/JavaCC.zip
+ else
+ JAVACC = /usr/local/java/javacc2_0/bin/lib/JavaCC.zip
+ endif
+endif
+
+JAVADIR = $(subst \,/,$(JDK_HOME))
+
+# The compiler executable.
+ifeq ($(JAVAC),)
+ JAVAC = $(JAVADIR)/bin/javac
+endif
+
+# The java executable
+JAVA = $(JAVADIR)/bin/java
+
+# The jar executable
+JAR = $(JAVADIR)/bin/jar
+
+# javadoc
+JAVADOC = $(JAVADIR)/bin/javadoc
+
+# Options to pass to Java compiler
+ifeq ($(JFLAGS),)
+ JFLAGS = -O
+endif
+
+
+# CLASSPATH
+# By default include the Lucene root, and Java's builtin classes
+ifeq ($(OLDJAVA),)
+ export CLASSPATH=$(PREPENDCLASSPATH)$(COLON)$(ROOT)$(COLON)$(JDK_HOME)$(SLASH)jre$(SLASH)lib$(SLASH)rt.jar
+else
+ export CLASSPATH=$(PREPENDCLASSPATH)$(COLON)$(ROOT)$(COLON)$(JDK_HOME)$(SLASH)lib$(SLASH)classes.zip
+endif
+
+# JIKESPATH overrides the classpath variable for jikes, so we need to set it
+# here to avoid problems with a jikes user
+export JIKESPATH=$(CLASSPATH)
+
+## Rules
+
+# Use JAVAC to compile .java files into .class files
+%.class : %.java
+ $(JAVAC) $(JFLAGS) $<
+
+# Compile .jj files to .java with JavaCC
+%.java : %.jj
+ $(JAVA) -classpath '$(CLASSPATH)$(COLON)$(JAVACC)' COM.sun.labs.javacc.Main $<
+
+# Add JavaCC generated files to 'classes' and 'clean' targets.
+JJFILES = $(wildcard *.jj)
+ifneq ($(JJFILES),)
+ CLASSES += $(patsubst %.jj,%.class, $(JJFILES))
+ DIRT += $(patsubst %.jj,%.java, $(JJFILES))
+ DIRT += $(patsubst %.jj,%Constants.java, $(JJFILES))
+ DIRT += $(patsubst %.jj,%TokenManager.java, $(JJFILES))
+ DIRT += Token.java TokenMgrError.java TokenManager.java \
+ CharStream.java ASCII_CharStream.java ParseException.java
+endif
+
+
+# Don't delete parser's .java file -- it's needed by javadoc.
+.PRECIOUS: $(patsubst %.jj,%.java, $(JJFILES))
+
+
+# Assume all .java files should have a .class file.
+CLASSES += $(patsubst %.java,%.class,$(wildcard *.java))
+
+# default rule
+classes : $(CLASSES)
+
+# Removes all generated files from the connected src directory.
+clean:
+ rm -f *.class $(DIRT)
+
+# include all the rules for the root directory..
+include $(subst $(SLASH),/,$(ROOT))/com/lucene/rootrules.mk
diff --git a/src/java/org/apache/lucene/search/BooleanClause.java b/src/java/org/apache/lucene/search/BooleanClause.java
new file mode 100644
index 00000000000..b562a5ce928
--- /dev/null
+++ b/src/java/org/apache/lucene/search/BooleanClause.java
@@ -0,0 +1,75 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+/** A clause in a BooleanQuery. */
+public final class BooleanClause {
+  /** The sub-query whose matching documents are combined by the boolean
+    query. */
+  public Query query;
+  /** If true, documents which <i>do not</i> match this sub-query will
+    <i>not</i> match the boolean query. */
+  public boolean required = false;
+  /** If true, documents which <i>do</i> match this sub-query will
+    <i>not</i> match the boolean query. */
+  public boolean prohibited = false;
+
+  /** Constructs a BooleanClause with query <code>q</code>, required
+    <code>r</code> and prohibited <code>p</code>. */
+  public BooleanClause(Query q, boolean r, boolean p) {
+    this.query = q;
+    this.required = r;
+    this.prohibited = p;
+  }
+}
diff --git a/src/java/org/apache/lucene/search/BooleanQuery.java b/src/java/org/apache/lucene/search/BooleanQuery.java
new file mode 100644
index 00000000000..b965e53d9ba
--- /dev/null
+++ b/src/java/org/apache/lucene/search/BooleanQuery.java
@@ -0,0 +1,177 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import java.util.Vector;
+import org.apache.lucene.index.IndexReader;
+
+/** A Query that matches documents matching boolean combinations of other
+ queries, typically {@link TermQuery}s or {@link PhraseQuery}s.
+ */
+final public class BooleanQuery extends Query {
+ private Vector clauses = new Vector();
+
+ /** Constructs an empty boolean query. */
+ public BooleanQuery() {}
+
+ /** Adds a clause to a boolean query. Clauses may be:
+
+ required
which means that documents which do not
+ match this sub-query will not match the boolean query;
+ prohibited
which means that documents which do
+ match this sub-query will not match the boolean query; or
+ - neither, in which case matched documents are neither prohibited from
+ nor required to match the sub-query.
+
+ It is an error to specify a clause as both required
and
+ prohibited
.
+ */
+ public final void add(Query query, boolean required, boolean prohibited) {
+ clauses.addElement(new BooleanClause(query, required, prohibited));
+ }
+
+ /** Adds a clause to a boolean query. */
+ public final void add(BooleanClause clause) {
+ clauses.addElement(clause);
+ }
+
+ void prepare(IndexReader reader) {
+ for (int i = 0 ; i < clauses.size(); i++) {
+ BooleanClause c = (BooleanClause)clauses.elementAt(i);
+ c.query.prepare(reader);
+ }
+ }
+
+ final float sumOfSquaredWeights(Searcher searcher)
+ throws IOException {
+ float sum = 0.0f;
+
+ for (int i = 0 ; i < clauses.size(); i++) {
+ BooleanClause c = (BooleanClause)clauses.elementAt(i);
+ if (!c.prohibited)
+ sum += c.query.sumOfSquaredWeights(searcher); // sum sub-query weights
+ }
+
+ return sum;
+ }
+
+ final void normalize(float norm) {
+ for (int i = 0 ; i < clauses.size(); i++) {
+ BooleanClause c = (BooleanClause)clauses.elementAt(i);
+ if (!c.prohibited)
+ c.query.normalize(norm);
+ }
+ }
+
+ final Scorer scorer(IndexReader reader)
+ throws IOException {
+
+ if (clauses.size() == 1) { // optimize 1-term queries
+ BooleanClause c = (BooleanClause)clauses.elementAt(0);
+ if (!c.prohibited) // just return term scorer
+ return c.query.scorer(reader);
+ }
+
+ BooleanScorer result = new BooleanScorer();
+
+ int theMask = 1, thisMask;
+ for (int i = 0 ; i < clauses.size(); i++) {
+ BooleanClause c = (BooleanClause)clauses.elementAt(i);
+ if (c.required || c.prohibited) {
+ thisMask = theMask;
+ theMask = theMask << 1;
+ } else
+ thisMask = 0;
+
+ Scorer subScorer = c.query.scorer(reader);
+ if (subScorer != null)
+ result.add(subScorer, c.required, c.prohibited);
+ else if (c.required)
+ return null;
+ }
+ if (theMask == 0)
+ throw new IndexOutOfBoundsException
+ ("More than 32 required/prohibited clauses in query.");
+
+ return result;
+ }
+
+ /** Prints a user-readable version of this query. */
+ public String toString(String field) {
+ StringBuffer buffer = new StringBuffer();
+ for (int i = 0 ; i < clauses.size(); i++) {
+ BooleanClause c = (BooleanClause)clauses.elementAt(i);
+ if (c.prohibited)
+ buffer.append("-");
+ else if (c.required)
+ buffer.append("+");
+
+ Query subQuery = c.query;
+ if (subQuery instanceof BooleanQuery) { // wrap sub-bools in parens
+ BooleanQuery bq = (BooleanQuery)subQuery;
+ buffer.append("(");
+ buffer.append(c.query.toString(field));
+ buffer.append(")");
+ } else
+ buffer.append(c.query.toString(field));
+
+ if (i != clauses.size()-1)
+ buffer.append(" ");
+ }
+ return buffer.toString();
+ }
+
+}
diff --git a/src/java/org/apache/lucene/search/BooleanScorer.java b/src/java/org/apache/lucene/search/BooleanScorer.java
new file mode 100644
index 00000000000..e9cfd3543ab
--- /dev/null
+++ b/src/java/org/apache/lucene/search/BooleanScorer.java
@@ -0,0 +1,204 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import org.apache.lucene.index.*;
+
+final class BooleanScorer extends Scorer {
+ private int currentDoc;
+
+ private SubScorer scorers = null;
+ private BucketTable bucketTable = new BucketTable(this);
+
+ private int maxCoord = 1;
+ private float[] coordFactors = null;
+
+ private int requiredMask = 0;
+ private int prohibitedMask = 0;
+ private int nextMask = 1;
+
+ static final class SubScorer {
+ public Scorer scorer;
+ public boolean required = false;
+ public boolean prohibited = false;
+ public HitCollector collector;
+ public SubScorer next;
+
+ public SubScorer(Scorer scorer, boolean required, boolean prohibited,
+ HitCollector collector, SubScorer next) {
+ this.scorer = scorer;
+ this.required = required;
+ this.prohibited = prohibited;
+ this.collector = collector;
+ this.next = next;
+ }
+ }
+
+ final void add(Scorer scorer, boolean required, boolean prohibited) {
+ int mask = 0;
+ if (required || prohibited) {
+ if (nextMask == 0)
+ throw new IndexOutOfBoundsException
+ ("More than 32 required/prohibited clauses in query.");
+ mask = nextMask;
+ nextMask = nextMask << 1;
+ } else
+ mask = 0;
+
+ if (!prohibited)
+ maxCoord++;
+
+ if (prohibited)
+ prohibitedMask |= mask; // update prohibited mask
+ else if (required)
+ requiredMask |= mask; // update required mask
+
+ scorers = new SubScorer(scorer, required, prohibited,
+ bucketTable.newCollector(mask), scorers);
+ }
+
+ private final void computeCoordFactors() throws IOException {
+ coordFactors = new float[maxCoord];
+ for (int i = 0; i < maxCoord; i++)
+ coordFactors[i] = Similarity.coord(i, maxCoord);
+ }
+
+ final void score(HitCollector results, int maxDoc) throws IOException {
+ if (coordFactors == null)
+ computeCoordFactors();
+
+ while (currentDoc < maxDoc) {
+ currentDoc = Math.min(currentDoc+BucketTable.SIZE, maxDoc);
+ for (SubScorer t = scorers; t != null; t = t.next)
+ t.scorer.score(t.collector, currentDoc);
+ bucketTable.collectHits(results);
+ }
+ }
+
+ static final class Bucket {
+ int doc = -1; // tells if bucket is valid
+ float score; // incremental score
+ int bits; // used for bool constraints
+ int coord; // count of terms in score
+ Bucket next; // next valid bucket
+ }
+
+ /** A simple hash table of document scores within a range. */
+ static final class BucketTable {
+ public static final int SIZE = 1 << 10;
+ public static final int MASK = SIZE - 1;
+
+ final Bucket[] buckets = new Bucket[SIZE];
+ Bucket first = null; // head of valid list
+
+ private BooleanScorer scorer;
+
+ public BucketTable(BooleanScorer scorer) {
+ this.scorer = scorer;
+ }
+
+ public final void collectHits(HitCollector results) {
+ final int required = scorer.requiredMask;
+ final int prohibited = scorer.prohibitedMask;
+ final float[] coord = scorer.coordFactors;
+
+ for (Bucket bucket = first; bucket!=null; bucket = bucket.next) {
+ if ((bucket.bits & prohibited) == 0 && // check prohibited
+ (bucket.bits & required) == required){// check required
+ results.collect(bucket.doc, // add to results
+ bucket.score * coord[bucket.coord]);
+ }
+ }
+ first = null; // reset for next round
+ }
+
+ public final int size() { return SIZE; }
+
+ public HitCollector newCollector(int mask) {
+ return new Collector(mask, this);
+ }
+ }
+
+ static final class Collector extends HitCollector {
+ private BucketTable bucketTable;
+ private int mask;
+ public Collector(int mask, BucketTable bucketTable) {
+ this.mask = mask;
+ this.bucketTable = bucketTable;
+ }
+ public final void collect(final int doc, final float score) {
+ final BucketTable table = bucketTable;
+ final int i = doc & BucketTable.MASK;
+ Bucket bucket = table.buckets[i];
+ if (bucket == null)
+ table.buckets[i] = bucket = new Bucket();
+
+ if (bucket.doc != doc) { // invalid bucket
+ bucket.doc = doc; // set doc
+ bucket.score = score; // initialize score
+ bucket.bits = mask; // initialize mask
+ bucket.coord = 1; // initialize coord
+
+ bucket.next = table.first; // push onto valid list
+ table.first = bucket;
+ } else { // valid bucket
+ bucket.score += score; // increment score
+ bucket.bits |= mask; // add bits in mask
+ bucket.coord++; // increment coord
+ }
+ }
+ }
+}
diff --git a/src/java/org/apache/lucene/search/DateFilter.java b/src/java/org/apache/lucene/search/DateFilter.java
new file mode 100644
index 00000000000..b0ad2d70d47
--- /dev/null
+++ b/src/java/org/apache/lucene/search/DateFilter.java
@@ -0,0 +1,161 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * .
+ */
+
+import java.util.BitSet;
+import java.util.Date;
+import java.io.IOException;
+
+import org.apache.lucene.document.DateField;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermDocs;
+import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.index.IndexReader;
+
+/** A Filter that restricts search results to a range of time.
+
+   <p>For this to work, documents must have been indexed with a {@link
+   DateField}.  */
+
+public final class DateFilter extends Filter {
+  String field;
+
+  // Bounds are kept in their DateField string form and default to the widest
+  // possible range.
+  String start = DateField.MIN_DATE_STRING();
+  String end = DateField.MAX_DATE_STRING();
+
+  /** Creates an unbounded filter on field <code>f</code>; used by the
+    {@link #Before} and {@link #After} factories, which then narrow one bound. */
+  private DateFilter(String f) {
+    field = f;
+  }
+
+  /** Constructs a filter for field <code>f</code> matching dates between
+    <code>from</code> and <code>to</code>. */
+  public DateFilter(String f, Date from, Date to) {
+    field = f;
+    start = DateField.dateToString(from);
+    end = DateField.dateToString(to);
+  }
+  /** Constructs a filter for field <code>f</code> matching times between
+    <code>from</code> and <code>to</code>. */
+  public DateFilter(String f, long from, long to) {
+    field = f;
+    start = DateField.timeToString(from);
+    end = DateField.timeToString(to);
+  }
+
+  /** Constructs a filter for field <code>field</code> matching dates before
+    <code>date</code>. */
+  public static DateFilter Before(String field, Date date) {
+    DateFilter result = new DateFilter(field);
+    result.end = DateField.dateToString(date);
+    return result;
+  }
+  /** Constructs a filter for field <code>field</code> matching times before
+    <code>time</code>. */
+  public static DateFilter Before(String field, long time) {
+    DateFilter result = new DateFilter(field);
+    result.end = DateField.timeToString(time);
+    return result;
+  }
+
+  /** Constructs a filter for field <code>field</code> matching dates after
+    <code>date</code>. */
+  public static DateFilter After(String field, Date date) {
+    DateFilter result = new DateFilter(field);
+    result.start = DateField.dateToString(date);
+    return result;
+  }
+  /** Constructs a filter for field <code>field</code> matching times after
+    <code>time</code>. */
+  public static DateFilter After(String field, long time) {
+    DateFilter result = new DateFilter(field);
+    result.start = DateField.timeToString(time);
+    return result;
+  }
+
+  /** Returns a BitSet with true for documents which should be permitted in
+    search results, and false for those that should not.
+
+    <p>Terms are visited starting from the enumeration opened at
+    (<code>field</code>, <code>start</code>) and are accepted while they
+    compare less than or equal to (<code>field</code>, <code>end</code>).
+    If the enumeration has no current term, an empty BitSet is returned.
+
+    @param reader the index to read terms and postings from
+    @return the set of permitted document numbers
+    @throws IOException if reading the index fails */
+  final public BitSet bits(IndexReader reader) throws IOException {
+    BitSet bits = new BitSet(reader.maxDoc());
+    // Named "enumerator" because "enum" is a reserved word from Java 5 on.
+    TermEnum enumerator = reader.terms(new Term(field, start));
+    try {
+      Term stop = new Term(field, end);
+      // term() is null once the enumeration is exhausted, which can already
+      // be the case right after opening it; guard before comparing.
+      Term current = enumerator.term();
+      while (current != null && current.compareTo(stop) <= 0) {
+        TermDocs termDocs = reader.termDocs(current);
+        try {
+          while (termDocs.next())
+            bits.set(termDocs.doc());
+        } finally {
+          termDocs.close();
+        }
+        current = enumerator.next() ? enumerator.term() : null;
+      }
+    } finally {
+      enumerator.close();
+    }
+    return bits;
+  }
+
+  /** Returns <code>field:start-end</code>, with both bounds rendered through
+    <code>Date.toString()</code>. */
+  public final String toString() {
+    StringBuffer buffer = new StringBuffer();
+    buffer.append(field);
+    buffer.append(":");
+    buffer.append(DateField.stringToDate(start).toString());
+    buffer.append("-");
+    buffer.append(DateField.stringToDate(end).toString());
+    return buffer.toString();
+  }
+}
diff --git a/src/java/org/apache/lucene/search/ExactPhraseScorer.java b/src/java/org/apache/lucene/search/ExactPhraseScorer.java
new file mode 100644
index 00000000000..46c590fd68c
--- /dev/null
+++ b/src/java/org/apache/lucene/search/ExactPhraseScorer.java
@@ -0,0 +1,91 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * .
+ */
+
+import java.io.IOException;
+import java.util.Vector;
+import org.apache.lucene.util.*;
+import org.apache.lucene.index.*;
+
+/** PhraseScorer subclass whose phraseFreq() counts how often the first and
+ * last entries of the phrase-position list line up on the same position.
+ * NOTE(review): presumably PhrasePositions reports each term's position
+ * relative to the start of the phrase, so "all equal" means an exact phrase
+ * occurrence -- confirm in PhrasePositions. */
+final class ExactPhraseScorer extends PhraseScorer {
+
+ /** Forwards all three arguments unchanged to the PhraseScorer constructor.
+ * NOTE(review): tps/n/w look like per-term positions, norms and weight --
+ * confirm against PhraseScorer. */
+ ExactPhraseScorer(TermPositions[] tps, byte[] n, float w)
+ throws IOException {
+ super(tps, n, w);
+ }
+
+ /** Returns the number of matches counted, as a float.  Returns early with
+ * the count so far as soon as first.nextPosition() reports no further
+ * position. */
+ protected final float phraseFreq() throws IOException {
+ // sort list with pq
+ for (PhrasePositions pp = first; pp != null; pp = pp.next) {
+ pp.firstPosition();
+ pq.put(pp); // build pq from list
+ }
+ pqToList(); // rebuild list from pq
+
+ int freq = 0;
+ do { // find position w/ all terms
+ // Advance whichever entry is currently first until it is no longer behind
+ // last; firstToLast() then rotates the list, so first/last refer to
+ // different entries on the next check.
+ while (first.position < last.position) { // scan forward in first
+ do {
+ if (!first.nextPosition())
+ return (float)freq;
+ } while (first.position < last.position);
+ firstToLast();
+ }
+ freq++; // all equal: a match
+ } while (last.nextPosition());
+
+ return (float)freq;
+ }
+}
diff --git a/src/java/org/apache/lucene/search/Filter.java b/src/java/org/apache/lucene/search/Filter.java
new file mode 100644
index 00000000000..2881ecaef2f
--- /dev/null
+++ b/src/java/org/apache/lucene/search/Filter.java
@@ -0,0 +1,67 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * .
+ */
+
+import java.util.BitSet;
+import java.io.IOException;
+import org.apache.lucene.index.IndexReader;
+
+/** Abstract base class providing a mechanism to restrict searches to a subset
+  of an index. */
+public abstract class Filter {
+  /** Computes which documents of an index may appear in search results.
+   *
+   * @param reader the index being searched
+   * @return a BitSet with true for documents which should be permitted in
+   *   search results, and false for those that should not
+   * @throws IOException if the index cannot be read
+   */
+  public abstract BitSet bits(IndexReader reader) throws IOException;
+}
diff --git a/src/java/org/apache/lucene/search/FilteredTermEnum.java b/src/java/org/apache/lucene/search/FilteredTermEnum.java
new file mode 100644
index 00000000000..20661d59f76
--- /dev/null
+++ b/src/java/org/apache/lucene/search/FilteredTermEnum.java
@@ -0,0 +1,130 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * .
+ */
+
+import java.io.IOException;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermEnum;
+
+/** Abstract class for enumerating a subset of all terms.
+
+  <p>Term enumerations are always ordered by Term.compareTo().  Each term in
+  the enumeration is greater than all that precede it.  */
+public abstract class FilteredTermEnum extends TermEnum {
+  private Term currentTerm = null;
+  private TermEnum actualEnum = null;
+
+  /** Performs no work of its own; the enumeration stays uninitialized until
+   * {@link #setEnum} is called. */
+  public FilteredTermEnum(IndexReader reader, Term term) throws IOException {}
+
+  /** Equality compare on the term */
+  protected abstract boolean termCompare(Term term);
+
+  /** Equality measure on the term */
+  protected abstract float difference();
+
+  /** Indicates the end of the enumeration has been reached */
+  protected abstract boolean endEnum();
+
+  /** Installs the underlying enumeration and positions this enumeration on
+   * its first accepted term, if any.
+   *
+   * @param actualEnum the unfiltered enumeration to wrap
+   * @throws IOException if advancing the underlying enumeration fails
+   */
+  protected void setEnum(TermEnum actualEnum) throws IOException {
+    this.actualEnum = actualEnum;
+    // Find the first term that matches.  An exhausted enumeration has a null
+    // current term, which must not be handed to termCompare().
+    Term term = actualEnum.term();
+    if (term != null && termCompare(term))
+      currentTerm = term;
+    else next();
+  }
+
+  /**
+   * Returns the docFreq of the current Term in the enumeration.
+   * Initially invalid, valid after next() called for the first time.
+   * Returns -1 while no underlying enumeration is installed.
+   */
+  public int docFreq() {
+    if (actualEnum == null) return -1;
+    return actualEnum.docFreq();
+  }
+
+  /** Increments the enumeration to the next element.  True if one exists. */
+  public boolean next() throws IOException {
+    if (actualEnum == null) return false; // the actual enumerator is not initialized!
+    currentTerm = null;
+    // Skip rejected terms until one is accepted, the subclass signals the
+    // end, or the underlying enumeration runs out.
+    while (!endEnum() && actualEnum.next()) {
+      Term term = actualEnum.term();
+      if (termCompare(term)) {
+        currentTerm = term;
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /** Returns the current Term in the enumeration.
+   * Initially invalid, valid after next() called for the first time. */
+  public Term term() {
+    return currentTerm;
+  }
+
+  /** Closes the enumeration to further activity, freeing resources.
+   * Safe to call when no underlying enumeration is installed, e.g. before
+   * setEnum() or after an earlier close(). */
+  public void close() throws IOException {
+    if (actualEnum != null)
+      actualEnum.close();
+    currentTerm = null;
+    actualEnum = null;
+  }
+}
diff --git a/src/java/org/apache/lucene/search/FuzzyQuery.java b/src/java/org/apache/lucene/search/FuzzyQuery.java
new file mode 100644
index 00000000000..2918d41c2e6
--- /dev/null
+++ b/src/java/org/apache/lucene/search/FuzzyQuery.java
@@ -0,0 +1,79 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * .
+ */
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import java.io.IOException;
+
+/** Implements the fuzzy search query */
+public final class FuzzyQuery extends MultiTermQuery {
+  private Term fuzzyTerm;
+
+  /** Creates a fuzzy query around <code>term</code>. */
+  public FuzzyQuery(Term term) {
+    super(term);
+    fuzzyTerm = term;
+  }
+
+  /** Installs a FuzzyTermEnum over <code>reader</code> for the query term. */
+  final void prepare(IndexReader reader) {
+    try {
+      final FuzzyTermEnum similarTerms = new FuzzyTermEnum(reader, fuzzyTerm);
+      setEnum(similarTerms);
+    } catch (IOException ignored) {
+      // NOTE(review): the failure is dropped here, so no enumeration is
+      // installed when the index cannot be read -- confirm this is intended.
+    }
+  }
+
+  /** Returns the superclass rendering followed by a <code>~</code>. */
+  public String toString(String field) {
+    final String base = super.toString(field);
+    return base + "~";
+  }
+}
diff --git a/src/java/org/apache/lucene/search/FuzzyTermEnum.java b/src/java/org/apache/lucene/search/FuzzyTermEnum.java
new file mode 100644
index 00000000000..d32409c0f45
--- /dev/null
+++ b/src/java/org/apache/lucene/search/FuzzyTermEnum.java
@@ -0,0 +1,175 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * .
+ */
+
+import java.io.IOException;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermEnum;
+
+/** Subclass of FilteredTermEnum for enumerating all terms that are similar to the specified filter term.
+
+  <p>Term enumerations are always ordered by Term.compareTo().  Each term in
+  the enumeration is greater than all that precede it.  */
+final public class FuzzyTermEnum extends FilteredTermEnum {
+  double distance;
+  boolean fieldMatch = false;
+  boolean endEnum = false;
+
+  Term searchTerm = null;
+  String field = "";
+  String text = "";
+  int textlen;
+
+  /** Opens an enumeration over the terms of <code>term</code>'s field,
+    starting from the empty text, and positions it on the first similar term.
+
+    @param reader the index whose terms are enumerated
+    @param term the term to find similar terms for
+    @throws IOException if the index cannot be read */
+  public FuzzyTermEnum(IndexReader reader, Term term) throws IOException {
+    super(reader, term);
+    searchTerm = term;
+    field = searchTerm.field();
+    text = searchTerm.text();
+    textlen = text.length();
+    setEnum(reader.terms(new Term(searchTerm.field(), "")));
+  }
+
+  /**
+    The termCompare method in FuzzyTermEnum uses Levenshtein distance to
+    calculate the distance between the given term and the comparing term.
+
+    <p>A term from a different field marks the end of the enumeration.  When
+    either text is empty the similarity ratio is not a finite number and the
+    term is rejected.
+   */
+  final protected boolean termCompare(Term term) {
+    // Compare field names by value, so the result does not depend on both
+    // strings being the same interned instance.
+    if (field != null && field.equals(term.field())) {
+      String target = term.text();
+      int targetlen = target.length();
+      int dist = editDistance(text, target, textlen, targetlen);
+      distance = 1 - ((double)dist / (double)Math.min(textlen, targetlen));
+      return (distance > FUZZY_THRESHOLD);
+    }
+    endEnum = true;
+    return false;
+  }
+
+  /** Returns how far the last computed similarity lies above
+    FUZZY_THRESHOLD, multiplied by SCALE_FACTOR. */
+  final protected float difference() {
+    return (float)((distance - FUZZY_THRESHOLD) * SCALE_FACTOR);
+  }
+
+  /** True once a term from another field has been seen. */
+  final public boolean endEnum() {
+    return endEnum;
+  }
+
+  /******************************
+   * Compute Levenshtein distance
+   ******************************/
+
+  public static final double FUZZY_THRESHOLD = 0.5;
+  public static final double SCALE_FACTOR = 1.0f / (1.0f - FUZZY_THRESHOLD);
+
+  /**
+    Finds and returns the smallest of three integers
+   */
+  private final static int min(int a, int b, int c) {
+    int t = (a < b) ? a : b;
+    return (t < c) ? t : c;
+  }
+
+  /**
+   * This per-instance array saves us from the time required to create a new
+   * array every time editDistance is called.  It only ever grows.
+   */
+  private int e[][] = new int[0][0];
+
+  /**
+    Levenshtein distance also known as edit distance is a measure of similarity
+    between two strings where the distance is measured as the number of character
+    deletions, insertions or substitutions required to transform one string to
+    the other string.
+    <p>This method takes in four parameters; two strings and their respective
+    lengths to compute the Levenshtein distance between the two strings.
+    The result is returned as an integer.
+
+    @param s the first string
+    @param t the second string
+    @param n the length of <code>s</code>
+    @param m the length of <code>t</code>
+    @return the edit distance between <code>s</code> and <code>t</code>
+   */
+  private final int editDistance(String s, String t, int n, int m) {
+    // An empty string is transformed by inserting every character of the other.
+    if (n == 0) return m;
+    if (m == 0) return n;
+
+    // Grow the cached matrix to at least (n+1) x (m+1), keeping whatever
+    // capacity each dimension already has.
+    int rows = e.length;
+    int cols = (rows == 0) ? 0 : e[0].length;
+    if (rows <= n || cols <= m) {
+      e = new int[Math.max(rows, n+1)][Math.max(cols, m+1)];
+    }
+    int d[][] = e; // matrix
+    int i; // iterates through s
+    int j; // iterates through t
+    char s_i; // ith character of s
+
+    // init matrix d
+    for (i = 0; i <= n; i++) d[i][0] = i;
+    for (j = 0; j <= m; j++) d[0][j] = j;
+
+    // start computing edit distance
+    for (i = 1; i <= n; i++) {
+      s_i = s.charAt(i - 1);
+      for (j = 1; j <= m; j++) {
+        if (s_i != t.charAt(j-1))
+          d[i][j] = min(d[i-1][j], d[i][j-1], d[i-1][j-1])+1;
+        else d[i][j] = min(d[i-1][j]+1, d[i][j-1]+1, d[i-1][j-1]);
+      }
+    }
+
+    // we got the result!
+    return d[n][m];
+  }
+
+  /** Closes the underlying enumeration and drops the references to the
+    search term, its field and its text. */
+  public void close() throws IOException {
+    super.close();
+    searchTerm = null;
+    field = null;
+    text = null;
+  }
+}
diff --git a/src/java/org/apache/lucene/search/HitCollector.java b/src/java/org/apache/lucene/search/HitCollector.java
new file mode 100644
index 00000000000..ca66b56331f
--- /dev/null
+++ b/src/java/org/apache/lucene/search/HitCollector.java
@@ -0,0 +1,76 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * .
+ */
+
+/** Lower-level search API.
+ * @see IndexSearcher#search(Query,HitCollector)
+ */
+public abstract class HitCollector {
+ /** Called once for every non-zero scoring document, with the document number
+ * and its score.
+ *
+ * <P>If, for example, an application wished to collect all of the hits for a
+ * query in a BitSet, then it might:<pre>
+ *   Searcher searcher = new IndexSearcher(indexReader);
+ *   final BitSet bits = new BitSet(indexReader.maxDoc());
+ *   searcher.search(query, new HitCollector() {
+ *       public void collect(int doc, float score) {
+ *         bits.set(doc);
+ *       }
+ *     });
+ * </pre>
+ *
+ * @param doc the document number of the hit
+ * @param score the score of the hit
+ */
+ public abstract void collect(int doc, float score);
+}
diff --git a/src/java/org/apache/lucene/search/HitQueue.java b/src/java/org/apache/lucene/search/HitQueue.java
new file mode 100644
index 00000000000..12b02cf39f7
--- /dev/null
+++ b/src/java/org/apache/lucene/search/HitQueue.java
@@ -0,0 +1,72 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import org.apache.lucene.util.PriorityQueue;
+
+/** A priority queue of <code>ScoreDoc</code> hits, ordered by score and then
+ * by document number. */
+final class HitQueue extends PriorityQueue {
+  /** Creates a queue and initializes it with <code>size</code>. */
+  HitQueue(int size) {
+    initialize(size);
+  }
+
+  /** Returns true when hit <code>a</code> ranks below hit <code>b</code>: a
+   * lower score is "less", and among equal scores the hit with the larger
+   * document number is "less". */
+  protected final boolean lessThan(Object a, Object b) {
+    ScoreDoc left = (ScoreDoc)a;
+    ScoreDoc right = (ScoreDoc)b;
+    if (left.score != right.score)
+      return left.score < right.score;            // scores differ: compare them
+    return left.doc > right.doc;                  // tie: break on doc number
+  }
+}
diff --git a/src/java/org/apache/lucene/search/Hits.java b/src/java/org/apache/lucene/search/Hits.java
new file mode 100644
index 00000000000..c25db7faaff
--- /dev/null
+++ b/src/java/org/apache/lucene/search/Hits.java
@@ -0,0 +1,188 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import java.util.Vector;
+import java.util.BitSet;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.IndexReader;
+
+/** A ranked list of documents, used to hold search results.
+ *
+ * <p>Hits are fetched from the searcher in growing batches, and the stored
+ * documents of recently requested hits are kept in a small LRU cache. */
+public final class Hits {
+  private Query query;
+  private Searcher searcher;
+  private Filter filter = null;
+
+  private int length;                             // the total number of hits
+  private Vector hitDocs = new Vector();          // cache of hits retrieved
+
+  private HitDoc first;                           // head of LRU cache
+  private HitDoc last;                            // tail of LRU cache
+  private int numDocs = 0;                        // number cached
+  private int maxDocs = 200;                      // max to cache
+
+  /** Runs query <code>q</code> (restricted by filter <code>f</code>, which
+   * may be null) against searcher <code>s</code> and retrieves the first
+   * batch of hits.
+   * @throws IOException if the underlying search fails
+   */
+  Hits(Searcher s, Query q, Filter f) throws IOException {
+    query = q;
+    searcher = s;
+    filter = f;
+    getMoreDocs(50);                              // retrieve 100 initially
+  }
+
+  // Tries to add new documents to hitDocs.
+  // Ensures that the hit numbered <code>min</code> has been retrieved.
+  private final void getMoreDocs(int min) throws IOException {
+    if (hitDocs.size() > min)
+      min = hitDocs.size();
+
+    int n = min * 2;                              // double # retrieved
+    TopDocs topDocs = searcher.search(query, filter, n);
+    length = topDocs.totalHits;
+    ScoreDoc[] scoreDocs = topDocs.scoreDocs;
+
+    // Scale scores down only when the top score exceeds 1.0.
+    float scoreNorm = 1.0f;
+    if (length > 0 && scoreDocs[0].score > 1.0f)
+      scoreNorm = 1.0f / scoreDocs[0].score;
+
+    // Append only the hits that have not been retrieved before.
+    int end = scoreDocs.length < length ? scoreDocs.length : length;
+    for (int i = hitDocs.size(); i < end; i++)
+      hitDocs.addElement(new HitDoc(scoreDocs[i].score*scoreNorm,
+                                    scoreDocs[i].doc));
+  }
+
+  /** Returns the total number of hits available in this set. */
+  public final int length() {
+    return length;
+  }
+
+  /** Returns the nth document in this set.
+   * <p>Documents are cached, so that repeated requests for the same element
+   * may return the same Document object.
+   * @throws IOException if searching or reading the document fails
+   * @throws IndexOutOfBoundsException if <code>n</code> is not a valid hit number
+   */
+  public final Document doc(int n) throws IOException {
+    HitDoc hitDoc = hitDoc(n);
+
+    // A hit is in the LRU list exactly when its doc is non-null.  Load the
+    // document BEFORE touching the list, so that an IOException from the
+    // searcher cannot leave a doc-less entry linked into the list (which
+    // would later be re-linked onto itself and corrupt the cache).
+    if (hitDoc.doc == null)
+      hitDoc.doc = searcher.doc(hitDoc.id);       // cache miss: read document
+    else
+      remove(hitDoc);                             // cache hit: unlink from list
+
+    addToFront(hitDoc);                           // add to front of list
+    if (numDocs > maxDocs) {                      // if cache is full
+      HitDoc oldLast = last;
+      remove(last);                               // flush last
+      oldLast.doc = null;                         // let doc get gc'd
+    }
+
+    return hitDoc.doc;
+  }
+
+  /** Returns the score for the nth document in this set.
+   * @throws IOException if fetching more hits from the searcher fails
+   * @throws IndexOutOfBoundsException if <code>n</code> is not a valid hit number
+   */
+  public final float score(int n) throws IOException {
+    return hitDoc(n).score;
+  }
+
+  // Returns the nth hit, retrieving more hits from the searcher if needed.
+  private final HitDoc hitDoc(int n) throws IOException {
+    if (n >= length)
+      throw new IndexOutOfBoundsException("Not a valid hit number: " + n);
+    if (n >= hitDocs.size())
+      getMoreDocs(n);
+
+    return (HitDoc)hitDocs.elementAt(n);
+  }
+
+  private final void addToFront(HitDoc hitDoc) {  // insert at front of cache
+    if (first == null)
+      last = hitDoc;
+    else
+      first.prev = hitDoc;
+
+    hitDoc.next = first;
+    first = hitDoc;
+    hitDoc.prev = null;
+
+    numDocs++;
+  }
+
+  private final void remove(HitDoc hitDoc) {      // remove from cache
+    if (hitDoc.doc == null)                       // it's not in the list
+      return;                                     // abort
+
+    if (hitDoc.next == null)
+      last = hitDoc.prev;
+    else
+      hitDoc.next.prev = hitDoc.prev;
+
+    if (hitDoc.prev == null)
+      first = hitDoc.next;
+    else
+      hitDoc.prev.next = hitDoc.next;
+
+    numDocs--;
+  }
+}
+
+/** One entry of a Hits list: a score and document id, the lazily loaded
+ * document, and the links used by the doubly-linked document cache. */
+final class HitDoc {
+  float score;                                    // score of this hit
+  int id;                                         // document number
+  Document doc = null;                            // stored document, once loaded
+
+  HitDoc next;                                    // in doubly-linked cache
+  HitDoc prev;                                    // in doubly-linked cache
+
+  /** Creates a hit with the given score and document id; no document is
+   * loaded yet. */
+  HitDoc(float score, int id) {
+    this.score = score;
+    this.id = id;
+  }
+}
diff --git a/src/java/org/apache/lucene/search/IndexSearcher.java b/src/java/org/apache/lucene/search/IndexSearcher.java
new file mode 100644
index 00000000000..69fda6b18b3
--- /dev/null
+++ b/src/java/org/apache/lucene/search/IndexSearcher.java
@@ -0,0 +1,178 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import java.util.BitSet;
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.util.PriorityQueue;
+
+/** Implements search over a single IndexReader. */
+public final class IndexSearcher extends Searcher {
+  IndexReader reader;
+
+  /** Creates a searcher searching the index in the named directory. */
+  public IndexSearcher(String path) throws IOException {
+    this(IndexReader.open(path));
+  }
+
+  /** Creates a searcher searching the index in the provided directory. */
+  public IndexSearcher(Directory directory) throws IOException {
+    this(IndexReader.open(directory));
+  }
+
+  /** Creates a searcher searching the provided index. */
+  public IndexSearcher(IndexReader r) {
+    reader = r;
+  }
+
+  /** Frees resources associated with this Searcher by closing the reader. */
+  public final void close() throws IOException {
+    reader.close();
+  }
+
+  /** Returns the reader's document frequency for <code>term</code>. */
+  final int docFreq(Term term) throws IOException {
+    return reader.docFreq(term);
+  }
+
+  /** Returns the reader's document numbered <code>i</code>. */
+  final Document doc(int i) throws IOException {
+    return reader.document(i);
+  }
+
+  /** Returns the reader's <code>maxDoc()</code>. */
+  final int maxDoc() throws IOException {
+    return reader.maxDoc();
+  }
+
+  /** Collects the <code>nDocs</code> best-scoring hits for <code>query</code>,
+   * optionally restricted by <code>filter</code>, together with the total
+   * number of hits. */
+  final TopDocs search(Query query, Filter filter, final int nDocs)
+       throws IOException {
+    Scorer scorer = Query.scorer(query, this, reader);
+    if (scorer == null)
+      return new TopDocs(0, new ScoreDoc[0]);
+
+    final BitSet bits = (filter == null) ? null : filter.bits(reader);
+    final HitQueue hq = new HitQueue(nDocs);
+    final int[] totalHits = new int[1];           // one-element holder, written by the collector
+    scorer.score(new HitCollector() {
+        private float minScore = 0.0f;
+        public final void collect(int doc, float score) {
+          if (!(score > 0.0f))                    // ignore zeroed buckets
+            return;
+          if (bits != null && !bits.get(doc))     // skip docs not in bits
+            return;
+
+          totalHits[0]++;
+          if (!(score >= minScore))               // cannot enter the queue
+            return;
+
+          hq.put(new ScoreDoc(doc, score));       // update hit queue
+          if (hq.size() > nDocs) {                // if hit queue overfull
+            hq.pop();                             // remove lowest in hit queue
+            minScore = ((ScoreDoc)hq.top()).score; // reset minScore
+          }
+        }
+      }, reader.maxDoc());
+
+    // Pop lowest-first, filling the array from the back.
+    ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
+    int slot = scoreDocs.length - 1;
+    while (slot >= 0) {
+      scoreDocs[slot] = (ScoreDoc)hq.pop();
+      slot--;
+    }
+
+    return new TopDocs(totalHits[0], scoreDocs);
+  }
+
+  /** Lower-level search API.
+   *
+   * <p>{@link HitCollector#collect(int,float)} is called for every non-zero
+   * scoring document.
+   *
+   * <p>Applications should only use this if they need <i>all</i> of the
+   * matching documents.  The high-level search API ({@link
+   * Searcher#search(Query)}) is usually more efficient, as it skips
+   * non-high-scoring hits.  */
+  public final void search(Query query, HitCollector results)
+       throws IOException {
+    search(query, null, results);
+  }
+
+  /** Lower-level search API.
+   *
+   * <p>{@link HitCollector#collect(int,float)} is called for every non-zero
+   * scoring document.  When <code>filter</code> is non-null, only documents
+   * whose bit is set in the filter are passed on to <code>results</code>.
+   *
+   * <p>Applications should only use this if they need <i>all</i> of the
+   * matching documents.  The high-level search API ({@link
+   * Searcher#search(Query)}) is usually more efficient, as it skips
+   * non-high-scoring hits.  */
+  public final void search(Query query, Filter filter,
+                           final HitCollector results) throws IOException {
+    final HitCollector collector;
+    if (filter == null) {
+      collector = results;
+    } else {
+      final BitSet bits = filter.bits(reader);
+      collector = new HitCollector() {
+          public final void collect(int doc, float score) {
+            if (!bits.get(doc))                   // skip docs not in bits
+              return;
+            results.collect(doc, score);
+          }
+        };
+    }
+
+    Scorer scorer = Query.scorer(query, this, reader);
+    if (scorer != null)
+      scorer.score(collector, reader.maxDoc());
+  }
+
+}
diff --git a/src/java/org/apache/lucene/search/Makefile b/src/java/org/apache/lucene/search/Makefile
new file mode 100644
index 00000000000..09c091d1839
--- /dev/null
+++ b/src/java/org/apache/lucene/search/Makefile
@@ -0,0 +1,2 @@
+# sub-directory makefile for lucene
+include ../rules.mk
diff --git a/src/java/org/apache/lucene/search/MultiSearcher.java b/src/java/org/apache/lucene/search/MultiSearcher.java
new file mode 100644
index 00000000000..86e9c3c16ba
--- /dev/null
+++ b/src/java/org/apache/lucene/search/MultiSearcher.java
@@ -0,0 +1,152 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import java.util.Vector;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.util.PriorityQueue;
+
+/** Implements search over a set of Searcher's. */
+public final class MultiSearcher extends Searcher {
+ private Searcher[] searchers;
+ private int[] starts;
+ private int maxDoc = 0;
+
+ /** Creates a searcher which searches searchers. */
+ public MultiSearcher(Searcher[] searchers) throws IOException {
+ this.searchers = searchers;
+
+ starts = new int[searchers.length + 1]; // build starts array
+ for (int i = 0; i < searchers.length; i++) {
+ starts[i] = maxDoc;
+ maxDoc += searchers[i].maxDoc(); // compute maxDocs
+ }
+ starts[searchers.length] = maxDoc;
+ }
+
+ /** Frees resources associated with this Searcher. */
+ public final void close() throws IOException {
+ for (int i = 0; i < searchers.length; i++)
+ searchers[i].close();
+ }
+
+ final int docFreq(Term term) throws IOException {
+ int docFreq = 0;
+ for (int i = 0; i < searchers.length; i++)
+ docFreq += searchers[i].docFreq(term);
+ return docFreq;
+ }
+
+ final Document doc(int n) throws IOException {
+ int i = searcherIndex(n); // find searcher index
+ return searchers[i].doc(n - starts[i]); // dispatch to searcher
+ }
+
+ // replace w/ call to Arrays.binarySearch in Java 1.2
+ private final int searcherIndex(int n) { // find searcher for doc n:
+ int lo = 0; // search starts array
+ int hi = searchers.length - 1; // for first element less
+ // than n, return its index
+ while (hi >= lo) {
+ int mid = (lo + hi) >> 1;
+ int midValue = starts[mid];
+ if (n < midValue)
+ hi = mid - 1;
+ else if (n > midValue)
+ lo = mid + 1;
+ else
+ return mid;
+ }
+ return hi;
+ }
+
+ final int maxDoc() throws IOException {
+ return maxDoc;
+ }
+
+ final TopDocs search(Query query, Filter filter, int nDocs)
+ throws IOException {
+ HitQueue hq = new HitQueue(nDocs);
+ float minScore = 0.0f;
+ int totalHits = 0;
+
+ for (int i = 0; i < searchers.length; i++) { // search each searcher
+ TopDocs docs = searchers[i].search(query, filter, nDocs);
+ totalHits += docs.totalHits; // update totalHits
+ ScoreDoc[] scoreDocs = docs.scoreDocs;
+ for (int j = 0; j < scoreDocs.length; j++) { // merge scoreDocs into hq
+ ScoreDoc scoreDoc = scoreDocs[j];
+ if (scoreDoc.score >= minScore) {
+ scoreDoc.doc += starts[i]; // convert doc
+ hq.put(scoreDoc); // update hit queue
+ if (hq.size() > nDocs) { // if hit queue overfull
+ hq.pop(); // remove lowest in hit queue
+ minScore = ((ScoreDoc)hq.top()).score; // reset minScore
+ }
+ } else
+ break; // no more scores > minScore
+ }
+ }
+
+ ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
+ for (int i = hq.size()-1; i >= 0; i--) // put docs in array
+ scoreDocs[i] = (ScoreDoc)hq.pop();
+
+ return new TopDocs(totalHits, scoreDocs);
+ }
+}
diff --git a/src/java/org/apache/lucene/search/MultiTermQuery.java b/src/java/org/apache/lucene/search/MultiTermQuery.java
new file mode 100644
index 00000000000..87a45a20428
--- /dev/null
+++ b/src/java/org/apache/lucene/search/MultiTermQuery.java
@@ -0,0 +1,161 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import java.util.Vector;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermEnum;
+
+/** A Query that matches documents containing a subset of terms provided by a
+ * FilteredTermEnum enumeration.  MultiTermQuery is not designed to be used by
+ * itself, because it is not initialized with a FilteredTermEnum enumeration;
+ * one needs to be provided through {@link #setEnum}.  For example,
+ * WildcardQuery and FuzzyQuery extend MultiTermQuery to provide
+ * WildcardTermEnum and FuzzyTermEnum respectively. */
+public class MultiTermQuery extends Query {
+  private Term term;
+  private FilteredTermEnum termEnum;              // "enum" is reserved since Java 5
+  private IndexReader reader;
+  private float boost = 1.0f;
+  private BooleanQuery query;                     // built lazily by getQuery()
+
+  /** Enable or disable lucene style toString(field) format */
+  private static boolean LUCENE_STYLE_TOSTRING = false;
+
+  /** Constructs a query for terms matching <code>term</code>. */
+  public MultiTermQuery(Term term) {
+    this.term = term;
+  }
+
+  /** Set the TermEnum to be used */
+  protected void setEnum(FilteredTermEnum termEnum) {
+    this.termEnum = termEnum;
+  }
+
+  /** Sets the boost for this term to <code>boost</code>.  Documents containing
+   * this term will (in addition to the normal weightings) have their score
+   * multiplied by <code>boost</code>. */
+  final public void setBoost(float boost) {
+    this.boost = boost;
+  }
+
+  /** Returns the boost for this term. */
+  final public float getBoost() {
+    return boost;
+  }
+
+  /** Delegates to the expanded boolean query. */
+  final float sumOfSquaredWeights(Searcher searcher) throws IOException {
+    return getQuery().sumOfSquaredWeights(searcher);
+  }
+
+  /** Delegates to the expanded boolean query.  An IOException raised while
+   * expanding the query is rethrown as a RuntimeException carrying its
+   * string form. */
+  final void normalize(float norm) {
+    try {
+      getQuery().normalize(norm);
+    } catch (IOException e) {
+      throw new RuntimeException(e.toString());
+    }
+  }
+
+  /** Delegates to the expanded boolean query. */
+  final Scorer scorer(IndexReader reader) throws IOException {
+    return getQuery().scorer(reader);
+  }
+
+  // Lazily expands this query into a BooleanQuery holding one TermQuery per
+  // term delivered by the enumeration.  The enumeration is consumed once and
+  // closed afterwards, even if reading it fails; the result is kept for
+  // later calls.  NOTE: setEnum() must have been called before first use.
+  final private BooleanQuery getQuery() throws IOException {
+    if (query == null) {
+      BooleanQuery q = new BooleanQuery();
+      try {
+        do {
+          Term t = termEnum.term();
+          if (t != null) {
+            TermQuery tq = new TermQuery(t);      // found a match
+            tq.setBoost(boost * termEnum.difference()); // set the boost
+            q.add(tq, false, false);              // add to q
+          }
+        } while (termEnum.next());
+      } finally {
+        termEnum.close();
+      }
+      query = q;
+    }
+    return query;
+  }
+
+  /** Prints a user-readable version of this query. */
+  public String toString(String field) {
+    if (!LUCENE_STYLE_TOSTRING) {
+      Query q = null;
+      try {
+        q = getQuery();
+      } catch (Exception e) {
+        // Deliberately ignored: toString is best-effort, so when the query
+        // cannot be expanded we fall through to the plain term format below.
+      }
+      if (q != null) {
+        return "(" + q.toString(field) + ")";
+      }
+    }
+    StringBuffer buffer = new StringBuffer();
+    if (!term.field().equals(field)) {
+      buffer.append(term.field());
+      buffer.append(":");
+    }
+    buffer.append(term.text());
+    if (boost != 1.0f) {
+      buffer.append("^");
+      buffer.append(Float.toString(boost));
+    }
+    return buffer.toString();
+  }
+}
diff --git a/src/java/org/apache/lucene/search/PhrasePositions.java b/src/java/org/apache/lucene/search/PhrasePositions.java
new file mode 100644
index 00000000000..adfb59e63b2
--- /dev/null
+++ b/src/java/org/apache/lucene/search/PhrasePositions.java
@@ -0,0 +1,96 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import org.apache.lucene.index.*;
+
+final class PhrasePositions {
+ int doc; // current doc
+ int position; // position in doc
+ int count; // remaining pos in this doc
+ int offset; // position in phrase
+ TermPositions tp; // stream of positions
+ PhrasePositions next; // used to make lists
+
+ PhrasePositions(TermPositions t, int o) throws IOException {
+ tp = t;
+ offset = o;
+ next();
+ }
+
+ final void next() throws IOException { // increments to next doc
+ if (!tp.next()) {
+ tp.close(); // close stream
+ doc = Integer.MAX_VALUE; // sentinel value
+ return;
+ }
+ doc = tp.doc();
+ position = 0;
+ }
+
+ final void firstPosition() throws IOException {
+ count = tp.freq(); // read first pos
+ nextPosition();
+ }
+
+ final boolean nextPosition() throws IOException {
+ if (count-- > 0) { // read subsequent pos's
+ position = tp.nextPosition() - offset;
+ return true;
+ } else
+ return false;
+ }
+}
diff --git a/src/java/org/apache/lucene/search/PhraseQuery.java b/src/java/org/apache/lucene/search/PhraseQuery.java
new file mode 100644
index 00000000000..32a85111269
--- /dev/null
+++ b/src/java/org/apache/lucene/search/PhraseQuery.java
@@ -0,0 +1,183 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import java.util.Vector;
+
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermDocs;
+import org.apache.lucene.index.TermPositions;
+import org.apache.lucene.index.IndexReader;
+
+/** A Query that matches documents containing a particular sequence of terms.
+  This may be combined with other terms with a {@link BooleanQuery}.
+  */
+final public class PhraseQuery extends Query {
+  private String field;                   // field of every term; null until the first add()
+  private Vector terms = new Vector();    // the phrase's Terms, in phrase order
+  private float idf = 0.0f;               // sum of the terms' IDFs
+  private float weight = 0.0f;
+
+  private float boost = 1.0f;
+  private int slop = 0;
+
+
+  /** Constructs an empty phrase query. */
+  public PhraseQuery() {
+  }
+
+  /** Sets the boost for this term to <code>b</code>.  Documents containing
+    this term will (in addition to the normal weightings) have their score
+    multiplied by <code>b</code>. */
+  public final void setBoost(float b) { boost = b; }
+  /** Gets the boost for this term.  Documents containing
+    this term will (in addition to the normal weightings) have their score
+    multiplied by <code>b</code>.   The boost is 1.0 by default.  */
+  public final float getBoost() { return boost; }
+
+  /** Sets the number of other words permitted between words in query phrase.
+    If zero, then this is an exact phrase search.  For larger values this works
+    like a <code>WITHIN</code> or <code>NEAR</code> operator.
+
+    <p>The slop is in fact an edit-distance, where the units correspond to
+    moves of terms in the query phrase out of position.  For example, to switch
+    the order of two words requires two moves (the first move places the words
+    atop one another), so to permit re-orderings of phrases, the slop must be
+    at least two.
+
+    <p>More exact matches are scored higher than sloppier matches, thus search
+    results are sorted by exactness.
+
+    <p>The slop is zero by default, requiring exact matches.*/
+  public final void setSlop(int s) { slop = s; }
+  /** Returns the slop.  See setSlop(). */
+  public final int getSlop() { return slop; }
+
+  /** Adds a term to the end of the query phrase.
+    @throws IllegalArgumentException if the term's field differs from the
+    field of the terms already added */
+  public final void add(Term term) {
+    if (terms.size() == 0)
+      field = term.field();
+    else if (!term.field().equals(field))      // compare by value, not by identity
+      throw new IllegalArgumentException
+        ("All phrase terms must be in the same field: " + term);
+
+    terms.addElement(term);
+  }
+
+  /** Computes this query's weight (summed term IDFs times the boost) and
+    returns its square. */
+  final float sumOfSquaredWeights(Searcher searcher) throws IOException {
+    idf = 0.0f;          // start over: otherwise each reuse of the query adds to the old sum
+    for (int i = 0; i < terms.size(); i++)        // sum term IDFs
+      idf += Similarity.idf((Term)terms.elementAt(i), searcher);
+
+    weight = idf * boost;
+    return weight * weight;                       // square term weights
+  }
+
+  /** Scales the weight by the query norm and by the summed IDF. */
+  final void normalize(float norm) {
+    weight *= norm;                               // normalize for query
+    weight *= idf;                                // factor from document
+  }
+
+  /** Builds the scorer for this phrase, or returns null when the phrase is
+    empty or the reader returns no postings for one of its terms. */
+  final Scorer scorer(IndexReader reader) throws IOException {
+    if (terms.size() == 0)                        // optimize zero-term case
+      return null;
+    if (terms.size() == 1) {                      // optimize one-term case
+      Term term = (Term)terms.elementAt(0);
+      TermDocs docs = reader.termDocs(term);
+      if (docs == null)
+        return null;
+      return new TermScorer(docs, reader.norms(term.field()), weight);
+    }
+
+    TermPositions[] tps = new TermPositions[terms.size()];
+    for (int i = 0; i < terms.size(); i++) {
+      TermPositions p = reader.termPositions((Term)terms.elementAt(i));
+      if (p == null) {
+        for (int j = 0; j < i; j++)               // release the streams opened so far
+          tps[j].close();
+        return null;
+      }
+      tps[i] = p;
+    }
+
+    if (slop == 0)                                // optimize exact case
+      return new ExactPhraseScorer(tps, reader.norms(field), weight);
+    else
+      return
+        new SloppyPhraseScorer(tps, slop, reader.norms(field), weight);
+
+  }
+
+  /** Prints a user-readable version of this query.  The field name is
+    omitted when it equals <code>f</code> or when no term has been added. */
+  public final String toString(String f) {
+    StringBuffer buffer = new StringBuffer();
+    if (field != null && !field.equals(f)) {      // field is null for an empty phrase
+      buffer.append(field);
+      buffer.append(":");
+    }
+
+    buffer.append("\"");
+    for (int i = 0; i < terms.size(); i++) {
+      buffer.append(((Term)terms.elementAt(i)).text());
+      if (i != terms.size()-1)
+        buffer.append(" ");
+    }
+    buffer.append("\"");
+
+    if (boost != 1.0f) {
+      buffer.append("^");
+      buffer.append(Float.toString(boost));
+    }
+
+    return buffer.toString();
+  }
+}
diff --git a/src/java/org/apache/lucene/search/PhraseQueue.java b/src/java/org/apache/lucene/search/PhraseQueue.java
new file mode 100644
index 00000000000..9f093ee538b
--- /dev/null
+++ b/src/java/org/apache/lucene/search/PhraseQueue.java
@@ -0,0 +1,72 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import org.apache.lucene.util.PriorityQueue;
+
+/** Priority queue of {@link PhrasePositions}, ordered by document number and,
+ * within the same document, by position. */
+final class PhraseQueue extends PriorityQueue {
+  /** Creates a queue initialized for <code>size</code> entries. */
+  PhraseQueue(int size) {
+    initialize(size);
+  }
+
+  /** Returns true when <code>o1</code> sorts before <code>o2</code>: lower
+   * document first, ties broken by lower position. */
+  protected final boolean lessThan(Object o1, Object o2) {
+    PhrasePositions left = (PhrasePositions)o1;
+    PhrasePositions right = (PhrasePositions)o2;
+    if (left.doc != right.doc)
+      return left.doc < right.doc;
+    return left.position < right.position;
+  }
+}
diff --git a/src/java/org/apache/lucene/search/PhraseScorer.java b/src/java/org/apache/lucene/search/PhraseScorer.java
new file mode 100644
index 00000000000..f8cf741ed0e
--- /dev/null
+++ b/src/java/org/apache/lucene/search/PhraseScorer.java
@@ -0,0 +1,124 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import java.util.Vector;
+import org.apache.lucene.util.*;
+import org.apache.lucene.index.*;
+
+/** Shared machinery for phrase scorers.  Keeps one PhrasePositions per phrase
+ * term in a linked list (first..last), finds documents on which all of them
+ * line up, and leaves the actual phrase matching to phraseFreq(). */
+abstract class PhraseScorer extends Scorer {
+ protected byte[] norms; // indexed by document number in score()
+ protected float weight; // query weight multiplied into every score
+
+ protected PhraseQueue pq; // used to order the PhrasePositions
+ protected PhrasePositions first, last; // head and tail of the linked list
+
+ /** Creates one PhrasePositions per entry of tps, using the array index as
+ * the term's offset in the phrase, and arranges them into the list. */
+ PhraseScorer(TermPositions[] tps, byte[] n, float w) throws IOException {
+ norms = n;
+ weight = w;
+
+ // use PQ to build a sorted list of PhrasePositions
+ pq = new PhraseQueue(tps.length);
+ for (int i = 0; i < tps.length; i++)
+ pq.put(new PhrasePositions(tps[i], i));
+ pqToList();
+ }
+
+ /** Collects into results every document numbered below end for which
+ * phraseFreq() returns a value above zero. */
+ final void score(HitCollector results, int end) throws IOException {
+ while (last.doc < end) { // find doc w/ all the terms
+ while (first.doc < last.doc) { // scan forward in first
+ do {
+ first.next();
+ } while (first.doc < last.doc);
+ firstToLast();
+ if (last.doc >= end)
+ return;
+ }
+
+ // found doc with all terms
+ float freq = phraseFreq(); // check for phrase
+
+ if (freq > 0.0) {
+ float score = Similarity.tf(freq)*weight; // compute score
+ score *= Similarity.norm(norms[first.doc]); // normalize
+ results.collect(first.doc, score); // add to results
+ }
+ last.next(); // resume scanning
+ }
+ }
+
+ /** Returns the phrase frequency for the document the list is positioned on;
+ * score() treats a value of zero or less as "no match". */
+ abstract protected float phraseFreq() throws IOException;
+
+ /** Empties pq into the linked list, in the order pq.pop() returns entries
+ * (presumably ascending by doc, then position, per PhraseQueue.lessThan --
+ * confirm against PriorityQueue). */
+ protected final void pqToList() {
+ last = first = null;
+ while (pq.top() != null) {
+ PhrasePositions pp = (PhrasePositions)pq.pop();
+ if (last != null) { // add next to end of list
+ last.next = pp;
+ } else
+ first = pp;
+ last = pp;
+ pp.next = null;
+ }
+ }
+
+ /** Unlinks the head of the list and re-attaches it as the new tail. */
+ protected final void firstToLast() {
+ last.next = first; // move first to end of list
+ last = first;
+ first = first.next;
+ last.next = null;
+ }
+}
diff --git a/src/java/org/apache/lucene/search/PrefixQuery.java b/src/java/org/apache/lucene/search/PrefixQuery.java
new file mode 100644
index 00000000000..d1c8bf9d7c0
--- /dev/null
+++ b/src/java/org/apache/lucene/search/PrefixQuery.java
@@ -0,0 +1,153 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.index.TermDocs;
+import org.apache.lucene.index.IndexReader;
+
+/** A Query that matches documents containing terms with a specified prefix. */
+final public class PrefixQuery extends Query {
+  private Term prefix;
+  private IndexReader reader;       // set by prepare(); null until then
+  private float boost = 1.0f;
+  private BooleanQuery query;       // expansion of the prefix, built lazily by getQuery()
+
+  /** Constructs a query for terms starting with <code>prefix</code>. */
+  public PrefixQuery(Term prefix) {
+    this.prefix = prefix;
+    // The reader is supplied later through prepare(); there is nothing to
+    // assign here (the old "this.reader = reader" assigned the field to itself).
+  }
+
+  /** Sets the boost for this term to <code>b</code>.  Documents containing
+    this term will (in addition to the normal weightings) have their score
+    multiplied by <code>boost</code>. */
+  public void setBoost(float boost) {
+    this.boost = boost;
+  }
+
+  /** Returns the boost for this term. */
+  public float getBoost() {
+    return boost;
+  }
+
+  /** Remembers the reader to expand the prefix against and discards any
+    previously built expansion. */
+  final void prepare(IndexReader reader) {
+    this.query = null;
+    this.reader = reader;
+  }
+
+  /** Delegates to the expanded BooleanQuery. */
+  final float sumOfSquaredWeights(Searcher searcher)
+    throws IOException {
+    return getQuery().sumOfSquaredWeights(searcher);
+  }
+
+  /** Delegates to the expanded BooleanQuery; an IOException raised while
+    building the expansion is rethrown as a RuntimeException because this
+    method declares no checked exception. */
+  void normalize(float norm) {
+    try {
+      getQuery().normalize(norm);
+    } catch (IOException e) {
+      throw new RuntimeException(e.toString());
+    }
+  }
+
+  /** Delegates to the expanded BooleanQuery. */
+  Scorer scorer(IndexReader reader) throws IOException {
+    return getQuery().scorer(reader);
+  }
+
+  /** Returns the BooleanQuery holding one boosted TermQuery per index term
+    that starts with the prefix, building it on first use from the reader
+    passed to prepare(). */
+  private BooleanQuery getQuery() throws IOException {
+    if (query == null) {
+      BooleanQuery q = new BooleanQuery();
+      // "enum" is a reserved word since Java 5, hence the longer name
+      TermEnum termEnum = reader.terms(prefix);
+      try {
+        String prefixText = prefix.text();
+        String prefixField = prefix.field();
+        do {
+          Term term = termEnum.term();
+          if (term != null &&
+              term.text().startsWith(prefixText) &&
+              term.field().equals(prefixField)) {
+            TermQuery tq = new TermQuery(term);   // found a match
+            tq.setBoost(boost);                   // set the boost
+            q.add(tq, false, false);              // add to q
+            //System.out.println("added " + term);
+          } else {
+            break;                                // first non-matching term ends the scan
+          }
+        } while (termEnum.next());
+      } finally {
+        termEnum.close();
+      }
+      query = q;
+    }
+    return query;
+  }
+
+  /** Prints a user-readable version of this query. */
+  public String toString(String field) {
+    StringBuffer buffer = new StringBuffer();
+    if (!prefix.field().equals(field)) {
+      buffer.append(prefix.field());
+      buffer.append(":");
+    }
+    buffer.append(prefix.text());
+    buffer.append('*');
+    if (boost != 1.0f) {
+      buffer.append("^");
+      buffer.append(Float.toString(boost));
+    }
+    return buffer.toString();
+  }
+}
diff --git a/src/java/org/apache/lucene/search/Query.java b/src/java/org/apache/lucene/search/Query.java
new file mode 100644
index 00000000000..179bfe9a8ca
--- /dev/null
+++ b/src/java/org/apache/lucene/search/Query.java
@@ -0,0 +1,101 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import java.util.Hashtable;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.IndexReader;
+
+/** The abstract base class for queries.
+  <p>Instantiable subclasses are:
+  <ul>
+  <li> {@link TermQuery}
+  <li> {@link PhraseQuery}
+  <li> {@link BooleanQuery}
+  <li> {@link PrefixQuery}
+  </ul>
+  <p>A parser for queries is contained in:
+  <ul>
+  <li> <code>QueryParser</code>
+  </ul>
+  */
+abstract public class Query {
+
+  // ---- query weighting ----
+
+  /** Computes this query's weight and returns its square. */
+  abstract float sumOfSquaredWeights(Searcher searcher) throws IOException;
+
+  /** Applies the normalization factor computed in
+    {@link #scorer(Query, Searcher, IndexReader)}. */
+  abstract void normalize(float norm);
+
+  // ---- query evaluation ----
+
+  /** Returns a scorer for this query over <code>reader</code>. */
+  abstract Scorer scorer(IndexReader reader) throws IOException;
+
+  /** Hook invoked before weighting; does nothing by default. */
+  void prepare(IndexReader reader) {}
+
+  /** Prepares, weights and normalizes <code>query</code>, then returns its
+    scorer.  The normalization factor is one over the square root of the
+    query's sum of squared weights. */
+  static Scorer scorer(Query query, Searcher searcher, IndexReader reader)
+    throws IOException {
+    query.prepare(reader);
+    float sumOfSquares = query.sumOfSquaredWeights(searcher);
+    query.normalize(1.0f / (float)Math.sqrt(sumOfSquares));
+    return query.scorer(reader);
+  }
+
+  /** Prints a query to a string, with <code>field</code> as the default field
+    for terms.
+    <p>The representation used is one that is readable by
+    <code>QueryParser</code>
+    (although, if the query was created by the parser, the printed
+    representation may not be exactly what was parsed). */
+  abstract public String toString(String field);
+}
diff --git a/src/java/org/apache/lucene/search/ScoreDoc.java b/src/java/org/apache/lucene/search/ScoreDoc.java
new file mode 100644
index 00000000000..5e2f010c939
--- /dev/null
+++ b/src/java/org/apache/lucene/search/ScoreDoc.java
@@ -0,0 +1,65 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+/** Plain holder pairing a document number with its score. */
+final class ScoreDoc {
+  float score;        // score assigned to the document
+  int doc;            // document number
+
+  /** Creates a holder for document <code>d</code> with score <code>s</code>. */
+  ScoreDoc(int d, float s) {
+    this.doc = d;
+    this.score = s;
+  }
+}
diff --git a/src/java/org/apache/lucene/search/Scorer.java b/src/java/org/apache/lucene/search/Scorer.java
new file mode 100644
index 00000000000..863a447c7d9
--- /dev/null
+++ b/src/java/org/apache/lucene/search/Scorer.java
@@ -0,0 +1,61 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+
+/** Abstract base for query scorers: an implementation reports the documents
+ * it matches, with their scores, to a HitCollector. */
+abstract class Scorer {
+ /** Scores matching documents and hands them to hc.  PhraseScorer reads the
+ * second argument as an exclusive upper bound on document numbers;
+ * presumably other implementations do the same -- confirm per subclass. */
+ abstract void score(HitCollector hc, int maxDoc) throws IOException;
+}
diff --git a/src/java/org/apache/lucene/search/Searcher.java b/src/java/org/apache/lucene/search/Searcher.java
new file mode 100644
index 00000000000..9c988109315
--- /dev/null
+++ b/src/java/org/apache/lucene/search/Searcher.java
@@ -0,0 +1,87 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.Term;
+
+/** The abstract base class for search implementations.
+ <p>Subclasses implement search over a single index, over multiple indices,
+ and over indices on remote servers.
+ */
+public abstract class Searcher {
+
+ /** Returns the documents matching <code>query</code>. Delegates to
+ {@link #search(Query, Filter)} with a null filter. */
+ public final Hits search(Query query) throws IOException {
+ return search(query, null);
+ }
+
+ /** Returns the documents matching <code>query</code> and
+ <code>filter</code>. The result is a new {@link Hits} constructed from
+ this searcher, the query and the filter. */
+ public final Hits search(Query query, Filter filter) throws IOException {
+ return new Hits(this, query, filter);
+ }
+
+ /** Frees resources associated with this Searcher. */
+ abstract public void close() throws IOException;
+
+ /** Document frequency of <code>term</code>. Similarity.idf combines this
+ value with {@link #maxDoc()} when weighting a query term. */
+ abstract int docFreq(Term term) throws IOException;
+ /** Document count used by Similarity.idf.
+ NOTE(review): presumably an upper bound on document numbers that still
+ counts deleted documents -- confirm against the implementations. */
+ abstract int maxDoc() throws IOException;
+ /** Low-level search returning a {@link TopDocs}.
+ NOTE(review): presumably <code>n</code> caps the number of top-scoring
+ hits returned -- confirm against the implementations. */
+ abstract TopDocs search(Query query, Filter filter, int n)
+ throws IOException;
+ /** Returns a {@link Document} for <code>i</code>.
+ NOTE(review): presumably <code>i</code> is a document number and the
+ result holds that document's stored fields -- confirm. */
+ abstract Document doc(int i) throws IOException;
+
+}
diff --git a/src/java/org/apache/lucene/search/Similarity.java b/src/java/org/apache/lucene/search/Similarity.java
new file mode 100644
index 00000000000..ff8a4f1cf5c
--- /dev/null
+++ b/src/java/org/apache/lucene/search/Similarity.java
@@ -0,0 +1,114 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import org.apache.lucene.index.Term;
+
+/** Internal class used for scoring.
+ * <p>Public only so that the indexing code can compute and store the
+ * normalization byte for each document. */
+public final class Similarity {
+  private Similarity() {}                         // no public constructor
+
+  /** Computes the normalization byte for a document given the total number of
+   * terms contained in the document.  These values are stored in an index and
+   * used by the search code.
+   * @param numTerms number of terms in the document
+   * @return <code>ceil(255 / sqrt(numTerms))</code> narrowed to a byte, or
+   *  zero when <code>numTerms</code> is not positive */
+  public static final byte norm(int numTerms) {
+    // Zero is the encoding reserved for zero-length and deleted documents.
+    // Without this guard 255.0 / sqrt(0) is +Infinity, which narrows to the
+    // byte 0xFF -- the largest norm rather than the reserved zero.  Negative
+    // counts already narrowed to zero (via NaN), so they are unchanged.
+    if (numTerms <= 0)
+      return 0;
+
+    // Scales 1/sqrt(numTerms) into a byte, i.e. 255/sqrt(numTerms).
+    // Math.ceil is used to ensure that even very long documents don't get a
+    // zero norm byte, as that value is reserved (see above).
+    return (byte) Math.ceil(255.0 / Math.sqrt(numTerms));
+  }
+
+  /** Builds the decoding table for norm bytes: entry i holds i/255. */
+  private static final float[] makeNormTable() {
+    float[] result = new float[256];
+    for (int i = 0; i < 256; i++)
+      result[i] = i / 255.0F;
+    return result;
+  }
+
+  /** Decoded float value for each of the 256 possible norm bytes. */
+  static final float[] NORM_TABLE = makeNormTable();
+
+  /** Un-scales from the byte encoding of a norm into a float, i.e.,
+   * approximately 1/sqrt(numTerms).  The byte is read as unsigned. */
+  static final float norm(byte normByte) {
+    return NORM_TABLE[normByte & 0xFF];
+  }
+
+  /** Term-frequency factor: the square root of <code>freq</code>. */
+  static final float tf(int freq) {
+    return (float)Math.sqrt(freq);
+  }
+
+  /** Term-frequency factor for a fractional frequency: the square root of
+   * <code>freq</code>. */
+  static final float tf(float freq) {
+    return (float)Math.sqrt(freq);
+  }
+
+  /** Inverse document frequency of <code>term</code> as seen by
+   * <code>searcher</code>. */
+  static final float idf(Term term, Searcher searcher) throws IOException {
+    // Use maxDoc() instead of numDocs() because it is proportional to
+    // docFreq(), i.e., when one is inaccurate, so is the other, and in the
+    // same way.
+    return idf(searcher.docFreq(term), searcher.maxDoc());
+  }
+
+  /** Computes <code>ln(numDocs / (docFreq + 1)) + 1</code>. */
+  static final float idf(int docFreq, int numDocs) {
+    return (float)(Math.log(numDocs/(double)(docFreq+1)) + 1.0);
+  }
+
+  /** Fraction <code>overlap / maxOverlap</code>, computed in floating point. */
+  static final float coord(int overlap, int maxOverlap) {
+    return overlap / (float)maxOverlap;
+  }
+}
diff --git a/src/java/org/apache/lucene/search/SloppyPhraseScorer.java b/src/java/org/apache/lucene/search/SloppyPhraseScorer.java
new file mode 100644
index 00000000000..74bec4a5343
--- /dev/null
+++ b/src/java/org/apache/lucene/search/SloppyPhraseScorer.java
@@ -0,0 +1,106 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import java.util.Vector;
+import org.apache.lucene.util.*;
+import org.apache.lucene.index.*;
+
+/** A {@link PhraseScorer} whose phrase frequency tolerates distance between
+ * the terms: a window of term positions counts as a match when its computed
+ * length does not exceed <code>slop</code>, and shorter windows contribute
+ * more. Uses <code>pq</code> and <code>first</code>, which are not declared
+ * here and so come from PhraseScorer. */
+final class SloppyPhraseScorer extends PhraseScorer {
+ private int slop; // largest match length that still counts as a match
+
+ /** Forwards <code>tps</code>, <code>n</code> and <code>w</code> to the
+ * PhraseScorer constructor and records <code>s</code> as the slop.
+ * NOTE(review): <code>n</code> and <code>w</code> are presumably the norm
+ * bytes and query weight, as in TermScorer -- confirm in PhraseScorer. */
+ SloppyPhraseScorer(TermPositions[] tps, int s, byte[] n, float w)
+ throws IOException {
+ super(tps, n, w);
+ slop = s;
+ }
+
+ /** Computes the sloppy phrase frequency for the current positions.
+ * Rebuilds the queue from the position list, then repeatedly pops one
+ * PhrasePositions, advances it until it passes the position now at the top
+ * of the queue, scores the resulting window and puts it back. The loop ends
+ * once any term runs out of positions.
+ * @return the sum of <code>1 / (matchLength + 1)</code> over every window
+ * whose <code>matchLength</code> is at most <code>slop</code> */
+ protected final float phraseFreq() throws IOException {
+ pq.clear();
+ int end = 0; // largest position seen among the terms
+ for (PhrasePositions pp = first; pp != null; pp = pp.next) {
+ pp.firstPosition();
+ if (pp.position > end)
+ end = pp.position;
+ pq.put(pp); // build pq from list
+ }
+
+ float freq = 0.0f;
+ boolean done = false;
+ do {
+ // presumably pq orders by position, so pop() yields the term lagging
+ // furthest behind -- TODO confirm against the queue implementation
+ PhrasePositions pp = (PhrasePositions)pq.pop();
+ int start = pp.position;
+ int next = ((PhrasePositions)pq.top()).position;
+ // start ends up as the last position of pp that is still <= next
+ for (int pos = start; pos <= next; pos = pp.position) {
+ start = pos; // advance pp to min window
+ if (!pp.nextPosition()) {
+ done = true; // ran out of a term -- done
+ break;
+ }
+ }
+
+ int matchLength = end - start;
+ if (matchLength <= slop)
+ freq += 1.0 / (matchLength + 1); // penalize longer matches
+
+ if (pp.position > end) // pp may now be the furthest term
+ end = pp.position;
+ pq.put(pp); // restore pq
+ } while (!done);
+
+ return freq;
+ }
+}
diff --git a/src/java/org/apache/lucene/search/TermQuery.java b/src/java/org/apache/lucene/search/TermQuery.java
new file mode 100644
index 00000000000..8f5b0c0738b
--- /dev/null
+++ b/src/java/org/apache/lucene/search/TermQuery.java
@@ -0,0 +1,120 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermDocs;
+import org.apache.lucene.index.IndexReader;
+
+/** A Query that matches documents containing a term.
+  This may be combined with other terms with a {@link BooleanQuery}.
+  */
+public final class TermQuery extends Query {
+  private Term term;
+  private float boost = 1.0f;
+  private float idf = 0.0f;
+  private float weight = 0.0f;
+
+  /** Constructs a query for the term <code>t</code>. */
+  public TermQuery(Term t) {
+    this.term = t;
+  }
+
+  /** Sets the boost for this term to <code>b</code>.  Documents containing
+    this term will (in addition to the normal weightings) have their score
+    multiplied by <code>b</code>. */
+  public void setBoost(float b) {
+    this.boost = b;
+  }
+
+  /** Gets the boost for this term.  Documents containing
+    this term will (in addition to the normal weightings) have their score
+    multiplied by the boost.  The boost is 1.0 by default. */
+  public float getBoost() {
+    return this.boost;
+  }
+
+  /** Recomputes and stores the term's idf and its un-normalized weight
+    (idf times boost), then returns the square of that weight. */
+  final float sumOfSquaredWeights(Searcher searcher) throws IOException {
+    this.idf = Similarity.idf(term, searcher);
+    this.weight = this.idf * this.boost;
+    return this.weight * this.weight;             // square term weights
+  }
+
+  /** Scales the stored weight by <code>norm</code> and then by the stored
+    idf. */
+  final void normalize(float norm) {
+    this.weight = this.weight * norm;             // normalize for query
+    this.weight = this.weight * this.idf;         // factor from document
+  }
+
+  /** Returns a TermScorer over the term's postings in <code>reader</code>,
+    or null when the reader has no postings for the term. */
+  Scorer scorer(IndexReader reader)
+       throws IOException {
+    final TermDocs postings = reader.termDocs(term);
+    return postings == null
+      ? null
+      : new TermScorer(postings, reader.norms(term.field()), weight);
+  }
+
+  /** Prints a user-readable version of this query. */
+  public String toString(String field) {
+    final StringBuffer out = new StringBuffer();
+    final String termField = term.field();
+    if (!termField.equals(field))                 // prefix only a foreign field
+      out.append(termField).append(":");
+    out.append(term.text());
+    if (boost != 1.0f)                            // omit the default boost
+      out.append("^").append(Float.toString(boost));
+    return out.toString();
+  }
+}
diff --git a/src/java/org/apache/lucene/search/TermScorer.java b/src/java/org/apache/lucene/search/TermScorer.java
new file mode 100644
index 00000000000..cda5d202e12
--- /dev/null
+++ b/src/java/org/apache/lucene/search/TermScorer.java
@@ -0,0 +1,119 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import org.apache.lucene.index.TermDocs;
+
+/** Scores the documents that contain a single term. Postings are pulled from
+ * a {@link TermDocs} in batches of up to 128 (document, frequency) pairs, and
+ * each hit's score is tf(freq) * weight * the decoded norm for the document. */
+final class TermScorer extends Scorer {
+ private TermDocs termDocs; // source of the term's postings
+ private byte[] norms; // encoded norm bytes, indexed by document number
+ private float weight; // multiplied into tf(freq) for every hit
+ private int doc; // next document to score; Integer.MAX_VALUE once exhausted
+
+ private final int[] docs = new int[128]; // buffered doc numbers
+ private final int[] freqs = new int[128]; // buffered term freqs
+ private int pointer; // index of the current entry in docs/freqs
+ private int pointerMax; // value returned by the last read; 0 means no more postings
+
+ private static final int SCORE_CACHE_SIZE = 32;
+ private float[] scoreCache = new float[SCORE_CACHE_SIZE]; // tf(i)*weight for small i
+
+ /** Stores the arguments, precomputes tf(i) * w for every frequency below
+ * SCORE_CACHE_SIZE, and reads the first batch of postings. When that first
+ * read returns zero the stream is closed and the scorer starts exhausted.
+ * @param td postings for the term
+ * @param n norm bytes, looked up by document number while scoring
+ * @param w weight applied to every hit */
+ TermScorer(TermDocs td, byte[] n, float w) throws IOException {
+ termDocs = td;
+ norms = n;
+ weight = w;
+
+ for (int i = 0; i < SCORE_CACHE_SIZE; i++)
+ scoreCache[i] = Similarity.tf(i) * weight;
+
+ pointerMax = termDocs.read(docs, freqs); // fill buffers
+
+ if (pointerMax != 0)
+ doc = docs[0];
+ else {
+ termDocs.close(); // close stream
+ doc = Integer.MAX_VALUE; // set to sentinel value
+ }
+ }
+
+ /** Collects every remaining posting whose document number is below
+ * <code>end</code>, refilling the buffers as they run out. The position is
+ * remembered in <code>doc</code> and <code>pointer</code>, so a later call
+ * resumes where this one stopped.
+ * @param c receives each document number with its score
+ * @param end exclusive upper bound on the document numbers to score */
+ final void score(HitCollector c, final int end) throws IOException {
+ int d = doc; // cache doc in local
+ while (d < end) { // for docs in window
+ final int f = freqs[pointer];
+ float score = // compute tf(f)*weight
+ f < SCORE_CACHE_SIZE // check cache
+ ? scoreCache[f] // cache hit
+ : Similarity.tf(f)*weight; // cache miss
+
+ score *= Similarity.norm(norms[d]); // normalize for field
+
+ c.collect(d, score); // collect score
+
+ if (++pointer == pointerMax) { // consumed the whole buffer
+ pointerMax = termDocs.read(docs, freqs); // refill buffers
+ if (pointerMax != 0) {
+ pointer = 0;
+ } else {
+ termDocs.close(); // close stream
+ doc = Integer.MAX_VALUE; // set to sentinel value
+ return;
+ }
+ }
+ d = docs[pointer];
+ }
+ doc = d; // flush cache
+ }
+}
diff --git a/src/java/org/apache/lucene/search/TopDocs.java b/src/java/org/apache/lucene/search/TopDocs.java
new file mode 100644
index 00000000000..52506bfdd26
--- /dev/null
+++ b/src/java/org/apache/lucene/search/TopDocs.java
@@ -0,0 +1,65 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+/** Package-private holder pairing a hit count with an array of ScoreDocs.
+ * NOTE(review): presumably totalHits counts every matching document while
+ * scoreDocs holds only the top-scoring ones -- confirm against the callers. */
+final class TopDocs {
+  int totalHits;
+  ScoreDoc[] scoreDocs;
+
+  /** Stores both arguments as given; the array is kept by reference, not
+   * copied. */
+  TopDocs(int th, ScoreDoc[] sds) {
+    this.scoreDocs = sds;
+    this.totalHits = th;
+  }
+}
diff --git a/src/java/org/apache/lucene/search/WildcardQuery.java b/src/java/org/apache/lucene/search/WildcardQuery.java
new file mode 100644
index 00000000000..78fd8ebd437
--- /dev/null
+++ b/src/java/org/apache/lucene/search/WildcardQuery.java
@@ -0,0 +1,76 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import java.io.IOException;
+
+/** Implements the wildcard search query */
+public final class WildcardQuery extends MultiTermQuery {
+  private Term wildcardTerm;
+
+  /** Passes <code>term</code> to MultiTermQuery and also keeps it as the
+   * wildcard pattern term. */
+  public WildcardQuery(Term term) {
+    super(term);
+    this.wildcardTerm = term;
+  }
+
+  /** Installs a WildcardTermEnum over <code>reader</code> for the stored
+   * wildcard term.
+   * NOTE(review): an IOException raised here is discarded, and if creating
+   * the enumeration fails, setEnum is never called -- confirm that callers
+   * cope with a query that was never given an enumeration. */
+  final void prepare(IndexReader reader) {
+    try {
+      final WildcardTermEnum matches =
+        new WildcardTermEnum(reader, wildcardTerm);
+      setEnum(matches);
+    } catch (IOException ignored) {
+      // swallowed: prepare() declares no checked exceptions
+    }
+  }
+
+}
diff --git a/src/java/org/apache/lucene/search/WildcardTermEnum.java b/src/java/org/apache/lucene/search/WildcardTermEnum.java
new file mode 100644
index 00000000000..778cac451f3
--- /dev/null
+++ b/src/java/org/apache/lucene/search/WildcardTermEnum.java
@@ -0,0 +1,149 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermEnum;
+
+/** Subclass of FilteredTermEnum for enumerating all terms that match the specified wildcard filter term.
+
+ <p>Term enumerations are always ordered by Term.compareTo(). Each term in
+ the enumeration is greater than all that precede it. */
+public class WildcardTermEnum extends FilteredTermEnum {
+  Term searchTerm;
+  String field = "";
+  String text = "";  // pattern remainder, beginning at the first wildcard
+  String pre = "";   // literal prefix that precedes the first wildcard
+  int preLen = 0;
+  boolean fieldMatch = false;
+  boolean endEnum = false;
+
+  /** Creates an enumeration of the terms of <code>reader</code> matching the
+   *  wildcard pattern in <code>term</code>, whose text is split at its first
+   *  wildcard.  A pattern without wildcards is all prefix: it matches only itself. */
+  public WildcardTermEnum(IndexReader reader, Term term) throws IOException {
+    super(reader, term);
+    searchTerm = term;
+    field = searchTerm.field();
+    text = searchTerm.text();
+
+    int sidx = text.indexOf(WILDCARD_STRING);
+    int cidx = text.indexOf(WILDCARD_CHAR);
+    int idx = sidx;
+    if (idx == -1) idx = cidx;
+    else if (cidx >= 0) idx = Math.min(idx, cidx);
+    if (idx == -1) idx = text.length();  // no wildcard: avoid substring(0,-1)
+    pre = text.substring(0,idx);
+    preLen = pre.length();
+    text = text.substring(preLen);
+    setEnum(reader.terms(new Term(searchTerm.field(), pre)));
+  }
+
+  /** Accepts <code>term</code> when it is in the searched field, starts with the
+   *  prefix and matches the pattern; a term outside field or prefix ends the enum. */
+  final protected boolean termCompare(Term term) {
+    if (field == term.field()) {  // identity test kept as-is; presumably field names are interned -- confirm
+      String searchText = term.text();
+      if (searchText.startsWith(pre)) {
+        return wildcardEquals(text, 0, searchText, preLen);
+      }
+    }
+    endEnum = true;
+    return false;
+  }
+
+  final public float difference() {  // every match gets the same value
+    return 1.0f;
+  }
+
+  final public boolean endEnum() {  // set by termCompare on the first out-of-range term
+    return endEnum;
+  }
+
+  public static final char WILDCARD_STRING = '*';  // matches zero or more characters
+  public static final char WILDCARD_CHAR = '?';    // matches exactly one character
+
+  /** String equality with support for wildcards: tells whether <code>string</code>,
+   *  from <code>stringIdx</code> on, matches <code>pattern</code>, from <code>patternIdx</code> on. */
+  public static final boolean wildcardEquals(String pattern, int patternIdx, String string, int stringIdx) {
+    int p = patternIdx;
+    for (int s = stringIdx; ; ++p, ++s) {
+      if (s >= string.length()) {  // string used up: only '*' may remain in the pattern
+        while (p < pattern.length() && pattern.charAt(p) == WILDCARD_STRING) ++p;
+        return p >= pattern.length();
+      }
+      if (p >= pattern.length()) return false;  // pattern used up first
+      char c = pattern.charAt(p);
+      if (c == WILDCARD_CHAR) continue;
+      if (c == WILDCARD_STRING) {
+        ++p;  // try every possible length for '*', longest first
+        for (int i = string.length(); i >= s; --i)
+          if (wildcardEquals(pattern, p, string, i)) return true;
+        return false;
+      }
+      if (c != string.charAt(s)) return false;
+    }
+  }
+
+  /** Calls super.close() and drops the references to the pattern. */
+  public void close() throws IOException {
+    super.close();
+    searchTerm = null;
+    field = null;
+    text = null;
+  }
+}
diff --git a/src/java/org/apache/lucene/search/package.html b/src/java/org/apache/lucene/search/package.html
new file mode 100644
index 00000000000..0f877ccfa75
--- /dev/null
+++ b/src/java/org/apache/lucene/search/package.html
@@ -0,0 +1,10 @@
+
+
+
+
+
+
+
+Search over indices.
+
+
diff --git a/src/java/org/apache/lucene/store/Directory.java b/src/java/org/apache/lucene/store/Directory.java
new file mode 100644
index 00000000000..2660c03b5e4
--- /dev/null
+++ b/src/java/org/apache/lucene/store/Directory.java
@@ -0,0 +1,114 @@
+package org.apache.lucene.store;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+
+/*
+ Java's filesystem API is not used directly, but rather through these
+ classes. This permits:
+ . implementation of RAM-based indices, useful for summarization, etc.;
+ . implementation of an index as a single file.
+
+*/
+
+/**
+ A Directory is a flat list of files. Files may be written once,
+ when they are created. Once a file is created it may only be opened for
+ read, or deleted. Random access is permitted when reading and writing.
+
+ @author Doug Cutting
+*/
+
+public abstract class Directory {
+  /** Lists the directory: the returned array holds one name per file. */
+  public abstract String[] list()
+      throws IOException, SecurityException;
+
+  /** Tells whether the directory contains a file called <code>name</code>. */
+  public abstract boolean fileExists(String name)
+      throws IOException, SecurityException;
+
+  /** Returns the last-modification time of the file called <code>name</code>. */
+  public abstract long fileModified(String name)
+      throws IOException, SecurityException;
+
+  /** Deletes the existing file called <code>name</code> from the directory. */
+  public abstract void deleteFile(String name)
+      throws IOException, SecurityException;
+
+  /** Gives the existing file <code>from</code> the new name <code>to</code>.
+    A file that already carries the new name is replaced, and that
+    replacement should be atomic. */
+  public abstract void renameFile(String from, String to)
+      throws IOException, SecurityException;
+
+  /** Returns the length of the file called <code>name</code>. */
+  public abstract long fileLength(String name)
+      throws IOException, SecurityException;
+
+  /** Creates a new, empty file called <code>name</code> in the directory and
+    returns a stream that writes to it. */
+  public abstract OutputStream createFile(String name)
+      throws IOException, SecurityException;
+
+  /** Returns a stream that reads the existing file called <code>name</code>. */
+  public abstract InputStream openFile(String name)
+      throws IOException, SecurityException;
+
+  /** Closes the store. */
+  public abstract void close()
+      throws IOException, SecurityException;
+}
diff --git a/src/java/org/apache/lucene/store/FSDirectory.java b/src/java/org/apache/lucene/store/FSDirectory.java
new file mode 100644
index 00000000000..e158501c465
--- /dev/null
+++ b/src/java/org/apache/lucene/store/FSDirectory.java
@@ -0,0 +1,308 @@
+package org.apache.lucene.store;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import java.io.File;
+import java.io.RandomAccessFile;
+import java.io.FileNotFoundException;
+import java.util.Hashtable;
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.InputStream;
+import org.apache.lucene.store.OutputStream;
+
+/**
+ Straightforward implementation of Directory as a directory of files.
+ @see Directory
+ @author Doug Cutting
+*/
+
+final public class FSDirectory extends Directory {
+  /** This cache of directories ensures that there is a unique Directory
+   * instance per path, so that synchronization on the Directory can be used to
+   * synchronize access between readers and writers.
+   * This should be a WeakHashMap, so that entries can be GC'd, but that would
+   * require Java 1.2.  Instead we use refcounts...  */
+  private static final Hashtable DIRECTORIES = new Hashtable();
+
+  /** Returns the directory instance for the named location.
+   * Behaves exactly like getDirectory(File, boolean) on <code>new File(path)</code>.
+   * @param path the path to the directory.
+   * @param create if true, create, or erase any existing contents.
+   * @return the FSDirectory for the named file.  */
+  public static FSDirectory getDirectory(String path, boolean create)
+      throws IOException {
+    return getDirectory(new File(path), create);
+  }
+
+  /** Returns the directory instance for the named location.
+   *
+   * <p>Directories are cached, so that, for a given canonical path, the same
+   * FSDirectory instance will always be returned.  This permits
+   * synchronization on directories.  An already cached instance is erased too
+   * when <code>create</code> is true.
+   * @param file the path to the directory.
+   * @param create if true, create, or erase any existing contents.
+   * @return the FSDirectory for the named file.  */
+  public static FSDirectory getDirectory(File file, boolean create)
+      throws IOException {
+    file = new File(file.getCanonicalPath());
+    FSDirectory dir;
+    synchronized (DIRECTORIES) {
+      dir = (FSDirectory)DIRECTORIES.get(file);
+      if (dir == null) {
+        dir = new FSDirectory(file, create);
+        DIRECTORIES.put(file, dir);
+      } else if (create) {             // cached instance: honor the erase request
+        dir.create();
+      }
+    }
+    synchronized (dir) {
+      dir.refCount++;
+    }
+    return dir;
+  }
+
+  private File directory = null;
+  private int refCount;                // open handles; close() drops the cache entry at zero
+
+  private FSDirectory(File path, boolean create) throws IOException {
+    directory = path;
+    if (!directory.exists() && create)
+      directory.mkdir();
+    if (!directory.isDirectory())
+      throw new IOException(path + " not a directory");
+    if (create)                        // clear old files
+      create();
+  }
+
+  /** Deletes every file in the directory.  Deliberately not synchronized: it runs
+   * while DIRECTORIES is locked, and close() takes the two locks in the other order.
+   * @throws IOException if the directory cannot be listed or a file cannot be deleted. */
+  private void create() throws IOException {
+    String[] files = directory.list();
+    if (files == null)                 // File.list() returns null on an I/O error
+      throw new IOException("couldn't list " + directory);
+    for (int i = 0; i < files.length; i++) {
+      File file = new File(directory, files[i]);
+      if (!file.delete())
+        throw new IOException("couldn't delete " + files[i]);
+    }
+  }
+
+  /** Returns an array of strings, one for each file in the directory. */
+  public final String[] list() throws IOException {
+    return directory.list();
+  }
+
+  /** Returns true iff a file with the given name exists. */
+  public final boolean fileExists(String name) throws IOException {
+    File file = new File(directory, name);
+    return file.exists();
+  }
+
+  /** Returns the time the named file was last modified. */
+  public final long fileModified(String name) throws IOException {
+    File file = new File(directory, name);
+    return file.lastModified();
+  }
+
+  /** Returns the time the named file in <code>directory</code> was last modified. */
+  public static final long fileModified(File directory, String name)
+       throws IOException {
+    File file = new File(directory, name);
+    return file.lastModified();
+  }
+
+  /** Returns the length in bytes of a file in the directory. */
+  public final long fileLength(String name) throws IOException {
+    File file = new File(directory, name);
+    return file.length();
+  }
+
+  /** Removes an existing file in the directory. */
+  public final void deleteFile(String name) throws IOException {
+    File file = new File(directory, name);
+    if (!file.delete())
+      throw new IOException("couldn't delete " + name);
+  }
+
+  /** Renames an existing file in the directory. */
+  public final synchronized void renameFile(String from, String to)
+      throws IOException {
+    File old = new File(directory, from);
+    File nu = new File(directory, to);
+    /* This is not atomic.  If the program crashes between the call to
+       delete() and the call to renameTo() then we're screwed, but I've
+       been unable to figure out how else to do this... */
+    if (nu.exists())
+      if (!nu.delete())
+        throw new IOException("couldn't delete " + to);
+    if (!old.renameTo(nu))
+      throw new IOException("couldn't rename " + from + " to " + to);
+  }
+
+  /** Creates a new, empty file in the directory with the given name.
+      Returns a stream writing this file. */
+  public final OutputStream createFile(String name) throws IOException {
+    return new FSOutputStream(new File(directory, name));
+  }
+
+  /** Returns a stream reading an existing file. */
+  public final InputStream openFile(String name) throws IOException {
+    return new FSInputStream(new File(directory, name));
+  }
+
+  /** Closes the store to future operations. */
+  public final synchronized void close() throws IOException {
+    if (--refCount <= 0) {
+      synchronized (DIRECTORIES) {
+        DIRECTORIES.remove(directory);
+      }
+    }
+  }
+}
+
+
+final class FSInputStream extends InputStream {
+  /** A RandomAccessFile that tracks its offset.  NOTE(review): inner, not static -- it references its creating stream; verify finalize() before changing that. */
+  private class Descriptor extends RandomAccessFile {
+    public long position;
+    public Descriptor(File file, String mode) throws IOException {
+      super(file, mode);
+    }
+  }
+  Descriptor file = null;  // shared by a stream and its clones
+  boolean isClone;
+
+  public FSInputStream(File path) throws IOException {
+    this.file = new Descriptor(path, "r");  // read-only
+    this.length = this.file.length();
+  }
+
+  /** Fills b[offset .. offset+len) from the file, starting at getFilePointer(). */
+  protected final void readInternal(byte[] b, int offset, int len)
+      throws IOException {
+    synchronized (file) {
+      final long wanted = getFilePointer();
+      if (file.position != wanted) {  // descriptor is elsewhere: reposition it
+        file.seek(wanted);
+        file.position = wanted;
+      }
+      int done = 0;
+      do {
+        final int count = file.read(b, offset + done, len - done);
+        if (count == -1)
+          throw new IOException("read past EOF");
+        file.position += count;
+        done += count;
+      } while (done < len);
+    }
+  }
+
+  public final void close() throws IOException {
+    if (isClone) return;  // a clone leaves the shared descriptor open
+    file.close();
+  }
+
+  /** Does nothing: readInternal positions the file itself. */
+  protected final void seekInternal(long position) throws IOException {
+  }
+
+  protected final void finalize() throws IOException {
+    close();  // close the file
+  }
+
+  public Object clone() {
+    FSInputStream copy = (FSInputStream)super.clone();
+    copy.isClone = true;
+    return copy;
+  }
+}
+
+
+final class FSOutputStream extends OutputStream {
+  RandomAccessFile file = null;
+  public FSOutputStream(File path) throws IOException {
+    if (path.isFile())  // never reopen a file that already exists
+      throw new IOException(path + " already exists");
+    file = new RandomAccessFile(path, "rw");
+  }
+
+  /** Writes the first <code>size</code> bytes of <code>b</code> to the file. */
+  public final void flushBuffer(byte[] b, int size) throws IOException {
+    file.write(b, 0, size);
+  }
+
+  /** Flushes buffered bytes, then closes the file even if that flush fails. */
+  public final void close() throws IOException {
+    try {
+      super.close();
+    } finally {
+      file.close();  // used to be skipped, leaking the handle, when the flush threw
+    }
+  }
+  public final void seek(long pos) throws IOException {
+    super.seek(pos);  // flushes the buffer before the file is moved
+    file.seek(pos);
+  }
+  public final long length() throws IOException {
+    return file.length();
+  }
+  protected final void finalize() throws IOException {
+    file.close();  // close the file
+  }
+}
diff --git a/src/java/org/apache/lucene/store/InputStream.java b/src/java/org/apache/lucene/store/InputStream.java
new file mode 100644
index 00000000000..3c5d452529d
--- /dev/null
+++ b/src/java/org/apache/lucene/store/InputStream.java
@@ -0,0 +1,214 @@
+package org.apache.lucene.store;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+
+/**
+ Abstract class for input from a file in a Directory.
+ @author Doug Cutting
+*/
+
+/** A random-access input stream */
+abstract public class InputStream implements Cloneable {
+  final static int BUFFER_SIZE = OutputStream.BUFFER_SIZE;
+
+  private byte[] buffer;               // allocated lazily by refill()
+  private char[] chars;                // scratch array reused by readString()
+  private long bufferStart = 0;        // position in file of buffer
+  private int bufferLength = 0;        // end of valid bytes
+  private int bufferPosition = 0;      // next byte to read
+
+  protected long length;               // set by subclasses
+
+  /** Reads and returns the next byte, refilling the buffer when it is used up. */
+  public final byte readByte() throws IOException {
+    if (bufferPosition >= bufferLength)
+      refill();
+    return buffer[bufferPosition++];
+  }
+
+  /** Reads <code>len</code> bytes into <code>b</code>, starting at <code>offset</code>. */
+  public final void readBytes(byte[] b, int offset, int len) throws IOException {
+    if (len < BUFFER_SIZE) {
+      for (int i = 0; i < len; i++)    // read byte-by-byte
+        b[i + offset] = (byte)readByte();
+    } else {                           // read all-at-once
+      long start = getFilePointer();
+      seekInternal(start);
+      readInternal(b, offset, len);
+      bufferStart = start + len;       // adjust stream variables
+      bufferPosition = 0;
+      bufferLength = 0;                // trigger refill() on read
+    }
+  }
+
+  /** Reads four bytes and returns them as an int, high byte first. */
+  public final int readInt() throws IOException {
+    return ((readByte() & 0xFF) << 24) | ((readByte() & 0xFF) << 16)
+         | ((readByte() & 0xFF) <<  8) |  (readByte() & 0xFF);
+  }
+
+  /** Reads an int stored seven bits per byte, low bits first; a set high bit means more bytes follow. */
+  public final int readVInt() throws IOException {
+    byte b = readByte();
+    int i = b & 0x7F;
+    for (int shift = 7; (b & 0x80) != 0; shift += 7) {
+      b = readByte();
+      i |= (b & 0x7F) << shift;
+    }
+    return i;
+  }
+
+  /** Reads eight bytes and returns them as a long, high int first. */
+  public final long readLong() throws IOException {
+    return (((long)readInt()) << 32) | (readInt() & 0xFFFFFFFFL);
+  }
+
+  /** Reads a long stored in the same seven-bit format that readVInt() reads. */
+  public final long readVLong() throws IOException {
+    byte b = readByte();
+    long i = b & 0x7F;
+    for (int shift = 7; (b & 0x80) != 0; shift += 7) {
+      b = readByte();
+      i |= (b & 0x7FL) << shift;
+    }
+    return i;
+  }
+
+  /** Reads a string: a VInt character count followed by that many encoded chars. */
+  public final String readString() throws IOException {
+    int length = readVInt();
+    if (chars == null || length > chars.length)
+      chars = new char[length];
+    readChars(chars, 0, length);
+    return new String(chars, 0, length);
+  }
+
+  /** Decodes <code>length</code> chars of one to three bytes each into <code>buffer</code>, from <code>start</code> on. */
+  public final void readChars(char[] buffer, int start, int length) throws IOException {
+    final int end = start + length;
+    for (int i = start; i < end; i++) {
+      byte b = readByte();
+      if ((b & 0x80) == 0)
+        buffer[i] = (char)(b & 0x7F);
+      else if ((b & 0xE0) != 0xE0) {
+        buffer[i] = (char)(((b & 0x1F) << 6) | (readByte() & 0x3F));
+      } else
+        buffer[i] = (char)(((b & 0x0F) << 12) | ((readByte() & 0x3F) << 6) | (readByte() & 0x3F));
+    }
+  }
+
+  /** Loads the buffer with up to BUFFER_SIZE bytes from the current position.
+   *  @throws IOException if that position is at or beyond the end of the file. */
+  protected final void refill() throws IOException {
+    long start = bufferStart + bufferPosition;
+    long end = start + BUFFER_SIZE;
+    if (end > length)                  // don't read past EOF
+      end = length;
+    int available = (int)(end - start);
+    if (available <= 0)                // negative after a seek beyond EOF
+      throw new IOException("read past EOF");
+    bufferLength = available;
+    if (buffer == null)
+      buffer = new byte[BUFFER_SIZE];  // allocate buffer lazily
+    readInternal(buffer, 0, bufferLength);
+    bufferStart = start;
+    bufferPosition = 0;
+  }
+
+  /** Implemented by subclasses: reads <code>length</code> bytes into <code>b</code> at <code>offset</code>. */
+  abstract protected void readInternal(byte[] b, int offset, int length) throws IOException;
+
+  /** Closes the stream. */
+  abstract public void close() throws IOException;
+
+  /** Returns the position in the file of the next byte to be read. */
+  public final long getFilePointer() {
+    return bufferStart + bufferPosition;
+  }
+
+  /** Moves the read position to <code>pos</code>; the buffer is kept when it covers <code>pos</code>. */
+  public final void seek(long pos) throws IOException {
+    if (pos >= bufferStart && pos < (bufferStart + bufferLength))
+      bufferPosition = (int)(pos - bufferStart);  // seek within buffer
+    else {
+      bufferStart = pos;
+      bufferPosition = 0;
+      bufferLength = 0;                // trigger refill() on read()
+      seekInternal(pos);
+    }
+  }
+  abstract protected void seekInternal(long pos) throws IOException;
+
+  /** Returns the value of the <code>length</code> field. */
+  public final long length() {
+    return length;
+  }
+
+  /** Returns a copy that has its own byte buffer and no char scratch array. */
+  public Object clone() {
+    InputStream clone = null;
+    try {
+      clone = (InputStream)super.clone();
+    } catch (CloneNotSupportedException e) {}  // not expected: this class is Cloneable
+    if (buffer != null) {
+      clone.buffer = new byte[BUFFER_SIZE];
+      System.arraycopy(buffer, 0, clone.buffer, 0, bufferLength);
+    }
+    clone.chars = null;
+    return clone;
+  }
+}
diff --git a/src/java/org/apache/lucene/store/Makefile b/src/java/org/apache/lucene/store/Makefile
new file mode 100644
index 00000000000..09c091d1839
--- /dev/null
+++ b/src/java/org/apache/lucene/store/Makefile
@@ -0,0 +1,2 @@
+# sub-directory makefile for lucene
+include ../rules.mk
diff --git a/src/java/org/apache/lucene/store/OutputStream.java b/src/java/org/apache/lucene/store/OutputStream.java
new file mode 100644
index 00000000000..177a3f456b7
--- /dev/null
+++ b/src/java/org/apache/lucene/store/OutputStream.java
@@ -0,0 +1,161 @@
+package org.apache.lucene.store;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+
+/**
+ Abstract class for output to a file in a Directory.
+ @author Doug Cutting
+*/
+
+/** A random-access output stream */
+abstract public class OutputStream {
+  final static int BUFFER_SIZE = 1024;
+
+  private final byte[] buffer = new byte[BUFFER_SIZE];
+  private long bufferStart = 0;        // file position of the start of the buffer
+  private int bufferPosition = 0;      // number of bytes waiting in the buffer
+
+  /** Appends one byte, flushing first when the buffer is full. */
+  public final void writeByte(byte b) throws IOException {
+    if (bufferPosition >= BUFFER_SIZE)
+      flush();
+    buffer[bufferPosition++] = b;
+  }
+
+  /** Appends the first <code>length</code> bytes of <code>b</code>, one at a time. */
+  public final void writeBytes(byte[] b, int length) throws IOException {
+    for (int next = 0; next < length; next++)
+      writeByte(b[next]);
+  }
+
+  /** Appends an int as four bytes, high byte first. */
+  public final void writeInt(int i) throws IOException {
+    for (int shift = 24; shift >= 0; shift -= 8)
+      writeByte((byte)(i >> shift));
+  }
+
+  /** Appends an int seven bits per byte, low bits first; all bytes but the last carry a set high bit. */
+  public final void writeVInt(int i) throws IOException {
+    int rest = i;
+    for (; (rest & ~0x7F) != 0; rest >>>= 7)
+      writeByte((byte)((rest & 0x7f) | 0x80));
+    writeByte((byte)rest);
+  }
+
+  public final void writeLong(long i) throws IOException {
+    writeInt((int)(i >> 32));          // high int first
+    writeInt((int)i);
+  }
+
+  public final void writeVLong(long i) throws IOException {
+    long rest = i;                     // same seven-bit format as writeVInt
+    for (; (rest & ~0x7F) != 0; rest >>>= 7)
+      writeByte((byte)((rest & 0x7f) | 0x80));
+    writeByte((byte)rest);
+  }
+
+  /** Appends the character count of <code>s</code> as a VInt, then its characters. */
+  public final void writeString(String s) throws IOException {
+    final int count = s.length();
+    writeVInt(count);
+    writeChars(s, 0, count);
+  }
+
+  /** Appends <code>length</code> characters of <code>s</code> from <code>start</code> on, one to three bytes each. */
+  public final void writeChars(String s, int start, int length) throws IOException {
+    for (int i = start, end = start + length; i < end; i++) {
+      final int code = s.charAt(i);
+      if (code == 0 || (code > 0x7F && code <= 0x7FF)) {  // two bytes, also used for 0
+        writeByte((byte)(0xC0 | (code >> 6)));
+        writeByte((byte)(0x80 | (code & 0x3F)));
+      } else if (code <= 0x7F) {                          // one byte
+        writeByte((byte)code);
+      } else {                                            // three bytes
+        writeByte((byte)(0xE0 | (code >>> 12)));
+        writeByte((byte)(0x80 | ((code >> 6) & 0x3F)));
+        writeByte((byte)(0x80 | (code & 0x3F)));
+      }
+    }
+  }
+
+  /** Hands the buffered bytes to flushBuffer, then empties the buffer. */
+  protected final void flush() throws IOException {
+    flushBuffer(buffer, bufferPosition);
+    bufferStart += bufferPosition;
+    bufferPosition = 0;
+  }
+
+  /** Implemented by subclasses: writes the first <code>len</code> bytes of <code>b</code>. */
+  abstract protected void flushBuffer(byte[] b, int len) throws IOException;
+
+  public void close() throws IOException {
+    flush();                           // write out whatever is still buffered
+  }
+
+  /** Returns the position at which the next byte will be written. */
+  public final long getFilePointer() throws IOException {
+    return bufferStart + bufferPosition;
+  }
+
+  /** Flushes, then makes <code>pos</code> the position of the next write. */
+  public void seek(long pos) throws IOException {
+    flush();
+    bufferStart = pos;
+  }
+
+  abstract public long length() throws IOException;
+}
diff --git a/src/java/org/apache/lucene/store/RAMDirectory.java b/src/java/org/apache/lucene/store/RAMDirectory.java
new file mode 100644
index 00000000000..c67e5d5b5ef
--- /dev/null
+++ b/src/java/org/apache/lucene/store/RAMDirectory.java
@@ -0,0 +1,223 @@
+package org.apache.lucene.store;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import java.util.Vector;
+import java.util.Hashtable;
+import java.util.Enumeration;
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.InputStream;
+import org.apache.lucene.store.OutputStream;
+
+ /** A Directory whose files live entirely in memory. Each file is a RAMFile
+  * stored in a Hashtable under its name.
+  */
+ final public class RAMDirectory extends Directory {
+   /** Maps a file name (String) to its RAMFile. */
+   Hashtable files = new Hashtable();
+
+   /** Creates an empty in-memory directory. */
+   public RAMDirectory() {
+   }
+
+   /** Returns the names of all files currently held by this directory. */
+   public final String[] list() {
+     String[] names = new String[files.size()];
+     int next = 0;
+     for (Enumeration keys = files.keys(); keys.hasMoreElements(); next++)
+       names[next] = (String)keys.nextElement();
+     return names;
+   }
+
+   /** Returns true iff a file with the given name is present. */
+   public final boolean fileExists(String name) {
+     return ((RAMFile)files.get(name)) != null;
+   }
+
+   /** Returns the lastModified stamp of the named file. The name is not
+       checked: an unknown name fails with a NullPointerException. */
+   public final long fileModified(String name) throws IOException {
+     return ((RAMFile)files.get(name)).lastModified;
+   }
+
+   /** Returns the length in bytes of the named file. The name is not
+       checked: an unknown name fails with a NullPointerException. */
+   public final long fileLength(String name) {
+     return ((RAMFile)files.get(name)).length;
+   }
+
+   /** Removes the named file; a name that is not present is ignored. */
+   public final void deleteFile(String name) {
+     files.remove(name);
+   }
+
+   /** Renames a file: the entry stored under from is removed and stored
+       again under to, replacing any file already using that name. */
+   public final void renameFile(String from, String to) {
+     RAMFile moved = (RAMFile)files.remove(from);  // remove() hands back the old entry
+     files.put(to, moved);
+   }
+
+   /** Registers a new, empty file under the given name (replacing any
+       previous file of that name) and returns a stream that writes to it. */
+   public final OutputStream createFile(String name) {
+     RAMFile created = new RAMFile();
+     files.put(name, created);
+     return new RAMOutputStream(created);
+   }
+
+   /** Returns a stream that reads the named file. */
+   public final InputStream openFile(String name) {
+     return new RAMInputStream((RAMFile)files.get(name));
+   }
+
+   /** Closes the store to future operations. Nothing is released here. */
+   public final void close() {
+   }
+ }
+
+
+ /** Reads a RAMFile. The file's content is spread over fixed-size byte
+  * arrays of BUFFER_SIZE bytes each, kept in file.buffers; pointer is the
+  * absolute read position within the file.
+  */
+ final class RAMInputStream extends InputStream implements Cloneable {
+   RAMFile file;
+   int pointer = 0;
+
+   /** Creates a stream over f, taking the stream length from the file's
+       length at the time of construction. */
+   public RAMInputStream(RAMFile f) {
+     file = f;
+     length = file.length;
+   }
+
+   /** Copies len bytes, starting at the current pointer, into dest at
+    * destOffset, and advances the pointer by len. The copy walks over as
+    * many file buffers as the request spans; the previous version handled
+    * at most two and overran the second buffer on longer reads.
+    * @param dest destination array
+    * @param destOffset index in dest of the first byte to store
+    * @param len number of bytes to copy
+    */
+   public final void readInternal(byte[] dest, int destOffset, int len) {
+     int position = pointer;
+     int remaining = len;
+     while (remaining != 0) {
+       int bufferNumber = position / InputStream.BUFFER_SIZE;
+       int bufferOffset = position % InputStream.BUFFER_SIZE;
+       int bytesInBuffer = InputStream.BUFFER_SIZE - bufferOffset;
+       int bytesToCopy = bytesInBuffer >= remaining ? remaining : bytesInBuffer;
+       byte[] buffer = (byte[])file.buffers.elementAt(bufferNumber);
+       System.arraycopy(buffer, bufferOffset, dest, destOffset, bytesToCopy);
+       destOffset += bytesToCopy;
+       position += bytesToCopy;
+       remaining -= bytesToCopy;
+     }
+     pointer += len;
+   }
+
+   /** Nothing to release for an in-memory file. */
+   public final void close() {
+   }
+
+   /** Random-access support: moves the read position to pos. */
+   public final void seekInternal(long pos) {
+     pointer = (int)pos;
+   }
+ }
+
+
+ /** Writes a RAMFile. Bytes are stored in fixed-size arrays of BUFFER_SIZE
+  * bytes each, appended to file.buffers as the file grows; pointer is the
+  * absolute write position within the file.
+  */
+ final class RAMOutputStream extends OutputStream {
+   RAMFile file;
+   int pointer = 0;
+
+   /** Creates a stream that writes into f, starting at position zero. */
+   public RAMOutputStream(RAMFile f) {
+     file = f;
+   }
+
+   /** Copies the first len bytes of src into the file at the current
+    * pointer, advances the pointer, grows file.length if the write went
+    * past the old end, and stamps file.lastModified. The copy walks over as
+    * many file buffers as the request spans (the previous version handled
+    * at most two), appending a new buffer whenever the write reaches the
+    * end of the buffer list.
+    * @param src source of the bytes
+    * @param len number of bytes, counted from the start of src, to store
+    */
+   public final void flushBuffer(byte[] src, int len) {
+     int srcOffset = 0;
+     int position = pointer;
+     int remaining = len;
+     while (remaining != 0) {
+       int bufferNumber = position / OutputStream.BUFFER_SIZE;
+       int bufferOffset = position % OutputStream.BUFFER_SIZE;
+       int bytesInBuffer = OutputStream.BUFFER_SIZE - bufferOffset;
+       int bytesToCopy = bytesInBuffer >= remaining ? remaining : bytesInBuffer;
+
+       if (bufferNumber == file.buffers.size())
+         file.buffers.addElement(new byte[OutputStream.BUFFER_SIZE]);
+
+       byte[] buffer = (byte[])file.buffers.elementAt(bufferNumber);
+       System.arraycopy(src, srcOffset, buffer, bufferOffset, bytesToCopy);
+
+       srcOffset += bytesToCopy;
+       position += bytesToCopy;
+       remaining -= bytesToCopy;
+     }
+     pointer += len;
+     if (pointer > file.length)
+       file.length = pointer;
+
+     file.lastModified = System.currentTimeMillis();
+   }
+
+   /** Closes the stream by delegating to the superclass. */
+   public final void close() throws IOException {
+     super.close();
+   }
+
+   /** Random-access support: lets the superclass reposition, then moves
+       the file pointer to pos. */
+   public final void seek(long pos) throws IOException {
+     super.seek(pos);
+     pointer = (int)pos;
+   }
+
+   /** Returns the current length of the file being written. */
+   public final long length() throws IOException {
+     return file.length;
+   }
+ }
+
+ /** State of one in-memory file: its content buffers, its length in bytes
+  * and the time it was last written.
+  */
+ final class RAMFile {
+   /** Content of the file, as a list of byte[] chunks. */
+   Vector buffers;
+   /** Number of bytes in the file; starts at zero. */
+   long length;
+   /** Millisecond timestamp; initially the creation time. */
+   long lastModified;
+
+   RAMFile() {
+     buffers = new Vector();
+     lastModified = System.currentTimeMillis();
+   }
+ }
diff --git a/src/java/org/apache/lucene/store/package.html b/src/java/org/apache/lucene/store/package.html
new file mode 100644
index 00000000000..06f213f5682
--- /dev/null
+++ b/src/java/org/apache/lucene/store/package.html
@@ -0,0 +1,10 @@
+
+
+
+
+
+
+
+Binary i/o API, for storing index data.
+
+
diff --git a/src/java/org/apache/lucene/util/Arrays.java b/src/java/org/apache/lucene/util/Arrays.java
new file mode 100644
index 00000000000..fce63a8790e
--- /dev/null
+++ b/src/java/org/apache/lucene/util/Arrays.java
@@ -0,0 +1,150 @@
+package org.apache.lucene.util;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+// copied from jdk 1.2b3 sources, so that we can use it in java 1.1
+
+/**
+ * This class contains various methods for manipulating arrays (such as
+ * sorting and searching). It also contains a static factory that allows
+ * arrays to be viewed as Lists.
+ *
+ * @author Josh Bloch
+ * @version 1.17 03/18/98
+ * @since JDK1.2
+ */
+
+public class Arrays {
+ /**
+  * Sorts the given array of strings in place into ascending order, as
+  * defined by String.compareTo.
+  *
+  * The sort is stable: equal elements keep their relative order.
+  *
+  * The algorithm is a modified mergesort in which the merge step is
+  * skipped when the highest element of the low half does not exceed the
+  * lowest element of the high half. It offers guaranteed n*log(n)
+  * performance and can approach linear performance on nearly sorted input.
+  * A scratch copy of the whole array is allocated for the duration of the
+  * call.
+  *
+  * @param a the array to be sorted.
+  */
+ public static void sort(String[] a) {
+   String[] scratch = new String[a.length];
+   System.arraycopy(a, 0, scratch, 0, a.length);  // same content as a.clone()
+   mergeSort(scratch, a, 0, a.length);
+ }
+
+ private static void mergeSort(String src[], String dest[],
+ int low, int high) {
+ int length = high - low;
+
+ // Insertion sort on smallest arrays
+ if (length < 7) {
+ for (int i=low; ilow && (dest[j-1]).compareTo(dest[j])>0; j--)
+ swap(dest, j, j-1);
+ return;
+ }
+
+ // Recursively sort halves of dest into src
+ int mid = (low + high)/2;
+ mergeSort(dest, src, low, mid);
+ mergeSort(dest, src, mid, high);
+
+ // If list is already sorted, just copy from src to dest. This is an
+ // optimization that results in faster sorts for nearly ordered lists.
+ if ((src[mid-1]).compareTo(src[mid]) <= 0) {
+ System.arraycopy(src, low, dest, low, length);
+ return;
+ }
+
+ // Merge sorted halves (now in src) into dest
+ for(int i = low, p = low, q = mid; i < high; i++) {
+ if (q>=high || p.
+ */
+
+import java.io.IOException;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.InputStream;
+import org.apache.lucene.store.OutputStream;
+
+ /** Optimized implementation of a vector of bits. This is more-or-less like
+  * java.util.BitSet, but also includes the following:
+  * <ul>
+  * <li>a count() method, which efficiently computes the number of one bits;</li>
+  * <li>optimized read from and write to disk;</li>
+  * <li>inlinable get() method;</li>
+  * </ul>
+  */
+ public final class BitVector {
+   /** This is public just so that methods will inline. Please don't touch.*/
+   public byte[] bits;
+   private int size;
+   /** Cached number of one bits, or -1 when it must be recomputed. */
+   private int count = -1;
+
+   /** Number of one bits in each possible byte value, indexed by the
+       unsigned value of the byte. */
+   private static final byte[] BYTE_COUNTS = new byte[256];
+   static {
+     // popcount(v) = popcount(v / 2) + lowest bit of v; entry 0 stays 0
+     for (int v = 1; v < BYTE_COUNTS.length; v++)
+       BYTE_COUNTS[v] = (byte)(BYTE_COUNTS[v >> 1] + (v & 1));
+   }
+
+   /** Constructs a vector capable of holding <code>n</code> bits. */
+   public BitVector(int n) {
+     size = n;
+     bits = new byte[(size >> 3) + 1];
+   }
+
+   /** Constructs a bit vector from the file <code>name</code> in Directory
+    * <code>d</code>, as written by the {@link #write} method.
+    */
+   public BitVector(Directory d, String name) throws IOException {
+     InputStream input = d.openFile(name);
+     try {
+       size = input.readInt();                  // stored size
+       count = input.readInt();                 // stored count of one bits
+       bits = new byte[(size >> 3) + 1];        // same sizing as BitVector(int)
+       input.readBytes(bits, 0, bits.length);   // stored bits
+     } finally {
+       input.close();
+     }
+   }
+
+   /** Sets the value of <code>bit</code> to one. */
+   public final void set(int bit) {
+     final int index = bit >> 3;
+     bits[index] = (byte)(bits[index] | (1 << (bit & 7)));
+     count = -1;                                // cached count is now stale
+   }
+
+   /** Sets the value of <code>bit</code> to zero. */
+   public final void clear(int bit) {
+     final int index = bit >> 3;
+     bits[index] = (byte)(bits[index] & ~(1 << (bit & 7)));
+     count = -1;                                // cached count is now stale
+   }
+
+   /** Returns <code>true</code> if <code>bit</code> is one and
+    * <code>false</code> if it is zero. */
+   public final boolean get(int bit) {
+     final int mask = 1 << (bit & 7);
+     return (bits[bit >> 3] & mask) != 0;
+   }
+
+   /** Returns the number of bits in this vector. This is also one greater than
+    * the number of the largest valid bit number. */
+   public final int size() {
+     return size;
+   }
+
+   /** Returns the total number of one bits in this vector. This is efficiently
+    * computed and cached, so that, if the vector is not changed, no
+    * recomputation is done for repeated calls. */
+   public final int count() {
+     if (count == -1) {
+       int total = 0;
+       for (int i = bits.length - 1; i >= 0; i--)
+         total += BYTE_COUNTS[bits[i] & 0xFF];  // table lookup per byte
+       count = total;
+     }
+     return count;
+   }
+
+   /** Writes this vector to the file <code>name</code> in Directory
+    * <code>d</code>, in a format that can be read by the constructor {@link
+    * #BitVector(Directory, String)}. */
+   public final void write(Directory d, String name) throws IOException {
+     OutputStream output = d.createFile(name);
+     try {
+       output.writeInt(size());                 // size
+       output.writeInt(count());                // count of one bits
+       output.writeBytes(bits, bits.length);    // the bits themselves
+     } finally {
+       output.close();
+     }
+   }
+
+ }
diff --git a/src/java/org/apache/lucene/util/Makefile b/src/java/org/apache/lucene/util/Makefile
new file mode 100644
index 00000000000..09c091d1839
--- /dev/null
+++ b/src/java/org/apache/lucene/util/Makefile
@@ -0,0 +1,2 @@
+# sub-directory makefile for lucene
+include ../rules.mk
diff --git a/src/java/org/apache/lucene/util/PriorityQueue.java b/src/java/org/apache/lucene/util/PriorityQueue.java
new file mode 100644
index 00000000000..f97d0af6a2f
--- /dev/null
+++ b/src/java/org/apache/lucene/util/PriorityQueue.java
@@ -0,0 +1,159 @@
+package org.apache.lucene.util;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+ /** A PriorityQueue maintains a partial ordering of its elements such that the
+  * least element can always be found in constant time. Put()'s and pop()'s
+  * require log(size) time.
+  *
+  * The elements are kept in a binary heap stored in an array whose slots
+  * 1..size are in use; slot 0 is never used.
+  */
+ public abstract class PriorityQueue {
+   private Object[] heap;
+   private int size;
+
+   /** Determines the ordering of objects in this priority queue. Subclasses
+    * must define this one method. */
+   abstract protected boolean lessThan(Object a, Object b);
+
+   /** Subclass constructors must call this. Allocates the heap array with
+    * (maxSize * 2) + 1 slots and empties the queue. */
+   protected final void initialize(int maxSize) {
+     size = 0;
+     int heapSize = (maxSize * 2) + 1;
+     heap = new Object[heapSize];
+   }
+
+   /** Adds an Object to a PriorityQueue in log(size) time. No capacity
+    * check is made: adding beyond the allocated array fails with an
+    * ArrayIndexOutOfBoundsException. */
+   public final void put(Object element) {
+     size++;
+     heap[size] = element;
+     upHeap();
+   }
+
+   /** Returns the least element of the PriorityQueue in constant time, or
+    * null if the queue is empty. */
+   public final Object top() {
+     if (size > 0)
+       return heap[1];
+     else
+       return null;
+   }
+
+   /** Removes and returns the least element of the PriorityQueue in log(size)
+    * time, or returns null if the queue is empty. */
+   public final Object pop() {
+     if (size > 0) {
+       Object result = heap[1];          // save first value
+       heap[1] = heap[size];             // move last to first
+       heap[size] = null;                // permit GC of objects
+       size--;
+       downHeap();                       // adjust heap
+       return result;
+     } else
+       return null;
+   }
+
+   /** Should be called when the Object at top changes values. Still log(n)
+    * worst case, but it's at least twice as fast to <pre>
+    *  { pq.top().change(); pq.adjustTop(); }
+    * </pre> instead of <pre>
+    *  { o = pq.pop(); o.change(); pq.put(o); }
+    * </pre>
+    */
+   public final void adjustTop() {
+     downHeap();
+   }
+
+
+   /** Returns the number of elements currently stored in the PriorityQueue. */
+   public final int size() {
+     return size;
+   }
+
+   /** Removes all entries from the PriorityQueue. */
+   public final void clear() {
+     // Elements occupy slots 1..size, so the last slot must be included;
+     // otherwise the element stored there could never be collected.
+     for (int i = 1; i <= size; i++)
+       heap[i] = null;
+     size = 0;
+   }
+
+   /** Moves the element in the last used slot up towards the root until
+    * its parent is no longer greater than it. */
+   private final void upHeap() {
+     int i = size;
+     Object node = heap[i];              // save bottom node
+     int j = i >>> 1;
+     while (j > 0 && lessThan(node, heap[j])) {
+       heap[i] = heap[j];                // shift parents down
+       i = j;
+       j = j >>> 1;
+     }
+     heap[i] = node;                     // install saved node
+   }
+
+   /** Moves the element in the root slot down, always towards the smaller
+    * child, until neither child is less than it. */
+   private final void downHeap() {
+     int i = 1;
+     Object node = heap[i];              // save top node
+     int j = i << 1;                     // find smaller child
+     int k = j + 1;
+     if (k <= size && lessThan(heap[k], heap[j])) {
+       j = k;
+     }
+     while (j <= size && lessThan(heap[j], node)) {
+       heap[i] = heap[j];                // shift up child
+       i = j;
+       j = i << 1;
+       k = j + 1;
+       if (k <= size && lessThan(heap[k], heap[j])) {
+         j = k;
+       }
+     }
+     heap[i] = node;                     // install saved node
+   }
+ }
diff --git a/src/java/org/apache/lucene/util/package.html b/src/java/org/apache/lucene/util/package.html
new file mode 100644
index 00000000000..d4aaedff7f9
--- /dev/null
+++ b/src/java/org/apache/lucene/util/package.html
@@ -0,0 +1,10 @@
+
+
+
+
+
+
+
+Some utility classes.
+
+
diff --git a/src/java/org/apache/lucene/variables.mk b/src/java/org/apache/lucene/variables.mk
new file mode 100644
index 00000000000..d6a15ecdfe8
--- /dev/null
+++ b/src/java/org/apache/lucene/variables.mk
@@ -0,0 +1,30 @@
+# User variables for make. Customize for your installation as needed.
+# Anything set here override the defaults set in rules.mk
+
+# where your JDK is installed.
+# Please note: this is not the same as JAVA_HOME!
+# Default is: C:/jdk1.3 or /usr/local/java/jdk1.3 depending on OS
+# JDK_HOME=
+
+# set this if you are using JDK1.1.x
+# OLDJAVA=1
+
+# set this if you are using a custom java compiler (i.e. jikes)
+# Default is: $JDK_HOME/bin/javac
+# JAVAC=jikes
+
+# set this to the location of the javacc zip file
+# Default is:
+# JAVACC=/usr/local/java/javacc2_0/bin/lib/JavaCC.zip
+
+# Set this to the flags you want to give your java compiler
+# -O by default.
+# Use JFLAGS=-g to generate debuggable code.
+# JFLAGS= -O
+
+# prepend any custom classpath here:
+# PREPENDCLASSPATH=
+
+# where the default java documentation is
+# Default is:
+# JAVALINK = http://java.sun.com/products/jdk/1.3/docs/api/
diff --git a/src/test/org/apache/lucene/AnalysisTest.java b/src/test/org/apache/lucene/AnalysisTest.java
new file mode 100644
index 00000000000..c0c6aaaaaca
--- /dev/null
+++ b/src/test/org/apache/lucene/AnalysisTest.java
@@ -0,0 +1,125 @@
+package org.apache.lucene;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import com.lucene.analysis.SimpleAnalyzer;
+import com.lucene.analysis.Analyzer;
+import com.lucene.analysis.TokenStream;
+import com.lucene.analysis.Token;
+
+import java.io.Reader;
+import java.io.StringReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+import java.util.Date;
+import java.util.Random;
+
+ /** Command-line timing check for SimpleAnalyzer: tokenizes a fixed string
+  * and the file "words.txt", then prints token counts and throughput.
+  * NOTE(review): the imports of this file name com.lucene.* while the
+  * sources added by this patch are declared in org.apache.lucene.* --
+  * confirm which package is intended.
+  */
+ class AnalysisTest {
+   /** Runs both tests; any exception is reported on standard output. */
+   public static void main(String[] args) {
+     try {
+       test("This is a test", true);
+       test(new File("words.txt"), false);
+     } catch (Exception e) {
+       System.out.println(" caught a " + e.getClass() +
+                          "\n with message: " + e.getMessage());
+     }
+   }
+
+   /** Tokenizes the content of a file, read with the platform default
+    * character encoding.
+    * @param file the file to read
+    * @param verbose whether each token is printed
+    */
+   static void test(File file, boolean verbose)
+       throws Exception {
+     long bytes = file.length();
+     System.out.println(" Reading test file containing " + bytes + " bytes.");
+
+     FileInputStream is = new FileInputStream(file);
+     BufferedReader ir = new BufferedReader(new InputStreamReader(is));
+     try {
+       test(ir, verbose, bytes);
+     } finally {
+       ir.close();                       // also release the file when tokenizing fails
+     }
+   }
+
+   /** Tokenizes a string; its length in chars is used as the byte count. */
+   static void test(String text, boolean verbose) throws Exception {
+     System.out.println(" Tokenizing string: " + text);
+     test(new StringReader(text), verbose, text.length());
+   }
+
+   /** Pulls every token from the reader through a SimpleAnalyzer, counting
+    * them and timing the loop, then prints the elapsed time, the time per
+    * token and the throughput derived from bytes.
+    * @param reader source of the text
+    * @param verbose whether each token's text and offsets are printed
+    * @param bytes size of the input, used only for the throughput figure
+    */
+   static void test(Reader reader, boolean verbose, long bytes)
+       throws Exception {
+     Analyzer analyzer = new SimpleAnalyzer();
+     TokenStream stream = analyzer.tokenStream(null, reader);
+
+     Date start = new Date();
+
+     int count = 0;
+     for (Token t = stream.next(); t!=null; t = stream.next()) {
+       if (verbose) {
+         System.out.println("Text=" + t.termText()
+                            + " start=" + t.startOffset()
+                            + " end=" + t.endOffset());
+       }
+       count++;
+     }
+
+     Date end = new Date();
+
+     long time = end.getTime() - start.getTime();
+     System.out.println(time + " milliseconds to extract " + count + " tokens");
+     // floating-point division: a zero count or zero time prints NaN or
+     // Infinity instead of throwing
+     System.out.println((time*1000.0)/count + " microseconds/token");
+     System.out.println((bytes * 1000.0 * 60.0 * 60.0)/(time * 1000000.0)
+                        + " megabytes/hour");
+   }
+ }
diff --git a/src/test/org/apache/lucene/HighFreqTerms.java b/src/test/org/apache/lucene/HighFreqTerms.java
new file mode 100644
index 00000000000..58566a4fb78
--- /dev/null
+++ b/src/test/org/apache/lucene/HighFreqTerms.java
@@ -0,0 +1,120 @@
+package org.apache.lucene;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import com.lucene.util.PriorityQueue;
+import com.lucene.store.Directory;
+import com.lucene.store.FSDirectory;
+import com.lucene.index.IndexReader;
+import com.lucene.index.Term;
+import com.lucene.index.TermEnum;
+
+ /** Command-line tool that prints the terms with the highest document
+  * frequency found in the index stored in "demo index".
+  */
+ class HighFreqTerms {
+   /** Maximum number of terms reported. */
+   public static int numTerms = 100;
+
+   /** Scans every term of the index, keeping the numTerms most frequent
+    * ones in a queue ordered by document frequency, then prints them from
+    * least to most frequent. The term enumeration, the reader and the
+    * directory are closed even when the scan fails; any exception is
+    * reported on standard output.
+    */
+   public static void main(String[] args) {
+     try {
+       Directory directory = new FSDirectory("demo index", false);
+       try {
+         IndexReader reader = IndexReader.open(directory);
+         try {
+           TermInfoQueue tiq = new TermInfoQueue(numTerms);
+           TermEnum terms = reader.terms();
+           try {
+             int minFreq = 0;
+             while (terms.next()) {
+               if (terms.docFreq() > minFreq) {
+                 tiq.put(new TermInfo(terms.term(), terms.docFreq()));
+                 if (tiq.size() > numTerms) {       // if tiq overfull
+                   tiq.pop();                       // remove lowest in tiq
+                   minFreq = ((TermInfo)tiq.top()).docFreq; // reset minFreq
+                 }
+               }
+             }
+           } finally {
+             terms.close();                         // release the enumeration
+           }
+
+           while (tiq.size() != 0) {
+             TermInfo termInfo = (TermInfo)tiq.pop();
+             System.out.println(termInfo.term + " " + termInfo.docFreq);
+           }
+         } finally {
+           reader.close();
+         }
+       } finally {
+         directory.close();
+       }
+
+     } catch (Exception e) {
+       System.out.println(" caught a " + e.getClass() +
+                          "\n with message: " + e.getMessage());
+     }
+   }
+ }
+
+ /** Pairs a term with its document frequency. */
+ final class TermInfo {
+   int docFreq;
+   Term term;
+
+   /** @param t the term
+    *  @param df the document frequency recorded for t */
+   TermInfo(Term t, int df) {
+     this.term = t;
+     this.docFreq = df;
+   }
+ }
+
+ /** PriorityQueue of TermInfo objects ordered by document frequency, so the
+  * entry with the lowest frequency is the least element.
+  */
+ final class TermInfoQueue extends PriorityQueue {
+   /** @param size the maximum size passed on to initialize() */
+   TermInfoQueue(int size) {
+     initialize(size);
+   }
+
+   /** Returns true iff a has a strictly lower document frequency than b. */
+   protected final boolean lessThan(Object a, Object b) {
+     final TermInfo left = (TermInfo)a;
+     final TermInfo right = (TermInfo)b;
+     return left.docFreq < right.docFreq;
+   }
+ }
+
diff --git a/src/test/org/apache/lucene/IndexTest.java b/src/test/org/apache/lucene/IndexTest.java
new file mode 100644
index 00000000000..b2dc1211375
--- /dev/null
+++ b/src/test/org/apache/lucene/IndexTest.java
@@ -0,0 +1,117 @@
+package org.apache.lucene;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import com.lucene.analysis.SimpleAnalyzer;
+import com.lucene.index.IndexWriter;
+import com.lucene.index.TermPositions;
+import com.lucene.document.Document;
+import demo.FileDocument;
+
+import java.io.File;
+import java.util.Date;
+
+/**
+ * Stand-alone timing test: builds a fresh index at "F:\\test" from every file
+ * below "F:\\recipes", then reports the elapsed time and the JVM memory
+ * figures before and after a garbage collection.
+ */
+class IndexTest {
+  /**
+   * Runs the indexing benchmark.  Failures are reported on standard output
+   * instead of being rethrown.
+   *
+   * @param args ignored
+   */
+  public static void main(String[] args) {
+    try {
+      Date start = new Date();
+
+      IndexWriter writer = new IndexWriter("F:\\test", new SimpleAnalyzer(),
+                                           true);
+      try {
+        writer.mergeFactor = 20;
+
+        indexDocs(writer, new File("F:\\recipes"));
+
+        writer.optimize();
+      } finally {
+        writer.close();                 // also runs when indexing fails
+      }
+
+      Date end = new Date();
+
+      System.out.print(end.getTime() - start.getTime());
+      System.out.println(" total milliseconds");
+
+      Runtime runtime = Runtime.getRuntime();
+
+      System.out.print(runtime.freeMemory());
+      System.out.println(" free memory before gc");
+      System.out.print(runtime.totalMemory());
+      System.out.println(" total memory before gc");
+
+      runtime.gc();
+
+      System.out.print(runtime.freeMemory());
+      System.out.println(" free memory after gc");
+      System.out.print(runtime.totalMemory());
+      System.out.println(" total memory after gc");
+
+    } catch (Exception e) {
+      System.out.println(" caught a " + e.getClass() +
+                         "\n with message: " + e.getMessage());
+    }
+  }
+
+  /**
+   * Adds file to the index; when file is a directory, recurses into every
+   * entry it contains.
+   *
+   * @param writer index writer receiving the documents
+   * @param file   file or directory to index
+   * @throws Exception if a directory cannot be listed or a document cannot
+   *         be added
+   */
+  public static void indexDocs(IndexWriter writer, File file)
+       throws Exception {
+    if (file.isDirectory()) {
+      String[] files = file.list();
+      // File.list() returns null when the directory cannot be read; report
+      // that explicitly instead of failing with a NullPointerException.
+      if (files == null)
+        throw new java.io.IOException("cannot list directory " + file);
+      for (int i = 0; i < files.length; i++)
+        indexDocs(writer, new File(file, files[i]));
+    } else {
+      System.out.println("adding " + file);
+      writer.addDocument(FileDocument.Document(file));
+    }
+  }
+}
diff --git a/src/test/org/apache/lucene/SearchTest.java b/src/test/org/apache/lucene/SearchTest.java
new file mode 100644
index 00000000000..06f80950e02
--- /dev/null
+++ b/src/test/org/apache/lucene/SearchTest.java
@@ -0,0 +1,137 @@
+package org.apache.lucene;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import java.util.Date;
+import java.util.GregorianCalendar;
+
+import com.lucene.store.*;
+import com.lucene.document.*;
+import com.lucene.analysis.*;
+import com.lucene.index.*;
+import com.lucene.search.*;
+import com.lucene.queryParser.*;
+
+/**
+ * Stand-alone search test: indexes a handful of short documents into a
+ * RAMDirectory, runs phrase queries with a slop of 4 against the "contents"
+ * field and prints up to ten scored hits per query.
+ */
+class SearchTest {
+  /**
+   * Builds the in-memory index and runs the queries.  Failures are reported
+   * on standard output instead of being rethrown.
+   *
+   * @param args ignored
+   */
+  public static void main(String[] args) {
+    try {
+      Directory directory = new RAMDirectory();
+      Analyzer analyzer = new SimpleAnalyzer();
+      IndexWriter writer = new IndexWriter(directory, analyzer, true);
+
+      String[] docs = {
+        "a b c d e",
+        "a b c d e a b c d e",
+        "a b c d e f g h i j",
+        "a c e",
+        "e c a",
+        "a c e a c e",
+        "a c e a b c"
+      };
+      for (int j = 0; j < docs.length; j++) {
+        Document d = new Document();
+        d.add(Field.Text("contents", docs[j]));
+        writer.addDocument(d);
+      }
+      writer.close();
+
+      Searcher searcher = new IndexSearcher(directory);
+      try {
+        // Alternative queries are kept commented out for manual experiments.
+        String[] queries = {
+//        "a b",
+//        "\"a b\"",
+//        "\"a b c\"",
+//        "a c",
+//        "\"a c\"",
+          "\"a c e\"",
+        };
+
+        QueryParser parser = new QueryParser("contents", analyzer);
+        parser.setPhraseSlop(4);
+        for (int j = 0; j < queries.length; j++) {
+          Query query = parser.parse(queries[j]);
+          System.out.println("Query: " + query.toString("contents"));
+
+          //DateFilter filter =
+          //  new DateFilter("modified", Time(1997,0,1), Time(1998,0,1));
+          //DateFilter filter = DateFilter.Before("modified", Time(1997,00,01));
+          //System.out.println(filter);
+
+          Hits hits = searcher.search(query, null);
+
+          System.out.println(hits.length() + " total results");
+          for (int i = 0 ; i < hits.length() && i < 10; i++) {
+            Document d = hits.doc(i);
+            System.out.println(i + " " + hits.score(i)
+//                             + " " + DateField.stringToDate(d.get("modified"))
+                               + " " + d.get("contents"));
+          }
+        }
+      } finally {
+        searcher.close();               // also runs when a query fails
+      }
+
+    } catch (Exception e) {
+      System.out.println(" caught a " + e.getClass() +
+                         "\n with message: " + e.getMessage());
+    }
+  }
+
+  /**
+   * Returns the time in milliseconds since the epoch for the start of the
+   * given day in the default time zone.
+   *
+   * @param year  calendar year
+   * @param month zero-based month, as expected by Calendar.set
+   * @param day   day of the month
+   */
+  static long Time(int year, int month, int day) {
+    GregorianCalendar calendar = new GregorianCalendar();
+    // A new GregorianCalendar holds the current time and set(y, m, d) keeps
+    // the other fields, so clear them to get a repeatable midnight value.
+    calendar.clear();
+    calendar.set(year, month, day);
+    return calendar.getTime().getTime();
+  }
+}
diff --git a/src/test/org/apache/lucene/SearchTestForDuplicates.java b/src/test/org/apache/lucene/SearchTestForDuplicates.java
new file mode 100644
index 00000000000..6b8a834124d
--- /dev/null
+++ b/src/test/org/apache/lucene/SearchTestForDuplicates.java
@@ -0,0 +1,136 @@
+package org.apache.lucene;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import java.util.Date;
+import java.util.GregorianCalendar;
+
+import com.lucene.store.*;
+import com.lucene.document.*;
+import com.lucene.analysis.*;
+import com.lucene.index.*;
+import com.lucene.search.*;
+import com.lucene.queryParser.*;
+
+class SearchTestForDuplicates {
+
+ final static String PRIORITY_FIELD ="priority";
+ final static String ID_FIELD ="id";
+ final static String HIGH_PRIORITY ="high";
+ final static String MED_PRIORITY ="medium";
+ final static String LOW_PRIORITY ="low";
+
+ public static void main(String[] args) {
+ try {
+ Directory directory = new RAMDirectory();
+ Analyzer analyzer = new SimpleAnalyzer();
+ IndexWriter writer = new IndexWriter(directory, analyzer, true);
+
+ final int MAX_DOCS = 225;
+
+ for (int j = 0; j < MAX_DOCS; j++) {
+ Document d = new Document();
+ d.add(Field.Text(PRIORITY_FIELD, HIGH_PRIORITY));
+ d.add(Field.Text(ID_FIELD, Integer.toString(j)));
+ writer.addDocument(d);
+ }
+ writer.close();
+
+ // try a search without OR
+ Searcher searcher = new IndexSearcher(directory);
+ Hits hits = null;
+
+ QueryParser parser = new QueryParser(PRIORITY_FIELD, analyzer);
+
+ Query query = parser.parse(HIGH_PRIORITY);
+ System.out.println("Query: " + query.toString(PRIORITY_FIELD));
+
+ hits = searcher.search(query, null);
+ printHits(hits);
+
+ searcher.close();
+
+ // try a new search with OR
+ searcher = new IndexSearcher(directory);
+ hits = null;
+
+ parser = new QueryParser(PRIORITY_FIELD, analyzer);
+
+ query = parser.parse(HIGH_PRIORITY + " OR " + MED_PRIORITY);
+ System.out.println("Query: " + query.toString(PRIORITY_FIELD));
+
+ hits = searcher.search(query, null);
+ printHits(hits);
+
+ searcher.close();
+
+ } catch (Exception e) {
+ System.out.println(" caught a " + e.getClass() +
+ "\n with message: " + e.getMessage());
+ }
+ }
+
+ private static void printHits( Hits hits ) throws IOException {
+ System.out.println(hits.length() + " total results\n");
+ for (int i = 0 ; i < hits.length(); i++) {
+ if ( i < 10 || (i > 94 && i < 105) ) {
+ Document d = hits.doc(i);
+ System.out.println(i + " " + d.get(ID_FIELD));
+ }
+ }
+ }
+
+}
diff --git a/src/test/org/apache/lucene/StoreTest.java b/src/test/org/apache/lucene/StoreTest.java
new file mode 100644
index 00000000000..ead63ddfaad
--- /dev/null
+++ b/src/test/org/apache/lucene/StoreTest.java
@@ -0,0 +1,161 @@
+package org.apache.lucene;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import com.lucene.store.Directory;
+import com.lucene.store.InputStream;
+import com.lucene.store.OutputStream;
+import com.lucene.store.FSDirectory;
+import com.lucene.store.RAMDirectory;
+
+import java.util.Date;
+import java.util.Random;
+
+/**
+ * Stand-alone test of a Directory implementation: writes a number of files
+ * with pseudo-random length and content, reads them back to verify them,
+ * deletes them, and prints the time each phase took.
+ */
+class StoreTest {
+  /** Seed used by both the write and the read pass so they agree on the data. */
+  private static final int SEED = 1251971;
+  /** Masks a random int down to a file length between 0 and 0xFFF. */
+  private static final int LENGTH_MASK = 0xFFF;
+
+  /**
+   * Runs the test with 1000 files against a RAMDirectory.  Failures are
+   * reported on standard output instead of being rethrown.
+   *
+   * @param args ignored
+   */
+  public static void main(String[] args) {
+    try {
+      test(1000, true);
+    } catch (Exception e) {
+      System.out.println(" caught a " + e.getClass() +
+                         "\n with message: " + e.getMessage());
+    }
+  }
+
+  /**
+   * Creates, verifies and deletes count files named "0.dat", "1.dat", ...
+   *
+   * @param count number of files to create
+   * @param ram   true to use a RAMDirectory, false to use an FSDirectory
+   *              rooted at "test.store"
+   * @throws Exception if a file has the wrong length or contents, or the
+   *         store reports an error
+   */
+  public static void test(int count, boolean ram)
+       throws Exception {
+    Date veryStart = new Date();
+    Date start = new Date();
+
+    Directory store;
+    if (ram)
+      store = new RAMDirectory();
+    else
+      store = new FSDirectory("test.store", true);
+
+    createFiles(store, count);
+
+    // NOTE(review): in RAM mode this same instance is used again below after
+    // being closed; that relies on RAMDirectory staying usable after
+    // close() -- confirm.
+    store.close();
+
+    Date end = new Date();
+
+    System.out.print(end.getTime() - start.getTime());
+    System.out.println(" total milliseconds to create");
+
+    start = new Date();
+
+    if (!ram)
+      store = new FSDirectory("test.store", false);
+
+    verifyFiles(store, count);
+
+    end = new Date();
+
+    System.out.print(end.getTime() - start.getTime());
+    System.out.println(" total milliseconds to read");
+
+    start = new Date();
+
+    for (int i = 0; i < count; i++) {
+      String name = i + ".dat";
+      //System.out.println("deleting " + name);
+      store.deleteFile(name);
+    }
+
+    end = new Date();
+
+    System.out.print(end.getTime() - start.getTime());
+    System.out.println(" total milliseconds to delete");
+
+    System.out.print(end.getTime() - veryStart.getTime());
+    System.out.println(" total milliseconds");
+
+    store.close();
+  }
+
+  /** Writes count files, each filled with a single repeated byte value. */
+  private static void createFiles(Directory store, int count)
+       throws Exception {
+    Random gen = new Random(SEED);
+    for (int i = 0; i < count; i++) {
+      String name = i + ".dat";
+      int length = gen.nextInt() & LENGTH_MASK;
+      byte b = (byte)(gen.nextInt() & 0x7F);
+      //System.out.println("filling " + name + " with " + length + " of " + b);
+
+      OutputStream file = store.createFile(name);
+      try {
+        for (int j = 0; j < length; j++)
+          file.writeByte(b);
+      } finally {
+        file.close();                   // do not leak the stream on failure
+      }
+    }
+  }
+
+  /**
+   * Replays the random sequence used by createFiles and checks that every
+   * file has the expected length and content.
+   */
+  private static void verifyFiles(Directory store, int count)
+       throws Exception {
+    Random gen = new Random(SEED);
+    for (int i = 0; i < count; i++) {
+      String name = i + ".dat";
+      int length = gen.nextInt() & LENGTH_MASK;
+      byte b = (byte)(gen.nextInt() & 0x7F);
+      //System.out.println("reading " + name + " with " + length + " of " + b);
+
+      InputStream file = store.openFile(name);
+      try {
+        if (file.length() != length)
+          throw new Exception("length incorrect");
+
+        for (int j = 0; j < length; j++)
+          if (file.readByte() != b)
+            throw new Exception("contents incorrect");
+      } finally {
+        file.close();                   // closed even when verification fails
+      }
+    }
+  }
+}
diff --git a/src/test/org/apache/lucene/ThreadSafetyTest.java b/src/test/org/apache/lucene/ThreadSafetyTest.java
new file mode 100644
index 00000000000..8da170225bd
--- /dev/null
+++ b/src/test/org/apache/lucene/ThreadSafetyTest.java
@@ -0,0 +1,243 @@
+package org.apache.lucene;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import com.lucene.store.*;
+import com.lucene.document.*;
+import com.lucene.analysis.*;
+import com.lucene.index.*;
+import com.lucene.search.*;
+import com.lucene.queryParser.*;
+
+import java.io.File;
+import java.util.Random;
+
+/**
+ * Stand-alone stress test: one thread keeps adding documents to the index in
+ * the "index" directory while three threads search it, all of them
+ * periodically reopening their writer or searcher.
+ */
+class ThreadSafetyTest {
+  private static final Analyzer ANALYZER = new SimpleAnalyzer();
+  private static final Random RANDOM = new Random();
+  // Searcher shared by the SearcherThreads created with useGlobal == true.
+  // It is reassigned by those threads without synchronization and a replaced
+  // instance is not closed, since another thread may still be using it.
+  private static Searcher SEARCHER;
+
+  /**
+   * Returns a pseudo-random value in the range 0 .. i-1 for positive i.
+   */
+  private static int random(int i) {              // for JDK 1.1 compatibility
+    // Reduce first, then drop the sign: negating the raw value would
+    // overflow for Integer.MIN_VALUE and yield a negative result.
+    int r = RANDOM.nextInt() % i;
+    return (r < 0) ? -r : r;
+  }
+
+  /** Adds 16K random documents, reopening the writer at a random interval. */
+  private static class IndexerThread extends Thread {
+    private final int reopenInterval = 30 + random(60);
+    IndexWriter writer;
+
+    public IndexerThread(IndexWriter writer) {
+      this.writer = writer;
+    }
+
+    public void run() {
+      try {
+        for (int i = 0; i < 1024*16; i++) {
+          Document d = new Document();
+          int n = RANDOM.nextInt();
+          d.add(Field.Keyword("id", Integer.toString(n)));
+          d.add(Field.UnStored("contents", intToEnglish(n)));
+          System.out.println("Adding " + n);
+          writer.addDocument(d);
+
+          if (i%reopenInterval == 0) {
+            writer.close();
+            writer = new IndexWriter("index", ANALYZER, false);
+          }
+        }
+        writer.close();                 // release the last writer opened above
+      } catch (Exception e) {
+        System.out.println(e.toString());
+        e.printStackTrace();
+        System.exit(1);                 // non-zero status: the test failed
+      }
+    }
+  }
+
+  /**
+   * Runs 8K random searches, reopening its searcher at a random interval.
+   * Uses a private searcher, or the shared SEARCHER when created with
+   * useGlobal == true.
+   */
+  private static class SearcherThread extends Thread {
+    private IndexSearcher searcher;
+    private final int reopenInterval = 10 + random(20);
+
+    public SearcherThread(boolean useGlobal) throws java.io.IOException {
+      if (!useGlobal)
+        this.searcher = new IndexSearcher("index");
+    }
+
+    public void run() {
+      try {
+        for (int i = 0; i < 1024*8; i++) {
+          searchFor(RANDOM.nextInt(), (searcher==null)?SEARCHER:searcher);
+          if (i%reopenInterval == 0) {
+            if (searcher == null) {
+              SEARCHER = new IndexSearcher("index");
+            } else {
+              searcher.close();
+              searcher = new IndexSearcher("index");
+            }
+          }
+        }
+        if (searcher != null)
+          searcher.close();             // release the private searcher
+      } catch (Exception e) {
+        System.out.println(e.toString());
+        e.printStackTrace();
+        System.exit(1);                 // non-zero status: the test failed
+      }
+    }
+
+    /** Searches "contents" for the English form of n and prints up to 3 hits. */
+    private void searchFor(int n, Searcher searcher)
+      throws Exception {
+      System.out.println("Searching for " + n);
+      Hits hits =
+        searcher.search(QueryParser.parse(intToEnglish(n), "contents",
+                                          ANALYZER));
+      System.out.println("Search for " + n + ": total=" + hits.length());
+      for (int j = 0; j < Math.min(3, hits.length()); j++) {
+        System.out.println("Hit for " + n + ": " + hits.doc(j).get("id"));
+      }
+    }
+  }
+
+  /**
+   * Creates a new index and starts the indexer and the three searcher
+   * threads; it returns without waiting for them to finish.
+   *
+   * @param args ignored
+   */
+  public static void main(String[] args) throws Exception {
+
+    IndexWriter writer = new IndexWriter("index", ANALYZER, true);
+
+    Thread indexerThread = new IndexerThread(writer);
+    indexerThread.start();
+
+    Thread.sleep(1000);
+
+    SearcherThread searcherThread1 = new SearcherThread(false);
+    searcherThread1.start();
+
+    SEARCHER = new IndexSearcher("index");
+
+    SearcherThread searcherThread2 = new SearcherThread(true);
+    searcherThread2.start();
+
+    SearcherThread searcherThread3 = new SearcherThread(true);
+    searcherThread3.start();
+  }
+
+  /**
+   * Spells out i in English, e.g. 1234 becomes
+   * "one thousand, two hundred thirty-four " and 0 becomes "".
+   */
+  private static String intToEnglish(int i) {
+    StringBuffer result = new StringBuffer();
+    intToEnglish(i, result);
+    return result.toString();
+  }
+
+  /** Appends the English spelling of i to result. */
+  private static void intToEnglish(int i, StringBuffer result) {
+    if (i == Integer.MIN_VALUE) {
+      // -i overflows for this value, so spell out 2,147,483,648 in two parts.
+      result.append("minus ");
+      intToEnglish(2, result);
+      result.append("billion, ");
+      intToEnglish(147483648, result);
+      return;
+    }
+    if (i < 0) {
+      result.append("minus ");
+      i = -i;
+    }
+    if (i >= 1000000000) {                        // billions
+      intToEnglish(i/1000000000, result);
+      result.append("billion, ");
+      i = i%1000000000;
+    }
+    if (i >= 1000000) {                           // millions
+      intToEnglish(i/1000000, result);
+      result.append("million, ");
+      i = i%1000000;
+    }
+    if (i >= 1000) {                              // thousands
+      intToEnglish(i/1000, result);
+      result.append("thousand, ");
+      i = i%1000;
+    }
+    if (i >= 100) {                               // hundreds
+      intToEnglish(i/100, result);
+      result.append("hundred ");
+      i = i%100;
+    }
+    if (i >= 20) {
+      switch (i/10) {
+      case 9 : result.append("ninety"); break;
+      case 8 : result.append("eighty"); break;
+      case 7 : result.append("seventy"); break;
+      case 6 : result.append("sixty"); break;
+      case 5 : result.append("fifty"); break;
+      case 4 : result.append("forty"); break;
+      case 3 : result.append("thirty"); break;
+      case 2 : result.append("twenty"); break;
+      }
+      i = i%10;
+      if (i == 0)
+        result.append(" ");
+      else
+        result.append("-");
+    }
+    switch (i) {
+    case 19 : result.append("nineteen "); break;
+    case 18 : result.append("eighteen "); break;
+    case 17 : result.append("seventeen "); break;
+    case 16 : result.append("sixteen "); break;
+    case 15 : result.append("fifteen "); break;
+    case 14 : result.append("fourteen "); break;
+    case 13 : result.append("thirteen "); break;
+    case 12 : result.append("twelve "); break;
+    case 11 : result.append("eleven "); break;
+    case 10 : result.append("ten "); break;
+    case 9 : result.append("nine "); break;
+    case 8 : result.append("eight "); break;
+    case 7 : result.append("seven "); break;
+    case 6 : result.append("six "); break;
+    case 5 : result.append("five "); break;
+    case 4 : result.append("four "); break;
+    case 3 : result.append("three "); break;
+    case 2 : result.append("two "); break;
+    case 1 : result.append("one "); break;
+    case 0 : result.append(""); break;
+    }
+  }
+}
diff --git a/src/test/org/apache/lucene/index/DocTest.java b/src/test/org/apache/lucene/index/DocTest.java
new file mode 100644
index 00000000000..704d6f4e8cc
--- /dev/null
+++ b/src/test/org/apache/lucene/index/DocTest.java
@@ -0,0 +1,156 @@
+package org.apache.lucene.index;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import org.apache.lucene.analysis.SimpleAnalyzer;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.document.Document;
+import demo.FileDocument;
+
+import java.io.File;
+import java.util.Date;
+
+
+/**
+ * Stand-alone test of the segment-level classes: writes two single-document
+ * segments into the "test" directory, merges them in several combinations
+ * and dumps the documents, terms and postings of each resulting segment.
+ */
+class DocTest {
+  /**
+   * Runs the index, merge and print sequence.  Failures are reported on
+   * standard output instead of being rethrown.
+   *
+   * @param args ignored
+   */
+  public static void main(String[] args) {
+    try {
+      // Opened with create == true and closed again before any segment is
+      // written; presumably this starts from an empty directory.
+      Directory directory = FSDirectory.getDirectory("test", true);
+      directory.close();
+
+      indexDoc("one", "test.txt");
+      printSegment("one");
+      indexDoc("two", "test2.txt");
+      printSegment("two");
+
+      merge("one", "two", "merge");
+      printSegment("merge");
+
+      merge("one", "two", "merge2");
+      printSegment("merge2");
+
+      merge("merge", "merge2", "merge3");
+      printSegment("merge3");
+
+    } catch (Exception e) {
+      System.out.println(" caught a " + e.getClass() +
+                         "\n with message: " + e.getMessage());
+    }
+  }
+
+  /**
+   * Writes the document built from fileName as the named segment.
+   *
+   * @param segment  name of the segment to write
+   * @param fileName file turned into a Document via FileDocument
+   */
+  public static void indexDoc(String segment, String fileName)
+       throws Exception {
+    Directory directory = FSDirectory.getDirectory("test", false);
+    try {
+      Analyzer analyzer = new SimpleAnalyzer();
+      DocumentWriter writer = new DocumentWriter(directory, analyzer, 1000);
+
+      File file = new File(fileName);
+      Document doc = FileDocument.Document(file);
+
+      writer.addDocument(segment, doc);
+    } finally {
+      directory.close();                // also runs when indexing fails
+    }
+  }
+
+  /**
+   * Merges segments seg1 and seg2 into a new segment named merged.
+   */
+  static void merge(String seg1, String seg2, String merged)
+       throws Exception {
+    Directory directory = FSDirectory.getDirectory("test", false);
+    try {
+      // NOTE(review): the readers are not closed here, as in the original;
+      // confirm whether SegmentMerger.merge() closes them.
+      SegmentReader r1 = new SegmentReader(new SegmentInfo(seg1, 1, directory));
+      SegmentReader r2 = new SegmentReader(new SegmentInfo(seg2, 1, directory));
+
+      SegmentMerger merger = new SegmentMerger(directory, merged);
+      merger.add(r1);
+      merger.add(r2);
+      merger.merge();
+    } finally {
+      directory.close();                // also runs when the merge fails
+    }
+  }
+
+  /**
+   * Prints every document of the segment, then each term with its document
+   * frequency and postings.
+   */
+  static void printSegment(String segment)
+       throws Exception {
+    Directory directory = FSDirectory.getDirectory("test", false);
+    try {
+      SegmentReader reader =
+        new SegmentReader(new SegmentInfo(segment, 1, directory));
+      try {
+        for (int i = 0; i < reader.numDocs(); i++)
+          System.out.println(reader.document(i));
+
+        TermEnum tis = reader.terms();
+        try {
+          while (tis.next())
+            printPostings(reader, tis);
+        } finally {
+          tis.close();
+        }
+      } finally {
+        reader.close();
+      }
+    } finally {
+      directory.close();
+    }
+  }
+
+  /**
+   * Prints the current term of tis, its document frequency and, for each
+   * document containing it, the term frequency and positions.
+   */
+  private static void printPostings(SegmentReader reader, TermEnum tis)
+       throws Exception {
+    System.out.print(tis.term());
+    System.out.println(" DF=" + tis.docFreq());
+
+    TermPositions positions = reader.termPositions(tis.term());
+    try {
+      while (positions.next()) {
+        System.out.print(" doc=" + positions.doc());
+        System.out.print(" TF=" + positions.freq());
+        System.out.print(" pos=");
+        System.out.print(positions.nextPosition());
+        for (int j = 1; j < positions.freq(); j++)
+          System.out.print("," + positions.nextPosition());
+        System.out.println("");
+      }
+    } finally {
+      positions.close();
+    }
+  }
+}
diff --git a/src/test/org/apache/lucene/index/TermInfosTest.java b/src/test/org/apache/lucene/index/TermInfosTest.java
new file mode 100644
index 00000000000..9df881d57c9
--- /dev/null
+++ b/src/test/org/apache/lucene/index/TermInfosTest.java
@@ -0,0 +1,220 @@
+package org.apache.lucene.index;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+
+import java.util.Date;
+import java.util.Random;
+import java.util.Vector;
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+import java.io.File;
+import java.io.FileInputStream;
+
+/** Stand-alone timing and consistency check for TermInfosWriter and
+ * TermInfosReader.  Each line of <code>words.txt</code> becomes a term in
+ * field "word"; generated term infos are written to a table in
+ * <code>test.store</code> and read back by enumeration and by lookup.
+ * The enumeration is expected to return the terms in file order. */
+class TermInfosTest {
+  /** Runs {@link #test()} and prints any exception to standard output. */
+  public static void main(String[] args) {
+    try {
+      test();
+    } catch (Exception e) {
+      System.out.println(" caught a " + e.getClass() +
+                         "\n with message: " + e.getMessage());
+    }
+  }
+
+  /** Writes the term table, reads it back two ways and prints timings.
+   * @throws Exception if a file or the store cannot be accessed, or if a
+   * term or term info read back differs from what was written */
+  public static void test() throws Exception {
+    File file = new File("words.txt");
+    System.out.println(" reading word file containing " +
+                       file.length() + " bytes");
+
+    Date start = new Date();
+
+    Vector keys = new Vector();
+    BufferedReader wr =
+      new BufferedReader(new InputStreamReader(new FileInputStream(file)));
+    try {
+      for (String key = wr.readLine(); key != null; key = wr.readLine())
+        keys.addElement(new Term("word", key));
+    } finally {
+      wr.close();                         // release the file on failure too
+    }
+
+    Date end = new Date();
+
+    System.out.print(end.getTime() - start.getTime());
+    System.out.println(" milliseconds to read " + keys.size() + " words");
+
+    start = new Date();
+
+    Random gen = new Random(1251971);     // fixed seed: repeatable values
+    long fp = (gen.nextInt() & 0xF) + 1;
+    long pp = (gen.nextInt() & 0xF) + 1;
+    int[] docFreqs = new int[keys.size()];
+    long[] freqPointers = new long[keys.size()];
+    long[] proxPointers = new long[keys.size()];
+    for (int i = 0; i < keys.size(); i++) {
+      docFreqs[i] = (gen.nextInt() & 0xF) + 1;
+      freqPointers[i] = fp;
+      proxPointers[i] = pp;
+      fp += (gen.nextInt() & 0xF) + 1;    // each pointer grows by 1..16
+      pp += (gen.nextInt() & 0xF) + 1;
+    }
+
+    end = new Date();
+
+    System.out.print(end.getTime() - start.getTime());
+    System.out.println(" milliseconds to generate values");
+
+    start = new Date();
+
+    Directory store = new FSDirectory("test.store", true);
+    FieldInfos fis = new FieldInfos();
+
+    TermInfosWriter writer = new TermInfosWriter(store, "words", fis);
+    fis.add("word", false);
+
+    for (int i = 0; i < keys.size(); i++)
+      writer.add((Term)keys.elementAt(i),
+                 new TermInfo(docFreqs[i], freqPointers[i], proxPointers[i]));
+
+    writer.close();
+
+    end = new Date();
+
+    System.out.print(end.getTime() - start.getTime());
+    System.out.println(" milliseconds to write table");
+
+    System.out.println(" table occupies " +
+                       store.fileLength("words.tis") + " bytes");
+
+    start = new Date();
+
+    TermInfosReader reader = new TermInfosReader(store, "words", fis);
+
+    end = new Date();
+
+    System.out.print(end.getTime() - start.getTime());
+    System.out.println(" milliseconds to open table");
+
+    start = new Date();
+
+    // sequential pass: terms and their infos must come back in write order
+    SegmentTermEnum termEnum = (SegmentTermEnum)reader.terms();
+    for (int i = 0; i < keys.size(); i++) {
+      termEnum.next();
+      Term key = (Term)keys.elementAt(i);
+      if (!key.equals(termEnum.term()))
+        throw new Exception("wrong term: " + termEnum.term()
+                            + ", expected: " + key
+                            + " at " + i);
+      checkTermInfo(termEnum.termInfo(),
+                    docFreqs[i], freqPointers[i], proxPointers[i], i);
+    }
+
+    end = new Date();
+
+    System.out.print(end.getTime() - start.getTime());
+    System.out.println(" milliseconds to iterate over " +
+                       keys.size() + " words");
+
+    start = new Date();
+
+    // random-access pass: look every term up by key
+    for (int i = 0; i < keys.size(); i++)
+      checkTermInfo(reader.get((Term)keys.elementAt(i)),
+                    docFreqs[i], freqPointers[i], proxPointers[i], i);
+
+    end = new Date();
+
+    System.out.print((end.getTime() - start.getTime()) / (float)keys.size());
+    System.out.println(" average milliseconds per lookup");
+
+    TermEnum e = reader.terms(new Term("word", "azz"));
+    System.out.println("Word after azz is " + e.term().text());
+
+    reader.close();
+
+    store.close();
+  }
+
+  /** Throws unless <code>ti</code> holds the values written for word
+   * <code>i</code>; mismatching values are reported in hexadecimal. */
+  private static void checkTermInfo(TermInfo ti, int docFreq,
+                                    long freqPointer, long proxPointer, int i)
+       throws Exception {
+    if (ti.docFreq != docFreq)
+      throw new Exception("wrong value: " + Long.toString(ti.docFreq, 16)
+                          + ", expected: " + Long.toString(docFreq, 16)
+                          + " at " + i);
+    if (ti.freqPointer != freqPointer)
+      throw new Exception("wrong value: " + Long.toString(ti.freqPointer, 16)
+                          + ", expected: " + Long.toString(freqPointer, 16)
+                          + " at " + i);
+    if (ti.proxPointer != proxPointer)
+      throw new Exception("wrong value: " + Long.toString(ti.proxPointer, 16)
+                          + ", expected: " + Long.toString(proxPointer, 16)
+                          + " at " + i);
+  }
+}
diff --git a/src/test/org/apache/lucene/queryParser/TestQueryParser.java b/src/test/org/apache/lucene/queryParser/TestQueryParser.java
new file mode 100644
index 00000000000..a38f2dc3a5c
--- /dev/null
+++ b/src/test/org/apache/lucene/queryParser/TestQueryParser.java
@@ -0,0 +1,189 @@
+package org.apache.lucene.queryParser;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.*;
+import junit.framework.*;
+
+import com.lucene.*;
+import com.lucene.queryParser.*;
+import com.lucene.search.*;
+import com.lucene.analysis.*;
+import com.lucene.analysis.Token;
+
+/** JUnit tests comparing parsed queries with their expected string form. */
+public class TestQueryParser extends TestCase {
+  public TestQueryParser(String name) {
+    super(name);
+  }
+
+  public static Analyzer qpAnalyzer = new QPTestAnalyzer();
+
+  /**
+   * Filter which discards the token 'stop' and which expands the
+   * token 'phrase' into 'phrase1 phrase2'
+   */
+  public static class QPTestFilter extends TokenFilter {
+    public QPTestFilter(TokenStream in) {
+      input = in;
+    }
+
+    boolean inPhrase = false;             // true while 'phrase2' is pending
+    int savedStart=0, savedEnd=0;         // offsets of the last 'phrase'
+
+    public Token next() throws IOException {
+      if (inPhrase) {
+        inPhrase = false;
+        return new Token("phrase2", savedStart, savedEnd);
+      }
+      else
+        for (Token token = input.next(); token != null; token = input.next())
+          if (token.termText().equals("phrase")) {
+            inPhrase = true;
+            savedStart = token.startOffset();
+            savedEnd = token.endOffset();
+            return new Token("phrase1", savedStart, savedEnd);
+          }
+          else if (!token.termText().equals("stop"))
+            return token;
+      return null;
+    }
+  }
+
+  public static class QPTestAnalyzer extends Analyzer {
+    public QPTestAnalyzer() {
+    }
+
+    /** Filters LowerCaseTokenizer with QPTestFilter. */
+    public final TokenStream tokenStream(String fieldName, Reader reader) {
+      return new QPTestFilter(new LowerCaseTokenizer(reader));
+    }
+  }
+
+  /** Empty placeholder; nothing in this class calls it. */
+  public void init () throws Exception
+  {
+  }
+
+  /** Parses <code>query</code> with default field "field" and fails the test
+   * unless the parsed query prints as <code>result</code>.
+   * @param a analyzer to parse with; a SimpleAnalyzer is used when null */
+  public void assertQueryEquals(String query, Analyzer a, String result)
+       throws Exception {
+    if (a == null)
+      a = new SimpleAnalyzer();
+    QueryParser qp = new QueryParser("field", a);
+    Query q = qp.parse(query);
+    String s = q.toString("field");
+    if (!s.equals(result)) {
+      String message = "Query /" + query + "/ yielded /" + s
+        + "/, expecting /" + result + "/";
+      System.err.println(message);
+      // not assert(false): since Java 1.4 that is a keyword, a no-op without -ea
+      fail(message);
+    }
+  }
+
+  public void testSimple() throws Exception {
+    assertQueryEquals("term term term", null, "term term term");
+    assertQueryEquals("term term1 term2", null, "term term term");
+    assertQueryEquals("term 1.0 1 2", null, "term");
+
+    assertQueryEquals("a AND b", null, "+a +b");
+    assertQueryEquals("a AND NOT b", null, "+a -b");
+    assertQueryEquals("a AND -b", null, "+a -b");
+    assertQueryEquals("a AND !b", null, "+a -b");
+    assertQueryEquals("a && b", null, "+a +b");
+    assertQueryEquals("a&&b", null, "+a +b");
+    assertQueryEquals("a && ! b", null, "+a -b");
+
+    assertQueryEquals("a OR b", null, "a b");
+    assertQueryEquals("a || b", null, "a b");
+    assertQueryEquals("a OR !b", null, "a -b");
+    assertQueryEquals("a OR ! b", null, "a -b");
+    assertQueryEquals("a OR -b", null, "a -b");
+
+    assertQueryEquals("+term -term term", null, "+term -term term");
+    assertQueryEquals("foo:term AND field:anotherTerm", null,
+                      "+foo:term +anotherterm");
+    assertQueryEquals("term AND \"phrase phrase\"", null,
+                      "+term +\"phrase phrase\"");
+
+    assertQueryEquals("germ term^2.0", null, "germ term^2.0");
+    assertQueryEquals("term^2.0", null, "term^2.0");
+
+    assertQueryEquals("(foo OR bar) AND (baz OR boo)", null,
+                      "+(foo bar) +(baz boo)");
+    assertQueryEquals("((a OR b) AND NOT c) OR d", null,
+                      "(+(a b) -c) d");
+    assertQueryEquals("+(apple \"steve jobs\") -(foo bar baz)", null,
+                      "+(apple \"steve jobs\") -(foo bar baz)");
+    assertQueryEquals("+title:(dog OR cat) -author:\"bob dole\"", null,
+                      "+(title:dog title:cat) -author:\"bob dole\"");
+  }
+
+  public void testQPA() throws Exception {
+    assertQueryEquals("term term term", qpAnalyzer, "term term term");
+    assertQueryEquals("term +stop term", qpAnalyzer, "term term");
+    assertQueryEquals("term -stop term", qpAnalyzer, "term term");
+    assertQueryEquals("drop AND stop AND roll", qpAnalyzer, "+drop +roll");
+    assertQueryEquals("term phrase term", qpAnalyzer,
+                      "term \"phrase1 phrase2\" term");
+    assertQueryEquals("term AND NOT phrase term", qpAnalyzer,
+                      "+term -\"phrase1 phrase2\" term");
+    assertQueryEquals("stop", qpAnalyzer, "");
+  }
+}
+
diff --git a/src/test/org/apache/lucene/util/PriorityQueueTest.java b/src/test/org/apache/lucene/util/PriorityQueueTest.java
new file mode 100644
index 00000000000..4c487b77cda
--- /dev/null
+++ b/src/test/org/apache/lucene/util/PriorityQueueTest.java
@@ -0,0 +1,97 @@
+package org.apache.lucene.util;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * .
+ */
+
+import java.util.Date;
+import java.util.Random;
+
+/** Times <code>put</code> and <code>pop</code> on a PriorityQueue of random
+ * Integers and checks that values are popped in non-decreasing order. */
+class PriorityQueueTest {
+  public static void main(String[] args) {
+    test(10000);
+  }
+
+  /** Puts <code>count</code> random Integers, pops them all again and prints
+   * the average time per operation.
+   * @throws Error if a popped value is smaller than the one before it */
+  public static void test(int count) {
+    // NOTE(review): relies on PriorityQueue(int) popping Integers smallest
+    // first; that class is not visible from this file -- confirm
+    PriorityQueue pq = new PriorityQueue(count);
+    Random gen = new Random();
+    Date start = new Date();
+    for (int i = 0; i < count; i++)
+      pq.put(new Integer(gen.nextInt()));
+    Date end = new Date();
+
+    System.out.print(((float)(end.getTime()-start.getTime()) / count) * 1000);
+    System.out.println(" microseconds/put");
+
+    start = new Date();
+    int last = Integer.MIN_VALUE;
+    for (int i = 0; i < count; i++) {
+      int next = ((Integer)pq.pop()).intValue();
+      // equal neighbours are in order: nextInt() may repeat or be MIN_VALUE
+      if (next < last)
+        throw new Error("out of order");
+      last = next;
+    }
+    end = new Date();
+
+    System.out.print(((float)(end.getTime()-start.getTime()) / count) * 1000);
+    System.out.println(" microseconds/pop");
+  }
+}