diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 287beeb5ace..921fdd012a7 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -745,6 +745,10 @@ New Features * LUCENE-3586: CheckIndex and IndexUpgrader allow you to specify the specific FSDirectory implementation to use (with the new -dir-impl command-line option). (Luca Cavanna via Mike McCandless) + +* LUCENE-3634: IndexReader's static main method was moved to a new + tool, CompoundFileExtractor, in contrib/misc. (Robert Muir, Mike + McCandless) Bug fixes diff --git a/lucene/contrib/CHANGES.txt b/lucene/contrib/CHANGES.txt index 6891fee8fe3..c5db1bc1994 100644 --- a/lucene/contrib/CHANGES.txt +++ b/lucene/contrib/CHANGES.txt @@ -108,6 +108,9 @@ New Features bigrams are formed for each of Han/Hiragana/Katakana/Hangul independently. Deprecates CJKTokenizer. (Tom Burton-West, Robert Muir) +* LUCENE-3634: IndexReader's static main method was moved to a new + tool, CompoundFileExtractor, in contrib/misc. (Mike McCandless) + API Changes * LUCENE-3596: DirectoryTaxonomyWriter.openIndexWriter() now takes an diff --git a/lucene/contrib/misc/src/java/org/apache/lucene/index/CompoundFileExtractor.java b/lucene/contrib/misc/src/java/org/apache/lucene/index/CompoundFileExtractor.java new file mode 100644 index 00000000000..a13e09b584f --- /dev/null +++ b/lucene/contrib/misc/src/java/org/apache/lucene/index/CompoundFileExtractor.java @@ -0,0 +1,129 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.index; + +/** + * Prints the filename and size of each file within a given compound file. + * Add the -extract flag to extract files to the current working directory. + * In order to make the extracted version of the index work, you have to copy + * the segments file from the compound index into the directory where the extracted files are stored. + * @param args Usage: org.apache.lucene.index.IndexReader [-extract] <cfsfile> + */ + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; + +import org.apache.lucene.store.CompoundFileDirectory; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.CommandLineUtil; + +public class CompoundFileExtractor { + + public static void main(String [] args) { + String filename = null; + boolean extract = false; + String dirImpl = null; + + int j = 0; + while(j < args.length) { + String arg = args[j]; + if ("-extract".equals(arg)) { + extract = true; + } else if ("-dir-impl".equals(arg)) { + if (j == args.length - 1) { + System.out.println("ERROR: missing value for -dir-impl option"); + System.exit(1); + } + j++; + dirImpl = args[j]; + } else if (filename == null) { + filename = arg; + } + j++; + } + + if (filename == null) { + System.out.println("Usage: org.apache.lucene.index.IndexReader [-extract] [-dir-impl X] "); + return; + } + + Directory dir = null; + CompoundFileDirectory cfr = null; + IOContext context = IOContext.READ; + + try { + File file = new File(filename); + String dirname = file.getAbsoluteFile().getParent(); + filename = file.getName(); + if (dirImpl == null) { + dir = FSDirectory.open(new File(dirname)); + } else { + dir = CommandLineUtil.newFSDirectory(dirImpl, new File(dirname)); + } + + cfr = new CompoundFileDirectory(dir, filename, IOContext.DEFAULT, false); + + String [] files = cfr.listAll(); + ArrayUtil.mergeSort(files); // sort the array of filename so that the output is more readable + + for (int i = 0; i < files.length; ++i) { + long len = cfr.fileLength(files[i]); + + if (extract) { + System.out.println("extract " + files[i] + " with " + len + " bytes to local directory..."); + IndexInput ii = cfr.openInput(files[i], context); + + FileOutputStream f = new FileOutputStream(files[i]); + + // read and write with a small buffer, which is more effective than reading byte by byte + byte[] buffer = new byte[1024]; + int chunk = buffer.length; + while(len > 0) { + final int bufLen = (int) Math.min(chunk, len); + ii.readBytes(buffer, 0, bufLen); + f.write(buffer, 0, bufLen); + len -= bufLen; + } + + f.close(); + ii.close(); + } + else + System.out.println(files[i] + ": " + len + " bytes"); + } + } catch (IOException ioe) { + ioe.printStackTrace(); + } + finally { + try { + if (dir != null) + dir.close(); + if (cfr != null) + cfr.close(); + } + catch (IOException ioe) { + ioe.printStackTrace(); + } + } + } +} diff --git a/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/tier/DistanceHandler.java b/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/tier/DistanceHandler.java index 836dab3bd03..9770db4f6b2 100644 --- a/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/tier/DistanceHandler.java +++ b/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/tier/DistanceHandler.java @@ -19,7 +19,6 @@ package org.apache.lucene.spatial.tier; import org.apache.lucene.spatial.DistanceUtils; -import java.util.HashMap; import java.util.Map; /** @@ -97,10 +96,4 @@ public class DistanceHandler { //all else fails calculate the distances return DistanceUtils.getDistanceMi(centerLat, centerLng, lat, lng); } - - - public static void main(String args[]){ - DistanceHandler db = new DistanceHandler(new HashMap(), new HashMap(), Precision.TWOHUNDREDFEET); - System.out.println(DistanceHandler.getPrecision(-1234.123456789, db.getPrecision())); - } } diff --git a/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/DistanceCheck.java b/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/DistanceCheck.java deleted file mode 100644 index 71c5f02beb1..00000000000 --- a/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/DistanceCheck.java +++ /dev/null @@ -1,51 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.lucene.spatial.tier; - -import org.apache.lucene.spatial.DistanceUtils; - -import java.text.DecimalFormat; - - -public class DistanceCheck { - - /** - * @param args - */ - public static void main(String[] args) { - double lat1 = 0; - double long1 = 0; - double lat2 = 0; - double long2 = 0; - - for (int i =0; i < 90; i++){ - double dis = DistanceUtils.getDistanceMi(lat1, long1, lat2, long2); - lat1 +=1; - lat2 = lat1 + 0.001; - - System.out.println(lat1+","+long1+","+lat2+","+long2+","+formatDistance(dis)); - - } - - } - - public static String formatDistance (Double d){ - DecimalFormat df1 = new DecimalFormat("####.000000"); - return df1.format(d); - } - -} diff --git a/lucene/src/java/org/apache/lucene/index/IndexReader.java b/lucene/src/java/org/apache/lucene/index/IndexReader.java index 28b94c45892..2589c982c32 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexReader.java +++ b/lucene/src/java/org/apache/lucene/index/IndexReader.java @@ -18,8 +18,6 @@ package org.apache.lucene.index; */ import java.io.Closeable; -import java.io.File; -import java.io.FileOutputStream; import java.io.IOException; import java.util.Collection; import java.util.Collections; @@ -33,10 +31,8 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.DocumentStoredFieldVisitor; import org.apache.lucene.search.SearcherManager; // javadocs import org.apache.lucene.store.*; -import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.CommandLineUtil; import org.apache.lucene.util.ReaderUtil; // for javadocs /** IndexReader is an abstract class, providing an interface for accessing an @@ -942,101 +938,6 @@ public abstract class IndexReader implements Closeable { throw new UnsupportedOperationException("This reader does not support this method."); } - /** - * Prints the filename and size of each file within a given compound file. - * Add the -extract flag to extract files to the current working directory. - * In order to make the extracted version of the index work, you have to copy - * the segments file from the compound index into the directory where the extracted files are stored. - * @param args Usage: org.apache.lucene.index.IndexReader [-extract] <cfsfile> - */ - public static void main(String [] args) { - String filename = null; - boolean extract = false; - String dirImpl = null; - - int j = 0; - while(j < args.length) { - String arg = args[j]; - if ("-extract".equals(arg)) { - extract = true; - } else if ("-dir-impl".equals(arg)) { - if (j == args.length - 1) { - System.out.println("ERROR: missing value for -dir-impl option"); - System.exit(1); - } - j++; - dirImpl = args[j]; - } else if (filename == null) { - filename = arg; - } - j++; - } - - if (filename == null) { - System.out.println("Usage: org.apache.lucene.index.IndexReader [-extract] [-dir-impl X] "); - return; - } - - Directory dir = null; - CompoundFileDirectory cfr = null; - IOContext context = IOContext.READ; - - try { - File file = new File(filename); - String dirname = file.getAbsoluteFile().getParent(); - filename = file.getName(); - if (dirImpl == null) { - dir = FSDirectory.open(new File(dirname)); - } else { - dir = CommandLineUtil.newFSDirectory(dirImpl, new File(dirname)); - } - - cfr = new CompoundFileDirectory(dir, filename, IOContext.DEFAULT, false); - - String [] files = cfr.listAll(); - ArrayUtil.mergeSort(files); // sort the array of filename so that the output is more readable - - for (int i = 0; i < files.length; ++i) { - long len = cfr.fileLength(files[i]); - - if (extract) { - System.out.println("extract " + files[i] + " with " + len + " bytes to local directory..."); - IndexInput ii = cfr.openInput(files[i], context); - - FileOutputStream f = new FileOutputStream(files[i]); - - // read and write with a small buffer, which is more effective than reading byte by byte - byte[] buffer = new byte[1024]; - int chunk = buffer.length; - while(len > 0) { - final int bufLen = (int) Math.min(chunk, len); - ii.readBytes(buffer, 0, bufLen); - f.write(buffer, 0, bufLen); - len -= bufLen; - } - - f.close(); - ii.close(); - } - else - System.out.println(files[i] + ": " + len + " bytes"); - } - } catch (IOException ioe) { - ioe.printStackTrace(); - } - finally { - try { - if (dir != null) - dir.close(); - if (cfr != null) - cfr.close(); - } - catch (IOException ioe) { - ioe.printStackTrace(); - } - } - } - /** Returns all commit points that exist in the Directory. * Normally, because the default is {@link * KeepOnlyLastCommitDeletionPolicy}, there would be only diff --git a/lucene/src/java/org/apache/lucene/util/automaton/UTF32ToUTF8.java b/lucene/src/java/org/apache/lucene/util/automaton/UTF32ToUTF8.java index 522ef681025..fbe42db5826 100644 --- a/lucene/src/java/org/apache/lucene/util/automaton/UTF32ToUTF8.java +++ b/lucene/src/java/org/apache/lucene/util/automaton/UTF32ToUTF8.java @@ -310,17 +310,4 @@ public final class UTF32ToUTF8 { utf8StateCount++; return s; } - - public static void main(String[] args) { - final int startCode = Integer.parseInt(args[0]); - final int endCode = Integer.parseInt(args[1]); - - Automaton a = new Automaton(); - State start = a.getInitialState(); - State end = new State(); - end.setAccept(true); - - UTF32ToUTF8 converter = new UTF32ToUTF8(); - converter.convertOneEdge(start, end, startCode, endCode); - } } diff --git a/lucene/src/java/org/apache/lucene/util/English.java b/lucene/src/test-framework/java/org/apache/lucene/util/English.java similarity index 97% rename from lucene/src/java/org/apache/lucene/util/English.java rename to lucene/src/test-framework/java/org/apache/lucene/util/English.java index 21da4d17098..21dc26c9695 100644 --- a/lucene/src/java/org/apache/lucene/util/English.java +++ b/lucene/src/test-framework/java/org/apache/lucene/util/English.java @@ -182,9 +182,4 @@ public final class English { public static void intToEnglish(int i, StringBuilder result) { longToEnglish(i, result); } - - public static void main(String[] args) { - System.out.println(longToEnglish(Long.parseLong(args[0]))); - } - } diff --git a/lucene/src/test/org/apache/lucene/TestSearch.java b/lucene/src/test/org/apache/lucene/TestSearch.java index 693d672f09d..5ec455c4a61 100644 --- a/lucene/src/test/org/apache/lucene/TestSearch.java +++ b/lucene/src/test/org/apache/lucene/TestSearch.java @@ -24,8 +24,6 @@ import java.io.PrintWriter; import java.io.StringWriter; import org.apache.lucene.util.LuceneTestCase; -import junit.framework.TestSuite; -import junit.textui.TestRunner; import org.apache.lucene.store.*; import org.apache.lucene.document.*; @@ -36,11 +34,6 @@ import org.apache.lucene.search.*; /** JUnit adaptation of an older test case SearchTest. */ public class TestSearch extends LuceneTestCase { - /** Main for running test case by itself. */ - public static void main(String args[]) { - TestRunner.run (new TestSuite(TestSearch.class)); - } - /** This test performs a number of searches. It also compares output * of searches using multi-file index segments with single-file * index segments. diff --git a/lucene/src/test/org/apache/lucene/TestSearchForDuplicates.java b/lucene/src/test/org/apache/lucene/TestSearchForDuplicates.java index aba30ead730..23d92bd59c5 100644 --- a/lucene/src/test/org/apache/lucene/TestSearchForDuplicates.java +++ b/lucene/src/test/org/apache/lucene/TestSearchForDuplicates.java @@ -28,18 +28,9 @@ import org.apache.lucene.analysis.*; import org.apache.lucene.index.*; import org.apache.lucene.search.*; import org.apache.lucene.util.LuceneTestCase; -import junit.framework.TestSuite; -import junit.textui.TestRunner; public class TestSearchForDuplicates extends LuceneTestCase { - /** Main for running test case by itself. */ - public static void main(String args[]) { - TestRunner.run (new TestSuite(TestSearchForDuplicates.class)); - } - - - static final String PRIORITY_FIELD ="priority"; static final String ID_FIELD ="id"; static final String HIGH_PRIORITY ="high"; diff --git a/lucene/src/test/org/apache/lucene/index/TestDoc.java b/lucene/src/test/org/apache/lucene/index/TestDoc.java index 07a2a7f0976..6967dcc8d60 100644 --- a/lucene/src/test/org/apache/lucene/index/TestDoc.java +++ b/lucene/src/test/org/apache/lucene/index/TestDoc.java @@ -26,8 +26,6 @@ import java.io.StringWriter; import java.util.LinkedList; import java.util.Collection; -import junit.framework.TestSuite; -import junit.textui.TestRunner; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.codecs.Codec; @@ -45,11 +43,6 @@ import org.apache.lucene.util._TestUtil; /** JUnit adaptation of an older test case DocTest. */ public class TestDoc extends LuceneTestCase { - /** Main for running test case by itself. */ - public static void main(String args[]) { - TestRunner.run (new TestSuite(TestDoc.class)); - } - private File workDir; private File indexDir; private LinkedList files; diff --git a/lucene/src/test/org/apache/lucene/util/TestSortedVIntList.java b/lucene/src/test/org/apache/lucene/util/TestSortedVIntList.java index cef1e7c4fa0..07ecb24ba79 100644 --- a/lucene/src/test/org/apache/lucene/util/TestSortedVIntList.java +++ b/lucene/src/test/org/apache/lucene/util/TestSortedVIntList.java @@ -20,16 +20,10 @@ package org.apache.lucene.util; import java.io.IOException; import java.util.BitSet; -import junit.framework.TestSuite; -import junit.textui.TestRunner; import org.apache.lucene.search.DocIdSetIterator; public class TestSortedVIntList extends LuceneTestCase { - /** Main for running test case by itself. */ - public static void main(String args[]) { - TestRunner.run(new TestSuite(TestSortedVIntList.class)); - } void tstIterator ( SortedVIntList vintList, diff --git a/modules/benchmark/build.xml b/modules/benchmark/build.xml index 8eadf1788de..7da8bf68e0c 100644 --- a/modules/benchmark/build.xml +++ b/modules/benchmark/build.xml @@ -154,6 +154,7 @@ + diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LongToEnglishContentSource.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LongToEnglishContentSource.java index 4d20e91b3fd..8dcedc94755 100644 --- a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LongToEnglishContentSource.java +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LongToEnglishContentSource.java @@ -17,10 +17,11 @@ package org.apache.lucene.benchmark.byTask.feeds; * limitations under the License. */ -import org.apache.lucene.util.English; - import java.io.IOException; import java.util.Date; +import java.util.Locale; + +import com.ibm.icu.text.RuleBasedNumberFormat; /** * Creates documents whose content is a long number starting from @@ -32,7 +33,10 @@ public class LongToEnglishContentSource extends ContentSource{ @Override public void close() throws IOException { } - + + // TODO: we could take param to specify locale... + private final RuleBasedNumberFormat rnbf = new RuleBasedNumberFormat(Locale.ENGLISH, + RuleBasedNumberFormat.SPELLOUT); @Override public synchronized DocData getNextDocData(DocData docData) throws NoMoreDataException, IOException { docData.clear(); @@ -46,7 +50,8 @@ public class LongToEnglishContentSource extends ContentSource{ ++counter; } } - docData.setBody(English.longToEnglish(curCounter)); + + docData.setBody(rnbf.format(curCounter)); docData.setName("doc_" + String.valueOf(curCounter)); docData.setTitle("title_" + String.valueOf(curCounter)); docData.setDate(new Date()); diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LongToEnglishQueryMaker.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LongToEnglishQueryMaker.java index d97cde525e4..7b414491540 100644 --- a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LongToEnglishQueryMaker.java +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LongToEnglishQueryMaker.java @@ -1,3 +1,5 @@ +package org.apache.lucene.benchmark.byTask.feeds; + /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -15,7 +17,7 @@ * limitations under the License. */ -package org.apache.lucene.benchmark.byTask.feeds; +import java.util.Locale; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; @@ -23,9 +25,8 @@ import org.apache.lucene.benchmark.byTask.tasks.NewAnalyzerTask; import org.apache.lucene.benchmark.byTask.utils.Config; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.Query; -import org.apache.lucene.util.English; import org.apache.lucene.util.Version; - +import com.ibm.icu.text.RuleBasedNumberFormat; /** * @@ -35,13 +36,16 @@ public class LongToEnglishQueryMaker implements QueryMaker { long counter = Long.MIN_VALUE + 10; protected QueryParser parser; + // TODO: we could take param to specify locale... + private final RuleBasedNumberFormat rnbf = new RuleBasedNumberFormat(Locale.ENGLISH, + RuleBasedNumberFormat.SPELLOUT); + public Query makeQuery(int size) throws Exception { throw new UnsupportedOperationException(); } public synchronized Query makeQuery() throws Exception { - - return parser.parse("" + English.longToEnglish(getNextCounter()) + ""); + return parser.parse("" + rnbf.format(getNextCounter()) + ""); } private synchronized long getNextCounter() {