mirror of https://github.com/apache/lucene.git
- Limit to an optional field; reindented (die tabs, die), ASF License 2.0
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@376393 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
646d9646cb
commit
93657ca63a
|
@ -1,58 +1,20 @@
|
||||||
package org.apache.lucene.misc;
|
package org.apache.lucene.misc;
|
||||||
|
|
||||||
/* ====================================================================
|
/**
|
||||||
* The Apache Software License, Version 1.1
|
* Copyright 2004 The Apache Software Foundation
|
||||||
*
|
*
|
||||||
* Copyright (c) 2001,2004 The Apache Software Foundation. All rights
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
* reserved.
|
* you may not use this file except in compliance with the License.
|
||||||
*
|
* You may obtain a copy of the License at
|
||||||
* Redistribution and use in source and binary forms, with or without
|
*
|
||||||
* modification, are permitted provided that the following conditions
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
* are met:
|
*
|
||||||
*
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* 1. Redistributions of source code must retain the above copyright
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
*
|
* See the License for the specific language governing permissions and
|
||||||
* 2. Redistributions in binary form must reproduce the above copyright
|
* limitations under the License.
|
||||||
* notice, this list of conditions and the following disclaimer in
|
*/
|
||||||
* the documentation and/or other materials provided with the
|
|
||||||
* distribution.
|
|
||||||
*
|
|
||||||
* 3. The end-user documentation included with the redistribution,
|
|
||||||
* if any, must include the following acknowledgment:
|
|
||||||
* "This product includes software developed by the
|
|
||||||
* Apache Software Foundation (http://www.apache.org/)."
|
|
||||||
* Alternately, this acknowledgment may appear in the software itself,
|
|
||||||
* if and wherever such third-party acknowledgments normally appear.
|
|
||||||
*
|
|
||||||
* 4. The names "Apache" and "Apache Software Foundation" and
|
|
||||||
* "Apache Lucene" must not be used to endorse or promote products
|
|
||||||
* derived from this software without prior written permission. For
|
|
||||||
* written permission, please contact apache@apache.org.
|
|
||||||
*
|
|
||||||
* 5. Products derived from this software may not be called "Apache",
|
|
||||||
* "Apache Lucene", nor may "Apache" appear in their name, without
|
|
||||||
* prior written permission of the Apache Software Foundation.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
|
|
||||||
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
|
||||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
||||||
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
|
|
||||||
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
||||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
|
||||||
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
|
||||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
||||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
|
||||||
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
||||||
* SUCH DAMAGE.
|
|
||||||
* ====================================================================
|
|
||||||
*
|
|
||||||
* This software consists of voluntary contributions made by many
|
|
||||||
* individuals on behalf of the Apache Software Foundation. For more
|
|
||||||
* information on the Apache Software Foundation, please see
|
|
||||||
* <http://www.apache.org/>.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
|
@ -66,58 +28,69 @@ import org.apache.lucene.util.PriorityQueue;
|
||||||
* @version $Id$
|
* @version $Id$
|
||||||
*/
|
*/
|
||||||
public class HighFreqTerms {
|
public class HighFreqTerms {
|
||||||
|
|
||||||
// The top numTerms will be displayed
|
// The top numTerms will be displayed
|
||||||
public static final int numTerms = 100;
|
public static final int numTerms = 100;
|
||||||
|
|
||||||
public static void main(String[] args) throws Exception {
|
public static void main(String[] args) throws Exception {
|
||||||
IndexReader reader = null;
|
IndexReader reader = null;
|
||||||
if (args.length == 1) {
|
String field = null;
|
||||||
reader = IndexReader.open(args[0]);
|
if (args.length == 1) {
|
||||||
} else {
|
reader = IndexReader.open(args[0]);
|
||||||
usage();
|
} else if (args.length == 2) {
|
||||||
System.exit(1);
|
reader = IndexReader.open(args[0]);
|
||||||
}
|
field = args[1];
|
||||||
|
} else {
|
||||||
|
usage();
|
||||||
|
System.exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
TermInfoQueue tiq = new TermInfoQueue(numTerms);
|
TermInfoQueue tiq = new TermInfoQueue(numTerms);
|
||||||
TermEnum terms = reader.terms();
|
TermEnum terms = reader.terms();
|
||||||
|
|
||||||
while (terms.next()) {
|
if (field != null) {
|
||||||
tiq.insert(new TermInfo(terms.term(), terms.docFreq()));
|
while (terms.next()) {
|
||||||
}
|
if (terms.term().field().equals(field)) {
|
||||||
|
tiq.insert(new TermInfo(terms.term(), terms.docFreq()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
while (terms.next()) {
|
||||||
|
tiq.insert(new TermInfo(terms.term(), terms.docFreq()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
while (tiq.size() != 0) {
|
||||||
|
TermInfo termInfo = (TermInfo) tiq.pop();
|
||||||
|
System.out.println(termInfo.term + " " + termInfo.docFreq);
|
||||||
|
}
|
||||||
|
|
||||||
while (tiq.size() != 0) {
|
reader.close();
|
||||||
TermInfo termInfo = (TermInfo) tiq.pop();
|
}
|
||||||
System.out.println(termInfo.term + " " + termInfo.docFreq);
|
|
||||||
}
|
|
||||||
|
|
||||||
reader.close();
|
private static void usage() {
|
||||||
}
|
System.out.println(
|
||||||
|
"\n\n"
|
||||||
private static void usage() {
|
+ "java org.apache.lucene.misc.HighFreqTerms <index dir> [field]\n\n");
|
||||||
System.out.println(
|
}
|
||||||
"\n\n"
|
|
||||||
+ "java org.apache.lucene.misc.HighFreqTerms <index dir>\n\n");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
final class TermInfo {
|
final class TermInfo {
|
||||||
TermInfo(Term t, int df) {
|
TermInfo(Term t, int df) {
|
||||||
term = t;
|
term = t;
|
||||||
docFreq = df;
|
docFreq = df;
|
||||||
}
|
}
|
||||||
int docFreq;
|
int docFreq;
|
||||||
Term term;
|
Term term;
|
||||||
}
|
}
|
||||||
|
|
||||||
final class TermInfoQueue extends PriorityQueue {
|
final class TermInfoQueue extends PriorityQueue {
|
||||||
TermInfoQueue(int size) {
|
TermInfoQueue(int size) {
|
||||||
initialize(size);
|
initialize(size);
|
||||||
}
|
}
|
||||||
|
protected final boolean lessThan(Object a, Object b) {
|
||||||
protected final boolean lessThan(Object a, Object b) {
|
TermInfo termInfoA = (TermInfo) a;
|
||||||
TermInfo termInfoA = (TermInfo) a;
|
TermInfo termInfoB = (TermInfo) b;
|
||||||
TermInfo termInfoB = (TermInfo) b;
|
return termInfoA.docFreq < termInfoB.docFreq;
|
||||||
return termInfoA.docFreq < termInfoB.docFreq;
|
}
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue