Merge trunk.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1439991 13f79535-47bb-0310-9956-ffa450edef68
Author: Adrien Grand
Date:   2013-01-29 16:44:16 +00:00
Commit: 577364e414
73 changed files with 1544 additions and 592 deletions

View File

@ -79,6 +79,9 @@ New Features
near-real-time reader is opened that contains those changes.
(Robert Muir, Mike McCandless)
* LUCENE-4723: Add AnalyzerFactoryTask to benchmark, and enable analyzer
creation via the resulting factories using NewAnalyzerTask. (Steve Rowe)
API Changes
* LUCENE-4709: FacetResultNode no longer has a residue field. (Shai Erera)
@ -104,6 +107,14 @@ Bug Fixes
degrees and barely any height, it would generate so many indexed terms
(> 500k) that it could even cause an OutOfMemoryError. Fixed. (David Smiley)
* LUCENE-4704: Make join queries override hashcode and equals methods.
(Martijn van Groningen)
* LUCENE-4724: Fix bug in CategoryPath which allowed passing null or empty
string components. This is forbidden now (throws an exception). Note that if
you have a taxonomy index created with such strings, you should rebuild it.
(Michael McCandless, Shai Erera)
======================= Lucene 4.1.0 =======================
Changes in backwards compatibility policy

View File

@ -19,25 +19,43 @@ doc.body.tokenized=true
docs.dir=reuters-out
log.step=1000
-AnalyzerFactory(name:shingle-bigrams-unigrams,
StandardTokenizer,
ShingleFilter(maxShingleSize:2, outputUnigrams:true))
-AnalyzerFactory(name:shingle-bigrams,
StandardTokenizer,
ShingleFilter(maxShingleSize:2, outputUnigrams:false))
-AnalyzerFactory(name:shingle-4grams-unigrams,
StandardTokenizer,
ShingleFilter(maxShingleSize:4, outputUnigrams:true))
-AnalyzerFactory(name:shingle-4grams,
StandardTokenizer,
ShingleFilter(maxShingleSize:4, outputUnigrams:false))
-AnalyzerFactory(name:standard-tokenizer-only, StandardTokenizer)
{ "Rounds"
-NewShingleAnalyzer(maxShingleSize:2,outputUnigrams:true)
-NewAnalyzer(shingle-bigrams-unigrams)
-ResetInputs
{ "BigramsAndUnigrams" { ReadTokens > : 10000 }
-NewShingleAnalyzer(maxShingleSize:2,outputUnigrams:false)
-NewAnalyzer(shingle-bigrams)
-ResetInputs
{ "BigramsOnly" { ReadTokens > : 10000 }
-NewShingleAnalyzer(maxShingleSize:4,outputUnigrams:true)
-NewAnalyzer(shingle-4grams-unigrams)
-ResetInputs
{ "FourgramsAndUnigrams" { ReadTokens > : 10000 }
-NewShingleAnalyzer(maxShingleSize:4,outputUnigrams:false)
-NewAnalyzer(shingle-4grams)
-ResetInputs
{ "FourgramsOnly" { ReadTokens > : 10000 }
-NewAnalyzer(standard.StandardAnalyzer)
-NewAnalyzer(standard-tokenizer-only)
-ResetInputs
{ "UnigramsOnly" { ReadTokens > : 10000 }

View File

@ -51,7 +51,7 @@ while (<>) {
# Print out platform info
print "JAVA:\n", `java -version 2>&1`, "\nOS:\n";
if ($^O =~ /win/i) {
if ($^O =~ /(?<!dar)win/i) {
print "$^O\n";
eval {
require Win32;

View File

@ -23,6 +23,7 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.benchmark.byTask.feeds.ContentSource;
@ -34,6 +35,7 @@ import org.apache.lucene.benchmark.byTask.tasks.NewAnalyzerTask;
import org.apache.lucene.benchmark.byTask.tasks.PerfTask;
import org.apache.lucene.benchmark.byTask.tasks.ReadTask;
import org.apache.lucene.benchmark.byTask.tasks.SearchTask;
import org.apache.lucene.benchmark.byTask.utils.AnalyzerFactory;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.benchmark.byTask.utils.FileUtils;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
@ -55,6 +57,7 @@ import org.apache.lucene.util.IOUtils;
* <li>Directory, Writer, Reader.
* <li>Taxonomy Directory, Writer, Reader.
* <li>DocMaker, FacetSource and a few instances of QueryMaker.
* <li>Named AnalysisFactories.
* <li>Analyzer.
* <li>Statistics data which updated during the run.
* </ul>
@ -78,6 +81,7 @@ public class PerfRunData implements Closeable {
// directory, analyzer, docMaker - created at startup.
// reader, writer, searcher - maintained by basic tasks.
private Directory directory;
private Map<String,AnalyzerFactory> analyzerFactories = new HashMap<String,AnalyzerFactory>();
private Analyzer analyzer;
private DocMaker docMaker;
private ContentSource contentSource;
@ -358,7 +362,7 @@ public class PerfRunData implements Closeable {
}
/**
* @return Returns the anlyzer.
* @return Returns the analyzer.
*/
public Analyzer getAnalyzer() {
return analyzer;
@ -434,4 +438,7 @@ public class PerfRunData implements Closeable {
return qm;
}
public Map<String,AnalyzerFactory> getAnalyzerFactories() {
return analyzerFactories;
}
}
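The new analyzerFactories map is the hand-off point between the two tasks: AnalyzerFactoryTask registers a factory under its name, and NewAnalyzerTask resolves that name before falling back to class-name instantiation. A minimal sketch of the round trip, assuming a runData and a built factory are already in hand:

// AnalyzerFactoryTask side: register the factory under its name
runData.getAnalyzerFactories().put("shingle-bigrams", factory);

// NewAnalyzerTask side: resolve the name and install the analyzer
AnalyzerFactory resolved = runData.getAnalyzerFactories().get("shingle-bigrams");
if (resolved != null) {
  runData.setAnalyzer(resolved.create());
}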

View File

@ -0,0 +1,459 @@
package org.apache.lucene.benchmark.byTask.tasks;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
import org.apache.lucene.analysis.util.CharFilterFactory;
import org.apache.lucene.analysis.util.FilesystemResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.utils.AnalyzerFactory;
import org.apache.lucene.util.Version;
import java.io.File;
import java.io.StreamTokenizer;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
/**
* Analyzer factory construction task. The name given to the constructed factory may
* be given to NewAnalyzerTask, which will call AnalyzerFactory.create().
*
* Params are in the form argname:argvalue or argname:"argvalue" or argname:'argvalue';
* use backslashes to escape '"' or "'" inside a quoted value when it's used as the enclosing
* quotation mark.
*
* Specify params in a comma-separated list of the following, in order:
* <ol>
* <li>Analyzer args:
* <ul>
* <li><b>Required</b>: <code>name:<i>analyzer-factory-name</i></code></li>
* <li>Optional: <tt>positionIncrementGap:<i>int value</i></tt> (default: 0)</li>
* <li>Optional: <tt>offsetGap:<i>int value</i></tt> (default: 1)</li>
* </ul>
* </li>
* <li>zero or more CharFilterFactory's, followed by</li>
* <li>exactly one TokenizerFactory, followed by</li>
* <li>zero or more TokenFilterFactory's</li>
* </ol>
*
* Each component analysis factory may specify <tt>luceneMatchVersion</tt> (defaults to
* {@link Version#LUCENE_CURRENT}) and any of the args understood by the specified
* *Factory class, in the above-described param format.
* <p/>
* Example:
* <pre>
* -AnalyzerFactory(name:'strip html, fold to ascii, whitespace tokenize, max 10k tokens',
* positionIncrementGap:100,
* HTMLStripCharFilter,
* MappingCharFilter(mapping:'mapping-FoldToASCII.txt'),
* WhitespaceTokenizer(luceneMatchVersion:LUCENE_42),
* TokenLimitFilter(maxTokenCount:10000, consumeAllTokens:false))
* [...]
* -NewAnalyzer('strip html, fold to ascii, whitespace tokenize, max 10k tokens')
* </pre>
* <p/>
* AnalyzerFactory will direct analysis component factories to look for resources
* under the directory specified in the "work.dir" property.
*/
public class AnalyzerFactoryTask extends PerfTask {
private static final String LUCENE_ANALYSIS_PACKAGE_PREFIX = "org.apache.lucene.analysis.";
private static final Pattern ANALYSIS_COMPONENT_SUFFIX_PATTERN
= Pattern.compile("(?s:(?:(?:Token|Char)?Filter|Tokenizer)(?:Factory)?)$");
private static final Pattern TRAILING_DOT_ZERO_PATTERN = Pattern.compile("\\.0$");
private enum ArgType {ANALYZER_ARG, ANALYZER_ARG_OR_CHARFILTER_OR_TOKENIZER, TOKENFILTER }
String factoryName = null;
Integer positionIncrementGap = null;
Integer offsetGap = null;
private List<CharFilterFactory> charFilterFactories = new ArrayList<CharFilterFactory>();
private TokenizerFactory tokenizerFactory = null;
private List<TokenFilterFactory> tokenFilterFactories = new ArrayList<TokenFilterFactory>();
public AnalyzerFactoryTask(PerfRunData runData) {
super(runData);
}
@Override
public int doLogic() {
return 1;
}
/**
* Sets the params.
* Analysis component factory names may optionally include the "Factory" suffix.
*
* @param params analysis pipeline specification: name, (optional) positionIncrementGap,
* (optional) offsetGap, 0+ CharFilterFactory's, 1 TokenizerFactory,
* and 0+ TokenFilterFactory's
*/
@Override
public void setParams(String params) {
super.setParams(params);
ArgType expectedArgType = ArgType.ANALYZER_ARG;
final StreamTokenizer stok = new StreamTokenizer(new StringReader(params));
stok.commentChar('#');
stok.quoteChar('"');
stok.quoteChar('\'');
stok.eolIsSignificant(false);
stok.ordinaryChar('(');
stok.ordinaryChar(')');
stok.ordinaryChar(':');
stok.ordinaryChar(',');
try {
while (stok.nextToken() != StreamTokenizer.TT_EOF) {
switch (stok.ttype) {
case ',': {
// Do nothing
break;
}
case StreamTokenizer.TT_WORD: {
if (expectedArgType.equals(ArgType.ANALYZER_ARG)) {
final String argName = stok.sval;
if ( ! argName.equalsIgnoreCase("name")
&& ! argName.equalsIgnoreCase("positionIncrementGap")
&& ! argName.equalsIgnoreCase("offsetGap")) {
throw new RuntimeException
("Line #" + lineno(stok) + ": Missing 'name' param to AnalyzerFactory: '" + params + "'");
}
stok.nextToken();
if (stok.ttype != ':') {
throw new RuntimeException
("Line #" + lineno(stok) + ": Missing ':' after '" + argName + "' param to AnalyzerFactory");
}
stok.nextToken();
String argValue = stok.sval;
switch (stok.ttype) {
case StreamTokenizer.TT_NUMBER: {
argValue = Double.toString(stok.nval);
// Drop the ".0" from numbers, for integer arguments
argValue = TRAILING_DOT_ZERO_PATTERN.matcher(argValue).replaceFirst("");
// Intentional fallthrough
}
case '"':
case '\'':
case StreamTokenizer.TT_WORD: {
if (argName.equalsIgnoreCase("name")) {
factoryName = argValue;
expectedArgType = ArgType.ANALYZER_ARG_OR_CHARFILTER_OR_TOKENIZER;
} else {
int intArgValue = 0;
try {
intArgValue = Integer.parseInt(argValue);
} catch (NumberFormatException e) {
throw new RuntimeException
("Line #" + lineno(stok) + ": Exception parsing " + argName + " value '" + argValue + "'", e);
}
if (argName.equalsIgnoreCase("positionIncrementGap")) {
positionIncrementGap = intArgValue;
} else if (argName.equalsIgnoreCase("offsetGap")) {
offsetGap = intArgValue;
}
}
break;
}
case StreamTokenizer.TT_EOF: {
throw new RuntimeException("Unexpected EOF: " + stok.toString());
}
default: {
throw new RuntimeException
("Line #" + lineno(stok) + ": Unexpected token: " + stok.toString());
}
}
} else if (expectedArgType.equals(ArgType.ANALYZER_ARG_OR_CHARFILTER_OR_TOKENIZER)) {
final String argName = stok.sval;
if (argName.equalsIgnoreCase("positionIncrementGap")
|| argName.equalsIgnoreCase("offsetGap")) {
stok.nextToken();
if (stok.ttype != ':') {
throw new RuntimeException
("Line #" + lineno(stok) + ": Missing ':' after '" + argName + "' param to AnalyzerFactory");
}
stok.nextToken();
int intArgValue = (int)stok.nval;
switch (stok.ttype) {
case '"':
case '\'':
case StreamTokenizer.TT_WORD: {
intArgValue = 0;
try {
intArgValue = Integer.parseInt(stok.sval.trim());
} catch (NumberFormatException e) {
throw new RuntimeException
("Line #" + lineno(stok) + ": Exception parsing " + argName + " value '" + stok.sval + "'", e);
}
// Intentional fall-through
}
case StreamTokenizer.TT_NUMBER: {
if (argName.equalsIgnoreCase("positionIncrementGap")) {
positionIncrementGap = intArgValue;
} else if (argName.equalsIgnoreCase("offsetGap")) {
offsetGap = intArgValue;
}
break;
}
case StreamTokenizer.TT_EOF: {
throw new RuntimeException("Unexpected EOF: " + stok.toString());
}
default: {
throw new RuntimeException
("Line #" + lineno(stok) + ": Unexpected token: " + stok.toString());
}
}
break;
}
try {
final Class<? extends CharFilterFactory> clazz;
clazz = lookupAnalysisClass(argName, CharFilterFactory.class);
createAnalysisPipelineComponent(stok, clazz);
} catch (IllegalArgumentException e) {
try {
final Class<? extends TokenizerFactory> clazz;
clazz = lookupAnalysisClass(argName, TokenizerFactory.class);
createAnalysisPipelineComponent(stok, clazz);
expectedArgType = ArgType.TOKENFILTER;
} catch (IllegalArgumentException e2) {
throw new RuntimeException("Line #" + lineno(stok) + ": Can't find class '"
+ argName + "' as CharFilterFactory or TokenizerFactory");
}
}
} else { // expectedArgType = ArgType.TOKENFILTER
final String className = stok.sval;
final Class<? extends TokenFilterFactory> clazz;
try {
clazz = lookupAnalysisClass(className, TokenFilterFactory.class);
} catch (IllegalArgumentException e) {
throw new RuntimeException
("Line #" + lineno(stok) + ": Can't find class '" + className + "' as TokenFilterFactory");
}
createAnalysisPipelineComponent(stok, clazz);
}
break;
}
default: {
throw new RuntimeException("Line #" + lineno(stok) + ": Unexpected token: " + stok.toString());
}
}
}
} catch (RuntimeException e) {
if (e.getMessage().startsWith("Line #")) {
throw e;
} else {
throw new RuntimeException("Line #" + lineno(stok) + ": ", e);
}
} catch (Throwable t) {
throw new RuntimeException("Line #" + lineno(stok) + ": ", t);
}
final AnalyzerFactory analyzerFactory = new AnalyzerFactory
(charFilterFactories, tokenizerFactory, tokenFilterFactories);
analyzerFactory.setPositionIncrementGap(positionIncrementGap);
analyzerFactory.setOffsetGap(offsetGap);
getRunData().getAnalyzerFactories().put(factoryName, analyzerFactory);
}
/**
* Instantiates the given analysis factory class after pulling params from
* the given stream tokenizer, then stores the result in the appropriate
* pipeline component list.
*
* @param stok stream tokenizer from which to draw analysis factory params
* @param clazz analysis factory class to instantiate
*/
private void createAnalysisPipelineComponent
(StreamTokenizer stok, Class<? extends AbstractAnalysisFactory> clazz) {
final AbstractAnalysisFactory instance;
try {
instance = clazz.newInstance();
} catch (Exception e) {
throw new RuntimeException("Line #" + lineno(stok) + ": ", e);
}
Version luceneMatchVersion = null;
Map<String,String> argMap = new HashMap<String,String>();
boolean parenthetical = false;
try {
WHILE_LOOP: while (stok.nextToken() != StreamTokenizer.TT_EOF) {
switch (stok.ttype) {
case ',': {
if (parenthetical) {
// Do nothing
break;
} else {
// Finished reading this analysis factory configuration
break WHILE_LOOP;
}
}
case '(': {
if (parenthetical) {
throw new RuntimeException
("Line #" + lineno(stok) + ": Unexpected opening parenthesis.");
}
parenthetical = true;
break;
}
case ')': {
if (parenthetical) {
parenthetical = false;
} else {
throw new RuntimeException
("Line #" + lineno(stok) + ": Unexpected closing parenthesis.");
}
break;
}
case StreamTokenizer.TT_WORD: {
if ( ! parenthetical) {
throw new RuntimeException("Line #" + lineno(stok) + ": Unexpected token '" + stok.sval + "'");
}
String argName = stok.sval;
stok.nextToken();
if (stok.ttype != ':') {
throw new RuntimeException
("Line #" + lineno(stok) + ": Missing ':' after '" + argName + "' param to " + clazz.getSimpleName());
}
stok.nextToken();
String argValue = stok.sval;
switch (stok.ttype) {
case StreamTokenizer.TT_NUMBER: {
argValue = Double.toString(stok.nval);
// Drop the ".0" from numbers, for integer arguments
argValue = TRAILING_DOT_ZERO_PATTERN.matcher(argValue).replaceFirst("");
// Intentional fall-through
}
case '"':
case '\'':
case StreamTokenizer.TT_WORD: {
if (argName.equalsIgnoreCase("luceneMatchVersion")) {
try {
luceneMatchVersion = Version.parseLeniently(argValue);
} catch (IllegalArgumentException e) {
throw new RuntimeException
("Line #" + lineno(stok) + ": Unrecognized luceneMatchVersion '" + argValue + "'", e);
}
} else {
argMap.put(argName, argValue);
}
break;
}
case StreamTokenizer.TT_EOF: {
throw new RuntimeException("Unexpected EOF: " + stok.toString());
}
default: {
throw new RuntimeException
("Line #" + lineno(stok) + ": Unexpected token: " + stok.toString());
}
}
}
}
}
instance.setLuceneMatchVersion
(null == luceneMatchVersion ? Version.LUCENE_CURRENT : luceneMatchVersion);
instance.init(argMap);
if (instance instanceof ResourceLoaderAware) {
File baseDir = new File(getRunData().getConfig().get("work.dir", "work")).getAbsoluteFile();
((ResourceLoaderAware)instance).inform(new FilesystemResourceLoader(baseDir));
}
if (CharFilterFactory.class.isAssignableFrom(clazz)) {
charFilterFactories.add((CharFilterFactory)instance);
} else if (TokenizerFactory.class.isAssignableFrom(clazz)) {
tokenizerFactory = (TokenizerFactory)instance;
} else if (TokenFilterFactory.class.isAssignableFrom(clazz)) {
tokenFilterFactories.add((TokenFilterFactory)instance);
}
} catch (RuntimeException e) {
if (e.getMessage().startsWith("Line #")) {
throw (e);
} else {
throw new RuntimeException("Line #" + lineno(stok) + ": ", e);
}
} catch (Throwable t) {
throw new RuntimeException("Line #" + lineno(stok) + ": ", t);
}
}
/**
* This method looks up a class by its fully qualified name (FQN), by its simple
* class name, or by a package-suffixed name, assuming "org.apache.lucene.analysis."
* as the package prefix (e.g. "standard.ClassicTokenizerFactory" ->
* "org.apache.lucene.analysis.standard.ClassicTokenizerFactory").
*
* If className contains a period, the class is first looked up as-is, assuming that it
* is an FQN. If this fails, lookup is retried after prepending the Lucene analysis
* package prefix to the class name.
*
* If className does not contain a period, the analysis SPI *Factory.lookupClass()
* methods are used to find the class.
*
* @param className The name or the short name of the class.
* @param expectedType The superclass className is expected to extend
* @return the loaded class.
* @throws ClassNotFoundException if lookup fails
*/
public <T> Class<? extends T> lookupAnalysisClass(String className, Class<T> expectedType)
throws ClassNotFoundException {
if (className.contains(".")) {
try {
// First, try className == FQN
return Class.forName(className).asSubclass(expectedType);
} catch (ClassNotFoundException e) {
try {
// Second, retry lookup after prepending the Lucene analysis package prefix
return Class.forName(LUCENE_ANALYSIS_PACKAGE_PREFIX + className).asSubclass(expectedType);
} catch (ClassNotFoundException e1) {
throw new ClassNotFoundException("Can't find class '" + className
+ "' or '" + LUCENE_ANALYSIS_PACKAGE_PREFIX + className + "'");
}
}
}
// No dot - use analysis SPI lookup
final String analysisComponentName = ANALYSIS_COMPONENT_SUFFIX_PATTERN.matcher(className).replaceFirst("");
if (CharFilterFactory.class.isAssignableFrom(expectedType)) {
return CharFilterFactory.lookupClass(analysisComponentName).asSubclass(expectedType);
} else if (TokenizerFactory.class.isAssignableFrom(expectedType)) {
return TokenizerFactory.lookupClass(analysisComponentName).asSubclass(expectedType);
} else if (TokenFilterFactory.class.isAssignableFrom(expectedType)) {
return TokenFilterFactory.lookupClass(analysisComponentName).asSubclass(expectedType);
}
throw new ClassNotFoundException("Can't find class '" + className + "'");
}
/* (non-Javadoc)
* @see org.apache.lucene.benchmark.byTask.tasks.PerfTask#supportsParams()
*/
@Override
public boolean supportsParams() {
return true;
}
/** Returns the current line in the algorithm file */
public int lineno(StreamTokenizer stok) {
return getAlgLineNum() + stok.lineno();
}
}
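lookupAnalysisClass() accepts three spellings for the same component. Hypothetical calls, assuming analyzers-common on the classpath, a task instance in scope, and a context that declares throws ClassNotFoundException:

// 1. SPI short name: the suffix pattern strips "Tokenizer", then
//    TokenizerFactory.lookupClass("Whitespace") resolves the factory
task.lookupAnalysisClass("WhitespaceTokenizer", TokenizerFactory.class);

// 2. Package suffix: "org.apache.lucene.analysis." is prepended
task.lookupAnalysisClass("standard.ClassicTokenizerFactory", TokenizerFactory.class);

// 3. Fully qualified name: tried as-is first
task.lookupAnalysisClass("org.apache.lucene.analysis.shingle.ShingleFilterFactory",
                         TokenFilterFactory.class);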

View File

@ -16,10 +16,16 @@ package org.apache.lucene.benchmark.byTask.tasks;
*/
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.util.CharFilterFactory;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.utils.AnalyzerFactory;
import org.apache.lucene.util.Version;
import java.io.IOException;
import java.io.StreamTokenizer;
import java.io.StringReader;
import java.util.*;
import java.lang.reflect.Constructor;
@ -28,12 +34,12 @@ import java.lang.reflect.Constructor;
*
*/
public class NewAnalyzerTask extends PerfTask {
private List<String> analyzerClassNames;
private List<String> analyzerNames;
private int current;
public NewAnalyzerTask(PerfRunData runData) {
super(runData);
analyzerClassNames = new ArrayList<String>();
analyzerNames = new ArrayList<String>();
}
public static final Analyzer createAnalyzer(String className) throws Exception{
@ -50,55 +56,98 @@ public class NewAnalyzerTask extends PerfTask {
@Override
public int doLogic() throws IOException {
String className = null;
String analyzerName = null;
try {
if (current >= analyzerClassNames.size()) {
if (current >= analyzerNames.size()) {
current = 0;
}
className = analyzerClassNames.get(current++);
analyzerName = analyzerNames.get(current++);
Analyzer analyzer = null;
if (null == className || 0 == className.length()) {
className = "org.apache.lucene.analysis.standard.StandardAnalyzer";
if (null == analyzerName || 0 == analyzerName.length()) {
analyzerName = "org.apache.lucene.analysis.standard.StandardAnalyzer";
}
if (-1 == className.indexOf(".")) {
try {
// If no package, first attempt to instantiate a core analyzer
String coreClassName = "org.apache.lucene.analysis.core." + className;
analyzer = createAnalyzer(coreClassName);
className = coreClassName;
} catch (ClassNotFoundException e) {
// If not a core analyzer, try the base analysis package
className = "org.apache.lucene.analysis." + className;
analyzer = createAnalyzer(className);
}
// First, lookup analyzerName as a named analyzer factory
AnalyzerFactory factory = getRunData().getAnalyzerFactories().get(analyzerName);
if (null != factory) {
analyzer = factory.create();
} else {
if (className.startsWith("standard.")) {
className = "org.apache.lucene.analysis." + className;
if (analyzerName.contains(".")) {
if (analyzerName.startsWith("standard.")) {
analyzerName = "org.apache.lucene.analysis." + analyzerName;
}
analyzer = createAnalyzer(analyzerName);
} else { // No package
try {
// Attempt to instantiate a core analyzer
String coreClassName = "org.apache.lucene.analysis.core." + analyzerName;
analyzer = createAnalyzer(coreClassName);
analyzerName = coreClassName;
} catch (ClassNotFoundException e) {
// If not a core analyzer, try the base analysis package
analyzerName = "org.apache.lucene.analysis." + analyzerName;
analyzer = createAnalyzer(analyzerName);
}
}
analyzer = createAnalyzer(className);
}
getRunData().setAnalyzer(analyzer);
System.out.println("Changed Analyzer to: " + className);
} catch (Exception e) {
throw new RuntimeException("Error creating Analyzer: " + className, e);
throw new RuntimeException("Error creating Analyzer: " + analyzerName, e);
}
return 1;
}
/**
* Set the params (analyzerClassName only), Comma-separate list of Analyzer class names. If the Analyzer lives in
* Set the params (analyzerName only): a comma-separated list of analyzer names. If the Analyzer lives in
* org.apache.lucene.analysis, the name can be shortened by dropping the o.a.l.a part of the Fully Qualified Class Name.
* <p/>
* Analyzer names may also refer to previously defined AnalyzerFactory's.
* <p/>
* Example Declaration: {"NewAnalyzer" NewAnalyzer(WhitespaceAnalyzer, SimpleAnalyzer, StopAnalyzer, standard.StandardAnalyzer) >
* <p/>
* Example AnalyzerFactory usage:
* <pre>
* -AnalyzerFactory(name:'whitespace tokenized',WhitespaceTokenizer)
* -NewAnalyzer('whitespace tokenized')
* </pre>
* @param params analyzerClassName, or empty for the StandardAnalyzer
*/
@Override
public void setParams(String params) {
super.setParams(params);
for (StringTokenizer tokenizer = new StringTokenizer(params, ","); tokenizer.hasMoreTokens();) {
String s = tokenizer.nextToken();
analyzerClassNames.add(s.trim());
final StreamTokenizer stok = new StreamTokenizer(new StringReader(params));
stok.quoteChar('"');
stok.quoteChar('\'');
stok.eolIsSignificant(false);
stok.ordinaryChar(',');
try {
while (stok.nextToken() != StreamTokenizer.TT_EOF) {
switch (stok.ttype) {
case ',': {
// Do nothing
break;
}
case '\'':
case '\"':
case StreamTokenizer.TT_WORD: {
analyzerNames.add(stok.sval);
break;
}
default: {
throw new RuntimeException("Unexpected token: " + stok.toString());
}
}
}
} catch (RuntimeException e) {
if (e.getMessage().startsWith("Line #")) {
throw e;
} else {
throw new RuntimeException("Line #" + (stok.lineno() + getAlgLineNum()) + ": ", e);
}
} catch (Throwable t) {
throw new RuntimeException("Line #" + (stok.lineno() + getAlgLineNum()) + ": ", t);
}
}
/* (non-Javadoc)

View File

@ -1,117 +0,0 @@
package org.apache.lucene.benchmark.byTask.tasks;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.StringTokenizer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.shingle.ShingleAnalyzerWrapper;
import org.apache.lucene.analysis.shingle.ShingleFilter;
import org.apache.lucene.benchmark.byTask.PerfRunData;
/**
* Task to support benchmarking ShingleFilter / ShingleAnalyzerWrapper
* <p>
* <ul>
* <li> <code>NewShingleAnalyzer</code> (constructs with all defaults)
* <li> <code>NewShingleAnalyzer(analyzer:o.a.l.analysis.StandardAnalyzer,maxShingleSize:2,outputUnigrams:true)</code>
* </ul>
* </p>
*/
public class NewShingleAnalyzerTask extends PerfTask {
private String analyzerClassName = "standard.StandardAnalyzer";
private int maxShingleSize = 2;
private boolean outputUnigrams = true;
public NewShingleAnalyzerTask(PerfRunData runData) {
super(runData);
}
private void setAnalyzer() throws Exception {
Analyzer wrappedAnalyzer = null;
if (null == analyzerClassName || 0 == analyzerClassName.length()) {
analyzerClassName = "org.apache.lucene.analysis.standard.StandardAnalyzer";
}
if (-1 == analyzerClassName.indexOf(".")) {
String coreClassName = "org.apache.lucene.analysis.core." + analyzerClassName;
try {
// If there is no package, first attempt to instantiate a core analyzer
wrappedAnalyzer = NewAnalyzerTask.createAnalyzer(coreClassName);
analyzerClassName = coreClassName;
} catch (ClassNotFoundException e) {
// If this is not a core analyzer, try the base analysis package
analyzerClassName = "org.apache.lucene.analysis." + analyzerClassName;
wrappedAnalyzer = NewAnalyzerTask.createAnalyzer(analyzerClassName);
}
} else {
if (analyzerClassName.startsWith("standard.")) {
analyzerClassName = "org.apache.lucene.analysis." + analyzerClassName;
}
wrappedAnalyzer = NewAnalyzerTask.createAnalyzer(analyzerClassName);
}
ShingleAnalyzerWrapper analyzer = new ShingleAnalyzerWrapper(
wrappedAnalyzer,
ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE,
maxShingleSize,
ShingleFilter.TOKEN_SEPARATOR,
outputUnigrams,
false);
getRunData().setAnalyzer(analyzer);
}
@Override
public int doLogic() throws Exception {
try {
setAnalyzer();
System.out.println
("Changed Analyzer to: ShingleAnalyzerWrapper, wrapping ShingleFilter over "
+ analyzerClassName);
} catch (Exception e) {
throw new RuntimeException("Error creating Analyzer", e);
}
return 1;
}
@Override
public void setParams(String params) {
super.setParams(params);
StringTokenizer st = new StringTokenizer(params, ",");
while (st.hasMoreTokens()) {
String param = st.nextToken();
StringTokenizer expr = new StringTokenizer(param, ":");
String key = expr.nextToken();
String value = expr.nextToken();
if (key.equalsIgnoreCase("analyzer")) {
analyzerClassName = value;
} else if (key.equalsIgnoreCase("outputUnigrams")) {
outputUnigrams = Boolean.parseBoolean(value);
} else if (key.equalsIgnoreCase("maxShingleSize")) {
maxShingleSize = (int)Double.parseDouble(value);
} else {
throw new RuntimeException("Unknown parameter " + param);
}
}
}
@Override
public boolean supportsParams() {
return true;
}
}

View File

@ -62,6 +62,9 @@ public abstract class PerfTask implements Cloneable {
private boolean runInBackground;
private int deltaPri;
// The first line of this task's definition in the alg file
private int algLineNum = 0;
protected static final String NEW_LINE = System.getProperty("line.separator");
/** Should not be used externally */
@ -317,4 +320,11 @@ public abstract class PerfTask implements Cloneable {
this.disableCounting = disableCounting;
}
public void setAlgLineNum(int algLineNum) {
this.algLineNum = algLineNum;
}
public int getAlgLineNum() {
return algLineNum;
}
}

View File

@ -58,11 +58,12 @@ public class Algorithm {
StreamTokenizer stok = new StreamTokenizer(new StringReader(algTxt));
stok.commentChar('#');
stok.eolIsSignificant(false);
stok.ordinaryChar('"');
stok.quoteChar('"');
stok.quoteChar('\'');
stok.ordinaryChar('/');
stok.ordinaryChar('(');
stok.ordinaryChar(')');
boolean colonOk = false;
boolean colonOk = false;
boolean isDisableCountNextTask = false; // only for primitive tasks
currSequence.setDepth(0);
@ -74,6 +75,7 @@ public class Algorithm {
Constructor<? extends PerfTask> cnstr = taskClass(config,s)
.asSubclass(PerfTask.class).getConstructor(PerfRunData.class);
PerfTask task = cnstr.newInstance(runData);
task.setAlgLineNum(stok.lineno());
task.setDisableCounting(isDisableCountNextTask);
isDisableCountNextTask = false;
currSequence.addTask(task);
@ -90,24 +92,54 @@ public class Algorithm {
if (stok.ttype!='(') {
stok.pushBack();
} else {
// get params, for tasks that supports them, - anything until next ')'
// get params, for tasks that support them - allow recursive parenthetical expressions
stok.eolIsSignificant(true); // Allow params tokenizer to keep track of line number
StringBuilder params = new StringBuilder();
stok.nextToken();
while (stok.ttype!=')') {
switch (stok.ttype) {
case StreamTokenizer.TT_NUMBER:
params.append(stok.nval);
break;
case StreamTokenizer.TT_WORD:
params.append(stok.sval);
break;
case StreamTokenizer.TT_EOF:
throw new Exception("unexpexted EOF: - "+stok.toString());
default:
params.append((char)stok.ttype);
if (stok.ttype != ')') {
int count = 1;
BALANCED_PARENS: while (true) {
switch (stok.ttype) {
case StreamTokenizer.TT_NUMBER: {
params.append(stok.nval);
break;
}
case StreamTokenizer.TT_WORD: {
params.append(stok.sval);
break;
}
case StreamTokenizer.TT_EOF: {
throw new RuntimeException("Unexpexted EOF: - "+stok.toString());
}
case '"':
case '\'': {
params.append((char)stok.ttype);
// re-escape delimiters, if any
params.append(stok.sval.replaceAll("" + (char)stok.ttype, "\\\\" + (char)stok.ttype));
params.append((char)stok.ttype);
break;
}
case '(': {
params.append((char)stok.ttype);
++count;
break;
}
case ')': {
if (--count >= 1) { // exclude final closing parenthesis
params.append((char)stok.ttype);
} else {
break BALANCED_PARENS;
}
break;
}
default: {
params.append((char)stok.ttype);
}
}
stok.nextToken();
}
stok.nextToken();
}
stok.eolIsSignificant(false);
String prm = params.toString().trim();
if (prm.length()>0) {
task.setParams(prm);
@ -182,10 +214,8 @@ public class Algorithm {
if (stok.ttype!='"') {
stok.pushBack();
} else {
stok.nextToken();
name = stok.sval;
stok.nextToken();
if (stok.ttype!='"' || name==null || name.length()==0) {
if (stok.ttype!='"' || name==null || name.length()==0) {
throw new Exception("sequence name problem - "+stok.toString());
}
}
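The practical effect of the balanced-paren scan: for a hypothetical alg line such as AnalyzerFactory(name:'bigrams', StandardTokenizer, ShingleFilter(maxShingleSize:2)), the old loop stopped at the first ')' and handed setParams() a string truncated after maxShingleSize:2, leaving a stray ')' in the token stream. The new loop counts nesting depth, excludes only the final closing parenthesis, and re-escapes quote characters inside quoted values, so the task receives the complete parameter list.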

View File

@ -0,0 +1,132 @@
package org.apache.lucene.benchmark.byTask.utils;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.util.CharFilterFactory;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.analysis.util.TokenizerFactory;
import java.io.Reader;
import java.util.List;
/**
* A factory to create an analyzer.
* See {@link org.apache.lucene.benchmark.byTask.tasks.AnalyzerFactoryTask}
*/
public final class AnalyzerFactory {
final private List<CharFilterFactory> charFilterFactories;
final private TokenizerFactory tokenizerFactory;
final private List<TokenFilterFactory> tokenFilterFactories;
private String name = null;
private Integer positionIncrementGap = null;
private Integer offsetGap = null;
public AnalyzerFactory(List<CharFilterFactory> charFilterFactories,
TokenizerFactory tokenizerFactory,
List<TokenFilterFactory> tokenFilterFactories) {
this.charFilterFactories = charFilterFactories;
assert null != tokenizerFactory;
this.tokenizerFactory = tokenizerFactory;
this.tokenFilterFactories = tokenFilterFactories;
}
public void setName(String name) {
this.name = name;
}
public void setPositionIncrementGap(Integer positionIncrementGap) {
this.positionIncrementGap = positionIncrementGap;
}
public void setOffsetGap(Integer offsetGap) {
this.offsetGap = offsetGap;
}
public Analyzer create() {
return new Analyzer() {
private final Integer positionIncrementGap = AnalyzerFactory.this.positionIncrementGap;
private final Integer offsetGap = AnalyzerFactory.this.offsetGap;
@Override
public Reader initReader(String fieldName, Reader reader) {
if (charFilterFactories != null && charFilterFactories.size() > 0) {
Reader wrappedReader = reader;
for (CharFilterFactory charFilterFactory : charFilterFactories) {
wrappedReader = charFilterFactory.create(wrappedReader);
}
reader = wrappedReader;
}
return reader;
}
@Override
protected Analyzer.TokenStreamComponents createComponents(String fieldName, Reader reader) {
final Tokenizer tokenizer = tokenizerFactory.create(reader);
TokenStream tokenStream = tokenizer;
for (TokenFilterFactory filterFactory : tokenFilterFactories) {
tokenStream = filterFactory.create(tokenStream);
}
return new TokenStreamComponents(tokenizer, tokenStream);
}
@Override
public int getPositionIncrementGap(String fieldName) {
return null == positionIncrementGap ? super.getPositionIncrementGap(fieldName) : positionIncrementGap;
}
@Override
public int getOffsetGap(String fieldName) {
return null == offsetGap ? super.getOffsetGap(fieldName) : offsetGap;
}
};
}
@Override
public String toString() {
StringBuilder sb = new StringBuilder("AnalyzerFactory(");
if (null != name) {
sb.append("name:");
sb.append(name);
sb.append(", ");
}
if (null != positionIncrementGap) {
sb.append("positionIncrementGap:");
sb.append(positionIncrementGap);
sb.append(", ");
}
if (null != offsetGap) {
sb.append("offsetGap:");
sb.append(offsetGap);
sb.append(", ");
}
for (CharFilterFactory charFilterFactory: charFilterFactories) {
sb.append(charFilterFactory);
sb.append(", ");
}
sb.append(tokenizerFactory);
for (TokenFilterFactory tokenFilterFactory : tokenFilterFactories) {
sb.append(", ");
sb.append(tokenFilterFactory);
}
sb.append(')');
return sb.toString();
}
}
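A minimal standalone sketch of driving this factory directly, using the same newInstance / setLuceneMatchVersion / init lifecycle that AnalyzerFactoryTask follows (factory class and argument names from analyzers-common, assumed available):

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.WhitespaceTokenizerFactory;
import org.apache.lucene.analysis.shingle.ShingleFilterFactory;
import org.apache.lucene.analysis.util.CharFilterFactory;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.util.Version;

WhitespaceTokenizerFactory tok = new WhitespaceTokenizerFactory();
tok.setLuceneMatchVersion(Version.LUCENE_42);
tok.init(new HashMap<String,String>());

ShingleFilterFactory shingle = new ShingleFilterFactory();
shingle.setLuceneMatchVersion(Version.LUCENE_42);
Map<String,String> args = new HashMap<String,String>();
args.put("maxShingleSize", "2");
args.put("outputUnigrams", "false");
shingle.init(args);

AnalyzerFactory factory = new AnalyzerFactory(
    Collections.<CharFilterFactory>emptyList(),
    tok,
    Collections.singletonList((TokenFilterFactory) shingle));
factory.setPositionIncrementGap(100); // optional, mirrors the task's optional arg
Analyzer analyzer = factory.create();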

View File

@ -71,6 +71,7 @@ public class TestPerfTasksLogic extends BenchmarkTestCase {
public void setUp() throws Exception {
super.setUp();
copyToWorkDir("reuters.first20.lines.txt");
copyToWorkDir("test-mapping-ISOLatin1Accent-partial.txt");
}
/**
@ -1019,63 +1020,79 @@ public class TestPerfTasksLogic extends BenchmarkTestCase {
}
/**
* Test that we can create ShingleAnalyzerWrappers.
* Test that we can create shingle analyzers using AnalyzerFactory.
*/
public void testShingleAnalyzer() throws Exception {
String text = "one,two,three, four five six";
// Default analyzer, maxShingleSize, and outputUnigrams
Benchmark benchmark = execBenchmark(getShingleConfig(""));
// StandardTokenizer, maxShingleSize, and outputUnigrams
Benchmark benchmark = execBenchmark(getAnalyzerFactoryConfig
("shingle-analyzer", "StandardTokenizer,ShingleFilter"));
benchmark.getRunData().getAnalyzer().tokenStream
("bogus", new StringReader(text)).close();
assertEqualShingle(benchmark.getRunData().getAnalyzer(), text,
new String[] {"one", "one two", "two", "two three",
"three", "three four", "four", "four five",
"five", "five six", "six"});
// Default analyzer, maxShingleSize = 3, and outputUnigrams = false
BaseTokenStreamTestCase.assertAnalyzesTo(benchmark.getRunData().getAnalyzer(), text,
new String[] { "one", "one two", "two", "two three",
"three", "three four", "four", "four five",
"five", "five six", "six" });
// StandardTokenizer, maxShingleSize = 3, and outputUnigrams = false
benchmark = execBenchmark
(getShingleConfig("maxShingleSize:3,outputUnigrams:false"));
assertEqualShingle(benchmark.getRunData().getAnalyzer(), text,
new String[] { "one two", "one two three", "two three",
"two three four", "three four",
"three four five", "four five",
"four five six", "five six" });
// WhitespaceAnalyzer, default maxShingleSize and outputUnigrams
(getAnalyzerFactoryConfig
("shingle-analyzer",
"StandardTokenizer,ShingleFilter(maxShingleSize:3,outputUnigrams:false)"));
BaseTokenStreamTestCase.assertAnalyzesTo(benchmark.getRunData().getAnalyzer(), text,
new String[] { "one two", "one two three", "two three",
"two three four", "three four",
"three four five", "four five",
"four five six", "five six" });
// WhitespaceTokenizer, default maxShingleSize and outputUnigrams
benchmark = execBenchmark
(getShingleConfig("analyzer:WhitespaceAnalyzer"));
assertEqualShingle(benchmark.getRunData().getAnalyzer(), text,
new String[] { "one,two,three,", "one,two,three, four",
"four", "four five", "five", "five six",
"six" });
(getAnalyzerFactoryConfig("shingle-analyzer", "WhitespaceTokenizer,ShingleFilter"));
BaseTokenStreamTestCase.assertAnalyzesTo(benchmark.getRunData().getAnalyzer(), text,
new String[] { "one,two,three,", "one,two,three, four",
"four", "four five", "five", "five six",
"six" });
// WhitespaceAnalyzer, maxShingleSize=3 and outputUnigrams=false
// WhitespaceTokenizer, maxShingleSize=3 and outputUnigrams=false
benchmark = execBenchmark
(getShingleConfig
("outputUnigrams:false,maxShingleSize:3,analyzer:WhitespaceAnalyzer"));
assertEqualShingle(benchmark.getRunData().getAnalyzer(), text,
new String[] { "one,two,three, four",
"one,two,three, four five",
"four five", "four five six",
"five six" });
(getAnalyzerFactoryConfig
("shingle-factory",
"WhitespaceTokenizer,ShingleFilter(outputUnigrams:false,maxShingleSize:3)"));
BaseTokenStreamTestCase.assertAnalyzesTo(benchmark.getRunData().getAnalyzer(), text,
new String[] { "one,two,three, four",
"one,two,three, four five",
"four five", "four five six",
"five six" });
}
private void assertEqualShingle
(Analyzer analyzer, String text, String[] expected) throws Exception {
BaseTokenStreamTestCase.assertAnalyzesTo(analyzer, text, expected);
}
private String[] getShingleConfig(String params) {
private String[] getAnalyzerFactoryConfig(String name, String params) {
final String singleQuoteEscapedName = name.replaceAll("'", "\\\\'");
String algLines[] = {
"content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
"docs.file=" + getReuters20LinesFile(),
"work.dir=" + getWorkDir().getAbsolutePath().replaceAll("\\\\", "/"), // Fix Windows path
"content.source.forever=false",
"directory=RAMDirectory",
"NewShingleAnalyzer(" + params + ")",
"AnalyzerFactory(name:'" + singleQuoteEscapedName + "', " + params + ")",
"NewAnalyzer('" + singleQuoteEscapedName + "')",
"CreateIndex",
"{ \"AddDocs\" AddDoc > : * "
};
return algLines;
}
public void testAnalyzerFactory() throws Exception {
String text = "Fortieth, Quarantième, Cuadragésimo";
Benchmark benchmark = execBenchmark(getAnalyzerFactoryConfig
("ascii folded, pattern replaced, standard tokenized, downcased, bigrammed.'analyzer'",
"positionIncrementGap:100,offsetGap:1111,"
+"MappingCharFilter(mapping:'test-mapping-ISOLatin1Accent-partial.txt'),"
+"PatternReplaceCharFilterFactory(pattern:'e(\\\\\\\\S*)m',replacement:\"$1xxx$1\"),"
+"StandardTokenizer,LowerCaseFilter,NGramTokenFilter(minGramSize:2,maxGramSize:2)"));
BaseTokenStreamTestCase.assertAnalyzesTo(benchmark.getRunData().getAnalyzer(), text,
new String[] { "fo", "or", "rt", "ti", "ie", "et", "th",
"qu", "ua", "ar", "ra", "an", "nt", "ti", "ix", "xx", "xx", "xe",
"cu", "ua", "ad", "dr", "ra", "ag", "gs", "si", "ix", "xx", "xx", "xs", "si", "io"});
}
private String getReuters20LinesFile() {
return getWorkDirResourcePath("reuters.first20.lines.txt");

View File

@ -0,0 +1,30 @@
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Syntax:
# "source" => "target"
# "source".length() > 0 (source cannot be empty.)
# "target".length() >= 0 (target can be empty.)
# example:
# "À" => "A"
# "\u00C0" => "A"
# "\u00C0" => "\u0041"
# "ß" => "ss"
# "\t" => " "
# "\n" => ""
# è => e
"\u00E8" => "e"
# é => e
"\u00E9" => "e"

View File

@ -63,7 +63,7 @@ final class ForUtil {
}
final PackedInts.Decoder decoder = PackedInts.getDecoder(format, version, bpv);
final int iterations = computeIterations(decoder);
maxDataSize = Math.max(maxDataSize, iterations * decoder.valueCount());
maxDataSize = Math.max(maxDataSize, iterations * decoder.byteValueCount());
}
}
}
@ -75,7 +75,7 @@ final class ForUtil {
* values with the provided {@link Decoder}.
*/
private static int computeIterations(PackedInts.Decoder decoder) {
return (int) Math.ceil((float) BLOCK_SIZE / decoder.valueCount());
return (int) Math.ceil((float) BLOCK_SIZE / decoder.byteValueCount());
}
/**
@ -165,9 +165,9 @@ final class ForUtil {
assert numBits > 0 && numBits <= 32 : numBits;
final PackedInts.Encoder encoder = encoders[numBits];
final int iters = iterations[numBits];
assert iters * encoder.valueCount() >= BLOCK_SIZE;
assert iters * encoder.byteValueCount() >= BLOCK_SIZE;
final int encodedSize = encodedSizes[numBits];
assert (iters * encoder.blockCount()) << 3 >= encodedSize;
assert iters * encoder.byteBlockCount() >= encodedSize;
out.writeByte((byte) numBits);
@ -198,7 +198,7 @@ final class ForUtil {
final PackedInts.Decoder decoder = decoders[numBits];
final int iters = iterations[numBits];
assert iters * decoder.valueCount() >= BLOCK_SIZE;
assert iters * decoder.byteValueCount() >= BLOCK_SIZE;
decoder.decode(encoded, 0, decoded, 0, iters);
}

View File

@ -130,8 +130,8 @@ abstract class AbstractBlockPackedWriter {
protected final void writeValues(int bitsRequired) throws IOException {
final PackedInts.Encoder encoder = PackedInts.getEncoder(PackedInts.Format.PACKED, PackedInts.VERSION_CURRENT, bitsRequired);
final int iterations = values.length / encoder.valueCount();
final int blockSize = encoder.blockCount() * 8 * iterations;
final int iterations = values.length / encoder.byteValueCount();
final int blockSize = encoder.byteBlockCount() * iterations;
if (blocks == null || blocks.length < blockSize) {
blocks = new byte[blockSize];
}

View File

@ -212,8 +212,8 @@ public final class BlockPackedReaderIterator {
Arrays.fill(values, minValue);
} else {
final PackedInts.Decoder decoder = PackedInts.getDecoder(PackedInts.Format.PACKED, packedIntsVersion, bitsPerValue);
final int iterations = blockSize / decoder.valueCount();
final int blocksSize = iterations * 8 * decoder.blockCount();
final int iterations = blockSize / decoder.byteValueCount();
final int blocksSize = iterations * decoder.byteBlockCount();
if (blocks == null || blocks.length < blocksSize) {
blocks = new byte[blocksSize];
}

View File

@ -2,7 +2,6 @@
package org.apache.lucene.util.packed;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@ -153,35 +152,30 @@ abstract class BulkOperation implements PackedInts.Decoder, PackedInts.Encoder {
* For every number of bits per value, there is a minimum number of
* blocks (b) / values (v) you need to write in order to reach the next block
* boundary:
* - 16 bits per value -> b=1, v=4
* - 24 bits per value -> b=3, v=8
* - 50 bits per value -> b=25, v=32
* - 63 bits per value -> b=63, v=64
* - 16 bits per value -> b=2, v=1
* - 24 bits per value -> b=3, v=1
* - 50 bits per value -> b=25, v=4
* - 63 bits per value -> b=63, v=8
* - ...
* <p>
*
* A bulk read consists of copying <code>iterations*v</code> values that are
* contained in <code>iterations*b</code> blocks into a <code>long[]</code>
* (higher values of <code>iterations</code> are likely to yield a better
* throughput) => this requires n * (b + v) longs in memory.
* <p>
* throughput) => this requires n * (b + 8v) bytes of memory.
*
* This method computes <code>iterations</code> as
* <code>ramBudget / (8 * (b + v))</code> (since a long is 8 bytes).
* <p>
* The resulting number of iterations of this method is guaranteed not to
* overflow when multiplied by
* <tt>8 * {@link PackedInts.Encoder#blockCount()}</tt> or
* <tt>8 * {@link PackedInts.Decoder#blockCount()}</tt>.
* <code>ramBudget / (b + 8v)</code> (since a long is 8 bytes).
*/
public final int computeIterations(int valueCount, int ramBudget) {
final int iterations = (ramBudget >>> 3) / (blockCount() + valueCount());
final int iterations = ramBudget / (byteBlockCount() + 8 * byteValueCount());
if (iterations == 0) {
// at least 1
return 1;
} else if ((iterations - 1) * blockCount() >= valueCount) {
} else if ((iterations - 1) * byteValueCount() >= valueCount) {
// don't allocate for more than the size of the reader
return (int) Math.ceil((double) valueCount / valueCount());
return (int) Math.ceil((double) valueCount / byteValueCount());
} else {
return iterations;
}
}
}
}
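A worked example of the revised budget arithmetic, using the 50-bits-per-value row from the comment above: b = byteBlockCount() = 25 and v = byteValueCount() = 4, so one iteration costs b + 8v = 25 + 32 = 57 bytes (the decoded values live in a long[]). With ramBudget = 16384 bytes, computeIterations() returns 16384 / 57 = 287 iterations, i.e. 287 * 4 = 1148 values per bulk read.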

View File

@ -1,5 +1,6 @@
package org.apache.lucene.util.packed;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@ -23,9 +24,12 @@ package org.apache.lucene.util.packed;
class BulkOperationPacked extends BulkOperation {
private final int bitsPerValue;
private final int blockCount;
private final int valueCount;
private final int longBlockCount;
private final int longValueCount;
private final int byteBlockCount;
private final int byteValueCount;
private final long mask;
private final int intMask;
public BulkOperationPacked(int bitsPerValue) {
this.bitsPerValue = bitsPerValue;
@ -34,31 +38,50 @@ class BulkOperationPacked extends BulkOperation {
while ((blocks & 1) == 0) {
blocks >>>= 1;
}
this.blockCount = blocks;
this.valueCount = 64 * blockCount / bitsPerValue;
this.longBlockCount = blocks;
this.longValueCount = 64 * longBlockCount / bitsPerValue;
int byteBlockCount = 8 * longBlockCount;
int byteValueCount = longValueCount;
while ((byteBlockCount & 1) == 0 && (byteValueCount & 1) == 0) {
byteBlockCount >>>= 1;
byteValueCount >>>= 1;
}
this.byteBlockCount = byteBlockCount;
this.byteValueCount = byteValueCount;
if (bitsPerValue == 64) {
this.mask = ~0L;
} else {
this.mask = (1L << bitsPerValue) - 1;
}
assert valueCount * bitsPerValue == 64 * blockCount;
this.intMask = (int) mask;
assert longValueCount * bitsPerValue == 64 * longBlockCount;
}
@Override
public int blockCount() {
return blockCount;
public int longBlockCount() {
return longBlockCount;
}
@Override
public int valueCount() {
return valueCount;
public int longValueCount() {
return longValueCount;
}
@Override
public int byteBlockCount() {
return byteBlockCount;
}
@Override
public int byteValueCount() {
return byteValueCount;
}
@Override
public void decode(long[] blocks, int blocksOffset, long[] values,
int valuesOffset, int iterations) {
int bitsLeft = 64;
for (int i = 0; i < valueCount * iterations; ++i) {
for (int i = 0; i < longValueCount * iterations; ++i) {
bitsLeft -= bitsPerValue;
if (bitsLeft < 0) {
values[valuesOffset++] =
@ -74,22 +97,28 @@ class BulkOperationPacked extends BulkOperation {
@Override
public void decode(byte[] blocks, int blocksOffset, long[] values,
int valuesOffset, int iterations) {
int blockBitsLeft = 8;
int valueBitsLeft = bitsPerValue;
long nextValue = 0;
for (int end = valuesOffset + iterations * valueCount; valuesOffset < end; ) {
if (valueBitsLeft > blockBitsLeft) {
nextValue |= (blocks[blocksOffset++] & ((1L << blockBitsLeft) - 1)) << (valueBitsLeft - blockBitsLeft);
valueBitsLeft -= blockBitsLeft;
blockBitsLeft = 8;
long nextValue = 0L;
int bitsLeft = bitsPerValue;
for (int i = 0; i < iterations * byteBlockCount; ++i) {
final long bytes = blocks[blocksOffset++] & 0xFFL;
if (bitsLeft > 8) {
// just buffer
bitsLeft -= 8;
nextValue |= bytes << bitsLeft;
} else {
nextValue |= ((blocks[blocksOffset] & 0xFFL) >>> (blockBitsLeft - valueBitsLeft)) & ((1L << valueBitsLeft) - 1);
values[valuesOffset++] = nextValue;
nextValue = 0;
blockBitsLeft -= valueBitsLeft;
valueBitsLeft = bitsPerValue;
// flush
int bits = 8 - bitsLeft;
values[valuesOffset++] = nextValue | (bytes >>> bits);
while (bits >= bitsPerValue) {
bits -= bitsPerValue;
values[valuesOffset++] = (bytes >>> bits) & mask;
}
// then buffer
bitsLeft = bitsPerValue - bits;
nextValue = (bytes & ((1L << bits) - 1)) << bitsLeft;
}
}
assert bitsLeft == bitsPerValue;
}
@Override
@ -99,7 +128,7 @@ class BulkOperationPacked extends BulkOperation {
throw new UnsupportedOperationException("Cannot decode " + bitsPerValue + "-bits values into an int[]");
}
int bitsLeft = 64;
for (int i = 0; i < valueCount * iterations; ++i) {
for (int i = 0; i < longValueCount * iterations; ++i) {
bitsLeft -= bitsPerValue;
if (bitsLeft < 0) {
values[valuesOffset++] = (int)
@ -115,25 +144,28 @@ class BulkOperationPacked extends BulkOperation {
@Override
public void decode(byte[] blocks, int blocksOffset, int[] values,
int valuesOffset, int iterations) {
if (bitsPerValue > 32) {
throw new UnsupportedOperationException("Cannot decode " + bitsPerValue + "-bits values into an int[]");
}
int blockBitsLeft = 8;
int valueBitsLeft = bitsPerValue;
int nextValue = 0;
for (int end = valuesOffset + iterations * valueCount; valuesOffset < end; ) {
if (valueBitsLeft > blockBitsLeft) {
nextValue |= (blocks[blocksOffset++] & ((1L << blockBitsLeft) - 1)) << (valueBitsLeft - blockBitsLeft);
valueBitsLeft -= blockBitsLeft;
blockBitsLeft = 8;
int bitsLeft = bitsPerValue;
for (int i = 0; i < iterations * byteBlockCount; ++i) {
final int bytes = blocks[blocksOffset++] & 0xFF;
if (bitsLeft > 8) {
// just buffer
bitsLeft -= 8;
nextValue |= bytes << bitsLeft;
} else {
nextValue |= ((blocks[blocksOffset] & 0xFFL) >>> (blockBitsLeft - valueBitsLeft)) & ((1L << valueBitsLeft) - 1);
values[valuesOffset++] = nextValue;
nextValue = 0;
blockBitsLeft -= valueBitsLeft;
valueBitsLeft = bitsPerValue;
// flush
int bits = 8 - bitsLeft;
values[valuesOffset++] = nextValue | (bytes >>> bits);
while (bits >= bitsPerValue) {
bits -= bitsPerValue;
values[valuesOffset++] = (bytes >>> bits) & intMask;
}
// then buffer
bitsLeft = bitsPerValue - bits;
nextValue = (bytes & ((1 << bits) - 1)) << bitsLeft;
}
}
assert bitsLeft == bitsPerValue;
}
@Override
@ -141,7 +173,7 @@ class BulkOperationPacked extends BulkOperation {
int blocksOffset, int iterations) {
long nextBlock = 0;
int bitsLeft = 64;
for (int i = 0; i < valueCount * iterations; ++i) {
for (int i = 0; i < longValueCount * iterations; ++i) {
bitsLeft -= bitsPerValue;
if (bitsLeft > 0) {
nextBlock |= values[valuesOffset++] << bitsLeft;
@ -164,7 +196,7 @@ class BulkOperationPacked extends BulkOperation {
int blocksOffset, int iterations) {
long nextBlock = 0;
int bitsLeft = 64;
for (int i = 0; i < valueCount * iterations; ++i) {
for (int i = 0; i < longValueCount * iterations; ++i) {
bitsLeft -= bitsPerValue;
if (bitsLeft > 0) {
nextBlock |= (values[valuesOffset++] & 0xFFFFFFFFL) << bitsLeft;
@ -185,47 +217,57 @@ class BulkOperationPacked extends BulkOperation {
@Override
public void encode(long[] values, int valuesOffset, byte[] blocks,
int blocksOffset, int iterations) {
long nextBlock = 0;
int bitsLeft = 64;
for (int i = 0; i < valueCount * iterations; ++i) {
bitsLeft -= bitsPerValue;
if (bitsLeft > 0) {
nextBlock |= values[valuesOffset++] << bitsLeft;
} else if (bitsLeft == 0) {
nextBlock |= values[valuesOffset++];
blocksOffset = writeLong(nextBlock, blocks, blocksOffset);
nextBlock = 0;
bitsLeft = 64;
} else { // bitsLeft < 0
nextBlock |= values[valuesOffset] >>> -bitsLeft;
blocksOffset = writeLong(nextBlock, blocks, blocksOffset);
nextBlock = (values[valuesOffset++] & ((1L << -bitsLeft) - 1)) << (64 + bitsLeft);
bitsLeft += 64;
int nextBlock = 0;
int bitsLeft = 8;
for (int i = 0; i < byteValueCount * iterations; ++i) {
final long v = values[valuesOffset++];
assert bitsPerValue == 64 || PackedInts.bitsRequired(v) <= bitsPerValue;
if (bitsPerValue < bitsLeft) {
// just buffer
nextBlock |= v << (bitsLeft - bitsPerValue);
bitsLeft -= bitsPerValue;
} else {
// flush as many blocks as possible
int bits = bitsPerValue - bitsLeft;
blocks[blocksOffset++] = (byte) (nextBlock | (v >>> bits));
while (bits >= 8) {
bits -= 8;
blocks[blocksOffset++] = (byte) (v >>> bits);
}
// then buffer
bitsLeft = 8 - bits;
nextBlock = (int) ((v & ((1L << bits) - 1)) << bitsLeft);
}
}
assert bitsLeft == 8;
}
@Override
public void encode(int[] values, int valuesOffset, byte[] blocks,
int blocksOffset, int iterations) {
long nextBlock = 0;
int bitsLeft = 64;
for (int i = 0; i < valueCount * iterations; ++i) {
bitsLeft -= bitsPerValue;
if (bitsLeft > 0) {
nextBlock |= (values[valuesOffset++] & 0xFFFFFFFFL) << bitsLeft;
} else if (bitsLeft == 0) {
nextBlock |= (values[valuesOffset++] & 0xFFFFFFFFL);
blocksOffset = writeLong(nextBlock, blocks, blocksOffset);
nextBlock = 0;
bitsLeft = 64;
} else { // bitsLeft < 0
nextBlock |= (values[valuesOffset] & 0xFFFFFFFFL) >>> -bitsLeft;
blocksOffset = writeLong(nextBlock, blocks, blocksOffset);
nextBlock = (values[valuesOffset++] & ((1L << -bitsLeft) - 1)) << (64 + bitsLeft);
bitsLeft += 64;
int nextBlock = 0;
int bitsLeft = 8;
for (int i = 0; i < byteValueCount * iterations; ++i) {
final int v = values[valuesOffset++];
assert PackedInts.bitsRequired(v & 0xFFFFFFFFL) <= bitsPerValue;
if (bitsPerValue < bitsLeft) {
// just buffer
nextBlock |= v << (bitsLeft - bitsPerValue);
bitsLeft -= bitsPerValue;
} else {
// flush as many blocks as possible
int bits = bitsPerValue - bitsLeft;
blocks[blocksOffset++] = (byte) (nextBlock | (v >>> bits));
while (bits >= 8) {
bits -= 8;
blocks[blocksOffset++] = (byte) (v >>> bits);
}
// then buffer
bitsLeft = 8 - bits;
nextBlock = (v & ((1 << bits) - 1)) << bitsLeft;
}
}
assert bitsLeft == 8;
}
}
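To make the new byte-aligned bookkeeping concrete, take bitsPerValue = 10. The constructor first reduces 10 to longBlockCount = 5 (5 longs = 320 bits hold exactly longValueCount = 32 values), then starts from byteBlockCount = 40 and byteValueCount = 32 and halves both while they stay even, ending at byteBlockCount = 5 and byteValueCount = 4: five bytes (40 bits) hold exactly four 10-bit values. The byte-oriented decode and encode loops above therefore iterate iterations * byteBlockCount (or byteValueCount) times, one byte or one value per step, instead of the old 8 * iterations, which assumed whole 64-bit blocks.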

View File

@ -26,8 +26,6 @@ final class BulkOperationPacked1 extends BulkOperationPacked {
public BulkOperationPacked1() {
super(1);
assert blockCount() == 1;
assert valueCount() == 64;
}
@Override
@ -42,7 +40,7 @@ final class BulkOperationPacked1 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
for (int j = 0; j < 8 * iterations; ++j) {
for (int j = 0; j < iterations; ++j) {
final byte block = blocks[blocksOffset++];
values[valuesOffset++] = (block >>> 7) & 1;
values[valuesOffset++] = (block >>> 6) & 1;
@ -67,7 +65,7 @@ final class BulkOperationPacked1 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
for (int j = 0; j < 8 * iterations; ++j) {
for (int j = 0; j < iterations; ++j) {
final byte block = blocks[blocksOffset++];
values[valuesOffset++] = (block >>> 7) & 1;
values[valuesOffset++] = (block >>> 6) & 1;

View File

@ -26,8 +26,6 @@ final class BulkOperationPacked10 extends BulkOperationPacked {
public BulkOperationPacked10() {
super(10);
assert blockCount() == 5;
assert valueCount() == 32;
}
@Override
@ -75,7 +73,7 @@ final class BulkOperationPacked10 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final int byte0 = blocks[blocksOffset++] & 0xFF;
final int byte1 = blocks[blocksOffset++] & 0xFF;
values[valuesOffset++] = (byte0 << 2) | (byte1 >>> 6);
@ -133,7 +131,7 @@ final class BulkOperationPacked10 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final long byte0 = blocks[blocksOffset++] & 0xFF;
final long byte1 = blocks[blocksOffset++] & 0xFF;
values[valuesOffset++] = (byte0 << 2) | (byte1 >>> 6);

View File

@ -26,8 +26,6 @@ final class BulkOperationPacked11 extends BulkOperationPacked {
public BulkOperationPacked11() {
super(11);
assert blockCount() == 11;
assert valueCount() == 64;
}
@Override
@ -113,7 +111,7 @@ final class BulkOperationPacked11 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final int byte0 = blocks[blocksOffset++] & 0xFF;
final int byte1 = blocks[blocksOffset++] & 0xFF;
values[valuesOffset++] = (byte0 << 3) | (byte1 >>> 5);
@ -219,7 +217,7 @@ final class BulkOperationPacked11 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final long byte0 = blocks[blocksOffset++] & 0xFF;
final long byte1 = blocks[blocksOffset++] & 0xFF;
values[valuesOffset++] = (byte0 << 3) | (byte1 >>> 5);

View File

@ -26,8 +26,6 @@ final class BulkOperationPacked12 extends BulkOperationPacked {
public BulkOperationPacked12() {
super(12);
assert blockCount() == 3;
assert valueCount() == 16;
}
@Override
@ -57,7 +55,7 @@ final class BulkOperationPacked12 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final int byte0 = blocks[blocksOffset++] & 0xFF;
final int byte1 = blocks[blocksOffset++] & 0xFF;
values[valuesOffset++] = (byte0 << 4) | (byte1 >>> 4);
@ -93,7 +91,7 @@ final class BulkOperationPacked12 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final long byte0 = blocks[blocksOffset++] & 0xFF;
final long byte1 = blocks[blocksOffset++] & 0xFF;
values[valuesOffset++] = (byte0 << 4) | (byte1 >>> 4);

View File

@ -26,8 +26,6 @@ final class BulkOperationPacked13 extends BulkOperationPacked {
public BulkOperationPacked13() {
super(13);
assert blockCount() == 13;
assert valueCount() == 64;
}
@Override
@ -115,7 +113,7 @@ final class BulkOperationPacked13 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final int byte0 = blocks[blocksOffset++] & 0xFF;
final int byte1 = blocks[blocksOffset++] & 0xFF;
values[valuesOffset++] = (byte0 << 5) | (byte1 >>> 3);
@ -225,7 +223,7 @@ final class BulkOperationPacked13 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final long byte0 = blocks[blocksOffset++] & 0xFF;
final long byte1 = blocks[blocksOffset++] & 0xFF;
values[valuesOffset++] = (byte0 << 5) | (byte1 >>> 3);

View File

@ -26,8 +26,6 @@ final class BulkOperationPacked14 extends BulkOperationPacked {
public BulkOperationPacked14() {
super(14);
assert blockCount() == 7;
assert valueCount() == 32;
}
@Override
@ -77,7 +75,7 @@ final class BulkOperationPacked14 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final int byte0 = blocks[blocksOffset++] & 0xFF;
final int byte1 = blocks[blocksOffset++] & 0xFF;
values[valuesOffset++] = (byte0 << 6) | (byte1 >>> 2);
@ -139,7 +137,7 @@ final class BulkOperationPacked14 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final long byte0 = blocks[blocksOffset++] & 0xFF;
final long byte1 = blocks[blocksOffset++] & 0xFF;
values[valuesOffset++] = (byte0 << 6) | (byte1 >>> 2);

View File

@ -26,8 +26,6 @@ final class BulkOperationPacked15 extends BulkOperationPacked {
public BulkOperationPacked15() {
super(15);
assert blockCount() == 15;
assert valueCount() == 64;
}
@Override
@ -117,7 +115,7 @@ final class BulkOperationPacked15 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final int byte0 = blocks[blocksOffset++] & 0xFF;
final int byte1 = blocks[blocksOffset++] & 0xFF;
values[valuesOffset++] = (byte0 << 7) | (byte1 >>> 1);
@ -231,7 +229,7 @@ final class BulkOperationPacked15 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final long byte0 = blocks[blocksOffset++] & 0xFF;
final long byte1 = blocks[blocksOffset++] & 0xFF;
values[valuesOffset++] = (byte0 << 7) | (byte1 >>> 1);

View File

@ -26,8 +26,6 @@ final class BulkOperationPacked16 extends BulkOperationPacked {
public BulkOperationPacked16() {
super(16);
assert blockCount() == 1;
assert valueCount() == 4;
}
@Override
@ -42,7 +40,7 @@ final class BulkOperationPacked16 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
for (int j = 0; j < 4 * iterations; ++j) {
for (int j = 0; j < iterations; ++j) {
values[valuesOffset++] = ((blocks[blocksOffset++] & 0xFF) << 8) | (blocks[blocksOffset++] & 0xFF);
}
}
@ -59,7 +57,7 @@ final class BulkOperationPacked16 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
for (int j = 0; j < 4 * iterations; ++j) {
for (int j = 0; j < iterations; ++j) {
values[valuesOffset++] = ((blocks[blocksOffset++] & 0xFFL) << 8) | (blocks[blocksOffset++] & 0xFFL);
}
}

View File

@ -26,8 +26,6 @@ final class BulkOperationPacked17 extends BulkOperationPacked {
public BulkOperationPacked17() {
super(17);
assert blockCount() == 17;
assert valueCount() == 64;
}
@Override
@ -119,7 +117,7 @@ final class BulkOperationPacked17 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final int byte0 = blocks[blocksOffset++] & 0xFF;
final int byte1 = blocks[blocksOffset++] & 0xFF;
final int byte2 = blocks[blocksOffset++] & 0xFF;
@ -237,7 +235,7 @@ final class BulkOperationPacked17 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final long byte0 = blocks[blocksOffset++] & 0xFF;
final long byte1 = blocks[blocksOffset++] & 0xFF;
final long byte2 = blocks[blocksOffset++] & 0xFF;

View File

@ -26,8 +26,6 @@ final class BulkOperationPacked18 extends BulkOperationPacked {
public BulkOperationPacked18() {
super(18);
assert blockCount() == 9;
assert valueCount() == 32;
}
@Override
@ -79,7 +77,7 @@ final class BulkOperationPacked18 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final int byte0 = blocks[blocksOffset++] & 0xFF;
final int byte1 = blocks[blocksOffset++] & 0xFF;
final int byte2 = blocks[blocksOffset++] & 0xFF;
@ -145,7 +143,7 @@ final class BulkOperationPacked18 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final long byte0 = blocks[blocksOffset++] & 0xFF;
final long byte1 = blocks[blocksOffset++] & 0xFF;
final long byte2 = blocks[blocksOffset++] & 0xFF;

View File

@ -26,8 +26,6 @@ final class BulkOperationPacked19 extends BulkOperationPacked {
public BulkOperationPacked19() {
super(19);
assert blockCount() == 19;
assert valueCount() == 64;
}
@Override
@ -121,7 +119,7 @@ final class BulkOperationPacked19 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final int byte0 = blocks[blocksOffset++] & 0xFF;
final int byte1 = blocks[blocksOffset++] & 0xFF;
final int byte2 = blocks[blocksOffset++] & 0xFF;
@ -243,7 +241,7 @@ final class BulkOperationPacked19 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final long byte0 = blocks[blocksOffset++] & 0xFF;
final long byte1 = blocks[blocksOffset++] & 0xFF;
final long byte2 = blocks[blocksOffset++] & 0xFF;

View File

@ -26,8 +26,6 @@ final class BulkOperationPacked2 extends BulkOperationPacked {
public BulkOperationPacked2() {
super(2);
assert blockCount() == 1;
assert valueCount() == 32;
}
@Override
@ -42,7 +40,7 @@ final class BulkOperationPacked2 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
for (int j = 0; j < 8 * iterations; ++j) {
for (int j = 0; j < iterations; ++j) {
final byte block = blocks[blocksOffset++];
values[valuesOffset++] = (block >>> 6) & 3;
values[valuesOffset++] = (block >>> 4) & 3;
@ -63,7 +61,7 @@ final class BulkOperationPacked2 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
for (int j = 0; j < 8 * iterations; ++j) {
for (int j = 0; j < iterations; ++j) {
final byte block = blocks[blocksOffset++];
values[valuesOffset++] = (block >>> 6) & 3;
values[valuesOffset++] = (block >>> 4) & 3;

View File

@ -26,8 +26,6 @@ final class BulkOperationPacked20 extends BulkOperationPacked {
public BulkOperationPacked20() {
super(20);
assert blockCount() == 5;
assert valueCount() == 16;
}
@Override
@ -59,7 +57,7 @@ final class BulkOperationPacked20 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final int byte0 = blocks[blocksOffset++] & 0xFF;
final int byte1 = blocks[blocksOffset++] & 0xFF;
final int byte2 = blocks[blocksOffset++] & 0xFF;
@ -99,7 +97,7 @@ final class BulkOperationPacked20 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final long byte0 = blocks[blocksOffset++] & 0xFF;
final long byte1 = blocks[blocksOffset++] & 0xFF;
final long byte2 = blocks[blocksOffset++] & 0xFF;

View File

@ -26,8 +26,6 @@ final class BulkOperationPacked21 extends BulkOperationPacked {
public BulkOperationPacked21() {
super(21);
assert blockCount() == 21;
assert valueCount() == 64;
}
@Override
@ -123,7 +121,7 @@ final class BulkOperationPacked21 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final int byte0 = blocks[blocksOffset++] & 0xFF;
final int byte1 = blocks[blocksOffset++] & 0xFF;
final int byte2 = blocks[blocksOffset++] & 0xFF;
@ -249,7 +247,7 @@ final class BulkOperationPacked21 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final long byte0 = blocks[blocksOffset++] & 0xFF;
final long byte1 = blocks[blocksOffset++] & 0xFF;
final long byte2 = blocks[blocksOffset++] & 0xFF;

View File

@ -26,8 +26,6 @@ final class BulkOperationPacked22 extends BulkOperationPacked {
public BulkOperationPacked22() {
super(22);
assert blockCount() == 11;
assert valueCount() == 32;
}
@Override
@ -81,7 +79,7 @@ final class BulkOperationPacked22 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final int byte0 = blocks[blocksOffset++] & 0xFF;
final int byte1 = blocks[blocksOffset++] & 0xFF;
final int byte2 = blocks[blocksOffset++] & 0xFF;
@ -151,7 +149,7 @@ final class BulkOperationPacked22 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final long byte0 = blocks[blocksOffset++] & 0xFF;
final long byte1 = blocks[blocksOffset++] & 0xFF;
final long byte2 = blocks[blocksOffset++] & 0xFF;

View File

@ -26,8 +26,6 @@ final class BulkOperationPacked23 extends BulkOperationPacked {
public BulkOperationPacked23() {
super(23);
assert blockCount() == 23;
assert valueCount() == 64;
}
@Override
@ -125,7 +123,7 @@ final class BulkOperationPacked23 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final int byte0 = blocks[blocksOffset++] & 0xFF;
final int byte1 = blocks[blocksOffset++] & 0xFF;
final int byte2 = blocks[blocksOffset++] & 0xFF;
@ -255,7 +253,7 @@ final class BulkOperationPacked23 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final long byte0 = blocks[blocksOffset++] & 0xFF;
final long byte1 = blocks[blocksOffset++] & 0xFF;
final long byte2 = blocks[blocksOffset++] & 0xFF;

View File

@ -26,8 +26,6 @@ final class BulkOperationPacked24 extends BulkOperationPacked {
public BulkOperationPacked24() {
super(24);
assert blockCount() == 3;
assert valueCount() == 8;
}
@Override
@ -49,7 +47,7 @@ final class BulkOperationPacked24 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final int byte0 = blocks[blocksOffset++] & 0xFF;
final int byte1 = blocks[blocksOffset++] & 0xFF;
final int byte2 = blocks[blocksOffset++] & 0xFF;
@ -76,7 +74,7 @@ final class BulkOperationPacked24 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final long byte0 = blocks[blocksOffset++] & 0xFF;
final long byte1 = blocks[blocksOffset++] & 0xFF;
final long byte2 = blocks[blocksOffset++] & 0xFF;

View File

@ -26,8 +26,6 @@ final class BulkOperationPacked3 extends BulkOperationPacked {
public BulkOperationPacked3() {
super(3);
assert blockCount() == 3;
assert valueCount() == 64;
}
@Override
@ -105,7 +103,7 @@ final class BulkOperationPacked3 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final int byte0 = blocks[blocksOffset++] & 0xFF;
values[valuesOffset++] = byte0 >>> 5;
values[valuesOffset++] = (byte0 >>> 2) & 7;
@ -195,7 +193,7 @@ final class BulkOperationPacked3 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final long byte0 = blocks[blocksOffset++] & 0xFF;
values[valuesOffset++] = byte0 >>> 5;
values[valuesOffset++] = (byte0 >>> 2) & 7;

View File

@ -26,8 +26,6 @@ final class BulkOperationPacked4 extends BulkOperationPacked {
public BulkOperationPacked4() {
super(4);
assert blockCount() == 1;
assert valueCount() == 16;
}
@Override
@ -42,7 +40,7 @@ final class BulkOperationPacked4 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
for (int j = 0; j < 8 * iterations; ++j) {
for (int j = 0; j < iterations; ++j) {
final byte block = blocks[blocksOffset++];
values[valuesOffset++] = (block >>> 4) & 15;
values[valuesOffset++] = block & 15;
@ -61,7 +59,7 @@ final class BulkOperationPacked4 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
for (int j = 0; j < 8 * iterations; ++j) {
for (int j = 0; j < iterations; ++j) {
final byte block = blocks[blocksOffset++];
values[valuesOffset++] = (block >>> 4) & 15;
values[valuesOffset++] = block & 15;

View File

@ -26,8 +26,6 @@ final class BulkOperationPacked5 extends BulkOperationPacked {
public BulkOperationPacked5() {
super(5);
assert blockCount() == 5;
assert valueCount() == 64;
}
@Override
@ -107,7 +105,7 @@ final class BulkOperationPacked5 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final int byte0 = blocks[blocksOffset++] & 0xFF;
values[valuesOffset++] = byte0 >>> 3;
final int byte1 = blocks[blocksOffset++] & 0xFF;
@ -201,7 +199,7 @@ final class BulkOperationPacked5 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final long byte0 = blocks[blocksOffset++] & 0xFF;
values[valuesOffset++] = byte0 >>> 3;
final long byte1 = blocks[blocksOffset++] & 0xFF;

View File

@ -26,8 +26,6 @@ final class BulkOperationPacked6 extends BulkOperationPacked {
public BulkOperationPacked6() {
super(6);
assert blockCount() == 3;
assert valueCount() == 32;
}
@Override
@ -73,7 +71,7 @@ final class BulkOperationPacked6 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final int byte0 = blocks[blocksOffset++] & 0xFF;
values[valuesOffset++] = byte0 >>> 2;
final int byte1 = blocks[blocksOffset++] & 0xFF;
@ -127,7 +125,7 @@ final class BulkOperationPacked6 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final long byte0 = blocks[blocksOffset++] & 0xFF;
values[valuesOffset++] = byte0 >>> 2;
final long byte1 = blocks[blocksOffset++] & 0xFF;

View File

@ -26,8 +26,6 @@ final class BulkOperationPacked7 extends BulkOperationPacked {
public BulkOperationPacked7() {
super(7);
assert blockCount() == 7;
assert valueCount() == 64;
}
@Override
@ -109,7 +107,7 @@ final class BulkOperationPacked7 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final int byte0 = blocks[blocksOffset++] & 0xFF;
values[valuesOffset++] = byte0 >>> 1;
final int byte1 = blocks[blocksOffset++] & 0xFF;
@ -207,7 +205,7 @@ final class BulkOperationPacked7 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final long byte0 = blocks[blocksOffset++] & 0xFF;
values[valuesOffset++] = byte0 >>> 1;
final long byte1 = blocks[blocksOffset++] & 0xFF;

View File

@ -26,8 +26,6 @@ final class BulkOperationPacked8 extends BulkOperationPacked {
public BulkOperationPacked8() {
super(8);
assert blockCount() == 1;
assert valueCount() == 8;
}
@Override
@ -42,7 +40,7 @@ final class BulkOperationPacked8 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
for (int j = 0; j < 8 * iterations; ++j) {
for (int j = 0; j < iterations; ++j) {
values[valuesOffset++] = blocks[blocksOffset++] & 0xFF;
}
}
@ -59,7 +57,7 @@ final class BulkOperationPacked8 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
for (int j = 0; j < 8 * iterations; ++j) {
for (int j = 0; j < iterations; ++j) {
values[valuesOffset++] = blocks[blocksOffset++] & 0xFF;
}
}

View File

@ -26,8 +26,6 @@ final class BulkOperationPacked9 extends BulkOperationPacked {
public BulkOperationPacked9() {
super(9);
assert blockCount() == 9;
assert valueCount() == 64;
}
@Override
@ -111,7 +109,7 @@ final class BulkOperationPacked9 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final int byte0 = blocks[blocksOffset++] & 0xFF;
final int byte1 = blocks[blocksOffset++] & 0xFF;
values[valuesOffset++] = (byte0 << 1) | (byte1 >>> 7);
@ -213,7 +211,7 @@ final class BulkOperationPacked9 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final long byte0 = blocks[blocksOffset++] & 0xFF;
final long byte1 = blocks[blocksOffset++] & 0xFF;
values[valuesOffset++] = (byte0 << 1) | (byte1 >>> 7);

View File

@ -35,12 +35,22 @@ final class BulkOperationPackedSingleBlock extends BulkOperation {
}
@Override
public final int blockCount() {
public final int longBlockCount() {
return BLOCK_COUNT;
}
@Override
public int valueCount() {
public final int byteBlockCount() {
return BLOCK_COUNT * 8;
}
@Override
public int longValueCount() {
return valueCount;
}
@Override
public final int byteValueCount() {
return valueCount;
}
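
For the single-block format the long- and byte-oriented counts are tied together: one 64-bit block is also eight byte blocks, and both hold the same number of values. A worked instance (numbers derived from the definitions above, not read from the API):

// PACKED_SINGLE_BLOCK with bitsPerValue = 21: a 64-bit block holds
// 64 / 21 = 3 values, so
//   longBlockCount() == 1    byteBlockCount() == 8
//   longValueCount() == 3    byteValueCount() == 3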

View File

@ -140,9 +140,9 @@ class Packed64 extends PackedInts.MutableImpl {
final PackedInts.Decoder decoder = BulkOperation.of(PackedInts.Format.PACKED, bitsPerValue);
// go to the next block where the value does not span across two blocks
final int offsetInBlocks = index % decoder.valueCount();
final int offsetInBlocks = index % decoder.longValueCount();
if (offsetInBlocks != 0) {
for (int i = offsetInBlocks; i < decoder.valueCount() && len > 0; ++i) {
for (int i = offsetInBlocks; i < decoder.longValueCount() && len > 0; ++i) {
arr[off++] = get(index++);
--len;
}
@ -152,12 +152,12 @@ class Packed64 extends PackedInts.MutableImpl {
}
// bulk get
assert index % decoder.valueCount() == 0;
assert index % decoder.longValueCount() == 0;
int blockIndex = (int) ((long) index * bitsPerValue) >>> BLOCK_BITS;
assert (((long)index * bitsPerValue) & MOD_MASK) == 0;
final int iterations = len / decoder.valueCount();
final int iterations = len / decoder.longValueCount();
decoder.decode(blocks, blockIndex, arr, off, iterations);
final int gotValues = iterations * decoder.valueCount();
final int gotValues = iterations * decoder.longValueCount();
index += gotValues;
len -= gotValues;
assert len >= 0;
@ -204,9 +204,9 @@ class Packed64 extends PackedInts.MutableImpl {
final PackedInts.Encoder encoder = BulkOperation.of(PackedInts.Format.PACKED, bitsPerValue);
// go to the next block where the value does not span across two blocks
final int offsetInBlocks = index % encoder.valueCount();
final int offsetInBlocks = index % encoder.longValueCount();
if (offsetInBlocks != 0) {
for (int i = offsetInBlocks; i < encoder.valueCount() && len > 0; ++i) {
for (int i = offsetInBlocks; i < encoder.longValueCount() && len > 0; ++i) {
set(index++, arr[off++]);
--len;
}
@ -216,12 +216,12 @@ class Packed64 extends PackedInts.MutableImpl {
}
// bulk set
assert index % encoder.valueCount() == 0;
assert index % encoder.longValueCount() == 0;
int blockIndex = (int) ((long) index * bitsPerValue) >>> BLOCK_BITS;
assert (((long)index * bitsPerValue) & MOD_MASK) == 0;
final int iterations = len / encoder.valueCount();
final int iterations = len / encoder.longValueCount();
encoder.encode(arr, off, blocks, blockIndex, iterations);
final int setValues = iterations * encoder.valueCount();
final int setValues = iterations * encoder.longValueCount();
index += setValues;
len -= setValues;
assert len >= 0;

View File

@ -92,8 +92,8 @@ abstract class Packed64SingleBlock extends PackedInts.MutableImpl {
// bulk get
assert index % valuesPerBlock == 0;
final PackedInts.Decoder decoder = BulkOperation.of(PackedInts.Format.PACKED_SINGLE_BLOCK, bitsPerValue);
assert decoder.blockCount() == 1;
assert decoder.valueCount() == valuesPerBlock;
assert decoder.longBlockCount() == 1;
assert decoder.longValueCount() == valuesPerBlock;
final int blockIndex = index / valuesPerBlock;
final int nblocks = (index + len) / valuesPerBlock - blockIndex;
decoder.decode(blocks, blockIndex, arr, off, nblocks);
@ -136,8 +136,8 @@ abstract class Packed64SingleBlock extends PackedInts.MutableImpl {
// bulk set
assert index % valuesPerBlock == 0;
final BulkOperation op = BulkOperation.of(PackedInts.Format.PACKED_SINGLE_BLOCK, bitsPerValue);
assert op.blockCount() == 1;
assert op.valueCount() == valuesPerBlock;
assert op.longBlockCount() == 1;
assert op.longValueCount() == valuesPerBlock;
final int blockIndex = index / valuesPerBlock;
final int nblocks = (index + len) / valuesPerBlock - blockIndex;
op.encode(arr, off, blocks, blockIndex, nblocks);

View File

@ -280,15 +280,28 @@ public class PackedInts {
public static interface Decoder {
/**
* The minimum number of long blocks to decode in a single call.
* The minimum number of long blocks to decode in a single iteration, when
* using long decoding.
*/
int blockCount();
int longBlockCount();
/**
* The number of values that can be stored in <code>blockCount()</code> long
* The number of values that can be stored in {@link #longBlockCount()} long
* blocks.
*/
int valueCount();
int longValueCount();
/**
* The minimum number of byte blocks to decode in a single iteration, when
* using byte decoding.
*/
int byteBlockCount();
/**
* The number of values that can be stored in {@link #byteBlockCount()} byte
* blocks.
*/
int byteValueCount();
/**
* Read <code>iterations * blockCount()</code> blocks from <code>blocks</code>,
@ -350,15 +363,28 @@ public class PackedInts {
public static interface Encoder {
/**
* The minimum number of long blocks to encode in a single call.
* The minimum number of long blocks to encode in a single iteration, when
* using long encoding.
*/
int blockCount();
int longBlockCount();
/**
* The number of values that can be stored in <code>blockCount()</code> long
* The number of values that can be stored in {@link #longBlockCount()} long
* blocks.
*/
int valueCount();
int longValueCount();
/**
* The minimum number of byte blocks to encode in a single iteration, when
* using byte encoding.
*/
int byteBlockCount();
/**
* The number of values that can be stored in {@link #byteBlockCount()} byte
* blocks.
*/
int byteValueCount();
/**
* Read <code>iterations * valueCount()</code> values from <code>values</code>,
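
To make the four counts concrete, a worked example for the PACKED format (the numbers follow from the block/value arithmetic, not from any API call):

// bitsPerValue = 20:
//   longBlockCount() == 5   // lcm(20, 64) = 320 bits = 5 longs
//   longValueCount() == 16  // 320 / 20
//   byteBlockCount() == 5   // lcm(20, 8) = 40 bits = 5 bytes
//   byteValueCount() == 2   // 40 / 20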

View File

@ -39,14 +39,23 @@ final class PackedReaderIterator extends PackedInts.ReaderIteratorImpl {
this.format = format;
this.packedIntsVersion = packedIntsVersion;
bulkOperation = BulkOperation.of(format, bitsPerValue);
iterations = bulkOperation.computeIterations(valueCount, mem);
iterations = iterations(mem);
assert valueCount == 0 || iterations > 0;
nextBlocks = new byte[8 * iterations * bulkOperation.blockCount()];
nextValues = new LongsRef(new long[iterations * bulkOperation.valueCount()], 0, 0);
nextBlocks = new byte[iterations * bulkOperation.byteBlockCount()];
nextValues = new LongsRef(new long[iterations * bulkOperation.byteValueCount()], 0, 0);
nextValues.offset = nextValues.longs.length;
position = -1;
}
private int iterations(int mem) {
int iterations = bulkOperation.computeIterations(valueCount, mem);
if (packedIntsVersion < PackedInts.VERSION_BYTE_ALIGNED) {
// make sure iterations is a multiple of 8
iterations = (iterations + 7) & 0xFFFFFFF8;
}
return iterations;
}
@Override
public LongsRef next(int count) throws IOException {
assert nextValues.length >= 0;
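
The masking in iterations() rounds up to the next multiple of 8:

int rounded = (n + 7) & 0xFFFFFFF8; // 1 -> 8, 7 -> 8, 8 -> 8, 9 -> 16

Eight byte-oriented iterations consume a whole number of 64-bit blocks, which is presumably why streams written before VERSION_BYTE_ALIGNED require the rounding.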

View File

@ -42,8 +42,8 @@ final class PackedWriter extends PackedInts.Writer {
this.format = format;
encoder = BulkOperation.of(format, bitsPerValue);
iterations = encoder.computeIterations(valueCount, mem);
nextBlocks = new byte[8 * iterations * encoder.blockCount()];
nextValues = new long[iterations * encoder.valueCount()];
nextBlocks = new byte[iterations * encoder.byteBlockCount()];
nextValues = new long[iterations * encoder.byteValueCount()];
off = 0;
written = 0;
finished = false;

View File

@ -57,28 +57,28 @@ FOOTER="""
* For every number of bits per value, there is a minimum number of
* blocks (b) / values (v) you need to write in order to reach the next block
* boundary:
* - 16 bits per value -> b=1, v=4
* - 24 bits per value -> b=3, v=8
* - 50 bits per value -> b=25, v=32
* - 63 bits per value -> b=63, v=64
* - 16 bits per value -> b=2, v=1
* - 24 bits per value -> b=3, v=1
* - 50 bits per value -> b=25, v=4
* - 63 bits per value -> b=63, v=8
* - ...
*
* A bulk read consists of copying <code>iterations*v</code> values that are
* contained in <code>iterations*b</code> blocks into a <code>long[]</code>
* (higher values of <code>iterations</code> are likely to yield a better
* throughput) => this requires n * (b + v) longs in memory.
* throughput) => this requires n * (b + 8v) bytes of memory.
*
* This method computes <code>iterations</code> as
* <code>ramBudget / (8 * (b + v))</code> (since a long is 8 bytes).
* <code>ramBudget / (b + 8v)</code> (since a long is 8 bytes).
*/
public final int computeIterations(int valueCount, int ramBudget) {
final int iterations = (ramBudget >>> 3) / (blockCount() + valueCount());
final int iterations = ramBudget / (byteBlockCount() + 8 * byteValueCount());
if (iterations == 0) {
// at least 1
return 1;
} else if ((iterations - 1) * blockCount() >= valueCount) {
} else if ((iterations - 1) * byteValueCount() >= valueCount) {
// don't allocate for more than the size of the reader
return (int) Math.ceil((double) valueCount / valueCount());
return (int) Math.ceil((double) valueCount / byteValueCount());
} else {
return iterations;
}
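
Plugging numbers into computeIterations() (values assumed, for illustration only):

// bitsPerValue = 20  =>  byteBlockCount() = 5, byteValueCount() = 2
// one iteration costs 5 + 8 * 2 = 21 bytes
// ramBudget = 1024   =>  1024 / 21 = 48 iterations
//                    =>  48 * 2 = 96 values per bulk call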
@ -131,14 +131,11 @@ def block_value_count(bpv, bits=64):
return (blocks, values)
def packed64(bpv, f):
blocks, values = block_value_count(bpv)
mask = (1 << bpv) - 1
f.write("\n")
f.write(" public BulkOperationPacked%d() {\n" %bpv)
f.write(" super(%d);\n" %bpv)
f.write(" assert blockCount() == %d;\n" %blocks)
f.write(" assert valueCount() == %d;\n" %values)
f.write(" }\n\n")
if bpv == 64:
@ -215,20 +212,19 @@ def p64_decode(bpv, f, bits):
if bits < bpv:
f.write(" throw new UnsupportedOperationException();\n")
else:
if is_power_of_two(bpv) and bpv < 8:
f.write(" for (int j = 0; j < 8 * iterations; ++j) {\n")
f.write(" for (int j = 0; j < iterations; ++j) {\n")
f.write(" final byte block = blocks[blocksOffset++];\n")
for shift in xrange(8 - bpv, 0, -bpv):
f.write(" values[valuesOffset++] = (block >>> %d) & %d;\n" %(shift, mask))
f.write(" values[valuesOffset++] = block & %d;\n" %mask)
f.write(" }\n")
elif bpv == 8:
f.write(" for (int j = 0; j < 8 * iterations; ++j) {\n")
f.write(" for (int j = 0; j < iterations; ++j) {\n")
f.write(" values[valuesOffset++] = blocks[blocksOffset++] & 0xFF;\n")
f.write(" }\n")
elif is_power_of_two(bpv) and bpv > 8:
f.write(" for (int j = 0; j < %d * iterations; ++j) {\n" %(64 / bpv))
f.write(" for (int j = 0; j < iterations; ++j) {\n")
m = bits <= 32 and "0xFF" or "0xFFL"
f.write(" values[valuesOffset++] =")
for i in xrange(bpv / 8 - 1):
@ -236,7 +232,7 @@ def p64_decode(bpv, f, bits):
f.write(" (blocks[blocksOffset++] & %s);\n" %m)
f.write(" }\n")
else:
f.write(" for (int i = 0; i < 8 * iterations; ++i) {\n")
f.write(" for (int i = 0; i < iterations; ++i) {\n")
for i in xrange(0, byte_values):
byte_start = i * bpv / 8
bit_start = (i * bpv) % 8

View File

@ -212,7 +212,7 @@ public class TestPackedInts extends LuceneTestCase {
if (!format.isSupported(bpv)) {
continue;
}
final long byteCount = format.byteCount(version, valueCount, bpv);
final long byteCount = format.byteCount(version, valueCount, bpv);
String msg = "format=" + format + ",version=" + version + ",valueCount=" + valueCount + ",bpv=" + bpv;
// test iterator
@ -706,16 +706,22 @@ public class TestPackedInts extends LuceneTestCase {
final PackedInts.Encoder encoder = PackedInts.getEncoder(format, PackedInts.VERSION_CURRENT, bpv);
final PackedInts.Decoder decoder = PackedInts.getDecoder(format, PackedInts.VERSION_CURRENT, bpv);
final int blockCount = encoder.blockCount();
final int valueCount = encoder.valueCount();
assertEquals(blockCount, decoder.blockCount());
assertEquals(valueCount, decoder.valueCount());
final int longBlockCount = encoder.longBlockCount();
final int longValueCount = encoder.longValueCount();
final int byteBlockCount = encoder.byteBlockCount();
final int byteValueCount = encoder.byteValueCount();
assertEquals(longBlockCount, decoder.longBlockCount());
assertEquals(longValueCount, decoder.longValueCount());
assertEquals(byteBlockCount, decoder.byteBlockCount());
assertEquals(byteValueCount, decoder.byteValueCount());
final int iterations = random().nextInt(100);
final int longIterations = random().nextInt(100);
final int byteIterations = longIterations * longValueCount / byteValueCount;
assertEquals(longIterations * longValueCount, byteIterations * byteValueCount);
final int blocksOffset = random().nextInt(100);
final int valuesOffset = random().nextInt(100);
final int blocksOffset2 = random().nextInt(100);
final int blocksLen = iterations * blockCount;
final int blocksLen = longIterations * longBlockCount;
// 1. generate random inputs
final long[] blocks = new long[blocksOffset + blocksLen];
@ -729,8 +735,8 @@ public class TestPackedInts extends LuceneTestCase {
}
// 2. decode
final long[] values = new long[valuesOffset + iterations * valueCount];
decoder.decode(blocks, blocksOffset, values, valuesOffset, iterations);
final long[] values = new long[valuesOffset + longIterations * longValueCount];
decoder.decode(blocks, blocksOffset, values, valuesOffset, longIterations);
for (long value : values) {
assertTrue(value <= PackedInts.maxValue(bpv));
}
@ -738,7 +744,7 @@ public class TestPackedInts extends LuceneTestCase {
final int[] intValues;
if (bpv <= 32) {
intValues = new int[values.length];
decoder.decode(blocks, blocksOffset, intValues, valuesOffset, iterations);
decoder.decode(blocks, blocksOffset, intValues, valuesOffset, longIterations);
assertTrue(equals(intValues, values));
} else {
intValues = null;
@ -746,21 +752,21 @@ public class TestPackedInts extends LuceneTestCase {
// 3. re-encode
final long[] blocks2 = new long[blocksOffset2 + blocksLen];
encoder.encode(values, valuesOffset, blocks2, blocksOffset2, iterations);
encoder.encode(values, valuesOffset, blocks2, blocksOffset2, longIterations);
assertArrayEquals(msg, Arrays.copyOfRange(blocks, blocksOffset, blocks.length),
Arrays.copyOfRange(blocks2, blocksOffset2, blocks2.length));
// test encoding from int[]
if (bpv <= 32) {
final long[] blocks3 = new long[blocks2.length];
encoder.encode(intValues, valuesOffset, blocks3, blocksOffset2, iterations);
encoder.encode(intValues, valuesOffset, blocks3, blocksOffset2, longIterations);
assertArrayEquals(msg, blocks2, blocks3);
}
// 4. byte[] decoding
final byte[] byteBlocks = new byte[8 * blocks.length];
ByteBuffer.wrap(byteBlocks).asLongBuffer().put(blocks);
final long[] values2 = new long[valuesOffset + iterations * valueCount];
decoder.decode(byteBlocks, blocksOffset * 8, values2, valuesOffset, iterations);
final long[] values2 = new long[valuesOffset + longIterations * longValueCount];
decoder.decode(byteBlocks, blocksOffset * 8, values2, valuesOffset, byteIterations);
for (long value : values2) {
assertTrue(msg, value <= PackedInts.maxValue(bpv));
}
@ -768,18 +774,18 @@ public class TestPackedInts extends LuceneTestCase {
// test decoding to int[]
if (bpv <= 32) {
final int[] intValues2 = new int[values2.length];
decoder.decode(byteBlocks, blocksOffset * 8, intValues2, valuesOffset, iterations);
decoder.decode(byteBlocks, blocksOffset * 8, intValues2, valuesOffset, byteIterations);
assertTrue(msg, equals(intValues2, values2));
}
// 5. byte[] encoding
final byte[] blocks3 = new byte[8 * (blocksOffset2 + blocksLen)];
encoder.encode(values, valuesOffset, blocks3, 8 * blocksOffset2, iterations);
encoder.encode(values, valuesOffset, blocks3, 8 * blocksOffset2, byteIterations);
assertEquals(msg, LongBuffer.wrap(blocks2), ByteBuffer.wrap(blocks3).asLongBuffer());
// test encoding from int[]
if (bpv <= 32) {
final byte[] blocks4 = new byte[blocks3.length];
encoder.encode(intValues, valuesOffset, blocks4, 8 * blocksOffset2, iterations);
encoder.encode(intValues, valuesOffset, blocks4, 8 * blocksOffset2, byteIterations);
assertArrayEquals(msg, blocks3, blocks4);
}
}

View File

@ -1,7 +1,6 @@
package org.apache.lucene.facet.taxonomy;
import org.apache.lucene.util.Constants;
import java.util.Arrays;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -28,10 +27,6 @@ import org.apache.lucene.util.Constants;
*/
public class CategoryPath implements Comparable<CategoryPath> {
// TODO: revisit when IBM releases Java 7 newer than SR3 (with a fix)
// to validate, run e.g. TestAssociationExample with -Dtests.iters=1000
private static final boolean IS_J9_JAVA7 = Constants.JRE_IS_MINIMUM_JAVA7 && Constants.JVM_VENDOR.contains("IBM");
/** An empty {@link CategoryPath} */
public static final CategoryPath EMPTY = new CategoryPath();
@ -48,7 +43,7 @@ public class CategoryPath implements Comparable<CategoryPath> {
// Used by singleton EMPTY
private CategoryPath() {
components = new String[0];
components = null;
length = 0;
}
@ -67,16 +62,12 @@ public class CategoryPath implements Comparable<CategoryPath> {
/** Construct from the given path components. */
public CategoryPath(final String... components) {
assert components.length > 0 : "use CategoryPath.EMPTY to create an empty path";
if (IS_J9_JAVA7) {
// On IBM J9 Java 1.7.0, if we do 'this.components = components', then
// at some point its length becomes 0 ... quite unexpectedly. If JIT is
// disabled, it doesn't happen. This bypasses the bug by copying the
// array (note, Arrays.copyOf did not help either!).
this.components = new String[components.length];
System.arraycopy(components, 0, this.components, 0, components.length);
} else {
this.components = components;
for (String comp : components) {
if (comp == null || comp.isEmpty()) {
throw new IllegalArgumentException("empty or null components not allowed: " + Arrays.toString(components));
}
}
this.components = components;
length = components.length;
}
@ -84,9 +75,14 @@ public class CategoryPath implements Comparable<CategoryPath> {
public CategoryPath(final String pathString, final char delimiter) {
String[] comps = pathString.split(Character.toString(delimiter));
if (comps.length == 1 && comps[0].isEmpty()) {
components = EMPTY.components;
components = null;
length = 0;
} else {
for (String comp : comps) {
if (comp == null || comp.isEmpty()) {
throw new IllegalArgumentException("empty or null components not allowed: " + Arrays.toString(comps));
}
}
components = comps;
length = components.length;
}
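
A usage sketch of the new validation (paths illustrative):

new CategoryPath("a", "b", "c");  // ok
new CategoryPath("a/b/c", '/');   // ok
new CategoryPath("a", "", "c");   // throws IllegalArgumentException
new CategoryPath("a//c", '/');    // throws IllegalArgumentException: empty middle component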

View File

@ -1,5 +1,7 @@
package org.apache.lucene.facet.taxonomy;
import java.util.Arrays;
import org.apache.lucene.facet.FacetTestCase;
import org.junit.Test;
@ -173,9 +175,46 @@ public class TestCategoryPath extends FacetTestCase {
pother = new CategoryPath("a/b/c/e", '/');
assertTrue(pother.compareTo(p) > 0);
assertTrue(p.compareTo(pother) < 0);
pother = new CategoryPath("a/b/c//e", '/');
assertTrue(pother.compareTo(p) < 0);
assertTrue(p.compareTo(pother) > 0);
}
@Test
public void testEmptyNullComponents() throws Exception {
// LUCENE-4724: CategoryPath should not allow empty or null components
String[][] components_tests = new String[][] {
new String[] { "", "test" }, // empty in the beginning
new String[] { "test", "" }, // empty in the end
new String[] { "test", "", "foo" }, // empty in the middle
new String[] { null, "test" }, // null at the beginning
new String[] { "test", null }, // null in the end
new String[] { "test", null, "foo" }, // null in the middle
};
for (String[] components : components_tests) {
try {
assertNotNull(new CategoryPath(components));
fail("empty or null components should not be allowed: " + Arrays.toString(components));
} catch (IllegalArgumentException e) {
// ok
}
}
String[] path_tests = new String[] {
"/test", // empty in the beginning
"test//foo", // empty in the middle
};
for (String path : path_tests) {
try {
assertNotNull(new CategoryPath(path, '/'));
fail("empty or null components should not be allowed: " + path);
} catch (IllegalArgumentException e) {
// ok
}
}
// a trailing path separator produces only one component
assertNotNull(new CategoryPath("test/", '/'));
}
}

View File

@ -56,6 +56,12 @@ public class TestCompactLabelToOrdinal extends FacetTestCase {
.onUnmappableCharacter(CodingErrorAction.REPLACE)
.onMalformedInput(CodingErrorAction.REPLACE);
uniqueValues[i] = decoder.decode(ByteBuffer.wrap(buffer, 0, size)).toString();
// we cannot have empty path components, so collapse consecutive
// delimiter chars and strip any leading delimiter.
uniqueValues[i] = uniqueValues[i].replaceAll("/+", "/");
if (uniqueValues[i].startsWith("/")) {
uniqueValues[i] = uniqueValues[i].substring(1);
}
if (uniqueValues[i].indexOf(CompactLabelToOrdinal.TERMINATOR_CHAR) == -1) {
i++;
}
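
The cleanup above in action (input assumed):

// "//a///b".replaceAll("/+", "/")  ->  "/a/b"
// stripping the leading '/'        ->  "a/b"  (no empty components left)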

View File

@ -71,7 +71,7 @@ public final class JoinUtil {
case None:
TermsCollector termsCollector = TermsCollector.create(fromField, multipleValuesPerDocument);
fromSearcher.search(fromQuery, termsCollector);
return new TermsQuery(toField, termsCollector.getCollectorTerms());
return new TermsQuery(toField, fromQuery, termsCollector.getCollectorTerms());
case Total:
case Max:
case Avg:

View File

@ -92,6 +92,35 @@ class TermsIncludingScoreQuery extends Query {
}
}
@Override
public boolean equals(Object obj) {
if (this == obj) {
return true;
} if (!super.equals(obj)) {
return false;
} if (getClass() != obj.getClass()) {
return false;
}
TermsIncludingScoreQuery other = (TermsIncludingScoreQuery) obj;
if (!field.equals(other.field)) {
return false;
}
if (!unwrittenOriginalQuery.equals(other.unwrittenOriginalQuery)) {
return false;
}
return true;
}
@Override
public int hashCode() {
final int prime = 31;
int result = super.hashCode();
result += prime * field.hashCode();
result += prime * unwrittenOriginalQuery.hashCode();
return result;
}
@Override
public Weight createWeight(IndexSearcher searcher) throws IOException {
final Weight originalWeight = originalQuery.createWeight(searcher);

View File

@ -21,6 +21,7 @@ import org.apache.lucene.index.FilteredTermsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
@ -37,13 +38,15 @@ import java.util.Comparator;
class TermsQuery extends MultiTermQuery {
private final BytesRefHash terms;
private final Query fromQuery; // Used for equals() only
/**
* @param field The field that should contain terms that are specified in the previous parameter
* @param terms The terms that matching documents should have. The terms must be sorted by natural order.
*/
TermsQuery(String field, BytesRefHash terms) {
TermsQuery(String field, Query fromQuery, BytesRefHash terms) {
super(field);
this.fromQuery = fromQuery;
this.terms = terms;
}
@ -63,6 +66,31 @@ class TermsQuery extends MultiTermQuery {
'}';
}
@Override
public boolean equals(Object obj) {
if (this == obj) {
return true;
} if (!super.equals(obj)) {
return false;
} if (getClass() != obj.getClass()) {
return false;
}
TermsQuery other = (TermsQuery) obj;
if (!fromQuery.equals(other.fromQuery)) {
return false;
}
return true;
}
@Override
public int hashCode() {
final int prime = 31;
int result = super.hashCode();
result += prime * fromQuery.hashCode();
return result;
}
static class SeekingTermSetTermsEnum extends FilteredTermsEnum {
private final BytesRefHash terms;
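
Since the collected BytesRefHash has no meaningful equals(), the originating fromQuery (together with the field compared by MultiTermQuery) acts as a proxy identity for the term set. A sketch of what the override buys (names and the ScoreMode.None path assumed from the JoinUtil hunk above):

Query join1 = JoinUtil.createJoinQuery("from", false, "to", fromQuery, searcher, ScoreMode.None);
Query join2 = JoinUtil.createJoinQuery("from", false, "to", fromQuery, searcher, ScoreMode.None);
assert join1.equals(join2) && join1.hashCode() == join2.hashCode();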

View File

@ -74,7 +74,7 @@ public class CommonTermsQuery extends Query {
protected final Occur highFreqOccur;
protected float lowFreqBoost = 1.0f;
protected float highFreqBoost = 1.0f;
protected int minNrShouldMatch = 0;
protected float minNrShouldMatch = 0;
/**
* Creates a new {@link CommonTermsQuery}
@ -84,7 +84,7 @@ public class CommonTermsQuery extends Query {
* @param lowFreqOccur
* {@link Occur} used for low frequency terms
* @param maxTermFrequency
* a value in [0..1] (or absolute number >=1) representing the
* a value in [0..1) (or absolute number >=1) representing the
* maximum threshold of a term's document frequency to be considered a
* low frequency term.
* @throws IllegalArgumentException
@ -104,7 +104,7 @@ public class CommonTermsQuery extends Query {
* @param lowFreqOccur
* {@link Occur} used for low frequency terms
* @param maxTermFrequency
* a value in [0..1] (or absolute number >=1) representing the
* a value in [0..1) (or absolute number >=1) representing the
* maximum threshold of a term's document frequency to be considered a
* low frequency term.
* @param disableCoord
@ -160,15 +160,19 @@ public class CommonTermsQuery extends Query {
return buildQuery(maxDoc, contextArray, queryTerms);
}
protected int calcLowFreqMinimumNumberShouldMatch(int numOptional) {
if (minNrShouldMatch >= 1.0f || minNrShouldMatch == 0.0f) {
return (int) minNrShouldMatch;
}
return (int) (Math.round(minNrShouldMatch * numOptional));
}
protected Query buildQuery(final int maxDoc,
final TermContext[] contextArray, final Term[] queryTerms) {
BooleanQuery lowFreq = new BooleanQuery(disableCoord);
BooleanQuery highFreq = new BooleanQuery(disableCoord);
highFreq.setBoost(highFreqBoost);
lowFreq.setBoost(lowFreqBoost);
if (lowFreqOccur == Occur.SHOULD) {
lowFreq.setMinimumNumberShouldMatch(minNrShouldMatch);
}
BooleanQuery query = new BooleanQuery(true);
for (int i = 0; i < queryTerms.length; i++) {
TermContext termContext = contextArray[i];
@ -186,6 +190,11 @@ public class CommonTermsQuery extends Query {
}
}
final int numLowFreqClauses = lowFreq.clauses().size();
if (lowFreqOccur == Occur.SHOULD && numLowFreqClauses > 0) {
int minMustMatch = calcLowFreqMinimumNumberShouldMatch(numLowFreqClauses);
lowFreq.setMinimumNumberShouldMatch(minMustMatch);
}
if (lowFreq.clauses().isEmpty()) {
/*
* if lowFreq is empty we rewrite the high freq terms in a conjunction to
@ -265,7 +274,9 @@ public class CommonTermsQuery extends Query {
/**
* Specifies a minimum number of the optional BooleanClauses which must be
* satisfied in order to produce a match on the low frequency terms query
* part.
* part. This method accepts a float value in the range [0..1) as a fraction
* of the actual query terms in the low frequency clause or a number
* <tt>&gt;=1</tt> as an absolute number of clauses that need to match.
*
* <p>
* By default no optional clauses are necessary for a match (unless there are
@ -276,7 +287,7 @@ public class CommonTermsQuery extends Query {
* @param min
* the number of optional clauses that must match
*/
public void setMinimumNumberShouldMatch(int min) {
public void setMinimumNumberShouldMatch(float min) {
this.minNrShouldMatch = min;
}
@ -284,7 +295,7 @@ public class CommonTermsQuery extends Query {
* Gets the minimum number of the optional BooleanClauses which must be
* satisfied.
*/
public int getMinimumNumberShouldMatch() {
public float getMinimumNumberShouldMatch() {
return minNrShouldMatch;
}
@ -332,7 +343,7 @@ public class CommonTermsQuery extends Query {
result = prime * result
+ ((lowFreqOccur == null) ? 0 : lowFreqOccur.hashCode());
result = prime * result + Float.floatToIntBits(maxTermFrequency);
result = prime * result + minNrShouldMatch;
result = prime * result + Float.floatToIntBits(minNrShouldMatch);
result = prime * result + ((terms == null) ? 0 : terms.hashCode());
return result;
}
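
With the int-to-float change, setMinimumNumberShouldMatch() covers both interpretations; a short usage sketch (terms illustrative):

CommonTermsQuery ctq = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD, 0.5f);
ctq.add(new Term("field", "the"));
ctq.add(new Term("field", "quick"));
ctq.add(new Term("field", "fox"));
ctq.setMinimumNumberShouldMatch(0.5f); // fraction: Math.round(0.5f * numLowFreqClauses)
ctq.setMinimumNumberShouldMatch(2.0f); // >= 1: absolute number of clauses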

View File

@ -175,6 +175,90 @@ public class CommonTermsQueryTest extends LuceneTestCase {
}
}
public void testMinShouldMatch() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
String[] docs = new String[] {"this is the end of the world right",
"is this it or maybe not",
"this is the end of the universe as we know it",
"there is the famous restaurant at the end of the universe",};
for (int i = 0; i < docs.length; i++) {
Document doc = new Document();
doc.add(newStringField("id", "" + i, Field.Store.YES));
doc.add(newTextField("field", docs[i], Field.Store.NO));
w.addDocument(doc);
}
IndexReader r = w.getReader();
IndexSearcher s = newSearcher(r);
{
CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
random().nextBoolean() ? 2.0f : 0.5f);
query.add(new Term("field", "is"));
query.add(new Term("field", "this"));
query.add(new Term("field", "end"));
query.add(new Term("field", "world"));
query.add(new Term("field", "universe"));
query.add(new Term("field", "right"));
query.setMinimumNumberShouldMatch(0.5f);
TopDocs search = s.search(query, 10);
assertEquals(search.totalHits, 1);
assertEquals("0", r.document(search.scoreDocs[0].doc).get("id"));
}
{
CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
random().nextBoolean() ? 2.0f : 0.5f);
query.add(new Term("field", "is"));
query.add(new Term("field", "this"));
query.add(new Term("field", "end"));
query.add(new Term("field", "world"));
query.add(new Term("field", "universe"));
query.add(new Term("field", "right"));
query.setMinimumNumberShouldMatch(2.0f);
TopDocs search = s.search(query, 10);
assertEquals(search.totalHits, 1);
assertEquals("0", r.document(search.scoreDocs[0].doc).get("id"));
}
{
CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
random().nextBoolean() ? 2.0f : 0.5f);
query.add(new Term("field", "is"));
query.add(new Term("field", "this"));
query.add(new Term("field", "end"));
query.add(new Term("field", "world"));
query.add(new Term("field", "universe"));
query.add(new Term("field", "right"));
query.setMinimumNumberShouldMatch(0.49f);
TopDocs search = s.search(query, 10);
assertEquals(search.totalHits, 3);
assertEquals("0", r.document(search.scoreDocs[0].doc).get("id"));
assertEquals("2", r.document(search.scoreDocs[1].doc).get("id"));
assertEquals("3", r.document(search.scoreDocs[2].doc).get("id"));
}
{
CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
random().nextBoolean() ? 2.0f : 0.5f);
query.add(new Term("field", "is"));
query.add(new Term("field", "this"));
query.add(new Term("field", "end"));
query.add(new Term("field", "world"));
query.add(new Term("field", "universe"));
query.add(new Term("field", "right"));
query.setMinimumNumberShouldMatch(1.0f);
TopDocs search = s.search(query, 10);
assertEquals(search.totalHits, 3);
assertEquals("0", r.document(search.scoreDocs[0].doc).get("id"));
assertEquals("2", r.document(search.scoreDocs[1].doc).get("id"));
assertEquals("3", r.document(search.scoreDocs[2].doc).get("id"));
}
r.close();
w.close();
dir.close();
}
public void testIllegalOccur() {
Random random = random();

View File

@ -93,6 +93,8 @@ Bug Fixes
* SOLR-3926: Solr should support a better way of finding active sorts (Eirik Lygre via
Erick Erickson)
* SOLR-4342: Fix DataImportHandler stats to be a proper Map (hossman)
Optimizations
----------------------
@ -107,6 +109,12 @@ Optimizations
* SOLR-3915: Color Legend for Cloud UI (steffkes)
* SOLR-4306: Utilize indexInfo=false when gathering core names in UI
(steffkes)
* SOLR-4284: Admin UI - make core list scrollable separate from the rest of
the UI (steffkes)
Other Changes
----------------------

View File

@ -25,6 +25,7 @@ import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.UpdateParams;
import org.apache.solr.common.util.ContentStreamBase;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.util.ContentStream;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.util.SystemIdResolver;
@ -247,7 +248,7 @@ public class DataImportHandler extends RequestHandlerBase implements
return super.getStatistics();
DocBuilder.Statistics cumulative = importer.cumulativeStatistics;
NamedList result = new NamedList();
SimpleOrderedMap result = new SimpleOrderedMap();
result.add("Status", importer.getStatus().toString());
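
SimpleOrderedMap is the NamedList subclass that response writers render as a map/object instead of a flat list of name/value pairs, which is what makes the stats a proper map (SOLR-4342). Roughly, in the JSON response writer (output shapes assumed):

// NamedList        ->  ["Status", "IDLE", ...]
// SimpleOrderedMap ->  {"Status": "IDLE", ...}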

View File

@ -310,7 +310,7 @@ make many changes to an index in a batch and then send the
There is also an <span class="codefrag">optimize</span> command that does the
same things as <span class="codefrag">commit</span>, but also forces all index
segments to be merged into a single segment -- this can be very resource
intsenive, but may be worthwhile for improving search speed if your index
intensive, but may be worthwhile for improving search speed if your index
changes very infrequently.
</p>
<p>
@ -411,7 +411,7 @@ and is useful when testing or debugging queries.
<h2 class="boxed">Highlighting</h2>
<div class="section">
<p>
Hit highlighting returns relevent snippets of each returned document, and highlights
Hit highlighting returns relevant snippets of each returned document, and highlights
terms from the query within those context snippets.
</p>
<p>
@ -522,7 +522,7 @@ Try it out at
<p>
The <a href="http://wiki.apache.org/solr/SchemaXml">schema</a> defines
the fields in the index and what type of analysis is applied to them. The current schema your collection is using
may be viewed directly via the <a href="http://localhost:8983/solr/#/collection1/schema">Schema tab</a> in the Admin UI, or explored dynamicly using the <a href="http://localhost:8983/solr/#/collection1/schema-browser">Schema Browser tab</a>.
may be viewed directly via the <a href="http://localhost:8983/solr/#/collection1/schema">Schema tab</a> in the Admin UI, or explored dynamically using the <a href="http://localhost:8983/solr/#/collection1/schema-browser">Schema Browser tab</a>.
</p>
<p>
The best analysis components (tokenization and filtering) for your textual
@@ -616,7 +616,7 @@ Mousing over the section label to the left of the section will display the full
<p>
When both <a href="http://localhost:8983/solr/#/collection1/analysis?analysis.fieldvalue=Canon+Power-Shot+SD500&amp;analysis.query=power+shot+sd-500&amp;analysis.fieldtype=text_en_splitting&amp;verbose_output=0">Index and Query</a>
values are provided, two tables will be displayed side by side showing the
results of each chain. Terms in the Index chain results that are equivilent
results of each chain. Terms in the Index chain results that are equivalent
to the final terms produced by the Query chain will be highlighted.
</p>
<p>

View File

@@ -111,8 +111,12 @@ limitations under the License.
</ul>
<ul id="menu-selector">
</ul>
<div id="core-selector">
<select data-placeholder="Core Selector"></select>
</div>
<div id="core-menu">
<ul></ul>
</div>
</div>
</div>

View File

@@ -172,7 +172,7 @@ ul
#header
{
padding-bottom: 10px;
position: absolute;
position: fixed;
z-index: 42;
}
@@ -340,12 +340,6 @@ ul
width: 100%;
}
#content > pre
{
max-height: 600px;
overflow: auto;
}
#content .block
{
margin-bottom: 10px;

View File

@@ -1,13 +1,13 @@
#menu-wrapper
{
position: absolute;
top: 90px;
position: fixed;
top: 120px;
width: 150px;
}
.has-environment #menu-wrapper
{
top: 130px;
top: 160px;
}
#menu-wrapper a
@@ -18,6 +18,23 @@
text-overflow: ellipsis;
}
#core-selector
{
margin-top: 20px;
padding-right: 10px;
}
#core-selector a
{
padding: 0;
padding-left: 8px;
}
#core-selector select
{
width: 100%;
}
#menu-wrapper .active p
{
background-color: #fafafa;
@@ -121,32 +138,27 @@
display: none;
}
#menu-selector
{
margin-top: 20px;
}
#menu-selector p
#core-menu p
{
border-top: 1px solid #f0f0f0;
}
#menu-selector li:first-child p
#core-menu li:first-child p
{
border-top: 0;
}
#menu-selector p a
#core-menu p a
{
background-image: url( ../../img/ico/status-offline.png );
}
#menu-selector .active p a
#core-menu .active p a
{
background-image: url( ../../img/ico/box.png );
}
#menu-selector ul,
#core-menu ul,
#menu ul
{
display: none;
@@ -154,7 +166,7 @@
padding-bottom: 10px;
}
#menu-selector .active ul,
#core-menu .active ul,
#menu .active ul
{
display: block;
@@ -165,7 +177,7 @@
border-bottom: 0;
}
#menu-selector ul li a,
#core-menu ul li a,
#menu ul li a
{
background-position: 7px 50%;
@@ -175,20 +187,20 @@
padding-left: 26px;
}
#menu-selector ul li:last-child a,
#core-menu ul li:last-child a,
#menu ul li:last-child a
{
border-bottom: 0;
}
#menu-selector ul li a:hover,
#core-menu ul li a:hover,
#menu ul li a:hover
{
background-color: #f0f0f0;
color: #333;
}
#menu-selector ul li.active a,
#core-menu ul li.active a,
#menu ul li.active a
{
background-color: #d0d0d0;
@@ -213,7 +225,7 @@
#menu #cloud.global .rgraph a { background-image: url( ../../img/ico/asterisk.png ); }
#menu #cloud.global .dump a { background-image: url( ../../img/ico/download-cloud.png ); }
#menu-selector .ping.error a
#core-menu .ping.error a
{
background-color: #ffcccc;
@@ -222,17 +234,18 @@
cursor: help;
}
#menu-selector .query a { background-image: url( ../../img/ico/magnifier.png ); }
#menu-selector .schema a { background-image: url( ../../img/ico/table.png ); }
#menu-selector .config a { background-image: url( ../../img/ico/gear.png ); }
#menu-selector .analysis a { background-image: url( ../../img/ico/funnel.png ); }
#menu-selector .schema-browser a { background-image: url( ../../img/ico/book-open-text.png ); }
#menu-selector .replication a { background-image: url( ../../img/ico/node.png ); }
#menu-selector .distribution a { background-image: url( ../../img/ico/node-select.png ); }
#menu-selector .ping a { background-image: url( ../../img/ico/system-monitor.png ); }
#menu-selector .logging a { background-image: url( ../../img/ico/inbox-document-text.png ); }
#menu-selector .plugins a { background-image: url( ../../img/ico/block.png ); }
#menu-selector .dataimport a { background-image: url( ../../img/ico/document-import.png ); }
#core-menu .overview a { background-image: url( ../../img/ico/home.png ); }
#core-menu .query a { background-image: url( ../../img/ico/magnifier.png ); }
#core-menu .schema a { background-image: url( ../../img/ico/table.png ); }
#core-menu .config a { background-image: url( ../../img/ico/gear.png ); }
#core-menu .analysis a { background-image: url( ../../img/ico/funnel.png ); }
#core-menu .schema-browser a { background-image: url( ../../img/ico/book-open-text.png ); }
#core-menu .replication a { background-image: url( ../../img/ico/node.png ); }
#core-menu .distribution a { background-image: url( ../../img/ico/node-select.png ); }
#core-menu .ping a { background-image: url( ../../img/ico/system-monitor.png ); }
#core-menu .logging a { background-image: url( ../../img/ico/inbox-document-text.png ); }
#core-menu .plugins a { background-image: url( ../../img/ico/block.png ); }
#core-menu .dataimport a { background-image: url( ../../img/ico/document-import.png ); }
#content #navigation

View File

@@ -545,6 +545,7 @@
clear: left;
float: left;
margin-left: 2px;
white-space: nowrap;
}
#content #schema-browser #data #field .histogram-holder li:hover dl

BIN
solr/webapp/web/img/ico/home.png Executable file (new binary file, 752 B; not shown)

View File

@@ -92,20 +92,26 @@ var sammy = $.sammy
$( 'li.active', menu_wrapper )
.removeClass( 'active' );
if( this.params.splat )
// global dashboard doesn't have params.splat
if( !this.params.splat )
{
var selector = '~' === this.params.splat[0][0]
? '#' + this.params.splat[0].replace( /^~/, '' ) + '.global'
: '#menu-selector #' + this.params.splat[0].replace( /\./g, '__' );
this.params.splat = [ '~index' ];
}
var active_element = $( selector, menu_wrapper );
if( 0 === active_element.size() )
{
this.app.error( 'There exists no core with name "' + this.params.splat[0] + '"' );
return false;
}
var selector = '~' === this.params.splat[0][0]
? '#' + this.params.splat[0].replace( /^~/, '' ) + '.global'
: '#core-selector #' + this.params.splat[0].replace( /\./g, '__' );
var active_element = $( selector, menu_wrapper );
if( 0 === active_element.size() )
{
this.app.error( 'There exists no core with name "' + this.params.splat[0] + '"' );
return false;
}
if( active_element.hasClass( 'global' ) )
{
active_element
.addClass( 'active' );
@@ -115,10 +121,28 @@ var sammy = $.sammy
.addClass( 'active' );
}
if( !active_element.hasClass( 'global' ) )
$( '#core-selector option[selected]' )
.removeAttr( 'selected' )
.trigger( 'liszt:updated' );
$( '#core-selector .chzn-container > a' )
.addClass( 'chzn-default' );
}
else
{
active_element
.attr( 'selected', 'selected' )
.trigger( 'liszt:updated' );
if( !this.params.splat[1] )
{
this.active_core = active_element;
this.params.splat[1] = 'overview';
}
$( '#core-menu .' + this.params.splat[1] )
.addClass( 'active' );
this.active_core = active_element;
}
}
);
@@ -143,9 +167,10 @@ var solr_admin = function( app_config )
plugin_data = null,
this.menu_element = $( '#menu-selector' );
this.config = config;
this.menu_element = $( '#core-selector select' );
this.core_menu = $( '#core-menu ul' );
this.config = config;
this.timeout = null;
this.core_regex_base = '^#\\/([\\w\\d-\\.]+)';
@@ -197,6 +222,9 @@ var solr_admin = function( app_config )
that.menu_element
.empty();
var core_list = [];
core_list.push( '<option></option>' );
var core_count = 0;
for( var core_name in that.cores_data )
{
@@ -214,32 +242,24 @@
classes.push( 'default' );
}
var core_tpl = '<li id="' + core_name.replace( /\./g, '__' ) + '" '
var core_tpl = '<option '
+ ' id="' + core_name.replace( /\./g, '__' ) + '" '
+ ' class="' + classes.join( ' ' ) + '"'
+ ' data-basepath="' + core_path + '"'
+ ' schema="' + cores.status[core_name]['schema'] + '"'
+ ' config="' + cores.status[core_name]['config'] + '"'
+ '>' + "\n"
+ ' <p><a href="#/' + core_name + '" title="' + core_name + '">' + core_name + '</a></p>' + "\n"
+ ' <ul>' + "\n"
+ ' value="#/' + core_name + '"'
+ ' title="' + core_name + '"'
+ '>'
+ core_name
+ '</option>';
+ ' <li class="ping"><a rel="' + core_path + '/admin/ping"><span>Ping</span></a></li>' + "\n"
+ ' <li class="query"><a href="#/' + core_name + '/query"><span>Query</span></a></li>' + "\n"
+ ' <li class="schema"><a href="#/' + core_name + '/schema"><span>Schema</span></a></li>' + "\n"
+ ' <li class="config"><a href="#/' + core_name + '/config"><span>Config</span></a></li>' + "\n"
+ ' <li class="replication"><a href="#/' + core_name + '/replication"><span>Replication</span></a></li>' + "\n"
+ ' <li class="analysis"><a href="#/' + core_name + '/analysis"><span>Analysis</span></a></li>' + "\n"
+ ' <li class="schema-browser"><a href="#/' + core_name + '/schema-browser"><span>Schema Browser</span></a></li>' + "\n"
+ ' <li class="plugins"><a href="#/' + core_name + '/plugins"><span>Plugins / Stats</span></a></li>' + "\n"
+ ' <li class="dataimport"><a href="#/' + core_name + '/dataimport"><span>Dataimport</span></a></li>' + "\n"
+ ' </ul>' + "\n"
+ '</li>';
that.menu_element
.append( core_tpl );
core_list.push( core_tpl );
}
that.menu_element
.append( core_list.join( "\n" ) );
if( cores.initFailures )
{
var failures = [];
@@ -277,7 +297,7 @@ var solr_admin = function( app_config )
$.ajax
(
{
url : config.solr_path + config.core_admin_path + '?wt=json',
url : config.solr_path + config.core_admin_path + '?wt=json&indexInfo=false',
dataType : 'json',
beforeSend : function( arr, form, options )
{
@@ -288,6 +308,52 @@
{
that.set_cores_data( response );
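// enhance the core <select> with the Chosen plugin: a 'change' navigates to
// the selected core; 'liszt:updated' (re)builds the per-core menu below it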
that.menu_element
.chosen()
.off( 'change' )
.on
(
'change',
function( event )
{
location.href = $( 'option:selected', this ).val();
return false;
}
)
.on
(
'liszt:updated',
function( event )
{
var core_name = $( 'option:selected', this ).text();
if( core_name )
{
that.core_menu
.html
(
'<li class="overview"><a href="#/' + core_name + '"><span>Overview</span></a></li>' + "\n" +
'<li class="ping"><a rel="' + that.config.solr_path + '/' + core_name + '/admin/ping"><span>Ping</span></a></li>' + "\n" +
'<li class="query"><a href="#/' + core_name + '/query"><span>Query</span></a></li>' + "\n" +
'<li class="schema"><a href="#/' + core_name + '/schema"><span>Schema</span></a></li>' + "\n" +
'<li class="config"><a href="#/' + core_name + '/config"><span>Config</span></a></li>' + "\n" +
'<li class="replication"><a href="#/' + core_name + '/replication"><span>Replication</span></a></li>' + "\n" +
'<li class="analysis"><a href="#/' + core_name + '/analysis"><span>Analysis</span></a></li>' + "\n" +
'<li class="schema-browser"><a href="#/' + core_name + '/schema-browser"><span>Schema Browser</span></a></li>' + "\n" +
'<li class="plugins"><a href="#/' + core_name + '/plugins"><span>Plugins / Stats</span></a></li>' + "\n" +
'<li class="dataimport"><a href="#/' + core_name + '/dataimport"><span>Dataimport</span></a></li>' + "\n"
)
.show();
}
else
{
that.core_menu
.hide()
.empty();
}
}
);
for( var core_name in response.status )
{
var core_path = config.solr_path + '/' + core_name;

View File

@@ -20,12 +20,6 @@ sammy.bind
'cores_load_data',
function( event, params )
{
if( app.cores_data )
{
params.callback( app.cores_data );
return true;
}
$.ajax
(
{
@@ -335,7 +329,7 @@ sammy.get
.ajaxForm
(
{
url : app.config.solr_path + app.config.core_admin_path + '?wt=json',
url : app.config.solr_path + app.config.core_admin_path + '?wt=json&indexInfo=false',
dataType : 'json',
beforeSubmit : function( array, form, options )
{

View File

@@ -208,9 +208,6 @@ sammy.get
{
var content_element = $( '#content' );
$( '#menu-wrapper #index' )
.addClass( 'active' );
content_element
.html( '<div id="index"></div>' );

View File

@@ -21,7 +21,7 @@ sammy.get
/^#\/(~java-properties)$/,
function( context )
{
var core_basepath = $( 'li[data-basepath]', app.menu_element ).attr( 'data-basepath' );
var core_basepath = $( '[data-basepath]', app.menu_element ).attr( 'data-basepath' );
var content_element = $( '#content' );
content_element

View File

@@ -406,7 +406,7 @@ sammy.get
/^#\/(~logging)$/,
function( context )
{
var core_basepath = $( 'li[data-basepath]', app.menu_element ).attr( 'data-basepath' );
var core_basepath = $( '[data-basepath]', app.menu_element ).attr( 'data-basepath' );
loglevel_path = core_basepath + '/admin/logging';
var content_element = $( '#content' );
@@ -492,7 +492,7 @@ sammy.get
/^#\/(~logging)\/level$/,
function( context )
{
var core_basepath = $( 'li[data-basepath]', app.menu_element ).attr( 'data-basepath' );
var core_basepath = $( '[data-basepath]', app.menu_element ).attr( 'data-basepath' );
loglevel_path = core_basepath + '/admin/logging';
var content_element = $( '#content' );

View File

@@ -15,7 +15,7 @@
limitations under the License.
*/
$( '.ping a', app.menu_element )
$( '.ping a', app.core_menu )
.live
(
'click',

View File

@@ -228,7 +228,7 @@ sammy.bind
var related_select_element = $( '#related select', params.schema_browser_element )
var type = 'index';
var sammy_basepath = '#/' + $( 'p a', params.active_core ).html() + '/schema-browser';
var sammy_basepath = app.core_menu.find( '.active a' ).attr( 'href' );
if( !related_navigation_meta.hasClass( 'done' ) )
{
@@ -640,7 +640,7 @@ sammy.bind
}
related_select_element
.attr( 'rel', '#/' + $( 'p a', params.active_core ).html() + '/schema-browser' )
.attr( 'rel', app.core_menu.find( '.active a' ).attr( 'href' ) )
.append( related_options )
.chosen();

View File

@@ -21,7 +21,7 @@ sammy.get
/^#\/(~threads)$/,
function( context )
{
var core_basepath = $( 'li[data-basepath]', app.menu_element ).attr( 'data-basepath' );
var core_basepath = $( '[data-basepath]', app.menu_element ).attr( 'data-basepath' );
var content_element = $( '#content' );
$.get