Merge trunk.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1439991 13f79535-47bb-0310-9956-ffa450edef68
commit 577364e414
@ -79,6 +79,9 @@ New Features
  near-real-time reader is opened that contains those changes.
  (Robert Muir, Mike McCandless)

* LUCENE-4723: Add AnalyzerFactoryTask to benchmark, and enable analyzer
  creation via the resulting factories using NewAnalyzerTask. (Steve Rowe)

API Changes

* LUCENE-4709: FacetResultNode no longer has a residue field. (Shai Erera)

@ -104,6 +107,14 @@ Bug Fixes
  degrees and barely any height, it would generate so many indexed terms
  (> 500k) that it could even cause an OutOfMemoryError. Fixed. (David Smiley)

* LUCENE-4704: Make join queries override hashcode and equals methods.
  (Martijn van Groningen)

* LUCENE-4724: Fix bug in CategoryPath which allowed passing null or empty
  string components. This is forbidden now (throws an exception). Note that if
  you have a taxonomy index created with such strings, you should rebuild it.
  (Michael McCandless, Shai Erera)

======================= Lucene 4.1.0 =======================

Changes in backwards compatibility policy
@ -19,25 +19,43 @@ doc.body.tokenized=true
docs.dir=reuters-out
log.step=1000

-AnalyzerFactory(name:shingle-bigrams-unigrams,
                 StandardTokenizer,
                 ShingleFilter(maxShingleSize:2, outputUnigrams:true))

-AnalyzerFactory(name:shingle-bigrams,
                 StandardTokenizer,
                 ShingleFilter(maxShingleSize:2, outputUnigrams:false))

-AnalyzerFactory(name:shingle-4grams-unigrams,
                 StandardTokenizer,
                 ShingleFilter(maxShingleSize:4, outputUnigrams:true))

-AnalyzerFactory(name:shingle-4grams,
                 StandardTokenizer,
                 ShingleFilter(maxShingleSize:4, outputUnigrams:false))

-AnalyzerFactory(name:standard-tokenizer-only, StandardTokenizer)

{ "Rounds"

  -NewShingleAnalyzer(maxShingleSize:2,outputUnigrams:true)
  -NewAnalyzer(shingle-bigrams-unigrams)
  -ResetInputs
  { "BigramsAndUnigrams" { ReadTokens > : 10000 }

  -NewShingleAnalyzer(maxShingleSize:2,outputUnigrams:false)
  -NewAnalyzer(shingle-bigrams)
  -ResetInputs
  { "BigramsOnly" { ReadTokens > : 10000 }

  -NewShingleAnalyzer(maxShingleSize:4,outputUnigrams:true)
  -NewAnalyzer(shingle-4grams-unigrams)
  -ResetInputs
  { "FourgramsAndUnigrams" { ReadTokens > : 10000 }

  -NewShingleAnalyzer(maxShingleSize:4,outputUnigrams:false)
  -NewAnalyzer(shingle-4grams)
  -ResetInputs
  { "FourgramsOnly" { ReadTokens > : 10000 }

  -NewAnalyzer(standard.StandardAnalyzer)
  -NewAnalyzer(standard-tokenizer-only)
  -ResetInputs
  { "UnigramsOnly" { ReadTokens > : 10000 }
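For orientation only (this block is not part of the patch): a minimal, hand-rolled Java sketch of what the "shingle-bigrams" factory defined above amounts to, written against the Lucene 4.x analysis API. The class name is hypothetical; the benchmark builds the equivalent analyzer through AnalyzerFactory instead.

import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.shingle.ShingleFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.util.Version;

/** Hypothetical: roughly the analyzer the "shingle-bigrams" .alg entry describes. */
final class ShingleBigramsAnalyzer extends Analyzer {
  @Override
  protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    Tokenizer source = new StandardTokenizer(Version.LUCENE_42, reader);
    ShingleFilter shingles = new ShingleFilter(source, 2, 2); // min = max shingle size = 2
    shingles.setOutputUnigrams(false);                        // bigrams only, as in the .alg entry
    return new TokenStreamComponents(source, shingles);
  }
}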
@ -51,7 +51,7 @@ while (<>) {

# Print out platform info
print "JAVA:\n", `java -version 2>&1`, "\nOS:\n";
if ($^O =~ /win/i) {
if ($^O =~ /(?<!dar)win/i) {
  print "$^O\n";
  eval {
    require Win32;
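A side note on the Perl change above: the added (?<!dar) negative lookbehind keeps Mac OS X (where $^O is "darwin") from being misdetected as Windows. A small, hypothetical Java check of the same pattern, for readers unfamiliar with the Perl syntax:

import java.util.regex.Pattern;

public class LookbehindCheck {
  public static void main(String[] args) {
    Pattern winPattern = Pattern.compile("(?<!dar)win", Pattern.CASE_INSENSITIVE);
    System.out.println(winPattern.matcher("MSWin32").find()); // true  -> Windows branch taken
    System.out.println(winPattern.matcher("darwin").find());  // false -> Mac no longer matches
  }
}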
@ -23,6 +23,7 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.benchmark.byTask.feeds.ContentSource;
@ -34,6 +35,7 @@ import org.apache.lucene.benchmark.byTask.tasks.NewAnalyzerTask;
import org.apache.lucene.benchmark.byTask.tasks.PerfTask;
import org.apache.lucene.benchmark.byTask.tasks.ReadTask;
import org.apache.lucene.benchmark.byTask.tasks.SearchTask;
import org.apache.lucene.benchmark.byTask.utils.AnalyzerFactory;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.benchmark.byTask.utils.FileUtils;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
@ -55,6 +57,7 @@ import org.apache.lucene.util.IOUtils;
 * <li>Directory, Writer, Reader.
 * <li>Taxonomy Directory, Writer, Reader.
 * <li>DocMaker, FacetSource and a few instances of QueryMaker.
 * <li>Named AnalysisFactories.
 * <li>Analyzer.
 * <li>Statistics data which updated during the run.
 * </ul>
@ -78,6 +81,7 @@ public class PerfRunData implements Closeable {
  // directory, analyzer, docMaker - created at startup.
  // reader, writer, searcher - maintained by basic tasks.
  private Directory directory;
  private Map<String,AnalyzerFactory> analyzerFactories = new HashMap<String,AnalyzerFactory>();
  private Analyzer analyzer;
  private DocMaker docMaker;
  private ContentSource contentSource;
@ -358,7 +362,7 @@ public class PerfRunData implements Closeable {
  }

  /**
   * @return Returns the anlyzer.
   * @return Returns the analyzer.
   */
  public Analyzer getAnalyzer() {
    return analyzer;
@ -434,4 +438,7 @@ public class PerfRunData implements Closeable {
    return qm;
  }

  public Map<String,AnalyzerFactory> getAnalyzerFactories() {
    return analyzerFactories;
  }
}
@ -0,0 +1,459 @@
|
||||
package org.apache.lucene.benchmark.byTask.tasks;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
|
||||
import org.apache.lucene.analysis.util.CharFilterFactory;
|
||||
import org.apache.lucene.analysis.util.FilesystemResourceLoader;
|
||||
import org.apache.lucene.analysis.util.ResourceLoaderAware;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
import org.apache.lucene.analysis.util.TokenizerFactory;
|
||||
import org.apache.lucene.benchmark.byTask.PerfRunData;
|
||||
import org.apache.lucene.benchmark.byTask.utils.AnalyzerFactory;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.StreamTokenizer;
|
||||
import java.io.StringReader;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* Analyzer factory construction task. The name given to the constructed factory may
|
||||
* be given to NewAnalyzerTask, which will call AnalyzerFactory.create().
|
||||
*
|
||||
* Params are in the form argname:argvalue or argname:"argvalue" or argname:'argvalue';
|
||||
* use backslashes to escape '"' or "'" inside a quoted value when it's used as the enclosing
|
||||
* quotation mark,
|
||||
*
|
||||
* Specify params in a comma separated list of the following, in order:
|
||||
* <ol>
|
||||
* <li>Analyzer args:
|
||||
* <ul>
|
||||
* <li><b>Required</b>: <code>name:<i>analyzer-factory-name</i></code></li>
|
||||
* <li>Optional: <tt>positionIncrementGap:<i>int value</i></tt> (default: 0)</li>
|
||||
* <li>Optional: <tt>offsetGap:<i>int value</i></tt> (default: 1)</li>
|
||||
* </ul>
|
||||
* </li>
|
||||
* <li>zero or more CharFilterFactory's, followed by</li>
|
||||
* <li>exactly one TokenizerFactory, followed by</li>
|
||||
* <li>zero or more TokenFilterFactory's</li>
|
||||
* </ol>
|
||||
*
|
||||
* Each component analysis factory map specify <tt>luceneMatchVersion</tt> (defaults to
|
||||
* {@link Version#LUCENE_CURRENT}) and any of the args understood by the specified
|
||||
* *Factory class, in the above-describe param format.
|
||||
* <p/>
|
||||
* Example:
|
||||
* <pre>
|
||||
* -AnalyzerFactory(name:'strip html, fold to ascii, whitespace tokenize, max 10k tokens',
|
||||
* positionIncrementGap:100,
|
||||
* HTMLStripCharFilter,
|
||||
* MappingCharFilter(mapping:'mapping-FoldToASCII.txt'),
|
||||
* WhitespaceTokenizer(luceneMatchVersion:LUCENE_42),
|
||||
* TokenLimitFilter(maxTokenCount:10000, consumeAllTokens:false))
|
||||
* [...]
|
||||
* -NewAnalyzer('strip html, fold to ascii, whitespace tokenize, max 10k tokens')
|
||||
* </pre>
|
||||
* <p/>
|
||||
* AnalyzerFactory will direct analysis component factories to look for resources
|
||||
* under the directory specified in the "work.dir" property.
|
||||
*/
|
||||
public class AnalyzerFactoryTask extends PerfTask {
|
||||
private static final String LUCENE_ANALYSIS_PACKAGE_PREFIX = "org.apache.lucene.analysis.";
|
||||
private static final Pattern ANALYSIS_COMPONENT_SUFFIX_PATTERN
|
||||
= Pattern.compile("(?s:(?:(?:Token|Char)?Filter|Tokenizer)(?:Factory)?)$");
|
||||
private static final Pattern TRAILING_DOT_ZERO_PATTERN = Pattern.compile("\\.0$");
|
||||
|
||||
private enum ArgType {ANALYZER_ARG, ANALYZER_ARG_OR_CHARFILTER_OR_TOKENIZER, TOKENFILTER }
|
||||
|
||||
String factoryName = null;
|
||||
Integer positionIncrementGap = null;
|
||||
Integer offsetGap = null;
|
||||
private List<CharFilterFactory> charFilterFactories = new ArrayList<CharFilterFactory>();
|
||||
private TokenizerFactory tokenizerFactory = null;
|
||||
private List<TokenFilterFactory> tokenFilterFactories = new ArrayList<TokenFilterFactory>();
|
||||
|
||||
public AnalyzerFactoryTask(PerfRunData runData) {
|
||||
super(runData);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int doLogic() {
|
||||
return 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the params.
|
||||
* Analysis component factory names may optionally include the "Factory" suffix.
|
||||
*
|
||||
* @param params analysis pipeline specification: name, (optional) positionIncrementGap,
|
||||
* (optional) offsetGap, 0+ CharFilterFactory's, 1 TokenizerFactory,
|
||||
* and 0+ TokenFilterFactory's
|
||||
*/
|
||||
@Override
|
||||
public void setParams(String params) {
|
||||
super.setParams(params);
|
||||
ArgType expectedArgType = ArgType.ANALYZER_ARG;
|
||||
|
||||
final StreamTokenizer stok = new StreamTokenizer(new StringReader(params));
|
||||
stok.commentChar('#');
|
||||
stok.quoteChar('"');
|
||||
stok.quoteChar('\'');
|
||||
stok.eolIsSignificant(false);
|
||||
stok.ordinaryChar('(');
|
||||
stok.ordinaryChar(')');
|
||||
stok.ordinaryChar(':');
|
||||
stok.ordinaryChar(',');
|
||||
try {
|
||||
while (stok.nextToken() != StreamTokenizer.TT_EOF) {
|
||||
switch (stok.ttype) {
|
||||
case ',': {
|
||||
// Do nothing
|
||||
break;
|
||||
}
|
||||
case StreamTokenizer.TT_WORD: {
|
||||
if (expectedArgType.equals(ArgType.ANALYZER_ARG)) {
|
||||
final String argName = stok.sval;
|
||||
if ( ! argName.equalsIgnoreCase("name")
|
||||
&& ! argName.equalsIgnoreCase("positionIncrementGap")
|
||||
&& ! argName.equalsIgnoreCase("offsetGap")) {
|
||||
throw new RuntimeException
|
||||
("Line #" + lineno(stok) + ": Missing 'name' param to AnalyzerFactory: '" + params + "'");
|
||||
}
|
||||
stok.nextToken();
|
||||
if (stok.ttype != ':') {
|
||||
throw new RuntimeException
|
||||
("Line #" + lineno(stok) + ": Missing ':' after '" + argName + "' param to AnalyzerFactory");
|
||||
}
|
||||
|
||||
stok.nextToken();
|
||||
String argValue = stok.sval;
|
||||
switch (stok.ttype) {
|
||||
case StreamTokenizer.TT_NUMBER: {
|
||||
argValue = Double.toString(stok.nval);
|
||||
// Drop the ".0" from numbers, for integer arguments
|
||||
argValue = TRAILING_DOT_ZERO_PATTERN.matcher(argValue).replaceFirst("");
|
||||
// Intentional fallthrough
|
||||
}
|
||||
case '"':
|
||||
case '\'':
|
||||
case StreamTokenizer.TT_WORD: {
|
||||
if (argName.equalsIgnoreCase("name")) {
|
||||
factoryName = argValue;
|
||||
expectedArgType = ArgType.ANALYZER_ARG_OR_CHARFILTER_OR_TOKENIZER;
|
||||
} else {
|
||||
int intArgValue = 0;
|
||||
try {
|
||||
intArgValue = Integer.parseInt(argValue);
|
||||
} catch (NumberFormatException e) {
|
||||
throw new RuntimeException
|
||||
("Line #" + lineno(stok) + ": Exception parsing " + argName + " value '" + argValue + "'", e);
|
||||
}
|
||||
if (argName.equalsIgnoreCase("positionIncrementGap")) {
|
||||
positionIncrementGap = intArgValue;
|
||||
} else if (argName.equalsIgnoreCase("offsetGap")) {
|
||||
offsetGap = intArgValue;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case StreamTokenizer.TT_EOF: {
|
||||
throw new RuntimeException("Unexpected EOF: " + stok.toString());
|
||||
}
|
||||
default: {
|
||||
throw new RuntimeException
|
||||
("Line #" + lineno(stok) + ": Unexpected token: " + stok.toString());
|
||||
}
|
||||
}
|
||||
} else if (expectedArgType.equals(ArgType.ANALYZER_ARG_OR_CHARFILTER_OR_TOKENIZER)) {
|
||||
final String argName = stok.sval;
|
||||
|
||||
if (argName.equalsIgnoreCase("positionIncrementGap")
|
||||
|| argName.equalsIgnoreCase("offsetGap")) {
|
||||
stok.nextToken();
|
||||
if (stok.ttype != ':') {
|
||||
throw new RuntimeException
|
||||
("Line #" + lineno(stok) + ": Missing ':' after '" + argName + "' param to AnalyzerFactory");
|
||||
}
|
||||
stok.nextToken();
|
||||
int intArgValue = (int)stok.nval;
|
||||
switch (stok.ttype) {
|
||||
case '"':
|
||||
case '\'':
|
||||
case StreamTokenizer.TT_WORD: {
|
||||
intArgValue = 0;
|
||||
try {
|
||||
intArgValue = Integer.parseInt(stok.sval.trim());
|
||||
} catch (NumberFormatException e) {
|
||||
throw new RuntimeException
|
||||
("Line #" + lineno(stok) + ": Exception parsing " + argName + " value '" + stok.sval + "'", e);
|
||||
}
|
||||
// Intentional fall-through
|
||||
}
|
||||
case StreamTokenizer.TT_NUMBER: {
|
||||
if (argName.equalsIgnoreCase("positionIncrementGap")) {
|
||||
positionIncrementGap = intArgValue;
|
||||
} else if (argName.equalsIgnoreCase("offsetGap")) {
|
||||
offsetGap = intArgValue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case StreamTokenizer.TT_EOF: {
|
||||
throw new RuntimeException("Unexpected EOF: " + stok.toString());
|
||||
}
|
||||
default: {
|
||||
throw new RuntimeException
|
||||
("Line #" + lineno(stok) + ": Unexpected token: " + stok.toString());
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
try {
|
||||
final Class<? extends CharFilterFactory> clazz;
|
||||
clazz = lookupAnalysisClass(argName, CharFilterFactory.class);
|
||||
createAnalysisPipelineComponent(stok, clazz);
|
||||
} catch (IllegalArgumentException e) {
|
||||
try {
|
||||
final Class<? extends TokenizerFactory> clazz;
|
||||
clazz = lookupAnalysisClass(argName, TokenizerFactory.class);
|
||||
createAnalysisPipelineComponent(stok, clazz);
|
||||
expectedArgType = ArgType.TOKENFILTER;
|
||||
} catch (IllegalArgumentException e2) {
|
||||
throw new RuntimeException("Line #" + lineno(stok) + ": Can't find class '"
|
||||
+ argName + "' as CharFilterFactory or TokenizerFactory");
|
||||
}
|
||||
}
|
||||
} else { // expectedArgType = ArgType.TOKENFILTER
|
||||
final String className = stok.sval;
|
||||
final Class<? extends TokenFilterFactory> clazz;
|
||||
try {
|
||||
clazz = lookupAnalysisClass(className, TokenFilterFactory.class);
|
||||
} catch (IllegalArgumentException e) {
|
||||
throw new RuntimeException
|
||||
("Line #" + lineno(stok) + ": Can't find class '" + className + "' as TokenFilterFactory");
|
||||
}
|
||||
createAnalysisPipelineComponent(stok, clazz);
|
||||
}
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
throw new RuntimeException("Line #" + lineno(stok) + ": Unexpected token: " + stok.toString());
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (RuntimeException e) {
|
||||
if (e.getMessage().startsWith("Line #")) {
|
||||
throw e;
|
||||
} else {
|
||||
throw new RuntimeException("Line #" + lineno(stok) + ": ", e);
|
||||
}
|
||||
} catch (Throwable t) {
|
||||
throw new RuntimeException("Line #" + lineno(stok) + ": ", t);
|
||||
}
|
||||
|
||||
final AnalyzerFactory analyzerFactory = new AnalyzerFactory
|
||||
(charFilterFactories, tokenizerFactory, tokenFilterFactories);
|
||||
analyzerFactory.setPositionIncrementGap(positionIncrementGap);
|
||||
analyzerFactory.setOffsetGap(offsetGap);
|
||||
getRunData().getAnalyzerFactories().put(factoryName, analyzerFactory);
|
||||
}
|
||||
|
||||
/**
|
||||
* Instantiates the given analysis factory class after pulling params from
|
||||
* the given stream tokenizer, then stores the result in the appropriate
|
||||
* pipeline component list.
|
||||
*
|
||||
* @param stok stream tokenizer from which to draw analysis factory params
|
||||
* @param clazz analysis factory class to instantiate
|
||||
*/
|
||||
private void createAnalysisPipelineComponent
|
||||
(StreamTokenizer stok, Class<? extends AbstractAnalysisFactory> clazz) {
|
||||
final AbstractAnalysisFactory instance;
|
||||
try {
|
||||
instance = clazz.newInstance();
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException("Line #" + lineno(stok) + ": ", e);
|
||||
}
|
||||
Version luceneMatchVersion = null;
|
||||
Map<String,String> argMap = new HashMap<String,String>();
|
||||
boolean parenthetical = false;
|
||||
try {
|
||||
WHILE_LOOP: while (stok.nextToken() != StreamTokenizer.TT_EOF) {
|
||||
switch (stok.ttype) {
|
||||
case ',': {
|
||||
if (parenthetical) {
|
||||
// Do nothing
|
||||
break;
|
||||
} else {
|
||||
// Finished reading this analysis factory configuration
|
||||
break WHILE_LOOP;
|
||||
}
|
||||
}
|
||||
case '(': {
|
||||
if (parenthetical) {
|
||||
throw new RuntimeException
|
||||
("Line #" + lineno(stok) + ": Unexpected opening parenthesis.");
|
||||
}
|
||||
parenthetical = true;
|
||||
break;
|
||||
}
|
||||
case ')': {
|
||||
if (parenthetical) {
|
||||
parenthetical = false;
|
||||
} else {
|
||||
throw new RuntimeException
|
||||
("Line #" + lineno(stok) + ": Unexpected closing parenthesis.");
|
||||
}
|
||||
break;
|
||||
}
|
||||
case StreamTokenizer.TT_WORD: {
|
||||
if ( ! parenthetical) {
|
||||
throw new RuntimeException("Line #" + lineno(stok) + ": Unexpected token '" + stok.sval + "'");
|
||||
}
|
||||
String argName = stok.sval;
|
||||
stok.nextToken();
|
||||
if (stok.ttype != ':') {
|
||||
throw new RuntimeException
|
||||
("Line #" + lineno(stok) + ": Missing ':' after '" + argName + "' param to " + clazz.getSimpleName());
|
||||
}
|
||||
stok.nextToken();
|
||||
String argValue = stok.sval;
|
||||
switch (stok.ttype) {
|
||||
case StreamTokenizer.TT_NUMBER: {
|
||||
argValue = Double.toString(stok.nval);
|
||||
// Drop the ".0" from numbers, for integer arguments
|
||||
argValue = TRAILING_DOT_ZERO_PATTERN.matcher(argValue).replaceFirst("");
|
||||
// Intentional fall-through
|
||||
}
|
||||
case '"':
|
||||
case '\'':
|
||||
case StreamTokenizer.TT_WORD: {
|
||||
if (argName.equalsIgnoreCase("luceneMatchVersion")) {
|
||||
try {
|
||||
luceneMatchVersion = Version.parseLeniently(argValue);
|
||||
} catch (IllegalArgumentException e) {
|
||||
throw new RuntimeException
|
||||
("Line #" + lineno(stok) + ": Unrecognized luceneMatchVersion '" + argValue + "'", e);
|
||||
}
|
||||
} else {
|
||||
argMap.put(argName, argValue);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case StreamTokenizer.TT_EOF: {
|
||||
throw new RuntimeException("Unexpected EOF: " + stok.toString());
|
||||
}
|
||||
default: {
|
||||
throw new RuntimeException
|
||||
("Line #" + lineno(stok) + ": Unexpected token: " + stok.toString());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
instance.setLuceneMatchVersion
|
||||
(null == luceneMatchVersion ? Version.LUCENE_CURRENT : luceneMatchVersion);
|
||||
instance.init(argMap);
|
||||
if (instance instanceof ResourceLoaderAware) {
|
||||
File baseDir = new File(getRunData().getConfig().get("work.dir", "work")).getAbsoluteFile();
|
||||
((ResourceLoaderAware)instance).inform(new FilesystemResourceLoader(baseDir));
|
||||
}
|
||||
if (CharFilterFactory.class.isAssignableFrom(clazz)) {
|
||||
charFilterFactories.add((CharFilterFactory)instance);
|
||||
} else if (TokenizerFactory.class.isAssignableFrom(clazz)) {
|
||||
tokenizerFactory = (TokenizerFactory)instance;
|
||||
} else if (TokenFilterFactory.class.isAssignableFrom(clazz)) {
|
||||
tokenFilterFactories.add((TokenFilterFactory)instance);
|
||||
}
|
||||
} catch (RuntimeException e) {
|
||||
if (e.getMessage().startsWith("Line #")) {
|
||||
throw (e);
|
||||
} else {
|
||||
throw new RuntimeException("Line #" + lineno(stok) + ": ", e);
|
||||
}
|
||||
} catch (Throwable t) {
|
||||
throw new RuntimeException("Line #" + lineno(stok) + ": ", t);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This method looks up a class with its fully qualified name (FQN), or a short-name
|
||||
* class-simplename, or with a package suffix, assuming "org.apache.lucene.analysis."
|
||||
* as the package prefix (e.g. "standard.ClassicTokenizerFactory" ->
|
||||
* "org.apache.lucene.analysis.standard.ClassicTokenizerFactory").
|
||||
*
|
||||
* If className contains a period, the class is first looked up as-is, assuming that it
|
||||
* is an FQN. If this fails, lookup is retried after prepending the Lucene analysis
|
||||
* package prefix to the class name.
|
||||
*
|
||||
* If className does not contain a period, the analysis SPI *Factory.lookupClass()
|
||||
* methods are used to find the class.
|
||||
*
|
||||
* @param className The name or the short name of the class.
|
||||
* @param expectedType The superclass className is expected to extend
|
||||
* @return the loaded class.
|
||||
* @throws ClassNotFoundException if lookup fails
|
||||
*/
|
||||
public <T> Class<? extends T> lookupAnalysisClass(String className, Class<T> expectedType)
|
||||
throws ClassNotFoundException {
|
||||
if (className.contains(".")) {
|
||||
try {
|
||||
// First, try className == FQN
|
||||
return Class.forName(className).asSubclass(expectedType);
|
||||
} catch (ClassNotFoundException e) {
|
||||
try {
|
||||
// Second, retry lookup after prepending the Lucene analysis package prefix
|
||||
return Class.forName(LUCENE_ANALYSIS_PACKAGE_PREFIX + className).asSubclass(expectedType);
|
||||
} catch (ClassNotFoundException e1) {
|
||||
throw new ClassNotFoundException("Can't find class '" + className
|
||||
+ "' or '" + LUCENE_ANALYSIS_PACKAGE_PREFIX + className + "'");
|
||||
}
|
||||
}
|
||||
}
|
||||
// No dot - use analysis SPI lookup
|
||||
final String analysisComponentName = ANALYSIS_COMPONENT_SUFFIX_PATTERN.matcher(className).replaceFirst("");
|
||||
if (CharFilterFactory.class.isAssignableFrom(expectedType)) {
|
||||
return CharFilterFactory.lookupClass(analysisComponentName).asSubclass(expectedType);
|
||||
} else if (TokenizerFactory.class.isAssignableFrom(expectedType)) {
|
||||
return TokenizerFactory.lookupClass(analysisComponentName).asSubclass(expectedType);
|
||||
} else if (TokenFilterFactory.class.isAssignableFrom(expectedType)) {
|
||||
return TokenFilterFactory.lookupClass(analysisComponentName).asSubclass(expectedType);
|
||||
}
|
||||
|
||||
throw new ClassNotFoundException("Can't find class '" + className + "'");
|
||||
}
|
||||
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see org.apache.lucene.benchmark.byTask.tasks.PerfTask#supportsParams()
|
||||
*/
|
||||
@Override
|
||||
public boolean supportsParams() {
|
||||
return true;
|
||||
}
|
||||
|
||||
/** Returns the current line in the algorithm file */
|
||||
public int lineno(StreamTokenizer stok) {
|
||||
return getAlgLineNum() + stok.lineno();
|
||||
}
|
||||
}
|
@ -16,10 +16,16 @@ package org.apache.lucene.benchmark.byTask.tasks;
|
||||
*/
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.util.CharFilterFactory;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
import org.apache.lucene.analysis.util.TokenizerFactory;
|
||||
import org.apache.lucene.benchmark.byTask.PerfRunData;
|
||||
import org.apache.lucene.benchmark.byTask.utils.AnalyzerFactory;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.StreamTokenizer;
|
||||
import java.io.StringReader;
|
||||
import java.util.*;
|
||||
import java.lang.reflect.Constructor;
|
||||
|
||||
@ -28,12 +34,12 @@ import java.lang.reflect.Constructor;
|
||||
*
|
||||
*/
|
||||
public class NewAnalyzerTask extends PerfTask {
|
||||
private List<String> analyzerClassNames;
|
||||
private List<String> analyzerNames;
|
||||
private int current;
|
||||
|
||||
public NewAnalyzerTask(PerfRunData runData) {
|
||||
super(runData);
|
||||
analyzerClassNames = new ArrayList<String>();
|
||||
analyzerNames = new ArrayList<String>();
|
||||
}
|
||||
|
||||
public static final Analyzer createAnalyzer(String className) throws Exception{
|
||||
@ -50,55 +56,98 @@ public class NewAnalyzerTask extends PerfTask {
|
||||
|
||||
@Override
|
||||
public int doLogic() throws IOException {
|
||||
String className = null;
|
||||
String analyzerName = null;
|
||||
try {
|
||||
if (current >= analyzerClassNames.size()) {
|
||||
if (current >= analyzerNames.size()) {
|
||||
current = 0;
|
||||
}
|
||||
className = analyzerClassNames.get(current++);
|
||||
analyzerName = analyzerNames.get(current++);
|
||||
Analyzer analyzer = null;
|
||||
if (null == className || 0 == className.length()) {
|
||||
className = "org.apache.lucene.analysis.standard.StandardAnalyzer";
|
||||
if (null == analyzerName || 0 == analyzerName.length()) {
|
||||
analyzerName = "org.apache.lucene.analysis.standard.StandardAnalyzer";
|
||||
}
|
||||
if (-1 == className.indexOf(".")) {
|
||||
try {
|
||||
// If no package, first attempt to instantiate a core analyzer
|
||||
String coreClassName = "org.apache.lucene.analysis.core." + className;
|
||||
analyzer = createAnalyzer(coreClassName);
|
||||
className = coreClassName;
|
||||
} catch (ClassNotFoundException e) {
|
||||
// If not a core analyzer, try the base analysis package
|
||||
className = "org.apache.lucene.analysis." + className;
|
||||
analyzer = createAnalyzer(className);
|
||||
}
|
||||
// First, lookup analyzerName as a named analyzer factory
|
||||
AnalyzerFactory factory = getRunData().getAnalyzerFactories().get(analyzerName);
|
||||
if (null != factory) {
|
||||
analyzer = factory.create();
|
||||
} else {
|
||||
if (className.startsWith("standard.")) {
|
||||
className = "org.apache.lucene.analysis." + className;
|
||||
if (analyzerName.contains(".")) {
|
||||
if (analyzerName.startsWith("standard.")) {
|
||||
analyzerName = "org.apache.lucene.analysis." + analyzerName;
|
||||
}
|
||||
analyzer = createAnalyzer(analyzerName);
|
||||
} else { // No package
|
||||
try {
|
||||
// Attempt to instantiate a core analyzer
|
||||
String coreClassName = "org.apache.lucene.analysis.core." + analyzerName;
|
||||
analyzer = createAnalyzer(coreClassName);
|
||||
analyzerName = coreClassName;
|
||||
} catch (ClassNotFoundException e) {
|
||||
// If not a core analyzer, try the base analysis package
|
||||
analyzerName = "org.apache.lucene.analysis." + analyzerName;
|
||||
analyzer = createAnalyzer(analyzerName);
|
||||
}
|
||||
}
|
||||
analyzer = createAnalyzer(className);
|
||||
}
|
||||
getRunData().setAnalyzer(analyzer);
|
||||
System.out.println("Changed Analyzer to: " + className);
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException("Error creating Analyzer: " + className, e);
|
||||
throw new RuntimeException("Error creating Analyzer: " + analyzerName, e);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the params (analyzerClassName only), Comma-separate list of Analyzer class names. If the Analyzer lives in
|
||||
* Set the params (analyzerName only), Comma-separate list of Analyzer class names. If the Analyzer lives in
|
||||
* org.apache.lucene.analysis, the name can be shortened by dropping the o.a.l.a part of the Fully Qualified Class Name.
|
||||
* <p/>
|
||||
* Analyzer names may also refer to previously defined AnalyzerFactory's.
|
||||
* <p/>
|
||||
* Example Declaration: {"NewAnalyzer" NewAnalyzer(WhitespaceAnalyzer, SimpleAnalyzer, StopAnalyzer, standard.StandardAnalyzer) >
|
||||
* <p/>
|
||||
* Example AnalyzerFactory usage:
|
||||
* <pre>
|
||||
* -AnalyzerFactory(name:'whitespace tokenized',WhitespaceTokenizer)
|
||||
* -NewAnalyzer('whitespace tokenized')
|
||||
* </pre>
|
||||
* @param params analyzerClassName, or empty for the StandardAnalyzer
|
||||
*/
|
||||
@Override
|
||||
public void setParams(String params) {
|
||||
super.setParams(params);
|
||||
for (StringTokenizer tokenizer = new StringTokenizer(params, ","); tokenizer.hasMoreTokens();) {
|
||||
String s = tokenizer.nextToken();
|
||||
analyzerClassNames.add(s.trim());
|
||||
final StreamTokenizer stok = new StreamTokenizer(new StringReader(params));
|
||||
stok.quoteChar('"');
|
||||
stok.quoteChar('\'');
|
||||
stok.eolIsSignificant(false);
|
||||
stok.ordinaryChar(',');
|
||||
try {
|
||||
while (stok.nextToken() != StreamTokenizer.TT_EOF) {
|
||||
switch (stok.ttype) {
|
||||
case ',': {
|
||||
// Do nothing
|
||||
break;
|
||||
}
|
||||
case '\'':
|
||||
case '\"':
|
||||
case StreamTokenizer.TT_WORD: {
|
||||
analyzerNames.add(stok.sval);
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
throw new RuntimeException("Unexpected token: " + stok.toString());
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (RuntimeException e) {
|
||||
if (e.getMessage().startsWith("Line #")) {
|
||||
throw e;
|
||||
} else {
|
||||
throw new RuntimeException("Line #" + (stok.lineno() + getAlgLineNum()) + ": ", e);
|
||||
}
|
||||
} catch (Throwable t) {
|
||||
throw new RuntimeException("Line #" + (stok.lineno() + getAlgLineNum()) + ": ", t);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
|
@ -1,117 +0,0 @@
|
||||
package org.apache.lucene.benchmark.byTask.tasks;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.StringTokenizer;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.shingle.ShingleAnalyzerWrapper;
|
||||
import org.apache.lucene.analysis.shingle.ShingleFilter;
|
||||
import org.apache.lucene.benchmark.byTask.PerfRunData;
|
||||
|
||||
/**
|
||||
* Task to support benchmarking ShingleFilter / ShingleAnalyzerWrapper
|
||||
* <p>
|
||||
* <ul>
|
||||
* <li> <code>NewShingleAnalyzer</code> (constructs with all defaults)
|
||||
* <li> <code>NewShingleAnalyzer(analyzer:o.a.l.analysis.StandardAnalyzer,maxShingleSize:2,outputUnigrams:true)</code>
|
||||
* </ul>
|
||||
* </p>
|
||||
*/
|
||||
public class NewShingleAnalyzerTask extends PerfTask {
|
||||
|
||||
private String analyzerClassName = "standard.StandardAnalyzer";
|
||||
private int maxShingleSize = 2;
|
||||
private boolean outputUnigrams = true;
|
||||
|
||||
public NewShingleAnalyzerTask(PerfRunData runData) {
|
||||
super(runData);
|
||||
}
|
||||
|
||||
private void setAnalyzer() throws Exception {
|
||||
Analyzer wrappedAnalyzer = null;
|
||||
if (null == analyzerClassName || 0 == analyzerClassName.length()) {
|
||||
analyzerClassName = "org.apache.lucene.analysis.standard.StandardAnalyzer";
|
||||
}
|
||||
if (-1 == analyzerClassName.indexOf(".")) {
|
||||
String coreClassName = "org.apache.lucene.analysis.core." + analyzerClassName;
|
||||
try {
|
||||
// If there is no package, first attempt to instantiate a core analyzer
|
||||
wrappedAnalyzer = NewAnalyzerTask.createAnalyzer(coreClassName);
|
||||
analyzerClassName = coreClassName;
|
||||
} catch (ClassNotFoundException e) {
|
||||
// If this is not a core analyzer, try the base analysis package
|
||||
analyzerClassName = "org.apache.lucene.analysis." + analyzerClassName;
|
||||
wrappedAnalyzer = NewAnalyzerTask.createAnalyzer(analyzerClassName);
|
||||
}
|
||||
} else {
|
||||
if (analyzerClassName.startsWith("standard.")) {
|
||||
analyzerClassName = "org.apache.lucene.analysis." + analyzerClassName;
|
||||
}
|
||||
wrappedAnalyzer = NewAnalyzerTask.createAnalyzer(analyzerClassName);
|
||||
}
|
||||
|
||||
ShingleAnalyzerWrapper analyzer = new ShingleAnalyzerWrapper(
|
||||
wrappedAnalyzer,
|
||||
ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE,
|
||||
maxShingleSize,
|
||||
ShingleFilter.TOKEN_SEPARATOR,
|
||||
outputUnigrams,
|
||||
false);
|
||||
getRunData().setAnalyzer(analyzer);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int doLogic() throws Exception {
|
||||
try {
|
||||
setAnalyzer();
|
||||
System.out.println
|
||||
("Changed Analyzer to: ShingleAnalyzerWrapper, wrapping ShingleFilter over "
|
||||
+ analyzerClassName);
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException("Error creating Analyzer", e);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setParams(String params) {
|
||||
super.setParams(params);
|
||||
StringTokenizer st = new StringTokenizer(params, ",");
|
||||
while (st.hasMoreTokens()) {
|
||||
String param = st.nextToken();
|
||||
StringTokenizer expr = new StringTokenizer(param, ":");
|
||||
String key = expr.nextToken();
|
||||
String value = expr.nextToken();
|
||||
if (key.equalsIgnoreCase("analyzer")) {
|
||||
analyzerClassName = value;
|
||||
} else if (key.equalsIgnoreCase("outputUnigrams")) {
|
||||
outputUnigrams = Boolean.parseBoolean(value);
|
||||
} else if (key.equalsIgnoreCase("maxShingleSize")) {
|
||||
maxShingleSize = (int)Double.parseDouble(value);
|
||||
} else {
|
||||
throw new RuntimeException("Unknown parameter " + param);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean supportsParams() {
|
||||
return true;
|
||||
}
|
||||
}
|
@ -62,6 +62,9 @@ public abstract class PerfTask implements Cloneable {
|
||||
private boolean runInBackground;
|
||||
private int deltaPri;
|
||||
|
||||
// The first line of this task's definition in the alg file
|
||||
private int algLineNum = 0;
|
||||
|
||||
protected static final String NEW_LINE = System.getProperty("line.separator");
|
||||
|
||||
/** Should not be used externally */
|
||||
@ -317,4 +320,11 @@ public abstract class PerfTask implements Cloneable {
|
||||
this.disableCounting = disableCounting;
|
||||
}
|
||||
|
||||
public void setAlgLineNum(int algLineNum) {
|
||||
this.algLineNum = algLineNum;
|
||||
}
|
||||
|
||||
public int getAlgLineNum() {
|
||||
return algLineNum;
|
||||
}
|
||||
}
|
||||
|
@ -58,11 +58,12 @@ public class Algorithm {
|
||||
StreamTokenizer stok = new StreamTokenizer(new StringReader(algTxt));
|
||||
stok.commentChar('#');
|
||||
stok.eolIsSignificant(false);
|
||||
stok.ordinaryChar('"');
|
||||
stok.quoteChar('"');
|
||||
stok.quoteChar('\'');
|
||||
stok.ordinaryChar('/');
|
||||
stok.ordinaryChar('(');
|
||||
stok.ordinaryChar(')');
|
||||
boolean colonOk = false;
|
||||
boolean colonOk = false;
|
||||
boolean isDisableCountNextTask = false; // only for primitive tasks
|
||||
currSequence.setDepth(0);
|
||||
|
||||
@ -74,6 +75,7 @@ public class Algorithm {
|
||||
Constructor<? extends PerfTask> cnstr = taskClass(config,s)
|
||||
.asSubclass(PerfTask.class).getConstructor(PerfRunData.class);
|
||||
PerfTask task = cnstr.newInstance(runData);
|
||||
task.setAlgLineNum(stok.lineno());
|
||||
task.setDisableCounting(isDisableCountNextTask);
|
||||
isDisableCountNextTask = false;
|
||||
currSequence.addTask(task);
|
||||
@ -90,24 +92,54 @@ public class Algorithm {
|
||||
if (stok.ttype!='(') {
|
||||
stok.pushBack();
|
||||
} else {
|
||||
// get params, for tasks that supports them, - anything until next ')'
|
||||
// get params, for tasks that supports them - allow recursive parenthetical expressions
|
||||
stok.eolIsSignificant(true); // Allow params tokenizer to keep track of line number
|
||||
StringBuilder params = new StringBuilder();
|
||||
stok.nextToken();
|
||||
while (stok.ttype!=')') {
|
||||
switch (stok.ttype) {
|
||||
case StreamTokenizer.TT_NUMBER:
|
||||
params.append(stok.nval);
|
||||
break;
|
||||
case StreamTokenizer.TT_WORD:
|
||||
params.append(stok.sval);
|
||||
break;
|
||||
case StreamTokenizer.TT_EOF:
|
||||
throw new Exception("unexpexted EOF: - "+stok.toString());
|
||||
default:
|
||||
params.append((char)stok.ttype);
|
||||
if (stok.ttype != ')') {
|
||||
int count = 1;
|
||||
BALANCED_PARENS: while (true) {
|
||||
switch (stok.ttype) {
|
||||
case StreamTokenizer.TT_NUMBER: {
|
||||
params.append(stok.nval);
|
||||
break;
|
||||
}
|
||||
case StreamTokenizer.TT_WORD: {
|
||||
params.append(stok.sval);
|
||||
break;
|
||||
}
|
||||
case StreamTokenizer.TT_EOF: {
|
||||
throw new RuntimeException("Unexpexted EOF: - "+stok.toString());
|
||||
}
|
||||
case '"':
|
||||
case '\'': {
|
||||
params.append((char)stok.ttype);
|
||||
// re-escape delimiters, if any
|
||||
params.append(stok.sval.replaceAll("" + (char)stok.ttype, "\\\\" + (char)stok.ttype));
|
||||
params.append((char)stok.ttype);
|
||||
break;
|
||||
}
|
||||
case '(': {
|
||||
params.append((char)stok.ttype);
|
||||
++count;
|
||||
break;
|
||||
}
|
||||
case ')': {
|
||||
if (--count >= 1) { // exclude final closing parenthesis
|
||||
params.append((char)stok.ttype);
|
||||
} else {
|
||||
break BALANCED_PARENS;
|
||||
}
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
params.append((char)stok.ttype);
|
||||
}
|
||||
}
|
||||
stok.nextToken();
|
||||
}
|
||||
stok.nextToken();
|
||||
}
|
||||
stok.eolIsSignificant(false);
|
||||
String prm = params.toString().trim();
|
||||
if (prm.length()>0) {
|
||||
task.setParams(prm);
|
||||
@ -182,10 +214,8 @@ public class Algorithm {
|
||||
if (stok.ttype!='"') {
|
||||
stok.pushBack();
|
||||
} else {
|
||||
stok.nextToken();
|
||||
name = stok.sval;
|
||||
stok.nextToken();
|
||||
if (stok.ttype!='"' || name==null || name.length()==0) {
|
||||
if (stok.ttype!='"' || name==null || name.length()==0) {
|
||||
throw new Exception("sequence name problem - "+stok.toString());
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,132 @@
|
||||
package org.apache.lucene.benchmark.byTask.utils;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.util.CharFilterFactory;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
import org.apache.lucene.analysis.util.TokenizerFactory;
|
||||
|
||||
import java.io.Reader;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* A factory to create an analyzer.
|
||||
* See {@link org.apache.lucene.benchmark.byTask.tasks.AnalyzerFactoryTask}
|
||||
*/
|
||||
public final class AnalyzerFactory {
|
||||
final private List<CharFilterFactory> charFilterFactories;
|
||||
final private TokenizerFactory tokenizerFactory;
|
||||
final private List<TokenFilterFactory> tokenFilterFactories;
|
||||
private String name = null;
|
||||
private Integer positionIncrementGap = null;
|
||||
private Integer offsetGap = null;
|
||||
|
||||
public AnalyzerFactory(List<CharFilterFactory> charFilterFactories,
|
||||
TokenizerFactory tokenizerFactory,
|
||||
List<TokenFilterFactory> tokenFilterFactories) {
|
||||
this.charFilterFactories = charFilterFactories;
|
||||
assert null != tokenizerFactory;
|
||||
this.tokenizerFactory = tokenizerFactory;
|
||||
this.tokenFilterFactories = tokenFilterFactories;
|
||||
}
|
||||
|
||||
public void setName(String name) {
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
public void setPositionIncrementGap(Integer positionIncrementGap) {
|
||||
this.positionIncrementGap = positionIncrementGap;
|
||||
}
|
||||
|
||||
public void setOffsetGap(Integer offsetGap) {
|
||||
this.offsetGap = offsetGap;
|
||||
}
|
||||
|
||||
public Analyzer create() {
|
||||
return new Analyzer() {
|
||||
private final Integer positionIncrementGap = AnalyzerFactory.this.positionIncrementGap;
|
||||
private final Integer offsetGap = AnalyzerFactory.this.offsetGap;
|
||||
|
||||
@Override
|
||||
public Reader initReader(String fieldName, Reader reader) {
|
||||
if (charFilterFactories != null && charFilterFactories.size() > 0) {
|
||||
Reader wrappedReader = reader;
|
||||
for (CharFilterFactory charFilterFactory : charFilterFactories) {
|
||||
wrappedReader = charFilterFactory.create(wrappedReader);
|
||||
}
|
||||
reader = wrappedReader;
|
||||
}
|
||||
return reader;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Analyzer.TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||
final Tokenizer tokenizer = tokenizerFactory.create(reader);
|
||||
TokenStream tokenStream = tokenizer;
|
||||
for (TokenFilterFactory filterFactory : tokenFilterFactories) {
|
||||
tokenStream = filterFactory.create(tokenStream);
|
||||
}
|
||||
return new TokenStreamComponents(tokenizer, tokenStream);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getPositionIncrementGap(String fieldName) {
|
||||
return null == positionIncrementGap ? super.getPositionIncrementGap(fieldName) : positionIncrementGap;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getOffsetGap(String fieldName) {
|
||||
return null == offsetGap ? super.getOffsetGap(fieldName) : offsetGap;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder sb = new StringBuilder("AnalyzerFactory(");
|
||||
if (null != name) {
|
||||
sb.append("name:");
|
||||
sb.append(name);
|
||||
sb.append(", ");
|
||||
}
|
||||
if (null != positionIncrementGap) {
|
||||
sb.append("positionIncrementGap:");
|
||||
sb.append(positionIncrementGap);
|
||||
sb.append(", ");
|
||||
}
|
||||
if (null != offsetGap) {
|
||||
sb.append("offsetGap:");
|
||||
sb.append(offsetGap);
|
||||
sb.append(", ");
|
||||
}
|
||||
for (CharFilterFactory charFilterFactory: charFilterFactories) {
|
||||
sb.append(charFilterFactory);
|
||||
sb.append(", ");
|
||||
}
|
||||
sb.append(tokenizerFactory);
|
||||
for (TokenFilterFactory tokenFilterFactory : tokenFilterFactories) {
|
||||
sb.append(", ");
|
||||
sb.append(tokenFilterFactory);
|
||||
}
|
||||
sb.append(')');
|
||||
return sb.toString();
|
||||
}
|
||||
}
|
@ -71,6 +71,7 @@ public class TestPerfTasksLogic extends BenchmarkTestCase {
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
copyToWorkDir("reuters.first20.lines.txt");
|
||||
copyToWorkDir("test-mapping-ISOLatin1Accent-partial.txt");
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1019,63 +1020,79 @@ public class TestPerfTasksLogic extends BenchmarkTestCase {
|
||||
}
|
||||
|
||||
/**
|
||||
* Test that we can create ShingleAnalyzerWrappers.
|
||||
* Test that we can create shingle analyzers using AnalyzerFactory.
|
||||
*/
|
||||
public void testShingleAnalyzer() throws Exception {
|
||||
String text = "one,two,three, four five six";
|
||||
|
||||
// Default analyzer, maxShingleSize, and outputUnigrams
|
||||
Benchmark benchmark = execBenchmark(getShingleConfig(""));
|
||||
// StandardTokenizer, maxShingleSize, and outputUnigrams
|
||||
Benchmark benchmark = execBenchmark(getAnalyzerFactoryConfig
|
||||
("shingle-analyzer", "StandardTokenizer,ShingleFilter"));
|
||||
benchmark.getRunData().getAnalyzer().tokenStream
|
||||
("bogus", new StringReader(text)).close();
|
||||
assertEqualShingle(benchmark.getRunData().getAnalyzer(), text,
|
||||
new String[] {"one", "one two", "two", "two three",
|
||||
"three", "three four", "four", "four five",
|
||||
"five", "five six", "six"});
|
||||
// Default analyzer, maxShingleSize = 3, and outputUnigrams = false
|
||||
BaseTokenStreamTestCase.assertAnalyzesTo(benchmark.getRunData().getAnalyzer(), text,
|
||||
new String[] { "one", "one two", "two", "two three",
|
||||
"three", "three four", "four", "four five",
|
||||
"five", "five six", "six" });
|
||||
// StandardTokenizer, maxShingleSize = 3, and outputUnigrams = false
|
||||
benchmark = execBenchmark
|
||||
(getShingleConfig("maxShingleSize:3,outputUnigrams:false"));
|
||||
assertEqualShingle(benchmark.getRunData().getAnalyzer(), text,
|
||||
new String[] { "one two", "one two three", "two three",
|
||||
"two three four", "three four",
|
||||
"three four five", "four five",
|
||||
"four five six", "five six" });
|
||||
// WhitespaceAnalyzer, default maxShingleSize and outputUnigrams
|
||||
(getAnalyzerFactoryConfig
|
||||
("shingle-analyzer",
|
||||
"StandardTokenizer,ShingleFilter(maxShingleSize:3,outputUnigrams:false)"));
|
||||
BaseTokenStreamTestCase.assertAnalyzesTo(benchmark.getRunData().getAnalyzer(), text,
|
||||
new String[] { "one two", "one two three", "two three",
|
||||
"two three four", "three four",
|
||||
"three four five", "four five",
|
||||
"four five six", "five six" });
|
||||
// WhitespaceTokenizer, default maxShingleSize and outputUnigrams
|
||||
benchmark = execBenchmark
|
||||
(getShingleConfig("analyzer:WhitespaceAnalyzer"));
|
||||
assertEqualShingle(benchmark.getRunData().getAnalyzer(), text,
|
||||
new String[] { "one,two,three,", "one,two,three, four",
|
||||
"four", "four five", "five", "five six",
|
||||
"six" });
|
||||
(getAnalyzerFactoryConfig("shingle-analyzer", "WhitespaceTokenizer,ShingleFilter"));
|
||||
BaseTokenStreamTestCase.assertAnalyzesTo(benchmark.getRunData().getAnalyzer(), text,
|
||||
new String[] { "one,two,three,", "one,two,three, four",
|
||||
"four", "four five", "five", "five six",
|
||||
"six" });
|
||||
|
||||
// WhitespaceAnalyzer, maxShingleSize=3 and outputUnigrams=false
|
||||
// WhitespaceTokenizer, maxShingleSize=3 and outputUnigrams=false
|
||||
benchmark = execBenchmark
|
||||
(getShingleConfig
|
||||
("outputUnigrams:false,maxShingleSize:3,analyzer:WhitespaceAnalyzer"));
|
||||
assertEqualShingle(benchmark.getRunData().getAnalyzer(), text,
|
||||
new String[] { "one,two,three, four",
|
||||
"one,two,three, four five",
|
||||
"four five", "four five six",
|
||||
"five six" });
|
||||
(getAnalyzerFactoryConfig
|
||||
("shingle-factory",
|
||||
"WhitespaceTokenizer,ShingleFilter(outputUnigrams:false,maxShingleSize:3)"));
|
||||
BaseTokenStreamTestCase.assertAnalyzesTo(benchmark.getRunData().getAnalyzer(), text,
|
||||
new String[] { "one,two,three, four",
|
||||
"one,two,three, four five",
|
||||
"four five", "four five six",
|
||||
"five six" });
|
||||
}
|
||||
|
||||
private void assertEqualShingle
|
||||
(Analyzer analyzer, String text, String[] expected) throws Exception {
|
||||
BaseTokenStreamTestCase.assertAnalyzesTo(analyzer, text, expected);
|
||||
}
|
||||
|
||||
private String[] getShingleConfig(String params) {
|
||||
private String[] getAnalyzerFactoryConfig(String name, String params) {
|
||||
final String singleQuoteEscapedName = name.replaceAll("'", "\\\\'");
|
||||
String algLines[] = {
|
||||
"content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
|
||||
"docs.file=" + getReuters20LinesFile(),
|
||||
"work.dir=" + getWorkDir().getAbsolutePath().replaceAll("\\\\", "/"), // Fix Windows path
|
||||
"content.source.forever=false",
|
||||
"directory=RAMDirectory",
|
||||
"NewShingleAnalyzer(" + params + ")",
|
||||
"AnalyzerFactory(name:'" + singleQuoteEscapedName + "', " + params + ")",
|
||||
"NewAnalyzer('" + singleQuoteEscapedName + "')",
|
||||
"CreateIndex",
|
||||
"{ \"AddDocs\" AddDoc > : * "
|
||||
};
|
||||
return algLines;
|
||||
}
|
||||
|
||||
public void testAnalyzerFactory() throws Exception {
|
||||
String text = "Fortieth, Quarantième, Cuadragésimo";
|
||||
Benchmark benchmark = execBenchmark(getAnalyzerFactoryConfig
|
||||
("ascii folded, pattern replaced, standard tokenized, downcased, bigrammed.'analyzer'",
|
||||
"positionIncrementGap:100,offsetGap:1111,"
|
||||
+"MappingCharFilter(mapping:'test-mapping-ISOLatin1Accent-partial.txt'),"
|
||||
+"PatternReplaceCharFilterFactory(pattern:'e(\\\\\\\\S*)m',replacement:\"$1xxx$1\"),"
|
||||
+"StandardTokenizer,LowerCaseFilter,NGramTokenFilter(minGramSize:2,maxGramSize:2)"));
|
||||
BaseTokenStreamTestCase.assertAnalyzesTo(benchmark.getRunData().getAnalyzer(), text,
|
||||
new String[] { "fo", "or", "rt", "ti", "ie", "et", "th",
|
||||
"qu", "ua", "ar", "ra", "an", "nt", "ti", "ix", "xx", "xx", "xe",
|
||||
"cu", "ua", "ad", "dr", "ra", "ag", "gs", "si", "ix", "xx", "xx", "xs", "si", "io"});
|
||||
}
|
||||
|
||||
private String getReuters20LinesFile() {
|
||||
return getWorkDirResourcePath("reuters.first20.lines.txt");
|
||||
|
@ -0,0 +1,30 @@
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Syntax:
|
||||
# "source" => "target"
|
||||
# "source".length() > 0 (source cannot be empty.)
|
||||
# "target".length() >= 0 (target can be empty.)
|
||||
|
||||
# example:
|
||||
# "À" => "A"
|
||||
# "\u00C0" => "A"
|
||||
# "\u00C0" => "\u0041"
|
||||
# "ß" => "ss"
|
||||
# "\t" => " "
|
||||
# "\n" => ""
|
||||
|
||||
# è => e
|
||||
"\u00E8" => "e"
|
||||
|
||||
# é => e
|
||||
"\u00E9" => "e"
|
@ -63,7 +63,7 @@ final class ForUtil {
|
||||
}
|
||||
final PackedInts.Decoder decoder = PackedInts.getDecoder(format, version, bpv);
|
||||
final int iterations = computeIterations(decoder);
|
||||
maxDataSize = Math.max(maxDataSize, iterations * decoder.valueCount());
|
||||
maxDataSize = Math.max(maxDataSize, iterations * decoder.byteValueCount());
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -75,7 +75,7 @@ final class ForUtil {
|
||||
* values with the provided {@link Decoder}.
|
||||
*/
|
||||
private static int computeIterations(PackedInts.Decoder decoder) {
|
||||
return (int) Math.ceil((float) BLOCK_SIZE / decoder.valueCount());
|
||||
return (int) Math.ceil((float) BLOCK_SIZE / decoder.byteValueCount());
|
||||
}
|
||||
|
||||
/**
|
||||
@ -165,9 +165,9 @@ final class ForUtil {
|
||||
assert numBits > 0 && numBits <= 32 : numBits;
|
||||
final PackedInts.Encoder encoder = encoders[numBits];
|
||||
final int iters = iterations[numBits];
|
||||
assert iters * encoder.valueCount() >= BLOCK_SIZE;
|
||||
assert iters * encoder.byteValueCount() >= BLOCK_SIZE;
|
||||
final int encodedSize = encodedSizes[numBits];
|
||||
assert (iters * encoder.blockCount()) << 3 >= encodedSize;
|
||||
assert iters * encoder.byteBlockCount() >= encodedSize;
|
||||
|
||||
out.writeByte((byte) numBits);
|
||||
|
||||
@ -198,7 +198,7 @@ final class ForUtil {
|
||||
|
||||
final PackedInts.Decoder decoder = decoders[numBits];
|
||||
final int iters = iterations[numBits];
|
||||
assert iters * decoder.valueCount() >= BLOCK_SIZE;
|
||||
assert iters * decoder.byteValueCount() >= BLOCK_SIZE;
|
||||
|
||||
decoder.decode(encoded, 0, decoded, 0, iters);
|
||||
}
|
||||
|
@ -130,8 +130,8 @@ abstract class AbstractBlockPackedWriter {
|
||||
|
||||
protected final void writeValues(int bitsRequired) throws IOException {
|
||||
final PackedInts.Encoder encoder = PackedInts.getEncoder(PackedInts.Format.PACKED, PackedInts.VERSION_CURRENT, bitsRequired);
|
||||
final int iterations = values.length / encoder.valueCount();
|
||||
final int blockSize = encoder.blockCount() * 8 * iterations;
|
||||
final int iterations = values.length / encoder.byteValueCount();
|
||||
final int blockSize = encoder.byteBlockCount() * iterations;
|
||||
if (blocks == null || blocks.length < blockSize) {
|
||||
blocks = new byte[blockSize];
|
||||
}
|
||||
|
@ -212,8 +212,8 @@ public final class BlockPackedReaderIterator {
|
||||
Arrays.fill(values, minValue);
|
||||
} else {
|
||||
final PackedInts.Decoder decoder = PackedInts.getDecoder(PackedInts.Format.PACKED, packedIntsVersion, bitsPerValue);
|
||||
final int iterations = blockSize / decoder.valueCount();
|
||||
final int blocksSize = iterations * 8 * decoder.blockCount();
|
||||
final int iterations = blockSize / decoder.byteValueCount();
|
||||
final int blocksSize = iterations * decoder.byteBlockCount();
|
||||
if (blocks == null || blocks.length < blocksSize) {
|
||||
blocks = new byte[blocksSize];
|
||||
}
|
||||
|
@ -2,7 +2,6 @@
|
||||
|
||||
package org.apache.lucene.util.packed;
|
||||
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
@ -153,35 +152,30 @@ abstract class BulkOperation implements PackedInts.Decoder, PackedInts.Encoder {
|
||||
* For every number of bits per value, there is a minimum number of
|
||||
* blocks (b) / values (v) you need to write in order to reach the next block
|
||||
* boundary:
|
||||
* - 16 bits per value -> b=1, v=4
|
||||
* - 24 bits per value -> b=3, v=8
|
||||
* - 50 bits per value -> b=25, v=32
|
||||
* - 63 bits per value -> b=63, v=64
|
||||
* - 16 bits per value -> b=2, v=1
|
||||
* - 24 bits per value -> b=3, v=1
|
||||
* - 50 bits per value -> b=25, v=4
|
||||
* - 63 bits per value -> b=63, v=8
|
||||
* - ...
|
||||
* <p>
|
||||
*
|
||||
* A bulk read consists in copying <code>iterations*v</code> values that are
|
||||
* contained in <code>iterations*b</code> blocks into a <code>long[]</code>
|
||||
* (higher values of <code>iterations</code> are likely to yield a better
|
||||
* throughput) => this requires n * (b + v) longs in memory.
|
||||
* <p>
|
||||
* throughput) => this requires n * (b + 8v) bytes of memory.
|
||||
*
|
||||
* This method computes <code>iterations</code> as
|
||||
* <code>ramBudget / (8 * (b + v))</code> (since a long is 8 bytes).
|
||||
* <p>
|
||||
* The resulting number of iterations of this method is guaranteed not to
|
||||
* overflow when multiplied by
|
||||
* <tt>8 * {@link PackedInts.Encoder#blockCount()}</tt> or
|
||||
* <tt>8 * {@link PackedInts.Decoder#blockCount()}</tt>.
|
||||
* <code>ramBudget / (b + 8v)</code> (since a long is 8 bytes).
|
||||
*/
|
||||
public final int computeIterations(int valueCount, int ramBudget) {
|
||||
final int iterations = (ramBudget >>> 3) / (blockCount() + valueCount());
|
||||
final int iterations = ramBudget / (byteBlockCount() + 8 * byteValueCount());
|
||||
if (iterations == 0) {
|
||||
// at least 1
|
||||
return 1;
|
||||
} else if ((iterations - 1) * blockCount() >= valueCount) {
|
||||
} else if ((iterations - 1) * byteValueCount() >= valueCount) {
|
||||
// don't allocate for more than the size of the reader
|
||||
return (int) Math.ceil((double) valueCount / valueCount());
|
||||
return (int) Math.ceil((double) valueCount / byteValueCount());
|
||||
} else {
|
||||
return iterations;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
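Worked numerically (the bit width and budget below are made-up example values), the new formula charges each iteration its real footprint, b bytes of packed blocks plus 8*v bytes of decoded longs:

    // bitsPerValue = 20: byteBlockCount() == 5, byteValueCount() == 2
    int ramBudget = 1024;
    int iterations = ramBudget / (5 + 8 * 2);   // 1024 / 21 = 48 iterations fit in the budget
    // If only valueCount = 10 values will ever be processed, (48 - 1) * 2 >= 10, so the
    // method instead returns (int) Math.ceil(10 / 2.0) == 5 and avoids over-allocating.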
@ -1,5 +1,6 @@
|
||||
package org.apache.lucene.util.packed;
|
||||
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
@ -23,9 +24,12 @@ package org.apache.lucene.util.packed;
|
||||
class BulkOperationPacked extends BulkOperation {
|
||||
|
||||
private final int bitsPerValue;
|
||||
private final int blockCount;
|
||||
private final int valueCount;
|
||||
private final int longBlockCount;
|
||||
private final int longValueCount;
|
||||
private final int byteBlockCount;
|
||||
private final int byteValueCount;
|
||||
private final long mask;
|
||||
private final int intMask;
|
||||
|
||||
public BulkOperationPacked(int bitsPerValue) {
|
||||
this.bitsPerValue = bitsPerValue;
|
||||
@ -34,31 +38,50 @@ class BulkOperationPacked extends BulkOperation {
|
||||
while ((blocks & 1) == 0) {
|
||||
blocks >>>= 1;
|
||||
}
|
||||
this.blockCount = blocks;
|
||||
this.valueCount = 64 * blockCount / bitsPerValue;
|
||||
this.longBlockCount = blocks;
|
||||
this.longValueCount = 64 * longBlockCount / bitsPerValue;
|
||||
int byteBlockCount = 8 * longBlockCount;
|
||||
int byteValueCount = longValueCount;
|
||||
while ((byteBlockCount & 1) == 0 && (byteValueCount & 1) == 0) {
|
||||
byteBlockCount >>>= 1;
|
||||
byteValueCount >>>= 1;
|
||||
}
|
||||
this.byteBlockCount = byteBlockCount;
|
||||
this.byteValueCount = byteValueCount;
|
||||
if (bitsPerValue == 64) {
|
||||
this.mask = ~0L;
|
||||
} else {
|
||||
this.mask = (1L << bitsPerValue) - 1;
|
||||
}
|
||||
assert valueCount * bitsPerValue == 64 * blockCount;
|
||||
this.intMask = (int) mask;
|
||||
assert longValueCount * bitsPerValue == 64 * longBlockCount;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int blockCount() {
|
||||
return blockCount;
|
||||
public int longBlockCount() {
|
||||
return longBlockCount;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int valueCount() {
|
||||
return valueCount;
|
||||
public int longValueCount() {
|
||||
return longValueCount;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int byteBlockCount() {
|
||||
return byteBlockCount;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int byteValueCount() {
|
||||
return byteValueCount;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void decode(long[] blocks, int blocksOffset, long[] values,
|
||||
int valuesOffset, int iterations) {
|
||||
int bitsLeft = 64;
|
||||
for (int i = 0; i < valueCount * iterations; ++i) {
|
||||
for (int i = 0; i < longValueCount * iterations; ++i) {
|
||||
bitsLeft -= bitsPerValue;
|
||||
if (bitsLeft < 0) {
|
||||
values[valuesOffset++] =
|
||||
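To make the relationship between the four counts computed by the constructor concrete, here is its arithmetic traced for bitsPerValue = 20 (an arbitrary example width, assuming the reduction starts from bitsPerValue as in the surrounding code):

    // longBlockCount:  20 -> 10 -> 5                 (strip factors of two: 5 longs per long iteration)
    // longValueCount:  64 * 5 / 20 == 16             (16 values fill those 5 longs exactly)
    // byteBlockCount:  8 * 5 = 40 -> 20 -> 10 -> 5   (halved in lock-step with byteValueCount)
    // byteValueCount:  16 -> 8 -> 4 -> 2             (2 values fill 5 bytes exactly)
    int bitsPerValue = 20;
    assert 16 * bitsPerValue == 64 * 5;   // long granularity invariant
    assert 2 * bitsPerValue == 8 * 5;     // byte granularity invariant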
@ -74,22 +97,28 @@ class BulkOperationPacked extends BulkOperation {
|
||||
@Override
|
||||
public void decode(byte[] blocks, int blocksOffset, long[] values,
|
||||
int valuesOffset, int iterations) {
|
||||
int blockBitsLeft = 8;
|
||||
int valueBitsLeft = bitsPerValue;
|
||||
long nextValue = 0;
|
||||
for (int end = valuesOffset + iterations * valueCount; valuesOffset < end; ) {
|
||||
if (valueBitsLeft > blockBitsLeft) {
|
||||
nextValue |= (blocks[blocksOffset++] & ((1L << blockBitsLeft) - 1)) << (valueBitsLeft - blockBitsLeft);
|
||||
valueBitsLeft -= blockBitsLeft;
|
||||
blockBitsLeft = 8;
|
||||
long nextValue = 0L;
|
||||
int bitsLeft = bitsPerValue;
|
||||
for (int i = 0; i < iterations * byteBlockCount; ++i) {
|
||||
final long bytes = blocks[blocksOffset++] & 0xFFL;
|
||||
if (bitsLeft > 8) {
|
||||
// just buffer
|
||||
bitsLeft -= 8;
|
||||
nextValue |= bytes << bitsLeft;
|
||||
} else {
|
||||
nextValue |= ((blocks[blocksOffset] & 0xFFL) >>> (blockBitsLeft - valueBitsLeft)) & ((1L << valueBitsLeft) - 1);
|
||||
values[valuesOffset++] = nextValue;
|
||||
nextValue = 0;
|
||||
blockBitsLeft -= valueBitsLeft;
|
||||
valueBitsLeft = bitsPerValue;
|
||||
// flush
|
||||
int bits = 8 - bitsLeft;
|
||||
values[valuesOffset++] = nextValue | (bytes >>> bits);
|
||||
while (bits >= bitsPerValue) {
|
||||
bits -= bitsPerValue;
|
||||
values[valuesOffset++] = (bytes >>> bits) & mask;
|
||||
}
|
||||
// then buffer
|
||||
bitsLeft = bitsPerValue - bits;
|
||||
nextValue = (bytes & ((1L << bits) - 1)) << bitsLeft;
|
||||
}
|
||||
}
|
||||
assert bitsLeft == bitsPerValue;
|
||||
}
|
||||
|
||||
@Override
|
||||
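The byte-oriented decode above consumes input one byte at a time: while the pending value still needs more than 8 bits the byte is only buffered, otherwise the current value is flushed, any whole values contained in the byte are emitted, and the leftover bits start the next value. A minimal trace, assuming bitsPerValue = 3 and hand-picked input bytes (all API names come from the patch itself):

    // 24 bits = 0b101_110_011_110_010_100_100_111, i.e. the values 5, 6, 3, 6, 2, 4, 4, 7
    byte[] blocks = { (byte) 0xB9, (byte) 0xE5, (byte) 0x27 };
    long[] values = new long[8];
    PackedInts.Decoder decoder =
        PackedInts.getDecoder(PackedInts.Format.PACKED, PackedInts.VERSION_CURRENT, 3);
    decoder.decode(blocks, 0, values, 0, 1);   // 1 iteration = byteBlockCount() (3) bytes -> byteValueCount() (8) values
    // values should now hold { 5, 6, 3, 6, 2, 4, 4, 7 }: the first byte alone yields 5 and 6,
    // its two trailing bits are buffered in nextValue and completed by the next byte.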
@ -99,7 +128,7 @@ class BulkOperationPacked extends BulkOperation {
|
||||
throw new UnsupportedOperationException("Cannot decode " + bitsPerValue + "-bits values into an int[]");
|
||||
}
|
||||
int bitsLeft = 64;
|
||||
for (int i = 0; i < valueCount * iterations; ++i) {
|
||||
for (int i = 0; i < longValueCount * iterations; ++i) {
|
||||
bitsLeft -= bitsPerValue;
|
||||
if (bitsLeft < 0) {
|
||||
values[valuesOffset++] = (int)
|
||||
@ -115,25 +144,28 @@ class BulkOperationPacked extends BulkOperation {
|
||||
@Override
|
||||
public void decode(byte[] blocks, int blocksOffset, int[] values,
|
||||
int valuesOffset, int iterations) {
|
||||
if (bitsPerValue > 32) {
|
||||
throw new UnsupportedOperationException("Cannot decode " + bitsPerValue + "-bits values into an int[]");
|
||||
}
|
||||
int blockBitsLeft = 8;
|
||||
int valueBitsLeft = bitsPerValue;
|
||||
int nextValue = 0;
|
||||
for (int end = valuesOffset + iterations * valueCount; valuesOffset < end; ) {
|
||||
if (valueBitsLeft > blockBitsLeft) {
|
||||
nextValue |= (blocks[blocksOffset++] & ((1L << blockBitsLeft) - 1)) << (valueBitsLeft - blockBitsLeft);
|
||||
valueBitsLeft -= blockBitsLeft;
|
||||
blockBitsLeft = 8;
|
||||
int bitsLeft = bitsPerValue;
|
||||
for (int i = 0; i < iterations * byteBlockCount; ++i) {
|
||||
final int bytes = blocks[blocksOffset++] & 0xFF;
|
||||
if (bitsLeft > 8) {
|
||||
// just buffer
|
||||
bitsLeft -= 8;
|
||||
nextValue |= bytes << bitsLeft;
|
||||
} else {
|
||||
nextValue |= ((blocks[blocksOffset] & 0xFFL) >>> (blockBitsLeft - valueBitsLeft)) & ((1L << valueBitsLeft) - 1);
|
||||
values[valuesOffset++] = nextValue;
|
||||
nextValue = 0;
|
||||
blockBitsLeft -= valueBitsLeft;
|
||||
valueBitsLeft = bitsPerValue;
|
||||
// flush
|
||||
int bits = 8 - bitsLeft;
|
||||
values[valuesOffset++] = nextValue | (bytes >>> bits);
|
||||
while (bits >= bitsPerValue) {
|
||||
bits -= bitsPerValue;
|
||||
values[valuesOffset++] = (bytes >>> bits) & intMask;
|
||||
}
|
||||
// then buffer
|
||||
bitsLeft = bitsPerValue - bits;
|
||||
nextValue = (bytes & ((1 << bits) - 1)) << bitsLeft;
|
||||
}
|
||||
}
|
||||
assert bitsLeft == bitsPerValue;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -141,7 +173,7 @@ class BulkOperationPacked extends BulkOperation {
|
||||
int blocksOffset, int iterations) {
|
||||
long nextBlock = 0;
|
||||
int bitsLeft = 64;
|
||||
for (int i = 0; i < valueCount * iterations; ++i) {
|
||||
for (int i = 0; i < longValueCount * iterations; ++i) {
|
||||
bitsLeft -= bitsPerValue;
|
||||
if (bitsLeft > 0) {
|
||||
nextBlock |= values[valuesOffset++] << bitsLeft;
|
||||
@ -164,7 +196,7 @@ class BulkOperationPacked extends BulkOperation {
|
||||
int blocksOffset, int iterations) {
|
||||
long nextBlock = 0;
|
||||
int bitsLeft = 64;
|
||||
for (int i = 0; i < valueCount * iterations; ++i) {
|
||||
for (int i = 0; i < longValueCount * iterations; ++i) {
|
||||
bitsLeft -= bitsPerValue;
|
||||
if (bitsLeft > 0) {
|
||||
nextBlock |= (values[valuesOffset++] & 0xFFFFFFFFL) << bitsLeft;
|
||||
@ -185,47 +217,57 @@ class BulkOperationPacked extends BulkOperation {
|
||||
@Override
|
||||
public void encode(long[] values, int valuesOffset, byte[] blocks,
|
||||
int blocksOffset, int iterations) {
|
||||
long nextBlock = 0;
|
||||
int bitsLeft = 64;
|
||||
for (int i = 0; i < valueCount * iterations; ++i) {
|
||||
bitsLeft -= bitsPerValue;
|
||||
if (bitsLeft > 0) {
|
||||
nextBlock |= values[valuesOffset++] << bitsLeft;
|
||||
} else if (bitsLeft == 0) {
|
||||
nextBlock |= values[valuesOffset++];
|
||||
blocksOffset = writeLong(nextBlock, blocks, blocksOffset);
|
||||
nextBlock = 0;
|
||||
bitsLeft = 64;
|
||||
} else { // bitsLeft < 0
|
||||
nextBlock |= values[valuesOffset] >>> -bitsLeft;
|
||||
blocksOffset = writeLong(nextBlock, blocks, blocksOffset);
|
||||
nextBlock = (values[valuesOffset++] & ((1L << -bitsLeft) - 1)) << (64 + bitsLeft);
|
||||
bitsLeft += 64;
|
||||
int nextBlock = 0;
|
||||
int bitsLeft = 8;
|
||||
for (int i = 0; i < byteValueCount * iterations; ++i) {
|
||||
final long v = values[valuesOffset++];
|
||||
assert bitsPerValue == 64 || PackedInts.bitsRequired(v) <= bitsPerValue;
|
||||
if (bitsPerValue < bitsLeft) {
|
||||
// just buffer
|
||||
nextBlock |= v << (bitsLeft - bitsPerValue);
|
||||
bitsLeft -= bitsPerValue;
|
||||
} else {
|
||||
// flush as many blocks as possible
|
||||
int bits = bitsPerValue - bitsLeft;
|
||||
blocks[blocksOffset++] = (byte) (nextBlock | (v >>> bits));
|
||||
while (bits >= 8) {
|
||||
bits -= 8;
|
||||
blocks[blocksOffset++] = (byte) (v >>> bits);
|
||||
}
|
||||
// then buffer
|
||||
bitsLeft = 8 - bits;
|
||||
nextBlock = (int) ((v & ((1L << bits) - 1)) << bitsLeft);
|
||||
}
|
||||
}
|
||||
assert bitsLeft == 8;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void encode(int[] values, int valuesOffset, byte[] blocks,
|
||||
int blocksOffset, int iterations) {
|
||||
long nextBlock = 0;
|
||||
int bitsLeft = 64;
|
||||
for (int i = 0; i < valueCount * iterations; ++i) {
|
||||
bitsLeft -= bitsPerValue;
|
||||
if (bitsLeft > 0) {
|
||||
nextBlock |= (values[valuesOffset++] & 0xFFFFFFFFL) << bitsLeft;
|
||||
} else if (bitsLeft == 0) {
|
||||
nextBlock |= (values[valuesOffset++] & 0xFFFFFFFFL);
|
||||
blocksOffset = writeLong(nextBlock, blocks, blocksOffset);
|
||||
nextBlock = 0;
|
||||
bitsLeft = 64;
|
||||
} else { // bitsLeft < 0
|
||||
nextBlock |= (values[valuesOffset] & 0xFFFFFFFFL) >>> -bitsLeft;
|
||||
blocksOffset = writeLong(nextBlock, blocks, blocksOffset);
|
||||
nextBlock = (values[valuesOffset++] & ((1L << -bitsLeft) - 1)) << (64 + bitsLeft);
|
||||
bitsLeft += 64;
|
||||
int nextBlock = 0;
|
||||
int bitsLeft = 8;
|
||||
for (int i = 0; i < byteValueCount * iterations; ++i) {
|
||||
final int v = values[valuesOffset++];
|
||||
assert PackedInts.bitsRequired(v & 0xFFFFFFFFL) <= bitsPerValue;
|
||||
if (bitsPerValue < bitsLeft) {
|
||||
// just buffer
|
||||
nextBlock |= v << (bitsLeft - bitsPerValue);
|
||||
bitsLeft -= bitsPerValue;
|
||||
} else {
|
||||
// flush as many blocks as possible
|
||||
int bits = bitsPerValue - bitsLeft;
|
||||
blocks[blocksOffset++] = (byte) (nextBlock | (v >>> bits));
|
||||
while (bits >= 8) {
|
||||
bits -= 8;
|
||||
blocks[blocksOffset++] = (byte) (v >>> bits);
|
||||
}
|
||||
// then buffer
|
||||
bitsLeft = 8 - bits;
|
||||
nextBlock = (v & ((1 << bits) - 1)) << bitsLeft;
|
||||
}
|
||||
}
|
||||
assert bitsLeft == 8;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -26,8 +26,6 @@ final class BulkOperationPacked1 extends BulkOperationPacked {
|
||||
|
||||
public BulkOperationPacked1() {
|
||||
super(1);
|
||||
assert blockCount() == 1;
|
||||
assert valueCount() == 64;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -42,7 +40,7 @@ final class BulkOperationPacked1 extends BulkOperationPacked {
|
||||
|
||||
@Override
|
||||
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
|
||||
for (int j = 0; j < 8 * iterations; ++j) {
|
||||
for (int j = 0; j < iterations; ++j) {
|
||||
final byte block = blocks[blocksOffset++];
|
||||
values[valuesOffset++] = (block >>> 7) & 1;
|
||||
values[valuesOffset++] = (block >>> 6) & 1;
|
||||
@ -67,7 +65,7 @@ final class BulkOperationPacked1 extends BulkOperationPacked {
|
||||
|
||||
@Override
|
||||
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
|
||||
for (int j = 0; j < 8 * iterations; ++j) {
|
||||
for (int j = 0; j < iterations; ++j) {
|
||||
final byte block = blocks[blocksOffset++];
|
||||
values[valuesOffset++] = (block >>> 7) & 1;
|
||||
values[valuesOffset++] = (block >>> 6) & 1;
|
||||
|
@ -26,8 +26,6 @@ final class BulkOperationPacked10 extends BulkOperationPacked {
|
||||
|
||||
public BulkOperationPacked10() {
|
||||
super(10);
|
||||
assert blockCount() == 5;
|
||||
assert valueCount() == 32;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -75,7 +73,7 @@ final class BulkOperationPacked10 extends BulkOperationPacked {
|
||||
|
||||
@Override
|
||||
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
|
||||
for (int i = 0; i < 8 * iterations; ++i) {
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
final int byte0 = blocks[blocksOffset++] & 0xFF;
|
||||
final int byte1 = blocks[blocksOffset++] & 0xFF;
|
||||
values[valuesOffset++] = (byte0 << 2) | (byte1 >>> 6);
|
||||
@ -133,7 +131,7 @@ final class BulkOperationPacked10 extends BulkOperationPacked {
|
||||
|
||||
@Override
|
||||
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
|
||||
for (int i = 0; i < 8 * iterations; ++i) {
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
final long byte0 = blocks[blocksOffset++] & 0xFF;
|
||||
final long byte1 = blocks[blocksOffset++] & 0xFF;
|
||||
values[valuesOffset++] = (byte0 << 2) | (byte1 >>> 6);
|
||||
|
@ -26,8 +26,6 @@ final class BulkOperationPacked11 extends BulkOperationPacked {
|
||||
|
||||
public BulkOperationPacked11() {
|
||||
super(11);
|
||||
assert blockCount() == 11;
|
||||
assert valueCount() == 64;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -113,7 +111,7 @@ final class BulkOperationPacked11 extends BulkOperationPacked {
|
||||
|
||||
@Override
|
||||
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
|
||||
for (int i = 0; i < 8 * iterations; ++i) {
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
final int byte0 = blocks[blocksOffset++] & 0xFF;
|
||||
final int byte1 = blocks[blocksOffset++] & 0xFF;
|
||||
values[valuesOffset++] = (byte0 << 3) | (byte1 >>> 5);
|
||||
@ -219,7 +217,7 @@ final class BulkOperationPacked11 extends BulkOperationPacked {
|
||||
|
||||
@Override
|
||||
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
|
||||
for (int i = 0; i < 8 * iterations; ++i) {
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
final long byte0 = blocks[blocksOffset++] & 0xFF;
|
||||
final long byte1 = blocks[blocksOffset++] & 0xFF;
|
||||
values[valuesOffset++] = (byte0 << 3) | (byte1 >>> 5);
|
||||
|
@ -26,8 +26,6 @@ final class BulkOperationPacked12 extends BulkOperationPacked {
|
||||
|
||||
public BulkOperationPacked12() {
|
||||
super(12);
|
||||
assert blockCount() == 3;
|
||||
assert valueCount() == 16;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -57,7 +55,7 @@ final class BulkOperationPacked12 extends BulkOperationPacked {
|
||||
|
||||
@Override
|
||||
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
|
||||
for (int i = 0; i < 8 * iterations; ++i) {
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
final int byte0 = blocks[blocksOffset++] & 0xFF;
|
||||
final int byte1 = blocks[blocksOffset++] & 0xFF;
|
||||
values[valuesOffset++] = (byte0 << 4) | (byte1 >>> 4);
|
||||
@ -93,7 +91,7 @@ final class BulkOperationPacked12 extends BulkOperationPacked {
|
||||
|
||||
@Override
|
||||
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
|
||||
for (int i = 0; i < 8 * iterations; ++i) {
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
final long byte0 = blocks[blocksOffset++] & 0xFF;
|
||||
final long byte1 = blocks[blocksOffset++] & 0xFF;
|
||||
values[valuesOffset++] = (byte0 << 4) | (byte1 >>> 4);
|
||||
|
@ -26,8 +26,6 @@ final class BulkOperationPacked13 extends BulkOperationPacked {
|
||||
|
||||
public BulkOperationPacked13() {
|
||||
super(13);
|
||||
assert blockCount() == 13;
|
||||
assert valueCount() == 64;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -115,7 +113,7 @@ final class BulkOperationPacked13 extends BulkOperationPacked {
|
||||
|
||||
@Override
|
||||
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
|
||||
for (int i = 0; i < 8 * iterations; ++i) {
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
final int byte0 = blocks[blocksOffset++] & 0xFF;
|
||||
final int byte1 = blocks[blocksOffset++] & 0xFF;
|
||||
values[valuesOffset++] = (byte0 << 5) | (byte1 >>> 3);
|
||||
@ -225,7 +223,7 @@ final class BulkOperationPacked13 extends BulkOperationPacked {
|
||||
|
||||
@Override
|
||||
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
|
||||
for (int i = 0; i < 8 * iterations; ++i) {
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
final long byte0 = blocks[blocksOffset++] & 0xFF;
|
||||
final long byte1 = blocks[blocksOffset++] & 0xFF;
|
||||
values[valuesOffset++] = (byte0 << 5) | (byte1 >>> 3);
|
||||
|
@ -26,8 +26,6 @@ final class BulkOperationPacked14 extends BulkOperationPacked {
|
||||
|
||||
public BulkOperationPacked14() {
|
||||
super(14);
|
||||
assert blockCount() == 7;
|
||||
assert valueCount() == 32;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -77,7 +75,7 @@ final class BulkOperationPacked14 extends BulkOperationPacked {
|
||||
|
||||
@Override
|
||||
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
|
||||
for (int i = 0; i < 8 * iterations; ++i) {
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
final int byte0 = blocks[blocksOffset++] & 0xFF;
|
||||
final int byte1 = blocks[blocksOffset++] & 0xFF;
|
||||
values[valuesOffset++] = (byte0 << 6) | (byte1 >>> 2);
|
||||
@ -139,7 +137,7 @@ final class BulkOperationPacked14 extends BulkOperationPacked {
|
||||
|
||||
@Override
|
||||
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
|
||||
for (int i = 0; i < 8 * iterations; ++i) {
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
final long byte0 = blocks[blocksOffset++] & 0xFF;
|
||||
final long byte1 = blocks[blocksOffset++] & 0xFF;
|
||||
values[valuesOffset++] = (byte0 << 6) | (byte1 >>> 2);
|
||||
|
@ -26,8 +26,6 @@ final class BulkOperationPacked15 extends BulkOperationPacked {
|
||||
|
||||
public BulkOperationPacked15() {
|
||||
super(15);
|
||||
assert blockCount() == 15;
|
||||
assert valueCount() == 64;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -117,7 +115,7 @@ final class BulkOperationPacked15 extends BulkOperationPacked {
|
||||
|
||||
@Override
|
||||
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
|
||||
for (int i = 0; i < 8 * iterations; ++i) {
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
final int byte0 = blocks[blocksOffset++] & 0xFF;
|
||||
final int byte1 = blocks[blocksOffset++] & 0xFF;
|
||||
values[valuesOffset++] = (byte0 << 7) | (byte1 >>> 1);
|
||||
@ -231,7 +229,7 @@ final class BulkOperationPacked15 extends BulkOperationPacked {
|
||||
|
||||
@Override
|
||||
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
|
||||
for (int i = 0; i < 8 * iterations; ++i) {
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
final long byte0 = blocks[blocksOffset++] & 0xFF;
|
||||
final long byte1 = blocks[blocksOffset++] & 0xFF;
|
||||
values[valuesOffset++] = (byte0 << 7) | (byte1 >>> 1);
|
||||
|
@ -26,8 +26,6 @@ final class BulkOperationPacked16 extends BulkOperationPacked {
|
||||
|
||||
public BulkOperationPacked16() {
|
||||
super(16);
|
||||
assert blockCount() == 1;
|
||||
assert valueCount() == 4;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -42,7 +40,7 @@ final class BulkOperationPacked16 extends BulkOperationPacked {
|
||||
|
||||
@Override
|
||||
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
|
||||
for (int j = 0; j < 4 * iterations; ++j) {
|
||||
for (int j = 0; j < iterations; ++j) {
|
||||
values[valuesOffset++] = ((blocks[blocksOffset++] & 0xFF) << 8) | (blocks[blocksOffset++] & 0xFF);
|
||||
}
|
||||
}
|
||||
@ -59,7 +57,7 @@ final class BulkOperationPacked16 extends BulkOperationPacked {
|
||||
|
||||
@Override
|
||||
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
|
||||
for (int j = 0; j < 4 * iterations; ++j) {
|
||||
for (int j = 0; j < iterations; ++j) {
|
||||
values[valuesOffset++] = ((blocks[blocksOffset++] & 0xFFL) << 8) | (blocks[blocksOffset++] & 0xFFL);
|
||||
}
|
||||
}
|
||||
|
@ -26,8 +26,6 @@ final class BulkOperationPacked17 extends BulkOperationPacked {
|
||||
|
||||
public BulkOperationPacked17() {
|
||||
super(17);
|
||||
assert blockCount() == 17;
|
||||
assert valueCount() == 64;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -119,7 +117,7 @@ final class BulkOperationPacked17 extends BulkOperationPacked {
|
||||
|
||||
@Override
|
||||
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
|
||||
for (int i = 0; i < 8 * iterations; ++i) {
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
final int byte0 = blocks[blocksOffset++] & 0xFF;
|
||||
final int byte1 = blocks[blocksOffset++] & 0xFF;
|
||||
final int byte2 = blocks[blocksOffset++] & 0xFF;
|
||||
@ -237,7 +235,7 @@ final class BulkOperationPacked17 extends BulkOperationPacked {
|
||||
|
||||
@Override
|
||||
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
|
||||
for (int i = 0; i < 8 * iterations; ++i) {
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
final long byte0 = blocks[blocksOffset++] & 0xFF;
|
||||
final long byte1 = blocks[blocksOffset++] & 0xFF;
|
||||
final long byte2 = blocks[blocksOffset++] & 0xFF;
|
||||
|
@ -26,8 +26,6 @@ final class BulkOperationPacked18 extends BulkOperationPacked {
|
||||
|
||||
public BulkOperationPacked18() {
|
||||
super(18);
|
||||
assert blockCount() == 9;
|
||||
assert valueCount() == 32;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -79,7 +77,7 @@ final class BulkOperationPacked18 extends BulkOperationPacked {
|
||||
|
||||
@Override
|
||||
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
|
||||
for (int i = 0; i < 8 * iterations; ++i) {
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
final int byte0 = blocks[blocksOffset++] & 0xFF;
|
||||
final int byte1 = blocks[blocksOffset++] & 0xFF;
|
||||
final int byte2 = blocks[blocksOffset++] & 0xFF;
|
||||
@ -145,7 +143,7 @@ final class BulkOperationPacked18 extends BulkOperationPacked {
|
||||
|
||||
@Override
|
||||
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
|
||||
for (int i = 0; i < 8 * iterations; ++i) {
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
final long byte0 = blocks[blocksOffset++] & 0xFF;
|
||||
final long byte1 = blocks[blocksOffset++] & 0xFF;
|
||||
final long byte2 = blocks[blocksOffset++] & 0xFF;
|
||||
|
@ -26,8 +26,6 @@ final class BulkOperationPacked19 extends BulkOperationPacked {
|
||||
|
||||
public BulkOperationPacked19() {
|
||||
super(19);
|
||||
assert blockCount() == 19;
|
||||
assert valueCount() == 64;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -121,7 +119,7 @@ final class BulkOperationPacked19 extends BulkOperationPacked {
|
||||
|
||||
@Override
|
||||
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
|
||||
for (int i = 0; i < 8 * iterations; ++i) {
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
final int byte0 = blocks[blocksOffset++] & 0xFF;
|
||||
final int byte1 = blocks[blocksOffset++] & 0xFF;
|
||||
final int byte2 = blocks[blocksOffset++] & 0xFF;
|
||||
@ -243,7 +241,7 @@ final class BulkOperationPacked19 extends BulkOperationPacked {
|
||||
|
||||
@Override
|
||||
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
|
||||
for (int i = 0; i < 8 * iterations; ++i) {
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
final long byte0 = blocks[blocksOffset++] & 0xFF;
|
||||
final long byte1 = blocks[blocksOffset++] & 0xFF;
|
||||
final long byte2 = blocks[blocksOffset++] & 0xFF;
|
||||
|
@ -26,8 +26,6 @@ final class BulkOperationPacked2 extends BulkOperationPacked {
|
||||
|
||||
public BulkOperationPacked2() {
|
||||
super(2);
|
||||
assert blockCount() == 1;
|
||||
assert valueCount() == 32;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -42,7 +40,7 @@ final class BulkOperationPacked2 extends BulkOperationPacked {
|
||||
|
||||
@Override
|
||||
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
|
||||
for (int j = 0; j < 8 * iterations; ++j) {
|
||||
for (int j = 0; j < iterations; ++j) {
|
||||
final byte block = blocks[blocksOffset++];
|
||||
values[valuesOffset++] = (block >>> 6) & 3;
|
||||
values[valuesOffset++] = (block >>> 4) & 3;
|
||||
@ -63,7 +61,7 @@ final class BulkOperationPacked2 extends BulkOperationPacked {
|
||||
|
||||
@Override
|
||||
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
|
||||
for (int j = 0; j < 8 * iterations; ++j) {
|
||||
for (int j = 0; j < iterations; ++j) {
|
||||
final byte block = blocks[blocksOffset++];
|
||||
values[valuesOffset++] = (block >>> 6) & 3;
|
||||
values[valuesOffset++] = (block >>> 4) & 3;
|
||||
|
@ -26,8 +26,6 @@ final class BulkOperationPacked20 extends BulkOperationPacked {
|
||||
|
||||
public BulkOperationPacked20() {
|
||||
super(20);
|
||||
assert blockCount() == 5;
|
||||
assert valueCount() == 16;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -59,7 +57,7 @@ final class BulkOperationPacked20 extends BulkOperationPacked {
|
||||
|
||||
@Override
|
||||
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
|
||||
for (int i = 0; i < 8 * iterations; ++i) {
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
final int byte0 = blocks[blocksOffset++] & 0xFF;
|
||||
final int byte1 = blocks[blocksOffset++] & 0xFF;
|
||||
final int byte2 = blocks[blocksOffset++] & 0xFF;
|
||||
@ -99,7 +97,7 @@ final class BulkOperationPacked20 extends BulkOperationPacked {
|
||||
|
||||
@Override
|
||||
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
|
||||
for (int i = 0; i < 8 * iterations; ++i) {
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
final long byte0 = blocks[blocksOffset++] & 0xFF;
|
||||
final long byte1 = blocks[blocksOffset++] & 0xFF;
|
||||
final long byte2 = blocks[blocksOffset++] & 0xFF;
|
||||
|
@ -26,8 +26,6 @@ final class BulkOperationPacked21 extends BulkOperationPacked {
|
||||
|
||||
public BulkOperationPacked21() {
|
||||
super(21);
|
||||
assert blockCount() == 21;
|
||||
assert valueCount() == 64;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -123,7 +121,7 @@ final class BulkOperationPacked21 extends BulkOperationPacked {
|
||||
|
||||
@Override
|
||||
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
|
||||
for (int i = 0; i < 8 * iterations; ++i) {
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
final int byte0 = blocks[blocksOffset++] & 0xFF;
|
||||
final int byte1 = blocks[blocksOffset++] & 0xFF;
|
||||
final int byte2 = blocks[blocksOffset++] & 0xFF;
|
||||
@ -249,7 +247,7 @@ final class BulkOperationPacked21 extends BulkOperationPacked {
|
||||
|
||||
@Override
|
||||
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
|
||||
for (int i = 0; i < 8 * iterations; ++i) {
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
final long byte0 = blocks[blocksOffset++] & 0xFF;
|
||||
final long byte1 = blocks[blocksOffset++] & 0xFF;
|
||||
final long byte2 = blocks[blocksOffset++] & 0xFF;
|
||||
|
@ -26,8 +26,6 @@ final class BulkOperationPacked22 extends BulkOperationPacked {
|
||||
|
||||
public BulkOperationPacked22() {
|
||||
super(22);
|
||||
assert blockCount() == 11;
|
||||
assert valueCount() == 32;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -81,7 +79,7 @@ final class BulkOperationPacked22 extends BulkOperationPacked {
|
||||
|
||||
@Override
|
||||
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
|
||||
for (int i = 0; i < 8 * iterations; ++i) {
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
final int byte0 = blocks[blocksOffset++] & 0xFF;
|
||||
final int byte1 = blocks[blocksOffset++] & 0xFF;
|
||||
final int byte2 = blocks[blocksOffset++] & 0xFF;
|
||||
@ -151,7 +149,7 @@ final class BulkOperationPacked22 extends BulkOperationPacked {
|
||||
|
||||
@Override
|
||||
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
|
||||
for (int i = 0; i < 8 * iterations; ++i) {
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
final long byte0 = blocks[blocksOffset++] & 0xFF;
|
||||
final long byte1 = blocks[blocksOffset++] & 0xFF;
|
||||
final long byte2 = blocks[blocksOffset++] & 0xFF;
|
||||
|
@ -26,8 +26,6 @@ final class BulkOperationPacked23 extends BulkOperationPacked {
|
||||
|
||||
public BulkOperationPacked23() {
|
||||
super(23);
|
||||
assert blockCount() == 23;
|
||||
assert valueCount() == 64;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -125,7 +123,7 @@ final class BulkOperationPacked23 extends BulkOperationPacked {
|
||||
|
||||
@Override
|
||||
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
|
||||
for (int i = 0; i < 8 * iterations; ++i) {
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
final int byte0 = blocks[blocksOffset++] & 0xFF;
|
||||
final int byte1 = blocks[blocksOffset++] & 0xFF;
|
||||
final int byte2 = blocks[blocksOffset++] & 0xFF;
|
||||
@ -255,7 +253,7 @@ final class BulkOperationPacked23 extends BulkOperationPacked {
|
||||
|
||||
@Override
|
||||
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
|
||||
for (int i = 0; i < 8 * iterations; ++i) {
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
final long byte0 = blocks[blocksOffset++] & 0xFF;
|
||||
final long byte1 = blocks[blocksOffset++] & 0xFF;
|
||||
final long byte2 = blocks[blocksOffset++] & 0xFF;
|
||||
|
@ -26,8 +26,6 @@ final class BulkOperationPacked24 extends BulkOperationPacked {
|
||||
|
||||
public BulkOperationPacked24() {
|
||||
super(24);
|
||||
assert blockCount() == 3;
|
||||
assert valueCount() == 8;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -49,7 +47,7 @@ final class BulkOperationPacked24 extends BulkOperationPacked {
|
||||
|
||||
@Override
|
||||
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
|
||||
for (int i = 0; i < 8 * iterations; ++i) {
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
final int byte0 = blocks[blocksOffset++] & 0xFF;
|
||||
final int byte1 = blocks[blocksOffset++] & 0xFF;
|
||||
final int byte2 = blocks[blocksOffset++] & 0xFF;
|
||||
@ -76,7 +74,7 @@ final class BulkOperationPacked24 extends BulkOperationPacked {
|
||||
|
||||
@Override
|
||||
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
|
||||
for (int i = 0; i < 8 * iterations; ++i) {
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
final long byte0 = blocks[blocksOffset++] & 0xFF;
|
||||
final long byte1 = blocks[blocksOffset++] & 0xFF;
|
||||
final long byte2 = blocks[blocksOffset++] & 0xFF;
|
||||
|
@ -26,8 +26,6 @@ final class BulkOperationPacked3 extends BulkOperationPacked {
|
||||
|
||||
public BulkOperationPacked3() {
|
||||
super(3);
|
||||
assert blockCount() == 3;
|
||||
assert valueCount() == 64;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -105,7 +103,7 @@ final class BulkOperationPacked3 extends BulkOperationPacked {
|
||||
|
||||
@Override
|
||||
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
|
||||
for (int i = 0; i < 8 * iterations; ++i) {
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
final int byte0 = blocks[blocksOffset++] & 0xFF;
|
||||
values[valuesOffset++] = byte0 >>> 5;
|
||||
values[valuesOffset++] = (byte0 >>> 2) & 7;
|
||||
@ -195,7 +193,7 @@ final class BulkOperationPacked3 extends BulkOperationPacked {
|
||||
|
||||
@Override
|
||||
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
|
||||
for (int i = 0; i < 8 * iterations; ++i) {
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
final long byte0 = blocks[blocksOffset++] & 0xFF;
|
||||
values[valuesOffset++] = byte0 >>> 5;
|
||||
values[valuesOffset++] = (byte0 >>> 2) & 7;
|
||||
|
@ -26,8 +26,6 @@ final class BulkOperationPacked4 extends BulkOperationPacked {
|
||||
|
||||
public BulkOperationPacked4() {
|
||||
super(4);
|
||||
assert blockCount() == 1;
|
||||
assert valueCount() == 16;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -42,7 +40,7 @@ final class BulkOperationPacked4 extends BulkOperationPacked {
|
||||
|
||||
@Override
|
||||
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
|
||||
for (int j = 0; j < 8 * iterations; ++j) {
|
||||
for (int j = 0; j < iterations; ++j) {
|
||||
final byte block = blocks[blocksOffset++];
|
||||
values[valuesOffset++] = (block >>> 4) & 15;
|
||||
values[valuesOffset++] = block & 15;
|
||||
@ -61,7 +59,7 @@ final class BulkOperationPacked4 extends BulkOperationPacked {
|
||||
|
||||
@Override
|
||||
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
|
||||
for (int j = 0; j < 8 * iterations; ++j) {
|
||||
for (int j = 0; j < iterations; ++j) {
|
||||
final byte block = blocks[blocksOffset++];
|
||||
values[valuesOffset++] = (block >>> 4) & 15;
|
||||
values[valuesOffset++] = block & 15;
|
||||
|
@ -26,8 +26,6 @@ final class BulkOperationPacked5 extends BulkOperationPacked {
|
||||
|
||||
public BulkOperationPacked5() {
|
||||
super(5);
|
||||
assert blockCount() == 5;
|
||||
assert valueCount() == 64;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -107,7 +105,7 @@ final class BulkOperationPacked5 extends BulkOperationPacked {
|
||||
|
||||
@Override
|
||||
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
|
||||
for (int i = 0; i < 8 * iterations; ++i) {
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
final int byte0 = blocks[blocksOffset++] & 0xFF;
|
||||
values[valuesOffset++] = byte0 >>> 3;
|
||||
final int byte1 = blocks[blocksOffset++] & 0xFF;
|
||||
@ -201,7 +199,7 @@ final class BulkOperationPacked5 extends BulkOperationPacked {
|
||||
|
||||
@Override
|
||||
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
|
||||
for (int i = 0; i < 8 * iterations; ++i) {
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
final long byte0 = blocks[blocksOffset++] & 0xFF;
|
||||
values[valuesOffset++] = byte0 >>> 3;
|
||||
final long byte1 = blocks[blocksOffset++] & 0xFF;
|
||||
|
@ -26,8 +26,6 @@ final class BulkOperationPacked6 extends BulkOperationPacked {
|
||||
|
||||
public BulkOperationPacked6() {
|
||||
super(6);
|
||||
assert blockCount() == 3;
|
||||
assert valueCount() == 32;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -73,7 +71,7 @@ final class BulkOperationPacked6 extends BulkOperationPacked {
|
||||
|
||||
@Override
|
||||
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
|
||||
for (int i = 0; i < 8 * iterations; ++i) {
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
final int byte0 = blocks[blocksOffset++] & 0xFF;
|
||||
values[valuesOffset++] = byte0 >>> 2;
|
||||
final int byte1 = blocks[blocksOffset++] & 0xFF;
|
||||
@ -127,7 +125,7 @@ final class BulkOperationPacked6 extends BulkOperationPacked {
|
||||
|
||||
@Override
|
||||
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
|
||||
for (int i = 0; i < 8 * iterations; ++i) {
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
final long byte0 = blocks[blocksOffset++] & 0xFF;
|
||||
values[valuesOffset++] = byte0 >>> 2;
|
||||
final long byte1 = blocks[blocksOffset++] & 0xFF;
|
||||
|
@ -26,8 +26,6 @@ final class BulkOperationPacked7 extends BulkOperationPacked {
|
||||
|
||||
public BulkOperationPacked7() {
|
||||
super(7);
|
||||
assert blockCount() == 7;
|
||||
assert valueCount() == 64;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -109,7 +107,7 @@ final class BulkOperationPacked7 extends BulkOperationPacked {
|
||||
|
||||
@Override
|
||||
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
|
||||
for (int i = 0; i < 8 * iterations; ++i) {
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
final int byte0 = blocks[blocksOffset++] & 0xFF;
|
||||
values[valuesOffset++] = byte0 >>> 1;
|
||||
final int byte1 = blocks[blocksOffset++] & 0xFF;
|
||||
@ -207,7 +205,7 @@ final class BulkOperationPacked7 extends BulkOperationPacked {
|
||||
|
||||
@Override
|
||||
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
|
||||
for (int i = 0; i < 8 * iterations; ++i) {
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
final long byte0 = blocks[blocksOffset++] & 0xFF;
|
||||
values[valuesOffset++] = byte0 >>> 1;
|
||||
final long byte1 = blocks[blocksOffset++] & 0xFF;
|
||||
|
@ -26,8 +26,6 @@ final class BulkOperationPacked8 extends BulkOperationPacked {
|
||||
|
||||
public BulkOperationPacked8() {
|
||||
super(8);
|
||||
assert blockCount() == 1;
|
||||
assert valueCount() == 8;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -42,7 +40,7 @@ final class BulkOperationPacked8 extends BulkOperationPacked {
|
||||
|
||||
@Override
|
||||
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
|
||||
for (int j = 0; j < 8 * iterations; ++j) {
|
||||
for (int j = 0; j < iterations; ++j) {
|
||||
values[valuesOffset++] = blocks[blocksOffset++] & 0xFF;
|
||||
}
|
||||
}
|
||||
@ -59,7 +57,7 @@ final class BulkOperationPacked8 extends BulkOperationPacked {
|
||||
|
||||
@Override
|
||||
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
|
||||
for (int j = 0; j < 8 * iterations; ++j) {
|
||||
for (int j = 0; j < iterations; ++j) {
|
||||
values[valuesOffset++] = blocks[blocksOffset++] & 0xFF;
|
||||
}
|
||||
}
|
||||
|
@ -26,8 +26,6 @@ final class BulkOperationPacked9 extends BulkOperationPacked {
|
||||
|
||||
public BulkOperationPacked9() {
|
||||
super(9);
|
||||
assert blockCount() == 9;
|
||||
assert valueCount() == 64;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -111,7 +109,7 @@ final class BulkOperationPacked9 extends BulkOperationPacked {
|
||||
|
||||
@Override
|
||||
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
|
||||
for (int i = 0; i < 8 * iterations; ++i) {
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
final int byte0 = blocks[blocksOffset++] & 0xFF;
|
||||
final int byte1 = blocks[blocksOffset++] & 0xFF;
|
||||
values[valuesOffset++] = (byte0 << 1) | (byte1 >>> 7);
|
||||
@ -213,7 +211,7 @@ final class BulkOperationPacked9 extends BulkOperationPacked {
|
||||
|
||||
@Override
|
||||
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
|
||||
for (int i = 0; i < 8 * iterations; ++i) {
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
final long byte0 = blocks[blocksOffset++] & 0xFF;
|
||||
final long byte1 = blocks[blocksOffset++] & 0xFF;
|
||||
values[valuesOffset++] = (byte0 << 1) | (byte1 >>> 7);
|
||||
|
@ -35,12 +35,22 @@ final class BulkOperationPackedSingleBlock extends BulkOperation {
|
||||
}
|
||||
|
||||
@Override
|
||||
public final int blockCount() {
|
||||
public final int longBlockCount() {
|
||||
return BLOCK_COUNT;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int valueCount() {
|
||||
public final int byteBlockCount() {
|
||||
return BLOCK_COUNT * 8;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int longValueCount() {
|
||||
return valueCount;
|
||||
}
|
||||
|
||||
@Override
|
||||
public final int byteValueCount() {
|
||||
return valueCount;
|
||||
}
|
||||
|
||||
|
@ -140,9 +140,9 @@ class Packed64 extends PackedInts.MutableImpl {
|
||||
final PackedInts.Decoder decoder = BulkOperation.of(PackedInts.Format.PACKED, bitsPerValue);
|
||||
|
||||
// go to the next block where the value does not span across two blocks
|
||||
final int offsetInBlocks = index % decoder.valueCount();
|
||||
final int offsetInBlocks = index % decoder.longValueCount();
|
||||
if (offsetInBlocks != 0) {
|
||||
for (int i = offsetInBlocks; i < decoder.valueCount() && len > 0; ++i) {
|
||||
for (int i = offsetInBlocks; i < decoder.longValueCount() && len > 0; ++i) {
|
||||
arr[off++] = get(index++);
|
||||
--len;
|
||||
}
|
||||
@ -152,12 +152,12 @@ class Packed64 extends PackedInts.MutableImpl {
|
||||
}
|
||||
|
||||
// bulk get
|
||||
assert index % decoder.valueCount() == 0;
|
||||
assert index % decoder.longValueCount() == 0;
|
||||
int blockIndex = (int) ((long) index * bitsPerValue) >>> BLOCK_BITS;
|
||||
assert (((long)index * bitsPerValue) & MOD_MASK) == 0;
|
||||
final int iterations = len / decoder.valueCount();
|
||||
final int iterations = len / decoder.longValueCount();
|
||||
decoder.decode(blocks, blockIndex, arr, off, iterations);
|
||||
final int gotValues = iterations * decoder.valueCount();
|
||||
final int gotValues = iterations * decoder.longValueCount();
|
||||
index += gotValues;
|
||||
len -= gotValues;
|
||||
assert len >= 0;
|
||||
@ -204,9 +204,9 @@ class Packed64 extends PackedInts.MutableImpl {
|
||||
final PackedInts.Encoder encoder = BulkOperation.of(PackedInts.Format.PACKED, bitsPerValue);
|
||||
|
||||
// go to the next block where the value does not span across two blocks
|
||||
final int offsetInBlocks = index % encoder.valueCount();
|
||||
final int offsetInBlocks = index % encoder.longValueCount();
|
||||
if (offsetInBlocks != 0) {
|
||||
for (int i = offsetInBlocks; i < encoder.valueCount() && len > 0; ++i) {
|
||||
for (int i = offsetInBlocks; i < encoder.longValueCount() && len > 0; ++i) {
|
||||
set(index++, arr[off++]);
|
||||
--len;
|
||||
}
|
||||
@ -216,12 +216,12 @@ class Packed64 extends PackedInts.MutableImpl {
|
||||
}
|
||||
|
||||
// bulk set
|
||||
assert index % encoder.valueCount() == 0;
|
||||
assert index % encoder.longValueCount() == 0;
|
||||
int blockIndex = (int) ((long) index * bitsPerValue) >>> BLOCK_BITS;
|
||||
assert (((long)index * bitsPerValue) & MOD_MASK) == 0;
|
||||
final int iterations = len / encoder.valueCount();
|
||||
final int iterations = len / encoder.longValueCount();
|
||||
encoder.encode(arr, off, blocks, blockIndex, iterations);
|
||||
final int setValues = iterations * encoder.valueCount();
|
||||
final int setValues = iterations * encoder.longValueCount();
|
||||
index += setValues;
|
||||
len -= setValues;
|
||||
assert len >= 0;
|
||||
|
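As an illustration of the alignment logic above (the width, index and length are example numbers, and this assumes the COMPACT overhead ratio picks the plain Packed64 implementation), bulk decoding may only start at a multiple of longValueCount(), because only those values begin exactly at a long boundary:

    // bitsPerValue = 20 -> longValueCount() == 16
    PackedInts.Mutable packed = PackedInts.getMutable(1000, 20, PackedInts.COMPACT);
    long[] buf = new long[100];
    packed.get(21, buf, 0, 100);
    // 21 % 16 == 5: values 21..31 (11 of them) are copied one by one, then index == 32 is
    // long-aligned and 89 / 16 == 5 bulk iterations decode the next 80 values in one call;
    // any trailing values are again read individually (not shown in this hunk).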
@ -92,8 +92,8 @@ abstract class Packed64SingleBlock extends PackedInts.MutableImpl {
|
||||
// bulk get
|
||||
assert index % valuesPerBlock == 0;
|
||||
final PackedInts.Decoder decoder = BulkOperation.of(PackedInts.Format.PACKED_SINGLE_BLOCK, bitsPerValue);
|
||||
assert decoder.blockCount() == 1;
|
||||
assert decoder.valueCount() == valuesPerBlock;
|
||||
assert decoder.longBlockCount() == 1;
|
||||
assert decoder.longValueCount() == valuesPerBlock;
|
||||
final int blockIndex = index / valuesPerBlock;
|
||||
final int nblocks = (index + len) / valuesPerBlock - blockIndex;
|
||||
decoder.decode(blocks, blockIndex, arr, off, nblocks);
|
||||
@ -136,8 +136,8 @@ abstract class Packed64SingleBlock extends PackedInts.MutableImpl {
|
||||
// bulk set
|
||||
assert index % valuesPerBlock == 0;
|
||||
final BulkOperation op = BulkOperation.of(PackedInts.Format.PACKED_SINGLE_BLOCK, bitsPerValue);
|
||||
assert op.blockCount() == 1;
|
||||
assert op.valueCount() == valuesPerBlock;
|
||||
assert op.longBlockCount() == 1;
|
||||
assert op.longValueCount() == valuesPerBlock;
|
||||
final int blockIndex = index / valuesPerBlock;
|
||||
final int nblocks = (index + len) / valuesPerBlock - blockIndex;
|
||||
op.encode(arr, off, blocks, blockIndex, nblocks);
|
||||
|
@ -280,15 +280,28 @@ public class PackedInts {
|
||||
public static interface Decoder {
|
||||
|
||||
/**
|
||||
* The minimum number of long blocks to decode in a single call.
|
||||
* The minimum number of long blocks to decode in a single iteration, when
|
||||
* using long decoding.
|
||||
*/
|
||||
int blockCount();
|
||||
int longBlockCount();
|
||||
|
||||
/**
|
||||
* The number of values that can be stored in <code>blockCount()</code> long
|
||||
* The number of values that can be stored in {@link #longBlockCount()} long
|
||||
* blocks.
|
||||
*/
|
||||
int valueCount();
|
||||
int longValueCount();
|
||||
|
||||
/**
|
||||
* The minimum number of byte blocks to decode in a single iteration, when
|
||||
* using byte decoding.
|
||||
*/
|
||||
int byteBlockCount();
|
||||
|
||||
/**
|
||||
* The number of values that can be stored in {@link #byteBlockCount()} byte
|
||||
* blocks.
|
||||
*/
|
||||
int byteValueCount();
|
||||
|
||||
/**
|
||||
* Read <code>iterations * blockCount()</code> blocks from <code>blocks</code>,
|
||||
@ -350,15 +363,28 @@ public class PackedInts {
|
||||
public static interface Encoder {
|
||||
|
||||
/**
|
||||
* The minimum number of long blocks to encode in a single call.
|
||||
* The minimum number of long blocks to encode in a single iteration, when
|
||||
* using long encoding.
|
||||
*/
|
||||
int blockCount();
|
||||
int longBlockCount();
|
||||
|
||||
/**
|
||||
* The number of values that can be stored in <code>blockCount()</code> long
|
||||
* The number of values that can be stored in {@link #longBlockCount()} long
|
||||
* blocks.
|
||||
*/
|
||||
int valueCount();
|
||||
int longValueCount();
|
||||
|
||||
/**
|
||||
* The minimum number of byte blocks to encode in a single iteration, when
|
||||
* using byte encoding.
|
||||
*/
|
||||
int byteBlockCount();
|
||||
|
||||
/**
|
||||
* The number of values that can be stored in {@link #byteBlockCount()} byte
|
||||
* blocks.
|
||||
*/
|
||||
int byteValueCount();
|
||||
|
||||
/**
|
||||
* Read <code>iterations * valueCount()</code> values from <code>values</code>,
|
||||
|
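A minimal end-to-end sketch of the byte-oriented contract (the bit width and iteration count are arbitrary example values): buffers sized from byteBlockCount()/byteValueCount() line up exactly, with no 8-byte padding.

    PackedInts.Encoder encoder = PackedInts.getEncoder(PackedInts.Format.PACKED, PackedInts.VERSION_CURRENT, 20);
    PackedInts.Decoder decoder = PackedInts.getDecoder(PackedInts.Format.PACKED, PackedInts.VERSION_CURRENT, 20);
    int iterations = 4;
    long[] values = new long[iterations * encoder.byteValueCount()];   // 4 * 2 = 8 values
    byte[] blocks = new byte[iterations * encoder.byteBlockCount()];   // 4 * 5 = 20 bytes
    for (int i = 0; i < values.length; ++i) values[i] = i;             // anything below 2^20
    encoder.encode(values, 0, blocks, 0, iterations);
    long[] restored = new long[values.length];
    decoder.decode(blocks, 0, restored, 0, iterations);                // restored now equals values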
@ -39,14 +39,23 @@ final class PackedReaderIterator extends PackedInts.ReaderIteratorImpl {
|
||||
this.format = format;
|
||||
this.packedIntsVersion = packedIntsVersion;
|
||||
bulkOperation = BulkOperation.of(format, bitsPerValue);
|
||||
iterations = bulkOperation.computeIterations(valueCount, mem);
|
||||
iterations = iterations(mem);
|
||||
assert valueCount == 0 || iterations > 0;
|
||||
nextBlocks = new byte[8 * iterations * bulkOperation.blockCount()];
|
||||
nextValues = new LongsRef(new long[iterations * bulkOperation.valueCount()], 0, 0);
|
||||
nextBlocks = new byte[iterations * bulkOperation.byteBlockCount()];
|
||||
nextValues = new LongsRef(new long[iterations * bulkOperation.byteValueCount()], 0, 0);
|
||||
nextValues.offset = nextValues.longs.length;
|
||||
position = -1;
|
||||
}
|
||||
|
||||
private int iterations(int mem) {
|
||||
int iterations = bulkOperation.computeIterations(valueCount, mem);
|
||||
if (packedIntsVersion < PackedInts.VERSION_BYTE_ALIGNED) {
|
||||
// make sure iterations is a multiple of 8
|
||||
iterations = (iterations + 7) & 0xFFFFFFF8;
|
||||
}
|
||||
return iterations;
|
||||
}
|
||||
|
||||
@Override
|
||||
public LongsRef next(int count) throws IOException {
|
||||
assert nextValues.length >= 0;
|
||||
|
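The rounding in iterations() exists because data written before VERSION_BYTE_ALIGNED is padded to whole 64-bit blocks, so the reader must still consume whole longs; eight byte iterations always add up to a whole number of long iterations. A small worked case (the width and starting count are example values):

    // bitsPerValue = 20: 8 byte iterations cover 8 * byteValueCount() == 16 values,
    // i.e. exactly longValueCount() == 16 values held in longBlockCount() == 5 whole longs.
    int iterations = 3;                            // whatever computeIterations() suggested
    iterations = (iterations + 7) & 0xFFFFFFF8;    // rounded up to 8: reads stay long-aligned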
@ -42,8 +42,8 @@ final class PackedWriter extends PackedInts.Writer {
|
||||
this.format = format;
|
||||
encoder = BulkOperation.of(format, bitsPerValue);
|
||||
iterations = encoder.computeIterations(valueCount, mem);
|
||||
nextBlocks = new byte[8 * iterations * encoder.blockCount()];
|
||||
nextValues = new long[iterations * encoder.valueCount()];
|
||||
nextBlocks = new byte[iterations * encoder.byteBlockCount()];
|
||||
nextValues = new long[iterations * encoder.byteValueCount()];
|
||||
off = 0;
|
||||
written = 0;
|
||||
finished = false;
|
||||
|
@ -57,28 +57,28 @@ FOOTER="""
|
||||
* For every number of bits per value, there is a minimum number of
|
||||
* blocks (b) / values (v) you need to write in order to reach the next block
|
||||
* boundary:
|
||||
* - 16 bits per value -> b=1, v=4
|
||||
* - 24 bits per value -> b=3, v=8
|
||||
* - 50 bits per value -> b=25, v=32
|
||||
* - 63 bits per value -> b=63, v=64
|
||||
* - 16 bits per value -> b=2, v=1
|
||||
* - 24 bits per value -> b=3, v=1
|
||||
* - 50 bits per value -> b=25, v=4
|
||||
* - 63 bits per value -> b=63, v=8
|
||||
* - ...
|
||||
*
|
||||
* A bulk read consists in copying <code>iterations*v</code> values that are
|
||||
* contained in <code>iterations*b</code> blocks into a <code>long[]</code>
|
||||
* (higher values of <code>iterations</code> are likely to yield a better
|
||||
* throughput) => this requires n * (b + v) longs in memory.
|
||||
* throughput) => this requires n * (b + 8v) bytes of memory.
|
||||
*
|
||||
* This method computes <code>iterations</code> as
|
||||
* <code>ramBudget / (8 * (b + v))</code> (since a long is 8 bytes).
|
||||
* <code>ramBudget / (b + 8v)</code> (since a long is 8 bytes).
|
||||
*/
|
||||
public final int computeIterations(int valueCount, int ramBudget) {
|
||||
final int iterations = (ramBudget >>> 3) / (blockCount() + valueCount());
|
||||
final int iterations = ramBudget / (byteBlockCount() + 8 * byteValueCount());
|
||||
if (iterations == 0) {
|
||||
// at least 1
|
||||
return 1;
|
||||
} else if ((iterations - 1) * blockCount() >= valueCount) {
|
||||
} else if ((iterations - 1) * byteValueCount() >= valueCount) {
|
||||
// don't allocate for more than the size of the reader
|
||||
return (int) Math.ceil((double) valueCount / valueCount());
|
||||
return (int) Math.ceil((double) valueCount / byteValueCount());
|
||||
} else {
|
||||
return iterations;
|
||||
}
|
||||
@ -131,14 +131,11 @@ def block_value_count(bpv, bits=64):
|
||||
return (blocks, values)
|
||||
|
||||
def packed64(bpv, f):
|
||||
blocks, values = block_value_count(bpv)
|
||||
mask = (1 << bpv) - 1
|
||||
|
||||
f.write("\n")
|
||||
f.write(" public BulkOperationPacked%d() {\n" %bpv)
|
||||
f.write(" super(%d);\n" %bpv)
|
||||
f.write(" assert blockCount() == %d;\n" %blocks)
|
||||
f.write(" assert valueCount() == %d;\n" %values)
|
||||
f.write(" }\n\n")
|
||||
|
||||
if bpv == 64:
|
||||
@ -215,20 +212,19 @@ def p64_decode(bpv, f, bits):
|
||||
if bits < bpv:
|
||||
f.write(" throw new UnsupportedOperationException();\n")
|
||||
else:
|
||||
|
||||
if is_power_of_two(bpv) and bpv < 8:
|
||||
f.write(" for (int j = 0; j < 8 * iterations; ++j) {\n")
|
||||
f.write(" for (int j = 0; j < iterations; ++j) {\n")
|
||||
f.write(" final byte block = blocks[blocksOffset++];\n")
|
||||
for shift in xrange(8 - bpv, 0, -bpv):
|
||||
f.write(" values[valuesOffset++] = (block >>> %d) & %d;\n" %(shift, mask))
|
||||
f.write(" values[valuesOffset++] = block & %d;\n" %mask)
|
||||
f.write(" }\n")
|
||||
elif bpv == 8:
|
||||
f.write(" for (int j = 0; j < 8 * iterations; ++j) {\n")
|
||||
f.write(" for (int j = 0; j < iterations; ++j) {\n")
|
||||
f.write(" values[valuesOffset++] = blocks[blocksOffset++] & 0xFF;\n")
|
||||
f.write(" }\n")
|
||||
elif is_power_of_two(bpv) and bpv > 8:
|
||||
f.write(" for (int j = 0; j < %d * iterations; ++j) {\n" %(64 / bpv))
|
||||
f.write(" for (int j = 0; j < iterations; ++j) {\n")
|
||||
m = bits <= 32 and "0xFF" or "0xFFL"
|
||||
f.write(" values[valuesOffset++] =")
|
||||
for i in xrange(bpv / 8 - 1):
|
||||
@ -236,7 +232,7 @@ def p64_decode(bpv, f, bits):
|
||||
f.write(" (blocks[blocksOffset++] & %s);\n" %m)
|
||||
f.write(" }\n")
|
||||
else:
|
||||
f.write(" for (int i = 0; i < 8 * iterations; ++i) {\n")
|
||||
f.write(" for (int i = 0; i < iterations; ++i) {\n")
|
||||
for i in xrange(0, byte_values):
|
||||
byte_start = i * bpv / 8
|
||||
bit_start = (i * bpv) % 8
|
||||
|
@ -212,7 +212,7 @@ public class TestPackedInts extends LuceneTestCase {
|
||||
if (!format.isSupported(bpv)) {
|
||||
continue;
|
||||
}
|
||||
final long byteCount = format.byteCount(version, valueCount, bpv);
|
||||
final long byteCount = format.byteCount(version, valueCount, bpv);
|
||||
String msg = "format=" + format + ",version=" + version + ",valueCount=" + valueCount + ",bpv=" + bpv;
|
||||
|
||||
// test iterator
|
||||
@ -706,16 +706,22 @@ public class TestPackedInts extends LuceneTestCase {
|
||||
|
||||
final PackedInts.Encoder encoder = PackedInts.getEncoder(format, PackedInts.VERSION_CURRENT, bpv);
|
||||
final PackedInts.Decoder decoder = PackedInts.getDecoder(format, PackedInts.VERSION_CURRENT, bpv);
|
||||
final int blockCount = encoder.blockCount();
|
||||
final int valueCount = encoder.valueCount();
|
||||
assertEquals(blockCount, decoder.blockCount());
|
||||
assertEquals(valueCount, decoder.valueCount());
|
||||
final int longBlockCount = encoder.longBlockCount();
|
||||
final int longValueCount = encoder.longValueCount();
|
||||
final int byteBlockCount = encoder.byteBlockCount();
|
||||
final int byteValueCount = encoder.byteValueCount();
|
||||
assertEquals(longBlockCount, decoder.longBlockCount());
|
||||
assertEquals(longValueCount, decoder.longValueCount());
|
||||
assertEquals(byteBlockCount, decoder.byteBlockCount());
|
||||
assertEquals(byteValueCount, decoder.byteValueCount());
|
||||
|
||||
final int iterations = random().nextInt(100);
|
||||
final int longIterations = random().nextInt(100);
|
||||
final int byteIterations = longIterations * longValueCount / byteValueCount;
|
||||
assertEquals(longIterations * longValueCount, byteIterations * byteValueCount);
|
||||
final int blocksOffset = random().nextInt(100);
|
||||
final int valuesOffset = random().nextInt(100);
|
||||
final int blocksOffset2 = random().nextInt(100);
|
||||
final int blocksLen = iterations * blockCount;
|
||||
final int blocksLen = longIterations * longBlockCount;
|
||||
|
||||
// 1. generate random inputs
|
||||
final long[] blocks = new long[blocksOffset + blocksLen];
|
||||
@ -729,8 +735,8 @@ public class TestPackedInts extends LuceneTestCase {
|
||||
}
|
||||
|
||||
// 2. decode
|
||||
final long[] values = new long[valuesOffset + iterations * valueCount];
|
||||
decoder.decode(blocks, blocksOffset, values, valuesOffset, iterations);
|
||||
final long[] values = new long[valuesOffset + longIterations * longValueCount];
|
||||
decoder.decode(blocks, blocksOffset, values, valuesOffset, longIterations);
|
||||
for (long value : values) {
|
||||
assertTrue(value <= PackedInts.maxValue(bpv));
|
||||
}
|
||||
@ -738,7 +744,7 @@ public class TestPackedInts extends LuceneTestCase {
|
||||
final int[] intValues;
|
||||
if (bpv <= 32) {
|
||||
intValues = new int[values.length];
|
||||
decoder.decode(blocks, blocksOffset, intValues, valuesOffset, iterations);
|
||||
decoder.decode(blocks, blocksOffset, intValues, valuesOffset, longIterations);
|
||||
assertTrue(equals(intValues, values));
|
||||
} else {
|
||||
intValues = null;
|
||||
@ -746,21 +752,21 @@ public class TestPackedInts extends LuceneTestCase {
|
||||
|
||||
// 3. re-encode
|
||||
final long[] blocks2 = new long[blocksOffset2 + blocksLen];
|
||||
encoder.encode(values, valuesOffset, blocks2, blocksOffset2, iterations);
|
||||
encoder.encode(values, valuesOffset, blocks2, blocksOffset2, longIterations);
|
||||
assertArrayEquals(msg, Arrays.copyOfRange(blocks, blocksOffset, blocks.length),
|
||||
Arrays.copyOfRange(blocks2, blocksOffset2, blocks2.length));
|
||||
// test encoding from int[]
|
||||
if (bpv <= 32) {
|
||||
final long[] blocks3 = new long[blocks2.length];
|
||||
encoder.encode(intValues, valuesOffset, blocks3, blocksOffset2, iterations);
|
||||
encoder.encode(intValues, valuesOffset, blocks3, blocksOffset2, longIterations);
|
||||
assertArrayEquals(msg, blocks2, blocks3);
|
||||
}
|
||||
|
||||
// 4. byte[] decoding
|
||||
final byte[] byteBlocks = new byte[8 * blocks.length];
|
||||
ByteBuffer.wrap(byteBlocks).asLongBuffer().put(blocks);
|
||||
final long[] values2 = new long[valuesOffset + iterations * valueCount];
|
||||
decoder.decode(byteBlocks, blocksOffset * 8, values2, valuesOffset, iterations);
|
||||
final long[] values2 = new long[valuesOffset + longIterations * longValueCount];
|
||||
decoder.decode(byteBlocks, blocksOffset * 8, values2, valuesOffset, byteIterations);
|
||||
for (long value : values2) {
|
||||
assertTrue(msg, value <= PackedInts.maxValue(bpv));
|
||||
}
|
||||
@ -768,18 +774,18 @@ public class TestPackedInts extends LuceneTestCase {
|
||||
// test decoding to int[]
|
||||
if (bpv <= 32) {
|
||||
final int[] intValues2 = new int[values2.length];
|
||||
decoder.decode(byteBlocks, blocksOffset * 8, intValues2, valuesOffset, iterations);
|
||||
decoder.decode(byteBlocks, blocksOffset * 8, intValues2, valuesOffset, byteIterations);
|
||||
assertTrue(msg, equals(intValues2, values2));
|
||||
}
|
||||
|
||||
// 5. byte[] encoding
|
||||
final byte[] blocks3 = new byte[8 * (blocksOffset2 + blocksLen)];
|
||||
encoder.encode(values, valuesOffset, blocks3, 8 * blocksOffset2, iterations);
|
||||
encoder.encode(values, valuesOffset, blocks3, 8 * blocksOffset2, byteIterations);
|
||||
assertEquals(msg, LongBuffer.wrap(blocks2), ByteBuffer.wrap(blocks3).asLongBuffer());
|
||||
// test encoding from int[]
|
||||
if (bpv <= 32) {
|
||||
final byte[] blocks4 = new byte[blocks3.length];
|
||||
encoder.encode(intValues, valuesOffset, blocks4, 8 * blocksOffset2, iterations);
|
||||
encoder.encode(intValues, valuesOffset, blocks4, 8 * blocksOffset2, byteIterations);
|
||||
assertArrayEquals(msg, blocks3, blocks4);
|
||||
}
|
||||
}
|
||||
|
@ -1,7 +1,6 @@
|
||||
package org.apache.lucene.facet.taxonomy;
|
||||
|
||||
import org.apache.lucene.util.Constants;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
@ -28,10 +27,6 @@ import org.apache.lucene.util.Constants;
|
||||
*/
|
||||
public class CategoryPath implements Comparable<CategoryPath> {
|
||||
|
||||
// TODO: revisit when IBM releases Java 7 newer than SR3 (with a fix)
|
||||
// to validate, run e.g. TestAssociationExample with -Dtests.iters=1000
|
||||
private static final boolean IS_J9_JAVA7 = Constants.JRE_IS_MINIMUM_JAVA7 && Constants.JVM_VENDOR.contains("IBM");
|
||||
|
||||
/** An empty {@link CategoryPath} */
|
||||
public static final CategoryPath EMPTY = new CategoryPath();
|
||||
|
||||
@ -48,7 +43,7 @@ public class CategoryPath implements Comparable<CategoryPath> {
|
||||
|
||||
// Used by singleton EMPTY
|
||||
private CategoryPath() {
|
||||
components = new String[0];
|
||||
components = null;
|
||||
length = 0;
|
||||
}
|
||||
|
||||
@ -67,16 +62,12 @@ public class CategoryPath implements Comparable<CategoryPath> {
|
||||
/** Construct from the given path components. */
|
||||
public CategoryPath(final String... components) {
|
||||
assert components.length > 0 : "use CategoryPath.EMPTY to create an empty path";
|
||||
if (IS_J9_JAVA7) {
|
||||
// On IBM J9 Java 1.7.0, if we do 'this.components = components', then
|
||||
// at some point its length becomes 0 ... quite unexpectedly. If JIT is
|
||||
// disabled, it doesn't happen. This bypasses the bug by copying the
|
||||
// array (note, Arrays.copyOf did not help either!).
|
||||
this.components = new String[components.length];
|
||||
System.arraycopy(components, 0, this.components, 0, components.length);
|
||||
} else {
|
||||
this.components = components;
|
||||
for (String comp : components) {
|
||||
if (comp == null || comp.isEmpty()) {
|
||||
throw new IllegalArgumentException("empty or null components not allowed: " + Arrays.toString(components));
|
||||
}
|
||||
}
|
||||
this.components = components;
|
||||
length = components.length;
|
||||
}
|
||||
|
||||
@ -84,9 +75,14 @@ public class CategoryPath implements Comparable<CategoryPath> {
|
||||
public CategoryPath(final String pathString, final char delimiter) {
|
||||
String[] comps = pathString.split(Character.toString(delimiter));
|
||||
if (comps.length == 1 && comps[0].isEmpty()) {
|
||||
components = EMPTY.components;
|
||||
components = null;
|
||||
length = 0;
|
||||
} else {
|
||||
for (String comp : comps) {
|
||||
if (comp == null || comp.isEmpty()) {
|
||||
throw new IllegalArgumentException("empty or null components not allowed: " + Arrays.toString(comps));
|
||||
}
|
||||
}
|
||||
components = comps;
|
||||
length = components.length;
|
||||
}
|
||||
|
@ -1,5 +1,7 @@
|
||||
package org.apache.lucene.facet.taxonomy;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.apache.lucene.facet.FacetTestCase;
|
||||
import org.junit.Test;
|
||||
|
||||
@ -173,9 +175,46 @@ public class TestCategoryPath extends FacetTestCase {
|
||||
pother = new CategoryPath("a/b/c/e", '/');
|
||||
assertTrue(pother.compareTo(p) > 0);
|
||||
assertTrue(p.compareTo(pother) < 0);
|
||||
pother = new CategoryPath("a/b/c//e", '/');
|
||||
assertTrue(pother.compareTo(p) < 0);
|
||||
assertTrue(p.compareTo(pother) > 0);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testEmptyNullComponents() throws Exception {
|
||||
// LUCENE-4724: CategoryPath should not allow empty or null components
|
||||
String[][] components_tests = new String[][] {
|
||||
new String[] { "", "test" }, // empty in the beginning
|
||||
new String[] { "test", "" }, // empty in the end
|
||||
new String[] { "test", "", "foo" }, // empty in the middle
|
||||
new String[] { null, "test" }, // null at the beginning
|
||||
new String[] { "test", null }, // null in the end
|
||||
new String[] { "test", null, "foo" }, // null in the middle
|
||||
};
|
||||
|
||||
for (String[] components : components_tests) {
|
||||
try {
|
||||
assertNotNull(new CategoryPath(components));
|
||||
fail("empty or null components should not be allowed: " + Arrays.toString(components));
|
||||
} catch (IllegalArgumentException e) {
|
||||
// ok
|
||||
}
|
||||
}
|
||||
|
||||
String[] path_tests = new String[] {
|
||||
"/test", // empty in the beginning
|
||||
"test//foo", // empty in the middle
|
||||
};
|
||||
|
||||
for (String path : path_tests) {
|
||||
try {
|
||||
assertNotNull(new CategoryPath(path, '/'));
|
||||
fail("empty or null components should not be allowed: " + path);
|
||||
} catch (IllegalArgumentException e) {
|
||||
// ok
|
||||
}
|
||||
}
|
||||
|
||||
// a trailing path separator is produces only one component
|
||||
assertNotNull(new CategoryPath("test/", '/'));
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -56,6 +56,12 @@ public class TestCompactLabelToOrdinal extends FacetTestCase {
|
||||
.onUnmappableCharacter(CodingErrorAction.REPLACE)
|
||||
.onMalformedInput(CodingErrorAction.REPLACE);
|
||||
uniqueValues[i] = decoder.decode(ByteBuffer.wrap(buffer, 0, size)).toString();
|
||||
// we cannot have empty path components, so eliminate all prefix as well
|
||||
// as middle consecuive delimiter chars.
|
||||
uniqueValues[i] = uniqueValues[i].replaceAll("/+", "/");
|
||||
if (uniqueValues[i].startsWith("/")) {
|
||||
uniqueValues[i] = uniqueValues[i].substring(1);
|
||||
}
|
||||
if (uniqueValues[i].indexOf(CompactLabelToOrdinal.TERMINATOR_CHAR) == -1) {
|
||||
i++;
|
||||
}
|
||||
|
@ -71,7 +71,7 @@ public final class JoinUtil {
|
||||
case None:
|
||||
TermsCollector termsCollector = TermsCollector.create(fromField, multipleValuesPerDocument);
|
||||
fromSearcher.search(fromQuery, termsCollector);
|
||||
return new TermsQuery(toField, termsCollector.getCollectorTerms());
|
||||
return new TermsQuery(toField, fromQuery, termsCollector.getCollectorTerms());
|
||||
case Total:
|
||||
case Max:
|
||||
case Avg:
|
||||
|
@ -92,6 +92,35 @@ class TermsIncludingScoreQuery extends Query {
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
if (this == obj) {
|
||||
return true;
|
||||
} if (!super.equals(obj)) {
|
||||
return false;
|
||||
} if (getClass() != obj.getClass()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
TermsIncludingScoreQuery other = (TermsIncludingScoreQuery) obj;
|
||||
if (!field.equals(other.field)) {
|
||||
return false;
|
||||
}
|
||||
if (!unwrittenOriginalQuery.equals(other.unwrittenOriginalQuery)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
final int prime = 31;
|
||||
int result = super.hashCode();
|
||||
result += prime * field.hashCode();
|
||||
result += prime * unwrittenOriginalQuery.hashCode();
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Weight createWeight(IndexSearcher searcher) throws IOException {
|
||||
final Weight originalWeight = originalQuery.createWeight(searcher);
|
||||
|
@ -21,6 +21,7 @@ import org.apache.lucene.index.FilteredTermsEnum;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.search.MultiTermQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefHash;
|
||||
@ -37,13 +38,15 @@ import java.util.Comparator;
|
||||
class TermsQuery extends MultiTermQuery {
|
||||
|
||||
private final BytesRefHash terms;
|
||||
private final Query fromQuery; // Used for equals() only
|
||||
|
||||
/**
|
||||
* @param field The field that should contain terms that are specified in the previous parameter
|
||||
* @param terms The terms that matching documents should have. The terms must be sorted by natural order.
|
||||
*/
|
||||
TermsQuery(String field, BytesRefHash terms) {
|
||||
TermsQuery(String field, Query fromQuery, BytesRefHash terms) {
|
||||
super(field);
|
||||
this.fromQuery = fromQuery;
|
||||
this.terms = terms;
|
||||
}
|
||||
|
||||
@ -63,6 +66,31 @@ class TermsQuery extends MultiTermQuery {
|
||||
'}';
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
if (this == obj) {
|
||||
return true;
|
||||
} if (!super.equals(obj)) {
|
||||
return false;
|
||||
} if (getClass() != obj.getClass()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
TermsQuery other = (TermsQuery) obj;
|
||||
if (!fromQuery.equals(other.fromQuery)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
final int prime = 31;
|
||||
int result = super.hashCode();
|
||||
result += prime * fromQuery.hashCode();
|
||||
return result;
|
||||
}
|
||||
|
||||
static class SeekingTermSetTermsEnum extends FilteredTermsEnum {
|
||||
|
||||
private final BytesRefHash terms;
|
||||
|
@ -74,7 +74,7 @@ public class CommonTermsQuery extends Query {
|
||||
protected final Occur highFreqOccur;
|
||||
protected float lowFreqBoost = 1.0f;
|
||||
protected float highFreqBoost = 1.0f;
|
||||
protected int minNrShouldMatch = 0;
|
||||
protected float minNrShouldMatch = 0;
|
||||
|
||||
/**
|
||||
* Creates a new {@link CommonTermsQuery}
|
||||
@ -84,7 +84,7 @@ public class CommonTermsQuery extends Query {
|
||||
* @param lowFreqOccur
|
||||
* {@link Occur} used for low frequency terms
|
||||
* @param maxTermFrequency
|
||||
* a value in [0..1] (or absolute number >=1) representing the
|
||||
* a value in [0..1) (or absolute number >=1) representing the
|
||||
* maximum threshold of a terms document frequency to be considered a
|
||||
* low frequency term.
|
||||
* @throws IllegalArgumentException
|
||||
@ -104,7 +104,7 @@ public class CommonTermsQuery extends Query {
|
||||
* @param lowFreqOccur
|
||||
* {@link Occur} used for low frequency terms
|
||||
* @param maxTermFrequency
|
||||
* a value in [0..1] (or absolute number >=1) representing the
|
||||
* a value in [0..1) (or absolute number >=1) representing the
|
||||
* maximum threshold of a terms document frequency to be considered a
|
||||
* low frequency term.
|
||||
* @param disableCoord
|
||||
@ -160,15 +160,19 @@ public class CommonTermsQuery extends Query {
|
||||
return buildQuery(maxDoc, contextArray, queryTerms);
|
||||
}
|
||||
|
||||
protected int calcLowFreqMinimumNumberShouldMatch(int numOptional) {
|
||||
if (minNrShouldMatch >= 1.0f || minNrShouldMatch == 0.0f) {
|
||||
return (int) minNrShouldMatch;
|
||||
}
|
||||
return (int) (Math.round(minNrShouldMatch * numOptional));
|
||||
}
|
||||
|
||||
protected Query buildQuery(final int maxDoc,
|
||||
final TermContext[] contextArray, final Term[] queryTerms) {
|
||||
BooleanQuery lowFreq = new BooleanQuery(disableCoord);
|
||||
BooleanQuery highFreq = new BooleanQuery(disableCoord);
|
||||
highFreq.setBoost(highFreqBoost);
|
||||
lowFreq.setBoost(lowFreqBoost);
|
||||
if (lowFreqOccur == Occur.SHOULD) {
|
||||
lowFreq.setMinimumNumberShouldMatch(minNrShouldMatch);
|
||||
}
|
||||
BooleanQuery query = new BooleanQuery(true);
|
||||
for (int i = 0; i < queryTerms.length; i++) {
|
||||
TermContext termContext = contextArray[i];
|
||||
@ -186,6 +190,11 @@ public class CommonTermsQuery extends Query {
|
||||
}
|
||||
|
||||
}
|
||||
final int numLowFreqClauses = lowFreq.clauses().size();
|
||||
if (lowFreqOccur == Occur.SHOULD && numLowFreqClauses > 0) {
|
||||
int minMustMatch = calcLowFreqMinimumNumberShouldMatch(numLowFreqClauses);
|
||||
lowFreq.setMinimumNumberShouldMatch(minMustMatch);
|
||||
}
|
||||
if (lowFreq.clauses().isEmpty()) {
|
||||
/*
|
||||
* if lowFreq is empty we rewrite the high freq terms in a conjunction to
|
||||
@ -265,7 +274,9 @@ public class CommonTermsQuery extends Query {
|
||||
/**
|
||||
* Specifies a minimum number of the optional BooleanClauses which must be
|
||||
* satisfied in order to produce a match on the low frequency terms query
|
||||
* part.
|
||||
* part. This method accepts a float value in the range [0..1) as a fraction
|
||||
* of the actual query terms in the low frequent clause or a number
|
||||
* <tt>>=1</tt> as an absolut number of clauses that need to match.
|
||||
*
|
||||
* <p>
|
||||
* By default no optional clauses are necessary for a match (unless there are
|
||||
@ -276,7 +287,7 @@ public class CommonTermsQuery extends Query {
|
||||
* @param min
|
||||
* the number of optional clauses that must match
|
||||
*/
|
||||
public void setMinimumNumberShouldMatch(int min) {
|
||||
public void setMinimumNumberShouldMatch(float min) {
|
||||
this.minNrShouldMatch = min;
|
||||
}
|
||||
|
||||
@ -284,7 +295,7 @@ public class CommonTermsQuery extends Query {
|
||||
* Gets the minimum number of the optional BooleanClauses which must be
|
||||
* satisfied.
|
||||
*/
|
||||
public int getMinimumNumberShouldMatch() {
|
||||
public float getMinimumNumberShouldMatch() {
|
||||
return minNrShouldMatch;
|
||||
}
|
||||
|
||||
@ -332,7 +343,7 @@ public class CommonTermsQuery extends Query {
|
||||
result = prime * result
|
||||
+ ((lowFreqOccur == null) ? 0 : lowFreqOccur.hashCode());
|
||||
result = prime * result + Float.floatToIntBits(maxTermFrequency);
|
||||
result = prime * result + minNrShouldMatch;
|
||||
result = prime * result + Float.floatToIntBits(minNrShouldMatch);
|
||||
result = prime * result + ((terms == null) ? 0 : terms.hashCode());
|
||||
return result;
|
||||
}
|
||||
|
@ -175,6 +175,90 @@ public class CommonTermsQueryTest extends LuceneTestCase {
|
||||
}
|
||||
}
|
||||
|
||||
public void testMinShouldMatch() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
|
||||
String[] docs = new String[] {"this is the end of the world right",
|
||||
"is this it or maybe not",
|
||||
"this is the end of the universe as we know it",
|
||||
"there is the famous restaurant at the end of the universe",};
|
||||
for (int i = 0; i < docs.length; i++) {
|
||||
Document doc = new Document();
|
||||
doc.add(newStringField("id", "" + i, Field.Store.YES));
|
||||
doc.add(newTextField("field", docs[i], Field.Store.NO));
|
||||
w.addDocument(doc);
|
||||
}
|
||||
|
||||
IndexReader r = w.getReader();
|
||||
IndexSearcher s = newSearcher(r);
|
||||
{
|
||||
CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
|
||||
random().nextBoolean() ? 2.0f : 0.5f);
|
||||
query.add(new Term("field", "is"));
|
||||
query.add(new Term("field", "this"));
|
||||
query.add(new Term("field", "end"));
|
||||
query.add(new Term("field", "world"));
|
||||
query.add(new Term("field", "universe"));
|
||||
query.add(new Term("field", "right"));
|
||||
query.setMinimumNumberShouldMatch(0.5f);
|
||||
TopDocs search = s.search(query, 10);
|
||||
assertEquals(search.totalHits, 1);
|
||||
assertEquals("0", r.document(search.scoreDocs[0].doc).get("id"));
|
||||
}
|
||||
{
|
||||
CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
|
||||
random().nextBoolean() ? 2.0f : 0.5f);
|
||||
query.add(new Term("field", "is"));
|
||||
query.add(new Term("field", "this"));
|
||||
query.add(new Term("field", "end"));
|
||||
query.add(new Term("field", "world"));
|
||||
query.add(new Term("field", "universe"));
|
||||
query.add(new Term("field", "right"));
|
||||
query.setMinimumNumberShouldMatch(2.0f);
|
||||
TopDocs search = s.search(query, 10);
|
||||
assertEquals(search.totalHits, 1);
|
||||
assertEquals("0", r.document(search.scoreDocs[0].doc).get("id"));
|
||||
}
|
||||
|
||||
{
|
||||
CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
|
||||
random().nextBoolean() ? 2.0f : 0.5f);
|
||||
query.add(new Term("field", "is"));
|
||||
query.add(new Term("field", "this"));
|
||||
query.add(new Term("field", "end"));
|
||||
query.add(new Term("field", "world"));
|
||||
query.add(new Term("field", "universe"));
|
||||
query.add(new Term("field", "right"));
|
||||
query.setMinimumNumberShouldMatch(0.49f);
|
||||
TopDocs search = s.search(query, 10);
|
||||
assertEquals(search.totalHits, 3);
|
||||
assertEquals("0", r.document(search.scoreDocs[0].doc).get("id"));
|
||||
assertEquals("2", r.document(search.scoreDocs[1].doc).get("id"));
|
||||
assertEquals("3", r.document(search.scoreDocs[2].doc).get("id"));
|
||||
}
|
||||
|
||||
{
|
||||
CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
|
||||
random().nextBoolean() ? 2.0f : 0.5f);
|
||||
query.add(new Term("field", "is"));
|
||||
query.add(new Term("field", "this"));
|
||||
query.add(new Term("field", "end"));
|
||||
query.add(new Term("field", "world"));
|
||||
query.add(new Term("field", "universe"));
|
||||
query.add(new Term("field", "right"));
|
||||
query.setMinimumNumberShouldMatch(1.0f);
|
||||
TopDocs search = s.search(query, 10);
|
||||
assertEquals(search.totalHits, 3);
|
||||
assertEquals("0", r.document(search.scoreDocs[0].doc).get("id"));
|
||||
assertEquals("2", r.document(search.scoreDocs[1].doc).get("id"));
|
||||
assertEquals("3", r.document(search.scoreDocs[2].doc).get("id"));
|
||||
}
|
||||
|
||||
r.close();
|
||||
w.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testIllegalOccur() {
|
||||
Random random = random();
|
||||
|
||||
|
@ -93,6 +93,8 @@ Bug Fixes
|
||||
* SOLR-3926: Solr should support better way of finding active sorts (Eirik Lygre via
|
||||
Erick Erickson)
|
||||
|
||||
* SOLR-4342: Fix DataImportHandler stats to be a prper Map (hossman)
|
||||
|
||||
Optimizations
|
||||
----------------------
|
||||
|
||||
@ -107,6 +109,12 @@ Optimizations
|
||||
|
||||
* SOLR-3915: Color Legend for Cloud UI (steffkes)
|
||||
|
||||
* SOLR-4306: Utilize indexInfo=false when gathering core names in UI
|
||||
(steffkes)
|
||||
|
||||
* SOLR-4284: Admin UI - make core list scrollable separate from the rest of
|
||||
the UI (steffkes)
|
||||
|
||||
Other Changes
|
||||
----------------------
|
||||
|
||||
|
@ -25,6 +25,7 @@ import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.params.UpdateParams;
|
||||
import org.apache.solr.common.util.ContentStreamBase;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||
import org.apache.solr.common.util.ContentStream;
|
||||
import org.apache.solr.common.util.StrUtils;
|
||||
import org.apache.solr.util.SystemIdResolver;
|
||||
@ -247,7 +248,7 @@ public class DataImportHandler extends RequestHandlerBase implements
|
||||
return super.getStatistics();
|
||||
|
||||
DocBuilder.Statistics cumulative = importer.cumulativeStatistics;
|
||||
NamedList result = new NamedList();
|
||||
SimpleOrderedMap result = new SimpleOrderedMap();
|
||||
|
||||
result.add("Status", importer.getStatus().toString());
|
||||
|
||||
|
@ -310,7 +310,7 @@ make many changes to an index in a batch and then send the
|
||||
There is also an <span class="codefrag">optimize</span> command that does the
|
||||
same things as <span class="codefrag">commit</span>, but also forces all index
|
||||
segments to be merged into a single segment -- this can be very resource
|
||||
intsenive, but may be worthwhile for improving search speed if your index
|
||||
intensive, but may be worthwhile for improving search speed if your index
|
||||
changes very infrequently.
|
||||
</p>
|
||||
<p>
|
||||
@ -411,7 +411,7 @@ and is useful when testing or debugging queries.
|
||||
<h2 class="boxed">Highlighting</h2>
|
||||
<div class="section">
|
||||
<p>
|
||||
Hit highlighting returns relevent snippets of each returned document, and highlights
|
||||
Hit highlighting returns relevant snippets of each returned document, and highlights
|
||||
terms from the query within those context snippets.
|
||||
</p>
|
||||
<p>
|
||||
@ -522,7 +522,7 @@ Try it out at
|
||||
<p>
|
||||
The <a href="http://wiki.apache.org/solr/SchemaXml">schema</a> defines
|
||||
the fields in the index and what type of analysis is applied to them. The current schema your collection is using
|
||||
may be viewed directly via the <a href="http://localhost:8983/solr/#/collection1/schema">Schema tab</a> in the Admin UI, or explored dynamicly using the <a href="http://localhost:8983/solr/#/collection1/schema-browser">Schema Browser tab</a>.
|
||||
may be viewed directly via the <a href="http://localhost:8983/solr/#/collection1/schema">Schema tab</a> in the Admin UI, or explored dynamically using the <a href="http://localhost:8983/solr/#/collection1/schema-browser">Schema Browser tab</a>.
|
||||
</p>
|
||||
<p>
|
||||
The best analysis components (tokenization and filtering) for your textual
|
||||
@ -616,7 +616,7 @@ Mousing over the section label to the left of the section will display the full
|
||||
<p>
|
||||
When both <a href="http://localhost:8983/solr/#/collection1/analysis?analysis.fieldvalue=Canon+Power-Shot+SD500&analysis.query=power+shot+sd-500&analysis.fieldtype=text_en_splitting&verbose_output=0">Index and Query</a>
|
||||
values are provided, two tables will be displayed side by side showing the
|
||||
results of each chain. Terms in the Index chain results that are equivilent
|
||||
results of each chain. Terms in the Index chain results that are equivalent
|
||||
to the final terms produced by the Query chain will be highlighted.
|
||||
</p>
|
||||
<p>
|
||||
|
@ -111,8 +111,12 @@ limitations under the License.
|
||||
|
||||
</ul>
|
||||
|
||||
<ul id="menu-selector">
|
||||
</ul>
|
||||
<div id="core-selector">
|
||||
<select data-placeholder="Core Selector"></select>
|
||||
</div>
|
||||
<div id="core-menu">
|
||||
<ul></ul>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
@ -172,7 +172,7 @@ ul
|
||||
#header
|
||||
{
|
||||
padding-bottom: 10px;
|
||||
position: absolute;
|
||||
position: fixed;
|
||||
z-index: 42;
|
||||
}
|
||||
|
||||
@ -340,12 +340,6 @@ ul
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
#content > pre
|
||||
{
|
||||
max-height: 600px;
|
||||
overflow: auto;
|
||||
}
|
||||
|
||||
#content .block
|
||||
{
|
||||
margin-bottom: 10px;
|
||||
|
@ -1,13 +1,13 @@
|
||||
#menu-wrapper
|
||||
{
|
||||
position: absolute;
|
||||
top: 90px;
|
||||
position: fixed;
|
||||
top: 120px;
|
||||
width: 150px;
|
||||
}
|
||||
|
||||
.has-environment #menu-wrapper
|
||||
{
|
||||
top: 130px;
|
||||
top: 160px;
|
||||
}
|
||||
|
||||
#menu-wrapper a
|
||||
@ -18,6 +18,23 @@
|
||||
text-overflow: ellipsis;
|
||||
}
|
||||
|
||||
#core-selector
|
||||
{
|
||||
margin-top: 20px;
|
||||
padding-right: 10px;
|
||||
}
|
||||
|
||||
#core-selector a
|
||||
{
|
||||
padding: 0;
|
||||
padding-left: 8px;
|
||||
}
|
||||
|
||||
#core-selector select
|
||||
{
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
#menu-wrapper .active p
|
||||
{
|
||||
background-color: #fafafa;
|
||||
@ -121,32 +138,27 @@
|
||||
display: none;
|
||||
}
|
||||
|
||||
#menu-selector
|
||||
{
|
||||
margin-top: 20px;
|
||||
}
|
||||
|
||||
#menu-selector p
|
||||
#core-menu p
|
||||
{
|
||||
border-top: 1px solid #f0f0f0;
|
||||
}
|
||||
|
||||
#menu-selector li:first-child p
|
||||
#core-menu li:first-child p
|
||||
{
|
||||
border-top: 0;
|
||||
}
|
||||
|
||||
#menu-selector p a
|
||||
#core-menu p a
|
||||
{
|
||||
background-image: url( ../../img/ico/status-offline.png );
|
||||
}
|
||||
|
||||
#menu-selector .active p a
|
||||
#core-menu .active p a
|
||||
{
|
||||
background-image: url( ../../img/ico/box.png );
|
||||
}
|
||||
|
||||
#menu-selector ul,
|
||||
#core-menu ul,
|
||||
#menu ul
|
||||
{
|
||||
display: none;
|
||||
@ -154,7 +166,7 @@
|
||||
padding-bottom: 10px;
|
||||
}
|
||||
|
||||
#menu-selector .active ul,
|
||||
#core-menu .active ul,
|
||||
#menu .active ul
|
||||
{
|
||||
display: block;
|
||||
@ -165,7 +177,7 @@
|
||||
border-bottom: 0;
|
||||
}
|
||||
|
||||
#menu-selector ul li a,
|
||||
#core-menu ul li a,
|
||||
#menu ul li a
|
||||
{
|
||||
background-position: 7px 50%;
|
||||
@ -175,20 +187,20 @@
|
||||
padding-left: 26px;
|
||||
}
|
||||
|
||||
#menu-selector ul li:last-child a,
|
||||
#core-menu ul li:last-child a,
|
||||
#menu ul li:last-child a
|
||||
{
|
||||
border-bottom: 0;
|
||||
}
|
||||
|
||||
#menu-selector ul li a:hover,
|
||||
#core-menu ul li a:hover,
|
||||
#menu ul li a:hover
|
||||
{
|
||||
background-color: #f0f0f0;
|
||||
color: #333;
|
||||
}
|
||||
|
||||
#menu-selector ul li.active a,
|
||||
#core-menu ul li.active a,
|
||||
#menu ul li.active a
|
||||
{
|
||||
background-color: #d0d0d0;
|
||||
@ -213,7 +225,7 @@
|
||||
#menu #cloud.global .rgraph a { background-image: url( ../../img/ico/asterisk.png ); }
|
||||
#menu #cloud.global .dump a { background-image: url( ../../img/ico/download-cloud.png ); }
|
||||
|
||||
#menu-selector .ping.error a
|
||||
#core-menu .ping.error a
|
||||
{
|
||||
|
||||
background-color: #ffcccc;
|
||||
@ -222,17 +234,18 @@
|
||||
cursor: help;
|
||||
}
|
||||
|
||||
#menu-selector .query a { background-image: url( ../../img/ico/magnifier.png ); }
|
||||
#menu-selector .schema a { background-image: url( ../../img/ico/table.png ); }
|
||||
#menu-selector .config a { background-image: url( ../../img/ico/gear.png ); }
|
||||
#menu-selector .analysis a { background-image: url( ../../img/ico/funnel.png ); }
|
||||
#menu-selector .schema-browser a { background-image: url( ../../img/ico/book-open-text.png ); }
|
||||
#menu-selector .replication a { background-image: url( ../../img/ico/node.png ); }
|
||||
#menu-selector .distribution a { background-image: url( ../../img/ico/node-select.png ); }
|
||||
#menu-selector .ping a { background-image: url( ../../img/ico/system-monitor.png ); }
|
||||
#menu-selector .logging a { background-image: url( ../../img/ico/inbox-document-text.png ); }
|
||||
#menu-selector .plugins a { background-image: url( ../../img/ico/block.png ); }
|
||||
#menu-selector .dataimport a { background-image: url( ../../img/ico/document-import.png ); }
|
||||
#core-menu .overview a { background-image: url( ../../img/ico/home.png ); }
|
||||
#core-menu .query a { background-image: url( ../../img/ico/magnifier.png ); }
|
||||
#core-menu .schema a { background-image: url( ../../img/ico/table.png ); }
|
||||
#core-menu .config a { background-image: url( ../../img/ico/gear.png ); }
|
||||
#core-menu .analysis a { background-image: url( ../../img/ico/funnel.png ); }
|
||||
#core-menu .schema-browser a { background-image: url( ../../img/ico/book-open-text.png ); }
|
||||
#core-menu .replication a { background-image: url( ../../img/ico/node.png ); }
|
||||
#core-menu .distribution a { background-image: url( ../../img/ico/node-select.png ); }
|
||||
#core-menu .ping a { background-image: url( ../../img/ico/system-monitor.png ); }
|
||||
#core-menu .logging a { background-image: url( ../../img/ico/inbox-document-text.png ); }
|
||||
#core-menu .plugins a { background-image: url( ../../img/ico/block.png ); }
|
||||
#core-menu .dataimport a { background-image: url( ../../img/ico/document-import.png ); }
|
||||
|
||||
|
||||
#content #navigation
|
||||
|
@ -545,6 +545,7 @@
|
||||
clear: left;
|
||||
float: left;
|
||||
margin-left: 2px;
|
||||
white-space: nowrap;
|
||||
}
|
||||
|
||||
#content #schema-browser #data #field .histogram-holder li:hover dl
|
||||
|
BIN
solr/webapp/web/img/ico/home.png
Executable file
BIN
solr/webapp/web/img/ico/home.png
Executable file
Binary file not shown.
After Width: | Height: | Size: 752 B |
@ -92,20 +92,26 @@ var sammy = $.sammy
|
||||
$( 'li.active', menu_wrapper )
|
||||
.removeClass( 'active' );
|
||||
|
||||
if( this.params.splat )
|
||||
// global dashboard doesn't have params.splat
|
||||
if( !this.params.splat )
|
||||
{
|
||||
var selector = '~' === this.params.splat[0][0]
|
||||
? '#' + this.params.splat[0].replace( /^~/, '' ) + '.global'
|
||||
: '#menu-selector #' + this.params.splat[0].replace( /\./g, '__' );
|
||||
this.params.splat = [ '~index' ];
|
||||
}
|
||||
|
||||
var active_element = $( selector, menu_wrapper );
|
||||
|
||||
if( 0 === active_element.size() )
|
||||
{
|
||||
this.app.error( 'There exists no core with name "' + this.params.splat[0] + '"' );
|
||||
return false;
|
||||
}
|
||||
var selector = '~' === this.params.splat[0][0]
|
||||
? '#' + this.params.splat[0].replace( /^~/, '' ) + '.global'
|
||||
: '#core-selector #' + this.params.splat[0].replace( /\./g, '__' );
|
||||
|
||||
var active_element = $( selector, menu_wrapper );
|
||||
|
||||
if( 0 === active_element.size() )
|
||||
{
|
||||
this.app.error( 'There exists no core with name "' + this.params.splat[0] + '"' );
|
||||
return false;
|
||||
}
|
||||
|
||||
if( active_element.hasClass( 'global' ) )
|
||||
{
|
||||
active_element
|
||||
.addClass( 'active' );
|
||||
|
||||
@ -115,10 +121,28 @@ var sammy = $.sammy
|
||||
.addClass( 'active' );
|
||||
}
|
||||
|
||||
if( !active_element.hasClass( 'global' ) )
|
||||
$( '#core-selector option[selected]' )
|
||||
.removeAttr( 'selected' )
|
||||
.trigger( 'liszt:updated' );
|
||||
|
||||
$( '#core-selector .chzn-container > a' )
|
||||
.addClass( 'chzn-default' );
|
||||
}
|
||||
else
|
||||
{
|
||||
active_element
|
||||
.attr( 'selected', 'selected' )
|
||||
.trigger( 'liszt:updated' );
|
||||
|
||||
if( !this.params.splat[1] )
|
||||
{
|
||||
this.active_core = active_element;
|
||||
this.params.splat[1] = 'overview';
|
||||
}
|
||||
|
||||
$( '#core-menu .' + this.params.splat[1] )
|
||||
.addClass( 'active' );
|
||||
|
||||
this.active_core = active_element;
|
||||
}
|
||||
}
|
||||
);
|
||||
@ -143,9 +167,10 @@ var solr_admin = function( app_config )
|
||||
|
||||
plugin_data = null,
|
||||
|
||||
this.menu_element = $( '#menu-selector' );
|
||||
this.config = config;
|
||||
this.menu_element = $( '#core-selector select' );
|
||||
this.core_menu = $( '#core-menu ul' );
|
||||
|
||||
this.config = config;
|
||||
this.timeout = null;
|
||||
|
||||
this.core_regex_base = '^#\\/([\\w\\d-\\.]+)';
|
||||
@ -197,6 +222,9 @@ var solr_admin = function( app_config )
|
||||
that.menu_element
|
||||
.empty();
|
||||
|
||||
var core_list = [];
|
||||
core_list.push( '<option></option>' );
|
||||
|
||||
var core_count = 0;
|
||||
for( var core_name in that.cores_data )
|
||||
{
|
||||
@ -214,32 +242,24 @@ var solr_admin = function( app_config )
|
||||
classes.push( 'default' );
|
||||
}
|
||||
|
||||
var core_tpl = '<li id="' + core_name.replace( /\./g, '__' ) + '" '
|
||||
var core_tpl = '<option '
|
||||
+ ' id="' + core_name.replace( /\./g, '__' ) + '" '
|
||||
+ ' class="' + classes.join( ' ' ) + '"'
|
||||
+ ' data-basepath="' + core_path + '"'
|
||||
+ ' schema="' + cores.status[core_name]['schema'] + '"'
|
||||
+ ' config="' + cores.status[core_name]['config'] + '"'
|
||||
+ '>' + "\n"
|
||||
+ ' <p><a href="#/' + core_name + '" title="' + core_name + '">' + core_name + '</a></p>' + "\n"
|
||||
+ ' <ul>' + "\n"
|
||||
+ ' value="#/' + core_name + '"'
|
||||
+ ' title="' + core_name + '"'
|
||||
+ '>'
|
||||
+ core_name
|
||||
+ '</option>';
|
||||
|
||||
+ ' <li class="ping"><a rel="' + core_path + '/admin/ping"><span>Ping</span></a></li>' + "\n"
|
||||
+ ' <li class="query"><a href="#/' + core_name + '/query"><span>Query</span></a></li>' + "\n"
|
||||
+ ' <li class="schema"><a href="#/' + core_name + '/schema"><span>Schema</span></a></li>' + "\n"
|
||||
+ ' <li class="config"><a href="#/' + core_name + '/config"><span>Config</span></a></li>' + "\n"
|
||||
+ ' <li class="replication"><a href="#/' + core_name + '/replication"><span>Replication</span></a></li>' + "\n"
|
||||
+ ' <li class="analysis"><a href="#/' + core_name + '/analysis"><span>Analysis</span></a></li>' + "\n"
|
||||
+ ' <li class="schema-browser"><a href="#/' + core_name + '/schema-browser"><span>Schema Browser</span></a></li>' + "\n"
|
||||
+ ' <li class="plugins"><a href="#/' + core_name + '/plugins"><span>Plugins / Stats</span></a></li>' + "\n"
|
||||
+ ' <li class="dataimport"><a href="#/' + core_name + '/dataimport"><span>Dataimport</span></a></li>' + "\n"
|
||||
|
||||
+ ' </ul>' + "\n"
|
||||
+ '</li>';
|
||||
|
||||
that.menu_element
|
||||
.append( core_tpl );
|
||||
core_list.push( core_tpl );
|
||||
}
|
||||
|
||||
that.menu_element
|
||||
.append( core_list.join( "\n" ) );
|
||||
|
||||
if( cores.initFailures )
|
||||
{
|
||||
var failures = [];
|
||||
@ -277,7 +297,7 @@ var solr_admin = function( app_config )
|
||||
$.ajax
|
||||
(
|
||||
{
|
||||
url : config.solr_path + config.core_admin_path + '?wt=json',
|
||||
url : config.solr_path + config.core_admin_path + '?wt=json&indexInfo=false',
|
||||
dataType : 'json',
|
||||
beforeSend : function( arr, form, options )
|
||||
{
|
||||
@ -288,6 +308,52 @@ var solr_admin = function( app_config )
|
||||
{
|
||||
that.set_cores_data( response );
|
||||
|
||||
that.menu_element
|
||||
.chosen()
|
||||
.off( 'change' )
|
||||
.on
|
||||
(
|
||||
'change',
|
||||
function( event )
|
||||
{
|
||||
location.href = $( 'option:selected', this ).val();
|
||||
return false;
|
||||
}
|
||||
)
|
||||
.on
|
||||
(
|
||||
'liszt:updated',
|
||||
function( event )
|
||||
{
|
||||
var core_name = $( 'option:selected', this ).text();
|
||||
|
||||
if( core_name )
|
||||
{
|
||||
that.core_menu
|
||||
.html
|
||||
(
|
||||
'<li class="overview"><a href="#/' + core_name + '"><span>Overview</span></a></li>' + "\n" +
|
||||
'<li class="ping"><a rel="' + that.config.solr_path + '/' + core_name + '/admin/ping"><span>Ping</span></a></li>' + "\n" +
|
||||
'<li class="query"><a href="#/' + core_name + '/query"><span>Query</span></a></li>' + "\n" +
|
||||
'<li class="schema"><a href="#/' + core_name + '/schema"><span>Schema</span></a></li>' + "\n" +
|
||||
'<li class="config"><a href="#/' + core_name + '/config"><span>Config</span></a></li>' + "\n" +
|
||||
'<li class="replication"><a href="#/' + core_name + '/replication"><span>Replication</span></a></li>' + "\n" +
|
||||
'<li class="analysis"><a href="#/' + core_name + '/analysis"><span>Analysis</span></a></li>' + "\n" +
|
||||
'<li class="schema-browser"><a href="#/' + core_name + '/schema-browser"><span>Schema Browser</span></a></li>' + "\n" +
|
||||
'<li class="plugins"><a href="#/' + core_name + '/plugins"><span>Plugins / Stats</span></a></li>' + "\n" +
|
||||
'<li class="dataimport"><a href="#/' + core_name + '/dataimport"><span>Dataimport</span></a></li>' + "\n"
|
||||
)
|
||||
.show();
|
||||
}
|
||||
else
|
||||
{
|
||||
that.core_menu
|
||||
.hide()
|
||||
.empty();
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
for( var core_name in response.status )
|
||||
{
|
||||
var core_path = config.solr_path + '/' + core_name;
|
||||
|
@ -20,12 +20,6 @@ sammy.bind
|
||||
'cores_load_data',
|
||||
function( event, params )
|
||||
{
|
||||
if( app.cores_data )
|
||||
{
|
||||
params.callback( app.cores_data );
|
||||
return true;
|
||||
}
|
||||
|
||||
$.ajax
|
||||
(
|
||||
{
|
||||
@ -335,7 +329,7 @@ sammy.get
|
||||
.ajaxForm
|
||||
(
|
||||
{
|
||||
url : app.config.solr_path + app.config.core_admin_path + '?wt=json',
|
||||
url : app.config.solr_path + app.config.core_admin_path + '?wt=json&indexInfo=false',
|
||||
dataType : 'json',
|
||||
beforeSubmit : function( array, form, options )
|
||||
{
|
||||
|
@ -208,9 +208,6 @@ sammy.get
|
||||
{
|
||||
var content_element = $( '#content' );
|
||||
|
||||
$( '#menu-wrapper #index' )
|
||||
.addClass( 'active' );
|
||||
|
||||
content_element
|
||||
.html( '<div id="index"></div>' );
|
||||
|
||||
|
@ -21,7 +21,7 @@ sammy.get
|
||||
/^#\/(~java-properties)$/,
|
||||
function( context )
|
||||
{
|
||||
var core_basepath = $( 'li[data-basepath]', app.menu_element ).attr( 'data-basepath' );
|
||||
var core_basepath = $( '[data-basepath]', app.menu_element ).attr( 'data-basepath' );
|
||||
var content_element = $( '#content' );
|
||||
|
||||
content_element
|
||||
|
@ -406,7 +406,7 @@ sammy.get
|
||||
/^#\/(~logging)$/,
|
||||
function( context )
|
||||
{
|
||||
var core_basepath = $( 'li[data-basepath]', app.menu_element ).attr( 'data-basepath' );
|
||||
var core_basepath = $( '[data-basepath]', app.menu_element ).attr( 'data-basepath' );
|
||||
loglevel_path = core_basepath + '/admin/logging';
|
||||
var content_element = $( '#content' );
|
||||
|
||||
@ -492,7 +492,7 @@ sammy.get
|
||||
/^#\/(~logging)\/level$/,
|
||||
function( context )
|
||||
{
|
||||
var core_basepath = $( 'li[data-basepath]', app.menu_element ).attr( 'data-basepath' );
|
||||
var core_basepath = $( '[data-basepath]', app.menu_element ).attr( 'data-basepath' );
|
||||
loglevel_path = core_basepath + '/admin/logging';
|
||||
var content_element = $( '#content' );
|
||||
|
||||
|
@ -15,7 +15,7 @@
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
$( '.ping a', app.menu_element )
|
||||
$( '.ping a', app.core_menu )
|
||||
.live
|
||||
(
|
||||
'click',
|
||||
|
@ -228,7 +228,7 @@ sammy.bind
|
||||
var related_select_element = $( '#related select', params.schema_browser_element )
|
||||
var type = 'index';
|
||||
|
||||
var sammy_basepath = '#/' + $( 'p a', params.active_core ).html() + '/schema-browser';
|
||||
var sammy_basepath = app.core_menu.find( '.active a' ).attr( 'href' );
|
||||
|
||||
if( !related_navigation_meta.hasClass( 'done' ) )
|
||||
{
|
||||
@ -640,7 +640,7 @@ sammy.bind
|
||||
}
|
||||
|
||||
related_select_element
|
||||
.attr( 'rel', '#/' + $( 'p a', params.active_core ).html() + '/schema-browser' )
|
||||
.attr( 'rel', app.core_menu.find( '.active a' ).attr( 'href' ) )
|
||||
.append( related_options )
|
||||
.chosen();
|
||||
|
||||
|
@ -21,7 +21,7 @@ sammy.get
|
||||
/^#\/(~threads)$/,
|
||||
function( context )
|
||||
{
|
||||
var core_basepath = $( 'li[data-basepath]', app.menu_element ).attr( 'data-basepath' );
|
||||
var core_basepath = $( '[data-basepath]', app.menu_element ).attr( 'data-basepath' );
|
||||
var content_element = $( '#content' );
|
||||
|
||||
$.get
|
||||
|
Loading…
x
Reference in New Issue
Block a user