mirror of https://github.com/apache/lucene.git
LUCENE-2844: spatial benchmark
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1536176 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
e45d67083e
commit
bc1b8ac507
|
@ -24,6 +24,7 @@
|
|||
<orderEntry type="library" scope="TEST" name="JUnit" level="project" />
|
||||
<orderEntry type="module" scope="TEST" module-name="lucene-test-framework" />
|
||||
<orderEntry type="module" scope="TEST" module-name="benchmark-conf" />
|
||||
<orderEntry type="module" module-name="spatial" />
|
||||
<orderEntry type="module" module-name="facet" />
|
||||
<orderEntry type="module" module-name="highlighter" />
|
||||
<orderEntry type="module" module-name="icu" />
|
||||
|
|
|
@ -11,7 +11,7 @@
|
|||
</content>
|
||||
<orderEntry type="inheritedJdk" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
<orderEntry type="module-library">
|
||||
<orderEntry type="module-library" exported="">
|
||||
<library>
|
||||
<CLASSES>
|
||||
<root url="file://$MODULE_DIR$/lib" />
|
||||
|
|
|
@ -79,6 +79,11 @@
|
|||
<artifactId>lucene-facet</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>${project.groupId}</groupId>
|
||||
<artifactId>lucene-spatial</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.ibm.icu</groupId>
|
||||
<artifactId>icu4j</artifactId>
|
||||
|
|
|
@ -37,6 +37,8 @@
|
|||
<available file="temp/20news-18828.tar.gz" property="20news-18828.exists"/>
|
||||
<available file="${working.dir}/20news-18828" property="20news-18828.expanded"/>
|
||||
<available file="${working.dir}/mini_newsgroups" property="mini.expanded"/>
|
||||
<available file="temp/allCountries.txt.bz2" property="geonames.exists"/>
|
||||
<available file="${working.dir}/geonames" property="geonames.expanded"/>
|
||||
|
||||
<available file="temp/enwiki-20070527-pages-articles.xml.bz2" property="enwiki.exists"/>
|
||||
<available file="temp/enwiki-20070527-pages-articles.xml" property="enwiki.expanded"/>
|
||||
|
@ -62,6 +64,25 @@
|
|||
<bunzip2 src="temp/enwiki-20070527-pages-articles.xml.bz2" dest="temp"/>
|
||||
</target>
|
||||
|
||||
<target name="geonames-files" depends="check-files">
|
||||
<mkdir dir="temp"/>
|
||||
<antcall target="get-geonames"/>
|
||||
<antcall target="expand-geonames"/>
|
||||
</target>
|
||||
|
||||
<target name="get-geonames" unless="geonames.exists">
|
||||
<!-- note: latest data is at: http://download.geonames.org/export/dump/allCountries.zip
|
||||
and then randomize with: gsort -R -S 1500M file.txt > file_random.txt
|
||||
and then compress with: bzip2 -9 -k file_random.txt -->
|
||||
<get src="http://people.apache.org/~dsmiley/data/geonames_20130921_randomOrder_allCountries.txt.bz2"
|
||||
dest="temp/allCountries.txt.bz2"/>
|
||||
</target>
|
||||
|
||||
<target name="expand-geonames" unless="geonames.expanded">
|
||||
<mkdir dir="${working.dir}/geonames"/>
|
||||
<bunzip2 src="temp/allCountries.txt.bz2" dest="${working.dir}/geonames"/>
|
||||
</target>
|
||||
|
||||
<target name="get-news-20" unless="20news-18828.exists">
|
||||
<get src="http://www-2.cs.cmu.edu/afs/cs.cmu.edu/project/theo-20/www/data/news20.tar.gz"
|
||||
dest="temp/news20.tar.gz"/>
|
||||
|
@ -147,8 +168,10 @@
|
|||
<pathelement path="${analyzers-common.jar}"/>
|
||||
<pathelement path="${queryparser.jar}"/>
|
||||
<pathelement path="${facet.jar}"/>
|
||||
<pathelement path="${spatial.jar}"/>
|
||||
<pathelement path="${queries.jar}"/>
|
||||
<fileset dir="${common.dir}/analysis/icu/lib"/>
|
||||
<fileset dir="${common.dir}/spatial/lib"/>
|
||||
<path refid="base.classpath"/>
|
||||
<fileset dir="lib"/>
|
||||
</path>
|
||||
|
@ -158,7 +181,8 @@
|
|||
<pathelement path="${benchmark.ext.classpath}"/>
|
||||
</path>
|
||||
|
||||
<target name="javadocs" depends="javadocs-memory,javadocs-highlighter,javadocs-analyzers-common,javadocs-queryparser,javadocs-facet,compile-core">
|
||||
<target name="javadocs" depends="javadocs-memory,javadocs-highlighter,javadocs-analyzers-common,
|
||||
javadocs-queryparser,javadocs-facet,javadocs-spatial,compile-core">
|
||||
<invoke-module-javadoc>
|
||||
<links>
|
||||
<link href="../memory"/>
|
||||
|
@ -166,6 +190,7 @@
|
|||
<link href="../analyzers-common"/>
|
||||
<link href="../queryparser"/>
|
||||
<link href="../facet"/>
|
||||
<link href="../spatial"/>
|
||||
</links>
|
||||
</invoke-module-javadoc>
|
||||
</target>
|
||||
|
@ -256,7 +281,7 @@
|
|||
</ant>
|
||||
</target>
|
||||
|
||||
<target name="init" depends="module-build.init,resolve-icu,jar-memory,jar-highlighter,jar-analyzers-common,jar-queryparser,jar-facet"/>
|
||||
<target name="init" depends="module-build.init,resolve-icu,jar-memory,jar-highlighter,jar-analyzers-common,jar-queryparser,jar-facet,jar-spatial"/>
|
||||
|
||||
<target name="compile-test" depends="copy-alg-files-for-testing,module-build.compile-test"/>
|
||||
<target name="copy-alg-files-for-testing" description="copy .alg files as resources for testing">
|
||||
|
|
|
@ -0,0 +1,111 @@
|
|||
#/**
|
||||
# * Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# * contributor license agreements. See the NOTICE file distributed with
|
||||
# * this work for additional information regarding copyright ownership.
|
||||
# * The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# * (the "License"); you may not use this file except in compliance with
|
||||
# * the License. You may obtain a copy of the License at
|
||||
# *
|
||||
# * http://www.apache.org/licenses/LICENSE-2.0
|
||||
# *
|
||||
# * Unless required by applicable law or agreed to in writing, software
|
||||
# * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# * See the License for the specific language governing permissions and
|
||||
# * limitations under the License.
|
||||
# */
|
||||
# -------------------------------------------------------------------------------------
|
||||
# Spatial search benchmark
|
||||
# In order to use this, you'll need to first run 'ant geonames-files'.
|
||||
# You may need more memory when running this: -Dtask.mem=1000M
|
||||
# For docs on what options are available, see the javadocs.
|
||||
|
||||
### Spatial Context, Grid, Strategy config
|
||||
doc.maker=org.apache.lucene.benchmark.byTask.feeds.SpatialDocMaker
|
||||
# SpatialContext: see SpatialContextFactory.makeSpatialContext
|
||||
#spatial.spatialContextFactory=com.spatial4j.core.context.jts.JtsSpatialContextFactory
|
||||
#spatial.geo=true
|
||||
#spatial.distCalculator=haversine
|
||||
#spatial.worldBounds=...
|
||||
# Spatial Grid: (PrefixTree) see SpatialPrefixTreeFactory.makeSPT
|
||||
#spatial.prefixTree=geohash (or quad)
|
||||
#spatial.maxLevels=11
|
||||
#spatial.maxDistErr (in degrees) to compute maxLevels -- defaults to 1 meter's worth
|
||||
# RecursivePrefixTreeStrategy:
|
||||
spatial.docPointsOnly=true
|
||||
#spatial.distErrPct=.25
|
||||
#spatial.prefixGridScanLevel=-4
|
||||
|
||||
### Source & Doc
|
||||
content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource
|
||||
line.parser=org.apache.lucene.benchmark.byTask.feeds.GeonamesLineParser
|
||||
docs.file=work/geonames/allCountries.txt
|
||||
doc.tokenized=false
|
||||
# Next 3 props convert doc points to circles with a random radius and then optionally bbox'es
|
||||
#doc.spatial.radiusDegrees=0.0
|
||||
#doc.spatial.radiusDegreesRandPlusMinus=0.0
|
||||
#doc.spatial.bbox=false
|
||||
|
||||
### Directory
|
||||
directory=FSDirectory
|
||||
#directory=RamDirectory
|
||||
compound=false
|
||||
merge.factor=10
|
||||
ram.flush.mb=64
|
||||
concurrent.merge.scheduler.max.thread.count=2
|
||||
|
||||
### Query
|
||||
query.maker=org.apache.lucene.benchmark.byTask.feeds.SpatialFileQueryMaker
|
||||
query.file=work/geonames/allCountries.txt
|
||||
query.file.line.parser=org.apache.lucene.benchmark.byTask.feeds.GeonamesLineParser
|
||||
query.file.maxQueries=1000
|
||||
# Next 3 props convert query points to circles with a random radius and then optionally bbox'es
|
||||
query.spatial.radiusDegrees=0
|
||||
query.spatial.radiusDegreesRandPlusMinus=3
|
||||
query.spatial.bbox=false
|
||||
|
||||
query.spatial.score=false
|
||||
#query.spatial.predicate=Intersects
|
||||
# (defaults to spatial.distErrPct)
|
||||
query.spatial.distErrPct=qDistErrPct:0.0:0.025:0.1:0.5
|
||||
|
||||
### Misc
|
||||
|
||||
log.step.AddDoc = 100000
|
||||
task.max.depth.log=1
|
||||
|
||||
# -------------------------------------------------------------------------------------
|
||||
|
||||
{ "Populate"
|
||||
ResetSystemErase
|
||||
CreateIndex
|
||||
#1 million docs
|
||||
[{ "MAddDocs" AddDoc} : 250000] : 4
|
||||
ForceMerge(1)
|
||||
CommitIndex
|
||||
CloseIndex
|
||||
|
||||
RepSumByPref MAddDocs
|
||||
} : 1
|
||||
#set above round to 0 on subsequent runs if not changing indexing but experimenting with search
|
||||
|
||||
OpenReader
|
||||
{"WarmJIT" Search > : 4000
|
||||
CloseReader
|
||||
|
||||
{ "Rounds"
|
||||
ResetSystemSoft
|
||||
|
||||
OpenReader
|
||||
Search
|
||||
{"RealQueries" Search > : 2000
|
||||
CloseReader
|
||||
|
||||
NewRound
|
||||
} : 4
|
||||
|
||||
|
||||
#RepSumByName
|
||||
RepSumByPrefRound RealQueries
|
||||
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
package org.apache.lucene.benchmark.byTask.feeds;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A line parser for Geonames.org data.
|
||||
* See <a href="http://download.geonames.org/export/dump/readme.txt">'geoname' table</a>.
|
||||
* Requires {@link SpatialDocMaker}.
|
||||
*/
|
||||
public class GeonamesLineParser extends LineDocSource.LineParser {
|
||||
|
||||
/** This header will be ignored; the geonames format is fixed and doesn't have a header line. */
|
||||
public GeonamesLineParser(String[] header) {
|
||||
super(header);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void parseLine(DocData docData, String line) {
|
||||
String[] parts = line.split("\\t", 7);//no more than first 6 fields needed
|
||||
|
||||
// Sample data line:
|
||||
// 3578267, Morne du Vitet, Morne du Vitet, 17.88333, -62.8, ...
|
||||
// ID, Name, Alternate name (unused), Lat, Lon, ...
|
||||
|
||||
docData.setID(Integer.parseInt(parts[0]));//note: overwrites ID assigned by LineDocSource
|
||||
docData.setName(parts[1]);
|
||||
docData.setBody(parts[4]+","+parts[5]); // latitude , longitude
|
||||
}
|
||||
}
|
|
@ -0,0 +1,207 @@
|
|||
package org.apache.lucene.benchmark.byTask.feeds;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import com.spatial4j.core.context.SpatialContext;
|
||||
import com.spatial4j.core.context.SpatialContextFactory;
|
||||
import com.spatial4j.core.shape.Point;
|
||||
import com.spatial4j.core.shape.Shape;
|
||||
import org.apache.lucene.benchmark.byTask.utils.Config;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.spatial.SpatialStrategy;
|
||||
import org.apache.lucene.spatial.prefix.RecursivePrefixTreeStrategy;
|
||||
import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
|
||||
import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTreeFactory;
|
||||
|
||||
import java.util.AbstractMap;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Random;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* Indexes spatial data according to a configured {@link SpatialStrategy} with optional
|
||||
* shape transformation via a configured {@link ShapeConverter}. The converter can turn points into
|
||||
* circles and bounding boxes, in order to vary the type of indexing performance tests.
|
||||
* Unless it's subclass-ed to do otherwise, this class configures a {@link SpatialContext},
|
||||
* {@link SpatialPrefixTree}, and {@link RecursivePrefixTreeStrategy}. The Strategy is made
|
||||
* available to a query maker via the static method {@link #getSpatialStrategy(int)}.
|
||||
* See spatial.alg for a listing of spatial parameters, in particular those starting with "spatial."
|
||||
* and "doc.spatial".
|
||||
*/
|
||||
public class SpatialDocMaker extends DocMaker {
|
||||
|
||||
public static final String SPATIAL_FIELD = "spatial";
|
||||
|
||||
//cache spatialStrategy by round number
|
||||
private static Map<Integer,SpatialStrategy> spatialStrategyCache = new HashMap<Integer,SpatialStrategy>();
|
||||
|
||||
private SpatialStrategy strategy;
|
||||
private ShapeConverter shapeConverter;
|
||||
|
||||
/**
|
||||
* Looks up the SpatialStrategy from the given round --
|
||||
* {@link org.apache.lucene.benchmark.byTask.utils.Config#getRoundNumber()}. It's an error
|
||||
* if it wasn't created already for this round -- when SpatialDocMaker is initialized.
|
||||
*/
|
||||
public static SpatialStrategy getSpatialStrategy(int roundNumber) {
|
||||
SpatialStrategy result = spatialStrategyCache.get(roundNumber);
|
||||
if (result == null) {
|
||||
throw new IllegalStateException("Strategy should have been init'ed by SpatialDocMaker by now");
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds a SpatialStrategy from configuration options.
|
||||
*/
|
||||
protected SpatialStrategy makeSpatialStrategy(final Config config) {
|
||||
//A Map view of Config that prefixes keys with "spatial."
|
||||
Map<String, String> configMap = new AbstractMap<String, String>() {
|
||||
@Override
|
||||
public Set<Entry<String, String>> entrySet() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String get(Object key) {
|
||||
return config.get("spatial." + key, null);
|
||||
}
|
||||
};
|
||||
|
||||
SpatialContext ctx = SpatialContextFactory.makeSpatialContext(configMap, null);
|
||||
|
||||
//Some day the strategy might be initialized with a factory but such a factory
|
||||
// is non-existent.
|
||||
return makeSpatialStrategy(config, configMap, ctx);
|
||||
}
|
||||
|
||||
protected SpatialStrategy makeSpatialStrategy(final Config config, Map<String, String> configMap,
|
||||
SpatialContext ctx) {
|
||||
//A factory for the prefix tree grid
|
||||
SpatialPrefixTree grid = SpatialPrefixTreeFactory.makeSPT(configMap, null, ctx);
|
||||
|
||||
RecursivePrefixTreeStrategy strategy = new RecursivePrefixTreeStrategy(grid, SPATIAL_FIELD) {
|
||||
{
|
||||
//protected field
|
||||
this.pointsOnly = config.get("spatial.docPointsOnly", false);
|
||||
}
|
||||
};
|
||||
|
||||
int prefixGridScanLevel = config.get("query.spatial.prefixGridScanLevel", -4);
|
||||
if (prefixGridScanLevel < 0)
|
||||
prefixGridScanLevel = grid.getMaxLevels() + prefixGridScanLevel;
|
||||
strategy.setPrefixGridScanLevel(prefixGridScanLevel);
|
||||
|
||||
double distErrPct = config.get("spatial.distErrPct", .025);//doc & query; a default
|
||||
strategy.setDistErrPct(distErrPct);
|
||||
return strategy;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setConfig(Config config, ContentSource source) {
|
||||
super.setConfig(config, source);
|
||||
SpatialStrategy existing = spatialStrategyCache.get(config.getRoundNumber());
|
||||
if (existing == null) {
|
||||
//new round; we need to re-initialize
|
||||
strategy = makeSpatialStrategy(config);
|
||||
spatialStrategyCache.put(config.getRoundNumber(), strategy);
|
||||
//TODO remove previous round config?
|
||||
shapeConverter = makeShapeConverter(strategy, config, "doc.spatial.");
|
||||
System.out.println("Spatial Strategy: " + strategy);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Optionally converts points to circles, and optionally bbox'es result.
|
||||
*/
|
||||
public static ShapeConverter makeShapeConverter(final SpatialStrategy spatialStrategy,
|
||||
Config config, String configKeyPrefix) {
|
||||
//by default does no conversion
|
||||
final double radiusDegrees = config.get(configKeyPrefix+"radiusDegrees", 0.0);
|
||||
final double plusMinus = config.get(configKeyPrefix+"radiusDegreesRandPlusMinus", 0.0);
|
||||
final boolean bbox = config.get(configKeyPrefix + "bbox", false);
|
||||
|
||||
return new ShapeConverter() {
|
||||
@Override
|
||||
public Shape convert(Shape shape) {
|
||||
if (shape instanceof Point && (radiusDegrees != 0.0 || plusMinus != 0.0)) {
|
||||
Point point = (Point)shape;
|
||||
double radius = radiusDegrees;
|
||||
if (plusMinus > 0.0) {
|
||||
Random random = new Random(point.hashCode());//use hashCode so it's reproducibly random
|
||||
radius += random.nextDouble() * 2 * plusMinus - plusMinus;
|
||||
radius = Math.abs(radius);//can happen if configured plusMinus > radiusDegrees
|
||||
}
|
||||
shape = spatialStrategy.getSpatialContext().makeCircle(point, radius);
|
||||
}
|
||||
if (bbox)
|
||||
shape = shape.getBoundingBox();
|
||||
return shape;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/** Converts one shape to another. Created by
|
||||
* {@link #makeShapeConverter(org.apache.lucene.spatial.SpatialStrategy, org.apache.lucene.benchmark.byTask.utils.Config, String)} */
|
||||
public interface ShapeConverter {
|
||||
Shape convert(Shape shape);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Document makeDocument() throws Exception {
|
||||
|
||||
DocState docState = getDocState();
|
||||
|
||||
Document doc = super.makeDocument();
|
||||
|
||||
// Set SPATIAL_FIELD from body
|
||||
DocData docData = docState.docData;
|
||||
// makeDocument() resets docState.getBody() so we can't look there; look in Document
|
||||
String shapeStr = doc.getField(DocMaker.BODY_FIELD).stringValue();
|
||||
Shape shape = makeShapeFromString(strategy, docData.getName(), shapeStr);
|
||||
if (shape != null) {
|
||||
shape = shapeConverter.convert(shape);
|
||||
//index
|
||||
for (Field f : strategy.createIndexableFields(shape)) {
|
||||
doc.add(f);
|
||||
}
|
||||
}
|
||||
|
||||
return doc;
|
||||
}
|
||||
|
||||
public static Shape makeShapeFromString(SpatialStrategy strategy, String name, String shapeStr) {
|
||||
if (shapeStr != null && shapeStr.length() > 0) {
|
||||
try {
|
||||
return strategy.getSpatialContext().readShape(shapeStr);
|
||||
} catch (Exception e) {//InvalidShapeException TODO
|
||||
System.err.println("Shape "+name+" wasn't parseable: "+e+" (skipping it)");
|
||||
return null;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Document makeDocument(int size) throws Exception {
|
||||
//TODO consider abusing the 'size' notion to number of shapes per document
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,120 @@
|
|||
package org.apache.lucene.benchmark.byTask.feeds;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import com.spatial4j.core.shape.Shape;
|
||||
import org.apache.lucene.benchmark.byTask.utils.Config;
|
||||
import org.apache.lucene.queries.CustomScoreQuery;
|
||||
import org.apache.lucene.queries.function.FunctionQuery;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
import org.apache.lucene.search.ConstantScoreQuery;
|
||||
import org.apache.lucene.search.Filter;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.QueryWrapperFilter;
|
||||
import org.apache.lucene.spatial.SpatialStrategy;
|
||||
import org.apache.lucene.spatial.query.SpatialArgs;
|
||||
import org.apache.lucene.spatial.query.SpatialOperation;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Properties;
|
||||
|
||||
/**
|
||||
* Reads spatial data from the body field docs from an internally created {@link LineDocSource}.
|
||||
* It's parsed by {@link com.spatial4j.core.context.SpatialContext#readShape(String)} and then
|
||||
* further manipulated via a configurable {@link SpatialDocMaker.ShapeConverter}. When using point
|
||||
* data, it's likely you'll want to configure the shape converter so that the query shapes actually
|
||||
* cover a region. The queries are all created & cached in advance. This query maker works in
|
||||
* conjunction with {@link SpatialDocMaker}. See spatial.alg for a listing of options, in
|
||||
* particular the options starting with "query.".
|
||||
*/
|
||||
public class SpatialFileQueryMaker extends AbstractQueryMaker {
|
||||
protected SpatialStrategy strategy;
|
||||
protected double distErrPct;//NaN if not set
|
||||
protected SpatialOperation operation;
|
||||
protected boolean score;
|
||||
|
||||
protected SpatialDocMaker.ShapeConverter shapeConverter;
|
||||
|
||||
@Override
|
||||
public void setConfig(Config config) throws Exception {
|
||||
strategy = SpatialDocMaker.getSpatialStrategy(config.getRoundNumber());
|
||||
shapeConverter = SpatialDocMaker.makeShapeConverter(strategy, config, "query.spatial.");
|
||||
|
||||
distErrPct = config.get("query.spatial.distErrPct", Double.NaN);
|
||||
operation = SpatialOperation.get(config.get("query.spatial.predicate", "Intersects"));
|
||||
score = config.get("query.spatial.score", false);
|
||||
|
||||
super.setConfig(config);//call last, will call prepareQueries()
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Query[] prepareQueries() throws Exception {
|
||||
final int maxQueries = config.get("query.file.maxQueries", 1000);
|
||||
Config srcConfig = new Config(new Properties());
|
||||
srcConfig.set("docs.file", config.get("query.file", null));
|
||||
srcConfig.set("line.parser", config.get("query.file.line.parser", null));
|
||||
srcConfig.set("content.source.forever", "false");
|
||||
|
||||
List<Query> queries = new ArrayList<>();
|
||||
LineDocSource src = new LineDocSource();
|
||||
try {
|
||||
src.setConfig(srcConfig);
|
||||
src.resetInputs();
|
||||
DocData docData = new DocData();
|
||||
for (int i = 0; i < maxQueries; i++) {
|
||||
docData = src.getNextDocData(docData);
|
||||
Shape shape = SpatialDocMaker.makeShapeFromString(strategy, docData.getName(), docData.getBody());
|
||||
if (shape != null) {
|
||||
shape = shapeConverter.convert(shape);
|
||||
queries.add(makeQueryFromShape(shape));
|
||||
} else {
|
||||
i--;//skip
|
||||
}
|
||||
}
|
||||
} catch (NoMoreDataException e) {
|
||||
//all-done
|
||||
} finally {
|
||||
src.close();
|
||||
}
|
||||
return queries.toArray(new Query[queries.size()]);
|
||||
}
|
||||
|
||||
|
||||
protected Query makeQueryFromShape(Shape shape) {
|
||||
SpatialArgs args = new SpatialArgs(operation, shape);
|
||||
if (!Double.isNaN(distErrPct))
|
||||
args.setDistErrPct(distErrPct);
|
||||
|
||||
if (score) {
|
||||
ValueSource valueSource = strategy.makeDistanceValueSource(shape.getCenter());
|
||||
return new CustomScoreQuery(strategy.makeQuery(args), new FunctionQuery(valueSource));
|
||||
} else {
|
||||
//strategy.makeQuery() could potentially score (isn't well defined) so instead we call
|
||||
// makeFilter() and wrap
|
||||
|
||||
Filter filter = strategy.makeFilter(args);
|
||||
if (filter instanceof QueryWrapperFilter) {
|
||||
return ((QueryWrapperFilter)filter).getQuery();
|
||||
} else {
|
||||
return new ConstantScoreQuery(filter);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -469,9 +469,10 @@ regular index/search work tasks, report tasks, and control tasks.
|
|||
its own queryMaker instance.
|
||||
<li>
|
||||
<font color="#FF0066">CommitIndex</font> and
|
||||
<font color="#FF0066">Optimize</font> can be used to commit
|
||||
changes to the index and/or optimize the index created thus
|
||||
far.
|
||||
<font color="#FF0066">ForceMerge</font> can be used to commit
|
||||
changes to the index then merge the index segments. The integer
|
||||
parameter specifies how many segments to merge down to (default
|
||||
1).
|
||||
<li>
|
||||
<font color="#FF0066">WriteLineDoc</font> prepares a 'line'
|
||||
file where each line holds a document with <i>title</i>,
|
||||
|
@ -593,6 +594,9 @@ Here is a list of currently defined properties:
|
|||
<ul><li>doc.delete.step
|
||||
</li></ul>
|
||||
</li>
|
||||
|
||||
<li><b>Spatial</b>: Numerous; see spatial.alg
|
||||
</li>
|
||||
|
||||
<li><b>Task alternative packages</b>:
|
||||
<ul><li>alt.tasks.packages
|
||||
|
|
Loading…
Reference in New Issue