mirror of https://github.com/apache/lucene.git
LUCENE-3079: faceting module (port to trunk)
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1141246 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
4401ef4dae
commit
4291f52c70
|
@ -42,6 +42,9 @@
|
|||
<classpathentry kind="src" path="modules/benchmark/src/test"/>
|
||||
<classpathentry kind="src" path="modules/common/src/java"/>
|
||||
<classpathentry kind="src" path="modules/common/src/test"/>
|
||||
<classpathentry kind="src" path="modules/facet/src/java"/>
|
||||
<classpathentry kind="src" path="modules/facet/src/examples"/>
|
||||
<classpathentry kind="src" path="modules/facet/src/test"/>
|
||||
<classpathentry kind="src" path="modules/grouping/src/java"/>
|
||||
<classpathentry kind="src" path="modules/grouping/src/test"/>
|
||||
<classpathentry kind="src" path="modules/queries/src/java"/>
|
||||
|
|
|
@ -0,0 +1,17 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module type="JAVA_MODULE" version="4">
|
||||
<component name="NewModuleRootManager" inherit-compiler-output="false">
|
||||
<output url="file://$MODULE_DIR$/build/classes/java" />
|
||||
<output-test url="file://$MODULE_DIR$/build/classes/test" />
|
||||
<exclude-output />
|
||||
<content url="file://$MODULE_DIR$">
|
||||
<sourceFolder url="file://$MODULE_DIR$/src/java" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/src/test" isTestSource="true" />
|
||||
<excludeFolder url="file://$MODULE_DIR$/work" />
|
||||
</content>
|
||||
<orderEntry type="inheritedJdk" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
<orderEntry type="library" scope="TEST" name="JUnit" level="project" />
|
||||
<orderEntry type="module" module-name="lucene" />
|
||||
</component>
|
||||
</module>
|
|
@ -0,0 +1,17 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module type="JAVA_MODULE" version="4">
|
||||
<component name="NewModuleRootManager" inherit-compiler-output="false">
|
||||
<output url="file://$MODULE_DIR$/build/classes/java" />
|
||||
<output-test url="file://$MODULE_DIR$/build/classes/test" />
|
||||
<exclude-output />
|
||||
<content url="file://$MODULE_DIR$">
|
||||
<sourceFolder url="file://$MODULE_DIR$/src/java" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/src/test" isTestSource="true" />
|
||||
<excludeFolder url="file://$MODULE_DIR$/work" />
|
||||
</content>
|
||||
<orderEntry type="inheritedJdk" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
<orderEntry type="library" scope="TEST" name="JUnit" level="project" />
|
||||
<orderEntry type="module" module-name="lucene" />
|
||||
</component>
|
||||
</module>
|
|
@ -0,0 +1,73 @@
|
|||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one
|
||||
or more contributor license agreements. See the NOTICE file
|
||||
distributed with this work for additional information
|
||||
regarding copyright ownership. The ASF licenses this file
|
||||
to you under the Apache License, Version 2.0 (the
|
||||
"License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing,
|
||||
software distributed under the License is distributed on an
|
||||
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations
|
||||
under the License.
|
||||
-->
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<parent>
|
||||
<groupId>org.apache.lucene</groupId>
|
||||
<artifactId>lucene-parent</artifactId>
|
||||
<version>@version@</version>
|
||||
<relativePath>../../pom.xml</relativePath>
|
||||
</parent>
|
||||
<groupId>org.apache.lucene</groupId>
|
||||
<artifactId>lucene-facet</artifactId>
|
||||
<packaging>jar</packaging>
|
||||
<name>Lucene Facets</name>
|
||||
<description>
|
||||
Package for Faceted Indexing and Search
|
||||
</description>
|
||||
<properties>
|
||||
<module-directory>modules/facet</module-directory>
|
||||
<build-directory>build</build-directory>
|
||||
</properties>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>${project.groupId}</groupId>
|
||||
<artifactId>lucene-core</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>${project.groupId}</groupId>
|
||||
<artifactId>lucene-test-framework</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
<build>
|
||||
<directory>${build-directory}</directory>
|
||||
<outputDirectory>${build-directory}/classes/java</outputDirectory>
|
||||
<testOutputDirectory>${build-directory}/classes/test</testOutputDirectory>
|
||||
<sourceDirectory>src/java</sourceDirectory>
|
||||
<testSourceDirectory>src/test</testSourceDirectory>
|
||||
<testResources>
|
||||
<testResource>
|
||||
<directory>${project.build.testSourceDirectory}</directory>
|
||||
<excludes>
|
||||
<exclude>**/*.java</exclude>
|
||||
</excludes>
|
||||
</testResource>
|
||||
</testResources>
|
||||
</build>
|
||||
</project>
|
|
@ -65,7 +65,13 @@ New Features
|
|||
* LUCENE-3234: provide a limit on phrase analysis in FastVectorHighlighter for
|
||||
highlighting speed up. Use FastVectorHighlighter.setPhraseLimit() to set limit
|
||||
(e.g. 5000). (Mike Sokolov via Koji Sekiguchi)
|
||||
|
||||
|
||||
* LUCENE-3079: a new facet module which provides faceted indexing & search
|
||||
capabilities. It allows managing a taxonomy of categories, and index them
|
||||
with documents. It also provides search API for aggregating (e.g. count)
|
||||
the weights of the categories that are relevant to the search results.
|
||||
(Shai Erera)
|
||||
|
||||
* LUCENE-3171: Added BlockJoinQuery and BlockJoinCollector, under the
|
||||
new contrib/join module, to enable searches that require joining
|
||||
between parent and child documents. Joined (children + parent)
|
||||
|
|
|
@ -18,12 +18,14 @@
|
|||
-->
|
||||
|
||||
<project name="modules" default="test" basedir=".">
|
||||
<!-- TODO: at some point we should probably iterate like contrib-crawl -->
|
||||
<target name="test" description="Test all modules">
|
||||
<sequential>
|
||||
<subant target="test" inheritall="false" failonerror="true">
|
||||
<fileset dir="analysis" includes="build.xml" />
|
||||
<fileset dir="benchmark" includes="build.xml" />
|
||||
<fileset dir="common" includes="build.xml" />
|
||||
<fileset dir="facet" includes="build.xml" />
|
||||
<fileset dir="grouping" includes="build.xml" />
|
||||
<fileset dir="queries" includes="build.xml" />
|
||||
<fileset dir="join" includes="build.xml" />
|
||||
|
@ -38,6 +40,7 @@
|
|||
<fileset dir="analysis" includes="build.xml" />
|
||||
<fileset dir="benchmark" includes="build.xml" />
|
||||
<fileset dir="common" includes="build.xml" />
|
||||
<fileset dir="facet" includes="build.xml" />
|
||||
<fileset dir="grouping" includes="build.xml" />
|
||||
<fileset dir="queries" includes="build.xml" />
|
||||
<fileset dir="join" includes="build.xml" />
|
||||
|
@ -52,6 +55,7 @@
|
|||
<fileset dir="analysis" includes="build.xml" />
|
||||
<fileset dir="benchmark" includes="build.xml" />
|
||||
<fileset dir="common" includes="build.xml" />
|
||||
<fileset dir="facet" includes="build.xml" />
|
||||
<fileset dir="grouping" includes="build.xml" />
|
||||
<fileset dir="queries" includes="build.xml" />
|
||||
<fileset dir="join" includes="build.xml" />
|
||||
|
@ -66,6 +70,7 @@
|
|||
<fileset dir="analysis" includes="build.xml" />
|
||||
<fileset dir="benchmark" includes="build.xml" />
|
||||
<fileset dir="common" includes="build.xml" />
|
||||
<fileset dir="facet" includes="build.xml" />
|
||||
<fileset dir="grouping" includes="build.xml" />
|
||||
<fileset dir="queries" includes="build.xml" />
|
||||
<fileset dir="join" includes="build.xml" />
|
||||
|
@ -81,6 +86,7 @@
|
|||
<fileset dir="analysis" includes="build.xml" />
|
||||
<fileset dir="benchmark" includes="build.xml" />
|
||||
<fileset dir="common" includes="build.xml" />
|
||||
<fileset dir="facet" includes="build.xml" />
|
||||
<fileset dir="grouping" includes="build.xml" />
|
||||
<fileset dir="queries" includes="build.xml" />
|
||||
<fileset dir="join" includes="build.xml" />
|
||||
|
@ -94,6 +100,7 @@
|
|||
<fileset dir="analysis" includes="build.xml" />
|
||||
<fileset dir="benchmark" includes="build.xml" />
|
||||
<fileset dir="common" includes="build.xml" />
|
||||
<fileset dir="facet" includes="build.xml" />
|
||||
<fileset dir="grouping" includes="build.xml" />
|
||||
<fileset dir="queries" includes="build.xml" />
|
||||
<fileset dir="join" includes="build.xml" />
|
||||
|
@ -109,6 +116,7 @@
|
|||
<fileset dir="analysis" includes="build.xml" />
|
||||
<fileset dir="benchmark" includes="build.xml" />
|
||||
<fileset dir="common" includes="build.xml" />
|
||||
<fileset dir="facet" includes="build.xml" />
|
||||
<fileset dir="grouping" includes="build.xml" />
|
||||
<fileset dir="queries" includes="build.xml" />
|
||||
<fileset dir="join" includes="build.xml" />
|
||||
|
|
|
@ -0,0 +1,202 @@
|
|||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
|
@ -0,0 +1,5 @@
|
|||
Apache Lucene Facets
|
||||
Copyright 2011 The Apache Software Foundation
|
||||
|
||||
This product includes software developed by
|
||||
The Apache Software Foundation (http://www.apache.org/).
|
|
@ -0,0 +1,75 @@
|
|||
<?xml version="1.0"?>
|
||||
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<project name="facet" default="default">
|
||||
|
||||
<description>
|
||||
Faceted search module
|
||||
</description>
|
||||
|
||||
<property name="build.dir" location="build/" />
|
||||
<import file="../../lucene/contrib/contrib-build.xml"/>
|
||||
|
||||
<property name="build.dir" location="build/" />
|
||||
<property name="dist.dir" location="dist/" />
|
||||
|
||||
<!-- TODO, cut over tests to MockAnalyzer etc and nuke this dependency -->
|
||||
<module-uptodate name="analysis/common" jarfile="${common.dir}/../modules/analysis/build/common/lucene-analyzers-common-${version}.jar"
|
||||
property="analyzers-common.uptodate" classpath.property="analyzers-common.jar"/>
|
||||
|
||||
<path id="examples.classpath">
|
||||
<path refid="classpath" />
|
||||
<pathelement location="${build.dir}/classes/java" />
|
||||
<pathelement path="${analyzers-common.jar}" />
|
||||
</path>
|
||||
|
||||
<path id="test.classpath">
|
||||
<path refid="test.base.classpath" />
|
||||
<pathelement location="${build.dir}/classes/examples" />
|
||||
<pathelement path="${analyzers-common.jar}" />
|
||||
</path>
|
||||
|
||||
<path id="classpath">
|
||||
<pathelement path="${analyzers-common.jar}" />
|
||||
<path refid="base.classpath"/>
|
||||
</path>
|
||||
|
||||
<target name="compile-examples" description="Compiles Facets examples">
|
||||
<compile srcdir="src/examples" destdir="${build.dir}/classes/examples">
|
||||
<classpath refid="examples.classpath" />
|
||||
</compile>
|
||||
</target>
|
||||
|
||||
<target name="jar-examples" depends="compile-examples">
|
||||
<jarify basedir="${build.dir}/classes/examples"
|
||||
destfile="${build.dir}/${final.name}-examples.jar"
|
||||
title="Lucene Search Engine: ${ant.project.name}-examples" />
|
||||
</target>
|
||||
|
||||
<target name="jar-analyzers-common" unless="analyzers-common.uptodate">
|
||||
<subant target="jar-core">
|
||||
<fileset dir="${common.dir}/../modules/analysis/common" includes="build.xml"/>
|
||||
</subant>
|
||||
</target>
|
||||
|
||||
<target name="compile-core" depends="jar-analyzers-common,common.compile-core,compile-examples" description="Compiles facet classes" />
|
||||
|
||||
<target name="jar-core" depends="common.jar-core,jar-examples" />
|
||||
|
||||
</project>
|
|
@ -0,0 +1,49 @@
|
|||
package org.apache.lucene.facet.example;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.facet.search.results.FacetResult;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Result of running an example program.
|
||||
* This is a general object for allowing to write a test
|
||||
* that runs an example and verifies its results.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class ExampleResult {
|
||||
|
||||
private List<FacetResult> facetResults;
|
||||
|
||||
/**
|
||||
* @return the facet results
|
||||
*/
|
||||
public List<FacetResult> getFacetResults() {
|
||||
return facetResults;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param facetResults the facet results to set
|
||||
*/
|
||||
public void setFacetResults(List<FacetResult> facetResults) {
|
||||
this.facetResults = facetResults;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,38 @@
|
|||
package org.apache.lucene.facet.example;
|
||||
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class ExampleUtils {
|
||||
|
||||
public static final boolean VERBOSE = Boolean.getBoolean("tests.verbose");
|
||||
|
||||
/** The Lucene {@link Version} used by the example code. */
|
||||
public static final Version EXAMPLE_VER = Version.LUCENE_31;
|
||||
|
||||
public static void log(Object msg) {
|
||||
if (VERBOSE) {
|
||||
System.out.println(msg.toString());
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,67 @@
|
|||
package org.apache.lucene.facet.example.adaptive;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.RAMDirectory;
|
||||
|
||||
import org.apache.lucene.facet.example.ExampleResult;
|
||||
import org.apache.lucene.facet.example.ExampleUtils;
|
||||
import org.apache.lucene.facet.example.simple.SimpleIndexer;
|
||||
import org.apache.lucene.facet.example.simple.SimpleSearcher;
|
||||
import org.apache.lucene.facet.search.AdaptiveFacetsAccumulator;
|
||||
import org.apache.lucene.facet.search.results.FacetResult;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Driver for the adaptive sample, using the {@link AdaptiveFacetsAccumulator}.
|
||||
* Indexing is the same as in {@link SimpleSearcher}
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class AdaptiveMain {
|
||||
|
||||
/**
|
||||
* Driver for the adaptive sample.
|
||||
* @throws Exception on error (no detailed exception handling here for sample simplicity
|
||||
*/
|
||||
public static void main(String[] args) throws Exception {
|
||||
new AdaptiveMain().runSample();
|
||||
ExampleUtils.log("DONE");
|
||||
}
|
||||
|
||||
public ExampleResult runSample() throws Exception {
|
||||
|
||||
// create Directories for the search index and for the taxonomy index
|
||||
Directory indexDir = new RAMDirectory();
|
||||
Directory taxoDir = new RAMDirectory();
|
||||
|
||||
// index the sample documents
|
||||
ExampleUtils.log("index the adaptive sample documents...");
|
||||
SimpleIndexer.index(indexDir, taxoDir);
|
||||
|
||||
ExampleUtils.log("search the adaptive sample documents...");
|
||||
List<FacetResult> facetRes = AdaptiveSearcher.searchWithFacets(indexDir, taxoDir);
|
||||
|
||||
ExampleResult res = new ExampleResult();
|
||||
res.setFacetResults(facetRes);
|
||||
return res;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,103 @@
|
|||
package org.apache.lucene.facet.example.adaptive;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.TopScoreDocCollector;
|
||||
import org.apache.lucene.store.Directory;
|
||||
|
||||
import org.apache.lucene.search.MultiCollector;
|
||||
import org.apache.lucene.facet.example.ExampleUtils;
|
||||
import org.apache.lucene.facet.example.simple.SimpleUtils;
|
||||
import org.apache.lucene.facet.search.AdaptiveFacetsAccumulator;
|
||||
import org.apache.lucene.facet.search.ScoredDocIdCollector;
|
||||
import org.apache.lucene.facet.search.params.CountFacetRequest;
|
||||
import org.apache.lucene.facet.search.params.FacetSearchParams;
|
||||
import org.apache.lucene.facet.search.results.FacetResult;
|
||||
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyReader;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Search with facets through the {@link AdaptiveFacetsAccumulator}
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class AdaptiveSearcher {
|
||||
|
||||
/**
|
||||
* Search with facets through the {@link AdaptiveFacetsAccumulator}
|
||||
* @param indexDir Directory of the search index.
|
||||
* @param taxoDir Directory of the taxonomy index.
|
||||
* @throws Exception on error (no detailed exception handling here for sample simplicity
|
||||
* @return facet results
|
||||
*/
|
||||
public static List<FacetResult> searchWithFacets (Directory indexDir, Directory taxoDir) throws Exception {
|
||||
// prepare index reader and taxonomy.
|
||||
TaxonomyReader taxo = new LuceneTaxonomyReader(taxoDir);
|
||||
IndexReader indexReader = IndexReader.open(indexDir);
|
||||
|
||||
// prepare searcher to search against
|
||||
IndexSearcher searcher = new IndexSearcher(indexReader);
|
||||
|
||||
// faceted search is working in 2 steps:
|
||||
// 1. collect matching documents
|
||||
// 2. aggregate facets for collected documents and
|
||||
// generate the requested faceted results from the aggregated facets
|
||||
|
||||
// step 1: collect matching documents into a collector
|
||||
Query q = new TermQuery(new Term(SimpleUtils.TEXT,"white"));
|
||||
ExampleUtils.log("Query: "+q);
|
||||
|
||||
// regular collector for scoring matched documents
|
||||
TopScoreDocCollector topDocsCollector = TopScoreDocCollector.create(10, true);
|
||||
|
||||
// docids collector for guiding facets accumulation (scoring disabled)
|
||||
ScoredDocIdCollector docIdsCollecor = ScoredDocIdCollector.create(indexReader.maxDoc(), false);
|
||||
|
||||
// Faceted search parameters indicate which facets are we interested in
|
||||
FacetSearchParams facetSearchParams = new FacetSearchParams();
|
||||
facetSearchParams.addFacetRequest(new CountFacetRequest(new CategoryPath("root","a"), 10));
|
||||
|
||||
// search, into both collectors. note: in case only facets accumulation
|
||||
// is required, the topDocCollector part can be totally discarded
|
||||
searcher.search(q, MultiCollector.wrap(topDocsCollector, docIdsCollecor));
|
||||
|
||||
// Obtain facets results and print them
|
||||
AdaptiveFacetsAccumulator accumulator = new AdaptiveFacetsAccumulator(facetSearchParams, indexReader, taxo);
|
||||
List<FacetResult> res = accumulator.accumulate(docIdsCollecor.getScoredDocIDs());
|
||||
|
||||
int i = 0;
|
||||
for (FacetResult facetResult : res) {
|
||||
ExampleUtils.log("Res "+(i++)+": "+facetResult);
|
||||
}
|
||||
|
||||
// we're done, close the index reader and the taxonomy.
|
||||
indexReader.close();
|
||||
taxo.close();
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,132 @@
|
|||
package org.apache.lucene.facet.example.association;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.Field.Index;
|
||||
import org.apache.lucene.document.Field.Store;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||
import org.apache.lucene.store.Directory;
|
||||
|
||||
import org.apache.lucene.facet.enhancements.EnhancementsDocumentBuilder;
|
||||
import org.apache.lucene.facet.enhancements.association.AssociationProperty;
|
||||
import org.apache.lucene.facet.example.ExampleUtils;
|
||||
import org.apache.lucene.facet.example.simple.SimpleUtils;
|
||||
import org.apache.lucene.facet.index.CategoryContainer;
|
||||
import org.apache.lucene.facet.index.CategoryDocumentBuilder;
|
||||
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
|
||||
import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyWriter;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Sample indexer creates an index, and adds to it sample documents with
|
||||
* categories, which can be simple or contain associations.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class AssociationIndexer {
|
||||
|
||||
/**
|
||||
* Create an index, and adds to it sample documents and categories.
|
||||
*
|
||||
* @param indexDir
|
||||
* Directory in which the index should be created.
|
||||
* @param taxoDir
|
||||
* Directory in which the taxonomy index should be created.
|
||||
* @throws Exception
|
||||
* on error (no detailed exception handling here for sample
|
||||
* simplicity
|
||||
*/
|
||||
public static void index(Directory indexDir, Directory taxoDir) throws Exception {
|
||||
|
||||
// create and open an index writer
|
||||
IndexWriter iw = new IndexWriter(indexDir, new IndexWriterConfig(ExampleUtils.EXAMPLE_VER, SimpleUtils.analyzer));
|
||||
|
||||
// create and open a taxonomy writer
|
||||
TaxonomyWriter taxo = new LuceneTaxonomyWriter(taxoDir, OpenMode.CREATE);
|
||||
|
||||
// loop over sample documents
|
||||
int nDocsAdded = 0;
|
||||
int nFacetsAdded = 0;
|
||||
for (int docNum = 0; docNum < SimpleUtils.docTexts.length; docNum++) {
|
||||
ExampleUtils.log(" ++++ DOC ID: " + docNum);
|
||||
// obtain the sample categories for current document
|
||||
CategoryContainer categoryContainer = new CategoryContainer();
|
||||
for (CategoryPath path : SimpleUtils.categories[docNum]) {
|
||||
categoryContainer.addCategory(path);
|
||||
ExampleUtils.log("\t ++++ PATH: " + path);
|
||||
}
|
||||
// and also those with associations
|
||||
CategoryPath[] associationsPaths = AssociationUtils.categories[docNum];
|
||||
AssociationProperty[] associationProps = AssociationUtils.associations[docNum];
|
||||
for (int i = 0; i < associationsPaths.length; i++) {
|
||||
categoryContainer.addCategory(associationsPaths[i], associationProps[i]);
|
||||
ExampleUtils.log("\t $$$$ Association: ("
|
||||
+ associationsPaths[i] + "," + associationProps[i]
|
||||
+ ")");
|
||||
}
|
||||
|
||||
// we do not alter indexing parameters!
|
||||
// a category document builder will add the categories to a document
|
||||
// once build() is called
|
||||
CategoryDocumentBuilder categoryDocBuilder = new EnhancementsDocumentBuilder(
|
||||
taxo, AssociationUtils.assocIndexingParams);
|
||||
categoryDocBuilder.setCategories(categoryContainer);
|
||||
|
||||
// create a plain Lucene document and add some regular Lucene fields
|
||||
// to it
|
||||
Document doc = new Document();
|
||||
doc.add(new Field(SimpleUtils.TITLE, SimpleUtils.docTitles[docNum],
|
||||
Store.YES, Index.ANALYZED));
|
||||
doc.add(new Field(SimpleUtils.TEXT, SimpleUtils.docTexts[docNum],
|
||||
Store.NO, Index.ANALYZED));
|
||||
|
||||
// invoke the category document builder for adding categories to the
|
||||
// document and,
|
||||
// as required, to the taxonomy index
|
||||
categoryDocBuilder.build(doc);
|
||||
|
||||
// finally add the document to the index
|
||||
iw.addDocument(doc);
|
||||
|
||||
nDocsAdded++;
|
||||
nFacetsAdded += categoryContainer.size();
|
||||
}
|
||||
|
||||
// commit changes.
|
||||
// we commit changes to the taxonomy index prior to committing them to
|
||||
// the search index.
|
||||
// this is important, so that all facets referred to by documents in the
|
||||
// search index
|
||||
// will indeed exist in the taxonomy index.
|
||||
taxo.commit();
|
||||
iw.commit();
|
||||
|
||||
// close the taxonomy index and the index - all modifications are
|
||||
// now safely in the provided directories: indexDir and taxoDir.
|
||||
taxo.close();
|
||||
iw.close();
|
||||
|
||||
ExampleUtils.log("Indexed " + nDocsAdded + " documents with overall "
|
||||
+ nFacetsAdded + " facets.");
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,82 @@
|
|||
package org.apache.lucene.facet.example.association;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.RAMDirectory;
|
||||
|
||||
import org.apache.lucene.facet.example.ExampleResult;
|
||||
import org.apache.lucene.facet.example.ExampleUtils;
|
||||
import org.apache.lucene.facet.search.results.FacetResult;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Driver for the simple sample.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class AssociationMain {
|
||||
|
||||
/**
|
||||
* Driver for the simple sample.
|
||||
* @throws Exception on error (no detailed exception handling here for sample simplicity
|
||||
*/
|
||||
public static void main(String[] args) throws Exception {
|
||||
new AssociationMain().runSumIntAssociationSample();
|
||||
new AssociationMain().runSumFloatAssociationSample();
|
||||
ExampleUtils.log("DONE");
|
||||
}
|
||||
|
||||
public ExampleResult runSumIntAssociationSample() throws Exception {
|
||||
|
||||
// create Directories for the search index and for the taxonomy index
|
||||
Directory indexDir = new RAMDirectory();//FSDirectory.open(new File("/tmp/111"));
|
||||
Directory taxoDir = new RAMDirectory();
|
||||
|
||||
// index the sample documents
|
||||
ExampleUtils.log("index the sample documents...");
|
||||
AssociationIndexer.index(indexDir, taxoDir);
|
||||
|
||||
ExampleUtils.log("search the sample documents...");
|
||||
List<FacetResult> facetRes = AssociationSearcher.searchSumIntAssociation(indexDir, taxoDir);
|
||||
|
||||
ExampleResult res = new ExampleResult();
|
||||
res.setFacetResults(facetRes);
|
||||
return res;
|
||||
}
|
||||
|
||||
public ExampleResult runSumFloatAssociationSample() throws Exception {
|
||||
|
||||
// create Directories for the search index and for the taxonomy index
|
||||
Directory indexDir = new RAMDirectory();//FSDirectory.open(new File("/tmp/111"));
|
||||
Directory taxoDir = new RAMDirectory();
|
||||
|
||||
// index the sample documents
|
||||
ExampleUtils.log("index the sample documents...");
|
||||
AssociationIndexer.index(indexDir, taxoDir);
|
||||
|
||||
ExampleUtils.log("search the sample documents...");
|
||||
List<FacetResult> facetRes = AssociationSearcher.searchSumFloatAssociation(indexDir, taxoDir);
|
||||
|
||||
ExampleResult res = new ExampleResult();
|
||||
res.setFacetResults(facetRes);
|
||||
return res;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,81 @@
|
|||
package org.apache.lucene.facet.example.association;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.store.Directory;
|
||||
|
||||
import org.apache.lucene.facet.example.simple.SimpleSearcher;
|
||||
import org.apache.lucene.facet.search.params.association.AssociationFloatSumFacetRequest;
|
||||
import org.apache.lucene.facet.search.params.association.AssociationIntSumFacetRequest;
|
||||
import org.apache.lucene.facet.search.results.FacetResult;
|
||||
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyReader;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* AssociationSearcher searches index with facets, evaluating the facets with
|
||||
* their associated $int value
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class AssociationSearcher {
|
||||
|
||||
/** Search an index with a sum of int-association. */
|
||||
public static List<FacetResult> searchSumIntAssociation(Directory indexDir,
|
||||
Directory taxoDir) throws Exception {
|
||||
// prepare index reader
|
||||
IndexReader indexReader = IndexReader.open(indexDir);
|
||||
TaxonomyReader taxo = new LuceneTaxonomyReader(taxoDir);
|
||||
|
||||
AssociationIntSumFacetRequest facetRequest = new AssociationIntSumFacetRequest(
|
||||
new CategoryPath("tags"), 10);
|
||||
|
||||
List<FacetResult> res = SimpleSearcher.searchWithRequest(indexReader, taxo,
|
||||
AssociationUtils.assocIndexingParams, facetRequest);
|
||||
|
||||
// close readers
|
||||
taxo.close();
|
||||
indexReader.close();
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
/** Search an index with a sum of float-association. */
|
||||
public static List<FacetResult> searchSumFloatAssociation(Directory indexDir,
|
||||
Directory taxoDir) throws Exception {
|
||||
// prepare index reader
|
||||
IndexReader indexReader = IndexReader.open(indexDir);
|
||||
TaxonomyReader taxo = new LuceneTaxonomyReader(taxoDir);
|
||||
|
||||
AssociationFloatSumFacetRequest facetRequest = new AssociationFloatSumFacetRequest(
|
||||
new CategoryPath("genre"), 10);
|
||||
|
||||
List<FacetResult> res = SimpleSearcher.searchWithRequest(indexReader, taxo,
|
||||
AssociationUtils.assocIndexingParams, facetRequest);
|
||||
|
||||
// close readers
|
||||
taxo.close();
|
||||
indexReader.close();
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,79 @@
|
|||
package org.apache.lucene.facet.example.association;
|
||||
|
||||
import org.apache.lucene.facet.enhancements.association.AssociationEnhancement;
|
||||
import org.apache.lucene.facet.enhancements.association.AssociationFloatProperty;
|
||||
import org.apache.lucene.facet.enhancements.association.AssociationIntProperty;
|
||||
import org.apache.lucene.facet.enhancements.association.AssociationProperty;
|
||||
import org.apache.lucene.facet.enhancements.params.DefaultEnhancementsIndexingParams;
|
||||
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
 * Static sample data shared by the association examples: the category paths
 * assigned to each sample document, the association values attached to those
 * categories, and the indexing parameters that enable association support.
 * 
 * @lucene.experimental
 */
public class AssociationUtils {
  
  /**
   * Categories: categories[D][N] == category-path with association no. N for
   * document no. D.
   */
  public static CategoryPath[][] categories = {
    // Doc #1
    { new CategoryPath("tags", "lucene") ,
      new CategoryPath("genre", "computing")
    },
    
    // Doc #2
    { new CategoryPath("tags", "lucene"),
      new CategoryPath("tags", "solr"),
      new CategoryPath("genre", "computing"),
      new CategoryPath("genre", "software")
    }
  };
  
  /**
   * Associations: associations[D][N] holds the association value for
   * categories[D][N] above — int properties carry occurrence counts,
   * float properties carry confidence levels.
   */
  public static AssociationProperty[][] associations = {
    // Doc #1 associations
    {
      /* 3 occurrences for tag 'lucene' */
      new AssociationIntProperty(3),
      /* 87% confidence level of genre 'computing' */
      new AssociationFloatProperty(0.87f)
    },
    
    // Doc #2 associations
    {
      /* 1 occurrence for tag 'lucene' */
      new AssociationIntProperty(1),
      /* 2 occurrences for tag 'solr' */
      new AssociationIntProperty(2),
      /* 75% confidence level of genre 'computing' */
      new AssociationFloatProperty(0.75f),
      /* 34% confidence level of genre 'software' */
      new AssociationFloatProperty(0.34f),
    }
  };
  
  /**
   * Indexing Params: the indexing params to use when dealing with
   * associations.
   */
  public static final DefaultEnhancementsIndexingParams assocIndexingParams =
    new DefaultEnhancementsIndexingParams(new AssociationEnhancement());
  
}
|
|
@ -0,0 +1,102 @@
|
|||
package org.apache.lucene.facet.example.merge;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.PayloadProcessorProvider;
|
||||
import org.apache.lucene.store.Directory;
|
||||
|
||||
import org.apache.lucene.facet.example.ExampleUtils;
|
||||
import org.apache.lucene.facet.index.FacetsPayloadProcessorProvider;
|
||||
import org.apache.lucene.facet.index.params.DefaultFacetIndexingParams;
|
||||
import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyWriter;
|
||||
import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyWriter.DiskOrdinalMap;
|
||||
import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyWriter.MemoryOrdinalMap;
|
||||
import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyWriter.OrdinalMap;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class TaxonomyMergeUtils {
|
||||
|
||||
/**
|
||||
* Merges the given taxonomy and index directories. Note that this method
|
||||
* opens {@link LuceneTaxonomyWriter} and {@link IndexWriter} on the
|
||||
* respective destination indexes. Therefore if you have a writer open on any
|
||||
* of them, it should be closed, or you should use
|
||||
* {@link #merge(Directory, Directory, IndexWriter, LuceneTaxonomyWriter)}
|
||||
* instead.
|
||||
*
|
||||
* @see #merge(Directory, Directory, IndexWriter, LuceneTaxonomyWriter)
|
||||
*/
|
||||
public static void merge(Directory srcIndexDir, Directory srcTaxDir,
|
||||
Directory destIndexDir, Directory destTaxDir) throws IOException {
|
||||
IndexWriter destIndexWriter = new IndexWriter(destIndexDir,
|
||||
new IndexWriterConfig(ExampleUtils.EXAMPLE_VER, null));
|
||||
LuceneTaxonomyWriter destTaxWriter = new LuceneTaxonomyWriter(destTaxDir);
|
||||
merge(srcIndexDir, srcTaxDir, new MemoryOrdinalMap(), destIndexWriter, destTaxWriter);
|
||||
destTaxWriter.close();
|
||||
destIndexWriter.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* Merges the given taxonomy and index directories and commits the changes to
|
||||
* the given writers. This method uses {@link MemoryOrdinalMap} to store the
|
||||
* mapped ordinals. If you cannot afford the memory, you can use
|
||||
* {@link #merge(Directory, Directory, OrdinalMap, IndexWriter, LuceneTaxonomyWriter)}
|
||||
* by passing {@link DiskOrdinalMap}.
|
||||
*
|
||||
* @see #merge(Directory, Directory, OrdinalMap, IndexWriter, LuceneTaxonomyWriter)
|
||||
*/
|
||||
public static void merge(Directory srcIndexDir, Directory srcTaxDir,
|
||||
IndexWriter destIndexWriter,
|
||||
LuceneTaxonomyWriter destTaxWriter) throws IOException {
|
||||
merge(srcIndexDir, srcTaxDir, new MemoryOrdinalMap(), destIndexWriter, destTaxWriter);
|
||||
}
|
||||
|
||||
/**
|
||||
* Merges the given taxonomy and index directories and commits the changes to
|
||||
* the given writers.
|
||||
*/
|
||||
public static void merge(Directory srcIndexDir, Directory srcTaxDir,
|
||||
OrdinalMap map, IndexWriter destIndexWriter,
|
||||
LuceneTaxonomyWriter destTaxWriter) throws IOException {
|
||||
// merge the taxonomies
|
||||
destTaxWriter.addTaxonomies(new Directory[] { srcTaxDir }, new OrdinalMap[] { map });
|
||||
|
||||
PayloadProcessorProvider payloadProcessor = new FacetsPayloadProcessorProvider(
|
||||
srcIndexDir, map.getMap(), new DefaultFacetIndexingParams());
|
||||
destIndexWriter.setPayloadProcessorProvider(payloadProcessor);
|
||||
|
||||
IndexReader reader = IndexReader.open(srcIndexDir);
|
||||
try {
|
||||
destIndexWriter.addIndexes(reader);
|
||||
|
||||
// commit changes to taxonomy and index respectively.
|
||||
destTaxWriter.commit();
|
||||
destIndexWriter.commit();
|
||||
} finally {
|
||||
reader.close();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,209 @@
|
|||
package org.apache.lucene.facet.example.multiCL;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.Field.Index;
|
||||
import org.apache.lucene.document.Field.Store;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.RAMDirectory;
|
||||
|
||||
import org.apache.lucene.DocumentBuilder;
|
||||
import org.apache.lucene.facet.example.ExampleUtils;
|
||||
import org.apache.lucene.facet.example.simple.SimpleUtils;
|
||||
import org.apache.lucene.facet.index.CategoryDocumentBuilder;
|
||||
import org.apache.lucene.facet.index.params.CategoryListParams;
|
||||
import org.apache.lucene.facet.index.params.FacetIndexingParams;
|
||||
import org.apache.lucene.facet.index.params.PerDimensionIndexingParams;
|
||||
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||
import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyWriter;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Sample indexer creates an index, and adds to it sample documents and facets
|
||||
* with multiple CategoryLists specified for different facets, so there are different
|
||||
* category lists for different facets.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class MultiCLIndexer {
|
||||
|
||||
// Number of documents to index
|
||||
public static int NUM_DOCS = 100;
|
||||
// Number of facets to add per document
|
||||
public static int NUM_FACETS_PER_DOC = 10;
|
||||
// Number of tokens in title
|
||||
public static int TITLE_LENGTH = 5;
|
||||
// Number of tokens in text
|
||||
public static int TEXT_LENGTH = 100;
|
||||
|
||||
// Lorum ipsum to use as content - this will be tokenized and used for document
|
||||
// titles/text.
|
||||
static String words = "Sed ut perspiciatis unde omnis iste natus error sit "
|
||||
+ "voluptatem accusantium doloremque laudantium totam rem aperiam "
|
||||
+ "eaque ipsa quae ab illo inventore veritatis et quasi architecto "
|
||||
+ "beatae vitae dicta sunt explicabo Nemo enim ipsam voluptatem "
|
||||
+ "quia voluptas sit aspernatur aut odit aut fugit sed quia consequuntur "
|
||||
+ "magni dolores eos qui ratione voluptatem sequi nesciunt Neque porro "
|
||||
+ "quisquam est qui dolorem ipsum quia dolor sit amet consectetur adipisci velit "
|
||||
+ "sed quia non numquam eius modi tempora incidunt ut labore et dolore "
|
||||
+ "magnam aliquam quaerat voluptatem Ut enim ad minima veniam "
|
||||
+ "quis nostrum exercitationem ullam corporis suscipit laboriosam "
|
||||
+ "nisi ut aliquid ex ea commodi consequatur? Quis autem vel eum iure"
|
||||
+ "reprehenderit qui in ea voluptate velit esse quam nihil molestiae "
|
||||
+ "consequatur vel illum qui dolorem eum fugiat quo voluptas nulla pariatur";
|
||||
// PerDimensionIndexingParams for multiple category lists
|
||||
public static PerDimensionIndexingParams MULTI_IPARAMS = new PerDimensionIndexingParams();
|
||||
|
||||
// Initialize PerDimensionIndexingParams
|
||||
static {
|
||||
MULTI_IPARAMS.addCategoryListParams(new CategoryPath("0"),
|
||||
new CategoryListParams(new Term("$Digits", "Zero")));
|
||||
MULTI_IPARAMS.addCategoryListParams(new CategoryPath("1"),
|
||||
new CategoryListParams(new Term("$Digits", "One")));
|
||||
MULTI_IPARAMS.addCategoryListParams(new CategoryPath("2"),
|
||||
new CategoryListParams(new Term("$Digits", "Two")));
|
||||
MULTI_IPARAMS.addCategoryListParams(new CategoryPath("3"),
|
||||
new CategoryListParams(new Term("$Digits", "Three")));
|
||||
MULTI_IPARAMS.addCategoryListParams(new CategoryPath("4"),
|
||||
new CategoryListParams(new Term("$Digits", "Four")));
|
||||
MULTI_IPARAMS.addCategoryListParams(new CategoryPath("5"),
|
||||
new CategoryListParams(new Term("$Digits", "Five")));
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an index, and adds to it sample documents and facets.
|
||||
* @param indexDir Directory in which the index should be created.
|
||||
* @param taxoDir Directory in which the taxonomy index should be created.
|
||||
* @throws Exception on error (no detailed exception handling here for sample simplicity
|
||||
*/
|
||||
public static void index(Directory indexDir, Directory taxoDir)
|
||||
throws Exception {
|
||||
|
||||
Random random = new Random(2003);
|
||||
|
||||
String[] docTitles = new String[NUM_DOCS];
|
||||
String[] docTexts = new String[NUM_DOCS];
|
||||
CategoryPath[][] cPaths = new CategoryPath[NUM_DOCS][NUM_FACETS_PER_DOC];
|
||||
|
||||
String[] tokens = words.split(" ");
|
||||
for (int docNum = 0; docNum < NUM_DOCS; docNum++) {
|
||||
String title = "";
|
||||
String text = "";
|
||||
for (int j = 0; j < TITLE_LENGTH; j++) {
|
||||
title = title + tokens[random.nextInt(tokens.length)] + " ";
|
||||
}
|
||||
docTitles[docNum] = title;
|
||||
|
||||
for (int j = 0; j < TEXT_LENGTH; j++) {
|
||||
text = text + tokens[random.nextInt(tokens.length)] + " ";
|
||||
}
|
||||
docTexts[docNum] = text;
|
||||
|
||||
for (int facetNum = 0; facetNum < NUM_FACETS_PER_DOC; facetNum++) {
|
||||
cPaths[docNum][facetNum] = new CategoryPath(Integer
|
||||
.toString(random.nextInt(7)), Integer.toString(random.nextInt(10)));
|
||||
}
|
||||
}
|
||||
index(indexDir, taxoDir, MULTI_IPARAMS, docTitles, docTexts, cPaths);
|
||||
}
|
||||
|
||||
/**
|
||||
* More advanced method for specifying custom indexing params, doc texts,
|
||||
* doc titles and category paths.
|
||||
*/
|
||||
public static void index(Directory indexDir, Directory taxoDir,
|
||||
FacetIndexingParams iParams, String[] docTitles,
|
||||
String[] docTexts, CategoryPath[][] cPaths) throws Exception {
|
||||
// create and open an index writer
|
||||
IndexWriter iw = new IndexWriter(indexDir, new IndexWriterConfig(
|
||||
ExampleUtils.EXAMPLE_VER, SimpleUtils.analyzer).setOpenMode(OpenMode.CREATE));
|
||||
// create and open a taxonomy writer
|
||||
LuceneTaxonomyWriter taxo = new LuceneTaxonomyWriter(taxoDir, OpenMode.CREATE);
|
||||
index(iw, taxo, iParams, docTitles, docTexts, cPaths);
|
||||
}
|
||||
|
||||
/**
|
||||
* More advanced method for specifying custom indexing params, doc texts,
|
||||
* doc titles and category paths.
|
||||
* <p>
|
||||
* Create an index, and adds to it sample documents and facets.
|
||||
* @throws Exception
|
||||
* on error (no detailed exception handling here for sample
|
||||
* simplicity
|
||||
*/
|
||||
public static void index(IndexWriter iw, LuceneTaxonomyWriter taxo,
|
||||
FacetIndexingParams iParams, String[] docTitles,
|
||||
String[] docTexts, CategoryPath[][] cPaths) throws Exception {
|
||||
|
||||
// loop over sample documents
|
||||
int nDocsAdded = 0;
|
||||
int nFacetsAdded = 0;
|
||||
for (int docNum = 0; docNum < SimpleUtils.docTexts.length; docNum++) {
|
||||
List<CategoryPath> facetList = SimpleUtils.categoryPathArrayToList(cPaths[docNum]);
|
||||
|
||||
// we do not alter indexing parameters!
|
||||
// a category document builder will add the categories to a document
|
||||
// once build() is called
|
||||
DocumentBuilder categoryDocBuilder = new CategoryDocumentBuilder(
|
||||
taxo, iParams).setCategoryPaths(facetList);
|
||||
|
||||
// create a plain Lucene document and add some regular Lucene fields
|
||||
// to it
|
||||
Document doc = new Document();
|
||||
doc.add(new Field(SimpleUtils.TITLE, docTitles[docNum], Store.YES, Index.ANALYZED));
|
||||
doc.add(new Field(SimpleUtils.TEXT, docTexts[docNum], Store.NO, Index.ANALYZED));
|
||||
|
||||
// finally add the document to the index
|
||||
categoryDocBuilder.build(doc);
|
||||
iw.addDocument(doc);
|
||||
|
||||
nDocsAdded++;
|
||||
nFacetsAdded += facetList.size();
|
||||
}
|
||||
|
||||
// commit changes.
|
||||
// we commit changes to the taxonomy index prior to committing them to
|
||||
// the search index.
|
||||
// this is important, so that all facets referred to by documents in the
|
||||
// search index
|
||||
// will indeed exist in the taxonomy index.
|
||||
taxo.commit();
|
||||
iw.commit();
|
||||
|
||||
// close the taxonomy index and the index - all modifications are
|
||||
// now safely in the provided directories: indexDir and taxoDir.
|
||||
taxo.close();
|
||||
iw.close();
|
||||
|
||||
ExampleUtils.log("Indexed " + nDocsAdded + " documents with overall "
|
||||
+ nFacetsAdded + " facets.");
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
index(new RAMDirectory(), new RAMDirectory());
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,65 @@
|
|||
package org.apache.lucene.facet.example.multiCL;

import java.util.List;

import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

import org.apache.lucene.facet.example.ExampleResult;
import org.apache.lucene.facet.example.ExampleUtils;
import org.apache.lucene.facet.search.results.FacetResult;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Driver for the multi category-list sample.
 *
 * @lucene.experimental
 */
public class MultiCLMain {

  /**
   * Runs the multi sample and logs completion.
   *
   * @throws Exception
   *             on error (no detailed exception handling here for sample
   *             simplicity
   */
  public static void main(String[] args) throws Exception {
    new MultiCLMain().runSample();
    ExampleUtils.log("DONE");
  }

  /**
   * Indexes the sample documents and then searches them with facets.
   *
   * @return the facet results, wrapped in an {@link ExampleResult}
   */
  public ExampleResult runSample() throws Exception {
    // in-memory directories for the search index and for the taxonomy index
    Directory indexDir = new RAMDirectory();
    Directory taxoDir = new RAMDirectory();

    // index the sample documents
    ExampleUtils.log("index the sample documents...");
    MultiCLIndexer.index(indexDir, taxoDir);

    ExampleUtils.log("search the sample documents...");
    List<FacetResult> facetRes = MultiCLSearcher.searchWithFacets(indexDir,
        taxoDir, MultiCLIndexer.MULTI_IPARAMS);

    ExampleResult result = new ExampleResult();
    result.setFacetResults(facetRes);
    return result;
  }

}
|
|
@ -0,0 +1,128 @@
|
|||
package org.apache.lucene.facet.example.multiCL;

import java.util.List;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.Directory;

import org.apache.lucene.search.MultiCollector;
import org.apache.lucene.facet.example.ExampleUtils;
import org.apache.lucene.facet.example.simple.SimpleUtils;
import org.apache.lucene.facet.index.params.FacetIndexingParams;
import org.apache.lucene.facet.search.FacetsCollector;
import org.apache.lucene.facet.search.params.CountFacetRequest;
import org.apache.lucene.facet.search.params.FacetSearchParams;
import org.apache.lucene.facet.search.results.FacetResult;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyReader;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * MultiSearcher searches index with facets over an index with multiple
 * category lists.
 *
 * @lucene.experimental
 */
public class MultiCLSearcher {

  /**
   * Search an index with facets.
   *
   * @param indexDir
   *            Directory of the search index.
   * @param taxoDir
   *            Directory of the taxonomy index.
   * @throws Exception
   *             on error (no detailed exception handling here for sample
   *             simplicity
   * @return facet results
   */
  public static List<FacetResult> searchWithFacets(Directory indexDir,
      Directory taxoDir, FacetIndexingParams iParams) throws Exception {
    // open the index reader and the taxonomy reader
    IndexReader indexReader = IndexReader.open(indexDir);
    TaxonomyReader taxo = new LuceneTaxonomyReader(taxoDir);

    // delegate to the reader-based overload
    List<FacetResult> results = searchWithFacets(indexReader, taxo, iParams);

    // we're done, close the index reader and the taxonomy.
    indexReader.close();
    taxo.close();
    return results;
  }

  /**
   * Reader-based variant: searches the given readers with facets.
   * Faceted search works in two steps: (1) collect matching documents,
   * (2) aggregate facets for the collected documents and generate the
   * requested faceted results from the aggregated facets.
   */
  public static List<FacetResult> searchWithFacets(IndexReader indexReader,
      TaxonomyReader taxo, FacetIndexingParams iParams) throws Exception {
    IndexSearcher searcher = new IndexSearcher(indexReader);

    // step 1: a query finding the matching documents for which we
    // accumulate facets
    Query q = new TermQuery(new Term(SimpleUtils.TEXT, "Quis"));
    ExampleUtils.log("Query: " + q);

    TopScoreDocCollector topDocsCollector = TopScoreDocCollector.create(10, true);

    // Faceted search parameters indicate which facets are we interested in
    FacetSearchParams facetSearchParams = new FacetSearchParams(iParams);
    CategoryPath[] categoriesOfInterest = {
        new CategoryPath("5"),
        new CategoryPath("5", "5"),
        new CategoryPath("6", "2") };
    for (CategoryPath cp : categoriesOfInterest) {
      facetSearchParams.addFacetRequest(new CountFacetRequest(cp, 10));
    }

    // FacetsCollector is the simplest interface for faceted search; it is
    // sufficient for many applications.  Tighter control over faceted
    // search behavior is available through other, lower-level interfaces,
    // as demonstrated in other search examples.
    FacetsCollector facetsCollector = new FacetsCollector(
        facetSearchParams, indexReader, taxo);

    // step 2: perform documents search and facets accumulation together
    searcher.search(q, MultiCollector.wrap(topDocsCollector, facetsCollector));

    // obtain the facet results and log them
    List<FacetResult> res = facetsCollector.getFacetResults();
    int resNum = 0;
    for (FacetResult facetResult : res) {
      ExampleUtils.log("Res " + (resNum++) + ": " + facetResult);
    }
    return res;
  }

}
|
|
@ -0,0 +1,102 @@
|
|||
package org.apache.lucene.facet.example.simple;

import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;

import org.apache.lucene.DocumentBuilder;
import org.apache.lucene.facet.example.ExampleUtils;
import org.apache.lucene.facet.index.CategoryDocumentBuilder;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyWriter;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Sample indexer creates an index, and adds to it sample documents and facets.
 *
 * @lucene.experimental
 */
public class SimpleIndexer {

  /**
   * Create an index, and adds to it sample documents and facets.
   * @param indexDir Directory in which the index should be created.
   * @param taxoDir Directory in which the taxonomy index should be created.
   * @throws Exception on error (no detailed exception handling here for sample simplicity
   */
  public static void index (Directory indexDir, Directory taxoDir) throws Exception {

    // create and open an index writer.
    // Consistency fix: open with OpenMode.CREATE, matching the taxonomy
    // writer below (and MultiCLIndexer), so re-running the sample against a
    // non-empty directory recreates the index instead of appending to it.
    IndexWriter iw = new IndexWriter(indexDir, new IndexWriterConfig(
        ExampleUtils.EXAMPLE_VER, SimpleUtils.analyzer).setOpenMode(OpenMode.CREATE));

    // create and open a taxonomy writer
    TaxonomyWriter taxo = new LuceneTaxonomyWriter(taxoDir, OpenMode.CREATE);

    // loop over sample documents
    int nDocsAdded = 0;
    int nFacetsAdded = 0;
    for (int docNum=0; docNum<SimpleUtils.docTexts.length; docNum++) {

      // obtain the sample facets for current document
      List<CategoryPath> facetList = SimpleUtils.categoryPathArrayToList(SimpleUtils.categories[docNum]);

      // we do not alter indexing parameters!
      // a category document builder will add the categories to a document once build() is called
      DocumentBuilder categoryDocBuilder = new CategoryDocumentBuilder(taxo).setCategoryPaths(facetList);

      // create a plain Lucene document and add some regular Lucene fields to it
      Document doc = new Document();
      doc.add(new Field(SimpleUtils.TITLE, SimpleUtils.docTitles[docNum], Store.YES, Index.ANALYZED));
      doc.add(new Field(SimpleUtils.TEXT, SimpleUtils.docTexts[docNum], Store.NO, Index.ANALYZED));

      // invoke the category document builder for adding categories to the document and,
      // as required, to the taxonomy index
      categoryDocBuilder.build(doc);

      // finally add the document to the index
      iw.addDocument(doc);

      nDocsAdded ++;
      nFacetsAdded += facetList.size();
    }

    // commit changes.
    // we commit changes to the taxonomy index prior to committing them to the search index.
    // this is important, so that all facets referred to by documents in the search index
    // will indeed exist in the taxonomy index.
    taxo.commit();
    iw.commit();

    // close the taxonomy index and the index - all modifications are
    // now safely in the provided directories: indexDir and taxoDir.
    taxo.close();
    iw.close();

    ExampleUtils.log("Indexed "+nDocsAdded+" documents with overall "+nFacetsAdded+" facets.");
  }

}
|
|
@ -0,0 +1,99 @@
|
|||
package org.apache.lucene.facet.example.simple;

import java.util.List;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

import org.apache.lucene.facet.example.ExampleResult;
import org.apache.lucene.facet.example.ExampleUtils;
import org.apache.lucene.facet.search.results.FacetResult;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyReader;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Driver for the simple sample.
 *
 * @lucene.experimental
 */
public class SimpleMain {

  /**
   * Driver for the simple sample.
   * @throws Exception on error (no detailed exception handling here for sample simplicity
   */
  public static void main(String[] args) throws Exception {
    new SimpleMain().runSimple();
    new SimpleMain().runDrillDown().getFacetResults();
    ExampleUtils.log("DONE");
  }

  /**
   * Indexes the sample documents and runs a plain faceted search over them.
   *
   * @return the facet results wrapped in an {@link ExampleResult}
   */
  public ExampleResult runSimple() throws Exception {
    // in-memory directories for the search index and the taxonomy index
    Directory indexDir = new RAMDirectory();
    Directory taxoDir = new RAMDirectory();

    ExampleUtils.log("index the sample documents...");
    SimpleIndexer.index(indexDir, taxoDir);

    // open the taxonomy and index readers
    TaxonomyReader taxo = new LuceneTaxonomyReader(taxoDir);
    IndexReader indexReader = IndexReader.open(indexDir, true);

    ExampleUtils.log("search the sample documents...");
    List<FacetResult> facetRes = SimpleSearcher.searchWithFacets(indexReader, taxo);

    // release the readers
    taxo.close();
    indexReader.close();

    ExampleResult result = new ExampleResult();
    result.setFacetResults(facetRes);
    return result;
  }

  /**
   * Indexes the sample documents and runs a drill-down faceted search.
   *
   * @return the facet results wrapped in an {@link ExampleResult}
   */
  public ExampleResult runDrillDown() throws Exception {
    // in-memory directories for the search index and the taxonomy index
    Directory indexDir = new RAMDirectory();
    Directory taxoDir = new RAMDirectory();

    ExampleUtils.log("index the sample documents...");
    SimpleIndexer.index(indexDir, taxoDir);

    // open the taxonomy and index readers
    TaxonomyReader taxo = new LuceneTaxonomyReader(taxoDir);
    IndexReader indexReader = IndexReader.open(indexDir, true);

    ExampleUtils.log("search the sample documents...");
    List<FacetResult> facetRes = SimpleSearcher.searchWithDrillDown(indexReader, taxo);

    // release the readers
    taxo.close();
    indexReader.close();

    ExampleResult result = new ExampleResult();
    result.setFacetResults(facetRes);
    return result;
  }

}
|
|
@ -0,0 +1,168 @@
|
|||
package org.apache.lucene.facet.example.simple;

import java.util.Iterator;
import java.util.List;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopScoreDocCollector;

import org.apache.lucene.search.MultiCollector;
import org.apache.lucene.facet.example.ExampleUtils;
import org.apache.lucene.facet.index.params.DefaultFacetIndexingParams;
import org.apache.lucene.facet.index.params.FacetIndexingParams;
import org.apache.lucene.facet.search.DrillDown;
import org.apache.lucene.facet.search.FacetsCollector;
import org.apache.lucene.facet.search.params.CountFacetRequest;
import org.apache.lucene.facet.search.params.FacetRequest;
import org.apache.lucene.facet.search.params.FacetSearchParams;
import org.apache.lucene.facet.search.results.FacetResult;
import org.apache.lucene.facet.search.results.FacetResultNode;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * SampleSearcer searches index with facets.
 *
 * @lucene.experimental
 */
public class SimpleSearcher {

  /**
   * Search an index with facets.
   * @param indexReader index reader.
   * @param taxoReader taxonomy reader.
   * @throws Exception on error (no detailed exception handling here for sample simplicity
   * @return facet results
   */
  public static List<FacetResult> searchWithFacets (IndexReader indexReader,
      TaxonomyReader taxoReader) throws Exception {
    // count the top-10 children of root/a
    CountFacetRequest facetRequest = new CountFacetRequest(new CategoryPath("root","a"), 10);
    return searchWithRequest(indexReader, taxoReader, null, facetRequest);
  }

  /**
   * Search an index with facets for given facet requests.
   * @param indexReader index reader.
   * @param taxoReader taxonomy reader.
   * @param indexingParams the facet indexing params
   * @param facetRequests facet requests of interest
   * @throws Exception on error (no detailed exception handling here for sample simplicity
   * @return facet results
   */
  public static List<FacetResult> searchWithRequest(IndexReader indexReader,
      TaxonomyReader taxoReader, FacetIndexingParams indexingParams,
      FacetRequest... facetRequests) throws Exception {
    // use a fixed sample query and delegate
    Query query = new TermQuery(new Term(SimpleUtils.TEXT, "white"));
    return searchWithRequestAndQuery(query, indexReader, taxoReader,
        indexingParams, facetRequests);
  }

  /**
   * Search an index with facets for given query and facet requests.
   * @param q query of interest
   * @param indexReader index reader.
   * @param taxoReader taxonomy reader.
   * @param indexingParams the facet indexing params
   * @param facetRequests facet requests of interest
   * @throws Exception on error (no detailed exception handling here for sample simplicity
   * @return facet results
   */
  public static List<FacetResult> searchWithRequestAndQuery(Query q,
      IndexReader indexReader, TaxonomyReader taxoReader,
      FacetIndexingParams indexingParams, FacetRequest... facetRequests)
      throws Exception {

    ExampleUtils.log("Query: " + q);
    // searcher over the supplied reader
    IndexSearcher searcher = new IndexSearcher(indexReader);

    // collector for the top-10 matching documents
    TopScoreDocCollector docCollector = TopScoreDocCollector.create(10, true);

    // fall back to default indexing params when none were supplied
    if (indexingParams == null) {
      indexingParams = new DefaultFacetIndexingParams();
    }

    // faceted search parameters: which facets are we interested in
    FacetSearchParams searchParams = new FacetSearchParams(indexingParams);
    for (FacetRequest request : facetRequests) {
      searchParams.addFacetRequest(request);
    }

    FacetsCollector facetsCollector = new FacetsCollector(searchParams, indexReader, taxoReader);

    // run document search and facet accumulation in a single pass
    searcher.search(q, MultiCollector.wrap(docCollector, facetsCollector));

    // obtain the facet results and log them
    List<FacetResult> res = facetsCollector.getFacetResults();
    int resNum = 0;
    for (FacetResult facetResult : res) {
      ExampleUtils.log("Res " + (resNum++) + ": " + facetResult);
    }

    return res;
  }

  /**
   * Search an index with facets drill-down.
   * @param indexReader index reader.
   * @param taxoReader taxonomy reader.
   * @throws Exception on error (no detailed exception handling here for sample simplicity
   * @return facet results
   */
  public static List<FacetResult> searchWithDrillDown(IndexReader indexReader,
      TaxonomyReader taxoReader) throws Exception {

    // base query the user is interested in
    Query baseQuery = new TermQuery(new Term(SimpleUtils.TEXT, "white"));

    // facet of interest
    CountFacetRequest facetRequest = new CountFacetRequest(new CategoryPath("root","a"), 10);

    // initial search - all docs matching the base query contribute to the accumulation
    List<FacetResult> initialResults = searchWithRequest(indexReader, taxoReader, null, facetRequest);

    // a single result (because there was a single request)
    FacetResult first = initialResults.get(0);

    // assume the user is interested in the second sub-result
    // (just take the second sub-result returned by the iterator - we know there are 3 results!)
    Iterator<? extends FacetResultNode> subResults =
        first.getFacetResultNode().getSubResults().iterator();
    subResults.next(); // skip first result
    CategoryPath categoryOfInterest = subResults.next().getLabel();

    // drill-down: narrow the base query to the category of interest
    Query drillDownQuery = DrillDown.query(baseQuery, categoryOfInterest);

    // search again - only documents matching the base query AND containing
    // the category of interest contribute to the new accumulation
    return searchWithRequestAndQuery(drillDownQuery, indexReader, taxoReader, null, facetRequest);
  }

}
|
|
@ -0,0 +1,87 @@
|
|||
package org.apache.lucene.facet.example.simple;

import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;

import org.apache.lucene.facet.example.ExampleUtils;
import org.apache.lucene.facet.taxonomy.CategoryPath;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Some definitions for the Simple Sample.
 *
 * @lucene.experimental
 */
public class SimpleUtils {

  /** Documents text field. */
  public static final String TEXT = "text";

  /** Documents title field. */
  public static final String TITLE = "title";

  /** sample documents text (for the text field). */
  public static String[] docTexts = {
    "the white car is the one I want.",
    "the white dog does not belong to anyone.",
  };

  /** sample documents titles (for the title field). */
  public static String[] docTitles = {
    "white car",
    "white dog",
  };

  /** Categories: categories[D][N] == category-path no. N for document no. D. */
  public static CategoryPath[][] categories = {
    { new CategoryPath("root","a","f1"), new CategoryPath("root","a","f2") },
    { new CategoryPath("root","a","f1"), new CategoryPath("root","a","f3") },
  };

  /** Analyzer used in the simple sample. */
  public static final Analyzer analyzer = new WhitespaceAnalyzer(ExampleUtils.EXAMPLE_VER);

  /**
   * Utility method: List of category paths out of an array of them...
   * @param categoryPaths input array of category paths.
   */
  public static List<CategoryPath> categoryPathArrayToList (CategoryPath...categoryPaths) {
    // copy the varargs array into a freshly allocated, right-sized list
    ArrayList<CategoryPath> paths = new ArrayList<CategoryPath>(categoryPaths.length);
    for (CategoryPath path : categoryPaths) {
      paths.add(path);
    }
    return paths;
  }

}
|
|
@ -0,0 +1,17 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>Simple faceted indexing and search sample</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>Simple faceted indexing and search sample</h1>
|
||||
|
||||
A simple faceted example, showing how to:
|
||||
<ol>
|
||||
<li>Create an index.</li>
|
||||
<li>Add documents with facets to the index.</li>
|
||||
<li>Search the index.</li>
|
||||
</ol>
|
||||
|
||||
For more complex examples see the other sample code packages.
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,77 @@
|
|||
package org.apache.lucene;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* An interface which standardizes the process of building an indexable
|
||||
* {@link Document}.
|
||||
* <p>
|
||||
* The idea is that implementations implement {@link #build(Document doc)},
|
||||
* which adds to the given Document whatever {@link Field}s it wants to add. A
|
||||
* DocumentBuilder is also allowed to inspect or change existing Fields in the
|
||||
* Document, if it wishes to.
|
||||
* <p>
|
||||
* Implementations should normally have a constructor with parameters which
|
||||
* determine what {@link #build(Document)} will add to doc.<br>
|
||||
* To allow reuse of the DocumentBuilder object, implementations are also
|
||||
* encouraged to have a setter method, which remembers its parameters just like
|
||||
* the constructor. This setter method cannot be described in this interface,
|
||||
* because it will take different parameters in each implementation.
|
||||
* <p>
|
||||
* The interface defines a builder pattern, which allows applications to invoke
|
||||
* several document builders in the following way:
|
||||
*
|
||||
* <pre>
|
||||
* builder1.build(builder2.build(builder3.build(new Document())));
|
||||
* </pre>
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public interface DocumentBuilder {

  /**
   * An exception thrown from {@link DocumentBuilder}'s build().
   * <p>
   * Wraps any failure encountered while adding fields to a document, so that
   * callers of {@link DocumentBuilder#build(Document)} have a single checked
   * exception type to handle.
   */
  public static class DocumentBuilderException extends Exception {

    /** Creates an exception with no message and no cause. */
    public DocumentBuilderException() {
      super();
    }

    /** Creates an exception with the given detail message. */
    public DocumentBuilderException(String message) {
      super(message);
    }

    /** Creates an exception with the given detail message and underlying cause. */
    public DocumentBuilderException(String message, Throwable cause) {
      super(message, cause);
    }

    /** Creates an exception wrapping the given underlying cause. */
    public DocumentBuilderException(Throwable cause) {
      super(cause);
    }

  }

  /**
   * Adds to the given document whatever {@link Field}s the implementation needs
   * to add. Returns the document instance to allow for chaining calls.
   */
  public Document build(Document doc) throws DocumentBuilderException;

}
|
|
@ -0,0 +1,46 @@
|
|||
package org.apache.lucene.facet;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A parent class for exceptions thrown by the Facets code.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
/**
 * A parent class for exceptions thrown by the Facets code.
 * <p>
 * All constructors delegate to the matching {@link IOException} constructors
 * so that message and cause are chained the standard way (in particular,
 * {@link #FacetException(Throwable)} now inherits the conventional
 * {@code cause.toString()} detail message instead of leaving it null).
 *
 * @lucene.experimental
 */
public class FacetException extends IOException {

  /** Creates an exception with no message and no cause. */
  public FacetException() {
    super();
  }

  /** Creates an exception with the given detail message. */
  public FacetException(String message) {
    super(message);
  }

  /**
   * Creates an exception with the given detail message and cause.
   * Uses the cause-chaining super constructor rather than a separate
   * {@code initCause} call.
   */
  public FacetException(String message, Throwable cause) {
    super(message, cause);
  }

  /** Creates an exception wrapping the given cause. */
  public FacetException(Throwable cause) {
    super(cause);
  }

}
|
|
@ -0,0 +1,127 @@
|
|||
package org.apache.lucene.facet.enhancements;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
||||
import org.apache.lucene.facet.enhancements.params.EnhancementsIndexingParams;
|
||||
import org.apache.lucene.facet.index.attributes.CategoryAttribute;
|
||||
import org.apache.lucene.facet.index.attributes.CategoryProperty;
|
||||
import org.apache.lucene.facet.index.streaming.CategoryListTokenizer;
|
||||
import org.apache.lucene.facet.index.streaming.CategoryParentsStream;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* This interface allows easy addition of enhanced category features. Usually, a
|
||||
* {@link CategoryEnhancement} will correspond to a {@link CategoryProperty}.
|
||||
* <p>
|
||||
* A category enhancement can contribute to the index in two possible ways:
|
||||
* <ol>
|
||||
* <li>To each category with data relevant to the enhancement, add this data to
|
||||
* the category's token payload, through
|
||||
* {@link #getCategoryTokenBytes(CategoryAttribute)}. This data will be read
|
||||
* during search using {@link #extractCategoryTokenData(byte[], int, int)}.</li>
|
||||
* <li>To each document which contains categories with data relevant to the
|
||||
* enhancement, add a {@link CategoryListTokenizer} through
|
||||
* {@link #getCategoryListTokenizer(TokenStream, EnhancementsIndexingParams, TaxonomyWriter)}
|
||||
* . The {@link CategoryListTokenizer} should add a single token which includes
|
||||
* all the enhancement relevant data from the categories. The category list
|
||||
* token's text is defined by {@link #getCategoryListTermText()}.</li>
|
||||
* </ol>
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public interface CategoryEnhancement {

  /**
   * Get the bytes to be added to the category token payload for this
   * enhancement.
   * <p>
   * <b>NOTE</b>: The returned array is copied, it is recommended to allocate
   * a new one each time.
   * <p>
   * The bytes generated by this method are the input of
   * {@link #extractCategoryTokenData(byte[], int, int)}.
   *
   * @param categoryAttribute
   *            The attribute of the category.
   * @return The bytes to be added to the category token payload for this
   *         enhancement, or {@code null} if this category carries no data
   *         for this enhancement.
   */
  byte[] getCategoryTokenBytes(CategoryAttribute categoryAttribute);

  /**
   * Get the data of this enhancement from a category token payload.
   * <p>
   * The input bytes for this method are generated in
   * {@link #getCategoryTokenBytes(CategoryAttribute)}.
   *
   * @param buffer
   *            The payload buffer.
   * @param offset
   *            The offset of this enhancement's data in the buffer.
   * @param length
   *            The length of this enhancement's data (bytes).
   * @return An Object containing the data.
   */
  Object extractCategoryTokenData(byte[] buffer, int offset, int length);

  /**
   * Declarative method to indicate whether this enhancement generates a
   * separate category list.
   *
   * @return {@code true} if it generates a category list, else {@code false}.
   */
  boolean generatesCategoryList();

  /**
   * Returns the text of this enhancement's category list term.
   *
   * @return The text of this enhancement's category list term.
   */
  String getCategoryListTermText();

  /**
   * Get the {@link CategoryListTokenizer} which generates the category list
   * for this enhancement. If {@link #generatesCategoryList()} returns
   * {@code false} this method will not be called.
   *
   * @param tokenizer
   *            The input stream containing categories.
   * @param indexingParams
   *            The indexing params to use.
   * @param taxonomyWriter
   *            The taxonomy to add categories and get their ordinals.
   * @return A {@link CategoryListTokenizer} generating the category list for
   *         this enhancement, with {@code tokenizer} as its input.
   */
  CategoryListTokenizer getCategoryListTokenizer(TokenStream tokenizer,
      EnhancementsIndexingParams indexingParams,
      TaxonomyWriter taxonomyWriter);

  /**
   * Get a {@link CategoryProperty} class to be retained when creating
   * {@link CategoryParentsStream}.
   *
   * @return the {@link CategoryProperty} class to be retained when creating
   *         {@link CategoryParentsStream}, or {@code null} if there is no
   *         such property.
   */
  Class<? extends CategoryProperty> getRetainableProperty();

}
|
|
@ -0,0 +1,121 @@
|
|||
package org.apache.lucene.facet.enhancements;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
||||
import org.apache.lucene.facet.enhancements.params.EnhancementsIndexingParams;
|
||||
import org.apache.lucene.facet.index.streaming.CategoryTokenizer;
|
||||
import org.apache.lucene.util.Vint8;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A tokenizer which adds to each category token payload according to the
|
||||
* {@link CategoryEnhancement}s defined in the given
|
||||
* {@link EnhancementsIndexingParams}.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class EnhancementsCategoryTokenizer extends CategoryTokenizer {
|
||||
|
||||
/**
|
||||
* The data buffer used for payload instance.
|
||||
*/
|
||||
protected byte[] payloadBytes;
|
||||
|
||||
/**
|
||||
* The category enhancements to handle
|
||||
*/
|
||||
protected List<CategoryEnhancement> enhancements;
|
||||
|
||||
/**
|
||||
* Buffers for enhancement payload bytes
|
||||
*/
|
||||
protected byte[][] enhancementBytes;
|
||||
|
||||
private int nStart;
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
*
|
||||
* @param input
|
||||
* The stream of category tokens.
|
||||
* @param indexingParams
|
||||
* The indexing params to use.
|
||||
* @throws IOException
|
||||
*/
|
||||
public EnhancementsCategoryTokenizer(TokenStream input,
|
||||
EnhancementsIndexingParams indexingParams) throws IOException {
|
||||
super(input, indexingParams);
|
||||
payloadBytes = new byte[Vint8.MAXIMUM_BYTES_NEEDED
|
||||
* (indexingParams.getCategoryEnhancements().size() + 1)];
|
||||
enhancements = indexingParams.getCategoryEnhancements();
|
||||
if (enhancements != null) {
|
||||
// create array of bytes per enhancement
|
||||
enhancementBytes = new byte[enhancements.size()][];
|
||||
// write once the number of enhancements in the payload bytes
|
||||
nStart = Vint8.encode(enhancements.size(), payloadBytes, 0);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void setPayload() {
|
||||
this.payloadAttribute.setPayload(null);
|
||||
if (enhancements == null) {
|
||||
return;
|
||||
}
|
||||
// clear previous payload content
|
||||
int nBytes = nStart;
|
||||
int i = 0;
|
||||
int nEnhancementBytes = 0;
|
||||
for (CategoryEnhancement enhancement : enhancements) {
|
||||
// get payload bytes from each enhancement
|
||||
enhancementBytes[i] = enhancement
|
||||
.getCategoryTokenBytes(categoryAttribute);
|
||||
// write the number of bytes in the payload
|
||||
if (enhancementBytes[i] == null) {
|
||||
nBytes += Vint8.encode(0, payloadBytes, nBytes);
|
||||
} else {
|
||||
nBytes += Vint8.encode(enhancementBytes[i].length,
|
||||
payloadBytes, nBytes);
|
||||
nEnhancementBytes += enhancementBytes[i].length;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
if (nEnhancementBytes > 0) {
|
||||
// make sure we have space for all bytes
|
||||
if (payloadBytes.length < nBytes + nEnhancementBytes) {
|
||||
byte[] temp = new byte[(nBytes + nEnhancementBytes) * 2];
|
||||
System.arraycopy(payloadBytes, 0, temp, 0, nBytes);
|
||||
payloadBytes = temp;
|
||||
}
|
||||
for (i = 0; i < enhancementBytes.length; i++) {
|
||||
// add the enhancement payload bytes after the existing bytes
|
||||
if (enhancementBytes[i] != null) {
|
||||
System.arraycopy(enhancementBytes[i], 0, payloadBytes,
|
||||
nBytes, enhancementBytes[i].length);
|
||||
nBytes += enhancementBytes[i].length;
|
||||
}
|
||||
}
|
||||
payload.setData(payloadBytes, 0, nBytes);
|
||||
payloadAttribute.setPayload(payload);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,93 @@
|
|||
package org.apache.lucene.facet.enhancements;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
||||
import org.apache.lucene.facet.enhancements.params.EnhancementsIndexingParams;
|
||||
import org.apache.lucene.facet.index.CategoryDocumentBuilder;
|
||||
import org.apache.lucene.facet.index.attributes.CategoryProperty;
|
||||
import org.apache.lucene.facet.index.streaming.CategoryAttributesStream;
|
||||
import org.apache.lucene.facet.index.streaming.CategoryListTokenizer;
|
||||
import org.apache.lucene.facet.index.streaming.CategoryParentsStream;
|
||||
import org.apache.lucene.facet.index.streaming.CategoryTokenizer;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* An {@link EnhancementsDocumentBuilder} is a {@link CategoryDocumentBuilder}
|
||||
* which adds categories to documents according to the list of
|
||||
* {@link CategoryEnhancement}s from {@link EnhancementsIndexingParams}. The
|
||||
* additions over {@link CategoryDocumentBuilder} could be in both category
|
||||
* tokens, and additional category lists.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class EnhancementsDocumentBuilder extends CategoryDocumentBuilder {
|
||||
|
||||
/**
|
||||
* @param taxonomyWriter
|
||||
* @param params
|
||||
* Indexing params which include {@link CategoryEnhancement}s.
|
||||
* @throws IOException
|
||||
*/
|
||||
public EnhancementsDocumentBuilder(TaxonomyWriter taxonomyWriter,
|
||||
EnhancementsIndexingParams params) throws IOException {
|
||||
super(taxonomyWriter, params);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected TokenStream getParentsStream(CategoryAttributesStream categoryAttributesStream) {
|
||||
List<Class<? extends CategoryProperty>> toRetainList = ((EnhancementsIndexingParams) indexingParams)
|
||||
.getRetainableProperties();
|
||||
if (toRetainList != null) {
|
||||
CategoryParentsStream categoryParentsStream = new CategoryParentsStream(
|
||||
categoryAttributesStream, taxonomyWriter, indexingParams);
|
||||
for (Class<? extends CategoryProperty> toRetain : toRetainList) {
|
||||
categoryParentsStream.addRetainableProperty(toRetain);
|
||||
}
|
||||
return categoryParentsStream;
|
||||
}
|
||||
return super.getParentsStream(categoryAttributesStream);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected CategoryListTokenizer getCategoryListTokenizer(TokenStream categoryStream) {
|
||||
CategoryListTokenizer tokenizer = super.getCategoryListTokenizer(categoryStream);
|
||||
// Add tokenizer for each enhancement that produces category list
|
||||
for (CategoryEnhancement enhancement : ((EnhancementsIndexingParams) indexingParams)
|
||||
.getCategoryEnhancements()) {
|
||||
if (enhancement.generatesCategoryList()) {
|
||||
tokenizer = enhancement.getCategoryListTokenizer(tokenizer,
|
||||
(EnhancementsIndexingParams) indexingParams,
|
||||
taxonomyWriter);
|
||||
}
|
||||
}
|
||||
return tokenizer;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected CategoryTokenizer getCategoryTokenizer(TokenStream categoryStream)
|
||||
throws IOException {
|
||||
return new EnhancementsCategoryTokenizer(categoryStream,
|
||||
(EnhancementsIndexingParams) indexingParams);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,105 @@
|
|||
package org.apache.lucene.facet.enhancements;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.Term;
|
||||
|
||||
import org.apache.lucene.facet.search.PayloadIterator;
|
||||
import org.apache.lucene.util.Vint8;
|
||||
import org.apache.lucene.util.Vint8.Position;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A {@link PayloadIterator} for iterating over category posting lists generated
|
||||
* using {@link EnhancementsCategoryTokenizer}.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class EnhancementsPayloadIterator extends PayloadIterator {
|
||||
|
||||
private CategoryEnhancement[] EnhancedCategories;
|
||||
int nEnhancements;
|
||||
private int[] enhancementLength;
|
||||
private int[] enhancementStart;
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
*
|
||||
* @param enhancementsList
|
||||
* A list of the {@link CategoryEnhancement}s from the indexing
|
||||
* params.
|
||||
* @param indexReader
|
||||
* A reader of the index.
|
||||
* @param term
|
||||
* The category term to iterate.
|
||||
* @throws IOException
|
||||
*/
|
||||
public EnhancementsPayloadIterator(
|
||||
List<CategoryEnhancement> enhancementsList,
|
||||
IndexReader indexReader, Term term) throws IOException {
|
||||
super(indexReader, term);
|
||||
EnhancedCategories = enhancementsList
|
||||
.toArray(new CategoryEnhancement[enhancementsList.size()]);
|
||||
enhancementLength = new int[EnhancedCategories.length];
|
||||
enhancementStart = new int[EnhancedCategories.length];
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean setdoc(int docId) throws IOException {
|
||||
if (!super.setdoc(docId)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// read header - number of enhancements and their lengths
|
||||
Position position = new Position();
|
||||
nEnhancements = Vint8.decode(buffer, position);
|
||||
for (int i = 0; i < nEnhancements; i++) {
|
||||
enhancementLength[i] = Vint8.decode(buffer, position);
|
||||
}
|
||||
|
||||
// set enhancements start points
|
||||
enhancementStart[0] = position.pos;
|
||||
for (int i = 1; i < nEnhancements; i++) {
|
||||
enhancementStart[i] = enhancementStart[i - 1] + enhancementLength[i - 1];
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the data of the current category and document for a certain
|
||||
* enhancement, or {@code null} if no such enhancement exists.
|
||||
*
|
||||
* @param enhancedCategory
|
||||
* The category enhancement to apply.
|
||||
* @return the data of the current category and document for a certain
|
||||
* enhancement, or {@code null} if no such enhancement exists.
|
||||
*/
|
||||
public Object getCategoryData(CategoryEnhancement enhancedCategory) {
|
||||
for (int i = 0; i < nEnhancements; i++) {
|
||||
if (enhancedCategory.equals(EnhancedCategories[i])) {
|
||||
return enhancedCategory.extractCategoryTokenData(buffer,
|
||||
enhancementStart[i], enhancementLength[i]);
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,153 @@
|
|||
package org.apache.lucene.facet.enhancements.association;
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
||||
import org.apache.lucene.facet.enhancements.CategoryEnhancement;
|
||||
import org.apache.lucene.facet.enhancements.params.EnhancementsIndexingParams;
|
||||
import org.apache.lucene.facet.index.attributes.CategoryAttribute;
|
||||
import org.apache.lucene.facet.index.attributes.CategoryProperty;
|
||||
import org.apache.lucene.facet.index.streaming.CategoryListTokenizer;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
|
||||
import org.apache.lucene.util.Vint8;
|
||||
import org.apache.lucene.util.Vint8.Position;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A {@link CategoryEnhancement} for adding associations data to the index
|
||||
* (categories with {@link AssociationProperty}s).
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class AssociationEnhancement implements CategoryEnhancement {
|
||||
|
||||
static final String CATEGORY_LIST_TERM_TEXT = "CATEGORY_ASSOCIATION_LIST";
|
||||
|
||||
/** Property Classes which extend AssociationProperty */
|
||||
private static final HashSet<Class<? extends CategoryProperty>> ASSOCIATION_PROPERTY_CLASSES;
|
||||
|
||||
/** Property Classes which do not extend AssociationProperty */
|
||||
private static final HashSet<Class<? extends CategoryProperty>> NON_ASSOCIATION_PROPERTY_CLASSES;
|
||||
|
||||
static {
|
||||
ASSOCIATION_PROPERTY_CLASSES = new HashSet<Class<? extends CategoryProperty>>();
|
||||
NON_ASSOCIATION_PROPERTY_CLASSES = new HashSet<Class<? extends CategoryProperty>>();
|
||||
}
|
||||
|
||||
/**
|
||||
* For a given class which extends a CategoryProperty, answers whether it is
|
||||
* an instance of AssociationProperty (AP) or not. <br>
|
||||
* This method is a cheaper replacement for a call to
|
||||
* <code>instanceof</code>. It has two HashSets - one for classes which are
|
||||
* an extension to AP and one for the classes which are not. Whenever a
|
||||
* property class is introduced:
|
||||
* <ul>
|
||||
* <li>if it is known as a property class extending AP (contained in the
|
||||
* validHashSet)- returns true</li>
|
||||
* <li>if it is known as a property class NOT extending AP - returns false</li>
|
||||
* <li>
|
||||
* If it was not matched against both sets, it calls 'instanceof' to find
|
||||
* out if it extends AP, puts it in the matching Set and returning true or
|
||||
* false accordingly</li>
|
||||
*</ul>
|
||||
*
|
||||
* NOTE: 'instanceof' is only called once per a Class (not instance) of a
|
||||
* property. And as there are few properties (currently 4 concrete
|
||||
* implementations) the two sets would be rather small
|
||||
*/
|
||||
public static boolean isAssociationProperty(Class<? extends CategoryProperty> clazz) {
|
||||
if (ASSOCIATION_PROPERTY_CLASSES.contains(clazz)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (NON_ASSOCIATION_PROPERTY_CLASSES.contains(clazz)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (AssociationProperty.class.isAssignableFrom(clazz)) {
|
||||
ASSOCIATION_PROPERTY_CLASSES.add(clazz);
|
||||
return true;
|
||||
}
|
||||
|
||||
NON_ASSOCIATION_PROPERTY_CLASSES.add(clazz);
|
||||
return false;
|
||||
}
|
||||
|
||||
public boolean generatesCategoryList() {
|
||||
return true;
|
||||
}
|
||||
|
||||
public String getCategoryListTermText() {
|
||||
return CATEGORY_LIST_TERM_TEXT;
|
||||
}
|
||||
|
||||
public CategoryListTokenizer getCategoryListTokenizer(
|
||||
TokenStream tokenizer, EnhancementsIndexingParams indexingParams,
|
||||
TaxonomyWriter taxonomyWriter) {
|
||||
return new AssociationListTokenizer(tokenizer, indexingParams, this);
|
||||
}
|
||||
|
||||
public byte[] getCategoryTokenBytes(CategoryAttribute categoryAttribute) {
|
||||
|
||||
AssociationProperty property = getAssociationProperty(categoryAttribute);
|
||||
|
||||
if (property == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
int association = property.getAssociation();
|
||||
int bytesNeeded = Vint8.bytesNeeded(association);
|
||||
byte[] buffer = new byte[bytesNeeded];
|
||||
Vint8.encode(association, buffer, 0);
|
||||
return buffer;
|
||||
}
|
||||
|
||||
public static AssociationProperty getAssociationProperty(
|
||||
CategoryAttribute categoryAttribute) {
|
||||
AssociationProperty property = null;
|
||||
Set<Class<? extends CategoryProperty>> propertyClasses = categoryAttribute
|
||||
.getPropertyClasses();
|
||||
if (propertyClasses == null) {
|
||||
return null;
|
||||
}
|
||||
for (Class<? extends CategoryProperty> clazz : propertyClasses) {
|
||||
if (isAssociationProperty(clazz)) {
|
||||
property = (AssociationProperty) categoryAttribute
|
||||
.getProperty(clazz);
|
||||
break;
|
||||
}
|
||||
}
|
||||
return property;
|
||||
}
|
||||
|
||||
public Object extractCategoryTokenData(byte[] buffer, int offset, int length) {
|
||||
if (length == 0) {
|
||||
return null;
|
||||
}
|
||||
Integer i = Integer.valueOf(Vint8.decode(buffer, new Position(offset)));
|
||||
return i;
|
||||
}
|
||||
|
||||
public Class<? extends CategoryProperty> getRetainableProperty() {
|
||||
return null;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,74 @@
|
|||
package org.apache.lucene.facet.enhancements.association;
|
||||
|
||||
import org.apache.lucene.facet.index.attributes.CategoryProperty;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* An {@link AssociationProperty} which treats the association as float - the
|
||||
* association bits are actually float bits, and thus merging two associations
|
||||
* is done by float summation.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class AssociationFloatProperty extends AssociationProperty {
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
*
|
||||
* @param value
|
||||
* The association value.
|
||||
*/
|
||||
public AssociationFloatProperty(float value) {
|
||||
super(Float.floatToIntBits(value));
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
if (other == this) {
|
||||
return true;
|
||||
}
|
||||
if (!(other instanceof AssociationFloatProperty)) {
|
||||
return false;
|
||||
}
|
||||
AssociationFloatProperty o = (AssociationFloatProperty) other;
|
||||
return o.association == this.association;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return "AssociationFloatProperty".hashCode() * 31 + (int) association;
|
||||
}
|
||||
|
||||
public void merge(CategoryProperty other) {
|
||||
AssociationFloatProperty o = (AssociationFloatProperty) other;
|
||||
this.association = Float.floatToIntBits(Float
|
||||
.intBitsToFloat((int) this.association)
|
||||
+ Float.intBitsToFloat((int) o.association));
|
||||
}
|
||||
|
||||
public float getFloatAssociation() {
|
||||
return Float.intBitsToFloat((int) association);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return getClass().getSimpleName() + ": " + Float.intBitsToFloat(getAssociation());
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,60 @@
|
|||
package org.apache.lucene.facet.enhancements.association;
|
||||
|
||||
import org.apache.lucene.facet.index.attributes.CategoryProperty;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* An {@link AssociationProperty} which treats the association as int - merges
|
||||
* two associations by summation.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class AssociationIntProperty extends AssociationProperty {
|
||||
|
||||
/**
|
||||
* @param value
|
||||
* The association value.
|
||||
*/
|
||||
public AssociationIntProperty(int value) {
|
||||
super(value);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
if (other == this) {
|
||||
return true;
|
||||
}
|
||||
if (!(other instanceof AssociationIntProperty)) {
|
||||
return false;
|
||||
}
|
||||
AssociationIntProperty o = (AssociationIntProperty) other;
|
||||
return o.association == this.association;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return "AssociationIntProperty".hashCode() * 31 + (int) association;
|
||||
}
|
||||
|
||||
public void merge(CategoryProperty other) {
|
||||
AssociationIntProperty o = (AssociationIntProperty) other;
|
||||
this.association += o.association;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,90 @@
|
|||
package org.apache.lucene.facet.enhancements.association;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
||||
import org.apache.lucene.facet.enhancements.CategoryEnhancement;
|
||||
import org.apache.lucene.facet.enhancements.params.EnhancementsIndexingParams;
|
||||
import org.apache.lucene.facet.index.CategoryListPayloadStream;
|
||||
import org.apache.lucene.facet.index.attributes.OrdinalProperty;
|
||||
import org.apache.lucene.facet.index.streaming.CategoryListTokenizer;
|
||||
import org.apache.lucene.util.encoding.SimpleIntEncoder;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Tokenizer for associations of a category
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class AssociationListTokenizer extends CategoryListTokenizer {

  // Accumulates encoded (ordinal, association) int pairs for the current
  // document; lazily created — stays null until an associated category is seen.
  protected CategoryListPayloadStream payloadStream;

  // Term text under which the accumulated associations payload is emitted
  // as one extra token after the input stream is exhausted.
  private String categoryListTermText;

  public AssociationListTokenizer(TokenStream input,
      EnhancementsIndexingParams indexingParams, CategoryEnhancement enhancement) {
    super(input, indexingParams);
    categoryListTermText = enhancement.getCategoryListTermText();
  }

  // Reset per-document state before consuming a new input stream.
  @Override
  protected void handleStartOfInput() throws IOException {
    payloadStream = null;
  }

  @Override
  public final boolean incrementToken() throws IOException {
    if (input.incrementToken()) {
      // Pass each upstream token through unchanged; if it carries a category
      // with a set association, record the (ordinal, association) pair.
      // NOTE(review): categoryAttribute is presumably inherited from
      // CategoryListTokenizer's hierarchy — confirm against the superclass.
      if (categoryAttribute != null) {
        AssociationProperty associationProperty = AssociationEnhancement
            .getAssociationProperty(categoryAttribute);
        if (associationProperty != null
            && associationProperty.hasBeenSet()) {
          OrdinalProperty ordinalProperty = (OrdinalProperty) categoryAttribute
              .getProperty(OrdinalProperty.class);
          if (ordinalProperty == null) {
            // An association is meaningless without the ordinal it refers to.
            throw new IOException(
                "Error: Association without ordinal");
          }

          if (payloadStream == null) {
            payloadStream = new CategoryListPayloadStream(
                new SimpleIntEncoder());
          }
          // Order matters: ordinal first, then its association value —
          // readers decode the payload as (ordinal, value) pairs.
          payloadStream.appendIntToStream(ordinalProperty
              .getOrdinal());
          payloadStream.appendIntToStream(associationProperty
              .getAssociation());
        }
      }
      return true;
    }
    // Input exhausted: if any associations were collected, emit a single
    // trailing token whose payload carries all of them, then clear the
    // buffer so a second exhausted call returns false.
    if (payloadStream != null) {
      termAttribute.setEmpty().append(categoryListTermText);
      payload.setData(payloadStream.convertStreamToByteArray());
      payloadAttribute.setPayload(payload);
      payloadStream = null;
      return true;
    }
    return false;
  }

}
|
|
@ -0,0 +1,73 @@
|
|||
package org.apache.lucene.facet.enhancements.association;
|
||||
|
||||
import org.apache.lucene.facet.index.attributes.CategoryAttribute;
|
||||
import org.apache.lucene.facet.index.attributes.CategoryProperty;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A {@link CategoryProperty} associating a single integer value to a
|
||||
* {@link CategoryAttribute}. It should be used to describe the association
|
||||
* between the category and the document.
|
||||
* <p>
|
||||
* This class leaves to extending classes the definition of
|
||||
* {@link #merge(CategoryProperty)} policy for the integer associations.
|
||||
* <p>
|
||||
* <B>Note:</B> The association value is added both to a special category list,
|
||||
* and to the category tokens.
|
||||
*
|
||||
* @see AssociationEnhancement
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public abstract class AssociationProperty implements CategoryProperty {
|
||||
|
||||
protected long association = Integer.MAX_VALUE + 1;
|
||||
|
||||
/**
|
||||
* Construct an {@link AssociationProperty}.
|
||||
*
|
||||
* @param value
|
||||
* The association value.
|
||||
*/
|
||||
public AssociationProperty(int value) {
|
||||
this.association = value;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the association value.
|
||||
*
|
||||
* @return The association value.
|
||||
*/
|
||||
public int getAssociation() {
|
||||
return (int) association;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns whether this attribute has been set (not all categories have an
|
||||
* association).
|
||||
*/
|
||||
public boolean hasBeenSet() {
|
||||
return this.association <= Integer.MAX_VALUE;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return getClass().getSimpleName() + ": " + association;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,235 @@
|
|||
package org.apache.lucene.facet.enhancements.association;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.Term;
|
||||
|
||||
import org.apache.lucene.facet.index.params.CategoryListParams;
|
||||
import org.apache.lucene.facet.search.PayloadIntDecodingIterator;
|
||||
import org.apache.lucene.util.collections.IntIterator;
|
||||
import org.apache.lucene.util.collections.IntToIntMap;
|
||||
import org.apache.lucene.util.encoding.SimpleIntDecoder;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Allows easy iteration over the associations payload, decoding and breaking it
|
||||
* to (ordinal, value) pairs, stored in a hash.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class AssociationsPayloadIterator {
|
||||
|
||||
/**
|
||||
* Default Term for associations
|
||||
*/
|
||||
public static final Term ASSOCIATION_POSTING_TERM = new Term(
|
||||
CategoryListParams.DEFAULT_TERM.field(),
|
||||
AssociationEnhancement.CATEGORY_LIST_TERM_TEXT);
|
||||
|
||||
/**
|
||||
* Hash mapping to ordinals to the associated int value
|
||||
*/
|
||||
private IntToIntMap ordinalToAssociationMap;
|
||||
|
||||
/**
|
||||
* An inner payload decoder which actually goes through the posting and
|
||||
* decode the ints representing the ordinals and the values
|
||||
*/
|
||||
private PayloadIntDecodingIterator associationPayloadIter;
|
||||
|
||||
/**
|
||||
* Marking whether there are associations (at all) in the given index
|
||||
*/
|
||||
private boolean hasAssociations = false;
|
||||
|
||||
/**
|
||||
* The long-special-value returned for ordinals which have no associated int
|
||||
* value. It is not in the int range of values making it a valid mark.
|
||||
*/
|
||||
public final static long NO_ASSOCIATION = Integer.MAX_VALUE + 1;
|
||||
|
||||
/**
|
||||
* Construct a new association-iterator, initializing the inner payload
|
||||
* iterator, with the supplied term and checking whether there are any
|
||||
* associations within the given index
|
||||
*
|
||||
* @param reader
|
||||
* a reader containing the postings to be iterated
|
||||
* @param field
|
||||
* the field containing the relevant associations list term
|
||||
*/
|
||||
public AssociationsPayloadIterator(IndexReader reader, String field)
|
||||
throws IOException {
|
||||
// Initialize the payloadDecodingIterator
|
||||
associationPayloadIter = new PayloadIntDecodingIterator(
|
||||
reader,
|
||||
// TODO (Facet): should consolidate with AssociationListTokenizer which
|
||||
// uses AssociationEnhancement.getCatTermText()
|
||||
new Term(field, AssociationEnhancement.CATEGORY_LIST_TERM_TEXT),
|
||||
new SimpleIntDecoder());
|
||||
|
||||
// Check whether there are any associations
|
||||
hasAssociations = associationPayloadIter.init();
|
||||
|
||||
ordinalToAssociationMap = new IntToIntMap();
|
||||
}
|
||||
|
||||
/**
|
||||
* Skipping to the next document, fetching its associations & populating the
|
||||
* map.
|
||||
*
|
||||
* @param docId
|
||||
* document id to be skipped to
|
||||
* @return true if the document contains associations and they were fetched
|
||||
* correctly. false otherwise.
|
||||
* @throws IOException
|
||||
* on error
|
||||
*/
|
||||
public boolean setNextDoc(int docId) throws IOException {
|
||||
ordinalToAssociationMap.clear();
|
||||
boolean docContainsAssociations = false;
|
||||
try {
|
||||
docContainsAssociations = fetchAssociations(docId);
|
||||
} catch (IOException e) {
|
||||
IOException ioe = new IOException(
|
||||
"An Error occured while reading a document's associations payload (docId="
|
||||
+ docId + ")");
|
||||
ioe.initCause(e);
|
||||
throw ioe;
|
||||
}
|
||||
|
||||
return docContainsAssociations;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get int association value for the given ordinal. <br>
|
||||
* The return is either an int value casted as long if the ordinal has an
|
||||
* associated value. Otherwise the returned value would be
|
||||
* {@link #NO_ASSOCIATION} which is 'pure long' value (e.g not in the int
|
||||
* range of values)
|
||||
*
|
||||
* @param ordinal
|
||||
* for which the association value is requested
|
||||
* @return the associated int value (encapsulated in a long) if the ordinal
|
||||
* had an associated value, or {@link #NO_ASSOCIATION} otherwise
|
||||
*/
|
||||
public long getAssociation(int ordinal) {
|
||||
if (ordinalToAssociationMap.containsKey(ordinal)) {
|
||||
return ordinalToAssociationMap.get(ordinal);
|
||||
}
|
||||
|
||||
return NO_ASSOCIATION;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get an iterator over the ordinals which has an association for the
|
||||
* document set by {@link #setNextDoc(int)}.
|
||||
*/
|
||||
public IntIterator getAssociatedOrdinals() {
|
||||
return ordinalToAssociationMap.keyIterator();
|
||||
}
|
||||
|
||||
/**
|
||||
* Skips to the given docId, getting the values in pairs of (ordinal, value)
|
||||
* and populating the map
|
||||
*
|
||||
* @param docId
|
||||
* document id owning the associations
|
||||
* @return true if associations were fetched successfully, false otherwise
|
||||
* @throws IOException
|
||||
* on error
|
||||
*/
|
||||
private boolean fetchAssociations(int docId) throws IOException {
|
||||
// No associations at all? don't bother trying to seek the docID in the
|
||||
// posting
|
||||
if (!hasAssociations) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// No associations for this document? well, nothing to decode than,
|
||||
// return false
|
||||
if (!associationPayloadIter.skipTo(docId)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// loop over all the values decoded from the payload in pairs.
|
||||
for (;;) {
|
||||
// Get the ordinal
|
||||
long ordinal = associationPayloadIter.nextCategory();
|
||||
|
||||
// if no ordinal - it's the end of data, break the loop
|
||||
if (ordinal > Integer.MAX_VALUE) {
|
||||
break;
|
||||
}
|
||||
|
||||
// get the associated value
|
||||
long association = associationPayloadIter.nextCategory();
|
||||
// If we're at this step - it means we have an ordinal, do we have
|
||||
// an association for it?
|
||||
if (association > Integer.MAX_VALUE) {
|
||||
// No association!!! A Broken Pair!! PANIC!
|
||||
throw new IOException(
|
||||
"ERROR! Associations should come in pairs of (ordinal, value), yet this payload has an odd number of values! (docId="
|
||||
+ docId + ")");
|
||||
}
|
||||
// Populate the map with the given ordinal and association pair
|
||||
ordinalToAssociationMap.put((int) ordinal, (int) association);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
final int prime = 31;
|
||||
int result = 1;
|
||||
result = prime
|
||||
* result
|
||||
+ ((associationPayloadIter == null) ? 0
|
||||
: associationPayloadIter.hashCode());
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
if (this == obj) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (obj == null) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (getClass() != obj.getClass()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
AssociationsPayloadIterator other = (AssociationsPayloadIterator) obj;
|
||||
if (associationPayloadIter == null) {
|
||||
if (other.associationPayloadIter != null) {
|
||||
return false;
|
||||
}
|
||||
} else if (!associationPayloadIter.equals(other.associationPayloadIter)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,13 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>Association category enhancements</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>Association category enhancements</h1>
|
||||
|
||||
A {@link org.apache.lucene.facet.enhancements.CategoryEnhancement CategoryEnhancement}
|
||||
for adding associations data to the index (categories with
|
||||
{@link org.apache.lucene.facet.enhancements.association.AssociationProperty AssociationProperty}'s).
|
||||
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,32 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>Enhanced category features</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>Enhanced category features</h1>
|
||||
|
||||
Mechanisms for addition of enhanced category features.
|
||||
<p>A {@link org.apache.lucene.facet.enhancements.CategoryEnhancement CategoryEnhancement}
|
||||
(which can correspond to a
|
||||
{@link org.apache.lucene.facet.index.attributes.CategoryProperty CategoryProperty})
|
||||
can contribute to the index in two possible ways:
|
||||
<ol>
|
||||
<li>To each category with data relevant to the enhancement,
|
||||
add this data to the category's token payload, through
|
||||
{@link org.apache.lucene.facet.enhancements.CategoryEnhancement#getCategoryTokenBytes(CategoryAttribute) CategoryEnhancement.getCategoryTokenBytes()}.
|
||||
This data will be read during search using
|
||||
{@link org.apache.lucene.facet.enhancements.CategoryEnhancement#extractCategoryTokenData(byte[], int, int) CategoryEnhancement.extractCategoryTokenData()}.
|
||||
</li>
|
||||
<li>To each document which contains categories with data relevant to the enhancement, add a
|
||||
{@link org.apache.lucene.facet.index.streaming.CategoryListTokenizer CategoryListTokenizer} through
|
||||
{@link org.apache.lucene.facet.enhancements.CategoryEnhancement#getCategoryListTokenizer CategoryEnhancement.getCategoryListTokenizer()}.
|
||||
The
|
||||
{@link org.apache.lucene.facet.index.streaming.CategoryListTokenizer CategoryListTokenizer}
|
||||
should add a single token which includes all the enhancement relevant data from the categories.
|
||||
The category list token's text is defined by
|
||||
{@link org.apache.lucene.facet.enhancements.CategoryEnhancement#getCategoryListTermText() CategoryEnhancement.getCategoryListTermText()}.
|
||||
</li>
|
||||
</ol>
|
||||
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,98 @@
|
|||
package org.apache.lucene.facet.enhancements.params;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.facet.enhancements.CategoryEnhancement;
|
||||
import org.apache.lucene.facet.index.attributes.CategoryProperty;
|
||||
import org.apache.lucene.facet.index.params.CategoryListParams;
|
||||
import org.apache.lucene.facet.index.params.PerDimensionIndexingParams;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Default implementation of {@link EnhancementsIndexingParams}
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class DefaultEnhancementsIndexingParams extends
|
||||
PerDimensionIndexingParams implements EnhancementsIndexingParams {
|
||||
|
||||
private List<CategoryEnhancement> enhancedCategories;
|
||||
|
||||
/**
|
||||
* Construct with a certain {@link CategoryEnhancement enhancement}
|
||||
* @throws IllegalArgumentException if no enhancements are provided
|
||||
*/
|
||||
public DefaultEnhancementsIndexingParams(CategoryEnhancement... enhancements) {
|
||||
super();
|
||||
validateparams(enhancements);
|
||||
addCategoryEnhancements(enhancements);
|
||||
}
|
||||
|
||||
private void validateparams(CategoryEnhancement... enhancements) {
|
||||
if (enhancements==null || enhancements.length<1) {
|
||||
throw new IllegalArgumentException("at least one enhancement is required");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct with certain {@link CategoryEnhancement enhancements}
|
||||
* and {@link CategoryListParams}
|
||||
* @throws IllegalArgumentException if no enhancements are provided
|
||||
*/
|
||||
public DefaultEnhancementsIndexingParams(
|
||||
CategoryListParams categoryListParams,
|
||||
CategoryEnhancement... enhancements) {
|
||||
super(categoryListParams);
|
||||
validateparams(enhancements);
|
||||
addCategoryEnhancements(enhancements);
|
||||
}
|
||||
|
||||
public void addCategoryEnhancements(CategoryEnhancement... enhancements) {
|
||||
if (enhancedCategories == null) {
|
||||
enhancedCategories = new ArrayList<CategoryEnhancement>();
|
||||
}
|
||||
for (CategoryEnhancement categoryEnhancement : enhancements) {
|
||||
enhancedCategories.add(categoryEnhancement);
|
||||
}
|
||||
}
|
||||
|
||||
public List<CategoryEnhancement> getCategoryEnhancements() {
|
||||
if (enhancedCategories == null || enhancedCategories.isEmpty()) {
|
||||
return null;
|
||||
}
|
||||
return enhancedCategories;
|
||||
}
|
||||
|
||||
public List<Class<? extends CategoryProperty>> getRetainableProperties() {
|
||||
if (enhancedCategories == null) {
|
||||
return null;
|
||||
}
|
||||
List<Class<? extends CategoryProperty>> retainableProperties = new ArrayList<Class<? extends CategoryProperty>>();
|
||||
for (CategoryEnhancement enhancement : enhancedCategories) {
|
||||
if (enhancement.getRetainableProperty() != null) {
|
||||
retainableProperties.add(enhancement.getRetainableProperty());
|
||||
}
|
||||
}
|
||||
if (retainableProperties.isEmpty()) {
|
||||
return null;
|
||||
}
|
||||
return retainableProperties;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,66 @@
|
|||
package org.apache.lucene.facet.enhancements.params;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.facet.enhancements.CategoryEnhancement;
|
||||
import org.apache.lucene.facet.enhancements.EnhancementsDocumentBuilder;
|
||||
import org.apache.lucene.facet.index.attributes.CategoryProperty;
|
||||
import org.apache.lucene.facet.index.params.FacetIndexingParams;
|
||||
import org.apache.lucene.facet.index.streaming.CategoryParentsStream;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* {@link FacetIndexingParams Facet indexing parameters} for defining
|
||||
* {@link CategoryEnhancement category enhancements}. It must contain at least
|
||||
* one enhancement, otherwise nothing is "enhanced" about it. When there are
|
||||
* more than one, the order matters - see {@link #getCategoryEnhancements()}.
|
||||
*
|
||||
* @see EnhancementsDocumentBuilder
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public interface EnhancementsIndexingParams extends FacetIndexingParams {

  /**
   * Add {@link CategoryEnhancement}s to the indexing parameters
   * @param enhancements enhancements to add
   */
  public void addCategoryEnhancements(CategoryEnhancement... enhancements);

  /**
   * Get a list of the active category enhancements. If no enhancements exist
   * return {@code null}. The order of enhancements in the returned list
   * dictates the order in which the enhancements data appear in the category
   * tokens payload.
   * 
   * @return A list of the active category enhancements, or {@code null} if
   *         there are no enhancements.
   */
  public List<CategoryEnhancement> getCategoryEnhancements();

  /**
   * Get a list of {@link CategoryProperty} classes to be retained when
   * creating {@link CategoryParentsStream}.
   * 
   * @return the list of {@link CategoryProperty} classes to be retained when
   *         creating {@link CategoryParentsStream}, or {@code null} if there
   *         are no such properties.
   */
  public List<Class<? extends CategoryProperty>> getRetainableProperties();

}
|
|
@ -0,0 +1,16 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>Enhanced category features</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>Enhanced category features</h1>
|
||||
|
||||
{@link org.apache.lucene.facet.index.params.FacetIndexingParams FacetIndexingParams}
|
||||
used by
|
||||
{@link org.apache.lucene.facet.enhancements.EnhancementsDocumentBuilder EnhancementsDocumentBuilder}
|
||||
for adding
|
||||
{@link org.apache.lucene.facet.enhancements.CategoryEnhancement CategoryEnhancement}'s
|
||||
to the indexing parameters, and accessing them during indexing and search.
|
||||
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,282 @@
|
|||
package org.apache.lucene.facet.index;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.ObjectInputStream;
|
||||
import java.io.ObjectOutputStream;
|
||||
import java.io.Serializable;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.util.Attribute;
|
||||
|
||||
import org.apache.lucene.facet.FacetException;
|
||||
import org.apache.lucene.facet.index.attributes.CategoryAttribute;
|
||||
import org.apache.lucene.facet.index.attributes.CategoryAttributeImpl;
|
||||
import org.apache.lucene.facet.index.attributes.CategoryProperty;
|
||||
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A container to add categories which are to be introduced to
|
||||
* {@link CategoryDocumentBuilder#setCategories(Iterable)}. Categories can be
|
||||
* added with Properties.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class CategoryContainer implements Iterable<CategoryAttribute>, Serializable {
|
||||
|
||||
protected transient Map<CategoryPath, CategoryAttribute> map;
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
*/
|
||||
public CategoryContainer() {
  // Categories are keyed by their CategoryPath; entries are created
  // on demand via mapCategoryAttribute.
  map = new HashMap<CategoryPath, CategoryAttribute>();
}
|
||||
|
||||
/**
|
||||
* Add a category.
|
||||
*
|
||||
* @param categoryPath
|
||||
* The path of the category.
|
||||
* @return The {@link CategoryAttribute} of the category.
|
||||
*/
|
||||
public CategoryAttribute addCategory(CategoryPath categoryPath) {
  // Returns the existing attribute for this path, creating one if absent.
  return mapCategoryAttribute(categoryPath);
}
|
||||
|
||||
/**
|
||||
* Add a category with a property.
|
||||
*
|
||||
* @param categoryPath
|
||||
* The path of the category.
|
||||
* @param property
|
||||
* The property to associate to the category.
|
||||
* @return The {@link CategoryAttribute} of the category.
|
||||
*/
|
||||
public CategoryAttribute addCategory(CategoryPath categoryPath,
|
||||
CategoryProperty property) {
|
||||
/*
|
||||
* This method is a special case of addCategory with multiple
|
||||
* properties, but it is kept here for two reasons: 1) Using the array
|
||||
* version has some performance cost, and 2) it is expected that most
|
||||
* calls will be for this version (single property).
|
||||
*/
|
||||
CategoryAttribute ca = mapCategoryAttribute(categoryPath);
|
||||
ca.addProperty(property);
|
||||
return ca;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a category with multiple properties.
|
||||
*
|
||||
* @param categoryPath
|
||||
* The path of the category.
|
||||
* @param properties
|
||||
* The properties to associate to the category.
|
||||
* @return The {@link CategoryAttribute} of the category.
|
||||
* @throws FacetException
|
||||
* When the category already has a property of the same type as
|
||||
* one of the new properties, and merging for this property type
|
||||
* is prohibited.
|
||||
*/
|
||||
public CategoryAttribute addCategory(CategoryPath categoryPath,
|
||||
CategoryProperty... properties) throws FacetException {
|
||||
CategoryAttribute ca = mapCategoryAttribute(categoryPath);
|
||||
for (CategoryProperty attribute : properties) {
|
||||
ca.addProperty(attribute);
|
||||
}
|
||||
return ca;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add an entire {@link CategoryAttribute}.
|
||||
*
|
||||
* @param categoryAttribute
|
||||
* The {@link CategoryAttribute} to add.
|
||||
* @return The {@link CategoryAttribute} of the category (could be different
|
||||
* from the one provided).
|
||||
* @throws FacetException
|
||||
*/
|
||||
public CategoryAttribute addCategory(CategoryAttribute categoryAttribute)
|
||||
throws FacetException {
|
||||
CategoryAttribute ca = mapCategoryAttribute(categoryAttribute
|
||||
.getCategoryPath());
|
||||
Set<Class<? extends CategoryProperty>> propertyClasses = categoryAttribute
|
||||
.getPropertyClasses();
|
||||
if (propertyClasses != null) {
|
||||
for (Class<? extends CategoryProperty> propertyClass : propertyClasses) {
|
||||
ca.addProperty(categoryAttribute.getProperty(propertyClass));
|
||||
}
|
||||
}
|
||||
return ca;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the {@link CategoryAttribute} object for a specific
|
||||
* {@link CategoryPath}, from the map.
|
||||
*/
|
||||
private final CategoryAttribute mapCategoryAttribute(
|
||||
CategoryPath categoryPath) {
|
||||
CategoryAttribute ca = map.get(categoryPath);
|
||||
if (ca == null) {
|
||||
ca = new CategoryAttributeImpl(categoryPath);
|
||||
map.put(categoryPath, ca);
|
||||
}
|
||||
return ca;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the {@link CategoryAttribute} this container has for a certain
|
||||
* category, or {@code null} if the category is not in the container.
|
||||
*
|
||||
* @param categoryPath
|
||||
* The category path of the requested category.
|
||||
*/
|
||||
public CategoryAttribute getCategoryAttribute(CategoryPath categoryPath) {
  // Plain lookup — unlike addCategory, does NOT create a missing entry.
  return map.get(categoryPath);
}
|
||||
|
||||
public Iterator<CategoryAttribute> iterator() {
  // Iteration order follows the backing HashMap and is unspecified.
  return map.values().iterator();
}
|
||||
|
||||
/**
|
||||
* Remove all categories.
|
||||
*/
|
||||
public void clear() {
  // Drops every category; the container remains usable afterwards.
  map.clear();
}
|
||||
|
||||
/**
|
||||
* Add the categories from another {@link CategoryContainer} to this one.
|
||||
*
|
||||
* @param other
|
||||
* The {@link CategoryContainer} to take categories from.
|
||||
* @throws FacetException
|
||||
* If any prohibited merge of category properties is attempted.
|
||||
*/
|
||||
public void merge(CategoryContainer other) throws FacetException {
|
||||
for (CategoryAttribute categoryAttribute : other.map.values()) {
|
||||
addCategory(categoryAttribute);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the number of categories in the container.
|
||||
*
|
||||
* @return The number of categories in the container.
|
||||
*/
|
||||
public int size() {
  // One map entry per distinct category path.
  return map.size();
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder builder = new StringBuilder("CategoryContainer");
|
||||
for (CategoryAttribute ca : map.values()) {
|
||||
builder.append('\n');
|
||||
builder.append('\t');
|
||||
builder.append(ca.toString());
|
||||
}
|
||||
return builder.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Serialize object content to given {@link ObjectOutputStream}
|
||||
*/
|
||||
private void writeObject(ObjectOutputStream out) throws IOException {
  out.defaultWriteObject();
  // The map field is transient, so the wire format is hand-rolled:
  // a category count followed by that many serialized attributes.
  // Must stay in sync with readObject/deserializeCategoryAttribute.
  // write the number of categories
  out.writeInt(size());
  // write the category attributes
  for (CategoryAttribute ca : this) {
    serializeCategoryAttribute(out, ca);
  }
}
|
||||
|
||||
/**
|
||||
* Serialize each of the {@link CategoryAttribute}s to the given
|
||||
* {@link ObjectOutputStream}.<br>
|
||||
* NOTE: {@link CategoryProperty}s are {@link Serializable}, but do not
|
||||
* assume that Lucene's {@link Attribute}s are as well
|
||||
* @throws IOException
|
||||
*/
|
||||
protected void serializeCategoryAttribute(ObjectOutputStream out,
|
||||
CategoryAttribute ca) throws IOException {
|
||||
out.writeObject(ca.getCategoryPath());
|
||||
Set<Class<? extends CategoryProperty>> propertyClasses = ca.getPropertyClasses();
|
||||
if (propertyClasses != null) {
|
||||
out.writeInt(propertyClasses.size());
|
||||
for (Class<? extends CategoryProperty> clazz : propertyClasses) {
|
||||
out.writeObject(ca.getProperty(clazz));
|
||||
}
|
||||
} else {
|
||||
out.writeInt(0);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Deserialize object from given {@link ObjectInputStream}
|
||||
*/
|
||||
private void readObject(ObjectInputStream in) throws IOException,
|
||||
ClassNotFoundException {
|
||||
in.defaultReadObject();
|
||||
map = new HashMap<CategoryPath, CategoryAttribute>();
|
||||
int size = in.readInt();
|
||||
for (int i = 0; i < size; i++) {
|
||||
deserializeCategoryAttribute(in);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* De-Serialize each of the {@link CategoryAttribute}s from the given
|
||||
* {@link ObjectInputStream}.
|
||||
*/
|
||||
protected void deserializeCategoryAttribute(ObjectInputStream in)
|
||||
throws IOException, ClassNotFoundException {
|
||||
CategoryPath cp = (CategoryPath) in.readObject();
|
||||
int nProperties = in.readInt();
|
||||
if (nProperties == 0) {
|
||||
addCategory(cp);
|
||||
} else {
|
||||
for (int j = 0; j < nProperties; j++) {
|
||||
CategoryProperty property = (CategoryProperty) in.readObject();
|
||||
addCategory(cp, property);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (! (o instanceof CategoryContainer)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
CategoryContainer that = (CategoryContainer)o;
|
||||
return this.map.equals(that.map);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return map.hashCode();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,298 @@
|
|||
package org.apache.lucene.facet.index;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
|
||||
import org.apache.lucene.DocumentBuilder;
|
||||
import org.apache.lucene.facet.index.attributes.CategoryAttribute;
|
||||
import org.apache.lucene.facet.index.attributes.CategoryAttributesIterable;
|
||||
import org.apache.lucene.facet.index.categorypolicy.OrdinalPolicy;
|
||||
import org.apache.lucene.facet.index.categorypolicy.PathPolicy;
|
||||
import org.apache.lucene.facet.index.params.DefaultFacetIndexingParams;
|
||||
import org.apache.lucene.facet.index.params.FacetIndexingParams;
|
||||
import org.apache.lucene.facet.index.streaming.CategoryAttributesStream;
|
||||
import org.apache.lucene.facet.index.streaming.CategoryListTokenizer;
|
||||
import org.apache.lucene.facet.index.streaming.CategoryParentsStream;
|
||||
import org.apache.lucene.facet.index.streaming.CategoryTokenizer;
|
||||
import org.apache.lucene.facet.index.streaming.CountingListTokenizer;
|
||||
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A utility class which allows attachment of {@link CategoryPath}s or
|
||||
* {@link CategoryAttribute}s to a given document using a taxonomy.<br>
|
||||
* Construction could be done with either a given {@link FacetIndexingParams} or
|
||||
* the default implementation {@link DefaultFacetIndexingParams}.<br>
|
||||
* A CategoryDocumentBuilder can be reused by repeatedly setting the categories
|
||||
* and building the document. Categories are provided either as
|
||||
* {@link CategoryAttribute} elements through {@link #setCategories(Iterable)},
|
||||
* or as {@link CategoryPath} elements through
|
||||
* {@link #setCategoryPaths(Iterable)}.
|
||||
* <p>
|
||||
* Note that both {@link #setCategories(Iterable)} and
|
||||
* {@link #setCategoryPaths(Iterable)} return this
|
||||
* {@link CategoryDocumentBuilder}, allowing the following pattern: {@code new
|
||||
* CategoryDocumentBuilder(taxonomy,
|
||||
* params).setCategories(categories).build(doc)}.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class CategoryDocumentBuilder implements DocumentBuilder {
|
||||
|
||||
/**
|
||||
* A {@link TaxonomyWriter} for adding categories and retrieving their
|
||||
* ordinals.
|
||||
*/
|
||||
protected final TaxonomyWriter taxonomyWriter;
|
||||
|
||||
/**
|
||||
* Parameters to be used when indexing categories.
|
||||
*/
|
||||
protected final FacetIndexingParams indexingParams;
|
||||
|
||||
/**
|
||||
* A list of fields which is filled at ancestors' construction and used
|
||||
* during {@link CategoryDocumentBuilder#build(Document)}.
|
||||
*/
|
||||
protected final ArrayList<Field> fieldList = new ArrayList<Field>();
|
||||
|
||||
protected Map<String, List<CategoryAttribute>> categoriesMap;
|
||||
|
||||
/**
|
||||
* Creating a facets document builder with default facet indexing
|
||||
* parameters.<br>
|
||||
* See:
|
||||
* {@link #CategoryDocumentBuilder(TaxonomyWriter, FacetIndexingParams)}
|
||||
*
|
||||
* @param taxonomyWriter
|
||||
* to which new categories will be added, as well as translating
|
||||
* known categories to ordinals
|
||||
* @throws IOException
|
||||
*
|
||||
*/
|
||||
public CategoryDocumentBuilder(TaxonomyWriter taxonomyWriter)
|
||||
throws IOException {
|
||||
this(taxonomyWriter, new DefaultFacetIndexingParams());
|
||||
}
|
||||
|
||||
/**
|
||||
* Creating a facets document builder with a given facet indexing parameters
|
||||
* object.<br>
|
||||
*
|
||||
* @param taxonomyWriter
|
||||
* to which new categories will be added, as well as translating
|
||||
* known categories to ordinals
|
||||
* @param params
|
||||
* holds all parameters the indexing process should use such as
|
||||
* category-list parameters
|
||||
* @throws IOException
|
||||
*/
|
||||
public CategoryDocumentBuilder(TaxonomyWriter taxonomyWriter,
|
||||
FacetIndexingParams params) throws IOException {
|
||||
this.taxonomyWriter = taxonomyWriter;
|
||||
this.indexingParams = params;
|
||||
this.categoriesMap = new HashMap<String, List<CategoryAttribute>>();
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the categories of the document builder from an {@link Iterable} of
|
||||
* {@link CategoryPath} objects.
|
||||
*
|
||||
* @param categoryPaths
|
||||
* An iterable of CategoryPath objects which holds the categories
|
||||
* (facets) which will be added to the document at
|
||||
* {@link #build(Document)}
|
||||
* @return This CategoryDocumentBuilder, to enable this one line call:
|
||||
* {@code new} {@link #CategoryDocumentBuilder(TaxonomyWriter)}.
|
||||
* {@link #setCategoryPaths(Iterable)}.{@link #build(Document)}.
|
||||
* @throws IOException
|
||||
*/
|
||||
public CategoryDocumentBuilder setCategoryPaths(
|
||||
Iterable<CategoryPath> categoryPaths) throws IOException {
|
||||
if (categoryPaths == null) {
|
||||
fieldList.clear();
|
||||
return this;
|
||||
}
|
||||
return setCategories(new CategoryAttributesIterable(categoryPaths));
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the categories of the document builder from an {@link Iterable} of
|
||||
* {@link CategoryAttribute} objects.
|
||||
*
|
||||
* @param categories
|
||||
* An iterable of {@link CategoryAttribute} objects which holds
|
||||
* the categories (facets) which will be added to the document at
|
||||
* {@link #build(Document)}
|
||||
* @return This CategoryDocumentBuilder, to enable this one line call:
|
||||
* {@code new} {@link #CategoryDocumentBuilder(TaxonomyWriter)}.
|
||||
* {@link #setCategories(Iterable)}.{@link #build(Document)}.
|
||||
* @throws IOException
|
||||
*/
|
||||
public CategoryDocumentBuilder setCategories(
|
||||
Iterable<CategoryAttribute> categories) throws IOException {
|
||||
fieldList.clear();
|
||||
if (categories == null) {
|
||||
return this;
|
||||
}
|
||||
|
||||
// get field-name to a list of facets mapping as different facets could
|
||||
// be added to different category-lists on different fields
|
||||
fillCategoriesMap(categories);
|
||||
|
||||
// creates a different stream for each different field
|
||||
for (Entry<String, List<CategoryAttribute>> e : categoriesMap
|
||||
.entrySet()) {
|
||||
// create a category attributes stream for the array of facets
|
||||
CategoryAttributesStream categoryAttributesStream = new CategoryAttributesStream(
|
||||
e.getValue());
|
||||
|
||||
// Set a suitable {@link TokenStream} using
|
||||
// CategoryParentsStream, followed by CategoryListTokenizer and
|
||||
// CategoryTokenizer composition (the ordering of the last two is
|
||||
// not mandatory).
|
||||
CategoryParentsStream parentsStream = (CategoryParentsStream) getParentsStream(categoryAttributesStream);
|
||||
CategoryListTokenizer categoryListTokenizer = getCategoryListTokenizer(parentsStream);
|
||||
CategoryTokenizer stream = getCategoryTokenizer(categoryListTokenizer);
|
||||
|
||||
// Finally creating a suitable field with stream and adding it to a
|
||||
// master field-list, used during the build process (see
|
||||
// super.build())
|
||||
fieldList.add(new Field(e.getKey(), stream));
|
||||
}
|
||||
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a stream of categories which includes the parents, according to
|
||||
* policies defined in indexing parameters.
|
||||
*
|
||||
* @param categoryAttributesStream
|
||||
* The input stream
|
||||
* @return The parents stream.
|
||||
* @see OrdinalPolicy OrdinalPolicy (for policy of adding category tokens for parents)
|
||||
* @see PathPolicy PathPolicy (for policy of adding category <b>list</b> tokens for parents)
|
||||
*/
|
||||
protected TokenStream getParentsStream(
|
||||
CategoryAttributesStream categoryAttributesStream) {
|
||||
return new CategoryParentsStream(categoryAttributesStream,
|
||||
taxonomyWriter, indexingParams);
|
||||
}
|
||||
|
||||
/**
|
||||
* Fills the categories mapping between a field name and a list of
|
||||
* categories that belongs to it according to this builder's
|
||||
* {@link FacetIndexingParams} object
|
||||
*
|
||||
* @param categories
|
||||
* Iterable over the category attributes
|
||||
*/
|
||||
protected void fillCategoriesMap(Iterable<CategoryAttribute> categories)
|
||||
throws IOException {
|
||||
categoriesMap.clear();
|
||||
|
||||
// for-each category
|
||||
for (CategoryAttribute category : categories) {
|
||||
// extracting the field-name to which this category belongs
|
||||
String fieldName = indexingParams.getCategoryListParams(
|
||||
category.getCategoryPath()).getTerm().field();
|
||||
|
||||
// getting the list of categories which belongs to that field
|
||||
List<CategoryAttribute> list = categoriesMap.get(fieldName);
|
||||
|
||||
// if no such list exists
|
||||
if (list == null) {
|
||||
// adding a new one to the map
|
||||
list = new ArrayList<CategoryAttribute>();
|
||||
categoriesMap.put(fieldName, list);
|
||||
}
|
||||
|
||||
// adding the new category to the list
|
||||
list.add(category.clone());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a category list tokenizer (or a series of such tokenizers) to create
|
||||
* the <b>category list tokens</b>.
|
||||
*
|
||||
* @param categoryStream
|
||||
* A stream containing {@link CategoryAttribute} with the
|
||||
* relevant data.
|
||||
* @return The category list tokenizer (or series of tokenizers) to be used
|
||||
* in creating category list tokens.
|
||||
*/
|
||||
protected CategoryListTokenizer getCategoryListTokenizer(
|
||||
TokenStream categoryStream) {
|
||||
return getCountingListTokenizer(categoryStream);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a {@link CountingListTokenizer} for creating counting list token.
|
||||
*
|
||||
* @param categoryStream
|
||||
* A stream containing {@link CategoryAttribute}s with the
|
||||
* relevant data.
|
||||
* @return A counting list tokenizer to be used in creating counting list
|
||||
* token.
|
||||
*/
|
||||
protected CountingListTokenizer getCountingListTokenizer(
|
||||
TokenStream categoryStream) {
|
||||
return new CountingListTokenizer(categoryStream, indexingParams);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a {@link CategoryTokenizer} to create the <b>category tokens</b>.
|
||||
* This method can be overridden for adding more attributes to the category
|
||||
* tokens.
|
||||
*
|
||||
* @param categoryStream
|
||||
* A stream containing {@link CategoryAttribute} with the
|
||||
* relevant data.
|
||||
* @return The {@link CategoryTokenizer} to be used in creating category
|
||||
* tokens.
|
||||
* @throws IOException
|
||||
*/
|
||||
protected CategoryTokenizer getCategoryTokenizer(TokenStream categoryStream)
|
||||
throws IOException {
|
||||
return new CategoryTokenizer(categoryStream, indexingParams);
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds the fields created in one of the "set" methods to the document
|
||||
*/
|
||||
public Document build(Document doc) {
|
||||
for (Field f : fieldList) {
|
||||
f.setOmitNorms(true);
|
||||
doc.add(f);
|
||||
}
|
||||
return doc;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,65 @@
|
|||
package org.apache.lucene.facet.index;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.util.encoding.IntEncoder;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Accumulates category IDs for a single document, for writing in byte array
|
||||
* form, for example, to a Lucene Payload.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class CategoryListPayloadStream {
|
||||
|
||||
private ByteArrayOutputStream baos = new ByteArrayOutputStream(50);
|
||||
private IntEncoder encoder;
|
||||
|
||||
/** Creates a Payload stream using the specified encoder. */
|
||||
public CategoryListPayloadStream(IntEncoder encoder) {
|
||||
this.encoder = encoder;
|
||||
this.encoder.reInit(baos);
|
||||
}
|
||||
|
||||
/** Appends an integer to the stream. */
|
||||
public void appendIntToStream(int intValue) throws IOException {
|
||||
encoder.encode(intValue);
|
||||
}
|
||||
|
||||
/** Returns the streamed bytes so far accumulated, as an array of bytes. */
|
||||
public byte[] convertStreamToByteArray() {
|
||||
try {
|
||||
encoder.close();
|
||||
return baos.toByteArray();
|
||||
} catch (IOException e) {
|
||||
// This cannot happen, because of BAOS (no I/O).
|
||||
return new byte[0];
|
||||
}
|
||||
}
|
||||
|
||||
/** Resets this stream to begin building a new payload. */
|
||||
public void reset() throws IOException {
|
||||
encoder.close();
|
||||
baos.reset();
|
||||
encoder.reInit(baos);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,188 @@
|
|||
package org.apache.lucene.facet.index;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.index.PayloadProcessorProvider;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.store.Directory;
|
||||
|
||||
import org.apache.lucene.facet.index.params.CategoryListParams;
|
||||
import org.apache.lucene.facet.index.params.FacetIndexingParams;
|
||||
import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyWriter.OrdinalMap;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.encoding.IntDecoder;
|
||||
import org.apache.lucene.util.encoding.IntEncoder;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A {@link PayloadProcessorProvider} for updating facets ordinal references,
|
||||
* based on an ordinal map. You should use this code in conjunction with merging
|
||||
* taxonomies - after you merge taxonomies, you receive an {@link OrdinalMap}
|
||||
* which maps the 'old' payloads to the 'new' ones. You can use that map to
|
||||
* re-map the payloads which contain the facets information (ordinals) either
|
||||
* before or while merging the indexes.
|
||||
* <p>
|
||||
* For re-mapping the ordinals before you merge the indexes, do the following:
|
||||
*
|
||||
* <pre>
|
||||
* // merge the old taxonomy with the new one.
|
||||
* OrdinalMap map = LuceneTaxonomyWriter.addTaxonomies();
|
||||
* int[] ordmap = map.getMap();
|
||||
*
|
||||
* // re-map the ordinals on the old directory.
|
||||
* Directory oldDir;
|
||||
* FacetsPayloadProcessorProvider fppp = new FacetsPayloadProcessorProvider(
|
||||
* oldDir, ordmap);
|
||||
* IndexWriterConfig conf = new IndexWriterConfig(VER, ANALYZER);
|
||||
* conf.setMergePolicy(new ForceOptimizeMergePolicy());
|
||||
* IndexWriter writer = new IndexWriter(oldDir, conf);
|
||||
* writer.setPayloadProcessorProvider(fppp);
|
||||
* writer.optimize();
|
||||
* writer.close();
|
||||
*
|
||||
* // merge that directory with the new index.
|
||||
* IndexWriter newWriter; // opened on the 'new' Directory
|
||||
* newWriter.addIndexes(oldDir);
|
||||
* newWriter.commit();
|
||||
* </pre>
|
||||
*
|
||||
* For re-mapping the ordinals during index merge, do the following:
|
||||
*
|
||||
* <pre>
|
||||
* // merge the old taxonomy with the new one.
|
||||
* OrdinalMap map = LuceneTaxonomyWriter.addTaxonomies();
|
||||
* int[] ordmap = map.getMap();
|
||||
*
|
||||
* // Add the index and re-map ordinals on the go
|
||||
* IndexReader r = IndexReader.open(oldDir);
|
||||
* IndexWriterConfig conf = new IndexWriterConfig(VER, ANALYZER);
|
||||
* IndexWriter writer = new IndexWriter(newDir, conf);
|
||||
* writer.setPayloadProcessorProvider(fppp);
|
||||
* writer.addIndexes(r);
|
||||
* writer.commit();
|
||||
* </pre>
|
||||
* <p>
|
||||
* <b>NOTE:</b> while the second example looks simpler, IndexWriter may trigger
|
||||
* a long merge due to addIndexes. The first example avoids this perhaps
|
||||
* unneeded merge, as well as can be done separately (e.g. on another node)
|
||||
* before the index is merged.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class FacetsPayloadProcessorProvider extends PayloadProcessorProvider {
|
||||
|
||||
private final Directory workDir;
|
||||
|
||||
private final DirPayloadProcessor dirProcessor;
|
||||
|
||||
/**
|
||||
* Construct FacetsPayloadProcessorProvider with FacetIndexingParams
|
||||
*
|
||||
* @param dir the {@link Directory} containing the segments to update
|
||||
* @param ordinalMap an array mapping previous facets ordinals to new ones
|
||||
* @param indexingParams the facets indexing parameters
|
||||
*/
|
||||
public FacetsPayloadProcessorProvider(Directory dir, int[] ordinalMap,
|
||||
FacetIndexingParams indexingParams) {
|
||||
workDir = dir;
|
||||
dirProcessor = new FacetsDirPayloadProcessor(indexingParams, ordinalMap);
|
||||
}
|
||||
|
||||
@Override
|
||||
public DirPayloadProcessor getDirProcessor(Directory dir) throws IOException {
|
||||
if (workDir != dir) {
|
||||
return null;
|
||||
}
|
||||
return dirProcessor;
|
||||
}
|
||||
|
||||
public static class FacetsDirPayloadProcessor extends DirPayloadProcessor {
|
||||
|
||||
private final Map<Term, CategoryListParams> termMap = new HashMap<Term, CategoryListParams>(1);
|
||||
|
||||
private final int[] ordinalMap;
|
||||
|
||||
/**
|
||||
* Construct FacetsDirPayloadProcessor with custom FacetIndexingParams
|
||||
* @param ordinalMap an array mapping previous facets ordinals to new ones
|
||||
* @param indexingParams the facets indexing parameters
|
||||
*/
|
||||
protected FacetsDirPayloadProcessor(FacetIndexingParams indexingParams, int[] ordinalMap) {
|
||||
this.ordinalMap = ordinalMap;
|
||||
for (CategoryListParams params: indexingParams.getAllCategoryListParams()) {
|
||||
termMap.put(params.getTerm(), params);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public PayloadProcessor getProcessor(String field, BytesRef bytes) throws IOException {
|
||||
// TODO (Facet): don't create terms
|
||||
CategoryListParams params = termMap.get(new Term(field, bytes));
|
||||
if (params == null) {
|
||||
return null;
|
||||
}
|
||||
return new FacetsPayloadProcessor(params, ordinalMap);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/** A PayloadProcessor for updating facets ordinal references, based on an ordinal map */
|
||||
public static class FacetsPayloadProcessor extends PayloadProcessor {
|
||||
|
||||
private final IntEncoder encoder;
|
||||
private final IntDecoder decoder;
|
||||
private final int[] ordinalMap;
|
||||
private final ByteArrayOutputStream os = new ByteArrayOutputStream();
|
||||
|
||||
/**
|
||||
* @param params defines the encoding of facet ordinals as payload
|
||||
* @param ordinalMap an array mapping previous facets ordinals to new ones
|
||||
*/
|
||||
protected FacetsPayloadProcessor(CategoryListParams params, int[] ordinalMap) {
|
||||
encoder = params.createEncoder();
|
||||
decoder = encoder.createMatchingDecoder();
|
||||
this.ordinalMap = ordinalMap;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void processPayload(BytesRef payload) throws IOException {
|
||||
InputStream is = new ByteArrayInputStream(payload.bytes, payload.offset, payload.length);
|
||||
decoder.reInit(is);
|
||||
os.reset();
|
||||
encoder.reInit(os);
|
||||
long ordinal;
|
||||
while ((ordinal = decoder.decode()) != IntDecoder.EOS) {
|
||||
int newOrdinal = ordinalMap[(int)ordinal];
|
||||
encoder.encode(newOrdinal);
|
||||
}
|
||||
encoder.close();
|
||||
// TODO (Facet): avoid copy?
|
||||
byte out[] = os.toByteArray();
|
||||
payload.bytes = out;
|
||||
payload.offset = 0;
|
||||
payload.length = out.length;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,129 @@
|
|||
package org.apache.lucene.facet.index.attributes;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.util.Attribute;
|
||||
|
||||
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* An attribute which contains for a certain category the {@link CategoryPath}
|
||||
* and additional properties.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public interface CategoryAttribute extends Attribute {
|
||||
|
||||
/**
|
||||
* Set the content of this {@link CategoryAttribute} from another
|
||||
* {@link CategoryAttribute} object.
|
||||
*
|
||||
* @param other
|
||||
* The {@link CategoryAttribute} to take the content from.
|
||||
*/
|
||||
public void set(CategoryAttribute other);
|
||||
|
||||
/**
|
||||
* Sets the category path value of this attribute.
|
||||
*
|
||||
* @param cp
|
||||
* A category path. May not be null.
|
||||
*/
|
||||
public void setCategoryPath(CategoryPath cp);
|
||||
|
||||
/**
|
||||
* Returns the value of this attribute: a category path.
|
||||
*
|
||||
* @return The category path last assigned to this attribute, or null if
|
||||
* none has been assigned.
|
||||
*/
|
||||
public CategoryPath getCategoryPath();
|
||||
|
||||
/**
|
||||
* Add a property. The property can be later retrieved using
|
||||
* {@link #getProperty(Class)} with this property class .<br>
|
||||
* Adding multiple properties of the same class is forbidden.
|
||||
*
|
||||
* @param property
|
||||
* The property to add.
|
||||
* @throws UnsupportedOperationException
|
||||
* When attempting to add a property of a class that was added
|
||||
* before and merge is prohibited.
|
||||
*/
|
||||
public void addProperty(CategoryProperty property)
|
||||
throws UnsupportedOperationException;
|
||||
|
||||
/**
|
||||
* Get a property of a certain property class.
|
||||
*
|
||||
* @param propertyClass
|
||||
* The required property class.
|
||||
* @return The property of the given class, or null if no such property
|
||||
* exists.
|
||||
*/
|
||||
public CategoryProperty getProperty(
|
||||
Class<? extends CategoryProperty> propertyClass);
|
||||
|
||||
/**
|
||||
* Get a property of one of given property classes.
|
||||
*
|
||||
* @param propertyClasses
|
||||
* The property classes.
|
||||
* @return A property matching one of the given classes, or null if no such
|
||||
* property exists.
|
||||
*/
|
||||
public CategoryProperty getProperty(
|
||||
Collection<Class<? extends CategoryProperty>> propertyClasses);
|
||||
|
||||
/**
|
||||
* Get all the active property classes.
|
||||
*
|
||||
* @return A set containing the active property classes, or {@code null} if
|
||||
* there are no properties.
|
||||
*/
|
||||
public Set<Class<? extends CategoryProperty>> getPropertyClasses();
|
||||
|
||||
/**
|
||||
* Clone this {@link CategoryAttribute}.
|
||||
*
|
||||
* @return A clone of this {@link CategoryAttribute}.
|
||||
*/
|
||||
public CategoryAttribute clone();
|
||||
|
||||
/**
|
||||
* Resets this attribute to its initial value: a null category path and no
|
||||
* properties.
|
||||
*/
|
||||
public void clear();
|
||||
|
||||
/**
|
||||
* Clear all properties.
|
||||
*/
|
||||
public void clearProperties();
|
||||
|
||||
/**
|
||||
* Remove an property of a certain property class.
|
||||
*
|
||||
* @param propertyClass
|
||||
* The required property class.
|
||||
*/
|
||||
public void remove(Class<? extends CategoryProperty> propertyClass);
|
||||
}
|
|
@ -0,0 +1,192 @@
|
|||
package org.apache.lucene.facet.index.attributes;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.util.AttributeImpl;
|
||||
|
||||
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* An implementation of {@link CategoryAttribute}.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public final class CategoryAttributeImpl extends AttributeImpl implements
|
||||
CategoryAttribute {
|
||||
|
||||
/**
|
||||
* The category path instance.
|
||||
*/
|
||||
protected CategoryPath categoryPath;
|
||||
|
||||
/**
|
||||
* A map of properties associated to the current category path.
|
||||
*/
|
||||
protected HashMap<Class<? extends CategoryProperty>, CategoryProperty> properties;
|
||||
|
||||
/**
|
||||
* Construct an empty CategoryAttributeImpl.
|
||||
*/
|
||||
public CategoryAttributeImpl() {
|
||||
// do nothing
|
||||
}
|
||||
|
||||
/**
 * Constructs a CategoryAttributeImpl initialized with the given
 * CategoryPath.
 *
 * @param categoryPath
 *            The category path to use.
 */
public CategoryAttributeImpl(CategoryPath categoryPath) {
  setCategoryPath(categoryPath);
}
|
||||
|
||||
public void set(CategoryAttribute other) {
|
||||
((CategoryAttributeImpl) other).copyTo(this);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the category path value.
|
||||
*
|
||||
* @return The category path last assigned to this attribute, or null if
|
||||
* none has been assigned.
|
||||
*/
|
||||
public CategoryPath getCategoryPath() {
|
||||
return categoryPath;
|
||||
}
|
||||
|
||||
public void setCategoryPath(CategoryPath cp) {
|
||||
categoryPath = cp;
|
||||
}
|
||||
|
||||
public void addProperty(CategoryProperty property)
|
||||
throws UnsupportedOperationException {
|
||||
if (properties == null) {
|
||||
properties = new HashMap<Class<? extends CategoryProperty>, CategoryProperty>();
|
||||
}
|
||||
CategoryProperty existing = properties.get(property.getClass());
|
||||
if (existing == null) {
|
||||
properties.put(property.getClass(), property);
|
||||
} else {
|
||||
existing.merge(property);
|
||||
}
|
||||
}
|
||||
|
||||
public CategoryProperty getProperty(
|
||||
Class<? extends CategoryProperty> propertyClass) {
|
||||
if (properties == null) {
|
||||
return null;
|
||||
}
|
||||
return properties.get(propertyClass);
|
||||
}
|
||||
|
||||
public CategoryProperty getProperty(
|
||||
Collection<Class<? extends CategoryProperty>> propertyClasses) {
|
||||
if (properties == null) {
|
||||
return null;
|
||||
}
|
||||
for (Class<? extends CategoryProperty> propertyClass : propertyClasses) {
|
||||
CategoryProperty categoryProperty = properties.get(propertyClass);
|
||||
if (categoryProperty != null) {
|
||||
return categoryProperty;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void copyTo(AttributeImpl target) {
|
||||
((CategoryAttributeImpl) target).categoryPath = this.categoryPath;
|
||||
((CategoryAttributeImpl) target).properties = this.properties;
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
@Override
|
||||
public CategoryAttribute clone() {
|
||||
CategoryAttributeImpl ca = (CategoryAttributeImpl) super.clone();
|
||||
if (categoryPath != null) {
|
||||
ca.categoryPath = (CategoryPath) categoryPath.clone();
|
||||
}
|
||||
if (properties != null && !properties.isEmpty()) {
|
||||
ca.properties = (HashMap<Class<? extends CategoryProperty>, CategoryProperty>) properties
|
||||
.clone();
|
||||
}
|
||||
return ca;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void clear() {
|
||||
categoryPath = null;
|
||||
clearProperties();
|
||||
}
|
||||
|
||||
public void clearProperties() {
|
||||
if (properties != null) {
|
||||
properties.clear();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (o == this) {
|
||||
return true;
|
||||
}
|
||||
if (!(o instanceof CategoryAttributeImpl)) {
|
||||
return false;
|
||||
}
|
||||
CategoryAttributeImpl other = (CategoryAttributeImpl) o;
|
||||
if (categoryPath == null) {
|
||||
return (other.categoryPath == null);
|
||||
}
|
||||
if (!categoryPath.equals(other.categoryPath)) {
|
||||
return false;
|
||||
}
|
||||
if (properties == null || properties.isEmpty()) {
|
||||
return (other.properties == null || other.properties.isEmpty());
|
||||
}
|
||||
return properties.equals(other.properties);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
if (categoryPath == null) {
|
||||
return 0;
|
||||
}
|
||||
int hashCode = categoryPath.hashCode();
|
||||
if (properties != null && !properties.isEmpty()) {
|
||||
hashCode ^= properties.hashCode();
|
||||
}
|
||||
return hashCode;
|
||||
}
|
||||
|
||||
public Set<Class<? extends CategoryProperty>> getPropertyClasses() {
|
||||
if (properties == null || properties.isEmpty()) {
|
||||
return null;
|
||||
}
|
||||
return properties.keySet();
|
||||
}
|
||||
|
||||
public void remove(Class<? extends CategoryProperty> propertyClass) {
|
||||
properties.remove(propertyClass);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,69 @@
|
|||
package org.apache.lucene.facet.index.attributes;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
||||
import org.apache.lucene.facet.index.streaming.CategoryAttributesStream;
|
||||
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* This class transforms an {@link Iterable} of {@link CategoryPath} objects
|
||||
* into an {@link Iterable} of {@link CategoryAttribute} objects, which can be
|
||||
* used to construct a {@link CategoryAttributesStream}.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class CategoryAttributesIterable implements Iterable<CategoryAttribute> {
|
||||
|
||||
private Iterable<CategoryPath> inputIterable;
|
||||
|
||||
public CategoryAttributesIterable(Iterable<CategoryPath> inputIterable) {
|
||||
this.inputIterable = inputIterable;
|
||||
}
|
||||
|
||||
public Iterator<CategoryAttribute> iterator() {
|
||||
return new CategoryAttributesIterator(this.inputIterable);
|
||||
}
|
||||
|
||||
private static class CategoryAttributesIterator implements Iterator<CategoryAttribute> {
|
||||
|
||||
private Iterator<CategoryPath> internalIterator;
|
||||
private CategoryAttributeImpl categoryAttributeImpl;
|
||||
|
||||
public CategoryAttributesIterator(Iterable<CategoryPath> inputIterable) {
|
||||
this.internalIterator = inputIterable.iterator();
|
||||
this.categoryAttributeImpl = new CategoryAttributeImpl();
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
return this.internalIterator.hasNext();
|
||||
}
|
||||
|
||||
public CategoryAttribute next() {
|
||||
this.categoryAttributeImpl.setCategoryPath(this.internalIterator
|
||||
.next());
|
||||
return this.categoryAttributeImpl;
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
this.internalIterator.remove();
|
||||
}
|
||||
|
||||
}
|
||||
}
|
|
@ -0,0 +1,51 @@
|
|||
package org.apache.lucene.facet.index.attributes;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
import org.apache.lucene.facet.index.CategoryContainer;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
 * Property that can be added to {@link CategoryAttribute}s during indexing.
 * Note that properties are put in a map and could be shallow copied during
 * {@link CategoryAttributeImpl#clone()}, therefore reuse of
 * {@link CategoryProperty} objects is not recommended. Also extends
 * {@link Serializable}, making the {@link CategoryContainer} serialization more
 * elegant.
 * 
 * @lucene.experimental
 */
public interface CategoryProperty extends Serializable {

  /**
   * When adding categories with properties to a certain document, it is
   * possible that the same category will be added more than once with
   * different instances of the same property. This method defines how to
   * treat such cases, by merging the newly added property into the one
   * previously added. Implementing classes can assume that this method will
   * be called only with a property of the same class.
   * 
   * @param other
   *            The category property to merge.
   * @throws UnsupportedOperationException
   *             If merging is prohibited for this property.
   */
  public void merge(CategoryProperty other)
      throws UnsupportedOperationException;
}
|
|
@ -0,0 +1,71 @@
|
|||
package org.apache.lucene.facet.index.attributes;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A {@link CategoryProperty} holding the ordinal from the taxonomy of the
|
||||
* current category in {@link CategoryAttribute}.
|
||||
* <p>
|
||||
* Ordinal properties are added internally during processing of category
|
||||
* streams, and it is recommended not to use it externally.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class OrdinalProperty implements CategoryProperty {
|
||||
|
||||
protected int ordinal = -1;
|
||||
|
||||
public int getOrdinal() {
|
||||
return ordinal;
|
||||
}
|
||||
|
||||
public boolean hasBeenSet() {
|
||||
return this.ordinal >= 0;
|
||||
}
|
||||
|
||||
public void setOrdinal(int value) {
|
||||
this.ordinal = value;
|
||||
}
|
||||
|
||||
public void clear() {
|
||||
this.ordinal = -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
if (other == this) {
|
||||
return true;
|
||||
}
|
||||
if (!(other instanceof OrdinalProperty)) {
|
||||
return false;
|
||||
}
|
||||
OrdinalProperty o = (OrdinalProperty) other;
|
||||
return o.ordinal == this.ordinal;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return this.ordinal;
|
||||
}
|
||||
|
||||
public void merge(CategoryProperty other) {
|
||||
throw new UnsupportedOperationException(
|
||||
"Merging ordinal attributes is prohibited");
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,13 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>Category attributes and their properties for indexing</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>Category attributes and their properties for indexing</h1>
|
||||
|
||||
Attributes for a {@link org.apache.lucene.facet.taxonomy.CategoryPath category},
|
||||
possibly containing
|
||||
{@link org.apache.lucene.facet.index.attributes.CategoryProperty category property}'s.
|
||||
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,43 @@
|
|||
package org.apache.lucene.facet.index.categorypolicy;
|
||||
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
 * This class filters out the ROOT category ID. For more information see
 * {@link OrdinalPolicy}.
 * 
 * @lucene.experimental
 */
public class DefaultOrdinalPolicy implements OrdinalPolicy {

  /**
   * Filters out (returns false) ordinals equal or less than
   * {@link TaxonomyReader#ROOT_ORDINAL}. true otherwise.
   */
  public boolean shouldAdd(int ordinal) {
    return ordinal > TaxonomyReader.ROOT_ORDINAL;
  }

  /**
   * Implemented as a NO-OP, as the default policy is not taxonomy dependent.
   */
  public void init(TaxonomyWriter taxonomyWriter) { }
}
|
|
@ -0,0 +1,38 @@
|
|||
package org.apache.lucene.facet.index.categorypolicy;
|
||||
|
||||
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
 * This class filters out the ROOT category path. For more information see
 * {@link PathPolicy}.
 * 
 * @lucene.experimental
 */
public class DefaultPathPolicy implements PathPolicy {

  /**
   * Filters out (returns false) the empty (root) category path, i.e. a path
   * with zero components. Returns true for any non-empty path.
   */
  public boolean shouldAdd(CategoryPath categoryPath) {
    return categoryPath.length() > 0;
  }
}
|
|
@ -0,0 +1,71 @@
|
|||
package org.apache.lucene.facet.index.categorypolicy;
|
||||
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Filter out any "top level" category ordinals. <br> {@link #shouldAdd(int)}.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class NonTopLevelOrdinalPolicy implements OrdinalPolicy {
|
||||
|
||||
/**
|
||||
* The taxonomyWriter with which the given ordinals' parent is determined.
|
||||
*/
|
||||
private TaxonomyWriter taxonomyWriter;
|
||||
|
||||
/**
|
||||
* Constructs a new non-top-level-ordinal-filter. With a given
|
||||
* taxonomyWriter.
|
||||
*
|
||||
*/
|
||||
public NonTopLevelOrdinalPolicy() {
|
||||
this.taxonomyWriter = null;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param taxonomyWriter
|
||||
* A relevant taxonomyWriter object, with which ordinals sent to
|
||||
* {@link #shouldAdd(int)} are examined.
|
||||
*/
|
||||
public void init(TaxonomyWriter taxonomyWriter) {
|
||||
this.taxonomyWriter = taxonomyWriter;
|
||||
}
|
||||
|
||||
/**
|
||||
* Filters out ordinal which are ROOT or who's parent is ROOT. In order to
|
||||
* determine if a parent is root, there's a need for
|
||||
* {@link TaxonomyWriter#getParent(int)}.
|
||||
*/
|
||||
public boolean shouldAdd(int ordinal) {
|
||||
if (ordinal > TaxonomyReader.ROOT_ORDINAL) {
|
||||
try {
|
||||
if (this.taxonomyWriter.getParent(ordinal) > TaxonomyReader.ROOT_ORDINAL) {
|
||||
return true;
|
||||
}
|
||||
} catch (Exception e) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,43 @@
|
|||
package org.apache.lucene.facet.index.categorypolicy;
|
||||
|
||||
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
 * This class filters out the ROOT category and its direct descendants. For
 * more information see {@link PathPolicy}.
 * 
 * @lucene.experimental
 */
public class NonTopLevelPathPolicy implements PathPolicy {

  /**
   * The shortest path length delivered is two components (root + one child).
   */
  public final int DEFAULT_MINIMAL_SUBPATH_LENGTH = 2;

  /**
   * Filters out (returns false) category paths with fewer than
   * {@link #DEFAULT_MINIMAL_SUBPATH_LENGTH} components. true otherwise.
   */
  public boolean shouldAdd(CategoryPath categoryPath) {
    return categoryPath.length() >= DEFAULT_MINIMAL_SUBPATH_LENGTH;
  }
}
|
|
@ -0,0 +1,56 @@
|
|||
package org.apache.lucene.facet.index.categorypolicy;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
import org.apache.lucene.facet.index.streaming.CategoryParentsStream;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
 * Filtering category ordinals in {@link CategoryParentsStream}, where a given
 * category ordinal is added to the stream, and then its parents are being added
 * one after the other using {@link TaxonomyWriter#getParent(int)}. <br>
 * That loop should have a stop point - the default approach (excluding the
 * ROOT) is implemented in {@link DefaultOrdinalPolicy}.
 * 
 * @lucene.experimental
 */
public interface OrdinalPolicy extends Serializable {

  /**
   * Check whether a given category ordinal should be added to the stream.
   * 
   * @param ordinal
   *            A given category ordinal which is to be tested for stream
   *            addition.
   * @return <code>true</code> if the category should be added.
   *         <code>false</code> otherwise.
   */
  public abstract boolean shouldAdd(int ordinal);

  /**
   * Initialize the policy with a {@link TaxonomyWriter}. This method can be
   * implemented as a no-op if the ordinal policy is not taxonomy dependent.
   * 
   * @param taxonomyWriter
   *            A relevant taxonomyWriter object, with which ordinals sent to
   *            {@link #shouldAdd(int)} are examined.
   */
  public abstract void init(TaxonomyWriter taxonomyWriter);
}
|
|
@ -0,0 +1,47 @@
|
|||
package org.apache.lucene.facet.index.categorypolicy;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
import org.apache.lucene.facet.index.streaming.CategoryParentsStream;
|
||||
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
 * Filtering category paths in {@link CategoryParentsStream}, where a given
 * category is added to the stream, and then all its parents are being
 * added one after the other by successively removing the last component. <br>
 * That loop should have a stop point - the default approach (excluding the
 * ROOT) is implemented in {@link DefaultPathPolicy}.
 * 
 * @lucene.experimental
 */
public interface PathPolicy extends Serializable {

  /**
   * Check whether a given category path should be added to the stream.
   * 
   * @param categoryPath
   *            A given category path which is to be tested for stream
   *            addition.
   * @return <code>true</code> if the category path should be added.
   *         <code>false</code> otherwise.
   */
  public abstract boolean shouldAdd(CategoryPath categoryPath);

}
|
|
@ -0,0 +1,21 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>Policies for indexing categories</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>Policies for indexing categories</h1>
|
||||
|
||||
There are two kinds of policies:
|
||||
<ul>
|
||||
<li>Path policies are based on the path of the category.</li>
|
||||
<li>Ordinal policies are based on the ordinal of the category.</li>
|
||||
</ul>
|
||||
|
||||
Policies are consulted during indexing to decide whether a category should
be added to the index or not. The two kinds of policies can be used for different purposes.
For example, path policies dictate which categories can participate in a drill-down operation,
while ordinal policies affect which categories can be accumulated (e.g. counted).
|
||||
|
||||
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,15 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>Indexing of document categories</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>Indexing of document categories</h1>
|
||||
|
||||
Attachment of
|
||||
{@link org.apache.lucene.facet.taxonomy.CategoryPath CategoryPath}'s
|
||||
or {@link org.apache.lucene.facet.index.attributes.CategoryAttribute CategoryAttribute}'s
|
||||
to a given document using a
|
||||
{@link org.apache.lucene.facet.taxonomy.TaxonomyWriter Taxonomy}.
|
||||
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,149 @@
|
|||
package org.apache.lucene.facet.index.params;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.Term;
|
||||
|
||||
import org.apache.lucene.facet.search.CategoryListIterator;
|
||||
import org.apache.lucene.facet.search.PayloadIntDecodingIterator;
|
||||
import org.apache.lucene.facet.search.TotalFacetCounts;
|
||||
import org.apache.lucene.facet.util.PartitionsUtils;
|
||||
import org.apache.lucene.util.encoding.DGapIntEncoder;
|
||||
import org.apache.lucene.util.encoding.IntDecoder;
|
||||
import org.apache.lucene.util.encoding.IntEncoder;
|
||||
import org.apache.lucene.util.encoding.SortingIntEncoder;
|
||||
import org.apache.lucene.util.encoding.UniqueValuesIntEncoder;
|
||||
import org.apache.lucene.util.encoding.VInt8IntEncoder;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Contains parameters for a category list *
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class CategoryListParams implements Serializable {
|
||||
|
||||
/** The default term used to store the facets information. */
|
||||
public static final Term DEFAULT_TERM = new Term("$facets", "$fulltree$");
|
||||
|
||||
private final Term term;
|
||||
|
||||
private final int hashCode;
|
||||
|
||||
/**
|
||||
* Constructs a default category list parameters object, using
|
||||
* {@link #DEFAULT_TERM}.
|
||||
*/
|
||||
public CategoryListParams() {
|
||||
this(DEFAULT_TERM);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a category list parameters object, using the given {@link Term}.
|
||||
* @param term who's payload hold the category-list.
|
||||
*/
|
||||
public CategoryListParams(Term term) {
|
||||
this.term = term;
|
||||
// Pre-compute the hashCode because these objects are immutable. Saves
|
||||
// some time on the comparisons later.
|
||||
this.hashCode = term.hashCode();
|
||||
}
|
||||
|
||||
/**
|
||||
* A {@link Term} who's payload holds the category-list.
|
||||
*/
|
||||
public final Term getTerm() {
|
||||
return term;
|
||||
}
|
||||
|
||||
/**
|
||||
* Allows to override how categories are encoded and decoded. A matching
|
||||
* {@link IntDecoder} is provided by the {@link IntEncoder}.
|
||||
* <p>
|
||||
* Default implementation creates a new Sorting(<b>Unique</b>(DGap)) encoder.
|
||||
* Uniqueness in this regard means when the same category appears twice in a
|
||||
* document, only one appearance would be encoded. This has effect on facet
|
||||
* counting results.
|
||||
* <p>
|
||||
* Some possible considerations when overriding may be:
|
||||
* <ul>
|
||||
* <li>an application "knows" that all categories are unique. So no need to
|
||||
* pass through the unique filter.</li>
|
||||
* <li>Another application might wish to count multiple occurrences of the
|
||||
* same category, or, use a faster encoding which will consume more space.</li>
|
||||
* </ul>
|
||||
* In any event when changing this value make sure you know what you are
|
||||
* doing, and test the results - e.g. counts, if the application is about
|
||||
* counting facets.
|
||||
*/
|
||||
public IntEncoder createEncoder() {
|
||||
return new SortingIntEncoder(new UniqueValuesIntEncoder(new DGapIntEncoder(new VInt8IntEncoder())));
|
||||
}
|
||||
|
||||
/**
|
||||
* Equality is defined by the 'term' that defines this category list.
|
||||
* Sub-classes should override this method if a more complex calculation
|
||||
* is needed to ensure equality.
|
||||
*/
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (o == this) {
|
||||
return true;
|
||||
}
|
||||
if (!(o instanceof CategoryListParams)) {
|
||||
return false;
|
||||
}
|
||||
CategoryListParams other = (CategoryListParams) o;
|
||||
if (this.hashCode != other.hashCode) {
|
||||
return false;
|
||||
}
|
||||
// The above hashcodes might equal each other in the case of a collision,
|
||||
// so at this point only directly term equality testing will settle
|
||||
// the equality test.
|
||||
return this.term.equals(other.term);
|
||||
}
|
||||
|
||||
/**
|
||||
* Hashcode is similar to {@link #equals(Object)}, in that it uses
|
||||
* the term that defines this category list to derive the hashcode.
|
||||
* Subclasses need to ensure that equality/hashcode is correctly defined,
|
||||
* or there could be side-effects in the {@link TotalFacetCounts} caching
|
||||
* mechanism (as the filename for a Total Facet Counts array cache
|
||||
* is dependent on the hashCode, so it should consistently return the same
|
||||
* hash for identity).
|
||||
*/
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return this.hashCode;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create the category list iterator for the specified partition.
|
||||
*/
|
||||
public CategoryListIterator createCategoryListIterator(IndexReader reader,
|
||||
int partition) throws IOException {
|
||||
String categoryListTermStr = PartitionsUtils.partitionName(this, partition);
|
||||
Term payloadTerm = new Term(term.field(), categoryListTermStr);
|
||||
return new PayloadIntDecodingIterator(reader, payloadTerm,
|
||||
createEncoder().createMatchingDecoder());
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,196 @@
|
|||
package org.apache.lucene.facet.index.params;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.facet.index.categorypolicy.DefaultOrdinalPolicy;
|
||||
import org.apache.lucene.facet.index.categorypolicy.DefaultPathPolicy;
|
||||
import org.apache.lucene.facet.index.categorypolicy.OrdinalPolicy;
|
||||
import org.apache.lucene.facet.index.categorypolicy.PathPolicy;
|
||||
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Default implementation for {@link FacetIndexingParams}.
|
||||
* <p>
|
||||
* Getters for <em>partition-size</em>, {@link OrdinalPolicy} and
|
||||
* {@link PathPolicy} are all final, and so the proper way to modify them when
|
||||
* extending this class is through {@link #fixedPartitionSize()},
|
||||
* {@link #fixedOrdinalPolicy()} or {@link #fixedPathPolicy()} accordingly.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class DefaultFacetIndexingParams implements FacetIndexingParams {
|
||||
|
||||
/**
|
||||
* delimiter between a categories in a path, e.g. Products FACET_DELIM
|
||||
* Consumer FACET_DELIM Tv. This should be a character not found in any path
|
||||
* component
|
||||
*/
|
||||
public static final char DEFAULT_FACET_DELIM_CHAR = '\uF749';
|
||||
|
||||
private final CategoryListParams clpParams;
|
||||
private final OrdinalPolicy ordinalPolicy;
|
||||
private final PathPolicy pathPolicy;
|
||||
private final int partitionSize;
|
||||
|
||||
public DefaultFacetIndexingParams() {
|
||||
this(new CategoryListParams());
|
||||
}
|
||||
|
||||
public DefaultFacetIndexingParams(CategoryListParams categoryListParams) {
|
||||
clpParams = categoryListParams;
|
||||
ordinalPolicy = fixedOrdinalPolicy();
|
||||
pathPolicy = fixedPathPolicy();
|
||||
partitionSize = fixedPartitionSize();
|
||||
}
|
||||
|
||||
public CategoryListParams getCategoryListParams(CategoryPath category) {
|
||||
return clpParams;
|
||||
}
|
||||
|
||||
public int drillDownTermText(CategoryPath path, char[] buffer) {
|
||||
return path.copyToCharArray(buffer, 0, -1, getFacetDelimChar());
|
||||
}
|
||||
|
||||
/**
|
||||
* "fixed" partition size.
|
||||
* @see #getPartitionSize()
|
||||
*/
|
||||
protected int fixedPartitionSize() {
|
||||
return Integer.MAX_VALUE;
|
||||
}
|
||||
|
||||
/**
|
||||
* "fixed" ordinal policy.
|
||||
* @see #getOrdinalPolicy()
|
||||
*/
|
||||
protected OrdinalPolicy fixedOrdinalPolicy() {
|
||||
return new DefaultOrdinalPolicy();
|
||||
}
|
||||
|
||||
/**
|
||||
* "fixed" path policy.
|
||||
* @see #getPathPolicy()
|
||||
*/
|
||||
protected PathPolicy fixedPathPolicy() {
|
||||
return new DefaultPathPolicy();
|
||||
}
|
||||
|
||||
public final int getPartitionSize() {
|
||||
return partitionSize;
|
||||
}
|
||||
|
||||
/*
|
||||
* (non-Javadoc)
|
||||
*
|
||||
* @see
|
||||
* org.apache.lucene.facet.index.params.FacetIndexingParams#getAllCategoryListParams
|
||||
* ()
|
||||
*/
|
||||
public Iterable<CategoryListParams> getAllCategoryListParams() {
|
||||
List<CategoryListParams> res = new ArrayList<CategoryListParams>();
|
||||
res.add(clpParams);
|
||||
return res;
|
||||
}
|
||||
|
||||
public final OrdinalPolicy getOrdinalPolicy() {
|
||||
return ordinalPolicy;
|
||||
}
|
||||
|
||||
public final PathPolicy getPathPolicy() {
|
||||
return pathPolicy;
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see java.lang.Object#hashCode()
|
||||
*/
|
||||
@Override
|
||||
public int hashCode() {
|
||||
final int prime = 31;
|
||||
int result = 1;
|
||||
result = prime * result
|
||||
+ ((clpParams == null) ? 0 : clpParams.hashCode());
|
||||
result = prime * result
|
||||
+ ((ordinalPolicy == null) ? 0 : ordinalPolicy.hashCode());
|
||||
result = prime * result + partitionSize;
|
||||
result = prime * result
|
||||
+ ((pathPolicy == null) ? 0 : pathPolicy.hashCode());
|
||||
|
||||
for (CategoryListParams clp: getAllCategoryListParams()) {
|
||||
result ^= clp.hashCode();
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see java.lang.Object#equals(java.lang.Object)
|
||||
*/
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
if (this == obj) {
|
||||
return true;
|
||||
}
|
||||
if (obj == null) {
|
||||
return false;
|
||||
}
|
||||
if (!(obj instanceof DefaultFacetIndexingParams)) {
|
||||
return false;
|
||||
}
|
||||
DefaultFacetIndexingParams other = (DefaultFacetIndexingParams) obj;
|
||||
if (clpParams == null) {
|
||||
if (other.clpParams != null) {
|
||||
return false;
|
||||
}
|
||||
} else if (!clpParams.equals(other.clpParams)) {
|
||||
return false;
|
||||
}
|
||||
if (ordinalPolicy == null) {
|
||||
if (other.ordinalPolicy != null) {
|
||||
return false;
|
||||
}
|
||||
} else if (!ordinalPolicy.equals(other.ordinalPolicy)) {
|
||||
return false;
|
||||
}
|
||||
if (partitionSize != other.partitionSize) {
|
||||
return false;
|
||||
}
|
||||
if (pathPolicy == null) {
|
||||
if (other.pathPolicy != null) {
|
||||
return false;
|
||||
}
|
||||
} else if (!pathPolicy.equals(other.pathPolicy)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
Iterable<CategoryListParams> cLs = getAllCategoryListParams();
|
||||
Iterable<CategoryListParams> otherCLs = other.getAllCategoryListParams();
|
||||
|
||||
return cLs.equals(otherCLs);
|
||||
}
|
||||
|
||||
/**
|
||||
* Use {@link #DEFAULT_FACET_DELIM_CHAR} as the delimiter.
|
||||
*/
|
||||
public char getFacetDelimChar() {
|
||||
return DEFAULT_FACET_DELIM_CHAR;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,98 @@
|
|||
package org.apache.lucene.facet.index.params;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
import org.apache.lucene.facet.index.categorypolicy.OrdinalPolicy;
|
||||
import org.apache.lucene.facet.index.categorypolicy.PathPolicy;
|
||||
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Parameters on how facets are to be written to the index.
|
||||
* For example, which fields and terms are used to refer to the indexed posting list.
|
||||
* <P>
|
||||
* If non-default parameters were used during indexing, the same parameters
|
||||
* must also be passed during faceted search. This requirement is analogous
|
||||
* to the requirement during search to know which fields were indexed, and which
|
||||
* Analyzer was used on the text.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public interface FacetIndexingParams extends Serializable {
|
||||
|
||||
/**
|
||||
* The name of the category-list to put this category in, or null if this
|
||||
* category should not be aggregatable.
|
||||
* <P>
|
||||
* By default, all categories are written to the same category list, but
|
||||
* applications which know in advance that in some situations only parts
|
||||
* of the category hierarchy needs to be counted can divide the categories
|
||||
* into two or more different category lists.
|
||||
* <P>
|
||||
* If null is returned for a category, it means that this category should
|
||||
* not appear in any category list, and thus counts for it cannot be
|
||||
* aggregated. This category can still be used for drill-down, even though
|
||||
* the count for it is not known.
|
||||
*/
|
||||
public CategoryListParams getCategoryListParams(CategoryPath category);
|
||||
|
||||
/**
|
||||
* Return info about all category lists in the index.
|
||||
*
|
||||
* @see #getCategoryListParams(CategoryPath)
|
||||
*/
|
||||
public Iterable<CategoryListParams> getAllCategoryListParams();
|
||||
|
||||
// TODO (Facet): Add special cases of exact/non-exact category term-text
|
||||
|
||||
/**
|
||||
* Return the drilldown Term-Text which does not need to do any allocations.
|
||||
* The number of chars set is returned.
|
||||
* <p>
|
||||
* Note: Make sure <code>buffer</code> is large enough.
|
||||
* @see CategoryPath#charsNeededForFullPath()
|
||||
*/
|
||||
public int drillDownTermText(CategoryPath path, char[] buffer);
|
||||
|
||||
/**
|
||||
* Get the partition size.
|
||||
* Same value should be used during the life time of an index.
|
||||
* At search time this value is compared with actual taxonomy size and their minimum is used.
|
||||
*/
|
||||
public int getPartitionSize();
|
||||
|
||||
/**
|
||||
* Get the policy for indexing category <b>paths</b>,
|
||||
* used for deciding how "high" to climb in taxonomy
|
||||
* from a category when ingesting its category paths.
|
||||
*/
|
||||
public PathPolicy getPathPolicy();
|
||||
|
||||
/**
|
||||
* Get the policy for indexing category <b>ordinals</b>,
|
||||
* used for deciding how "high" to climb in taxonomy
|
||||
* from a category when ingesting its ordinals
|
||||
*/
|
||||
public OrdinalPolicy getOrdinalPolicy();
|
||||
|
||||
/**
|
||||
* Get the delimiter character used internally for drill-down terms
|
||||
*/
|
||||
public char getFacetDelimChar();
|
||||
}
|
|
@ -0,0 +1,32 @@
|
|||
package org.apache.lucene.facet.index.params;
|
||||
|
||||
import org.apache.lucene.facet.FacetException;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Thrown when the facets params are missing a property. *
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class FacetParamsMissingPropertyException extends FacetException {
|
||||
|
||||
public FacetParamsMissingPropertyException(String key) {
|
||||
super("Property with key \"" + key + "\" not found");
|
||||
}
|
||||
}
|
|
@ -0,0 +1,105 @@
|
|||
package org.apache.lucene.facet.index.params;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A FacetIndexingParams that utilizes different category lists, defined by the
|
||||
* dimension specified CategoryPaths (see
|
||||
* {@link PerDimensionIndexingParams#addCategoryListParams(CategoryPath, CategoryListParams)}
|
||||
* <p>
|
||||
* A 'dimension' is defined as the first or "zero-th" component in a
|
||||
* CategoryPath. For example, if a CategoryPath is defined as
|
||||
* "/Author/American/Mark Twain", then the dimension is "Author".
|
||||
* <p>
|
||||
* This class also uses the 'default' CategoryListParams (as specified by
|
||||
* {@link CategoryListParams#CategoryListParams()} when
|
||||
* {@link #getCategoryListParams(CategoryPath)} is called for a CategoryPath
|
||||
* whose dimension component has not been specifically defined.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class PerDimensionIndexingParams extends DefaultFacetIndexingParams {
|
||||
|
||||
// "Root" or "first component" of a Category Path maps to a
|
||||
// CategoryListParams
|
||||
private final Map<String, CategoryListParams> clParamsMap = new HashMap<String, CategoryListParams>();
|
||||
|
||||
/**
|
||||
* Construct with the default {@link CategoryListParams} as the default
|
||||
* CategoryListParams for unspecified CategoryPaths.
|
||||
*/
|
||||
public PerDimensionIndexingParams() {
|
||||
this(new CategoryListParams());
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct with the included categoryListParams as the default
|
||||
* CategoryListParams for unspecified CategoryPaths.
|
||||
*
|
||||
* @param categoryListParams
|
||||
* the default categoryListParams to use
|
||||
*/
|
||||
public PerDimensionIndexingParams(CategoryListParams categoryListParams) {
|
||||
super(categoryListParams);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get all the categoryListParams, including the default.
|
||||
*/
|
||||
@Override
|
||||
public Iterable<CategoryListParams> getAllCategoryListParams() {
|
||||
ArrayList<CategoryListParams> vals =
|
||||
new ArrayList<CategoryListParams>(clParamsMap.values());
|
||||
for (CategoryListParams clp : super.getAllCategoryListParams()) {
|
||||
vals.add(clp);
|
||||
}
|
||||
return vals;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the CategoryListParams based on the dimension or "zero-th category"
|
||||
* of the specified CategoryPath.
|
||||
*/
|
||||
@Override
|
||||
public CategoryListParams getCategoryListParams(CategoryPath category) {
|
||||
if (category != null) {
|
||||
CategoryListParams clParams = clParamsMap.get(category.getComponent(0));
|
||||
if (clParams != null) {
|
||||
return clParams;
|
||||
}
|
||||
}
|
||||
return super.getCategoryListParams(category);
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a CategoryListParams for a given CategoryPath's dimension or
|
||||
* "zero-th" category.
|
||||
*
|
||||
* @param category
|
||||
* @param clParams
|
||||
*/
|
||||
public void addCategoryListParams(CategoryPath category, CategoryListParams clParams) {
|
||||
clParamsMap.put(category.getComponent(0), clParams);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,12 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>Indexing-time specifications for handling facets</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>Indexing-time specifications for handling facets</h1>
|
||||
|
||||
Parameters on how facets are to be written to the index,
|
||||
such as which fields and terms are used to refer to the facets posting list.
|
||||
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,81 @@
|
|||
package org.apache.lucene.facet.index.streaming;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Iterator;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
||||
import org.apache.lucene.facet.index.attributes.CategoryAttribute;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* An attribute stream built from an {@link Iterable} of
|
||||
* {@link CategoryAttribute}. This stream should then be passed through several
|
||||
* filters (see {@link CategoryParentsStream}, {@link CategoryListTokenizer} and
|
||||
* {@link CategoryTokenizer}) until a token stream is produced that can be
|
||||
* indexed by Lucene.
|
||||
* <P>
|
||||
* A CategoryAttributesStream object can be reused for producing more than one
|
||||
* stream. To do that, the user should cause the underlying
|
||||
* Iterable<CategoryAttribute> object to return a new set of categories, and
|
||||
* then call {@link #reset()} to allow this stream to be used again.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class CategoryAttributesStream extends TokenStream {
|
||||
|
||||
protected CategoryAttribute categoryAttribute;
|
||||
|
||||
private Iterable<CategoryAttribute> iterable;
|
||||
private Iterator<CategoryAttribute> iterator;
|
||||
|
||||
/**
|
||||
* Constructor
|
||||
*
|
||||
* @param iterable
|
||||
* {@link Iterable} of {@link CategoryAttribute}, from which
|
||||
* categories are taken.
|
||||
*/
|
||||
public CategoryAttributesStream(Iterable<CategoryAttribute> iterable) {
|
||||
this.iterable = iterable;
|
||||
this.iterator = null;
|
||||
this.categoryAttribute = this.addAttribute(CategoryAttribute.class);
|
||||
}
|
||||
|
||||
@Override
|
||||
public final boolean incrementToken() throws IOException {
|
||||
if (iterator == null) {
|
||||
if (iterable == null) {
|
||||
return false;
|
||||
}
|
||||
iterator = iterable.iterator();
|
||||
}
|
||||
if (iterator.hasNext()) {
|
||||
categoryAttribute.set(iterator.next());
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reset() {
|
||||
this.iterator = null;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,67 @@
|
|||
package org.apache.lucene.facet.index.streaming;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
||||
import org.apache.lucene.facet.index.params.FacetIndexingParams;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A base class for category list tokenizers, which add category list tokens to
|
||||
* category streams.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public abstract class CategoryListTokenizer extends CategoryTokenizerBase {
|
||||
|
||||
/**
|
||||
* @see CategoryTokenizerBase#CategoryTokenizerBase(TokenStream, FacetIndexingParams)
|
||||
*/
|
||||
public CategoryListTokenizer(TokenStream input,
|
||||
FacetIndexingParams indexingParams) {
|
||||
super(input, indexingParams);
|
||||
}
|
||||
|
||||
/**
|
||||
* A method invoked once when the input stream begins, for subclass-specific
|
||||
* processing. Subclass implementations must invoke this one, too!
|
||||
*/
|
||||
protected void handleStartOfInput() throws IOException {
|
||||
// In this class, we do nothing.
|
||||
}
|
||||
|
||||
/**
|
||||
* A method invoked once when the input stream ends, for subclass-specific
|
||||
* processing.
|
||||
*/
|
||||
protected void handleEndOfInput() throws IOException {
|
||||
// In this class, we do nothing.
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reset() throws IOException {
|
||||
super.reset();
|
||||
handleStartOfInput();
|
||||
}
|
||||
|
||||
@Override
|
||||
public abstract boolean incrementToken() throws IOException;
|
||||
|
||||
}
|
|
@ -0,0 +1,189 @@
|
|||
package org.apache.lucene.facet.index.streaming;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
|
||||
import org.apache.lucene.facet.index.attributes.CategoryAttribute;
|
||||
import org.apache.lucene.facet.index.attributes.CategoryProperty;
|
||||
import org.apache.lucene.facet.index.attributes.OrdinalProperty;
|
||||
import org.apache.lucene.facet.index.categorypolicy.OrdinalPolicy;
|
||||
import org.apache.lucene.facet.index.categorypolicy.PathPolicy;
|
||||
import org.apache.lucene.facet.index.params.FacetIndexingParams;
|
||||
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* This class adds parents to a {@link CategoryAttributesStream}. The parents
|
||||
* are added according to the {@link PathPolicy} and {@link OrdinalPolicy} from
|
||||
* the {@link FacetIndexingParams} given in the constructor.<br>
|
||||
* By default, category properties are removed when creating parents of a
|
||||
* certain category. However, it is possible to retain certain property types
|
||||
* using {@link #addRetainableProperty(Class)}.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class CategoryParentsStream extends TokenFilter {
|
||||
|
||||
/**
|
||||
* A {@link TaxonomyWriter} for adding categories and retrieving their
|
||||
* ordinals.
|
||||
*/
|
||||
protected TaxonomyWriter taxonomyWriter;
|
||||
|
||||
/** An attribute containing all data related to the category */
|
||||
protected CategoryAttribute categoryAttribute;
|
||||
|
||||
/** A category property containing the category ordinal */
|
||||
protected OrdinalProperty ordinalProperty;
|
||||
|
||||
/**
|
||||
* A set of property classes that are to be retained when creating a parent
|
||||
* token.
|
||||
*/
|
||||
private Set<Class<? extends CategoryProperty>> retainableProperties;
|
||||
|
||||
/** A {@link PathPolicy} for the category's parents' category paths. */
|
||||
private PathPolicy pathPolicy;
|
||||
|
||||
/** An {@link OrdinalPolicy} for the category's parents' ordinals. */
|
||||
private OrdinalPolicy ordinalPolicy;
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
*
|
||||
* @param input
|
||||
* The input stream to handle, must be derived from
|
||||
* {@link CategoryAttributesStream}.
|
||||
* @param taxonomyWriter
|
||||
* The taxonomy writer to use for adding categories and
|
||||
* retrieving their ordinals.
|
||||
* @param indexingParams
|
||||
* The indexing params used for filtering parents.
|
||||
*/
|
||||
public CategoryParentsStream(CategoryAttributesStream input,
|
||||
TaxonomyWriter taxonomyWriter, FacetIndexingParams indexingParams) {
|
||||
super(input);
|
||||
this.categoryAttribute = this.addAttribute(CategoryAttribute.class);
|
||||
this.taxonomyWriter = taxonomyWriter;
|
||||
this.pathPolicy = indexingParams.getPathPolicy();
|
||||
this.ordinalPolicy = indexingParams.getOrdinalPolicy();
|
||||
this.ordinalPolicy.init(taxonomyWriter);
|
||||
this.ordinalProperty = new OrdinalProperty();
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public final boolean incrementToken() throws IOException {
|
||||
if (this.categoryAttribute.getCategoryPath() != null) {
|
||||
// try adding the parent of the current category to the stream
|
||||
clearCategoryProperties();
|
||||
boolean added = false;
|
||||
// set the parent's ordinal, if illegal set -1
|
||||
int ordinal = this.ordinalProperty.getOrdinal();
|
||||
if (ordinal != -1) {
|
||||
ordinal = this.taxonomyWriter.getParent(ordinal);
|
||||
if (this.ordinalPolicy.shouldAdd(ordinal)) {
|
||||
this.ordinalProperty.setOrdinal(ordinal);
|
||||
try {
|
||||
this.categoryAttribute.addProperty(ordinalProperty);
|
||||
} catch (UnsupportedOperationException e) {
|
||||
throw new IOException(e.getLocalizedMessage());
|
||||
}
|
||||
added = true;
|
||||
} else {
|
||||
this.ordinalProperty.setOrdinal(-1);
|
||||
}
|
||||
}
|
||||
// set the parent's category path, if illegal set null
|
||||
CategoryPath cp = this.categoryAttribute.getCategoryPath();
|
||||
if (cp != null) {
|
||||
cp.trim(1);
|
||||
// if ordinal added, must also have category paths
|
||||
if (added || this.pathPolicy.shouldAdd(cp)) {
|
||||
this.categoryAttribute.setCategoryPath(cp);
|
||||
added = true;
|
||||
} else {
|
||||
this.categoryAttribute.clear();
|
||||
}
|
||||
}
|
||||
if (added) {
|
||||
// a legal parent exists
|
||||
return true;
|
||||
}
|
||||
}
|
||||
// no more parents - get new category
|
||||
if (input.incrementToken()) {
|
||||
int ordinal = taxonomyWriter.addCategory(this.categoryAttribute.getCategoryPath());
|
||||
this.ordinalProperty.setOrdinal(ordinal);
|
||||
try {
|
||||
this.categoryAttribute.addProperty(this.ordinalProperty);
|
||||
} catch (UnsupportedOperationException e) {
|
||||
throw new IOException(e.getLocalizedMessage());
|
||||
}
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Clear the properties of the current {@link CategoryAttribute} attribute
|
||||
* before setting the parent attributes. <br>
|
||||
* It is possible to retain properties of certain types the parent tokens,
|
||||
* using {@link #addRetainableProperty(Class)}.
|
||||
*/
|
||||
protected void clearCategoryProperties() {
|
||||
if (this.retainableProperties == null
|
||||
|| this.retainableProperties.isEmpty()) {
|
||||
this.categoryAttribute.clearProperties();
|
||||
} else {
|
||||
List<Class<? extends CategoryProperty>> propertyClassesToRemove =
|
||||
new LinkedList<Class<? extends CategoryProperty>>();
|
||||
for (Class<? extends CategoryProperty> propertyClass : this.categoryAttribute
|
||||
.getPropertyClasses()) {
|
||||
if (!this.retainableProperties.contains(propertyClass)) {
|
||||
propertyClassesToRemove.add(propertyClass);
|
||||
}
|
||||
}
|
||||
for (Class<? extends CategoryProperty> propertyClass : propertyClassesToRemove) {
|
||||
this.categoryAttribute.remove(propertyClass);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a {@link CategoryProperty} class which is retained when creating
|
||||
* parent tokens.
|
||||
*
|
||||
* @param toRetain
|
||||
* The property class to retain.
|
||||
*/
|
||||
public void addRetainableProperty(Class<? extends CategoryProperty> toRetain) {
|
||||
if (this.retainableProperties == null) {
|
||||
this.retainableProperties = new HashSet<Class<? extends CategoryProperty>>();
|
||||
}
|
||||
this.retainableProperties.add(toRetain);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,67 @@
|
|||
package org.apache.lucene.facet.index.streaming;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
|
||||
import org.apache.lucene.facet.index.params.FacetIndexingParams;
|
||||
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Basic class for setting the {@link CharTermAttribute}s and
|
||||
* {@link PayloadAttribute}s of category tokens.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class CategoryTokenizer extends CategoryTokenizerBase {
|
||||
|
||||
/**
|
||||
* @see CategoryTokenizerBase#CategoryTokenizerBase(TokenStream,
|
||||
* FacetIndexingParams)
|
||||
*/
|
||||
public CategoryTokenizer(TokenStream input,
|
||||
FacetIndexingParams indexingParams) {
|
||||
super(input, indexingParams);
|
||||
}
|
||||
|
||||
@Override
|
||||
public final boolean incrementToken() throws IOException {
|
||||
if (input.incrementToken()) {
|
||||
if (categoryAttribute != null && categoryAttribute.getCategoryPath() != null) {
|
||||
CategoryPath categoryPath = categoryAttribute.getCategoryPath();
|
||||
char[] termBuffer = termAttribute.resizeBuffer(categoryPath.charsNeededForFullPath());
|
||||
int nChars = indexingParams.drillDownTermText(categoryPath, termBuffer);
|
||||
termAttribute.setLength(nChars);
|
||||
setPayload();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the payload of the current category token.
|
||||
*/
|
||||
protected void setPayload() {
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,78 @@
|
|||
package org.apache.lucene.facet.index.streaming;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.index.Payload;
|
||||
|
||||
import org.apache.lucene.facet.index.CategoryDocumentBuilder;
|
||||
import org.apache.lucene.facet.index.attributes.CategoryAttribute;
|
||||
import org.apache.lucene.facet.index.params.FacetIndexingParams;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A base class for all token filters which add term and payload attributes to
|
||||
* tokens and are to be used in {@link CategoryDocumentBuilder}. Contains three
|
||||
* attributes: {@link CategoryAttribute}, {@link CharTermAttribute} and
|
||||
* {@link PayloadAttribute}.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public abstract class CategoryTokenizerBase extends TokenFilter {
|
||||
|
||||
/** The stream's category attributes. */
|
||||
protected CategoryAttribute categoryAttribute;
|
||||
|
||||
/** The stream's payload attribute. */
|
||||
protected PayloadAttribute payloadAttribute;
|
||||
|
||||
/** The stream's term attribute. */
|
||||
protected CharTermAttribute termAttribute;
|
||||
|
||||
/** The object used for constructing payloads. */
|
||||
protected Payload payload = new Payload();
|
||||
|
||||
/** Indexing params for creating term text **/
|
||||
protected FacetIndexingParams indexingParams;
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
*
|
||||
* @param input
|
||||
* The input stream, either {@link CategoryParentsStream} or an
|
||||
* extension of {@link CategoryTokenizerBase}.
|
||||
* @param indexingParams
|
||||
* The indexing params to use.
|
||||
*/
|
||||
public CategoryTokenizerBase(TokenStream input,
|
||||
FacetIndexingParams indexingParams) {
|
||||
super(input);
|
||||
this.categoryAttribute = this.addAttribute(CategoryAttribute.class);
|
||||
this.termAttribute = this.addAttribute(CharTermAttribute.class);
|
||||
this.payloadAttribute = this.addAttribute(PayloadAttribute.class);
|
||||
this.indexingParams = indexingParams;
|
||||
}
|
||||
|
||||
@Override
|
||||
public abstract boolean incrementToken() throws IOException;
|
||||
|
||||
}
|
|
@ -0,0 +1,125 @@
|
|||
package org.apache.lucene.facet.index.streaming;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map.Entry;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
||||
import org.apache.lucene.facet.index.CategoryListPayloadStream;
|
||||
import org.apache.lucene.facet.index.attributes.OrdinalProperty;
|
||||
import org.apache.lucene.facet.index.params.CategoryListParams;
|
||||
import org.apache.lucene.facet.index.params.FacetIndexingParams;
|
||||
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||
import org.apache.lucene.facet.util.PartitionsUtils;
|
||||
import org.apache.lucene.util.encoding.IntEncoder;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* {@link CategoryListTokenizer} for facet counting
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class CountingListTokenizer extends CategoryListTokenizer {
|
||||
|
||||
/** A table for retrieving payload streams by category-list name. */
|
||||
protected HashMap<String, CategoryListPayloadStream> payloadStreamsByName =
|
||||
new HashMap<String, CategoryListPayloadStream>();
|
||||
|
||||
/** An iterator over the payload streams */
|
||||
protected Iterator<Entry<String, CategoryListPayloadStream>> payloadStreamIterator;
|
||||
|
||||
public CountingListTokenizer(TokenStream input,
|
||||
FacetIndexingParams indexingParams) {
|
||||
super(input, indexingParams);
|
||||
this.payloadStreamsByName = new HashMap<String, CategoryListPayloadStream>();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void handleStartOfInput() throws IOException {
|
||||
payloadStreamsByName.clear();
|
||||
payloadStreamIterator = null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public final boolean incrementToken() throws IOException {
|
||||
if (input.incrementToken()) {
|
||||
if (this.categoryAttribute != null) {
|
||||
OrdinalProperty ordinalProperty = (OrdinalProperty) this.categoryAttribute
|
||||
.getProperty(OrdinalProperty.class);
|
||||
if (ordinalProperty != null && legalCategory()) {
|
||||
CategoryPath categoryPath = this.categoryAttribute
|
||||
.getCategoryPath();
|
||||
int ordinal = ordinalProperty.getOrdinal();
|
||||
CategoryListPayloadStream payloadStream = getPayloadStream(
|
||||
categoryPath, ordinal);
|
||||
int partitionSize = indexingParams.getPartitionSize();
|
||||
payloadStream.appendIntToStream(ordinal % partitionSize);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
if (this.payloadStreamIterator == null) {
|
||||
this.handleEndOfInput();
|
||||
this.payloadStreamIterator = this.payloadStreamsByName.entrySet()
|
||||
.iterator();
|
||||
}
|
||||
if (this.payloadStreamIterator.hasNext()) {
|
||||
Entry<String, CategoryListPayloadStream> entry = this.payloadStreamIterator
|
||||
.next();
|
||||
String countingListName = entry.getKey();
|
||||
int length = countingListName.length();
|
||||
this.termAttribute.resizeBuffer(length);
|
||||
countingListName.getChars(0, length, termAttribute.buffer(), 0);
|
||||
this.termAttribute.setLength(length);
|
||||
CategoryListPayloadStream payloadStream = entry.getValue();
|
||||
payload.setData(payloadStream.convertStreamToByteArray());
|
||||
this.payloadAttribute.setPayload(payload);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* A method which allows extending classes to filter the categories going
|
||||
* into the counting list.
|
||||
*
|
||||
* @return By default returns {@code true}, meaning the current category is
|
||||
* to be part of the counting list. For categories that should be
|
||||
* filtered, return {@code false}.
|
||||
*/
|
||||
protected boolean legalCategory() {
|
||||
return true;
|
||||
}
|
||||
|
||||
protected CategoryListPayloadStream getPayloadStream(
|
||||
CategoryPath categoryPath, int ordinal) throws IOException {
|
||||
CategoryListParams clParams = this.indexingParams.getCategoryListParams(categoryPath);
|
||||
String name = PartitionsUtils.partitionNameByOrdinal(indexingParams, clParams, ordinal);
|
||||
CategoryListPayloadStream fps = payloadStreamsByName.get(name);
|
||||
if (fps == null) {
|
||||
IntEncoder encoder = clParams.createEncoder();
|
||||
fps = new CategoryListPayloadStream(encoder);
|
||||
payloadStreamsByName.put(name, fps);
|
||||
}
|
||||
return fps;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,19 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>Expert: attributes streaming definition for indexing facets</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>Expert: attributes streaming definition for indexing facets</h1>
|
||||
|
||||
Streaming of facet attributes is a low-level indexing interface with Lucene indexing.
|
||||
There are two types of category related streams:
|
||||
<ul>
|
||||
<li><b>Category tokenizer stream</b> handles tokenization for a single category,
|
||||
e.g. for creating drill-down tokens.</li>
|
||||
<li><b>Category list tokenizer stream</b> handles tokenization for multiple categories,
|
||||
e.g. for creating a counting list token, representing all the categories of
|
||||
a certain document.</li>
|
||||
</ul>
|
||||
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,8 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>Faceted Indexing and Search</title>
|
||||
</head>
|
||||
<body>
|
||||
Provides faceted indexing and search capabilities.
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,116 @@
|
|||
package org.apache.lucene.facet.search;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
|
||||
import org.apache.lucene.facet.search.params.FacetSearchParams;
|
||||
import org.apache.lucene.facet.search.results.FacetResult;
|
||||
import org.apache.lucene.facet.search.results.FacetResultNode;
|
||||
import org.apache.lucene.facet.search.sampling.Sampler;
|
||||
import org.apache.lucene.facet.search.sampling.SamplingAccumulator;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* {@link FacetsAccumulator} whose behavior regarding complements, sampling,
|
||||
* etc. is not set up front but rather is determined at accumulation time
|
||||
* according to the statistics of the accumulated set of documents and the
|
||||
* index.
|
||||
* <p>
|
||||
* Note: Sampling accumulation (Accumulation over a sampled-set of the results),
|
||||
* does not guarantee accurate values for
|
||||
* {@link FacetResult#getNumValidDescendants()} &
|
||||
* {@link FacetResultNode#getResidue()}.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public final class AdaptiveFacetsAccumulator extends StandardFacetsAccumulator {
|
||||
|
||||
private Sampler sampler = new Sampler();
|
||||
|
||||
/**
|
||||
* Create an {@link AdaptiveFacetsAccumulator}
|
||||
* @see StandardFacetsAccumulator#StandardFacetsAccumulator(FacetSearchParams, IndexReader, TaxonomyReader)
|
||||
*/
|
||||
public AdaptiveFacetsAccumulator(FacetSearchParams searchParams, IndexReader indexReader,
|
||||
TaxonomyReader taxonomyReader) {
|
||||
super(searchParams, indexReader, taxonomyReader);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an {@link AdaptiveFacetsAccumulator}
|
||||
* @see StandardFacetsAccumulator#StandardFacetsAccumulator(FacetSearchParams, IndexReader, TaxonomyReader,
|
||||
* IntArrayAllocator, FloatArrayAllocator)
|
||||
*/
|
||||
public AdaptiveFacetsAccumulator(FacetSearchParams searchParams, IndexReader indexReader,
|
||||
TaxonomyReader taxonomyReader, IntArrayAllocator intArrayAllocator,
|
||||
FloatArrayAllocator floatArrayAllocator) {
|
||||
super(searchParams, indexReader, taxonomyReader, intArrayAllocator, floatArrayAllocator);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the sampler.
|
||||
* @param sampler sampler to set
|
||||
*/
|
||||
public void setSampler(Sampler sampler) {
|
||||
this.sampler = sampler;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<FacetResult> accumulate(ScoredDocIDs docids) throws IOException {
|
||||
FacetsAccumulator delegee = appropriateFacetCountingAccumulator(docids);
|
||||
|
||||
if (delegee == this) {
|
||||
return super.accumulate(docids);
|
||||
}
|
||||
|
||||
return delegee.accumulate(docids);
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute the appropriate facet accumulator to use.
|
||||
* If no special/clever adaptation is possible/needed return this (self).
|
||||
*/
|
||||
private FacetsAccumulator appropriateFacetCountingAccumulator(ScoredDocIDs docids) {
|
||||
// Verify that searchPareams permit sampling/complement/etc... otherwise do default
|
||||
if (!mayComplement()) {
|
||||
return this;
|
||||
}
|
||||
|
||||
// Now we're sure we can use the sampling methods as we're in a counting only mode
|
||||
|
||||
// Verify that sampling is enabled and required ... otherwise do default
|
||||
if (sampler == null || !sampler.shouldSample(docids)) {
|
||||
return this;
|
||||
}
|
||||
|
||||
SamplingAccumulator samplingAccumulator = new SamplingAccumulator(sampler, searchParams, indexReader, taxonomyReader);
|
||||
samplingAccumulator.setComplementThreshold(getComplementThreshold());
|
||||
return samplingAccumulator;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the sampler in effect
|
||||
*/
|
||||
public final Sampler getSampler() {
|
||||
return sampler;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,69 @@
|
|||
package org.apache.lucene.facet.search;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
 * An interface for iterating over a "category list", i.e., the list of
 * categories per document.
 * <p>
 * <b>NOTE:</b>
 * <ul>
 * <li>This class operates as a key to a Map. Appropriate implementation of
 * <code>hashCode()</code> and <code>equals()</code> must be provided.
 * <li>{@link #init()} must be called before you consume any categories, or call
 * {@link #skipTo(int)}.
 * <li>{@link #skipTo(int)} must be called before any calls to
 * {@link #nextCategory()}.
 * <li>{@link #nextCategory()} returns values &lt; {@link Integer#MAX_VALUE}, so
 * you can use it as a stop condition.
 * </ul>
 *
 * @lucene.experimental
 */
public interface CategoryListIterator {

  /**
   * Initializes the iterator. This method must be called before any calls to
   * {@link #skipTo(int)}, and its return value indicates whether there are
   * any relevant documents for this iterator. If it returns false, any call
   * to {@link #skipTo(int)} will return false as well.<br>
   * <b>NOTE:</b> calling this method twice may result in skipping over
   * documents for some implementations. Also, calling it again after all
   * documents were consumed may yield unexpected behavior.
   */
  public boolean init() throws IOException;

  /**
   * Skips forward to document docId. Returns true iff this document exists
   * and has any categories. This method must be called before calling
   * {@link #nextCategory()} for a particular document.<br>
   * <b>NOTE:</b> Users should call this method with increasing docIds, and
   * implementations can assume that this is the case.
   */
  public boolean skipTo(int docId) throws IOException;

  /**
   * Returns the next category for the current document that is set through
   * {@link #skipTo(int)}, or a number higher than {@link Integer#MAX_VALUE}
   * when the document's categories are exhausted. No assumptions can be made
   * on the order of the categories.
   */
  public long nextCategory() throws IOException;

}
|
|
@ -0,0 +1,110 @@
|
|||
package org.apache.lucene.facet.search;
|
||||
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.BooleanClause.Occur;
|
||||
|
||||
import org.apache.lucene.facet.index.params.CategoryListParams;
|
||||
import org.apache.lucene.facet.index.params.FacetIndexingParams;
|
||||
import org.apache.lucene.facet.search.params.FacetSearchParams;
|
||||
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Creation of drill down term or query.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public final class DrillDown {
|
||||
|
||||
/**
|
||||
* @see #term(FacetIndexingParams, CategoryPath)
|
||||
*/
|
||||
public static final Term term(FacetSearchParams sParams, CategoryPath path) {
|
||||
return term(sParams.getFacetIndexingParams(), path);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a term for drilling down into a category.
|
||||
*/
|
||||
public static final Term term(FacetIndexingParams iParams, CategoryPath path) {
|
||||
CategoryListParams clp = iParams.getCategoryListParams(path);
|
||||
char[] buffer = new char[path.charsNeededForFullPath()];
|
||||
iParams.drillDownTermText(path, buffer);
|
||||
return new Term(clp.getTerm().field(), String.valueOf(buffer));
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a query for drilling down into all given categories (AND).
|
||||
* @see #term(FacetSearchParams, CategoryPath)
|
||||
* @see #query(FacetSearchParams, Query, CategoryPath...)
|
||||
*/
|
||||
public static final Query query(FacetIndexingParams iParams, CategoryPath... paths) {
|
||||
if (paths==null || paths.length==0) {
|
||||
throw new IllegalArgumentException("Empty category path not allowed for drill down query!");
|
||||
}
|
||||
if (paths.length==1) {
|
||||
return new TermQuery(term(iParams, paths[0]));
|
||||
}
|
||||
BooleanQuery res = new BooleanQuery();
|
||||
for (CategoryPath cp : paths) {
|
||||
res.add(new TermQuery(term(iParams, cp)), Occur.MUST);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a query for drilling down into all given categories (AND).
|
||||
* @see #term(FacetSearchParams, CategoryPath)
|
||||
* @see #query(FacetSearchParams, Query, CategoryPath...)
|
||||
*/
|
||||
public static final Query query(FacetSearchParams sParams, CategoryPath... paths) {
|
||||
return query(sParams.getFacetIndexingParams(), paths);
|
||||
}
|
||||
|
||||
/**
|
||||
* Turn a base query into a drilling-down query for all given category paths (AND).
|
||||
* @see #query(FacetIndexingParams, CategoryPath...)
|
||||
*/
|
||||
public static final Query query(FacetIndexingParams iParams, Query baseQuery, CategoryPath... paths) {
|
||||
BooleanQuery res = new BooleanQuery();
|
||||
res.add(baseQuery, Occur.MUST);
|
||||
res.add(query(iParams, paths), Occur.MUST);
|
||||
return res;
|
||||
}
|
||||
|
||||
/**
|
||||
* Turn a base query into a drilling-down query for all given category paths (AND).
|
||||
* @see #query(FacetSearchParams, CategoryPath...)
|
||||
*/
|
||||
public static final Query query(FacetSearchParams sParams, Query baseQuery, CategoryPath... paths) {
|
||||
return query(sParams.getFacetIndexingParams(), baseQuery, paths);
|
||||
}
|
||||
|
||||
/**
|
||||
* Turn a base query into a drilling-down query using the default {@link FacetSearchParams}
|
||||
* @see #query(FacetSearchParams, Query, CategoryPath...)
|
||||
*/
|
||||
public static final Query query(Query baseQuery, CategoryPath... paths) {
|
||||
return query(new FacetSearchParams(), baseQuery, paths);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,91 @@
|
|||
package org.apache.lucene.facet.search;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Provider of arrays used for facet operations such as counting.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class FacetArrays {
|
||||
|
||||
private int[] intArray;
|
||||
private float[] floatArray;
|
||||
private IntArrayAllocator intArrayAllocator;
|
||||
private FloatArrayAllocator floatArrayAllocator;
|
||||
private int arraysLength;
|
||||
|
||||
/**
|
||||
* Create a FacetArrays with certain array allocators.
|
||||
* @param intArrayAllocator allocator for int arrays.
|
||||
* @param floatArrayAllocator allocator for float arrays.
|
||||
*/
|
||||
public FacetArrays(IntArrayAllocator intArrayAllocator,
|
||||
FloatArrayAllocator floatArrayAllocator) {
|
||||
this.intArrayAllocator = intArrayAllocator;
|
||||
this.floatArrayAllocator = floatArrayAllocator;
|
||||
}
|
||||
|
||||
/**
|
||||
* Notify allocators that they can free arrays allocated
|
||||
* on behalf of this FacetArrays object.
|
||||
*/
|
||||
public void free() {
|
||||
if (intArrayAllocator!=null) {
|
||||
intArrayAllocator.free(intArray);
|
||||
// Should give up handle to the array now
|
||||
// that it is freed.
|
||||
intArray = null;
|
||||
}
|
||||
if (floatArrayAllocator!=null) {
|
||||
floatArrayAllocator.free(floatArray);
|
||||
// Should give up handle to the array now
|
||||
// that it is freed.
|
||||
floatArray = null;
|
||||
}
|
||||
arraysLength = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Obtain an int array, e.g. for facet counting.
|
||||
*/
|
||||
public int[] getIntArray() {
|
||||
if (intArray == null) {
|
||||
intArray = intArrayAllocator.allocate();
|
||||
arraysLength = intArray.length;
|
||||
}
|
||||
return intArray;
|
||||
}
|
||||
|
||||
/** Obtain a float array, e.g. for evaluating facet association values. */
|
||||
public float[] getFloatArray() {
|
||||
if (floatArray == null) {
|
||||
floatArray = floatArrayAllocator.allocate();
|
||||
arraysLength = floatArray.length;
|
||||
}
|
||||
return floatArray;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the arrays length
|
||||
*/
|
||||
public int getArraysLength() {
|
||||
return arraysLength;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,161 @@
|
|||
package org.apache.lucene.facet.search;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.facet.search.params.FacetRequest;
|
||||
import org.apache.lucene.facet.search.results.FacetResult;
|
||||
import org.apache.lucene.facet.search.results.FacetResultNode;
|
||||
import org.apache.lucene.facet.search.results.IntermediateFacetResult;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Handler for facet results.
|
||||
* <p>
|
||||
* The facet results handler provided by the {@link FacetRequest} to
|
||||
* a {@link FacetsAccumulator}.
|
||||
* <p>
|
||||
* First it is used by {@link FacetsAccumulator} to obtain a temporary
|
||||
* facet result for each partition and to merge results of several partitions.
|
||||
* <p>
|
||||
* Later the accumulator invokes the handler to render the results, creating
|
||||
* {@link FacetResult} objects.
|
||||
* <p>
|
||||
* Last the accumulator invokes the handler to label final results.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public abstract class FacetResultsHandler {
|
||||
|
||||
/** Taxonomy for which facets are handled */
|
||||
protected final TaxonomyReader taxonomyReader;
|
||||
|
||||
/**
|
||||
* Facet request served by this handler.
|
||||
*/
|
||||
protected final FacetRequest facetRequest;
|
||||
|
||||
/**
|
||||
* Create a faceted search handler.
|
||||
* @param taxonomyReader See {@link #getTaxonomyReader()}.
|
||||
* @param facetRequest See {@link #getFacetRequest()}.
|
||||
*/
|
||||
public FacetResultsHandler(TaxonomyReader taxonomyReader,
|
||||
FacetRequest facetRequest) {
|
||||
this.taxonomyReader = taxonomyReader;
|
||||
this.facetRequest = facetRequest;
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetch results of a single partition, given facet arrays for that partition,
|
||||
* and based on the matching documents and faceted search parameters.
|
||||
*
|
||||
* @param arrays
|
||||
* facet arrays for the certain partition
|
||||
* @param offset
|
||||
* offset in input arrays where partition starts
|
||||
* @return temporary facet result, potentially, to be passed back to
|
||||
* <b>this</b> result handler for merging, or <b>null</b> in case that
|
||||
* constructor parameter, <code>facetRequest</code>, requests an
|
||||
* illegal FacetResult, like, e.g., a root node category path that
|
||||
* does not exist in constructor parameter <code>taxonomyReader</code>
|
||||
* .
|
||||
* @throws IOException
|
||||
* on error
|
||||
*/
|
||||
public abstract IntermediateFacetResult fetchPartitionResult(FacetArrays arrays, int offset) throws IOException;
|
||||
|
||||
/**
|
||||
* Merge results of several facet partitions. Logic of the merge is undefined
|
||||
* and open for interpretations. For example, a merge implementation could
|
||||
* keep top K results. Passed {@link IntermediateFacetResult} must be ones
|
||||
* that were created by this handler otherwise a {@link ClassCastException} is
|
||||
* thrown. In addition, all passed {@link IntermediateFacetResult} must have
|
||||
* the same {@link FacetRequest} otherwise an {@link IllegalArgumentException}
|
||||
* is thrown.
|
||||
*
|
||||
* @param tmpResults one or more temporary results created by <b>this</b>
|
||||
* handler.
|
||||
* @return temporary facet result that represents to union, as specified by
|
||||
* <b>this</b> handler, of the input temporary facet results.
|
||||
* @throws IOException on error.
|
||||
* @throws ClassCastException if the temporary result passed was not created
|
||||
* by this handler
|
||||
* @throws IllegalArgumentException if passed <code>facetResults</code> do not
|
||||
* have the same {@link FacetRequest}
|
||||
* @see IntermediateFacetResult#getFacetRequest()
|
||||
*/
|
||||
public abstract IntermediateFacetResult mergeResults(IntermediateFacetResult... tmpResults)
|
||||
throws IOException, ClassCastException, IllegalArgumentException;
|
||||
|
||||
/**
|
||||
* Create a facet result from the temporary result.
|
||||
* @param tmpResult temporary result to be rendered as a {@link FacetResult}
|
||||
* @throws IOException on error.
|
||||
*/
|
||||
public abstract FacetResult renderFacetResult(IntermediateFacetResult tmpResult) throws IOException ;
|
||||
|
||||
/**
|
||||
* Perform any rearrangement as required on a facet result that has changed after
|
||||
* it was rendered.
|
||||
* <P>
|
||||
* Possible use case: a sampling facets accumulator invoked another
|
||||
* other facets accumulator on a sample set of documents, obtained
|
||||
* rendered facet results, fixed their counts, and now it is needed
|
||||
* to sort the results differently according to the fixed counts.
|
||||
* @param facetResult result to be rearranged.
|
||||
* @see FacetResultNode#setValue(double)
|
||||
*/
|
||||
public abstract FacetResult rearrangeFacetResult(FacetResult facetResult);
|
||||
|
||||
/**
|
||||
* Label results according to settings in {@link FacetRequest},
|
||||
* such as {@link FacetRequest#getNumLabel()}.
|
||||
* Usually invoked by {@link FacetsAccumulator#accumulate(ScoredDocIDs)}
|
||||
* @param facetResult facet result to be labeled.
|
||||
* @throws IOException on error
|
||||
*/
|
||||
public abstract void labelResult (FacetResult facetResult) throws IOException;
|
||||
|
||||
/** Return taxonomy reader used for current facets accumulation operation. */
|
||||
public final TaxonomyReader getTaxonomyReader() {
|
||||
return this.taxonomyReader;
|
||||
}
|
||||
|
||||
/** Return the facet request served by this handler. */
|
||||
public final FacetRequest getFacetRequest() {
|
||||
return this.facetRequest;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if an array contains the partition which contains ordinal
|
||||
*
|
||||
* @param ordinal
|
||||
* checked facet
|
||||
* @param facetArrays
|
||||
* facet arrays for the certain partition
|
||||
* @param offset
|
||||
* offset in input arrays where partition starts
|
||||
*/
|
||||
protected boolean isSelfPartition (int ordinal, FacetArrays facetArrays, int offset) {
|
||||
int partitionSize = facetArrays.getArraysLength();
|
||||
return ordinal / partitionSize == offset / partitionSize;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,153 @@
|
|||
package org.apache.lucene.facet.search;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
|
||||
import org.apache.lucene.facet.search.params.FacetSearchParams;
|
||||
import org.apache.lucene.facet.search.params.FacetRequest;
|
||||
import org.apache.lucene.facet.search.results.FacetResult;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Driver for Accumulating facets of faceted search requests over given
|
||||
* documents.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public abstract class FacetsAccumulator {
|
||||
|
||||
/**
|
||||
* Default threshold for using the complements optimization.
|
||||
* If accumulating facets for a document set larger than this ratio of the index size than
|
||||
* perform the complement optimization.
|
||||
* @see #setComplementThreshold(double) for more info on the complements optimization.
|
||||
*/
|
||||
public static final double DEFAULT_COMPLEMENT_THRESHOLD = 0.6;
|
||||
|
||||
/**
|
||||
* Passing this to {@link #setComplementThreshold(double)} will disable using complement optimization.
|
||||
*/
|
||||
public static final double DISABLE_COMPLEMENT = Double.POSITIVE_INFINITY; // > 1 actually
|
||||
|
||||
/**
|
||||
* Passing this to {@link #setComplementThreshold(double)} will force using complement optimization.
|
||||
*/
|
||||
public static final double FORCE_COMPLEMENT = 0; // <=0
|
||||
|
||||
private double complementThreshold = DEFAULT_COMPLEMENT_THRESHOLD;
|
||||
|
||||
protected final TaxonomyReader taxonomyReader;
|
||||
protected final IndexReader indexReader;
|
||||
protected FacetSearchParams searchParams;
|
||||
|
||||
private boolean allowLabeling = true;
|
||||
|
||||
public FacetsAccumulator(FacetSearchParams searchParams,
|
||||
IndexReader indexReader,
|
||||
TaxonomyReader taxonomyReader) {
|
||||
this.indexReader = indexReader;
|
||||
this.taxonomyReader = taxonomyReader;
|
||||
this.searchParams = searchParams;
|
||||
}
|
||||
|
||||
/**
|
||||
* Accumulate facets over given documents, according to facet requests in effect.
|
||||
* @param docids documents (and their scores) for which facets are Accumulated.
|
||||
* @return Accumulated facets.
|
||||
* @throws IOException on error.
|
||||
*/
|
||||
// internal API note: it was considered to move the docids into the constructor as well,
|
||||
// but this prevents nice extension capabilities, especially in the way that
|
||||
// Sampling Accumulator works with the (any) delegated accumulator.
|
||||
public abstract List<FacetResult> accumulate(ScoredDocIDs docids) throws IOException;
|
||||
|
||||
/**
|
||||
* @return the complement threshold
|
||||
* @see #setComplementThreshold(double)
|
||||
*/
|
||||
public double getComplementThreshold() {
|
||||
return complementThreshold;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the complement threshold.
|
||||
* This threshold will dictate whether the complements optimization is applied.
|
||||
* The optimization is to count for less documents. It is useful when the same
|
||||
* FacetSearchParams are used for varying sets of documents. The first time
|
||||
* complements is used the "total counts" are computed - counting for all the
|
||||
* documents in the collection. Then, only the complementing set of documents
|
||||
* is considered, and used to decrement from the overall counts, thereby
|
||||
* walking through less documents, which is faster.
|
||||
* <p>
|
||||
* Note that this optimization is only available when searching an index
|
||||
* whose {@link IndexReader} implements both
|
||||
* {@link IndexReader#directory()} and {@link IndexReader#getVersion()}
|
||||
* otherwise the optimization is silently disabled regardless of
|
||||
* the complement threshold settings.
|
||||
* <p>
|
||||
* For the default settings see {@link #DEFAULT_COMPLEMENT_THRESHOLD}.
|
||||
* <p>
|
||||
* To forcing complements in all cases pass {@link #FORCE_COMPLEMENT}.
|
||||
* This is mostly useful for testing purposes, as forcing complements when only
|
||||
* tiny fraction of available documents match the query does not make sense and
|
||||
* would incur performance degradations.
|
||||
* <p>
|
||||
* To disable complements pass {@link #DISABLE_COMPLEMENT}.
|
||||
* @param complementThreshold the complement threshold to set
|
||||
*/
|
||||
public void setComplementThreshold(double complementThreshold) {
|
||||
this.complementThreshold = complementThreshold;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if labeling is allowed for this accumulator.
|
||||
* <p>
|
||||
* By default labeling is allowed.
|
||||
* This allows one accumulator to invoke other accumulators for accumulation
|
||||
* but keep to itself the responsibility of labeling.
|
||||
* This might br handy since labeling is a costly operation.
|
||||
* @return true of labeling is allowed for this accumulator
|
||||
* @see #setAllowLabeling(boolean)
|
||||
*/
|
||||
protected boolean isAllowLabeling() {
|
||||
return allowLabeling;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set whether labeling is allowed for this accumulator.
|
||||
* @param allowLabeling new setting for allow labeling
|
||||
* @see #isAllowLabeling()
|
||||
*/
|
||||
protected void setAllowLabeling(boolean allowLabeling) {
|
||||
this.allowLabeling = allowLabeling;
|
||||
}
|
||||
|
||||
/** check if all requests are complementable */
|
||||
protected boolean mayComplement() {
|
||||
for (FacetRequest freq:searchParams.getFacetRequests()) {
|
||||
if (!freq.supportsComplements()) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,137 @@
|
|||
package org.apache.lucene.facet.search;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||
import org.apache.lucene.search.Collector;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
|
||||
import org.apache.lucene.facet.search.params.FacetRequest;
|
||||
import org.apache.lucene.facet.search.params.FacetSearchParams;
|
||||
import org.apache.lucene.facet.search.results.FacetResult;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Collector for facet accumulation. *
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class FacetsCollector extends Collector {
|
||||
|
||||
protected final FacetsAccumulator facetsAccumulator;
|
||||
private ScoredDocIdCollector scoreDocIdCollector;
|
||||
private List<FacetResult> results;
|
||||
private Object resultsGuard;
|
||||
|
||||
/**
|
||||
* Create a collector for accumulating facets while collecting documents
|
||||
* during search.
|
||||
*
|
||||
* @param facetSearchParams
|
||||
* faceted search parameters defining which facets are required and
|
||||
* how.
|
||||
* @param indexReader
|
||||
* searched index.
|
||||
* @param taxonomyReader
|
||||
* taxonomy containing the facets.
|
||||
*/
|
||||
public FacetsCollector(FacetSearchParams facetSearchParams,
|
||||
IndexReader indexReader, TaxonomyReader taxonomyReader) {
|
||||
facetsAccumulator = initFacetsAccumulator(facetSearchParams, indexReader, taxonomyReader);
|
||||
scoreDocIdCollector = initScoredDocCollector(facetSearchParams, indexReader, taxonomyReader);
|
||||
resultsGuard = new Object();
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a {@link ScoredDocIdCollector} to be used as the first phase of
|
||||
* the facet collection. If all facetRequests are do not require the
|
||||
* document score, a ScoredDocIdCollector which does not store the document
|
||||
* scores would be returned. Otherwise a SDIC which does store the documents
|
||||
* will be returned, having an initial allocated space for 1000 such
|
||||
* documents' scores.
|
||||
*/
|
||||
protected ScoredDocIdCollector initScoredDocCollector(
|
||||
FacetSearchParams facetSearchParams, IndexReader indexReader,
|
||||
TaxonomyReader taxonomyReader) {
|
||||
for (FacetRequest frq : facetSearchParams.getFacetRequests()) {
|
||||
if (frq.requireDocumentScore()) {
|
||||
return ScoredDocIdCollector.create(1000, true);
|
||||
}
|
||||
}
|
||||
return ScoredDocIdCollector.create(indexReader.maxDoc(), false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create the {@link FacetsAccumulator} to be used. Default is
|
||||
* {@link StandardFacetsAccumulator}. Called once at the constructor of the collector.
|
||||
*
|
||||
* @param facetSearchParams
|
||||
* The search params.
|
||||
* @param indexReader
|
||||
* A reader to the index to search in.
|
||||
* @param taxonomyReader
|
||||
* A reader to the active taxonomy.
|
||||
* @return The {@link FacetsAccumulator} to use.
|
||||
*/
|
||||
protected FacetsAccumulator initFacetsAccumulator(FacetSearchParams facetSearchParams,
|
||||
IndexReader indexReader,
|
||||
TaxonomyReader taxonomyReader) {
|
||||
return new StandardFacetsAccumulator(facetSearchParams, indexReader, taxonomyReader);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return accumulated facets results (according to faceted search parameters)
|
||||
* for collected documents.
|
||||
* @throws IOException on error
|
||||
*/
|
||||
public List<FacetResult> getFacetResults() throws IOException {
|
||||
synchronized (resultsGuard) { // over protection
|
||||
if (results == null) {
|
||||
// lazy creation but just once
|
||||
results = facetsAccumulator.accumulate(scoreDocIdCollector.getScoredDocIDs());
|
||||
scoreDocIdCollector = null;
|
||||
}
|
||||
return results;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean acceptsDocsOutOfOrder() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void collect(int doc) throws IOException {
|
||||
scoreDocIdCollector.collect(doc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setNextReader(AtomicReaderContext context) throws IOException {
|
||||
scoreDocIdCollector.setNextReader(context);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setScorer(Scorer scorer) throws IOException {
|
||||
scoreDocIdCollector.setScorer(scorer);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,68 @@
|
|||
package org.apache.lucene.facet.search;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* An FloatArrayAllocator is an object which manages float array objects
|
||||
* of a certain size. These float arrays are needed temporarily during
|
||||
* faceted search (see {@link FacetsAccumulator} and can be reused across searches
|
||||
* instead of being allocated afresh on every search.
|
||||
* <P>
|
||||
* An FloatArrayAllocator is thread-safe.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public final class FloatArrayAllocator extends TemporaryObjectAllocator<float[]> {
|
||||
|
||||
// An FloatArrayAllocater deals with integer arrays of a fixed size, size.
|
||||
private int size;
|
||||
|
||||
/**
|
||||
* Construct an allocator for float arrays of size <CODE>size</CODE>,
|
||||
* keeping around a pool of up to <CODE>maxArrays</CODE> old arrays.
|
||||
* <P>
|
||||
* Note that the pool size only restricts the number of arrays that hang
|
||||
* around when not needed, but <I>not</I> the maximum number of arrays
|
||||
* that are allocated when actually is use: If a number of concurrent
|
||||
* threads ask for an allocation, all of them will get a counter array,
|
||||
* even if their number is greater than maxArrays. If an application wants
|
||||
* to limit the number of concurrent threads making allocations, it needs
|
||||
* to do so on its own - for example by blocking new threads until the
|
||||
* existing ones have finished.
|
||||
* <P>
|
||||
* In particular, when maxArrays=0, this object behaves as a trivial
|
||||
* allocator, always allocating a new array and never reusing an old one.
|
||||
*/
|
||||
public FloatArrayAllocator(int size, int maxArrays) {
|
||||
super(maxArrays);
|
||||
this.size = size;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float[] create() {
|
||||
return new float[size];
|
||||
}
|
||||
|
||||
@Override
|
||||
public void clear(float[] array) {
|
||||
Arrays.fill(array, 0);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,56 @@
|
|||
package org.apache.lucene.facet.search;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
 * Declares an interface for heap (and heap-alike) structures over elements of
 * a given type T.
 *
 * @lucene.experimental
 */
public interface Heap<T> {
  /**
   * Get and remove the top of the Heap. <BR>
   * NOTE: Once {@link #pop()} is called, no further {@link #add(Object)} or
   * {@link #insertWithOverflow(Object)} calls should be made.
   */
  public T pop();

  /** Get (but do not remove) the top of the Heap. */
  public T top();

  /**
   * Insert a new value, returning the object that overflowed (if any). <br>
   * NOTE: This method should not be called after invoking {@link #pop()}.
   */
  public T insertWithOverflow(T value);

  /**
   * Add a new value to the heap and return the new top(). <br>
   * Implementations may choose not to support this operation, in which case
   * <code>null</code> should be returned. <BR>
   * NOTE: This method should not be called after invoking {@link #pop()}.
   */
  public T add(T frn);

  /** Clear the heap. */
  public void clear();

  /** Return the number of objects currently in the heap. */
  public int size();
}
|
|
@ -0,0 +1,68 @@
|
|||
package org.apache.lucene.facet.search;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* An IntArrayAllocator is an object which manages counter array objects
|
||||
* of a certain length. These counter arrays are needed temporarily during
|
||||
* faceted search (see {@link FacetsAccumulator} and can be reused across searches
|
||||
* instead of being allocated afresh on every search.
|
||||
* <P>
|
||||
* An IntArrayAllocator is thread-safe.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public final class IntArrayAllocator extends TemporaryObjectAllocator<int[]> {
|
||||
|
||||
// An IntArrayAllocater deals with integer arrays of a fixed length.
|
||||
private int length;
|
||||
|
||||
/**
|
||||
* Construct an allocator for counter arrays of length <CODE>length</CODE>,
|
||||
* keeping around a pool of up to <CODE>maxArrays</CODE> old arrays.
|
||||
* <P>
|
||||
* Note that the pool size only restricts the number of arrays that hang
|
||||
* around when not needed, but <I>not</I> the maximum number of arrays
|
||||
* that are allocated when actually is use: If a number of concurrent
|
||||
* threads ask for an allocation, all of them will get a counter array,
|
||||
* even if their number is greater than maxArrays. If an application wants
|
||||
* to limit the number of concurrent threads making allocations, it needs
|
||||
* to do so on its own - for example by blocking new threads until the
|
||||
* existing ones have finished.
|
||||
* <P>
|
||||
* In particular, when maxArrays=0, this object behaves as a trivial
|
||||
* allocator, always allocating a new array and never reusing an old one.
|
||||
*/
|
||||
public IntArrayAllocator(int length, int maxArrays) {
|
||||
super(maxArrays);
|
||||
this.length = length;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int[] create() {
|
||||
return new int[length];
|
||||
}
|
||||
|
||||
@Override
|
||||
public void clear(int[] array) {
|
||||
Arrays.fill(array, 0);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,117 @@
|
|||
package org.apache.lucene.facet.search;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.Term;
|
||||
|
||||
import org.apache.lucene.util.UnsafeByteArrayInputStream;
|
||||
import org.apache.lucene.util.encoding.IntDecoder;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A payload deserializer comes with its own working space (buffer). One need to
|
||||
* define the {@link IndexReader} and {@link Term} in which the payload resides.
|
||||
* The iterator then consumes the payload information of each document and
|
||||
* decodes it into categories. A typical use case of this class is:
|
||||
*
|
||||
* <pre>
|
||||
* IndexReader reader = [open your reader];
|
||||
* Term t = new Term("field", "where-payload-exists");
|
||||
* CategoryListIterator cli = new PayloadIntDecodingIterator(reader, t);
|
||||
* if (!cli.init()) {
|
||||
* // it means there are no payloads / documents associated with that term.
|
||||
* // Usually a sanity check. However, init() must be called.
|
||||
* }
|
||||
* DocIdSetIterator disi = [you usually iterate on something else, such as a Scorer];
|
||||
* int doc;
|
||||
* while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
* cli.setdoc(doc);
|
||||
* long category;
|
||||
* while ((category = cli.nextCategory()) < Integer.MAX_VALUE) {
|
||||
* }
|
||||
* }
|
||||
* </pre>
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class PayloadIntDecodingIterator implements CategoryListIterator {
|
||||
|
||||
private final UnsafeByteArrayInputStream ubais;
|
||||
private final IntDecoder decoder;
|
||||
|
||||
private final IndexReader indexReader;
|
||||
private final Term term;
|
||||
private final PayloadIterator pi;
|
||||
private final int hashCode;
|
||||
|
||||
public PayloadIntDecodingIterator(IndexReader indexReader, Term term, IntDecoder decoder)
|
||||
throws IOException {
|
||||
this(indexReader, term, decoder, new byte[1024]);
|
||||
}
|
||||
|
||||
public PayloadIntDecodingIterator(IndexReader indexReader, Term term, IntDecoder decoder,
|
||||
byte[] buffer) throws IOException {
|
||||
pi = new PayloadIterator(indexReader, term, buffer);
|
||||
ubais = new UnsafeByteArrayInputStream();
|
||||
this.decoder = decoder;
|
||||
hashCode = indexReader.hashCode() ^ term.hashCode();
|
||||
this.term = term;
|
||||
this.indexReader = indexReader;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
if (!(other instanceof PayloadIntDecodingIterator)) {
|
||||
return false;
|
||||
}
|
||||
PayloadIntDecodingIterator that = (PayloadIntDecodingIterator) other;
|
||||
if (hashCode != that.hashCode) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Hash codes are the same, check equals() to avoid cases of hash-collisions.
|
||||
return indexReader.equals(that.indexReader) && term.equals(that.term);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return hashCode;
|
||||
}
|
||||
|
||||
public boolean init() throws IOException {
|
||||
return pi.init();
|
||||
}
|
||||
|
||||
public long nextCategory() throws IOException {
|
||||
return decoder.decode();
|
||||
}
|
||||
|
||||
public boolean skipTo(int docId) throws IOException {
|
||||
if (!pi.setdoc(docId)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Initializing the decoding mechanism with the new payload data
|
||||
ubais.reInit(pi.getBuffer(), 0, pi.getPayloadLength());
|
||||
decoder.reInit(ubais);
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,138 @@
|
|||
package org.apache.lucene.facet.search;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.DocsAndPositionsEnum;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.MultiFields;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A utility class for iterating through a posting list of a given term and
|
||||
* retrieving the payload of the first occurrence in every document. Comes with
|
||||
* its own working space (buffer).
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class PayloadIterator {
|
||||
|
||||
protected byte[] buffer;
|
||||
protected int payloadLength;
|
||||
|
||||
DocsAndPositionsEnum tp;
|
||||
|
||||
private boolean hasMore;
|
||||
|
||||
public PayloadIterator(IndexReader indexReader, Term term)
|
||||
throws IOException {
|
||||
this(indexReader, term, new byte[1024]);
|
||||
}
|
||||
|
||||
public PayloadIterator(IndexReader indexReader, Term term, byte[] buffer)
|
||||
throws IOException {
|
||||
this.buffer = buffer;
|
||||
// TODO (Facet): avoid Multi*?
|
||||
Bits deletedDocs = MultiFields.getDeletedDocs(indexReader);
|
||||
this.tp = MultiFields.getTermPositionsEnum(indexReader, deletedDocs, term.field(), term.bytes());
|
||||
}
|
||||
|
||||
/**
|
||||
* (re)initialize the iterator. Should be done before the first call to
|
||||
* {@link #setdoc(int)}. Returns false if there is no category list found
|
||||
* (no setdoc() will never return true).
|
||||
*/
|
||||
public boolean init() throws IOException {
|
||||
hasMore = tp != null && tp.nextDoc() != DocIdSetIterator.NO_MORE_DOCS;
|
||||
return hasMore;
|
||||
}
|
||||
|
||||
/**
|
||||
* Skip forward to document docId. Return true if this document exists and
|
||||
* has any payload.
|
||||
* <P>
|
||||
* Users should call this method with increasing docIds, and implementations
|
||||
* can assume that this is the case.
|
||||
*/
|
||||
public boolean setdoc(int docId) throws IOException {
|
||||
if (!hasMore) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (tp.docID() > docId) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// making sure we have the requested document
|
||||
if (tp.docID() < docId) {
|
||||
// Skipping to requested document
|
||||
if (tp.advance(docId) == DocIdSetIterator.NO_MORE_DOCS) {
|
||||
this.hasMore = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
// If document not found (skipped to much)
|
||||
if (tp.docID() != docId) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Prepare for payload extraction
|
||||
tp.nextPosition();
|
||||
|
||||
// TODO: fix bug in SepCodec and then remove this check (the null check should be enough)
|
||||
if (!tp.hasPayload()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
BytesRef br = tp.getPayload();
|
||||
|
||||
if (br == null || br.length == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
this.payloadLength = br.length;
|
||||
|
||||
if (this.payloadLength > this.buffer.length) {
|
||||
// Growing if necessary.
|
||||
this.buffer = new byte[this.payloadLength * 2 + 1];
|
||||
}
|
||||
// Loading the payload
|
||||
System.arraycopy(br.bytes, br.offset, this.buffer, 0, payloadLength);
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the buffer with the content of the last read payload.
|
||||
*/
|
||||
public byte[] getBuffer() {
|
||||
return buffer;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the length of the last read payload.
|
||||
*/
|
||||
public int getPayloadLength() {
|
||||
return payloadLength;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,118 @@
|
|||
package org.apache.lucene.facet.search;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.facet.search.params.FacetSearchParams;
|
||||
import org.apache.lucene.facet.search.results.FacetResult;
|
||||
import org.apache.lucene.facet.search.results.FacetResultNode;
|
||||
import org.apache.lucene.facet.search.sampling.Sampler;
|
||||
import org.apache.lucene.facet.search.sampling.Sampler.SampleResult;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Wrap any Facets Accumulator with sampling.
|
||||
* <p>
|
||||
* Note: Sampling accumulation (Accumulation over a sampled-set of the results),
|
||||
* does not guarantee accurate values for
|
||||
* {@link FacetResult#getNumValidDescendants()} &
|
||||
* {@link FacetResultNode#getResidue()}.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class SamplingWrapper extends FacetsAccumulator {
|
||||
|
||||
private FacetsAccumulator delegee;
|
||||
private Sampler sampler;
|
||||
|
||||
/**
 * Wrap the given accumulator so that accumulation runs over a sample of the
 * documents, using the given sampler.
 */
public SamplingWrapper(FacetsAccumulator delegee, Sampler sampler) {
  super(delegee.searchParams, delegee.indexReader, delegee.taxonomyReader);
  this.delegee = delegee;
  this.sampler = sampler;
}
||||
|
||||
@Override
|
||||
public List<FacetResult> accumulate(ScoredDocIDs docids) throws IOException {
|
||||
// first let delegee accumulate without labeling at all (though
|
||||
// currently it doesn't matter because we have to label all returned anyhow)
|
||||
boolean origAllowLabeling = isAllowLabeling();
|
||||
setAllowLabeling(false);
|
||||
|
||||
// Replacing the original searchParams with the over-sampled (and without statistics-compute)
|
||||
FacetSearchParams original = delegee.searchParams;
|
||||
delegee.searchParams = sampler.overSampledSearchParams(original);
|
||||
|
||||
SampleResult sampleSet = sampler.getSampleSet(docids);
|
||||
|
||||
List<FacetResult> sampleRes = delegee.accumulate(sampleSet.docids);
|
||||
setAllowLabeling(origAllowLabeling);
|
||||
|
||||
List<FacetResult> fixedRes = new ArrayList<FacetResult>();
|
||||
for (FacetResult fres : sampleRes) {
|
||||
// for sure fres is not null because this is guaranteed by the delegee.
|
||||
FacetResultsHandler frh = fres.getFacetRequest().createFacetResultsHandler(taxonomyReader);
|
||||
// fix the result of current request
|
||||
sampler.getSampleFixer(indexReader, taxonomyReader, searchParams)
|
||||
.fixResult(docids, fres);
|
||||
fres = frh.rearrangeFacetResult(fres); // let delegee's handler do any
|
||||
|
||||
// Using the sampler to trim the extra (over-sampled) results
|
||||
fres = sampler.trimResult(fres);
|
||||
|
||||
// final labeling if allowed (because labeling is a costly operation)
|
||||
if (isAllowLabeling()) {
|
||||
frh.labelResult(fres);
|
||||
}
|
||||
fixedRes.add(fres); // add to final results
|
||||
}
|
||||
|
||||
delegee.searchParams = original; // Back to original params
|
||||
|
||||
return fixedRes;
|
||||
}
|
||||
|
||||
/**
|
||||
* @see FacetsAccumulator#getComplementThreshold()
|
||||
*/
|
||||
@Override
|
||||
public double getComplementThreshold() {
|
||||
return delegee.getComplementThreshold();
|
||||
}
|
||||
|
||||
/**
|
||||
* @param complementThreshold
|
||||
* @see FacetsAccumulator#setComplementThreshold(double)
|
||||
*/
|
||||
@Override
|
||||
public void setComplementThreshold(double complementThreshold) {
|
||||
delegee.setComplementThreshold(complementThreshold);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean isAllowLabeling() {
|
||||
return delegee.isAllowLabeling();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void setAllowLabeling(boolean allowLabeling) {
|
||||
delegee.setAllowLabeling(allowLabeling);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
package org.apache.lucene.facet.search;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.search.DocIdSet;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Document IDs with scores for each, driving facets accumulation. Document
|
||||
* scores are optionally used in the process of facets scoring.
|
||||
*
|
||||
* @see FacetsAccumulator#accumulate(ScoredDocIDs)
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public interface ScoredDocIDs {

  /**
   * Returns an iterator over the document IDs and their scores.
   * @throws IOException if the underlying doc ID set cannot be iterated
   */
  public ScoredDocIDsIterator iterator() throws IOException;

  /** Returns the underlying set of matching doc IDs. */
  public DocIdSet getDocIDs();

  /** Returns the number of scored documents. */
  public int size();

}
|
|
@ -0,0 +1,43 @@
|
|||
package org.apache.lucene.facet.search;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Iterator over document IDs and their scores. Each {@link #next()} retrieves
|
||||
* the next docID and its score which can be later be retrieved by
|
||||
* {@link #getDocID()} and {@link #getScore()}. <b>NOTE:</b> you must call
|
||||
* {@link #next()} before {@link #getDocID()} and/or {@link #getScore()}, or
|
||||
* otherwise the returned values are unexpected.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public interface ScoredDocIDsIterator {

  /** Default score used in case scoring is disabled. */
  public static final float DEFAULT_SCORE = 1.0f;

  /**
   * Iterate to the next document/score pair.
   * @return true iff there is such a pair; once false is returned,
   *         {@link #getDocID()} and {@link #getScore()} are undefined.
   */
  public abstract boolean next();

  /** Returns the ID of the current document (valid only after a successful {@link #next()}). */
  public abstract int getDocID();

  /** Returns the score of the current document (valid only after a successful {@link #next()}). */
  public abstract float getScore();

}
|
|
@ -0,0 +1,224 @@
|
|||
package org.apache.lucene.facet.search;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||
import org.apache.lucene.search.Collector;
|
||||
import org.apache.lucene.search.DocIdSet;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.OpenBitSet;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A {@link Collector} which stores all docIDs and their scores in a
|
||||
* {@link ScoredDocIDs} instance. If scoring is not enabled, then the default
|
||||
* score as set in {@link #setDefaultScore(float)} (or
|
||||
* {@link ScoredDocIDsIterator#DEFAULT_SCORE}) will be set for all documents.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public abstract class ScoredDocIdCollector extends Collector {

  /**
   * Collector variant used when scoring is disabled: only records doc IDs
   * and reports a single (settable) default score for every document.
   */
  private static final class NonScoringDocIdCollector extends ScoredDocIdCollector {

    // Score reported for every collected document.
    float defaultScore = ScoredDocIDsIterator.DEFAULT_SCORE;

    @SuppressWarnings("synthetic-access")
    public NonScoringDocIdCollector(int maxDoc) {
      super(maxDoc);
    }

    @Override
    public boolean acceptsDocsOutOfOrder() { return true; }

    @Override
    public void collect(int doc) throws IOException {
      // docBase maps the per-segment doc to its global ID.
      docIds.fastSet(docBase + doc);
      ++numDocIds;
    }

    @Override
    public float getDefaultScore() {
      return defaultScore;
    }

    @Override
    public ScoredDocIDsIterator scoredDocIdsIterator() throws IOException {
      return new ScoredDocIDsIterator() {

        private DocIdSetIterator docIdsIter = docIds.iterator();
        private int nextDoc;

        public int getDocID() { return nextDoc; }
        public float getScore() { return defaultScore; }

        public boolean next() {
          try {
            nextDoc = docIdsIter.nextDoc();
            return nextDoc != DocIdSetIterator.NO_MORE_DOCS;
          } catch (IOException e) {
            // This should not happen as we're iterating over an OpenBitSet. For
            // completeness, terminate iteration
            nextDoc = DocIdSetIterator.NO_MORE_DOCS;
            return false;
          }
        }

      };
    }

    @Override
    public void setDefaultScore(float defaultScore) {
      this.defaultScore = defaultScore;
    }

    @Override
    public void setScorer(Scorer scorer) throws IOException {}
  }

  /**
   * Collector variant used when scoring is enabled: records doc IDs and,
   * in parallel, each document's score in an append-only float array.
   */
  private static final class ScoringDocIdCollector extends ScoredDocIdCollector {

    // scores[i] is the score of the i-th collected document; grown on demand.
    float[] scores;
    private Scorer scorer;

    @SuppressWarnings("synthetic-access")
    public ScoringDocIdCollector(int maxDoc) {
      super(maxDoc);
      scores = new float[maxDoc];
    }

    // In-order collection is required so that the i-th set bit of docIds
    // corresponds to scores[i].
    @Override
    public boolean acceptsDocsOutOfOrder() { return false; }

    @Override
    public void collect(int doc) throws IOException {
      docIds.fastSet(docBase + doc);

      float score = this.scorer.score();
      if (numDocIds >= scores.length) {
        // More docs collected than the maxDoc hint: grow the score array.
        float[] newScores = new float[ArrayUtil.oversize(numDocIds + 1, 4)];
        System.arraycopy(scores, 0, newScores, 0, numDocIds);
        scores = newScores;
      }
      scores[numDocIds] = score;
      ++numDocIds;
    }

    @Override
    public ScoredDocIDsIterator scoredDocIdsIterator() throws IOException {
      return new ScoredDocIDsIterator() {

        private DocIdSetIterator docIdsIter = docIds.iterator();
        private int nextDoc;
        // Index into scores, advanced in lockstep with the doc ID iterator.
        private int scoresIdx = -1;

        public int getDocID() { return nextDoc; }
        public float getScore() { return scores[scoresIdx]; }

        public boolean next() {
          try {
            nextDoc = docIdsIter.nextDoc();
            if (nextDoc == DocIdSetIterator.NO_MORE_DOCS) {
              return false;
            }
            ++scoresIdx;
            return true;
          } catch (IOException e) {
            // This should not happen as we're iterating over an OpenBitSet. For
            // completeness, terminate iteration
            nextDoc = DocIdSetIterator.NO_MORE_DOCS;
            return false;
          }
        }

      };
    }

    @Override
    public float getDefaultScore() { return ScoredDocIDsIterator.DEFAULT_SCORE; }

    // No-op: scores are real, there is no default to set.
    @Override
    public void setDefaultScore(float defaultScore) {}

    @Override
    public void setScorer(Scorer scorer) throws IOException {
      this.scorer = scorer;
    }
  }

  // Number of documents collected so far.
  protected int numDocIds;
  // Doc ID base of the current segment (set in setNextReader).
  protected int docBase;
  // Global doc IDs of all collected documents.
  protected final OpenBitSet docIds;

  /**
   * Creates a new {@link ScoredDocIdCollector} with the given parameters.
   *
   * @param maxDoc the number of documents that are expected to be collected.
   *        Note that if more documents are collected, unexpected exceptions may
   *        be thrown. Usually you should pass {@link IndexReader#maxDoc()} of
   *        the same IndexReader with which the search is executed.
   * @param enableScoring if scoring is enabled, a score will be computed for
   *        every matching document, which might be expensive. Therefore if you
   *        do not require scoring, it is better to set it to <i>false</i>.
   */
  public static ScoredDocIdCollector create(int maxDoc, boolean enableScoring) {
    return enableScoring ? new ScoringDocIdCollector(maxDoc)
        : new NonScoringDocIdCollector(maxDoc);
  }

  private ScoredDocIdCollector(int maxDoc) {
    numDocIds = 0;
    docIds = new OpenBitSet(maxDoc);
  }

  /** Returns the default score used when scoring is disabled. */
  public abstract float getDefaultScore();

  /** Set the default score. Only applicable if scoring is disabled. */
  public abstract void setDefaultScore(float defaultScore);

  /** Returns an iterator over the collected documents and their scores. */
  public abstract ScoredDocIDsIterator scoredDocIdsIterator() throws IOException;

  /** Exposes the collected documents as a live {@link ScoredDocIDs} view. */
  public ScoredDocIDs getScoredDocIDs() {
    return new ScoredDocIDs() {

      public ScoredDocIDsIterator iterator() throws IOException {
        return scoredDocIdsIterator();
      }

      public DocIdSet getDocIDs() {
        return docIds;
      }

      public int size() {
        return numDocIds;
      }

    };
  }

  @Override
  public void setNextReader(AtomicReaderContext context) throws IOException {
    this.docBase = context.docBase;
  }

}
|
|
@ -0,0 +1,338 @@
|
|||
package org.apache.lucene.facet.search;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.logging.Level;
|
||||
import java.util.logging.Logger;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
|
||||
import org.apache.lucene.facet.search.aggregator.Aggregator;
|
||||
import org.apache.lucene.facet.search.params.FacetSearchParams;
|
||||
import org.apache.lucene.facet.search.params.FacetRequest;
|
||||
import org.apache.lucene.facet.search.results.FacetResult;
|
||||
import org.apache.lucene.facet.search.results.IntermediateFacetResult;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
import org.apache.lucene.facet.util.PartitionsUtils;
|
||||
import org.apache.lucene.facet.util.ScoredDocIdsUtils;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Standard implementation for {@link FacetsAccumulator}, utilizing partitions to save on memory.
|
||||
* <p>
|
||||
* Why partitions? Because if there are say 100M categories out of which
|
||||
* only top K are required, we must first compute value for all 100M categories
|
||||
* (going over all documents) and only then could we select top K.
|
||||
* This is made easier on memory by working in partitions of distinct categories:
|
||||
* Once a values for a partition are found, we take the top K for that
|
||||
* partition and work on the next partition, them merge the top K of both,
|
||||
* and so forth, thereby computing top K with RAM needs for the size of
|
||||
* a single partition rather than for the size of all the 100M categories.
|
||||
* <p>
|
||||
* Decision on partitions size is done at indexing time, and the facet information
|
||||
* for each partition is maintained separately.
|
||||
* <p>
|
||||
* <u>Implementation detail:</u> Since facets information of each partition is
|
||||
* maintained in a separate "category list", we can be more efficient
|
||||
* at search time, because only the facet info for a single partition
|
||||
* need to be read while processing that partition.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class StandardFacetsAccumulator extends FacetsAccumulator {
|
||||
|
||||
private static final Logger logger = Logger.getLogger(StandardFacetsAccumulator.class.getName());
|
||||
|
||||
protected final IntArrayAllocator intArrayAllocator;
|
||||
protected final FloatArrayAllocator floatArrayAllocator;
|
||||
|
||||
protected int partitionSize;
|
||||
protected int maxPartitions;
|
||||
protected boolean isUsingComplements;
|
||||
|
||||
private TotalFacetCounts totalFacetCounts;
|
||||
|
||||
private Object accumulateGuard;
|
||||
|
||||
public StandardFacetsAccumulator(FacetSearchParams searchParams, IndexReader indexReader,
|
||||
TaxonomyReader taxonomyReader, IntArrayAllocator intArrayAllocator,
|
||||
FloatArrayAllocator floatArrayAllocator) {
|
||||
|
||||
super(searchParams,indexReader,taxonomyReader);
|
||||
int realPartitionSize = intArrayAllocator == null || floatArrayAllocator == null
|
||||
? PartitionsUtils.partitionSize(searchParams, taxonomyReader) : -1; // -1 if not needed.
|
||||
this.intArrayAllocator = intArrayAllocator != null
|
||||
? intArrayAllocator
|
||||
// create a default one if null was provided
|
||||
: new IntArrayAllocator(realPartitionSize, 1);
|
||||
this.floatArrayAllocator = floatArrayAllocator != null
|
||||
? floatArrayAllocator
|
||||
// create a default one if null provided
|
||||
: new FloatArrayAllocator(realPartitionSize, 1);
|
||||
// can only be computed later when docids size is known
|
||||
isUsingComplements = false;
|
||||
partitionSize = PartitionsUtils.partitionSize(searchParams, taxonomyReader);
|
||||
maxPartitions = (int) Math.ceil(this.taxonomyReader.getSize() / (double) partitionSize);
|
||||
accumulateGuard = new Object();
|
||||
}
|
||||
|
||||
public StandardFacetsAccumulator(FacetSearchParams searchParams, IndexReader indexReader,
|
||||
TaxonomyReader taxonomyReader) {
|
||||
|
||||
this(searchParams, indexReader, taxonomyReader, null, null);
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<FacetResult> accumulate(ScoredDocIDs docids) throws IOException {
|
||||
|
||||
// synchronize to prevent calling two accumulate()'s at the same time.
|
||||
// We decided not to synchronize the method because that might mislead
|
||||
// users to feel encouraged to call this method simultaneously.
|
||||
synchronized (accumulateGuard) {
|
||||
|
||||
// only now we can compute this
|
||||
isUsingComplements = shouldComplement(docids);
|
||||
|
||||
if (isUsingComplements) {
|
||||
try {
|
||||
totalFacetCounts = TotalFacetCountsCache.getSingleton()
|
||||
.getTotalCounts(indexReader, taxonomyReader,
|
||||
searchParams.getFacetIndexingParams(), searchParams.getClCache());
|
||||
if (totalFacetCounts != null) {
|
||||
docids = ScoredDocIdsUtils.getComplementSet(docids, indexReader);
|
||||
} else {
|
||||
isUsingComplements = false;
|
||||
}
|
||||
} catch (UnsupportedOperationException e) {
|
||||
// TODO (Facet): this exception is thrown from TotalCountsKey if the
|
||||
// IndexReader used does not support getVersion(). We should re-think
|
||||
// this: is this tiny detail worth disabling total counts completely
|
||||
// for such readers? Currently, it's not supported by Parallel and
|
||||
// MultiReader, which might be problematic for several applications.
|
||||
// We could, for example, base our "isCurrent" logic on something else
|
||||
// than the reader's version. Need to think more deeply about it.
|
||||
if (logger.isLoggable(Level.FINEST)) {
|
||||
logger.log(Level.FINEST, "IndexReader used does not support completents: ", e);
|
||||
}
|
||||
isUsingComplements = false;
|
||||
} catch (IOException e) {
|
||||
if (logger.isLoggable(Level.FINEST)) {
|
||||
logger.log(Level.FINEST, "Failed to load/calculate total counts (complement counting disabled): ", e);
|
||||
}
|
||||
// silently fail if for some reason failed to load/save from/to dir
|
||||
isUsingComplements = false;
|
||||
} catch (Exception e) {
|
||||
// give up: this should not happen!
|
||||
IOException ioEx = new IOException(
|
||||
"PANIC: Got unexpected exception while trying to get/calculate total counts: "
|
||||
+e.getMessage());
|
||||
ioEx.initCause(e);
|
||||
throw ioEx;
|
||||
}
|
||||
}
|
||||
|
||||
docids = actualDocsToAccumulate(docids);
|
||||
|
||||
FacetArrays facetArrays = new FacetArrays(intArrayAllocator, floatArrayAllocator);
|
||||
|
||||
HashMap<FacetRequest, IntermediateFacetResult> fr2tmpRes = new HashMap<FacetRequest, IntermediateFacetResult>();
|
||||
|
||||
try {
|
||||
for (int part = 0; part < maxPartitions; part++) {
|
||||
|
||||
// fill arrays from category lists
|
||||
fillArraysForPartition(docids, facetArrays, part);
|
||||
|
||||
int offset = part * partitionSize;
|
||||
|
||||
// for each partition we go over all requests and handle
|
||||
// each, where
|
||||
// the request maintains the merged result.
|
||||
// In this implementation merges happen after each
|
||||
// partition,
|
||||
// but other impl could merge only at the end.
|
||||
for (FacetRequest fr : searchParams.getFacetRequests()) {
|
||||
FacetResultsHandler frHndlr = fr.createFacetResultsHandler(taxonomyReader);
|
||||
IntermediateFacetResult res4fr = frHndlr.fetchPartitionResult(facetArrays, offset);
|
||||
IntermediateFacetResult oldRes = fr2tmpRes.get(fr);
|
||||
if (oldRes != null) {
|
||||
res4fr = frHndlr.mergeResults(oldRes, res4fr);
|
||||
}
|
||||
fr2tmpRes.put(fr, res4fr);
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
facetArrays.free();
|
||||
}
|
||||
|
||||
// gather results from all requests into a list for returning them
|
||||
List<FacetResult> res = new ArrayList<FacetResult>();
|
||||
for (FacetRequest fr : searchParams.getFacetRequests()) {
|
||||
FacetResultsHandler frHndlr = fr.createFacetResultsHandler(taxonomyReader);
|
||||
IntermediateFacetResult tmpResult = fr2tmpRes.get(fr);
|
||||
if (tmpResult == null) {
|
||||
continue; // do not add a null to the list.
|
||||
}
|
||||
FacetResult facetRes = frHndlr.renderFacetResult(tmpResult);
|
||||
// final labeling if allowed (because labeling is a costly operation)
|
||||
if (isAllowLabeling()) {
|
||||
frHndlr.labelResult(facetRes);
|
||||
}
|
||||
res.add(facetRes);
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the actual set of documents over which accumulation should take place.
|
||||
* <p>
|
||||
* Allows to override the set of documents to accumulate for. Invoked just
|
||||
* before actual accumulating starts. From this point that set of documents
|
||||
* remains unmodified. Default implementation just returns the input
|
||||
* unchanged.
|
||||
*
|
||||
* @param docids
|
||||
* candidate documents to accumulate for
|
||||
* @return actual documents to accumulate for
|
||||
*/
|
||||
protected ScoredDocIDs actualDocsToAccumulate(ScoredDocIDs docids) throws IOException {
|
||||
return docids;
|
||||
}
|
||||
|
||||
/** Check if it is worth to use complements */
|
||||
protected boolean shouldComplement(ScoredDocIDs docids) {
|
||||
return
|
||||
mayComplement() &&
|
||||
(docids.size() > indexReader.numDocs() * getComplementThreshold()) ;
|
||||
}
|
||||
|
||||
/**
|
||||
* Iterate over the documents for this partition and fill the facet arrays with the correct
|
||||
* count/complement count/value.
|
||||
* @param internalCollector
|
||||
* @param facetArrays
|
||||
* @param part
|
||||
* @throws IOException
|
||||
*/
|
||||
private final void fillArraysForPartition(ScoredDocIDs docids,
|
||||
FacetArrays facetArrays, int partition) throws IOException {
|
||||
|
||||
if (isUsingComplements) {
|
||||
initArraysByTotalCounts(facetArrays, partition, docids.size());
|
||||
} else {
|
||||
facetArrays.free(); // to get a cleared array for this partition
|
||||
}
|
||||
|
||||
HashMap<CategoryListIterator, Aggregator> categoryLists = getCategoryListMap(
|
||||
facetArrays, partition);
|
||||
|
||||
for (Entry<CategoryListIterator, Aggregator> entry : categoryLists.entrySet()) {
|
||||
CategoryListIterator categoryList = entry.getKey();
|
||||
if (!categoryList.init()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
Aggregator categorator = entry.getValue();
|
||||
ScoredDocIDsIterator iterator = docids.iterator();
|
||||
while (iterator.next()) {
|
||||
int docID = iterator.getDocID();
|
||||
if (!categoryList.skipTo(docID)) {
|
||||
continue;
|
||||
}
|
||||
categorator.setNextDoc(docID, iterator.getScore());
|
||||
long ordinal;
|
||||
while ((ordinal = categoryList.nextCategory()) <= Integer.MAX_VALUE) {
|
||||
categorator.aggregate((int) ordinal);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Init arrays for partition by total counts, optionally applying a factor
|
||||
*/
|
||||
private final void initArraysByTotalCounts(FacetArrays facetArrays, int partition, int nAccumulatedDocs) {
|
||||
int[] intArray = facetArrays.getIntArray();
|
||||
totalFacetCounts.fillTotalCountsForPartition(intArray, partition);
|
||||
double totalCountsFactor = getTotalCountsFactor();
|
||||
// fix total counts, but only if the effect of this would be meaningfull.
|
||||
if (totalCountsFactor < 0.99999) {
|
||||
int delta = nAccumulatedDocs + 1;
|
||||
for (int i = 0; i < intArray.length; i++) {
|
||||
intArray[i] *= totalCountsFactor;
|
||||
// also translate to prevent loss of non-positive values
|
||||
// due to complement sampling (ie if sampled docs all decremented a certain category).
|
||||
intArray[i] += delta;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Expert: factor by which counts should be multiplied when initializing
|
||||
* the count arrays from total counts.
|
||||
* Default implementation for this returns 1, which is a no op.
|
||||
* @return a factor by which total counts should be multiplied
|
||||
*/
|
||||
protected double getTotalCountsFactor() {
|
||||
return 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an {@link Aggregator} and a {@link CategoryListIterator} for each
|
||||
* and every {@link FacetRequest}. Generating a map, matching each
|
||||
* categoryListIterator to its matching aggregator.
|
||||
* <p>
|
||||
* If two CategoryListIterators are served by the same aggregator, a single
|
||||
* aggregator is returned for both.
|
||||
*
|
||||
* <b>NOTE: </b>If a given category list iterator is needed with two different
|
||||
* aggregators (e.g counting and association) - an exception is thrown as this
|
||||
* functionality is not supported at this time.
|
||||
*/
|
||||
protected HashMap<CategoryListIterator, Aggregator> getCategoryListMap(FacetArrays facetArrays,
|
||||
int partition) throws IOException {
|
||||
|
||||
HashMap<CategoryListIterator, Aggregator> categoryLists = new HashMap<CategoryListIterator, Aggregator>();
|
||||
|
||||
for (FacetRequest facetRequest : searchParams.getFacetRequests()) {
|
||||
Aggregator categoryAggregator = facetRequest.createAggregator(
|
||||
isUsingComplements, facetArrays, indexReader, taxonomyReader);
|
||||
|
||||
CategoryListIterator cli =
|
||||
facetRequest.createCategoryListIterator(indexReader, taxonomyReader, searchParams, partition);
|
||||
|
||||
// get the aggregator
|
||||
Aggregator old = categoryLists.put(cli, categoryAggregator);
|
||||
|
||||
if (old != null && !old.equals(categoryAggregator)) {
|
||||
// TODO (Facet): create a more meaningful RE class, and throw it.
|
||||
throw new RuntimeException(
|
||||
"Overriding existing category list with different aggregator. THAT'S A NO NO!");
|
||||
}
|
||||
// if the aggregator is the same we're covered
|
||||
}
|
||||
|
||||
return categoryLists;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,114 @@
|
|||
package org.apache.lucene.facet.search;
|
||||
|
||||
import java.util.concurrent.ConcurrentLinkedQueue;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* An TemporaryObjectAllocator is an object which manages large, reusable,
|
||||
* temporary objects needed during multiple concurrent computations. The idea
|
||||
* is to remember some of the previously allocated temporary objects, and
|
||||
* reuse them if possible to avoid constant allocation and garbage-collection
|
||||
* of these objects.
|
||||
* <P>
|
||||
* This technique is useful for temporary counter arrays in faceted search
|
||||
* (see {@link FacetsAccumulator}), which can be reused across searches instead
|
||||
* of being allocated afresh on every search.
|
||||
* <P>
|
||||
* A TemporaryObjectAllocator is thread-safe.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public abstract class TemporaryObjectAllocator<T> {
|
||||
|
||||
// In the "pool" we hold up to "maxObjects" old objects, and if the pool
|
||||
// is not empty, we return one of its objects rather than allocating a new
|
||||
// one.
|
||||
ConcurrentLinkedQueue<T> pool = new ConcurrentLinkedQueue<T>();
|
||||
int maxObjects;
|
||||
|
||||
/**
|
||||
* Construct an allocator for objects of a certain type, keeping around a
|
||||
* pool of up to <CODE>maxObjects</CODE> old objects.
|
||||
* <P>
|
||||
* Note that the pool size only restricts the number of objects that hang
|
||||
* around when not needed, but <I>not</I> the maximum number of objects
|
||||
* that are allocated when actually is use: If a number of concurrent
|
||||
* threads ask for an allocation, all of them will get an object, even if
|
||||
* their number is greater than maxObjects. If an application wants to
|
||||
* limit the number of concurrent threads making allocations, it needs to
|
||||
* do so on its own - for example by blocking new threads until the
|
||||
* existing ones have finished. If more than maxObjects are freed, only
|
||||
* maxObjects of them will be kept in the pool - the rest will not and
|
||||
* will eventually be garbage-collected by Java.
|
||||
* <P>
|
||||
* In particular, when maxObjects=0, this object behaves as a trivial
|
||||
* allocator, always allocating a new array and never reusing an old one.
|
||||
*/
|
||||
public TemporaryObjectAllocator(int maxObjects) {
|
||||
this.maxObjects = maxObjects;
|
||||
}
|
||||
|
||||
/**
|
||||
* Subclasses must override this method to actually create a new object
|
||||
* of the desired type.
|
||||
*
|
||||
*/
|
||||
protected abstract T create();
|
||||
|
||||
/**
|
||||
* Subclasses must override this method to clear an existing object of
|
||||
* the desired type, to prepare it for reuse. Note that objects will be
|
||||
* cleared just before reuse (on allocation), not when freed.
|
||||
*/
|
||||
protected abstract void clear(T object);
|
||||
|
||||
/**
|
||||
* Allocate a new object. If there's a previously allocated object in our
|
||||
* pool, we return it immediately. Otherwise, a new object is allocated.
|
||||
* <P>
|
||||
* Don't forget to call {@link #free(Object)} when you're done with the object,
|
||||
* to return it to the pool. If you don't, memory is <I>not</I> leaked,
|
||||
* but the pool will remain empty and a new object will be allocated each
|
||||
* time (just like the maxArrays=0 case).
|
||||
*/
|
||||
public final T allocate() {
|
||||
T object = pool.poll();
|
||||
if (object==null) {
|
||||
return create();
|
||||
}
|
||||
clear(object);
|
||||
return object;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a no-longer-needed object back to the pool. If we already have
|
||||
* enough objects in the pool (maxObjects as specified in the constructor),
|
||||
* the array will not be saved, and Java will eventually garbage collect
|
||||
* it.
|
||||
* <P>
|
||||
* In particular, when maxArrays=0, the given array is never saved and
|
||||
* free does nothing.
|
||||
*/
|
||||
public final void free(T object) {
|
||||
if (pool.size() < maxObjects && object != null) {
|
||||
pool.add(object);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,292 @@
|
|||
package org.apache.lucene.facet.search;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
|
||||
import org.apache.lucene.facet.search.params.FacetRequest;
|
||||
import org.apache.lucene.facet.search.results.FacetResult;
|
||||
import org.apache.lucene.facet.search.results.FacetResultNode;
|
||||
import org.apache.lucene.facet.search.results.MutableFacetResultNode;
|
||||
import org.apache.lucene.facet.search.results.IntermediateFacetResult;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader.ChildrenArrays;
|
||||
import org.apache.lucene.facet.util.ResultSortUtils;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Generate Top-K results for a particular FacetRequest.
|
||||
* <p>
|
||||
* K is global (among all results) and is defined by {@link FacetRequest#getNumResults()}.
|
||||
* <p>
|
||||
* Note: Values of 0 (Zero) are ignored by this results handler.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class TopKFacetResultsHandler extends FacetResultsHandler {
  
  /**
   * Construct top-K results handler.
   * @param taxonomyReader taxonomy reader
   * @param facetRequest facet request being served
   */
  public TopKFacetResultsHandler(TaxonomyReader taxonomyReader,
      FacetRequest facetRequest) {
    super(taxonomyReader, facetRequest);
  }
  
  /**
   * Fetch the top-K results found in one partition of the count arrays.
   * Returns null when the requested category path has no ordinal in the
   * taxonomy.
   */
  // fetch top K for specific partition.
  @Override
  public IntermediateFacetResult fetchPartitionResult(FacetArrays facetArrays, int offset)
  throws IOException {
    TopKFacetResult res = null;
    int ordinal = taxonomyReader.getOrdinal(facetRequest.getCategoryPath());
    if (ordinal != TaxonomyReader.INVALID_ORDINAL) {
      // the root's own value can only be read from the partition that
      // contains the root's ordinal (modulo-indexing below relies on that)
      double value = 0;
      if (isSelfPartition(ordinal, facetArrays, offset)) {
        int partitionSize = facetArrays.getArraysLength();
        value = facetRequest.getValueOf(facetArrays, ordinal % partitionSize);
      }
      
      // TODO (Facet): should initial value of "residue" depend on aggregator if not sum?
      MutableFacetResultNode parentResultNode =
        new MutableFacetResultNode(ordinal, value);
      
      Heap<FacetResultNode> heap = ResultSortUtils.createSuitableHeap(facetRequest);
      int totalFacets = heapDescendants(ordinal, heap, parentResultNode, facetArrays, offset);
      res = new TopKFacetResult(facetRequest, parentResultNode, totalFacets);
      res.setHeap(heap);
    }
    return res;
  }
  
  /**
   * Merge the given per-partition top-K results into a single top-K result:
   * root values are summed, descendant counts are accumulated, and the
   * per-partition heaps are folded into one heap of overall-best K nodes
   * (overflow from the heap is accumulated into the root's residue).
   */
  // merge given top K results into current
  @Override
  public IntermediateFacetResult mergeResults(IntermediateFacetResult... tmpResults) throws IOException {
    
    int ordinal = taxonomyReader.getOrdinal(facetRequest.getCategoryPath());
    MutableFacetResultNode resNode = new MutableFacetResultNode(ordinal, 0);
    
    int totalFacets = 0;
    Heap<FacetResultNode> heap = null;
    
    // merge other results in queue
    for (IntermediateFacetResult tmpFres : tmpResults) {
      // cast should succeed
      TopKFacetResult fres = (TopKFacetResult) tmpFres;
      totalFacets += fres.getNumValidDescendants();
      // set the value for the result node representing the facet request
      resNode.increaseValue(fres.getFacetResultNode().getValue());
      Heap<FacetResultNode> tmpHeap = fres.getHeap();
      if (heap == null) {
        // first result: adopt its heap as the merge target
        heap = tmpHeap;
        continue;
      }
      // bring sub results from heap of tmp res into result heap
      for (int i = tmpHeap.size(); i > 0; i--) {
        
        FacetResultNode a = heap.insertWithOverflow(tmpHeap.pop());
        if (a != null) {
          // a node pushed out of the top K carries its residue into the root
          resNode.increaseResidue(a.getResidue());
        }
      }
    }
    
    TopKFacetResult res = new TopKFacetResult(facetRequest, resNode, totalFacets);
    res.setHeap(heap);
    return res;
  }
  
  /**
   * Finds the top K descendants of ordinal, which are at most facetRequest.getDepth()
   * deeper than facetRequest.getCategoryPath (whose ordinal is input parameter ordinal).
   * Candidates are restricted to current "counting list" and current "partition",
   * they join the overall priority queue pq of size K.
   * @return total number of descendants considered here by pq, excluding ordinal itself.
   */
  private int heapDescendants(int ordinal, Heap<FacetResultNode> pq,
      MutableFacetResultNode parentResultNode, FacetArrays facetArrays, int offset) {
    int partitionSize = facetArrays.getArraysLength();
    int endOffset = offset + partitionSize;
    ChildrenArrays childrenArray = taxonomyReader.getChildrenArrays();
    int[] youngestChild = childrenArray.getYoungestChildArray();
    int[] olderSibling = childrenArray.getOlderSiblingArray();
    FacetResultNode reusable = null;
    int localDepth = 0;
    int depth = facetRequest.getDepth();
    // +2: slot 0 for the root, one extra slot for the INVALID_ORDINAL marker
    int[] ordinalStack = new int[2+Math.min(Short.MAX_VALUE, depth)];
    int childrenCounter = 0;
    
    int tosOrdinal; // top of stack element
    
    // skip children whose ordinals lie beyond the current partition
    int yc = youngestChild[ordinal];
    while (yc >= endOffset) {
      yc = olderSibling[yc];
    }
    // make use of the fact that TaxonomyReader.INVALID_ORDINAL == -1, < endOffset
    // and it, too, can stop the loop.
    ordinalStack[++localDepth] = yc;
    
    /*
     * stack holds input parameter ordinal in position 0.
     * Other elements are < endOffset.
     * Only top of stack can be TaxonomyReader.INVALID_ORDINAL, and this if and only if
     * the element below it exhausted all its children: has them all processed.
     *
     * stack elements are processed (counted and accumulated) only if they
     * belong to current partition (between offset and endOffset) and the first time
     * they are on top of stack.
     *
     * loop as long as stack is not empty of elements other than input ordinal
     */
    while (localDepth > 0) {
      tosOrdinal = ordinalStack[localDepth];
      if (tosOrdinal == TaxonomyReader.INVALID_ORDINAL) {
        // element below tos has all its children, and itself, all processed
        // need to proceed to its sibling
        localDepth--;
        // change element now on top of stack to its sibling.
        ordinalStack[localDepth] = olderSibling[ordinalStack[localDepth]];
        continue;
      }
      // top of stack is not invalid, this is the first time we see it on top of stack.
      // collect it, if belongs to current partition, and then push its kids on itself, if applicable
      if (tosOrdinal >= offset) { // tosOrdinal resides in current partition
        int relativeOrdinal = tosOrdinal % partitionSize;
        double value = facetRequest.getValueOf(facetArrays, relativeOrdinal);
        if (value != 0 && !Double.isNaN(value)) {
          // Count current ordinal -- the TOS
          if (reusable == null) {
            reusable = new MutableFacetResultNode(tosOrdinal, value);
          } else {
            // it is safe to cast since reusable was created here.
            ((MutableFacetResultNode)reusable).reset(tosOrdinal, value);
          }
          ++childrenCounter;
          reusable = pq.insertWithOverflow(reusable);
          if (reusable != null) {
            // the evicted node (or the rejected one) is recycled on the next
            // iteration; its value joins the parent's residue
            // TODO (Facet): is other logic (not add) needed, per aggregator?
            parentResultNode.increaseResidue(reusable.getValue());
          }
        }
      }
      if (localDepth < depth) {
        // push kid of current tos, again skipping kids beyond the partition
        yc = youngestChild[tosOrdinal];
        while (yc >= endOffset) {
          yc = olderSibling[yc];
        }
        ordinalStack[++localDepth] = yc;
      } else { // localDepth == depth; current tos exhausted its possible children, mark this by pushing INVALID_ORDINAL
        ordinalStack[++localDepth] = TaxonomyReader.INVALID_ORDINAL;
      }
    } // endof while stack is not empty
    
    return childrenCounter; // we're done
  }
  
  /**
   * Turn the intermediate result into a renderable {@link FacetResult}:
   * the heap's nodes are inserted as the root node's sub-results.
   * A null input is passed through as a null result.
   */
  @Override
  public FacetResult renderFacetResult(IntermediateFacetResult tmpResult) {
    TopKFacetResult res = (TopKFacetResult) tmpResult; // cast is safe by contract of this class
    if (res != null) {
      Heap<FacetResultNode> heap = res.getHeap();
      MutableFacetResultNode resNode = (MutableFacetResultNode)res.getFacetResultNode(); // cast safe too
      for (int i = heap.size(); i > 0; i--) {
        resNode.insertSubResult(heap.pop());
      }
    }
    return res;
  }
  
  /**
   * Re-sort the sub-results of a rendered result by pushing them back
   * through the heap and popping them in order.
   */
  @Override
  public FacetResult rearrangeFacetResult(FacetResult facetResult) {
    TopKFacetResult res = (TopKFacetResult) facetResult; // cast is safe by contract of this class
    Heap<FacetResultNode> heap = res.getHeap();
    heap.clear(); // just to be safe
    MutableFacetResultNode topFrn = (MutableFacetResultNode) res.getFacetResultNode(); // safe cast
    for (FacetResultNode frn : topFrn.getSubResults()) {
      heap.add(frn);
    }
    int size = heap.size();
    ArrayList<FacetResultNode> subResults = new ArrayList<FacetResultNode>(size);
    // pop yields worst-first, so prepend to end up best-first
    for (int i = heap.size(); i > 0; i--) {
      subResults.add(0,heap.pop());
    }
    topFrn.setSubResults(subResults);
    return res;
  }
  
  @Override
  // label top K sub results
  public void labelResult(FacetResult facetResult) throws IOException {
    if (facetResult != null) { // any result to label?
      FacetResultNode facetResultNode = facetResult.getFacetResultNode();
      if (facetResultNode != null) { // any result to label?
        facetResultNode.getLabel(taxonomyReader);
        // only the first getNumLabel() sub results get labeled
        int num2label = facetRequest.getNumLabel();
        for (FacetResultNode frn : facetResultNode.getSubResults()) {
          if (--num2label < 0) {
            break;
          }
          frn.getLabel(taxonomyReader);
        }
      }
    }
  }
  
  ////////////////////////////////////////////////////////////////////////////////////
  ////////////////////////////////////////////////////////////////////////////////////
  
  /**
   * Private Mutable implementation of result of faceted search.
   * Carries the heap of best-K nodes between the per-partition and
   * merge phases.
   */
  private static class TopKFacetResult extends FacetResult implements IntermediateFacetResult {
    
    // TODO (Facet): is it worth to override PriorityQueue.getSentinelObject()
    // for any of our PQs?
    private Heap<FacetResultNode> heap;
    
    /**
     * Create a Facet Result.
     * @param facetRequest Request for which this result was obtained.
     * @param facetResultNode top result node for this facet result.
     * @param totalFacets - number of children of the targetFacet, up till the requested depth.
     */
    TopKFacetResult(FacetRequest facetRequest, MutableFacetResultNode facetResultNode, int totalFacets) {
      super(facetRequest, facetResultNode, totalFacets);
    }
    
    /**
     * @return the heap
     */
    public Heap<FacetResultNode> getHeap() {
      return heap;
    }
    
    /**
     * Set the heap for this result.
     * @param heap heap to be set.
     */
    public void setHeap(Heap<FacetResultNode> heap) {
      this.heap = heap;
    }
    
  }
  
  //////////////////////////////////////////////////////
}
|
|
@ -0,0 +1,797 @@
|
|||
package org.apache.lucene.facet.search;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.util.PriorityQueue;
|
||||
|
||||
import org.apache.lucene.facet.search.params.FacetRequest;
|
||||
import org.apache.lucene.facet.search.params.FacetRequest.SortOrder;
|
||||
import org.apache.lucene.facet.search.results.FacetResult;
|
||||
import org.apache.lucene.facet.search.results.FacetResultNode;
|
||||
import org.apache.lucene.facet.search.results.MutableFacetResultNode;
|
||||
import org.apache.lucene.facet.search.results.IntermediateFacetResult;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader.ChildrenArrays;
|
||||
import org.apache.lucene.util.collections.IntIterator;
|
||||
import org.apache.lucene.util.collections.IntToObjectMap;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Generates {@link FacetResult} from the count arrays aggregated for a particular
|
||||
* {@link FacetRequest}.
|
||||
* The generated {@link FacetResult} is a subtree of the taxonomy tree.
|
||||
* Its root node, {@link FacetResult#getFacetResultNode()},
|
||||
* is the facet specified by {@link FacetRequest#getCategoryPath()},
|
||||
* and the enumerated children, {@link FacetResultNode#getSubResults()}, of each node in that
|
||||
* {@link FacetResult} are the top K ( = {@link FacetRequest#getNumResults()}) among its children
|
||||
* in the taxonomy.
|
||||
* Top in the sense {@link FacetRequest#getSortBy()},
|
||||
* which can be by the values aggregated in the count arrays, or by ordinal numbers;
|
||||
* also specified is the sort order, {@link FacetRequest#getSortOrder()},
|
||||
* ascending or descending, of these values or ordinals before their top K are selected.
|
||||
* The depth (number of levels excluding the root) of the
|
||||
* {@link FacetResult} tree is specified by {@link FacetRequest#getDepth()}.
|
||||
* <p>
|
||||
* Because the number of selected children of each node is restricted,
|
||||
* and not the overall number of nodes in the {@link FacetResult}, facets not selected
|
||||
* into {@link FacetResult} might have better values, or ordinals, (typically,
|
||||
* higher counts), than facets that are selected into the {@link FacetResult}.
|
||||
* <p>
|
||||
* The generated {@link FacetResult} also provides with
|
||||
* {@link FacetResult#getNumValidDescendants()}, which returns the total number of facets
|
||||
* that are descendants of the root node, no deeper than {@link FacetRequest#getDepth()}, and
|
||||
* which have valid value. The rootnode itself is not counted here.
|
||||
* Valid value is determined by the {@link FacetResultsHandler}.
|
||||
* {@link TopKInEachNodeHandler} defines valid as != 0.
|
||||
* <p>
|
||||
* <b>NOTE:</b> this code relies on the assumption that {@link TaxonomyReader#INVALID_ORDINAL} == -1, a smaller
|
||||
* value than any valid ordinal.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class TopKInEachNodeHandler extends FacetResultsHandler {
|
||||
|
||||
  /**
   * Construct a handler that selects the top K results in each node of the
   * generated facet result tree (rather than K results overall).
   * @param taxonomyReader taxonomy reader against which category ordinals are resolved
   * @param facetRequest facet request being served
   */
  public TopKInEachNodeHandler(TaxonomyReader taxonomyReader,
      FacetRequest facetRequest) {
    super(taxonomyReader, facetRequest);
  }
|
||||
|
||||
  /**
   * Recursively explore all facets that can be potentially included in the
   * {@link FacetResult} to be generated, and that belong to the given
   * partition, so that values can be examined and collected. For each such
   * node, gather its top K ({@link FacetRequest#getNumResults()}) children
   * among its children that are encountered in the given particular partition
   * (aka current counting list).
   *
   * @return {@link IntermediateFacetResult} consisting of
   *         {@link IntToObjectMap} that maps potential
   *         {@link FacetResult} nodes to their top K children encountered in
   *         the current partition. Note that the mapped potential tree nodes
   *         need not belong to the given partition, only the top K children
   *         mapped to. The aim is to identify nodes that are certainly excluded
   *         from the {@link FacetResult} to be eventually (after going through
   *         all the partitions) returned by this handler, because they have K
   *         better siblings, already identified in this partition. For the
   *         identified excluded nodes, we only count number of their
   *         descendants in the subtree (to be included in
   *         {@link FacetResult#getNumValidDescendants()}), but not bother with
   *         selecting top K in these generations, which, by definition, are,
   *         too, excluded from the FacetResult tree.
   * @param arrays the already filled in count arrays, potentially covering
   *         only one partition: the ordinals ranging from <code>offset</code>
   *         to <code>offset</code> + the length of the count arrays within
   *         <code>arrays</code> (exclusive)
   * @param offset the first ordinal covered by the current partition
   * @throws IOException in case
   *         {@link TaxonomyReader#getOrdinal(org.apache.lucene.facet.taxonomy.CategoryPath)}
   *         does.
   * @see FacetResultsHandler#fetchPartitionResult(FacetArrays, int)
   */
  @Override
  public IntermediateFacetResult fetchPartitionResult(FacetArrays arrays, int offset) throws IOException {

    // get the root of the result tree to be returned, and the depth of that result tree
    // (depth means number of node levels excluding the root).
    int rootNode = this.taxonomyReader.getOrdinal(this.facetRequest.getCategoryPath());
    if (rootNode == TaxonomyReader.INVALID_ORDINAL) {
      return null;
    }

    int K = Math.min(facetRequest.getNumResults(),taxonomyReader.getSize()); // number of best results in each node

    // this will grow into the returned IntermediateFacetResult
    IntToObjectMap<AACO> AACOsOfOnePartition = new IntToObjectMap<AACO>();

    int partitionSize = arrays.getArraysLength(); // all partitions, except, possibly, the last,
    // have the same length. Hence modulo is OK.

    int depth = facetRequest.getDepth();

    if (depth == 0) {
      // Need to only have root node.
      IntermediateFacetResultWithHash tempFRWH = new IntermediateFacetResultWithHash(
          facetRequest, AACOsOfOnePartition);
      if (isSelfPartition(rootNode, arrays, offset)) {
        tempFRWH.isRootNodeIncluded = true;
        tempFRWH.rootNodeValue = this.facetRequest.getValueOf(arrays, rootNode % partitionSize);
      }
      return tempFRWH;
    }

    // clamp depth so the stacks below (sized depth+2) cannot overflow int/short limits
    if (depth > Short.MAX_VALUE - 3) {
      depth = Short.MAX_VALUE -3;
    }

    int endOffset = offset + partitionSize; // one past the largest ordinal in the partition
    ChildrenArrays childrenArray = taxonomyReader.getChildrenArrays();
    int[] youngestChild = childrenArray.getYoungestChildArray();
    int[] olderSibling = childrenArray.getOlderSiblingArray();
    int totalNumOfDescendantsConsidered = 0; // total number of facets with value != 0,
    // in the tree. These include those selected as top K in each node, and all the others that
    // were not. Not including rootNode

    // the following priority queue will be used again and again for each node recursed into
    // to select its best K children among its children encountered in the given partition
    PriorityQueue<AggregatedCategory> pq =
      new AggregatedCategoryHeap(K, this.getSuitableACComparator());

    // reusables will feed the priority queue in each use
    AggregatedCategory [] reusables = new AggregatedCategory[2+K];
    for (int i = 0; i < reusables.length; i++) {
      reusables[i] = new AggregatedCategory(1,0);
    }

    /*
     * The returned map is built by a recursive visit of potential tree nodes. Nodes
     * determined to be excluded from the FacetResult are not recursively explored as others,
     * they are only recursed in order to count the number of their descendants.
     * Also, nodes such that they and any of their descendants can not be mapped into facets
     * encountered in this partition, are, too, explored no further. These are facets whose ordinal
     * numbers are greater than the ordinals of the given partition. (recall that the Taxonomy
     * maintains that a parent ordinal is smaller than any of its descendants' ordinals).
     * So, when scanning over all children of a potential tree node n: (1) all children with ordinal number
     * greater than those in the given partition are skipped over, (2) among the children of n residing
     * in this partition, the best K children are selected (using pq) for usual further recursion
     * and the rest (those rejected out from the pq) are only recursed for counting total number
     * of descendants, and (3) all the children of ordinal numbers smaller than the given partition
     * are further explored in the usual way, since these may lead to descendants residing in this partition.
     *
     * ordinalStack drives the recursive descent.
     * Top of stack holds the current node which we recurse from.
     * ordinalStack[0] holds the root of the facetRequest, and
     * it is always maintained that parent(ordinalStack[i]) = ordinalStack[i-1].
     * localDepth points to the current top of ordinalStack.
     * Only top of ordinalStack can be TaxonomyReader.INVALID_ORDINAL, and this if and only if
     * the element below it explored all its relevant children.
     */
    int[] ordinalStack = new int[depth+2]; // for 0 and for invalid on top
    ordinalStack[0] = rootNode;
    int localDepth = 0;

    /*
     * bestSignlingsStack[i] maintains the best K children of ordinalStack[i-1], namely,
     * the best K siblings of ordinalStack[i], best K among those residing in the given partition.
     * Note that the residents of ordinalStack need not belong
     * to the current partition, only the residents of bestSignlingsStack.
     * When exploring the children of ordinalStack[i-1] that reside in the current partition
     * (after the top K of them have been determined and stored into bestSignlingsStack[i]),
     * siblingExplored[i] points into bestSignlingsStack[i], to the child now explored, hence
     * residing in ordinalStack[i], and firstToTheLeftOfPartition[i] holds the largest ordinal of
     * a sibling smaller than the ordinals in the partition.
     * When siblingExplored[i] == max int, the top K siblings of ordinalStack[i] among those siblings
     * that reside in this partition have not been determined yet.
     * if siblingExplored[i] < 0, the node in ordinalStack[i] is to the left of partition
     * (i.e. of a smaller ordinal than the current partition)
     * (step (3) above is executed for the children of ordinalStack[i-1])
     */
    int[][] bestSignlingsStack = new int[depth+2][];
    int[] siblingExplored = new int[depth+2];
    int[] firstToTheLeftOfPartition = new int [depth+2];

    int tosOrdinal; // top of stack element, the ordinal at the top of stack

    /*
     * to start the loop, complete the datastructures for root node:
     * push its youngest child to ordinalStack; make a note in siblingExplored[] that the children
     * of rootNode, which reside in the current partition have not been read yet to select the top
     * K of them. Also, mark rootNode as if, related to its parent, rootNode belongs to the children
     * of ordinal numbers smaller than those of the current partition (this will ease the end condition --
     * we can continue to the older sibling of rootNode once the localDepth goes down, before we verify that
     * it went that down)
     */
    ordinalStack[++localDepth] = youngestChild[rootNode];
    siblingExplored[localDepth] = Integer.MAX_VALUE; // we have not verified position wrt current partition
    siblingExplored[0] = -1; // as if rootNode resides to the left of current position

    /*
     * now the whole recursion: loop as long as stack is not empty of elements descendants of
     * facetRequest's root.
     */

    while (localDepth > 0) {
      tosOrdinal = ordinalStack[localDepth];
      if (tosOrdinal == TaxonomyReader.INVALID_ORDINAL) {
        // the brotherhood that has been occupying the top of stack is all exhausted.
        // Hence, element below tos, namely, father of tos, has all its children,
        // and itself, all explored.
        localDepth--;
        // replace this father, now on top of stack, by this father's sibling:
        // this parent's ordinal can not be greater than current partition, as otherwise
        // its child, now just removed, would not have been pushed on it.
        // so the father is either inside the partition, or of smaller ordinal
        if (siblingExplored[localDepth] < 0 ) {
          // to the left of partition: continue with the plain sibling chain
          ordinalStack[localDepth] = olderSibling[ordinalStack[localDepth]];
          continue;
        }
        // at this point, siblingExplored[localDepth] is between 0 and number of bestSiblings
        // it can not be max int
        siblingExplored[localDepth]--;
        if (siblingExplored[localDepth] == -1 ) {
          //siblings residing in the partition have been all processed, we now move
          // to those of ordinal numbers smaller than the partition
          ordinalStack[localDepth] = firstToTheLeftOfPartition[localDepth];
        } else {
          // still explore siblings residing in the partition
          // just move to the next one
          ordinalStack[localDepth] = bestSignlingsStack[localDepth][siblingExplored[localDepth]];
        }
        continue;
      } // endof tosOrdinal is invalid, and hence removed, and its parent was replaced by this
      // parent's sibling

      // now try to push a kid, but first look at tos whether it 'deserves' its kids explored:
      // it is not to the right of current partition, and we know whether to only count or to
      // select best K siblings.
      if (siblingExplored[localDepth] == Integer.MAX_VALUE) {
        //tosOrdinal was not examined yet for its position relative to current partition
        // and the best K of current partition, among its siblings, have not been determined yet
        while (tosOrdinal >= endOffset) {
          tosOrdinal = olderSibling[tosOrdinal];
        }
        // now it is inside. Run it and all its siblings inside the partition through a heap
        // and in doing so, count them, find best K, and sum into residue
        double residue = 0f; // the sum of all the siblings from this partition that do not make
        // it to top K
        pq.clear();

        //reusables are consumed as from a stack. The stack starts full and returns full.
        int tosReuslables = reusables.length -1;

        while (tosOrdinal >= offset) { // while tosOrdinal belongs to the given partition; here, too, we use the fact
          // that TaxonomyReader.INVALID_ORDINAL == -1 < offset
          double value = facetRequest.getValueOf(arrays, tosOrdinal % partitionSize);
          if (value != 0) { // the value of yc is not 0, it is to be considered.
            totalNumOfDescendantsConsidered++;

            // consume one reusable, and push to the priority queue
            AggregatedCategory ac = reusables[tosReuslables--];
            ac.ordinal = tosOrdinal;
            ac.value = value;
            ac = pq.insertWithOverflow(ac);
            if (null != ac) {
              residue += ac.value;
              // TODO (Facet): could it be that we need to do something
              // else, not add, depending on the aggregator?

              /* when a facet is excluded from top K, because already in this partition it has
               * K better siblings, it is only recursed for count only.
               */
              // update totalNumOfDescendants by the now excluded node and all its descendants
              totalNumOfDescendantsConsidered--; // reduce the 1 earned when the excluded node entered the heap
              // and now return it and all its descendants. These will never make it to FacetResult
              totalNumOfDescendantsConsidered += countOnly (ac.ordinal, youngestChild,
                  olderSibling, arrays, partitionSize, offset, endOffset, localDepth, depth);
              reusables[++tosReuslables] = ac;
            }
          }
          tosOrdinal = olderSibling[tosOrdinal];
        }
        // now pq has best K children of ordinals that belong to the given partition.
        // Populate a new AACO with them.
        // tosOrdinal is now first sibling smaller than partition, make a note of that
        firstToTheLeftOfPartition[localDepth] = tosOrdinal;
        int aaci = pq.size();
        int[] ords = new int[aaci];
        double [] vals = new double [aaci];
        while (aaci > 0) {
          AggregatedCategory ac = pq.pop();
          ords[--aaci] = ac.ordinal;
          vals[aaci] = ac.value;
          reusables[++tosReuslables] = ac;
        }
        // if more than 0 ordinals, add this AACO to the map to be returned,
        // and add ords to sibling stack, and make a note in siblingExplored that these are to
        // be visited now
        if (ords.length > 0) {
          AACOsOfOnePartition.put(ordinalStack[localDepth-1], new AACO(ords,vals,residue));
          bestSignlingsStack[localDepth] = ords;
          siblingExplored[localDepth] = ords.length-1;
          ordinalStack[localDepth] = ords[ords.length-1];
        } else {
          // no ordinals siblings of tosOrdinal in current partition, move to the left of it
          // tosOrdinal is already there (to the left of partition).
          // make a note of it in siblingExplored
          ordinalStack[localDepth] = tosOrdinal;
          siblingExplored[localDepth] = -1;
        }
        continue;
      } // endof we did not check the position of a valid ordinal wrt partition

      // now tosOrdinal is a valid ordinal, inside partition or to the left of it, we need
      // to push its kids on top of it, if not too deep.
      // Make a note that we did not check them yet
      if (localDepth >= depth) {
        // localDepth == depth; current tos exhausted its possible children, mark this by pushing INVALID_ORDINAL
        ordinalStack[++localDepth] = TaxonomyReader.INVALID_ORDINAL;
        continue;
      }
      ordinalStack[++localDepth] = youngestChild[tosOrdinal];
      siblingExplored[localDepth] = Integer.MAX_VALUE;
    } // endof loop while stack is not empty

    // now generate a TempFacetResult from AACOsOfOnePartition, and consider self.
    IntermediateFacetResultWithHash tempFRWH = new IntermediateFacetResultWithHash(
        facetRequest, AACOsOfOnePartition);
    if (isSelfPartition(rootNode, arrays, offset)) {
      tempFRWH.isRootNodeIncluded = true;
      tempFRWH.rootNodeValue = this.facetRequest.getValueOf(arrays, rootNode % partitionSize);
    }
    tempFRWH.totalNumOfFacetsConsidered = totalNumOfDescendantsConsidered;
    return tempFRWH;

  }
|
||||
|
||||
/**
|
||||
* Recursively count <code>ordinal</code>, whose depth is <code>currentDepth</code>,
|
||||
* and all its descendants down to <code>maxDepth</code> (including),
|
||||
* descendants whose value in the count arrays, <code>arrays</code>, is != 0.
|
||||
* The count arrays only includes the current partition, from <code>offset</code>, to (exclusive)
|
||||
* <code>endOffset</code>.
|
||||
* It is assumed that <code>ordinal</code> < <code>endOffset</code>,
|
||||
* otherwise, not <code>ordinal</code>, and none of its descendants, reside in
|
||||
* the current partition. <code>ordinal</code> < <code>offset</code> is allowed,
|
||||
* as ordinal's descendants might be >= <code>offeset</code>.
|
||||
*
|
||||
* @param ordinal a facet ordinal.
|
||||
* @param youngestChild mapping a given ordinal to its youngest child in the taxonomy (of largest ordinal number),
|
||||
* or to -1 if has no children.
|
||||
* @param olderSibling mapping a given ordinal to its older sibling, or to -1
|
||||
* @param arrays values for the ordinals in the given partition
|
||||
* @param offset the first (smallest) ordinal in the given partition
|
||||
* @param partitionSize number of ordinals in the given partition
|
||||
* @param endOffset one larger than the largest ordinal that belong to this partition
|
||||
* @param currentDepth the depth or ordinal in the TaxonomyTree (relative to rootnode of the facetRequest)
|
||||
* @param maxDepth maximal depth of descendants to be considered here (measured relative to rootnode of the
|
||||
* facetRequest).
|
||||
*
|
||||
* @return the number of nodes, from ordinal down its descendants, of depth <= maxDepth,
|
||||
* which reside in the current partition, and whose value != 0
|
||||
*/
|
||||
private int countOnly(int ordinal, int[] youngestChild, int[] olderSibling,
|
||||
FacetArrays arrays, int partitionSize, int offset,
|
||||
int endOffset, int currentDepth, int maxDepth) {
|
||||
int ret = 0;
|
||||
if (offset <= ordinal) {
|
||||
// ordinal belongs to the current partition
|
||||
if (0 != facetRequest.getValueOf(arrays, ordinal % partitionSize)) {
|
||||
ret++;
|
||||
}
|
||||
}
|
||||
// now consider children of ordinal, if not too deep
|
||||
if (currentDepth >= maxDepth) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
int yc = youngestChild[ordinal];
|
||||
while (yc >= endOffset) {
|
||||
yc = olderSibling[yc];
|
||||
}
|
||||
while (yc > TaxonomyReader.INVALID_ORDINAL) { // assuming this is -1, smaller than any legal ordinal
|
||||
ret += countOnly (yc, youngestChild, olderSibling, arrays,
|
||||
partitionSize, offset, endOffset, currentDepth+1, maxDepth);
|
||||
yc = olderSibling[yc];
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
  /**
   * Merge several partitions' {@link IntermediateFacetResult}-s into one of the
   * same format. The first non-null input is mutated in place and returned.
   * For each parent ordinal, the best K children seen across all inputs are
   * kept, and the values of children rejected from the top K are folded into
   * that parent's residue.
   *
   * @see FacetResultsHandler#mergeResults(IntermediateFacetResult...)
   */
  @Override
  public IntermediateFacetResult mergeResults(IntermediateFacetResult... tmpResults)
  throws ClassCastException, IllegalArgumentException {

    if (tmpResults.length == 0) {
      return null;
    }

    int i=0;
    // skip over null tmpResults
    for (; (i < tmpResults.length)&&(tmpResults[i] == null); i++) {}
    if (i == tmpResults.length) {
      // all inputs are null
      return null;
    }

    // i points to the first non-null input
    int K = this.facetRequest.getNumResults(); // number of best results kept in each node
    IntermediateFacetResultWithHash tmpToReturn = (IntermediateFacetResultWithHash)tmpResults[i++];

    // now loop over the rest of tmpResults and merge each into tmpToReturn
    for ( ; i < tmpResults.length; i++) {
      IntermediateFacetResultWithHash tfr = (IntermediateFacetResultWithHash)tmpResults[i];
      tmpToReturn.totalNumOfFacetsConsidered += tfr.totalNumOfFacetsConsidered;
      if (tfr.isRootNodeIncluded) {
        // the root ordinal appears in at most one partition, so its value is
        // copied, not accumulated
        tmpToReturn.isRootNodeIncluded = true;
        tmpToReturn.rootNodeValue = tfr.rootNodeValue;
      }
      // now merge the HashMap of tfr into that of tmpToReturn
      IntToObjectMap<AACO> tmpToReturnMapToACCOs = tmpToReturn.mapToAACOs;
      IntToObjectMap<AACO> tfrMapToACCOs = tfr.mapToAACOs;
      IntIterator tfrIntIterator = tfrMapToACCOs.keyIterator();
      // iterate over all ordinals in tfr that map to their children (and the
      // residue over non-included children)
      while (tfrIntIterator.hasNext()) {
        int tfrkey = tfrIntIterator.next();
        AACO tmpToReturnAACO = null;
        if (null == (tmpToReturnAACO = tmpToReturnMapToACCOs.get(tfrkey))) {
          // if tmpToReturn does not have any kids of tfrkey, map all the kids
          // from tfr to it as one package, along with their residue
          tmpToReturnMapToACCOs.put(tfrkey, tfrMapToACCOs.get(tfrkey));
        } else {
          // merge the best K children of tfrkey as they appear in tmpToReturn and in tfr
          AACO tfrAACO = tfrMapToACCOs.get(tfrkey);
          int resLength = tfrAACO.ordinals.length + tmpToReturnAACO.ordinals.length;
          if (K < resLength) {
            resLength = K; // keep no more than the K best of the combined lists
          }
          int[] resOrds = new int [resLength];
          double[] resVals = new double [resLength];
          // residues always accumulate; rejected children are added below
          double resResidue = tmpToReturnAACO.residue + tfrAACO.residue;
          int indexIntoTmpToReturn = 0;
          int indexIntoTFR = 0;
          ACComparator merger = getSuitableACComparator(); // by facet Request
          // classic two-way merge of the (already sorted) child lists
          for (int indexIntoRes = 0; indexIntoRes < resLength; indexIntoRes++) {
            if (indexIntoTmpToReturn >= tmpToReturnAACO.ordinals.length) {
              // tmpToReturnAACO (former result to return) ran out of indices;
              // it is all merged into resOrds and resVals
              resOrds[indexIntoRes] = tfrAACO.ordinals[indexIntoTFR];
              resVals[indexIntoRes] = tfrAACO.values[indexIntoTFR];
              indexIntoTFR++;
              continue;
            }
            if (indexIntoTFR >= tfrAACO.ordinals.length) {
              // tfr ran out of indices
              resOrds[indexIntoRes] = tmpToReturnAACO.ordinals[indexIntoTmpToReturn];
              resVals[indexIntoRes] = tmpToReturnAACO.values[indexIntoTmpToReturn];
              indexIntoTmpToReturn++;
              continue;
            }
            // select which goes now to res: next (ord, value) from tmpToReturn or from tfr:
            if (merger.leftGoesNow( tmpToReturnAACO.ordinals[indexIntoTmpToReturn],
                                    tmpToReturnAACO.values[indexIntoTmpToReturn],
                                    tfrAACO.ordinals[indexIntoTFR],
                                    tfrAACO.values[indexIntoTFR])) {
              resOrds[indexIntoRes] = tmpToReturnAACO.ordinals[indexIntoTmpToReturn];
              resVals[indexIntoRes] = tmpToReturnAACO.values[indexIntoTmpToReturn];
              indexIntoTmpToReturn++;
            } else {
              resOrds[indexIntoRes] = tfrAACO.ordinals[indexIntoTFR];
              resVals[indexIntoRes] = tfrAACO.values[indexIntoTFR];
              indexIntoTFR++;
            }
          } // end of merge of best kids of tfrkey that appear in tmpToReturn and its kids that appear in tfr
          // altogether yielding no more than the best K kids for tfrkey in the new shape of
          // tmpToReturn

          // children left over in either list did not make the top K: fold
          // their values into the residue
          while (indexIntoTmpToReturn < tmpToReturnAACO.ordinals.length) {
            resResidue += tmpToReturnAACO.values[indexIntoTmpToReturn++];
          }
          while (indexIntoTFR < tfrAACO.ordinals.length) {
            resResidue += tfrAACO.values[indexIntoTFR++];
          }
          // update the list of best kids of tfrkey as they appear in tmpToReturn
          tmpToReturnMapToACCOs.put(tfrkey, new AACO(resOrds, resVals, resResidue));
        } // endof need to merge both AACO -- children and residue for same ordinal

      } // endof loop over all ordinals in tfr
    } // endof loop over all temporary facet results to merge

    return tmpToReturn;
  }
|
||||
|
||||
private static class AggregatedCategoryHeap extends PriorityQueue<AggregatedCategory> {
|
||||
|
||||
private ACComparator merger;
|
||||
public AggregatedCategoryHeap(int size, ACComparator merger) {
|
||||
super(size);
|
||||
this.merger = merger;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean lessThan(AggregatedCategory arg1, AggregatedCategory arg2) {
|
||||
return merger.leftGoesNow(arg2.ordinal, arg2.value, arg1.ordinal, arg1.value);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private static class ResultNodeHeap extends PriorityQueue<FacetResultNode> {
|
||||
private ACComparator merger;
|
||||
public ResultNodeHeap(int size, ACComparator merger) {
|
||||
super(size);
|
||||
this.merger = merger;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean lessThan(FacetResultNode arg1, FacetResultNode arg2) {
|
||||
return merger.leftGoesNow(arg2.getOrdinal(), arg2.getValue(), arg1.getOrdinal(), arg1.getValue());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the {@link ACComparator} that reflects the order,
|
||||
* expressed in the {@link FacetRequest}, of
|
||||
* facets in the {@link FacetResult}.
|
||||
*/
|
||||
|
||||
private ACComparator getSuitableACComparator() {
|
||||
if (facetRequest.getSortOrder() == SortOrder.ASCENDING) {
|
||||
switch (facetRequest.getSortBy()) {
|
||||
case VALUE:
|
||||
return new AscValueACComparator();
|
||||
case ORDINAL:
|
||||
return new AscOrdACComparator();
|
||||
}
|
||||
} else {
|
||||
switch (facetRequest.getSortBy()) {
|
||||
case VALUE:
|
||||
return new DescValueACComparator();
|
||||
case ORDINAL:
|
||||
return new DescOrdACComparator();
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
  /**
   * A comparator of two aggregated categories according to the order
   * (ascending / descending) and item (ordinal or value) specified in the
   * FacetRequest for the FacetResult to be generated.
   */
  private static abstract class ACComparator {
    ACComparator() { }
    // Returns true if the (ord1, val1) pair should be emitted before
    // (ord2, val2) in the final result order.
    protected abstract boolean leftGoesNow (int ord1, double val1, int ord2, double val2);
  }
|
||||
|
||||
private static final class AscValueACComparator extends ACComparator {
|
||||
|
||||
AscValueACComparator() { }
|
||||
|
||||
@Override
|
||||
protected boolean leftGoesNow (int ord1, double val1, int ord2, double val2) {
|
||||
return (val1 < val2);
|
||||
}
|
||||
}
|
||||
|
||||
private static final class DescValueACComparator extends ACComparator {
|
||||
|
||||
DescValueACComparator() { }
|
||||
|
||||
@Override
|
||||
protected boolean leftGoesNow (int ord1, double val1, int ord2, double val2) {
|
||||
return (val1 > val2);
|
||||
}
|
||||
}
|
||||
|
||||
private static final class AscOrdACComparator extends ACComparator {
|
||||
|
||||
AscOrdACComparator() { }
|
||||
|
||||
@Override
|
||||
protected boolean leftGoesNow (int ord1, double val1, int ord2, double val2) {
|
||||
return (ord1 < ord2);
|
||||
}
|
||||
}
|
||||
|
||||
private static final class DescOrdACComparator extends ACComparator {
|
||||
|
||||
DescOrdACComparator() { }
|
||||
|
||||
@Override
|
||||
protected boolean leftGoesNow (int ord1, double val1, int ord2, double val2) {
|
||||
return (ord1 > ord2);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Intermediate result to hold counts from one or more partitions processed
|
||||
* thus far. Its main field, constructor parameter <i>mapToAACOs</i>, is a map
|
||||
* from ordinals to AACOs. The AACOs mapped to contain ordinals and values
|
||||
* encountered in the count arrays of the partitions processed thus far. The
|
||||
* ordinals mapped from are their parents, and they may be not contained in
|
||||
* the partitions processed thus far. All nodes belong to the taxonomy subtree
|
||||
* defined at the facet request, constructor parameter <i>facetReq</i>, by its
|
||||
* root and depth.
|
||||
*/
|
||||
public static class IntermediateFacetResultWithHash implements IntermediateFacetResult {
|
||||
protected IntToObjectMap<AACO> mapToAACOs;
|
||||
FacetRequest facetRequest;
|
||||
boolean isRootNodeIncluded; // among the ordinals in the partitions
|
||||
// processed thus far
|
||||
double rootNodeValue; // the value of it, in case encountered.
|
||||
int totalNumOfFacetsConsidered; // total number of facets
|
||||
// which belong to facetRequest subtree and have value != 0,
|
||||
// and have been encountered thus far in the partitions processed.
|
||||
// root node of result tree is not included in this count.
|
||||
|
||||
public IntermediateFacetResultWithHash(FacetRequest facetReq,
|
||||
IntToObjectMap<AACO> mapToAACOs) {
|
||||
this.mapToAACOs = mapToAACOs;
|
||||
this.facetRequest = facetReq;
|
||||
this.isRootNodeIncluded = false;
|
||||
this.rootNodeValue = 0.0;
|
||||
this.totalNumOfFacetsConsidered = 0;
|
||||
}
|
||||
|
||||
public FacetRequest getFacetRequest() {
|
||||
return this.facetRequest;
|
||||
}
|
||||
} // endof FacetResultWithHash
|
||||
|
||||
/**
|
||||
* Maintains info of one entry in the filled up count array:
|
||||
* an ordinal number of a category and the value aggregated for it
|
||||
* (typically, that value is the count for that ordinal).
|
||||
*/
|
||||
private static final class AggregatedCategory {
|
||||
int ordinal;
|
||||
double value;
|
||||
AggregatedCategory(int ord, double val) {
|
||||
this.ordinal = ord;
|
||||
this.value = val;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Maintains an array of {@link AggregatedCategory}. For space consideration, this is implemented as
|
||||
* a pair of arrays, <i>ordinals</i> and <i>values</i>, rather than one array of pairs.
|
||||
* Enumerated in <i>ordinals</i> are siblings,
|
||||
* potential nodes of the {@link FacetResult} tree
|
||||
* (i.e., the descendants of the root node, no deeper than the specified depth).
|
||||
* No more than K ( = {@link FacetRequest#getNumResults()})
|
||||
* siblings are enumerated, and
|
||||
* <i>residue</i> holds the sum of values of the siblings rejected from the
|
||||
* enumerated top K.
|
||||
*/
|
||||
private static final class AACO {
|
||||
int [] ordinals; // ordinals of the best K children, sorted from best to least
|
||||
double [] values; // the respective values for these children
|
||||
double residue; // sum of values of all other children, that did not get into top K
|
||||
AACO (int[] ords, double[] vals, double r) {
|
||||
this.ordinals = ords;
|
||||
this.values = vals;
|
||||
this.residue = r;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
/**
|
||||
* Recursively label the first facetRequest.getNumLabel() sub results
|
||||
* of the root of a given {@link FacetResult}, or of an already labeled node in it.
|
||||
* I.e., a node is labeled only if it is the root or all its ancestors are labeled.
|
||||
*/
|
||||
public void labelResult(FacetResult facetResult) throws IOException {
|
||||
if (facetResult == null) {
|
||||
return; // any result to label?
|
||||
}
|
||||
FacetResultNode rootNode = facetResult.getFacetResultNode();
|
||||
recursivelyLabel(rootNode, facetRequest.getNumLabel());
|
||||
}
|
||||
|
||||
private void recursivelyLabel(FacetResultNode node, int numToLabel) throws IOException {
|
||||
if (node == null) {
|
||||
return;
|
||||
}
|
||||
node.getLabel(this.taxonomyReader); // attach a label -- category path -- to the node
|
||||
if (null == node.getSubResults()) {
|
||||
return; // if node has no children -- done
|
||||
}
|
||||
|
||||
// otherwise, label the first numToLabel of these children, and recursively -- their children.
|
||||
int numLabeled = 0;
|
||||
for (FacetResultNode frn : node.getSubResults()) {
|
||||
// go over the children of node from first to last, no more than numToLable of them
|
||||
recursivelyLabel(frn, numToLabel);
|
||||
if (++numLabeled >= numToLabel) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
// verifies that the children of each node are sorted by the order
|
||||
// specified by the facetRequest.
|
||||
// the values in these nodes may have changed due to a re-count, for example
|
||||
// following the accumulation by Sampling.
|
||||
// so now we test and re-order if necessary.
|
||||
public FacetResult rearrangeFacetResult(FacetResult facetResult) {
|
||||
PriorityQueue<FacetResultNode> nodesHeap =
|
||||
new ResultNodeHeap(this.facetRequest.getNumResults(), this.getSuitableACComparator());
|
||||
MutableFacetResultNode topFrn = (MutableFacetResultNode) facetResult.getFacetResultNode(); // safe cast
|
||||
rearrangeChilrenOfNode(topFrn, nodesHeap);
|
||||
return facetResult;
|
||||
}
|
||||
|
||||
private void rearrangeChilrenOfNode(FacetResultNode node,
|
||||
PriorityQueue<FacetResultNode> nodesHeap) {
|
||||
nodesHeap.clear(); // just to be safe
|
||||
for (FacetResultNode frn : node.getSubResults()) {
|
||||
nodesHeap.add(frn);
|
||||
}
|
||||
int size = nodesHeap.size();
|
||||
ArrayList<FacetResultNode> subResults = new ArrayList<FacetResultNode>(size);
|
||||
while (nodesHeap.size()>0) {
|
||||
subResults.add(0,nodesHeap.pop());
|
||||
}
|
||||
((MutableFacetResultNode)node).setSubResults(subResults);
|
||||
for (FacetResultNode frn : node.getSubResults()) {
|
||||
rearrangeChilrenOfNode(frn, nodesHeap);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public FacetResult renderFacetResult(IntermediateFacetResult tmpResult) throws IOException {
|
||||
IntermediateFacetResultWithHash tmp = (IntermediateFacetResultWithHash) tmpResult;
|
||||
int ordinal = this.taxonomyReader.getOrdinal(this.facetRequest.getCategoryPath());
|
||||
if ((tmp == null) || (ordinal == TaxonomyReader.INVALID_ORDINAL)) {
|
||||
return null;
|
||||
}
|
||||
double value = Double.NaN;
|
||||
if (tmp.isRootNodeIncluded) {
|
||||
value = tmp.rootNodeValue;
|
||||
}
|
||||
MutableFacetResultNode root = generateNode (ordinal, value, tmp.mapToAACOs);
|
||||
return new FacetResult (tmp.facetRequest, root, tmp.totalNumOfFacetsConsidered);
|
||||
|
||||
}
|
||||
|
||||
private MutableFacetResultNode generateNode (int ordinal, double val, IntToObjectMap<AACO> mapToAACOs) {
|
||||
MutableFacetResultNode node = new MutableFacetResultNode(ordinal, val);
|
||||
AACO aaco = mapToAACOs.get(ordinal);
|
||||
if (null == aaco) {
|
||||
return node;
|
||||
}
|
||||
List<FacetResultNode> list = new ArrayList<FacetResultNode>();
|
||||
for (int i = 0; i < aaco.ordinals.length; i++) {
|
||||
list.add(generateNode(aaco.ordinals[i], aaco.values[i], mapToAACOs));
|
||||
}
|
||||
node.setSubResults(list);
|
||||
node.setResidue(aaco.residue);
|
||||
return node;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,188 @@
|
|||
package org.apache.lucene.facet.search;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.BufferedOutputStream;
|
||||
import java.io.DataInputStream;
|
||||
import java.io.DataOutputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.store.LockObtainFailedException;
|
||||
|
||||
import org.apache.lucene.facet.index.params.CategoryListParams;
|
||||
import org.apache.lucene.facet.index.params.FacetIndexingParams;
|
||||
import org.apache.lucene.facet.search.aggregator.Aggregator;
|
||||
import org.apache.lucene.facet.search.aggregator.CountingAggregator;
|
||||
import org.apache.lucene.facet.search.cache.CategoryListCache;
|
||||
import org.apache.lucene.facet.search.cache.CategoryListData;
|
||||
import org.apache.lucene.facet.search.params.FacetSearchParams;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
import org.apache.lucene.facet.util.PartitionsUtils;
|
||||
import org.apache.lucene.facet.util.ScoredDocIdsUtils;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
 * Maintain Total Facet Counts per partition, for given parameters:
 * <ul>
 *  <li>Index reader of an index</li>
 *  <li>Taxonomy index reader</li>
 *  <li>Facet indexing params (and particularly the category list params)</li>
 * </ul>
 * The total facet counts are maintained as an array of arrays of integers,
 * where a separate array is kept for each partition.
 *
 * @lucene.experimental
 */
public class TotalFacetCounts {

  /** total facet counts per partition: totalCounts[partition][ordinal%partitionLength] */
  private int[][] totalCounts = null;

  private final TaxonomyReader taxonomy;
  private final FacetIndexingParams facetIndexingParams;

  // Monotonically increasing generation stamp handed to each new instance, so
  // tests can tell instances apart.
  private final static AtomicInteger atomicGen4Test = new AtomicInteger(1);
  /** Creation type for test purposes */
  enum CreationType { Computed, Loaded } // for testing
  final int gen4test;
  final CreationType createType4test;

  /**
   * Construct by key - from index Directory or by recomputing.
   * @param taxonomy the taxonomy these counts refer to
   * @param facetIndexingParams partitioning/category-list parameters
   * @param counts per-partition count arrays (entries may be null)
   * @param createType4Test whether the counts were computed or loaded (testing aid)
   */
  private TotalFacetCounts (TaxonomyReader taxonomy, FacetIndexingParams facetIndexingParams,
      int[][] counts, CreationType createType4Test) throws IOException, LockObtainFailedException {
    this.taxonomy = taxonomy;
    this.facetIndexingParams = facetIndexingParams;
    this.totalCounts = counts;
    this.createType4test = createType4Test;
    this.gen4test = atomicGen4Test.incrementAndGet();
  }

  /**
   * Fill a partition's array with the TotalCountsArray values.
   * @param partitionArray array to fill
   * @param partition number of required partition
   */
  public void fillTotalCountsForPartition(int[] partitionArray, int partition) {
    int partitionSize = partitionArray.length;
    int[] countArray = totalCounts[partition];
    if (countArray == null) {
      // lazily materialize an all-zero array for a partition that was stored
      // as null (see loadFromFile), so subsequent calls can copy from it
      countArray = new int[partitionSize];
      totalCounts[partition] = countArray;
    }
    // guard against a stored array shorter than the requested partition size
    int length = Math.min(partitionSize, countArray.length);
    System.arraycopy(countArray, 0, partitionArray, 0, length);
  }

  /**
   * Return the total count of an input category
   * @param ordinal ordinal of category whose total count is required
   */
  public int getTotalCount(int ordinal) {
    int partition = PartitionsUtils.partitionNumber(facetIndexingParams,ordinal);
    int offset = ordinal % PartitionsUtils.partitionSize(facetIndexingParams, taxonomy);
    return totalCounts[partition][offset];
  }

  // Deserialize counts previously written by storeToFile(). Format: number of
  // partitions, then per partition its length (-1 encodes a null array)
  // followed by that many ints.
  static TotalFacetCounts loadFromFile(File inputFile, TaxonomyReader taxonomy,
      FacetIndexingParams facetIndexingParams) throws IOException {
    DataInputStream dis = new DataInputStream(new BufferedInputStream(new FileInputStream(inputFile)));
    try {
      int[][] counts = new int[dis.readInt()][];
      for (int i=0; i<counts.length; i++) {
        int size = dis.readInt();
        if (size<0) {
          counts[i] = null; // -1 marks a partition with no stored counts
        } else {
          counts[i] = new int[size];
          for (int j=0; j<size; j++) {
            counts[i][j] = dis.readInt();
          }
        }
      }
      return new TotalFacetCounts(taxonomy, facetIndexingParams, counts, CreationType.Loaded);
    } finally {
      dis.close();
    }
  }

  // Serialize counts in the format read back by loadFromFile().
  static void storeToFile(File outputFile, TotalFacetCounts tfc) throws IOException {
    DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(outputFile)));
    try {
      dos.writeInt(tfc.totalCounts.length);
      for (int[] counts : tfc.totalCounts) {
        if (counts == null) {
          dos.writeInt(-1); // null partition encoded as length -1
        } else {
          dos.writeInt(counts.length);
          for (int i : counts) {
            dos.writeInt(i);
          }
        }
      }
    } finally {
      dos.close();
    }
  }

  // Compute counts for all documents by running a counting accumulation over
  // the whole index (complement optimization disabled), one count array per
  // partition.
  static TotalFacetCounts compute(final IndexReader indexReader,
      final TaxonomyReader taxonomy, final FacetIndexingParams facetIndexingParams,
      final CategoryListCache clCache) throws IOException {
    int partitionSize = PartitionsUtils.partitionSize(facetIndexingParams, taxonomy);
    final int[][] counts = new int[(int) Math.ceil(taxonomy.getSize() /(float) partitionSize)][partitionSize];
    FacetSearchParams newSearchParams = new FacetSearchParams(facetIndexingParams);
      //createAllListsSearchParams(facetIndexingParams,  this.totalCounts);
    // accumulator whose aggregation writes directly into the counts arrays
    FacetsAccumulator fe = new StandardFacetsAccumulator(newSearchParams, indexReader, taxonomy) {
      @Override
      protected HashMap<CategoryListIterator, Aggregator> getCategoryListMap(
          FacetArrays facetArrays, int partition) throws IOException {

        // all category lists share the same aggregator for this partition
        Aggregator aggregator = new CountingAggregator(counts[partition]);
        HashMap<CategoryListIterator, Aggregator> map = new HashMap<CategoryListIterator, Aggregator>();
        for (CategoryListParams clp: facetIndexingParams.getAllCategoryListParams()) {
          final CategoryListIterator cli = clIteraor(clCache, clp, indexReader, partition);
          map.put(cli, aggregator);
        }
        return map;
      }
    };
    // counting all docs: the complement optimization would be pointless here
    fe.setComplementThreshold(FacetsAccumulator.DISABLE_COMPLEMENT);
    fe.accumulate(ScoredDocIdsUtils.createAllDocsScoredDocIDs(indexReader));
    return new TotalFacetCounts(taxonomy, facetIndexingParams, counts, CreationType.Computed);
  }

  // Obtain a category list iterator for clp, preferring the cached category
  // list data when available.
  // NOTE(review): method name contains a typo ("clIteraor"); kept because it
  // is package-visible and may have callers elsewhere.
  static CategoryListIterator clIteraor(CategoryListCache clCache, CategoryListParams clp,
      IndexReader indexReader, int partition) throws IOException {
    if (clCache != null) {
      CategoryListData cld = clCache.get(clp);
      if (cld != null) {
        return cld.iterator(partition);
      }
    }
    return clp.createCategoryListIterator(indexReader, partition);
  }
}
|
|
@ -0,0 +1,285 @@
|
|||
package org.apache.lucene.facet.search;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.ConcurrentLinkedQueue;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
|
||||
import org.apache.lucene.facet.index.params.CategoryListParams;
|
||||
import org.apache.lucene.facet.index.params.FacetIndexingParams;
|
||||
import org.apache.lucene.facet.search.cache.CategoryListCache;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Manage an LRU cache for {@link TotalFacetCounts} per index, taxonomy, and
|
||||
* facet indexing params.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public final class TotalFacetCountsCache {
|
||||
|
||||
/**
|
||||
* Default size of in memory cache for computed total facet counts.
|
||||
* Set to 2 for the case when an application reopened a reader and
|
||||
* the original one is still in use (Otherwise there will be
|
||||
* switching again and again between the two.)
|
||||
*/
|
||||
public static final int DEFAULT_CACHE_SIZE = 2;
|
||||
|
||||
private static final TotalFacetCountsCache singleton = new TotalFacetCountsCache();
|
||||
|
||||
  /**
   * Get the single instance of this cache.
   * @return the process-wide singleton {@link TotalFacetCountsCache}
   */
  public static TotalFacetCountsCache getSingleton() {
    return singleton;
  }
|
||||
|
||||
/**
|
||||
* In-memory cache of TFCs.
|
||||
* <ul>
|
||||
* <li>It's size is kept within limits through {@link #trimCache()}.
|
||||
* <li>An LRU eviction policy is applied, by maintaining active keys in {@link #lruKeys}.
|
||||
* <li>After each addition to the cache, trimCache is called, to remove entries least recently used.
|
||||
* </ul>
|
||||
* @see #markRecentlyUsed(TFCKey)
|
||||
*/
|
||||
private ConcurrentHashMap<TFCKey,TotalFacetCounts> cache = new ConcurrentHashMap<TFCKey,TotalFacetCounts>();
|
||||
|
||||
/**
|
||||
* A queue of active keys for applying LRU policy on eviction from the {@link #cache}.
|
||||
* @see #markRecentlyUsed(TFCKey)
|
||||
*/
|
||||
private ConcurrentLinkedQueue<TFCKey> lruKeys = new ConcurrentLinkedQueue<TFCKey>();
|
||||
|
||||
private int maxCacheSize = DEFAULT_CACHE_SIZE;
|
||||
|
||||
/** private constructor for singleton pattern */
|
||||
private TotalFacetCountsCache() {
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the total facet counts for a reader/taxonomy pair and facet indexing parameters.
|
||||
* If not in cache, computed here and added to the cache for later use.
|
||||
* @param indexReader the documents index
|
||||
* @param taxonomy the taxonomy index
|
||||
* @param facetIndexingParams facet indexing parameters
|
||||
* @param clCache category list cache for faster computation, can be null
|
||||
* @return the total facet counts.
|
||||
*/
|
||||
public TotalFacetCounts getTotalCounts(IndexReader indexReader, TaxonomyReader taxonomy,
|
||||
FacetIndexingParams facetIndexingParams, CategoryListCache clCache) throws IOException {
|
||||
// create the key
|
||||
TFCKey key = new TFCKey(indexReader, taxonomy, facetIndexingParams);
|
||||
// it is important that this call is not synchronized, so that available TFC
|
||||
// would not wait for one that needs to be computed.
|
||||
TotalFacetCounts tfc = cache.get(key);
|
||||
if (tfc != null) {
|
||||
markRecentlyUsed(key);
|
||||
return tfc;
|
||||
}
|
||||
return computeAndCache(key, clCache);
|
||||
}
|
||||
|
||||
/**
 * Mark the given key as recently used.
 * <p>
 * <b>Implementation notes: Synchronization considerations and the interaction between lruKeys and cache:</b>
 * <ol>
 * <li>A concurrent {@link LinkedHashMap} would have made this class much simpler.
 * But unfortunately, Java does not provide one.
 * Instead, we combine two concurrent objects:
 * <ul>
 * <li>{@link ConcurrentHashMap} for the cached TFCs.
 * <li>{@link ConcurrentLinkedQueue} for active keys
 * </ul>
 * <li>Both {@link #lruKeys} and {@link #cache} are concurrently safe.
 * <li>Checks for a cached item through getTotalCounts() are not synchronized.
 * Therefore, the case that a needed TFC is in the cache is very fast:
 * it does not wait for the computation of other TFCs.
 * <li>computeAndCache() is synchronized, and, has a (double) check of the required
 * TFC, to avoid computing the same TFC twice.
 * <li>A race condition in this method (markRecentlyUsed) might result in two copies
 * of the same 'key' in lruKeys, but this is handled by the loop in trimCache(),
 * where an attempt to remove the same key twice is a no-op.
 * </ol>
 */
private void markRecentlyUsed(TFCKey key) {
  // Remove-then-append moves the key to the tail (most recently used end).
  lruKeys.remove(key);
  lruKeys.add(key);
}
|
||||
|
||||
private synchronized void trimCache() {
|
||||
// loop until cache is of desired size.
|
||||
while (cache.size()>maxCacheSize ) {
|
||||
TFCKey key = lruKeys.poll();
|
||||
if (key==null) { //defensive
|
||||
// it is defensive since lruKeys presumably covers the cache keys
|
||||
key = cache.keys().nextElement();
|
||||
}
|
||||
// remove this element. Note that an attempt to remove with the same key again is a no-op,
|
||||
// which gracefully handles the possible race in markRecentlyUsed().
|
||||
cache.remove(key);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* compute TFC and cache it, after verifying it was not just added - for this
|
||||
* matter this method is synchronized, which is not too bad, because there is
|
||||
* lots of work done in the computations.
|
||||
*/
|
||||
private synchronized TotalFacetCounts computeAndCache(TFCKey key, CategoryListCache clCache) throws IOException {
|
||||
TotalFacetCounts tfc = cache.get(key);
|
||||
if (tfc == null) {
|
||||
tfc = TotalFacetCounts.compute(key.indexReader, key.taxonomy, key.facetIndexingParams, clCache);
|
||||
lruKeys.add(key);
|
||||
cache.put(key,tfc);
|
||||
trimCache();
|
||||
}
|
||||
return tfc;
|
||||
}
|
||||
|
||||
/**
|
||||
* Load {@link TotalFacetCounts} matching input parameters from the provided outputFile
|
||||
* and add them into the cache for the provided indexReader, taxonomy, and facetIndexingParams.
|
||||
* If a {@link TotalFacetCounts} for these parameters already exists in the cache, it will be
|
||||
* replaced by the loaded one.
|
||||
* @param inputFile file from which to read the data
|
||||
* @param indexReader the documents index
|
||||
* @param taxonomy the taxonomy index
|
||||
* @param facetIndexingParams the facet indexing parameters
|
||||
* @throws IOException on error
|
||||
* @see #store(File, IndexReader, TaxonomyReader, FacetIndexingParams, CategoryListCache)
|
||||
*/
|
||||
public synchronized void load(File inputFile, IndexReader indexReader, TaxonomyReader taxonomy,
|
||||
FacetIndexingParams facetIndexingParams) throws IOException {
|
||||
if (!inputFile.isFile() || !inputFile.exists() || !inputFile.canRead()) {
|
||||
throw new IllegalArgumentException("Exepecting an existing readable file: "+inputFile);
|
||||
}
|
||||
TFCKey key = new TFCKey(indexReader, taxonomy, facetIndexingParams);
|
||||
TotalFacetCounts tfc = TotalFacetCounts.loadFromFile(inputFile, taxonomy, facetIndexingParams);
|
||||
cache.put(key,tfc);
|
||||
trimCache();
|
||||
markRecentlyUsed(key);
|
||||
}
|
||||
|
||||
/**
|
||||
* Store the {@link TotalFacetCounts} matching input parameters into the provided outputFile,
|
||||
* making them available for a later call to {@link #load(File, IndexReader, TaxonomyReader, FacetIndexingParams)}.
|
||||
* If these {@link TotalFacetCounts} are available in the cache, they are used. But if they are
|
||||
* not in the cache, this call will first compute them (which will also add them to the cache).
|
||||
* @param outputFile file to store in.
|
||||
* @param indexReader the documents index
|
||||
* @param taxonomy the taxonomy index
|
||||
* @param facetIndexingParams the facet indexing parameters
|
||||
* @param clCache category list cache for faster computation, can be null
|
||||
* @throws IOException on error
|
||||
* @see #load(File, IndexReader, TaxonomyReader, FacetIndexingParams)
|
||||
* @see #getTotalCounts(IndexReader, TaxonomyReader, FacetIndexingParams, CategoryListCache)
|
||||
*/
|
||||
public void store(File outputFile, IndexReader indexReader, TaxonomyReader taxonomy,
|
||||
FacetIndexingParams facetIndexingParams, CategoryListCache clCache) throws IOException {
|
||||
File parentFile = outputFile.getParentFile();
|
||||
if (
|
||||
( outputFile.exists() && (!outputFile.isFile() || !outputFile.canWrite())) ||
|
||||
(!outputFile.exists() && (!parentFile.isDirectory() || !parentFile.canWrite()))
|
||||
) {
|
||||
throw new IllegalArgumentException("Exepecting a writable file: "+outputFile);
|
||||
}
|
||||
TotalFacetCounts tfc = getTotalCounts(indexReader, taxonomy, facetIndexingParams, clCache);
|
||||
TotalFacetCounts.storeToFile(outputFile, tfc);
|
||||
}
|
||||
|
||||
private static class TFCKey {
|
||||
final IndexReader indexReader;
|
||||
final TaxonomyReader taxonomy;
|
||||
private final Iterable<CategoryListParams> clps;
|
||||
private final int hashCode;
|
||||
private final int nDels; // needed when a reader used for faceted search was just used for deletion.
|
||||
final FacetIndexingParams facetIndexingParams;
|
||||
|
||||
public TFCKey(IndexReader indexReader, TaxonomyReader taxonomy,
|
||||
FacetIndexingParams facetIndexingParams) {
|
||||
this.indexReader = indexReader;
|
||||
this.taxonomy = taxonomy;
|
||||
this.facetIndexingParams = facetIndexingParams;
|
||||
this.clps = facetIndexingParams.getAllCategoryListParams();
|
||||
this.nDels = indexReader.numDeletedDocs();
|
||||
hashCode = indexReader.hashCode() ^ taxonomy.hashCode();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return hashCode;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
TFCKey o = (TFCKey) other;
|
||||
if (indexReader != o.indexReader || taxonomy != o.taxonomy || nDels != o.nDels) {
|
||||
return false;
|
||||
}
|
||||
Iterator<CategoryListParams> it1 = clps.iterator();
|
||||
Iterator<CategoryListParams> it2 = o.clps.iterator();
|
||||
while (it1.hasNext() && it2.hasNext()) {
|
||||
if (!it1.next().equals(it2.next())) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return it1.hasNext() == it2.hasNext();
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Clear the cache, dropping both the cached TFCs and the LRU key queue.
 */
public synchronized void clear() {
  cache.clear();
  lruKeys.clear();
}
|
||||
|
||||
/**
 * @return the maximal cache size
 */
public int getCacheSize() {
  // NOTE(review): unsynchronized read; a concurrent setCacheSize() may not be
  // visible immediately, which is acceptable for a size hint.
  return maxCacheSize;
}
|
||||
|
||||
/**
|
||||
* Set the number of TotalFacetCounts arrays that will remain in memory cache.
|
||||
* <p>
|
||||
* If new size is smaller than current size, the cache is appropriately trimmed.
|
||||
* <p>
|
||||
* Minimal size is 1, so passing zero or negative size would result in size of 1.
|
||||
* @param size new size to set
|
||||
*/
|
||||
public void setCacheSize(int size) {
|
||||
if (size < 1) size = 1;
|
||||
int origSize = maxCacheSize;
|
||||
maxCacheSize = size;
|
||||
if (maxCacheSize < origSize) { // need to trim only if the cache was reduced
|
||||
trimCache();
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,51 @@
|
|||
package org.apache.lucene.facet.search.aggregator;

import java.io.IOException;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * The faceting analogue of Lucene's Collector (see
 * {@link org.apache.lucene.search.Collector}): it consumes the category
 * ordinals belonging to each document. What an Aggregator does with the
 * categories it is fed is entirely up to the implementation — counting
 * occurrences per category and computing over association values are
 * typical examples.
 * <P>
 * Much of an implementation's surface lies outside this interface, in
 * particular its constructor and the getters that expose the aggregation
 * results.
 *
 * @lucene.experimental
 */
public interface Aggregator {

  /**
   * Specify the document (and its score in the search) that the following
   * {@link #aggregate(int)} calls will pertain to.
   */
  void setNextDoc(int docid, float score) throws IOException;

  /**
   * Collect (and do whatever an implementation deems appropriate) the
   * category given by its ordinal. This category belongs to a document
   * given earlier by {@link #setNextDoc(int, float)}.
   */
  void aggregate(int ordinal);

}
|
|
@ -0,0 +1,37 @@
|
|||
package org.apache.lucene.facet.search.aggregator;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A {@link CountingAggregator} used during complement counting.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class ComplementCountingAggregator extends CountingAggregator {
|
||||
|
||||
public ComplementCountingAggregator(int[] counterArray) {
|
||||
super(counterArray);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void aggregate(int ordinal) {
|
||||
assert counterArray[ordinal]!=0:"complement aggregation: count is about to become negative for ordinal "+ordinal;
|
||||
--counterArray[ordinal];
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,59 @@
|
|||
package org.apache.lucene.facet.search.aggregator;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A CountingAggregator updates a counter array with the size of the whole
|
||||
* taxonomy, counting the number of times each category appears in the given set
|
||||
* of documents.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class CountingAggregator implements Aggregator {
|
||||
|
||||
protected int[] counterArray;
|
||||
|
||||
public void aggregate(int ordinal) {
|
||||
++counterArray[ordinal];
|
||||
}
|
||||
|
||||
public void setNextDoc(int docid, float score) {
|
||||
// There's nothing for us to do here since we only increment the count by 1
|
||||
// in this aggregator.
|
||||
}
|
||||
|
||||
public CountingAggregator(int[] counterArray) {
|
||||
this.counterArray = counterArray;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
if (obj == null || obj.getClass() != this.getClass()) {
|
||||
return false;
|
||||
}
|
||||
CountingAggregator that = (CountingAggregator) obj;
|
||||
return that.counterArray == this.counterArray;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
int hashCode = counterArray == null ? 0 : counterArray.hashCode();
|
||||
|
||||
return hashCode;
|
||||
}
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue