mirror of https://github.com/apache/lucene.git

LUCENE-5801: add back OrdinalMappingAtomicReader

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1607582 13f79535-47bb-0310-9956-ffa450edef68

commit a7eee727cf (parent 3a7ae7ee18)
CHANGES.txt:
@@ -101,6 +101,10 @@ New Features
 * LUCENE-5778: Support hunspell morphological description fields/aliases.
   (Robert Muir)

+* LUCENE-5801: Added (back) OrdinalMappingAtomicReader for merging search
+  indexes that contain category ordinals from separate taxonomy indexes.
+  (Nicola Buso via Shai Erera)
+
 API Changes

 * LUCENE-5752: Simplified Automaton API to be immutable. (Mike McCandless)
OrdinalMappingAtomicReader.java (new file):
@@ -0,0 +1,144 @@

package org.apache.lucene.facet.taxonomy;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;

import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.taxonomy.OrdinalsReader.OrdinalsSegmentReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.OrdinalMap;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.FilterAtomicReader;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;

/**
 * A {@link FilterAtomicReader} for updating facets ordinal references,
 * based on an ordinal map. Use this reader when merging taxonomies: after
 * you merge taxonomies, you receive an {@link OrdinalMap} which maps the
 * 'old' ordinals to the 'new' ones. You can use that map to re-map the doc
 * values which contain the facets information (ordinals), either before or
 * while merging the indexes.
 * <p>
 * For re-mapping the ordinals during index merge, do the following:
 *
 * <pre class="prettyprint">
 * // merge the old taxonomy with the new one.
 * OrdinalMap map = new MemoryOrdinalMap();
 * DirectoryTaxonomyWriter.addTaxonomy(srcTaxoDir, map);
 * int[] ordmap = map.getMap();
 *
 * // Add the index and re-map ordinals on the go
 * DirectoryReader reader = DirectoryReader.open(oldDir);
 * IndexWriterConfig conf = new IndexWriterConfig(VER, ANALYZER);
 * IndexWriter writer = new IndexWriter(newDir, conf);
 * List<AtomicReaderContext> leaves = reader.leaves();
 * AtomicReader wrappedLeaves[] = new AtomicReader[leaves.size()];
 * for (int i = 0; i < leaves.size(); i++) {
 *   wrappedLeaves[i] = new OrdinalMappingAtomicReader(leaves.get(i).reader(), ordmap);
 * }
 * writer.addIndexes(new MultiReader(wrappedLeaves));
 * writer.commit();
 * </pre>
 *
 * @lucene.experimental
 */
public class OrdinalMappingAtomicReader extends FilterAtomicReader {

  // Silly way, but we need to use dedupAndEncode and it's protected on FacetsConfig.
  private static class InnerFacetsConfig extends FacetsConfig {

    InnerFacetsConfig() {}

    @Override
    public BytesRef dedupAndEncode(IntsRef ordinals) {
      return super.dedupAndEncode(ordinals);
    }
  }

  private class OrdinalMappingBinaryDocValues extends BinaryDocValues {

    private final IntsRef ordinals = new IntsRef(32);
    private final OrdinalsSegmentReader ordsReader;

    OrdinalMappingBinaryDocValues(OrdinalsSegmentReader ordsReader) throws IOException {
      this.ordsReader = ordsReader;
    }

    @SuppressWarnings("synthetic-access")
    @Override
    public BytesRef get(int docID) {
      try {
        // NOTE: this isn't quite kosher, because in general
        // multiple threads can call BinaryDV.get which would
        // then conflict on the single ordinals instance, but
        // because this impl is only used for merging, we know
        // only 1 thread calls us:
        ordsReader.get(docID, ordinals);

        // map the ordinals
        for (int i = 0; i < ordinals.length; i++) {
          ordinals.ints[i] = ordinalMap[ordinals.ints[i]];
        }

        return encode(ordinals);
      } catch (IOException e) {
        throw new RuntimeException("error reading category ordinals for doc " + docID, e);
      }
    }
  }

  private final int[] ordinalMap;
  private final InnerFacetsConfig facetsConfig;

  /**
   * Wraps the given {@link AtomicReader}, re-mapping its facet ordinals
   * according to the given ordinal map.
   */
  public OrdinalMappingAtomicReader(AtomicReader in, int[] ordinalMap) {
    super(in);
    this.ordinalMap = ordinalMap;
    facetsConfig = new InnerFacetsConfig();
  }

  /**
   * Expert: encodes category ordinals into a BytesRef. Override in case you
   * use a custom encoding, other than the default done by FacetsConfig.
   */
  protected BytesRef encode(IntsRef ordinals) {
    return facetsConfig.dedupAndEncode(ordinals);
  }

  /**
   * Expert: override in case you used a custom encoding for the categories
   * under this field.
   */
  protected OrdinalsReader getOrdinalsReader(String field) {
    return new DocValuesOrdinalsReader(field);
  }

  @Override
  public BinaryDocValues getBinaryDocValues(String field) throws IOException {
    final OrdinalsReader ordsReader = getOrdinalsReader(field);
    return new OrdinalMappingBinaryDocValues(ordsReader.getReader(in.getContext()));
  }
}
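The two "Expert" hooks above are the extension points for indexes whose category ordinals were stored with a non-default encoding. A minimal sketch of plugging in a custom codec (MyOrdinalsReader and myCustomEncode are hypothetical placeholders for whatever decoder/encoder pair was actually used at indexing time; leaf and ordmap are the segment reader and ordinal map from the javadoc example above):

  // Sketch only: MyOrdinalsReader / myCustomEncode are hypothetical.
  AtomicReader wrapped = new OrdinalMappingAtomicReader(leaf, ordmap) {
    @Override
    protected OrdinalsReader getOrdinalsReader(String field) {
      // decode ordinals the same way they were originally indexed
      return new MyOrdinalsReader(field);
    }

    @Override
    protected BytesRef encode(IntsRef ordinals) {
      // re-encode the (already re-mapped) ordinals in the custom format
      return myCustomEncode(ordinals);
    }
  };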
TaxonomyMergeUtils.java (new file):
@@ -0,0 +1,66 @@

package org.apache.lucene.facet.taxonomy;

import java.io.IOException;
import java.util.List;

import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.OrdinalMap;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.store.Directory;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Utility methods for merging index and taxonomy directories.
 * @lucene.experimental
 */
public class TaxonomyMergeUtils {

  /**
   * Merges the given taxonomy and index directories and commits the changes to
   * the given writers.
   */
  public static void merge(Directory srcIndexDir, Directory srcTaxDir, OrdinalMap map, IndexWriter destIndexWriter,
      DirectoryTaxonomyWriter destTaxWriter) throws IOException {
    // merge the taxonomies
    destTaxWriter.addTaxonomy(srcTaxDir, map);
    int[] ordinalMap = map.getMap();
    DirectoryReader reader = DirectoryReader.open(srcIndexDir);
    List<AtomicReaderContext> leaves = reader.leaves();
    int numReaders = leaves.size();
    AtomicReader[] wrappedLeaves = new AtomicReader[numReaders];
    for (int i = 0; i < numReaders; i++) {
      wrappedLeaves[i] = new OrdinalMappingAtomicReader(leaves.get(i).reader(), ordinalMap);
    }
    try {
      destIndexWriter.addIndexes(new MultiReader(wrappedLeaves));

      // commit changes to taxonomy and index respectively.
      destTaxWriter.commit();
      destIndexWriter.commit();
    } finally {
      reader.close();
    }
  }
}
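For reference, a caller-side sketch of TaxonomyMergeUtils.merge, following the same pattern as the test below (the Directory variables and matchVersion are placeholders; a null analyzer mirrors the test, since addIndexes does not analyze text):

  IndexWriter destIndexWriter = new IndexWriter(destIndexDir,
      new IndexWriterConfig(matchVersion, null)); // matchVersion: placeholder Version constant
  DirectoryTaxonomyWriter destTaxWriter = new DirectoryTaxonomyWriter(destTaxDir);
  try {
    // merges srcIndexDir/srcTaxDir into the destination pair, re-mapping ordinals on the fly
    TaxonomyMergeUtils.merge(srcIndexDir, srcTaxDir, new MemoryOrdinalMap(),
        destIndexWriter, destTaxWriter);
  } finally {
    IOUtils.close(destIndexWriter, destTaxWriter);
  }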
OrdinalMappingReaderTest.java (new file):
@@ -0,0 +1,127 @@

package org.apache.lucene.facet.taxonomy.utils;

import java.io.IOException;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.facet.FacetField;
import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.FacetTestCase;
import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.LabelAndValue;
import org.apache.lucene.facet.taxonomy.FastTaxonomyFacetCounts;
import org.apache.lucene.facet.taxonomy.TaxonomyMergeUtils;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.MemoryOrdinalMap;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.IOUtils;
import org.junit.Before;
import org.junit.Test;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

public class OrdinalMappingReaderTest extends FacetTestCase {

  private static final int NUM_DOCS = 100;
  private FacetsConfig facetConfig = new FacetsConfig();

  @Before
  @Override
  public void setUp() throws Exception {
    super.setUp();
    facetConfig.setMultiValued("tag", true);
  }

  @Test
  public void testTaxonomyMergeUtils() throws Exception {
    Directory dir = newDirectory();
    Directory taxDir = newDirectory();
    buildIndexWithFacets(dir, taxDir, true);

    Directory dir1 = newDirectory();
    Directory taxDir1 = newDirectory();
    buildIndexWithFacets(dir1, taxDir1, false);

    IndexWriter destIndexWriter = new IndexWriter(dir1, new IndexWriterConfig(TEST_VERSION_CURRENT, null));
    DirectoryTaxonomyWriter destTaxWriter = new DirectoryTaxonomyWriter(taxDir1);
    try {
      TaxonomyMergeUtils.merge(dir, taxDir, new MemoryOrdinalMap(), destIndexWriter, destTaxWriter);
    } finally {
      IOUtils.close(destIndexWriter, destTaxWriter);
    }

    verifyResults(dir1, taxDir1);
    dir1.close();
    taxDir1.close();
    dir.close();
    taxDir.close();
  }

  private void verifyResults(Directory dir, Directory taxDir) throws IOException {
    DirectoryReader reader1 = DirectoryReader.open(dir);
    DirectoryTaxonomyReader taxReader = new DirectoryTaxonomyReader(taxDir);
    IndexSearcher searcher = newSearcher(reader1);

    FacetsCollector collector = new FacetsCollector();
    FacetsCollector.search(searcher, new MatchAllDocsQuery(), 10, collector);
    Facets facets = new FastTaxonomyFacetCounts(taxReader, facetConfig, collector);
    FacetResult result = facets.getTopChildren(10, "tag");

    for (LabelAndValue lv : result.labelValues) {
      int weight = lv.value.intValue();
      String label = lv.label;
      if (VERBOSE) {
        System.out.println(label + ": " + weight);
      }
      assertEquals(NUM_DOCS, weight);
    }
    reader1.close();
    taxReader.close();
  }

  private void buildIndexWithFacets(Directory dir, Directory taxDir, boolean asc) throws IOException {
    IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT,
        new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false));
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);

    DirectoryTaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxDir);
    for (int i = 1; i <= NUM_DOCS; i++) {
      Document doc = new Document();
      for (int j = i; j <= NUM_DOCS; j++) {
        int facetValue = asc ? j : NUM_DOCS - j;
        doc.add(new FacetField("tag", Integer.toString(facetValue)));
      }
      writer.addDocument(facetConfig.build(taxonomyWriter, doc));
    }
    taxonomyWriter.commit();
    taxonomyWriter.close();
    writer.commit();
    writer.close();
  }
}
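A note on the test's design: buildIndexWithFacets adds the "tag" labels in ascending order for one source index and descending order for the other (asc ? j : NUM_DOCS - j), so the same label is assigned different ordinals in the two taxonomies, while each label's counts across the two indexes sum to exactly NUM_DOCS. The assertEquals(NUM_DOCS, weight) check therefore only passes if the ordinals were correctly re-mapped during the merge.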