mirror of https://github.com/apache/lucene.git
LUCENE-5321: remove Facet42DocValuesFormat and FacetCodec
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1538245 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
b4a343d6ba
commit
6befc56d8c
|
@ -205,6 +205,9 @@ API Changes:
|
||||||
* LUCENE-5313: Move preservePositionIncrements from setter to ctor in
|
* LUCENE-5313: Move preservePositionIncrements from setter to ctor in
|
||||||
Analyzing/FuzzySuggester. (Areek Zillur via Mike McCandless)
|
Analyzing/FuzzySuggester. (Areek Zillur via Mike McCandless)
|
||||||
|
|
||||||
|
* LUCENE-5321: Remove Facet42DocValuesFormat. Use DirectDocValuesFormat if you
|
||||||
|
want to load the category list into memory. (Shai Erera, Mike McCandless)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
|
|
||||||
* LUCENE-5225: The ToParentBlockJoinQuery only keeps tracks of the the child
|
* LUCENE-5225: The ToParentBlockJoinQuery only keeps tracks of the the child
|
||||||
|
|
|
@ -1,53 +0,0 @@
|
||||||
package org.apache.lucene.facet.codecs.facet42;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
import org.apache.lucene.index.BinaryDocValues;
|
|
||||||
import org.apache.lucene.store.DataInput;
|
|
||||||
import org.apache.lucene.util.BytesRef;
|
|
||||||
import org.apache.lucene.util.RamUsageEstimator;
|
|
||||||
import org.apache.lucene.util.packed.PackedInts;
|
|
||||||
|
|
||||||
class Facet42BinaryDocValues extends BinaryDocValues {
|
|
||||||
|
|
||||||
private final byte[] bytes;
|
|
||||||
private final PackedInts.Reader addresses;
|
|
||||||
|
|
||||||
Facet42BinaryDocValues(DataInput in) throws IOException {
|
|
||||||
int totBytes = in.readVInt();
|
|
||||||
bytes = new byte[totBytes];
|
|
||||||
in.readBytes(bytes, 0, totBytes);
|
|
||||||
addresses = PackedInts.getReader(in);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void get(int docID, BytesRef ret) {
|
|
||||||
int start = (int) addresses.get(docID);
|
|
||||||
ret.bytes = bytes;
|
|
||||||
ret.offset = start;
|
|
||||||
ret.length = (int) (addresses.get(docID+1)-start);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Returns approximate RAM bytes used */
|
|
||||||
public long ramBytesUsed() {
|
|
||||||
return RamUsageEstimator.sizeOf(bytes) + addresses.ramBytesUsed();
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
|
@ -1,126 +0,0 @@
|
||||||
package org.apache.lucene.facet.codecs.facet42;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
import org.apache.lucene.codecs.CodecUtil;
|
|
||||||
import org.apache.lucene.codecs.DocValuesConsumer;
|
|
||||||
import org.apache.lucene.index.FieldInfo;
|
|
||||||
import org.apache.lucene.index.IndexFileNames;
|
|
||||||
import org.apache.lucene.index.SegmentWriteState;
|
|
||||||
import org.apache.lucene.store.IndexOutput;
|
|
||||||
import org.apache.lucene.util.BytesRef;
|
|
||||||
import org.apache.lucene.util.IOUtils;
|
|
||||||
import org.apache.lucene.util.packed.PackedInts;
|
|
||||||
|
|
||||||
/** Writer for {@link Facet42DocValuesFormat}. */
|
|
||||||
public class Facet42DocValuesConsumer extends DocValuesConsumer {
|
|
||||||
|
|
||||||
final IndexOutput out;
|
|
||||||
final int maxDoc;
|
|
||||||
final float acceptableOverheadRatio;
|
|
||||||
|
|
||||||
public Facet42DocValuesConsumer(SegmentWriteState state) throws IOException {
|
|
||||||
this(state, PackedInts.DEFAULT);
|
|
||||||
}
|
|
||||||
|
|
||||||
public Facet42DocValuesConsumer(SegmentWriteState state, float acceptableOverheadRatio) throws IOException {
|
|
||||||
this.acceptableOverheadRatio = acceptableOverheadRatio;
|
|
||||||
boolean success = false;
|
|
||||||
try {
|
|
||||||
String fileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Facet42DocValuesFormat.EXTENSION);
|
|
||||||
out = state.directory.createOutput(fileName, state.context);
|
|
||||||
CodecUtil.writeHeader(out, Facet42DocValuesFormat.CODEC, Facet42DocValuesFormat.VERSION_CURRENT);
|
|
||||||
maxDoc = state.segmentInfo.getDocCount();
|
|
||||||
success = true;
|
|
||||||
} finally {
|
|
||||||
if (!success) {
|
|
||||||
IOUtils.closeWhileHandlingException(this);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void addNumericField(FieldInfo field, Iterable<Number> values) throws IOException {
|
|
||||||
throw new UnsupportedOperationException("FacetsDocValues can only handle binary fields");
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void addBinaryField(FieldInfo field, final Iterable<BytesRef> values) throws IOException {
|
|
||||||
// write the byte[] data
|
|
||||||
out.writeVInt(field.number);
|
|
||||||
|
|
||||||
long totBytes = 0;
|
|
||||||
for (BytesRef v : values) {
|
|
||||||
if (v != null) {
|
|
||||||
totBytes += v.length;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (totBytes > Integer.MAX_VALUE) {
|
|
||||||
throw new IllegalStateException("too many facets in one segment: Facet42DocValues cannot handle more than 2 GB facet data per segment");
|
|
||||||
}
|
|
||||||
|
|
||||||
out.writeVInt((int) totBytes);
|
|
||||||
|
|
||||||
for (BytesRef v : values) {
|
|
||||||
if (v != null) {
|
|
||||||
out.writeBytes(v.bytes, v.offset, v.length);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
PackedInts.Writer w = PackedInts.getWriter(out, maxDoc+1, PackedInts.bitsRequired(totBytes+1), acceptableOverheadRatio);
|
|
||||||
|
|
||||||
int address = 0;
|
|
||||||
for(BytesRef v : values) {
|
|
||||||
w.add(address);
|
|
||||||
if (v != null) {
|
|
||||||
address += v.length;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
w.add(address);
|
|
||||||
w.finish();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void addSortedField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrd) throws IOException {
|
|
||||||
throw new UnsupportedOperationException("FacetsDocValues can only handle binary fields");
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void addSortedSetField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrdCount, Iterable<Number> ords) throws IOException {
|
|
||||||
throw new UnsupportedOperationException("FacetsDocValues can only handle binary fields");
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void close() throws IOException {
|
|
||||||
boolean success = false;
|
|
||||||
try {
|
|
||||||
out.writeVInt(-1); // write EOF marker
|
|
||||||
success = true;
|
|
||||||
} finally {
|
|
||||||
if (success) {
|
|
||||||
IOUtils.close(out);
|
|
||||||
} else {
|
|
||||||
IOUtils.closeWhileHandlingException(out);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
|
@ -1,63 +0,0 @@
|
||||||
package org.apache.lucene.facet.codecs.facet42;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
import org.apache.lucene.codecs.DocValuesConsumer;
|
|
||||||
import org.apache.lucene.codecs.DocValuesProducer;
|
|
||||||
import org.apache.lucene.codecs.DocValuesFormat;
|
|
||||||
import org.apache.lucene.index.SegmentReadState;
|
|
||||||
import org.apache.lucene.index.SegmentWriteState;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* DocValues format that only handles binary doc values and
|
|
||||||
* is optimized for usage with facets. It uses more RAM than other
|
|
||||||
* formats in exchange for faster lookups.
|
|
||||||
*
|
|
||||||
* <p>
|
|
||||||
* <b>NOTE</b>: this format cannot handle more than 2 GB
|
|
||||||
* of facet data in a single segment. If your usage may hit
|
|
||||||
* this limit, you can either use Lucene's default
|
|
||||||
* DocValuesFormat, limit the maximum segment size in your
|
|
||||||
* MergePolicy, or send us a patch fixing the limitation.
|
|
||||||
*
|
|
||||||
* @lucene.experimental
|
|
||||||
*/
|
|
||||||
public final class Facet42DocValuesFormat extends DocValuesFormat {
|
|
||||||
|
|
||||||
public static final String CODEC = "FacetsDocValues";
|
|
||||||
public static final String EXTENSION = "fdv";
|
|
||||||
public static final int VERSION_START = 0;
|
|
||||||
public static final int VERSION_CURRENT = VERSION_START;
|
|
||||||
|
|
||||||
public Facet42DocValuesFormat() {
|
|
||||||
super("Facet42");
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
|
|
||||||
return new Facet42DocValuesConsumer(state);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException {
|
|
||||||
return new Facet42DocValuesProducer(state);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
|
@ -1,103 +0,0 @@
|
||||||
package org.apache.lucene.facet.codecs.facet42;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
import org.apache.lucene.codecs.CodecUtil;
|
|
||||||
import org.apache.lucene.codecs.DocValuesProducer;
|
|
||||||
import org.apache.lucene.index.BinaryDocValues;
|
|
||||||
import org.apache.lucene.index.FieldInfo;
|
|
||||||
import org.apache.lucene.index.IndexFileNames;
|
|
||||||
import org.apache.lucene.index.NumericDocValues;
|
|
||||||
import org.apache.lucene.index.SegmentReadState;
|
|
||||||
import org.apache.lucene.index.SortedDocValues;
|
|
||||||
import org.apache.lucene.index.SortedSetDocValues;
|
|
||||||
import org.apache.lucene.store.IndexInput;
|
|
||||||
import org.apache.lucene.util.Bits;
|
|
||||||
import org.apache.lucene.util.IOUtils;
|
|
||||||
|
|
||||||
class Facet42DocValuesProducer extends DocValuesProducer {
|
|
||||||
|
|
||||||
private final Map<Integer,Facet42BinaryDocValues> fields = new HashMap<Integer,Facet42BinaryDocValues>();
|
|
||||||
private final int maxDoc;
|
|
||||||
|
|
||||||
Facet42DocValuesProducer(SegmentReadState state) throws IOException {
|
|
||||||
String fileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Facet42DocValuesFormat.EXTENSION);
|
|
||||||
IndexInput in = state.directory.openInput(fileName, state.context);
|
|
||||||
this.maxDoc = state.segmentInfo.getDocCount();
|
|
||||||
boolean success = false;
|
|
||||||
try {
|
|
||||||
CodecUtil.checkHeader(in, Facet42DocValuesFormat.CODEC,
|
|
||||||
Facet42DocValuesFormat.VERSION_START,
|
|
||||||
Facet42DocValuesFormat.VERSION_START);
|
|
||||||
int fieldNumber = in.readVInt();
|
|
||||||
while (fieldNumber != -1) {
|
|
||||||
fields.put(fieldNumber, new Facet42BinaryDocValues(in));
|
|
||||||
fieldNumber = in.readVInt();
|
|
||||||
}
|
|
||||||
success = true;
|
|
||||||
} finally {
|
|
||||||
if (success) {
|
|
||||||
IOUtils.close(in);
|
|
||||||
} else {
|
|
||||||
IOUtils.closeWhileHandlingException(in);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public NumericDocValues getNumeric(FieldInfo field) throws IOException {
|
|
||||||
throw new UnsupportedOperationException("FacetsDocValues only implements binary");
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public BinaryDocValues getBinary(FieldInfo field) throws IOException {
|
|
||||||
return fields.get(field.number);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public SortedDocValues getSorted(FieldInfo field) throws IOException {
|
|
||||||
throw new UnsupportedOperationException("FacetsDocValues only implements binary");
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
|
|
||||||
throw new UnsupportedOperationException("FacetsDocValues only implements binary");
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Bits getDocsWithField(FieldInfo field) throws IOException {
|
|
||||||
return new Bits.MatchAllBits(maxDoc); // TODO: have codec impl this?
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void close() throws IOException {
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public long ramBytesUsed() {
|
|
||||||
long size = 0;
|
|
||||||
for (Facet42BinaryDocValues entry: fields.values()) {
|
|
||||||
size += entry.ramBytesUsed() + Integer.SIZE;
|
|
||||||
}
|
|
||||||
return size;
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,22 +0,0 @@
|
||||||
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
|
|
||||||
<!--
|
|
||||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
contributor license agreements. See the NOTICE file distributed with
|
|
||||||
this work for additional information regarding copyright ownership.
|
|
||||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
(the "License"); you may not use this file except in compliance with
|
|
||||||
the License. You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License.
|
|
||||||
-->
|
|
||||||
<html>
|
|
||||||
<body>
|
|
||||||
Codec + DocValuesFormat that are optimized for facets.
|
|
||||||
</body>
|
|
||||||
</html>
|
|
|
@ -1,79 +0,0 @@
|
||||||
package org.apache.lucene.facet.codecs.facet46;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.Set;
|
|
||||||
|
|
||||||
import org.apache.lucene.codecs.DocValuesFormat;
|
|
||||||
import org.apache.lucene.codecs.lucene46.Lucene46Codec;
|
|
||||||
import org.apache.lucene.facet.codecs.facet42.Facet42DocValuesFormat;
|
|
||||||
import org.apache.lucene.facet.params.CategoryListParams;
|
|
||||||
import org.apache.lucene.facet.params.FacetIndexingParams;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Same as {@link Lucene46Codec} except it uses {@link Facet42DocValuesFormat}
|
|
||||||
* for facet fields (faster-but-more-RAM-consuming doc values).
|
|
||||||
*
|
|
||||||
* <p>
|
|
||||||
* <b>NOTE</b>: this codec does not support facet partitions (see
|
|
||||||
* {@link FacetIndexingParams#getPartitionSize()}).
|
|
||||||
*
|
|
||||||
* <p>
|
|
||||||
* <b>NOTE</b>: this format cannot handle more than 2 GB
|
|
||||||
* of facet data in a single segment. If your usage may hit
|
|
||||||
* this limit, you can either use Lucene's default
|
|
||||||
* DocValuesFormat, limit the maximum segment size in your
|
|
||||||
* MergePolicy, or send us a patch fixing the limitation.
|
|
||||||
*
|
|
||||||
* @lucene.experimental
|
|
||||||
*/
|
|
||||||
public class Facet46Codec extends Lucene46Codec {
|
|
||||||
|
|
||||||
private final Set<String> facetFields;
|
|
||||||
private final DocValuesFormat facetsDVFormat = DocValuesFormat.forName("Facet42");
|
|
||||||
|
|
||||||
/** Default constructor, uses {@link FacetIndexingParams#DEFAULT}. */
|
|
||||||
public Facet46Codec() {
|
|
||||||
this(FacetIndexingParams.DEFAULT);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Initializes with the given {@link FacetIndexingParams}. Returns the proper
|
|
||||||
* {@link DocValuesFormat} for the fields that are returned by
|
|
||||||
* {@link FacetIndexingParams#getAllCategoryListParams()}.
|
|
||||||
*/
|
|
||||||
public Facet46Codec(FacetIndexingParams fip) {
|
|
||||||
if (fip.getPartitionSize() != Integer.MAX_VALUE) {
|
|
||||||
throw new IllegalArgumentException("this Codec does not support partitions");
|
|
||||||
}
|
|
||||||
this.facetFields = new HashSet<String>();
|
|
||||||
for (CategoryListParams clp : fip.getAllCategoryListParams()) {
|
|
||||||
facetFields.add(clp.field);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public DocValuesFormat getDocValuesFormatForField(String field) {
|
|
||||||
if (facetFields.contains(field)) {
|
|
||||||
return facetsDVFormat;
|
|
||||||
} else {
|
|
||||||
return super.getDocValuesFormatForField(field);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,22 +0,0 @@
|
||||||
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
|
|
||||||
<!--
|
|
||||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
contributor license agreements. See the NOTICE file distributed with
|
|
||||||
this work for additional information regarding copyright ownership.
|
|
||||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
(the "License"); you may not use this file except in compliance with
|
|
||||||
the License. You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License.
|
|
||||||
-->
|
|
||||||
<html>
|
|
||||||
<body>
|
|
||||||
Codec + DocValuesFormat that are optimized for facets.
|
|
||||||
</body>
|
|
||||||
</html>
|
|
|
@ -1,16 +0,0 @@
|
||||||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
# contributor license agreements. See the NOTICE file distributed with
|
|
||||||
# this work for additional information regarding copyright ownership.
|
|
||||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
# (the "License"); you may not use this file except in compliance with
|
|
||||||
# the License. You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
org.apache.lucene.facet.codecs.facet42.Facet42DocValuesFormat
|
|
|
@ -2,8 +2,6 @@ package org.apache.lucene.facet;
|
||||||
|
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
|
|
||||||
import org.apache.lucene.codecs.Codec;
|
|
||||||
import org.apache.lucene.facet.codecs.facet46.Facet46Codec;
|
|
||||||
import org.apache.lucene.facet.encoding.DGapIntEncoder;
|
import org.apache.lucene.facet.encoding.DGapIntEncoder;
|
||||||
import org.apache.lucene.facet.encoding.DGapVInt8IntEncoder;
|
import org.apache.lucene.facet.encoding.DGapVInt8IntEncoder;
|
||||||
import org.apache.lucene.facet.encoding.EightFlagsIntEncoder;
|
import org.apache.lucene.facet.encoding.EightFlagsIntEncoder;
|
||||||
|
@ -15,8 +13,6 @@ import org.apache.lucene.facet.encoding.UniqueValuesIntEncoder;
|
||||||
import org.apache.lucene.facet.encoding.VInt8IntEncoder;
|
import org.apache.lucene.facet.encoding.VInt8IntEncoder;
|
||||||
import org.apache.lucene.facet.params.CategoryListParams;
|
import org.apache.lucene.facet.params.CategoryListParams;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
import org.junit.AfterClass;
|
|
||||||
import org.junit.BeforeClass;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
@ -47,24 +43,6 @@ public abstract class FacetTestCase extends LuceneTestCase {
|
||||||
new SortingIntEncoder(new UniqueValuesIntEncoder(new DGapIntEncoder(new NOnesIntEncoder(4)))),
|
new SortingIntEncoder(new UniqueValuesIntEncoder(new DGapIntEncoder(new NOnesIntEncoder(4)))),
|
||||||
};
|
};
|
||||||
|
|
||||||
private static Codec savedDefault = null;
|
|
||||||
|
|
||||||
@BeforeClass
|
|
||||||
public static void beforeClassFacetTestCase() throws Exception {
|
|
||||||
if (random().nextDouble() < 0.3) {
|
|
||||||
savedDefault = Codec.getDefault(); // save to restore later
|
|
||||||
Codec.setDefault(new Facet46Codec());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@AfterClass
|
|
||||||
public static void afterClassFacetTestCase() throws Exception {
|
|
||||||
if (savedDefault != null) {
|
|
||||||
Codec.setDefault(savedDefault);
|
|
||||||
savedDefault = null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Returns a {@link CategoryListParams} with random {@link IntEncoder} and field. */
|
/** Returns a {@link CategoryListParams} with random {@link IntEncoder} and field. */
|
||||||
public static CategoryListParams randomCategoryListParams() {
|
public static CategoryListParams randomCategoryListParams() {
|
||||||
final String field = CategoryListParams.DEFAULT_FIELD + "$" + random().nextInt();
|
final String field = CategoryListParams.DEFAULT_FIELD + "$" + random().nextInt();
|
||||||
|
|
|
@ -31,7 +31,6 @@ import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.facet.FacetTestCase;
|
import org.apache.lucene.facet.FacetTestCase;
|
||||||
import org.apache.lucene.facet.FacetTestUtils;
|
import org.apache.lucene.facet.FacetTestUtils;
|
||||||
import org.apache.lucene.facet.codecs.facet46.Facet46Codec;
|
|
||||||
import org.apache.lucene.facet.index.FacetFields;
|
import org.apache.lucene.facet.index.FacetFields;
|
||||||
import org.apache.lucene.facet.params.CategoryListParams;
|
import org.apache.lucene.facet.params.CategoryListParams;
|
||||||
import org.apache.lucene.facet.params.FacetIndexingParams;
|
import org.apache.lucene.facet.params.FacetIndexingParams;
|
||||||
|
@ -257,39 +256,39 @@ public class TestDemoFacets extends FacetTestCase {
|
||||||
// LUCENE-4583: make sure if we require > 32 KB for one
|
// LUCENE-4583: make sure if we require > 32 KB for one
|
||||||
// document, we don't hit exc when using Facet42DocValuesFormat
|
// document, we don't hit exc when using Facet42DocValuesFormat
|
||||||
public void testManyFacetsInOneDocument() throws Exception {
|
public void testManyFacetsInOneDocument() throws Exception {
|
||||||
|
assumeTrue("default Codec doesn't support huge BinaryDocValues", _TestUtil.fieldSupportsHugeBinaryDocValues(CategoryListParams.DEFAULT_FIELD));
|
||||||
Directory dir = newDirectory();
|
Directory dir = newDirectory();
|
||||||
Directory taxoDir = newDirectory();
|
Directory taxoDir = newDirectory();
|
||||||
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
|
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
|
||||||
iwc.setCodec(new Facet46Codec());
|
|
||||||
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
|
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
|
||||||
DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
|
DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
|
||||||
|
|
||||||
FacetFields facetFields = new FacetFields(taxoWriter);
|
FacetFields facetFields = new FacetFields(taxoWriter);
|
||||||
|
|
||||||
int numLabels = _TestUtil.nextInt(random(), 40000, 100000);
|
int numLabels = _TestUtil.nextInt(random(), 40000, 100000);
|
||||||
|
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(newTextField("field", "text", Field.Store.NO));
|
doc.add(newTextField("field", "text", Field.Store.NO));
|
||||||
List<CategoryPath> paths = new ArrayList<CategoryPath>();
|
List<CategoryPath> paths = new ArrayList<CategoryPath>();
|
||||||
for(int i=0;i<numLabels;i++) {
|
for (int i = 0; i < numLabels; i++) {
|
||||||
paths.add(new CategoryPath("dim", "" + i));
|
paths.add(new CategoryPath("dim", "" + i));
|
||||||
}
|
}
|
||||||
facetFields.addFields(doc, paths);
|
facetFields.addFields(doc, paths);
|
||||||
writer.addDocument(doc);
|
writer.addDocument(doc);
|
||||||
|
|
||||||
// NRT open
|
// NRT open
|
||||||
IndexSearcher searcher = newSearcher(writer.getReader());
|
IndexSearcher searcher = newSearcher(writer.getReader());
|
||||||
writer.close();
|
writer.close();
|
||||||
|
|
||||||
// NRT open
|
// NRT open
|
||||||
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
|
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
|
||||||
taxoWriter.close();
|
taxoWriter.close();
|
||||||
|
|
||||||
FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(new CategoryPath("dim"), Integer.MAX_VALUE));
|
FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(new CategoryPath("dim"), Integer.MAX_VALUE));
|
||||||
|
|
||||||
// Aggregate the facet counts:
|
// Aggregate the facet counts:
|
||||||
FacetsCollector c = FacetsCollector.create(fsp, searcher.getIndexReader(), taxoReader);
|
FacetsCollector c = FacetsCollector.create(fsp, searcher.getIndexReader(), taxoReader);
|
||||||
|
|
||||||
// MatchAllDocsQuery is for "browsing" (counts facets
|
// MatchAllDocsQuery is for "browsing" (counts facets
|
||||||
// for all non-deleted docs in the index); normally
|
// for all non-deleted docs in the index); normally
|
||||||
// you'd use a "normal" query, and use MultiCollector to
|
// you'd use a "normal" query, and use MultiCollector to
|
||||||
|
@ -300,13 +299,13 @@ public class TestDemoFacets extends FacetTestCase {
|
||||||
FacetResultNode root = results.get(0).getFacetResultNode();
|
FacetResultNode root = results.get(0).getFacetResultNode();
|
||||||
assertEquals(numLabels, root.subResults.size());
|
assertEquals(numLabels, root.subResults.size());
|
||||||
Set<String> allLabels = new HashSet<String>();
|
Set<String> allLabels = new HashSet<String>();
|
||||||
for(FacetResultNode childNode : root.subResults) {
|
for (FacetResultNode childNode : root.subResults) {
|
||||||
assertEquals(2, childNode.label.length);
|
assertEquals(2, childNode.label.length);
|
||||||
allLabels.add(childNode.label.components[1]);
|
allLabels.add(childNode.label.components[1]);
|
||||||
assertEquals(1, (int) childNode.value);
|
assertEquals(1, (int) childNode.value);
|
||||||
}
|
}
|
||||||
assertEquals(numLabels, allLabels.size());
|
assertEquals(numLabels, allLabels.size());
|
||||||
|
|
||||||
IOUtils.close(searcher.getIndexReader(), taxoReader, dir, taxoDir);
|
IOUtils.close(searcher.getIndexReader(), taxoReader, dir, taxoDir);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -28,15 +28,11 @@ import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.MockAnalyzer;
|
import org.apache.lucene.analysis.MockAnalyzer;
|
||||||
import org.apache.lucene.codecs.Codec;
|
|
||||||
import org.apache.lucene.codecs.DocValuesFormat;
|
|
||||||
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
|
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.document.StringField;
|
import org.apache.lucene.document.StringField;
|
||||||
import org.apache.lucene.facet.FacetTestCase;
|
import org.apache.lucene.facet.FacetTestCase;
|
||||||
import org.apache.lucene.facet.FacetTestUtils;
|
import org.apache.lucene.facet.FacetTestUtils;
|
||||||
import org.apache.lucene.facet.codecs.facet42.Facet42DocValuesFormat;
|
|
||||||
import org.apache.lucene.facet.index.FacetFields;
|
import org.apache.lucene.facet.index.FacetFields;
|
||||||
import org.apache.lucene.facet.params.FacetIndexingParams;
|
import org.apache.lucene.facet.params.FacetIndexingParams;
|
||||||
import org.apache.lucene.facet.params.FacetSearchParams;
|
import org.apache.lucene.facet.params.FacetSearchParams;
|
||||||
|
@ -434,16 +430,6 @@ public class TestDrillSideways extends FacetTestCase {
|
||||||
|
|
||||||
boolean canUseDV = defaultCodecSupportsSortedSet();
|
boolean canUseDV = defaultCodecSupportsSortedSet();
|
||||||
|
|
||||||
// TestRuleSetupAndRestoreClassEnv can sometimes
|
|
||||||
// randomly pick the non-general Facet42DocValuesFormat:
|
|
||||||
DocValuesFormat dvf = Codec.getDefault().docValuesFormat();
|
|
||||||
if (dvf instanceof PerFieldDocValuesFormat) {
|
|
||||||
dvf = ((PerFieldDocValuesFormat) dvf).getDocValuesFormatForField("$facets");
|
|
||||||
}
|
|
||||||
if (dvf instanceof Facet42DocValuesFormat) {
|
|
||||||
canUseDV = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
while (aChance == 0.0) {
|
while (aChance == 0.0) {
|
||||||
aChance = random().nextDouble();
|
aChance = random().nextDouble();
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue