mirror of https://github.com/apache/lucene.git
SOLR-12843: Implement a MultiContentWriter in SolrJ to post multiple files/payload at once
This commit is contained in:
parent
dbed8bafe6
commit
b4d9b25f44
|
@ -138,6 +138,8 @@ New Features
|
||||||
|
|
||||||
* SOLR-12815: Implement maxOps limit for IndexSizeTrigger. (ab)
|
* SOLR-12815: Implement maxOps limit for IndexSizeTrigger. (ab)
|
||||||
|
|
||||||
|
* SOLR-12843: Implement a MultiContentWriter in SolrJ to post multiple files/payload at once (noble)
|
||||||
|
|
||||||
Other Changes
|
Other Changes
|
||||||
----------------------
|
----------------------
|
||||||
|
|
||||||
|
|
|
@ -142,7 +142,7 @@ public class UpdateRequestHandler extends ContentStreamHandlerBase implements Pe
|
||||||
registry.put("application/xml", new XMLLoader().init(p) );
|
registry.put("application/xml", new XMLLoader().init(p) );
|
||||||
registry.put("application/json", new JsonLoader().init(p) );
|
registry.put("application/json", new JsonLoader().init(p) );
|
||||||
registry.put("application/csv", new CSVLoader().init(p) );
|
registry.put("application/csv", new CSVLoader().init(p) );
|
||||||
registry.put("application/javabin", new JavabinLoader().init(p) );
|
registry.put("application/javabin", new JavabinLoader(instance).init(p) );
|
||||||
registry.put("text/csv", registry.get("application/csv") );
|
registry.put("text/csv", registry.get("application/csv") );
|
||||||
registry.put("text/xml", registry.get("application/xml") );
|
registry.put("text/xml", registry.get("application/xml") );
|
||||||
registry.put("text/json", registry.get("application/json"));
|
registry.put("text/json", registry.get("application/json"));
|
||||||
|
|
|
@ -19,6 +19,8 @@ package org.apache.solr.handler.loader;
|
||||||
import java.io.EOFException;
|
import java.io.EOFException;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Map.Entry;
|
import java.util.Map.Entry;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
@ -31,7 +33,11 @@ import org.apache.solr.common.params.ShardParams;
|
||||||
import org.apache.solr.common.params.SolrParams;
|
import org.apache.solr.common.params.SolrParams;
|
||||||
import org.apache.solr.common.params.UpdateParams;
|
import org.apache.solr.common.params.UpdateParams;
|
||||||
import org.apache.solr.common.util.ContentStream;
|
import org.apache.solr.common.util.ContentStream;
|
||||||
|
import org.apache.solr.common.util.ContentStreamBase;
|
||||||
|
import org.apache.solr.common.util.DataInputInputStream;
|
||||||
import org.apache.solr.common.util.FastInputStream;
|
import org.apache.solr.common.util.FastInputStream;
|
||||||
|
import org.apache.solr.common.util.JavaBinCodec;
|
||||||
|
import org.apache.solr.common.util.NamedList;
|
||||||
import org.apache.solr.handler.RequestHandlerUtils;
|
import org.apache.solr.handler.RequestHandlerUtils;
|
||||||
import org.apache.solr.request.SolrQueryRequest;
|
import org.apache.solr.request.SolrQueryRequest;
|
||||||
import org.apache.solr.response.SolrQueryResponse;
|
import org.apache.solr.response.SolrQueryResponse;
|
||||||
|
@ -46,6 +52,16 @@ import org.apache.solr.update.processor.UpdateRequestProcessor;
|
||||||
* @see org.apache.solr.common.util.JavaBinCodec
|
* @see org.apache.solr.common.util.JavaBinCodec
|
||||||
*/
|
*/
|
||||||
public class JavabinLoader extends ContentStreamLoader {
|
public class JavabinLoader extends ContentStreamLoader {
|
||||||
|
final ContentStreamLoader contentStreamLoader;
|
||||||
|
|
||||||
|
public JavabinLoader() {
|
||||||
|
this.contentStreamLoader = this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public JavabinLoader(ContentStreamLoader contentStreamLoader) {
|
||||||
|
super();
|
||||||
|
this.contentStreamLoader = contentStreamLoader;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void load(SolrQueryRequest req, SolrQueryResponse rsp, ContentStream stream, UpdateRequestProcessor processor) throws Exception {
|
public void load(SolrQueryRequest req, SolrQueryResponse rsp, ContentStream stream, UpdateRequestProcessor processor) throws Exception {
|
||||||
|
@ -62,6 +78,10 @@ public class JavabinLoader extends ContentStreamLoader {
|
||||||
|
|
||||||
private void parseAndLoadDocs(final SolrQueryRequest req, SolrQueryResponse rsp, InputStream stream,
|
private void parseAndLoadDocs(final SolrQueryRequest req, SolrQueryResponse rsp, InputStream stream,
|
||||||
final UpdateRequestProcessor processor) throws IOException {
|
final UpdateRequestProcessor processor) throws IOException {
|
||||||
|
if (req.getParams().getBool("multistream", false)) {
|
||||||
|
handleMultiStream(req, rsp, stream, processor);
|
||||||
|
return;
|
||||||
|
}
|
||||||
UpdateRequest update = null;
|
UpdateRequest update = null;
|
||||||
JavaBinUpdateRequestCodec.StreamingUpdateHandler handler = new JavaBinUpdateRequestCodec.StreamingUpdateHandler() {
|
JavaBinUpdateRequestCodec.StreamingUpdateHandler handler = new JavaBinUpdateRequestCodec.StreamingUpdateHandler() {
|
||||||
private AddUpdateCommand addCmd = null;
|
private AddUpdateCommand addCmd = null;
|
||||||
|
@ -116,6 +136,44 @@ public class JavabinLoader extends ContentStreamLoader {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void handleMultiStream(SolrQueryRequest req, SolrQueryResponse rsp, InputStream stream, UpdateRequestProcessor processor)
|
||||||
|
throws IOException {
|
||||||
|
FastInputStream in = FastInputStream.wrap(stream);
|
||||||
|
SolrParams old = req.getParams();
|
||||||
|
new JavaBinCodec() {
|
||||||
|
SolrParams params;
|
||||||
|
AddUpdateCommand addCmd = null;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<Object> readIterator(DataInputInputStream fis) throws IOException {
|
||||||
|
while (true) {
|
||||||
|
Object o = readVal(fis);
|
||||||
|
if (o == END_OBJ) break;
|
||||||
|
if (o instanceof NamedList) {
|
||||||
|
params = ((NamedList) o).toSolrParams();
|
||||||
|
} else {
|
||||||
|
try {
|
||||||
|
if (o instanceof byte[]) {
|
||||||
|
if (params != null) req.setParams(params);
|
||||||
|
byte[] buf = (byte[]) o;
|
||||||
|
contentStreamLoader.load(req, rsp, new ContentStreamBase.ByteArrayStream(buf, null), processor);
|
||||||
|
} else {
|
||||||
|
throw new RuntimeException("unsupported type ");
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
} finally {
|
||||||
|
params = null;
|
||||||
|
req.setParams(old);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return Collections.emptyList();
|
||||||
|
}
|
||||||
|
|
||||||
|
}.unmarshal(in);
|
||||||
|
}
|
||||||
|
|
||||||
private AddUpdateCommand getAddCommand(SolrQueryRequest req, SolrParams params) {
|
private AddUpdateCommand getAddCommand(SolrQueryRequest req, SolrParams params) {
|
||||||
AddUpdateCommand addCmd = new AddUpdateCommand(req);
|
AddUpdateCommand addCmd = new AddUpdateCommand(req);
|
||||||
addCmd.overwrite = params.getBool(UpdateParams.OVERWRITE, true);
|
addCmd.overwrite = params.getBool(UpdateParams.OVERWRITE, true);
|
||||||
|
|
|
@ -0,0 +1,123 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.solr.client.solrj.request;
|
||||||
|
|
||||||
|
import java.io.ByteArrayInputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.io.InputStreamReader;
|
||||||
|
import java.io.OutputStream;
|
||||||
|
import java.io.Reader;
|
||||||
|
import java.nio.ByteBuffer;
|
||||||
|
import java.util.Iterator;
|
||||||
|
|
||||||
|
import org.apache.solr.client.solrj.impl.BinaryRequestWriter;
|
||||||
|
import org.apache.solr.common.IteratorWriter;
|
||||||
|
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||||
|
import org.apache.solr.common.util.JavaBinCodec;
|
||||||
|
import org.apache.solr.common.util.NamedList;
|
||||||
|
import org.apache.solr.common.util.Pair;
|
||||||
|
|
||||||
|
import static org.apache.solr.common.params.UpdateParams.ASSUME_CONTENT_TYPE;
|
||||||
|
|
||||||
|
public class MultiContentWriterRequest extends AbstractUpdateRequest {
|
||||||
|
|
||||||
|
private final Iterator<Pair<NamedList, Object>> payload;
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @param m HTTP method
|
||||||
|
* @param path path to which to post to
|
||||||
|
* @param payload add the per doc params, The Object could be a ByteBuffer or byte[]
|
||||||
|
*/
|
||||||
|
|
||||||
|
public MultiContentWriterRequest(METHOD m, String path, Iterator<Pair<NamedList, Object>> payload) {
|
||||||
|
super(m, path);
|
||||||
|
params = new ModifiableSolrParams();
|
||||||
|
params.add("multistream", "true");
|
||||||
|
this.payload = payload;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public RequestWriter.ContentWriter getContentWriter(String expectedType) {
|
||||||
|
return new RequestWriter.ContentWriter() {
|
||||||
|
@Override
|
||||||
|
public void write(OutputStream os) throws IOException {
|
||||||
|
new JavaBinCodec().marshal((IteratorWriter) iw -> {
|
||||||
|
while (payload.hasNext()) {
|
||||||
|
Pair<NamedList, Object> next = payload.next();
|
||||||
|
|
||||||
|
if (next.second() instanceof ByteBuffer || next.second() instanceof byte[]) {
|
||||||
|
NamedList params = next.first();
|
||||||
|
if(params.get(ASSUME_CONTENT_TYPE) == null){
|
||||||
|
String detectedType = detect(next.second());
|
||||||
|
if(detectedType==null){
|
||||||
|
throw new RuntimeException("Unknown content type");
|
||||||
|
}
|
||||||
|
params.add(ASSUME_CONTENT_TYPE, detectedType);
|
||||||
|
}
|
||||||
|
iw.add(params);
|
||||||
|
iw.add(next.second());
|
||||||
|
} else {
|
||||||
|
throw new RuntimeException("payload value must be byte[] or ByteBuffer");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}, os);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getContentType() {
|
||||||
|
return "application/javabin";
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
public static String detect(Object o) throws IOException {
|
||||||
|
Reader rdr = null;
|
||||||
|
byte[] bytes = null;
|
||||||
|
if (o instanceof byte[]) bytes = (byte[]) o;
|
||||||
|
else if (o instanceof ByteBuffer) bytes = ((ByteBuffer) o).array();
|
||||||
|
rdr = new InputStreamReader(new ByteArrayInputStream(bytes));
|
||||||
|
String detectedContentType = null;
|
||||||
|
for (;;) {
|
||||||
|
int ch = rdr.read();
|
||||||
|
if (Character.isWhitespace(ch)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
int nextChar = -1;
|
||||||
|
// first non-whitespace chars
|
||||||
|
if (ch == '#' // single line comment
|
||||||
|
|| (ch == '/' && ((nextChar = rdr.read()) == '/' || nextChar == '*')) // single line or multi-line comment
|
||||||
|
|| (ch == '{' || ch == '[') // start of JSON object
|
||||||
|
)
|
||||||
|
{
|
||||||
|
detectedContentType = "application/json";
|
||||||
|
} else if (ch == '<') {
|
||||||
|
detectedContentType = "text/xml";
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return detectedContentType;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static ByteBuffer readByteBuffer(InputStream is) throws IOException {
|
||||||
|
BinaryRequestWriter.BAOS baos = new BinaryRequestWriter.BAOS();
|
||||||
|
org.apache.commons.io.IOUtils.copy(is, baos);
|
||||||
|
return ByteBuffer.wrap(baos.getbuf(), 0, baos.size());
|
||||||
|
}
|
||||||
|
}
|
|
@ -17,8 +17,11 @@
|
||||||
package org.apache.solr.client.solrj;
|
package org.apache.solr.client.solrj;
|
||||||
|
|
||||||
|
|
||||||
|
import java.io.FileInputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
import java.lang.invoke.MethodHandles;
|
import java.lang.invoke.MethodHandles;
|
||||||
|
import java.nio.ByteBuffer;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
|
@ -43,6 +46,7 @@ import org.apache.solr.client.solrj.request.AbstractUpdateRequest;
|
||||||
import org.apache.solr.client.solrj.request.AbstractUpdateRequest.ACTION;
|
import org.apache.solr.client.solrj.request.AbstractUpdateRequest.ACTION;
|
||||||
import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;
|
import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;
|
||||||
import org.apache.solr.client.solrj.request.LukeRequest;
|
import org.apache.solr.client.solrj.request.LukeRequest;
|
||||||
|
import org.apache.solr.client.solrj.request.MultiContentWriterRequest;
|
||||||
import org.apache.solr.client.solrj.request.QueryRequest;
|
import org.apache.solr.client.solrj.request.QueryRequest;
|
||||||
import org.apache.solr.client.solrj.request.StreamingUpdateRequest;
|
import org.apache.solr.client.solrj.request.StreamingUpdateRequest;
|
||||||
import org.apache.solr.client.solrj.request.UpdateRequest;
|
import org.apache.solr.client.solrj.request.UpdateRequest;
|
||||||
|
@ -64,11 +68,14 @@ import org.apache.solr.common.params.CommonParams;
|
||||||
import org.apache.solr.common.params.FacetParams;
|
import org.apache.solr.common.params.FacetParams;
|
||||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||||
import org.apache.solr.common.util.NamedList;
|
import org.apache.solr.common.util.NamedList;
|
||||||
|
import org.apache.solr.common.util.Pair;
|
||||||
|
import org.apache.solr.common.util.Utils;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import org.noggit.JSONParser;
|
import org.noggit.JSONParser;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import static org.apache.solr.common.params.UpdateParams.ASSUME_CONTENT_TYPE;
|
||||||
import static org.junit.internal.matchers.StringContains.containsString;
|
import static org.junit.internal.matchers.StringContains.containsString;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -671,6 +678,38 @@ abstract public class SolrExampleTests extends SolrExampleTestsBase
|
||||||
Assert.assertEquals( 10, rsp.getResults().getNumFound() );
|
Assert.assertEquals( 10, rsp.getResults().getNumFound() );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMultiContentWriterRequest() throws Exception {
|
||||||
|
SolrClient client = getSolrClient();
|
||||||
|
client.deleteByQuery("*:*");// delete everything!
|
||||||
|
client.commit();
|
||||||
|
QueryResponse rsp = client.query(new SolrQuery("*:*"));
|
||||||
|
Assert.assertEquals(0, rsp.getResults().getNumFound());
|
||||||
|
|
||||||
|
List<Pair<NamedList, Object>> docs = new ArrayList<>();
|
||||||
|
NamedList params = new NamedList();
|
||||||
|
docs.add(new Pair(params, getFileContent(params, "solrj/docs1.xml")));
|
||||||
|
|
||||||
|
params = new NamedList();
|
||||||
|
params.add(ASSUME_CONTENT_TYPE, "application/csv");
|
||||||
|
docs.add(new Pair(params, getFileContent(params, "solrj/books.csv")));
|
||||||
|
|
||||||
|
MultiContentWriterRequest up = new MultiContentWriterRequest(SolrRequest.METHOD.POST, "/update", docs.iterator());
|
||||||
|
up.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);
|
||||||
|
NamedList<Object> result = client.request(up);
|
||||||
|
System.out.println(result.jsonStr());
|
||||||
|
rsp = client.query(new SolrQuery("*:*"));
|
||||||
|
Assert.assertEquals(12, rsp.getResults().getNumFound());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private ByteBuffer getFileContent(NamedList nl, String name) throws IOException {
|
||||||
|
try (InputStream is = new FileInputStream(getFile(name))) {
|
||||||
|
return MultiContentWriterRequest.readByteBuffer(is);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testMultiContentStreamRequest() throws Exception {
|
public void testMultiContentStreamRequest() throws Exception {
|
||||||
SolrClient client = getSolrClient();
|
SolrClient client = getSolrClient();
|
||||||
|
|
Loading…
Reference in New Issue