mirror of https://github.com/apache/lucene.git
SOLR-13688: Run the bin/solr export command multithreaded
This commit is contained in:
parent 06dd37e907
commit 03fd3d3923
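The change replaces the old single-threaded cursorMark export with a producer/consumer pipeline: one producer per shard streams documents into a bounded queue, a single consumer drains the queue into the output sink, and a sentinel document (EOFDOC) marks the end of the stream. Below is a minimal, self-contained sketch of that pattern, using hypothetical names and plain strings in place of SolrDocuments; the real implementation is ExportTool.MultiThreadedRunner in the diff that follows.

// Sketch only: bounded-queue producer/consumer with an end-of-stream sentinel,
// mirroring the structure of MultiThreadedRunner (not the committed code itself).
import java.util.List;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

public class QueuedExportSketch {
  // Distinct instance so the consumer can recognize it by identity, like EOFDOC.
  private static final String EOF = new String("EOF");

  public static void main(String[] args) throws Exception {
    // Stand-ins for per-shard result streams.
    List<List<String>> shards = List.of(
        List.of("doc1", "doc2"), List.of("doc3"), List.of("doc4", "doc5"));
    BlockingQueue<String> queue = new ArrayBlockingQueue<>(1000);
    ExecutorService producers = Executors.newFixedThreadPool(shards.size());
    ExecutorService consumer = Executors.newSingleThreadExecutor();
    CountDownLatch producerLatch = new CountDownLatch(shards.size());
    CountDownLatch consumerLatch = new CountDownLatch(1);

    // Single consumer: drains the queue until it sees the sentinel.
    consumer.submit(() -> {
      try {
        while (true) {
          String doc = queue.poll(30, TimeUnit.SECONDS);
          if (doc == EOF || doc == null) break;     // identity check, like EOFDOC
          System.out.println("writing " + doc);     // stands in for sink.accept(doc)
        }
      } catch (InterruptedException ignored) {
      } finally {
        consumerLatch.countDown();
      }
    });

    // One producer per shard: pushes documents into the bounded queue.
    for (List<String> shard : shards) {
      producers.submit(() -> {
        try {
          for (String doc : shard) queue.offer(doc, 10, TimeUnit.SECONDS);
        } catch (InterruptedException ignored) {
        } finally {
          producerLatch.countDown();
        }
      });
    }

    producerLatch.await();                    // all shards drained
    queue.offer(EOF, 10, TimeUnit.SECONDS);   // tell the consumer to stop
    consumerLatch.await();
    producers.shutdownNow();
    consumer.shutdownNow();
  }
}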
ExportTool.java:

@@ -17,42 +17,65 @@
 
 package org.apache.solr.util;
 
+import java.io.BufferedOutputStream;
+import java.io.File;
 import java.io.FileOutputStream;
 import java.io.IOException;
+import java.io.OutputStream;
 import java.io.OutputStreamWriter;
 import java.io.PrintStream;
+import java.io.Writer;
 import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
 import java.util.Collections;
+import java.util.HashMap;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Objects;
 import java.util.Set;
+import java.util.concurrent.ArrayBlockingQueue;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.function.BiConsumer;
+import java.util.function.Consumer;
 
 import com.google.common.collect.ImmutableSet;
 import org.apache.commons.cli.CommandLine;
 import org.apache.commons.cli.Option;
 import org.apache.commons.cli.OptionBuilder;
-import org.apache.solr.client.solrj.SolrClient;
 import org.apache.solr.client.solrj.SolrQuery;
 import org.apache.solr.client.solrj.SolrRequest;
 import org.apache.solr.client.solrj.SolrServerException;
 import org.apache.solr.client.solrj.StreamingResponseCallback;
 import org.apache.solr.client.solrj.impl.CloudSolrClient;
+import org.apache.solr.client.solrj.impl.ClusterStateProvider;
+import org.apache.solr.client.solrj.impl.HttpSolrClient;
 import org.apache.solr.client.solrj.impl.StreamingBinaryResponseParser;
 import org.apache.solr.client.solrj.request.GenericSolrRequest;
-import org.apache.solr.client.solrj.request.QueryRequest;
 import org.apache.solr.common.SolrDocument;
-import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.SolrDocumentList;
+import org.apache.solr.common.cloud.DocCollection;
+import org.apache.solr.common.cloud.Replica;
+import org.apache.solr.common.cloud.Slice;
+import org.apache.solr.common.params.CommonParams;
 import org.apache.solr.common.params.CursorMarkParams;
 import org.apache.solr.common.params.MapSolrParams;
-import org.apache.solr.common.util.FastWriter;
+import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.common.util.ExecutorUtil;
 import org.apache.solr.common.util.JavaBinCodec;
 import org.apache.solr.common.util.NamedList;
-import org.apache.solr.common.util.SolrJSONWriter;
+import org.apache.solr.common.util.StrUtils;
+import org.noggit.CharArr;
+import org.noggit.JSONWriter;
 
 import static org.apache.solr.common.params.CommonParams.FL;
 import static org.apache.solr.common.params.CommonParams.JAVABIN;
+import static org.apache.solr.common.params.CommonParams.Q;
+import static org.apache.solr.common.params.CommonParams.SORT;
+import static org.apache.solr.common.util.JavaBinCodec.SOLRINPUTDOC;
 
 public class ExportTool extends SolrCLI.ToolBase {
   @Override
@@ -65,7 +88,7 @@ public class ExportTool extends SolrCLI.ToolBase {
     return OPTIONS;
   }
 
-  public static class Info {
+  public static abstract class Info {
     String baseurl;
     String format;
     String query;
@@ -73,10 +96,12 @@ public class ExportTool extends SolrCLI.ToolBase {
    String out;
    String fields;
    long limit = 100;
-    long docsWritten = 0;
+    AtomicLong docsWritten = new AtomicLong(0);
+    int bufferSize = 1024 * 1024;
    PrintStream output;
-    //for testing purposes only
-    public SolrClient solrClient;
+    String uniqueKey;
+    CloudSolrClient solrClient;
+    DocsSink sink;
 
 
    public Info(String url) {
@@ -117,60 +142,24 @@ public class ExportTool extends SolrCLI.ToolBase {
      return JAVABIN.equals(format) ? new JavabinSink(this) : new JsonSink(this);
    }
 
-    void exportDocsWithCursorMark() throws SolrServerException, IOException {
-      DocsSink sink = getSink();
+    abstract void exportDocs() throws Exception;
+
+    void fetchUniqueKey() throws SolrServerException, IOException {
      solrClient = new CloudSolrClient.Builder(Collections.singletonList(baseurl)).build();
-      NamedList<Object> rsp1 = solrClient.request(new GenericSolrRequest(SolrRequest.METHOD.GET, "/schema/uniquekey",
+      NamedList<Object> response = solrClient.request(new GenericSolrRequest(SolrRequest.METHOD.GET, "/schema/uniquekey",
          new MapSolrParams(Collections.singletonMap("collection", coll))));
-      String uniqueKey = (String) rsp1.get("uniqueKey");
-
-      sink.start();
-      try {
-        NamedList<Object> rsp;
-        SolrQuery q = (new SolrQuery(query))
-            .setParam("collection", coll)
-            .setRows(100)
-            .setSort(SolrQuery.SortClause.asc(uniqueKey));
-        if (fields != null) {
-          q.setParam(FL, fields);
-        }
-
-        String cursorMark = CursorMarkParams.CURSOR_MARK_START;
-        boolean done = false;
-        StreamingResponseCallback streamer = getStreamer(sink);
-
-        if(output!= null) output.println("Exporting data to : "+ out);
-        while (!done) {
-          if (docsWritten >= limit) break;
-          QueryRequest request = new QueryRequest(q);
-          request.setResponseParser(new StreamingBinaryResponseParser(streamer));
-          q.set(CursorMarkParams.CURSOR_MARK_PARAM, cursorMark);
-          rsp = solrClient.request(request);
-          String nextCursorMark = (String) rsp.get(CursorMarkParams.CURSOR_MARK_NEXT);
-          if (nextCursorMark == null || Objects.equals(cursorMark, nextCursorMark)) {
-            break;
-          }
-          cursorMark = nextCursorMark;
-          if(output!= null) output.print(".");
-        }
-        if(output!= null) output.println("\n DONE!");
-      } finally {
-        sink.end();
-        solrClient.close();
-      }
+      uniqueKey = (String) response.get("uniqueKey");
    }
 
-    private StreamingResponseCallback getStreamer(DocsSink sink) {
+    public static StreamingResponseCallback getStreamer(Consumer<SolrDocument> sink) {
      return new StreamingResponseCallback() {
        @Override
        public void streamSolrDocument(SolrDocument doc) {
          try {
            sink.accept(doc);
-          } catch (IOException e) {
+          } catch (Exception e) {
            throw new RuntimeException(e);
          }
-          docsWritten++;
        }
 
        @Override
@@ -186,21 +175,24 @@ public class ExportTool extends SolrCLI.ToolBase {
 
  @Override
  protected void runImpl(CommandLine cli) throws Exception {
-    Info info = new Info(cli.getOptionValue("url"));
+    String url = cli.getOptionValue("url");
+    Info info = new MultiThreadedRunner(url);
    info.query = cli.getOptionValue("query", "*:*");
    info.setOutFormat(cli.getOptionValue("out"), cli.getOptionValue("format"));
    info.fields = cli.getOptionValue("fields");
    info.setLimit(cli.getOptionValue("limit", "100"));
    info.output = super.stdout;
-    info.exportDocsWithCursorMark();
+    info.exportDocs();
  }
 
  interface DocsSink {
-    void start() throws IOException;
+    default void start() throws IOException {
+    }
 
-    void accept(SolrDocument document) throws IOException;
+    void accept(SolrDocument document) throws IOException, InterruptedException;
 
-    void end() throws IOException;
+    default void end() throws IOException {
+    }
  }
 
  private static final Option[] OPTIONS = {
@@ -236,11 +228,13 @@ public class ExportTool extends SolrCLI.ToolBase {
          .create("fields")
  };
 
-  private static class JsonSink implements DocsSink {
+  static class JsonSink implements DocsSink {
    private final Info info;
-    private SolrJSONWriter jsonw;
-    private FastWriter writer;
-    private FileOutputStream fos;
+    private CharArr charArr = new CharArr(1024 * 2);
+    JSONWriter jsonWriter = new JSONWriter(charArr, -1);
+    private Writer writer;
+    private OutputStream fos;
+    public AtomicLong docs = new AtomicLong();
 
    public JsonSink(Info info) {
      this.info = info;
@@ -249,24 +243,27 @@ public class ExportTool extends SolrCLI.ToolBase {
    @Override
    public void start() throws IOException {
      fos = new FileOutputStream(info.out);
-      writer = FastWriter.wrap(new OutputStreamWriter(fos, StandardCharsets.UTF_8));
-      jsonw = new SolrJSONWriter(writer);
-      jsonw.setIndent(false);
+      if (info.bufferSize > 0) {
+        fos = new BufferedOutputStream(fos, info.bufferSize);
+      }
+      writer = new OutputStreamWriter(fos, StandardCharsets.UTF_8);
 
    }
 
    @Override
    public void end() throws IOException {
-      jsonw.close();
+      writer.flush();
+      fos.flush();
      fos.close();
 
    }
 
    @Override
-    public void accept(SolrDocument doc) throws IOException {
+    public synchronized void accept(SolrDocument doc) throws IOException {
+      docs.incrementAndGet();
+      charArr.reset();
      Map m = new LinkedHashMap(doc.size());
      doc.forEach((s, field) -> {
-        if (s.equals("_version_")) return;
+        if (s.equals("_version_") || s.equals("_root_")) return;
        if (field instanceof List) {
          if (((List) field).size() == 1) {
            field = ((List) field).get(0);
@@ -274,17 +271,16 @@ public class ExportTool extends SolrCLI.ToolBase {
        }
        m.put(s, field);
      });
-      jsonw.writeObj(m);
-      writer.flush();
+      jsonWriter.write(m);
+      writer.write(charArr.getArray(), charArr.getStart(), charArr.getEnd());
      writer.append('\n');
 
    }
  }
 
  private static class JavabinSink implements DocsSink {
    private final Info info;
    JavaBinCodec codec;
-    FileOutputStream fos;
+    OutputStream fos;
 
    public JavabinSink(Info info) {
      this.info = info;
@@ -293,6 +289,9 @@ public class ExportTool extends SolrCLI.ToolBase {
    @Override
    public void start() throws IOException {
      fos = new FileOutputStream(info.out);
+      if (info.bufferSize > 0) {
+        fos = new BufferedOutputStream(fos, info.bufferSize);
+      }
      codec = new JavaBinCodec(fos, null);
      codec.writeTag(JavaBinCodec.NAMED_LST, 2);
      codec.writeStr("params");
@@ -306,23 +305,208 @@ public class ExportTool extends SolrCLI.ToolBase {
    public void end() throws IOException {
      codec.writeTag(JavaBinCodec.END);
      codec.close();
+      fos.flush();
      fos.close();
 
    }
+
+    private BiConsumer<String, Object> bic = new BiConsumer<>() {
+      @Override
+      public void accept(String s, Object o) {
+        try {
+          if (s.equals("_version_") || s.equals("_root_")) return;
+          codec.writeExternString(s);
+          codec.writeVal(o);
+        } catch (IOException e) {
+          throw new RuntimeException(e);
+        }
+      }
+    };
 
    @Override
-    public void accept(SolrDocument doc) throws IOException {
-      SolrInputDocument document = new SolrInputDocument();
-      doc.forEach((s, o) -> {
-        if (s.equals("_version_")) return;
-        if (o instanceof List) {
-          if (((List) o).size() == 1) o = ((List) o).get(0);
-        }
-        document.addField(s, o);
-      });
-
-      codec.writeSolrInputDocument(document);
-
+    public synchronized void accept(SolrDocument doc) throws IOException {
+      int sz = doc.size();
+      if (doc.containsKey("_version_")) sz--;
+      if (doc.containsKey("_root_")) sz--;
+      codec.writeTag(SOLRINPUTDOC, sz);
+      codec.writeFloat(1f); // document boost
+      doc.forEach(bic);
    }
  }
 
+  static class MultiThreadedRunner extends Info {
+    ExecutorService producerThreadpool, consumerThreadpool;
+    ArrayBlockingQueue<SolrDocument> queue = new ArrayBlockingQueue<>(1000);
+    SolrDocument EOFDOC = new SolrDocument();
+    volatile boolean failed = false;
+    Map<String, CoreHandler> corehandlers = new HashMap<>();
+
+    public MultiThreadedRunner(String url) {
+      super(url);
+    }
+
+    @Override
+    void exportDocs() throws Exception {
+      sink = getSink();
+      fetchUniqueKey();
+      ClusterStateProvider stateProvider = solrClient.getClusterStateProvider();
+      DocCollection coll = stateProvider.getCollection(this.coll);
+      Map<String, Slice> m = coll.getSlicesMap();
+      producerThreadpool = ExecutorUtil.newMDCAwareFixedThreadPool(m.size(),
+          new DefaultSolrThreadFactory("solrcli-exporter-producers"));
+      consumerThreadpool = ExecutorUtil.newMDCAwareFixedThreadPool(1,
+          new DefaultSolrThreadFactory("solrcli-exporter-consumer"));
+      sink.start();
+      CountDownLatch consumerlatch = new CountDownLatch(1);
+      try {
+        addConsumer(consumerlatch);
+        addProducers(m);
+        if (output != null) {
+          output.println("No. of shards : " + corehandlers.size());
+        }
+        CountDownLatch producerLatch = new CountDownLatch(corehandlers.size());
+        corehandlers.forEach((s, coreHandler) -> producerThreadpool.submit(() -> {
+          try {
+            coreHandler.exportDocsFromCore();
+          } catch (Exception e) {
+            if (output != null) output.println("Error exporting docs from : " + s);
+          }
+          producerLatch.countDown();
+        }));
+
+        producerLatch.await();
+        queue.offer(EOFDOC, 10, TimeUnit.SECONDS);
+        consumerlatch.await();
+      } finally {
+        sink.end();
+        solrClient.close();
+        producerThreadpool.shutdownNow();
+        consumerThreadpool.shutdownNow();
+        if (failed) {
+          try {
+            Files.delete(new File(out).toPath());
+          } catch (IOException e) {
+            //ignore
+          }
+        }
+      }
+    }
+
+    private void addProducers(Map<String, Slice> m) {
+      for (Map.Entry<String, Slice> entry : m.entrySet()) {
+        Slice slice = entry.getValue();
+        Replica replica = slice.getLeader();
+        if (replica == null) replica = slice.getReplicas().iterator().next(); // get a random replica
+        CoreHandler coreHandler = new CoreHandler(replica);
+        corehandlers.put(replica.getCoreName(), coreHandler);
+      }
+    }
+
+    private void addConsumer(CountDownLatch consumerlatch) {
+      consumerThreadpool.submit(() -> {
+        while (true) {
+          SolrDocument doc = null;
+          try {
+            doc = queue.poll(30, TimeUnit.SECONDS);
+          } catch (InterruptedException e) {
+            if (output != null) output.println("Consumer interrupted");
+            failed = true;
+            break;
+          }
+          if (doc == EOFDOC) break;
+          try {
+            if (docsWritten.get() > limit) continue;
+            sink.accept(doc);
+            docsWritten.incrementAndGet();
+          } catch (Exception e) {
+            if (output != null) output.println("Failed to write to file " + e.getMessage());
+            failed = true;
+          }
+        }
+        consumerlatch.countDown();
+      });
+    }
+
+    class CoreHandler {
+      final Replica replica;
+      long expectedDocs;
+      AtomicLong receivedDocs = new AtomicLong();
+
+      CoreHandler(Replica replica) {
+        this.replica = replica;
+      }
+
+      boolean exportDocsFromCore()
+          throws IOException, SolrServerException {
+        HttpSolrClient client = new HttpSolrClient.Builder(baseurl).build();
+        try {
+          expectedDocs = getDocCount(replica.getCoreName(), client);
+          GenericSolrRequest request;
+          ModifiableSolrParams params = new ModifiableSolrParams();
+          params.add(Q, query);
+          if (fields != null) params.add(FL, fields);
+          params.add(SORT, uniqueKey + " asc");
+          params.add(CommonParams.DISTRIB, "false");
+          params.add(CommonParams.ROWS, "1000");
+          String cursorMark = CursorMarkParams.CURSOR_MARK_START;
+          Consumer<SolrDocument> wrapper = doc -> {
+            try {
+              queue.offer(doc, 10, TimeUnit.SECONDS);
+              receivedDocs.incrementAndGet();
+            } catch (InterruptedException e) {
+              failed = true;
+              if (output != null) output.println("Failed to write docs from " + e.getMessage());
+            }
+          };
+          StreamingBinaryResponseParser responseParser = new StreamingBinaryResponseParser(getStreamer(wrapper));
+          while (true) {
+            if (failed) return false;
+            if (docsWritten.get() > limit) return true;
+            params.set(CursorMarkParams.CURSOR_MARK_PARAM, cursorMark);
+            request = new GenericSolrRequest(SolrRequest.METHOD.GET,
+                "/" + replica.getCoreName() + "/select", params);
+            request.setResponseParser(responseParser);
+            try {
+              NamedList<Object> rsp = client.request(request);
+              String nextCursorMark = (String) rsp.get(CursorMarkParams.CURSOR_MARK_NEXT);
+              if (nextCursorMark == null || Objects.equals(cursorMark, nextCursorMark)) {
+                if (output != null)
+                  output.println(StrUtils.formatString("\nExport complete for : {0}, docs : {1}", replica.getCoreName(), receivedDocs.get()));
+                if (expectedDocs != receivedDocs.get()) {
+                  if (output != null) {
+                    output.println(StrUtils.formatString("Could not download all docs for core {0}, expected: {1}, actual: {2}",
+                        replica.getCoreName(), expectedDocs, receivedDocs));
+                  }
+                  return false;
+                }
+                return true;
+              }
+              cursorMark = nextCursorMark;
+              if (output != null) output.print(".");
+            } catch (SolrServerException e) {
+              if (output != null) output.println("Error reading from server " + replica.getBaseUrl() + "/" + replica.getCoreName());
+              failed = true;
+              return false;
+            }
+          }
+        } finally {
+          client.close();
+        }
+      }
+    }
+  }
+
+  static long getDocCount(String coreName, HttpSolrClient client) throws SolrServerException, IOException {
+    SolrQuery q = new SolrQuery("*:*");
+    q.setRows(0);
+    q.add("distrib", "false");
+    GenericSolrRequest request = new GenericSolrRequest(SolrRequest.METHOD.GET,
+        "/" + coreName + "/select", q);
+    NamedList<Object> res = client.request(request);
+    SolrDocumentList sdl = (SolrDocumentList) res.get("response");
+    return sdl.getNumFound();
+  }
 }
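For reference, the new API can be driven the same way the test below drives it. This is a usage sketch, not part of the commit; the URL, output path, and field names are placeholder values, and the sketch sits in the tool's package because MultiThreadedRunner and its fields are package-private.

package org.apache.solr.util; // needed for the package-private API

public class ExportUsageSketch {
  public static void main(String[] args) throws Exception {
    // Base URL of any node plus the collection name, as in TestExportTool.
    ExportTool.Info info = new ExportTool.MultiThreadedRunner("http://localhost:8983/solr/techproducts");
    info.setOutFormat("/tmp/techproducts.json", "jsonl"); // or "javabin"
    info.fields = "id,name";      // optional comma-separated field list
    info.setLimit("-1");          // -1 exports all documents
    info.output = System.out;     // progress dots and per-shard summaries
    info.exportDocs();
  }
}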
TestExportTool.java:

@@ -19,14 +19,18 @@ package org.apache.solr.util;
 
 import java.io.File;
 import java.io.FileInputStream;
+import java.io.IOException;
 import java.io.InputStreamReader;
 import java.io.Reader;
 import java.nio.charset.StandardCharsets;
 import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
 
 import org.apache.lucene.util.TestUtil;
 import org.apache.solr.SolrTestCaseJ4;
 import org.apache.solr.client.solrj.SolrQuery;
+import org.apache.solr.client.solrj.impl.HttpSolrClient;
 import org.apache.solr.client.solrj.request.AbstractUpdateRequest;
 import org.apache.solr.client.solrj.request.CollectionAdminRequest;
 import org.apache.solr.client.solrj.request.JavaBinUpdateRequestCodec;
@@ -34,6 +38,9 @@ import org.apache.solr.client.solrj.request.UpdateRequest;
 import org.apache.solr.client.solrj.response.QueryResponse;
 import org.apache.solr.cloud.MiniSolrCloudCluster;
 import org.apache.solr.cloud.SolrCloudTestCase;
+import org.apache.solr.common.cloud.DocCollection;
+import org.apache.solr.common.cloud.Replica;
+import org.apache.solr.common.cloud.Slice;
 import org.apache.solr.common.util.FastInputStream;
 import org.apache.solr.common.util.JsonRecordReader;
@@ -71,82 +78,155 @@ public class TestExportTool extends SolrCloudTestCase {
      String url = cluster.getRandomJetty(random()).getBaseUrl() + "/" + COLLECTION_NAME;
 
 
-      ExportTool.Info info = new ExportTool.Info(url);
+      ExportTool.Info info = new ExportTool.MultiThreadedRunner(url);
 
      String absolutePath = tmpFileLoc + COLLECTION_NAME + random().nextInt(100000) + ".json";
      info.setOutFormat(absolutePath, "jsonl");
      info.setLimit("200");
-      info.exportDocsWithCursorMark();
+      info.fields = "id,desc_s";
+      info.exportDocs();
 
-      assertTrue(info.docsWritten >= 200);
-      JsonRecordReader jsonReader = JsonRecordReader.getInst("/", Arrays.asList("$FQN:/**"));
-      Reader rdr = new InputStreamReader(new FileInputStream( absolutePath), StandardCharsets.UTF_8);
-      try {
-        int[] count = new int[]{0};
-        jsonReader.streamRecords(rdr, (record, path) -> count[0]++);
-        assertTrue(count[0] >= 200);
-      } finally {
-        rdr.close();
-      }
+      assertJsonDocsCount(info, 200);
 
-      info = new ExportTool.Info(url);
+      info = new ExportTool.MultiThreadedRunner(url);
      absolutePath = tmpFileLoc + COLLECTION_NAME + random().nextInt(100000) + ".json";
      info.setOutFormat(absolutePath, "jsonl");
      info.setLimit("-1");
-      info.exportDocsWithCursorMark();
+      info.fields = "id,desc_s";
+      info.exportDocs();
 
-      assertTrue(info.docsWritten >= 1000);
-      jsonReader = JsonRecordReader.getInst("/", Arrays.asList("$FQN:/**"));
-      rdr = new InputStreamReader(new FileInputStream( absolutePath), StandardCharsets.UTF_8);
-      try {
-        int[] count = new int[]{0};
-        jsonReader.streamRecords(rdr, (record, path) -> count[0]++);
-        assertTrue(count[0] >= 1000);
-      } finally {
-        rdr.close();
-      }
+      assertJsonDocsCount(info, 1000);
 
-      info = new ExportTool.Info(url);
+      info = new ExportTool.MultiThreadedRunner(url);
      absolutePath = tmpFileLoc + COLLECTION_NAME + random().nextInt(100000) + ".javabin";
      info.setOutFormat(absolutePath, "javabin");
      info.setLimit("200");
-      info.exportDocsWithCursorMark();
-      assertTrue(info.docsWritten >= 200);
+      info.fields = "id,desc_s";
+      info.exportDocs();
 
-      FileInputStream fis = new FileInputStream(absolutePath);
-      try {
-        int[] count = new int[]{0};
-        FastInputStream in = FastInputStream.wrap(fis);
-        new JavaBinUpdateRequestCodec()
-            .unmarshal(in, (document, req, commitWithin, override) -> count[0]++);
-        assertTrue(count[0] >= 200);
-      } finally {
-        fis.close();
-      }
+      assertJavabinDocsCount(info, 200);
 
-      info = new ExportTool.Info(url);
+      info = new ExportTool.MultiThreadedRunner(url);
      absolutePath = tmpFileLoc + COLLECTION_NAME + random().nextInt(100000) + ".javabin";
      info.setOutFormat(absolutePath, "javabin");
      info.setLimit("-1");
-      info.exportDocsWithCursorMark();
-      assertTrue(info.docsWritten >= 1000);
-
-      fis = new FileInputStream(absolutePath);
-      try {
-        int[] count = new int[]{0};
-        FastInputStream in = FastInputStream.wrap(fis);
-        new JavaBinUpdateRequestCodec()
-            .unmarshal(in, (document, req, commitWithin, override) -> count[0]++);
-        assertTrue(count[0] >= 1000);
-      } finally {
-        fis.close();
-      }
+      info.fields = "id,desc_s";
+      info.exportDocs();
+      assertJavabinDocsCount(info, 1000);
 
    } finally {
      cluster.shutdown();
 
    }
  }
 
+  @Nightly
+  public void testVeryLargeCluster() throws Exception {
+    String COLLECTION_NAME = "veryLargeColl";
+    MiniSolrCloudCluster cluster = configureCluster(4)
+        .addConfig("conf", configset("cloud-minimal"))
+        .configure();
+    try {
+      CollectionAdminRequest
+          .createCollection(COLLECTION_NAME, "conf", 8, 1)
+          .setMaxShardsPerNode(10)
+          .process(cluster.getSolrClient());
+      cluster.waitForActiveCollection(COLLECTION_NAME, 8, 8);
+
+      String tmpFileLoc = new File(cluster.getBaseDir().toFile().getAbsolutePath() +
+          File.separator).getPath();
+      String url = cluster.getRandomJetty(random()).getBaseUrl() + "/" + COLLECTION_NAME;
+
+      int docCount = 0;
+
+      for (int j = 0; j < 4; j++) {
+        int bsz = 10000;
+        UpdateRequest ur = new UpdateRequest();
+        ur.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);
+        for (int i = 0; i < bsz; i++) {
+          ur.add("id", String.valueOf((j * bsz) + i), "desc_s", TestUtil.randomSimpleString(random(), 10, 50));
+        }
+        cluster.getSolrClient().request(ur, COLLECTION_NAME);
+        docCount += bsz;
+      }
+
+      QueryResponse qr = cluster.getSolrClient().query(COLLECTION_NAME, new SolrQuery("*:*").setRows(0));
+      assertEquals(docCount, qr.getResults().getNumFound());
+
+      DocCollection coll = cluster.getSolrClient().getClusterStateProvider().getCollection(COLLECTION_NAME);
+      HashMap<String, Long> docCounts = new HashMap<>();
+      long totalDocsFromCores = 0;
+      for (Slice slice : coll.getSlices()) {
+        Replica replica = slice.getLeader();
+        try (HttpSolrClient client = new HttpSolrClient.Builder(replica.getBaseUrl()).build()) {
+          long count = ExportTool.getDocCount(replica.getCoreName(), client);
+          docCounts.put(replica.getCoreName(), count);
+          totalDocsFromCores += count;
+        }
+      }
+      assertEquals(docCount, totalDocsFromCores);
+
+      ExportTool.MultiThreadedRunner info = null;
+      String absolutePath = null;
+
+      info = new ExportTool.MultiThreadedRunner(url);
+      info.output = System.out;
+      absolutePath = tmpFileLoc + COLLECTION_NAME + random().nextInt(100000) + ".javabin";
+      info.setOutFormat(absolutePath, "javabin");
+      info.setLimit("-1");
+      info.exportDocs();
+      assertJavabinDocsCount(info, docCount);
+      for (Map.Entry<String, Long> e : docCounts.entrySet()) {
+        assertEquals(e.getValue().longValue(), info.corehandlers.get(e.getKey()).receivedDocs.get());
+      }
+      info = new ExportTool.MultiThreadedRunner(url);
+      info.output = System.out;
+      absolutePath = tmpFileLoc + COLLECTION_NAME + random().nextInt(100000) + ".json";
+      info.setOutFormat(absolutePath, "jsonl");
+      info.fields = "id,desc_s";
+      info.setLimit("-1");
+      info.exportDocs();
+      long actual = ((ExportTool.JsonSink) info.sink).docs.get();
+      assertTrue("docs written : " + actual + ", docs produced : " + info.docsWritten.get(), actual >= docCount);
+      assertJsonDocsCount(info, docCount);
+    } finally {
+      cluster.shutdown();
+    }
+  }
+
+  private void assertJavabinDocsCount(ExportTool.Info info, int expected) throws IOException {
+    assertTrue("" + info.docsWritten.get() + " expected " + expected, info.docsWritten.get() >= expected);
+    FileInputStream fis = new FileInputStream(info.out);
+    try {
+      int[] count = new int[]{0};
+      FastInputStream in = FastInputStream.wrap(fis);
+      new JavaBinUpdateRequestCodec()
+          .unmarshal(in, (document, req, commitWithin, override) -> {
+            assertEquals(2, document.size());
+            count[0]++;
+          });
+      assertTrue(count[0] >= expected);
+    } finally {
+      fis.close();
+    }
+  }
+
+  private void assertJsonDocsCount(ExportTool.Info info, int expected) throws IOException {
+    assertTrue("" + info.docsWritten.get() + " expected " + expected, info.docsWritten.get() >= expected);
+
+    JsonRecordReader jsonReader;
+    Reader rdr;
+    jsonReader = JsonRecordReader.getInst("/", Arrays.asList("$FQN:/**"));
+    rdr = new InputStreamReader(new FileInputStream(info.out), StandardCharsets.UTF_8);
+    try {
+      int[] count = new int[]{0};
+      jsonReader.streamRecords(rdr, (record, path) -> count[0]++);
+      assertTrue(count[0] >= expected);
+    } finally {
+      rdr.close();
+    }
+  }
 }