SOLR-14443: Make SolrLogPostTool resilient to odd requests (#1525)

This commit is contained in:
Jason Gerlowski 2020-05-22 10:08:26 -04:00 committed by GitHub
parent de2bad9039
commit 78f4a5b8ff
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 114 additions and 86 deletions

View File

@ -27,6 +27,8 @@ import java.util.TreeMap;
import java.util.UUID; import java.util.UUID;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrClient; import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.SolrClient; import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.request.UpdateRequest; import org.apache.solr.client.solrj.request.UpdateRequest;
@ -96,13 +98,11 @@ public class SolrLogPostTool {
rec++; rec++;
UUID id = UUID.randomUUID(); UUID id = UUID.randomUUID();
doc.addField("id", id.toString()); doc.setField("id", id.toString());
doc.addField("file_s", fileName); doc.setField("file_s", fileName);
request.add(doc); request.add(doc);
if (rec == 300) { if (rec == 300) {
CLIO.out("Sending batch of 300 log records..."); sendBatch(client, request, false /* normal batch */);
request.process(client);
CLIO.out("Batch sent");
request = new UpdateRequest(); request = new UpdateRequest();
rec = 0; rec = 0;
} }
@ -113,17 +113,35 @@ public class SolrLogPostTool {
} }
if (rec > 0) { if (rec > 0) {
//Process last batch sendBatch(client, request, true /* last batch */);
CLIO.out("Sending last batch ...");
request.process(client);
client.commit();
CLIO.out("Committed");
} }
} finally { } finally {
client.close(); client.close();
} }
} }
/**
 * Sends one batch of log documents to Solr and, for the final batch, commits afterwards.
 *
 * <p>Failures are reported on the CLI error stream (message plus stack trace) instead of being
 * rethrown, so a failed send or commit does not abort the caller. The commit is still attempted
 * for the last batch even if sending that batch failed.
 *
 * @param client the Solr client the request is processed and committed against
 * @param request the update request holding the documents of this batch
 * @param lastRequest {@code true} if this is the final batch, which triggers the commit
 */
private static void sendBatch(SolrClient client, UpdateRequest request, boolean lastRequest) throws SolrServerException, IOException {
  // Announce which kind of batch is about to go out.
  if (lastRequest) {
    CLIO.out("Sending last batch ...");
  } else {
    CLIO.out("Sending batch of 300 log records...");
  }

  try {
    request.process(client);
    CLIO.out("Batch sent");
  } catch (Exception sendFailure) {
    CLIO.err("Batch sending failed: " + sendFailure.getMessage());
    sendFailure.printStackTrace(CLIO.getErrStream());
  }

  // Only the final batch is followed by a commit.
  if (!lastRequest) {
    return;
  }

  try {
    client.commit();
    CLIO.out("Committed");
  } catch (Exception commitFailure) {
    CLIO.err("Unable to commit documents: " + commitFailure.getMessage());
    commitFailure.printStackTrace(CLIO.getErrStream());
  }
}
static void gatherFiles(File rootFile, List<File> files) { static void gatherFiles(File rootFile, List<File> files) {
if(rootFile.isFile()) { if(rootFile.isFile()) {
@ -228,50 +246,48 @@ public class SolrLogPostTool {
return null; return null;
} }
/**
 * Sets {@code fieldName} to {@code fieldValue} on {@code doc} only when the document does not
 * already contain that field; an existing value is left untouched.
 *
 * @param doc the document to update
 * @param fieldName name of the field to populate
 * @param fieldValue value to store when the field is not yet present
 */
private void setFieldIfUnset(SolrInputDocument doc, String fieldName, String fieldValue) {
  final boolean alreadyPresent = doc.containsKey(fieldName);
  if (!alreadyPresent) {
    doc.setField(fieldName, fieldValue);
  }
}
/**
 * Builds the index document for an error log record.
 *
 * Populates date_dt (from parseDate), type_s ("error") and line_t (the raw line), then the
 * optional stack_t and root_cause_t fields, followed by the collection, core, shard and replica
 * fields parsed from the line.
 *
 * @param line the log line that introduced the error
 * @param trace the collected stack trace text; may be null
 * @return the populated document
 */
private SolrInputDocument parseError(String line, String trace) throws IOException { private SolrInputDocument parseError(String line, String trace) throws IOException {
SolrInputDocument doc = new SolrInputDocument(); SolrInputDocument doc = new SolrInputDocument();
doc.addField("date_dt", parseDate(line)); doc.setField("date_dt", parseDate(line));
doc.addField("type_s", "error"); doc.setField("type_s", "error");
doc.addField("line_t", line); doc.setField("line_t", line);
//Don't include traces that have only the %html header. //Don't include traces that have only the %html header.
if(trace != null && trace.length() > 6) { if(trace != null && trace.length() > 6) {
doc.addField("stack_t", trace); doc.setField("stack_t", trace);
} }
// The root cause is taken from the instance field "cause", with the "Caused by:" marker removed and the result trimmed.
if(this.cause != null) { if(this.cause != null) {
doc.addField("root_cause_t", cause.replace("Caused by:", "").trim()); doc.setField("root_cause_t", cause.replace("Caused by:", "").trim());
} }
doc.addField("collection_s", parseCollection(line)); doc.setField("collection_s", parseCollection(line));
doc.addField("core_s", parseCore(line)); doc.setField("core_s", parseCore(line));
doc.addField("shard_s", parseShard(line)); doc.setField("shard_s", parseShard(line));
doc.addField("replica_s", parseReplica(line)); doc.setField("replica_s", parseReplica(line));
return doc; return doc;
} }
/**
 * Builds the index document for a commit log record.
 *
 * Populates date_dt (from parseDate), type_s ("commit") and line_t (the raw line), the
 * soft_commit_s and open_searcher_s flags, and the collection, core, shard and replica fields
 * parsed from the line.
 *
 * @param line the commit log line
 * @return the populated document
 */
private SolrInputDocument parseCommit(String line) throws IOException { private SolrInputDocument parseCommit(String line) throws IOException {
SolrInputDocument doc = new SolrInputDocument(); SolrInputDocument doc = new SolrInputDocument();
doc.addField("date_dt", parseDate(line)); doc.setField("date_dt", parseDate(line));
doc.addField("type_s", "commit"); doc.setField("type_s", "commit");
doc.addField("line_t", line); doc.setField("line_t", line);
// soft_commit_s is "true" when the line contains "softCommit=true", otherwise "false".
if(line.contains("softCommit=true")) { doc.setField("soft_commit_s", Boolean.toString(line.contains("softCommit=true")));
doc.addField("soft_commit_s", "true");
} else {
doc.addField("soft_commit_s", "false");
}
// open_searcher_s is "true" when the line contains "openSearcher=true", otherwise "false".
if(line.contains("openSearcher=true")) { doc.setField("open_searcher_s", Boolean.toString(line.contains("openSearcher=true")));
doc.addField("open_searcher_s", "true");
} else {
doc.addField("open_searcher_s", "false");
}
doc.addField("collection_s", parseCollection(line)); doc.setField("collection_s", parseCollection(line));
doc.addField("core_s", parseCore(line)); doc.setField("core_s", parseCore(line));
doc.addField("shard_s", parseShard(line)); doc.setField("shard_s", parseShard(line));
doc.addField("replica_s", parseReplica(line)); doc.setField("replica_s", parseReplica(line));
return doc; return doc;
} }
@ -279,36 +295,36 @@ public class SolrLogPostTool {
private SolrInputDocument parseQueryRecord(String line) { private SolrInputDocument parseQueryRecord(String line) {
SolrInputDocument doc = new SolrInputDocument(); SolrInputDocument doc = new SolrInputDocument();
doc.addField("date_dt", parseDate(line)); doc.setField("date_dt", parseDate(line));
doc.addField("qtime_i", parseQTime(line)); doc.setField("qtime_i", parseQTime(line));
doc.addField("status_s", parseStatus(line)); doc.setField("status_s", parseStatus(line));
String path = parsePath(line); String path = parsePath(line);
doc.addField("path_s", path); doc.setField("path_s", path);
if(line.contains("hits=")) { if(line.contains("hits=")) {
doc.addField("hits_l", parseHits(line)); doc.setField("hits_l", parseHits(line));
} }
String params = parseParams(line); String params = parseParams(line);
doc.addField("params_t", params); doc.setField("params_t", params);
addParams(doc, params); addParams(doc, params);
doc.addField("collection_s", parseCollection(line)); doc.setField("collection_s", parseCollection(line));
doc.addField("core_s", parseCore(line)); doc.setField("core_s", parseCore(line));
doc.addField("node_s", parseNode(line)); doc.setField("node_s", parseNode(line));
doc.addField("shard_s", parseShard(line)); doc.setField("shard_s", parseShard(line));
doc.addField("replica_s", parseReplica(line)); doc.setField("replica_s", parseReplica(line));
if(path != null && path.contains("/admin")) { if(path != null && path.contains("/admin")) {
doc.addField("type_s", "admin"); doc.setField("type_s", "admin");
} else if(path != null && params.contains("/replication")) { } else if(path != null && params.contains("/replication")) {
doc.addField("type_s", "replication"); doc.setField("type_s", "replication");
} else if (path != null && path.contains("/get")) { } else if (path != null && path.contains("/get")) {
doc.addField("type_s", "get"); doc.setField("type_s", "get");
} else { } else {
doc.addField("type_s", "query"); doc.setField("type_s", "query");
} }
return doc; return doc;
@ -318,10 +334,10 @@ public class SolrLogPostTool {
/**
 * Builds the index document for a new-searcher log record.
 *
 * Populates date_dt (from parseDate), core_s (from parseNewSearcherCore), type_s ("newSearcher")
 * and line_t (the raw line).
 *
 * @param line the new-searcher log line
 * @return the populated document
 */
private SolrInputDocument parseNewSearch(String line) { private SolrInputDocument parseNewSearch(String line) {
SolrInputDocument doc = new SolrInputDocument(); SolrInputDocument doc = new SolrInputDocument();
doc.addField("date_dt", parseDate(line)); doc.setField("date_dt", parseDate(line));
doc.addField("core_s", parseNewSearcherCore(line)); doc.setField("core_s", parseNewSearcherCore(line));
doc.addField("type_s", "newSearcher"); doc.setField("type_s", "newSearcher");
doc.addField("line_t", line); doc.setField("line_t", line);
return doc; return doc;
} }
@ -338,21 +354,21 @@ public class SolrLogPostTool {
/**
 * Builds the index document for an update log record.
 *
 * Populates date_dt (from parseDate), type_s, the collection, core, shard and replica fields
 * parsed from the line, and line_t (the raw line).
 *
 * @param line the update log line
 * @return the populated document
 */
private SolrInputDocument parseUpdate(String line) { private SolrInputDocument parseUpdate(String line) {
SolrInputDocument doc = new SolrInputDocument(); SolrInputDocument doc = new SolrInputDocument();
doc.addField("date_dt", parseDate(line)); doc.setField("date_dt", parseDate(line));
// type_s: "deleteByQuery=" is checked before "delete="; anything else is recorded as a plain "update".
if(line.contains("deleteByQuery=")) { if(line.contains("deleteByQuery=")) {
doc.addField("type_s", "deleteByQuery"); doc.setField("type_s", "deleteByQuery");
} else if(line.contains("delete=")) { } else if(line.contains("delete=")) {
doc.addField("type_s", "delete"); doc.setField("type_s", "delete");
} else { } else {
doc.addField("type_s", "update"); doc.setField("type_s", "update");
} }
doc.addField("collection_s", parseCollection(line)); doc.setField("collection_s", parseCollection(line));
doc.addField("core_s", parseCore(line)); doc.setField("core_s", parseCore(line));
doc.addField("shard_s", parseShard(line)); doc.setField("shard_s", parseShard(line));
doc.addField("replica_s", parseReplica(line)); doc.setField("replica_s", parseReplica(line));
doc.addField("line_t", line); doc.setField("line_t", line);
return doc; return doc;
} }
@ -474,47 +490,51 @@ public class SolrLogPostTool {
return builder.toString(); return builder.toString();
} }
/**
 * Stores {@code fieldValue} under {@code fieldName} on {@code doc} by delegating to
 * {@code doc.setField}.
 *
 * NOTE(review): the name implies an existing value is replaced rather than appended to; that
 * depends on SolrInputDocument.setField semantics, so confirm there. No caller is visible in this
 * part of the file.
 *
 * @param doc the document to update
 * @param fieldName name of the field to set
 * @param fieldValue value to store
 */
private void addOrReplaceFieldValue(SolrInputDocument doc, String fieldName, String fieldValue) {
doc.setField(fieldName, fieldValue);
}
private void addParams(SolrInputDocument doc, String params) { private void addParams(SolrInputDocument doc, String params) {
String[] pairs = params.split("&"); String[] pairs = params.split("&");
for(String pair : pairs) { for(String pair : pairs) {
String[] parts = pair.split("="); String[] parts = pair.split("=");
if(parts.length == 2 && parts[0].equals("q")) { if(parts.length == 2 && parts[0].equals("q")) {
String dq = URLDecoder.decode(parts[1], Charset.defaultCharset()); String dq = URLDecoder.decode(parts[1], Charset.defaultCharset());
doc.addField("q_s", dq); setFieldIfUnset(doc, "q_s", dq);
doc.addField("q_t", dq); setFieldIfUnset(doc, "q_t", dq);
} }
if(parts[0].equals("rows")) { if(parts[0].equals("rows")) {
String dr = URLDecoder.decode(parts[1], Charset.defaultCharset()); String dr = URLDecoder.decode(parts[1], Charset.defaultCharset());
doc.addField("rows_i", dr); setFieldIfUnset(doc, "rows_i", dr);
} }
if(parts[0].equals("distrib")) { if(parts[0].equals("distrib")) {
String dr = URLDecoder.decode(parts[1], Charset.defaultCharset()); String dr = URLDecoder.decode(parts[1], Charset.defaultCharset());
doc.addField("distrib_s", dr); setFieldIfUnset(doc, "distrib_s", dr);
} }
if(parts[0].equals("shards")) { if(parts[0].equals("shards")) {
doc.addField("shards_s", "true"); setFieldIfUnset(doc, "shards_s", "true");
} }
if(parts[0].equals("ids") && !isRTGRequest(doc)) { if(parts[0].equals("ids") && !isRTGRequest(doc)) {
doc.addField("ids_s", "true"); setFieldIfUnset(doc, "ids_s", "true");
} }
if(parts[0].equals("isShard")) { if(parts[0].equals("isShard")) {
String dr = URLDecoder.decode(parts[1], Charset.defaultCharset()); String dr = URLDecoder.decode(parts[1], Charset.defaultCharset());
doc.addField("isShard_s", dr); setFieldIfUnset(doc, "isShard_s", dr);
} }
if(parts[0].equals("wt")) { if(parts[0].equals("wt")) {
String dr = URLDecoder.decode(parts[1], Charset.defaultCharset()); String dr = URLDecoder.decode(parts[1], Charset.defaultCharset());
doc.addField("wt_s", dr); setFieldIfUnset(doc, "wt_s", dr);
} }
if(parts[0].equals("facet")) { if(parts[0].equals("facet")) {
String dr = URLDecoder.decode(parts[1], Charset.defaultCharset()); String dr = URLDecoder.decode(parts[1], Charset.defaultCharset());
doc.addField("facet_s", dr); setFieldIfUnset(doc, "facet_s", dr);
} }
if(parts[0].equals("shards.purpose")) { if(parts[0].equals("shards.purpose")) {
@ -533,19 +553,9 @@ public class SolrLogPostTool {
//Special params used to determine what stage a query is. //Special params used to determine what stage a query is.
//So we populate with defaults. //So we populate with defaults.
//The absence of the distrib params means its a distributed query. //The absence of the distrib params means its a distributed query.
setFieldIfUnset(doc, "distrib_s", "true");
setFieldIfUnset(doc, "shards_s", "false");
if(doc.getField("distrib_s") == null) { setFieldIfUnset(doc, "ids_s", "false");
doc.addField("distrib_s", "true");
}
if(doc.getField("shards_s") == null) {
doc.addField("shards_s", "false");
}
if(doc.getField("ids_s") == null) {
doc.addField("ids_s", "false");
}
} }
private boolean isRTGRequest(SolrInputDocument doc) { private boolean isRTGRequest(SolrInputDocument doc) {

View File

@ -75,6 +75,24 @@ public class SolrLogPostToolTest extends SolrTestCaseJ4 {
assertEquals("REFINE_FACETS", purposes[1].toString()); assertEquals("REFINE_FACETS", purposes[1].toString());
} }
// Requests which have multiple copies of the same param should be parsed so that only the first param value is
// indexed, since the log schema expects many of these to be single-valued fields and will throw errors if multiple
// values are received.
@Test
public void testRecordsFirstInstanceOfSingleValuedParams() throws Exception {
  // The record repeats both q (*:* then inStock:true) and wt (javabin then xml).
  final String record = "2019-12-09 15:05:01.931 INFO (qtp2103763750-21) [c:logs4 s:shard1 r:core_node2 x:logs4_shard1_replica_n1] o.a.s.c.S.Request [logs4_shard1_replica_n1] webapp=/solr path=/select params={q=*:*&q=inStock:true&_=1575835181759&shards.purpose=36&isShard=true&wt=javabin&wt=xml&distrib=false} hits=234868 status=0 QTime=8\n";
  List<SolrInputDocument> docs = readDocs(record);

  // assertEquals takes (expected, actual); keeping that order makes failure messages report the values correctly.
  assertEquals(1, docs.size());
  SolrInputDocument doc = docs.get(0);

  // Only the first q value may be indexed.
  assertEquals(1, doc.getFieldValues("q_s").size());
  assertEquals("*:*", doc.getFieldValue("q_s"));

  // Only the first wt value may be indexed.
  assertEquals(1, doc.getFieldValues("wt_s").size());
  assertEquals("javabin", doc.getFieldValue("wt_s"));
}
@Test @Test
public void testRTGRecord() throws Exception { public void testRTGRecord() throws Exception {
final String record = "2020-03-19 20:00:30.845 INFO (qtp1635378213-20354) [c:logs4 s:shard8 r:core_node63 x:logs4_shard8_replica_n60] o.a.s.c.S.Request [logs4_shard8_replica_n60] webapp=/solr path=/get params={qt=/get&_stateVer_=logs4:104&ids=id1&ids=id2&ids=id3&wt=javabin&version=2} status=0 QTime=61"; final String record = "2020-03-19 20:00:30.845 INFO (qtp1635378213-20354) [c:logs4 s:shard8 r:core_node63 x:logs4_shard8_replica_n60] o.a.s.c.S.Request [logs4_shard8_replica_n60] webapp=/solr path=/get params={qt=/get&_stateVer_=logs4:104&ids=id1&ids=id2&ids=id3&wt=javabin&version=2} status=0 QTime=61";