Issue #56: Adding scoring functionality

Joshua Darnell 2021-04-30 07:29:03 -07:00
parent 03eff74438
commit 02b3855c20
7 changed files with 255 additions and 36 deletions

View File

@@ -23,12 +23,12 @@ Feature: IDX Payload Endorsement (Web API)
Scenario: Standard Resource Sampling - Request Data from Each Server Resource
Given that metadata have been requested from the server
And the metadata contains RESO Standard Resources
When 100 records are sampled from each RESO Standard resource in the server metadata
When up to 10000 records are sampled from each RESO Standard resource in the server metadata
Then each record MUST have the string version of the Primary Key and ModificationTimestamp field
And the data MUST match what is advertised in the metadata
@non-standard-resource-sampling @idx-payload-endorsement
Scenario: Non Standard Resource Sampling - Request Data from Each Server Resource
Given that metadata have been requested from the server
When 100 records are sampled from each non standard resource in the server metadata
When up to 10000 records are sampled from each non standard resource in the server metadata
Then the data MUST match what is advertised in the metadata
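The reworded steps only match if the Cucumber expressions on the Java side change with them; the step definitions updated later in this commit bind the literal 10000 through the {int} placeholder, roughly like this condensed excerpt:

// Condensed excerpt of the step definition change below; {int} binds 10000 to numRecords
@When("up to {int} records are sampled from each RESO Standard resource in the server metadata")
public void upToRecordsAreSampledFromEachRESOStandardResourceInTheServerMetadata(int numRecords) {
  // numRecords receives 10000 from the scenario text; sampling logic follows (see IDXPayload below)
}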

View File

@@ -114,7 +114,6 @@ public class CertificationReportGenerator {
} catch (IOException e) {
e.printStackTrace();
}
return null;
}
}

View File

@@ -1,6 +1,7 @@
package org.reso.certification.stepdefs;
import com.google.common.collect.Sets;
import com.google.gson.GsonBuilder;
import com.google.inject.Inject;
import io.cucumber.java.Before;
import io.cucumber.java.Scenario;
@@ -21,6 +22,7 @@ import org.reso.commander.common.DataDictionaryMetadata;
import org.reso.commander.common.Utils;
import org.reso.models.ODataTransportWrapper;
import org.reso.models.PayloadSample;
import org.reso.models.PayloadSampleReport;
import org.reso.models.Settings;
import java.io.ByteArrayInputStream;
@@ -103,8 +105,8 @@ public class IDXPayload {
return sha256().hashString(String.join(SEPARATOR_CHARACTER, values), StandardCharsets.UTF_8).toString();
}
@When("{int} records are sampled from each RESO Standard resource in the server metadata")
public void recordsAreSampledFromEachRESOStandardResourceInTheServerMetadata(int numRecords) {
@When("up to {int} records are sampled from each RESO Standard resource in the server metadata")
public void upToRecordsAreSampledFromEachRESOStandardResourceInTheServerMetadata(int numRecords) {
if (!hasStandardResources.get()) {
scenario.log("No RESO Standard Resources to sample");
assumeTrue(true);
@@ -120,9 +122,10 @@
*/
//TODO: decide whether to store in memory or serialize resource samples files upon completion
AtomicReference<Map<String, List<PayloadSample>>> resourcePayloadSamplesMap = new AtomicReference<>(new LinkedHashMap<>());
standardResources.get().stream().parallel().forEach(resourceName -> {
resourcePayloadSamplesMap.get().putIfAbsent(resourceName, new LinkedList<>());
AtomicReference<Map<String, List<PayloadSample>>> resourcePayloadSamplesMap = new AtomicReference<>(Collections.synchronizedMap(new LinkedHashMap<>()));
standardResources.get().forEach(resourceName -> {
resourcePayloadSamplesMap.get().putIfAbsent(resourceName, Collections.synchronizedList(new LinkedList<>()));
resourcePayloadSamplesMap.get().put(resourceName, fetchAndProcessRecords(resourceName, numRecords));
});
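The sample map and per-resource lists are now wrapped in Collections.synchronizedMap and Collections.synchronizedList, while the resource loop itself drops its parallel() call, so the wrappers mainly guard against future re-parallelization. A minimal, standalone sketch of how the synchronized views behave (nothing here is RESO-specific):

import java.util.*;

public class SynchronizedSamplesSketch {
  public static void main(String[] args) {
    // Individual put/get/add calls on the synchronized views are thread-safe;
    // iteration still needs an explicit synchronized block.
    Map<String, List<String>> samples = Collections.synchronizedMap(new LinkedHashMap<>());
    samples.putIfAbsent("Property", Collections.synchronizedList(new LinkedList<>()));
    samples.get("Property").add("encodedSample");
    synchronized (samples) {
      samples.forEach((resource, list) -> System.out.println(resource + ": " + list.size()));
    }
  }
}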
@@ -230,15 +233,16 @@
}
break;
} else {
//need to look at the records from the response and get the lowest timestamp
LOG.info("Time taken: "
+ (transportWrapper.getElapsedTimeMillis() >= 1000 ? (transportWrapper.getElapsedTimeMillis() / 1000) + "s" : transportWrapper.getElapsedTimeMillis() + "ms"));
try {
payloadSample.get().setResponseSizeBytes(transportWrapper.getResponseData().getBytes().length);
entityCollectionResWrap = container.get().getCommander().getClient()
.getDeserializer(ContentType.APPLICATION_JSON)
.toEntitySet(new ByteArrayInputStream(transportWrapper.getResponseData().getBytes()));
LOG.info("Time taken: "
+ (transportWrapper.getElapsedTimeMillis() >= 1000 ? (transportWrapper.getElapsedTimeMillis() / 1000) + "s" : transportWrapper.getElapsedTimeMillis() + "ms"));
if (entityCollectionResWrap.getPayload().getEntities().size() > 0) {
assert (keyFields.size() > 0) :
getDefaultErrorMessage("no Key Fields found! Resources MUST have at least one key.");
@@ -281,29 +285,15 @@
}
public void createDataAvailabilityReport(Map<String, List<PayloadSample>> resourcePayloadSamplesMap) {
AtomicReference<Map<String, Map<String, Integer>>> resourceTallies = new AtomicReference<>(new LinkedHashMap<>());
resourcePayloadSamplesMap.keySet().forEach(resourceName -> {
LOG.debug("Processing resource: " + resourceName);
LOG.debug("Sample size: " + resourcePayloadSamplesMap.get(resourceName).size());
//for each resource, go through the keys and tally the data presence counts for each field
//as well as the number of samples in each case
resourceTallies.get().putIfAbsent(resourceName, new LinkedHashMap<>());
resourcePayloadSamplesMap.get(resourceName).forEach(payloadSample -> {
payloadSample.getSamples().forEach(sample -> {
sample.forEach((fieldName, encodedValue) -> {
if (encodedValue != null) {
resourceTallies.get().get(resourceName).putIfAbsent(fieldName, 0);
resourceTallies.get().get(resourceName).put(fieldName, resourceTallies.get().get(resourceName).get(fieldName) + 1);
}
});
});
});
});
Utils.createFile("build", "availability-report.txt", resourceTallies.get().toString());
PayloadSampleReport payloadSampleReport = new PayloadSampleReport(container.get().getEdm(), resourcePayloadSamplesMap);
GsonBuilder gsonBuilder = new GsonBuilder().setPrettyPrinting();
gsonBuilder.registerTypeAdapter(PayloadSampleReport.class, payloadSampleReport);
Utils.createFile("build", "availability-report.json", gsonBuilder.create().toJson(payloadSampleReport));
}
@When("{int} records are sampled from each non standard resource in the server metadata")
public void recordsAreSampledFromEachNonStandardResourceInTheServerMetadata(int numRecords) {
@When("up to {int} records are sampled from each non standard resource in the server metadata")
public void upToRecordsAreSampledFromEachNonStandardResourceInTheServerMetadata(int numRecords) {
}
@Then("each record MUST have the string version of the Primary Key and ModificationTimestamp field")

View File

@@ -165,7 +165,7 @@ public class MetadataReport implements JsonSerializer<MetadataReport> {
}
static class SneakyAnnotationReader {
Class object;
Class<? extends EdmAnnotationImpl> object;
Field field;
EdmAnnotationImpl edmAnnotationImpl;
ClientCsdlAnnotation clientCsdlAnnotation;
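Only the declared type of the cached Class reference changes here. Judging from its member fields, SneakyAnnotationReader reads the private ClientCsdlAnnotation held by an EdmAnnotationImpl via reflection; a generic, self-contained sketch of that reflection pattern (the Example class and field name are hypothetical, not Olingo's API):

import java.lang.reflect.Field;

public class ReflectionSketch {
  // Hypothetical class standing in for an object whose private field we need to reach
  static class Example {
    private final String hidden = "value";
  }

  public static void main(String[] args) throws Exception {
    Example target = new Example();
    Field field = Example.class.getDeclaredField("hidden"); // look up the private field
    field.setAccessible(true);                              // bypass access checks
    System.out.println(field.get(target));                  // prints "value"
  }
}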

View File

@@ -2,12 +2,15 @@ package org.reso.models;
import org.apache.olingo.commons.api.edm.EdmKeyPropertyRef;
import java.util.*;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
public class PayloadSample {
String resourceName;
String dateField;
Long responseTimeMillis = null;
Integer responseSizeBytes = null;
String requestUri = null;
//format is a list of key/value pairs where all fields besides
@@ -48,4 +51,13 @@ public class PayloadSample {
requestUri = value;
}
public Integer getResponseSizeBytes() {
return responseSizeBytes;
}
public void setResponseSizeBytes(Integer responseSizeBytes) {
this.responseSizeBytes = responseSizeBytes;
}
}

View File

@@ -0,0 +1,218 @@
package org.reso.models;
import com.google.gson.*;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.olingo.commons.api.edm.Edm;
import org.apache.olingo.commons.api.edm.EdmElement;
import org.reso.commander.common.Utils;
import java.lang.reflect.Type;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;
public class PayloadSampleReport implements JsonSerializer<PayloadSampleReport> {
private static final Logger LOG = LogManager.getLogger(PayloadSampleReport.class);
Map<String, List<PayloadSample>> resourcePayloadSamplesMap = new LinkedHashMap<>();
Map<String, Map<String, Integer>> resourceFieldTallies = new LinkedHashMap<>(new LinkedHashMap<>());
private Edm metadata;
private PayloadSampleReport() {
//private default constructor
}
public PayloadSampleReport(Edm metadata, Map<String, List<PayloadSample>> resourcePayloadSamplesMap) {
this.metadata = metadata;
this.resourcePayloadSamplesMap = resourcePayloadSamplesMap;
resourceFieldTallies = createResourceFieldTallies(resourcePayloadSamplesMap);
}
@Override
public String toString() {
return String.valueOf(serialize(this, FieldAvailabilityJson.class, null));
}
/**
* FieldAvailabilityJson uses a JSON payload with the following structure:
*
* {
* "resourceName": "Property",
* "fieldName": "AboveGradeFinishedArea",
* "availability": 0.1
* }
*/
private final class FieldAvailabilityJson implements JsonSerializer<FieldAvailabilityJson> {
static final String
RESOURCE_NAME_KEY = "resourceName",
FIELD_NAME_KEY = "fieldName",
FIELDS_KEY = "fields",
AVAILABILITY_KEY = "availability";
String resourceName;
EdmElement edmElement;
public FieldAvailabilityJson(String resourceName, EdmElement edmElement) {
this.resourceName = resourceName;
this.edmElement = edmElement;
}
public String buildReportString(JsonElement dataAvailabilityReport) {
StringBuilder reportBuilder = new StringBuilder();
dataAvailabilityReport.getAsJsonObject().get(FIELDS_KEY).getAsJsonArray().forEach(field -> {
reportBuilder.append("\nResource: ");
reportBuilder.append(field.getAsJsonObject().get(RESOURCE_NAME_KEY));
reportBuilder.append("\nField: ");
reportBuilder.append(field.getAsJsonObject().get(FIELD_NAME_KEY));
reportBuilder.append("\nAvailability: ");
reportBuilder.append(field.getAsJsonObject().get(AVAILABILITY_KEY));
reportBuilder.append("\n");
});
return reportBuilder.toString();
}
@Override
public JsonElement serialize(FieldAvailabilityJson src, Type typeOfSrc, JsonSerializationContext context) {
JsonObject field = new JsonObject();
int numTimesPresent = resourceFieldTallies != null && resourceFieldTallies.get(src.resourceName) != null && resourceFieldTallies.get(src.resourceName).get(src.edmElement.getName()) != null
? resourceFieldTallies.get(src.resourceName).get(src.edmElement.getName()) : 0;
int numSamples = resourcePayloadSamplesMap.get(src.resourceName) != null
? resourcePayloadSamplesMap.get(src.resourceName).stream().reduce(0, (acc, f) -> acc + f.encodedSamples.size(), Integer::sum) : 0;
field.addProperty(RESOURCE_NAME_KEY, src.resourceName);
field.addProperty(FIELD_NAME_KEY, src.edmElement.getName());
field.addProperty(AVAILABILITY_KEY, numSamples > 0 ? (1.0 * numTimesPresent) / numSamples : 0);
return field;
}
}
private static Map<String, Map<String, Integer>> createResourceFieldTallies(Map<String, List<PayloadSample>> resourcePayloadSamplesMap) {
AtomicReference<Map<String, Map<String, Integer>>> resourceTallies = new AtomicReference<>(new LinkedHashMap<>());
AtomicInteger numSamples = new AtomicInteger(0);
resourcePayloadSamplesMap.keySet().forEach(resourceName -> {
LOG.info("Processing resource: " + resourceName);
numSamples.set(resourcePayloadSamplesMap.get(resourceName) != null ? resourcePayloadSamplesMap.get(resourceName).stream()
.reduce(0, (acc, f) -> acc + f.getSamples().size(), Integer::sum) : 0);
LOG.info("Sample size: " + numSamples.get());
//for each resource, go through the keys and tally the data presence counts for each field
//as well as the number of samples in each case
resourceTallies.get().putIfAbsent(resourceName, new LinkedHashMap<>());
if (numSamples.get() > 0) {
resourcePayloadSamplesMap.get(resourceName).forEach(payloadSample -> {
payloadSample.getSamples().forEach(sample -> {
sample.forEach((fieldName, encodedValue) -> {
if (encodedValue != null) {
resourceTallies.get().get(resourceName).putIfAbsent(fieldName, 0);
resourceTallies.get().get(resourceName).put(fieldName, resourceTallies.get().get(resourceName).get(fieldName) + 1);
}
});
});
});
}
});
return resourceTallies.get();
}
@Override
public JsonElement serialize(PayloadSampleReport src, Type typeOfSrc, JsonSerializationContext context) {
final String
DESCRIPTION_KEY = "description", DESCRIPTION = "RESO Data Availability Report",
VERSION_KEY = "version", VERSION = "1.7",
GENERATED_ON_KEY = "generatedOn",
RESOURCE_TOTALS_KEY = "resourceTotals",
FIELDS_KEY = "fields";
JsonArray fields = new JsonArray();
src.metadata.getSchemas().forEach(edmSchema -> {
//serialize entities (resources) and members (fields)
edmSchema.getEntityTypes().forEach(edmEntityType -> {
edmEntityType.getPropertyNames().forEach(propertyName -> {
FieldAvailabilityJson fieldJson = new FieldAvailabilityJson(edmEntityType.getName(), edmEntityType.getProperty(propertyName));
fields.add(fieldJson.serialize(fieldJson, FieldAvailabilityJson.class, null));
});
});
});
JsonObject availabilityReport = new JsonObject();
availabilityReport.addProperty(DESCRIPTION_KEY, DESCRIPTION);
availabilityReport.addProperty(VERSION_KEY, VERSION);
availabilityReport.addProperty(GENERATED_ON_KEY, Utils.getIsoTimestamp());
final JsonArray resourceTotalsByResource = new JsonArray();
src.resourcePayloadSamplesMap.keySet().forEach(resourceName -> {
ResourceTotals resourceTotals = new ResourceTotals(resourceName);
if (src.resourcePayloadSamplesMap.get(resourceName) != null) {
src.resourcePayloadSamplesMap.get(resourceName).forEach(payloadSample -> {
resourceTotals.totalBytesReceived.getAndAdd(payloadSample.getResponseSizeBytes());
resourceTotals.totalResponseTimeMillis.getAndAdd(payloadSample.getResponseTimeMillis());
resourceTotals.numSamplesProcessed.getAndIncrement();
resourceTotals.numRecordsFetched.getAndAdd(payloadSample.encodedSamples.size());
if (resourceTotals.pageSize.get() == 0) resourceTotals.pageSize.set(payloadSample.getSamples().size());
});
}
resourceTotalsByResource.add(resourceTotals.serialize(resourceTotals, ResourceTotals.class, null));
});
availabilityReport.add(RESOURCE_TOTALS_KEY, resourceTotalsByResource);
availabilityReport.add(FIELDS_KEY, fields);
return availabilityReport;
}
static final class ResourceTotals implements JsonSerializer<ResourceTotals> {
final String
RESOURCE_NAME_KEY = "resourceName",
TOTAL_NUM_RECORDS_FETCHED = "numRecordsFetched",
TOTAL_NUM_SAMPLES_KEY = "numSamples",
PAGE_SIZE_KEY = "pageSize",
AVERAGE_RESPONSE_TIME_MILLIS_KEY = "averageResponseTimeMillis",
AVERAGE_RESPONSE_BYTES_KEY = "averageResponseBytes";
final AtomicInteger numSamplesProcessed = new AtomicInteger(0);
final AtomicInteger numRecordsFetched = new AtomicInteger(0);
final AtomicReference<String> resourceName = new AtomicReference<>();
final AtomicLong totalResponseTimeMillis = new AtomicLong(0);
final AtomicLong totalBytesReceived = new AtomicLong(0);
final AtomicInteger pageSize = new AtomicInteger(0);
public ResourceTotals (String resourceName) {
this.resourceName.set(resourceName);
}
/**
* Gson invokes this call-back method during serialization when it encounters a field of the
* specified type.
*
* <p>In the implementation of this call-back method, you should consider invoking
* {@link JsonSerializationContext#serialize(Object, Type)} method to create JsonElements for any
* non-trivial field of the {@code src} object. However, you should never invoke it on the
* {@code src} object itself since that will cause an infinite loop (Gson will call your
* call-back method again).</p>
*
* @param src the object that needs to be converted to Json.
* @param typeOfSrc the actual type (fully genericized version) of the source object.
* @param context
* @return a JsonElement corresponding to the specified object.
*/
@Override
public JsonElement serialize(ResourceTotals src, Type typeOfSrc, JsonSerializationContext context) {
JsonObject totals = new JsonObject();
totals.addProperty(RESOURCE_NAME_KEY, src.resourceName.get());
totals.addProperty(TOTAL_NUM_RECORDS_FETCHED, src.numRecordsFetched.get());
totals.addProperty(TOTAL_NUM_SAMPLES_KEY, src.numSamplesProcessed.get());
totals.addProperty(PAGE_SIZE_KEY, src.pageSize.get());
totals.addProperty(AVERAGE_RESPONSE_BYTES_KEY, src.numSamplesProcessed.get() > 0 ? src.totalBytesReceived.get() / src.numSamplesProcessed.get() : 0);
totals.addProperty(AVERAGE_RESPONSE_TIME_MILLIS_KEY, src.numSamplesProcessed.get() > 0 ? src.totalResponseTimeMillis.get() / src.numSamplesProcessed.get() : 0);
return totals;
}
}
}
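Putting the serializers above together, build/availability-report.json should come out with roughly the following shape. The values are illustrative only; availability is numTimesPresent / numSamples, so a field present in 10 of 100 sampled records reports 0.1:

{
  "description": "RESO Data Availability Report",
  "version": "1.7",
  "generatedOn": "2021-04-30T00:00:00.000Z",
  "resourceTotals": [
    {
      "resourceName": "Property",
      "numRecordsFetched": 10000,
      "numSamples": 100,
      "pageSize": 100,
      "averageResponseBytes": 150000,
      "averageResponseTimeMillis": 1200
    }
  ],
  "fields": [
    {
      "resourceName": "Property",
      "fieldName": "AboveGradeFinishedArea",
      "availability": 0.1
    }
  ]
}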

View File

@@ -1,7 +1,7 @@
{
"description": "RESO Data Dictionary Metadata Report",
"version": "1.7",
"generatedOn": "2021-04-13T23:23:58.588Z",
"generatedOn": "2021-04-30T12:16:46.822Z",
"fields": [
{
"resourceName": "Property",