From 02b3855c200f840cbd189b00408d1849140da63f Mon Sep 17 00:00:00 2001
From: Joshua Darnell
Date: Fri, 30 Apr 2021 07:29:03 -0700
Subject: [PATCH] Issue #56: Adding scoring functionality

---
 .../features/payloads/idx-payload.feature     |   4 +-
 .../CertificationReportGenerator.java         |   1 -
 .../certification/stepdefs/IDXPayload.java    |  50 ++--
 .../java/org/reso/models/MetadataReport.java  |   2 +-
 .../java/org/reso/models/PayloadSample.java   |  14 +-
 .../org/reso/models/PayloadSampleReport.java  | 218 ++++++++++++++++++
 ...ESODataDictionary-1.7.metadata-report.json |   2 +-
 7 files changed, 255 insertions(+), 36 deletions(-)
 create mode 100644 src/main/java/org/reso/models/PayloadSampleReport.java

diff --git a/src/main/java/org/reso/certification/features/payloads/idx-payload.feature b/src/main/java/org/reso/certification/features/payloads/idx-payload.feature
index 1092cae..2b49794 100644
--- a/src/main/java/org/reso/certification/features/payloads/idx-payload.feature
+++ b/src/main/java/org/reso/certification/features/payloads/idx-payload.feature
@@ -23,12 +23,12 @@ Feature: IDX Payload Endorsement (Web API)
   Scenario: Standard Resource Sampling - Request Data from Each Server Resource
     Given that metadata have been requested from the server
     And the metadata contains RESO Standard Resources
-    When 100 records are sampled from each RESO Standard resource in the server metadata
+    When up to 10000 records are sampled from each RESO Standard resource in the server metadata
     Then each record MUST have the string version of the Primary Key and ModificationTimestamp field
     And the data MUST match what is advertised in the metadata

   @non-standard-resource-sampling @idx-payload-endorsement
   Scenario: Non Standard Resource Sampling - Request Data from Each Server Resource
     Given that metadata have been requested from the server
-    When 100 records are sampled from each non standard resource in the server metadata
+    When up to 10000 records are sampled from each non standard resource in the server metadata
     Then the data MUST match what is advertised in the metadata
\ No newline at end of file
diff --git a/src/main/java/org/reso/certification/reporting/CertificationReportGenerator.java b/src/main/java/org/reso/certification/reporting/CertificationReportGenerator.java
index d7f184d..160aa1f 100644
--- a/src/main/java/org/reso/certification/reporting/CertificationReportGenerator.java
+++ b/src/main/java/org/reso/certification/reporting/CertificationReportGenerator.java
@@ -114,7 +114,6 @@ public class CertificationReportGenerator {

     } catch (IOException e) {
       e.printStackTrace();
     }
-    return null;
   }
 }
diff --git a/src/main/java/org/reso/certification/stepdefs/IDXPayload.java b/src/main/java/org/reso/certification/stepdefs/IDXPayload.java
index b5c7866..a58529a 100644
--- a/src/main/java/org/reso/certification/stepdefs/IDXPayload.java
+++ b/src/main/java/org/reso/certification/stepdefs/IDXPayload.java
@@ -1,6 +1,7 @@
 package org.reso.certification.stepdefs;

 import com.google.common.collect.Sets;
+import com.google.gson.GsonBuilder;
 import com.google.inject.Inject;
 import io.cucumber.java.Before;
 import io.cucumber.java.Scenario;
@@ -21,6 +22,7 @@ import org.reso.commander.common.DataDictionaryMetadata;
 import org.reso.commander.common.Utils;
 import org.reso.models.ODataTransportWrapper;
 import org.reso.models.PayloadSample;
+import org.reso.models.PayloadSampleReport;
 import org.reso.models.Settings;

 import java.io.ByteArrayInputStream;
@@ -103,8 +105,8 @@ public class IDXPayload {
     return sha256().hashString(String.join(SEPARATOR_CHARACTER, values), StandardCharsets.UTF_8).toString();
   }

-  @When("{int} records are sampled from each RESO Standard resource in the server metadata")
-  public void recordsAreSampledFromEachRESOStandardResourceInTheServerMetadata(int numRecords) {
+  @When("up to {int} records are sampled from each RESO Standard resource in the server metadata")
+  public void upToRecordsAreSampledFromEachRESOStandardResourceInTheServerMetadata(int numRecords) {
     if (!hasStandardResources.get()) {
       scenario.log("No RESO Standard Resources to sample");
       assumeTrue(true);
@@ -120,9 +122,10 @@
      */
     //TODO: decide whether to store in memory or serialize resource samples files upon completion
-    AtomicReference<Map<String, List<PayloadSample>>> resourcePayloadSamplesMap = new AtomicReference<>(new LinkedHashMap<>());
-    standardResources.get().stream().parallel().forEach(resourceName -> {
-      resourcePayloadSamplesMap.get().putIfAbsent(resourceName, new LinkedList<>());
+    AtomicReference<Map<String, List<PayloadSample>>> resourcePayloadSamplesMap = new AtomicReference<>(Collections.synchronizedMap(new LinkedHashMap<>()));
+
+    standardResources.get().forEach(resourceName -> {
+      resourcePayloadSamplesMap.get().putIfAbsent(resourceName, Collections.synchronizedList(new LinkedList<>()));
       resourcePayloadSamplesMap.get().put(resourceName, fetchAndProcessRecords(resourceName, numRecords));
     });

@@ -230,15 +233,16 @@
           }
           break;
         } else {
-          //need to look at the records from the response and get the lowest timestamp
+          LOG.info("Time taken: "
+              + (transportWrapper.getElapsedTimeMillis() >= 1000 ? (transportWrapper.getElapsedTimeMillis() / 1000) + "s" : transportWrapper.getElapsedTimeMillis() + "ms"));
+
           try {
+            payloadSample.get().setResponseSizeBytes(transportWrapper.getResponseData().getBytes().length);
+
             entityCollectionResWrap = container.get().getCommander().getClient()
                 .getDeserializer(ContentType.APPLICATION_JSON)
                 .toEntitySet(new ByteArrayInputStream(transportWrapper.getResponseData().getBytes()));

-            LOG.info("Time taken: "
-                + (transportWrapper.getElapsedTimeMillis() >= 1000 ? (transportWrapper.getElapsedTimeMillis() / 1000) + "s" : transportWrapper.getElapsedTimeMillis() + "ms"));
-
             if (entityCollectionResWrap.getPayload().getEntities().size() > 0) {
               assert (keyFields.size() > 0) :
                   getDefaultErrorMessage("no Key Fields found! Resources MUST have at least one key.");
@@ -281,29 +285,15 @@
   }

   public void createDataAvailabilityReport(Map<String, List<PayloadSample>> resourcePayloadSamplesMap) {
-    AtomicReference<Map<String, Map<String, Integer>>> resourceTallies = new AtomicReference<>(new LinkedHashMap<>());
-    resourcePayloadSamplesMap.keySet().forEach(resourceName -> {
-      LOG.debug("Processing resource: " + resourceName);
-      LOG.debug("Sample size: " + resourcePayloadSamplesMap.get(resourceName).size());
-      //for each resource, go through the keys and tally the data presence counts for each field
-      //as well as the number of samples in each case
-      resourceTallies.get().putIfAbsent(resourceName, new LinkedHashMap<>());
-      resourcePayloadSamplesMap.get(resourceName).forEach(payloadSample -> {
-        payloadSample.getSamples().forEach(sample -> {
-          sample.forEach((fieldName, encodedValue) -> {
-            if (encodedValue != null) {
-              resourceTallies.get().get(resourceName).putIfAbsent(fieldName, 0);
-              resourceTallies.get().get(resourceName).put(fieldName, resourceTallies.get().get(resourceName).get(fieldName) + 1);
-            }
-          });
-        });
-      });
-    });
-    Utils.createFile("build", "availability-report.txt", resourceTallies.get().toString());
+    PayloadSampleReport payloadSampleReport = new PayloadSampleReport(container.get().getEdm(), resourcePayloadSamplesMap);
+    GsonBuilder gsonBuilder = new GsonBuilder().setPrettyPrinting();
+    gsonBuilder.registerTypeAdapter(PayloadSampleReport.class, payloadSampleReport);
+
+    Utils.createFile("build", "availability-report.json", gsonBuilder.create().toJson(payloadSampleReport));
   }

-  @When("{int} records are sampled from each non standard resource in the server metadata")
-  public void recordsAreSampledFromEachNonStandardResourceInTheServerMetadata(int numRecords) {
+  @When("up to {int} records are sampled from each non standard resource in the server metadata")
+  public void upToRecordsAreSampledFromEachNonStandardResourceInTheServerMetadata(int numRecords) {
   }

   @Then("each record MUST have the string version of the Primary Key and ModificationTimestamp field")
diff --git a/src/main/java/org/reso/models/MetadataReport.java b/src/main/java/org/reso/models/MetadataReport.java
index 85bb900..3db2315 100644
--- a/src/main/java/org/reso/models/MetadataReport.java
+++ b/src/main/java/org/reso/models/MetadataReport.java
@@ -165,7 +165,7 @@ public class MetadataReport implements JsonSerializer<MetadataReport> {
   }

   static class SneakyAnnotationReader {
-    Class object;
+    Class<? extends EdmAnnotationImpl> object;
     Field field;
     EdmAnnotationImpl edmAnnotationImpl;
     ClientCsdlAnnotation clientCsdlAnnotation;
diff --git a/src/main/java/org/reso/models/PayloadSample.java b/src/main/java/org/reso/models/PayloadSample.java
index 0b4f2f1..5a59780 100644
--- a/src/main/java/org/reso/models/PayloadSample.java
+++ b/src/main/java/org/reso/models/PayloadSample.java
@@ -2,12 +2,15 @@ package org.reso.models;

 import org.apache.olingo.commons.api.edm.EdmKeyPropertyRef;

-import java.util.*;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;

 public class PayloadSample {
   String resourceName;
   String dateField;
   Long responseTimeMillis = null;
+  Integer responseSizeBytes = null;
   String requestUri = null;

   //format is a list of key/value pairs where all fields besides
@@ -48,4 +51,13 @@
     requestUri = value;
   }

+  public Integer getResponseSizeBytes() {
+    return responseSizeBytes;
+  }
+
+  public void setResponseSizeBytes(Integer responseSizeBytes) {
+    this.responseSizeBytes = responseSizeBytes;
+  }
+
+
 }
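The new PayloadSampleReport that follows derives a per-field availability score from the samples collected above: for each resource it counts the samples in which a field carried a non-null value and divides by the total number of samples fetched for that resource. Below is a minimal, self-contained sketch of that tally-and-divide step; it is illustrative only, and the class and method names are hypothetical rather than part of the patch.

// Illustrative sketch (not part of the patch): per-field availability =
// (number of samples where the field is non-null) / (total samples for the resource).
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

final class AvailabilityExample {
  static Map<String, Double> fieldAvailability(List<Map<String, String>> samples) {
    // Tally how many samples contain a non-null encoded value for each field.
    Map<String, Integer> tallies = new LinkedHashMap<>();
    samples.forEach(sample -> sample.forEach((fieldName, encodedValue) -> {
      if (encodedValue != null) tallies.merge(fieldName, 1, Integer::sum);
    }));
    // Divide each tally by the sample count to get a value between 0.0 and 1.0.
    Map<String, Double> availability = new LinkedHashMap<>();
    tallies.forEach((fieldName, count) ->
        availability.put(fieldName, (1.0 * count) / samples.size()));
    return availability;
  }
}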
diff --git a/src/main/java/org/reso/models/PayloadSampleReport.java b/src/main/java/org/reso/models/PayloadSampleReport.java
new file mode 100644
index 0000000..69edf35
--- /dev/null
+++ b/src/main/java/org/reso/models/PayloadSampleReport.java
@@ -0,0 +1,218 @@
+package org.reso.models;
+
+import com.google.gson.*;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.apache.olingo.commons.api.edm.Edm;
+import org.apache.olingo.commons.api.edm.EdmElement;
+import org.reso.commander.common.Utils;
+
+import java.lang.reflect.Type;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.concurrent.atomic.AtomicReference;
+
+public class PayloadSampleReport implements JsonSerializer<PayloadSampleReport> {
+  private static final Logger LOG = LogManager.getLogger(PayloadSampleReport.class);
+  Map<String, List<PayloadSample>> resourcePayloadSamplesMap = new LinkedHashMap<>();
+  Map<String, Map<String, Integer>> resourceFieldTallies = new LinkedHashMap<>(new LinkedHashMap<>());
+
+  private Edm metadata;
+
+  private PayloadSampleReport() {
+    //private default constructor
+  }
+
+  public PayloadSampleReport(Edm metadata, Map<String, List<PayloadSample>> resourcePayloadSamplesMap) {
+    this.metadata = metadata;
+    this.resourcePayloadSamplesMap = resourcePayloadSamplesMap;
+    resourceFieldTallies = createResourceFieldTallies(resourcePayloadSamplesMap);
+  }
+
+  @Override
+  public String toString() {
+    return String.valueOf(serialize(this, FieldAvailabilityJson.class, null));
+  }
+
+  /**
+   * FieldAvailabilityJson uses a JSON payload with the following structure:
+   *
+   * {
+   *   "resourceName": "Property",
+   *   "fieldName": "AboveGradeFinishedArea",
+   *   "availability": 0.1
+   * }
+   */
+  private final class FieldAvailabilityJson implements JsonSerializer<FieldAvailabilityJson> {
+    static final String
+        RESOURCE_NAME_KEY = "resourceName",
+        FIELD_NAME_KEY = "fieldName",
+        FIELDS_KEY = "fields",
+        AVAILABILITY_KEY = "availability";
+
+    String resourceName;
+    EdmElement edmElement;
+
+    public FieldAvailabilityJson(String resourceName, EdmElement edmElement) {
+      this.resourceName = resourceName;
+      this.edmElement = edmElement;
+    }
+
+    public String buildReportString(JsonElement dataAvailabilityReport) {
+      StringBuilder reportBuilder = new StringBuilder();
+      dataAvailabilityReport.getAsJsonObject().get(FIELDS_KEY).getAsJsonArray().forEach(field -> {
+        reportBuilder.append("\nResource: ");
+        reportBuilder.append(field.getAsJsonObject().get(RESOURCE_NAME_KEY));
+        reportBuilder.append("\nField: ");
+        reportBuilder.append(field.getAsJsonObject().get(FIELD_NAME_KEY));
+        reportBuilder.append("\nAvailability: ");
+        reportBuilder.append(field.getAsJsonObject().get(AVAILABILITY_KEY));
+        reportBuilder.append("\n");
+      });
+      return reportBuilder.toString();
+    }
+
+    @Override
+    public JsonElement serialize(FieldAvailabilityJson src, Type typeOfSrc, JsonSerializationContext context) {
+      JsonObject field = new JsonObject();
+      int numTimesPresent = resourceFieldTallies != null && resourceFieldTallies.get(src.resourceName) != null && resourceFieldTallies.get(src.resourceName).get(src.edmElement.getName()) != null
+          ? resourceFieldTallies.get(src.resourceName).get(src.edmElement.getName()) : 0;
+      int numSamples = resourcePayloadSamplesMap.get(src.resourceName) != null
+          ? resourcePayloadSamplesMap.get(src.resourceName).stream().reduce(0, (acc, f) -> acc + f.encodedSamples.size(), Integer::sum) : 0;
+
+      field.addProperty(RESOURCE_NAME_KEY, src.resourceName);
+      field.addProperty(FIELD_NAME_KEY, src.edmElement.getName());
+      field.addProperty(AVAILABILITY_KEY, numSamples > 0 ? (1.0 * numTimesPresent) / numSamples : 0);
+
+      return field;
+    }
+  }
+
+  private static Map<String, Map<String, Integer>> createResourceFieldTallies(Map<String, List<PayloadSample>> resourcePayloadSamplesMap) {
+    AtomicReference<Map<String, Map<String, Integer>>> resourceTallies = new AtomicReference<>(new LinkedHashMap<>());
+    AtomicInteger numSamples = new AtomicInteger(0);
+    resourcePayloadSamplesMap.keySet().forEach(resourceName -> {
+      LOG.info("Processing resource: " + resourceName);
+      numSamples.set(resourcePayloadSamplesMap.get(resourceName) != null ? resourcePayloadSamplesMap.get(resourceName).stream()
+          .reduce(0, (acc, f) -> acc + f.getSamples().size(), Integer::sum) : 0);
+      LOG.info("Sample size: " + numSamples.get());
+
+      //for each resource, go through the keys and tally the data presence counts for each field
+      //as well as the number of samples in each case
+      resourceTallies.get().putIfAbsent(resourceName, new LinkedHashMap<>());
+      if (numSamples.get() > 0) {
+        resourcePayloadSamplesMap.get(resourceName).forEach(payloadSample -> {
+          payloadSample.getSamples().forEach(sample -> {
+            sample.forEach((fieldName, encodedValue) -> {
+              if (encodedValue != null) {
+                resourceTallies.get().get(resourceName).putIfAbsent(fieldName, 0);
+                resourceTallies.get().get(resourceName).put(fieldName, resourceTallies.get().get(resourceName).get(fieldName) + 1);
+              }
+            });
+          });
+        });
+      }
+    });
+    return resourceTallies.get();
+  }
+
+  @Override
+  public JsonElement serialize(PayloadSampleReport src, Type typeOfSrc, JsonSerializationContext context) {
+    final String
+        DESCRIPTION_KEY = "description", DESCRIPTION = "RESO Data Availability Report",
+        VERSION_KEY = "version", VERSION = "1.7",
+        GENERATED_ON_KEY = "generatedOn",
+        RESOURCE_TOTALS_KEY = "resourceTotals",
+        FIELDS_KEY = "fields";
+
+    JsonArray fields = new JsonArray();
+
+    src.metadata.getSchemas().forEach(edmSchema -> {
+      //serialize entities (resources) and members (fields)
+      edmSchema.getEntityTypes().forEach(edmEntityType -> {
+        edmEntityType.getPropertyNames().forEach(propertyName -> {
+          FieldAvailabilityJson fieldJson = new FieldAvailabilityJson(edmEntityType.getName(), edmEntityType.getProperty(propertyName));
+          fields.add(fieldJson.serialize(fieldJson, FieldAvailabilityJson.class, null));
+        });
+      });
+    });
+
+    JsonObject availabilityReport = new JsonObject();
+    availabilityReport.addProperty(DESCRIPTION_KEY, DESCRIPTION);
+    availabilityReport.addProperty(VERSION_KEY, VERSION);
+    availabilityReport.addProperty(GENERATED_ON_KEY, Utils.getIsoTimestamp());
+
+    final JsonArray resourceTotalsByResource = new JsonArray();
+    src.resourcePayloadSamplesMap.keySet().forEach(resourceName -> {
+      ResourceTotals resourceTotals = new ResourceTotals(resourceName);
+      if (src.resourcePayloadSamplesMap.get(resourceName) != null) {
+        src.resourcePayloadSamplesMap.get(resourceName).forEach(payloadSample -> {
+          resourceTotals.totalBytesReceived.getAndAdd(payloadSample.getResponseSizeBytes());
+          resourceTotals.totalResponseTimeMillis.getAndAdd(payloadSample.getResponseTimeMillis());
+          resourceTotals.numSamplesProcessed.getAndIncrement();
+          resourceTotals.numRecordsFetched.getAndAdd(payloadSample.encodedSamples.size());
+
+          if (resourceTotals.pageSize.get() == 0) resourceTotals.pageSize.set(payloadSample.getSamples().size());
+        });
+      }
+
+      resourceTotalsByResource.add(resourceTotals.serialize(resourceTotals, ResourceTotals.class, null));
+    });
+
+    availabilityReport.add(RESOURCE_TOTALS_KEY, resourceTotalsByResource);
+    availabilityReport.add(FIELDS_KEY, fields);
+
+    return availabilityReport;
+  }
+
+  static final class ResourceTotals implements JsonSerializer<ResourceTotals> {
+    final String
+        RESOURCE_NAME_KEY = "resourceName",
+        TOTAL_NUM_RECORDS_FETCHED = "numRecordsFetched",
+        TOTAL_NUM_SAMPLES_KEY = "numSamples",
+        PAGE_SIZE_KEY = "pageSize",
+        AVERAGE_RESPONSE_TIME_MILLIS_KEY = "averageResponseTimeMillis",
+        AVERAGE_RESPONSE_BYTES_KEY = "averageResponseBytes";
+
+    final AtomicInteger numSamplesProcessed = new AtomicInteger(0);
+    final AtomicInteger numRecordsFetched = new AtomicInteger(0);
+    final AtomicReference<String> resourceName = new AtomicReference<>();
+    final AtomicLong totalResponseTimeMillis = new AtomicLong(0);
+    final AtomicLong totalBytesReceived = new AtomicLong(0);
+    final AtomicInteger pageSize = new AtomicInteger(0);
+
+    public ResourceTotals(String resourceName) {
+      this.resourceName.set(resourceName);
+    }
+
+    /**
+     * Gson invokes this call-back method during serialization when it encounters a field of the
+     * specified type.
+     *
+     * <p>In the implementation of this call-back method, you should consider invoking
+     * {@link JsonSerializationContext#serialize(Object, Type)} method to create JsonElements for any
+     * non-trivial field of the {@code src} object. However, you should never invoke it on the
+     * {@code src} object itself since that will cause an infinite loop (Gson will call your
+     * call-back method again).</p>
+     *
+     * @param src the object that needs to be converted to Json.
+     * @param typeOfSrc the actual type (fully genericized version) of the source object.
+     * @param context
+     * @return a JsonElement corresponding to the specified object.
+     */
+    @Override
+    public JsonElement serialize(ResourceTotals src, Type typeOfSrc, JsonSerializationContext context) {
+      JsonObject totals = new JsonObject();
+      totals.addProperty(RESOURCE_NAME_KEY, src.resourceName.get());
+      totals.addProperty(TOTAL_NUM_RECORDS_FETCHED, src.numRecordsFetched.get());
+      totals.addProperty(TOTAL_NUM_SAMPLES_KEY, src.numSamplesProcessed.get());
+      totals.addProperty(PAGE_SIZE_KEY, src.pageSize.get());
+      totals.addProperty(AVERAGE_RESPONSE_BYTES_KEY, src.numSamplesProcessed.get() > 0 ? src.totalBytesReceived.get() / src.numSamplesProcessed.get() : 0);
+      totals.addProperty(AVERAGE_RESPONSE_TIME_MILLIS_KEY, src.numSamplesProcessed.get() > 0 ? src.totalResponseTimeMillis.get() / src.numSamplesProcessed.get() : 0);
+      return totals;
+    }
+  }
+
+}
diff --git a/src/main/resources/RESODataDictionary-1.7.metadata-report.json b/src/main/resources/RESODataDictionary-1.7.metadata-report.json
index fabf256..f48823e 100644
--- a/src/main/resources/RESODataDictionary-1.7.metadata-report.json
+++ b/src/main/resources/RESODataDictionary-1.7.metadata-report.json
@@ -1,7 +1,7 @@
 {
   "description": "RESO Data Dictionary Metadata Report",
   "version": "1.7",
-  "generatedOn": "2021-04-13T23:23:58.588Z",
+  "generatedOn": "2021-04-30T12:16:46.822Z",
   "fields": [
     {
       "resourceName": "Property",
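For reference, createDataAvailabilityReport relies on Gson's custom-serializer hook: PayloadSampleReport implements JsonSerializer<PayloadSampleReport> and is registered as its own type adapter, so gsonBuilder.create().toJson(...) routes through its serialize method. The short sketch below shows that pattern in isolation; ReportExample is a hypothetical stand-in and not a class from this patch.

// Illustrative sketch of the register-the-instance-as-its-own-serializer pattern.
import com.google.gson.GsonBuilder;
import com.google.gson.JsonObject;
import com.google.gson.JsonSerializationContext;
import com.google.gson.JsonSerializer;
import java.lang.reflect.Type;

final class ReportExample implements JsonSerializer<ReportExample> {
  final String description = "example report";

  @Override
  public JsonObject serialize(ReportExample src, Type typeOfSrc, JsonSerializationContext context) {
    // Build the JSON shape by hand rather than relying on reflective serialization.
    JsonObject report = new JsonObject();
    report.addProperty("description", src.description);
    return report;
  }

  public static void main(String[] args) {
    ReportExample report = new ReportExample();
    GsonBuilder gsonBuilder = new GsonBuilder().setPrettyPrinting();
    // Registering the instance as the type adapter for its own class means
    // toJson(report) is delegated to the serialize method above.
    gsonBuilder.registerTypeAdapter(ReportExample.class, report);
    System.out.println(gsonBuilder.create().toJson(report));
  }
}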