Issue #76: initial working data generator. TODO: lookups need their own generator

Joshua Darnell 2021-05-27 01:44:57 -07:00
parent 20dc65f77c
commit 313c57e9f7
8 changed files with 329 additions and 28 deletions

View File

@@ -22,7 +22,7 @@ public class DDCacheProcessor extends WorksheetProcessor {
return fieldCache;
}
public static Map<String, List<ReferenceStandardField>> getDDReferenceStandardFieldCache() {
public static Map<String, List<ReferenceStandardField>> buildCache() {
LOG.info("Creating standard field cache...");
DDCacheProcessor cacheProcessor = new DDCacheProcessor();
DataDictionaryCodeGenerator generator = new DataDictionaryCodeGenerator(cacheProcessor);
@@ -31,6 +31,11 @@ public class DDCacheProcessor extends WorksheetProcessor {
return cacheProcessor.getFieldCache();
}
public static DataDictionaryCodeGenerator getGeneratorInstance() {
DDCacheProcessor cacheProcessor = new DDCacheProcessor();
return new DataDictionaryCodeGenerator(cacheProcessor);
}
@Override
void processNumber(ReferenceStandardField field) {
addToFieldCache(field);
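
For reference, the renamed buildCache() is a drop-in replacement for the old getDDReferenceStandardFieldCache(); below is a minimal standalone sketch of calling it, assuming the reference workbook is available on the classpath. FieldCacheSketch itself is hypothetical and not part of this commit.

import java.util.List;
import java.util.Map;

import org.reso.certification.codegen.DDCacheProcessor;
import org.reso.models.ReferenceStandardField;

public class FieldCacheSketch {
  public static void main(String[] args) {
    // one call builds the standard field cache, keyed by resource name
    Map<String, List<ReferenceStandardField>> fieldCache = DDCacheProcessor.buildCache();
    fieldCache.forEach((resource, fields) ->
        System.out.println(resource + ": " + fields.size() + " standard fields"));
  }
}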

View File

@@ -168,8 +168,6 @@ public class DDLProcessor extends WorksheetProcessor {
return CaseFormat.UPPER_CAMEL.to(CaseFormat.LOWER_UNDERSCORE, resourceName).replace("o_u_i_d", "ouid");
}
private static String buildCreateLookupStatement(boolean useKeyNumeric) {
return
"\n\n/**\n" +

View File

@@ -11,7 +11,7 @@ import static org.reso.certification.codegen.WorksheetProcessor.buildWellKnownSt
public final class DataDictionaryCodeGenerator {
private static final Logger LOG = LogManager.getLogger(DataDictionaryCodeGenerator.class);
WorksheetProcessor processor = null;
private WorksheetProcessor processor = null;
Workbook workbook = null;
private DataDictionaryCodeGenerator() {
@@ -73,4 +73,8 @@ public final class DataDictionaryCodeGenerator {
LOG.info(ex);
}
}
public WorksheetProcessor getProcessor() {
return processor;
}
}
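
Since processor is now private, callers go through the new getProcessor() accessor; combined with DDCacheProcessor.getGeneratorInstance() above, the intended flow looks roughly like this hypothetical snippet. It is not part of this commit and assumes the caller lives in the same org.reso.certification.codegen package.

package org.reso.certification.codegen;

// Hypothetical sketch (not in this commit): build a generator wired to a fresh
// DDCacheProcessor, run it, then read the populated processor back via getProcessor().
public class GeneratorAccessSketch {
  public static void main(String[] args) {
    DataDictionaryCodeGenerator generator = DDCacheProcessor.getGeneratorInstance();
    generator.processWorksheets();
    DDCacheProcessor processor = (DDCacheProcessor) generator.getProcessor();
    System.out.println("Cached resources: " + processor.getFieldCache().keySet());
  }
}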

View File

@@ -1,18 +1,23 @@
package org.reso.certification.codegen;
import com.github.javafaker.Faker;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.reso.commander.common.Utils;
import org.reso.models.DataGenerator;
import org.reso.models.ReferenceStandardField;
import org.reso.models.ReferenceStandardLookup;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.time.OffsetDateTime;
import java.time.temporal.ChronoUnit;
import java.util.*;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.atomic.AtomicReference;
import java.util.stream.Collectors;
import static org.reso.certification.codegen.WorksheetProcessor.WELL_KNOWN_DATA_TYPES.*;
/**
*
* From: https://mariadb.com/kb/en/how-to-quickly-insert-data-into-mariadb/
*
* ALTER TABLE table_name DISABLE KEYS;
@@ -32,16 +37,247 @@ import java.util.concurrent.atomic.AtomicReference;
*/
public class DataDictionarySeedDataSqlGenerator {
private static final Logger LOG = LogManager.getLogger(DataDictionarySeedDataSqlGenerator.class);
private DDCacheProcessor processor;
private final static AtomicReference<Map<String, Map<String, DataGenerator>>> dataGeneratorResourceFieldMap
/**
* Cache of fields and their data generators by resource
*/
private final static AtomicReference<Map<String, Map<String, DataGenerator.FieldDataGenerator>>> dataGeneratorResourceFieldMap
= new AtomicReference<>(Collections.synchronizedMap(new LinkedHashMap<>()));
/**
* Cache of standard fields from the current Data Dictionary worksheet
*/
private final static AtomicReference<Map<String, List<ReferenceStandardField>>> referenceStandardFieldCache
= new AtomicReference<>(Collections.synchronizedMap(new LinkedHashMap<>()));
/**
* Cache of keys by resource name
*/
private final static AtomicReference<Map<String, String>> keyCache
= new AtomicReference<>(Collections.synchronizedMap(new LinkedHashMap<>()));
/**
* TODO: add a standard relationships cache so keys can be sampled from the keyCache for related records
*/
public DataDictionarySeedDataSqlGenerator() {
referenceStandardFieldCache.set(DDCacheProcessor.getDDReferenceStandardFieldCache());
dataGeneratorResourceFieldMap.set(DataGenerator.buildReferenceGeneratorCache());
LOG.info("Creating standard field cache...");
DDCacheProcessor processor = new DDCacheProcessor();
DataDictionaryCodeGenerator generator = new DataDictionaryCodeGenerator(processor);
generator.processWorksheets();
LOG.info("Standard field cache created!");
this.processor = processor;
//build a cache of the Dictionary standard fields
referenceStandardFieldCache.set(processor.getFieldCache());
//build a cache of Data Dictionary generators
DataGenerator dataGenerator = DataGenerator.deserialize();
dataGenerator.getResourceInfo().forEach(resourceInfo -> {
dataGeneratorResourceFieldMap.get().putIfAbsent(resourceInfo.getResourceName(), new LinkedHashMap<>());
dataGenerator.getFields().forEach(fieldDataGenerator ->
dataGeneratorResourceFieldMap.get().get(resourceInfo.getResourceName()).put(fieldDataGenerator.getFieldName(), fieldDataGenerator));
});
//extract counts for each resource
final Map<String, Integer> resourceCounts = dataGenerator.getResourceInfo().stream()
.collect(Collectors.toMap(DataGenerator.ResourceInfo::getResourceName, DataGenerator.ResourceInfo::getRecordCount));
//iterate over each resource in the Data Dictionary and generate n items from it, where n is the recordCount
//in the resourceInfo section of the data generator reference file
referenceStandardFieldCache.get().keySet().forEach(resourceName -> {
LOG.info("Processing " + resourceName + " resource...");
LOG.info(generateRowInsertStatements(resourceName, referenceStandardFieldCache.get().get(resourceName), resourceCounts.get(resourceName)));
});
}
/**
* INSERT INTO tbl_name (a,b,c)
* VALUES(1,2,3), (4,5,6), (7,8,9);
*
* TODO: this function needs to have the lookups split out and handled in their own insert statement generator
*
* @param resourceName
* @param referenceStandardFields
* @param numStatements
* @return
*/
final String generateRowInsertStatements(String resourceName, List<ReferenceStandardField> referenceStandardFields, Integer numStatements) {
final String tableName = DDLProcessor.buildDbTableName(resourceName);
StringBuilder stringBuilder = new StringBuilder();
stringBuilder.append("ALTER TABLE ").append(tableName).append(" DISABLE KEYS;\n");
stringBuilder.append("BEGIN;\n");
stringBuilder.append("INSERT INTO ").append(tableName);
stringBuilder.append(" (");
stringBuilder.append(referenceStandardFields.stream().map(ReferenceStandardField::getStandardName)
.collect(Collectors.joining(", ")));
stringBuilder.append(") VALUES");
for (int statementCount = 0; statementCount < numStatements; statementCount++) {
stringBuilder.append("\n\t(");
stringBuilder.append(referenceStandardFields.stream().map(this::generateValues).collect(Collectors.joining(", ")));
stringBuilder.append(")");
//add commas between values only if we're not at the last item
if (statementCount < numStatements - 1) stringBuilder.append(", ");
}
stringBuilder.append(";\n");
stringBuilder.append("COMMIT;\n");
stringBuilder.append("ALTER TABLE " + tableName + " ENABLE KEYS;\n\n");
return stringBuilder.toString();
}
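
This method follows the MariaDB bulk-load recipe quoted in the class Javadoc: disable keys, open a transaction, emit a single multi-row INSERT, commit, and re-enable keys. Below is a self-contained sketch of that pattern with hypothetical table and field names; in the generator the real names and values come from the Data Dictionary reference sheet and the field-level generators.

import java.util.List;

// Minimal sketch of the batched-insert pattern generateRowInsertStatements emits;
// the table, fields, and values here are hypothetical, not read from the reference sheet.
public class BatchedInsertSketch {
  public static void main(String[] args) {
    String tableName = "property";
    List<String> fields = List.of("ListPrice", "StandardStatus", "ModificationTimestamp");
    List<List<String>> rows = List.of(
        List.of("543210.55", "\"Active\"", "\"2021-04-27T08:44:57Z\""),
        List.of("120000.00", "\"Pending\"", "\"2020-11-02T17:03:12Z\""));

    StringBuilder sql = new StringBuilder()
        .append("ALTER TABLE ").append(tableName).append(" DISABLE KEYS;\n")
        .append("BEGIN;\n")
        .append("INSERT INTO ").append(tableName)
        .append(" (").append(String.join(", ", fields)).append(") VALUES");

    for (int i = 0; i < rows.size(); i++) {
      sql.append("\n\t(").append(String.join(", ", rows.get(i))).append(")");
      if (i < rows.size() - 1) sql.append(",");  // commas only between row tuples, not after the last
    }

    sql.append(";\nCOMMIT;\n")
       .append("ALTER TABLE ").append(tableName).append(" ENABLE KEYS;\n");

    System.out.println(sql);
  }
}
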
final String generateValues(ReferenceStandardField referenceStandardField) {
//now that row has been processed, extract field type and assemble the template
switch (referenceStandardField.getSimpleDataType()) {
case NUMBER:
return generateNumber(referenceStandardField);
case STRING_LIST_SINGLE:
return generateStringListSingle(referenceStandardField);
case STRING:
return generateString(referenceStandardField);
case BOOLEAN:
return generateBoolean(referenceStandardField);
case STRING_LIST_MULTI:
return generateStringListMulti(referenceStandardField).toString();
case DATE:
return generateDate(referenceStandardField);
case TIMESTAMP:
return generateTimestamp(referenceStandardField);
default:
if (referenceStandardField.getSimpleDataType() != null)
LOG.debug("Data type: " + referenceStandardField.getSimpleDataType() + " is not supported!");
}
return null;
}
String generateNumber(ReferenceStandardField referenceStandardField) {
return referenceStandardField.getSuggestedMaxPrecision() != null
? generateDecimal(referenceStandardField) : generateInteger(referenceStandardField);
}
String generateInteger(ReferenceStandardField referenceStandardField) {
final int MAX_INTEGER_POWER = 5;
int maxPower = Math.min(referenceStandardField.getSuggestedMaxLength(), MAX_INTEGER_POWER);
return String.valueOf(Faker.instance().number().numberBetween(0, (int)Math.pow(10, maxPower)));
}
String generateDecimal(ReferenceStandardField referenceStandardField) {
final int MAX_INTEGER_POWER = 6;
int maxPower = Math.min(referenceStandardField.getSuggestedMaxLength(), MAX_INTEGER_POWER);
return String.valueOf(Faker.instance().number()
.randomDouble(referenceStandardField.getSuggestedMaxPrecision(), 0, (int)Math.pow(10, maxPower)));
}
String generateBoolean(ReferenceStandardField referenceStandardField) {
return String.valueOf(new Random().nextBoolean()).toUpperCase();
}
String generateStringListSingle(ReferenceStandardField referenceStandardField) {
List<String> possibleChoices;
List<String> customExamples = dataGeneratorResourceFieldMap.get().get(referenceStandardField.getParentResourceName()).get(referenceStandardField.getStandardName()) != null
? dataGeneratorResourceFieldMap.get().get(referenceStandardField.getParentResourceName()).get(referenceStandardField.getStandardName()).getCustomExamples() : null;
int numElements;
if (processor.getEnumerations().containsKey(referenceStandardField.getLookupStandardName())) {
possibleChoices = processor.getEnumerations().get(referenceStandardField.getLookupStandardName()).stream()
.map(ReferenceStandardLookup::getLookupValue).collect(Collectors.toList());
} else if (customExamples != null && customExamples.size() > 0) {
possibleChoices = customExamples;
} else {
possibleChoices = new ArrayList<>();
possibleChoices.add(Faker.instance().chuckNorris().fact());
}
Collections.shuffle(possibleChoices);
return wrapInQuotes(possibleChoices.get(0));
}
static String wrapInQuotes(String item) {
return "\"" + item + "\"";
}
List<String> generateStringListMulti(ReferenceStandardField referenceStandardField) {
List<String> possibleChoices;
List<String> customExamples = dataGeneratorResourceFieldMap.get().get(referenceStandardField.getParentResourceName()).get(referenceStandardField.getStandardName()) != null
? dataGeneratorResourceFieldMap.get().get(referenceStandardField.getParentResourceName()).get(referenceStandardField.getStandardName()).getCustomExamples() : null;
int numElements, randomSize = 0;
Set<String> enumNames = new LinkedHashSet<>();
if (processor.getEnumerations().containsKey(referenceStandardField.getLookupStandardName())) {
numElements = processor.getEnumerations().get(referenceStandardField.getLookupStandardName()).size();
randomSize = ThreadLocalRandom.current().nextInt(0, numElements);
possibleChoices = processor.getEnumerations().get(referenceStandardField.getLookupStandardName()).stream()
.map(ReferenceStandardLookup::getLookupValue).collect(Collectors.toList());
} else if (customExamples != null && customExamples.size() > 0) {
randomSize = ThreadLocalRandom.current().nextInt(customExamples.size());
possibleChoices = customExamples;
} else {
possibleChoices = new ArrayList<>();
possibleChoices.add(Faker.instance().buffy().quotes());
}
new LinkedHashSet<>(randomSize);
for(int numEnums = 0; numEnums < randomSize; numEnums++) {
Collections.shuffle(possibleChoices);
if (possibleChoices.size() > 0) {
enumNames.add(wrapInQuotes(possibleChoices.get(0)));
possibleChoices.remove(0);
}
}
return new ArrayList<>(enumNames);
}
/**
* TODO: determine whether we need to be able to go both ways on dates on demand.
* For example, it might make sense to have open house dates in the future.
* This method currently only generates past dates.
* @param referenceStandardField
* @return
*/
String generateDate(ReferenceStandardField referenceStandardField) {
long numDays = new Random().nextInt(5 * 365); //max 5 years back
return wrapInQuotes(Utils.getIsoDate(OffsetDateTime.now().minus(numDays, ChronoUnit.DAYS)));
}
/**
* The only time a string will be generated will be when there is a custom example
* @param referenceStandardField
* @return
*/
String generateString(ReferenceStandardField referenceStandardField) {
List<String> customExamples = dataGeneratorResourceFieldMap.get().get(referenceStandardField.getParentResourceName()).get(referenceStandardField.getStandardName()) != null
? dataGeneratorResourceFieldMap.get().get(referenceStandardField.getParentResourceName()).get(referenceStandardField.getStandardName()).getCustomExamples() : null;
String value;
if (customExamples != null && customExamples.size() > 0) {
value = customExamples.get(new Random().nextInt(customExamples.size()));
} else {
value = Faker.instance().buffy().quotes();
}
if (value != null) {
value = wrapInQuotes(value);
}
return value;
}
String generateTimestamp(ReferenceStandardField referenceStandardField) {
long numDays = new Random().nextInt(5 * 365); //max 5 years back
return wrapInQuotes(Utils.getIsoTimestamp(OffsetDateTime.now().minus(numDays, ChronoUnit.DAYS)));
}
}
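
All of the work happens in the constructor, which builds the caches and logs one ALTER/BEGIN/INSERT/COMMIT block per resource, so a driver only needs to construct the class. A hypothetical entry point (not part of this commit) might look like:

package org.reso.certification.codegen;

// Hypothetical driver: constructing the generator builds the field, generator, and key
// caches and logs the generated seed-data INSERT statements for every resource.
public class SeedDataSqlMain {
  public static void main(String[] args) {
    new DataDictionarySeedDataSqlGenerator();
  }
}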

View File

@@ -27,7 +27,7 @@ public abstract class WorksheetProcessor {
public static final String REFERENCE_WORKSHEET = "RESODataDictionary-1.7.xlsx";
static final Map<String, String> resourceTemplates = new LinkedHashMap<>();
static final Map<String, Set<ReferenceStandardLookup>> standardEnumerationsMap = new LinkedHashMap<>();
static final Map<String, List<ReferenceStandardLookup>> standardEnumerationsMap = new LinkedHashMap<>();
static final Map<String, Map<String, ReferenceStandardField>> standardFieldsMap = new LinkedHashMap<>(new LinkedHashMap<>());
private static final Logger LOG = LogManager.getLogger(WorksheetProcessor.class);
String referenceDocument = null;
@@ -332,9 +332,7 @@ public abstract class WorksheetProcessor {
public void buildEnumerationMap() {
final String ENUMERATION_TAB_NAME = "Lookup Fields and Values";
final int LOOKUP_NAME_INDEX = 0, STANDARD_NAME_INDEX = 1;
DataFormatter formatter = new DataFormatter();
Sheet sheet = getReferenceWorkbook().getSheet(ENUMERATION_TAB_NAME);
buildWellKnownStandardEnumerationHeaderMap(sheet);
@@ -345,12 +343,11 @@ public abstract class WorksheetProcessor {
standardEnumeration.set(deserializeStandardEnumerationRow(row));
if (!standardEnumerationsMap.containsKey(standardEnumeration.get().getLookupField())) {
standardEnumerationsMap.put(standardEnumeration.get().getLookupField(), new LinkedHashSet<>());
standardEnumerationsMap.put(standardEnumeration.get().getLookupField(), new ArrayList<>());
}
standardEnumerationsMap.get(standardEnumeration.get().getLookupField()).add(standardEnumeration.get());
}
});
//enumerations.forEach((key, items) -> LOG.info("key: " + key + " , items: " + items.toString()));
}
public void buildStandardRelationships(Sheet worksheet) {
@@ -366,7 +363,7 @@
}
}
public Map<String, Set<ReferenceStandardLookup>> getEnumerations() {
public Map<String, List<ReferenceStandardLookup>> getEnumerations() {
return standardEnumerationsMap;
}
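
The switch from Set to List here is what lets the seed-data generator shuffle the lookup values and pick them by index (Collections.shuffle and get(int) need a List). A small sketch of that sampling, using hypothetical lookup values rather than the "Lookup Fields and Values" tab:

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

public class LookupSampleSketch {
  public static void main(String[] args) {
    // hypothetical lookup values; the real ones come from the reference worksheet
    List<String> standardStatus = new ArrayList<>(List.of("Active", "Pending", "Closed", "Canceled"));
    Collections.shuffle(standardStatus);        // requires a List, not a Set
    System.out.println(standardStatus.get(0));  // random single-enumeration pick
  }
}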

View File

@@ -419,7 +419,7 @@ public class IDXPayload {
try {
if (container.get().hasValidMetadata()) {
if (standardFieldCache.get().size() == 0) {
standardFieldCache.get().putAll(DDCacheProcessor.getDDReferenceStandardFieldCache());
standardFieldCache.get().putAll(DDCacheProcessor.buildCache());
}
} else {
failAndExitWithErrorMessage("Valid metadata was not retrieved from the server. Exiting!", scenario);

View File

@@ -136,11 +136,19 @@ public class Utils {
}
public static String getIsoTimestamp() {
return OffsetDateTime.now().format(DateTimeFormatter.ISO_INSTANT);
return getIsoTimestamp(OffsetDateTime.now());
}
public static String getIsoTimestamp(OffsetDateTime fromDate) {
return OffsetDateTime.from(fromDate.toInstant()).format(DateTimeFormatter.ISO_INSTANT);
return OffsetDateTime.from(fromDate).format(DateTimeFormatter.ISO_INSTANT);
}
public static String getIsoDate() {
return getIsoDate(OffsetDateTime.now());
}
public static String getIsoDate(OffsetDateTime fromDate) {
return fromDate.format(DateTimeFormatter.ISO_DATE);
}
}
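
The new overloads let callers format arbitrary OffsetDateTime values, which the seed-data generator uses for past DATE and TIMESTAMP values. A quick usage sketch follows; the printed values are illustrative and depend on the current clock and zone offset.

import java.time.OffsetDateTime;
import org.reso.commander.common.Utils;

public class IsoFormatSketch {
  public static void main(String[] args) {
    OffsetDateTime thirtyDaysAgo = OffsetDateTime.now().minusDays(30);
    System.out.println(Utils.getIsoDate(thirtyDaysAgo));       // e.g. 2021-04-27-07:00 (ISO_DATE keeps the offset when present)
    System.out.println(Utils.getIsoTimestamp(thirtyDaysAgo));  // e.g. 2021-04-27T08:44:57.123Z (ISO_INSTANT, in UTC)
  }
}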

View File

@@ -1,6 +1,6 @@
package org.reso.models;
import com.google.gson.*;
import com.google.gson.Gson;
import com.google.gson.reflect.TypeToken;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
@@ -8,7 +8,10 @@ import org.reso.commander.Commander;
import java.lang.reflect.Type;
import java.net.URL;
import java.util.*;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
/**
* Used to deserialize the Data Dictionary reference sheet into a cache of generators
@@ -30,7 +33,7 @@ public class DataGenerator {
*
* @return nested hashes of standard field generators
*/
public static Map<String, Map<String, DataGenerator>> buildReferenceGeneratorCache() {
public static DataGenerator deserialize() {
Map<String, Map<String, DataGenerator>> dataGeneratorResourceFieldMap =
Collections.synchronizedMap(new LinkedHashMap<>());
@@ -41,11 +44,27 @@
//note the open braces before getType()
Type targetClassType = new TypeToken<DataGenerator>() {}.getType();
DataGenerator dataGenerator = new Gson().fromJson(generatorJson, targetClassType);
return new Gson().fromJson(generatorJson, targetClassType);
}
LOG.info("Target Collection deserialized: " + dataGenerator);
public String getDescription() {
return description;
}
return dataGeneratorResourceFieldMap;
public String getVersion() {
return version;
}
public String getGeneratedOn() {
return generatedOn;
}
public List<ResourceInfo> getResourceInfo() {
return resourceInfo;
}
public List<FieldDataGenerator> getFields() {
return fields;
}
public static final class FieldDataGenerator {
@@ -72,9 +91,35 @@
public void setResourceName(String resourceName) {
this.resourceName = resourceName;
}
public String getFakerGeneratorName() {
return fakerGeneratorName;
}
public List<String> getCustomExamples() {
return customExamples;
}
public boolean hasFakerGenerator() {
return fakerGeneratorName != null && fakerGeneratorName.length() > 0;
}
public boolean hasCustomExamples() {
return customExamples != null && customExamples.size() > 0;
}
@Override
public String toString() {
return "FieldDataGenerator{" +
"fieldName='" + fieldName + '\'' +
", resourceName=" + (resourceName == null ? "null" : "'" + resourceName + "'") +
", fakerGeneratorName=" + (fakerGeneratorName == null ? "null" : "'" + fakerGeneratorName + "'") +
", customExamples=" + customExamples +
'}';
}
}
static final class ResourceInfo {
public static final class ResourceInfo {
private String resourceName;
private Integer recordCount;
@@ -82,5 +127,13 @@ public class DataGenerator {
this.resourceName = resourceName;
this.recordCount = recordCount;
}
public String getResourceName() {
return resourceName;
}
public Integer getRecordCount() {
return recordCount;
}
}
}
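
The new accessors are what DataDictionarySeedDataSqlGenerator walks to build its per-resource generator map. As an illustration, the same Gson/TypeToken deserialization can be driven from an inline JSON string; the payload below is hypothetical and its keys simply mirror the Java field names, it is not the bundled reference file.

import com.google.gson.Gson;
import com.google.gson.reflect.TypeToken;
import java.lang.reflect.Type;
import org.reso.models.DataGenerator;

public class DataGeneratorSketch {
  public static void main(String[] args) {
    // hypothetical payload; the real reference document ships with the Commander resources
    String generatorJson = "{"
        + "\"description\": \"sample\","
        + "\"version\": \"1.7\","
        + "\"resourceInfo\": [{\"resourceName\": \"Property\", \"recordCount\": 2}],"
        + "\"fields\": [{\"fieldName\": \"ListPrice\", \"customExamples\": [\"100000.00\"]}]"
        + "}";
    Type targetClassType = new TypeToken<DataGenerator>() {}.getType();
    DataGenerator dataGenerator = new Gson().fromJson(generatorJson, targetClassType);
    dataGenerator.getResourceInfo().forEach(info ->
        System.out.println(info.getResourceName() + " -> " + info.getRecordCount() + " records"));
    dataGenerator.getFields().forEach(field -> System.out.println(field));
  }
}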