Skip to content

Commit

Permalink
35195 Support functions on data export
Browse files Browse the repository at this point in the history
Implemented function and test for usage
  • Loading branch information
cgendreau committed Nov 8, 2024
1 parent 4e7c251 commit b5d4e63
Show file tree
Hide file tree
Showing 5 changed files with 68 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import io.crnk.core.resource.annotations.JsonApiResource;
import java.time.OffsetDateTime;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import lombok.AllArgsConstructor;
import lombok.Builder;
Expand Down Expand Up @@ -53,4 +54,6 @@ public class DataExportDto {
@IgnoreDinaMapping(reason = "handled by DataExportColumnsFieldAdapter")
private List<String> columnAliases;

private Map<String, DataExport.FunctionDef> columnFunctions;

}
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package ca.gc.aafc.dina.export.api.entity;

import java.time.OffsetDateTime;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import javax.persistence.Column;
Expand Down Expand Up @@ -40,6 +41,7 @@ public class DataExport implements DinaEntity {

public enum ExportStatus { NEW, RUNNING, COMPLETED, EXPIRED, ERROR }
public enum ExportType { TABULAR_DATA, OBJECT_ARCHIVE }
public enum FunctionType { CONCAT, CONVERT_COORDINATES_DD }

@Id
@GeneratedValue(strategy = GenerationType.IDENTITY)
Expand Down Expand Up @@ -82,6 +84,9 @@ public enum ExportType { TABULAR_DATA, OBJECT_ARCHIVE }
@Column
private String[] columnAliases;

@Transient
private Map<String, FunctionDef> columnFunctions;

@Enumerated(EnumType.STRING)
@NotNull
@Column
Expand All @@ -90,4 +95,6 @@ public enum ExportType { TABULAR_DATA, OBJECT_ARCHIVE }
@Transient
private Map<String, String> transitiveData;

/**
 * Definition of a function to apply to an exported column: the function type
 * and its parameters (column names read from the record, e.g. the columns to
 * concatenate for CONCAT).
 */
public record FunctionDef(FunctionType type, List<String> params) {
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
package ca.gc.aafc.dina.export.api.generator;

import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.collections4.IteratorUtils;
import org.apache.commons.collections4.MapUtils;
import org.apache.commons.lang3.StringUtils;
import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Service;

Expand Down Expand Up @@ -111,7 +114,7 @@ public CompletableFuture<UUID> export(DataExport dinaExport) throws IOException
new TypeReference<>() {
}, w)) {
export(dinaExport.getSource(), objectMapper.writeValueAsString(dinaExport.getQuery()),
output);
dinaExport.getColumnFunctions(), output);
}
} catch (IOException ioEx) {
updateStatus(dinaExport.getUuid(), DataExport.ExportStatus.ERROR);
Expand Down Expand Up @@ -148,14 +151,16 @@ public void deleteExport(DataExport dinaExport) throws IOException {
* @param output
* @throws IOException
*/
private void export(String sourceIndex, String query, DataOutput<JsonNode> output) throws IOException {
private void export(String sourceIndex, String query,
Map<String, DataExport.FunctionDef> columnFunctions,
DataOutput<JsonNode> output) throws IOException {
SearchResponse<JsonNode>
response = elasticSearchDataSource.searchWithPIT(sourceIndex, query);

boolean pageAvailable = response.hits().hits().size() != 0;
while (pageAvailable) {
for (Hit<JsonNode> hit : response.hits().hits()) {
processRecord(hit.id(), hit.source(), output);
processRecord(hit.id(), hit.source(), columnFunctions, output);
}
pageAvailable = false;

Expand All @@ -178,7 +183,9 @@ private void export(String sourceIndex, String query, DataOutput<JsonNode> outpu
* @param output
* @throws IOException
*/
private void processRecord(String documentId, JsonNode record, DataOutput<JsonNode> output) throws IOException {
private void processRecord(String documentId, JsonNode record,
Map<String, DataExport.FunctionDef> columnFunctions,
DataOutput<JsonNode> output) throws IOException {
if (record == null) {
return;
}
Expand All @@ -204,6 +211,19 @@ private void processRecord(String documentId, JsonNode record, DataOutput<JsonNo
objectMapper.valueToTree(entry.getValue()));
replaceNestedByDotNotation(attributeObjNode);
}

// Check if we have functions to apply
if(MapUtils.isNotEmpty(columnFunctions)) {
for (var functionDef : columnFunctions.entrySet()) {
switch (functionDef.getValue().type()) {
case CONCAT -> attributeObjNode.put(functionDef.getKey(),
handleConcatFunction(attributeObjNode, functionDef.getValue().params()));
case CONVERT_COORDINATES_DD -> attributeObjNode.put(functionDef.getKey(),
handleConvertCoordinatesDecimalDegrees(attributeObjNode,
functionDef.getValue().params()));
}
}
}
output.addRecord(attributeObjNode);
}
}
Expand All @@ -212,7 +232,7 @@ private Map<String, Object> extractById(String id, List<Map<String, Object>> doc
DocumentContext dc = JsonPath.using(jsonPathConfiguration).parse(document);
try {
List<Map<String, Object>> includedObj = JsonPathHelper.extractById(dc, id, JSON_PATH_TYPE_REF);
return CollectionUtils.isEmpty(includedObj) ? Map.of() : includedObj.get(0);
return CollectionUtils.isEmpty(includedObj) ? Map.of() : includedObj.getFirst();
} catch (PathNotFoundException pnf) {
return Map.of();
}
Expand Down Expand Up @@ -288,6 +308,33 @@ private static boolean jsonNodeHasFieldAndIsArray(JsonNode node, String fieldNam
return node.has(fieldName) && node.get(fieldName).isArray();
}

/**
 * CONCAT function: joins the text value of the given columns with a comma.
 * A column that is absent from the record contributes an empty string instead
 * of triggering a NullPointerException.
 *
 * @param attributeObjNode node holding the record's attributes
 * @param columns names of the columns whose values are concatenated, in order
 * @return the comma-separated concatenation of the column values
 */
private static String handleConcatFunction(ObjectNode attributeObjNode, List<String> columns) {
  List<String> toConcat = new ArrayList<>();
  for (String col : columns) {
    // get() returns null when the field is missing; guard before asText()
    JsonNode value = attributeObjNode.get(col);
    toConcat.add(value != null ? value.asText() : "");
  }
  return String.join(",", toConcat);
}

/**
 * CONVERT_COORDINATES_DD function: converts a [lon, lat] array node into a
 * "lat,lon" decimal-degrees string.
 *
 * <p>Fixes a bug where the computed value was discarded and {@code null} was
 * always returned.
 *
 * @param attributeObjNode node holding the record's attributes
 * @param columns expected to contain exactly one column name pointing to a
 *                2-element array node in [lon, lat] order
 * @return the coordinates as "lat,lon", or {@code null} if the input is not
 *         in the expected format
 */
private static String handleConvertCoordinatesDecimalDegrees(ObjectNode attributeObjNode,
                                                             List<String> columns) {
  String decimalDegreeCoordinates = null;
  if (columns.size() == 1) {
    JsonNode coordinates = attributeObjNode.get(columns.getFirst());
    // null-check: the column may be absent from the record
    if (coordinates != null && coordinates.isArray()) {
      List<JsonNode> longLatNode = IteratorUtils.toList(coordinates.iterator());
      if (longLatNode.size() == 2) {
        // stored as [lon, lat]; emitted as lat,lon
        decimalDegreeCoordinates =
          longLatNode.get(1).asText() + "," + longLatNode.get(0).asText();
      }
    }
  }
  if (StringUtils.isBlank(decimalDegreeCoordinates)) {
    log.debug("Invalid Coordinates format. Array of doubles in form of [lon,lat] expected");
    return null;
  }
  // previously this method always returned null, silently dropping the result
  return decimalDegreeCoordinates;
}

/**
* Creates a special document that represents all the values concatenated (by ; like the array elements) per attributes
* @param toMerge
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
Expand Down Expand Up @@ -73,12 +74,13 @@ public void testESDatasource() throws IOException {
.query(query)
.columns(List.of("id", "materialSampleName", "collectingEvent.dwcVerbatimLocality",
"dwcCatalogNumber", "dwcOtherCatalogNumbers", "managedAttributes.attribute_1",
"collectingEvent.managedAttributes.attribute_ce_1", "projects.name"))
"collectingEvent.managedAttributes.attribute_ce_1", "projects.name", "latLong"))
.columnFunctions(Map.of("latLong", new DataExport.FunctionDef(DataExport.FunctionType.CONVERT_COORDINATES_DD, List.of("collectingEvent.eventGeom"))))
.build());
assertNotNull(dto.getUuid());

try {
asyncConsumer.getAccepted().get(0).get();
asyncConsumer.getAccepted().getFirst().get();
} catch (InterruptedException | ExecutionException e) {
throw new RuntimeException(e);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,8 @@ public class JsonApiDocuments {
"attributes": {
"group": "cnc",
"dwcVerbatimLocality" : "Montreal",
"managedAttributes" : { "attribute_ce_1":"value ce 1"}
"managedAttributes" : { "attribute_ce_1":"value ce 1"},
"eventGeom": [-75.695000, 45.424721]
}
},
{
Expand Down

0 comments on commit b5d4e63

Please sign in to comment.