diff --git a/src/main/java/edu/harvard/dbmi/avillach/dictionary/facet/FacetQueryGenerator.java b/src/main/java/edu/harvard/dbmi/avillach/dictionary/facet/FacetQueryGenerator.java
new file mode 100644
index 0000000..161afa6
--- /dev/null
+++ b/src/main/java/edu/harvard/dbmi/avillach/dictionary/facet/FacetQueryGenerator.java
@@ -0,0 +1,278 @@
+package edu.harvard.dbmi.avillach.dictionary.facet;
+
+import edu.harvard.dbmi.avillach.dictionary.filter.Filter;
+import org.springframework.jdbc.core.namedparam.MapSqlParameterSource;
+import org.springframework.stereotype.Component;
+import org.springframework.util.CollectionUtils;
+import org.springframework.util.StringUtils;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+import java.util.stream.Stream;
+
+/**
+ * Generates the SQL that counts, per facet, the displayable concepts matching a {@link Filter}.
+ * Every query selects {@code facet.facet_id, count(*)}; which query is built depends on whether
+ * the filter selects facets from zero, one, or several categories, and on whether it has a search term.
+ */
+@Component
+public class FacetQueryGenerator {
+
+    /** WHERE-clause suffix that keeps only concepts matching the bound {@code :search} term. */
+    private static final String SEARCH_CLAUSE =
+        "AND concept_node.searchable_fields @@ (phraseto_tsquery(:search)::text || ':*')::tsquery";
+
+    /** Used in place of {@link #SEARCH_CLAUSE} when the filter has no search term. */
+    private static final String NO_SEARCH_CLAUSE = "";
+
+    /**
+     * Picks the facet count query that fits the filter and registers its named parameters.
+     *
+     * @param filter the selected facets and search term; either may be null or empty
+     * @param params receives every named parameter referenced by the returned SQL
+     * @return SQL selecting {@code facet.facet_id, count(*)}
+     */
+    public String createFacetSQLAndPopulateParams(Filter filter, MapSqlParameterSource params) {
+        boolean hasSearch = StringUtils.hasLength(filter.search());
+        if (CollectionUtils.isEmpty(filter.facets())) {
+            return hasSearch
+                ? createNoFacetSQLWithSearch(filter.search(), params)
+                : createNoFacetSQLNoSearch(params);
+        }
+        Map<String, List<Facet>> groupedFacets = filter.facets().stream()
+            .collect(Collectors.groupingBy(Facet::category));
+        if (groupedFacets.size() == 1) {
+            return hasSearch
+                ? createSingleCategorySQLWithSearch(filter.facets(), filter.search(), params)
+                : createSingleCategorySQLNoSearch(filter.facets(), params);
+        }
+        return hasSearch
+            ? createMultiCategorySQLWithSearch(groupedFacets, filter.search(), params)
+            : createMultiCategorySQLNoSearch(groupedFacets, params);
+    }
+
+    private String createMultiCategorySQLWithSearch(Map<String, List<Facet>> facets, String search, MapSqlParameterSource params) {
+        params.addValue("search", search);
+        return createMultiCategorySQL(facets, SEARCH_CLAUSE, params);
+    }
+
+    private String createMultiCategorySQLNoSearch(Map<String, List<Facet>> facets, MapSqlParameterSource params) {
+        return createMultiCategorySQL(facets, NO_SEARCH_CLAUSE, params);
+    }
+
+    // category names are data, so parameter and CTE names use a generated key instead
+    private Map<String, String> createSQLSafeCategoryKeys(List<String> categories) {
+        Map<String, String> keys = new HashMap<>();
+        for (int i = 0; i < categories.size(); i++) {
+            keys.put(categories.get(i), "cat_" + i);
+        }
+        return keys;
+    }
+
+    // counts facets when the filter selects facets from two or more categories
+    private String createMultiCategorySQL(Map<String, List<Facet>> facets, String searchClause, MapSqlParameterSource params) {
+        Map<String, String> categoryKeys = createSQLSafeCategoryKeys(List.copyOf(facets.keySet()));
+
+        /*
+            for each category of facet present in the filter, create a CTE that gives the IDs of all
+            the displayable concepts matching the facets selected in that category
+         */
+        String conceptsQuery = "WITH " + facets.entrySet().stream().map(entry -> {
+            String key = categoryKeys.get(entry.getKey());
+            // a list of arrays is bound as row values: ((?, ?), (?, ?), ...)
+            List<String[]> selectedFacets = entry.getValue().stream()
+                .map(facet -> new String[]{facet.category(), facet.name()})
+                .toList();
+            params.addValue("facets_in_" + key, selectedFacets);
+            params.addValue("facet_category_" + key, entry.getKey());
+            return """
+                facet_category_%s_concepts AS (
+                    SELECT
+                        concept_node.concept_node_id
+                    FROM
+                        facet
+                        JOIN facet__concept_node fcn ON fcn.facet_id = facet.facet_id
+                        JOIN facet_category fc on fc.facet_category_id = facet.facet_category_id
+                        JOIN concept_node ON concept_node.concept_node_id = fcn.concept_node_id
+                        LEFT JOIN concept_node_meta AS continuous_min ON concept_node.concept_node_id = continuous_min.concept_node_id AND continuous_min.KEY = 'min'
+                        LEFT JOIN concept_node_meta AS continuous_max ON concept_node.concept_node_id = continuous_max.concept_node_id AND continuous_max.KEY = 'max'
+                        LEFT JOIN concept_node_meta AS categorical_values ON concept_node.concept_node_id = categorical_values.concept_node_id AND categorical_values.KEY = 'values'
+                    WHERE
+                        (fc.name, facet.name) IN (:facets_in_%s)
+                        AND (
+                            continuous_min.value <> '' OR
+                            continuous_max.value <> '' OR
+                            categorical_values.value <> ''
+                        )
+                        %s
+                )
+                """.formatted(key, key, searchClause);
+        }).collect(Collectors.joining(",\n"));
+
+        /*
+            Facets in a category with selections are counted over the concepts matched by the selections
+            in every OTHER category, so INTERSECT the concept CTEs of those categories.
+         */
+        String selectedFacetsQuery = categoryKeys.values().stream().map(categoryKey -> {
+            String conceptsFromOtherCategories = categoryKeys.values().stream()
+                .filter(key -> !key.equals(categoryKey))
+                .map(key -> "SELECT concept_node_id FROM facet_category_" + key + "_concepts")
+                .collect(Collectors.joining(" INTERSECT "));
+            return """
+                (
+                    SELECT
+                        facet.facet_id, count(*)
+                    FROM
+                        facet
+                        JOIN facet_category fc on fc.facet_category_id = facet.facet_category_id
+                        JOIN facet__concept_node fcn ON fcn.facet_id = facet.facet_id
+                    WHERE
+                        fc.name = :facet_category_%s
+                        AND fcn.concept_node_id IN (%s)
+                    GROUP BY
+                        facet.facet_id
+                )
+                """.formatted(categoryKey, conceptsFromOtherCategories);
+        }).collect(Collectors.joining("\n\tUNION\n"));
+
+        /*
+            Facets in categories with no selections are counted over the concepts that match the
+            selections in every category.
+         */
+        params.addValue("all_selected_facet_categories", List.copyOf(facets.keySet()));
+        String conceptsFromAllCategories = categoryKeys.values().stream()
+            .map(key -> "SELECT concept_node_id FROM facet_category_" + key + "_concepts")
+            .collect(Collectors.joining(" INTERSECT "));
+        String unselectedFacetsQuery = """
+            UNION
+            (
+                SELECT
+                    facet.facet_id, count(*)
+                FROM
+                    facet
+                    JOIN facet_category fc on fc.facet_category_id = facet.facet_category_id
+                    JOIN facet__concept_node fcn ON fcn.facet_id = facet.facet_id
+                WHERE
+                    fc.name NOT IN (:all_selected_facet_categories)
+                    AND fcn.concept_node_id IN (%s)
+                GROUP BY
+                    facet.facet_id
+            )
+            """.formatted(conceptsFromAllCategories);
+
+        return conceptsQuery + selectedFacetsQuery + unselectedFacetsQuery;
+    }
+
+    private String createSingleCategorySQLWithSearch(List<Facet> facets, String search, MapSqlParameterSource params) {
+        params.addValue("search", search);
+        return createSingleCategorySQL(facets, SEARCH_CLAUSE, params);
+    }
+
+    private String createSingleCategorySQLNoSearch(List<Facet> facets, MapSqlParameterSource params) {
+        return createSingleCategorySQL(facets, NO_SEARCH_CLAUSE, params);
+    }
+
+    // counts facets when every selected facet belongs to the same category
+    private String createSingleCategorySQL(List<Facet> facets, String searchClause, MapSqlParameterSource params) {
+        params.addValue("facet_category_name", facets.getFirst().category());
+        params.addValue("facets", facets.stream().map(Facet::name).toList());
+        // return all the facets in the selected category that match displayable concepts (and search, if any)
+        // UNION
+        // all the facets from other categories that match concepts that
+        //     match selected facets from this category (and search, if any)
+        return """
+            (
+                SELECT
+                    facet.facet_id, count(*)
+                FROM
+                    facet
+                    JOIN facet__concept_node fcn ON fcn.facet_id = facet.facet_id
+                    JOIN facet_category fc on fc.facet_category_id = facet.facet_category_id
+                    JOIN concept_node ON concept_node.concept_node_id = fcn.concept_node_id
+                    LEFT JOIN concept_node_meta AS continuous_min ON concept_node.concept_node_id = continuous_min.concept_node_id AND continuous_min.KEY = 'min'
+                    LEFT JOIN concept_node_meta AS continuous_max ON concept_node.concept_node_id = continuous_max.concept_node_id AND continuous_max.KEY = 'max'
+                    LEFT JOIN concept_node_meta AS categorical_values ON concept_node.concept_node_id = categorical_values.concept_node_id AND categorical_values.KEY = 'values'
+                WHERE
+                    fc.name = :facet_category_name
+                    AND (
+                        continuous_min.value <> '' OR
+                        continuous_max.value <> '' OR
+                        categorical_values.value <> ''
+                    )
+                    %s
+                GROUP BY facet.facet_id
+            )
+            UNION
+            (
+                WITH matching_concepts AS (
+                    SELECT DISTINCT
+                        concept_node.concept_node_id AS concept_node_id
+                    FROM
+                        facet
+                        JOIN facet__concept_node fcn ON fcn.facet_id = facet.facet_id
+                        JOIN facet_category fc on fc.facet_category_id = facet.facet_category_id
+                        JOIN concept_node ON concept_node.concept_node_id = fcn.concept_node_id
+                        LEFT JOIN concept_node_meta AS continuous_min ON concept_node.concept_node_id = continuous_min.concept_node_id AND continuous_min.KEY = 'min'
+                        LEFT JOIN concept_node_meta AS continuous_max ON concept_node.concept_node_id = continuous_max.concept_node_id AND continuous_max.KEY = 'max'
+                        LEFT JOIN concept_node_meta AS categorical_values ON concept_node.concept_node_id = categorical_values.concept_node_id AND categorical_values.KEY = 'values'
+                    WHERE
+                        fc.name = :facet_category_name
+                        AND facet.name IN (:facets)
+                        AND (
+                            continuous_min.value <> '' OR
+                            continuous_max.value <> '' OR
+                            categorical_values.value <> ''
+                        )
+                        %s
+                )
+                SELECT
+                    facet.facet_id, count(*)
+                FROM
+                    facet
+                    JOIN facet__concept_node fcn ON fcn.facet_id = facet.facet_id
+                    JOIN facet_category fc on fc.facet_category_id = facet.facet_category_id
+                    JOIN matching_concepts ON fcn.concept_node_id = matching_concepts.concept_node_id
+                WHERE
+                    fc.name <> :facet_category_name
+                GROUP BY facet.facet_id
+            )
+            """.formatted(searchClause, searchClause);
+    }
+
+    private String createNoFacetSQLWithSearch(String search, MapSqlParameterSource params) {
+        params.addValue("search", search);
+        return createNoFacetSQL(SEARCH_CLAUSE);
+    }
+
+    private String createNoFacetSQLNoSearch(MapSqlParameterSource params) {
+        return createNoFacetSQL(NO_SEARCH_CLAUSE);
+    }
+
+    // counts facets when the filter selects no facets
+    private String createNoFacetSQL(String searchClause) {
+        // return all the facets that match displayable concepts (and search, if any)
+        return """
+            SELECT
+                facet.facet_id, count(*)
+            FROM
+                facet
+                JOIN facet__concept_node fcn ON fcn.facet_id = facet.facet_id
+                JOIN facet_category fc on fc.facet_category_id = facet.facet_category_id
+                JOIN concept_node ON concept_node.concept_node_id = fcn.concept_node_id
+                LEFT JOIN concept_node_meta AS continuous_min ON concept_node.concept_node_id = continuous_min.concept_node_id AND continuous_min.KEY = 'min'
+                LEFT JOIN concept_node_meta AS continuous_max ON concept_node.concept_node_id = continuous_max.concept_node_id AND continuous_max.KEY = 'max'
+                LEFT JOIN concept_node_meta AS categorical_values ON concept_node.concept_node_id = categorical_values.concept_node_id AND categorical_values.KEY = 'values'
+            WHERE
+                (
+                    continuous_min.value <> '' OR
+                    continuous_max.value <> '' OR
+                    categorical_values.value <> ''
+                )
+                %s
+            GROUP BY facet.facet_id
+            """.formatted(searchClause);
+    }
+}
diff --git a/src/main/java/edu/harvard/dbmi/avillach/dictionary/facet/FacetRepository.java b/src/main/java/edu/harvard/dbmi/avillach/dictionary/facet/FacetRepository.java
index 2921dec..ff054d7 100644
--- a/src/main/java/edu/harvard/dbmi/avillach/dictionary/facet/FacetRepository.java
+++ b/src/main/java/edu/harvard/dbmi/avillach/dictionary/facet/FacetRepository.java
@@ -9,10 +9,12 @@ import org.springframework.jdbc.core.namedparam.MapSqlParameterSource;
 import org.springframework.jdbc.core.namedparam.NamedParameterJdbcTemplate;
 import org.springframework.stereotype.Repository;
+import org.springframework.util.CollectionUtils;
 
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
+import java.util.stream.Collectors;
 
 @Repository
 public class FacetRepository {
 
@@ -34,6 +36,7 @@ public FacetRepository(
 
     public List<FacetCategory> getFacets(Filter filter) {
         QueryParamPair pair = generator.generateFilterQuery(filter, Pageable.unpaged());
+        // return a list of facets and the number of concepts associated with them
         String sql = """
             SELECT
                 facet_category.name AS category_name,
@@ -46,21 +49,81 @@ public List<FacetCategory> getFacets(Filter filter) {
                 facet
                 LEFT JOIN facet_category ON facet_category.facet_category_id = facet.facet_category_id
                 LEFT JOIN facet as parent_facet ON facet.parent_id = parent_facet.facet_id
-                INNER JOIN (
-                    SELECT
-                        count(*) as facet_count, inner_facet_q.facet_id AS inner_facet_id
-                    FROM
-                        facet AS inner_facet_q
-                        JOIN facet__concept_node AS inner_facet__concept_node_q ON inner_facet__concept_node_q.facet_id = inner_facet_q.facet_id
-                    WHERE
-                        inner_facet__concept_node_q.concept_node_id IN (%s)
-                    GROUP BY inner_facet_q.facet_id
-                ) AS facet_count_q ON facet_count_q.inner_facet_id = facet.facet_id
+
             """.formatted(pair.query());
 
         return template.query(sql, pair.params(), new FacetCategoryExtractor());
     }
 
+
+    /**
+     * Builds facet count SQL for the selected facets and registers its named parameters.
+     * The search term of the filter is not applied here.
+     * NOTE(review): nothing in this change calls this method; FacetQueryGenerator covers the same cases.
+     */
+    private String createFacetSQL(Filter filter, MapSqlParameterSource params) {
+        if (CollectionUtils.isEmpty(filter.facets())) {
+            return "";
+        }
+        Map<String, List<Facet>> categories = filter.facets().stream().collect(Collectors.groupingBy(Facet::category));
+        if (categories.size() == 1) {
+            params.addValue("facet_category_name", filter.facets().getFirst().category());
+            return """
+                (
+                    SELECT
+                        facet.facet_id, count(*)
+                    FROM facet
+                        JOIN facet__concept_node fcn ON fcn.facet_id = facet.facet_id
+                        JOIN concept_node ON concept_node.concept_node_id = fcn.concept_node_id
+                        JOIN facet_category fc on fc.facet_category_id = facet.facet_category_id
+                    WHERE fc.name = :facet_category_name
+                    GROUP BY facet.facet_id
+                )
+                """;
+        }
+        // category names are data, so parameter names are derived from the index instead
+        List<String> categoryNames = List.copyOf(categories.keySet());
+        String[] categoryQueries = new String[categoryNames.size()];
+        for (int i = 0; i < categoryNames.size(); i++) {
+            String category = categoryNames.get(i);
+            // get all the selected facets NOT IN this category
+            // get all the concepts for those facets
+            // get all the facets that match those concepts
+            // (row values must be arrays for the (a, b) IN (...) expansion)
+            List<String[]> facetsNotInCategory = filter.facets().stream()
+                .filter(facet -> !facet.category().equals(category))
+                .map(facet -> new String[]{facet.name(), facet.category()})
+                .toList();
+            params.addValue("facets_not_in_" + i, facetsNotInCategory);
+            params.addValue("category_" + i, category);
+
+            categoryQueries[i] = """
+                (
+                    WITH study_ids_dataset_ids_query AS (
+                        SELECT
+                            distinct(concept_node_id) AS concept_node_id
+                        FROM
+                            facet
+                            JOIN facet__concept_node fcn ON fcn.facet_id = facet.facet_id
+                            LEFT join facet_category fc ON fc.facet_category_id = facet.facet_category_id
+                        WHERE
+                            (facet.name, fc.name) IN (:facets_not_in_%d)
+                    )
+                    SELECT facet.facet_id, count(*)
+                    FROM
+                        facet
+                        JOIN facet__concept_node fcn ON fcn.facet_id = facet.facet_id
+                        JOIN study_ids_dataset_ids_query ON study_ids_dataset_ids_query.concept_node_id = fcn.concept_node_id
+                        LEFT JOIN facet_category fc ON fc.facet_category_id = facet.facet_category_id
+                    WHERE
+                        fc.name = :category_%d
+                    GROUP BY facet.facet_id
+                )
+                """.formatted(i, i);
+        }
+        return String.join("\n\tUNION\n", categoryQueries);
+    }
+
     public Optional<Facet> getFacet(String facetCategory, String facet) {
         String sql = """
             SELECT
diff --git a/src/main/java/edu/harvard/dbmi/avillach/dictionary/filter/FilterQueryGenerator.java b/src/main/java/edu/harvard/dbmi/avillach/dictionary/filter/FilterQueryGenerator.java
index 85f2c63..d6ec485 100644
--- a/src/main/java/edu/harvard/dbmi/avillach/dictionary/filter/FilterQueryGenerator.java
+++ b/src/main/java/edu/harvard/dbmi/avillach/dictionary/filter/FilterQueryGenerator.java
@@ -31,9 +31,7 @@ public QueryParamPair generateFilterQuery(Filter filter, Pageable pageable) {
         if (!CollectionUtils.isEmpty(filter.facets())) {
             clauses.addAll(createFacetFilter(filter.facets(), params));
         }
-        if (clauses.isEmpty()) {
-            clauses.add(createValuelessNodeFilter());
-        }
+        clauses.add(createValuelessNodeFilter());
 
         String query = "(\n" + String.join("\n\tINTERSECT\n", clauses) + "\n)";
 
diff --git a/src/test/java/edu/harvard/dbmi/avillach/dictionary/facet/FacetQueryGeneratorTest.java b/src/test/java/edu/harvard/dbmi/avillach/dictionary/facet/FacetQueryGeneratorTest.java
new file mode 100644
index 0000000..fe852fe
--- /dev/null
+++ b/src/test/java/edu/harvard/dbmi/avillach/dictionary/facet/FacetQueryGeneratorTest.java
@@ -0,0 +1,37 @@
+package edu.harvard.dbmi.avillach.dictionary.facet;
+
+import edu.harvard.dbmi.avillach.dictionary.filter.Filter;
+import org.junit.jupiter.api.Test;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.boot.test.context.SpringBootTest;
+import org.springframework.jdbc.core.namedparam.MapSqlParameterSource;
+
+import java.util.List;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+@SpringBootTest
+class FacetQueryGeneratorTest {
+
+    @Autowired
+    FacetQueryGenerator generator;
+
+    @Test
+    void shouldGenerateMultiCategoryQueryAndParams() {
+        Filter filter = new Filter(List.of(
+            new Facet("phs000007", "", "", null, null, "study_ids_dataset_ids", null),
+            new Facet("PhenX", "", "", null, null, "nsrr_harmonized", null)
+        ), "");
+        MapSqlParameterSource params = new MapSqlParameterSource();
+
+        String query = generator.createFacetSQLAndPopulateParams(filter, params);
+
+        assertTrue(query.startsWith("WITH "));
+        assertTrue(query.contains("facet_category_cat_0_concepts"));
+        assertTrue(query.contains("facet_category_cat_1_concepts"));
+        assertTrue(params.hasValue("facets_in_cat_0"));
+        assertTrue(params.hasValue("facets_in_cat_1"));
+        assertTrue(params.hasValue("all_selected_facet_categories"));
+        assertFalse(params.hasValue("search"));
+    }
+}