From e3516ab619deec828e56cc61bffc30e29dc3746c Mon Sep 17 00:00:00 2001 From: mereolog Date: Thu, 26 Sep 2024 16:30:44 +0200 Subject: [PATCH 1/3] removal of transformation script directory Signed-off-by: mereolog --- etc/transformation/GSRS/README.md | 19 - etc/transformation/GSRS/_common.sh | 18 - .../GSRS/gsrs-public-data-identifiers.rqg | 141 -------- .../GSRS/gsrs-public-data-names.rqg | 63 ---- .../GSRS/gsrs-public-data-relationships.rqg | 87 ----- .../GSRS/gsrs-public-data-substances.rqg | 216 ----------- etc/transformation/GSRS/transform-accurids.sh | 6 - etc/transformation/GSRS/transform-local.sh | 30 -- etc/transformation/README.md | 1 - etc/transformation/SPOR/README.md | 1 - .../SPOR/organisations/README.md | 27 -- .../SPOR/organisations/spor-organisations.rqg | 179 ---------- .../SPOR/organisations/transform-local.sh | 20 -- .../SPOR/referentials/README.md | 58 --- .../SPOR/referentials/spor-referentials.rqg | 334 ------------------ .../SPOR/referentials/transform-local.sh | 15 - etc/transformation/SPOR/substances/README.md | 19 - .../SPOR/substances/spor-substances.rqg | 171 --------- .../SPOR/substances/transform-local.sh | 20 -- etc/transformation/_env.sh | 4 - etc/transformation/_sparql_generate.sh | 7 - 21 files changed, 1436 deletions(-) delete mode 100644 etc/transformation/GSRS/README.md delete mode 100644 etc/transformation/GSRS/_common.sh delete mode 100644 etc/transformation/GSRS/gsrs-public-data-identifiers.rqg delete mode 100644 etc/transformation/GSRS/gsrs-public-data-names.rqg delete mode 100644 etc/transformation/GSRS/gsrs-public-data-relationships.rqg delete mode 100644 etc/transformation/GSRS/gsrs-public-data-substances.rqg delete mode 100755 etc/transformation/GSRS/transform-accurids.sh delete mode 100755 etc/transformation/GSRS/transform-local.sh delete mode 100644 etc/transformation/README.md delete mode 100644 etc/transformation/SPOR/README.md delete mode 100644 etc/transformation/SPOR/organisations/README.md delete mode 100644 
etc/transformation/SPOR/organisations/spor-organisations.rqg delete mode 100755 etc/transformation/SPOR/organisations/transform-local.sh delete mode 100644 etc/transformation/SPOR/referentials/README.md delete mode 100644 etc/transformation/SPOR/referentials/spor-referentials.rqg delete mode 100755 etc/transformation/SPOR/referentials/transform-local.sh delete mode 100644 etc/transformation/SPOR/substances/README.md delete mode 100644 etc/transformation/SPOR/substances/spor-substances.rqg delete mode 100755 etc/transformation/SPOR/substances/transform-local.sh delete mode 100644 etc/transformation/_env.sh delete mode 100644 etc/transformation/_sparql_generate.sh diff --git a/etc/transformation/GSRS/README.md b/etc/transformation/GSRS/README.md deleted file mode 100644 index 9941cdf9..00000000 --- a/etc/transformation/GSRS/README.md +++ /dev/null @@ -1,19 +0,0 @@ -# Minimum Requirements - -1. Install Docker by following the instructions at https://docs.docker.com/get-docker. -1. Open the GSRS release notes at https://gsrs.ncats.nih.gov/#/release and click the `public data` button in the `Download public data` section. The downloaded archive will have the `gsrs` extension. -1. Double-check the file name: it must be `public_data.gsrs`. If it is not, rename the file to `public_data.gsrs`. -1. Copy the `public_data.gsrs` file to this folder (i.e. `etc/transformation/GSRS`). - -# Transformation - -Instructions for transforming the GSRS data. - -## Local - -1. Run `./transform-local.sh` from the terminal console. - -## Accurids - -1. Run `./transform-accurids.sh` from the terminal console. -1. Upload the `public_data.json`, `gsrs-public-data-substances.rqg`, `gsrs-public-data-relationships.rqg`, `gsrs-public-data-names.rqg`, and `gsrs-public-data-identifiers.rqg` files to Accurids at https://pistoiaalliance.dev.accurids.com. 
diff --git a/etc/transformation/GSRS/_common.sh b/etc/transformation/GSRS/_common.sh deleted file mode 100644 index fea10914..00000000 --- a/etc/transformation/GSRS/_common.sh +++ /dev/null @@ -1,18 +0,0 @@ -# Unzip & Converting the public_data to valid json -if [ ! -f public_data.json ]; then - if [ -f public_data.gsrs ]; then - echo "Unzipping public_data.gsrs" - mv public_data.gsrs public_data.gz - $docker_cmd $java_docker_image sh -c "gunzip -f public_data.gz" - fi - - if [ -f public_data ]; then - echo "Convert the public_data to valid json format & name it as public_data.json" - $docker_cmd jetbrainsinfra/jq sh -c "jq -c -s '.' public_data > public_data.json" - fi -fi - -if [ ! -f public_data.json ]; then - echo "public_data.json file not found! Read the instructions in the README.md" - exit 1 -fi diff --git a/etc/transformation/GSRS/gsrs-public-data-identifiers.rqg b/etc/transformation/GSRS/gsrs-public-data-identifiers.rqg deleted file mode 100644 index 076220fc..00000000 --- a/etc/transformation/GSRS/gsrs-public-data-identifiers.rqg +++ /dev/null @@ -1,141 +0,0 @@ -PREFIX iter: -PREFIX rdf: -PREFIX rdfs: -PREFIX skos: -PREFIX cmns-id: -PREFIX cmns-ra: -PREFIX idmp-nara: -PREFIX cmns-txt: -PREFIX idmp-eura: -PREFIX idmp-ra: - -GENERATE { - # Substance - ?Substance ?SubstancePropertyBasedOnCodeType ?SubstanceObjectBasedOnCodeType . - - # Identifiers - ?IdentifiedByEntity a cmns-ra:RegisteredIdentifier ; - cmns-txt:hasTextValue ?CodeCode ; - cmns-ra:isRegisteredIn ?Registry ; - rdfs:seeAlso ?SeeAlsoIdentifier ; - cmns-id:identifies ?Substance ; - skos:note ?CodeType ; - . 
-} - -# General Substance Info + Molecular Structure -ITERATOR iter:JSONSurfer(, - "$[*]", - "$.approvalID", - "$.codes" - ) AS - -?I1 -?approvalID -?codes - -BIND( AS ?Substance ) - -# Codes -ITERATOR iter:JSONSurfer(?codes, - "$[*]", - "$.codeSystem" , - "$.code" , - "$.type" , - "$.url" - ) AS - -?I1_Codes -?CodeCodeSystem -?CodeCode -?CodeType -?CodeUrl - -BIND(str(?CodeUrl) AS ?SeeAlsoIdentifier) - -BIND( - IF(str(?CodeCodeSystem) = "CHEMBL", idmp-eura:ChemblDatabase , - IF(str(?CodeCodeSystem) = "CAS", idmp-nara:ChemicalAbstractsService , - IF(str(?CodeCodeSystem) = "WHO-ATC", idmp-ra:AnatomicalTherapeuticChemicalClassificationSystem , - IF(str(?CodeCodeSystem) = "WHO-ESSENTIAL MEDICINES LIST", idmp-ra:ModelListOfEssentialMedicines , - IF(str(?CodeCodeSystem) = "WHO-SDG", idmp-ra:UnitedNationsSustainableDevelopmentGoals , - IF(str(?CodeCodeSystem) = "WHO-VATC", idmp-ra:AnatomicalTherapeuticChemicalClassificationSystem , - IF(str(?CodeCodeSystem) = "AUSTRALIAN PLANT NAME INDEX", idmp-ra:AustralianPlantNameIndex , - IF(str(?CodeCodeSystem) = "CHEBI", idmp-eura:ChemicalEntitiesOfBiologicalInterest , - IF(str(?CodeCodeSystem) = "CODEX ALIMENTARIUS (GSFA)", idmp-ra:CodexGeneralStandardForFoodAdditivesOnlineDatabase , - IF(str(?CodeCodeSystem) = "DRUG BANK", idmp-nara:DrugBank , - IF(str(?CodeCodeSystem) = "ECHA (EC/EINECS)", idmp-eura:EuropeanInventoryOfExistingCommercialChemicalSubstances , - IF(str(?CodeCodeSystem) = "JECFA EVALUATION", idmp-ra:EvaluationsOfTheJointExpertCommitteeOnFoodAdditives , - IF(str(?CodeCodeSystem) = "JECFA MONOGRAPH", idmp-ra:EvaluationsOfTheJointExpertCommitteeOnFoodAdditives , - IF(str(?CodeCodeSystem) = "GRIN", idmp-nara:GermplasmResourcesInformationNetwork , - IF(str(?CodeCodeSystem) = "ITIS", idmp-nara:IntegratedTaxonomicInformationSystem , - IF(str(?CodeCodeSystem) = "KEGG", idmp-ra:KyotoEncyclopediaOfGenesAndGenomes , - IF(str(?CodeCodeSystem) = "LIVERTOX", idmp-nara:LiverTox , - IF(str(?CodeCodeSystem) = "MESH", 
idmp-nara:MedicalSubjectHeadings , - IF(str(?CodeCodeSystem) = "MERCK INDEX", idmp-ra:MerckIndexOnline , - IF(str(?CodeCodeSystem) = "NCI_THESAURUS", idmp-nara:NationalCancerInstituteThesaurus , - IF(str(?CodeCodeSystem) = "GENE", idmp-nara:NationalCenterForBiotechnologyInformationGeneDatabase , - IF(str(?CodeCodeSystem) = "NCBI TAXONOMY", idmp-nara:NationalCenterForBiotechnologyInformationTaxonomyDatabase , - IF(str(?CodeCodeSystem) = "NDF-RT", idmp-nara:NationalDrugFileReferenceTerminology , - IF(str(?CodeCodeSystem) = "EPA PESTICIDE CODE", idmp-nara:PesticideProductInformationSystem , - IF(str(?CodeCodeSystem) = "USDA PLANTS", idmp-nara:PlantListOfAcceptedNomenclatureTaxonomyAndSymbols , - IF(str(?CodeCodeSystem) = "PUBCHEM", idmp-nara:PubChem , - IF(str(?CodeCodeSystem) = "USP-MC MONOGRAPH", idmp-nara:USPharmacopeiaMedicinesCompendium , - IF(str(?CodeCodeSystem) = "USP-MC VALIDATION RPT", idmp-nara:USPharmacopeiaMedicinesCompendium , - IF(str(?CodeCodeSystem) = "UNIPROT", idmp-eura:UniProt , - IF(str(?CodeCodeSystem) = "UCSF-FDA TRANSPORTAL", idmp-nara:UniversityOfCaliforniaSanFrancisoFederalDrugAdministrationTransport , - IF(str(?CodeCodeSystem) = "WHO INTERNATIONAL PHARMACOPOEIA", idmp-ra:WorldHealthOrganizationInternationalPharmacopoeia , - IF(str(?CodeCodeSystem) = "COSMETIC INGREDIENT REVIEW (CIR)", idmp-nara:CosmeticIngredientReviewDatabase , - IF(str(?CodeCodeSystem) = "EVMPD", idmp-eura:ExtendedEudraVigilanceMedicinalProductDictionary , - IF(str(?CodeCodeSystem) = "EMA ASSESSMENT REPORTS", idmp-eura:EuropeanPublicAssessmentReports , - IF(str(?CodeCodeSystem) = "EMA VETERINARY ASSESSMENT REPORTS", idmp-eura:EuropeanPublicAssessmentReports , - IF(str(?CodeCodeSystem) = "FDA ORPHAN DRUG", idmp-nara:OrphanDrugProductDesignationDatabase , - IF(str(?CodeCodeSystem) = "FDA UNII", idmp-nara:GlobalSubstanceRegistrationSystem , - IF(str(?CodeCodeSystem) = "UNII", idmp-nara:GlobalSubstanceRegistrationSystem , - IF(str(?CodeCodeSystem) = "INN", idmp-ra:MedNetINN, - 
?CodeCodeSystem ))))))))))))))))))))))))))))))))))))))) - AS ?Registry) - -BIND(replace(str(?CodeCodeSystem), " ", "_") as ?CodeCodeSystemWithoutSpace) -BIND(replace(str(?CodeCode), " ", "_") as ?CodeCodeWithoutSpace) - -BIND( - IF(str(?CodeType) = "ALTERNATIVE", cmns-id:isIdentifiedBy, - IF(str(?CodeType) = "GENERIC (FAMILY)", cmns-id:isIdentifiedBy, - IF(str(?CodeType) = "NON-SPECIFIC STEREOCHEMISTRY", cmns-id:isIdentifiedBy, - IF(str(?CodeType) = "NON-SPECIFIC STOICHIOMETRY", cmns-id:isIdentifiedBy, - IF(str(?CodeType) = "NO STRUCTURE GIVEN", cmns-id:isIdentifiedBy, - IF(str(?CodeType) = "PRIMARY", cmns-id:isIdentifiedBy, - IF(str(?CodeType) = "SECONDARY", cmns-id:isIdentifiedBy, - IF(str(?CodeType) = "SUPERSEDED", cmns-id:isIdentifiedBy, - IF(str(?CodeType) = "CONCEPT", rdf:type, - ?nothing ))))))))) - AS ?SubstancePropertyBasedOnCodeType ) - -BIND( - IF(str(?CodeCodeSystem) = "NCI_THESAURUS", , - IF(str(?CodeCodeSystem) = "WHO-ATC", , - IF(str(?CodeCodeSystem) = "WHO-VATC", , - IF(str(?CodeType) = "ALTERNATIVE", , - IF(str(?CodeType) = "GENERIC (FAMILY)", , - IF(str(?CodeType) = "NON-SPECIFIC STEREOCHEMISTRY", , - IF(str(?CodeType) = "NON-SPECIFIC STOICHIOMETRY", , - IF(str(?CodeType) = "NO STRUCTURE GIVEN", , - IF(str(?CodeType) = "PRIMARY", , - IF(str(?CodeType) = "SECONDARY", , - IF(str(?CodeType) = "SUPERSEDED", , - IF(str(?CodeType) = "CONCEPT", , - ?nothing )))))))))))) - AS ?SubstanceObjectBasedOnCodeType ) - -BIND( - IF(str(?CodeType) = "ALTERNATIVE", ?SubstanceObjectBasedOnCodeType, - IF(str(?CodeType) = "GENERIC (FAMILY)", ?SubstanceObjectBasedOnCodeType, - IF(str(?CodeType) = "NON-SPECIFIC STEREOCHEMISTRY", ?SubstanceObjectBasedOnCodeType, - IF(str(?CodeType) = "NON-SPECIFIC STOICHIOMETRY", ?SubstanceObjectBasedOnCodeType, - IF(str(?CodeType) = "NO STRUCTURE GIVEN", ?SubstanceObjectBasedOnCodeType, - IF(str(?CodeType) = "PRIMARY", ?SubstanceObjectBasedOnCodeType, - IF(str(?CodeType) = "SECONDARY", ?SubstanceObjectBasedOnCodeType, - IF(str(?CodeType) 
= "SUPERSEDED", ?SubstanceObjectBasedOnCodeType, - ?nothing )))))))) - AS ?IdentifiedByEntity ) diff --git a/etc/transformation/GSRS/gsrs-public-data-names.rqg b/etc/transformation/GSRS/gsrs-public-data-names.rqg deleted file mode 100644 index ad0e1a0c..00000000 --- a/etc/transformation/GSRS/gsrs-public-data-names.rqg +++ /dev/null @@ -1,63 +0,0 @@ -PREFIX iter: -PREFIX skos: -PREFIX rdfs: -PREFIX cmns-dsg: -PREFIX cmns-cxtdsg: -PREFIX cmns-txt: -PREFIX idmp-sub: - -GENERATE { - # Substance - ?Substance ?labelProperty ?OfficialSubstanceName ; - rdfs:label ?OfficialSubstanceName ; - cmns-dsg:hasName ?SubstanceName ; - . - - # Names - ?SubstanceName a cmns-cxtdsg:ContextualName ; - cmns-txt:hasTextValue ?SubstanceNameValue ; - cmns-dsg:isNameOf ?Substance ; - idmp-sub:hasSubstanceNameType ?SubstanceNameType ; - . -} - -ITERATOR iter:JSONSurfer(, - "$[*]", - "$.approvalID", - "$.names" - ) AS - -?I1 -?approvalID -?names - -BIND( AS ?Substance ) - -# Names -ITERATOR iter:JSONSurfer(?names, - "$[*]", - "$.uuid", - "$.name", - "$.type", - "$.displayName" - ) AS - -?I1_Names -?NameUuid -?NameName -?NameType -?NameDisplayName - -BIND( AS ?SubstanceName) -BIND(?NameName AS ?SubstanceNameValue) -BIND(IF(str(?NameDisplayName) = "true", ?NameName, ?nothing) AS ?OfficialSubstanceName) -BIND(IF(str(?NameDisplayName) = "true", skos:prefLabel, ?nothing) AS ?labelProperty) - -BIND( - IF(str(?NameType) = "bn", idmp-sub:SubstanceNameClassifier-BrandName , - IF(str(?NameType) = "cd", idmp-sub:SubstanceNameClassifier-CompanyCode , - IF(str(?NameType) = "cn", idmp-sub:SubstanceNameClassifier-CommonName , - IF(str(?NameType) = "of", idmp-sub:SubstanceNameClassifier-OfficialName , - IF(str(?NameType) = "sys", idmp-sub:SubstanceNameClassifier-SystematicName , - idmp-sub:SubstanceNameClassifier ))))) - AS ?SubstanceNameType) diff --git a/etc/transformation/GSRS/gsrs-public-data-relationships.rqg b/etc/transformation/GSRS/gsrs-public-data-relationships.rqg deleted file mode 100644 index 
d8b4ea31..00000000 --- a/etc/transformation/GSRS/gsrs-public-data-relationships.rqg +++ /dev/null @@ -1,87 +0,0 @@ -PREFIX iter: -PREFIX skos: -PREFIX cmns-cxtdsg: -PREFIX cmns-rlcmp: -PREFIX idmp-narga: -PREFIX idmp-sub: - -GENERATE { - - # Relationships - ?Substance ?SubstancesRelationshipProperty ?RelatedSubstance . - - # Has Active Moiety Role and Concept - ?RelatedSubstanceAssociatedWithActiveMoietyRole cmns-rlcmp:playsRole ?ActiveMoietyRole . - - # Is Active Moiety Of - # ?RelatedSubstance ?IsActiveMoietyOfProperty ?Substance . - - ?ActiveMoietyRole a ?MoietyRole ; - cmns-cxtdsg:isApplicableIn ?RegulatoryContext; - cmns-rlcmp:isPlayedBy ?RelatedSubstanceAssociatedWithActiveMoietyRole ; - cmns-rlcmp:isManifestedIn ?SubstanceAssociatedWithActiveMoietyRole ; - . - - ?SubstanceAssociatedWithActiveMoietyRole cmns-rlcmp:manifests ?ActiveMoietyRole . - - ?RegulatoryContext cmns-cxtdsg:appliesTo ?ActiveMoietyRole . - -} - -# General Substance Info + Molecular Structure -ITERATOR iter:JSONSurfer(, - "$[*]", - "$.approvalID", - "$.relationships" - ) AS - -?I1 -?approvalID -?relationships - -BIND( AS ?Substance ) - -# Relationships -ITERATOR iter:JSONSurfer(?relationships, - "$[*]", - "$.type", - "$.relatedSubstance.approvalID" - ) AS - -?Relationship -?RelationshipType -?RelatedSubstanceApprovalID - -BIND( AS ?RelatedSubstance) - -BIND( - IF(?Substance = ?RelatedSubstance, ?nothing, # avoid meaningless triple - IF(str(?RelationshipType) = "ACTIVE MOIETY" || str(?RelationshipType) = "ACTIVE MOIETY (FOR EXCLUSIVITY)", ?nothing, - skos:related)) - AS ?SubstancesRelationshipProperty) - -BIND( - IF(?Substance = ?RelatedSubstance, ?nothing, # avoid meaningless triple - IF(str(?RelationshipType) = "ACTIVE MOIETY" || str(?RelationshipType) = "ACTIVE MOIETY (FOR EXCLUSIVITY)", idmp-sub:hasActiveMoiety, - skos:related)) - AS ?SubstancesRelationship) - -# HAS ACTIVE MOIETY ROLE AND CONTEXT -BIND(IF(uri(?SubstancesRelationship) = idmp-sub:hasActiveMoiety, ?approvalID, ?nothing) AS 
?SourceSubstanceApprovalID) - -BIND( - IF(str(?RelationshipType) = "ACTIVE MOIETY", , - IF(str(?RelationshipType) = "ACTIVE MOIETY (FOR EXCLUSIVITY)", , - ?nothing)) - AS ?ActiveMoietyRole) - -BIND(IF(uri(?SubstancesRelationship) = idmp-sub:hasActiveMoiety, ?Substance, ?nothing) AS ?SubstanceAssociatedWithActiveMoietyRole) -BIND(IF(uri(?SubstancesRelationship) = idmp-sub:hasActiveMoiety, ?RelatedSubstance, ?nothing) AS ?RelatedSubstanceAssociatedWithActiveMoietyRole) -BIND(IF(uri(?SubstancesRelationship) = idmp-sub:hasActiveMoiety, idmp-sub:ActiveMoietyRole, ?nothing) AS ?MoietyRole) -BIND(IF(uri(?SubstancesRelationship) = idmp-sub:hasActiveMoiety, idmp-sub:isActiveMoietyOf, ?nothing) AS ?IsActiveMoietyOfProperty) - -BIND( - IF(str(?RelationshipType) = "ACTIVE MOIETY", idmp-narga:RegulatoryContext-FoodAndDrugAdministrationGeneral, - IF(str(?RelationshipType) = "ACTIVE MOIETY (FOR EXCLUSIVITY)", idmp-narga:RegulatoryContext-ActiveMoietyForFoodAndDrugAdministrationExclusivity, - ?nothing)) - AS ?RegulatoryContext) diff --git a/etc/transformation/GSRS/gsrs-public-data-substances.rqg b/etc/transformation/GSRS/gsrs-public-data-substances.rqg deleted file mode 100644 index 38cd7aed..00000000 --- a/etc/transformation/GSRS/gsrs-public-data-substances.rqg +++ /dev/null @@ -1,216 +0,0 @@ -PREFIX iter: -PREFIX rdfs: -PREFIX owl: -PREFIX skos: -PREFIX idmp-sub: -PREFIX cmns-txt: -PREFIX cmns-dsg: - -GENERATE { - - # TEMP FIXES to be included in ontology development - #EPACompTox rdfs:label "EPA CompTox" . - #hasInhibitorTarget rdfs:label "has inhibitor target" . - - # Ontology - a owl:Ontology ; - rdfs:label "GSRS Public Data" ; - owl:imports ; - . 
- - # Substance - ?Substance a ?SubstanceType ; - ?hasMolecularFormulaByMoietyProperty ?structureFormula ; - cmns-txt:hasTextValue ?approvalID ; - idmp-sub:hasDefiningMolecularFormula ?structureFormula ; - idmp-sub:hasDefiningMolecularWeight ?structureMolecularWeight ; - idmp-sub:hasDefiningStereochemistry ?Stereochemistry ; - idmp-sub:hasDefiningStructure ?MolecularStructure ; - . - - # Molecular Structure - ?MolecularStructure a idmp-sub:MolecularStructure ; - idmp-sub:hasSMILESValue ?structureSmilesString ; - cmns-dsg:isDescribedBy ?StructureMolfileURI; - . - - # Molecular Structure Molfile - ?StructureMolfileURI a idmp-sub:Molfile; - idmp-sub:hasStructuralRepresentationText ?structureMolfile; - . - - - GENERATE{ - ?Substance idmp-sub:hasMoiety ?Moiety; - . - - ?Moiety a idmp-sub:Moiety; - rdfs:label ?moietyFormula; - idmp-sub:hasDefiningStructure ?MoietyStructure; - idmp-sub:hasDefiningMolecularWeight ?MoietyStructureMolecularWeight ; - idmp-sub:hasDefiningMolecularFormula ?moietyFormula ; - idmp-sub:hasDefiningStereochemistry ?MoietyStereochemistry ; - . - - ?MoietyStructure a idmp-sub:MolecularStructure ; - idmp-sub:hasSMILESValue ?MoietyStructureSmilesString ; - cmns-dsg:isDescribedBy ?MoietyStructureMolfileURI; - . - - ?MoietyStructureMolfileURI a idmp-sub:Molfile; - idmp-sub:hasStructuralRepresentationText ?MoietyStructureMolfile; - . - } - # Molecular Structure Moieties - # This part about moieties needs rework! 
(not a problem for current transform) - ITERATOR iter:JSONSurfer(?moieties, - "$[*]", - "$.hash", - "$.formula", - "$.molfile", - "$.smiles", - "$.stereochemistry", - "$.mwt", - "$.uuid" - ) AS - ?moiety - ?moietyHash - ?moietyFormula - ?MoietyStructureMolfile - ?MoietyStructureSmilesString - ?MoietystructureStereochemistry - ?MoietyStructureMolecularWeight - ?moietyStructureID - - BIND( AS ?Moiety ) - BIND( AS ?MoietyStructure ) - BIND( AS ?MoietyStructureMolfileURI ) - - BIND(IF(str(?MoietystructureStereochemistry) = "ABSOLUTE", idmp-sub:Stereochemistry-Absolute , - IF(str(?MoietystructureStereochemistry) = "ACHIRAL", idmp-sub:Stereochemistry-Achiral , - IF(str(?MoietystructureStereochemistry) = "AXIAL", idmp-sub:Stereochemistry-Axial , - IF(str(?MoietystructureStereochemistry) = "AXIAL R", idmp-sub:Stereochemistry-AxialR , - IF(str(?MoietystructureStereochemistry) = "AXIAL S", idmp-sub:Stereochemistry-AxialS , - IF(str(?MoietystructureStereochemistry) = "CHIRAL", idmp-sub:Stereochemistry-Chiral , - IF(str(?MoietystructureStereochemistry) = "CIS", idmp-sub:Stereochemistry-Cis , - IF(str(?MoietystructureStereochemistry) = "EPIMERIC", idmp-sub:Stereochemistry-Epimeric , - IF(str(?MoietystructureStereochemistry) = "MESO", idmp-sub:Stereochemistry-Meso , - IF(str(?MoietystructureStereochemistry) = "MIXED", idmp-sub:Stereochemistry-Mixed , - IF(str(?MoietystructureStereochemistry) = "OCTAHEDRAL 12", idmp-sub:Stereochemistry-Octahedral12 , - IF(str(?MoietystructureStereochemistry) = "OCTAHEDRAL 22", idmp-sub:Stereochemistry-Octahedral22 , - IF(str(?MoietystructureStereochemistry) = "OCTAHEDRAL 21", idmp-sub:Stereochemistry-Octahedral21 , - IF(str(?MoietystructureStereochemistry) = "RACEMIC", idmp-sub:Stereochemistry-Racemic , - IF(str(?MoietystructureStereochemistry) = "SQUARE PLANAR 1", idmp-sub:Stereochemistry-SquarePlanar1 , - IF(str(?MoietystructureStereochemistry) = "SQUARE PLANAR 2", idmp-sub:Stereochemistry-SquarePlanar2 , - 
IF(str(?MoietystructureStereochemistry) = "SQUARE PLANAR 3", idmp-sub:Stereochemistry-SquarePlanar3 , - IF(str(?MoietystructureStereochemistry) = "SQUARE PLANAR 4", idmp-sub:Stereochemistry-SquarePlanar4 , - IF(str(?MoietystructureStereochemistry) = "TETRAHEDRAL", idmp-sub:Stereochemistry-Tetrahedral , - IF(str(?MoietystructureStereochemistry) = "TRANS", idmp-sub:Stereochemistry-Trans , - IF(str(?MoietystructureStereochemistry) = "UNKNOWN", idmp-sub:Stereochemistry-Unknown , - ?MoietystructureStereochemistry ))))))))))))))))))))) - AS ?MoietyStereochemistry) - . - - GENERATE{ - # Relationships - ?Substance ?SubstancesRelationshipProperty ?RelatedSubstance; - . - # Is Active Moiety Of - ?RelatedSubstance ?IsActiveMoietyOfProperty ?Substance; - . - } - ITERATOR iter:JSONSurfer(?relationships, - "$[*]", - "$.type", - "$.relatedSubstance.approvalID" - ) AS - ?Relationship - ?RelationshipType - ?RelatedSubstanceApprovalID - - BIND( AS ?RelatedSubstance) - - BIND( - IF(str(?RelationshipType) = "ACTIVE MOIETY" || str(?RelationshipType) = "ACTIVE MOIETY (FOR EXCLUSIVITY)", idmp-sub:hasActiveMoiety, - ?nothing) - AS ?SubstancesRelationshipProperty) - - BIND( - IF(str(?RelationshipType) = "ACTIVE MOIETY" || str(?RelationshipType) = "ACTIVE MOIETY (FOR EXCLUSIVITY)", idmp-sub:hasActiveMoiety, - skos:related) - AS ?SubstancesRelationship) - - BIND(IF(uri(?SubstancesRelationship) = idmp-sub:hasActiveMoiety, idmp-sub:isActiveMoietyOf, ?nothing) AS ?IsActiveMoietyOfProperty) - . 
- -} -# General Substance Info + Molecular Structure -ITERATOR iter:JSONSurfer(, - "$[*]", - "$.approvalID", - "$.substanceClass", - "$.structure.id", - "$.structure.smiles", - "$.structure.formula", - "$.structure.stereochemistry", - "$.structure.mwt", - "$.structure.molfile", - "$.moieties", - "$.relationships" - ) AS - -?I1 -?approvalID -?substanceClass -?structureID -?structureSmilesString -?structureFormula -?structureStereochemistry -?structureMolecularWeight -?structureMolfile -?moieties -?relationships - -BIND( AS ?Substance ) -BIND( AS ?MolecularStructure ) -BIND( AS ?StructureMolfileURI ) - -BIND(IF(contains(str(?structureFormula), "."), idmp-sub:hasDefiningMolecularFormulaByMoiety , ?nothing ) AS ?hasMolecularFormulaByMoietyProperty) - -BIND(IF(str(?substanceClass) = "chemical", idmp-sub:ChemicalSubstance , - IF(str(?substanceClass) = "protein", idmp-sub:ProteinSubstance , - IF(str(?substanceClass) = "nucleicAcid", idmp-sub:NucleicAcidSubstance , - IF(str(?substanceClass) = "mixture", idmp-sub:Mixture , - IF(str(?substanceClass) = "polymer", idmp-sub:PolymerSubstance , - IF(str(?substanceClass) = "structurallyDiverse", idmp-sub:StructurallyDiverseSubstance , - IF(str(?substanceClass) = "specifiedSubstance", idmp-sub:SpecifiedSubstance , - IF(str(?substanceClass) = "specifiedSubstanceG1", idmp-sub:SpecifiedSubstanceGroup1 , - IF(str(?substanceClass) = "specifiedSubstanceG2", idmp-sub:SpecifiedSubstanceGroup2 , - IF(str(?substanceClass) = "specifiedSubstanceG3", idmp-sub:SpecifiedSubstanceGroup3 , - IF(str(?substanceClass) = "specifiedSubstanceG4", idmp-sub:SpecifiedSubstanceGroup4 , - idmp-sub:Substance ))))))))))) - AS ?SubstanceType) - -BIND(IF(str(?structureStereochemistry) = "ABSOLUTE", idmp-sub:Stereochemistry-Absolute , - IF(str(?structureStereochemistry) = "ACHIRAL", idmp-sub:Stereochemistry-Achiral , - IF(str(?structureStereochemistry) = "AXIAL", idmp-sub:Stereochemistry-Axial , - IF(str(?structureStereochemistry) = "AXIAL R", 
idmp-sub:Stereochemistry-AxialR , - IF(str(?structureStereochemistry) = "AXIAL S", idmp-sub:Stereochemistry-AxialS , - IF(str(?structureStereochemistry) = "CHIRAL", idmp-sub:Stereochemistry-Chiral , - IF(str(?structureStereochemistry) = "CIS", idmp-sub:Stereochemistry-Cis , - IF(str(?structureStereochemistry) = "EPIMERIC", idmp-sub:Stereochemistry-Epimeric , - IF(str(?structureStereochemistry) = "MESO", idmp-sub:Stereochemistry-Meso , - IF(str(?structureStereochemistry) = "MIXED", idmp-sub:Stereochemistry-Mixed , - IF(str(?structureStereochemistry) = "OCTAHEDRAL 12", idmp-sub:Stereochemistry-Octahedral12 , - IF(str(?structureStereochemistry) = "OCTAHEDRAL 22", idmp-sub:Stereochemistry-Octahedral22 , - IF(str(?structureStereochemistry) = "OCTAHEDRAL 21", idmp-sub:Stereochemistry-Octahedral21 , - IF(str(?structureStereochemistry) = "RACEMIC", idmp-sub:Stereochemistry-Racemic , - IF(str(?structureStereochemistry) = "SQUARE PLANAR 1", idmp-sub:Stereochemistry-SquarePlanar1 , - IF(str(?structureStereochemistry) = "SQUARE PLANAR 2", idmp-sub:Stereochemistry-SquarePlanar2 , - IF(str(?structureStereochemistry) = "SQUARE PLANAR 3", idmp-sub:Stereochemistry-SquarePlanar3 , - IF(str(?structureStereochemistry) = "SQUARE PLANAR 4", idmp-sub:Stereochemistry-SquarePlanar4 , - IF(str(?structureStereochemistry) = "TETRAHEDRAL", idmp-sub:Stereochemistry-Tetrahedral , - IF(str(?structureStereochemistry) = "TRANS", idmp-sub:Stereochemistry-Trans , - IF(str(?structureStereochemistry) = "UNKNOWN", idmp-sub:Stereochemistry-Unknown , - ?structureStereochemistry ))))))))))))))))))))) - AS ?Stereochemistry) diff --git a/etc/transformation/GSRS/transform-accurids.sh b/etc/transformation/GSRS/transform-accurids.sh deleted file mode 100755 index 809c8739..00000000 --- a/etc/transformation/GSRS/transform-accurids.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/sh - -. ../_env.sh -. _common.sh - -echo "Preparation is done. 
Now, upload public_data.json, gsrs-public-data-substances.rqg, gsrs-public-data-relationships.rqg, gsrs-public-data-names.rqg, and gsrs-public-data-identifiers.rqg files to Accurids https://pistoiaalliance.dev.accurids.com" diff --git a/etc/transformation/GSRS/transform-local.sh b/etc/transformation/GSRS/transform-local.sh deleted file mode 100755 index b61dd4d4..00000000 --- a/etc/transformation/GSRS/transform-local.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/sh - -. ../_env.sh -. ../_sparql_generate.sh -. _common.sh - -# Perform transformation -if [ -f public_data.json ]; then - queries=(gsrs-public-data-identifiers gsrs-public-data-names gsrs-public-data-relationships gsrs-public-data-substances) - for query in "${queries[@]}"; do - echo "Transforming $query.rqg" - $docker_cmd $java_docker_image sh -c "java -Xmx20g \ - -jar sparql-generate-$sparql_generate_version.jar \ - -l INFO \ - -q $query.rqg \ - -o $query.ttl \ - -fo TTL" - done - - result=gsrs-transformed.ttl - rm -rf $result - for query in "${queries[@]}"; do - cat $query.ttl >>$result - done - echo "Result available in $result" - - for query in "${queries[@]}"; do - rm -rf $query.ttl - done -fi diff --git a/etc/transformation/README.md b/etc/transformation/README.md deleted file mode 100644 index c896833e..00000000 --- a/etc/transformation/README.md +++ /dev/null @@ -1 +0,0 @@ -Transformation scripts for publicly available data into IDMP compliant RDF format using [sparql-generate](https://ci.mines-stetienne.fr/sparql-generate) library. diff --git a/etc/transformation/SPOR/README.md b/etc/transformation/SPOR/README.md deleted file mode 100644 index 4d497efd..00000000 --- a/etc/transformation/SPOR/README.md +++ /dev/null @@ -1 +0,0 @@ -Transformation scripts for [SPOR](https://spor.ema.europa.eu/sporwi/) datasets. 
diff --git a/etc/transformation/SPOR/organisations/README.md b/etc/transformation/SPOR/organisations/README.md deleted file mode 100644 index 55aa0138..00000000 --- a/etc/transformation/SPOR/organisations/README.md +++ /dev/null @@ -1,27 +0,0 @@ -# Minimum Requirements - -1. Install Docker by following the instructions at https://docs.docker.com/get-docker. -1. Download the `organisations` dataset from https://spor.ema.europa.eu/omswi/#/searchOrganisations by clicking the `Export All Organisations` link. The downloaded file will be in ZIP format (e.g. `locations.zip`). -1. Extract/unzip the ZIP file (e.g. `locations.zip`). The extracted file will be in CSV format (e.g. `locations.csv`). -1. Make sure the CSV filename is `locations.csv`. -1. Copy the `locations.csv` file to this folder (i.e. `etc/transformation/SPOR/organisations`). - -## Cleanup Data - -Unfortunately, `locations.csv` requires some cleanup before it can be used. - -1. Remove the entry on line `14364` (i.e. `Organisation ID: ORG-100011833`). Reason: Data is too long. -1. Update the entry with Alternative Name `"Ośrodek Diabetologiczny "Popula" Elżbieta Popławska, Grażyna Laszewska Sp. j."` to `"Ośrodek Diabetologiczny 'Popula' Elżbieta Popławska, Grażyna Laszewska Sp. j."`. Reason: Invalid double quote. -1. Update the entry with Alternative Name `""Pharmamagist" Gyógyszeripari, Kereskedelmi És Szolgáltató kft.¦Pharmamagist Ltd."` to `"'Pharmamagist' Gyógyszeripari, Kereskedelmi És Szolgáltató kft.¦Pharmamagist Ltd."`. Reason: Invalid double quote. - -# Transformation - -Instructions for transforming the SPOR organisations data. - -## Local - -1. Run `./transform-local.sh` from the terminal console. - -## Accurids - -1. Upload the `locations.csv` and `spor-organisations.rqg` files to Accurids at https://pistoiaalliance.dev.accurids.com. 
diff --git a/etc/transformation/SPOR/organisations/spor-organisations.rqg b/etc/transformation/SPOR/organisations/spor-organisations.rqg deleted file mode 100644 index 926bab0b..00000000 --- a/etc/transformation/SPOR/organisations/spor-organisations.rqg +++ /dev/null @@ -1,179 +0,0 @@ -PREFIX iter: -PREFIX func: -PREFIX skos: -PREFIX schema: -PREFIX cmns-org: -PREFIX cmns-reg: -PREFIX cmns-txt: -PREFIX cmns-id: -PREFIX rdfs: - -GENERATE { - # Generate Organisations - - a cmns-org:LegalEntity ; - skos:prefLabel ?Name ; - skos:altLabel ?Acronym ; - skos:note ?Spor_Status_URI ; - schema:address ; - cmns-id:isIdentifiedBy ; - . - - - a cmns-reg:RegisteredIdentifier; - cmns-txt:hasTextValue ?Organisation_ID; - cmns-id:identifies ; - cmns-reg:isRegisteredIn - . - - - GENERATE { - - skos:altLabel ?Alternative_Name_LangString ; - . - } - ITERATOR iter:for(0,1,?Alternative_Name_Length) AS ?index - WHERE { - BIND(func:SplitAtPosition(?Alternative_Name,"¦",?index) as ?Alt_Name) - BIND(IF(?index - skos:broader ; - . - } - ITERATOR iter:for(0,1,?Category_Classification_Category_Code_Length) AS ?index - WHERE { - BIND(func:SplitAtPosition(?Category_Classification_Category_Code,"¦",?index) as ?code) - } . - - # Generate Locations - - a schema:PostalAddress ; - skos:prefLabel ?Location_ID ; - rdfs:seeAlso ?Location_Link_Href ; - schema:streetAddress ?Full_Address ; - schema:postalCode ?Address_Postal_Code ; - schema:addressLocality ?City_Normalized ; - schema:addressRegion ?State_Normalized ; - schema:addressCountry ?Address_Country_Display_Name ; - schema:postOfficeBoxNumber ?Address_PO_Box ; - . 
-} -SOURCE AS ?source -ITERATOR iter:CSV(?source) AS -?Organisation_ID -?Identifier_Code -?Mapping_Code_System -?Mapping_Code_System_Name -?Mapping_Code -?Mapping_Valid_From -?Mapping_Valid_To -?Category_Classification_Category_Code_System_Name -?Category_Classification_Category_Code -?Category_Classification_Category_Display_Name -?Status -?Name -?Acronym -?Alternative_Name -?Alternative_Name_Language -?Version_Timestamp -?Location_ID -?Location_Link_Href -?Current_Version -?Location_Mapping_Code_System -?Location_Mapping_Code_System_Name -?Location_Mapping_Code -?Location_Status -?Address_PO_Box -?Address_Postal_Code -?Address_Country_Code -?Address_Country_Display_Name -?Address_GPS_Location -?Address_Language -?Address_Line_1 -?Address_Line_2 -?Address_Line_3 -?Address_Line_4 -?Address_City -?Address_State -?Address_Region -?Address_County -?Communication_Details_Email_Address -?Communication_Details_Phone_Number_Country_Prefix -?Communication_Details_Phone_Number -?Communication_Details_Phone_Number_Extension -?Communication_Details_Address_PO_Box -?Communication_Details_Address_Postal_Code -?Communication_Details_Address_Country_Code -?Communication_Details_Address_Country_Display_Name -?Communication_Details_Address_GPS_Location -?Communication_Details_Address_Language -?Communication_Details_Address_Line_1 -?Communication_Details_Address_Line_2 -?Communication_Details_Address_Line_3 -?Communication_Details_Address_Line_4 -?Communication_Details_Address_City -?Communication_Details_Address_State -?Communication_Details_Address_Region -?Communication_Details_Address_County -?Location_Version_Timestamp -WHERE { - - # Normalized Data - BIND( - IF(?Alternative_Name_Language = "DE¦" && ?Alternative_Name = "Referat Arzneimittel - Und Apothekenwesen¦Landesverwaltungsamt Sachsen-Anhalt", "DE¦DE", - IF(?Alternative_Name_Language = "ES¦" && ?Alternative_Name = "Centro De Farmacovigilancia De Navarra¦Centro de Farmacovigilancia de Navarragilancia de Navarra", "ES¦ES", 
- IF(?Alternative_Name_Language = "SV¦" && ?Alternative_Name = "Rättsmedicinalverket¦Swedish National Board of Forensic Medicine and Karolinska Institute", "SV¦EN", - ?Alternative_Name_Language ))) - AS ?Alternative_Name_Language_Normalized ) - - BIND ( IF (BOUND(?Alternative_Name), strlen(replace(?Alternative_Name, "[^¦]", "")) + 1, 0) as ?Alternative_Name_Length) - BIND ( IF (BOUND(?Alternative_Name_Language_Normalized), strlen(replace(?Alternative_Name_Language_Normalized, "[^¦]", "")) + 1, 0) as ?Alternative_Name_Language_Normalized_Length) - - BIND ( - IF (contains(?Address_Line_1, "¦"), - IF (regex(?Address_Line_1, "^[^¦]", "i"), func:SplitAtPosition(?Address_Line_1,"¦",0), ?unbound), ?Address_Line_1) - as ?Address_Line_1_Normalized) - BIND ( - IF (contains(?Address_Line_2, "¦"), - IF (regex(?Address_Line_2, "^[^¦]", "i"), func:SplitAtPosition(?Address_Line_2,"¦",0), ?unbound), ?Address_Line_2) - as ?Address_Line_2_Normalized) - BIND ( - IF (contains(?Address_Line_3, "¦"), - IF (regex(?Address_Line_3, "^[^¦]", "i"), func:SplitAtPosition(?Address_Line_3,"¦",0), ?unbound), ?Address_Line_3) - as ?Address_Line_3_Normalized) - BIND ( - IF (contains(?Address_Line_4, "¦"), - IF (regex(?Address_Line_4, "^[^¦]", "i"), func:SplitAtPosition(?Address_Line_4,"¦",0), ?unbound), ?Address_Line_4) - as ?Address_Line_4_Normalized) - BIND ( - IF (BOUND(?Address_Line_2_Normalized), "{?Address_Line_1_Normalized}\n{?Address_Line_2_Normalized}", ?Address_Line_1_Normalized) as ?Address_Line_1_2_Normalized) - BIND ( - IF (BOUND(?Address_Line_3_Normalized), "{?Address_Line_1_2_Normalized}\n{?Address_Line_3_Normalized}", ?Address_Line_1_2_Normalized) as ?Address_Line_1_2_3_Normalized) - BIND ( - IF (BOUND(?Address_Line_4_Normalized), "{?Address_Line_1_2_3_Normalized}\n{?Address_Line_4_Normalized}", ?Address_Line_1_2_3_Normalized) as ?Full_Address) - BIND ( - IF (contains(?Address_City, "¦"), - IF (regex(?Address_City, "^[^¦]", "i"), func:SplitAtPosition(?Address_City,"¦",0), ?unbound), 
?Address_City) - as ?City_Normalized) - BIND ( - IF (contains(?Address_State, "¦"), - IF (regex(?Address_State, "^[^¦]", "i"), func:SplitAtPosition(?Address_State,"¦",0), ?unbound), ?Address_State) - as ?State_Normalized) - - BIND ( IF (BOUND(?Category_Classification_Category_Code), strlen(replace(?Category_Classification_Category_Code, "[^¦]", "")) + 1, 0) as ?Category_Classification_Category_Code_Length) - - # Map to RMS Organisation Role Status - BIND ( - IF(?Status = "PROVISIONAL", , - IF(?Status = "ACTIVE", , - IF(?Status = "INACTIVE", , - IF(?Status = "DELETED", , - ?unbound )))) - AS ?Spor_Status_URI ) - -} diff --git a/etc/transformation/SPOR/organisations/transform-local.sh b/etc/transformation/SPOR/organisations/transform-local.sh deleted file mode 100755 index 7e0a5e21..00000000 --- a/etc/transformation/SPOR/organisations/transform-local.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/sh - -. ../../_env.sh -. ../../_sparql_generate.sh - -# Perform transformation -if [ -f locations.csv ]; then - query=spor-organisations - echo "Transforming $query.rqg" - $docker_cmd $java_docker_image sh -c "java -Xmx20g \ - -jar sparql-generate-$sparql_generate_version.jar \ - -l INFO \ - -q $query.rqg \ - -o $query.ttl \ - -fo TTL" - echo "Result available in $query.ttl" -else - echo "locations.csv file not found! Read the instructions in the README.md" - exit 1 -fi diff --git a/etc/transformation/SPOR/referentials/README.md b/etc/transformation/SPOR/referentials/README.md deleted file mode 100644 index a6b31dbb..00000000 --- a/etc/transformation/SPOR/referentials/README.md +++ /dev/null @@ -1,58 +0,0 @@ -# Minimum Requirements - -1. Install Docker. Follow https://docs.docker.com/get-docker instructions. -1. Login to https://spor.ema.europa.eu/. The login button is located in the top right corner. Create one if you don't have an account. -1. Download the `referentials` dataset from https://spor.ema.europa.eu/rmswi/#/lists. - 1. 
Choose the list that you want to transform (eg: `Age Range`). - 1. Click the `Export` icon on the right side in the `Actions` column. - 1. A popup will show, choose format CSV and click the `Export` button. - 1. The downloaded file will be in ZIP format (eg: `export-100000000001.zip`). -1. Extract/unzip the zip file (eg: `export-100000000001.zip`). The extracted file will be in CSV format (eg: `100000000001.csv`). -1. Copy the CSV file (eg: `100000000001.csv`) to this folder (i.e. `etc/transformation/SPOR/referentials`). - -## Create an Ontology File -1. Create a new CSV file inside of this folder (i.e. `etc/transformation/SPOR/referentials`), and named it using the same list ID from the previous task (eg: `100000000001-ontology.csv`). -1. Inside the CSV file create a header like below: - ``` - Identifier,Name,Owner,Domain,Status,Modified on,Created on,Version - ``` -1. Add additional metadata information from the list. This information can be seen in the https://spor.ema.europa.eu/rmswi/#/lists by clicking the right arrow icon on the left of a list. Example metadata for list `Age Range`: - ``` - 100000000001,"Age Range",EMA,Human and Veterinary use,CURRENT,2022-03-23T11:05:54,2007-09-12T00:00:00,75 - ``` -1. So the final file of `100000000001-ontology.csv` will look like this: - ``` - Identifier,Name,Owner,Domain,Status,Modified on,Created on,Version - 100000000001,"Age Range",EMA,Human and Veterinary use,CURRENT,2022-03-23T11:05:54,2007-09-12T00:00:00,75 - ``` - -## Update spor-referentials.rqg File -1. Open the `spor-referentials.rqg` file. -1. Find the line: - ``` - ITERATOR iter:CSV(?ontology_source) AS - ``` - Change it with the ontology file that we created from the previous task (eg: `100000000001-ontology.csv`). - ``` - ITERATOR iter:CSV(<100000000001-ontology.csv>) AS - ``` -1. Find the line: - ``` - ITERATOR iter:CSV(?source) AS - ``` - Change it with the extracted CSV file (eg: `100000000001.csv`). 
- ``` - ITERATOR iter:CSV(<100000000001.csv>) AS - ``` - -# Transformation - -Instructions for transforming the SPOR referentials data. - -## Local - -1. Run `./transform-local.sh` from the terminal console. - -## Accurids - -1. Upload `spor-referentials.rqg` & the CSV files (eg: `100000000001.csv` & `100000000001-ontology.csv`) files to Accurids https://pistoiaalliance.dev.accurids.com. diff --git a/etc/transformation/SPOR/referentials/spor-referentials.rqg b/etc/transformation/SPOR/referentials/spor-referentials.rqg deleted file mode 100644 index 392233ac..00000000 --- a/etc/transformation/SPOR/referentials/spor-referentials.rqg +++ /dev/null @@ -1,334 +0,0 @@ -PREFIX iter: -PREFIX func: -PREFIX dct: -PREFIX skos: -PREFIX skos-xl: -PREFIX rdf: -PREFIX rdfs: -PREFIX owl: -PREFIX xsd: -PREFIX cmns-cls: - -GENERATE { - - a owl:Ontology ; - rdfs:label "SPOR RMS - {?Name}" ; - skos:scopeNote ?Spor_Domain_URI ; - skos:note ?Spor_Status_URI ; - dct:modified "{?Modified_On}"^^xsd:dateTime ; - dct:created "{?Created_On}"^^xsd:dateTime ; - owl:versionInfo ?Version ; - rdfs:comment "Created as part of the IDMP Ontology project using Accurids" ; - . - - - a owl:Class ; - rdfs:label ?Name ; - rdfs:seeAlso ; - skos:exactMatch ?ExactMatch ; - . - - GENERATE { - - a ; - rdfs:label ?English_Term_Name_LangString ; - skos:definition ?Term_Description_LangString ; - skos:notation ?Term_Symbol ; - cmns-cls:isClassifiedBy ; - cmns-cls:isClassifiedBy ?Spor_Term_Status_URI ; - dct:created "{?Term_Created_On_ISO}"^^xsd:dateTime ; - dct:modified "{?Term_Modified_On_ISO}"^^xsd:dateTime ; - . - - GENERATE { - - skos:broader ; - } - ITERATOR iter:for(0,1,?Parent_ID_Length) AS ?index - WHERE { - BIND(func:SplitAtPosition(?Parent_ID,"¦",?index) AS ?Splitted_Parent_ID) - } . 
- - # generate reification statement for term status - GENERATE { - - a rdf:Statement ; - rdf:subject ; - rdf:predicate cmns-cls:isClassifiedBy ; - rdf:object ?Spor_Term_Status_URI ; - dct:modified "{?Term_Status_Modified_On}"^^xsd:dateTime ; - dct:contributor ?Term_Status_Modified_By ; - } - WHERE { - FILTER(BOUND(?Spor_Term_Status_URI)) - } . - - # generate skos-xl label for Term Name - GENERATE { - - skos-xl:prefLabel ; - . - - - a skos-xl:Label ; - skos-xl:literalForm ?Term_Name_LangString ; - dct:modified "{?Term_Name_Modified_On_ISO}"^^xsd:dateTime ; - dct:contributor ?Term_Name_Modified_By ; - cmns-cls:isClassifiedBy ?Spor_Term_Name_Translation_Status_URI ; - rdfs:comment ?Term_Name_Comments ; - . - } - WHERE { - BIND(STRLANG(?Term_Name, ?Language) AS ?Term_Name_LangString) - - # Normalize date to ISO - BIND( - IF(contains(?Term_Name_Modified_On, "/"), - "{func:SplitAtPosition(func:SplitAtPosition(?Term_Name_Modified_On,"/",2)," ",0)}-{func:SplitAtPosition(?Term_Name_Modified_On,"/",1)}-{func:SplitAtPosition(?Term_Name_Modified_On,"/",0)}T{func:SplitAtPosition(func:SplitAtPosition(?Term_Name_Modified_On,"/",2)," ",1)}", - ?Term_Name_Modified_On) - AS ?Term_Name_Modified_On_ISO ) - - # Map to SPOR Status - BIND( - IF(?Term_Name_Translation_Status = "CURRENT", , - IF(?Term_Name_Translation_Status = "PROVISIONAL", , - IF(?Term_Name_Translation_Status = "NON_CURRENT", , - IF(?Term_Name_Translation_Status = "NULLIFIED", , - ?unbound )))) - AS ?Spor_Term_Name_Translation_Status_URI ) - - FILTER(BOUND(?Term_Name_LangString)) - } . - - # generate skos-xl label for Short Name - GENERATE { - - skos-xl:altLabel ; - . - - - a skos-xl:Label ; - skos-xl:literalForm ?Short_Name_LangString ; - dct:modified "{?Short_Name_Modified_On_ISO}"^^xsd:dateTime ; - dct:contributor ?Short_Name_Modified_By ; - cmns-cls:isClassifiedBy ?Spor_Short_Name_Translation_Status_URI ; - rdfs:comment ?Short_Name_Comments ; - . 
- } - WHERE { - BIND(STRLANG(?Short_Name, ?Language) AS ?Short_Name_LangString) - - # Normalize date to ISO - BIND( - IF(contains(?Short_Name_Modified_On, "/"), - "{func:SplitAtPosition(func:SplitAtPosition(?Short_Name_Modified_On,"/",2)," ",0)}-{func:SplitAtPosition(?Short_Name_Modified_On,"/",1)}-{func:SplitAtPosition(?Short_Name_Modified_On,"/",0)}T{func:SplitAtPosition(func:SplitAtPosition(?Short_Name_Modified_On,"/",2)," ",1)}", - ?Short_Name_Modified_On) - AS ?Short_Name_Modified_On_ISO ) - - # Map to SPOR Status - BIND( - IF(?Short_Name_Translation_Status = "CURRENT", , - IF(?Short_Name_Translation_Status = "PROVISIONAL", , - IF(?Short_Name_Translation_Status = "NON_CURRENT", , - IF(?Short_Name_Translation_Status = "NULLIFIED", , - ?unbound )))) - AS ?Spor_Short_Name_Translation_Status_URI ) - - FILTER(BOUND(?Short_Name_LangString)) - } . - - # generate skos-xl label for Other Name - GENERATE { - - skos-xl:altLabel ; - . - - - a skos-xl:Label ; - skos-xl:literalForm ?Splitted_Other_Name_LangString ; - dct:modified "{?Splitted_Other_Name_Modified_On_ISO}"^^xsd:dateTime ; - dct:contributor ?Splitted_Other_Name_Modified_By ; - cmns-cls:isClassifiedBy ?Spor_Splitted_Other_Name_Translation_Status_URI ; - rdfs:comment ?Splitted_Other_Name_Comments ; - . 
- } - ITERATOR iter:for(0,1,?Other_Name_Length) AS ?index - WHERE { - # Normalized data - BIND( - IF(?Other_Name_Comments = "¦", ?unbound, - IF(?Other_Name_Comments = "¦¦", ?unbound, - IF(?Other_Name_Comments = "¦¦¦", ?unbound, - IF(?Other_Name_Comments = "¦¦¦¦", ?unbound, - IF(?Other_Name_Comments = "¦¦¦¦¦", ?unbound, - IF(?Other_Name_Comments = "¦¦¦¦¦¦", ?unbound, - IF(?Other_Name_Comments = "¦¦¦¦¦¦¦", ?unbound, - IF(?Other_Name_Comments = "¦¦¦¦¦¦¦¦", ?unbound, - IF(?Other_Name_Comments = "¦¦¦¦¦¦¦¦¦", ?unbound, - IF(?Other_Name_Comments = "¦¦¦¦¦¦¦¦¦¦", ?unbound, - IF(?Other_Name_Comments = "¦¦¦¦¦¦¦¦¦¦¦", ?unbound, - IF(?Other_Name_Comments = "¦¦¦¦¦¦¦¦¦¦¦¦", ?unbound, - IF(?Other_Name_Comments = "¦¦¦¦¦¦¦¦¦¦¦¦¦", ?unbound, - IF(?Other_Name_Comments = "¦¦¦¦¦¦¦¦¦¦¦¦¦¦", ?unbound, - IF(?Other_Name_Comments = "¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦", ?unbound, - ?Other_Name_Comments))))))))))))))) - AS ?Normalized_Other_Name_Comments) - - BIND(IF(BOUND(?Other_Name_Modified_On), strlen(replace(?Other_Name_Modified_On, "[^¦]", "")) + 1, 0) AS ?Other_Name_Modified_On_Length) - BIND(IF(BOUND(?Other_Name_Translation_Status), strlen(replace(?Other_Name_Translation_Status, "[^¦]", "")) + 1, 0) AS ?Other_Name_Translation_Status_Length) - BIND(IF(BOUND(?Other_Name_Rowid), strlen(replace(?Other_Name_Rowid, "[^¦]", "")) + 1, 0) AS ?Other_Name_Rowid_Length) - - # Split the multiple values - BIND(func:SplitAtPosition(?Other_Name,"¦",?index) AS ?Splitted_Other_Name) - BIND(IF(?index < ?Other_Name_Modified_On_Length, - func:SplitAtPosition(?Other_Name_Modified_On,"¦",?index), - ?unbound) AS ?Splitted_Other_Name_Modified_On) - BIND(func:SplitAtPosition(?Other_Name_Modified_By,"¦",?index) AS ?Splitted_Other_Name_Modified_By) - BIND(IF(?index < ?Other_Name_Translation_Status_Length, - func:SplitAtPosition(?Other_Name_Translation_Status,"¦",?index), - ?unbound) AS ?Splitted_Other_Name_Translation_Status) - BIND(func:SplitAtPosition(?Normalized_Other_Name_Comments,"¦",?index) AS ?Splitted_Other_Name_Comments) - 
BIND(IF(?index < ?Other_Name_Rowid_Length, - func:SplitAtPosition(?Other_Name_Rowid,"¦",?index), - ?unbound) AS ?Splitted_Other_Name_Rowid) - - BIND(STRLANG(?Splitted_Other_Name, ?Language) AS ?Splitted_Other_Name_LangString) - - # Normalize date to ISO - BIND( - IF(contains(?Splitted_Other_Name_Modified_On, "/"), - "{func:SplitAtPosition(func:SplitAtPosition(?Splitted_Other_Name_Modified_On,"/",2)," ",0)}-{func:SplitAtPosition(?Splitted_Other_Name_Modified_On,"/",1)}-{func:SplitAtPosition(?Splitted_Other_Name_Modified_On,"/",0)}T{func:SplitAtPosition(func:SplitAtPosition(?Splitted_Other_Name_Modified_On,"/",2)," ",1)}", - ?Splitted_Other_Name_Modified_On) - AS ?Splitted_Other_Name_Modified_On_ISO ) - - # Map to SPOR Status - BIND( - IF(?Splitted_Other_Name_Translation_Status = "CURRENT", , - IF(?Splitted_Other_Name_Translation_Status = "PROVISIONAL", , - IF(?Splitted_Other_Name_Translation_Status = "NON_CURRENT", , - IF(?Splitted_Other_Name_Translation_Status = "NULLIFIED", , - ?unbound )))) - AS ?Spor_Splitted_Other_Name_Translation_Status_URI ) - - FILTER(BOUND(?Splitted_Other_Name_LangString)) - } . 
- } - ITERATOR iter:CSV(?source) AS - ?List_ID - ?Term_ID - ?Term_Rowid - ?Language - ?Term_Name - ?Term_Name_Modified_On - ?Term_Name_Modified_By - ?Term_Name_Translation_Status - ?Term_Name_Comments - ?Term_Name_Rowid - ?Short_Name - ?Short_Name_Modified_On - ?Short_Name_Modified_By - ?Short_Name_Translation_Status - ?Short_Name_Comments - ?Short_Name_Rowid - ?Other_Name - ?Other_Name_Modified_On - ?Other_Name_Modified_By - ?Other_Name_Translation_Status - ?Other_Name_Comments - ?Other_Name_Rowid - ?Term_Description - ?Term_Description_Modified_On - ?Term_Description_Modified_By - ?Term_Description_Translation_Status - ?Term_Description_Rowid - ?Term_Symbol - ?Term_Symbol_Modified_On - ?Term_Symbol_Modified_By - ?Term_Symbol_Comments - ?Term_Symbol_Rowid - ?Domain_ID - ?Term_Domain_Rowid - ?List_Hierarchical_Level_Short_Name - ?List_Hierarchical_Level_Name - ?Parent_ID - ?Parent_Term_Rowid - ?Term_Status - ?Term_Status_Modified_On - ?Term_Status_Modified_By - ?Term_Status_Rowid - ?Current_Term_ID - ?Current_Term_Rowid - ?Mapping_Source_ID - ?Mapping_Source_Term_ID - ?Source_Provided_Name - ?Is_Main_Term_Source - ?Mapping_Source_Version - ?Mapping_Source_Term_Status - ?Term_Source_Mapping_Rowid - ?Term_Comments - ?Applicable_to_Country_ID - ?Country_Applicability_Rowid - ?Applicable_to_IT_application_ID - ?IT_Application_Applicability_Rowid - ?Term_Created_On - ?Term_Created_By - ?Term_Modified_On - ?Term_Modified_By - ?Version_Number - ?Version_Date - ?Is_Major_Version - ?Term_Visibility - WHERE { - BIND(IF(?Language = "en", STRLANG(?Term_Name, ?Language), ?unbound) AS ?English_Term_Name_LangString) - BIND(IF(BOUND(?Other_Name), strlen(replace(?Other_Name, "[^¦]", "")) + 1, 0) AS ?Other_Name_Length) - BIND(STRLANG(?Term_Description, ?Language) AS ?Term_Description_LangString) - BIND(IF(BOUND(?Parent_ID), strlen(replace(?Parent_ID, "[^¦]", "")) + 1, 0) AS ?Parent_ID_Length) - BIND( - IF(?Term_Status = "CURRENT", , - IF(?Term_Status = "PROVISIONAL", , - 
IF(?Term_Status = "NON_CURRENT", , - IF(?Term_Status = "NULLIFIED", , - ?unbound )))) - AS ?Spor_Term_Status_URI ) - BIND( - IF(contains(?Term_Created_On, "/"), - "{func:SplitAtPosition(func:SplitAtPosition(?Term_Created_On,"/",2)," ",0)}-{func:SplitAtPosition(?Term_Created_On,"/",1)}-{func:SplitAtPosition(?Term_Created_On,"/",0)}T{func:SplitAtPosition(func:SplitAtPosition(?Term_Created_On,"/",2)," ",1)}", - ?unbound) - AS ?Term_Created_On_ISO ) - BIND( - IF(contains(?Term_Modified_On, "/"), - "{func:SplitAtPosition(func:SplitAtPosition(?Term_Modified_On,"/",2)," ",0)}-{func:SplitAtPosition(?Term_Modified_On,"/",1)}-{func:SplitAtPosition(?Term_Modified_On,"/",0)}T{func:SplitAtPosition(func:SplitAtPosition(?Term_Modified_On,"/",2)," ",1)}", - ?unbound) - AS ?Term_Modified_On_ISO ) - } . -} -ITERATOR iter:CSV(?ontology_source) AS -?Identifier -?Name -?Owner -?Domain -?Status -?Modified_On -?Created_On -?Version -WHERE { - BIND( - IF(?Domain = "Human use", , - IF(?Domain = "Veterinary use", , - IF(?Domain = "Human and Veterinary use", , - ?unbound ))) - AS ?Spor_Domain_URI ) - - BIND( - IF(?Status = "CURRENT", , - IF(?Status = "PROVISIONAL", , - IF(?Status = "NON_CURRENT", , - IF(?Status = "NULLIFIED", , - ?unbound )))) - AS ?Spor_Status_URI ) - - BIND( - IF(?Identifier = "100000073343", , - IF(?Identifier = "100000073344",, - ?unbound)) AS ?ExactMatch) -} diff --git a/etc/transformation/SPOR/referentials/transform-local.sh b/etc/transformation/SPOR/referentials/transform-local.sh deleted file mode 100755 index c85d8feb..00000000 --- a/etc/transformation/SPOR/referentials/transform-local.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/sh - -. ../../_env.sh -. 
../../_sparql_generate.sh - -# Perform transformation -query=spor-referentials -echo "Transforming $query.rqg" -$docker_cmd $java_docker_image sh -c "java -Xmx20g \ - -jar sparql-generate-$sparql_generate_version.jar \ - -l INFO \ - -q $query.rqg \ - -o $query.ttl \ - -fo TTL" -echo "Result available in $query.ttl" diff --git a/etc/transformation/SPOR/substances/README.md b/etc/transformation/SPOR/substances/README.md deleted file mode 100644 index 43e250d9..00000000 --- a/etc/transformation/SPOR/substances/README.md +++ /dev/null @@ -1,19 +0,0 @@ -# Minimum Requirements - -1. Install Docker. Follow https://docs.docker.com/get-docker instructions. -1. Download the `substances` dataset from https://spor.ema.europa.eu/smswi/#/ by clicking the `Download SMS Export` link. The downloaded file will be in CSV format. -1. Rename the file to become `sms-substances-list.csv`. -1. Change the header name inside `sms-substances-list.csv` from `#SMS_ID` to `SMS_ID`. -1. Copy `sms-substances-list.csv` file to this folder (i.e. `etc/transformation/SPOR/substances`). - -# Transformation - -Instructions for transforming the SPOR substances data. - -## Local - -1. Run `./transform-local.sh` from the terminal console. - -## Accurids - -1. Upload `sms-substances-list.csv`, and `spor-substances.rqg` files to Accurids https://pistoiaalliance.dev.accurids.com. 
diff --git a/etc/transformation/SPOR/substances/spor-substances.rqg b/etc/transformation/SPOR/substances/spor-substances.rqg deleted file mode 100644 index 0a60bd42..00000000 --- a/etc/transformation/SPOR/substances/spor-substances.rqg +++ /dev/null @@ -1,171 +0,0 @@ -PREFIX iter: -PREFIX skos: -PREFIX dct: -PREFIX rdfs: -PREFIX cmns-dsg: -PREFIX cmns-txt: -PREFIX cmns-id: -PREFIX cmns-cls: -PREFIX cmns-ra: -PREFIX idmp-sub: -PREFIX idmp-ra: -PREFIX idmp-eura: -PREFIX idmp-eureg: -PREFIX idmp-nara: -PREFIX idmp-dtp: -PREFIX spor-st: -PREFIX spor-sub: -PREFIX lcc-639-1: - -GENERATE { - ?substanceURI - a ?idmpSubstanceType, ?sporRMSSubstanceType ; - skos:prefLabel ?prefLabel ; - skos:altLabel ?altLabel ; - dct:created ?Created_Date ; - dct:modified ?Last_Updated_Date ; - cmns-dsg:hasName ?nameURI ; - cmns-id:isIdentifiedBy ; - cmns-id:isIdentifiedBy ; - skos:exactMatch ; - cmns-cls:isClassifiedBy ?idmpSubstanceDomain ; - . - - ?nameURI a idmp-sub:SubstanceName ; - rdfs:label ?Substance_Name_LangString ; - cmns-txt:hasTextValue ?Substance_Name ; - idmp-sub:hasSubstanceNameValue ?Substance_Name ; - dct:source ?sourceAuthority ; - cmns-dsg:isNameOf ?substanceURI ; - idmp-dtp:hasLanguageCode ; - . - - - a idmp-eura:EudraVigilanceCode, cmns-ra:RegisteredIdentifier ; - cmns-id:identifies ?substanceURI ; - rdfs:label ?EV_Code ; - cmns-txt:hasTextValue ?EV_Code ; - cmns-ra:isRegisteredIn idmp-eura:ExtendedEudraVigilanceMedicinalProductDictionary ; - . 
-} -ITERATOR iter:CSV() AS -?SMS_ID -?Substance_Name -?Is_Preferred_Name -?Language -?Name_Source -?Substance_Domain -?Status -?Substance_Type -?Molecular_Formula -?Molecular_Weight -?Inchikey -?Comment -?Created_Date -?Last_Updated_Date -?EV_Code -?SVG_Flag -?UNII -?INN_Number -?EC_List_Number -?Parent_Substance -WHERE { - BIND( - IF(?Language = "Bulgarian", "bg", - IF(?Language = "Croatian", "hr", - IF(?Language = "Czech", "cs", - IF(?Language = "Danish", "da", - IF(?Language = "Dutch", "nl", - IF(?Language = "English", "en", - IF(?Language = "Estonian", "et", - IF(?Language = "Finnish", "fi", - IF(?Language = "French", "fr", - IF(?Language = "German", "de", - IF(?Language = "Greek, Modern (1453-)", "el", - IF(?Language = "Hungarian", "hu", - IF(?Language = "Icelandic", "is", - IF(?Language = "Irish", "ga", - IF(?Language = "Italian", "it", - IF(?Language = "Latin", "la", - IF(?Language = "Latvian", "lv", - IF(?Language = "Lithuanian", "lt", - IF(?Language = "Maltese", "mt", - IF(?Language = "Norwegian", "no", - IF(?Language = "Polish", "pl", - IF(?Language = "Portuguese", "pt", - IF(?Language = "Romanian", "ro", - IF(?Language = "Slovak", "sk", - IF(?Language = "Slovenian", "sl", - IF(?Language = "Spanish", "es", - IF(?Language = "Swedish", "sv", - ?Language ))))))))))))))))))))))))))) - AS ?Normalized_Language ) - - BIND( STRLANG("{?Substance_Name}","{?Normalized_Language}") AS ?Substance_Name_LangString ) - BIND( IF(?Is_Preferred_Name = "True", ?Substance_Name_LangString, ?unbound) AS ?prefLabel ) - BIND( IF(?Is_Preferred_Name = "False", ?Substance_Name_LangString, ?unbound) AS ?altLabel ) - - BIND( - IF(?Substance_Type = "Chemical", idmp-sub:ChemicalSubstance, - IF(?Substance_Type = "Mixture", idmp-sub:Mixture, - IF(?Substance_Type = "Nucleic acid", idmp-sub:NucleicAcidSubstance, - IF(?Substance_Type = "Polymer", idmp-sub:PolymerSubstance, - IF(?Substance_Type = "Protein", idmp-sub:ProteinSubstance, - IF(?Substance_Type = "Specified Substance Group 1", 
idmp-sub:SpecifiedSubstanceGroup1, - IF(?Substance_Type = "Specified Substance Group 2", idmp-sub:SpecifiedSubstanceGroup2, - IF(?Substance_Type = "Specified Substance Group 3", idmp-sub:SpecifiedSubstanceGroup3, - IF(?Substance_Type = "Specified Substance Group 4", idmp-sub:SpecifiedSubstanceGroup4, - IF(?Substance_Type = "Structurally Diverse - Allergen", idmp-sub:StructurallyDiverseSubstance, - IF(?Substance_Type = "Structurally Diverse - Cell therapy", idmp-sub:StructurallyDiverseSubstance, - IF(?Substance_Type = "Structurally Diverse - Herbal", idmp-sub:StructurallyDiverseSubstance, - IF(?Substance_Type = "Structurally Diverse - Other", idmp-sub:StructurallyDiverseSubstance, - IF(?Substance_Type = "Structurally Diverse - Plasma derived", idmp-sub:StructurallyDiverseSubstance, - IF(?Substance_Type = "Structurally Diverse - Vaccine", idmp-sub:StructurallyDiverseSubstance, - ?Substance_Type ))))))))))))))) - AS ?idmpSubstanceType ) - - BIND( - IF(?Substance_Type = "Chemical", spor-st:100000075670, - IF(?Substance_Type = "Mixture", spor-st:200000005023, - IF(?Substance_Type = "Nucleic acid", spor-st:200000005035, - IF(?Substance_Type = "Polymer", spor-st:200000005022, - IF(?Substance_Type = "Protein", spor-st:200000005020, - IF(?Substance_Type = "Specified Substance Group 1", spor-st:200000005031, - IF(?Substance_Type = "Specified Substance Group 2", spor-st:200000005032, - IF(?Substance_Type = "Specified Substance Group 3", spor-st:200000005033, - IF(?Substance_Type = "Specified Substance Group 4", spor-st:200000005034, - IF(?Substance_Type = "Structurally Diverse - Allergen", spor-st:200000005026, - IF(?Substance_Type = "Structurally Diverse - Cell therapy", spor-st:200000005029, - IF(?Substance_Type = "Structurally Diverse - Herbal", spor-st:200000005025, - IF(?Substance_Type = "Structurally Diverse - Other", spor-st:200000005030, - IF(?Substance_Type = "Structurally Diverse - Plasma derived", spor-st:200000005024, - IF(?Substance_Type = "Structurally Diverse - 
Vaccine", spor-st:200000005027, - ?Substance_Type ))))))))))))))) - AS ?sporRMSSubstanceType ) - - BIND( - IF(?Substance_Domain = "Human use", , - IF(?Substance_Domain = "Veterinary use", , - IF(?Substance_Domain = "Human and Veterinary use", , - ?unbound ))) - AS ?idmpSubstanceDomain ) - - BIND( AS ?substanceURI ) - BIND( md5(?Substance_Name) AS ?nameHash ) - BIND( IF(?Name_Source = "", ?unbound, - - ) AS ?nameURI ) - BIND( - IF(?Name_Source = "CHEBI", idmp-eura:ChemicalEntitiesOfBiologicalInterest, # valid - IF(?Name_Source = "CHEMICAL ABSTRACT SERVICE", idmp-nara:ChemicalAbstractsService, # valid - IF(?Name_Source = "EUROPEAN CHEMICALS AGENCY (ECHA)", idmp-eureg:EuropeanChemicalsAgency, # valid - IF(?Name_Source = "FDA SUBSTANCE REGISTRATION SYSTEM", idmp-nara:FoodAndDrugAdministrationRegistrationAuthority, # valid - IF(?Name_Source = "INN", idmp-ra:InternationalNonproprietaryName, # valid - IF(?Name_Source = "ITIS (INTEGRATED TAXONOMIC INFORMATION SYSTEM)", idmp-nara:IntegratedTaxonomicInformationSystem, # valid - IF(?Name_Source = "PUBCHEM", idmp-nara:PubChem, # valid - IF(?Name_Source = "THE MERCK INDEX 20TH EDITION", idmp-ra:MerckIndexOnline, # valid - IF(?Name_Source = "USP", idmp-nara:USPharmacopeiaMedicinesCompendium, # valid - IF(?Name_Source = "WHO", idmp-ra:WorldHealthOrganization, # valid - ?Name_Source )))))))))) - AS ?sourceAuthority ) -} diff --git a/etc/transformation/SPOR/substances/transform-local.sh b/etc/transformation/SPOR/substances/transform-local.sh deleted file mode 100755 index 7049b798..00000000 --- a/etc/transformation/SPOR/substances/transform-local.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/sh - -. ../../_env.sh -. 
../../_sparql_generate.sh - -# Perform transformation -if [ -f sms-substances-list.csv ]; then - query=spor-substances - echo "Transforming $query.rqg" - $docker_cmd $java_docker_image sh -c "java -Xmx20g \ - -jar sparql-generate-$sparql_generate_version.jar \ - -l INFO \ - -q $query.rqg \ - -o $query.ttl \ - -fo TTL" - echo "Result available in $query.ttl" -else - echo "sms-substances-list.csv file not found! Read the instructions in the README.md" - exit 1 -fi diff --git a/etc/transformation/_env.sh b/etc/transformation/_env.sh deleted file mode 100644 index efe3ed97..00000000 --- a/etc/transformation/_env.sh +++ /dev/null @@ -1,4 +0,0 @@ -docker_cmd="docker run --rm -v $(pwd):/data -w /data" -sparql_generate_version="2.0.12" -java_docker_image="eclipse-temurin:11-jre" -ruby_docker_image="ruby:3" diff --git a/etc/transformation/_sparql_generate.sh b/etc/transformation/_sparql_generate.sh deleted file mode 100644 index edc1c091..00000000 --- a/etc/transformation/_sparql_generate.sh +++ /dev/null @@ -1,7 +0,0 @@ -# Downloading the sparql-generate -# TODO: Use sparql-generate image directly -if [ ! 
-f sparql-generate-$sparql_generate_version.jar ]; then - echo "Downloading sparql-generate version $sparql_generate_version" - download_url="https://github.com/sparql-generate/sparql-generate/releases/download/$sparql_generate_version/sparql-generate-$sparql_generate_version.jar" - $docker_cmd $java_docker_image sh -c "wget -q $download_url" -fi From d79d320ea6640a9dcaf77aa1bbac244028384209 Mon Sep 17 00:00:00 2001 From: mereolog Date: Thu, 26 Sep 2024 16:34:53 +0200 Subject: [PATCH 2/3] modification of unit test script Signed-off-by: mereolog --- .github/workflows/unit_tests_run.yml | 61 ---------------------------- 1 file changed, 61 deletions(-) diff --git a/.github/workflows/unit_tests_run.yml b/.github/workflows/unit_tests_run.yml index fd99c2f6..bf14030c 100644 --- a/.github/workflows/unit_tests_run.yml +++ b/.github/workflows/unit_tests_run.yml @@ -49,64 +49,3 @@ jobs: with: name: merged-ontology path: AboutIDMPDev-ReferenceIndividuals.ttl - GSRS: - runs-on: ubuntu-latest - env: - GSRS_VERSION: ${{ inputs.gsrs_version || '2023-12-14' }} - steps: - - uses: actions/checkout@v4 - - name: Set up Java 11 - uses: actions/setup-java@v4 - with: - distribution: "temurin" - java-version: "11" - cache: "maven" - - name: Install Dependencies & Prepare IDMP Ontologies - run: | - ./etc/unit_tests/dependencies.sh - - name: GSRS Cache - id: gsrs-caches - uses: actions/cache@v4 - with: - path: | - gsrs-transformed.ttl - key: gsrs-public-data-${{ env.GSRS_VERSION }}|transformation-files-${{ hashFiles('etc/transformation/GSRS/*.rqg') }} - - name: Download and Extract GSRS Version ${{ env.GSRS_VERSION }} - if: steps.gsrs-caches.outputs.cache-hit != 'true' || inputs.gsrs_cache_rebuild == 'true' - run: | - url=https://gsrs.ncats.nih.gov/downloads/dump-public-${{ env.GSRS_VERSION }}.gsrs - sudo wget --no-verbose -m -nH -nd -O public_data.gz $url - gunzip public_data.gz - jq -c -s '.' 
public_data > public_data.json - - name: Transform GSRS - if: steps.gsrs-caches.outputs.cache-hit != 'true' || inputs.gsrs_cache_rebuild == 'true' - run: | - queries=(gsrs-public-data-identifiers gsrs-public-data-names gsrs-public-data-relationships gsrs-public-data-substances) - for query in "${queries[@]}"; do - echo "Transforming $query.rqg" - java -Xmx8G -jar sparql-generate.jar -l INFO -q etc/transformation/GSRS/$query.rqg -o $query.ttl - done - result=gsrs-transformed.ttl - rm -rf $result - for query in "${queries[@]}"; do - cat $query.ttl >>$result - done - - name: Combine GSRS with IDMP - run: | - source_files=(gsrs-transformed.ttl AboutIDMPDev-ReferenceIndividuals.ttl) - output_file=gsrs-final.ttl - # Loop through the source files and append their contents to the output file - for file in "${source_files[@]}"; do - cat $file >> $output_file - done - - name: Run unit tests using Apache Jena - run: | - java -Xmx8G -jar etc/unit_tests/target/unit-test-runner-1.0.jar \ - -ontology_location ./gsrs-final.ttl \ - -root_folder etc/unit_tests/ \ - -cq_source etc/CQ/Example/ \ - -config_file configs/gsrs_tests_config.yaml - if [ $? != 0 ]; - then - exit $? - fi From da8ba4bb7fa2c3bcc8731b958313111b76360b76 Mon Sep 17 00:00:00 2001 From: mereolog Date: Mon, 30 Sep 2024 11:10:02 +0200 Subject: [PATCH 3/3] post-review fix Signed-off-by: mereolog --- .github/workflows/unit_tests_run.yml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/.github/workflows/unit_tests_run.yml b/.github/workflows/unit_tests_run.yml index bf14030c..79f0f3c7 100644 --- a/.github/workflows/unit_tests_run.yml +++ b/.github/workflows/unit_tests_run.yml @@ -6,14 +6,6 @@ on: push: pull_request: branches: ["master"] - workflow_dispatch: - inputs: - gsrs_version: - description: "GSRS dataset version" - required: false - gsrs_cache_rebuild: - description: "Whether to rebuild cache" - required: false permissions: contents: read