Testing poetry

bhorinrb committed Oct 24, 2024
1 parent 0c9ab82 commit 2b186e6
Showing 29 changed files with 559 additions and 86 deletions.
4 changes: 2 additions & 2 deletions .gitignore
@@ -17,6 +17,6 @@ src/yatter/__pycache__/constants.cpython-311.pyc
 src/yatter/__pycache__/__main__.cpython-311.pyc
 src/yatter/__pycache__/__init__.cpython-311.pyc
 src/yatter/__init__.py
-mapping_normalized.yml
-mapping.yml
+test/normalize/NORMKEYS-01/mapping_normalized.yml
+test/normalize/NORMKEYS-01/mapping.yml
 mapping.rml.ttl
154 changes: 70 additions & 84 deletions src/yatter/normalization.py
@@ -55,24 +55,32 @@ def expand_authors(authors):
return expanded_authors

def expand_sources(sources):
expanded_sources = ordereddict()
for key, source in sources.items():
def expand_source_item(source):
if isinstance(source, list) and len(source) == 2 and '~' in source[0]:
expanded_source = ordereddict()
access, reference = source[0].split('~')
expanded_source['access'] = access
expanded_source['referenceFormulation'] = reference
expanded_source['iterator'] = source[1]
expanded_sources[key] = expanded_source
return expanded_source
elif isinstance(source, dict):
expanded_sources[key] = normalize_yaml(source)
return normalize_yaml(source)
else:
expanded_sources[key] = source
return source

expanded_sources = CommentedSeq() if isinstance(sources, list) else ordereddict()

if isinstance(sources, dict):
for key, source in sources.items():
expanded_sources[key] = expand_source_item(source)
elif isinstance(sources, list):
for source in sources:
expanded_sources.append(expand_source_item(source))

return expanded_sources

def expand_targets(targets):
expanded_targets = ordereddict()
for key, target in targets.items():
def expand_target_item(target):
if isinstance(target, list) and len(target) >= 1:
expanded_target = ordereddict()
access_type = target[0].split('~')
@@ -83,90 +91,86 @@ def expand_targets(targets):
expanded_target['serialization'] = target[1]
if len(target) > 2:
expanded_target['compression'] = target[2]
expanded_targets[key] = expanded_target
return expanded_target
elif isinstance(target, dict):
expanded_targets[key] = normalize_yaml(target)
return normalize_yaml(target)
else:
expanded_targets[key] = target
return target

expanded_targets = CommentedSeq() if isinstance(targets, list) else ordereddict()

if isinstance(targets, dict):
for key, target in targets.items():
expanded_targets[key] = expand_target_item(target)
elif isinstance(targets, list):
for target in targets:
expanded_targets.append(expand_target_item(target))

return expanded_targets

def expand_predicateobjects(predicateobjects):
expanded_predicateobjects = CommentedSeq()

expanded_predicateobjects = CommentedSeq()
for po in predicateobjects:
expanded_po = ordereddict()

if isinstance(po, list):
expanded_po['predicates'] = po[0]

if len(po) >= 2:
if isinstance(po[1], str) and '~iri' in po[1]:
expanded_po['objects'] = ordereddict()
expanded_po['objects']['value'] = po[1].replace('~iri', '')
expanded_po['objects']['type'] = 'iri'
elif isinstance(po[1], str) and '~lang' in po[1]:
value, lang = po[1].split('~')
expanded_po['objects'] = ordereddict()
expanded_po['objects']['value'] = value
expanded_po['objects']['language'] = lang
else:
expanded_po['objects'] = po[1]

if len(po) == 3:
expanded_po['objects'] = ordereddict()
expanded_po['objects']['value'] = po[1]
expanded_po['objects']['datatype'] = po[2]
if isinstance(po[0], str):
po[0] = [po[0]]
if isinstance(po[1], str):
po[1] = [po[1]]
predicates_list, objects_list = po[0], po[1]

for pred in predicates_list:
expanded_po = ordereddict()
expanded_po['predicates'] = CommentedSeq()
expanded_po['predicates'].append(ordereddict({'value': pred}))

expanded_po['objects'] = CommentedSeq()
for obj in objects_list:
object_expansion = ordereddict()
if isinstance(obj, str) and '~' in obj:
obj_value, obj_type = obj.split('~')
object_expansion['value'] = obj_value
if obj_type == "lang":
object_expansion['language'] = obj_type
else:
object_expansion['type'] = obj_type
elif isinstance(obj, dict) and 'function' in obj:
object_expansion['function'] = obj['function']
if 'parameters' in obj:
object_expansion['parameters'] = expand_parameters(obj['parameters'])

elif isinstance(po, dict):
for key, value in po.items():
if key == 'p':
expanded_po['predicates'] = value
elif key == 'o':
if isinstance(value, list):
expanded_po['objects'] = CommentedSeq()
for obj in value:
expanded_po['objects'].append(normalize_yaml(obj) if isinstance(obj, dict) else obj)
else:
expanded_po['objects'] = normalize_yaml(value)
else:
expanded_po[key] = normalize_yaml(value)
object_expansion['value'] = obj
if len(po) > 2 and isinstance(po[2], str):
if "~" in po[2]:
lang, dtype = po[2].split("~")
object_expansion['language'] = lang
else:
object_expansion['datatype'] = po[2]
expanded_po['objects'].append(object_expansion)

expanded_predicateobjects.append(expanded_po)
expanded_predicateobjects.append(expanded_po)

return expanded_predicateobjects

def expand_predicateobjects(predicateobjects):
expanded_predicateobjects = CommentedSeq()
for po in predicateobjects:
if isinstance(po, list):
expanded_po = ordereddict()
expanded_po['predicates'] = po[0]
if len(po) == 2:
expanded_po['objects'] = po[1]
elif len(po) == 3:
expanded_po['objects'] = ordereddict()
expanded_po['objects']['value'] = po[1]
expanded_po['objects']['datatype'] = po[2]
expanded_predicateobjects.append(expanded_po)
elif isinstance(po, dict):
expanded_po = ordereddict()

for key, value in po.items():
if key == 'p':
expanded_po['predicates'] = value
elif key == 'o':
expanded_po['objects'] = normalize_yaml(value)
else:
expanded_po[key] = normalize_yaml(value)

expanded_predicateobjects.append(expanded_po)
else:
expanded_predicateobjects.append(po)

return expanded_predicateobjects

def expand_parameters(parameters):
expanded_parameters = CommentedSeq()
for param in parameters:
expanded_param = ordereddict()
if isinstance(param, list):
if isinstance(param, list) and len(param) == 2:
expanded_param['parameter'] = param[0]
expanded_param['value'] = param[1]
else:
@@ -180,14 +184,12 @@ def expand_parameters(parameters):
new_key = get_normalized_key(key)
if new_key == 'authors' and isinstance(value, list):
new_data[new_key] = expand_authors(value)
elif new_key == 'sources' and isinstance(value, dict):
elif new_key == 'sources':
new_data[new_key] = expand_sources(value)
elif new_key == 'targets' and isinstance(value, dict):
elif new_key == 'targets':
new_data[new_key] = expand_targets(value)
elif new_key == 'predicateobjects' and isinstance(value, list):
elif new_key == 'predicateobjects':
new_data[new_key] = expand_predicateobjects(value)
elif new_key == 'parameters' and isinstance(value, list):
new_data[new_key] = expand_parameters(value)
else:
new_data[new_key] = normalize_yaml(value)
return new_data
@@ -198,19 +200,3 @@ def expand_parameters(parameters):
return new_list
else:
return data


if __name__ == "__main__":
yaml = YAML()
yaml.preserve_quotes = True
yaml.indent(mapping=2, sequence=4, offset=2)

with open("mapping.yml", "r") as file:
data = yaml.load(file)

normalized_data = normalize_yaml(data)

with open("mapping_normalized.yml", "w") as file:
yaml.dump(normalized_data, file)

print("YAML normalizado guardado en mapping_normalized.yml")
13 changes: 13 additions & 0 deletions test/normalize/NORMKEYS-03/mapping.yml
@@ -0,0 +1,13 @@
targets:
person-target: [data/dump.ttl.gz~void, turtle, gzip]

mappings:
person:
subjects:
- value: http://example.org/{id}
targets:
- person-target
- ["data/dump1.nq~void"]
- ["data/dump2.nq~void"]
- access: http://localhost/sparql
type: sparql
19 changes: 19 additions & 0 deletions test/normalize/NORMKEYS-03/mapping_normalized.yml
@@ -0,0 +1,19 @@
targets:
person-target:
access: data/dump.ttl.gz
type: void
serialization: turtle
compression: gzip

mappings:
person:
subjects:
- value: http://example.org/{id}
targets:
- person-target
- access: data/dump1.nq
type: void
- access: data/dump2.nq
type: void
- access: http://localhost/sparql
type: sparql
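
The file pair above documents the new target shorthand: [access~type, serialization, compression] lists and bare ["access~type"] entries are expanded into keyed fields. A hedged, minimal check of that expansion (assuming expand_targets is importable from yatter.normalization as the diff suggests and behaves as mapping_normalized.yml indicates) could look like:

from yatter.normalization import expand_targets

# NORMKEYS-03 document-level target shorthand (dict form, as in mapping.yml above).
targets = {"person-target": ["data/dump.ttl.gz~void", "turtle", "gzip"]}

expanded = expand_targets(targets)
# Per mapping_normalized.yml, expanded["person-target"] should carry:
# access: data/dump.ttl.gz, type: void, serialization: turtle, compression: gzip
print(expanded)
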
37 changes: 37 additions & 0 deletions test/normalize/NORMKEYS-03/test_normkeys03.py
@@ -0,0 +1,37 @@
import os
from ruamel.yaml import YAML
from deepdiff import DeepDiff
from yatter.normalization import normalize_yaml

R2RML_URI = 'http://www.w3.org/ns/r2rml#'


def convert_to_dict(data):
from ruamel.yaml.comments import CommentedMap
if isinstance(data, CommentedMap):
return {key: convert_to_dict(value) for key, value in data.items()}
elif isinstance(data, list):
return [convert_to_dict(item) for item in data]
else:
return data


def test_normkeys03():
yaml = YAML(typ='safe', pure=True)

with open(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'mapping_normalized.yml')) as file:
expected_mapping = yaml.load(file)

with open(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'mapping.yml')) as file:
data = yaml.load(file)
normalized_mapping = normalize_yaml(data)

expected_mapping = convert_to_dict(expected_mapping)
normalized_mapping = convert_to_dict(normalized_mapping)

ddiff = DeepDiff(expected_mapping, normalized_mapping, ignore_order=True)

if ddiff:
assert False
else:
assert True
4 changes: 4 additions & 0 deletions test/normalize/NORMKEYS-04/mapping.yml
@@ -0,0 +1,4 @@
mappings:
person:
predicateobjects:
- [foaf:firstName, $(firstname)]
7 changes: 7 additions & 0 deletions test/normalize/NORMKEYS-04/mapping_normalized.yml
@@ -0,0 +1,7 @@
mappings:
person:
predicateobjects:
- predicates:
- value: foaf:firstName
objects:
- value: $(firstname)
37 changes: 37 additions & 0 deletions test/normalize/NORMKEYS-04/test_normkeys04.py
@@ -0,0 +1,37 @@
import os
from ruamel.yaml import YAML
from deepdiff import DeepDiff
from yatter.normalization import normalize_yaml

R2RML_URI = 'http://www.w3.org/ns/r2rml#'


def convert_to_dict(data):
from ruamel.yaml.comments import CommentedMap
if isinstance(data, CommentedMap):
return {key: convert_to_dict(value) for key, value in data.items()}
elif isinstance(data, list):
return [convert_to_dict(item) for item in data]
else:
return data


def test_normkeys04():
yaml = YAML(typ='safe', pure=True)

with open(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'mapping_normalized.yml')) as file:
expected_mapping = yaml.load(file)

with open(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'mapping.yml')) as file:
data = yaml.load(file)
normalized_mapping = normalize_yaml(data)

expected_mapping = convert_to_dict(expected_mapping)
normalized_mapping = convert_to_dict(normalized_mapping)

ddiff = DeepDiff(expected_mapping, normalized_mapping, ignore_order=True)

if ddiff:
assert False
else:
assert True
4 changes: 4 additions & 0 deletions test/normalize/NORMKEYS-05/mapping.yml
@@ -0,0 +1,4 @@
mappings:
person:
predicateobjects:
- [[foaf:name, rdfs:label], [$(firstname), $(lastname)]]
15 changes: 15 additions & 0 deletions test/normalize/NORMKEYS-05/mapping_normalized.yml
@@ -0,0 +1,15 @@
mappings:
person:
predicateobjects:
- predicates:
- value: foaf:name
objects:
- value: $(firstname)
- value: $(lastname)
- predicates:
- value: rdfs:label
objects:
- value: $(firstname)
- value: $(lastname)
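
NORMKEYS-05 exercises the cross-product behaviour of the rewritten expand_predicateobjects: each predicate in the shorthand list becomes its own predicateobjects entry, and every entry lists all objects. A short sketch reproducing the normalized file (an illustration only; it assumes ruamel.yaml and yatter are installed and uses the test layout above):

import sys
from ruamel.yaml import YAML
from yatter.normalization import normalize_yaml

yaml = YAML()
with open("test/normalize/NORMKEYS-05/mapping.yml") as f:
    data = yaml.load(f)

# Should emit two predicateobjects entries (foaf:name, rdfs:label),
# each with both objects ($(firstname) and $(lastname)), matching
# mapping_normalized.yml above.
yaml.dump(normalize_yaml(data), sys.stdout)
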

