Skip to content

Commit

Permalink
Merge pull request #129 from acdh-oeaw/crossvalidate_tei_refs
Browse files Browse the repository at this point in the history
Crossvalidate tei refs
  • Loading branch information
gythaogg authored Sep 23, 2024
2 parents 919eaf8 + f15e23a commit 53ab30c
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 7 deletions.
24 changes: 18 additions & 6 deletions apis_ontology/management/commands/get_tei_refs.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
from collections import defaultdict
from django.core.management.base import BaseCommand
from apis_ontology.models import TibScholRelationMixin
from tqdm.auto import tqdm
import re
from pprint import pprint
from apis_ontology.models import Excerpts
from datetime import datetime


class Command(BaseCommand):
Expand All @@ -24,15 +27,24 @@ def get_all_tei_ids(value):
return tei_ids

# Cross-validate TEI references: gather every TEI id mentioned by any
# relation, remember which relations mention it, then report the ids that
# have no matching Excerpts row.
models = TibScholRelationMixin.__subclasses__()
unique_refs = defaultdict(list)
for m in tqdm(models):
    if not hasattr(m, "tei_refs"):
        # Skip just this model; a `break` here would silently drop every
        # model that comes after it in the subclass list.
        continue
    for rel in m.objects.all():
        if not rel.tei_refs:
            continue
        for ref in get_all_tei_ids(rel.tei_refs):
            # One "pk,model,subject,object" entry per relation citing `ref`.
            unique_refs[ref].append(f"{rel.pk},{m},{rel.subj},{rel.obj}")

# A reference is missing when no Excerpts row carries it as `xml_id`.
# `exists()` also tolerates duplicate xml_ids, where `get()` would raise
# MultipleObjectsReturned.
missing_refs = [
    f"{ref},{rels}"
    for ref, rels in unique_refs.items()
    if not Excerpts.objects.filter(xml_id=ref).exists()
]

# strftime codes: %m is the month and %M the minute, so the file name reads
# YYYYmmdd_HHMMSS.
timestamp = f"{datetime.now():%Y%m%d_%H%M%S}"
# Explicit UTF-8 so the output does not depend on the platform default
# encoding (the entries embed str() of the related objects).
with open(f"missing_refs_{timestamp}.csv", "w", encoding="utf-8") as f:
    f.write("\n".join(missing_refs))

self.stdout.write(self.style.SUCCESS("Done."))
2 changes: 1 addition & 1 deletion apis_ontology/management/commands/import_excerpts.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,4 @@ def create_record(row):
create_record(row)

# Report how many rows of `df` were processed (presumably a pandas DataFrame
# built earlier in this command — confirm against the full file).
self.stdout.write(self.style.SUCCESS(f"Processed {df.shape[0]} excerpts."))
# The f-prefix must sit outside the quotes for the placeholder to be
# interpolated. count() asks the database for the total instead of loading
# every Excerpts row just to measure the list.
print(f"There are {Excerpts.objects.count()} in the database now.")

0 comments on commit 53ab30c

Please sign in to comment.