diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 94380ec6be..4eaedaa459 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -24,6 +24,7 @@ jobs: KOBOCAT_URL: http://kobocat KOBOCAT_INTERNAL_URL: http://kobocat KOBOFORM_URL: http://kpi + SKIP_TESTS_WITH_CONCURRENCY: 'True' strategy: matrix: python-version: ['3.8', '3.10'] diff --git a/kobo/apps/openrosa/apps/api/tests/fixtures/users.json b/kobo/apps/openrosa/apps/api/tests/fixtures/users.json new file mode 100644 index 0000000000..72ad40c86a --- /dev/null +++ b/kobo/apps/openrosa/apps/api/tests/fixtures/users.json @@ -0,0 +1,18 @@ +[ + { + "model": "auth.user", + "pk": 2, + "fields": { + "username": "bob", + "password": "pbkdf2_sha256$260000$T1eA0O4Ub6c6FAaCsb0fqU$6vX4qMw1VV9tMXFf1d9pL/5z5/2T1MQYYn7vB3p+I2Y=", + "email": "bob@columbia.edu", + "first_name": "bob", + "last_name": "bob", + "is_active": true, + "is_staff": false, + "is_superuser": false, + "last_login": null, + "date_joined": "2015-02-12T19:52:14.406Z" + } + } +] diff --git a/kobo/apps/openrosa/apps/api/tests/viewsets/test_xform_submission_api.py b/kobo/apps/openrosa/apps/api/tests/viewsets/test_xform_submission_api.py index cf0319fa7e..b219f5ece9 100644 --- a/kobo/apps/openrosa/apps/api/tests/viewsets/test_xform_submission_api.py +++ b/kobo/apps/openrosa/apps/api/tests/viewsets/test_xform_submission_api.py @@ -1,9 +1,22 @@ +import multiprocessing import os +from collections import defaultdict +from functools import partial +import pytest +import requests import simplejson as json +from django.conf import settings from django.contrib.auth.models import AnonymousUser +from django.core.files.base import ContentFile from django.core.files.uploadedfile import InMemoryUploadedFile +from django.test.testcases import LiveServerTestCase from django_digest.test import DigestAuth +from rest_framework.authtoken.models import Token + +from kobo.apps.kobo_auth.shortcuts import User +from kobo.apps.openrosa.apps.main.models import UserProfile +from kobo.apps.openrosa.libs.tests.mixins.request_mixin import RequestMixin from rest_framework import status from kobo.apps.openrosa.apps.api.tests.viewsets.test_abstract_viewset import ( @@ -11,8 +24,10 @@ ) from kobo.apps.openrosa.apps.api.viewsets.xform_submission_api import XFormSubmissionApi from kobo.apps.openrosa.apps.logger.models import Attachment +from kobo.apps.openrosa.apps.main import tests as main_tests from kobo.apps.openrosa.libs.constants import CAN_ADD_SUBMISSIONS from kobo.apps.openrosa.libs.utils.guardian import assign_perm +from kobo.apps.openrosa.libs.utils import logger_tools from kobo.apps.openrosa.libs.utils.logger_tools import OpenRosaTemporarilyUnavailable @@ -209,7 +224,7 @@ def test_post_submission_authenticated_bad_json(self): self.assertTrue('error' in rendered_response.data) self.assertTrue( rendered_response.data['error'].startswith( - 'Received empty submission' + 'Instance ID is required' ) ) self.assertTrue(rendered_response.status_code == 400) @@ -429,3 +444,107 @@ def test_submission_blocking_flag(self): ) response = self.view(request, username=username) self.assertEqual(response.status_code, status.HTTP_201_CREATED) + + +class ConcurrentSubmissionTestCase(RequestMixin, LiveServerTestCase): + """ + Inherit from LiveServerTestCase to be able to test concurrent requests + to submission endpoint in different transactions (and different processes). + Otherwise, DB is populated only on the first request but still empty on + subsequent ones. + """ + fixtures = ['kobo/apps/openrosa/apps/api/tests/fixtures/users'] + + def setUp(self): + self.user = User.objects.get(username='bob') + self.token, _ = Token.objects.get_or_create(user=self.user) + UserProfile.objects.get_or_create(user=self.user) + + def publish_xls_form(self): + path = os.path.join( + settings.OPENROSA_APP_DIR, + 'apps', + 'main', + 'tests', + 'fixtures', + 'transportation', + 'transportation.xls', + ) + + with open(path, 'rb') as f: + xls_file = ContentFile(f.read(), name='transportation.xls') + + self.xform = logger_tools.publish_xls_form(xls_file, self.user) + + @pytest.mark.skipif( + settings.SKIP_TESTS_WITH_CONCURRENCY, + reason='GitLab does not seem to support multi-processes' + ) + def test_post_concurrent_same_submissions(self): + DUPLICATE_SUBMISSIONS_COUNT = 2 # noqa + + self.publish_xls_form() + username = 'bob' + survey = 'transport_2011-07-25_19-05-49' + results = defaultdict(int) + + with multiprocessing.Pool() as pool: + for result in pool.map( + partial( + submit_data, + live_server_url=self.live_server_url, + survey_=survey, + username_=username, + token_=self.token.key + ), + range(DUPLICATE_SUBMISSIONS_COUNT), + ): + results[result] += 1 + + assert results[status.HTTP_201_CREATED] == 1 + assert results[status.HTTP_202_ACCEPTED] == DUPLICATE_SUBMISSIONS_COUNT - 1 + + +def submit_data(identifier, survey_, username_, live_server_url, token_): + """ + Submit data to live server. + + It has to be outside `ConcurrentSubmissionTestCase` class to be pickled by + `multiprocessing.Pool().map()`. + """ + media_file = '1335783522563.jpg' + main_directory = os.path.dirname(main_tests.__file__) + path = os.path.join( + main_directory, + 'fixtures', + 'transportation', + 'instances', + survey_, + media_file, + ) + with open(path, 'rb') as f: + f = InMemoryUploadedFile( + f, + 'media_file', + media_file, + 'image/jpg', + os.path.getsize(path), + None, + ) + submission_path = os.path.join( + main_directory, + 'fixtures', + 'transportation', + 'instances', + survey_, + f'{survey_}.xml', + ) + with open(submission_path) as sf: + files = {'xml_submission_file': sf, 'media_file': f} + headers = {'Authorization': f'Token {token_}'} + response = requests.post( + f'{live_server_url}/{username_}/submission', + files=files, + headers=headers + ) + return response.status_code diff --git a/kobo/apps/openrosa/apps/logger/exceptions.py b/kobo/apps/openrosa/apps/logger/exceptions.py index d8b85c5c94..af1dc5d1bd 100644 --- a/kobo/apps/openrosa/apps/logger/exceptions.py +++ b/kobo/apps/openrosa/apps/logger/exceptions.py @@ -10,6 +10,16 @@ class BuildDbQueriesNoConfirmationProvidedError(Exception): pass +class ConflictingSubmissionUUIDError(Exception): + def __init__(self, message='Submission with this instance ID already exists'): + super().__init__(message) + + +class DuplicateInstanceError(Exception): + def __init__(self, message='Duplicate Instance'): + super().__init__(message) + + class DuplicateUUIDError(Exception): pass @@ -18,9 +28,37 @@ class FormInactiveError(Exception): pass +class InstanceEmptyError(Exception): + def __init__(self, message='Empty instance'): + super().__init__(message) + + +class InstanceInvalidUserError(Exception): + def __init__(self, message='Could not determine the user'): + super().__init__(message) + + +class InstanceIdMissingError(Exception): + def __init__(self, message='Could not determine the instance ID'): + super().__init__(message) + + +class InstanceMultipleNodeError(Exception): + pass + + +class InstanceParseError(Exception): + def __init__(self, message='The instance could not be parsed'): + super().__init__(message) + + class MissingValidationStatusPayloadError(Exception): pass class TemporarilyUnavailableError(Exception): pass + + +class XLSFormError(Exception): + pass diff --git a/kobo/apps/openrosa/apps/logger/fixtures/new_repeats/instances/multiple_nodes_error.xml b/kobo/apps/openrosa/apps/logger/fixtures/new_repeats/instances/multiple_nodes_error.xml index 9b7b9a3cde..f0569b1166 100644 --- a/kobo/apps/openrosa/apps/logger/fixtures/new_repeats/instances/multiple_nodes_error.xml +++ b/kobo/apps/openrosa/apps/logger/fixtures/new_repeats/instances/multiple_nodes_error.xml @@ -14,4 +14,7 @@ -1.2627557 36.7926442 0.0 30.0 chrome ie + + uuid:364f173c688e482486a48661700466gg + diff --git a/kobo/apps/openrosa/apps/logger/fixtures/new_repeats/instances/new_repeats_2012-07-05-14-33-53.xml b/kobo/apps/openrosa/apps/logger/fixtures/new_repeats/instances/new_repeats_2012-07-05-14-33-53.xml index f0ee825508..eaab7f531e 100644 --- a/kobo/apps/openrosa/apps/logger/fixtures/new_repeats/instances/new_repeats_2012-07-05-14-33-53.xml +++ b/kobo/apps/openrosa/apps/logger/fixtures/new_repeats/instances/new_repeats_2012-07-05-14-33-53.xml @@ -13,4 +13,7 @@ -1.2627557 36.7926442 0.0 30.0 chrome ie + + uuid:364f173c688e482486a48661700466gg + diff --git a/kobo/apps/openrosa/apps/logger/fixtures/test_forms/survey_names/instances/survey_names_2012-08-17_11-24-53.xml b/kobo/apps/openrosa/apps/logger/fixtures/test_forms/survey_names/instances/survey_names_2012-08-17_11-24-53.xml index 4a4550a9f6..6e07597a68 100644 --- a/kobo/apps/openrosa/apps/logger/fixtures/test_forms/survey_names/instances/survey_names_2012-08-17_11-24-53.xml +++ b/kobo/apps/openrosa/apps/logger/fixtures/test_forms/survey_names/instances/survey_names_2012-08-17_11-24-53.xml @@ -1 +1 @@ -2012-08-17T11:24:53.254+03HD2012-08-17T11:24:57.897+03 +2012-08-17T11:24:53.254+03HD2012-08-17T11:24:57.897+03uuid:729f173c688e482486a48661700455ff diff --git a/kobo/apps/openrosa/apps/logger/fixtures/tutorial/instances/tutorial_2012-06-27_11-27-53_w_xform_uuid.xml b/kobo/apps/openrosa/apps/logger/fixtures/tutorial/instances/tutorial_2012-06-27_11-27-53_w_xform_uuid.xml index 78426e9db8..1054169b4c 100644 --- a/kobo/apps/openrosa/apps/logger/fixtures/tutorial/instances/tutorial_2012-06-27_11-27-53_w_xform_uuid.xml +++ b/kobo/apps/openrosa/apps/logger/fixtures/tutorial/instances/tutorial_2012-06-27_11-27-53_w_xform_uuid.xml @@ -7,4 +7,7 @@ 0 -1.2836198 36.8795437 0.0 1044.0 firefox chrome safari + + uuid:364f173c688e482486a48661700466gg + diff --git a/kobo/apps/openrosa/apps/logger/fixtures/tutorial/instances/tutorial_unicode_submission.xml b/kobo/apps/openrosa/apps/logger/fixtures/tutorial/instances/tutorial_unicode_submission.xml index aad35e71a4..92c434acac 100644 --- a/kobo/apps/openrosa/apps/logger/fixtures/tutorial/instances/tutorial_unicode_submission.xml +++ b/kobo/apps/openrosa/apps/logger/fixtures/tutorial/instances/tutorial_unicode_submission.xml @@ -6,4 +6,7 @@ 0 -1.2836198 36.8795437 0.0 1044.0 firefox chrome safari + + uuid:639f173c688e482486a48661700456ty + diff --git a/kobo/apps/openrosa/apps/logger/fixtures/tutorial/instances/tutorial_with_attachment/1335783522563.jpg b/kobo/apps/openrosa/apps/logger/fixtures/tutorial/instances/tutorial_with_attachment/1335783522563.jpg new file mode 100755 index 0000000000..e8d953e387 Binary files /dev/null and b/kobo/apps/openrosa/apps/logger/fixtures/tutorial/instances/tutorial_with_attachment/1335783522563.jpg differ diff --git a/kobo/apps/openrosa/apps/logger/fixtures/tutorial/instances/tutorial_with_attachment/attachment_with_different_content/1335783522563.jpg b/kobo/apps/openrosa/apps/logger/fixtures/tutorial/instances/tutorial_with_attachment/attachment_with_different_content/1335783522563.jpg new file mode 100644 index 0000000000..6f842e50d0 Binary files /dev/null and b/kobo/apps/openrosa/apps/logger/fixtures/tutorial/instances/tutorial_with_attachment/attachment_with_different_content/1335783522563.jpg differ diff --git a/kobo/apps/openrosa/apps/logger/fixtures/tutorial/instances/tutorial_with_attachment/tutorial_2012-06-27_11-27-53_w_attachment.xml b/kobo/apps/openrosa/apps/logger/fixtures/tutorial/instances/tutorial_with_attachment/tutorial_2012-06-27_11-27-53_w_attachment.xml new file mode 100644 index 0000000000..e354297ee5 --- /dev/null +++ b/kobo/apps/openrosa/apps/logger/fixtures/tutorial/instances/tutorial_with_attachment/tutorial_2012-06-27_11-27-53_w_attachment.xml @@ -0,0 +1,17 @@ + + + Larry + Again + + 23 + 1335783522563.jpg + 0 + -1.2836198 36.8795437 0.0 1044.0 + firefox chrome safari + + uuid:729f173c688e482486a48661700455ff + + + c2f940a2f1e8490d8d3c3030452ed401 + + diff --git a/kobo/apps/openrosa/apps/logger/fixtures/tutorial/instances/tutorial_with_attachment/tutorial_2012-06-27_11-27-53_w_attachment_edit.xml b/kobo/apps/openrosa/apps/logger/fixtures/tutorial/instances/tutorial_with_attachment/tutorial_2012-06-27_11-27-53_w_attachment_edit.xml new file mode 100644 index 0000000000..8be650f260 --- /dev/null +++ b/kobo/apps/openrosa/apps/logger/fixtures/tutorial/instances/tutorial_with_attachment/tutorial_2012-06-27_11-27-53_w_attachment_edit.xml @@ -0,0 +1,18 @@ + + + Larry + Again + + 23 + 1335783522563.jpg + 0 + -1.2836198 36.8795437 0.0 1044.0 + firefox chrome safari + + uuid:031c585a-88b7-4962-a566-849a7dd15891 + uuid:729f173c688e482486a48661700455ff + + + c2f940a2f1e8490d8d3c3030452ed401 + + diff --git a/kobo/apps/openrosa/apps/logger/management/commands/clean_duplicated_submissions.py b/kobo/apps/openrosa/apps/logger/management/commands/clean_duplicated_submissions.py index 5ad0d71b8c..4117385090 100644 --- a/kobo/apps/openrosa/apps/logger/management/commands/clean_duplicated_submissions.py +++ b/kobo/apps/openrosa/apps/logger/management/commands/clean_duplicated_submissions.py @@ -1,28 +1,36 @@ #!/usr/bin/env python # vim: ai ts=4 sts=4 et sw=4 fileencoding=utf-8 # coding: utf-8 +from collections import defaultdict + from django.conf import settings -from django.core.management.base import BaseCommand, CommandError +from django.core.management.base import BaseCommand from django.db import transaction -from django.db.models import Sum +from django.db.models import F from django.db.models.aggregates import Count -from django.utils import timezone +from kobo.apps.openrosa.apps.logger.models import ( + DailyXFormSubmissionCounter, + MonthlyXFormSubmissionCounter +) from kobo.apps.openrosa.apps.logger.models.attachment import Attachment from kobo.apps.openrosa.apps.logger.models.instance import Instance from kobo.apps.openrosa.apps.viewer.models.parsed_instance import ParsedInstance -from kobo.apps.openrosa.apps.logger.models.xform import XForm -from kobo.apps.openrosa.libs.utils.common_tags import MONGO_STRFTIME +from kobo.apps.openrosa.apps.logger.xform_instance_parser import set_meta class Command(BaseCommand): - help = "Deletes duplicated submissions (i.e same `uuid` and same `xml`)" + help = """ + Deletes or updates duplicated submissions based on uuid and xml_hash. - def __init__(self, **kwargs): - super().__init__(**kwargs) - self.__vaccuum = False - self.__users = set([]) + Workflow: + 1. Identifies submissions with duplicate uuids. + 2. For duplicates with the same xml_hash, deletes duplicates and attaches + their attachments to the original submission. + 3. For duplicates with different xml_hashes, updates their uuids to make + them unique. + """ def add_arguments(self, parser): super().add_arguments(parser) @@ -42,8 +50,9 @@ def add_arguments(self, parser): def handle(self, *args, **options): username = options['user'] xform_id_string = options['xform'] + self._verbosity = options['verbosity'] - # Retrieve all instances with the same `uuid`. + # Retrieve all instances with the same `uuid` query = Instance.objects if xform_id_string: query = query.filter(xform__id_string=xform_id_string) @@ -51,87 +60,146 @@ def handle(self, *args, **options): if username: query = query.filter(xform__user__username=username) - query = query.values_list('uuid', flat=True)\ - .annotate(count_uuid=Count('uuid'))\ - .filter(count_uuid__gt=1)\ + query = ( + query.values_list('uuid', flat=True) + .annotate(count_uuid=Count('uuid')) + .filter(count_uuid__gt=1) .distinct() + ) - for uuid in query.all(): - - duplicated_query = Instance.objects.filter(uuid=uuid) - - instances_with_same_uuid = duplicated_query.values_list('id', - 'xml_hash')\ - .order_by('xml_hash', 'date_created') - xml_hash_ref = None - instance_id_ref = None - - duplicated_instance_ids = [] - for instance_with_same_uuid in instances_with_same_uuid: - instance_id = instance_with_same_uuid[0] - instance_xml_hash = instance_with_same_uuid[1] - - if instance_xml_hash != xml_hash_ref: - self.__clean_up(instance_id_ref, - duplicated_instance_ids) - xml_hash_ref = instance_xml_hash - instance_id_ref = instance_id - duplicated_instance_ids = [] - continue - - duplicated_instance_ids.append(instance_id) - - self.__clean_up(instance_id_ref, - duplicated_instance_ids) - - if not self.__vaccuum: - self.stdout.write('No instances have been purged.') - else: - # Update number of submissions for each user. - for user_ in list(self.__users): - result = XForm.objects.filter(user_id=user_.id)\ - .aggregate(count=Sum('num_of_submissions')) - user_.profile.num_of_submissions = result['count'] - self.stdout.write( - "\tUpdating `{}`'s number of submissions".format( - user_.username)) - user_.profile.save(update_fields=['num_of_submissions']) + for uuid in query.iterator(): + # Get all instances with the same UUID + duplicates_queryset = Instance.objects.filter(uuid=uuid) + + instances = duplicates_queryset.values( + 'id', 'uuid', 'xml_hash', 'xform_id', 'date_created' + ).order_by('xform_id', 'uuid', 'date_created') + + # Separate duplicates by their xml_hash (same and different) + same_xml_hash_duplicates, different_xml_hash_duplicates = ( + self._get_duplicates_by_xml_hash(instances) + ) + + # Handle the same xml_hash duplicates + if same_xml_hash_duplicates: + instance_ref = same_xml_hash_duplicates.pop(0) + self._delete_duplicates( + instance_ref, same_xml_hash_duplicates + ) + + # Handle the different xml_hash duplicates (update uuid) + if different_xml_hash_duplicates: + instance_ref = different_xml_hash_duplicates.pop(0) + self._replace_duplicates(different_xml_hash_duplicates) + + def _delete_duplicates(self, instance_ref, duplicated_instances): + """ + Delete the duplicated instances with the same xml_hash and link their + attachments to the reference instance (instance_ref). + """ + duplicated_instance_ids = [i['id'] for i in duplicated_instances] + + if self._verbosity >= 1: + self.stdout.write( + f'Deleting instance #{duplicated_instance_ids} duplicates…' + ) + + with transaction.atomic(): + # Update attachments + Attachment.objects.select_for_update().filter( + instance_id__in=duplicated_instance_ids + ).update(instance_id=instance_ref['id']) + if self._verbosity >= 2: self.stdout.write( - '\t\tDone! New number: {}'.format(result['count'])) - - def __clean_up(self, instance_id_ref, duplicated_instance_ids): - if instance_id_ref is not None and len(duplicated_instance_ids) > 0: - self.__vaccuum = True - with transaction.atomic(): - self.stdout.write('Link attachments to instance #{}'.format( - instance_id_ref)) - # Update attachments - Attachment.objects.select_for_update()\ - .filter(instance_id__in=duplicated_instance_ids)\ - .update(instance_id=instance_id_ref) - - # Update Mongo - main_instance = Instance.objects.select_for_update()\ - .get(id=instance_id_ref) - main_instance.parsed_instance.save() - - self.stdout.write('\tPurging instances: {}'.format( - duplicated_instance_ids)) - Instance.objects.select_for_update()\ - .filter(id__in=duplicated_instance_ids).delete() - ParsedInstance.objects.select_for_update()\ - .filter(instance_id__in=duplicated_instance_ids).delete() - settings.MONGO_DB.instances.remove( - {'_id': {'$in': duplicated_instance_ids}} + f"\tLinked attachments to instance #{instance_ref['id']}" ) - # Update number of submissions - xform = main_instance.xform + + # Update Mongo + main_instance = Instance.objects.get(id=instance_ref['id']) + main_instance.parsed_instance.save() + + # Delete duplicated ParsedInstances + ParsedInstance.objects.filter( + instance_id__in=duplicated_instance_ids + ).delete() + + # Adjust counters and delete instances + instance_queryset = Instance.objects.filter( + id__in=duplicated_instance_ids + ) + for instance in instance_queryset.values( + 'xform_id', 'date_created__date', 'xform__user_id' + ): + MonthlyXFormSubmissionCounter.objects.filter( + year=instance['date_created__date'].year, + month=instance['date_created__date'].month, + user_id=instance['xform__user_id'], + xform_id=instance['xform_id'], + ).update(counter=F('counter') - 1) + + DailyXFormSubmissionCounter.objects.filter( + date=instance['date_created__date'], + xform_id=instance['xform_id'], + ).update(counter=F('counter') - 1) + + instance_queryset.delete() + + settings.MONGO_DB.instances.delete_many( + {'_id': {'$in': duplicated_instance_ids}} + ) + if self._verbosity > 1: self.stdout.write( - '\tUpdating number of submissions of XForm #{} ({})'.format( - xform.id, xform.id_string)) - xform_submission_count = xform.submission_count(force_update=True) + f'\tPurged instance IDs: {duplicated_instance_ids}' + ) + + def _replace_duplicates(self, duplicated_instances): + """ + Update the UUID of instances with different xml_hash values. + """ + instances_to_update = [] + for idx, duplicated_instance in enumerate(duplicated_instances): + try: + instance = Instance.objects.get(pk=duplicated_instance['id']) + except Instance.DoesNotExist: + continue + + if self._verbosity >= 1: self.stdout.write( - '\t\tDone! New number: {}'.format(xform_submission_count)) - self.stdout.write('') + f'\tUpdating instance #{instance.pk} ({instance.uuid})…' + ) + + # Update the UUID and XML hash + instance.uuid = (f'DUPLICATE-{idx}-{instance.xform.id_string}-' + f'{instance.uuid}') + instance.xml = set_meta( + instance.xml, 'instanceID', instance.uuid + ) + instance.xml_hash = instance.get_hash(instance.xml) + instances_to_update.append(instance) + + # Save the parsed instance to sync MongoDB + parsed_instance = instance.parsed_instance + parsed_instance.save() + + Instance.objects.bulk_update( + instances_to_update, ['uuid', 'xml', 'xml_hash'] + ) + + def _get_duplicates_by_xml_hash(self, instances): + """ + Extract duplicates with the same xml_hash and different xml_hash + """ + same_xml_hash_duplicates, different_xml_hash_duplicates = [], [] + xml_hash_groups = defaultdict(list) + + # Group instances by their xml_hash + for instance in instances: + xml_hash_groups[instance['xml_hash']].append(instance) + + for xml_hash, duplicates in xml_hash_groups.items(): + if len(duplicates) > 1: + same_xml_hash_duplicates.extend(duplicates) + else: + different_xml_hash_duplicates.extend(duplicates) - self.__users.add(xform.user) + return same_xml_hash_duplicates, different_xml_hash_duplicates diff --git a/kobo/apps/openrosa/apps/logger/migrations/0038_add_root_uuid_field_to_instance.py b/kobo/apps/openrosa/apps/logger/migrations/0038_add_root_uuid_field_to_instance.py new file mode 100644 index 0000000000..3e91ddae75 --- /dev/null +++ b/kobo/apps/openrosa/apps/logger/migrations/0038_add_root_uuid_field_to_instance.py @@ -0,0 +1,26 @@ +# Generated by Django 4.2.15 on 2024-09-29 18:31 + +from django.conf import settings +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ('logger', '0037_remove_xform_has_kpi_hooks_and_instance_posted_to_kpi'), + ] + + operations = [ + migrations.AddField( + model_name='instance', + name='root_uuid', + field=models.CharField(db_index=True, max_length=249, null=True), + ), + migrations.AddConstraint( + model_name='instance', + constraint=models.UniqueConstraint( + fields=('root_uuid', 'xform'), name='unique_root_uuid_per_xform' + ), + ), + ] diff --git a/kobo/apps/openrosa/apps/logger/models/instance.py b/kobo/apps/openrosa/apps/logger/models/instance.py index 6a6737078a..799b587f32 100644 --- a/kobo/apps/openrosa/apps/logger/models/instance.py +++ b/kobo/apps/openrosa/apps/logger/models/instance.py @@ -12,6 +12,7 @@ from django.contrib.gis.geos import GeometryCollection, Point from django.utils import timezone from django.utils.encoding import smart_str +from django.db.models import UniqueConstraint from jsonfield import JSONField from taggit.managers import TaggableManager @@ -26,6 +27,7 @@ from kobo.apps.openrosa.apps.logger.xform_instance_parser import ( XFormInstanceParser, clean_and_parse_xml, + get_root_uuid_from_xml, get_uuid_from_xml, ) from kobo.apps.openrosa.libs.utils.common_tags import ( @@ -89,6 +91,7 @@ class Instance(AbstractTimeStampedModel): # do not use it anymore. deleted_at = models.DateTimeField(null=True, default=None) + root_uuid = models.CharField(max_length=249, null=True, db_index=True) # ODK keeps track of three statuses for an instance: # incomplete, submitted, complete # we add a fourth status: submitted_via_web @@ -108,6 +111,12 @@ class Instance(AbstractTimeStampedModel): class Meta: app_label = 'logger' + constraints = [ + UniqueConstraint( + fields=['root_uuid', 'xform'], + name='unique_root_uuid_per_xform' + ) + ] @property def asset(self): @@ -192,6 +201,13 @@ def _set_uuid(self): self.uuid = uuid set_uuid(self) + def _populate_root_uuid(self): + if self.xml and not self.root_uuid: + assert ( + root_uuid := get_root_uuid_from_xml(self.xml) + ), 'root_uuid should not be empty' + self.root_uuid = root_uuid + def _populate_xml_hash(self): """ Populate the `xml_hash` attribute of this `Instance` based on the content of the `xml` @@ -329,6 +345,7 @@ def save(self, *args, **kwargs): self._set_survey_type() self._set_uuid() self._populate_xml_hash() + self._populate_root_uuid() # Force validation_status to be dict if self.validation_status is None: diff --git a/kobo/apps/openrosa/apps/logger/models/xform.py b/kobo/apps/openrosa/apps/logger/models/xform.py index f031ef5fd1..787f44536d 100644 --- a/kobo/apps/openrosa/apps/logger/models/xform.py +++ b/kobo/apps/openrosa/apps/logger/models/xform.py @@ -16,7 +16,7 @@ from taggit.managers import TaggableManager from kobo.apps.kobo_auth.shortcuts import User -from kobo.apps.openrosa.apps.logger.xform_instance_parser import XLSFormError +from kobo.apps.openrosa.apps.logger.exceptions import XLSFormError from kobo.apps.openrosa.koboform.pyxform_utils import convert_csv_to_xls from kobo.apps.openrosa.libs.constants import ( CAN_ADD_SUBMISSIONS, diff --git a/kobo/apps/openrosa/apps/logger/tests/Water_2011_03_17_2011-03-17_16-29-59/Water_2011_03_17_2011-03-17_16-29-59.xml b/kobo/apps/openrosa/apps/logger/tests/Water_2011_03_17_2011-03-17_16-29-59/Water_2011_03_17_2011-03-17_16-29-59.xml index f4e00ed363..a8dfcd00eb 100644 --- a/kobo/apps/openrosa/apps/logger/tests/Water_2011_03_17_2011-03-17_16-29-59/Water_2011_03_17_2011-03-17_16-29-59.xml +++ b/kobo/apps/openrosa/apps/logger/tests/Water_2011_03_17_2011-03-17_16-29-59/Water_2011_03_17_2011-03-17_16-29-59.xml @@ -1 +1 @@ -Andrew1300375832136.jpg9.045921564102173 7.526530623435974 572.0 48.0northwestkebbibunzaRYRR4TRn/aboreholesingle_pointdieselfederal_governmentfederal_government state_governmentTYnonono_diesel missing_stolen_partswell_maintainedyesyear_roundnobody +Andrew1300375832136.jpg9.045921564102173 7.526530623435974 572.0 48.0northwestkebbibunzaRYRR4TRn/aboreholesingle_pointdieselfederal_governmentfederal_government state_governmentTYnonono_diesel missing_stolen_partswell_maintainedyesyear_roundnobodyuuid:435f173c688e482486a48661700467gh diff --git a/kobo/apps/openrosa/apps/logger/tests/Water_Translated_2011_03_10_2011-03-10_14-38-28.xml b/kobo/apps/openrosa/apps/logger/tests/Water_Translated_2011_03_10_2011-03-10_14-38-28.xml index a886f3c92c..dceb70c882 100644 --- a/kobo/apps/openrosa/apps/logger/tests/Water_Translated_2011_03_10_2011-03-10_14-38-28.xml +++ b/kobo/apps/openrosa/apps/logger/tests/Water_Translated_2011_03_10_2011-03-10_14-38-28.xml @@ -1 +1 @@ -9.055781364440918 7.518349885940552 534.0 16.01299764406815.jpgthtgtgprotected_dug_wellsingle_pointsolarlocal_governmentstate_government communityNoNopump_broken no_dieselwell_maintainedNowet_seasonslga_staff +9.055781364440918 7.518349885940552 534.0 16.01299764406815.jpgthtgtgprotected_dug_wellsingle_pointsolarlocal_governmentstate_government communityNoNopump_broken no_dieselwell_maintainedNowet_seasonslga_staffuuid:364f173c688e482676a48661700466gg diff --git a/kobo/apps/openrosa/apps/logger/tests/data_from_sdcard/bulk_submission.zip b/kobo/apps/openrosa/apps/logger/tests/data_from_sdcard/bulk_submission.zip index 2da0f47884..5d92a67af0 100644 Binary files a/kobo/apps/openrosa/apps/logger/tests/data_from_sdcard/bulk_submission.zip and b/kobo/apps/openrosa/apps/logger/tests/data_from_sdcard/bulk_submission.zip differ diff --git a/kobo/apps/openrosa/apps/logger/tests/test_backup_tools.py b/kobo/apps/openrosa/apps/logger/tests/test_backup_tools.py index 90378b698a..b1f4794bc8 100644 --- a/kobo/apps/openrosa/apps/logger/tests/test_backup_tools.py +++ b/kobo/apps/openrosa/apps/logger/tests/test_backup_tools.py @@ -32,8 +32,8 @@ def test_date_created_override(self): will set our date as the date_created """ xml_file_path = os.path.join( - settings.OPENROSA_APP_DIR, "apps", "logger", "fixtures", - "tutorial", "instances", "tutorial_2012-06-27_11-27-53.xml") + settings.OPENROSA_APP_DIR, 'apps', 'logger', 'fixtures', + 'tutorial', 'instances', 'tutorial_2012-06-27_11-27-53_w_uuid.xml') xml_file = django_file( xml_file_path, field_name="xml_file", content_type="text/xml") media_files = [] diff --git a/kobo/apps/openrosa/apps/logger/tests/test_form_submission.py b/kobo/apps/openrosa/apps/logger/tests/test_form_submission.py index 0f7f493ebc..5aa68c75ea 100644 --- a/kobo/apps/openrosa/apps/logger/tests/test_form_submission.py +++ b/kobo/apps/openrosa/apps/logger/tests/test_form_submission.py @@ -10,7 +10,7 @@ from kobo.apps.openrosa.apps.main.models.user_profile import UserProfile from kobo.apps.openrosa.apps.main.tests.test_base import TestBase -from kobo.apps.openrosa.apps.logger.models import Instance +from kobo.apps.openrosa.apps.logger.models import Instance, Attachment from kobo.apps.openrosa.apps.logger.models.instance import InstanceHistory from kobo.apps.openrosa.apps.logger.xform_instance_parser import clean_and_parse_xml from kobo.apps.openrosa.apps.viewer.models.parsed_instance import ParsedInstance @@ -36,7 +36,7 @@ def test_form_post(self): """ xml_submission_file_path = os.path.join( os.path.dirname(os.path.abspath(__file__)), - "../fixtures/tutorial/instances/tutorial_2012-06-27_11-27-53.xml" + '../fixtures/tutorial/instances/tutorial_2012-06-27_11-27-53_w_uuid.xml' ) self._make_submission(xml_submission_file_path) @@ -114,7 +114,7 @@ def test_submission_to_not_required_auth_as_anonymous_user(self): xml_submission_file_path = os.path.join( os.path.dirname(os.path.abspath(__file__)), - "../fixtures/tutorial/instances/tutorial_2012-06-27_11-27-53.xml" + '../fixtures/tutorial/instances/tutorial_2012-06-27_11-27-53_w_uuid.xml' ) # Anonymous should be able to submit data @@ -154,7 +154,7 @@ def test_submission_to_require_auth_with_perm(self): xml_submission_file_path = os.path.join( os.path.dirname(os.path.abspath(__file__)), - "../fixtures/tutorial/instances/tutorial_2012-06-27_11-27-53.xml" + '../fixtures/tutorial/instances/tutorial_2012-06-27_11-27-53_w_uuid.xml' ) self._make_submission(xml_submission_file_path, auth=auth) self.assertEqual(self.response.status_code, 201) @@ -244,12 +244,169 @@ def test_duplicate_submission_with_different_content(self): self.assertEqual(self.response.status_code, 201) self.assertEqual(Instance.objects.count(), pre_count + 1) inst = Instance.objects.order_by('pk').last() - self._make_submission(duplicate_xml_submission_file_path) - self.assertEqual(self.response.status_code, 201) - self.assertEqual(Instance.objects.count(), pre_count + 2) + self._make_submission(duplicate_xml_submission_file_path, assert_success=False) + self.assertEqual(self.response.status_code, 409) + self.assertEqual(Instance.objects.count(), pre_count + 1) # this is exactly the same instance another_inst = Instance.objects.order_by('pk').last() - self.assertNotEqual(inst.xml, another_inst.xml) + self.assertEqual(inst.xml, another_inst.xml) + + def test_duplicate_submission_with_same_content_but_with_attachment(self): + """ + Test that submitting the same XML content twice, + first without and then with an attachment, + results in a single instance with the attachment added. + """ + xml_submission_file_path = os.path.join( + os.path.dirname(__file__), + '../fixtures/tutorial/instances/tutorial_with_attachment', + 'tutorial_2012-06-27_11-27-53_w_attachment.xml' + ) + media_file_path = os.path.join( + os.path.dirname(__file__), + '../fixtures/tutorial/instances/tutorial_with_attachment', + '1335783522563.jpg' + ) + initial_instance_count = Instance.objects.count() + + # Test submission with XML file + self._make_submission(xml_submission_file_path) + initial_instance = Instance.objects.last() + self.assertEqual(self.response.status_code, 201) + self.assertEqual(Instance.objects.count(), initial_instance_count + 1) + + # Test duplicate submission with attachment + with open(media_file_path, 'rb') as media_file: + self._make_submission(xml_submission_file_path, media_file=media_file) + self.assertEqual(self.response.status_code, 201) + self.assertEqual(Instance.objects.count(), initial_instance_count + 1) + self.assertEqual( + Attachment.objects.filter(instance=initial_instance).count(), 1 + ) + + def test_duplicate_submission_with_same_content_but_with_different_attachment(self): + """ + Test duplicate submission handling: + - New submission without attachment should succeed. + - Same submission with an attachment should succeed, + adding the attachment. + - Resubmission with the same attachment should be rejected + with a 202 status code. + - Resubmission with a different attachment (same file name) should be + rejected with a 202 status code. + """ + xml_submission_file_path = os.path.join( + os.path.dirname(__file__), + '../fixtures/tutorial/instances/tutorial_with_attachment', + 'tutorial_2012-06-27_11-27-53_w_attachment.xml', + ) + media_file_path1 = os.path.join( + os.path.dirname(__file__), + '../fixtures/tutorial/instances/tutorial_with_attachment', + '1335783522563.jpg', + ) + media_file_path2 = os.path.join( + os.path.dirname(__file__), + '../fixtures/tutorial/instances/tutorial_with_attachment/' + 'attachment_with_different_content', + '1335783522563.jpg', + ) + initial_instance_count = Instance.objects.count() + + # Test submission with XML file + self._make_submission(xml_submission_file_path) + initial_instance = Instance.objects.last() + self.assertEqual(self.response.status_code, 201) + self.assertEqual(Instance.objects.count(), initial_instance_count + 1) + + # Test duplicate submission with attachment + with open(media_file_path1, 'rb') as media_file: + self._make_submission(xml_submission_file_path, media_file=media_file) + self.assertEqual(self.response.status_code, 201) + self.assertEqual(Instance.objects.count(), initial_instance_count + 1) + self.assertEqual( + Attachment.objects.filter(instance=initial_instance).count(), 1 + ) + + # Test duplicate submission with the same attachment + with open(media_file_path1, 'rb') as media_file: + self._make_submission(xml_submission_file_path, media_file=media_file) + self.assertEqual(self.response.status_code, 202) + self.assertEqual(Instance.objects.count(), initial_instance_count + 1) + self.assertEqual( + Attachment.objects.filter(instance=initial_instance).count(), 1 + ) + + # Test duplicate submission with the same attachment name but with + # different attachment content + with open(media_file_path2, 'rb') as media_file2: + self._make_submission(xml_submission_file_path, media_file=media_file2) + self.assertEqual(self.response.status_code, 202) + self.assertEqual(Instance.objects.count(), initial_instance_count + 1) + self.assertEqual( + Attachment.objects.filter(instance=initial_instance).count(), 1 + ) + + def test_edit_submission_with_same_attachment_name_but_different_content(self): + """ + Test editing a submission with an attachment with the same name + """ + xml_submission_file_path = os.path.join( + os.path.dirname(__file__), + '../fixtures/tutorial/instances/tutorial_with_attachment', + 'tutorial_2012-06-27_11-27-53_w_attachment.xml', + ) + xml_edit_submission_file_path = os.path.join( + os.path.dirname(__file__), + '../fixtures/tutorial/instances/tutorial_with_attachment', + 'tutorial_2012-06-27_11-27-53_w_attachment_edit.xml', + ) + media_file_path1 = os.path.join( + os.path.dirname(__file__), + '../fixtures/tutorial/instances/tutorial_with_attachment', + '1335783522563.jpg', + ) + media_file_path2 = os.path.join( + os.path.dirname(__file__), + '../fixtures/tutorial/instances/tutorial_with_attachment/' + 'attachment_with_different_content', + '1335783522563.jpg', + ) + initial_instance_count = Instance.objects.count() + + # Test submission with attachment + with open(media_file_path1, 'rb') as media_file: + self._make_submission( + xml_submission_file_path, media_file=media_file + ) + initial_instance = Instance.objects.order_by('-pk')[0] + + attachments = Attachment.objects.filter(instance=initial_instance) + self.assertTrue(attachments.count() == 1) + self.assertEqual(self.response.status_code, 201) + self.assertEqual(Instance.objects.count(), initial_instance_count + 1) + attachment = attachments[0] + attachment_basename = attachment.media_file_basename + attachment_hash = attachment.file_hash + + # test edit submission with the same attachment name but different attachment + # content + with open(media_file_path2, 'rb') as media_file: + self._make_submission( + xml_edit_submission_file_path, media_file=media_file + ) + + edited_instance = Instance.objects.order_by('-pk')[0] + edited_attachments = Attachment.objects.filter(instance=edited_instance) + self.assertTrue(attachments.count() == 1) + self.assertEqual(Instance.objects.count(), initial_instance_count + 1) + self.assertEqual(self.response.status_code, 201) + + edited_attachment = edited_attachments[0] + edited_attachment_basename = edited_attachment.media_file_basename + edited_attachment_hash = edited_attachment.file_hash + self.assertEqual(attachment_basename, edited_attachment_basename) + self.assertNotEqual(attachment_hash, edited_attachment_hash) def test_owner_can_edit_submissions(self): xml_submission_file_path = os.path.join( @@ -392,7 +549,7 @@ def test_submission_when_requires_auth(self): xml_submission_file_path = os.path.join( os.path.dirname(os.path.abspath(__file__)), - "../fixtures/tutorial/instances/tutorial_2012-06-27_11-27-53.xml" + '../fixtures/tutorial/instances/tutorial_2012-06-27_11-27-53_w_uuid.xml' ) auth = DigestAuth('alice', 'alice') self._make_submission( @@ -409,7 +566,7 @@ def test_submission_linked_to_reporter(self): xml_submission_file_path = os.path.join( os.path.dirname(os.path.abspath(__file__)), - "../fixtures/tutorial/instances/tutorial_2012-06-27_11-27-53.xml" + '../fixtures/tutorial/instances/tutorial_2012-06-27_11-27-53_w_uuid.xml' ) auth = DigestAuth('alice', 'alice') self._make_submission( diff --git a/kobo/apps/openrosa/apps/logger/tests/test_parsing.py b/kobo/apps/openrosa/apps/logger/tests/test_parsing.py index f5cd23dbfa..783ffe6531 100644 --- a/kobo/apps/openrosa/apps/logger/tests/test_parsing.py +++ b/kobo/apps/openrosa/apps/logger/tests/test_parsing.py @@ -67,7 +67,10 @@ def test_parse_xform_nested_repeats(self): 'has_kids': '1' }, 'web_browsers': 'chrome ie', - 'gps': '-1.2627557 36.7926442 0.0 30.0' + 'gps': '-1.2627557 36.7926442 0.0 30.0', + 'meta': { + 'instanceID': 'uuid:364f173c688e482486a48661700466gg' + } } } self.assertEqual(dict_, expected_dict) @@ -85,7 +88,8 @@ def test_parse_xform_nested_repeats(self): 'kids/has_kids': '1', 'info/age': '80', 'web_browsers': 'chrome ie', - 'info/name': 'Adam' + 'info/name': 'Adam', + 'meta/instanceID': 'uuid:364f173c688e482486a48661700466gg' } self.assertEqual(flat_dict, expected_flat_dict) @@ -161,6 +165,15 @@ def test_get_uuid_from_xml(self): instanceID = get_uuid_from_xml(xml_str) self.assertEqual(instanceID, '729f173c688e482486a48661700455ff') + # Additional test case for a custom prefixed UUID + submission = """ + + uuid:kobotoolbox.org:123456789 + + """ + custom_instance_id = get_uuid_from_xml(submission) + self.assertEqual(custom_instance_id, 'kobotoolbox.org:123456789') + def test_get_deprecated_uuid_from_xml(self): with open( os.path.join( diff --git a/kobo/apps/openrosa/apps/logger/tests/test_simple_submission.py b/kobo/apps/openrosa/apps/logger/tests/test_simple_submission.py index 371ee04f30..3781092626 100644 --- a/kobo/apps/openrosa/apps/logger/tests/test_simple_submission.py +++ b/kobo/apps/openrosa/apps/logger/tests/test_simple_submission.py @@ -1,9 +1,10 @@ -# coding: utf-8 +import uuid + from django.test import RequestFactory, TestCase from pyxform import SurveyElementBuilder from kobo.apps.kobo_auth.shortcuts import User -from kobo.apps.openrosa.apps.logger.xform_instance_parser import DuplicateInstance +from kobo.apps.openrosa.apps.logger.exceptions import DuplicateInstanceError from kobo.apps.openrosa.apps.main.models.user_profile import UserProfile from kobo.apps.openrosa.apps.viewer.models.data_dictionary import DataDictionary from kobo.apps.openrosa.libs.utils.logger_tools import ( @@ -36,18 +37,25 @@ def _get_xml_for_form(self, xform): xform.save() def _submit_at_hour(self, hour): - st_xml = '2012-01-11T%d:00:00.000+00' % hour + st_xml = ( + f'' + f'2012-01-11T{hour}:00:00.000+00' + f'' + f'uuid:918a1889-389f-4427-b48a-0ba16b7c9b{hour}' + f'' + f'' + ) try: create_instance(self.user.username, TempFileProxy(st_xml), []) - except DuplicateInstance: + except DuplicateInstanceError: pass def _submit_simple_yes(self): create_instance(self.user.username, TempFileProxy( - 'Yes<' - '/yesno>'), []) + f'Yes<' + f'/yesno>' + f'uuid:{str(uuid.uuid4())}' + f''), []) def setUp(self): self.user = User.objects.create(username='admin', email='sample@example.com') diff --git a/kobo/apps/openrosa/apps/logger/xform_instance_parser.py b/kobo/apps/openrosa/apps/logger/xform_instance_parser.py index afcda3e4da..0ee1a78363 100644 --- a/kobo/apps/openrosa/apps/logger/xform_instance_parser.py +++ b/kobo/apps/openrosa/apps/logger/xform_instance_parser.py @@ -1,10 +1,9 @@ from __future__ import annotations -import logging import re import sys from datetime import datetime -from typing import Optional +from typing import Optional, Union from xml.dom import Node import dateutil.parser @@ -13,38 +12,14 @@ from django.utils.encoding import smart_str from django.utils.translation import gettext as t +from kobo.apps.openrosa.apps.logger.exceptions import InstanceEmptyError from kobo.apps.openrosa.libs.utils.common_tags import XFORM_ID_STRING +from kpi.utils.log import logging -class XLSFormError(Exception): - pass - - -class DuplicateInstance(Exception): - def __str__(self): - return t('Duplicate Instance') - - -class InstanceInvalidUserError(Exception): - def __str__(self): - return t('Could not determine the user.') - - -class InstanceParseError(Exception): - def __str__(self): - return t('The instance could not be parsed.') - - -class InstanceEmptyError(InstanceParseError): - def __str__(self): - return t('Empty instance') - - -class InstanceMultipleNodeError(Exception): - pass - - -def get_meta_from_xml(xml_str, meta_name): +def get_meta_node_from_xml( + xml_str: str, meta_name: str +) -> Union[None, tuple[str, minidom.Document]]: xml = clean_and_parse_xml(xml_str) children = xml.childNodes # children ideally contains a single element @@ -71,22 +46,30 @@ def get_meta_from_xml(xml_str, meta_name): return None uuid_tag = uuid_tags[0] - return uuid_tag.firstChild.nodeValue.strip() if uuid_tag.firstChild\ - else None + return uuid_tag, xml + + +def get_meta_from_xml(xml_str: str, meta_name: str) -> str: + if node_and_root := get_meta_node_from_xml(xml_str, meta_name): + node, _ = node_and_root + return node.firstChild.nodeValue.strip() if node.firstChild else None def get_uuid_from_xml(xml): - def _uuid_only(uuid, regex): - matches = regex.match(uuid) - if matches and len(matches.groups()) > 0: - return matches.groups()[0] - return None + def _uuid_only(uuid): + """ + Strips the 'uuid:' prefix from the provided identifier if it exists. + This preserves any custom ID schemes (e.g., 'kobotoolbox.org:123456789') + while ensuring only the 'uuid:' prefix is removed. This approach + adheres to the OpenRosa spec, allowing custom prefixes to be stored + intact in the database to prevent potential ID collisions. + """ + return re.sub(r'^uuid:', '', uuid) uuid = get_meta_from_xml(xml, 'instanceID') - regex = re.compile(r'uuid:(.*)') if uuid: - return _uuid_only(uuid, regex) + return _uuid_only(uuid) # check in survey_node attributes xml = clean_and_parse_xml(xml) children = xml.childNodes @@ -97,10 +80,19 @@ def _uuid_only(uuid, regex): survey_node = children[0] uuid = survey_node.getAttribute('instanceID') if uuid != '': - return _uuid_only(uuid, regex) + return _uuid_only(uuid) return None +def get_root_uuid_from_xml(xml): + root_uuid = get_meta_from_xml(xml, 'rootUuid') + if root_uuid: + return root_uuid + + # If no rootUuid, fall back to instanceID + return get_uuid_from_xml(xml) + + def get_submission_date_from_xml(xml) -> Optional[datetime]: # check in survey_node attributes xml = clean_and_parse_xml(xml) @@ -126,13 +118,26 @@ def get_deprecated_uuid_from_xml(xml): return None -def clean_and_parse_xml(xml_string: str) -> Node: +def clean_and_parse_xml(xml_string: str) -> minidom.Document: clean_xml_str = xml_string.strip() clean_xml_str = re.sub(r'>\s+<', '><', smart_str(clean_xml_str)) xml_obj = minidom.parseString(clean_xml_str) return xml_obj +def set_meta(xml_str: str, meta_name: str, new_value: str) -> str: + + if not (node_and_root := get_meta_node_from_xml(xml_str, meta_name)): + raise ValueError(f'{meta_name} node not found') + + node, root = node_and_root + + if node.firstChild: + node.firstChild.nodeValue = new_value + + return root.toxml() + + def _xml_node_to_dict(node: Node, repeats: list = []) -> dict: assert isinstance(node, Node) if len(node.childNodes) == 0: @@ -290,9 +295,9 @@ def __init__(self, xml_str, data_dictionary): try: self.parse(xml_str) except Exception as e: - logger = logging.getLogger('console_logger') - logger.error( - "Failed to parse instance '%s'" % xml_str, exc_info=True) + logging.error( + f"Failed to parse instance '{xml_str}'", exc_info=True + ) # `self.parse()` has been wrapped in to try/except but it makes the # exception silently ignored. # `logger_tool.py::safe_create_instance()` needs the exception @@ -340,10 +345,9 @@ def _set_attributes(self): try: assert key not in self._attributes except AssertionError: - logger = logging.getLogger('console_logger') - #logger.debug( - # f'Skipping duplicate attribute: {key} with value {value}' - #) + logging.debug( + f'Skipping duplicate attribute: {key} with value {value}' + ) else: self._attributes[key] = value @@ -374,7 +378,6 @@ def parse_xform_instance(xml_str, data_dictionary): def get_xform_media_question_xpaths( xform: 'kobo.apps.openrosa.apps.logger.models.XForm', ) -> list: - logger = logging.getLogger('console_logger') parser = XFormInstanceParser(xform.xml, xform.data_dictionary(use_cache=True)) all_attributes = _get_all_attributes(parser.get_root_node()) media_field_xpaths = [] @@ -388,7 +391,7 @@ def get_xform_media_question_xpaths( try: next_attribute = next(all_attributes) except StopIteration: - logger.error( + logging.error( f'`ref` attribute seems to be missing in {xform.xml}', exc_info=True, ) @@ -398,8 +401,7 @@ def get_xform_media_question_xpaths( try: assert next_attribute_key.lower() == 'ref' except AssertionError: - logger = logging.getLogger('console_logger') - logger.error( + logging.error( f'`ref` should come after `mediatype:{value}` in {xform.xml}', exc_info=True, ) diff --git a/kobo/apps/openrosa/apps/main/tests/fixtures/gps/instances/gps_1980-01-23_20-52-08.xml b/kobo/apps/openrosa/apps/main/tests/fixtures/gps/instances/gps_1980-01-23_20-52-08.xml index d5c336bef7..d8276b02c4 100644 --- a/kobo/apps/openrosa/apps/main/tests/fixtures/gps/instances/gps_1980-01-23_20-52-08.xml +++ b/kobo/apps/openrosa/apps/main/tests/fixtures/gps/instances/gps_1980-01-23_20-52-08.xml @@ -1 +1 @@ -40.81101715564728 -73.96446704864502 -152.0 16.0 +40.81101715564728 -73.96446704864502 -152.0 16.0uuid:729f173c688e482486a48661700455ff diff --git a/kobo/apps/openrosa/apps/main/tests/fixtures/gps/instances/gps_1980-01-23_21-21-33.xml b/kobo/apps/openrosa/apps/main/tests/fixtures/gps/instances/gps_1980-01-23_21-21-33.xml index fe1f4db02e..57bbed33cb 100644 --- a/kobo/apps/openrosa/apps/main/tests/fixtures/gps/instances/gps_1980-01-23_21-21-33.xml +++ b/kobo/apps/openrosa/apps/main/tests/fixtures/gps/instances/gps_1980-01-23_21-21-33.xml @@ -1 +1 @@ -40.811086893081665 -73.96449387073517 -113.0 16.0 +40.811086893081665 -73.96449387073517 -113.0 16.0uuid:435f173c688e482486a48661700467gh diff --git a/kobo/apps/openrosa/apps/viewer/tests/fixtures/grouped_gps/instances/grouped_gps_01.xml b/kobo/apps/openrosa/apps/viewer/tests/fixtures/grouped_gps/instances/grouped_gps_01.xml index 2cb36d609a..5bfed2204b 100644 --- a/kobo/apps/openrosa/apps/viewer/tests/fixtures/grouped_gps/instances/grouped_gps_01.xml +++ b/kobo/apps/openrosa/apps/viewer/tests/fixtures/grouped_gps/instances/grouped_gps_01.xml @@ -4,4 +4,7 @@ -1.2627107 36.7925771 0.0 37.5 chrome safari + + uuid:435f173c688e482486a486617004534df + diff --git a/kobo/apps/openrosa/apps/viewer/tests/fixtures/new_repeats/instances/new_repeats_01.xml b/kobo/apps/openrosa/apps/viewer/tests/fixtures/new_repeats/instances/new_repeats_01.xml index 0e5f835ffc..5cd3b97dec 100644 --- a/kobo/apps/openrosa/apps/viewer/tests/fixtures/new_repeats/instances/new_repeats_01.xml +++ b/kobo/apps/openrosa/apps/viewer/tests/fixtures/new_repeats/instances/new_repeats_01.xml @@ -17,4 +17,7 @@ -1.2627557 36.7926442 0.0 30.0 chrome ie + + uuid:435f173c688e482463a486617004534df + diff --git a/kobo/apps/openrosa/apps/viewer/tests/fixtures/new_repeats/instances/new_repeats_02.xml b/kobo/apps/openrosa/apps/viewer/tests/fixtures/new_repeats/instances/new_repeats_02.xml index 2d2659147b..5b92f562ad 100644 --- a/kobo/apps/openrosa/apps/viewer/tests/fixtures/new_repeats/instances/new_repeats_02.xml +++ b/kobo/apps/openrosa/apps/viewer/tests/fixtures/new_repeats/instances/new_repeats_02.xml @@ -13,4 +13,7 @@ -1.2627557 36.7926442 0.0 30.0 chrome ie firefox + + uuid:32sd3c688e482486a48661700455ff + diff --git a/kobo/apps/openrosa/apps/viewer/tests/fixtures/new_repeats/instances/new_repeats_03.xml b/kobo/apps/openrosa/apps/viewer/tests/fixtures/new_repeats/instances/new_repeats_03.xml new file mode 100644 index 0000000000..5aaf43f931 --- /dev/null +++ b/kobo/apps/openrosa/apps/viewer/tests/fixtures/new_repeats/instances/new_repeats_03.xml @@ -0,0 +1,20 @@ + + + + Adam + 80 + + + 1 + + Liam + 40 + + + Emma + 70 + + +-1.2627557 36.7926442 0.0 30.0 +chrome + diff --git a/kobo/apps/openrosa/apps/viewer/tests/test_pandas_mongo_bridge.py b/kobo/apps/openrosa/apps/viewer/tests/test_pandas_mongo_bridge.py index 79086df69c..0ec24e4635 100644 --- a/kobo/apps/openrosa/apps/viewer/tests/test_pandas_mongo_bridge.py +++ b/kobo/apps/openrosa/apps/viewer/tests/test_pandas_mongo_bridge.py @@ -51,16 +51,27 @@ def _publish_xls_fixture_set_xform(self, fixture): self.xform = XForm.objects.all().reverse()[0] def _submit_fixture_instance( - self, fixture, instance, submission_time=None): + self, fixture, instance, submission_time=None, add_submission_uuid=None): """ Submit an instance at tests/fixtures/[fixture]/instances/[fixture]_[instance].xml """ xml_submission_file_path = xml_inst_filepath_from_fixture_name( fixture, instance) - self._make_submission( - xml_submission_file_path, forced_submission_time=submission_time) - self.assertEqual(self.response.status_code, 201) + + if add_submission_uuid: + xml_submission_file_path = ( + self._add_submission_uuid_to_submission_xml( + xml_submission_file_path + ) + ) + try: + self._make_submission( + xml_submission_file_path, forced_submission_time=submission_time) + self.assertEqual(self.response.status_code, 201) + finally: + if add_submission_uuid: + os.remove(xml_submission_file_path) def _publish_single_level_repeat_form(self): self._publish_xls_fixture_set_xform("new_repeats") @@ -207,6 +218,7 @@ def test_csv_columns_for_gps_within_groups(self): 'gps_group/_gps_longitude', 'gps_group/_gps_altitude', 'gps_group/_gps_precision', + 'meta/instanceID', 'web_browsers/firefox', 'web_browsers/chrome', 'web_browsers/ie', @@ -246,6 +258,7 @@ def test_format_mongo_data_for_csv(self): 'kids/kids_details[1]/kids_age': '50', 'kids/kids_details[2]/kids_name': 'Cain', 'kids/kids_details[2]/kids_age': '76', + 'meta/instanceID': 'uuid:435f173c688e482463a486617004534df', 'web_browsers/chrome': True, 'web_browsers/ie': True, 'web_browsers/safari': False, @@ -494,7 +507,7 @@ def test_query_mongo(self): self._publish_single_level_repeat_form() # submit 3 instances for i in range(3): - self._submit_fixture_instance("new_repeats", "01") + self._submit_fixture_instance('new_repeats', '03', None, True) df_builder = XLSDataFrameBuilder(self.user.username, self.xform.id_string) record_count = df_builder._query_mongo(count=True) @@ -526,10 +539,10 @@ def test_csv_export_with_df_size_limit(self): self._publish_single_level_repeat_form() # submit 7 instances for i in range(4): - self._submit_fixture_instance("new_repeats", "01") + self._submit_fixture_instance('new_repeats', '03', None, True) self._submit_fixture_instance("new_repeats", "02") for i in range(2): - self._submit_fixture_instance("new_repeats", "01") + self._submit_fixture_instance('new_repeats', '03', None, True) csv_df_builder = CSVDataFrameBuilder(self.user.username, self.xform.id_string) record_count = csv_df_builder._query_mongo(count=True) diff --git a/kobo/apps/openrosa/libs/tests/mixins/make_submission_mixin.py b/kobo/apps/openrosa/libs/tests/mixins/make_submission_mixin.py index c2d14aadcb..af9f5ebfa4 100644 --- a/kobo/apps/openrosa/libs/tests/mixins/make_submission_mixin.py +++ b/kobo/apps/openrosa/libs/tests/mixins/make_submission_mixin.py @@ -1,6 +1,7 @@ # coding: utf-8 import os import re +import uuid from tempfile import NamedTemporaryFile from typing import Union @@ -38,6 +39,33 @@ def _add_uuid_to_submission_xml(self, path, xform): return path + def _add_submission_uuid_to_submission_xml(self, path): + with open(path, 'rb') as _file: + xml_content = _file.read().decode() + + # Find the closing tag of the root element (e.g., ) + closing_tag_index = xml_content.rfind(f'') + + if closing_tag_index == -1: + raise ValueError('Root element closing tag not found') + + # Construct the meta element with a new UUID + meta_element = f'uuid:{str(uuid.uuid4())}' + + # Insert the meta element before the closing tag of the root element + xml_content = ( + xml_content[:closing_tag_index] + + meta_element # noqa: W503 + + xml_content[closing_tag_index:] # noqa: W503 + ) + + # Write the updated XML content to a temporary file and return the path + with NamedTemporaryFile(delete=False, mode='w') as tmp_file: + tmp_file.write(xml_content) + path = tmp_file.name + + return path + def _make_submission( self, path: str, diff --git a/kobo/apps/openrosa/libs/utils/logger_tools.py b/kobo/apps/openrosa/libs/utils/logger_tools.py index fc03f679f3..bed5a08b0e 100644 --- a/kobo/apps/openrosa/libs/utils/logger_tools.py +++ b/kobo/apps/openrosa/libs/utils/logger_tools.py @@ -1,5 +1,7 @@ from __future__ import annotations +import contextlib +import hashlib import logging import os import re @@ -10,7 +12,6 @@ from typing import Generator, Optional, Union from xml.etree import ElementTree as ET from xml.parsers.expat import ExpatError - try: from zoneinfo import ZoneInfo except ImportError: @@ -24,7 +25,7 @@ from django.core.exceptions import PermissionDenied, ValidationError from django.core.files.base import File from django.core.mail import mail_admins -from django.db import IntegrityError, transaction +from django.db import connection, IntegrityError, transaction from django.db.models import Q from django.http import ( Http404, @@ -44,7 +45,8 @@ from kobo.apps.openrosa.apps.logger.exceptions import ( DuplicateUUIDError, FormInactiveError, - TemporarilyUnavailableError, + InstanceIdMissingError, + TemporarilyUnavailableError, ConflictingSubmissionUUIDError, ) from kobo.apps.openrosa.apps.logger.models import Attachment, Instance, XForm from kobo.apps.openrosa.apps.logger.models.attachment import ( @@ -62,11 +64,13 @@ update_xform_monthly_counter, update_xform_submission_count, ) -from kobo.apps.openrosa.apps.logger.xform_instance_parser import ( - DuplicateInstance, +from kobo.apps.openrosa.apps.logger.exceptions import ( + DuplicateInstanceError, InstanceEmptyError, InstanceInvalidUserError, InstanceMultipleNodeError, +) +from kobo.apps.openrosa.apps.logger.xform_instance_parser import ( clean_and_parse_xml, get_deprecated_uuid_from_xml, get_submission_date_from_xml, @@ -81,7 +85,6 @@ default_kobocat_storage as default_storage, ) from kpi.utils.object_permission import get_database_user -from kpi.utils.hash import calculate_hash from kpi.utils.mongo_helper import MongoHelper OPEN_ROSA_VERSION_HEADER = 'X-OpenRosa-Version' @@ -137,7 +140,6 @@ def check_edit_submission_permissions( )) -@transaction.atomic # paranoia; redundant since `ATOMIC_REQUESTS` set to `True` def create_instance( username: str, xml_file: File, @@ -163,51 +165,40 @@ def create_instance( # get new and deprecated uuid's new_uuid = get_uuid_from_xml(xml) + if not new_uuid: + raise InstanceIdMissingError - # Dorey's rule from 2012 (commit 890a67aa): - # Ignore submission as a duplicate IFF - # * a submission's XForm collects start time - # * the submitted XML is an exact match with one that - # has already been submitted for that user. - # The start-time requirement protected submissions with identical responses - # from being rejected as duplicates *before* KoBoCAT had the concept of - # submission UUIDs. Nowadays, OpenRosa requires clients to send a UUID (in - # ``) within every submission; if the incoming XML has a UUID - # and still exactly matches an existing submission, it's certainly a - # duplicate (https://docs.opendatakit.org/openrosa-metadata/#fields). - if xform.has_start_time or new_uuid is not None: - # XML matches are identified by identical content hash OR, when a - # content hash is not present, by string comparison of the full - # content, which is slow! Use the management command - # `populate_xml_hashes_for_instances` to hash existing submissions + with get_instance_lock(xml_hash, new_uuid, xform.id) as lock_acquired: + if not lock_acquired: + raise DuplicateInstanceError + + # Check for an existing instance existing_instance = Instance.objects.filter( - Q(xml_hash=xml_hash) | Q(xml_hash=Instance.DEFAULT_XML_HASH, xml=xml), + xml_hash=xml_hash, xform__user=xform.user, ).first() - else: - existing_instance = None - - if existing_instance: - existing_instance.check_active(force=False) - # ensure we have saved the extra attachments - new_attachments, _ = save_attachments(existing_instance, media_files) - if not new_attachments: - raise DuplicateInstance() + + if existing_instance: + existing_instance.check_active(force=False) + # ensure we have saved the extra attachments + new_attachments, _ = save_attachments(existing_instance, media_files) + if not new_attachments: + raise DuplicateInstanceError + else: + # Update Mongo via the related ParsedInstance + existing_instance.parsed_instance.save(asynchronous=False) + return existing_instance else: - # Update Mongo via the related ParsedInstance - existing_instance.parsed_instance.save(asynchronous=False) - return existing_instance - else: - instance = save_submission( - request, - xform, - xml, - media_files, - new_uuid, - status, - date_created_override, - ) - return instance + instance = save_submission( + request, + xform, + xml, + media_files, + new_uuid, + status, + date_created_override, + ) + return instance def disposition_ext_and_date(name, extension, show_date=True): @@ -228,6 +219,26 @@ def dict2xform(submission: dict, xform_id_string: str) -> str: return xml_head + dict2xml(submission) + xml_tail +@contextlib.contextmanager +def get_instance_lock(xml_hash: str, submission_uuid: str, xform_id: int) -> bool: + int_lock = int.from_bytes( + hashlib.shake_128( + f'{xform_id}!!{submission_uuid}!!{xml_hash}'.encode() + ).digest(7), 'little' + ) + acquired = False + + try: + with transaction.atomic(): + cur = connection.cursor() + cur.execute('SELECT pg_try_advisory_lock(%s::bigint);', (int_lock,)) + acquired = cur.fetchone()[0] + yield acquired + finally: + cur.execute('SELECT pg_advisory_unlock(%s::bigint);', (int_lock,)) + cur.close() + + def get_instance_or_404(**criteria): """ Mimic `get_object_or_404` but handles duplicate records. @@ -308,6 +319,9 @@ def status_code(self): except InstanceEmptyError: result.error = t('Received empty submission. No instance was created') result.http_error_response = OpenRosaResponseBadRequest(result.error) + except InstanceIdMissingError: + result.error = t('Instance ID is required') + result.http_error_response = OpenRosaResponseBadRequest(result.error) except FormInactiveError: result.error = t('Form is not active') result.http_error_response = OpenRosaResponseNotAllowed(result.error) @@ -320,7 +334,13 @@ def status_code(self): except ExpatError: result.error = t('Improperly formatted XML.') result.http_error_response = OpenRosaResponseBadRequest(result.error) - except DuplicateInstance: + except ConflictingSubmissionUUIDError: + result.error = t('Submission with this instance ID already exists') + response = OpenRosaResponse(result.error) + response.status_code = 409 + response['Location'] = request.build_absolute_uri(request.path) + result.http_error_response = response + except DuplicateInstanceError: result.error = t('Duplicate submission') response = OpenRosaResponse(result.error) response.status_code = 202 @@ -647,9 +667,7 @@ def save_attachments( media_file=attachment_filename, mimetype=f.content_type, ).first() - if existing_attachment and ( - existing_attachment.file_hash == calculate_hash(f.read()) - ): + if existing_attachment: # We already have this attachment! continue f.seek(0) @@ -791,16 +809,23 @@ def get_soft_deleted_attachments(instance: Instance) -> list[Attachment]: # Update Attachment objects to hide them if they are not used anymore. # We do not want to delete them until the instance itself is deleted. + # If the new attachment has the same basename as an existing one but + # different content, update the existing one. + # FIXME Temporary hack to leave background-audio files and audit files alone # Bug comes from `get_xform_media_question_xpaths()` queryset = Attachment.objects.filter(instance=instance).exclude( - Q(media_file_basename__in=basenames) - | Q(media_file_basename__endswith='.enc') + Q(media_file_basename__endswith='.enc') | Q(media_file_basename='audit.csv') | Q(media_file_basename__regex=r'^\d{10,}\.(m4a|amr)$') + ).order_by('-id') + + latest_attachments = queryset[:len(basenames)] + remaining_attachments = queryset.exclude( + id__in=latest_attachments.values_list('id', flat=True) ) - soft_deleted_attachments = list(queryset.all()) - queryset.update(deleted_at=dj_timezone.now()) + soft_deleted_attachments = list(remaining_attachments.all()) + remaining_attachments.update(deleted_at=dj_timezone.now()) return soft_deleted_attachments @@ -821,18 +846,20 @@ def _get_instance( """ # check if it is an edit submission old_uuid = get_deprecated_uuid_from_xml(xml) - instances = Instance.objects.filter(uuid=old_uuid) - - if instances: + if old_uuid and (instance := Instance.objects.filter(uuid=old_uuid).first()): # edits - instance = instances[0] check_edit_submission_permissions(request, xform) InstanceHistory.objects.create( xml=instance.xml, xform_instance=instance, uuid=old_uuid) instance.xml = xml instance._populate_xml_hash() instance.uuid = new_uuid - instance.save() + try: + instance.save() + except IntegrityError as e: + if 'root_uuid' in str(e): + raise ConflictingSubmissionUUIDError + raise else: submitted_by = ( get_database_user(request.user) @@ -851,8 +878,12 @@ def _get_instance( # Only set the attribute if requested, i.e. don't bother ever # setting it to `False` instance.defer_counting = True - instance.save() - + try: + instance.save() + except IntegrityError as e: + if 'root_uuid' in str(e): + raise ConflictingSubmissionUUIDError + raise return instance diff --git a/kobo/apps/subsequences/tests/test_submission_stream.py b/kobo/apps/subsequences/tests/test_submission_stream.py index 7b4262aacd..38aebdc22c 100644 --- a/kobo/apps/subsequences/tests/test_submission_stream.py +++ b/kobo/apps/subsequences/tests/test_submission_stream.py @@ -1,8 +1,10 @@ from copy import deepcopy +from unittest.mock import patch from django.contrib.auth import get_user_model from django.test import TestCase +from kobo.apps.openrosa.apps.logger.exceptions import ConflictingSubmissionUUIDError from kobo.apps.subsequences.models import SubmissionExtras from kobo.apps.subsequences.utils import stream_with_extras from kpi.models import Asset @@ -254,30 +256,39 @@ def test_stream_with_extras_handles_duplicated_submission_uuids(self): '_uuid': '1c05898e-b43c-491d-814c-79595eb84e81', }, ] - self.asset.deployment.mock_submissions(submissions) - # Process submissions with extras - output = list( - stream_with_extras( - self.asset.deployment.get_submissions(user=self.asset.owner), - self.asset, + # Mock the get_submissions method to return the test submissions + with patch.object( + self.asset.deployment, + 'get_submissions', + return_value=submissions, + ): + # Expect a ConflictingSubmissionUUIDError due to duplicated UUIDs + with self.assertRaises(ConflictingSubmissionUUIDError): + self.asset.deployment.mock_submissions(submissions) + + # Process submissions with extras + output = list( + stream_with_extras( + self.asset.deployment.get_submissions(user=self.asset.owner), + self.asset, + ) ) - ) - # Make sure that uuid values for single or multiple choice qualitative - # analysis questions are kept as strings and not mutated - for submission in output: - supplemental_details = submission['_supplementalDetails'] - for qual_response in supplemental_details['Tell_me_a_story']['qual']: - if qual_response['type'] not in [ - 'qual_select_one', - 'qual_select_multiple', - ]: - # question is not a single or multiple choice one - continue + # Make sure that uuid values for single or multiple choice qualitative + # analysis questions are kept as strings and not mutated + for submission in output: + supplemental_details = submission['_supplementalDetails'] + for qual_response in supplemental_details['Tell_me_a_story']['qual']: + if qual_response['type'] not in [ + 'qual_select_one', + 'qual_select_multiple', + ]: + # question is not a single or multiple choice one + continue - for v in qual_response['val']: - assert isinstance(v['uuid'], str) + for v in qual_response['val']: + assert isinstance(v['uuid'], str) def test_stream_with_extras_ignores_empty_qual_responses(self): # Modify submission extras 'val' to be an empty string diff --git a/kobo/settings/testing.py b/kobo/settings/testing.py index 421adcaeb6..3ebdc13dac 100644 --- a/kobo/settings/testing.py +++ b/kobo/settings/testing.py @@ -1,4 +1,5 @@ # coding: utf-8 +import os from django.contrib.auth.management import DEFAULT_DB_ALIAS from mongomock import MongoClient as MockMongoClient @@ -51,4 +52,6 @@ TEST_HTTP_HOST = 'testserver' TEST_USERNAME = 'bob' +SKIP_TESTS_WITH_CONCURRENCY = os.getenv('SKIP_TESTS_WITH_CONCURRENCY', False) + OPENROSA_DB_ALIAS = DEFAULT_DB_ALIAS diff --git a/kpi/tests/api/v2/test_api_submissions.py b/kpi/tests/api/v2/test_api_submissions.py index b659b4d28f..808570bc0a 100644 --- a/kpi/tests/api/v2/test_api_submissions.py +++ b/kpi/tests/api/v2/test_api_submissions.py @@ -25,6 +25,7 @@ from kobo.apps.openrosa.apps.logger.models.instance import Instance from kobo.apps.openrosa.apps.main.models.user_profile import UserProfile from kobo.apps.openrosa.libs.utils.logger_tools import dict2xform +from kobo.apps.openrosa.apps.logger.exceptions import InstanceIdMissingError from kpi.constants import ( ASSET_TYPE_SURVEY, PERM_ADD_SUBMISSIONS, @@ -1590,6 +1591,16 @@ def test_edit_submission_with_xml_missing_uuids(self): # The form UUID is already omitted by these tests, but fail if that # changes in the future assert 'formhub/uuid' not in submission.keys() + + # Attempt to mock the submission without UUIDs + with self.assertRaises(InstanceIdMissingError) as ex: + self.asset.deployment.mock_submissions([submission], create_uuids=False) + + # Rejecting the submission because it does not have an instance ID + self.assertEqual(str(ex.exception), 'Could not determine the instance ID') + + # Test the edit flow with a submission that has a UUID + submission['meta/instanceID'] = 'uuid:9710c729-00a5-41f1-b740-8dd618bb4a49' self.asset.deployment.mock_submissions([submission], create_uuids=False) # Find and verify the new submission @@ -1607,7 +1618,10 @@ def test_edit_submission_with_xml_missing_uuids(self): submission_xml_root = fromstring_preserve_root_xmlns(submission_xml) assert submission_json['_id'] == submission['_id'] assert submission_xml_root.find('./find_this').text == 'hello!' - assert submission_xml_root.find('./meta/instanceID') is None + assert ( + submission_xml_root.find('./meta/instanceID').text == # noqa: W504 + 'uuid:9710c729-00a5-41f1-b740-8dd618bb4a49' + ) assert submission_xml_root.find('./formhub/uuid') is None # Get edit endpoint