diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
index 94380ec6be..4eaedaa459 100644
--- a/.github/workflows/pytest.yml
+++ b/.github/workflows/pytest.yml
@@ -24,6 +24,7 @@ jobs:
KOBOCAT_URL: http://kobocat
KOBOCAT_INTERNAL_URL: http://kobocat
KOBOFORM_URL: http://kpi
+ SKIP_TESTS_WITH_CONCURRENCY: 'True'
strategy:
matrix:
python-version: ['3.8', '3.10']
diff --git a/kobo/apps/openrosa/apps/api/tests/fixtures/users.json b/kobo/apps/openrosa/apps/api/tests/fixtures/users.json
new file mode 100644
index 0000000000..72ad40c86a
--- /dev/null
+++ b/kobo/apps/openrosa/apps/api/tests/fixtures/users.json
@@ -0,0 +1,18 @@
+[
+ {
+ "model": "auth.user",
+ "pk": 2,
+ "fields": {
+ "username": "bob",
+ "password": "pbkdf2_sha256$260000$T1eA0O4Ub6c6FAaCsb0fqU$6vX4qMw1VV9tMXFf1d9pL/5z5/2T1MQYYn7vB3p+I2Y=",
+ "email": "bob@columbia.edu",
+ "first_name": "bob",
+ "last_name": "bob",
+ "is_active": true,
+ "is_staff": false,
+ "is_superuser": false,
+ "last_login": null,
+ "date_joined": "2015-02-12T19:52:14.406Z"
+ }
+ }
+]
diff --git a/kobo/apps/openrosa/apps/api/tests/viewsets/test_xform_submission_api.py b/kobo/apps/openrosa/apps/api/tests/viewsets/test_xform_submission_api.py
index cf0319fa7e..b219f5ece9 100644
--- a/kobo/apps/openrosa/apps/api/tests/viewsets/test_xform_submission_api.py
+++ b/kobo/apps/openrosa/apps/api/tests/viewsets/test_xform_submission_api.py
@@ -1,9 +1,22 @@
+import multiprocessing
import os
+from collections import defaultdict
+from functools import partial
+import pytest
+import requests
import simplejson as json
+from django.conf import settings
from django.contrib.auth.models import AnonymousUser
+from django.core.files.base import ContentFile
from django.core.files.uploadedfile import InMemoryUploadedFile
+from django.test.testcases import LiveServerTestCase
from django_digest.test import DigestAuth
+from rest_framework.authtoken.models import Token
+
+from kobo.apps.kobo_auth.shortcuts import User
+from kobo.apps.openrosa.apps.main.models import UserProfile
+from kobo.apps.openrosa.libs.tests.mixins.request_mixin import RequestMixin
from rest_framework import status
from kobo.apps.openrosa.apps.api.tests.viewsets.test_abstract_viewset import (
@@ -11,8 +24,10 @@
)
from kobo.apps.openrosa.apps.api.viewsets.xform_submission_api import XFormSubmissionApi
from kobo.apps.openrosa.apps.logger.models import Attachment
+from kobo.apps.openrosa.apps.main import tests as main_tests
from kobo.apps.openrosa.libs.constants import CAN_ADD_SUBMISSIONS
from kobo.apps.openrosa.libs.utils.guardian import assign_perm
+from kobo.apps.openrosa.libs.utils import logger_tools
from kobo.apps.openrosa.libs.utils.logger_tools import OpenRosaTemporarilyUnavailable
@@ -209,7 +224,7 @@ def test_post_submission_authenticated_bad_json(self):
self.assertTrue('error' in rendered_response.data)
self.assertTrue(
rendered_response.data['error'].startswith(
- 'Received empty submission'
+ 'Instance ID is required'
)
)
self.assertTrue(rendered_response.status_code == 400)
@@ -429,3 +444,107 @@ def test_submission_blocking_flag(self):
)
response = self.view(request, username=username)
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
+
+
+class ConcurrentSubmissionTestCase(RequestMixin, LiveServerTestCase):
+ """
+ Inherit from LiveServerTestCase to be able to test concurrent requests
+ to submission endpoint in different transactions (and different processes).
+ Otherwise, DB is populated only on the first request but still empty on
+ subsequent ones.
+ """
+ fixtures = ['kobo/apps/openrosa/apps/api/tests/fixtures/users']
+
+ def setUp(self):
+ self.user = User.objects.get(username='bob')
+ self.token, _ = Token.objects.get_or_create(user=self.user)
+ UserProfile.objects.get_or_create(user=self.user)
+
+ def publish_xls_form(self):
+ path = os.path.join(
+ settings.OPENROSA_APP_DIR,
+ 'apps',
+ 'main',
+ 'tests',
+ 'fixtures',
+ 'transportation',
+ 'transportation.xls',
+ )
+
+ with open(path, 'rb') as f:
+ xls_file = ContentFile(f.read(), name='transportation.xls')
+
+ self.xform = logger_tools.publish_xls_form(xls_file, self.user)
+
+ @pytest.mark.skipif(
+ settings.SKIP_TESTS_WITH_CONCURRENCY,
+ reason='GitLab does not seem to support multi-processes'
+ )
+ def test_post_concurrent_same_submissions(self):
+ DUPLICATE_SUBMISSIONS_COUNT = 2 # noqa
+
+ self.publish_xls_form()
+ username = 'bob'
+ survey = 'transport_2011-07-25_19-05-49'
+ results = defaultdict(int)
+
+ with multiprocessing.Pool() as pool:
+ for result in pool.map(
+ partial(
+ submit_data,
+ live_server_url=self.live_server_url,
+ survey_=survey,
+ username_=username,
+ token_=self.token.key
+ ),
+ range(DUPLICATE_SUBMISSIONS_COUNT),
+ ):
+ results[result] += 1
+
+ assert results[status.HTTP_201_CREATED] == 1
+ assert results[status.HTTP_202_ACCEPTED] == DUPLICATE_SUBMISSIONS_COUNT - 1
+
+
+def submit_data(identifier, survey_, username_, live_server_url, token_):
+ """
+ Submit data to live server.
+
+ It has to be outside `ConcurrentSubmissionTestCase` class to be pickled by
+ `multiprocessing.Pool().map()`.
+ """
+ media_file = '1335783522563.jpg'
+ main_directory = os.path.dirname(main_tests.__file__)
+ path = os.path.join(
+ main_directory,
+ 'fixtures',
+ 'transportation',
+ 'instances',
+ survey_,
+ media_file,
+ )
+ with open(path, 'rb') as f:
+ f = InMemoryUploadedFile(
+ f,
+ 'media_file',
+ media_file,
+ 'image/jpg',
+ os.path.getsize(path),
+ None,
+ )
+ submission_path = os.path.join(
+ main_directory,
+ 'fixtures',
+ 'transportation',
+ 'instances',
+ survey_,
+ f'{survey_}.xml',
+ )
+ with open(submission_path) as sf:
+ files = {'xml_submission_file': sf, 'media_file': f}
+ headers = {'Authorization': f'Token {token_}'}
+ response = requests.post(
+ f'{live_server_url}/{username_}/submission',
+ files=files,
+ headers=headers
+ )
+ return response.status_code
diff --git a/kobo/apps/openrosa/apps/logger/exceptions.py b/kobo/apps/openrosa/apps/logger/exceptions.py
index d8b85c5c94..af1dc5d1bd 100644
--- a/kobo/apps/openrosa/apps/logger/exceptions.py
+++ b/kobo/apps/openrosa/apps/logger/exceptions.py
@@ -10,6 +10,16 @@ class BuildDbQueriesNoConfirmationProvidedError(Exception):
pass
+class ConflictingSubmissionUUIDError(Exception):
+ def __init__(self, message='Submission with this instance ID already exists'):
+ super().__init__(message)
+
+
+class DuplicateInstanceError(Exception):
+ def __init__(self, message='Duplicate Instance'):
+ super().__init__(message)
+
+
class DuplicateUUIDError(Exception):
pass
@@ -18,9 +28,37 @@ class FormInactiveError(Exception):
pass
+class InstanceEmptyError(Exception):
+ def __init__(self, message='Empty instance'):
+ super().__init__(message)
+
+
+class InstanceInvalidUserError(Exception):
+ def __init__(self, message='Could not determine the user'):
+ super().__init__(message)
+
+
+class InstanceIdMissingError(Exception):
+ def __init__(self, message='Could not determine the instance ID'):
+ super().__init__(message)
+
+
+class InstanceMultipleNodeError(Exception):
+ pass
+
+
+class InstanceParseError(Exception):
+ def __init__(self, message='The instance could not be parsed'):
+ super().__init__(message)
+
+
class MissingValidationStatusPayloadError(Exception):
pass
class TemporarilyUnavailableError(Exception):
pass
+
+
+class XLSFormError(Exception):
+ pass
diff --git a/kobo/apps/openrosa/apps/logger/fixtures/new_repeats/instances/multiple_nodes_error.xml b/kobo/apps/openrosa/apps/logger/fixtures/new_repeats/instances/multiple_nodes_error.xml
index 9b7b9a3cde..f0569b1166 100644
--- a/kobo/apps/openrosa/apps/logger/fixtures/new_repeats/instances/multiple_nodes_error.xml
+++ b/kobo/apps/openrosa/apps/logger/fixtures/new_repeats/instances/multiple_nodes_error.xml
@@ -14,4 +14,7 @@
-1.2627557 36.7926442 0.0 30.0
chrome ie
+
+ uuid:364f173c688e482486a48661700466gg
+
diff --git a/kobo/apps/openrosa/apps/logger/fixtures/new_repeats/instances/new_repeats_2012-07-05-14-33-53.xml b/kobo/apps/openrosa/apps/logger/fixtures/new_repeats/instances/new_repeats_2012-07-05-14-33-53.xml
index f0ee825508..eaab7f531e 100644
--- a/kobo/apps/openrosa/apps/logger/fixtures/new_repeats/instances/new_repeats_2012-07-05-14-33-53.xml
+++ b/kobo/apps/openrosa/apps/logger/fixtures/new_repeats/instances/new_repeats_2012-07-05-14-33-53.xml
@@ -13,4 +13,7 @@
-1.2627557 36.7926442 0.0 30.0
chrome ie
+
+ uuid:364f173c688e482486a48661700466gg
+
diff --git a/kobo/apps/openrosa/apps/logger/fixtures/test_forms/survey_names/instances/survey_names_2012-08-17_11-24-53.xml b/kobo/apps/openrosa/apps/logger/fixtures/test_forms/survey_names/instances/survey_names_2012-08-17_11-24-53.xml
index 4a4550a9f6..6e07597a68 100644
--- a/kobo/apps/openrosa/apps/logger/fixtures/test_forms/survey_names/instances/survey_names_2012-08-17_11-24-53.xml
+++ b/kobo/apps/openrosa/apps/logger/fixtures/test_forms/survey_names/instances/survey_names_2012-08-17_11-24-53.xml
@@ -1 +1 @@
-2012-08-17T11:24:53.254+03HD2012-08-17T11:24:57.897+03
+2012-08-17T11:24:53.254+03HD2012-08-17T11:24:57.897+03uuid:729f173c688e482486a48661700455ff
diff --git a/kobo/apps/openrosa/apps/logger/fixtures/tutorial/instances/tutorial_2012-06-27_11-27-53_w_xform_uuid.xml b/kobo/apps/openrosa/apps/logger/fixtures/tutorial/instances/tutorial_2012-06-27_11-27-53_w_xform_uuid.xml
index 78426e9db8..1054169b4c 100644
--- a/kobo/apps/openrosa/apps/logger/fixtures/tutorial/instances/tutorial_2012-06-27_11-27-53_w_xform_uuid.xml
+++ b/kobo/apps/openrosa/apps/logger/fixtures/tutorial/instances/tutorial_2012-06-27_11-27-53_w_xform_uuid.xml
@@ -7,4 +7,7 @@
0
-1.2836198 36.8795437 0.0 1044.0
firefox chrome safari
+
+ uuid:364f173c688e482486a48661700466gg
+
diff --git a/kobo/apps/openrosa/apps/logger/fixtures/tutorial/instances/tutorial_unicode_submission.xml b/kobo/apps/openrosa/apps/logger/fixtures/tutorial/instances/tutorial_unicode_submission.xml
index aad35e71a4..92c434acac 100644
--- a/kobo/apps/openrosa/apps/logger/fixtures/tutorial/instances/tutorial_unicode_submission.xml
+++ b/kobo/apps/openrosa/apps/logger/fixtures/tutorial/instances/tutorial_unicode_submission.xml
@@ -6,4 +6,7 @@
0
-1.2836198 36.8795437 0.0 1044.0
firefox chrome safari
+
+ uuid:639f173c688e482486a48661700456ty
+
diff --git a/kobo/apps/openrosa/apps/logger/fixtures/tutorial/instances/tutorial_with_attachment/1335783522563.jpg b/kobo/apps/openrosa/apps/logger/fixtures/tutorial/instances/tutorial_with_attachment/1335783522563.jpg
new file mode 100755
index 0000000000..e8d953e387
Binary files /dev/null and b/kobo/apps/openrosa/apps/logger/fixtures/tutorial/instances/tutorial_with_attachment/1335783522563.jpg differ
diff --git a/kobo/apps/openrosa/apps/logger/fixtures/tutorial/instances/tutorial_with_attachment/attachment_with_different_content/1335783522563.jpg b/kobo/apps/openrosa/apps/logger/fixtures/tutorial/instances/tutorial_with_attachment/attachment_with_different_content/1335783522563.jpg
new file mode 100644
index 0000000000..6f842e50d0
Binary files /dev/null and b/kobo/apps/openrosa/apps/logger/fixtures/tutorial/instances/tutorial_with_attachment/attachment_with_different_content/1335783522563.jpg differ
diff --git a/kobo/apps/openrosa/apps/logger/fixtures/tutorial/instances/tutorial_with_attachment/tutorial_2012-06-27_11-27-53_w_attachment.xml b/kobo/apps/openrosa/apps/logger/fixtures/tutorial/instances/tutorial_with_attachment/tutorial_2012-06-27_11-27-53_w_attachment.xml
new file mode 100644
index 0000000000..e354297ee5
--- /dev/null
+++ b/kobo/apps/openrosa/apps/logger/fixtures/tutorial/instances/tutorial_with_attachment/tutorial_2012-06-27_11-27-53_w_attachment.xml
@@ -0,0 +1,17 @@
+
+
+ Larry
+ Again
+
+ 23
+
+ 0
+ -1.2836198 36.8795437 0.0 1044.0
+ firefox chrome safari
+
+ uuid:729f173c688e482486a48661700455ff
+
+
+ c2f940a2f1e8490d8d3c3030452ed401
+
+
diff --git a/kobo/apps/openrosa/apps/logger/fixtures/tutorial/instances/tutorial_with_attachment/tutorial_2012-06-27_11-27-53_w_attachment_edit.xml b/kobo/apps/openrosa/apps/logger/fixtures/tutorial/instances/tutorial_with_attachment/tutorial_2012-06-27_11-27-53_w_attachment_edit.xml
new file mode 100644
index 0000000000..8be650f260
--- /dev/null
+++ b/kobo/apps/openrosa/apps/logger/fixtures/tutorial/instances/tutorial_with_attachment/tutorial_2012-06-27_11-27-53_w_attachment_edit.xml
@@ -0,0 +1,18 @@
+
+
+ Larry
+ Again
+
+ 23
+
+ 0
+ -1.2836198 36.8795437 0.0 1044.0
+ firefox chrome safari
+
+ uuid:031c585a-88b7-4962-a566-849a7dd15891
+ uuid:729f173c688e482486a48661700455ff
+
+
+ c2f940a2f1e8490d8d3c3030452ed401
+
+
diff --git a/kobo/apps/openrosa/apps/logger/management/commands/clean_duplicated_submissions.py b/kobo/apps/openrosa/apps/logger/management/commands/clean_duplicated_submissions.py
index 5ad0d71b8c..4117385090 100644
--- a/kobo/apps/openrosa/apps/logger/management/commands/clean_duplicated_submissions.py
+++ b/kobo/apps/openrosa/apps/logger/management/commands/clean_duplicated_submissions.py
@@ -1,28 +1,36 @@
#!/usr/bin/env python
# vim: ai ts=4 sts=4 et sw=4 fileencoding=utf-8
# coding: utf-8
+from collections import defaultdict
+
from django.conf import settings
-from django.core.management.base import BaseCommand, CommandError
+from django.core.management.base import BaseCommand
from django.db import transaction
-from django.db.models import Sum
+from django.db.models import F
from django.db.models.aggregates import Count
-from django.utils import timezone
+from kobo.apps.openrosa.apps.logger.models import (
+ DailyXFormSubmissionCounter,
+ MonthlyXFormSubmissionCounter
+)
from kobo.apps.openrosa.apps.logger.models.attachment import Attachment
from kobo.apps.openrosa.apps.logger.models.instance import Instance
from kobo.apps.openrosa.apps.viewer.models.parsed_instance import ParsedInstance
-from kobo.apps.openrosa.apps.logger.models.xform import XForm
-from kobo.apps.openrosa.libs.utils.common_tags import MONGO_STRFTIME
+from kobo.apps.openrosa.apps.logger.xform_instance_parser import set_meta
class Command(BaseCommand):
- help = "Deletes duplicated submissions (i.e same `uuid` and same `xml`)"
+ help = """
+ Deletes or updates duplicated submissions based on uuid and xml_hash.
- def __init__(self, **kwargs):
- super().__init__(**kwargs)
- self.__vaccuum = False
- self.__users = set([])
+ Workflow:
+ 1. Identifies submissions with duplicate uuids.
+ 2. For duplicates with the same xml_hash, deletes duplicates and attaches
+ their attachments to the original submission.
+ 3. For duplicates with different xml_hashes, updates their uuids to make
+ them unique.
+ """
def add_arguments(self, parser):
super().add_arguments(parser)
@@ -42,8 +50,9 @@ def add_arguments(self, parser):
def handle(self, *args, **options):
username = options['user']
xform_id_string = options['xform']
+ self._verbosity = options['verbosity']
- # Retrieve all instances with the same `uuid`.
+ # Retrieve all instances with the same `uuid`
query = Instance.objects
if xform_id_string:
query = query.filter(xform__id_string=xform_id_string)
@@ -51,87 +60,146 @@ def handle(self, *args, **options):
if username:
query = query.filter(xform__user__username=username)
- query = query.values_list('uuid', flat=True)\
- .annotate(count_uuid=Count('uuid'))\
- .filter(count_uuid__gt=1)\
+ query = (
+ query.values_list('uuid', flat=True)
+ .annotate(count_uuid=Count('uuid'))
+ .filter(count_uuid__gt=1)
.distinct()
+ )
- for uuid in query.all():
-
- duplicated_query = Instance.objects.filter(uuid=uuid)
-
- instances_with_same_uuid = duplicated_query.values_list('id',
- 'xml_hash')\
- .order_by('xml_hash', 'date_created')
- xml_hash_ref = None
- instance_id_ref = None
-
- duplicated_instance_ids = []
- for instance_with_same_uuid in instances_with_same_uuid:
- instance_id = instance_with_same_uuid[0]
- instance_xml_hash = instance_with_same_uuid[1]
-
- if instance_xml_hash != xml_hash_ref:
- self.__clean_up(instance_id_ref,
- duplicated_instance_ids)
- xml_hash_ref = instance_xml_hash
- instance_id_ref = instance_id
- duplicated_instance_ids = []
- continue
-
- duplicated_instance_ids.append(instance_id)
-
- self.__clean_up(instance_id_ref,
- duplicated_instance_ids)
-
- if not self.__vaccuum:
- self.stdout.write('No instances have been purged.')
- else:
- # Update number of submissions for each user.
- for user_ in list(self.__users):
- result = XForm.objects.filter(user_id=user_.id)\
- .aggregate(count=Sum('num_of_submissions'))
- user_.profile.num_of_submissions = result['count']
- self.stdout.write(
- "\tUpdating `{}`'s number of submissions".format(
- user_.username))
- user_.profile.save(update_fields=['num_of_submissions'])
+ for uuid in query.iterator():
+ # Get all instances with the same UUID
+ duplicates_queryset = Instance.objects.filter(uuid=uuid)
+
+ instances = duplicates_queryset.values(
+ 'id', 'uuid', 'xml_hash', 'xform_id', 'date_created'
+ ).order_by('xform_id', 'uuid', 'date_created')
+
+ # Separate duplicates by their xml_hash (same and different)
+ same_xml_hash_duplicates, different_xml_hash_duplicates = (
+ self._get_duplicates_by_xml_hash(instances)
+ )
+
+ # Handle the same xml_hash duplicates
+ if same_xml_hash_duplicates:
+ instance_ref = same_xml_hash_duplicates.pop(0)
+ self._delete_duplicates(
+ instance_ref, same_xml_hash_duplicates
+ )
+
+ # Handle the different xml_hash duplicates (update uuid)
+ if different_xml_hash_duplicates:
+ instance_ref = different_xml_hash_duplicates.pop(0)
+ self._replace_duplicates(different_xml_hash_duplicates)
+
+ def _delete_duplicates(self, instance_ref, duplicated_instances):
+ """
+ Delete the duplicated instances with the same xml_hash and link their
+ attachments to the reference instance (instance_ref).
+ """
+ duplicated_instance_ids = [i['id'] for i in duplicated_instances]
+
+ if self._verbosity >= 1:
+ self.stdout.write(
+ f'Deleting instance #{duplicated_instance_ids} duplicates…'
+ )
+
+ with transaction.atomic():
+ # Update attachments
+ Attachment.objects.select_for_update().filter(
+ instance_id__in=duplicated_instance_ids
+ ).update(instance_id=instance_ref['id'])
+ if self._verbosity >= 2:
self.stdout.write(
- '\t\tDone! New number: {}'.format(result['count']))
-
- def __clean_up(self, instance_id_ref, duplicated_instance_ids):
- if instance_id_ref is not None and len(duplicated_instance_ids) > 0:
- self.__vaccuum = True
- with transaction.atomic():
- self.stdout.write('Link attachments to instance #{}'.format(
- instance_id_ref))
- # Update attachments
- Attachment.objects.select_for_update()\
- .filter(instance_id__in=duplicated_instance_ids)\
- .update(instance_id=instance_id_ref)
-
- # Update Mongo
- main_instance = Instance.objects.select_for_update()\
- .get(id=instance_id_ref)
- main_instance.parsed_instance.save()
-
- self.stdout.write('\tPurging instances: {}'.format(
- duplicated_instance_ids))
- Instance.objects.select_for_update()\
- .filter(id__in=duplicated_instance_ids).delete()
- ParsedInstance.objects.select_for_update()\
- .filter(instance_id__in=duplicated_instance_ids).delete()
- settings.MONGO_DB.instances.remove(
- {'_id': {'$in': duplicated_instance_ids}}
+ f"\tLinked attachments to instance #{instance_ref['id']}"
)
- # Update number of submissions
- xform = main_instance.xform
+
+ # Update Mongo
+ main_instance = Instance.objects.get(id=instance_ref['id'])
+ main_instance.parsed_instance.save()
+
+ # Delete duplicated ParsedInstances
+ ParsedInstance.objects.filter(
+ instance_id__in=duplicated_instance_ids
+ ).delete()
+
+ # Adjust counters and delete instances
+ instance_queryset = Instance.objects.filter(
+ id__in=duplicated_instance_ids
+ )
+ for instance in instance_queryset.values(
+ 'xform_id', 'date_created__date', 'xform__user_id'
+ ):
+ MonthlyXFormSubmissionCounter.objects.filter(
+ year=instance['date_created__date'].year,
+ month=instance['date_created__date'].month,
+ user_id=instance['xform__user_id'],
+ xform_id=instance['xform_id'],
+ ).update(counter=F('counter') - 1)
+
+ DailyXFormSubmissionCounter.objects.filter(
+ date=instance['date_created__date'],
+ xform_id=instance['xform_id'],
+ ).update(counter=F('counter') - 1)
+
+ instance_queryset.delete()
+
+ settings.MONGO_DB.instances.delete_many(
+ {'_id': {'$in': duplicated_instance_ids}}
+ )
+ if self._verbosity > 1:
self.stdout.write(
- '\tUpdating number of submissions of XForm #{} ({})'.format(
- xform.id, xform.id_string))
- xform_submission_count = xform.submission_count(force_update=True)
+ f'\tPurged instance IDs: {duplicated_instance_ids}'
+ )
+
+ def _replace_duplicates(self, duplicated_instances):
+ """
+ Update the UUID of instances with different xml_hash values.
+ """
+ instances_to_update = []
+ for idx, duplicated_instance in enumerate(duplicated_instances):
+ try:
+ instance = Instance.objects.get(pk=duplicated_instance['id'])
+ except Instance.DoesNotExist:
+ continue
+
+ if self._verbosity >= 1:
self.stdout.write(
- '\t\tDone! New number: {}'.format(xform_submission_count))
- self.stdout.write('')
+ f'\tUpdating instance #{instance.pk} ({instance.uuid})…'
+ )
+
+ # Update the UUID and XML hash
+ instance.uuid = (f'DUPLICATE-{idx}-{instance.xform.id_string}-'
+ f'{instance.uuid}')
+ instance.xml = set_meta(
+ instance.xml, 'instanceID', instance.uuid
+ )
+ instance.xml_hash = instance.get_hash(instance.xml)
+ instances_to_update.append(instance)
+
+ # Save the parsed instance to sync MongoDB
+ parsed_instance = instance.parsed_instance
+ parsed_instance.save()
+
+ Instance.objects.bulk_update(
+ instances_to_update, ['uuid', 'xml', 'xml_hash']
+ )
+
+ def _get_duplicates_by_xml_hash(self, instances):
+ """
+ Extract duplicates with the same xml_hash and different xml_hash
+ """
+ same_xml_hash_duplicates, different_xml_hash_duplicates = [], []
+ xml_hash_groups = defaultdict(list)
+
+ # Group instances by their xml_hash
+ for instance in instances:
+ xml_hash_groups[instance['xml_hash']].append(instance)
+
+ for xml_hash, duplicates in xml_hash_groups.items():
+ if len(duplicates) > 1:
+ same_xml_hash_duplicates.extend(duplicates)
+ else:
+ different_xml_hash_duplicates.extend(duplicates)
- self.__users.add(xform.user)
+ return same_xml_hash_duplicates, different_xml_hash_duplicates
diff --git a/kobo/apps/openrosa/apps/logger/migrations/0038_add_root_uuid_field_to_instance.py b/kobo/apps/openrosa/apps/logger/migrations/0038_add_root_uuid_field_to_instance.py
new file mode 100644
index 0000000000..3e91ddae75
--- /dev/null
+++ b/kobo/apps/openrosa/apps/logger/migrations/0038_add_root_uuid_field_to_instance.py
@@ -0,0 +1,26 @@
+# Generated by Django 4.2.15 on 2024-09-29 18:31
+
+from django.conf import settings
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+ dependencies = [
+ migrations.swappable_dependency(settings.AUTH_USER_MODEL),
+ ('logger', '0037_remove_xform_has_kpi_hooks_and_instance_posted_to_kpi'),
+ ]
+
+ operations = [
+ migrations.AddField(
+ model_name='instance',
+ name='root_uuid',
+ field=models.CharField(db_index=True, max_length=249, null=True),
+ ),
+ migrations.AddConstraint(
+ model_name='instance',
+ constraint=models.UniqueConstraint(
+ fields=('root_uuid', 'xform'), name='unique_root_uuid_per_xform'
+ ),
+ ),
+ ]
diff --git a/kobo/apps/openrosa/apps/logger/models/instance.py b/kobo/apps/openrosa/apps/logger/models/instance.py
index 6a6737078a..799b587f32 100644
--- a/kobo/apps/openrosa/apps/logger/models/instance.py
+++ b/kobo/apps/openrosa/apps/logger/models/instance.py
@@ -12,6 +12,7 @@
from django.contrib.gis.geos import GeometryCollection, Point
from django.utils import timezone
from django.utils.encoding import smart_str
+from django.db.models import UniqueConstraint
from jsonfield import JSONField
from taggit.managers import TaggableManager
@@ -26,6 +27,7 @@
from kobo.apps.openrosa.apps.logger.xform_instance_parser import (
XFormInstanceParser,
clean_and_parse_xml,
+ get_root_uuid_from_xml,
get_uuid_from_xml,
)
from kobo.apps.openrosa.libs.utils.common_tags import (
@@ -89,6 +91,7 @@ class Instance(AbstractTimeStampedModel):
# do not use it anymore.
deleted_at = models.DateTimeField(null=True, default=None)
+ root_uuid = models.CharField(max_length=249, null=True, db_index=True)
# ODK keeps track of three statuses for an instance:
# incomplete, submitted, complete
# we add a fourth status: submitted_via_web
@@ -108,6 +111,12 @@ class Instance(AbstractTimeStampedModel):
class Meta:
app_label = 'logger'
+ constraints = [
+ UniqueConstraint(
+ fields=['root_uuid', 'xform'],
+ name='unique_root_uuid_per_xform'
+ )
+ ]
@property
def asset(self):
@@ -192,6 +201,13 @@ def _set_uuid(self):
self.uuid = uuid
set_uuid(self)
+ def _populate_root_uuid(self):
+ if self.xml and not self.root_uuid:
+ assert (
+ root_uuid := get_root_uuid_from_xml(self.xml)
+ ), 'root_uuid should not be empty'
+ self.root_uuid = root_uuid
+
def _populate_xml_hash(self):
"""
Populate the `xml_hash` attribute of this `Instance` based on the content of the `xml`
@@ -329,6 +345,7 @@ def save(self, *args, **kwargs):
self._set_survey_type()
self._set_uuid()
self._populate_xml_hash()
+ self._populate_root_uuid()
# Force validation_status to be dict
if self.validation_status is None:
diff --git a/kobo/apps/openrosa/apps/logger/models/xform.py b/kobo/apps/openrosa/apps/logger/models/xform.py
index f031ef5fd1..787f44536d 100644
--- a/kobo/apps/openrosa/apps/logger/models/xform.py
+++ b/kobo/apps/openrosa/apps/logger/models/xform.py
@@ -16,7 +16,7 @@
from taggit.managers import TaggableManager
from kobo.apps.kobo_auth.shortcuts import User
-from kobo.apps.openrosa.apps.logger.xform_instance_parser import XLSFormError
+from kobo.apps.openrosa.apps.logger.exceptions import XLSFormError
from kobo.apps.openrosa.koboform.pyxform_utils import convert_csv_to_xls
from kobo.apps.openrosa.libs.constants import (
CAN_ADD_SUBMISSIONS,
diff --git a/kobo/apps/openrosa/apps/logger/tests/Water_2011_03_17_2011-03-17_16-29-59/Water_2011_03_17_2011-03-17_16-29-59.xml b/kobo/apps/openrosa/apps/logger/tests/Water_2011_03_17_2011-03-17_16-29-59/Water_2011_03_17_2011-03-17_16-29-59.xml
index f4e00ed363..a8dfcd00eb 100644
--- a/kobo/apps/openrosa/apps/logger/tests/Water_2011_03_17_2011-03-17_16-29-59/Water_2011_03_17_2011-03-17_16-29-59.xml
+++ b/kobo/apps/openrosa/apps/logger/tests/Water_2011_03_17_2011-03-17_16-29-59/Water_2011_03_17_2011-03-17_16-29-59.xml
@@ -1 +1 @@
-Andrew1300375832136.jpg9.045921564102173 7.526530623435974 572.0 48.0northwestkebbibunzaRYRR4TRn/aboreholesingle_pointdieselfederal_governmentfederal_government state_governmentTYnonono_diesel missing_stolen_partswell_maintainedyesyear_roundnobody
+Andrew1300375832136.jpg9.045921564102173 7.526530623435974 572.0 48.0northwestkebbibunzaRYRR4TRn/aboreholesingle_pointdieselfederal_governmentfederal_government state_governmentTYnonono_diesel missing_stolen_partswell_maintainedyesyear_roundnobodyuuid:435f173c688e482486a48661700467gh
diff --git a/kobo/apps/openrosa/apps/logger/tests/Water_Translated_2011_03_10_2011-03-10_14-38-28.xml b/kobo/apps/openrosa/apps/logger/tests/Water_Translated_2011_03_10_2011-03-10_14-38-28.xml
index a886f3c92c..dceb70c882 100644
--- a/kobo/apps/openrosa/apps/logger/tests/Water_Translated_2011_03_10_2011-03-10_14-38-28.xml
+++ b/kobo/apps/openrosa/apps/logger/tests/Water_Translated_2011_03_10_2011-03-10_14-38-28.xml
@@ -1 +1 @@
-9.055781364440918 7.518349885940552 534.0 16.01299764406815.jpgthtgtgprotected_dug_wellsingle_pointsolarlocal_governmentstate_government communityNoNopump_broken no_dieselwell_maintainedNowet_seasonslga_staff
+9.055781364440918 7.518349885940552 534.0 16.01299764406815.jpgthtgtgprotected_dug_wellsingle_pointsolarlocal_governmentstate_government communityNoNopump_broken no_dieselwell_maintainedNowet_seasonslga_staffuuid:364f173c688e482676a48661700466gg
diff --git a/kobo/apps/openrosa/apps/logger/tests/data_from_sdcard/bulk_submission.zip b/kobo/apps/openrosa/apps/logger/tests/data_from_sdcard/bulk_submission.zip
index 2da0f47884..5d92a67af0 100644
Binary files a/kobo/apps/openrosa/apps/logger/tests/data_from_sdcard/bulk_submission.zip and b/kobo/apps/openrosa/apps/logger/tests/data_from_sdcard/bulk_submission.zip differ
diff --git a/kobo/apps/openrosa/apps/logger/tests/test_backup_tools.py b/kobo/apps/openrosa/apps/logger/tests/test_backup_tools.py
index 90378b698a..b1f4794bc8 100644
--- a/kobo/apps/openrosa/apps/logger/tests/test_backup_tools.py
+++ b/kobo/apps/openrosa/apps/logger/tests/test_backup_tools.py
@@ -32,8 +32,8 @@ def test_date_created_override(self):
will set our date as the date_created
"""
xml_file_path = os.path.join(
- settings.OPENROSA_APP_DIR, "apps", "logger", "fixtures",
- "tutorial", "instances", "tutorial_2012-06-27_11-27-53.xml")
+ settings.OPENROSA_APP_DIR, 'apps', 'logger', 'fixtures',
+ 'tutorial', 'instances', 'tutorial_2012-06-27_11-27-53_w_uuid.xml')
xml_file = django_file(
xml_file_path, field_name="xml_file", content_type="text/xml")
media_files = []
diff --git a/kobo/apps/openrosa/apps/logger/tests/test_form_submission.py b/kobo/apps/openrosa/apps/logger/tests/test_form_submission.py
index 0f7f493ebc..5aa68c75ea 100644
--- a/kobo/apps/openrosa/apps/logger/tests/test_form_submission.py
+++ b/kobo/apps/openrosa/apps/logger/tests/test_form_submission.py
@@ -10,7 +10,7 @@
from kobo.apps.openrosa.apps.main.models.user_profile import UserProfile
from kobo.apps.openrosa.apps.main.tests.test_base import TestBase
-from kobo.apps.openrosa.apps.logger.models import Instance
+from kobo.apps.openrosa.apps.logger.models import Instance, Attachment
from kobo.apps.openrosa.apps.logger.models.instance import InstanceHistory
from kobo.apps.openrosa.apps.logger.xform_instance_parser import clean_and_parse_xml
from kobo.apps.openrosa.apps.viewer.models.parsed_instance import ParsedInstance
@@ -36,7 +36,7 @@ def test_form_post(self):
"""
xml_submission_file_path = os.path.join(
os.path.dirname(os.path.abspath(__file__)),
- "../fixtures/tutorial/instances/tutorial_2012-06-27_11-27-53.xml"
+ '../fixtures/tutorial/instances/tutorial_2012-06-27_11-27-53_w_uuid.xml'
)
self._make_submission(xml_submission_file_path)
@@ -114,7 +114,7 @@ def test_submission_to_not_required_auth_as_anonymous_user(self):
xml_submission_file_path = os.path.join(
os.path.dirname(os.path.abspath(__file__)),
- "../fixtures/tutorial/instances/tutorial_2012-06-27_11-27-53.xml"
+ '../fixtures/tutorial/instances/tutorial_2012-06-27_11-27-53_w_uuid.xml'
)
# Anonymous should be able to submit data
@@ -154,7 +154,7 @@ def test_submission_to_require_auth_with_perm(self):
xml_submission_file_path = os.path.join(
os.path.dirname(os.path.abspath(__file__)),
- "../fixtures/tutorial/instances/tutorial_2012-06-27_11-27-53.xml"
+ '../fixtures/tutorial/instances/tutorial_2012-06-27_11-27-53_w_uuid.xml'
)
self._make_submission(xml_submission_file_path, auth=auth)
self.assertEqual(self.response.status_code, 201)
@@ -244,12 +244,169 @@ def test_duplicate_submission_with_different_content(self):
self.assertEqual(self.response.status_code, 201)
self.assertEqual(Instance.objects.count(), pre_count + 1)
inst = Instance.objects.order_by('pk').last()
- self._make_submission(duplicate_xml_submission_file_path)
- self.assertEqual(self.response.status_code, 201)
- self.assertEqual(Instance.objects.count(), pre_count + 2)
+ self._make_submission(duplicate_xml_submission_file_path, assert_success=False)
+ self.assertEqual(self.response.status_code, 409)
+ self.assertEqual(Instance.objects.count(), pre_count + 1)
# this is exactly the same instance
another_inst = Instance.objects.order_by('pk').last()
- self.assertNotEqual(inst.xml, another_inst.xml)
+ self.assertEqual(inst.xml, another_inst.xml)
+
+ def test_duplicate_submission_with_same_content_but_with_attachment(self):
+ """
+ Test that submitting the same XML content twice,
+ first without and then with an attachment,
+ results in a single instance with the attachment added.
+ """
+ xml_submission_file_path = os.path.join(
+ os.path.dirname(__file__),
+ '../fixtures/tutorial/instances/tutorial_with_attachment',
+ 'tutorial_2012-06-27_11-27-53_w_attachment.xml'
+ )
+ media_file_path = os.path.join(
+ os.path.dirname(__file__),
+ '../fixtures/tutorial/instances/tutorial_with_attachment',
+ '1335783522563.jpg'
+ )
+ initial_instance_count = Instance.objects.count()
+
+ # Test submission with XML file
+ self._make_submission(xml_submission_file_path)
+ initial_instance = Instance.objects.last()
+ self.assertEqual(self.response.status_code, 201)
+ self.assertEqual(Instance.objects.count(), initial_instance_count + 1)
+
+ # Test duplicate submission with attachment
+ with open(media_file_path, 'rb') as media_file:
+ self._make_submission(xml_submission_file_path, media_file=media_file)
+ self.assertEqual(self.response.status_code, 201)
+ self.assertEqual(Instance.objects.count(), initial_instance_count + 1)
+ self.assertEqual(
+ Attachment.objects.filter(instance=initial_instance).count(), 1
+ )
+
+ def test_duplicate_submission_with_same_content_but_with_different_attachment(self):
+ """
+ Test duplicate submission handling:
+ - New submission without attachment should succeed.
+ - Same submission with an attachment should succeed,
+ adding the attachment.
+ - Resubmission with the same attachment should be rejected
+ with a 202 status code.
+ - Resubmission with a different attachment (same file name) should be
+ rejected with a 202 status code.
+ """
+ xml_submission_file_path = os.path.join(
+ os.path.dirname(__file__),
+ '../fixtures/tutorial/instances/tutorial_with_attachment',
+ 'tutorial_2012-06-27_11-27-53_w_attachment.xml',
+ )
+ media_file_path1 = os.path.join(
+ os.path.dirname(__file__),
+ '../fixtures/tutorial/instances/tutorial_with_attachment',
+ '1335783522563.jpg',
+ )
+ media_file_path2 = os.path.join(
+ os.path.dirname(__file__),
+ '../fixtures/tutorial/instances/tutorial_with_attachment/'
+ 'attachment_with_different_content',
+ '1335783522563.jpg',
+ )
+ initial_instance_count = Instance.objects.count()
+
+ # Test submission with XML file
+ self._make_submission(xml_submission_file_path)
+ initial_instance = Instance.objects.last()
+ self.assertEqual(self.response.status_code, 201)
+ self.assertEqual(Instance.objects.count(), initial_instance_count + 1)
+
+ # Test duplicate submission with attachment
+ with open(media_file_path1, 'rb') as media_file:
+ self._make_submission(xml_submission_file_path, media_file=media_file)
+ self.assertEqual(self.response.status_code, 201)
+ self.assertEqual(Instance.objects.count(), initial_instance_count + 1)
+ self.assertEqual(
+ Attachment.objects.filter(instance=initial_instance).count(), 1
+ )
+
+ # Test duplicate submission with the same attachment
+ with open(media_file_path1, 'rb') as media_file:
+ self._make_submission(xml_submission_file_path, media_file=media_file)
+ self.assertEqual(self.response.status_code, 202)
+ self.assertEqual(Instance.objects.count(), initial_instance_count + 1)
+ self.assertEqual(
+ Attachment.objects.filter(instance=initial_instance).count(), 1
+ )
+
+ # Test duplicate submission with the same attachment name but with
+ # different attachment content
+ with open(media_file_path2, 'rb') as media_file2:
+ self._make_submission(xml_submission_file_path, media_file=media_file2)
+ self.assertEqual(self.response.status_code, 202)
+ self.assertEqual(Instance.objects.count(), initial_instance_count + 1)
+ self.assertEqual(
+ Attachment.objects.filter(instance=initial_instance).count(), 1
+ )
+
+ def test_edit_submission_with_same_attachment_name_but_different_content(self):
+ """
+ Test editing a submission with an attachment with the same name
+ """
+ xml_submission_file_path = os.path.join(
+ os.path.dirname(__file__),
+ '../fixtures/tutorial/instances/tutorial_with_attachment',
+ 'tutorial_2012-06-27_11-27-53_w_attachment.xml',
+ )
+ xml_edit_submission_file_path = os.path.join(
+ os.path.dirname(__file__),
+ '../fixtures/tutorial/instances/tutorial_with_attachment',
+ 'tutorial_2012-06-27_11-27-53_w_attachment_edit.xml',
+ )
+ media_file_path1 = os.path.join(
+ os.path.dirname(__file__),
+ '../fixtures/tutorial/instances/tutorial_with_attachment',
+ '1335783522563.jpg',
+ )
+ media_file_path2 = os.path.join(
+ os.path.dirname(__file__),
+ '../fixtures/tutorial/instances/tutorial_with_attachment/'
+ 'attachment_with_different_content',
+ '1335783522563.jpg',
+ )
+ initial_instance_count = Instance.objects.count()
+
+ # Test submission with attachment
+ with open(media_file_path1, 'rb') as media_file:
+ self._make_submission(
+ xml_submission_file_path, media_file=media_file
+ )
+ initial_instance = Instance.objects.order_by('-pk')[0]
+
+ attachments = Attachment.objects.filter(instance=initial_instance)
+ self.assertTrue(attachments.count() == 1)
+ self.assertEqual(self.response.status_code, 201)
+ self.assertEqual(Instance.objects.count(), initial_instance_count + 1)
+ attachment = attachments[0]
+ attachment_basename = attachment.media_file_basename
+ attachment_hash = attachment.file_hash
+
+ # test edit submission with the same attachment name but different attachment
+ # content
+ with open(media_file_path2, 'rb') as media_file:
+ self._make_submission(
+ xml_edit_submission_file_path, media_file=media_file
+ )
+
+ edited_instance = Instance.objects.order_by('-pk')[0]
+ edited_attachments = Attachment.objects.filter(instance=edited_instance)
+ self.assertTrue(attachments.count() == 1)
+ self.assertEqual(Instance.objects.count(), initial_instance_count + 1)
+ self.assertEqual(self.response.status_code, 201)
+
+ edited_attachment = edited_attachments[0]
+ edited_attachment_basename = edited_attachment.media_file_basename
+ edited_attachment_hash = edited_attachment.file_hash
+ self.assertEqual(attachment_basename, edited_attachment_basename)
+ self.assertNotEqual(attachment_hash, edited_attachment_hash)
def test_owner_can_edit_submissions(self):
xml_submission_file_path = os.path.join(
@@ -392,7 +549,7 @@ def test_submission_when_requires_auth(self):
xml_submission_file_path = os.path.join(
os.path.dirname(os.path.abspath(__file__)),
- "../fixtures/tutorial/instances/tutorial_2012-06-27_11-27-53.xml"
+ '../fixtures/tutorial/instances/tutorial_2012-06-27_11-27-53_w_uuid.xml'
)
auth = DigestAuth('alice', 'alice')
self._make_submission(
@@ -409,7 +566,7 @@ def test_submission_linked_to_reporter(self):
xml_submission_file_path = os.path.join(
os.path.dirname(os.path.abspath(__file__)),
- "../fixtures/tutorial/instances/tutorial_2012-06-27_11-27-53.xml"
+ '../fixtures/tutorial/instances/tutorial_2012-06-27_11-27-53_w_uuid.xml'
)
auth = DigestAuth('alice', 'alice')
self._make_submission(
diff --git a/kobo/apps/openrosa/apps/logger/tests/test_parsing.py b/kobo/apps/openrosa/apps/logger/tests/test_parsing.py
index f5cd23dbfa..783ffe6531 100644
--- a/kobo/apps/openrosa/apps/logger/tests/test_parsing.py
+++ b/kobo/apps/openrosa/apps/logger/tests/test_parsing.py
@@ -67,7 +67,10 @@ def test_parse_xform_nested_repeats(self):
'has_kids': '1'
},
'web_browsers': 'chrome ie',
- 'gps': '-1.2627557 36.7926442 0.0 30.0'
+ 'gps': '-1.2627557 36.7926442 0.0 30.0',
+ 'meta': {
+ 'instanceID': 'uuid:364f173c688e482486a48661700466gg'
+ }
}
}
self.assertEqual(dict_, expected_dict)
@@ -85,7 +88,8 @@ def test_parse_xform_nested_repeats(self):
'kids/has_kids': '1',
'info/age': '80',
'web_browsers': 'chrome ie',
- 'info/name': 'Adam'
+ 'info/name': 'Adam',
+ 'meta/instanceID': 'uuid:364f173c688e482486a48661700466gg'
}
self.assertEqual(flat_dict, expected_flat_dict)
@@ -161,6 +165,15 @@ def test_get_uuid_from_xml(self):
instanceID = get_uuid_from_xml(xml_str)
self.assertEqual(instanceID, '729f173c688e482486a48661700455ff')
+ # Additional test case for a custom prefixed UUID
+ submission = """
+
+ uuid:kobotoolbox.org:123456789
+
+ """
+ custom_instance_id = get_uuid_from_xml(submission)
+ self.assertEqual(custom_instance_id, 'kobotoolbox.org:123456789')
+
def test_get_deprecated_uuid_from_xml(self):
with open(
os.path.join(
diff --git a/kobo/apps/openrosa/apps/logger/tests/test_simple_submission.py b/kobo/apps/openrosa/apps/logger/tests/test_simple_submission.py
index 371ee04f30..3781092626 100644
--- a/kobo/apps/openrosa/apps/logger/tests/test_simple_submission.py
+++ b/kobo/apps/openrosa/apps/logger/tests/test_simple_submission.py
@@ -1,9 +1,10 @@
-# coding: utf-8
+import uuid
+
from django.test import RequestFactory, TestCase
from pyxform import SurveyElementBuilder
from kobo.apps.kobo_auth.shortcuts import User
-from kobo.apps.openrosa.apps.logger.xform_instance_parser import DuplicateInstance
+from kobo.apps.openrosa.apps.logger.exceptions import DuplicateInstanceError
from kobo.apps.openrosa.apps.main.models.user_profile import UserProfile
from kobo.apps.openrosa.apps.viewer.models.data_dictionary import DataDictionary
from kobo.apps.openrosa.libs.utils.logger_tools import (
@@ -36,18 +37,25 @@ def _get_xml_for_form(self, xform):
xform.save()
def _submit_at_hour(self, hour):
- st_xml = '2012-01-11T%d:00:00.000+00' % hour
+ st_xml = (
+ f''
+ f'2012-01-11T{hour}:00:00.000+00'
+ f''
+ f'uuid:918a1889-389f-4427-b48a-0ba16b7c9b{hour}'
+ f''
+ f''
+ )
try:
create_instance(self.user.username, TempFileProxy(st_xml), [])
- except DuplicateInstance:
+ except DuplicateInstanceError:
pass
def _submit_simple_yes(self):
create_instance(self.user.username, TempFileProxy(
- 'Yes<'
- '/yesno>'), [])
+ f'Yes<'
+ f'/yesno>'
+ f'uuid:{str(uuid.uuid4())}'
+ f''), [])
def setUp(self):
self.user = User.objects.create(username='admin', email='sample@example.com')
diff --git a/kobo/apps/openrosa/apps/logger/xform_instance_parser.py b/kobo/apps/openrosa/apps/logger/xform_instance_parser.py
index afcda3e4da..0ee1a78363 100644
--- a/kobo/apps/openrosa/apps/logger/xform_instance_parser.py
+++ b/kobo/apps/openrosa/apps/logger/xform_instance_parser.py
@@ -1,10 +1,9 @@
from __future__ import annotations
-import logging
import re
import sys
from datetime import datetime
-from typing import Optional
+from typing import Optional, Union
from xml.dom import Node
import dateutil.parser
@@ -13,38 +12,14 @@
from django.utils.encoding import smart_str
from django.utils.translation import gettext as t
+from kobo.apps.openrosa.apps.logger.exceptions import InstanceEmptyError
from kobo.apps.openrosa.libs.utils.common_tags import XFORM_ID_STRING
+from kpi.utils.log import logging
-class XLSFormError(Exception):
- pass
-
-
-class DuplicateInstance(Exception):
- def __str__(self):
- return t('Duplicate Instance')
-
-
-class InstanceInvalidUserError(Exception):
- def __str__(self):
- return t('Could not determine the user.')
-
-
-class InstanceParseError(Exception):
- def __str__(self):
- return t('The instance could not be parsed.')
-
-
-class InstanceEmptyError(InstanceParseError):
- def __str__(self):
- return t('Empty instance')
-
-
-class InstanceMultipleNodeError(Exception):
- pass
-
-
-def get_meta_from_xml(xml_str, meta_name):
+def get_meta_node_from_xml(
+ xml_str: str, meta_name: str
+) -> Union[None, tuple[str, minidom.Document]]:
xml = clean_and_parse_xml(xml_str)
children = xml.childNodes
# children ideally contains a single element
@@ -71,22 +46,30 @@ def get_meta_from_xml(xml_str, meta_name):
return None
uuid_tag = uuid_tags[0]
- return uuid_tag.firstChild.nodeValue.strip() if uuid_tag.firstChild\
- else None
+ return uuid_tag, xml
+
+
+def get_meta_from_xml(xml_str: str, meta_name: str) -> str:
+ if node_and_root := get_meta_node_from_xml(xml_str, meta_name):
+ node, _ = node_and_root
+ return node.firstChild.nodeValue.strip() if node.firstChild else None
def get_uuid_from_xml(xml):
- def _uuid_only(uuid, regex):
- matches = regex.match(uuid)
- if matches and len(matches.groups()) > 0:
- return matches.groups()[0]
- return None
+ def _uuid_only(uuid):
+ """
+ Strips the 'uuid:' prefix from the provided identifier if it exists.
+ This preserves any custom ID schemes (e.g., 'kobotoolbox.org:123456789')
+ while ensuring only the 'uuid:' prefix is removed. This approach
+ adheres to the OpenRosa spec, allowing custom prefixes to be stored
+ intact in the database to prevent potential ID collisions.
+ """
+ return re.sub(r'^uuid:', '', uuid)
uuid = get_meta_from_xml(xml, 'instanceID')
- regex = re.compile(r'uuid:(.*)')
if uuid:
- return _uuid_only(uuid, regex)
+ return _uuid_only(uuid)
# check in survey_node attributes
xml = clean_and_parse_xml(xml)
children = xml.childNodes
@@ -97,10 +80,19 @@ def _uuid_only(uuid, regex):
survey_node = children[0]
uuid = survey_node.getAttribute('instanceID')
if uuid != '':
- return _uuid_only(uuid, regex)
+ return _uuid_only(uuid)
return None
+def get_root_uuid_from_xml(xml):
+ root_uuid = get_meta_from_xml(xml, 'rootUuid')
+ if root_uuid:
+ return root_uuid
+
+ # If no rootUuid, fall back to instanceID
+ return get_uuid_from_xml(xml)
+
+
def get_submission_date_from_xml(xml) -> Optional[datetime]:
# check in survey_node attributes
xml = clean_and_parse_xml(xml)
@@ -126,13 +118,26 @@ def get_deprecated_uuid_from_xml(xml):
return None
-def clean_and_parse_xml(xml_string: str) -> Node:
+def clean_and_parse_xml(xml_string: str) -> minidom.Document:
clean_xml_str = xml_string.strip()
clean_xml_str = re.sub(r'>\s+<', '><', smart_str(clean_xml_str))
xml_obj = minidom.parseString(clean_xml_str)
return xml_obj
+def set_meta(xml_str: str, meta_name: str, new_value: str) -> str:
+
+ if not (node_and_root := get_meta_node_from_xml(xml_str, meta_name)):
+ raise ValueError(f'{meta_name} node not found')
+
+ node, root = node_and_root
+
+ if node.firstChild:
+ node.firstChild.nodeValue = new_value
+
+ return root.toxml()
+
+
def _xml_node_to_dict(node: Node, repeats: list = []) -> dict:
assert isinstance(node, Node)
if len(node.childNodes) == 0:
@@ -290,9 +295,9 @@ def __init__(self, xml_str, data_dictionary):
try:
self.parse(xml_str)
except Exception as e:
- logger = logging.getLogger('console_logger')
- logger.error(
- "Failed to parse instance '%s'" % xml_str, exc_info=True)
+ logging.error(
+ f"Failed to parse instance '{xml_str}'", exc_info=True
+ )
# `self.parse()` has been wrapped in to try/except but it makes the
# exception silently ignored.
# `logger_tool.py::safe_create_instance()` needs the exception
@@ -340,10 +345,9 @@ def _set_attributes(self):
try:
assert key not in self._attributes
except AssertionError:
- logger = logging.getLogger('console_logger')
- #logger.debug(
- # f'Skipping duplicate attribute: {key} with value {value}'
- #)
+ logging.debug(
+ f'Skipping duplicate attribute: {key} with value {value}'
+ )
else:
self._attributes[key] = value
@@ -374,7 +378,6 @@ def parse_xform_instance(xml_str, data_dictionary):
def get_xform_media_question_xpaths(
xform: 'kobo.apps.openrosa.apps.logger.models.XForm',
) -> list:
- logger = logging.getLogger('console_logger')
parser = XFormInstanceParser(xform.xml, xform.data_dictionary(use_cache=True))
all_attributes = _get_all_attributes(parser.get_root_node())
media_field_xpaths = []
@@ -388,7 +391,7 @@ def get_xform_media_question_xpaths(
try:
next_attribute = next(all_attributes)
except StopIteration:
- logger.error(
+ logging.error(
f'`ref` attribute seems to be missing in {xform.xml}',
exc_info=True,
)
@@ -398,8 +401,7 @@ def get_xform_media_question_xpaths(
try:
assert next_attribute_key.lower() == 'ref'
except AssertionError:
- logger = logging.getLogger('console_logger')
- logger.error(
+ logging.error(
f'`ref` should come after `mediatype:{value}` in {xform.xml}',
exc_info=True,
)
diff --git a/kobo/apps/openrosa/apps/main/tests/fixtures/gps/instances/gps_1980-01-23_20-52-08.xml b/kobo/apps/openrosa/apps/main/tests/fixtures/gps/instances/gps_1980-01-23_20-52-08.xml
index d5c336bef7..d8276b02c4 100644
--- a/kobo/apps/openrosa/apps/main/tests/fixtures/gps/instances/gps_1980-01-23_20-52-08.xml
+++ b/kobo/apps/openrosa/apps/main/tests/fixtures/gps/instances/gps_1980-01-23_20-52-08.xml
@@ -1 +1 @@
-40.81101715564728 -73.96446704864502 -152.0 16.0
+40.81101715564728 -73.96446704864502 -152.0 16.0uuid:729f173c688e482486a48661700455ff
diff --git a/kobo/apps/openrosa/apps/main/tests/fixtures/gps/instances/gps_1980-01-23_21-21-33.xml b/kobo/apps/openrosa/apps/main/tests/fixtures/gps/instances/gps_1980-01-23_21-21-33.xml
index fe1f4db02e..57bbed33cb 100644
--- a/kobo/apps/openrosa/apps/main/tests/fixtures/gps/instances/gps_1980-01-23_21-21-33.xml
+++ b/kobo/apps/openrosa/apps/main/tests/fixtures/gps/instances/gps_1980-01-23_21-21-33.xml
@@ -1 +1 @@
-40.811086893081665 -73.96449387073517 -113.0 16.0
+40.811086893081665 -73.96449387073517 -113.0 16.0uuid:435f173c688e482486a48661700467gh
diff --git a/kobo/apps/openrosa/apps/viewer/tests/fixtures/grouped_gps/instances/grouped_gps_01.xml b/kobo/apps/openrosa/apps/viewer/tests/fixtures/grouped_gps/instances/grouped_gps_01.xml
index 2cb36d609a..5bfed2204b 100644
--- a/kobo/apps/openrosa/apps/viewer/tests/fixtures/grouped_gps/instances/grouped_gps_01.xml
+++ b/kobo/apps/openrosa/apps/viewer/tests/fixtures/grouped_gps/instances/grouped_gps_01.xml
@@ -4,4 +4,7 @@
-1.2627107 36.7925771 0.0 37.5
chrome safari
+
+ uuid:435f173c688e482486a486617004534df
+
diff --git a/kobo/apps/openrosa/apps/viewer/tests/fixtures/new_repeats/instances/new_repeats_01.xml b/kobo/apps/openrosa/apps/viewer/tests/fixtures/new_repeats/instances/new_repeats_01.xml
index 0e5f835ffc..5cd3b97dec 100644
--- a/kobo/apps/openrosa/apps/viewer/tests/fixtures/new_repeats/instances/new_repeats_01.xml
+++ b/kobo/apps/openrosa/apps/viewer/tests/fixtures/new_repeats/instances/new_repeats_01.xml
@@ -17,4 +17,7 @@
-1.2627557 36.7926442 0.0 30.0
chrome ie
+
+ uuid:435f173c688e482463a486617004534df
+
diff --git a/kobo/apps/openrosa/apps/viewer/tests/fixtures/new_repeats/instances/new_repeats_02.xml b/kobo/apps/openrosa/apps/viewer/tests/fixtures/new_repeats/instances/new_repeats_02.xml
index 2d2659147b..5b92f562ad 100644
--- a/kobo/apps/openrosa/apps/viewer/tests/fixtures/new_repeats/instances/new_repeats_02.xml
+++ b/kobo/apps/openrosa/apps/viewer/tests/fixtures/new_repeats/instances/new_repeats_02.xml
@@ -13,4 +13,7 @@
-1.2627557 36.7926442 0.0 30.0
chrome ie firefox
+
+ uuid:32sd3c688e482486a48661700455ff
+
diff --git a/kobo/apps/openrosa/apps/viewer/tests/fixtures/new_repeats/instances/new_repeats_03.xml b/kobo/apps/openrosa/apps/viewer/tests/fixtures/new_repeats/instances/new_repeats_03.xml
new file mode 100644
index 0000000000..5aaf43f931
--- /dev/null
+++ b/kobo/apps/openrosa/apps/viewer/tests/fixtures/new_repeats/instances/new_repeats_03.xml
@@ -0,0 +1,20 @@
+
+
+
+ Adam
+ 80
+
+
+ 1
+
+ Liam
+ 40
+
+
+ Emma
+ 70
+
+
+-1.2627557 36.7926442 0.0 30.0
+chrome
+
diff --git a/kobo/apps/openrosa/apps/viewer/tests/test_pandas_mongo_bridge.py b/kobo/apps/openrosa/apps/viewer/tests/test_pandas_mongo_bridge.py
index 79086df69c..0ec24e4635 100644
--- a/kobo/apps/openrosa/apps/viewer/tests/test_pandas_mongo_bridge.py
+++ b/kobo/apps/openrosa/apps/viewer/tests/test_pandas_mongo_bridge.py
@@ -51,16 +51,27 @@ def _publish_xls_fixture_set_xform(self, fixture):
self.xform = XForm.objects.all().reverse()[0]
def _submit_fixture_instance(
- self, fixture, instance, submission_time=None):
+ self, fixture, instance, submission_time=None, add_submission_uuid=None):
"""
Submit an instance at
tests/fixtures/[fixture]/instances/[fixture]_[instance].xml
"""
xml_submission_file_path = xml_inst_filepath_from_fixture_name(
fixture, instance)
- self._make_submission(
- xml_submission_file_path, forced_submission_time=submission_time)
- self.assertEqual(self.response.status_code, 201)
+
+ if add_submission_uuid:
+ xml_submission_file_path = (
+ self._add_submission_uuid_to_submission_xml(
+ xml_submission_file_path
+ )
+ )
+ try:
+ self._make_submission(
+ xml_submission_file_path, forced_submission_time=submission_time)
+ self.assertEqual(self.response.status_code, 201)
+ finally:
+ if add_submission_uuid:
+ os.remove(xml_submission_file_path)
def _publish_single_level_repeat_form(self):
self._publish_xls_fixture_set_xform("new_repeats")
@@ -207,6 +218,7 @@ def test_csv_columns_for_gps_within_groups(self):
'gps_group/_gps_longitude',
'gps_group/_gps_altitude',
'gps_group/_gps_precision',
+ 'meta/instanceID',
'web_browsers/firefox',
'web_browsers/chrome',
'web_browsers/ie',
@@ -246,6 +258,7 @@ def test_format_mongo_data_for_csv(self):
'kids/kids_details[1]/kids_age': '50',
'kids/kids_details[2]/kids_name': 'Cain',
'kids/kids_details[2]/kids_age': '76',
+ 'meta/instanceID': 'uuid:435f173c688e482463a486617004534df',
'web_browsers/chrome': True,
'web_browsers/ie': True,
'web_browsers/safari': False,
@@ -494,7 +507,7 @@ def test_query_mongo(self):
self._publish_single_level_repeat_form()
# submit 3 instances
for i in range(3):
- self._submit_fixture_instance("new_repeats", "01")
+ self._submit_fixture_instance('new_repeats', '03', None, True)
df_builder = XLSDataFrameBuilder(self.user.username,
self.xform.id_string)
record_count = df_builder._query_mongo(count=True)
@@ -526,10 +539,10 @@ def test_csv_export_with_df_size_limit(self):
self._publish_single_level_repeat_form()
# submit 7 instances
for i in range(4):
- self._submit_fixture_instance("new_repeats", "01")
+ self._submit_fixture_instance('new_repeats', '03', None, True)
self._submit_fixture_instance("new_repeats", "02")
for i in range(2):
- self._submit_fixture_instance("new_repeats", "01")
+ self._submit_fixture_instance('new_repeats', '03', None, True)
csv_df_builder = CSVDataFrameBuilder(self.user.username,
self.xform.id_string)
record_count = csv_df_builder._query_mongo(count=True)
diff --git a/kobo/apps/openrosa/libs/tests/mixins/make_submission_mixin.py b/kobo/apps/openrosa/libs/tests/mixins/make_submission_mixin.py
index c2d14aadcb..af9f5ebfa4 100644
--- a/kobo/apps/openrosa/libs/tests/mixins/make_submission_mixin.py
+++ b/kobo/apps/openrosa/libs/tests/mixins/make_submission_mixin.py
@@ -1,6 +1,7 @@
# coding: utf-8
import os
import re
+import uuid
from tempfile import NamedTemporaryFile
from typing import Union
@@ -38,6 +39,33 @@ def _add_uuid_to_submission_xml(self, path, xform):
return path
+ def _add_submission_uuid_to_submission_xml(self, path):
+ with open(path, 'rb') as _file:
+ xml_content = _file.read().decode()
+
+ # Find the closing tag of the root element (e.g., )
+ closing_tag_index = xml_content.rfind(f'{self.xform.id_string}>')
+
+ if closing_tag_index == -1:
+ raise ValueError('Root element closing tag not found')
+
+ # Construct the meta element with a new UUID
+ meta_element = f'uuid:{str(uuid.uuid4())}'
+
+ # Insert the meta element before the closing tag of the root element
+ xml_content = (
+ xml_content[:closing_tag_index]
+ + meta_element # noqa: W503
+ + xml_content[closing_tag_index:] # noqa: W503
+ )
+
+ # Write the updated XML content to a temporary file and return the path
+ with NamedTemporaryFile(delete=False, mode='w') as tmp_file:
+ tmp_file.write(xml_content)
+ path = tmp_file.name
+
+ return path
+
def _make_submission(
self,
path: str,
diff --git a/kobo/apps/openrosa/libs/utils/logger_tools.py b/kobo/apps/openrosa/libs/utils/logger_tools.py
index fc03f679f3..bed5a08b0e 100644
--- a/kobo/apps/openrosa/libs/utils/logger_tools.py
+++ b/kobo/apps/openrosa/libs/utils/logger_tools.py
@@ -1,5 +1,7 @@
from __future__ import annotations
+import contextlib
+import hashlib
import logging
import os
import re
@@ -10,7 +12,6 @@
from typing import Generator, Optional, Union
from xml.etree import ElementTree as ET
from xml.parsers.expat import ExpatError
-
try:
from zoneinfo import ZoneInfo
except ImportError:
@@ -24,7 +25,7 @@
from django.core.exceptions import PermissionDenied, ValidationError
from django.core.files.base import File
from django.core.mail import mail_admins
-from django.db import IntegrityError, transaction
+from django.db import connection, IntegrityError, transaction
from django.db.models import Q
from django.http import (
Http404,
@@ -44,7 +45,8 @@
from kobo.apps.openrosa.apps.logger.exceptions import (
DuplicateUUIDError,
FormInactiveError,
- TemporarilyUnavailableError,
+ InstanceIdMissingError,
+ TemporarilyUnavailableError, ConflictingSubmissionUUIDError,
)
from kobo.apps.openrosa.apps.logger.models import Attachment, Instance, XForm
from kobo.apps.openrosa.apps.logger.models.attachment import (
@@ -62,11 +64,13 @@
update_xform_monthly_counter,
update_xform_submission_count,
)
-from kobo.apps.openrosa.apps.logger.xform_instance_parser import (
- DuplicateInstance,
+from kobo.apps.openrosa.apps.logger.exceptions import (
+ DuplicateInstanceError,
InstanceEmptyError,
InstanceInvalidUserError,
InstanceMultipleNodeError,
+)
+from kobo.apps.openrosa.apps.logger.xform_instance_parser import (
clean_and_parse_xml,
get_deprecated_uuid_from_xml,
get_submission_date_from_xml,
@@ -81,7 +85,6 @@
default_kobocat_storage as default_storage,
)
from kpi.utils.object_permission import get_database_user
-from kpi.utils.hash import calculate_hash
from kpi.utils.mongo_helper import MongoHelper
OPEN_ROSA_VERSION_HEADER = 'X-OpenRosa-Version'
@@ -137,7 +140,6 @@ def check_edit_submission_permissions(
))
-@transaction.atomic # paranoia; redundant since `ATOMIC_REQUESTS` set to `True`
def create_instance(
username: str,
xml_file: File,
@@ -163,51 +165,40 @@ def create_instance(
# get new and deprecated uuid's
new_uuid = get_uuid_from_xml(xml)
+ if not new_uuid:
+ raise InstanceIdMissingError
- # Dorey's rule from 2012 (commit 890a67aa):
- # Ignore submission as a duplicate IFF
- # * a submission's XForm collects start time
- # * the submitted XML is an exact match with one that
- # has already been submitted for that user.
- # The start-time requirement protected submissions with identical responses
- # from being rejected as duplicates *before* KoBoCAT had the concept of
- # submission UUIDs. Nowadays, OpenRosa requires clients to send a UUID (in
- # ``) within every submission; if the incoming XML has a UUID
- # and still exactly matches an existing submission, it's certainly a
- # duplicate (https://docs.opendatakit.org/openrosa-metadata/#fields).
- if xform.has_start_time or new_uuid is not None:
- # XML matches are identified by identical content hash OR, when a
- # content hash is not present, by string comparison of the full
- # content, which is slow! Use the management command
- # `populate_xml_hashes_for_instances` to hash existing submissions
+ with get_instance_lock(xml_hash, new_uuid, xform.id) as lock_acquired:
+ if not lock_acquired:
+ raise DuplicateInstanceError
+
+ # Check for an existing instance
existing_instance = Instance.objects.filter(
- Q(xml_hash=xml_hash) | Q(xml_hash=Instance.DEFAULT_XML_HASH, xml=xml),
+ xml_hash=xml_hash,
xform__user=xform.user,
).first()
- else:
- existing_instance = None
-
- if existing_instance:
- existing_instance.check_active(force=False)
- # ensure we have saved the extra attachments
- new_attachments, _ = save_attachments(existing_instance, media_files)
- if not new_attachments:
- raise DuplicateInstance()
+
+ if existing_instance:
+ existing_instance.check_active(force=False)
+ # ensure we have saved the extra attachments
+ new_attachments, _ = save_attachments(existing_instance, media_files)
+ if not new_attachments:
+ raise DuplicateInstanceError
+ else:
+ # Update Mongo via the related ParsedInstance
+ existing_instance.parsed_instance.save(asynchronous=False)
+ return existing_instance
else:
- # Update Mongo via the related ParsedInstance
- existing_instance.parsed_instance.save(asynchronous=False)
- return existing_instance
- else:
- instance = save_submission(
- request,
- xform,
- xml,
- media_files,
- new_uuid,
- status,
- date_created_override,
- )
- return instance
+ instance = save_submission(
+ request,
+ xform,
+ xml,
+ media_files,
+ new_uuid,
+ status,
+ date_created_override,
+ )
+ return instance
def disposition_ext_and_date(name, extension, show_date=True):
@@ -228,6 +219,26 @@ def dict2xform(submission: dict, xform_id_string: str) -> str:
return xml_head + dict2xml(submission) + xml_tail
+@contextlib.contextmanager
+def get_instance_lock(xml_hash: str, submission_uuid: str, xform_id: int) -> bool:
+ int_lock = int.from_bytes(
+ hashlib.shake_128(
+ f'{xform_id}!!{submission_uuid}!!{xml_hash}'.encode()
+ ).digest(7), 'little'
+ )
+ acquired = False
+
+ try:
+ with transaction.atomic():
+ cur = connection.cursor()
+ cur.execute('SELECT pg_try_advisory_lock(%s::bigint);', (int_lock,))
+ acquired = cur.fetchone()[0]
+ yield acquired
+ finally:
+ cur.execute('SELECT pg_advisory_unlock(%s::bigint);', (int_lock,))
+ cur.close()
+
+
def get_instance_or_404(**criteria):
"""
Mimic `get_object_or_404` but handles duplicate records.
@@ -308,6 +319,9 @@ def status_code(self):
except InstanceEmptyError:
result.error = t('Received empty submission. No instance was created')
result.http_error_response = OpenRosaResponseBadRequest(result.error)
+ except InstanceIdMissingError:
+ result.error = t('Instance ID is required')
+ result.http_error_response = OpenRosaResponseBadRequest(result.error)
except FormInactiveError:
result.error = t('Form is not active')
result.http_error_response = OpenRosaResponseNotAllowed(result.error)
@@ -320,7 +334,13 @@ def status_code(self):
except ExpatError:
result.error = t('Improperly formatted XML.')
result.http_error_response = OpenRosaResponseBadRequest(result.error)
- except DuplicateInstance:
+ except ConflictingSubmissionUUIDError:
+ result.error = t('Submission with this instance ID already exists')
+ response = OpenRosaResponse(result.error)
+ response.status_code = 409
+ response['Location'] = request.build_absolute_uri(request.path)
+ result.http_error_response = response
+ except DuplicateInstanceError:
result.error = t('Duplicate submission')
response = OpenRosaResponse(result.error)
response.status_code = 202
@@ -647,9 +667,7 @@ def save_attachments(
media_file=attachment_filename,
mimetype=f.content_type,
).first()
- if existing_attachment and (
- existing_attachment.file_hash == calculate_hash(f.read())
- ):
+ if existing_attachment:
# We already have this attachment!
continue
f.seek(0)
@@ -791,16 +809,23 @@ def get_soft_deleted_attachments(instance: Instance) -> list[Attachment]:
# Update Attachment objects to hide them if they are not used anymore.
# We do not want to delete them until the instance itself is deleted.
+ # If the new attachment has the same basename as an existing one but
+ # different content, update the existing one.
+
# FIXME Temporary hack to leave background-audio files and audit files alone
# Bug comes from `get_xform_media_question_xpaths()`
queryset = Attachment.objects.filter(instance=instance).exclude(
- Q(media_file_basename__in=basenames)
- | Q(media_file_basename__endswith='.enc')
+ Q(media_file_basename__endswith='.enc')
| Q(media_file_basename='audit.csv')
| Q(media_file_basename__regex=r'^\d{10,}\.(m4a|amr)$')
+ ).order_by('-id')
+
+ latest_attachments = queryset[:len(basenames)]
+ remaining_attachments = queryset.exclude(
+ id__in=latest_attachments.values_list('id', flat=True)
)
- soft_deleted_attachments = list(queryset.all())
- queryset.update(deleted_at=dj_timezone.now())
+ soft_deleted_attachments = list(remaining_attachments.all())
+ remaining_attachments.update(deleted_at=dj_timezone.now())
return soft_deleted_attachments
@@ -821,18 +846,20 @@ def _get_instance(
"""
# check if it is an edit submission
old_uuid = get_deprecated_uuid_from_xml(xml)
- instances = Instance.objects.filter(uuid=old_uuid)
-
- if instances:
+ if old_uuid and (instance := Instance.objects.filter(uuid=old_uuid).first()):
# edits
- instance = instances[0]
check_edit_submission_permissions(request, xform)
InstanceHistory.objects.create(
xml=instance.xml, xform_instance=instance, uuid=old_uuid)
instance.xml = xml
instance._populate_xml_hash()
instance.uuid = new_uuid
- instance.save()
+ try:
+ instance.save()
+ except IntegrityError as e:
+ if 'root_uuid' in str(e):
+ raise ConflictingSubmissionUUIDError
+ raise
else:
submitted_by = (
get_database_user(request.user)
@@ -851,8 +878,12 @@ def _get_instance(
# Only set the attribute if requested, i.e. don't bother ever
# setting it to `False`
instance.defer_counting = True
- instance.save()
-
+ try:
+ instance.save()
+ except IntegrityError as e:
+ if 'root_uuid' in str(e):
+ raise ConflictingSubmissionUUIDError
+ raise
return instance
diff --git a/kobo/apps/subsequences/tests/test_submission_stream.py b/kobo/apps/subsequences/tests/test_submission_stream.py
index 7b4262aacd..38aebdc22c 100644
--- a/kobo/apps/subsequences/tests/test_submission_stream.py
+++ b/kobo/apps/subsequences/tests/test_submission_stream.py
@@ -1,8 +1,10 @@
from copy import deepcopy
+from unittest.mock import patch
from django.contrib.auth import get_user_model
from django.test import TestCase
+from kobo.apps.openrosa.apps.logger.exceptions import ConflictingSubmissionUUIDError
from kobo.apps.subsequences.models import SubmissionExtras
from kobo.apps.subsequences.utils import stream_with_extras
from kpi.models import Asset
@@ -254,30 +256,39 @@ def test_stream_with_extras_handles_duplicated_submission_uuids(self):
'_uuid': '1c05898e-b43c-491d-814c-79595eb84e81',
},
]
- self.asset.deployment.mock_submissions(submissions)
- # Process submissions with extras
- output = list(
- stream_with_extras(
- self.asset.deployment.get_submissions(user=self.asset.owner),
- self.asset,
+ # Mock the get_submissions method to return the test submissions
+ with patch.object(
+ self.asset.deployment,
+ 'get_submissions',
+ return_value=submissions,
+ ):
+ # Expect a ConflictingSubmissionUUIDError due to duplicated UUIDs
+ with self.assertRaises(ConflictingSubmissionUUIDError):
+ self.asset.deployment.mock_submissions(submissions)
+
+ # Process submissions with extras
+ output = list(
+ stream_with_extras(
+ self.asset.deployment.get_submissions(user=self.asset.owner),
+ self.asset,
+ )
)
- )
- # Make sure that uuid values for single or multiple choice qualitative
- # analysis questions are kept as strings and not mutated
- for submission in output:
- supplemental_details = submission['_supplementalDetails']
- for qual_response in supplemental_details['Tell_me_a_story']['qual']:
- if qual_response['type'] not in [
- 'qual_select_one',
- 'qual_select_multiple',
- ]:
- # question is not a single or multiple choice one
- continue
+ # Make sure that uuid values for single or multiple choice qualitative
+ # analysis questions are kept as strings and not mutated
+ for submission in output:
+ supplemental_details = submission['_supplementalDetails']
+ for qual_response in supplemental_details['Tell_me_a_story']['qual']:
+ if qual_response['type'] not in [
+ 'qual_select_one',
+ 'qual_select_multiple',
+ ]:
+ # question is not a single or multiple choice one
+ continue
- for v in qual_response['val']:
- assert isinstance(v['uuid'], str)
+ for v in qual_response['val']:
+ assert isinstance(v['uuid'], str)
def test_stream_with_extras_ignores_empty_qual_responses(self):
# Modify submission extras 'val' to be an empty string
diff --git a/kobo/settings/testing.py b/kobo/settings/testing.py
index 421adcaeb6..3ebdc13dac 100644
--- a/kobo/settings/testing.py
+++ b/kobo/settings/testing.py
@@ -1,4 +1,5 @@
# coding: utf-8
+import os
from django.contrib.auth.management import DEFAULT_DB_ALIAS
from mongomock import MongoClient as MockMongoClient
@@ -51,4 +52,6 @@
TEST_HTTP_HOST = 'testserver'
TEST_USERNAME = 'bob'
+SKIP_TESTS_WITH_CONCURRENCY = os.getenv('SKIP_TESTS_WITH_CONCURRENCY', False)
+
OPENROSA_DB_ALIAS = DEFAULT_DB_ALIAS
diff --git a/kpi/tests/api/v2/test_api_submissions.py b/kpi/tests/api/v2/test_api_submissions.py
index b659b4d28f..808570bc0a 100644
--- a/kpi/tests/api/v2/test_api_submissions.py
+++ b/kpi/tests/api/v2/test_api_submissions.py
@@ -25,6 +25,7 @@
from kobo.apps.openrosa.apps.logger.models.instance import Instance
from kobo.apps.openrosa.apps.main.models.user_profile import UserProfile
from kobo.apps.openrosa.libs.utils.logger_tools import dict2xform
+from kobo.apps.openrosa.apps.logger.exceptions import InstanceIdMissingError
from kpi.constants import (
ASSET_TYPE_SURVEY,
PERM_ADD_SUBMISSIONS,
@@ -1590,6 +1591,16 @@ def test_edit_submission_with_xml_missing_uuids(self):
# The form UUID is already omitted by these tests, but fail if that
# changes in the future
assert 'formhub/uuid' not in submission.keys()
+
+ # Attempt to mock the submission without UUIDs
+ with self.assertRaises(InstanceIdMissingError) as ex:
+ self.asset.deployment.mock_submissions([submission], create_uuids=False)
+
+ # Rejecting the submission because it does not have an instance ID
+ self.assertEqual(str(ex.exception), 'Could not determine the instance ID')
+
+ # Test the edit flow with a submission that has a UUID
+ submission['meta/instanceID'] = 'uuid:9710c729-00a5-41f1-b740-8dd618bb4a49'
self.asset.deployment.mock_submissions([submission], create_uuids=False)
# Find and verify the new submission
@@ -1607,7 +1618,10 @@ def test_edit_submission_with_xml_missing_uuids(self):
submission_xml_root = fromstring_preserve_root_xmlns(submission_xml)
assert submission_json['_id'] == submission['_id']
assert submission_xml_root.find('./find_this').text == 'hello!'
- assert submission_xml_root.find('./meta/instanceID') is None
+ assert (
+ submission_xml_root.find('./meta/instanceID').text == # noqa: W504
+ 'uuid:9710c729-00a5-41f1-b740-8dd618bb4a49'
+ )
assert submission_xml_root.find('./formhub/uuid') is None
# Get edit endpoint