diff --git a/event_sink_clickhouse/__init__.py b/event_sink_clickhouse/__init__.py index f51492c..c688f5e 100644 --- a/event_sink_clickhouse/__init__.py +++ b/event_sink_clickhouse/__init__.py @@ -2,4 +2,4 @@ A sink for Open edX events to send them to ClickHouse. """ -__version__ = "0.3.0" +__version__ = "0.4.0" diff --git a/event_sink_clickhouse/sinks/course_published.py b/event_sink_clickhouse/sinks/course_published.py index 30705d7..87b9e41 100644 --- a/event_sink_clickhouse/sinks/course_published.py +++ b/event_sink_clickhouse/sinks/course_published.py @@ -79,6 +79,10 @@ def serialize_item(self, item, many=False, initial=None): # Serialize the XBlocks to dicts and map them with their location as keys the # whole map needs to be completed before we can define relationships index = 0 + section_idx = 0 + subsection_idx = 0 + unit_idx = 0 + for block in items: index += 1 fields = self.serialize_xblock( @@ -88,6 +92,22 @@ def serialize_item(self, item, many=False, initial=None): initial["dump_id"], initial["time_last_dumped"], ) + + if fields["xblock_data_json"]["block_type"] == "chapter": + section_idx += 1 + subsection_idx = 0 + unit_idx = 0 + elif fields["xblock_data_json"]["block_type"] == "sequential": + subsection_idx += 1 + unit_idx = 0 + elif fields["xblock_data_json"]["block_type"] == "vertical": + unit_idx += 1 + + fields["xblock_data_json"]["section"] = section_idx + fields["xblock_data_json"]["subsection"] = subsection_idx + fields["xblock_data_json"]["unit"] = unit_idx + + fields["xblock_data_json"] = json.dumps(fields["xblock_data_json"]) location_to_node[ XBlockSink.strip_branch_and_version(block.location) ] = fields @@ -147,6 +167,7 @@ def serialize_xblock( "block_type": block_type, "detached": 1 if block_type in detached_xblock_types else 0, "graded": 1 if getattr(item, "graded", False) else 0, + "completion_mode": getattr(item, "completion_mode", ""), } # Core table data, if things change here it's a big deal. @@ -155,7 +176,7 @@ def serialize_xblock( "course_key": str(course_key), "location": str(item.location), "display_name": item.display_name_with_default.replace("'", "'"), - "xblock_data_json": json.dumps(json_data), + "xblock_data_json": json_data, "order": index, "edited_on": str(getattr(item, "edited_on", "")), "dump_id": dump_id, diff --git a/test_utils/helpers.py b/test_utils/helpers.py index 8ea02b6..baf55f6 100644 --- a/test_utils/helpers.py +++ b/test_utils/helpers.py @@ -52,8 +52,8 @@ class FakeXBlock: """ Fakes the parameters of an XBlock that we care about. """ - def __init__(self, identifier, detached_block=False, graded=False): - self.block_type = "course_info" if detached_block else "vertical" + def __init__(self, identifier, block_type="vertical", graded=False, completion_mode="unknown"): + self.block_type = block_type self.scope_ids = Mock() self.scope_ids.usage_id.course_key = course_key_factory() self.scope_ids.block_type = self.block_type @@ -62,6 +62,7 @@ def __init__(self, identifier, detached_block=False, graded=False): self.edited_on = datetime.now() self.children = [] self.graded = graded + self.completion_mode = completion_mode def get_children(self): """ @@ -124,6 +125,40 @@ def fake_course_overview_factory(modified=None): ) +def fake_serialize_fake_course_overview(course_overview): + """ + Return a dict representation of a FakeCourseOverview. + """ + json_fields = { + "advertised_start": str(course_overview.advertised_start), + "announcement": str(course_overview.announcement), + "lowest_passing_grade": float(course_overview.lowest_passing_grade), + "invitation_only": course_overview.invitation_only, + "max_student_enrollments_allowed": course_overview.max_student_enrollments_allowed, + "effort": course_overview.effort, + "enable_proctored_exams": course_overview.enable_proctored_exams, + "entrance_exam_enabled": course_overview.entrance_exam_enabled, + "external_id": course_overview.external_id, + "language": course_overview.language, + } + + return { + "org": course_overview.org, + "course_key": str(course_overview.id), + "display_name": course_overview.display_name, + "course_start": course_overview.start, + "course_end": course_overview.end, + "enrollment_start": course_overview.enrollment_start, + "enrollment_end": course_overview.enrollment_end, + "self_paced": course_overview.self_paced, + "course_data_json": json.dumps(json_fields), + "created": course_overview.created, + "modified": course_overview.modified, + "dump_id": "", + "time_last_dumped": "", + } + + def mock_course_overview(): """ Create a fake CourseOverview object that supports just the things we care about. @@ -170,29 +205,41 @@ def course_factory(): Return a fake course structure that exercises most of the serialization features. """ # Create a base block - top_block = FakeXBlock("top") + top_block = FakeXBlock("top", block_type="course") course = [top_block, ] - # Create a few children + # Create a few sections for i in range(3): - block = FakeXBlock(f"Child {i}") + block = FakeXBlock(f"Section {i}", block_type="chapter") course.append(block) top_block.children.append(block) - # Create grandchildren on some children + # Create some subsections if i > 0: - sub_block = FakeXBlock(f"Grandchild {i}") - course.append(sub_block) - block.children.append(sub_block) + for ii in range(3): + sub_block = FakeXBlock(f"Subsection {ii}", block_type="sequential") + course.append(sub_block) + block.children.append(sub_block) + + for iii in range(3): + # Create some units + unit_block = FakeXBlock(f"Unit {iii}", block_type="vertical") + course.append(unit_block) + sub_block.children.append(unit_block) # Create some detached blocks at the top level for i in range(3): - course.append(FakeXBlock(f"Detached {i}", detached_block=True)) + course.append(FakeXBlock(f"Detached {i}", block_type="course_info")) # Create some graded blocks at the top level for i in range(3): course.append(FakeXBlock(f"Graded {i}", graded=True)) + # Create some completable blocks at the top level + course.append(FakeXBlock("Completable", completion_mode="completable")) + course.append(FakeXBlock("Aggregator", completion_mode="aggregator")) + course.append(FakeXBlock("Excluded", completion_mode="excluded")) + return course diff --git a/tests/test_course_published.py b/tests/test_course_published.py index 66b8952..56f78f8 100644 --- a/tests/test_course_published.py +++ b/tests/test_course_published.py @@ -4,7 +4,7 @@ import json import logging from datetime import datetime -from unittest.mock import patch +from unittest.mock import MagicMock, patch import pytest import requests @@ -13,7 +13,7 @@ from responses import matchers from responses.registries import OrderedRegistry -from event_sink_clickhouse.sinks.course_published import CourseOverviewSink +from event_sink_clickhouse.sinks.course_published import CourseOverviewSink, XBlockSink from event_sink_clickhouse.tasks import dump_course_to_clickhouse from test_utils.helpers import ( check_block_csv_matcher, @@ -22,6 +22,7 @@ course_factory, course_str_factory, fake_course_overview_factory, + fake_serialize_fake_course_overview, get_clickhouse_http_params, mock_course_overview, mock_detached_xblock_types, @@ -43,34 +44,7 @@ def test_course_publish_success(mock_modulestore, mock_detached, mock_overview, course_overview = fake_course_overview_factory(modified=datetime.now()) mock_modulestore.return_value.get_items.return_value = course - json_fields = { - "advertised_start": str(course_overview.advertised_start), - "announcement": str(course_overview.announcement), - "lowest_passing_grade": float(course_overview.lowest_passing_grade), - "invitation_only": course_overview.invitation_only, - "max_student_enrollments_allowed": course_overview.max_student_enrollments_allowed, - "effort": course_overview.effort, - "enable_proctored_exams": course_overview.enable_proctored_exams, - "entrance_exam_enabled": course_overview.entrance_exam_enabled, - "external_id": course_overview.external_id, - "language": course_overview.language, - } - - mock_serialize_item.return_value = { - "org": course_overview.org, - "course_key": str(course_overview.id), - "display_name": course_overview.display_name, - "course_start": course_overview.start, - "course_end": course_overview.end, - "enrollment_start": course_overview.enrollment_start, - "enrollment_end": course_overview.enrollment_end, - "self_paced": course_overview.self_paced, - "course_data_json": json.dumps(json_fields), - "created": course_overview.created, - "modified": course_overview.modified, - "dump_id": "", - "time_last_dumped": "", - } + mock_serialize_item.return_value = fake_serialize_fake_course_overview(course_overview) # Fake the "detached types" list since we can't import it here mock_detached.return_value = mock_detached_xblock_types() @@ -129,34 +103,7 @@ def test_course_publish_clickhouse_error(mock_modulestore, mock_detached, mock_o course_overview = fake_course_overview_factory(modified=datetime.now()) mock_overview.return_value.get_from_id.return_value = course_overview - json_fields = { - "advertised_start": str(course_overview.advertised_start), - "announcement": str(course_overview.announcement), - "lowest_passing_grade": float(course_overview.lowest_passing_grade), - "invitation_only": course_overview.invitation_only, - "max_student_enrollments_allowed": course_overview.max_student_enrollments_allowed, - "effort": course_overview.effort, - "enable_proctored_exams": course_overview.enable_proctored_exams, - "entrance_exam_enabled": course_overview.entrance_exam_enabled, - "external_id": course_overview.external_id, - "language": course_overview.language, - } - - mock_serialize_item.return_value = { - "org": course_overview.org, - "course_key": str(course_overview.id), - "display_name": course_overview.display_name, - "course_start": course_overview.start, - "course_end": course_overview.end, - "enrollment_start": course_overview.enrollment_start, - "enrollment_end": course_overview.enrollment_end, - "self_paced": course_overview.self_paced, - "course_data_json": json.dumps(json_fields), - "created": course_overview.created, - "modified": course_overview.modified, - "dump_id": "", - "time_last_dumped": "", - } + mock_serialize_item.return_value = fake_serialize_fake_course_overview(course_overview) # This will raise an exception when we try to post to ClickHouse responses.post( @@ -265,3 +212,107 @@ def test_get_last_dump_time(): last_published_date = sink.get_last_dumped_timestamp(course_key) dt = datetime.strptime(last_published_date, "%Y-%m-%d %H:%M:%S.%f+00:00") assert dt + + +@patch("event_sink_clickhouse.sinks.course_published.get_detached_xblock_types") +@patch("event_sink_clickhouse.sinks.course_published.get_modulestore") +# pytest:disable=unused-argument +def test_xblock_tree_structure(mock_modulestore, mock_detached): + """ + Test that our calculations of section/subsection/unit are correct. + """ + # Create a fake course structure with a few fake XBlocks + course = course_factory() + course_overview = fake_course_overview_factory(modified=datetime.now()) + mock_modulestore.return_value.get_items.return_value = course + + # Fake the "detached types" list since we can't import it here + mock_detached.return_value = mock_detached_xblock_types() + + fake_serialized_course_overview = fake_serialize_fake_course_overview(course_overview) + sink = XBlockSink(connection_overrides={}, log=MagicMock()) + + # Remove the relationships sink, we're just checking the structure here. + sink.serialize_relationships = MagicMock() + initial_data = {"dump_id": "xyz", "time_last_dumped": "2023-09-05"} + results = sink.serialize_item(fake_serialized_course_overview, initial=initial_data) + + def _check_tree_location(block, expected_section=0, expected_subsection=0, expected_unit=0): + """ + Assert the expected values in certain returned blocks or print useful debug information. + """ + try: + j = json.loads(block["xblock_data_json"]) + assert j["section"] == expected_section + assert j["subsection"] == expected_subsection + assert j["unit"] == expected_unit + except AssertionError as e: + print(e) + print(block) + raise + + # The tree has new sections at these indexes + _check_tree_location(results[1], 1) + _check_tree_location(results[2], 2) + _check_tree_location(results[15], 3) + + # The tree has new subsections at these indexes + _check_tree_location(results[3], 2, 1) + _check_tree_location(results[7], 2, 2) + _check_tree_location(results[11], 2, 3) + _check_tree_location(results[24], 3, 3) + + # The tree has new units at these indexes + _check_tree_location(results[4], 2, 1, 1) + _check_tree_location(results[5], 2, 1, 2) + _check_tree_location(results[6], 2, 1, 3) + _check_tree_location(results[10], 2, 2, 3) + _check_tree_location(results[25], 3, 3, 1) + _check_tree_location(results[26], 3, 3, 2) + _check_tree_location(results[27], 3, 3, 3) + + +@patch("event_sink_clickhouse.sinks.course_published.get_detached_xblock_types") +@patch("event_sink_clickhouse.sinks.course_published.get_modulestore") +def test_xblock_graded_completable_mode(mock_modulestore, mock_detached): + """ + Test that our grading and completion fields serialize. + """ + # Create a fake course structure with a few fake XBlocks + course = course_factory() + course_overview = fake_course_overview_factory(modified=datetime.now()) + mock_modulestore.return_value.get_items.return_value = course + + # Fake the "detached types" list since we can't import it here + mock_detached.return_value = mock_detached_xblock_types() + + fake_serialized_course_overview = fake_serialize_fake_course_overview(course_overview) + sink = XBlockSink(connection_overrides={}, log=MagicMock()) + + # Remove the relationships sink, we're just checking the structure here. + sink.serialize_relationships = MagicMock() + initial_data = {"dump_id": "xyz", "time_last_dumped": "2023-09-05"} + results = sink.serialize_item(fake_serialized_course_overview, initial=initial_data) + + def _check_item_serialized_location(block, expected_graded=0, expected_completion_mode="unknown"): + """ + Assert the expected values in certain returned blocks or print useful debug information. + """ + try: + j = json.loads(block["xblock_data_json"]) + assert j["graded"] == expected_graded + assert j["completion_mode"] == expected_completion_mode + except AssertionError as e: + print(e) + print(block) + raise + + # These tree indexes are the only ones which should have gradable set + _check_item_serialized_location(results[31], 1) + _check_item_serialized_location(results[32], 1) + _check_item_serialized_location(results[33], 1) + + # These tree indexes are the only ones which should have non-"unknown" completion_modes. + _check_item_serialized_location(results[34], 0, "completable") + _check_item_serialized_location(results[35], 0, "aggregator") + _check_item_serialized_location(results[36], 0, "excluded")