diff --git a/Pipfile b/Pipfile new file mode 100644 index 000000000..c398b0d5a --- /dev/null +++ b/Pipfile @@ -0,0 +1,11 @@ +[[source]] +url = "https://pypi.org/simple" +verify_ssl = true +name = "pypi" + +[packages] + +[dev-packages] + +[requires] +python_version = "3.10" diff --git a/backend/review/import_utils/import_to_db.py b/backend/review/import_utils/import_to_db.py index 5079af58c..36d5db3fe 100644 --- a/backend/review/import_utils/import_to_db.py +++ b/backend/review/import_utils/import_to_db.py @@ -270,9 +270,9 @@ def stat(key, amt=1): return stat -def import_summary_rows(summaries, show_progress_bar=True): +def import_summary_rows(summaries: iter, show_progress_bar=True): """ - Imports summary rows given a summaries list. + Imports summary rows given a summaries iterable. """ stats = dict() stat = gen_stat(stats) diff --git a/backend/review/import_utils/parse_sql.py b/backend/review/import_utils/parse_sql.py index c790d74d8..af9371e20 100644 --- a/backend/review/import_utils/parse_sql.py +++ b/backend/review/import_utils/parse_sql.py @@ -5,6 +5,8 @@ from lark import Lark, Transformer from tqdm import tqdm +from courses.util import semester_suffix_map_inv, translate_semester_inv + """ This file contains functionality for parsing the SQL dump files that we get from ISC. @@ -72,7 +74,14 @@ def row(self, items): to generate a row in a quasi-JSON format. Who needs MongoDB? """ _, col_names, values = items - return dict(zip(col_names, values)) + row_dict = dict(zip(col_names, values)) + + # Convert new OpenData API semesters to internal semesters + if "TERM" in row_dict: + stripped_term = row_dict["TERM"].strip() + if stripped_term[-2:] in semester_suffix_map_inv: + row_dict["TERM"] = translate_semester_inv(stripped_term) + return row_dict def TOKEN(self, items): """ diff --git a/backend/review/management/commands/iscimport.py b/backend/review/management/commands/iscimport.py index f39440808..f81f88b2f 100644 --- a/backend/review/management/commands/iscimport.py +++ b/backend/review/management/commands/iscimport.py @@ -27,6 +27,16 @@ ISC_DESC_TABLE = "TEST_PCR_COURSE_DESC_V" +def assert_semesters_not_current(semesters): + current_semester = get_current_semester() + for semester in semesters: + if semester == current_semester: + raise ValueError( + f"You cannot import reviews for the current semester ({current_semester}). " + f"Did you forget to update the SEMESTER option in the Django admin console?" + ) + + class Command(BaseCommand): help = """ Import course review data from the zip of mysqldump files that we get from ISC every semester. @@ -150,7 +160,7 @@ def handle(self, *args, **kwargs): import_all = kwargs["import_all"] s3_bucket = kwargs["s3_bucket"] is_zip_file = kwargs["zip"] or s3_bucket is not None - summary_file = kwargs["summary_file"] + summary_file = kwargs["summary_file"] # either summary table or summary hist table import_details = kwargs["import_details"] import_descriptions = kwargs["import_descriptions"] show_progress_bar = kwargs["show_progress_bar"] @@ -164,13 +174,7 @@ def handle(self, *args, **kwargs): "Must define semester with (-s) or explicitly import all semesters with (-a)." ) if semesters is not None: - current_semester = get_current_semester() - for semester in semesters: - if semester == current_semester: - raise ValueError( - f"You cannot import reviews for the current semester ({current_semester}). " - f"Did you forget to update the SEMESTER option in the Django admin console?" - ) + assert_semesters_not_current(semesters) if s3_bucket is not None: fp = "/tmp/pcrdump.zip" @@ -185,63 +189,66 @@ def handle(self, *args, **kwargs): "modified if the whole script succeeds." ) - with transaction.atomic(): # Only commit changes if the whole script succeeds - # TODO: When we import details and crosslistings, get their data here too. - tables_to_get = [summary_file] - idx = 1 - detail_idx = -1 - if import_details: - tables_to_get.append(ISC_RATING_TABLE) - detail_idx = idx - idx += 1 - - description_idx = -1 - if import_descriptions: - tables_to_get.append(ISC_DESC_TABLE) - description_idx = idx - idx += 1 - - files = self.get_files(src, is_zip_file, tables_to_get) - - summary_fo = files[0] - print("Loading summary file...") - summary_rows = load_sql_dump(summary_fo, progress=show_progress_bar, lazy=False) + tables_to_get = [summary_file] + idx = 1 + detail_idx = -1 + if import_details: + tables_to_get.append(ISC_RATING_TABLE) + detail_idx = idx + idx += 1 + + description_idx = -1 + if import_descriptions: + tables_to_get.append(ISC_DESC_TABLE) + description_idx = idx + idx += 1 + + files = self.get_files(src, is_zip_file, tables_to_get) + summary_fo = files[0] + + print("Loading summary file...") + summary_rows = load_sql_dump(summary_fo, progress=show_progress_bar, lazy=False) + gc.collect() + print("SQL parsed and loaded!") + if not import_all: + full_len = len(summary_rows) + summary_rows = [r for r in summary_rows if r["TERM"] in semesters] gc.collect() - print("SQL parsed and loaded!") - - if not import_all: - full_len = len(summary_rows) - summary_rows = [r for r in summary_rows if r["TERM"] in semesters] - gc.collect() - filtered_len = len(summary_rows) - print(f"Filtered {full_len} rows down to {filtered_len} rows.") - - semesters = sorted(list({r["TERM"] for r in summary_rows})) - gc.collect() - to_delete = Review.objects.filter(section__course__semester__in=semesters) - delete_count = to_delete.count() - - if delete_count > 0: - if not force: - prompt = input( - f"This import will overwrite {delete_count} rows that have already been" - + "imported. Continue? (y/N) " + filtered_len = len(summary_rows) + print(f"Filtered {full_len} rows down to {filtered_len} rows.") + semesters = sorted(list({r["TERM"] for r in summary_rows})) + gc.collect() + + for semester in semesters: + print(f"Loading {semester}...") + with transaction.atomic(): # Commit changes if all imports for the semester succeed + to_delete = Review.objects.filter(section__course__semester=semester) + delete_count = to_delete.count() + if delete_count > 0: + if not force: + prompt = input( + f"This import will overwrite {delete_count} rows that have already been" + + "imported. Continue? (y/N) " + ) + if prompt.strip().upper() != "Y": + print("Aborting...") + return 0 + + print( + f"Deleting {delete_count} existing reviews for {semester} " + "from the database..." ) - if prompt.strip().upper() != "Y": - print("Aborting...") - return 0 + to_delete.delete() - print( - f"Deleting {delete_count} existing reviews for semesters from the database..." + print(f"Importing reviews for semester {semester}") + stats = import_summary_rows( + (r for r in summary_rows if r["TERM"] == semester), show_progress_bar ) - to_delete.delete() - - print(f"Importing reviews for semester(s) {', '.join(semesters)}") - stats = import_summary_rows(summary_rows, show_progress_bar) - print(stats) + print(stats) - gc.collect() + gc.collect() + with transaction.atomic(): if import_details: print("Loading details file...") stats = import_ratings_rows( @@ -260,16 +267,16 @@ def handle(self, *args, **kwargs): ) print(stats) - self.close_files(files) - # invalidate cached views - print("Invalidating cache...") - del_count = clear_cache() - print(f"{del_count if del_count >=0 else 'all'} cache entries removed.") + self.close_files(files) + # invalidate cached views + print("Invalidating cache...") + del_count = clear_cache() + print(f"{del_count if del_count >=0 else 'all'} cache entries removed.") - gc.collect() + gc.collect() - print("Recomputing Section.has_reviews...") - recompute_has_reviews() + print("Recomputing Section.has_reviews...") + recompute_has_reviews() print("Done.") return 0 diff --git a/backend/tests/review/test_import.py b/backend/tests/review/test_import.py index 2db8310a4..6918cc8bb 100644 --- a/backend/tests/review/test_import.py +++ b/backend/tests/review/test_import.py @@ -116,6 +116,15 @@ def test_parse_descriptions(self): self.assertIsInstance(parse, dict) self.assertDictEqual(expected, parse) + def test_semester_transformation(self): + query = """ + INSERT into PCRDEV.TEST_PCR_COURSE_DESC_V + (TERM) Values ('202230'); + """ + parse = parse_row(query) + expected = {"TERM": "2022C"} + self.assertDictEqual(parse, expected) + class ReviewImportTestCase(TestCase): def setUp(self):