From 58e2adc4a1543d8018a3ba57b19ef7dd818600f4 Mon Sep 17 00:00:00 2001
From: AaDalal <57609353+AaDalal@users.noreply.github.com>
Date: Fri, 29 Sep 2023 09:21:08 -0400
Subject: [PATCH] make iscimport work semester by semester

Load and filter the summary dump outside of any transaction, then, for each
semester, delete the existing reviews and import that semester's summary
rows inside its own transaction.atomic() block instead of wrapping the whole
command in a single transaction. Details and descriptions are imported in a
separate transaction afterwards; closing the files, clearing the cache and
recomputing Section.has_reviews now run outside any transaction.

The current-semester guard moves into a module-level helper,
assert_semesters_not_current(), and the import_summary_rows() docstring now
says it accepts any iterable, because it is handed one generator per
semester.

Also add the missing space in the overwrite prompt, which previously read
"...have already beenimported.".

---
 backend/review/import_utils/import_to_db.py   |   2 +-
 .../review/management/commands/iscimport.py   | 144 ++++++++++--------
 2 files changed, 78 insertions(+), 68 deletions(-)

diff --git a/backend/review/import_utils/import_to_db.py b/backend/review/import_utils/import_to_db.py
index 5079af58c..36d5db3fe 100644
--- a/backend/review/import_utils/import_to_db.py
+++ b/backend/review/import_utils/import_to_db.py
@@ -270,9 +270,9 @@ def stat(key, amt=1):
     return stat
 
 
 def import_summary_rows(summaries, show_progress_bar=True):
     """
-    Imports summary rows given a summaries list.
+    Imports summary rows given a summaries iterable.
     """
     stats = dict()
     stat = gen_stat(stats)
diff --git a/backend/review/management/commands/iscimport.py b/backend/review/management/commands/iscimport.py
index f219f73dc..7086ac289 100644
--- a/backend/review/management/commands/iscimport.py
+++ b/backend/review/management/commands/iscimport.py
@@ -27,6 +27,19 @@
 ISC_DESC_TABLE = "TEST_PCR_COURSE_DESC_V"
 
 
+def assert_semesters_not_current(semesters):
+    """
+    Raise a ValueError if any of the given semesters is the current semester.
+    """
+    current_semester = get_current_semester()
+    for semester in semesters:
+        if semester == current_semester:
+            raise ValueError(
+                f"You cannot import reviews for the current semester ({current_semester}). "
+                f"Did you forget to update the SEMESTER option in the Django admin console?"
+            )
+
+
 class Command(BaseCommand):
     help = """
     Import course review data from the zip of mysqldump files that we get from ISC every semester.
@@ -164,13 +177,7 @@ def handle(self, *args, **kwargs):
                 "Must define semester with (-s) or explicitly import all semesters with (-a)."
             )
         if semesters is not None:
-            current_semester = get_current_semester()
-            for semester in semesters:
-                if semester == current_semester:
-                    raise ValueError(
-                        f"You cannot import reviews for the current semester ({current_semester}). "
-                        f"Did you forget to update the SEMESTER option in the Django admin console?"
-                    )
+            assert_semesters_not_current(semesters)
 
         if s3_bucket is not None:
             fp = "/tmp/pcrdump.zip"
@@ -185,63 +192,66 @@ def handle(self, *args, **kwargs):
             "modified if the whole script succeeds."
         )
 
-        with transaction.atomic():  # Only commit changes if the whole script succeeds
-            # TODO: When we import details and crosslistings, get their data here too.
-            tables_to_get = [summary_file]
-            idx = 1
-            detail_idx = -1
-            if import_details:
-                tables_to_get.append(ISC_RATING_TABLE)
-                detail_idx = idx
-                idx += 1
-
-            description_idx = -1
-            if import_descriptions:
-                tables_to_get.append(ISC_DESC_TABLE)
-                description_idx = idx
-                idx += 1
-
-            files = self.get_files(src, is_zip_file, tables_to_get)
-
-            summary_fo = files[0]
-            print("Loading summary file...")
-            summary_rows = load_sql_dump(summary_fo, progress=show_progress_bar, lazy=False)
-            gc.collect()
-            print("SQL parsed and loaded!")
-
-            if not import_all:
-                full_len = len(summary_rows)
-                summary_rows = [r for r in summary_rows if r["TERM"] in semesters]
-                gc.collect()
-                filtered_len = len(summary_rows)
-                print(f"Filtered {full_len} rows down to {filtered_len} rows.")
-
-            semesters = sorted(list({r["TERM"] for r in summary_rows}))
+        tables_to_get = [summary_file]
+        idx = 1
+        detail_idx = -1
+        if import_details:
+            tables_to_get.append(ISC_RATING_TABLE)
+            detail_idx = idx
+            idx += 1
+
+        description_idx = -1
+        if import_descriptions:
+            tables_to_get.append(ISC_DESC_TABLE)
+            description_idx = idx
+            idx += 1
+
+        files = self.get_files(src, is_zip_file, tables_to_get)
+        summary_fo = files[0]
+
+        print("Loading summary file...")
+        summary_rows = load_sql_dump(summary_fo, progress=show_progress_bar, lazy=False)
+        gc.collect()
+        print("SQL parsed and loaded!")
+        if not import_all:
+            full_len = len(summary_rows)
+            summary_rows = [r for r in summary_rows if r["TERM"] in semesters]
             gc.collect()
-            to_delete = Review.objects.filter(section__course__semester__in=semesters)
-            delete_count = to_delete.count()
-
-            if delete_count > 0:
-                if not force:
-                    prompt = input(
-                        f"This import will overwrite {delete_count} rows that have already been"
-                        + "imported. Continue? (y/N) "
+            filtered_len = len(summary_rows)
+            print(f"Filtered {full_len} rows down to {filtered_len} rows.")
+        semesters = sorted(list({r["TERM"] for r in summary_rows}))
+        gc.collect()
+
+        for semester in semesters:
+            print(f"Loading {semester}...")
+            with transaction.atomic():  # Commit each semester's import independently
+                to_delete = Review.objects.filter(section__course__semester=semester)
+                delete_count = to_delete.count()
+                if delete_count > 0:
+                    if not force:
+                        prompt = input(
+                            f"This import will overwrite {delete_count} rows that have already been"
+                            + " imported. Continue? (y/N) "
+                        )
+                        if prompt.strip().upper() != "Y":
+                            print("Aborting...")
+                            return 0
+
+                    print(
+                        f"Deleting {delete_count} existing reviews for {semester} from the database..."
                     )
-                    if prompt.strip().upper() != "Y":
-                        print("Aborting...")
-                        return 0
-
-                print(
-                    f"Deleting {delete_count} existing reviews for semesters from the database..."
-                )
-                to_delete.delete()
+                    to_delete.delete()
 
-            print(f"Importing reviews for semester(s) {', '.join(semesters)}")
-            stats = import_summary_rows(summary_rows, show_progress_bar)
-            print(stats)
+                print(f"Importing reviews for semester {semester}")
+                stats = import_summary_rows(
+                    (r for r in summary_rows if r["TERM"] == semester),
+                    show_progress_bar,
+                )
+                print(stats)
 
-            gc.collect()
+                gc.collect()
 
+        with transaction.atomic():
             if import_details:
                 print("Loading details file...")
                 stats = import_ratings_rows(
@@ -260,16 +270,16 @@ def handle(self, *args, **kwargs):
                 )
                 print(stats)
 
-            self.close_files(files)
-            # invalidate cached views
-            print("Invalidating cache...")
-            del_count = clear_cache()
-            print(f"{del_count if del_count >=0 else 'all'} cache entries removed.")
+        self.close_files(files)
+        # invalidate cached views
+        print("Invalidating cache...")
+        del_count = clear_cache()
+        print(f"{del_count if del_count >=0 else 'all'} cache entries removed.")
 
-            gc.collect()
+        gc.collect()
 
-            print("Recomputing Section.has_reviews...")
-            recompute_has_reviews()
+        print("Recomputing Section.has_reviews...")
+        recompute_has_reviews()
 
         print("Done.")
         return 0