Skip to content

Commit

Permalink
Merge pull request #515 from pennlabs/aagam/update-iscimport
Browse files Browse the repository at this point in the history
Update iscimport script to handle banner semesters (e.g., '202310')
  • Loading branch information
AaDalal authored Oct 26, 2023
2 parents 37f1f42 + 19caff4 commit f22d2e9
Show file tree
Hide file tree
Showing 5 changed files with 106 additions and 70 deletions.
11 changes: 11 additions & 0 deletions Pipfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
[[source]]
url = "https://pypi.org/simple"
verify_ssl = true
name = "pypi"

[packages]

[dev-packages]

[requires]
python_version = "3.10"
4 changes: 2 additions & 2 deletions backend/review/import_utils/import_to_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,9 +270,9 @@ def stat(key, amt=1):
return stat


def import_summary_rows(summaries, show_progress_bar=True):
def import_summary_rows(summaries: iter, show_progress_bar=True):
"""
Imports summary rows given a summaries list.
Imports summary rows given a summaries iterable.
"""
stats = dict()
stat = gen_stat(stats)
Expand Down
11 changes: 10 additions & 1 deletion backend/review/import_utils/parse_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from lark import Lark, Transformer
from tqdm import tqdm

from courses.util import semester_suffix_map_inv, translate_semester_inv


"""
This file contains functionality for parsing the SQL dump files that we get from ISC.
Expand Down Expand Up @@ -72,7 +74,14 @@ def row(self, items):
to generate a row in a quasi-JSON format. Who needs MongoDB?
"""
_, col_names, values = items
return dict(zip(col_names, values))
row_dict = dict(zip(col_names, values))

# Convert new OpenData API semesters to internal semesters
if "TERM" in row_dict:
stripped_term = row_dict["TERM"].strip()
if stripped_term[-2:] in semester_suffix_map_inv:
row_dict["TERM"] = translate_semester_inv(stripped_term)
return row_dict

def TOKEN(self, items):
"""
Expand Down
141 changes: 74 additions & 67 deletions backend/review/management/commands/iscimport.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,16 @@
ISC_DESC_TABLE = "TEST_PCR_COURSE_DESC_V"


def assert_semesters_not_current(semesters):
current_semester = get_current_semester()
for semester in semesters:
if semester == current_semester:
raise ValueError(
f"You cannot import reviews for the current semester ({current_semester}). "
f"Did you forget to update the SEMESTER option in the Django admin console?"
)


class Command(BaseCommand):
help = """
Import course review data from the zip of mysqldump files that we get from ISC every semester.
Expand Down Expand Up @@ -150,7 +160,7 @@ def handle(self, *args, **kwargs):
import_all = kwargs["import_all"]
s3_bucket = kwargs["s3_bucket"]
is_zip_file = kwargs["zip"] or s3_bucket is not None
summary_file = kwargs["summary_file"]
summary_file = kwargs["summary_file"] # either summary table or summary hist table
import_details = kwargs["import_details"]
import_descriptions = kwargs["import_descriptions"]
show_progress_bar = kwargs["show_progress_bar"]
Expand All @@ -164,13 +174,7 @@ def handle(self, *args, **kwargs):
"Must define semester with (-s) or explicitly import all semesters with (-a)."
)
if semesters is not None:
current_semester = get_current_semester()
for semester in semesters:
if semester == current_semester:
raise ValueError(
f"You cannot import reviews for the current semester ({current_semester}). "
f"Did you forget to update the SEMESTER option in the Django admin console?"
)
assert_semesters_not_current(semesters)

if s3_bucket is not None:
fp = "/tmp/pcrdump.zip"
Expand All @@ -185,63 +189,66 @@ def handle(self, *args, **kwargs):
"modified if the whole script succeeds."
)

with transaction.atomic(): # Only commit changes if the whole script succeeds
# TODO: When we import details and crosslistings, get their data here too.
tables_to_get = [summary_file]
idx = 1
detail_idx = -1
if import_details:
tables_to_get.append(ISC_RATING_TABLE)
detail_idx = idx
idx += 1

description_idx = -1
if import_descriptions:
tables_to_get.append(ISC_DESC_TABLE)
description_idx = idx
idx += 1

files = self.get_files(src, is_zip_file, tables_to_get)

summary_fo = files[0]
print("Loading summary file...")
summary_rows = load_sql_dump(summary_fo, progress=show_progress_bar, lazy=False)
tables_to_get = [summary_file]
idx = 1
detail_idx = -1
if import_details:
tables_to_get.append(ISC_RATING_TABLE)
detail_idx = idx
idx += 1

description_idx = -1
if import_descriptions:
tables_to_get.append(ISC_DESC_TABLE)
description_idx = idx
idx += 1

files = self.get_files(src, is_zip_file, tables_to_get)
summary_fo = files[0]

print("Loading summary file...")
summary_rows = load_sql_dump(summary_fo, progress=show_progress_bar, lazy=False)
gc.collect()
print("SQL parsed and loaded!")
if not import_all:
full_len = len(summary_rows)
summary_rows = [r for r in summary_rows if r["TERM"] in semesters]
gc.collect()
print("SQL parsed and loaded!")

if not import_all:
full_len = len(summary_rows)
summary_rows = [r for r in summary_rows if r["TERM"] in semesters]
gc.collect()
filtered_len = len(summary_rows)
print(f"Filtered {full_len} rows down to {filtered_len} rows.")

semesters = sorted(list({r["TERM"] for r in summary_rows}))
gc.collect()
to_delete = Review.objects.filter(section__course__semester__in=semesters)
delete_count = to_delete.count()

if delete_count > 0:
if not force:
prompt = input(
f"This import will overwrite {delete_count} rows that have already been"
+ "imported. Continue? (y/N) "
filtered_len = len(summary_rows)
print(f"Filtered {full_len} rows down to {filtered_len} rows.")
semesters = sorted(list({r["TERM"] for r in summary_rows}))
gc.collect()

for semester in semesters:
print(f"Loading {semester}...")
with transaction.atomic(): # Commit changes if all imports for the semester succeed
to_delete = Review.objects.filter(section__course__semester=semester)
delete_count = to_delete.count()
if delete_count > 0:
if not force:
prompt = input(
f"This import will overwrite {delete_count} rows that have already been"
+ "imported. Continue? (y/N) "
)
if prompt.strip().upper() != "Y":
print("Aborting...")
return 0

print(
f"Deleting {delete_count} existing reviews for {semester} "
"from the database..."
)
if prompt.strip().upper() != "Y":
print("Aborting...")
return 0
to_delete.delete()

print(
f"Deleting {delete_count} existing reviews for semesters from the database..."
print(f"Importing reviews for semester {semester}")
stats = import_summary_rows(
(r for r in summary_rows if r["TERM"] == semester), show_progress_bar
)
to_delete.delete()

print(f"Importing reviews for semester(s) {', '.join(semesters)}")
stats = import_summary_rows(summary_rows, show_progress_bar)
print(stats)
print(stats)

gc.collect()
gc.collect()

with transaction.atomic():
if import_details:
print("Loading details file...")
stats = import_ratings_rows(
Expand All @@ -260,16 +267,16 @@ def handle(self, *args, **kwargs):
)
print(stats)

self.close_files(files)
# invalidate cached views
print("Invalidating cache...")
del_count = clear_cache()
print(f"{del_count if del_count >=0 else 'all'} cache entries removed.")
self.close_files(files)
# invalidate cached views
print("Invalidating cache...")
del_count = clear_cache()
print(f"{del_count if del_count >=0 else 'all'} cache entries removed.")

gc.collect()
gc.collect()

print("Recomputing Section.has_reviews...")
recompute_has_reviews()
print("Recomputing Section.has_reviews...")
recompute_has_reviews()

print("Done.")
return 0
9 changes: 9 additions & 0 deletions backend/tests/review/test_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,15 @@ def test_parse_descriptions(self):
self.assertIsInstance(parse, dict)
self.assertDictEqual(expected, parse)

def test_semester_transformation(self):
query = """
INSERT into PCRDEV.TEST_PCR_COURSE_DESC_V
(TERM) Values ('202230');
"""
parse = parse_row(query)
expected = {"TERM": "2022C"}
self.assertDictEqual(parse, expected)


class ReviewImportTestCase(TestCase):
def setUp(self):
Expand Down

0 comments on commit f22d2e9

Please sign in to comment.