From 92fb4841a2dad7adb3125b99834888fc15df38f8 Mon Sep 17 00:00:00 2001 From: Beda Kosata Date: Thu, 7 Sep 2023 14:03:09 +0200 Subject: [PATCH] postpone timezone regex evaluation until first use - shaves off time from package import --- dateparser/timezone_parser.py | 42 +++++++++++++++++++++++++++++------ dateparser/utils/__init__.py | 6 ++--- 2 files changed, 38 insertions(+), 10 deletions(-) diff --git a/dateparser/timezone_parser.py b/dateparser/timezone_parser.py index c7a284ecb..20193b3ea 100644 --- a/dateparser/timezone_parser.py +++ b/dateparser/timezone_parser.py @@ -32,8 +32,8 @@ def __getinitargs__(self): def pop_tz_offset_from_string(date_string, as_offset=True): - if _search_regex_ignorecase.search(date_string): - for name, info in _tz_offsets: + if TzRegexCache.search_regex_ignorecase().search(date_string): + for name, info in TzRegexCache.tz_offsets(): timezone_re = info["regex"] timezone_match = timezone_re.search(date_string) if timezone_match: @@ -47,7 +47,7 @@ def pop_tz_offset_from_string(date_string, as_offset=True): def word_is_tz(word): - return bool(_search_regex.match(word)) + return bool(TzRegexCache.search_regex().match(word)) def convert_to_local_tz(datetime_obj, datetime_tz_offset): @@ -85,8 +85,36 @@ def get_local_tz_offset(): return offset -_search_regex_parts = [] -_tz_offsets = list(build_tz_offsets(_search_regex_parts)) -_search_regex = re.compile("|".join(_search_regex_parts)) -_search_regex_ignorecase = re.compile("|".join(_search_regex_parts), re.IGNORECASE) +class TzRegexCache: + _ready = False + _search_regex_parts = [] + _tz_offsets = [] + _search_regex = None + _search_regex_ignorecase = None + + @classmethod + def prepare(cls): + if not cls._ready: + cls._search_regex_parts = [] + cls._tz_offsets = list(build_tz_offsets(cls._search_regex_parts)) + cls._search_regex = re.compile("|".join(cls._search_regex_parts)) + cls._search_regex_ignorecase = re.compile("|".join(cls._search_regex_parts), re.IGNORECASE) + cls._ready = True + + @classmethod + def tz_offsets(cls): + cls.prepare() + return cls._tz_offsets + + @classmethod + def search_regex(cls): + cls.prepare() + return cls._search_regex + + @classmethod + def search_regex_ignorecase(cls): + cls.prepare() + return cls._search_regex_ignorecase + + local_tz_offset = get_local_tz_offset() diff --git a/dateparser/utils/__init__.py b/dateparser/utils/__init__.py index 023c5fbb3..5c3679fe0 100644 --- a/dateparser/utils/__init__.py +++ b/dateparser/utils/__init__.py @@ -9,7 +9,7 @@ from pytz import UTC, UnknownTimeZoneError, timezone from tzlocal import get_localzone -from dateparser.timezone_parser import StaticTzInfo, _tz_offsets +from dateparser.timezone_parser import StaticTzInfo, TzRegexCache def strip_braces(date_string): @@ -73,7 +73,7 @@ def get_timezone_from_tz_string(tz_string): try: return timezone(tz_string) except UnknownTimeZoneError as e: - for name, info in _tz_offsets: + for name, info in TzRegexCache.tz_offsets(): if info["regex"].search(" %s" % tz_string): return StaticTzInfo(name, info["offset"]) else: @@ -104,7 +104,7 @@ def apply_tzdatabase_timezone(date_time, pytz_string): def apply_dateparser_timezone(utc_datetime, offset_or_timezone_abb): - for name, info in _tz_offsets: + for name, info in TzRegexCache.tz_offsets(): if info["regex"].search(" %s" % offset_or_timezone_abb): tz = StaticTzInfo(name, info["offset"]) return utc_datetime.astimezone(tz)