-
Notifications
You must be signed in to change notification settings - Fork 5
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Handle None
values in vrpt data
#83
base: master
Are you sure you want to change the base?
Changes from all commits
8974733
7cb09a8
4d802cd
275232c
d0a23a9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,4 +2,4 @@ | |
__author__ = "John Westbrook" | ||
__email__ = "[email protected]" | ||
__license__ = "Apache 2.0" | ||
__version__ = "1.725" | ||
__version__ = "1.727" |
Original file line number | Diff line number | Diff line change | ||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -13,6 +13,8 @@ | |||||||||||||||||
# to minimize costly function calls for simple casts. | ||||||||||||||||||
# 24-Mar-2019 jdw adjust null value filtering | ||||||||||||||||||
# 4-Apr-2022 bv handle embedded iterable float values in 'castIterableFloat' method | ||||||||||||||||||
# 21-Dec-2024 bv Skip integers that exceed max int32 (2147483647) | ||||||||||||||||||
# 23-Dec-2024 bv Handle "None" values in vrpt data | ||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Fix/update the date to 2025 |
||||||||||||||||||
## | ||||||||||||||||||
""" | ||||||||||||||||||
Factory for functional elements of the transformations between input data and | ||||||||||||||||||
|
@@ -89,6 +91,8 @@ def __init__(self, schemaDefAccessObj, filterType): | |||||||||||||||||
self.__transFlags["normalizeEnums"] = "normalize-enums" in filterType | ||||||||||||||||||
self.__transFlags["translateXMLCharRefs"] = "translateXMLCharRefs" in filterType | ||||||||||||||||||
self.__transFlags["normalizeDates"] = True | ||||||||||||||||||
# Can be added to filterType later if needed | ||||||||||||||||||
self.__transFlags["dropLargeIntegers"] = True | ||||||||||||||||||
logger.debug("FLAGS settings are %r", self.__transFlags) | ||||||||||||||||||
# | ||||||||||||||||||
self.__wsPattern = re.compile(r"\s+", flags=re.UNICODE | re.MULTILINE) | ||||||||||||||||||
|
@@ -217,7 +221,7 @@ def processRecord(self, tableId, row, attributeNameList, containerName=None): | |||||||||||||||||
if atName in dT["pureCast"]: | ||||||||||||||||||
if nullFlag and self.__transFlags["dropEmpty"]: | ||||||||||||||||||
continue | ||||||||||||||||||
if (row[ii] == "?") or (row[ii] == ".") or (row[ii]) == "": | ||||||||||||||||||
if (row[ii] == "?") or (row[ii] == ".") or (row[ii]) == "" or (row[ii]) == "None": | ||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. How about simply:
Suggested change
Also, do you want to check for |
||||||||||||||||||
if self.__transFlags["dropEmpty"]: | ||||||||||||||||||
continue | ||||||||||||||||||
else: | ||||||||||||||||||
|
@@ -227,7 +231,14 @@ def processRecord(self, tableId, row, attributeNameList, containerName=None): | |||||||||||||||||
if dT["pureCast"][atName] == "string": | ||||||||||||||||||
dD[dT["atNameD"][atName]] = row[ii] | ||||||||||||||||||
elif dT["pureCast"][atName] == "integer": | ||||||||||||||||||
dD[dT["atNameD"][atName]] = int(row[ii]) | ||||||||||||||||||
if abs(int(row[ii])) > 2147483647 and self.__transFlags["dropLargeIntegers"]: | ||||||||||||||||||
# Skip large integers | ||||||||||||||||||
logger.warning("Skipping large integer in entry %s table %s attribute %s", containerName, tableId, atName) | ||||||||||||||||||
continue | ||||||||||||||||||
Comment on lines
+234
to
+237
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Just a minor adjustment (swap the order of conditions to save time if the flag is
Suggested change
|
||||||||||||||||||
# Or set large integers to maxInt32 | ||||||||||||||||||
# dD[dT["atNameD"][atName]] = 2147483647 | ||||||||||||||||||
Comment on lines
+238
to
+239
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. delete
Suggested change
|
||||||||||||||||||
else: | ||||||||||||||||||
dD[dT["atNameD"][atName]] = int(row[ii]) | ||||||||||||||||||
elif dT["pureCast"][atName] == "float": | ||||||||||||||||||
dD[dT["atNameD"][atName]] = float(row[ii]) | ||||||||||||||||||
continue | ||||||||||||||||||
|
@@ -321,7 +332,7 @@ def castInteger(self, trfTup): | |||||||||||||||||
""" | ||||||||||||||||||
if trfTup.isNull: | ||||||||||||||||||
return trfTup | ||||||||||||||||||
if (trfTup.value == "?") or (trfTup.value == ".") or (trfTup.value is None) or (trfTup.value == ""): | ||||||||||||||||||
if (trfTup.value == "?") or (trfTup.value == ".") or (trfTup.value is None) or (trfTup.value == "") or (trfTup.value == "None"): | ||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Again here (as well as in the other identical lines further below (i.e., line 345, 357, 368)), how about:
Suggested change
|
||||||||||||||||||
return TrfValue(self.__nullValueOther, trfTup.atId, trfTup.origLength, True) | ||||||||||||||||||
return TrfValue(int(trfTup.value), trfTup.atId, trfTup.origLength, False) | ||||||||||||||||||
|
||||||||||||||||||
|
@@ -331,7 +342,7 @@ def castIterableInteger(self, trfTup): | |||||||||||||||||
""" | ||||||||||||||||||
if trfTup.isNull: | ||||||||||||||||||
return trfTup | ||||||||||||||||||
if (trfTup.value == "?") or (trfTup.value == ".") or (trfTup.value is None) or (trfTup.value == ""): | ||||||||||||||||||
if (trfTup.value == "?") or (trfTup.value == ".") or (trfTup.value is None) or (trfTup.value == "") or (trfTup.value == "None"): | ||||||||||||||||||
return TrfValue(self.__nullValueOther, trfTup.atId, trfTup.origLength, True) | ||||||||||||||||||
# vL = [int(v.strip()) for v in str(trfTup.value).split(self.__tObj.getIterableSeparator(trfTup.atId))] | ||||||||||||||||||
vL = [int(v.strip()) if v.strip() not in [".", "?"] else None for v in str(trfTup.value).split(self.__tObj.getIterableSeparator(trfTup.atId))] | ||||||||||||||||||
|
@@ -343,7 +354,7 @@ def castFloat(self, trfTup): | |||||||||||||||||
""" | ||||||||||||||||||
if trfTup.isNull: | ||||||||||||||||||
return trfTup | ||||||||||||||||||
if (trfTup.value == "?") or (trfTup.value == ".") or (trfTup.value is None) or (trfTup.value == ""): | ||||||||||||||||||
if (trfTup.value == "?") or (trfTup.value == ".") or (trfTup.value is None) or (trfTup.value == "") or (trfTup.value == "None"): | ||||||||||||||||||
return TrfValue(self.__nullValueOther, trfTup.atId, trfTup.origLength, True) | ||||||||||||||||||
return TrfValue(float(trfTup.value), trfTup.atId, trfTup.origLength, False) | ||||||||||||||||||
|
||||||||||||||||||
|
@@ -354,7 +365,7 @@ def castIterableFloat(self, trfTup): | |||||||||||||||||
# logger.info(">> atId %r value %r delimiter %r", trfTup.atId, trfTup.value, self.__tObj.getIterableSeparator(trfTup.atId)) | ||||||||||||||||||
if trfTup.isNull: | ||||||||||||||||||
return trfTup | ||||||||||||||||||
if (trfTup.value == "?") or (trfTup.value == ".") or (trfTup.value is None) or (trfTup.value == ""): | ||||||||||||||||||
if (trfTup.value == "?") or (trfTup.value == ".") or (trfTup.value is None) or (trfTup.value == "") or (trfTup.value == "None"): | ||||||||||||||||||
return TrfValue(self.__nullValueOther, trfTup.atId, trfTup.origLength, True) | ||||||||||||||||||
# vL = [float(v.strip()) for v in str(trfTup.value).split(self.__tObj.getIterableSeparator(trfTup.atId))] | ||||||||||||||||||
if not self.__tObj.isEmbeddedIterable(trfTup.atId): | ||||||||||||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Update the date to 2025