From 173bc3b2102b7ff21dde0a0c625f563f1a1202fa Mon Sep 17 00:00:00 2001 From: ddio Date: Tue, 29 Oct 2024 15:12:49 +0800 Subject: [PATCH] bug: remove comma inside string in NUXT hydration --- scrapy-tw-rental-house/scrapy_twrh/spiders/rental591/util.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scrapy-tw-rental-house/scrapy_twrh/spiders/rental591/util.py b/scrapy-tw-rental-house/scrapy_twrh/spiders/rental591/util.py index 3128f20..1fc6750 100644 --- a/scrapy-tw-rental-house/scrapy_twrh/spiders/rental591/util.py +++ b/scrapy-tw-rental-house/scrapy_twrh/spiders/rental591/util.py @@ -148,6 +148,9 @@ def list_values(self): # dirty hack 1, remove comma from "12,345" XD value_str = re.sub(r'"(\d+),(\d+)"', r'\1\2', value_str) + # dirty hack 2, we won't need "市中心,拎包入住,含車位" for now. + # we have to remove the comma in the string + value_str = re.sub(r'"(([^\u0000-\u007F]|\\)[^",]*),[^"]+"', r'\1', value_str) ret = [] for raw_value in value_str.split(','): # remove leading and trailing double quotes