Skip to content

Commit

Permalink
Merge pull request #183 from g0v/more-str-obfuscation
Browse files Browse the repository at this point in the history
More str obfuscation
  • Loading branch information
ddio authored Sep 26, 2024
2 parents 79b2d25 + 068c4b7 commit 9f3901e
Show file tree
Hide file tree
Showing 5 changed files with 21 additions and 9 deletions.
2 changes: 1 addition & 1 deletion scrapy-tw-rental-house/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "scrapy-tw-rental-house"
version = "1.4.0"
version = "1.4.1"
description = "Scrapy spider for TW Rental House"
readme = "README.md"
authors = ["ddio <[email protected]>"]
Expand Down
16 changes: 14 additions & 2 deletions scrapy-tw-rental-house/scrapy_twrh/spiders/rental591/util.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import re
from collections import namedtuple
from scrapy.http import Response
from scrapy_twrh.spiders.util import clean_number
Expand Down Expand Up @@ -45,20 +46,31 @@ def reorder_inline_flex_dom(base: Response, selector):
items = base.css(selector)
ret = []
for item in items:
# child span may contain style="display:inline-flex;"
# child span may contain style="display:inline-flex;flex-direction:row-reverse;"
i_list = item.css('span[style*=display\\:inline-flex] > i')
plain_value = item.xpath('text()').get()
if plain_value is not None:
ret.append(plain_value)
elif i_list:
# check if it's reversed, find all values of flex-direction
container_style = item.css('span[style*=display\\:inline-flex]::attr(style)').get()

# the style may declare flex-direction more than once; the last declaration wins
flex_directions = re.findall(r'flex-direction: ?([\w-]+)', container_style)
order_base = 1
if flex_directions:
last_flex_direction = flex_directions[-1]
if last_flex_direction == 'row-reverse':
order_base = -1
# store each <i> element's order (from its style "order" value) and its ::text content
shuffled_list = []
for i in i_list:
order = i.css('::attr(style)').re_first(r'order:(\d+)')
order = int(order) * order_base
text = i.css('::text').get()
shuffled_list.append((order, text))
# sort by order
shuffled_list.sort(key=lambda x: int(x[0]))
shuffled_list.sort(key=lambda x: x[0])
ret.append(''.join(map(lambda x: x[1], shuffled_list)))
return ret

Expand Down
2 changes: 1 addition & 1 deletion scrapy-twrh-example/poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions twrh-dataset/poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion twrh-dataset/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ readme = "README.md"
python = "^3.10"
# cffi = "==1.13.2"
django = "^5"
scrapy-tw-rental-house = "==1.4.0"
scrapy-tw-rental-house = "==1.4.1"
psycopg2-binary = "^2.9.9"
pylint-django = "^2.5.5"
sentry-sdk = "^1.39.1"
Expand Down

0 comments on commit 9f3901e

Please sign in to comment.