From 8a0c2000469b179f1928951f80176fb7c2b494e5 Mon Sep 17 00:00:00 2001 From: Cullen Watson Date: Sat, 3 Feb 2024 07:20:19 -0600 Subject: [PATCH] fix(zr): date posted --- pyproject.toml | 2 +- src/jobspy/scrapers/glassdoor/__init__.py | 2 ++ src/jobspy/scrapers/ziprecruiter/__init__.py | 15 ++------------- 3 files changed, 5 insertions(+), 14 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 8680452..c816a16 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "python-jobspy" -version = "1.1.40" +version = "1.1.41" description = "Job scraper for LinkedIn, Indeed, Glassdoor & ZipRecruiter" authors = ["Zachary Hampton ", "Cullen Watson "] homepage = "https://github.com/Bunsly/JobSpy" diff --git a/src/jobspy/scrapers/glassdoor/__init__.py b/src/jobspy/scrapers/glassdoor/__init__.py index 0852c2e..893357c 100644 --- a/src/jobspy/scrapers/glassdoor/__init__.py +++ b/src/jobspy/scrapers/glassdoor/__init__.py @@ -246,6 +246,8 @@ def get_location(self, location: str, is_remote: bool) -> (int, str): location_type = "CITY" elif location_type == "S": location_type = "STATE" + elif location_type == 'N': + location_type = "COUNTRY" return int(items[0]["locationId"]), location_type @staticmethod diff --git a/src/jobspy/scrapers/ziprecruiter/__init__.py b/src/jobspy/scrapers/ziprecruiter/__init__.py index d0582cf..2b07f33 100644 --- a/src/jobspy/scrapers/ziprecruiter/__init__.py +++ b/src/jobspy/scrapers/ziprecruiter/__init__.py @@ -6,8 +6,7 @@ """ import math import time -import re -from datetime import datetime, date +from datetime import datetime, timezone from typing import Optional, Tuple, Any from bs4 import BeautifulSoup @@ -119,17 +118,7 @@ def process_job(self, job: dict) -> JobPost | None: job_type = ZipRecruiterScraper.get_job_type_enum( job.get("employment_type", "").replace("_", "").lower() ) - - save_job_url = job.get("SaveJobURL", "") - posted_time_match = re.search( - r"posted_time=(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z)", save_job_url - ) - if posted_time_match: - date_time_str = posted_time_match.group(1) - date_posted_obj = datetime.strptime(date_time_str, "%Y-%m-%dT%H:%M:%SZ") - date_posted = date_posted_obj.date() - else: - date_posted = date.today() + date_posted = datetime.fromisoformat(job['posted_time'].rstrip("Z")).date() return JobPost( title=title,