Skip to content

Commit

Permalink
fix(glassdoor): retry
Browse files: browse the repository at this point in the history
  • Loading branch information
cullenwatson committed Jan 3, 2024
1 parent 33d442b commit 3c19942
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 8 deletions.
8 changes: 4 additions & 4 deletions src/jobspy/scrapers/glassdoor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def __init__(self, proxy: Optional[str] = None):
"""
Initializes GlassdoorScraper with the Glassdoor job search url
"""
site = Site(Site.ZIP_RECRUITER)
site = Site(Site.GLASSDOOR)
super().__init__(site, proxy=proxy)

self.url = None
Expand All @@ -49,7 +49,7 @@ def fetch_jobs_page(
payload = self.add_payload(
scraper_input, location_id, location_type, page_num, cursor
)
session = create_session(self.proxy, is_tls=False)
session = create_session(self.proxy, is_tls=False, has_retry=True)
response = session.post(
f"{self.url}/graph", headers=self.headers(), timeout=10, data=payload
)
Expand Down Expand Up @@ -171,7 +171,7 @@ def get_location(self, location: str, is_remote: bool) -> (int, str):
if not location or is_remote:
return "11047", "STATE" # remote options
url = f"{self.url}/findPopularLocationAjax.htm?maxLocationsToReturn=10&term={location}"
session = create_session(self.proxy)
session = create_session(self.proxy, has_retry=True)
response = session.get(url)
if response.status_code != 200:
raise GlassdoorException(
Expand All @@ -194,7 +194,7 @@ def add_payload(
location_type: str,
page_num: int,
cursor: str | None = None,
) -> dict[str, str | Any]:
) -> str:
payload = {
"operationName": "JobSearchResultsQuery",
"variables": {
Expand Down
20 changes: 16 additions & 4 deletions src/jobspy/scrapers/utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import re
import numpy as np

import requests
import tls_client
import requests
from requests.adapters import HTTPAdapter, Retry

from ..jobs import JobType


Expand All @@ -27,11 +29,11 @@ def extract_emails_from_text(text: str) -> list[str] | None:
return email_regex.findall(text)


def create_session(proxy: dict | None = None, is_tls: bool = True):
def create_session(proxy: dict | None = None, is_tls: bool = True, has_retry: bool = False):
"""
Creates a tls client session
Creates a requests session with optional tls, proxy, and retry settings.
:return: A session object with or without proxies.
:return: A session object
"""
if is_tls:
session = tls_client.Session(
Expand All @@ -44,6 +46,16 @@ def create_session(proxy: dict | None = None, is_tls: bool = True):
session.allow_redirects = True
if proxy:
session.proxies.update(proxy)
if has_retry:
retries = Retry(total=3,
connect=3,
status=3,
status_forcelist=[500, 502, 503, 504, 429],
backoff_factor=1)
adapter = HTTPAdapter(max_retries=retries)

session.mount('http://', adapter)
session.mount('https://', adapter)

return session

Expand Down

0 comments on commit 3c19942

Please sign in to comment.