diff --git a/CHANGELOG b/CHANGELOG.md
similarity index 98%
rename from CHANGELOG
rename to CHANGELOG.md
index 5ee37bd0..66d8cbab 100644
--- a/CHANGELOG
+++ b/CHANGELOG.md
@@ -7,10 +7,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+### Deprecated
+
+- `--type user` is now deprecated (will be removed in next major)
+
 ### Fixed
 
 - Ignore empty playlists (#340)
 
+### Changed
+
+- Merge behaviors of user/channel types and add support for `forHandle` (#339, fix for #338)
+
 ## [3.1.0] - 2024-09-05
 
 ### Added
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 46e3beaf..5543d444 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -41,11 +41,11 @@ To add a new locale (`fr` in this example, use only ISO-639-1):
 ## releasing
 
 * Update your dependencies: `pip install -U setuptools wheel twine`
-* Make sure CHANGELOG is up-to-date
+* Make sure CHANGELOG.md is up-to-date
 * Bump version on `youtube2zim/VERSION`
 * Build packages `python ./setup.py sdist bdist_wheel`
 * Upload to PyPI `twine upload dist/youtube2zim-2.0.0*`.
-* Commit your CHANGELOG + version bump changes
+* Commit your CHANGELOG.md + version bump changes
 * Tag version on git `git tag -a v2.0.0`
 
 ## developing the ZIM UI in Vue.JS
diff --git a/Dockerfile b/Dockerfile
index e8429a67..29eee8df 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -35,7 +35,7 @@ RUN pip install --no-cache-dir /src/scraper
 
 # Copy code + associated artifacts
 COPY scraper/src /src/scraper/src
-COPY *.md LICENSE CHANGELOG /src/
+COPY *.md LICENSE CHANGELOG.md /src/
 
 # Install + cleanup
 RUN pip install --no-cache-dir /src/scraper \
diff --git a/scraper/src/youtube2zim/scraper.py b/scraper/src/youtube2zim/scraper.py
index 45f7d986..843c48e7 100644
--- a/scraper/src/youtube2zim/scraper.py
+++ b/scraper/src/youtube2zim/scraper.py
@@ -123,6 +123,12 @@ def __init__(
     ):
         # data-retrieval info
         self.collection_type = collection_type
+        if self.collection_type == USER:
+            logger.warning(
+                "Collection type 'user' is deprecated. Please use 'channel' type,"
+                " behaviors have been merged. 'user' type is going to be dropped in "
+                " next major release"
+            )
         self.youtube_id = youtube_id
         self.api_key = api_key
         self.dateafter = dateafter
diff --git a/scraper/src/youtube2zim/youtube.py b/scraper/src/youtube2zim/youtube.py
index f5c64093..54a0f6ed 100644
--- a/scraper/src/youtube2zim/youtube.py
+++ b/scraper/src/youtube2zim/youtube.py
@@ -80,32 +80,37 @@ def credentials_ok():
         return False
 
 
-def get_channel_json(channel_id, *, for_username=False):
+def get_channel_json(channel_id):
     """fetch or retieve-save and return the Youtube ChannelResult JSON"""
     fname = f"channel_{channel_id}"
     channel_json = load_json(YOUTUBE.cache_dir, fname)
     if channel_json is None:
-        logger.debug(f"query youtube-api for Channel #{channel_id}")
-        req = requests.get(
-            CHANNELS_API,
-            params={
-                "forUsername" if for_username else "id": channel_id,
-                "part": "brandingSettings,snippet,contentDetails",
-                "key": YOUTUBE.api_key,
-            },
-            timeout=REQUEST_TIMEOUT,
-        )
-        if req.status_code >= HTTPStatus.BAD_REQUEST:
-            logger.error(f"HTTP {req.status_code} Error response: {req.text}")
-        req.raise_for_status()
-        try:
-            channel_json = req.json()["items"][0]
-        except (KeyError, IndexError):
-            if for_username:
-                logger.error(f"Invalid username `{channel_id}`: Not Found")
-            else:
-                logger.error(f"Invalid channelId `{channel_id}`: Not Found")
-            raise
+        # channel_id may be a handle, a channel ID or a legacy username: try each
+        for criteria in ["forHandle", "id", "forUsername"]:
+            logger.debug(f"query youtube-api for {channel_id} by {criteria}")
+            req = requests.get(
+                CHANNELS_API,
+                params={
+                    criteria: channel_id,
+                    "part": "brandingSettings,snippet,contentDetails",
+                    "key": YOUTUBE.api_key,
+                },
+                timeout=REQUEST_TIMEOUT,
+            )
+            if req.status_code >= HTTPStatus.BAD_REQUEST:
+                logger.error(f"HTTP {req.status_code} Error response: {req.text}")
+            req.raise_for_status()
+            req_json = req.json()
+            # "items" is missing (or empty) when nothing matches this criteria
+            if not req_json.get("items"):
+                logger.warning(f"Failed to find {channel_id} by {criteria}")
+                continue
+            channel_json = req_json["items"][0]
+            # first match wins: stop here so later criteria neither waste API
+            # quota nor overwrite the channel that was just found
+            break
+        if channel_json is None:
+            raise Exception(f"Impossible to find {channel_id}, check for typos")
         save_json(YOUTUBE.cache_dir, fname, channel_json)
     return channel_json
 
@@ -324,13 +329,8 @@ def extract_playlists_details_from(collection_type, youtube_id):
     uploads_playlist_id = None
     main_channel_id = None
     if collection_type in (USER, CHANNEL):
-        if collection_type == USER:
-            # youtube_id is a Username, fetch actual channelId through channel
-            channel_json = get_channel_json(youtube_id, for_username=True)
-        else:
-            # youtube_id is a channelId
-            channel_json = get_channel_json(youtube_id)
-
+        # get_channel_json can resolve both usernames and channel IDs
+        channel_json = get_channel_json(youtube_id)
         main_channel_id = channel_json["id"]
 
         # retrieve list of playlists for that channel