From 212decad00ee6c85fe2d9b56d8ab74df1d72d78b Mon Sep 17 00:00:00 2001 From: benoit74 Date: Fri, 13 Oct 2023 11:36:33 +0200 Subject: [PATCH 1/5] Adopt python bootstrap - move to hatch instead of setuptools - Python 3.11 - upgrade dependencies - re-arrange Dockerfile to optimize image / build time - many small code adaptations linked to issues raised by linter or type checker - rework CI/CD workflows - add dummy test to open the road to many more --- .github/workflows/Publish.yml | 44 +++ ...{docker.yml => PublishDockerDevImage.yaml} | 18 +- .github/workflows/QA.yml | 34 ++ .github/workflows/Tests.yml | 62 +++ .gitignore | 366 ++++++++++++++++-- .pre-commit-config.yaml | 27 ++ Dockerfile | 40 +- MANIFEST.in | 2 - README.md | 10 +- contrib/reencode_low_quality.py | 6 +- contrib/video_encoding_tester.py | 34 +- get_js_deps.sh | 2 +- hatch_build.py | 43 ++ pyproject.toml | 226 +++++++++++ requirements.txt | 11 - setup.py | 56 --- src/youtube2zim/__about__.py | 1 + {youtube2zim => src/youtube2zim}/__init__.py | 0 {youtube2zim => src/youtube2zim}/__main__.py | 3 +- {youtube2zim => src/youtube2zim}/constants.py | 8 +- .../youtube2zim}/entrypoint.py | 13 +- .../locale/fr/LC_MESSAGES/messages.mo | Bin .../locale/fr/LC_MESSAGES/messages.po | 0 .../youtube2zim}/locale/messages.pot | 0 .../youtube2zim}/playlists/__init__.py | 0 .../youtube2zim}/playlists/__main__.py | 3 +- .../youtube2zim}/playlists/entrypoint.py | 7 +- .../youtube2zim}/playlists/scraper.py | 60 +-- .../youtube2zim}/processing.py | 3 +- {youtube2zim => src/youtube2zim}/scraper.py | 110 ++++-- .../youtube2zim}/templates/article.html | 0 .../youtube2zim}/templates/assets/app.js | 0 .../youtube2zim}/templates/assets/article.css | 0 .../youtube2zim}/templates/assets/banner.jpg | Bin .../youtube2zim}/templates/assets/db.js | 0 .../templates/assets/font/Roboto-Black.ttf | Bin .../templates/assets/font/Roboto-Bold.ttf | Bin .../templates/assets/font/Roboto-Light.ttf | Bin .../assets/font/Roboto-LightItalic.ttf | Bin 
.../templates/assets/font/Roboto-Regular.ttf | Bin .../youtube2zim}/templates/assets/home.css | 0 .../youtube2zim}/templates/assets/sample.jpg | Bin .../templates/assets/webp-trigger.js | 0 .../youtube2zim}/templates/home.html | 0 {youtube2zim => src/youtube2zim}/utils.py | 23 +- {youtube2zim => src/youtube2zim}/youtube.py | 43 +- tasks.py | 109 ++++++ tests/test_dummy.py | 5 + youtube2zim/VERSION | 1 - 49 files changed, 1130 insertions(+), 240 deletions(-) create mode 100644 .github/workflows/Publish.yml rename .github/workflows/{docker.yml => PublishDockerDevImage.yaml} (64%) create mode 100644 .github/workflows/QA.yml create mode 100644 .github/workflows/Tests.yml create mode 100644 .pre-commit-config.yaml delete mode 100644 MANIFEST.in create mode 100644 hatch_build.py create mode 100644 pyproject.toml delete mode 100644 requirements.txt delete mode 100644 setup.py create mode 100644 src/youtube2zim/__about__.py rename {youtube2zim => src/youtube2zim}/__init__.py (100%) rename {youtube2zim => src/youtube2zim}/__main__.py (89%) rename {youtube2zim => src/youtube2zim}/constants.py (86%) rename {youtube2zim => src/youtube2zim}/entrypoint.py (97%) rename {youtube2zim => src/youtube2zim}/locale/fr/LC_MESSAGES/messages.mo (100%) rename {youtube2zim => src/youtube2zim}/locale/fr/LC_MESSAGES/messages.po (100%) rename {youtube2zim => src/youtube2zim}/locale/messages.pot (100%) rename {youtube2zim => src/youtube2zim}/playlists/__init__.py (100%) rename {youtube2zim => src/youtube2zim}/playlists/__main__.py (88%) rename {youtube2zim => src/youtube2zim}/playlists/entrypoint.py (94%) rename {youtube2zim => src/youtube2zim}/playlists/scraper.py (84%) rename {youtube2zim => src/youtube2zim}/processing.py (97%) rename {youtube2zim => src/youtube2zim}/scraper.py (91%) rename {youtube2zim => src/youtube2zim}/templates/article.html (100%) rename {youtube2zim => src/youtube2zim}/templates/assets/app.js (100%) rename {youtube2zim => src/youtube2zim}/templates/assets/article.css 
(100%) rename {youtube2zim => src/youtube2zim}/templates/assets/banner.jpg (100%) rename {youtube2zim => src/youtube2zim}/templates/assets/db.js (100%) rename {youtube2zim => src/youtube2zim}/templates/assets/font/Roboto-Black.ttf (100%) rename {youtube2zim => src/youtube2zim}/templates/assets/font/Roboto-Bold.ttf (100%) rename {youtube2zim => src/youtube2zim}/templates/assets/font/Roboto-Light.ttf (100%) rename {youtube2zim => src/youtube2zim}/templates/assets/font/Roboto-LightItalic.ttf (100%) rename {youtube2zim => src/youtube2zim}/templates/assets/font/Roboto-Regular.ttf (100%) rename {youtube2zim => src/youtube2zim}/templates/assets/home.css (100%) rename {youtube2zim => src/youtube2zim}/templates/assets/sample.jpg (100%) rename {youtube2zim => src/youtube2zim}/templates/assets/webp-trigger.js (100%) rename {youtube2zim => src/youtube2zim}/templates/home.html (100%) rename {youtube2zim => src/youtube2zim}/utils.py (59%) rename {youtube2zim => src/youtube2zim}/youtube.py (90%) create mode 100644 tasks.py create mode 100644 tests/test_dummy.py delete mode 100644 youtube2zim/VERSION diff --git a/.github/workflows/Publish.yml b/.github/workflows/Publish.yml new file mode 100644 index 00000000..8b50a45f --- /dev/null +++ b/.github/workflows/Publish.yml @@ -0,0 +1,44 @@ +name: Build and upload to PyPI + +on: + release: + types: [published] + +jobs: + publish: + runs-on: ubuntu-22.04 + permissions: + id-token: write # mandatory for PyPI trusted publishing + + steps: + - uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version-file: pyproject.toml + architecture: x64 + + - name: Build packages + run: | + pip install -U pip build + python -m build --sdist --wheel + + - name: Upload to PyPI + uses: pypa/gh-action-pypi-publish@release/v1.8 + with: + packages-dir: dist/ + + - name: Build and push Docker image + uses: openzim/docker-publish-action@v10 + with: + image-name: openzim/youtube + tag-pattern: /^v([0-9.]+)$/ + 
latest-on-tag: true + restrict-to: openzim/youtube + registries: ghcr.io + credentials: + GHCRIO_USERNAME=${{ secrets.GHCR_USERNAME }} + GHCRIO_TOKEN=${{ secrets.GHCR_TOKEN }} + repo_description: auto + repo_overview: auto diff --git a/.github/workflows/docker.yml b/.github/workflows/PublishDockerDevImage.yaml similarity index 64% rename from .github/workflows/docker.yml rename to .github/workflows/PublishDockerDevImage.yaml index 1310e0b9..c52eb014 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/PublishDockerDevImage.yaml @@ -1,25 +1,23 @@ -name: Docker +name: Publish Docker dev image on: push: branches: - main - tags: - - v* jobs: - build-and-push: - name: Deploy Docker Image + publish: runs-on: ubuntu-22.04 + steps: - - uses: actions/checkout@v3.4.0 - - name: Build and push + - uses: actions/checkout@v3 + + - name: Build and push Docker image uses: openzim/docker-publish-action@v10 with: image-name: openzim/youtube - on-master: dev - tag-pattern: /^v([0-9.]+)$/ - latest-on-tag: true + manual-tag: dev + latest-on-tag: false restrict-to: openzim/youtube registries: ghcr.io credentials: diff --git a/.github/workflows/QA.yml b/.github/workflows/QA.yml new file mode 100644 index 00000000..1c2f822b --- /dev/null +++ b/.github/workflows/QA.yml @@ -0,0 +1,34 @@ +name: QA + +on: + pull_request: + push: + branches: + - main + +jobs: + check-scraper-qa: + runs-on: ubuntu-22.04 + + steps: + - uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version-file: pyproject.toml + architecture: x64 + + - name: Install dependencies + run: | + pip install -U pip + pip install -e .[lint,check,scripts,test] + + - name: Check black formatting + run: inv lint-black + + - name: Check ruff + run: inv lint-ruff + + - name: Check pyright + run: inv check-pyright \ No newline at end of file diff --git a/.github/workflows/Tests.yml b/.github/workflows/Tests.yml new file mode 100644 index 00000000..4d938be9 --- /dev/null +++ 
b/.github/workflows/Tests.yml @@ -0,0 +1,62 @@ +name: Tests + +on: + pull_request: + push: + branches: + - main + +jobs: + test-scraper: + runs-on: ubuntu-22.04 + + steps: + - uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version-file: pyproject.toml + architecture: x64 + + - name: Install dependencies (and project) + run: | + pip install -U pip + pip install -e .[test,scripts] + + - name: Run the tests + run: inv coverage --args "-vvv" + + - name: Upload coverage report to codecov + uses: codecov/codecov-action@v3 + with: + token: ${{ secrets.CODECOV_TOKEN }} + + build-scraper: + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version-file: pyproject.toml + architecture: x64 + + - name: Ensure we can build Python targets + run: | + pip install -U pip build + python3 -m build --sdist --wheel + + build-docker: + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v3 + + - name: Ensure we can build the Docker image + run: | + docker build -t testimage . 
+ + - name: Ensure we can start the Docker image + run: | + docker run --rm testimage diff --git a/.gitignore b/.gitignore index 2400d668..1788d792 100644 --- a/.gitignore +++ b/.gitignore @@ -1,27 +1,211 @@ +# Created by https://www.toptal.com/developers/gitignore/api/linux,macos,python,database,visualstudiocode,intellij +# Edit at https://www.toptal.com/developers/gitignore?templates=linux,macos,python,database,visualstudiocode,intellij + +### Database ### +*.accdb +*.db +*.dbf +*.mdb +*.pdb +*.sqlite3 +*.db-shm +*.db-wal + +### Intellij ### +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/**/usage.statistics.xml +.idea/**/dictionaries +.idea/**/shelf + +# AWS User-specific +.idea/**/aws.xml + +# Generated files +.idea/**/contentModel.xml + +# Sensitive or high-churn files +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml +.idea/**/dbnavigator.xml + +# Gradle +.idea/**/gradle.xml +.idea/**/libraries + +# Gradle and Maven with auto-import +# When using Gradle or Maven with auto-import, you should exclude module files, +# since they will be recreated, and may cause churn. Uncomment if using +# auto-import. 
+# .idea/artifacts +# .idea/compiler.xml +# .idea/jarRepositories.xml +# .idea/modules.xml +# .idea/*.iml +# .idea/modules +# *.iml +# *.ipr + +# CMake +cmake-build-*/ + +# Mongo Explorer plugin +.idea/**/mongoSettings.xml + +# File-based project format +*.iws + +# IntelliJ +out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Cursive Clojure plugin +.idea/replstate.xml + +# SonarLint plugin +.idea/sonarlint/ + +# Crashlytics plugin (for Android Studio and IntelliJ) +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties +fabric.properties + +# Editor-based Rest Client +.idea/httpRequests + +# Android studio 3.1+ serialized cache file +.idea/caches/build_file_checksums.ser + +### Intellij Patch ### +# Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721 + +# *.iml +# modules.xml +# .idea/misc.xml +# *.ipr + +# Sonarlint plugin +# https://plugins.jetbrains.com/plugin/7973-sonarlint +.idea/**/sonarlint/ + +# SonarQube Plugin +# https://plugins.jetbrains.com/plugin/7238-sonarqube-community-plugin +.idea/**/sonarIssues.xml + +# Markdown Navigator plugin +# https://plugins.jetbrains.com/plugin/7896-markdown-navigator-enhanced +.idea/**/markdown-navigator.xml +.idea/**/markdown-navigator-enh.xml +.idea/**/markdown-navigator/ + +# Cache file creation bug +# See https://youtrack.jetbrains.com/issue/JBR-2257 +.idea/$CACHE_FILE$ + +# CodeStream plugin +# https://plugins.jetbrains.com/plugin/12206-codestream +.idea/codestream.xml + +# Azure Toolkit for IntelliJ plugin +# https://plugins.jetbrains.com/plugin/8053-azure-toolkit-for-intellij +.idea/**/azureSettings.xml + +### Linux ### +*~ + +# temporary files which can be created if a process still has a handle open of a deleted file +.fuse_hidden* + +# KDE directory preferences +.directory + +# Linux trash folder which might appear on any partition or disk +.Trash-* + +# .nfs files are created when an open file 
is removed but is still being accessed +.nfs* + +### macOS ### +# General +.DS_Store +.AppleDouble +.LSOverride + +# Icon must end with two \r +Icon + + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + +### macOS Patch ### +# iCloud generated files +*.icloud + +### Python ### # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] +*$py.class # C extensions *.so # Distribution / packaging .Python -env/ -venv/ -bin/ build/ develop-eggs/ dist/ +downloads/ eggs/ +.eggs/ lib/ lib64/ parts/ sdist/ var/ +wheels/ +share/python-wheels/ *.egg-info/ .installed.cfg *.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec # Installer logs pip-log.txt @@ -30,44 +214,168 @@ pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ +.nox/ .coverage +.coverage.* .cache nosetests.xml coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ -# Mr Developer -.mr.developer.cfg -.project -.pydevproject - -# Rope -.ropeproject +# Translations +*.mo +*.pot # Django stuff: *.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy # Sphinx documentation docs/_build/ -.DS_Store -*.sublime-project -*.sublime-workspace -*.tar.bz2 -*.db -*~ -static -rdf-files -dl-cache +# PyBuilder +.pybuilder/ +target/ -*.zip -build/ -dist/ +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. 
+# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +### Python Patch ### +# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration +poetry.toml + +# ruff +.ruff_cache/ + +# LSP config files +pyrightconfig.json + +### VisualStudioCode ### +.vscode/* +!.vscode/settings.json +!.vscode/tasks.json +!.vscode/launch.json +!.vscode/extensions.json +!.vscode/*.code-snippets + +# Local History for Visual Studio Code +.history/ + +# Built Visual Studio Code Extensions +*.vsix + +### VisualStudioCode Patch ### +# Ignore all local history of files +.history +.ionide + +# End of https://www.toptal.com/developers/gitignore/api/linux,macos,python,database,visualstudiocode,intellij # JS deps -youtube2zim/templates/assets/chosen/ -youtube2zim/templates/assets/jquery.min.js -youtube2zim/templates/assets/ogvjs/ -youtube2zim/templates/assets/videojs-ogvjs.js -youtube2zim/templates/assets/videojs/ -youtube2zim/templates/assets/polyfills.js -youtube2zim/templates/assets/webp-hero.bundle.js 
+src/youtube2zim/templates/assets/chosen/ +src/youtube2zim/templates/assets/jquery.min.js +src/youtube2zim/templates/assets/ogvjs/ +src/youtube2zim/templates/assets/videojs-ogvjs.js +src/youtube2zim/templates/assets/videojs/ +src/youtube2zim/templates/assets/polyfills.js +src/youtube2zim/templates/assets/webp-hero.bundle.js + +# output dir +output + +# ignore all vscode, this is not standard configuration in this place +.vscode diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..d464164b --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,27 @@ +# See https://pre-commit.com for more information +# See https://pre-commit.com/hooks.html for more hooks +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.4.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer +- repo: https://github.com/psf/black + rev: "23.7.0" + hooks: + - id: black +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.0.282 + hooks: + - id: ruff +- repo: https://github.com/RobertCraigie/pyright-python + rev: v1.1.331 + hooks: + - id: pyright + name: pyright (system) + description: 'pyright static type checker' + entry: pyright + language: system + 'types_or': [python, pyi] + require_serial: true + minimum_pre_commit_version: '2.9.2' diff --git a/Dockerfile b/Dockerfile index b250cbac..bfff6d56 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,21 +1,37 @@ -FROM python:3.8 +FROM python:3.11-bookworm LABEL org.opencontainers.image.source https://github.com/openzim/youtube # Install necessary packages -RUN apt-get update -y \ - && apt-get install -y --no-install-recommends locales-all wget unzip ffmpeg aria2 \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - - -COPY youtube2zim /src/youtube2zim -COPY get_js_deps.sh requirements.txt setup.py README.md LICENSE MANIFEST.in /src/ -RUN pip3 install $(grep "zimscraperlib" /src/requirements.txt) -RUN cd /src/ && python3 ./setup.py install +RUN apt-get update \ + 
&& apt-get install -y --no-install-recommends \ + locales-all \ + wget \ + unzip \ + ffmpeg \ + aria2 \ + && rm -rf /var/lib/apt/lists/* \ + && python -m pip install --no-cache-dir -U \ + pip +# Custom entrypoint COPY entrypoint.sh /usr/local/bin/entrypoint.sh ENTRYPOINT ["entrypoint.sh"] - RUN mkdir -p /output WORKDIR /output + +# Copy pyproject.toml and its dependencies +COPY pyproject.toml README.md get_js_deps.sh hatch_build.py /src/ +COPY src/youtube2zim/__about__.py /src/src/youtube2zim/__about__.py + +# Install Python dependencies +RUN pip install --no-cache-dir /src + +# Copy code + associated artifacts +COPY src /src/src +COPY *.md LICENSE CHANGELOG /src/ + +# Install + cleanup +RUN pip install --no-cache-dir /src \ + && rm -rf /src + CMD ["youtube2zim", "--help"] diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index 11681480..00000000 --- a/MANIFEST.in +++ /dev/null @@ -1,2 +0,0 @@ -include *.md -recursive-include youtube2zim * diff --git a/README.md b/README.md index d2c6c2bb..36b2ff2d 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,7 @@ youtube2zim --help # Display youtube2zim help At the end, call `deactivate` to quit the virtual environment. -See `requirements.txt` for the list of python dependencies. +See `pyproject.toml` for the list of python dependencies. ## Docker @@ -67,7 +67,7 @@ To get an API: 3. When asked, choose _Create Credentials_ and select the **API Key** type. ([Credentials page](https://console.developers.google.com/apis/credentials)) ```bash -youtube2zim --api-key "" --type user --id "Vsauce" +youtube2zim --api-key "" --type user --id "Vsauce" --name "vsauce" ``` ## Notes @@ -78,12 +78,14 @@ youtube2zim --api-key "" --type user --id "Vsauce" youtube2zim-playlists --------------------- -`youtube2zim` produces a single ZIM file for a youtube request (`channel`, `user`, `playlists`. +`youtube2zim` produces a single ZIM file for a youtube request (`channel`, `user`, `playlist`). 
-`youtube2zim-playlists` allows you to **create one ZIM file per playlist** instead. +`youtube2zim-playlists` allows you to **automatically create one ZIM file per playlist** of a given channel or user instead. This script is a wrapper around `youtube2zim` and is bundled with the main package. +The difference between a channel and a user is due to Youtube legacy. Some old users have to be searched as a user, while more recent ones have to be searched as a channel. Try your best bet, and if it fails try the other type. + ## Usage `youtube2zim-playlists --help` diff --git a/contrib/reencode_low_quality.py b/contrib/reencode_low_quality.py index cc31ca2d..489ecc63 100644 --- a/contrib/reencode_low_quality.py +++ b/contrib/reencode_low_quality.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # vim: ai ts=4 sts=4 et sw=4 nu """ turn a regular build-folder into a low-quality one by re-encoding all videos. @@ -31,7 +30,7 @@ def main(build_path): sys.exit(1) # retrieve source video_format - with open(build_dir.joinpath("metadata.json"), "r") as fp: + with open(build_dir.joinpath("metadata.json")) as fp: metadata = json.load(fp) video_format = metadata["video_format"] @@ -52,7 +51,8 @@ def main(build_path): if __name__ == "__main__": - if len(sys.argv) != 2: + nb_expected_args = 2 + if len(sys.argv) != nb_expected_args: logger.error("you must supply a path to a build folder") sys.exit(1) main(sys.argv[-1]) diff --git a/contrib/video_encoding_tester.py b/contrib/video_encoding_tester.py index 54c16895..b24be9e4 100644 --- a/contrib/video_encoding_tester.py +++ b/contrib/video_encoding_tester.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # vim: ai ts=4 sts=4 et sw=4 nu +# ruff: noqa: T201, T203, DTZ003 """ video encoding comparator @@ -155,7 +155,7 @@ def download_original(output_dir, youtube_id, video_format): fpath = expected_path.parent.joinpath(expected_path.stem) audext, vidext = {"webm": ("webm", "webm"), "mp4": ("m4a", 
"mp4")}[video_format] subprocess.run( - [ + [ # noqa: S607 "youtube-dl", "-o", f"{fpath}.%(ext)s", @@ -169,7 +169,7 @@ def download_original(output_dir, youtube_id, video_format): ) -def get_src_path(output_dir, youtube_id, video_format): +def get_src_path(output_dir: pathlib.Path, youtube_id, video_format): video_format_path = output_dir.joinpath(f"{youtube_id}.orig.{video_format}") if video_format_path.exists(): return video_format_path @@ -223,14 +223,16 @@ def hduration(value): return humanfriendly.format_timespan(value) def hsduration(value): - if value >= 3600: - hours = value // 3600 - value = value % 3600 + seconds_per_hour = 3600 + seconds_per_minute = 60 + if value >= seconds_per_hour: + hours = value // seconds_per_hour + value = value % seconds_per_hour else: hours = 0 - if value >= 60: - minutes = value // 60 - value = value % 60 + if value >= seconds_per_minute: + minutes = value // seconds_per_minute + value = value % seconds_per_minute else: minutes = 0 return f"{hours:02}:{minutes:02}:{value:02}" @@ -310,7 +312,7 @@ def hsduration(value): fh.write(page) -def main(output_dir): +def main(output_dir: pathlib.Path): if not output_dir.exists() or not output_dir.is_dir(): print(f"{output_dir} is not a valid directory.") return 1 @@ -328,9 +330,12 @@ def main(output_dir): report[youtube_id]["duration"] = get_duration_for( output_dir, youtube_id ) - report[youtube_id][video_format]["size"] = ( - get_src_path(output_dir, youtube_id, video_format).stat().st_size - ) + src_path = get_src_path(output_dir, youtube_id, video_format) + if not src_path: + raise Exception( + f"src_path of {output_dir}, {youtube_id}, {video_format} is missing" + ) + report[youtube_id][video_format]["size"] = src_path.stat().st_size report[youtube_id][video_format]["bitrate"] = ( report[youtube_id][video_format]["size"] @@ -361,7 +366,8 @@ def main(output_dir): if __name__ == "__main__": - if len(sys.argv) < 2: + nb_expected_args = 2 + if len(sys.argv) != nb_expected_args: print("you 
must pass a target folder") sys.exit(1) sys.exit(main(pathlib.Path(sys.argv[1]))) diff --git a/get_js_deps.sh b/get_js_deps.sh index eb6c1e72..3382488e 100755 --- a/get_js_deps.sh +++ b/get_js_deps.sh @@ -17,7 +17,7 @@ fi # Absolute path this script is in. SCRIPT_PATH="$( cd "$(dirname "$0")" ; pwd -P )" -ASSETS_PATH="${SCRIPT_PATH}/youtube2zim/templates/assets" +ASSETS_PATH="${SCRIPT_PATH}/src/youtube2zim/templates/assets" echo "About to download JS assets to ${ASSETS_PATH}" diff --git a/hatch_build.py b/hatch_build.py new file mode 100644 index 00000000..cfaafdb4 --- /dev/null +++ b/hatch_build.py @@ -0,0 +1,43 @@ +import logging +import subprocess +from pathlib import Path + +from hatchling.builders.hooks.plugin.interface import BuildHookInterface + +logging.basicConfig(level=logging.DEBUG) +logger = logging.getLogger(__name__) + +# update list in constants.py as well +JS_DEPS = [ + "videojs", + "ogvjs", + "chosen", + "videojs-ogvjs.js", + "jquery.min.js", + "polyfills.js", + "webp-hero.bundle.js", +] + + +class GetJsDepsHook(BuildHookInterface): + def initialize(self, version, build_data): + if self.deps_already_installed(): + logger.info("JS dependencies are already installed, skipping it") + return + Path(self.root).joinpath("src/youtube2zim/templates/assets") + subprocess.run( + str(Path(self.root).joinpath("get_js_deps.sh")), + check=True, + ) + return super().initialize(version, build_data) + + def deps_already_installed(self) -> bool: + for dep in JS_DEPS: + if ( + not Path(self.root) + .joinpath("src/youtube2zim/templates/assets") + .joinpath(dep) + .exists() + ): + return False + return True diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..34f365aa --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,226 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "youtube2zim" +authors = [{ name = "Kiwix", email = "dev@kiwix.org" }] +keywords = ["kiwix", "zim", "offline", "youtube"] 
+requires-python = ">=3.11" +description = "Make ZIM file from a Youtube channel, user or playlist(s)" +readme = "README.md" +license = { text = "GPL-3.0-or-later" } +classifiers = [ + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.11", + "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)", +] +dependencies = [ + "python-slugify==3.0.3", + "yt-dlp", # youtube-dl should be updated as frequently as possible + "python-dateutil==2.8.0", + "jinja2>=2.11,<3.0", + "MarkupSafe==2.0.1", # jinja2 dependency (https://github.com/pallets/markupsafe/issues/284) + "zimscraperlib>=2.0.0,<2.1.0", + "requests==2.31.0", + "kiwixstorage==0.8.3", + "pif==0.8.2", +] +dynamic = ["version"] + +[project.optional-dependencies] +scripts = ["invoke==2.2.0"] +lint = ["black==23.9.1", "ruff==0.0.292"] +check = ["pyright==1.1.331"] +test = ["pytest==7.4.2", "coverage==7.3.2"] +dev = [ + "pre-commit==3.4.0", + "debugpy==1.8.0", + "youtube2zim[scripts]", + "youtube2zim[lint]", + "youtube2zim[test]", + "youtube2zim[check]", + # hatchling is a dev dependency only needed for hook development on developer machine + "hatchling==1.18.0", + "humanfriendly==10.0" +] + +[project.urls] +Homepage = "https://github.com/openzim/youtube" +Donate = "https://www.kiwix.org/en/support-us/" + +[project.scripts] +youtube2zim = "youtube2zim.__main__:main" +youtube2zim-playlists = "youtube2zim.playlists.__main__:main" + +[tool.hatch.version] +path = "src/youtube2zim/__about__.py" + +[tool.hatch.build] +exclude = ["/.github"] + +[tool.hatch.build.hooks.custom] +path = "hatch_build.py" +dependencies = ["zimscraperlib==3.1.1"] + +[tool.hatch.envs.default] +features = ["dev"] + +[tool.hatch.envs.test] +features = ["scripts", "test"] + +[tool.hatch.envs.test.scripts] +run = "inv test --args '{args}'" +run-cov = "inv test-cov --args '{args}'" +report-cov = "inv report-cov" +coverage = "inv coverage --args '{args}'" +html = "inv coverage --html --args '{args}'" + 
+[tool.hatch.envs.lint] +template = "lint" +python = "py311" +skip-install = false +features = ["scripts", "lint"] + +[tool.hatch.envs.lint.scripts] +black = "inv lint-black --args '{args}'" +ruff = "inv lint-ruff --args '{args}'" +all = "inv lintall --args '{args}'" +fix-black = "inv fix-black --args '{args}'" +fix-ruff = "inv fix-ruff --args '{args}'" +fixall = "inv fixall --args '{args}'" + +[tool.hatch.envs.check] +features = ["scripts", "check"] + +[tool.hatch.envs.check.scripts] +pyright = "inv check-pyright --args '{args}'" +all = "inv checkall --args '{args}'" + +[tool.black] +line-length = 88 +target-version = ['py311'] +exclude = "(src/youtube2zim/templates/.*|.hatch/.*)" + +[tool.ruff] +target-version = "py311" +line-length = 88 +src = ["src"] +select = [ + "A", # flake8-builtins + # "ANN", # flake8-annotations + "ARG", # flake8-unused-arguments + # "ASYNC", # flake8-async + "B", # flake8-bugbear + # "BLE", # flake8-blind-except + "C4", # flake8-comprehensions + "C90", # mccabe + # "COM", # flake8-commas + # "D", # pydocstyle + # "DJ", # flake8-django + "DTZ", # flake8-datetimez + "E", # pycodestyle (default) + "EM", # flake8-errmsg + # "ERA", # eradicate + # "EXE", # flake8-executable + "F", # Pyflakes (default) + # "FA", # flake8-future-annotations + "FBT", # flake8-boolean-trap + # "FLY", # flynt + # "G", # flake8-logging-format + "I", # isort + "ICN", # flake8-import-conventions + # "INP", # flake8-no-pep420 + # "INT", # flake8-gettext + "ISC", # flake8-implicit-str-concat + "N", # pep8-naming + # "NPY", # NumPy-specific rules + # "PD", # pandas-vet + # "PGH", # pygrep-hooks + # "PIE", # flake8-pie + # "PL", # Pylint + "PLC", # Pylint: Convention + "PLE", # Pylint: Error + "PLR", # Pylint: Refactor + "PLW", # Pylint: Warning + # "PT", # flake8-pytest-style + # "PTH", # flake8-use-pathlib + # "PYI", # flake8-pyi + "Q", # flake8-quotes + # "RET", # flake8-return + # "RSE", # flake8-raise + "RUF", # Ruff-specific rules + "S", # flake8-bandit + # "SIM", 
# flake8-simplify + # "SLF", # flake8-self + "T10", # flake8-debugger + "T20", # flake8-print + # "TCH", # flake8-type-checking + # "TD", # flake8-todos + "TID", # flake8-tidy-imports + # "TRY", # tryceratops + "UP", # pyupgrade + "W", # pycodestyle + "YTT", # flake8-2020 +] +ignore = [ + # Allow non-abstract empty methods in abstract base classes + "B027", + # Allow use of date.today + "DTZ011", + # Remove flake8-errmsg since we consider they bloat the code and provide limited value + "EM", + # Allow boolean positional values in function calls, like `dict.get(... True)` + "FBT003", + # Ignore checks for possible passwords + "S105", + "S106", + "S107", + # Ignore warnings on subprocess.run / popen + "S603", + # Ignore complexity + "C901", + "PLR0911", + "PLR0912", + "PLR0913", + "PLR0915", +] +unfixable = [ + # Don't touch unused imports + "F401", +] + +[tool.ruff.isort] +known-first-party = ["youtube2zim"] + +[tool.ruff.flake8-tidy-imports] +ban-relative-imports = "all" + +[tool.ruff.per-file-ignores] +# Tests can use magic values, assertions, and relative imports +"tests/**/*" = ["PLR2004", "S101", "TID252"] + +[tool.pytest.ini_options] +minversion = "7.3" +testpaths = ["tests"] +pythonpath = [".", "src"] + +[tool.coverage.paths] +great_project = ["src/youtube2zim"] +tests = ["tests"] + +[tool.coverage.run] +source_pkgs = ["youtube2zim"] +branch = true +parallel = true +omit = ["src/youtube2zim/__about__.py"] + +[tool.coverage.report] +exclude_lines = ["no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:"] + +[tool.pyright] +include = ["src", "tests", "tasks.py"] +exclude = [".env/**", ".venv/**", "src/youtube2zim/templates", ".hatch"] +extraPaths = ["src"] +pythonVersion = "3.11" +typeCheckingMode = "basic" diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index b31e0027..00000000 --- a/requirements.txt +++ /dev/null @@ -1,11 +0,0 @@ -python-slugify==3.0.3 -# youtube-dl should be updated as frequently as possible -yt-dlp 
-python-dateutil==2.8.0 -jinja2>=2.11,<3.0 -# jinja2 dependency (https://github.com/pallets/markupsafe/issues/284) -MarkupSafe==2.0.1 -zimscraperlib>=2.0.0,<2.1.0 -requests>=2.26.0,<3.0 -kiwixstorage>=0.8.1,<1.0 -pif==0.8.2 diff --git a/setup.py b/setup.py deleted file mode 100644 index 777b6e07..00000000 --- a/setup.py +++ /dev/null @@ -1,56 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# vim: ai ts=4 sts=4 et sw=4 nu - -import pathlib -import subprocess - -from setuptools import setup - -root_dir = pathlib.Path(__file__).parent - - -def read(*names, **kwargs): - with open(root_dir.joinpath(*names), "r") as fh: - return fh.read() - - -print("Downloading and fixing JS dependencies...") -ps = subprocess.run([str(root_dir.joinpath("get_js_deps.sh").resolve())]) -ps.check_returncode() - - -setup( - name="youtube2zim", - version=read("youtube2zim", "VERSION").strip(), - description="Make ZIM file from a Youtube channel, user or playlist(s)", - long_description=read("README.md"), - long_description_content_type="text/markdown", - author="dattaz", - author_email="taz@dattaz.fr", - url="https://github.com/openzim/youtube", - keywords="kiwix zim youtube offline", - license="GPLv3+", - packages=["youtube2zim"], - install_requires=[ - line.strip() - for line in read("requirements.txt").splitlines() - if not line.strip().startswith("#") - ], - zip_safe=False, - include_package_data=True, - entry_points={ - "console_scripts": [ - "youtube2zim=youtube2zim.__main__:main", - "youtube2zim-playlists=youtube2zim.playlists.__main__:main", - ] - }, - classifiers=[ - "Development Status :: 4 - Beta", - "Intended Audience :: Developers", - "Programming Language :: Python", - "Programming Language :: Python :: 3.8", - "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)", - ], - python_requires=">=3.6", -) diff --git a/src/youtube2zim/__about__.py b/src/youtube2zim/__about__.py new file mode 100644 index 00000000..df02df08 --- /dev/null +++ 
b/src/youtube2zim/__about__.py @@ -0,0 +1 @@ +__version__ = "2.2.0-dev0" diff --git a/youtube2zim/__init__.py b/src/youtube2zim/__init__.py similarity index 100% rename from youtube2zim/__init__.py rename to src/youtube2zim/__init__.py diff --git a/youtube2zim/__main__.py b/src/youtube2zim/__main__.py similarity index 89% rename from youtube2zim/__main__.py rename to src/youtube2zim/__main__.py index f348845c..65f84603 100644 --- a/youtube2zim/__main__.py +++ b/src/youtube2zim/__main__.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # vim: ai ts=4 sts=4 et sw=4 nu import pathlib @@ -8,7 +7,7 @@ def main(): # allows running it from source using python youtube2zim - sys.path = [str(pathlib.Path(__file__).parent.parent.resolve())] + sys.path + sys.path = [str(pathlib.Path(__file__).parent.parent.resolve()), *sys.path] from youtube2zim.entrypoint import main as entry diff --git a/youtube2zim/constants.py b/src/youtube2zim/constants.py similarity index 86% rename from youtube2zim/constants.py rename to src/youtube2zim/constants.py index e12628fe..e3eaa166 100644 --- a/youtube2zim/constants.py +++ b/src/youtube2zim/constants.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # vim: ai ts=4 sts=4 et sw=4 nu import logging @@ -7,13 +6,12 @@ from zimscraperlib.logging import getLogger +from youtube2zim.__about__ import __version__ + ROOT_DIR = pathlib.Path(__file__).parent NAME = ROOT_DIR.name -with open(ROOT_DIR.joinpath("VERSION"), "r") as fh: - VERSION = fh.read().strip() - -SCRAPER = f"{NAME} {VERSION}" +SCRAPER = f"{NAME} {__version__}" CHANNEL = "channel" PLAYLIST = "playlist" diff --git a/youtube2zim/entrypoint.py b/src/youtube2zim/entrypoint.py similarity index 97% rename from youtube2zim/entrypoint.py rename to src/youtube2zim/entrypoint.py index b1abd8c8..7c4741f5 100755 --- a/youtube2zim/entrypoint.py +++ b/src/youtube2zim/entrypoint.py @@ -1,13 +1,20 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # vim: ai ts=4 sts=4 et 
sw=4 nu import argparse import logging import sys -from .constants import CHANNEL, NAME, PLAYLIST, SCRAPER, USER, YOUTUBE, logger -from .scraper import Youtube2Zim +from youtube2zim.constants import ( + CHANNEL, + NAME, + PLAYLIST, + SCRAPER, + USER, + YOUTUBE, + logger, +) +from youtube2zim.scraper import Youtube2Zim def main(): diff --git a/youtube2zim/locale/fr/LC_MESSAGES/messages.mo b/src/youtube2zim/locale/fr/LC_MESSAGES/messages.mo similarity index 100% rename from youtube2zim/locale/fr/LC_MESSAGES/messages.mo rename to src/youtube2zim/locale/fr/LC_MESSAGES/messages.mo diff --git a/youtube2zim/locale/fr/LC_MESSAGES/messages.po b/src/youtube2zim/locale/fr/LC_MESSAGES/messages.po similarity index 100% rename from youtube2zim/locale/fr/LC_MESSAGES/messages.po rename to src/youtube2zim/locale/fr/LC_MESSAGES/messages.po diff --git a/youtube2zim/locale/messages.pot b/src/youtube2zim/locale/messages.pot similarity index 100% rename from youtube2zim/locale/messages.pot rename to src/youtube2zim/locale/messages.pot diff --git a/youtube2zim/playlists/__init__.py b/src/youtube2zim/playlists/__init__.py similarity index 100% rename from youtube2zim/playlists/__init__.py rename to src/youtube2zim/playlists/__init__.py diff --git a/youtube2zim/playlists/__main__.py b/src/youtube2zim/playlists/__main__.py similarity index 88% rename from youtube2zim/playlists/__main__.py rename to src/youtube2zim/playlists/__main__.py index 5bd8a687..ee2c869a 100644 --- a/youtube2zim/playlists/__main__.py +++ b/src/youtube2zim/playlists/__main__.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # vim: ai ts=4 sts=4 et sw=4 nu import pathlib @@ -8,7 +7,7 @@ def main(): # allows running it from source using python youtube2zim - sys.path = [str(pathlib.Path(__file__).parent.parent.parent.resolve())] + sys.path + sys.path = [str(pathlib.Path(__file__).parent.parent.parent.resolve()), *sys.path] from youtube2zim.playlists.entrypoint import main as entry diff --git 
a/youtube2zim/playlists/entrypoint.py b/src/youtube2zim/playlists/entrypoint.py similarity index 94% rename from youtube2zim/playlists/entrypoint.py rename to src/youtube2zim/playlists/entrypoint.py index 674689ce..bf2a9972 100644 --- a/youtube2zim/playlists/entrypoint.py +++ b/src/youtube2zim/playlists/entrypoint.py @@ -1,13 +1,12 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # vim: ai ts=4 sts=4 et sw=4 nu import argparse import logging import sys -from ..constants import CHANNEL, NAME, PLAYLIST, SCRAPER, USER, logger -from ..utils import has_argument +from youtube2zim.constants import CHANNEL, NAME, PLAYLIST, SCRAPER, USER, logger +from youtube2zim.utils import has_argument def main(): @@ -89,7 +88,7 @@ def main(): logger.setLevel(logging.DEBUG if args.debug else logging.INFO) - from .scraper import YoutubeHandler + from youtube2zim.playlists.scraper import YoutubeHandler try: handler = YoutubeHandler(dict(args._get_kwargs()), extra_args=extra_args) diff --git a/youtube2zim/playlists/scraper.py b/src/youtube2zim/playlists/scraper.py similarity index 84% rename from youtube2zim/playlists/scraper.py rename to src/youtube2zim/playlists/scraper.py index 39927ce6..27c626d5 100644 --- a/youtube2zim/playlists/scraper.py +++ b/src/youtube2zim/playlists/scraper.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # vim: ai ts=4 sts=4 et sw=4 nu """ @@ -22,8 +21,12 @@ import requests from zimscraperlib.logging import nicer_args_join -from ..constants import NAME, PLAYLIST, YOUTUBE, logger -from ..youtube import credentials_ok, extract_playlists_details_from +from youtube2zim.constants import NAME, PLAYLIST, YOUTUBE, logger +from youtube2zim.youtube import ( + REQUEST_TIMEOUT, + credentials_ok, + extract_playlists_details_from, +) class YoutubeHandler: @@ -32,9 +35,13 @@ def __init__( options, extra_args, ): - # save options as properties - for key, value in options.items(): - setattr(self, key, value) + # extract values from options + self.api_key = 
options["api_key"] + self.debug = options["debug"] + self.playlists_mode = options["playlists_mode"] + self.collection_type = options["collection_type"] + self.youtube_id = options["youtube_id"] + self.extra_args = extra_args self.build_dir = pathlib.Path(tempfile.mkdtemp()) @@ -119,7 +126,8 @@ def run_playlist_zim(self, playlist): """run youtube2zim for an individual playlist""" playlist_id = playlist.playlist_id - args = self.youtube2zim_exe + [ + args = [ + *self.youtube2zim_exe, "--type", PLAYLIST, "--id", @@ -155,32 +163,30 @@ def run_playlist_zim(self, playlist): args += self.extra_args logger.debug(nicer_args_join(args)) - process = subprocess.run( + process = subprocess.run( # noqa: PLW1510 args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, - universal_newlines=True, + text=True, ) return process.returncode == 0, process def handle_single_zim(self): """redirect request to standard youtube2zim""" - args = ( - self.youtube2zim_exe - + [ - "--type", - self.collection_type, - "--id", - self.youtube_id, - "--api-key", - self.api_key, - ] - + self.extra_args - ) + args = [ + *self.youtube2zim_exe, + "--type", + self.collection_type, + "--id", + self.youtube_id, + "--api-key", + self.api_key, + *self.extra_args, + ] if self.debug: args.append("--debug") - return subprocess.run(args).returncode + return subprocess.run(args).returncode # noqa: PLW1510 @staticmethod def compute_format(playlist, fmt): @@ -196,19 +202,21 @@ def fetch_metadata(self): # load JSON from source (URL or file) try: if str(self.metadata_from).startswith("http"): - self.metadata = requests.get(str(self.metadata_from)).json() + self.metadata = requests.get( + str(self.metadata_from), timeout=REQUEST_TIMEOUT + ).json() else: if not self.metadata_from.exists(): - raise IOError( + raise OSError( f"--metadata-from file could not be found: {self.metadata_from}" ) - with open(self.metadata_from, "r") as fh: + with open(self.metadata_from) as fh: self.metadata = json.load(fh) except Exception as 
exc: logger.debug(exc) raise ValueError( f"--metadata-from could not be loaded as JSON: {self.metadata_from}" - ) + ) from exc # ensure the basic format is respected: dict of playlist ID to dict of meta if not isinstance(self.metadata, dict) or len(self.metadata) != sum( diff --git a/youtube2zim/processing.py b/src/youtube2zim/processing.py similarity index 97% rename from youtube2zim/processing.py rename to src/youtube2zim/processing.py index 7021f4f6..7836c054 100644 --- a/youtube2zim/processing.py +++ b/src/youtube2zim/processing.py @@ -1,12 +1,11 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # vim: ai ts=4 sts=4 et sw=4 nu from zimscraperlib.image.optimization import optimize_image from zimscraperlib.image.transformation import resize_image from zimscraperlib.video.encoding import reencode -from .constants import logger +from youtube2zim.constants import logger def process_thumbnail(thumbnail_path, preset): diff --git a/youtube2zim/scraper.py b/src/youtube2zim/scraper.py similarity index 91% rename from youtube2zim/scraper.py rename to src/youtube2zim/scraper.py index c82c4dbd..ca9f8928 100644 --- a/youtube2zim/scraper.py +++ b/src/youtube2zim/scraper.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # vim: ai ts=4 sts=4 et sw=4 nu """ @@ -36,7 +35,7 @@ from zimscraperlib.video.presets import VideoMp4Low, VideoWebmLow from zimscraperlib.zim import make_zim_file -from .constants import ( +from youtube2zim.constants import ( CHANNEL, PLAYLIST, ROOT_DIR, @@ -45,9 +44,16 @@ YOUTUBE_LANG_MAP, logger, ) -from .processing import post_process_video, process_thumbnail -from .utils import clean_text, get_slug, load_json, save_json -from .youtube import ( +from youtube2zim.processing import post_process_video, process_thumbnail +from youtube2zim.utils import ( + clean_text, + get_slug, + load_json, + load_mandatory_json, + render_template, + save_json, +) +from youtube2zim.youtube import ( credentials_ok, extract_playlists_details_from, 
get_channel_json, @@ -58,6 +64,8 @@ skip_outofrange_videos, ) +MAXIMUM_YOUTUBEID_LENGTH = 24 + class Youtube2Zim: def __init__( @@ -214,12 +222,12 @@ def is_playlist(self): def is_single_channel(self): if self.is_channel or self.is_user: return True - return len(list(set([pl.creator_id for pl in self.playlists]))) == 1 + return len(list({pl.creator_id for pl in self.playlists})) == 1 @property def sorted_playlists(self): """sorted list of playlists (by title) but with Uploads one at first if any""" - if len(self.playlists) < 2: + if len(self.playlists) <= 1: return self.playlists sorted_playlists = sorted(self.playlists, key=lambda x: x.title) @@ -251,7 +259,7 @@ def run(self): logger.info( f"starting youtube scraper for {self.collection_type}#{self.youtube_id}" ) - logger.info("preparing build folder at {}".format(self.build_dir.resolve())) + logger.info(f"preparing build folder at {self.build_dir.resolve()}") self.prepare_build_folder() logger.info("testing Youtube credentials") @@ -304,7 +312,7 @@ def run(self): logger.error(f"{len(failed)} video(s) failed to download: {failed}") if len(failed) >= len(succeeded): logger.critical("More than half of videos failed. 
exiting") - raise IOError("Too much videos failed to download") + raise OSError("Too much videos failed to download") logger.info("retrieve channel-info for all videos (author details)") get_videos_authors_info(succeeded) @@ -321,7 +329,15 @@ def run(self): # make zim file os.makedirs(self.output_dir, exist_ok=True) if not self.no_zim: - period = datetime.datetime.now().strftime("%Y-%m") + if not self.name: + raise Exception("name is mandatory") + if not self.title: + raise Exception("title is mandatory") + if not self.description: + raise Exception("description is mandatory") + if not self.creator: + raise Exception("creator is mandatory") + period = datetime.date.today().strftime("%Y-%m") self.fname = ( self.fname.format(period=period) if self.fname @@ -371,12 +387,15 @@ def validate_dateafter_input(self): "Invalid dateafter input. Valid dateafter format: " "YYYYMMDD or (now|today)[+-][0-9](day|week|month|year)(s)." ) - raise ValueError(f"Invalid dateafter input: {exc}") + raise ValueError(f"Invalid dateafter input: {exc}") from exc def validate_id(self): # space not allowed in youtube-ID self.youtube_id = self.youtube_id.replace(" ", "") - if self.collection_type == "channel" and len(self.youtube_id) > 24: + if ( + self.collection_type == "channel" + and len(self.youtube_id) > MAXIMUM_YOUTUBEID_LENGTH + ): raise ValueError("Invalid ChannelId") if "," in self.youtube_id and self.collection_type != "playlist": raise ValueError("Invalid YoutubeId") @@ -419,23 +438,27 @@ def check_branding_values(self): return logger.info("checking your branding files and values") if self.profile_image: - if self.profile_image.startswith("http"): + if isinstance(self.profile_image, str) and self.profile_image.startswith( + "http" + ): stream_file(self.profile_image, self.profile_path) else: self.profile_image = Path(self.profile_image) if not self.profile_image.exists(): - raise IOError( + raise OSError( f"--profile image could not be found: {self.profile_image}" ) 
shutil.move(self.profile_image, self.profile_path) resize_image(self.profile_path, width=100, height=100, method="thumbnail") if self.banner_image: - if self.banner_image.startswith("http"): + if isinstance(self.banner_image, str) and self.banner_image.startswith( + "http" + ): stream_file(self.banner_image, self.banner_path) else: self.banner_image = Path(self.banner_image) if not self.banner_image.exists(): - raise IOError( + raise OSError( f"--banner image could not be found: {self.banner_image}" ) shutil.move(self.banner_image, self.banner_path) @@ -486,7 +509,6 @@ def extract_videos_list(self): self.videos_ids = [*all_videos.keys()] # unpacking so it's subscriptable def download_video_files(self, max_concurrency): - audext, vidext = {"webm": ("webm", "webm"), "mp4": ("m4a", "mp4")}[ self.video_format ] @@ -575,14 +597,17 @@ def get_slot(): def download_from_cache(self, key, video_path, encoder_version): """whether it successfully downloaded from cache""" + if not self.s3_storage: + raise Exception( + "Cannot download from cache if s3_storage is not configured" + ) if self.use_any_optimized_version: if not self.s3_storage.has_object(key, self.s3_storage.bucket_name): return False - else: - if not self.s3_storage.has_object_matching_meta( - key, tag="encoder_version", value=f"v{encoder_version}" - ): - return False + elif not self.s3_storage.has_object_matching_meta( + key, tag="encoder_version", value=f"v{encoder_version}" + ): + return False video_path.parent.mkdir(parents=True, exist_ok=True) try: self.s3_storage.download_file(key, video_path) @@ -594,6 +619,8 @@ def download_from_cache(self, key, video_path, encoder_version): def upload_to_cache(self, key, video_path, encoder_version): """whether it successfully uploaded to cache""" + if not self.s3_storage: + raise Exception("Cannot upload to cache if s3_storage is not configured") try: self.s3_storage.upload_file( video_path, key, meta={"encoder_version": f"v{encoder_version}"} @@ -612,6 +639,7 @@ def 
download_video(self, video_id, options): video_location = options_copy["y2z_videos_dir"].joinpath(video_id) video_path = video_location.joinpath(f"video.{self.video_format}") + s3_key = None if self.s3_storage: s3_key = f"{self.video_format}/{self.video_quality}/{video_id}" logger.debug( @@ -661,6 +689,7 @@ def download_thumbnail(self, video_id, options): video_location = options_copy["y2z_videos_dir"].joinpath(video_id) thumbnail_path = video_location.joinpath("video.webp") + s3_key = None if self.s3_storage: s3_key = f"thumbnails/high/{video_id}" logger.debug( @@ -725,9 +754,9 @@ def download_video_files_batch(self, options, videos_ids): return succeeded, failed def download_authors_branding(self): - videos_channels_json = load_json(self.cache_dir, "videos_channels") + videos_channels_json = load_mandatory_json(self.cache_dir, "videos_channels") uniq_channel_ids = list( - set([chan["channelId"] for chan in videos_channels_json.values()]) + {chan["channelId"] for chan in videos_channels_json.values()} ) for channel_id in uniq_channel_ids: save_channel_branding(self.channels_dir, channel_id, save_banner=False) @@ -744,6 +773,8 @@ def copy_default_banner(self, channel_id): def update_metadata(self): # we use title, description, profile and banner of channel/user # or channel of first playlist + if not self.main_channel_id: + raise Exception("main_channel_id is mandatory") try: main_channel_json = get_channel_json(self.main_channel_id) except KeyError: @@ -797,8 +828,8 @@ def update_metadata(self): # set colors from images if not supplied if self.main_color is None or self.secondary_color is None: profile_main, profile_secondary = get_colors(self.profile_path) - self.main_color = self.main_color or profile_main - self.secondary_color = self.secondary_color or profile_secondary + self.main_color = self.main_color or profile_main + self.secondary_color = self.secondary_color or profile_secondary resize_image( self.profile_path, @@ -870,10 +901,11 @@ def 
to_jinja_subtitle(lang): loader=jinja2.FileSystemLoader(str(self.templates_dir)), autoescape=True ) - videos = load_json(self.cache_dir, "videos").values() + videos = load_mandatory_json(self.cache_dir, "videos") + videos = videos.values() # filter videos so we only include the ones we could retrieve videos = list(filter(is_present, videos)) - videos_channels = load_json(self.cache_dir, "videos_channels") + videos_channels = load_mandatory_json(self.cache_dir, "videos_channels") has_channel = functools.partial(video_has_channel, videos_channels) # filter videos to exclude those for which we have no channel (#76) videos = list(filter(has_channel, videos)) @@ -889,7 +921,9 @@ def to_jinja_subtitle(lang): subtitles = get_subtitles(video_id) video_url = f"https://www.youtube.com/watch?v={video_id}" - html = env.get_template("article.html").render( + html = render_template( + env=env, + template_name="article.html", video_id=video_id, video_format=self.video_format, author=author, @@ -909,7 +943,9 @@ def to_jinja_subtitle(lang): fp.write(html) # build homepage - html = env.get_template("home.html").render( + html = render_template( + env=env, + template_name="home.html", playlists=self.playlists, video_format=self.video_format, title=self.title, @@ -925,14 +961,20 @@ def to_jinja_subtitle(lang): # rewrite app.js including `format` with open(self.assets_dir.joinpath("app.js"), "w", encoding="utf-8") as fp: fp.write( - env.get_template("assets/app.js").render(video_format=self.video_format) + render_template( + env=env, + template_name="assets/app.js", + video_format=self.video_format, + ) ) # rewrite app.js including `pagination` with open(self.assets_dir.joinpath("db.js"), "w", encoding="utf-8") as fp: fp.write( - env.get_template("assets/db.js").render( - NB_VIDEOS_PER_PAGE=self.nb_videos_per_page + render_template( + env=env, + template_name="assets/db.js", + NB_VIDEOS_PER_PAGE=self.nb_videos_per_page, ) ) @@ -955,7 +997,7 @@ def to_data_js(video): # write all 
playlists as they are for playlist in self.playlists: # retrieve list of videos for PL - playlist_videos = load_json( + playlist_videos = load_mandatory_json( self.cache_dir, f"playlist_{playlist.playlist_id}_videos" ) # filtering-out missing ones (deleted or not downloaded) diff --git a/youtube2zim/templates/article.html b/src/youtube2zim/templates/article.html similarity index 100% rename from youtube2zim/templates/article.html rename to src/youtube2zim/templates/article.html diff --git a/youtube2zim/templates/assets/app.js b/src/youtube2zim/templates/assets/app.js similarity index 100% rename from youtube2zim/templates/assets/app.js rename to src/youtube2zim/templates/assets/app.js diff --git a/youtube2zim/templates/assets/article.css b/src/youtube2zim/templates/assets/article.css similarity index 100% rename from youtube2zim/templates/assets/article.css rename to src/youtube2zim/templates/assets/article.css diff --git a/youtube2zim/templates/assets/banner.jpg b/src/youtube2zim/templates/assets/banner.jpg similarity index 100% rename from youtube2zim/templates/assets/banner.jpg rename to src/youtube2zim/templates/assets/banner.jpg diff --git a/youtube2zim/templates/assets/db.js b/src/youtube2zim/templates/assets/db.js similarity index 100% rename from youtube2zim/templates/assets/db.js rename to src/youtube2zim/templates/assets/db.js diff --git a/youtube2zim/templates/assets/font/Roboto-Black.ttf b/src/youtube2zim/templates/assets/font/Roboto-Black.ttf similarity index 100% rename from youtube2zim/templates/assets/font/Roboto-Black.ttf rename to src/youtube2zim/templates/assets/font/Roboto-Black.ttf diff --git a/youtube2zim/templates/assets/font/Roboto-Bold.ttf b/src/youtube2zim/templates/assets/font/Roboto-Bold.ttf similarity index 100% rename from youtube2zim/templates/assets/font/Roboto-Bold.ttf rename to src/youtube2zim/templates/assets/font/Roboto-Bold.ttf diff --git a/youtube2zim/templates/assets/font/Roboto-Light.ttf 
b/src/youtube2zim/templates/assets/font/Roboto-Light.ttf similarity index 100% rename from youtube2zim/templates/assets/font/Roboto-Light.ttf rename to src/youtube2zim/templates/assets/font/Roboto-Light.ttf diff --git a/youtube2zim/templates/assets/font/Roboto-LightItalic.ttf b/src/youtube2zim/templates/assets/font/Roboto-LightItalic.ttf similarity index 100% rename from youtube2zim/templates/assets/font/Roboto-LightItalic.ttf rename to src/youtube2zim/templates/assets/font/Roboto-LightItalic.ttf diff --git a/youtube2zim/templates/assets/font/Roboto-Regular.ttf b/src/youtube2zim/templates/assets/font/Roboto-Regular.ttf similarity index 100% rename from youtube2zim/templates/assets/font/Roboto-Regular.ttf rename to src/youtube2zim/templates/assets/font/Roboto-Regular.ttf diff --git a/youtube2zim/templates/assets/home.css b/src/youtube2zim/templates/assets/home.css similarity index 100% rename from youtube2zim/templates/assets/home.css rename to src/youtube2zim/templates/assets/home.css diff --git a/youtube2zim/templates/assets/sample.jpg b/src/youtube2zim/templates/assets/sample.jpg similarity index 100% rename from youtube2zim/templates/assets/sample.jpg rename to src/youtube2zim/templates/assets/sample.jpg diff --git a/youtube2zim/templates/assets/webp-trigger.js b/src/youtube2zim/templates/assets/webp-trigger.js similarity index 100% rename from youtube2zim/templates/assets/webp-trigger.js rename to src/youtube2zim/templates/assets/webp-trigger.js diff --git a/youtube2zim/templates/home.html b/src/youtube2zim/templates/home.html similarity index 100% rename from youtube2zim/templates/home.html rename to src/youtube2zim/templates/home.html diff --git a/youtube2zim/utils.py b/src/youtube2zim/utils.py similarity index 59% rename from youtube2zim/utils.py rename to src/youtube2zim/utils.py index 5be94a65..ce8f83a4 100644 --- a/youtube2zim/utils.py +++ b/src/youtube2zim/utils.py @@ -1,13 +1,13 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # vim: ai ts=4 sts=4 et 
sw=4 nu import json +import jinja2 from slugify import slugify -def get_slug(text, js_safe=True): +def get_slug(text, *, js_safe=True): """slug from text to build URL parts""" if js_safe: return slugify(text, regex_pattern=r"[^-a-z0-9_]+").replace("-", "_") @@ -31,12 +31,29 @@ def load_json(cache_dir, key): if not fname.exists(): return None try: - with open(fname, "r") as fp: + with open(fname) as fp: return json.load(fp) except Exception: return None +def load_mandatory_json(cache_dir, key): + """load mandatory JSON collection from path or raise an error""" + fname = cache_dir.joinpath(f"{key}.json") + if not fname.exists(): + raise Exception(f"JSON file at {fname} not found") + with open(fname) as fp: + return json.load(fp) + + def has_argument(arg_name, all_args): """whether --arg_name is specified in all_args""" return list(filter(lambda x: x.startswith(f"--{arg_name}"), all_args)) + + +def render_template(env: jinja2.Environment, template_name: str, **kwargs): + """render a Jinja template and ensures that result is a string""" + html = env.get_template(template_name).render(kwargs) + if not isinstance(html, str): + raise Exception("Jinja template did not returned a string") + return html diff --git a/youtube2zim/youtube.py b/src/youtube2zim/youtube.py similarity index 90% rename from youtube2zim/youtube.py rename to src/youtube2zim/youtube.py index b2de9fa9..742453ad 100644 --- a/youtube2zim/youtube.py +++ b/src/youtube2zim/youtube.py @@ -1,14 +1,15 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # vim: ai ts=4 sts=4 et sw=4 nu +from http import HTTPStatus + import requests from dateutil import parser as dt_parser from zimscraperlib.download import stream_file from zimscraperlib.image.transformation import resize_image -from .constants import CHANNEL, PLAYLIST, USER, YOUTUBE, logger -from .utils import get_slug, load_json, save_json +from youtube2zim.constants import CHANNEL, PLAYLIST, USER, YOUTUBE, logger +from youtube2zim.utils import get_slug, 
load_json, save_json YOUTUBE_API = "https://www.googleapis.com/youtube/v3" PLAYLIST_API = f"{YOUTUBE_API}/playlists" @@ -19,6 +20,7 @@ VIDEOS_API = f"{YOUTUBE_API}/videos" MAX_VIDEOS_PER_REQUEST = 50 # for VIDEOS_API RESULTS_PER_PAGE = 50 # max: 50 +REQUEST_TIMEOUT = 60 class Playlist: @@ -55,9 +57,11 @@ def __dict__(self): def credentials_ok(): """check that a Youtube search is successful, validating API_KEY""" req = requests.get( - SEARCH_API, params={"part": "snippet", "maxResults": 1, "key": YOUTUBE.api_key} + SEARCH_API, + params={"part": "snippet", "maxResults": 1, "key": YOUTUBE.api_key}, + timeout=REQUEST_TIMEOUT, ) - if req.status_code > 400: + if req.status_code >= HTTPStatus.BAD_REQUEST: logger.error(f"HTTP {req.status_code} Error response: {req.text}") try: req.raise_for_status() @@ -66,7 +70,7 @@ def credentials_ok(): return False -def get_channel_json(channel_id, for_username=False): +def get_channel_json(channel_id, *, for_username=False): """fetch or retieve-save and return the Youtube ChannelResult JSON""" fname = f"channel_{channel_id}" channel_json = load_json(YOUTUBE.cache_dir, fname) @@ -79,8 +83,9 @@ def get_channel_json(channel_id, for_username=False): "part": "brandingSettings,snippet,contentDetails", "key": YOUTUBE.api_key, }, + timeout=REQUEST_TIMEOUT, ) - if req.status_code > 400: + if req.status_code >= HTTPStatus.BAD_REQUEST: logger.error(f"HTTP {req.status_code} Error response: {req.text}") req.raise_for_status() try: @@ -118,8 +123,9 @@ def get_channel_playlists_json(channel_id): "maxResults": RESULTS_PER_PAGE, "pageToken": page_token, }, + timeout=REQUEST_TIMEOUT, ) - if req.status_code > 400: + if req.status_code >= HTTPStatus.BAD_REQUEST: logger.error(f"HTTP {req.status_code} Error response: {req.text}") req.raise_for_status() channel_playlists_json = req.json() @@ -140,8 +146,9 @@ def get_playlist_json(playlist_id): req = requests.get( PLAYLIST_API, params={"id": playlist_id, "part": "snippet", "key": YOUTUBE.api_key}, + 
timeout=REQUEST_TIMEOUT, ) - if req.status_code > 400: + if req.status_code >= HTTPStatus.BAD_REQUEST: logger.error(f"HTTP {req.status_code} Error response: {req.text}") req.raise_for_status() try: @@ -178,8 +185,9 @@ def get_videos_json(playlist_id): "maxResults": RESULTS_PER_PAGE, "pageToken": page_token, }, + timeout=REQUEST_TIMEOUT, ) - if req.status_code > 400: + if req.status_code >= HTTPStatus.BAD_REQUEST: logger.error(f"HTTP {req.status_code} Error response: {req.text}") req.raise_for_status() videos_json = req.json() @@ -200,9 +208,7 @@ def get_videos_authors_info(videos_ids): if items is not None: return items - logger.debug( - "query youtube-api for Video details of {} videos".format(len(videos_ids)) - ) + logger.debug(f"query youtube-api for Video details of {len(videos_ids)} videos") items = {} @@ -220,8 +226,9 @@ def retrieve_videos_for(videos_ids): "maxResults": RESULTS_PER_PAGE, "pageToken": page_token, }, + timeout=REQUEST_TIMEOUT, ) - if req.status_code > 400: + if req.status_code >= HTTPStatus.BAD_REQUEST: logger.error(f"HTTP {req.status_code} Error response: {req.text}") req.raise_for_status() videos_json = req.json() @@ -251,16 +258,20 @@ def retrieve_videos_for(videos_ids): return items -def save_channel_branding(channels_dir, channel_id, save_banner=False): +def save_channel_branding(channels_dir, channel_id, *, save_banner=False): """download, save and resize profile [and banner] of a channel""" channel_json = get_channel_json(channel_id) thumbnails = channel_json["snippet"]["thumbnails"] + thumnbail = None for quality in ("medium", "default"): # high:800px, medium:240px, default:88px if quality in thumbnails.keys(): thumnbail = thumbnails[quality]["url"] break + if not thumnbail: + raise Exception("thumnbail not found") + channel_dir = channels_dir.joinpath(channel_id) channel_dir.mkdir(exist_ok=True) @@ -302,7 +313,7 @@ def extract_playlists_details_from(collection_type, youtube_id): uploads_playlist_id = None main_channel_id = None - if 
collection_type == USER or collection_type == CHANNEL: + if collection_type in (USER, CHANNEL): if collection_type == USER: # youtube_id is a Username, fetch actual channelId through channel channel_json = get_channel_json(youtube_id, for_username=True) diff --git a/tasks.py b/tasks.py new file mode 100644 index 00000000..90854e86 --- /dev/null +++ b/tasks.py @@ -0,0 +1,109 @@ +# pyright: strict, reportUntypedFunctionDecorator=false +import os + +from invoke.context import Context +from invoke.tasks import task # pyright: ignore [reportUnknownVariableType] + +use_pty = not os.getenv("CI", "") + + +@task(optional=["args"], help={"args": "pytest additional arguments"}) +def test(ctx: Context, args: str = ""): + """run tests (without coverage)""" + ctx.run(f"pytest {args}", pty=use_pty) + + +@task(optional=["args"], help={"args": "pytest additional arguments"}) +def test_cov(ctx: Context, args: str = ""): + """run test vith coverage""" + ctx.run(f"coverage run -m pytest {args}", pty=use_pty) + + +@task(optional=["html"], help={"html": "flag to export html report"}) +def report_cov(ctx: Context, *, html: bool = False): + """report coverage""" + ctx.run("coverage combine", warn=True, pty=use_pty) + ctx.run("coverage report --show-missing", pty=use_pty) + if html: + ctx.run("coverage html", pty=use_pty) + + +@task( + optional=["args", "html"], + help={ + "args": "pytest additional arguments", + "html": "flag to export html report", + }, +) +def coverage(ctx: Context, args: str = "", *, html: bool = False): + """run tests and report coverage""" + test_cov(ctx, args=args) + report_cov(ctx, html=html) + + +@task(optional=["args"], help={"args": "black additional arguments"}) +def lint_black(ctx: Context, args: str = "."): + args = args or "." 
# needed for hatch script + ctx.run("black --version", pty=use_pty) + ctx.run(f"black --check --diff {args}", pty=use_pty) + + +@task(optional=["args"], help={"args": "ruff additional arguments"}) +def lint_ruff(ctx: Context, args: str = "."): + args = args or "." # needed for hatch script + ctx.run("ruff --version", pty=use_pty) + ctx.run(f"ruff check {args}", pty=use_pty) + + +@task( + optional=["args"], + help={ + "args": "linting tools (black, ruff) additional arguments, typically a path", + }, +) +def lintall(ctx: Context, args: str = "."): + """Check linting""" + args = args or "." # needed for hatch script + lint_black(ctx, args) + lint_ruff(ctx, args) + + +@task(optional=["args"], help={"args": "check tools (pyright) additional arguments"}) +def check_pyright(ctx: Context, args: str = ""): + """check static types with pyright""" + ctx.run("pyright --version") + ctx.run(f"pyright {args}", pty=use_pty) + + +@task(optional=["args"], help={"args": "check tools (pyright) additional arguments"}) +def checkall(ctx: Context, args: str = ""): + """check static types""" + check_pyright(ctx, args) + + +@task(optional=["args"], help={"args": "black additional arguments"}) +def fix_black(ctx: Context, args: str = "."): + """fix black formatting""" + args = args or "." # needed for hatch script + ctx.run(f"black {args}", pty=use_pty) + + +@task(optional=["args"], help={"args": "ruff additional arguments"}) +def fix_ruff(ctx: Context, args: str = "."): + """fix all ruff rules""" + args = args or "." # needed for hatch script + ctx.run(f"ruff --fix {args}", pty=use_pty) + + +@task( + optional=["args"], + help={ + "args": "linting tools (black, ruff) additional arguments, typically a path", + }, +) +def fixall(ctx: Context, args: str = "."): + """Fix everything automatically""" + args = args or "." 
# needed for hatch script + fix_black(ctx, args) + fix_ruff(ctx, args) + lintall(ctx, args) diff --git a/tests/test_dummy.py b/tests/test_dummy.py new file mode 100644 index 00000000..6cfebdac --- /dev/null +++ b/tests/test_dummy.py @@ -0,0 +1,5 @@ +from youtube2zim.constants import __version__ + + +def test_dummy(): + assert __version__ diff --git a/youtube2zim/VERSION b/youtube2zim/VERSION deleted file mode 100644 index d3026563..00000000 --- a/youtube2zim/VERSION +++ /dev/null @@ -1 +0,0 @@ -2.1.18 From 2bcd9dbd4e054546cc8b18ab06be630c6bfd5eb5 Mon Sep 17 00:00:00 2001 From: benoit74 Date: Fri, 13 Oct 2023 15:06:49 +0200 Subject: [PATCH 2/5] Update JS deps JQuery Chosen is deprecated and forces us to stay on JQuery 1 ogv.js has not been updated --- get_js_deps.sh | 10 +++++----- src/youtube2zim/scraper.py | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/get_js_deps.sh b/get_js_deps.sh index 3382488e..7e3fc13f 100755 --- a/get_js_deps.sh +++ b/get_js_deps.sh @@ -22,12 +22,12 @@ ASSETS_PATH="${SCRIPT_PATH}/src/youtube2zim/templates/assets" echo "About to download JS assets to ${ASSETS_PATH}" echo "getting video.js" -curl -L -O https://github.com/videojs/video.js/releases/download/v7.20.3/video-js-7.20.3.zip +curl -L -O https://github.com/videojs/video.js/releases/download/v8.6.1/video-js-8.6.1.zip rm -rf $ASSETS_PATH/videojs mkdir -p $ASSETS_PATH/videojs -unzip -o -d $ASSETS_PATH/videojs video-js-7.20.3.zip +unzip -o -d $ASSETS_PATH/videojs video-js-8.6.1.zip rm -rf $ASSETS_PATH/videojs/alt $ASSETS_PATH/videojs/examples -rm -f video-js-7.20.3.zip +rm -f video-js-8.6.1.zip echo "getting ogv.js" curl -L -O https://github.com/brion/ogv.js/releases/download/1.8.9/ogvjs-1.8.9.zip @@ -51,10 +51,10 @@ echo "getting jquery.js" curl -L -o $ASSETS_PATH/jquery.min.js https://code.jquery.com/jquery-1.12.4.min.js echo "getting webp-hero" -curl -L -O https://unpkg.com/webp-hero@0.0.0-dev.26/dist-cjs/polyfills.js +curl -L -O 
https://unpkg.com/webp-hero@0.0.2/dist-cjs/polyfills.js rm -f $ASSETS_PATH/polyfills.js mv polyfills.js $ASSETS_PATH/polyfills.js -curl -L -O https://unpkg.com/webp-hero@0.0.0-dev.26/dist-cjs/webp-hero.bundle.js +curl -L -O https://unpkg.com/webp-hero@0.0.2/dist-cjs/webp-hero.bundle.js rm -f $ASSETS_PATH/webp-hero.bundle.js mv webp-hero.bundle.js $ASSETS_PATH/webp-hero.bundle.js diff --git a/src/youtube2zim/scraper.py b/src/youtube2zim/scraper.py index ca9f8928..f53cada9 100644 --- a/src/youtube2zim/scraper.py +++ b/src/youtube2zim/scraper.py @@ -448,7 +448,7 @@ def check_branding_values(self): raise OSError( f"--profile image could not be found: {self.profile_image}" ) - shutil.move(self.profile_image, self.profile_path) + shutil.copy(self.profile_image, self.profile_path) resize_image(self.profile_path, width=100, height=100, method="thumbnail") if self.banner_image: if isinstance(self.banner_image, str) and self.banner_image.startswith( @@ -461,7 +461,7 @@ def check_branding_values(self): raise OSError( f"--banner image could not be found: {self.banner_image}" ) - shutil.move(self.banner_image, self.banner_path) + shutil.copy(self.banner_image, self.banner_path) resize_image(self.banner_path, width=1060, height=175, method="thumbnail") if self.main_color and not is_hex_color(self.main_color): From 57b3ee5f04d48b8359c719fc040782c83c623f8a Mon Sep 17 00:00:00 2001 From: benoit74 Date: Mon, 16 Oct 2023 10:50:31 +0200 Subject: [PATCH 3/5] No need for crossorigin attribute + value was not passed --- src/youtube2zim/templates/assets/app.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/youtube2zim/templates/assets/app.js b/src/youtube2zim/templates/assets/app.js index bf83f543..2aef291d 100644 --- a/src/youtube2zim/templates/assets/app.js +++ b/src/youtube2zim/templates/assets/app.js @@ -138,7 +138,7 @@ function firstVideo(video) { } videoIntro.innerHTML = '' + '