From 4b727454df966dd2eeea0534daaa328697f70c4f Mon Sep 17 00:00:00 2001
From: Dan Niles <56271899+dan-niles@users.noreply.github.com>
Date: Tue, 4 Jun 2024 09:09:16 +0530
Subject: [PATCH 1/3] Update scraper to generate JSON files for `zimui`

---
 CHANGELOG                          |   2 +
 scraper/pyproject.toml             |   4 +-
 scraper/src/youtube2zim/schemas.py |  94 +++++++++
 scraper/src/youtube2zim/scraper.py | 302 +++++++++++++++++++++++++----
 scraper/src/youtube2zim/youtube.py |  15 +-
 zimui/index.html                   |   4 +-
 zimui/public/favicon.ico           | Bin 4286 -> 0 bytes
 7 files changed, 383 insertions(+), 38 deletions(-)
 create mode 100644 scraper/src/youtube2zim/schemas.py
 delete mode 100644 zimui/public/favicon.ico

diff --git a/CHANGELOG b/CHANGELOG
index 9f9e905a..4cd1b28e 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -12,6 +12,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Move scraper files to `scraper` subfolder and update workflows
 - Bump `requests` package from 2.32.0 to 2.32.2
 - Initialize new Vue.js project in `zimui` subfolder
+- Update dependencies in pyproject.toml (pydantic, pyhumps, python-slugify)
+- Update scraper to generate JSON files for `zimui` (#212)
 
 ## [2.3.0] - 2024-05-22
 
diff --git a/scraper/pyproject.toml b/scraper/pyproject.toml
index 95714248..8d282add 100644
--- a/scraper/pyproject.toml
+++ b/scraper/pyproject.toml
@@ -8,7 +8,7 @@ requires-python = ">=3.12,<3.13"
 description = "Make ZIM file from a Youtube channel, user or playlist(s)"
 readme = "../README.md"
 dependencies = [
-  "python-slugify==3.0.3",
+  "python-slugify==8.0.4",
   "yt-dlp",                      # youtube-dl should be updated as frequently as possible
   "python-dateutil==2.9.0.post0",
   "jinja2==3.1.4",
@@ -16,6 +16,8 @@ dependencies = [
   "requests==2.32.2",
   "kiwixstorage==0.8.3",
   "pif==0.8.2",
+  "pydantic==2.7.2",
+  "pyhumps==3.8.0",
 ]
 dynamic = ["authors", "classifiers", "keywords", "license", "version", "urls"]
 
diff --git a/scraper/src/youtube2zim/schemas.py b/scraper/src/youtube2zim/schemas.py
new file mode 100644
index 00000000..b50547da
--- /dev/null
+++ b/scraper/src/youtube2zim/schemas.py
@@ -0,0 +1,94 @@
+from humps import camelize
+from pydantic import BaseModel
+
+
+class CamelModel(BaseModel):
+    """Model to transform Python snake_case into JSON camelCase."""
+
+    class Config:
+        alias_generator = camelize
+        populate_by_name = True
+
+
+class Author(CamelModel):
+    channel_id: str
+    channel_title: str
+    profile_path: str | None = None
+    banner_path: str | None = None
+
+
+class Subtitle(CamelModel):
+    """Class to serialize data about a YouTube video subtitle."""
+
+    code: str
+    name: str
+
+
+class Video(CamelModel):
+    """Class to serialize data about a YouTube video."""
+
+    id: str
+    title: str
+    description: str
+    author: Author
+    publication_date: str
+    video_path: str
+    thumbnail_path: str | None = None
+    subtitle_path: str | None = None
+    subtitle_list: list[Subtitle]
+    duration: str
+
+
+class VideoPreview(CamelModel):
+    """Class to serialize data about a YouTube video for preview."""
+
+    slug: str
+    id: str
+    title: str
+    thumbnail_path: str | None = None
+    duration: str
+
+
+class Playlist(CamelModel):
+    """Class to serialize data about a YouTube playlist."""
+
+    id: str
+    author: Author
+    title: str
+    description: str
+    publication_date: str
+    thumbnail_path: str | None = None
+    videos: list[VideoPreview]
+    videos_count: int
+
+
+class PlaylistPreview(CamelModel):
+    """Class to serialize data about a YouTube playlist for preview."""
+
+    slug: str
+    id: str
+    title: str
+    thumbnail_path: str | None = None
+    videos_count: int
+    main_video_slug: str
+
+
+class Playlists(CamelModel):
+    """Class to serialize data about a list of YouTube playlists."""
+
+    playlists: list[PlaylistPreview]
+
+
+class Channel(CamelModel):
+    """Class to serialize data about a YouTube channel."""
+
+    id: str
+    title: str
+    description: str
+    channel_name: str
+    channel_description: str
+    profile_path: str | None = None
+    banner_path: str | None = None
+    joined_date: str
+    collection_type: str
+    main_playlist: str | None = None
diff --git a/scraper/src/youtube2zim/scraper.py b/scraper/src/youtube2zim/scraper.py
index ba1e3329..3d844512 100644
--- a/scraper/src/youtube2zim/scraper.py
+++ b/scraper/src/youtube2zim/scraper.py
@@ -36,6 +36,7 @@
 from zimscraperlib.inputs import compute_descriptions
 from zimscraperlib.video.presets import VideoMp4Low, VideoWebmLow
 from zimscraperlib.zim import Creator
+from zimscraperlib.zim.filesystem import FileItem
 from zimscraperlib.zim.metadata import (
     validate_description,
     validate_longdescription,
@@ -54,6 +55,16 @@
     logger,
 )
 from youtube2zim.processing import post_process_video, process_thumbnail
+from youtube2zim.schemas import (
+    Author,
+    Channel,
+    Playlist,
+    PlaylistPreview,
+    Playlists,
+    Subtitle,
+    Video,
+    VideoPreview,
+)
 from youtube2zim.utils import (
     clean_text,
     get_slug,
@@ -66,7 +77,7 @@
     credentials_ok,
     extract_playlists_details_from,
     get_channel_json,
-    # get_videos_authors_info,
+    get_videos_authors_info,
     get_videos_json,
     save_channel_branding,
     skip_deleted_videos,
@@ -317,43 +328,37 @@ def run(self):
             )
         logger.info(f"{nb_videos_msg}.")
 
-        # Commented out for now,
-        # but we have to rework this part to work with the new vuejs zimui
-
         # download videos (and recompress)
-        # logger.info(
-        #     "downloading all videos, subtitles and thumbnails "
-        #     f"(concurrency={self.max_concurrency})"
-        # )
-        # logger.info(f"  format: {self.video_format}")
-        # logger.info(f"  quality: {self.video_quality}")
-        # logger.info(f"  generated-subtitles: {self.all_subtitles}")
-        # if self.s3_storage:
-        #     logger.info(
-        #         f"  using cache: {self.s3_storage.url.netloc} "
-        #         f"with bucket: {self.s3_storage.bucket_name}"
-        #     )
-        # succeeded, failed = self.download_video_files(
-        #     max_concurrency=self.max_concurrency
-        # )
-        # if failed:
-        #     logger.error(f"{len(failed)} video(s) failed to download: {failed}")
-        #     if len(failed) >= len(succeeded):
-        #         logger.critical("More than half of videos failed. exiting")
-        #         raise OSError("Too much videos failed to download")
-
-        # logger.info("retrieve channel-info for all videos (author details)")
-        # get_videos_authors_info(succeeded)
-
-        # logger.info("download all author's profile pictures")
-        # self.download_authors_branding()
+        logger.info(
+            "downloading all videos, subtitles and thumbnails "
+            f"(concurrency={self.max_concurrency})"
+        )
+        logger.info(f"  format: {self.video_format}")
+        logger.info(f"  quality: {self.video_quality}")
+        logger.info(f"  generated-subtitles: {self.all_subtitles}")
+        if self.s3_storage:
+            logger.info(
+                f"  using cache: {self.s3_storage.url.netloc} "
+                f"with bucket: {self.s3_storage.bucket_name}"
+            )
+        succeeded, failed = self.download_video_files(
+            max_concurrency=self.max_concurrency
+        )
+        if failed:
+            logger.error(f"{len(failed)} video(s) failed to download: {failed}")
+            if len(failed) >= len(succeeded):
+                logger.critical("More than half of videos failed. exiting")
+                raise OSError("Too much videos failed to download")
+
+        logger.info("retrieve channel-info for all videos (author details)")
+        get_videos_authors_info(succeeded)
+
+        logger.info("download all author's profile pictures")
+        self.download_authors_branding()
 
         logger.info("update general metadata")
         self.update_metadata()
 
-        # logger.info("creating HTML files")
-        # self.make_html_files(succeeded)
-
         # make zim file
         os.makedirs(self.output_dir, exist_ok=True)
         if not self.no_zim:
@@ -412,6 +417,12 @@ def run(self):
                 logger.debug(f"Preparing zimfile at {self.zim_file.filename}")
                 logger.debug(f"Recursively adding files from {self.build_dir}")
                 self.add_zimui()
+
+                logger.info("creating JSON files")
+                self.make_json_files(succeeded)
+
+                logger.info("Adding files to ZIM")
+                self.add_files_to_zim(self.build_dir, self.zim_file)
             except KeyboardInterrupt:
                 self.zim_file.can_finish = False
                 logger.error("KeyboardInterrupt, exiting.")
@@ -923,6 +934,7 @@ def update_metadata(self):
             method="thumbnail",
             dst=self.build_dir.joinpath("favicon.png"),
         )
+        png_profile_path.unlink()
 
     def make_html_files(self, actual_videos_ids):
         """make up HTML structure to read the content
@@ -1108,3 +1120,227 @@ def to_data_js(video):
 
         # clean videos left out in videos directory
         remove_unused_videos(videos)
+
+    def make_json_files(self, actual_videos_ids):
+        """Generate JSON files to be consumed by the frontend"""
+
+        def remove_unused_videos(videos):
+            video_ids = [video["contentDetails"]["videoId"] for video in videos]
+            for path in self.videos_dir.iterdir():
+                if path.is_dir() and path.name not in video_ids:
+                    logger.debug(f"Removing unused video {path.name}")
+                    shutil.rmtree(path, ignore_errors=True)
+
+        def is_present(video):
+            """whether this video has actually been succeffuly downloaded"""
+            return video["contentDetails"]["videoId"] in actual_videos_ids
+
+        def video_has_channel(videos_channels, video):
+            return video["contentDetails"]["videoId"] in videos_channels
+
+        def get_thumbnail_path(video_id):
+            return f"videos/{video_id}/video.webp"
+
+        def get_subtitles(video_id) -> list[Subtitle]:
+            video_dir = self.videos_dir.joinpath(video_id)
+            languages = [
+                x.stem.split(".")[1]
+                for x in video_dir.iterdir()
+                if x.is_file() and x.name.endswith(".vtt")
+            ]
+
+            def to_subtitle_object(lang):
+                try:
+                    try:
+                        subtitle = get_language_details(
+                            YOUTUBE_LANG_MAP.get(lang, lang)
+                        )
+                    except NotFound:
+                        lang_simpl = re.sub(r"^([a-z]{2})-.+$", r"\1", lang)
+                        subtitle = get_language_details(
+                            YOUTUBE_LANG_MAP.get(lang_simpl, lang_simpl)
+                        )
+                except Exception:
+                    logger.error(f"Failed to get language details for {lang}")
+                    raise
+                return Subtitle(
+                    code=lang,
+                    name=f"{subtitle['english'].title()} - {subtitle['query']}",
+                )
+
+            # Youtube.com sorts subtitles by English name
+            return sorted(map(to_subtitle_object, languages), key=lambda x: x.name)
+
+        def get_videos_list(playlist):
+            videos = load_mandatory_json(
+                self.cache_dir, f"playlist_{playlist.playlist_id}_videos"
+            )
+            videos = list(filter(skip_deleted_videos, videos))
+            videos = list(filter(is_present, videos))
+            videos = list(filter(has_channel, videos))
+            videos = sorted(videos, key=lambda v: v["snippet"]["position"])
+            return videos
+
+        def generate_video_object(video) -> Video:
+            video_id = video["contentDetails"]["videoId"]
+            author = videos_channels[video_id]
+            subtitles_list = get_subtitles(video_id)
+            return Video(
+                id=video_id,
+                title=video["snippet"]["title"],
+                description=video["snippet"]["description"],
+                author=Author(
+                    channel_id=author["channelId"],
+                    channel_title=author["channelTitle"],
+                    profile_path=f"channels/{author['channelId']}/profile.jpg",
+                    banner_path=f"channels/{author['channelId']}/banner.jpg",
+                ),
+                publication_date=video["contentDetails"]["videoPublishedAt"],
+                video_path=f"videos/{video_id}/video.{self.video_format}",
+                thumbnail_path=get_thumbnail_path(video_id),
+                subtitle_path=f"videos/{video_id}" if len(subtitles_list) > 0 else None,
+                subtitle_list=subtitles_list,
+                duration=videos_channels[video_id]["duration"],
+            )
+
+        def generate_video_preview_object(video) -> VideoPreview:
+            video_id = video["contentDetails"]["videoId"]
+            return VideoPreview(
+                slug=get_video_slug(video),
+                id=video_id,
+                title=video["snippet"]["title"],
+                thumbnail_path=get_thumbnail_path(video_id),
+                duration=videos_channels[video_id]["duration"],
+            )
+
+        def get_video_slug(video) -> str:
+            title = video["snippet"]["title"]
+            video_id = video["contentDetails"]["videoId"]
+            return f"{get_slug(title)}-{video_id[:4]}"
+
+        def generate_playlist_object(playlist) -> Playlist:
+            videos = get_videos_list(playlist)
+            return Playlist(
+                id=playlist.playlist_id,
+                title=playlist.title,
+                description=playlist.description,
+                videos=[generate_video_preview_object(video) for video in videos],
+                publication_date=playlist.published_at,
+                author=Author(
+                    channel_id=playlist.creator_id,
+                    channel_title=playlist.creator_name,
+                    profile_path=f"channels/{playlist.creator_id}/profile.jpg",
+                    banner_path=f"channels/{playlist.creator_id}/banner.jpg",
+                ),
+                videos_count=len(videos),
+                thumbnail_path=get_thumbnail_path(
+                    videos[0]["contentDetails"]["videoId"]
+                ),
+            )
+
+        def generate_playlist_preview_object(playlist) -> PlaylistPreview:
+            videos = get_videos_list(playlist)
+            return PlaylistPreview(
+                slug=get_playlist_slug(playlist),
+                id=playlist.playlist_id,
+                title=playlist.title,
+                thumbnail_path=get_thumbnail_path(
+                    videos[0]["contentDetails"]["videoId"]
+                ),
+                videos_count=len(videos),
+                main_video_slug=get_video_slug(videos[0]),
+            )
+
+        def get_playlist_slug(playlist) -> str:
+            return f"{get_slug(playlist.title)}-{playlist.playlist_id[-4:]}"
+
+        videos = load_mandatory_json(self.cache_dir, "videos").values()
+        # filter videos so we only include the ones we could retrieve
+        videos = list(filter(is_present, videos))
+        videos_channels = load_mandatory_json(self.cache_dir, "videos_channels")
+        has_channel = functools.partial(video_has_channel, videos_channels)
+        # filter videos to exclude those for which we have no channel (#76)
+        videos = list(filter(has_channel, videos))
+        for video in videos:
+            slug = get_video_slug(video)
+            self.zim_file.add_item_for(
+                path=f"videos/{slug}.json",
+                title=slug,
+                content=generate_video_object(video).model_dump_json(
+                    by_alias=True, indent=2
+                ),
+                mimetype="application/json",
+                is_front=False,
+            )
+
+        # write playlists JSON files
+        playlist_list = []
+
+        main_playlist_slug = None
+        if len(self.playlists) > 0:
+            main_playlist_slug = get_playlist_slug(
+                self.playlists[0]
+            )  # set first playlist as main playlist
+
+        for playlist in self.playlists:
+            playlist_slug = get_playlist_slug(playlist)
+            playlist_path = f"playlists/{playlist_slug}.json"
+
+            if playlist.playlist_id != self.uploads_playlist_id:
+                playlist_list.append(generate_playlist_preview_object(playlist))
+            else:
+                main_playlist_slug = (
+                    playlist_slug  # set uploads playlist as main playlist
+                )
+
+            self.zim_file.add_item_for(
+                path=playlist_path,
+                title=playlist.title,
+                content=generate_playlist_object(playlist).model_dump_json(
+                    by_alias=True, indent=2
+                ),
+                mimetype="application/json",
+                is_front=False,
+            )
+
+        # write playlists.json file
+        self.zim_file.add_item_for(
+            path="playlists.json",
+            title="Playlists",
+            content=Playlists(playlists=playlist_list).model_dump_json(
+                by_alias=True, indent=2
+            ),
+            mimetype="application/json",
+            is_front=False,
+        )
+
+        # write channel.json file
+        channel_data = get_channel_json(self.main_channel_id)
+        self.zim_file.add_item_for(
+            path="channel.json",
+            title=self.title,
+            content=Channel(
+                id=str(self.main_channel_id),
+                title=str(self.title),
+                description=str(self.description),
+                channel_name=channel_data["snippet"]["title"],
+                channel_description=channel_data["snippet"]["description"],
+                profile_path="profile.jpg",
+                banner_path="banner.jpg",
+                collection_type=self.collection_type,
+                main_playlist=main_playlist_slug,
+                joined_date=channel_data["snippet"]["publishedAt"],
+            ).model_dump_json(by_alias=True, indent=2),
+            mimetype="application/json",
+            is_front=False,
+        )
+
+        # clean videos left out in videos directory
+        remove_unused_videos(videos)
+
+    def add_files_to_zim(self, dir_path: Path, zim_file: Creator):
+        """recursively add a path to a zim file"""
+        for file_path in filter(
+            lambda file_path: file_path.is_file(), dir_path.rglob("*")
+        ):
+            zim_file.add_item(FileItem(dir_path, file_path))
diff --git a/scraper/src/youtube2zim/youtube.py b/scraper/src/youtube2zim/youtube.py
index 2725e14c..f9cd6909 100644
--- a/scraper/src/youtube2zim/youtube.py
+++ b/scraper/src/youtube2zim/youtube.py
@@ -24,12 +24,21 @@
 
 
 class Playlist:
-    def __init__(self, playlist_id, title, description, creator_id, creator_name):
+    def __init__(
+        self,
+        playlist_id,
+        title,
+        description,
+        creator_id,
+        creator_name,
+        published_at=None,
+    ):
         self.playlist_id = playlist_id
         self.title = title
         self.description = description
         self.creator_id = creator_id
         self.creator_name = creator_name
+        self.published_at = published_at
         self.slug = get_slug(title, js_safe=True)
 
     @classmethod
@@ -41,6 +50,7 @@ def from_id(cls, playlist_id):
             description=playlist_json["snippet"]["description"],
             creator_id=playlist_json["snippet"]["channelId"],
             creator_name=playlist_json["snippet"]["channelTitle"],
+            published_at=playlist_json["snippet"]["publishedAt"],
         )
 
     def to_dict(self):
@@ -221,7 +231,7 @@ def retrieve_videos_for(videos_ids):
                 VIDEOS_API,
                 params={
                     "id": ",".join(videos_ids),
-                    "part": "snippet",
+                    "part": "snippet,contentDetails",
                     "key": YOUTUBE.api_key,
                     "maxResults": RESULTS_PER_PAGE,
                     "pageToken": page_token,
@@ -238,6 +248,7 @@ def retrieve_videos_for(videos_ids):
                         item["id"]: {
                             "channelId": item["snippet"]["channelId"],
                             "channelTitle": item["snippet"]["channelTitle"],
+                            "duration": item["contentDetails"]["duration"],
                         }
                     }
                 )
diff --git a/zimui/index.html b/zimui/index.html
index cd531825..22929b5e 100644
--- a/zimui/index.html
+++ b/zimui/index.html
@@ -2,13 +2,13 @@
 <html lang="en">
   <head>
     <meta charset="UTF-8" />
-    <link rel="icon" href="/favicon.ico" />
+    <link rel="icon" href="./favicon.png" />
     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
     <title>YouTube in a ZIM</title>
   </head>
 
   <body>
     <div id="app"></div>
-    <script type="module" src="/src/main.ts"></script>
+    <script type="module" src="./src/main.ts"></script>
   </body>
 </html>
diff --git a/zimui/public/favicon.ico b/zimui/public/favicon.ico
deleted file mode 100644
index df36fcfb72584e00488330b560ebcf34a41c64c2..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 4286
zcmds*O-Phc6o&64GDVCEQHxsW(p4>LW*W<827=Unuo8sGpRux(DN@jWP-e29Wl%wj
zY84_aq9}^Am9-cWTD5GGEo#+5Fi2wX_P*bo+xO!)p*7B;iKlbFd(U~_d(U?#hLj56
zPhFkj-|A6~Qk#@g^#D^U0XT1cu=c-vu1+SElX9NR;kzAUV(q0|dl0|%h|dI$%VICy
zJnu2^L*Te9JrJMGh%-P79CL0}dq92RGU6gI{v2~|)p}sG5x0U*z<8U;Ij*hB9z?ei
z@g6Xq-pDoPl=MANPiR7%172VA%r)kev<ISBgE$F{SFy+(=9Z)f)De0Se}ZDZW}Z3B
zElCeVrw;K0Fdl_Cg=gZOFXXc3pL)Q05CAuT+XucQ<8g~3dteP~|7s7c6QYP;fy;mF
zMN;>tV-_5H*QJKFmd;8yA$98zCxBZYXTNZ#QFk2(TX0;Y2dt&WitL#$96|gJY=3xX
zpCoi|YNzgO3R`f@IiEeSmKrPSf#h#Qd<$%Ej^RIeeYfsxhPMOG`S`Pz8q``=511zm
zAm)MX5AV^5xIWPyEu7u>qYs?pn$I4nL9J!=K=SGlKLXpE<5x+2cDTXq?brj?n6sp=
zphe9;_JHf40^9~}9i08r{XM$7HB!`{Ys~TK0kx<}ZQng`UPvH*11|q7&l9?@FQz;8
zx!=3<4seY*%=OlbCbcae?<QnEgvj4i?s}Yk=qA2z`-^*<eK3c)MS4JOdbsTQEOa0)
z0NWqlna2rzs>5^V_}*K>Uo6ZWV8mTyE^B=DKy7-sdLYkR5Z?paTgK-zyIkKjIcpyO
z{+uIt&YSa_$QnN_@t~L014dyK(fOOo+W*MIxbA6Ndgr=Y!f#Tokqv}n<7-9qfHkc3
z=>a|HWqcX8fzQCT=dqVbogRq!-S>H%yA{1w#2Pn;=e>JiEj7Hl;zdt-2f+j2%DeVD
zsW0Ab)ZK@0cIW%W7<X*Er!BfRbvU93$DH%#v6dRt^6HBxz1xBNHx=$&_Gv<&J}Ljk
zJN<Fzx(`Oe@KgQ0F$<14=XV#WK`o#6Ku>z}H{&~yGhn~D;aiP4=;m-HCo`BEI+Kd6
z={Xwx{T<?%b6i9IjI)Ls)S{-*mq<@~R{?$}ZKjf;^k75i_}(2MXt}^SEBVg7AI@28
zo_uPg2V)_e-`2Ois=PYoe%9u*n9({PFR)OnHJPi{dNx>Kx<YG`4QQ>D#iCLfl2<BD
h7L=-;Q>vQGDitKtN>z|-AdCN|$jTFDg0m3O`WLD4_s#$S


From 99817858463334030f4807a0ec5f975ac63189c8 Mon Sep 17 00:00:00 2001
From: Dan Niles <56271899+dan-niles@users.noreply.github.com>
Date: Tue, 11 Jun 2024 13:57:31 +0530
Subject: [PATCH 2/3] Remove old jinja template files and `make_html_files` method in scraper.py

---
 CHANGELOG                                     |   1 +
 scraper/src/youtube2zim/scraper.py            | 190 ------------------
 .../src/youtube2zim/templates/article.html    |  44 ----
 scraper/src/youtube2zim/templates/home.html   |  63 ------
 scraper/src/youtube2zim/utils.py              |   9 -
 5 files changed, 1 insertion(+), 306 deletions(-)
 delete mode 100644 scraper/src/youtube2zim/templates/article.html
 delete mode 100644 scraper/src/youtube2zim/templates/home.html

diff --git a/CHANGELOG b/CHANGELOG
index 4cd1b28e..11f68e4f 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Initialize new Vue.js project in `zimui` subfolder
 - Update dependencies in pyproject.toml (pydantic, pyhumps, python-slugify)
 - Update scraper to generate JSON files for `zimui` (#212)
+- Remove old UI files and methods: template files (home.html, article.html) and `make_html_files` method in scraper.py
 
 ## [2.3.0] - 2024-05-22
 
diff --git a/scraper/src/youtube2zim/scraper.py b/scraper/src/youtube2zim/scraper.py
index 3d844512..2c61f9cc 100644
--- a/scraper/src/youtube2zim/scraper.py
+++ b/scraper/src/youtube2zim/scraper.py
@@ -10,7 +10,6 @@
 import concurrent.futures
 import datetime
 import functools
-import json
 import locale
 import os
 import re
@@ -20,10 +19,7 @@
 from gettext import gettext as _
 from pathlib import Path
 
-import jinja2
 import yt_dlp
-from babel.dates import format_date
-from dateutil import parser as dt_parser
 from kiwixstorage import KiwixStorage
 from pif import get_public_ip
 from zimscraperlib.download import stream_file
@@ -70,7 +66,6 @@
     get_slug,
     load_json,
     load_mandatory_json,
-    render_template,
     save_json,
 )
 from youtube2zim.youtube import (
@@ -936,191 +931,6 @@ def update_metadata(self):
         )
         png_profile_path.unlink()
 
-    def make_html_files(self, actual_videos_ids):
-        """make up HTML structure to read the content
-
-        /home.html                                  Homepage
-
-        for each video:
-            - <slug-title>.html                     HTML article
-            - videos/<videoId>/video.<ext>          video file
-            - videos/<videoId>/video.<lang>.vtt     subtititle(s)
-            - videos/<videoId>/video.webp            template
-        """
-
-        def remove_unused_videos(videos):
-            video_ids = [video["contentDetails"]["videoId"] for video in videos]
-            for path in self.videos_dir.iterdir():
-                if path.is_dir() and path.name not in video_ids:
-                    logger.debug(f"Removing unused video {path.name}")
-                    shutil.rmtree(path, ignore_errors=True)
-
-        def is_present(video):
-            """whether this video has actually been succeffuly downloaded"""
-            return video["contentDetails"]["videoId"] in actual_videos_ids
-
-        def video_has_channel(videos_channels, video):
-            return video["contentDetails"]["videoId"] in videos_channels
-
-        def get_subtitles(video_id):
-            video_dir = self.videos_dir.joinpath(video_id)
-            languages = [
-                x.stem.split(".")[1]
-                for x in video_dir.iterdir()
-                if x.is_file() and x.name.endswith(".vtt")
-            ]
-
-            def to_jinja_subtitle(lang):
-                try:
-                    try:
-                        subtitle = get_language_details(
-                            YOUTUBE_LANG_MAP.get(lang, lang)
-                        )
-                    except NotFound:
-                        lang_simpl = re.sub(r"^([a-z]{2})-.+$", r"\1", lang)
-                        subtitle = get_language_details(
-                            YOUTUBE_LANG_MAP.get(lang_simpl, lang_simpl)
-                        )
-                except Exception:
-                    logger.error(f"Failed to get language details for {lang}")
-                    raise
-                return {
-                    "code": lang,
-                    # Youtube.com uses `English - code` format.
-                    # Note: videojs displays it lowercased anyway
-                    "name": f"{subtitle['english'].title()} - {subtitle['query']}",
-                }
-
-            # Youtube.com sorts subtitles by English name
-            return sorted(map(to_jinja_subtitle, languages), key=lambda x: x["name"])
-
-        env = jinja2.Environment(
-            loader=jinja2.FileSystemLoader(str(self.templates_dir)), autoescape=True
-        )
-
-        videos = load_mandatory_json(self.cache_dir, "videos").values()
-        # filter videos so we only include the ones we could retrieve
-        videos = list(filter(is_present, videos))
-        videos_channels = load_mandatory_json(self.cache_dir, "videos_channels")
-        has_channel = functools.partial(video_has_channel, videos_channels)
-        # filter videos to exclude those for which we have no channel (#76)
-        videos = list(filter(has_channel, videos))
-        for video in videos:
-            video_id = video["contentDetails"]["videoId"]
-            title = video["snippet"]["title"]
-            slug = get_slug(title)
-            description = video["snippet"]["description"]
-            publication_date = dt_parser.parse(
-                video["contentDetails"]["videoPublishedAt"]
-            )
-            author = videos_channels[video_id]
-            subtitles = get_subtitles(video_id)
-            video_url = f"https://www.youtube.com/watch?v={video_id}"
-
-            html = render_template(
-                env=env,
-                template_name="article.html",
-                video_id=video_id,
-                video_format=self.video_format,
-                author=author,
-                title=title,
-                description=description,
-                date=format_date(publication_date, format="medium", locale=self.locale),
-                subtitles=subtitles,
-                url=video_url,
-                channel_id=video["snippet"]["channelId"],
-                color=self.main_color,
-                background_color=self.secondary_color,
-                autoplay=self.autoplay,
-            )
-            with open(
-                self.build_dir.joinpath(f"{slug}.html"), "w", encoding="utf-8"
-            ) as fp:
-                fp.write(html)
-
-        # build homepage
-        html = render_template(
-            env=env,
-            template_name="home.html",
-            playlists=self.playlists,
-            video_format=self.video_format,
-            title=self.title,
-            description=self.description,
-            color=self.main_color,
-            background_color=self.secondary_color,
-            page_label=_("Page {current}/{total}"),
-            back_label=_("Back to top"),
-        )
-        with open(self.build_dir.joinpath("home.html"), "w", encoding="utf-8") as fp:
-            fp.write(html)
-
-        # rewrite app.js including `format`
-        with open(self.assets_dir.joinpath("app.js"), "w", encoding="utf-8") as fp:
-            fp.write(
-                render_template(
-                    env=env,
-                    template_name="assets/app.js",
-                    video_format=self.video_format,
-                )
-            )
-
-        # rewrite app.js including `pagination`
-        with open(self.assets_dir.joinpath("db.js"), "w", encoding="utf-8") as fp:
-            fp.write(
-                render_template(
-                    env=env,
-                    template_name="assets/db.js",
-                    NB_VIDEOS_PER_PAGE=self.nb_videos_per_page,
-                )
-            )
-
-        # write list of videos in data.js
-        def to_data_js(video):
-            return {
-                "id": video["contentDetails"]["videoId"],
-                "title": video["snippet"]["title"],
-                "slug": get_slug(video["snippet"]["title"]),
-                "description": video["snippet"]["description"],
-                "subtitles": get_subtitles(video["contentDetails"]["videoId"]),
-                "thumbnail": str(
-                    Path("videos").joinpath(
-                        video["contentDetails"]["videoId"], "video.webp"
-                    )
-                ),
-            }
-
-        with open(self.assets_dir.joinpath("data.js"), "w", encoding="utf-8") as fp:
-            # write all playlists as they are
-            for playlist in self.playlists:
-                # retrieve list of videos for PL
-                playlist_videos = load_mandatory_json(
-                    self.cache_dir, f"playlist_{playlist.playlist_id}_videos"
-                )
-                # filtering-out missing ones (deleted or not downloaded)
-                playlist_videos = list(filter(skip_deleted_videos, playlist_videos))
-                playlist_videos = list(filter(is_present, playlist_videos))
-                playlist_videos = list(filter(has_channel, playlist_videos))
-                # sorting them based on playlist
-                playlist_videos.sort(key=lambda v: v["snippet"]["position"])
-
-                fp.write(
-                    "var json_{slug} = {json_str};\n".format(
-                        slug=playlist.slug,
-                        json_str=json.dumps(
-                            list(map(to_data_js, playlist_videos)), indent=4
-                        ),
-                    )
-                )
-
-        # write a metadata.json file with some content-related data
-        with open(
-            self.build_dir.joinpath("metadata.json"), "w", encoding="utf-8"
-        ) as fp:
-            json.dump({"video_format": self.video_format}, fp, indent=4)
-
-        # clean videos left out in videos directory
-        remove_unused_videos(videos)
-
     def make_json_files(self, actual_videos_ids):
         """Generate JSON files to be consumed by the frontend"""
 
diff --git a/scraper/src/youtube2zim/templates/article.html b/scraper/src/youtube2zim/templates/article.html
deleted file mode 100644
index cd9a70a7..00000000
--- a/scraper/src/youtube2zim/templates/article.html
+++ /dev/null
@@ -1,44 +0,0 @@
-<!DOCTYPE html><html><head><meta charset="utf-8">
-        <title>{{ title }}</title>
-        <meta content="utf-8" http-equiv="encoding">
-        <meta content="text/html;charset=utf-8" http-equiv="Content-Type">
-        <meta name="viewport" content="width=device-width, initial-scale=1">
-        <link href="assets/videojs/video-js.min.css" rel="stylesheet">
-        <link href="assets/article.css" rel="stylesheet" type="text/css">
-        <style type="text/css">
-           body {background: {{ background_color }};}
-        </style>
-        <link id="favicon" rel="shortcut icon" href="profile.jpg" type="image/jpeg">
-        <script src="assets/videojs/video.min.js"></script>
-        <script src="assets/ogvjs/ogv-support.js"></script>
-        <script src="assets/ogvjs/ogv.js"></script>
-        <script src="assets/videojs-ogvjs.js"></script>
-        <script src="assets/polyfills.js"></script>
-        <script src="assets/webp-hero.bundle.js"></script>
-    </head>
-    <body>
-        <div id="content">
-            <a href="home.html"><img id="backtolist" src="channels/{{ author.channelId }}/profile.jpg" title="Back to the list" /></a>
-            <p id="speaker">{{ author.channelTitle }}</p>
-            <p id="title">{{ title }}</p>
-            <video  class="video-js vjs-default-skin"
-                    id="video_container"
-                    poster="videos/{{ video_id }}/video.webp"
-                    width="480px" height="270px"
-                    controls="true"
-                    {% if autoplay %}autoplay="true"{% endif %}
-                    preload="true"
-                    data-setup='{"techOrder": ["html5", "ogvjs"], "ogvjs": {"base": "assets/ogvjs"}, "autoplay": {% if autoplay %}true{% else %}false{% endif %}, "preload": true, "controls": true, "controlBar": {"pictureInPictureToggle":false}}'>
-                <source src="videos/{{ video_id }}/video.{{ video_format }}" type="video/{{ video_format }}" />{% if subtitles %}
-                {% for language in subtitles %}<track kind="subtitles" src="videos/{{ video_id }}/video.{{ language.code }}.vtt" srclang="{{ language.code }}" label="{{ language.name }}" />
-                {% endfor %}{% endif %}
-            </video>
-            <div id="description">
-                <p>{{ description }}</p>
-            </div>
-            <div id="date">{{ date }}</div>
-        </div>
-        <script src="assets/jquery.min.js" type="text/javascript"></script>
-        <script src="assets/webp-trigger.js"></script>
-    </body>
-</html>
diff --git a/scraper/src/youtube2zim/templates/home.html b/scraper/src/youtube2zim/templates/home.html
deleted file mode 100644
index cdeeb717..00000000
--- a/scraper/src/youtube2zim/templates/home.html
+++ /dev/null
@@ -1,63 +0,0 @@
-<!DOCTYPE html><html><head><meta charset="utf-8">
-    <title>{{ title }}</title>
-    <meta content="utf-8" http-equiv="encoding">
-    <meta content="text/html;charset=utf-8" http-equiv="Content-Type">
-    <meta name="viewport" content="width=device-width, initial-scale=1">
-    <link href="assets/chosen/chosen.min.css" rel="stylesheet" type="text/css">
-    <link href="assets/videojs/video-js.min.css" rel="stylesheet">
-    <link href="assets/home.css" rel="stylesheet" type="text/css">
-    <style type="text/css">
-    a:hover { color: {{ color }}; }
-    a.nostyle:hover { background-color: {{ color }}; }
-    body { background: {{ background_color }}; }
-    </style>
-    <link id="favicon" rel="shortcut icon" href="profile.jpg" type="image/jpeg">
-    <script src="assets/videojs/video.min.js"></script>
-    <script src="assets/ogvjs/ogv-support.js"></script>
-    <script src="assets/ogvjs/ogv.js"></script>
-    <script src="assets/videojs-ogvjs.js"></script>
-    <script src="assets/polyfills.js"></script>
-    <script src="assets/webp-hero.bundle.js"></script>
-  </head>
-  <body>
-    <div class="container">
-      <div id="header">
-        <a href="home.html" id="top-link"><img id="header-profile" src="profile.jpg"></a>
-      </div>
-      <div id="header-line"><p>{{ title }}</p></div>
-      <div class="header-playlists">
-        <form name="playlist" id="header-playlists" class="{% if playlists|length < 2 %}hidden{% endif %}"><select class="chosen-select" name="list">
-        {% for playlist in playlists %}<option value="{{ playlist.slug }}">{{ playlist.title }}</option>{% endfor %}</select>
-        </form>
-      </div>
-      <table>
-        <tr>
-          <td>
-            <div class="pagination">
-              <div class="left-arrow">&#10096;</div>
-              <span class="pagination-text" data-format="{{ page_label }}"></span>
-              <div class="right-arrow">&#10097;</div>
-            </div>
-          </td>
-        </tr>
-      </table>
-      <div id="grid-container">
-        <div id="video-intro" class="rig grid">
-        </div>
-     <ul id="video-items" class="rig grid"></ul>
-    </div>
-    <div class="backtotop"><span>{{ back_label }}</span></div>
-    <div class="pagination bottom">
-      <div class="left-arrow">&#10096;</div>
-      <span class="pagination-text" data-format="{{ page_label }}"></span>
-      <div class="right-arrow">&#10097;</div>
-    </div>
-  </div>
-  <script src="assets/jquery.min.js" type="text/javascript"></script>
-  <script src="assets/chosen/chosen.jquery.js" type="text/javascript"></script>
-  <script src="assets/webp-trigger.js"></script>
-  <script src="assets/data.js"></script>
-  <script src="assets/db.js"></script>
-  <script src="assets/app.js"></script>
-</body>
-</html>
diff --git a/scraper/src/youtube2zim/utils.py b/scraper/src/youtube2zim/utils.py
index 427fd110..3398e17f 100644
--- a/scraper/src/youtube2zim/utils.py
+++ b/scraper/src/youtube2zim/utils.py
@@ -4,7 +4,6 @@
 import json
 from pathlib import Path
 
-import jinja2
 from slugify import slugify
 
 
@@ -45,11 +44,3 @@ def load_mandatory_json(cache_dir: Path, key):
 def has_argument(arg_name, all_args):
     """whether --arg_name is specified in all_args"""
     return list(filter(lambda x: x.startswith(f"--{arg_name}"), all_args))
-
-
-def render_template(env: jinja2.Environment, template_name: str, **kwargs):
-    """render a Jinja template and ensures that result is a string"""
-    html = env.get_template(template_name).render(kwargs)
-    if not isinstance(html, str):
-        raise Exception("Jinja template did not returned a string")
-    return html

From 0d7050f2ff8a5e1a5b8d17cebe5e784fe1a4cf42 Mon Sep 17 00:00:00 2001
From: Dan Niles <56271899+dan-niles@users.noreply.github.com>
Date: Tue, 11 Jun 2024 14:00:04 +0530
Subject: [PATCH 3/3] Remove `locale` folder and files used for translation

---
 CHANGELOG                                     |  1 +
 Dockerfile                                    |  1 -
 .../locale/fr/LC_MESSAGES/messages.po         | 35 -------------------
 3 files changed, 1 insertion(+), 36 deletions(-)
 delete mode 100644 scraper/src/youtube2zim/locale/fr/LC_MESSAGES/messages.po

diff --git a/CHANGELOG b/CHANGELOG
index 11f68e4f..57f8f54e 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Update dependencies in pyproject.toml (pydantic, pyhumps, python-slugify)
 - Update scraper to generate JSON files for `zimui` (#212)
 - Remove old UI files and methods: template files (home.html, article.html) and `make_html_files` method in scraper.py
+- Remove broken locale folder and files used for translation; translation will be restored with #222
 
 ## [2.3.0] - 2024-05-22
 
diff --git a/Dockerfile b/Dockerfile
index 17b1fb36..80a3f4ab 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -11,7 +11,6 @@ LABEL org.opencontainers.image.source https://github.com/openzim/youtube
 # Install necessary packages
 RUN apt-get update \
      && apt-get install -y --no-install-recommends \
-     locales-all \
      wget \
      unzip \
      ffmpeg \
diff --git a/scraper/src/youtube2zim/locale/fr/LC_MESSAGES/messages.po b/scraper/src/youtube2zim/locale/fr/LC_MESSAGES/messages.po
deleted file mode 100644
index 84f1cc09..00000000
--- a/scraper/src/youtube2zim/locale/fr/LC_MESSAGES/messages.po
+++ /dev/null
@@ -1,35 +0,0 @@
-# French translations for PROJECT.
-# Copyright (C) 2019 ORGANIZATION
-# This file is distributed under the same license as the PROJECT project.
-# FIRST AUTHOR <EMAIL@ADDRESS>, 2019.
-#
-msgid ""
-msgstr ""
-"Project-Id-Version: PROJECT VERSION\n"
-"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n"
-"POT-Creation-Date: 2020-09-25 08:13+0000\n"
-"PO-Revision-Date: 2019-09-05 11:50+0000\n"
-"Last-Translator: reg <rgaudin@gmail.com>\n"
-"Language: fr\n"
-"Language-Team: \n"
-"Plural-Forms: nplurals=2; plural=(n > 1)\n"
-"MIME-Version: 1.0\n"
-"Content-Type: text/plain; charset=utf-8\n"
-"Content-Transfer-Encoding: 8bit\n"
-"Generated-By: Babel 2.8.0\n"
-
-#: youtube2zim/scraper.py:738
-msgid "Youtube Channel “{title}”"
-msgstr "Chaîne Youtube « {title} »"
-
-#: youtube2zim/scraper.py:742
-msgid "Youtube Channels"
-msgstr "Chaînes Youtube"
-
-#: youtube2zim/scraper.py:871
-msgid "Page {current}/{total}"
-msgstr "Page {current}/{total}"
-
-#: youtube2zim/scraper.py:872
-msgid "Back to top"
-msgstr "Haut"