Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix: differentiate shorts, lives and long videos #371

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]


### Changed

- Differentiate user uploaded shorts, lives & long videos (#367)

### Fixed

- Corrected the short video resolution in the UI (#366)
- Check for empty playlists after filtering, and after downloading videos (#375)
Expand Down
8 changes: 3 additions & 5 deletions scraper/src/youtube2zim/playlists/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,9 @@ def run(self):
(
playlists,
main_channel_id,
uploads_playlist_id,
user_long_uploads_playlist_id,
user_short_uploads_playlist_id,
user_lives_playlist_id,
is_playlist,
) = extract_playlists_details_from(self.youtube_id)

Expand All @@ -106,10 +108,6 @@ def run(self):
shutil.rmtree(self.build_dir, ignore_errors=True)

for playlist in playlists:
if playlist.playlist_id == uploads_playlist_id:
logger.info(f"Skipping playlist {playlist.playlist_id} (uploads one)")
continue

logger.info(f"Executing youtube2zim for playlist {playlist.playlist_id}")
success, process = self.run_playlist_zim(playlist)
if success:
Expand Down
5 changes: 4 additions & 1 deletion scraper/src/youtube2zim/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,10 @@ class Channel(CamelModel):
profile_path: str | None = None
banner_path: str | None = None
joined_date: str
main_playlist: str | None = None
first_playlist: str | None = None
user_long_uploads_playlist: str | None = None
user_short_uploads_playlist: str | None = None
user_lives_playlist: str | None = None
playlist_count: int


Expand Down
66 changes: 24 additions & 42 deletions scraper/src/youtube2zim/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,9 @@

# process-related
self.playlists = []
self.uploads_playlist_id = None
self.user_long_uploads_playlist_id = None
self.user_short_uploads_playlist_id = None
self.user_lives_playlist_id = None
self.videos_ids = []
self.video_ids_count = 0
self.videos_processed = 0
Expand Down Expand Up @@ -229,30 +231,6 @@
def is_single_channel(self):
return len({pl.creator_id for pl in self.playlists}) == 1

@property
def sorted_playlists(self):
"""sorted list of playlists (by title) but with Uploads one at first if any"""
if len(self.playlists) <= 1:
return self.playlists

sorted_playlists = sorted(self.playlists, key=lambda x: x.title)
index = 0
# make sure our Uploads, special playlist is first
if self.uploads_playlist_id:
try:
index = [
index
for index, p in enumerate(sorted_playlists)
if p.playlist_id == self.uploads_playlist_id
][-1]
except Exception:
index = 0
return (
[sorted_playlists[index]]
+ sorted_playlists[0:index]
+ sorted_playlists[index + 1 :]
)

def run(self):
"""execute the scraper step by step"""

Expand All @@ -278,170 +256,170 @@
# check that build_dir is correct
if not self.build_dir.exists() or not self.build_dir.is_dir():
raise OSError(f"Incorrect build_dir: {self.build_dir}")

logger.info(f"starting youtube scraper for {self.youtube_id}")
logger.info(f"preparing build folder at {self.build_dir.resolve()}")
self.prepare_build_folder()

logger.info("testing Youtube credentials")
if not credentials_ok():
raise ValueError(
"Unable to connect to Youtube API v3. check `API_KEY`."
)

if self.s3_url_with_credentials and not self.s3_credentials_ok():
raise ValueError(
"Unable to connect to Optimization Cache. Check its URL."
)

# fail early if supplied branding files are missing
self.check_branding_values()

logger.info("compute playlists list to retrieve")
self.extract_playlists()

logger.info(
".. {} playlists:\n {}".format(
len(self.playlists),
"\n ".join([p.playlist_id for p in self.playlists]),
)
)

logger.info("compute list of videos")
self.extract_videos_list()

self.video_ids_count = len(self.videos_ids)
nb_videos_msg = f".. {self.video_ids_count} videos"
if self.dateafter.start.year != 1:
nb_videos_msg += (
f" in date range: {self.dateafter.start} - {datetime.date.today()}"
)
logger.info(f"{nb_videos_msg}.")

# set a timer to report progress only every 10 seconds
every(10).seconds.do(self.report_progress)

logger.info("update general metadata")
self.update_metadata()

if not self.title:
raise Exception("title is mandatory")
if not self.description:
raise Exception("description is mandatory")
if not self.creator:
raise Exception("creator is mandatory")

# check that illustration is correct
illustration = "favicon.png"
illustration_path = self.build_dir / illustration
if not illustration_path.exists() or not illustration_path.is_file():
raise OSError(
f"Incorrect illustration: {illustration} ({illustration_path})"
)
with open(illustration_path, "rb") as fh:
illustration_data = fh.read()

logger.info("building ZIM file")
self.zim_file = Creator(
filename=self.output_dir / self.fname,
main_path="index.html",
ignore_duplicates=True,
disable_metadata_checks=self.disable_metadata_checks,
)
self.zim_file.config_metadata(
Name=self.name,
Language=self.language,
Title=self.title,
Description=self.description,
LongDescription=self.long_description,
Creator=self.creator,
Publisher=self.publisher,
Tags=";".join(self.tags) if self.tags else "",
Scraper=SCRAPER,
Date=datetime.date.today(),
Illustration_48x48_at_1=illustration_data,
)
self.zim_file.start()

logger.debug(f"Preparing zimfile at {self.zim_file.filename}")

logger.info("add main channel branding to ZIM")
self.add_main_channel_branding_to_zim()

logger.debug(f"add zimui files from {self.zimui_dist}")
self.add_zimui()

# download videos (and recompress)
logger.info(
"downloading all videos, subtitles and thumbnails "
f"(concurrency={self.max_concurrency})"
)
logger.info(f" format: {self.video_format}")
logger.info(f" quality: {self.video_quality}")
logger.info(f" generated-subtitles: {self.all_subtitles}")
if self.s3_storage:
logger.info(
f" using cache: {self.s3_storage.url.netloc} "
f"with bucket: {self.s3_storage.bucket_name}"
)
succeeded, failed = self.download_video_files(
max_concurrency=self.max_concurrency
)
if failed:
logger.error(f"{len(failed)} video(s) failed to download: {failed}")
if len(failed) >= len(succeeded):
logger.critical("More than half of videos failed. exiting")
raise OSError("Too much videos failed to download")

logger.info("retrieve channel-info for all videos (author details)")
get_videos_authors_info(succeeded)

logger.info("download all author's profile pictures")
self.download_authors_branding()

logger.info("creating JSON files")
self.make_json_files(succeeded)
except KeyboardInterrupt:
logger.error("KeyboardInterrupt, exiting.")
return 1
except Exception as exc:
logger.error(f"Interrupting process due to error: {exc}")
logger.exception(exc)
return 1
else:
logger.info("Finishing ZIM file…")
self.zim_file.finish()
finally:
self.report_progress()
logger.info("removing temp folder")
shutil.rmtree(self.build_dir, ignore_errors=True)

logger.info("all done!")

def add_zimui(self):
logger.info(f"Adding files in {self.zimui_dist}")
for file in self.zimui_dist.rglob("*"):
if file.is_dir():
continue
path = str(Path(file).relative_to(self.zimui_dist))
logger.debug(f"Adding {path} to ZIM")
if path == "index.html": # Change index.html title and add to ZIM
index_html_path = self.zimui_dist / path
html_content = index_html_path.read_text(encoding="utf-8")
new_html_content = re.sub(
r"(<title>)(.*?)(</title>)",
rf"\1{self.title}\3",
html_content,
flags=re.IGNORECASE,
)
self.zim_file.add_item_for(
path=path,
content=new_html_content,
mimetype="text/html",
is_front=True,
)
else:
self.zim_file.add_item_for(

Check notice on line 422 in scraper/src/youtube2zim/scraper.py

View check run for this annotation

codefactor.io / CodeFactor

scraper/src/youtube2zim/scraper.py#L259-L422

Complex Method
path,
fpath=file,
is_front=False,
Expand Down Expand Up @@ -552,7 +530,9 @@
(
self.playlists,
self.main_channel_id,
self.uploads_playlist_id,
self.user_long_uploads_playlist_id,
self.user_short_uploads_playlist_id,
self.user_lives_playlist_id,
self.is_playlist,
) = extract_playlists_details_from(self.youtube_id)

Expand Down Expand Up @@ -960,76 +940,76 @@
if self.is_playlist and len(self.playlists) == 1
else clean_text(main_channel_json["snippet"]["description"])
) or "-"
self.title = self.title or auto_title or "-"
self.description, self.long_description = compute_descriptions(
default_description=auto_description,
user_description=self.description,
user_long_description=self.long_description,
)

if self.creator is None:
if self.is_single_channel:
self.creator = _("Youtube Channel “{title}”").format(
title=main_channel_json["snippet"]["title"]
)
else:
self.creator = _("Youtube Channels")

self.tags = self.tags or ["youtube"]
if "_videos:yes" not in self.tags:
self.tags.append("_videos:yes")

# copy our main_channel branding into /(profile|banner).jpg if not supplied
if not self.profile_path.exists():
shutil.copy(
self.channels_dir.joinpath(self.main_channel_id, "profile.jpg"),
self.profile_path,
)

# set colors from images if not supplied
if self.main_color is None or self.secondary_color is None:
profile_main, profile_secondary = get_colors(self.profile_path)
self.main_color = self.main_color or profile_main
self.secondary_color = self.secondary_color or profile_secondary

# convert profile image to png for favicon
png_profile_path = self.build_dir.joinpath("profile.png")
convert_image(self.profile_path, png_profile_path)

resize_image(
png_profile_path,
width=48,
height=48,
method="thumbnail",
dst=self.build_dir.joinpath("favicon.png"),
)
png_profile_path.unlink()

def make_json_files(self, actual_videos_ids):
"""Generate JSON files to be consumed by the frontend"""

def remove_unused_videos():
for path in self.videos_dir.iterdir():
if path.is_dir() and path.name not in actual_videos_ids:
logger.debug(f"Removing unused video {path.name}")
shutil.rmtree(path, ignore_errors=True)

def is_present(video):
            """whether this video has actually been successfully downloaded"""
return video["contentDetails"]["videoId"] in actual_videos_ids

def video_has_channel(videos_channels, video):
return video["contentDetails"]["videoId"] in videos_channels

def get_thumbnail_path(video_id):
return f"videos/{video_id}/video.webp"

def get_subtitles(video_id) -> list[Subtitle]:
subtitles_list = load_json(self.subtitles_cache_dir, video_id)
if subtitles_list is None:
return []
return subtitles_list["subtitles"]

Check notice on line 1012 in scraper/src/youtube2zim/scraper.py

View check run for this annotation

codefactor.io / CodeFactor

scraper/src/youtube2zim/scraper.py#L943-L1012

Complex Method
def get_videos_list(playlist):
videos = load_mandatory_json(
self.cache_dir, f"playlist_{playlist.playlist_id}_videos"
Expand All @@ -1045,6 +1025,7 @@
author = videos_channels[video_id]
subtitles_list = get_subtitles(video_id)
channel_data = get_channel_json(author["channelId"])

return Video(
id=video_id,
title=video["snippet"]["title"],
Expand Down Expand Up @@ -1151,10 +1132,13 @@
)

# write playlists JSON files
playlist_list = []
home_playlist_list = []
playlist_list: list[PlaylistPreview] = []
home_playlist_list: list[Playlist] = []

user_long_uploads_playlist_slug = None
user_short_uploads_playlist_slug = None
user_lives_playlist_slug = None

main_playlist_slug = None
empty_playlists = list(
filter(lambda playlist: len(get_videos_list(playlist)) == 0, self.playlists)
)
Expand All @@ -1167,10 +1151,6 @@
if len(self.playlists) == 0:
raise Exception("No playlist succeeded to download")

main_playlist_slug = get_playlist_slug(
self.playlists[0]
) # set first playlist as main playlist

for playlist in self.playlists:
playlist_slug = get_playlist_slug(playlist)
playlist_path = f"playlists/{playlist_slug}.json"
Expand All @@ -1195,16 +1175,15 @@
# modify playlist object for preview on homepage
playlist_obj.videos = playlist_obj.videos[:12]

if playlist.playlist_id == self.uploads_playlist_id:
main_playlist_slug = (
playlist_slug # set uploads playlist as main playlist
)
# insert uploads playlist at the beginning of the list
playlist_list.insert(0, generate_playlist_preview_object(playlist))
home_playlist_list.insert(0, playlist_obj)
home_playlist_list.append(playlist_obj)
if playlist.playlist_id == self.user_long_uploads_playlist_id:
user_long_uploads_playlist_slug = playlist_slug
elif playlist.playlist_id == self.user_short_uploads_playlist_id:
user_short_uploads_playlist_slug = playlist_slug
elif playlist.playlist_id == self.user_lives_playlist_id:
user_lives_playlist_slug = playlist_slug
else:
playlist_list.append(generate_playlist_preview_object(playlist))
home_playlist_list.append(playlist_obj)

# write playlists.json file
self.zim_file.add_item_for(
Expand Down Expand Up @@ -1241,7 +1220,10 @@
channel_description=channel_data["snippet"]["description"],
profile_path="profile.jpg",
banner_path="banner.jpg",
main_playlist=main_playlist_slug,
first_playlist=playlist_list[0].id,
user_long_uploads_playlist=user_long_uploads_playlist_slug,
user_short_uploads_playlist=user_short_uploads_playlist_slug,
user_lives_playlist=user_lives_playlist_slug,
playlist_count=len(self.playlists),
joined_date=channel_data["snippet"]["publishedAt"],
).model_dump_json(by_alias=True, indent=2),
Expand Down
57 changes: 46 additions & 11 deletions scraper/src/youtube2zim/youtube.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,10 @@ def __init__(
@classmethod
def from_id(cls, playlist_id):
playlist_json = get_playlist_json(playlist_id)
if playlist_json is None:
raise PlaylistNotFoundError(
f"Invalid playlistId `{playlist_id}`: Not Found"
)
return Playlist(
playlist_id=playlist_id,
title=playlist_json["snippet"]["title"],
Expand Down Expand Up @@ -176,10 +180,13 @@ def get_playlist_json(playlist_id):
req.raise_for_status()
try:
playlist_json = req.json()["items"][0]
total_results = req.json().get("pageInfo", {}).get("totalResults", 0)
if total_results == 0:
logger.error(f"Playlist `{playlist_id}`: No Item Available")
return None
except IndexError:
raise PlaylistNotFoundError(
f"Invalid playlistId `{playlist_id}`: Not Found"
) from None
logger.error(f"Invalid playlistId `{playlist_id}`: Not Found")
return None
save_json(YOUTUBE.cache_dir, fname, playlist_json)
return playlist_json

Expand Down Expand Up @@ -336,8 +343,9 @@ def skip_outofrange_videos(date_range, item):
def extract_playlists_details_from(youtube_id: str):
"""prepare a list of Playlist from user request"""

uploads_playlist_id = None
main_channel_id = None
main_channel_id = user_long_uploads_playlist_id = user_short_uploads_playlist_id = (
user_lives_playlist_id
) = None
if "," not in youtube_id:
try:
# first try to consider passed ID is a channel ID (or username or handle)
Expand All @@ -347,11 +355,36 @@ def extract_playlists_details_from(youtube_id: str):
playlist_ids = [
p["id"] for p in get_channel_playlists_json(main_channel_id)
]
# we always include uploads playlist (contains everything)
playlist_ids += [
channel_json["contentDetails"]["relatedPlaylists"]["uploads"]
]
uploads_playlist_id = playlist_ids[-1]

# Get special playlists JSON objects
user_long_uploads_json = get_playlist_json("UULF" + main_channel_id[2:])
user_short_uploads_json = get_playlist_json("UUSH" + main_channel_id[2:])
user_lives_json = get_playlist_json("UULV" + main_channel_id[2:])

# Extract special playlists IDs if the JSON objects are not None
user_long_uploads_playlist_id = (
user_long_uploads_json["id"] if user_long_uploads_json else None
)
user_short_uploads_playlist_id = (
user_short_uploads_json["id"] if user_short_uploads_json else None
)
user_lives_playlist_id = user_lives_json["id"] if user_lives_json else None

            # Add special playlists if they exist, in the proper order
playlist_ids = (
list(
filter(
None,
[
user_long_uploads_playlist_id,
user_short_uploads_playlist_id,
user_lives_playlist_id,
],
)
)
+ playlist_ids
)

is_playlist = False
except ChannelNotFoundError:
# channel not found, then ID should be a playlist
Expand All @@ -370,6 +403,8 @@ def extract_playlists_details_from(youtube_id: str):
# dict.fromkeys maintains the order of playlist_ids while removing duplicates
[Playlist.from_id(playlist_id) for playlist_id in dict.fromkeys(playlist_ids)],
main_channel_id,
uploads_playlist_id,
user_long_uploads_playlist_id,
user_short_uploads_playlist_id,
user_lives_playlist_id,
is_playlist,
)
2 changes: 1 addition & 1 deletion scraper/tests-integration/integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def test_zim_channel_json():

assert channel_json["id"] == "UC8elThf5TGMpQfQc_VE917Q"
assert channel_json["channelName"] == "openZIM_testing"
assert channel_json["mainPlaylist"] == "uploads_from_openzim_testing-917Q"
assert channel_json["firstPlaylist"] == "uploads_from_openzim_testing-917Q"


def test_zim_videos():
Expand Down
2 changes: 1 addition & 1 deletion zimui/cypress/fixtures/channel/channel.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@
"profilePath": "profile.jpg",
"bannerPath": "banner.jpg",
"joinedDate": "2024-06-04T13:30:16.232286Z",
"mainPlaylist": "uploads_from_openzim_testing-917Q"
"firstPlaylist": "uploads_from_openzim_testing-917Q"
}
1 change: 1 addition & 0 deletions zimui/src/assets/main.css
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
html {
overflow: auto !important;
font-family: 'Roboto', sans-serif;
overflow-y: scroll !important;
}

body {
Expand Down
35 changes: 35 additions & 0 deletions zimui/src/assets/vjs-youtube.css
Original file line number Diff line number Diff line change
Expand Up @@ -54,3 +54,38 @@
.vjs-youtube .vjs-tech canvas {
border-radius: 8px;
}


.video-js.vjs-fluid,
.video-js.vjs-16-9,
.video-js.vjs-4-3,
video.video-js,
video.vjs-tech {
max-height: calc(100vh - 64px);
position: relative !important;
width: 100%;
height: auto !important;
max-width: 100% !important;
padding-top: 0 !important;
line-height: 0;
}
.vjs-control-bar {
line-height: 1;
}

/* Fullscreen styles */
.video-js.vjs-fullscreen {
display: flex;
align-items: center;
justify-content: center;
background-color: black;
text-align: center;
}

.video-js.vjs-fullscreen video {
margin: auto;
width: auto !important;
height: 100% !important;
max-height: 100vh;
object-fit: contain;
}
35 changes: 23 additions & 12 deletions zimui/src/components/channel/ChannelHeader.vue
Original file line number Diff line number Diff line change
Expand Up @@ -20,23 +20,34 @@ onMounted(async () => {
}
})

const tabs = [
{
id: 0,
title: 'Videos',
to: { name: 'videos' }
},
{
id: 1,
title: 'Playlists',
to: { name: 'playlists' }
// Computed tabs array based on store data
const tabs = computed(() => {
const baseTabs = [
{ id: 0, title: 'Home', to: { name: 'home' } }
];

if (main.channel?.userLongUploadsPlaylist) {
baseTabs.push({ id: 1, title: 'Videos', to: { name: 'videos' } });
}

if (main.channel?.userShortUploadsPlaylist) {
baseTabs.push({ id: 2, title: 'Shorts', to: { name: 'shorts' } });
}

if (main.channel?.userLivesPlaylist) {
baseTabs.push({ id: 3, title: 'Lives', to: { name: 'lives' } });
}
]

baseTabs.push({ id: 4, title: 'Playlists', to: { name: 'playlists' } });

return baseTabs;
});


// Hide tabs if there is only one playlist
const hideTabs = computed(() => main.channel?.playlistCount === 1)

const tab = ref<number>(tabs[0].id)
const tab = ref<number>(tabs.value[0]?.id || 0);
</script>

<template>
Expand Down
Loading
Loading