From b4c7c69661825deb3bf015c2d2752298639a6c9a Mon Sep 17 00:00:00 2001 From: arjitdas Date: Sat, 26 Oct 2024 22:30:19 +0530 Subject: [PATCH 01/11] fix: differentiate shorts and lives and videos --- scraper/pyproject.toml | 1 + scraper/src/youtube2zim/schemas.py | 4 + scraper/src/youtube2zim/scraper.py | 65 ++++-- scraper/src/youtube2zim/youtube.py | 209 ++++++++++++++++++ zimui/src/assets/vjs-youtube.css | 35 +++ .../src/components/channel/ChannelHeader.vue | 35 ++- .../channel/tabs/ChannelHomeGridTab.vue | 59 +++++ ...deosListTab.vue => ChannelHomeListTab.vue} | 0 .../channel/tabs/ChannelHomeTab.vue | 19 ++ .../components/channel/tabs/LivesGridTab.vue | 53 +++++ .../src/components/channel/tabs/LivesTab.vue | 18 ++ .../components/channel/tabs/ShortsGridTab.vue | 52 +++++ .../src/components/channel/tabs/ShortsTab.vue | 18 ++ .../components/channel/tabs/VideosGridTab.vue | 13 +- .../src/components/channel/tabs/VideosTab.vue | 5 +- zimui/src/router/index.ts | 20 +- zimui/src/types/Channel.ts | 3 + 17 files changed, 570 insertions(+), 39 deletions(-) create mode 100644 zimui/src/components/channel/tabs/ChannelHomeGridTab.vue rename zimui/src/components/channel/tabs/{VideosListTab.vue => ChannelHomeListTab.vue} (100%) create mode 100644 zimui/src/components/channel/tabs/ChannelHomeTab.vue create mode 100644 zimui/src/components/channel/tabs/LivesGridTab.vue create mode 100644 zimui/src/components/channel/tabs/LivesTab.vue create mode 100644 zimui/src/components/channel/tabs/ShortsGridTab.vue create mode 100644 zimui/src/components/channel/tabs/ShortsTab.vue diff --git a/scraper/pyproject.toml b/scraper/pyproject.toml index 0922fc2b..0c98003b 100644 --- a/scraper/pyproject.toml +++ b/scraper/pyproject.toml @@ -19,6 +19,7 @@ dependencies = [ "pydantic==2.9.1", "pyhumps==3.8.0", "schedule==1.2.2", + "isodate==0.7.2", ] dynamic = ["authors", "classifiers", "keywords", "license", "version", "urls"] diff --git a/scraper/src/youtube2zim/schemas.py b/scraper/src/youtube2zim/schemas.py index f7d390a7..f42d2c67 100644 --- a/scraper/src/youtube2zim/schemas.py +++ b/scraper/src/youtube2zim/schemas.py @@ -45,6 +45,7 @@ class Video(CamelModel): subtitle_path: str | None = None subtitle_list: list[Subtitle] duration: str + is_short: bool class VideoPreview(CamelModel): @@ -107,6 +108,9 @@ class Channel(CamelModel): joined_date: str collection_type: str main_playlist: str | None = None + long_videos_playlist: str | None=None + shorts_playlist: str | None=None + lives_playlist: str | None=None playlist_count: int diff --git a/scraper/src/youtube2zim/scraper.py b/scraper/src/youtube2zim/scraper.py index b1e94b3f..9f781381 100644 --- a/scraper/src/youtube2zim/scraper.py +++ b/scraper/src/youtube2zim/scraper.py @@ -84,6 +84,7 @@ skip_deleted_videos, skip_non_public_videos, skip_outofrange_videos, + is_short, ) MAXIMUM_YOUTUBEID_LENGTH = 24 @@ -179,6 +180,9 @@ def __init__( # process-related self.playlists = [] self.uploads_playlist_id = None + self.long_videos_playlist_id = None + self.shorts_playlist_id = None + self.lives_playlist_id = None self.videos_ids = [] self.video_ids_count = 0 self.videos_processed = 0 @@ -590,6 +594,9 @@ def extract_playlists(self): self.playlists, self.main_channel_id, self.uploads_playlist_id, + self.long_videos_playlist_id, + self.shorts_playlist_id, + self.lives_playlist_id, ) = extract_playlists_details_from(self.collection_type, self.youtube_id) def extract_videos_list(self): @@ -1077,6 +1084,13 @@ def generate_video_object(video) -> Video: author = videos_channels[video_id] subtitles_list = get_subtitles(video_id) channel_data = get_channel_json(author["channelId"]) + + channel_id=author["channelId"] + duration=videos_channels[video_id]["duration"], + publication_date=video["contentDetails"]["videoPublishedAt"], + # Check if the video is short + is_short_video = is_short(video_id,channel_id,duration,publication_date) # can be True or None + is_short_flag = True if is_short_video is True else False # Set True if is_short is True, otherwise False return Video( id=video_id, title=video["snippet"]["title"], @@ -1095,6 +1109,7 @@ def generate_video_object(video) -> Video: subtitle_path=f"videos/{video_id}" if len(subtitles_list) > 0 else None, subtitle_list=subtitles_list, duration=videos_channels[video_id]["duration"], + is_short=is_short_flag, ) def generate_video_preview_object(video) -> VideoPreview: @@ -1187,6 +1202,9 @@ def get_playlist_slug(playlist) -> str: home_playlist_list = [] main_playlist_slug = None + long_videos_playlist_slug = None + shorts_playlist_slug = None + lives_playlist_slug = None if len(self.playlists) > 0: main_playlist_slug = get_playlist_slug( self.playlists[0] @@ -1216,6 +1234,16 @@ def get_playlist_slug(playlist) -> str: # modify playlist object for preview on homepage playlist_obj.videos = playlist_obj.videos[:12] + if playlist.playlist_id == self.long_videos_playlist_id: + long_videos_playlist_slug = (playlist_slug) + + if playlist.playlist_id == self.shorts_playlist_id: + shorts_playlist_slug = (playlist_slug) + + if playlist.playlist_id == self.lives_playlist_id: + lives_playlist_slug= (playlist_slug) + + if playlist.playlist_id == self.uploads_playlist_id: main_playlist_slug = ( playlist_slug # set uploads playlist as main playlist @@ -1251,22 +1279,33 @@ def get_playlist_slug(playlist) -> str: # write channel.json file channel_data = get_channel_json(self.main_channel_id) + channel_data_dict = { + "id":str(self.main_channel_id), + "title":str(self.title), + "description":str(self.description), + "channel_name":channel_data["snippet"]["title"], + "channel_description":channel_data["snippet"]["description"], + "profile_path":"profile.jpg", + "banner_path":"banner.jpg", + "collection_type":self.collection_type, + "main_playlist":main_playlist_slug, + "playlist_count":len(self.playlists), + "joined_date":channel_data["snippet"]["publishedAt"], + } + + if long_videos_playlist_slug is not None : + channel_data_dict["long_videos_playlist"] = long_videos_playlist_slug + + if shorts_playlist_slug is not None : + channel_data_dict["shorts_playlist"] = shorts_playlist_slug + + if lives_playlist_slug is not None : + channel_data_dict["lives_playlist"] = lives_playlist_slug + self.zim_file.add_item_for( path="channel.json", title=self.title, - content=Channel( - id=str(self.main_channel_id), - title=str(self.title), - description=str(self.description), - channel_name=channel_data["snippet"]["title"], - channel_description=channel_data["snippet"]["description"], - profile_path="profile.jpg", - banner_path="banner.jpg", - collection_type=self.collection_type, - main_playlist=main_playlist_slug, - playlist_count=len(self.playlists), - joined_date=channel_data["snippet"]["publishedAt"], - ).model_dump_json(by_alias=True, indent=2), + content = Channel(**channel_data_dict).model_dump_json(by_alias=True, indent=2, exclude_none=True), mimetype="application/json", is_front=False, ) diff --git a/scraper/src/youtube2zim/youtube.py b/scraper/src/youtube2zim/youtube.py index 8eb17b09..0f271d69 100644 --- a/scraper/src/youtube2zim/youtube.py +++ b/scraper/src/youtube2zim/youtube.py @@ -2,7 +2,9 @@ # vim: ai ts=4 sts=4 et sw=4 nu from http import HTTPStatus +from datetime import datetime +import isodate import requests from dateutil import parser as dt_parser from zimscraperlib.download import stream_file @@ -80,6 +82,68 @@ def credentials_ok(): return False +def is_short(video_id,channel_id,duration,publication_date): + """check that a youtube video is short or not""" + # Ensure publication_date is a string + if isinstance(publication_date, tuple): + publication_date = publication_date[0] # If it's a tuple, extract the first element + + short_duration_limit = 180 #3minutes + cutoff_date=datetime(2020,9,14) + published_date = datetime.strptime(publication_date, "%Y-%m-%dT%H:%M:%SZ") + short_playlist_id="UUSH" + channel_id[2:] # Generate the short playlist ID + + if published_date < cutoff_date: + return False + + duration_in_sec = isodate.parse_duration(duration[0]).total_seconds() + + if duration_in_sec >= short_duration_limit: + return False + + try : + req = requests.get( + PLAYLIST_ITEMS_API, + params={ + "playlistId": short_playlist_id, + "videoId": video_id, + "part": "id", + "key": YOUTUBE.api_key, + "maxResults": 10, + }, + timeout=REQUEST_TIMEOUT, + ) + + # Check for HTTP error response + if req.status_code >= HTTPStatus.BAD_REQUEST: + logger.error(f"HTTP {req.status_code} Error response: {req.text}") + req.raise_for_status() # Raises an HTTPError if the status code is 4xx or 5xx + + + # Parse the response + response_json = req.json() + total_results = response_json.get("pageInfo", {}).get("totalResults", 0) + playlist_items = response_json.get("items", []) + + # Check if there are no items or totalResults is not 1 if yes then the video is not short + if total_results != 1 or not playlist_items: + return False + + # If everything is successful, return the long videos playlist ID + return True + + except IndexError: + logger.error(f"Index error : checking {video_id} is short or not") + return None + + except requests.RequestException as e: + logger.error(f"Request failed in is_short: {e}") + return None + + except Exception as e: + logger.error(f"Error occurred in is_short : {e}") + + def get_channel_json(channel_id): """fetch or retieve-save and return the Youtube ChannelResult JSON""" fname = f"channel_{channel_id}" @@ -319,6 +383,129 @@ def skip_outofrange_videos(date_range, item): return dt_parser.parse(item["snippet"]["publishedAt"]).date() in date_range +def get_shorts_playlist_id(channel_id): + '''Return the user's uploaded short playlist ID, or None if shorts are not available or if an error occurs''' + + short_playlist_id = "UUSH" + channel_id[2:] # Generate the short playlist ID + + '''Make the API request to get the playlist details to determine whether shorts are available on the channel''' + + try: + req = requests.get( + PLAYLIST_API, + params={"id": short_playlist_id, "part": "snippet", "key": YOUTUBE.api_key}, + timeout=REQUEST_TIMEOUT, + ) + + # Check for HTTP error response + if req.status_code >= HTTPStatus.BAD_REQUEST: + logger.error(f"HTTP {req.status_code} Error response: {req.text}") + req.raise_for_status() # Raises an HTTPError if the status code is 4xx or 5xx + + # Parse the response + response_json = req.json() + total_results = response_json.get("pageInfo", {}).get("totalResults", 0) + playlist_items = response_json.get("items", []) + + # Check if there are no items or totalResults is 0 if yes then shorts not available + if total_results == 0 or not playlist_items: + logger.error(f"Short Playlist `{short_playlist_id}`: Not Found or No Shorts Available") + return None + + # If everything is successful, return the short playlist ID + return short_playlist_id + + except IndexError: + logger.error(f"Short Playlist `{short_playlist_id}`: Not Found or No Shorts Available") + return None + + except requests.RequestException as e: + logger.error(f"Request failed: {e}") + return None + +def get_long_videos_playlist_id(channel_id): + '''Return the user's uploaded long videos playlist ID, or None if long videos are not available or if an error occurs''' + + long_videos_playlist_id = "UULF" + channel_id[2:] # Generate the long videos playlist ID + + '''Make the API request to get the playlist details to determine whether long videos are available on the channel''' + + try: + req = requests.get( + PLAYLIST_API, + params={"id": long_videos_playlist_id, "part": "snippet", "key": YOUTUBE.api_key}, + timeout=REQUEST_TIMEOUT, + ) + + # Check for HTTP error response + if req.status_code >= HTTPStatus.BAD_REQUEST: + logger.error(f"HTTP {req.status_code} Error response: {req.text}") + req.raise_for_status() # Raises an HTTPError if the status code is 4xx or 5xx + + + # Parse the response + response_json = req.json() + total_results = response_json.get("pageInfo", {}).get("totalResults", 0) + playlist_items = response_json.get("items", []) + + # Check if there are no items or totalResults is 0 if yes then long videos not available + if total_results == 0 or not playlist_items: + logger.error(f"Long videos Playlist `{long_videos_playlist_id}`: Not Found or No long videos Available") + return None + + # If everything is successful, return the long videos playlist ID + return long_videos_playlist_id + + except IndexError: + logger.error(f"Long videos Playlist `{long_videos_playlist_id}`: Not Found or No long videos Available") + return None + + except requests.RequestException as e: + logger.error(f"Request failed: {e}") + return None + +def get_lives_playlist_id(channel_id): + '''Return the user's lives playlist ID, or None if lives are not available or if an error occurs''' + + lives_playlist_id = "UULV" + channel_id[2:] # Generate the lives playlist ID + + '''Make the API request to get the playlist details to determine whether Lives are available on the channel''' + + try: + req = requests.get( + PLAYLIST_API, + params={"id": lives_playlist_id, "part": "snippet", "key": YOUTUBE.api_key}, + timeout=REQUEST_TIMEOUT, + ) + + # Check for HTTP error response + if req.status_code >= HTTPStatus.BAD_REQUEST: + logger.error(f"HTTP {req.status_code} Error response: {req.text}") + req.raise_for_status() # Raises an HTTPError if the status code is 4xx or 5xx + + # Parse the response + response_json = req.json() + total_results = response_json.get("pageInfo", {}).get("totalResults", 0) + playlist_items = response_json.get("items", []) + + # Check if there are no items or totalResults is 0 if yes then lives not available + if total_results == 0 or not playlist_items: + logger.error(f"Live Playlist `{lives_playlist_id}`: Not Found or No lives Available") + return None + + # If everything is successful, return the live playlist ID + return lives_playlist_id + + except IndexError: + logger.error(f"Live Playlist `{lives_playlist_id}`: Not Found or No lives Available") + return None + + except requests.RequestException as e: + logger.error(f"Request failed: {e}") + return None + + + def extract_playlists_details_from(collection_type, youtube_id): """prepare a list of Playlist from user request @@ -335,6 +522,25 @@ def extract_playlists_details_from(collection_type, youtube_id): # retrieve list of playlists for that channel playlist_ids = [p["id"] for p in get_channel_playlists_json(main_channel_id)] + + # Retrieve the shorts,long videos and lives playlist ID + long_videos_playlist_id = get_long_videos_playlist_id(main_channel_id) + shorts_playlist_id = get_shorts_playlist_id(main_channel_id) + lives_playlist_id = get_lives_playlist_id(main_channel_id) + + + if long_videos_playlist_id is not None: + # include uploads long videos playlist (contains every long videos) + playlist_ids += [long_videos_playlist_id] + + if shorts_playlist_id is not None: + # include uploads short playlist (contains every shorts) + playlist_ids += [shorts_playlist_id] + + if lives_playlist_id is not None: + # include lives playlist (contains every lives) + playlist_ids += [lives_playlist_id] + # we always include uploads playlist (contains everything) playlist_ids += [channel_json["contentDetails"]["relatedPlaylists"]["uploads"]] uploads_playlist_id = playlist_ids[-1] @@ -349,4 +555,7 @@ def extract_playlists_details_from(collection_type, youtube_id): [Playlist.from_id(playlist_id) for playlist_id in dict.fromkeys(playlist_ids)], main_channel_id, uploads_playlist_id, + long_videos_playlist_id, + shorts_playlist_id, + lives_playlist_id, ) diff --git a/zimui/src/assets/vjs-youtube.css b/zimui/src/assets/vjs-youtube.css index dce27734..722df671 100644 --- a/zimui/src/assets/vjs-youtube.css +++ b/zimui/src/assets/vjs-youtube.css @@ -54,3 +54,38 @@ .vjs-youtube .vjs-tech canvas { border-radius: 8px; } + + +.video-js.vjs-fluid, +.video-js.vjs-16-9, +.video-js.vjs-4-3, +video.video-js, +video.vjs-tech { + max-height: calc(100vh - 64px); + position: relative !important; + width: 100%; + height: auto !important; + max-width: 100% !important; + padding-top: 0 !important; + line-height: 0; +} +.vjs-control-bar { + line-height: 1; +} + +/* Fullscreen styles */ +.video-js.vjs-fullscreen { + display: flex; + align-items: center; + justify-content: center; + background-color: black; + text-align: center; +} + +.video-js.vjs-fullscreen video { + margin: auto; + width: auto !important; + height: 100% !important; + max-height: 100vh; + object-fit: contain; +} \ No newline at end of file diff --git a/zimui/src/components/channel/ChannelHeader.vue b/zimui/src/components/channel/ChannelHeader.vue index 4194b7a4..4e501186 100644 --- a/zimui/src/components/channel/ChannelHeader.vue +++ b/zimui/src/components/channel/ChannelHeader.vue @@ -20,23 +20,34 @@ onMounted(async () => { } }) -const tabs = [ - { - id: 0, - title: 'Videos', - to: { name: 'videos' } - }, - { - id: 1, - title: 'Playlists', - to: { name: 'playlists' } +// Computed tabs array based on store data +const tabs = computed(() => { + const baseTabs = [ + { id: 0, title: 'Home', to: { name: 'home' } } + ]; + + if (main.channel?.longVideosPlaylist) { + baseTabs.push({ id: 1, title: 'Videos', to: { name: 'videos' } }); + } + + if (main.channel?.shortsPlaylist) { + baseTabs.push({ id: 2, title: 'Shorts', to: { name: 'shorts' } }); + } + + if (main.channel?.livesPlaylist) { + baseTabs.push({ id: 3, title: 'Lives', to: { name: 'lives' } }); } -] + + baseTabs.push({ id: 4, title: 'Playlists', to: { name: 'playlists' } }); + + return baseTabs; +}); + // Hide tabs if there is only one playlist const hideTabs = computed(() => main.channel?.playlistCount === 1) -const tab = ref(tabs[0].id) +const tab = ref(tabs.value[0]?.id || 0); diff --git a/zimui/src/components/channel/tabs/VideosTab.vue b/zimui/src/components/channel/tabs/VideosTab.vue index e3fc8c2e..63b4421e 100644 --- a/zimui/src/components/channel/tabs/VideosTab.vue +++ b/zimui/src/components/channel/tabs/VideosTab.vue @@ -3,10 +3,9 @@ import { useMainStore } from '@/stores/main' import { computed } from 'vue' import VideosGridTab from './VideosGridTab.vue' -import VideosListTab from './VideosListTab.vue' const main = useMainStore() -const hideTabs = computed(() => main.channel?.playlistCount === 1) +const hideTabs = computed(() => main.channel?.longVideosPlaylist) diff --git a/zimui/src/router/index.ts b/zimui/src/router/index.ts index 42f93300..4b1634a0 100644 --- a/zimui/src/router/index.ts +++ b/zimui/src/router/index.ts @@ -4,7 +4,10 @@ import HomeView from '../views/HomeView.vue' import VideoPlayerView from '../views/VideoPlayerView.vue' import NotFoundView from '../views/NotFoundView.vue' +import ChannelHomeTab from '@/components/channel/tabs/ChannelHomeTab.vue' import VideosTab from '@/components/channel/tabs/VideosTab.vue' +import ShortsTab from '@/components/channel/tabs/ShortsTab.vue' +import LivesTab from '@/components/channel/tabs/LivesTab.vue' import PlaylistsTab from '@/components/channel/tabs/PlaylistsTab.vue' import PlaylistView from '@/views/PlaylistView.vue' @@ -15,13 +18,28 @@ const router = createRouter({ path: '/', name: 'home', component: HomeView, - redirect: '/videos', + redirect: '/channel-home', children: [ + { + path: 'channel-home', + name: 'channel-home', + component: ChannelHomeTab + }, { path: 'videos', name: 'videos', component: VideosTab }, + { + path: 'shorts', + name: 'shorts', + component: ShortsTab + }, + { + path: 'lives', + name: 'lives', + component: LivesTab + }, { path: 'playlists', name: 'playlists', diff --git a/zimui/src/types/Channel.ts b/zimui/src/types/Channel.ts index 4cfa8a8d..19dbf9d5 100644 --- a/zimui/src/types/Channel.ts +++ b/zimui/src/types/Channel.ts @@ -9,6 +9,9 @@ export interface Channel { joinedDate: string collectionType: string mainPlaylist?: string + longVideosPlaylist?:string + shortsPlaylist?:string + livesPlaylist?:string playlistCount: number } From 0aa62ecdf02aae9f56b1e2ae012156906842c3c9 Mon Sep 17 00:00:00 2001 From: arjitdas Date: Sun, 27 Oct 2024 10:54:28 +0530 Subject: [PATCH 02/11] refactor: create reusable component for shorts, lives and videos tab --- .../src/components/channel/tabs/LivesTab.vue | 10 ++-- .../components/channel/tabs/ShortsGridTab.vue | 52 ------------------- .../src/components/channel/tabs/ShortsTab.vue | 13 +++-- .../components/channel/tabs/VideosGridTab.vue | 52 ------------------- .../src/components/channel/tabs/VideosTab.vue | 10 ++-- .../LivesGridTab.vue => views/TabView.vue} | 29 ++++++++--- 6 files changed, 42 insertions(+), 124 deletions(-) delete mode 100644 zimui/src/components/channel/tabs/ShortsGridTab.vue delete mode 100644 zimui/src/components/channel/tabs/VideosGridTab.vue rename zimui/src/{components/channel/tabs/LivesGridTab.vue => views/TabView.vue} (63%) diff --git a/zimui/src/components/channel/tabs/LivesTab.vue b/zimui/src/components/channel/tabs/LivesTab.vue index e4d2a6ee..47cd0c53 100644 --- a/zimui/src/components/channel/tabs/LivesTab.vue +++ b/zimui/src/components/channel/tabs/LivesTab.vue @@ -2,17 +2,17 @@ import { useMainStore } from '@/stores/main' import { computed } from 'vue' -import LivesGridTab from './LivesGridTab.vue' +import TabView from '@/views/TabView.vue' const main = useMainStore() -const hideTabs = computed(() => main.channel?.livesPlaylist) +const livesAvailable = computed(() => main.channel?.livesPlaylist) diff --git a/zimui/src/components/channel/tabs/ShortsGridTab.vue b/zimui/src/components/channel/tabs/ShortsGridTab.vue deleted file mode 100644 index 3dda7687..00000000 --- a/zimui/src/components/channel/tabs/ShortsGridTab.vue +++ /dev/null @@ -1,52 +0,0 @@ - - - diff --git a/zimui/src/components/channel/tabs/ShortsTab.vue b/zimui/src/components/channel/tabs/ShortsTab.vue index a70335bb..78e18072 100644 --- a/zimui/src/components/channel/tabs/ShortsTab.vue +++ b/zimui/src/components/channel/tabs/ShortsTab.vue @@ -2,17 +2,20 @@ import { useMainStore } from '@/stores/main' import { computed } from 'vue' -import ShortsGridTab from './ShortsGridTab.vue' +import TabView from '@/views/TabView.vue' const main = useMainStore() -const hideTabs = computed(() => main.channel?.shortsPlaylist) +const shortsAvailable = computed(() => main.channel?.shortsPlaylist) diff --git a/zimui/src/components/channel/tabs/VideosGridTab.vue b/zimui/src/components/channel/tabs/VideosGridTab.vue deleted file mode 100644 index 3e2cb63a..00000000 --- a/zimui/src/components/channel/tabs/VideosGridTab.vue +++ /dev/null @@ -1,52 +0,0 @@ - - - diff --git a/zimui/src/components/channel/tabs/VideosTab.vue b/zimui/src/components/channel/tabs/VideosTab.vue index 63b4421e..b706d0bb 100644 --- a/zimui/src/components/channel/tabs/VideosTab.vue +++ b/zimui/src/components/channel/tabs/VideosTab.vue @@ -2,15 +2,17 @@ import { useMainStore } from '@/stores/main' import { computed } from 'vue' -import VideosGridTab from './VideosGridTab.vue' +import TabView from '@/views/TabView.vue' const main = useMainStore() -const hideTabs = computed(() => main.channel?.longVideosPlaylist) +const videosAvailable = computed(() => main.channel?.longVideosPlaylist) From 69f0677dfce599e327e98b4ed0c3c200b5ce9488 Mon Sep 17 00:00:00 2001 From: arjitdas Date: Sun, 27 Oct 2024 11:10:43 +0530 Subject: [PATCH 03/11] docs: update changelog --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index dd71e3b2..3614ef7c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Fixed + +- Diffrentiate shorts, lives, & normal videos (#367) +- corrected the short video resolution in the UI (#366) + ### Changed - Raise exception if there are no videos in the playlists (#347) From 5246679c2d786030785ba569c580de213ec6621a Mon Sep 17 00:00:00 2001 From: arjitdas Date: Thu, 31 Oct 2024 10:22:40 +0530 Subject: [PATCH 04/11] refactor: remove isShort --- scraper/pyproject.toml | 1 - scraper/src/youtube2zim/schemas.py | 1 - scraper/src/youtube2zim/scraper.py | 8 ---- scraper/src/youtube2zim/youtube.py | 62 ------------------------------ 4 files changed, 72 deletions(-) diff --git a/scraper/pyproject.toml b/scraper/pyproject.toml index 0c98003b..0922fc2b 100644 --- a/scraper/pyproject.toml +++ b/scraper/pyproject.toml @@ -19,7 +19,6 @@ dependencies = [ "pydantic==2.9.1", "pyhumps==3.8.0", "schedule==1.2.2", - "isodate==0.7.2", ] dynamic = ["authors", "classifiers", "keywords", "license", "version", "urls"] diff --git a/scraper/src/youtube2zim/schemas.py b/scraper/src/youtube2zim/schemas.py index f42d2c67..6e1309f7 100644 --- a/scraper/src/youtube2zim/schemas.py +++ b/scraper/src/youtube2zim/schemas.py @@ -45,7 +45,6 @@ class Video(CamelModel): subtitle_path: str | None = None subtitle_list: list[Subtitle] duration: str - is_short: bool class VideoPreview(CamelModel): diff --git a/scraper/src/youtube2zim/scraper.py b/scraper/src/youtube2zim/scraper.py index 9f781381..b6557c51 100644 --- a/scraper/src/youtube2zim/scraper.py +++ b/scraper/src/youtube2zim/scraper.py @@ -84,7 +84,6 @@ skip_deleted_videos, skip_non_public_videos, skip_outofrange_videos, - is_short, ) MAXIMUM_YOUTUBEID_LENGTH = 24 @@ -1085,12 +1084,6 @@ def generate_video_object(video) -> Video: subtitles_list = get_subtitles(video_id) channel_data = get_channel_json(author["channelId"]) - channel_id=author["channelId"] - duration=videos_channels[video_id]["duration"], - publication_date=video["contentDetails"]["videoPublishedAt"], - # Check if the video is short - is_short_video = is_short(video_id,channel_id,duration,publication_date) # can be True or None - is_short_flag = True if is_short_video is True else False # Set True if is_short is True, otherwise False return Video( id=video_id, title=video["snippet"]["title"], @@ -1109,7 +1102,6 @@ def generate_video_object(video) -> Video: subtitle_path=f"videos/{video_id}" if len(subtitles_list) > 0 else None, subtitle_list=subtitles_list, duration=videos_channels[video_id]["duration"], - is_short=is_short_flag, ) def generate_video_preview_object(video) -> VideoPreview: diff --git a/scraper/src/youtube2zim/youtube.py b/scraper/src/youtube2zim/youtube.py index 0f271d69..826f1dee 100644 --- a/scraper/src/youtube2zim/youtube.py +++ b/scraper/src/youtube2zim/youtube.py @@ -4,7 +4,6 @@ from http import HTTPStatus from datetime import datetime -import isodate import requests from dateutil import parser as dt_parser from zimscraperlib.download import stream_file @@ -82,67 +81,6 @@ def credentials_ok(): return False -def is_short(video_id,channel_id,duration,publication_date): - """check that a youtube video is short or not""" - # Ensure publication_date is a string - if isinstance(publication_date, tuple): - publication_date = publication_date[0] # If it's a tuple, extract the first element - - short_duration_limit = 180 #3minutes - cutoff_date=datetime(2020,9,14) - published_date = datetime.strptime(publication_date, "%Y-%m-%dT%H:%M:%SZ") - short_playlist_id="UUSH" + channel_id[2:] # Generate the short playlist ID - - if published_date < cutoff_date: - return False - - duration_in_sec = isodate.parse_duration(duration[0]).total_seconds() - - if duration_in_sec >= short_duration_limit: - return False - - try : - req = requests.get( - PLAYLIST_ITEMS_API, - params={ - "playlistId": short_playlist_id, - "videoId": video_id, - "part": "id", - "key": YOUTUBE.api_key, - "maxResults": 10, - }, - timeout=REQUEST_TIMEOUT, - ) - - # Check for HTTP error response - if req.status_code >= HTTPStatus.BAD_REQUEST: - logger.error(f"HTTP {req.status_code} Error response: {req.text}") - req.raise_for_status() # Raises an HTTPError if the status code is 4xx or 5xx - - - # Parse the response - response_json = req.json() - total_results = response_json.get("pageInfo", {}).get("totalResults", 0) - playlist_items = response_json.get("items", []) - - # Check if there are no items or totalResults is not 1 if yes then the video is not short - if total_results != 1 or not playlist_items: - return False - - # If everything is successful, return the long videos playlist ID - return True - - except IndexError: - logger.error(f"Index error : checking {video_id} is short or not") - return None - - except requests.RequestException as e: - logger.error(f"Request failed in is_short: {e}") - return None - - except Exception as e: - logger.error(f"Error occurred in is_short : {e}") - def get_channel_json(channel_id): """fetch or retieve-save and return the Youtube ChannelResult JSON""" From 713fc2897b21eb271f65627b78828ba0ecc64a68 Mon Sep 17 00:00:00 2001 From: arjitdas Date: Thu, 31 Oct 2024 13:26:03 +0530 Subject: [PATCH 05/11] refactor: improve naming conventions for user uploads playlist for clarity --- CHANGELOG.md | 2 +- scraper/src/youtube2zim/schemas.py | 6 +- scraper/src/youtube2zim/scraper.py | 42 ++++++------- scraper/src/youtube2zim/youtube.py | 61 +++++++++---------- .../src/components/channel/ChannelHeader.vue | 6 +- .../channel/tabs/LivesGridTab.vue} | 21 ++----- .../src/components/channel/tabs/LivesTab.vue | 6 +- .../components/channel/tabs/ShortsGridTab.vue | 61 +++++++++++++++++++ .../src/components/channel/tabs/ShortsTab.vue | 10 ++- .../components/channel/tabs/VideosGridTab.vue | 61 +++++++++++++++++++ .../src/components/channel/tabs/VideosTab.vue | 8 +-- zimui/src/types/Channel.ts | 6 +- 12 files changed, 199 insertions(+), 91 deletions(-) rename zimui/src/{views/TabView.vue => components/channel/tabs/LivesGridTab.vue} (76%) create mode 100644 zimui/src/components/channel/tabs/ShortsGridTab.vue create mode 100644 zimui/src/components/channel/tabs/VideosGridTab.vue diff --git a/CHANGELOG.md b/CHANGELOG.md index 3614ef7c..766a585a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed -- Diffrentiate shorts, lives, & normal videos (#367) +- Diffrentiate user uploaded shorts, lives, & long videos (#367) - corrected the short video resolution in the UI (#366) ### Changed diff --git a/scraper/src/youtube2zim/schemas.py b/scraper/src/youtube2zim/schemas.py index 6e1309f7..abfbb0e5 100644 --- a/scraper/src/youtube2zim/schemas.py +++ b/scraper/src/youtube2zim/schemas.py @@ -107,9 +107,9 @@ class Channel(CamelModel): joined_date: str collection_type: str main_playlist: str | None = None - long_videos_playlist: str | None=None - shorts_playlist: str | None=None - lives_playlist: str | None=None + user_long_uploads_playlist: str | None=None + user_short_uploads_playlist: str | None=None + user_lives_playlist: str | None=None playlist_count: int diff --git a/scraper/src/youtube2zim/scraper.py b/scraper/src/youtube2zim/scraper.py index b6557c51..1d054139 100644 --- a/scraper/src/youtube2zim/scraper.py +++ b/scraper/src/youtube2zim/scraper.py @@ -179,9 +179,9 @@ def __init__( # process-related self.playlists = [] self.uploads_playlist_id = None - self.long_videos_playlist_id = None - self.shorts_playlist_id = None - self.lives_playlist_id = None + self.user_long_uploads_playlist_id = None + self.user_short_uploads_playlist_id = None + self.user_lives_playlist_id = None self.videos_ids = [] self.video_ids_count = 0 self.videos_processed = 0 @@ -593,9 +593,9 @@ def extract_playlists(self): self.playlists, self.main_channel_id, self.uploads_playlist_id, - self.long_videos_playlist_id, - self.shorts_playlist_id, - self.lives_playlist_id, + self.user_long_uploads_playlist_id, + self.user_short_uploads_playlist_id, + self.user_lives_playlist_id, ) = extract_playlists_details_from(self.collection_type, self.youtube_id) def extract_videos_list(self): @@ -1194,9 +1194,9 @@ def get_playlist_slug(playlist) -> str: home_playlist_list = [] main_playlist_slug = None - long_videos_playlist_slug = None - shorts_playlist_slug = None - lives_playlist_slug = None + user_long_uploads_playlist_slug = None + user_short_uploads_playlist_slug = None + user_lives_playlist_slug = None if len(self.playlists) > 0: main_playlist_slug = get_playlist_slug( self.playlists[0] @@ -1226,14 +1226,14 @@ def get_playlist_slug(playlist) -> str: # modify playlist object for preview on homepage playlist_obj.videos = playlist_obj.videos[:12] - if playlist.playlist_id == self.long_videos_playlist_id: - long_videos_playlist_slug = (playlist_slug) + if playlist.playlist_id == self.user_long_uploads_playlist_id: + user_long_uploads_playlist_slug = (playlist_slug) - if playlist.playlist_id == self.shorts_playlist_id: - shorts_playlist_slug = (playlist_slug) + if playlist.playlist_id == self.user_short_uploads_playlist_id: + user_short_uploads_playlist_slug = (playlist_slug) - if playlist.playlist_id == self.lives_playlist_id: - lives_playlist_slug= (playlist_slug) + if playlist.playlist_id == self.user_lives_playlist_id: + user_lives_playlist_slug= (playlist_slug) if playlist.playlist_id == self.uploads_playlist_id: @@ -1285,14 +1285,14 @@ def get_playlist_slug(playlist) -> str: "joined_date":channel_data["snippet"]["publishedAt"], } - if long_videos_playlist_slug is not None : - channel_data_dict["long_videos_playlist"] = long_videos_playlist_slug + if user_long_uploads_playlist_slug is not None : + channel_data_dict["user_long_uploads_playlist"] = user_long_uploads_playlist_slug - if shorts_playlist_slug is not None : - channel_data_dict["shorts_playlist"] = shorts_playlist_slug + if user_short_uploads_playlist_slug is not None : + channel_data_dict["user_short_uploads_playlist"] = user_short_uploads_playlist_slug - if lives_playlist_slug is not None : - channel_data_dict["lives_playlist"] = lives_playlist_slug + if user_lives_playlist_slug is not None : + channel_data_dict["user_lives_playlist"] = user_lives_playlist_slug self.zim_file.add_item_for( path="channel.json", diff --git a/scraper/src/youtube2zim/youtube.py b/scraper/src/youtube2zim/youtube.py index 826f1dee..a4dc35f3 100644 --- a/scraper/src/youtube2zim/youtube.py +++ b/scraper/src/youtube2zim/youtube.py @@ -321,17 +321,17 @@ def skip_outofrange_videos(date_range, item): return dt_parser.parse(item["snippet"]["publishedAt"]).date() in date_range -def get_shorts_playlist_id(channel_id): +def get_user_short_uploads_playlist_id(channel_id): '''Return the user's uploaded short playlist ID, or None if shorts are not available or if an error occurs''' - short_playlist_id = "UUSH" + channel_id[2:] # Generate the short playlist ID + user_short_uploads_playlist_id = "UUSH" + channel_id[2:] # Generate the short playlist ID '''Make the API request to get the playlist details to determine whether shorts are available on the channel''' try: req = requests.get( PLAYLIST_API, - params={"id": short_playlist_id, "part": "snippet", "key": YOUTUBE.api_key}, + params={"id": user_short_uploads_playlist_id, "part": "snippet", "key": YOUTUBE.api_key}, timeout=REQUEST_TIMEOUT, ) @@ -347,31 +347,31 @@ def get_shorts_playlist_id(channel_id): # Check if there are no items or totalResults is 0 if yes then shorts not available if total_results == 0 or not playlist_items: - logger.error(f"Short Playlist `{short_playlist_id}`: Not Found or No Shorts Available") + logger.error(f"Short Playlist `{user_short_uploads_playlist_id}`: Not Found or No Shorts Available") return None # If everything is successful, return the short playlist ID - return short_playlist_id + return user_short_uploads_playlist_id except IndexError: - logger.error(f"Short Playlist `{short_playlist_id}`: Not Found or No Shorts Available") + logger.error(f"User short uploads Playlist `{user_short_uploads_playlist_id}`: Not Found or No uploaded Shorts Available") return None except requests.RequestException as e: logger.error(f"Request failed: {e}") return None -def get_long_videos_playlist_id(channel_id): +def get_user_long_uploads_playlist_id(channel_id): '''Return the user's uploaded long videos playlist ID, or None if long videos are not available or if an error occurs''' - long_videos_playlist_id = "UULF" + channel_id[2:] # Generate the long videos playlist ID + user_long_uploads_playlist_id = "UULF" + channel_id[2:] # Generate the long videos playlist ID '''Make the API request to get the playlist details to determine whether long videos are available on the channel''' try: req = requests.get( PLAYLIST_API, - params={"id": long_videos_playlist_id, "part": "snippet", "key": YOUTUBE.api_key}, + params={"id": user_long_uploads_playlist_id, "part": "snippet", "key": YOUTUBE.api_key}, timeout=REQUEST_TIMEOUT, ) @@ -388,31 +388,31 @@ def get_long_videos_playlist_id(channel_id): # Check if there are no items or totalResults is 0 if yes then long videos not available if total_results == 0 or not playlist_items: - logger.error(f"Long videos Playlist `{long_videos_playlist_id}`: Not Found or No long videos Available") + logger.error(f"User Long uploads Playlist `{user_long_uploads_playlist_id}`: Not Found or No uploaded long videos Available") return None # If everything is successful, return the long videos playlist ID - return long_videos_playlist_id + return user_long_uploads_playlist_id except IndexError: - logger.error(f"Long videos Playlist `{long_videos_playlist_id}`: Not Found or No long videos Available") + logger.error(f"Long videos Playlist `{user_long_uploads_playlist_id}`: Not Found or No long videos Available") return None except requests.RequestException as e: logger.error(f"Request failed: {e}") return None -def get_lives_playlist_id(channel_id): +def get_user_lives_playlist_id(channel_id): '''Return the user's lives playlist ID, or None if lives are not available or if an error occurs''' - lives_playlist_id = "UULV" + channel_id[2:] # Generate the lives playlist ID + user_lives_playlist_id = "UULV" + channel_id[2:] # Generate the lives playlist ID '''Make the API request to get the playlist details to determine whether Lives are available on the channel''' try: req = requests.get( PLAYLIST_API, - params={"id": lives_playlist_id, "part": "snippet", "key": YOUTUBE.api_key}, + params={"id": user_lives_playlist_id, "part": "snippet", "key": YOUTUBE.api_key}, timeout=REQUEST_TIMEOUT, ) @@ -428,21 +428,20 @@ def get_lives_playlist_id(channel_id): # Check if there are no items or totalResults is 0 if yes then lives not available if total_results == 0 or not playlist_items: - logger.error(f"Live Playlist `{lives_playlist_id}`: Not Found or No lives Available") + logger.error(f"User lives Playlist `{user_lives_playlist_id}`: Not Found or No lives Available") return None # If everything is successful, return the live playlist ID - return lives_playlist_id + return user_lives_playlist_id except IndexError: - logger.error(f"Live Playlist `{lives_playlist_id}`: Not Found or No lives Available") + logger.error(f"Live Playlist `{user_lives_playlist_id}`: Not Found or No lives Available") return None except requests.RequestException as e: logger.error(f"Request failed: {e}") return None - def extract_playlists_details_from(collection_type, youtube_id): """prepare a list of Playlist from user request @@ -462,22 +461,22 @@ def extract_playlists_details_from(collection_type, youtube_id): playlist_ids = [p["id"] for p in get_channel_playlists_json(main_channel_id)] # Retrieve the shorts,long videos and lives playlist ID - long_videos_playlist_id = get_long_videos_playlist_id(main_channel_id) - shorts_playlist_id = get_shorts_playlist_id(main_channel_id) - lives_playlist_id = get_lives_playlist_id(main_channel_id) + user_long_uploads_playlist_id = get_user_long_uploads_playlist_id(main_channel_id) + user_short_uploads_playlist_id = get_user_short_uploads_playlist_id(main_channel_id) + user_lives_playlist_id = get_user_lives_playlist_id(main_channel_id) - if long_videos_playlist_id is not None: + if user_long_uploads_playlist_id is not None: # include uploads long videos playlist (contains every long videos) - playlist_ids += [long_videos_playlist_id] + playlist_ids += [user_long_uploads_playlist_id] - if shorts_playlist_id is not None: + if user_short_uploads_playlist_id is not None: # include uploads short playlist (contains every shorts) - playlist_ids += [shorts_playlist_id] + playlist_ids += [user_short_uploads_playlist_id] - if lives_playlist_id is not None: + if user_lives_playlist_id is not None: # include lives playlist (contains every lives) - playlist_ids += [lives_playlist_id] + playlist_ids += [user_lives_playlist_id] # we always include uploads playlist (contains everything) playlist_ids += [channel_json["contentDetails"]["relatedPlaylists"]["uploads"]] @@ -493,7 +492,7 @@ def extract_playlists_details_from(collection_type, youtube_id): [Playlist.from_id(playlist_id) for playlist_id in dict.fromkeys(playlist_ids)], main_channel_id, uploads_playlist_id, - long_videos_playlist_id, - shorts_playlist_id, - lives_playlist_id, + user_long_uploads_playlist_id, + user_short_uploads_playlist_id, + user_lives_playlist_id, ) diff --git a/zimui/src/components/channel/ChannelHeader.vue b/zimui/src/components/channel/ChannelHeader.vue index 4e501186..ba8d03ec 100644 --- a/zimui/src/components/channel/ChannelHeader.vue +++ b/zimui/src/components/channel/ChannelHeader.vue @@ -26,15 +26,15 @@ const tabs = computed(() => { { id: 0, title: 'Home', to: { name: 'home' } } ]; - if (main.channel?.longVideosPlaylist) { + if (main.channel?.userLongUploadsPlaylist) { baseTabs.push({ id: 1, title: 'Videos', to: { name: 'videos' } }); } - if (main.channel?.shortsPlaylist) { + if (main.channel?.userShortUploadsPlaylist) { baseTabs.push({ id: 2, title: 'Shorts', to: { name: 'shorts' } }); } - if (main.channel?.livesPlaylist) { + if (main.channel?.userLivesPlaylist) { baseTabs.push({ id: 3, title: 'Lives', to: { name: 'lives' } }); } diff --git a/zimui/src/views/TabView.vue b/zimui/src/components/channel/tabs/LivesGridTab.vue similarity index 76% rename from zimui/src/views/TabView.vue rename to zimui/src/components/channel/tabs/LivesGridTab.vue index 2c6468b5..9234bed7 100644 --- a/zimui/src/views/TabView.vue +++ b/zimui/src/components/channel/tabs/LivesGridTab.vue @@ -8,25 +8,16 @@ import VideoGrid from '@/components/video/VideoGrid.vue' import TabInfo from '@/components/common/ViewInfo.vue' import type { Playlist } from '@/types/Playlists' + const main = useMainStore() const videos = ref([]) const playlist = ref() const isLoading = ref(true) -const props = defineProps({ - playlistLabel: { - type: String, - required: true - }, - playlistType: { - type: String, - required: true - } -}) // Watch for changes in the playlist watch( - () => main.channel?.[props.playlistLabel], + () => main.channel?.userLivesPlaylist, () => { fetchData() } @@ -34,9 +25,9 @@ watch( // Fetch the videos for the playlist const fetchData = async function () { - if (main.channel?.[props.playlistLabel]) { + if (main.channel?.userLivesPlaylist) { try { - const resp = await main.fetchPlaylist(main.channel?.[props.playlistLabel]) + const resp = await main.fetchPlaylist(main.channel?.userLivesPlaylist) if (resp) { playlist.value = resp videos.value = resp.videos @@ -60,11 +51,11 @@ onMounted(() => {
- +
diff --git a/zimui/src/components/channel/tabs/LivesTab.vue b/zimui/src/components/channel/tabs/LivesTab.vue index 47cd0c53..fbd8ba05 100644 --- a/zimui/src/components/channel/tabs/LivesTab.vue +++ b/zimui/src/components/channel/tabs/LivesTab.vue @@ -2,15 +2,15 @@ import { useMainStore } from '@/stores/main' import { computed } from 'vue' -import TabView from '@/views/TabView.vue' +import LivesGridTab from './LivesGridTab.vue' const main = useMainStore() -const livesAvailable = computed(() => main.channel?.livesPlaylist) +const livesAvailable = computed(() => main.channel?.userLivesPlaylist) diff --git a/zimui/src/components/channel/tabs/PlaylistsTab.vue b/zimui/src/components/channel/tabs/PlaylistsTab.vue index a06c7039..a97a602a 100644 --- a/zimui/src/components/channel/tabs/PlaylistsTab.vue +++ b/zimui/src/components/channel/tabs/PlaylistsTab.vue @@ -13,7 +13,7 @@ const isLoading = ref(true) // Watch for changes in the main playlist watch( - () => main.channel?.mainPlaylist, + () => main.channel?.id, () => { fetchData() } @@ -21,7 +21,7 @@ watch( // Fetch the playlists for the playlist tab const fetchData = async function () { - if (main.channel?.mainPlaylist) { + if (main.channel?.id) { try { const resp = await main.fetchPlaylists() if (resp) { diff --git a/zimui/src/types/Channel.ts b/zimui/src/types/Channel.ts index 967d9e3d..f243a077 100644 --- a/zimui/src/types/Channel.ts +++ b/zimui/src/types/Channel.ts @@ -7,10 +7,10 @@ export interface Channel { profilePath?: string bannerPath?: string joinedDate: string - mainPlaylist?: string - userLongUploadsPlaylist?:string - userShortUploadsPlaylist?:string - userLivesPlaylist?:string + firstPlaylist?: string + userLongUploadsPlaylist?: string + userShortUploadsPlaylist?: string + userLivesPlaylist?: string playlistCount: number }