Merge branch 'development'

FreeTubeApp · Sep 11, 2022 · 7ae45b3 · 7ae45b3
2 parents fc79c1d + ca80efa
commit 7ae45b3
Show file tree

Hide file tree

Showing 8 changed files with 169 additions and 17 deletions.
diff --git a/.github/workflows/conflicts.yml b/.github/workflows/conflicts.yml
@@ -0,0 +1,25 @@
+name: "Conflicts"
+on:
+  # So that PRs touching the same files as the push are updated
+  push:
+  # So that the `dirtyLabel` is removed if conflicts are resolved
+  # We recommend `pull_request_target` so that github secrets are available.
+  # In `pull_request` we wouldn't be able to change labels of fork PRs
+  pull_request_target:
+    types: [synchronize]
+  workflow_run:
+    workflows: ['Dummy workflow for conflicts']
+    types: [requested]
+
+jobs:
+  main:
+    runs-on: ubuntu-latest
+    steps:
+      - name: check if prs are dirty
+        uses: eps1lon/actions-label-merge-conflict@releases/2.x
+        with:
+          dirtyLabel: "PR: merge conflicts / rebase needed"
+          removeOnDirtyLabel: "PR: waiting for review"
+          repoToken: "${{ secrets.GITHUB_TOKEN }}"
+          commentOnDirty: "This pull request has conflicts, please resolve those before we can evaluate the pull request."
+          commentOnClean: "Conflicts have been resolved. A maintainer will review the pull request shortly."
diff --git a/.github/workflows/dummy-conflicts.yml b/.github/workflows/dummy-conflicts.yml
@@ -0,0 +1,9 @@
+name: Dummy workflow for conflicts
+on:
+  pull_request_review:
+    types: [submitted]
+jobs:
+  dummy:
+    runs-on: ubuntu-latest
+    steps:
+      - run: echo "this is a dummy workflow that triggers a workflow_run; it's necessary because otherwise the repo secrets will not be in scope for externally forked pull requests"
diff --git a/.github/workflows/remove-outdated-labels.yml b/.github/workflows/remove-outdated-labels.yml
@@ -0,0 +1,36 @@
+name: Remove outdated labels
+on:
+  # https://github.community/t/github-actions-are-severely-limited-on-prs/18179/15
+  pull_request_target:
+    types:
+      - closed
+jobs:
+  remove-merged-pr-labels:
+    name: Remove merged pull request labels
+    if: github.event.pull_request.merged
+    runs-on: ubuntu-latest
+    steps:
+      - uses: mondeja/[email protected]
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
+          labels: |
+            PR: waiting for review
+            PR: WIP
+            PR: changes requested
+            PR: merge conflicts / rebase needed
+            PR/Issue: dependent
+
+  remove-closed-pr-labels:
+    name: Remove closed pull request labels
+    if: github.event_name == 'pull_request_target' && (! github.event.pull_request.merged)
+    runs-on: ubuntu-latest
+    steps:
+      - uses: mondeja/[email protected]
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
+          labels: |
+            PR: waiting for review
+            PR: WIP
+            PR: changes requested
+            PR: merge conflicts / rebase needed
+            PR/Issue: dependent      
diff --git a/README.md b/README.md
@@ -1,16 +1,16 @@
 # YouTube Trending Videos Scraper NodeJS Documentation
-This NodeJS library can scrape all available trending pages of YouTube without any API usage. It is developed for and tailored towards easy usage in the [FreeTube](https://github.com/FreeTubeApp/FreeTube) rewrite but can be used with any other project as well.
+This NodeJS library can scrape all available trending pages of YouTube without any API usage. It is developed for and tailored towards easy usage in [FreeTube](https://github.com/FreeTubeApp/FreeTube) but can be used with any other project as well.
 
 Therefore, this library does not require any API keys and is not subject to the maximum quotas attached to them, but it might take longer to receive the required data.
 
 The library works as long as YouTube keeps its web page layout the same. Therefore, there is **no guarantee** that this library will work at all times.
 If this library should not work at some point, please create an issue and let me know so that I can look into it. Pull requests are also welcome in this case.
 
 ## Installation
-`npm install yt-trending-scraper`
+`npm install @freetubeapp/yt-trending-scraper`
 
 ## Usage
-`const ytrend = require("yt-trending-scraper")`
+`const ytrend = require("@freetubeapp/yt-trending-scraper")`
 
 ## API
 **scrapeTrendingPage(_parameters_)**
@@ -50,7 +50,7 @@ const parameters = {
 ytrend.scrapeTrendingPage(parameters).then((data) =>{
     console.log(data);
 }).catch((error)=>{
-    console.log(error);
+    console.error(error);
 });
 
 // The data is a list of objects containing the following attributes:
@@ -74,7 +74,8 @@ ytrend.scrapeTrendingPage(parameters).then((data) =>{
     isUpcoming:         false,
     isCreatorOnRise:    Boolean, // indicates whether the video is part of a creator on the rise
     isVerified:         Boolean,
-    isVerifiedArist:    Boolean
+    isVerifiedArist:    Boolean,
+    isShort:            Boolean
 }
 
 // The thumbnail objects:

diff --git a/index.d.ts b/index.d.ts
@@ -0,0 +1,40 @@
+declare module "@freetubeapp/yt-trending-scraper" {
+  interface Thumbnail {
+    quality: string
+    url: string
+    width: number
+    height: number
+  }
+  interface Video {
+    videoId: string,
+    title: string,
+    type: "video",
+    author: string,
+    authorId: string,
+    authorUrl: string,
+    videoThumbnails: Thumbnail[],
+    description: string,
+    viewCount: number,
+    published: EpochTimeStamp,
+    publishedText: string,
+    lengthSeconds: number,
+    timeText: string,
+    liveNow: false,
+    paid: false,
+    premium: false,
+    isUpcoming: false,
+    isCreatorOnRise: boolean,
+    isVerified: boolean,
+    isVerifiedAuthor: boolean,
+    isShort: boolean
+  }
+  interface TrendingPayload {
+    geoLocation: string,
+    parseCreatorOnRise: boolean,
+    page: "default" | "music" | "gaming" | "movies"
+  }
+  class TrendingScraper {
+    static scrapeTrendingPage(payload: TrendingPayload) : Promise<Video[]>
+  }
+  export default TrendingScraper
+}
diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -1,11 +1,12 @@
 {
   "name": "@freetube/yt-trending-scraper",
-  "version": "3.1.0",
+  "version": "3.1.1",
   "description": "Identifies the currently trending videos on YouTube and returns all trending site information about every video without accessing the YouTube API.",
   "main": "index.js",
   "files": [
     "index.js",
-    "src/"
+    "src/",
+    "index.d.ts"
   ],
   "scripts": {
     "test": "jest --watchAll --verbose --coverage",
@@ -50,5 +51,7 @@
     "eslint-plugin-prettier": "^4.2.1",
     "eslint-plugin-promise": "^6.0.0",
     "jest": "^28.1.3",
-    "prettier": "^2.7.1"  }
+    "prettier": "^2.7.1"
+  },
+  "types": "index.d.ts"
 }
diff --git a/src/HtmlParser.js b/src/HtmlParser.js
@@ -1,17 +1,45 @@
 class HtmlParser {
   static parseNewHtml(htmlData, parseCreatorOnRise) {
     // matches the special setup of the video elements
-    let jsonContent = '{' + htmlData.match(/"sectionListRenderer".+?(},"tab)/)[0]
-    // remove the last chars in order to make it valid JSON
-    jsonContent = jsonContent.substring(0, jsonContent.length - 5)
-    const contentArrayJSON = JSON.parse(jsonContent).sectionListRenderer.contents
+    const jsonDataRegex = /ytInitialData = (.+)?(;<\/script>)/
+    const jsonObject = JSON.parse(htmlData.match(jsonDataRegex)[1])
+    const jsonContent = jsonObject.contents.twoColumnBrowseResultsRenderer.tabs
+      .find(e => e.tabRenderer.selected)
+      .tabRenderer
+      .content
+    let contentArrayJSON
+    if ('sectionListRenderer' in jsonContent) {
+      contentArrayJSON = jsonContent.sectionListRenderer.contents
+    } else if ('richGridRenderer' in jsonContent) {
+      contentArrayJSON = jsonContent.richGridRenderer.contents
+    }
     let videos = []
     const currentTime = Date.now()
     contentArrayJSON.forEach((data) => {
-      const videoList = this.buildApiOutput(data.itemSectionRenderer.contents[0].shelfRenderer.content, currentTime, parseCreatorOnRise)
-      videos = [...videos, ...videoList]
+      if ('itemSectionRenderer' in data) {
+        const videoList = this.buildApiOutput(data.itemSectionRenderer.contents[0].shelfRenderer.content, currentTime, parseCreatorOnRise)
+        videos = [...videos, ...videoList]
+      } else if ('richItemRenderer' in data) {
+        videos.push(
+          this.parseRichItemRenderer(data, currentTime)
+        )
+      } else if ('richSectionRenderer' in data) {
+        const videoList = this.parseRichSectionRenderer(data, currentTime)
+        videos = [...videos, ...videoList]
+      }
+    })
+    return this.deduplicateVideoList(videos)
+  }
+
+  static deduplicateVideoList(videos) {
+    const uniqueIds = new Set()
+    return videos.filter((video) => {
+      if (!uniqueIds.has(video.videoId)) {
+        uniqueIds.add(video.videoId)
+        return true
+      }
+      return false
     })
-    return videos
   }
 
   // access the one video container and build an object with all the data required
@@ -47,6 +75,16 @@ class HtmlParser {
     return videoEntryList
   }
 
+  static parseRichItemRenderer(data, currentTime) {
+    return this.parseVideo(data.richItemRenderer.content.videoRenderer, currentTime)
+  }
+
+  static parseRichSectionRenderer(data, currentTime) {
+    return data.richSectionRenderer.content.richShelfRenderer.contents.map(rsr => {
+      return this.parseRichItemRenderer(rsr, currentTime)
+    })
+  }
+
   static parseVideo(videoRenderer, currentTime) {
     const videoEntry = {
       videoId: -1,