From 9173120682d23010247b6bcfcaf216a93b820fe1 Mon Sep 17 00:00:00 2001 From: PrestonN Date: Tue, 30 Aug 2022 21:50:04 -0400 Subject: [PATCH 1/9] Bump version number to v3.1.0 --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 208c4af..20b4a26 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@freetube/yt-trending-scraper", - "version": "3.0.0", + "version": "3.1.0", "description": "Identifies the currently trending videos on YouTube and returns all trending site information about every video without accessing the YouTube API.", "main": "index.js", "files": [ From 1727359e43fa67b9c7f16d03af847002ca72678e Mon Sep 17 00:00:00 2001 From: PrestonN Date: Tue, 30 Aug 2022 21:51:29 -0400 Subject: [PATCH 2/9] Update package-lock.json --- package-lock.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/package-lock.json b/package-lock.json index 654e990..970af58 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@freetube/yt-trending-scraper", - "version": "3.0.0", + "version": "3.1.0", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "@freetube/yt-trending-scraper", - "version": "3.0.0", + "version": "3.1.0", "license": "GPLv3", "dependencies": { "axios": "^0.27.2" From fa39933873691813f5c8f70b71377c67b833820a Mon Sep 17 00:00:00 2001 From: ChunkyProgrammer <78101139+ChunkyProgrammer@users.noreply.github.com> Date: Tue, 6 Sep 2022 21:10:31 -0400 Subject: [PATCH 3/9] remove duplicate videos from trending video list (#24) * remove duplicate videos from video list * use set --- src/HtmlParser.js | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/HtmlParser.js b/src/HtmlParser.js index 5dc6142..c0c1af7 100644 --- a/src/HtmlParser.js +++ b/src/HtmlParser.js @@ -11,7 +11,18 @@ class HtmlParser { const videoList = 
this.buildApiOutput(data.itemSectionRenderer.contents[0].shelfRenderer.content, currentTime, parseCreatorOnRise) videos = [...videos, ...videoList] }) - return videos + return this.deduplicateVideoList(videos) + } + + static deduplicateVideoList(videos) { + const uniqueIds = new Set() + return videos.filter((video) => { + if (!uniqueIds.has(video.videoId)) { + uniqueIds.add(video.videoId) + return true + } + return false + }) } // access the one video container and build and object with all the data required From ee809adcf1e77e1bdeeb116e27c80d536dd23407 Mon Sep 17 00:00:00 2001 From: ChunkyProgrammer <78101139+ChunkyProgrammer@users.noreply.github.com> Date: Tue, 6 Sep 2022 21:11:29 -0400 Subject: [PATCH 4/9] add support for parsing new trending format (#23) * add support for parsing new trending type * remove unreachable code * small fix --- src/HtmlParser.js | 39 +++++++++++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/src/HtmlParser.js b/src/HtmlParser.js index c0c1af7..bc05319 100644 --- a/src/HtmlParser.js +++ b/src/HtmlParser.js @@ -1,15 +1,32 @@ class HtmlParser { static parseNewHtml(htmlData, parseCreatorOnRise) { // matches the special setup of the video elements - let jsonContent = '{' + htmlData.match(/"sectionListRenderer".+?(},"tab)/)[0] - // remove the last chars in order to make it valid JSON - jsonContent = jsonContent.substring(0, jsonContent.length - 5) - const contentArrayJSON = JSON.parse(jsonContent).sectionListRenderer.contents + const jsonDataRegex = /ytInitialData = (.+)?(;<\/script>)/ + const jsonObject = JSON.parse(htmlData.match(jsonDataRegex)[1]) + const jsonContent = jsonObject.contents.twoColumnBrowseResultsRenderer.tabs + .find(e => e.tabRenderer.selected) + .tabRenderer + .content + let contentArrayJSON + if ('sectionListRenderer' in jsonContent) { + contentArrayJSON = jsonContent.sectionListRenderer.contents + } else if ('richGridRenderer' in jsonContent) { + contentArrayJSON = 
jsonContent.richGridRenderer.contents + } let videos = [] const currentTime = Date.now() contentArrayJSON.forEach((data) => { - const videoList = this.buildApiOutput(data.itemSectionRenderer.contents[0].shelfRenderer.content, currentTime, parseCreatorOnRise) - videos = [...videos, ...videoList] + if ('itemSectionRenderer' in data) { + const videoList = this.buildApiOutput(data.itemSectionRenderer.contents[0].shelfRenderer.content, currentTime, parseCreatorOnRise) + videos = [...videos, ...videoList] + } else if ('richItemRenderer' in data) { + videos.push( + this.parseRichItemRenderer(data, currentTime) + ) + } else if ('richSectionRenderer' in data) { + const videoList = this.parseRichSectionRenderer(data, currentTime) + videos = [...videos, ...videoList] + } }) return this.deduplicateVideoList(videos) } @@ -58,6 +75,16 @@ class HtmlParser { return videoEntryList } + static parseRichItemRenderer(data, currentTime) { + return this.parseVideo(data.richItemRenderer.content.videoRenderer, currentTime) + } + + static parseRichSectionRenderer(data, currentTime) { + return data.richSectionRenderer.content.richShelfRenderer.contents.map(rsr => { + return this.parseRichItemRenderer(rsr, currentTime) + }) + } + static parseVideo(videoRenderer, currentTime) { const videoEntry = { videoId: -1, From 83f30a0608475a04704e23870de18f1d2fab6fa0 Mon Sep 17 00:00:00 2001 From: efb4f5ff-1298-471a-8973-3d47447115dc <73130443+efb4f5ff-1298-471a-8973-3d47447115dc@users.noreply.github.com> Date: Wed, 7 Sep 2022 01:12:14 +0000 Subject: [PATCH 5/9] Add PR conflicts label automation workflow (#22) --- .github/workflows/conflicts.yml | 25 +++++++++++++++++++++++++ .github/workflows/dummy-conflicts.yml | 9 +++++++++ 2 files changed, 34 insertions(+) create mode 100644 .github/workflows/conflicts.yml create mode 100644 .github/workflows/dummy-conflicts.yml diff --git a/.github/workflows/conflicts.yml b/.github/workflows/conflicts.yml new file mode 100644 index 0000000..63b208e --- /dev/null +++ 
b/.github/workflows/conflicts.yml @@ -0,0 +1,25 @@ +name: "Conflicts" +on: + # So that PRs touching the same files as the push are updated + push: + # So that the `dirtyLabel` is removed if conflicts are resolved + # We recommend `pull_request_target` so that github secrets are available. + # In `pull_request` we wouldn't be able to change labels of fork PRs + pull_request_target: + types: [synchronize] + workflow_run: + workflows: ['Dummy workflow for conflicts'] + types: [requested] + +jobs: + main: + runs-on: ubuntu-latest + steps: + - name: check if prs are dirty + uses: eps1lon/actions-label-merge-conflict@releases/2.x + with: + dirtyLabel: "PR: merge conflicts / rebase needed" + removeOnDirtyLabel: "PR: waiting for review" + repoToken: "${{ secrets.GITHUB_TOKEN }}" + commentOnDirty: "This pull request has conflicts, please resolve those before we can evaluate the pull request." + commentOnClean: "Conflicts have been resolved. A maintainer will review the pull request shortly." diff --git a/.github/workflows/dummy-conflicts.yml b/.github/workflows/dummy-conflicts.yml new file mode 100644 index 0000000..ed1fa3f --- /dev/null +++ b/.github/workflows/dummy-conflicts.yml @@ -0,0 +1,9 @@ +name: Dummy workflow for conflicts +on: + pull_request_review: + types: [submitted] +jobs: + dummy: + runs-on: ubuntu-latest + steps: + - run: echo "this is a dummy workflow that triggers a workflow_run; it's necessary because otherwise the repo secrets will not be in scope for externally forked pull requests" From a9fc112690de4335eeac98aeb39db9d863c660c0 Mon Sep 17 00:00:00 2001 From: ChunkyProgrammer <78101139+ChunkyProgrammer@users.noreply.github.com> Date: Thu, 8 Sep 2022 09:22:09 -0400 Subject: [PATCH 6/9] fix read me (#25) --- README.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 1604c65..eb4cf3d 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ # YouTube Trending Videos Scraper NodeJS Documentation -This 
NodeJS library can scrape all available trending pages of YouTube without any API usage. It is developed for and tailored towards easy usage in the [FreeTube](https://github.com/FreeTubeApp/FreeTube) rewrite but can be used with any other project as well. +This NodeJS library can scrape all available trending pages of YouTube without any API usage. It is developed for and tailored towards easy usage in [FreeTube](https://github.com/FreeTubeApp/FreeTube) but can be used with any other project as well. Therefore, this library does not require any API keys, with the attached maximum quotas, but instead might take longer to receive the required data. @@ -7,10 +7,10 @@ The library works as long as YouTube keeps its web page layout the same. Therefo If this library should not work at some point, please create an issue and let me know so that I can take a look into it. Pull requests are also welcomed in this case. ## Installation -`npm install yt-trending-scraper` +`npm install @freetubeapp/yt-trending-scraper` ## Usage -`const ytrend = require("yt-trending-scraper")` +`const ytrend = require("@freetubeapp/yt-trending-scraper")` ## API **scrapeTrendingPage(_parameters_)** @@ -50,7 +50,7 @@ const parameters = { ytrend.scrapeTrendingPage(parameters).then((data) =>{ console.log(data); }).catch((error)=>{ - console.log(error); + console.error(error); }); // The data is a list of objects containing the following attributes: @@ -74,7 +74,8 @@ ytrend.scrapeTrendingPage(parameters).then((data) =>{ isUpcoming: false, isCreatorOnRise: Boolean, // indicates whether the video is part of a creator on the rise isVerified: Boolean, - isVerifiedArist: Boolean + isVerifiedArist: Boolean, + isShort: Boolean } // The thumbnail objects: From 5f6f740170583de8426c35bd58df927376c49d26 Mon Sep 17 00:00:00 2001 From: efb4f5ff-1298-471a-8973-3d47447115dc <73130443+efb4f5ff-1298-471a-8973-3d47447115dc@users.noreply.github.com> Date: Fri, 9 Sep 2022 07:19:34 +0000 Subject: [PATCH 7/9] Create 
remove-outdated-labels.yml (#21) --- .github/workflows/remove-outdated-labels.yml | 36 ++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 .github/workflows/remove-outdated-labels.yml diff --git a/.github/workflows/remove-outdated-labels.yml b/.github/workflows/remove-outdated-labels.yml new file mode 100644 index 0000000..722218c --- /dev/null +++ b/.github/workflows/remove-outdated-labels.yml @@ -0,0 +1,36 @@ +name: Remove outdated labels +on: + # https://github.community/t/github-actions-are-severely-limited-on-prs/18179/15 + pull_request_target: + types: + - closed +jobs: + remove-merged-pr-labels: + name: Remove merged pull request labels + if: github.event.pull_request.merged + runs-on: ubuntu-latest + steps: + - uses: mondeja/remove-labels-gh-action@v1.1.1 + with: + token: ${{ secrets.GITHUB_TOKEN }} + labels: | + PR: waiting for review + PR: WIP + PR: changes requested + PR: merge conflicts / rebase needed + PR/Issue: dependent + + remove-closed-pr-labels: + name: Remove closed pull request labels + if: github.event_name == 'pull_request_target' && (! 
github.event.pull_request.merged) + runs-on: ubuntu-latest + steps: + - uses: mondeja/remove-labels-gh-action@v1.1.1 + with: + token: ${{ secrets.GITHUB_TOKEN }} + labels: | + PR: waiting for review + PR: WIP + PR: changes requested + PR: merge conflicts / rebase needed + PR/Issue: dependent From 9d6b4a32fbacd95c37e78b2976f1fa08071ba9c4 Mon Sep 17 00:00:00 2001 From: ChunkyProgrammer <78101139+ChunkyProgrammer@users.noreply.github.com> Date: Sat, 10 Sep 2022 17:50:41 -0400 Subject: [PATCH 8/9] Add Type Defintion file (#26) * add type definition * export trending scraper --- index.d.ts | 40 ++++++++++++++++++++++++++++++++++++++++ package.json | 7 +++++-- 2 files changed, 45 insertions(+), 2 deletions(-) create mode 100644 index.d.ts diff --git a/index.d.ts b/index.d.ts new file mode 100644 index 0000000..3b8a978 --- /dev/null +++ b/index.d.ts @@ -0,0 +1,40 @@ +declare module "@freetubeapp/yt-trending-scraper" { + interface Thumbnail { + quality: string + url: string + width: number + height: number + } + interface Video { + videoId: string, + title: string, + type: "video", + author: string, + authorId: string, + authorUrl: string, + videoThumbnails: Thumbnail[], + description: string, + viewCount: number, + published: EpochTimeStamp, + publishedText: string, + lengthSeconds: number, + timeText: string, + liveNow: false, + paid: false, + premium: false, + isUpcoming: false, + isCreatorOnRise: boolean, + isVerified: boolean, + isVerifiedAuthor: boolean, + isShort: boolean + } + interface TrendingPayload { + geoLocation: string, + parseCreatorOnRise: boolean, + page: "default" | "music" | "gaming" | "movies" + } + class TrendingScraper { + static scrapeTrendingPage(payload: TrendingPayload) : Promise<Video[]> + } + export default TrendingScraper +} diff --git a/package.json b/package.json index 20b4a26..f024880 100644 --- a/package.json +++ b/package.json @@ -5,7 +5,8 @@ "main": "index.js", "files": [ "index.js", - "src/" + "src/", + "index.d.ts" ], "scripts": { "test": "jest 
--watchAll --verbose --coverage", @@ -50,5 +51,7 @@ "eslint-plugin-prettier": "^4.2.1", "eslint-plugin-promise": "^6.0.0", "jest": "^28.1.3", - "prettier": "^2.7.1" } + "prettier": "^2.7.1" + }, + "types": "index.d.ts" } From ca80efaf713b1e92a516c61f52dcea599049fa83 Mon Sep 17 00:00:00 2001 From: PrestonN Date: Sun, 11 Sep 2022 17:57:24 -0400 Subject: [PATCH 9/9] Update package-lock.json and package.json --- package-lock.json | 4 ++-- package.json | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/package-lock.json b/package-lock.json index 970af58..fb5c79d 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@freetube/yt-trending-scraper", - "version": "3.1.0", + "version": "3.1.1", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "@freetube/yt-trending-scraper", - "version": "3.1.0", + "version": "3.1.1", "license": "GPLv3", "dependencies": { "axios": "^0.27.2" diff --git a/package.json b/package.json index f024880..8776262 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@freetube/yt-trending-scraper", - "version": "3.1.0", + "version": "3.1.1", "description": "Identifies the currently trending videos on YouTube and returns all trending site information about every video without accessing the YouTube API.", "main": "index.js", "files": [