This repository has been archived by the owner on Jul 2, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
scraper.py
47 lines (43 loc) · 1.59 KB
/
scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import requests
from bs4 import BeautifulSoup
from manager import Manager
class Scraper:
    """Fetch the SubsPlease RSS feed and extract episodes and show names."""

    # Feed requested by get_response().
    FEED_URL = "https://subsplease.org/rss/?r=1080"
    # Seconds to wait on the server; without a timeout requests blocks forever.
    REQUEST_TIMEOUT = 30

    @staticmethod
    def get_response():
        """Download the feed and return it parsed by BeautifulSoup.

        Returns:
            The ``BeautifulSoup`` document built from the response text with
            the "lxml" parser.

        Raises:
            requests.RequestException: if the request fails or times out.
        """
        response = requests.get(
            Scraper.FEED_URL, timeout=Scraper.REQUEST_TIMEOUT
        )
        return BeautifulSoup(response.text, "lxml")

    @staticmethod
    def _parse_episode_number(title):
        """Return the text between the last " - " and the next "(" in *title*.

        The search for "(" starts after the separator, so parentheses in the
        show name (e.g. "Show (2021) - 05 (1080p)") do not truncate the
        result, and hyphens inside the episode token (e.g. "01-12") are kept.

        Returns:
            The stripped episode token, or "" when *title* has no " - "
            separator.
        """
        separator = " - "
        sep_index = title.rfind(separator)
        if sep_index == -1:
            return ""
        start = sep_index + len(separator)
        paren_index = title.find("(", start)
        end = len(title) if paren_index == -1 else paren_index
        return title[start:end].strip()

    def get_new_episodes(self):
        """Return feed items for watched shows that are not yet downloaded.

        A fresh ``Manager`` is created and asked to load its watched-show and
        downloaded-episode collections; an item is kept when its category is
        in ``shows_watching`` and its title is not in ``episodes_downloaded``.

        Returns:
            A list of dicts with the keys "title", "show", "link", "date",
            "size" and "ep", in feed order.
        """
        soup = self.get_response()
        manager = Manager()
        manager.load_shows_watching()
        manager.load_episodes_downloaded()

        newly_added = []
        for episode in soup.find_all("item"):
            category = episode.category.text
            if category not in manager.shows_watching:
                continue
            title = episode.title.text
            if title in manager.episodes_downloaded:
                continue
            newly_added.append(
                {
                    "title": title,
                    # Drops the last 7 characters; presumably a " - 1080"
                    # resolution suffix -- TODO confirm against the feed.
                    "show": category[:-7],
                    # NOTE(review): reads the node that follows <link> rather
                    # than the tag's own text; presumably because the parser
                    # leaves <link> empty -- confirm.
                    "link": episode.link.next_element,
                    # Drops the last 6 characters; presumably a " +0000"
                    # UTC offset -- TODO confirm against the feed.
                    "date": episode.pubdate.text[:-6],
                    "size": episode.find("subsplease:size").text,
                    "ep": self._parse_episode_number(title),
                }
            )
        return newly_added

    def get_all_shows(self, query: str):
        """Return the distinct feed categories that contain *query*.

        Matching is a case-insensitive substring test. Duplicates are removed
        while keeping first-seen order.

        Args:
            query: Text to look for inside each category name.

        Returns:
            A list of matching category strings.
        """
        needle = query.lower()
        matches = [
            category.text
            for category in soup_categories(self)
            if needle in category.text.lower()
        ] if False else [
            category.text
            for category in self.get_response().find_all("category")
            if needle in category.text.lower()
        ]
        # dict.fromkeys de-duplicates while preserving insertion order.
        return list(dict.fromkeys(matches))