Skip to content

Commit

Permalink
removed webdriver dependency, fixed danbooru extractor, some refactoring
Browse files Browse the repository at this point in the history
  • Loading branch information
gan-of-culture committed Mar 17, 2024
1 parent e2e0239 commit 12ad6db
Show file tree
Hide file tree
Showing 12 changed files with 193 additions and 374 deletions.
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,5 @@ media
*.aac

*token
*exhentai_test.go
*muchohentai_test.go
get-sauce*
115 changes: 52 additions & 63 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -261,59 +261,58 @@ get-sauce -i https://hentai-moon.com/videos/285/isekai-harem-monogatari-ep-1/

The following links will direct you to adult content. Please keep that in mind!

| Site | Images | Videos | Requirements |
| ------------------------------------------------------------------------------------------|:------------------:|:----------------:|:--------------------:|
| [9hentai.to/ru](https://9hentai.to/) | :heavy_check_mark: | ? |
| [animeidhentai.com (1080p, 720p, 480p, 360p)](https://animeidhentai.com) | ? |:heavy_check_mark:|
| [booruproject (ex. rule34, gelbooru)](https://booru.org/top) | :heavy_check_mark: |:heavy_check_mark:|
| [booru.io](https://booru.io/) | :heavy_check_mark: | ? |
| [comicporn.xxx](https://comicporn.xxx) | :heavy_check_mark: | ? |
| [danbooru.donmai.us](https://danbooru.donmai.us) | :heavy_check_mark: | ? |:car:|
| [doujin.sexy](https://doujin.sexy) | :heavy_check_mark: | ? |
| [e-hentai.org](http://e-hentai.org/) | :heavy_check_mark: | ? |
| [exhentai.org](http://exhentai.org/) | :heavy_check_mark: | ? |:closed_lock_with_key:|
| [haho.moe (1080p, 720p, 480p, 360p)](https://haho.moe) | ? |:heavy_check_mark:|
| [hanime.tv (720p, 480p, 360p)](https://hanime.tv) | ? |:heavy_check_mark:|
| [hentai.tv (1080p, 720p, 480p, 360p)](https://hentai.tv/) | ? |:heavy_check_mark:|
| [hentai-moon.com (720p, 480p)](https://hentai-moon.com) | ? |:heavy_check_mark:|
| [hentai2read.com](https://hentai2read.com) | :heavy_check_mark: | ? |
| [hentai2w.com(720p, 480p, 360p)](https://hentai2w.com) | ? |:heavy_check_mark:|
| [hentaicloud.com(720p)](https://www.hentaicloud.com) | :x: |:heavy_check_mark:|
| [hentaidude.com(720p, 480, 360p)](https://hentaidude.com/) | ? |:heavy_check_mark:|
| [hentaiera.com](https://hentaiera.com) | :heavy_check_mark: | ? |
| [hentaienvy.com](https://hentaienvy.com) | :heavy_check_mark: | ? |
| [www.hentai-foundry.com](https://www.hentai-foundry.com/) | :heavy_check_mark: | ? |
| [hentaifox.com](https://hentaifox.com) | :heavy_check_mark: | ? |
| [hentaihaven.co (1080p, 720p, 480p, 360p)](https://hentaihaven.co) | ? |:heavy_check_mark:|
| [hentaihaven.com (1080p, 720p, 480p, 360p)](https://hentaihaven.com) | ? |:heavy_check_mark:|
| [hentaihaven.red (1080p, 720p, 480p, 360p)](https://hentaihaven.red) | ? |:heavy_check_mark:|
| [hentaihaven.xxx (1080p, 720p, 480p, 360p)](https://hentaihaven.xxx) | ? |:heavy_check_mark:|
| [hentaimama.io(1080p, 720p)](https://hentaimama.io) | ? |:heavy_check_mark:|
| [hentaipulse.com(720p, 420p)](https://hentaipulse.com) | ? |:heavy_check_mark:|
| [hentairox.com](https://hentairox.com) | :heavy_check_mark: | ? |
| [hentaistream.moe (2160p, 1080p, 480p)](https://hentaistream.moe/) | ? |:heavy_check_mark:|
| [hentaistream.tv (1080p, 720p, 480p, 360p)](https://hentaistream.tv) | ? |:heavy_check_mark:|
| [hentaistream.xxx (1080p, 720, 480p, 360p)](https://hentaistream.xxx/) | ? |:heavy_check_mark:|
| [hentaivideos.net (1080p, 720p, 480p, 360p)](https://hentaivideos.net/) | ? |:heavy_check_mark:|
| [hentaiworld.tv (1080p, 720p, 480p)](https://hentaiworld.tv/) | ? |:heavy_check_mark:|
| [hentaiyes.com (1080p, 720p, 480p, 360p)](https://hentaiyes.com/) | ? |:heavy_check_mark:|
| [hentaizap.com](https://hentaizap.com) | :heavy_check_mark: | ? |
| [hitomi.la](https://hitomi.la/) | :heavy_check_mark: | ? |
| [imhentai.com](https://imhentai.xxx) | :heavy_check_mark: | ? |
| [iwara.tv](https://iwara.tv/) | :heavy_check_mark: |:heavy_check_mark:|
| [konachan.com](https://konachan.com/post?tags=) | :heavy_check_mark: | ? |
| [latesthentai.com (1080p, 720p, 480p, 360p)](https://latesthentai.com/) | ? |:heavy_check_mark:|
| [miohentai.com (1080p, 720p, 480p)](https://miohentai.com/) | :heavy_check_mark: | ? |
| [nhentai.net](https://nhentai.net) | :heavy_check_mark: | ? |:cookie:|
| [ohentai.org (1080p, 720p, 480p)](https://ohentai.org/) | ? |:heavy_check_mark:|
| [oppai.stream (2160p, 1080p, 720p)](https://oppai.stream/) | ? |:heavy_check_mark:|
| [pururin.to](https://pururin.to) | :heavy_check_mark: | ? |
| [rule34.paheal.net](https://rule34.paheal.net) | :heavy_check_mark: |:heavy_check_mark:|
| [rule34video.com (2160p, 1080p, 720p, 480p, 360p)](https://rule34video.com/) | :heavy_check_mark: |:heavy_check_mark:|
| [simply-hentai.com](https://www.simply-hentai.com) | :heavy_check_mark: | ? |
| [thehentaiworld.com](https://thehentaiworld.com) | :heavy_check_mark: |:heavy_check_mark:|
| [uncensoredhentai.xxx (1080p, 720p, 480p, 360p)](https://uncensoredhentai.xxx/) | ? |:heavy_check_mark:|
| [yandere.re](https://yande.re/post) | :heavy_check_mark: | ? |
| Site | Images | Videos | Requirements |
| ------------------------------------------------------------------------------- | :----------------: | :----------------: | :----------: |
| [9hentai.to/ru](https://9hentai.to/) | :heavy_check_mark: | ? |
| [animeidhentai.com (1080p, 720p, 480p, 360p)](https://animeidhentai.com) | ? | :heavy_check_mark: |
| [booruproject (ex. rule34, gelbooru)](https://booru.org/top) | :heavy_check_mark: | :heavy_check_mark: |
| [booru.io](https://booru.io/) | :heavy_check_mark: | ? |
| [comicporn.xxx](https://comicporn.xxx) | :heavy_check_mark: | ? |
| [danbooru.donmai.us](https://danbooru.donmai.us) | :heavy_check_mark: | ? |
| [doujin.sexy](https://doujin.sexy) | :heavy_check_mark: | ? |
| [e-hentai.org](http://e-hentai.org/) | :heavy_check_mark: | ? |
| [haho.moe (1080p, 720p, 480p, 360p)](https://haho.moe) | ? | :heavy_check_mark: |
| [hanime.tv (720p, 480p, 360p)](https://hanime.tv) | ? | :heavy_check_mark: |
| [hentai.tv (1080p, 720p, 480p, 360p)](https://hentai.tv/) | ? | :heavy_check_mark: |
| [hentai-moon.com (720p, 480p)](https://hentai-moon.com) | ? | :heavy_check_mark: |
| [hentai2read.com](https://hentai2read.com) | :heavy_check_mark: | ? |
| [hentai2w.com(720p, 480p, 360p)](https://hentai2w.com) | ? | :heavy_check_mark: |
| [hentaicloud.com(720p)](https://www.hentaicloud.com) | :x: | :heavy_check_mark: |
| [hentaidude.com(720p, 480p, 360p)](https://hentaidude.com/) | ? | :heavy_check_mark: |
| [hentaiera.com](https://hentaiera.com) | :heavy_check_mark: | ? |
| [hentaienvy.com](https://hentaienvy.com) | :heavy_check_mark: | ? |
| [www.hentai-foundry.com](https://www.hentai-foundry.com/) | :heavy_check_mark: | ? |
| [hentaifox.com](https://hentaifox.com) | :heavy_check_mark: | ? |
| [hentaihaven.co (1080p, 720p, 480p, 360p)](https://hentaihaven.co) | ? | :heavy_check_mark: |
| [hentaihaven.com (1080p, 720p, 480p, 360p)](https://hentaihaven.com) | ? | :heavy_check_mark: |
| [hentaihaven.red (1080p, 720p, 480p, 360p)](https://hentaihaven.red) | ? | :heavy_check_mark: |
| [hentaihaven.xxx (1080p, 720p, 480p, 360p)](https://hentaihaven.xxx) | ? | :heavy_check_mark: |
| [hentaimama.io(1080p, 720p)](https://hentaimama.io) | ? | :heavy_check_mark: |
| [hentaipulse.com(720p, 420p)](https://hentaipulse.com) | ? | :heavy_check_mark: |
| [hentairox.com](https://hentairox.com) | :heavy_check_mark: | ? |
| [hentaistream.moe (2160p, 1080p, 480p)](https://hentaistream.moe/) | ? | :heavy_check_mark: |
| [hentaistream.tv (1080p, 720p, 480p, 360p)](https://hentaistream.tv) | ? | :heavy_check_mark: |
| [hentaistream.xxx (1080p, 720p, 480p, 360p)](https://hentaistream.xxx/) | ? | :heavy_check_mark: |
| [hentaivideos.net (1080p, 720p, 480p, 360p)](https://hentaivideos.net/) | ? | :heavy_check_mark: |
| [hentaiworld.tv (1080p, 720p, 480p)](https://hentaiworld.tv/) | ? | :heavy_check_mark: |
| [hentaiyes.com (1080p, 720p, 480p, 360p)](https://hentaiyes.com/) | ? | :heavy_check_mark: |
| [hentaizap.com](https://hentaizap.com) | :heavy_check_mark: | ? |
| [hitomi.la](https://hitomi.la/) | :heavy_check_mark: | ? |
| [imhentai.com](https://imhentai.xxx) | :heavy_check_mark: | ? |
| [iwara.tv](https://iwara.tv/) | :heavy_check_mark: | :heavy_check_mark: |
| [konachan.com](https://konachan.com/post?tags=) | :heavy_check_mark: | ? |
| [latesthentai.com (1080p, 720p, 480p, 360p)](https://latesthentai.com/) | ? | :heavy_check_mark: |
| [miohentai.com (1080p, 720p, 480p)](https://miohentai.com/) | :heavy_check_mark: | ? |
| [nhentai.net](https://nhentai.net) | :heavy_check_mark: | ? | :cookie: |
| [ohentai.org (1080p, 720p, 480p)](https://ohentai.org/) | ? | :heavy_check_mark: |
| [oppai.stream (2160p, 1080p, 720p)](https://oppai.stream/) | ? | :heavy_check_mark: |
| [pururin.to](https://pururin.to) | :heavy_check_mark: | ? |
| [rule34.paheal.net](https://rule34.paheal.net) | :heavy_check_mark: | :heavy_check_mark: |
| [rule34video.com (2160p, 1080p, 720p, 480p, 360p)](https://rule34video.com/) | :heavy_check_mark: | :heavy_check_mark: |
| [simply-hentai.com](https://www.simply-hentai.com) | :heavy_check_mark: | ? |
| [thehentaiworld.com](https://thehentaiworld.com) | :heavy_check_mark: | :heavy_check_mark: |
| [uncensoredhentai.xxx (1080p, 720p, 480p, 360p)](https://uncensoredhentai.xxx/) | ? | :heavy_check_mark: |
| [yande.re](https://yande.re/post) | :heavy_check_mark: | ? |

If your site is not listed, you can still try the universal downloader.

Expand All @@ -328,16 +327,6 @@ get-sauce -h "cookie: cf_clearance=k2TGEnkzhz_PtHs09vMryROlD4O3UZhrDFrU4svgjdM-1
user-agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36" http...
```

🔐
--> you need to supply login credentials for this extractor to work

```console
get-sauce -un "MyUserName" -up "MyUserPassword" http...
```

🚗
--> requires [geckodriver](https://github.com/mozilla/geckodriver) to workaround DDOS protection

## Credit

- Thanks to [lux](https://github.com/iawia002/lux) for the great template
Expand Down
18 changes: 14 additions & 4 deletions config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,6 @@ var (
Truncate bool
// UserHeaders for the HTTP requests. To bypass Cloudflare or DDOS-GUARD protection
UserHeaders string
// Username for authorization
Username string
// UserPassword for authorization
UserPassword string
// Workers for downloading
Workers int
)
Expand All @@ -41,3 +37,17 @@ var FakeHeaders = map[string]string{
"Accept-Language": "en-US,en;q=0.8",
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36",
}

// FakeHeadersFirefox117 maps HTTP request header names to the values sent
// when imitating a Firefox 117 browser on 64-bit Windows 10 (see the
// User-Agent entry). Entries are listed alphabetically by header name.
var FakeHeadersFirefox117 = map[string]string{
	"Accept":                    "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
	"Accept-Encoding":           "gzip, deflate, br",
	"Accept-Language":           "en-US,en;q=0.5",
	"Sec-Fetch-Dest":            "document",
	"Sec-Fetch-Mode":            "navigate",
	"Sec-Fetch-Site":            "none",
	"Sec-Fetch-User":            "?1",
	"TE":                        "Trailers",
	"Upgrade-Insecure-Requests": "1",
	"User-Agent":                "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/117.0",
	"X-Requested-With":          "XMLHttpRequest",
}
61 changes: 29 additions & 32 deletions extractors/danbooru/danbooru.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,40 +3,49 @@ package danbooru
import (
"fmt"
"log"
"net/http"
"regexp"
"strings"

"github.com/gan-of-culture/get-sauce/config"
"github.com/gan-of-culture/get-sauce/request/webdriver"
"github.com/gan-of-culture/get-sauce/request"
"github.com/gan-of-culture/get-sauce/static"
"github.com/gan-of-culture/get-sauce/utils"
"github.com/pkg/errors"
)

const site = "https://danbooru.donmai.us"

var reIMGData = regexp.MustCompile(`data-width="([^"]+)"[ ]+data-height="([^"]+)"[\s\S]*?alt="([^"]+)".+src="([^"]+)"`)

type extractor struct{}
type extractor struct {
client *http.Client
}

// New returns a danbooru extractor.
// New returns a danbooru extractor
func New() static.Extractor {
return &extractor{}
return newForTesting()
}

// newForTesting builds the concrete *extractor and sets its client to the
// value returned by request.Firefox117Client.
func newForTesting() *extractor {
	ext := new(extractor)
	ext.client = request.Firefox117Client()
	return ext
}

// Extract for danbooru pages
func (e *extractor) Extract(URL string) ([]*static.Data, error) {
config.FakeHeaders["User-Agent"] = "Mozilla/5.0 (X11; Linux x86_64; rv:121.0) Gecko/20100101 Firefox/121.0"

posts, err := parseURL(URL)
posts, err := e.parseURL(URL)
if err != nil {
return nil, err
}

data := []*static.Data{}
for _, post := range posts {
contentData, err := extractData(site + post)
contentData, err := e.extractData(site + post)
if err != nil {
return nil, utils.Wrap(err, site+post)
log.Println(site + post)
return nil, err
}
data = append(data, contentData)
}
Expand All @@ -45,7 +54,7 @@ func (e *extractor) Extract(URL string) ([]*static.Data, error) {
}

// parseURL for danbooru pages
func parseURL(URL string) ([]string, error) {
func (e *extractor) parseURL(URL string) ([]string, error) {
re := regexp.MustCompile(`page=([0-9]+)`)
pageNo := re.FindAllString(URL, -1)
// pageNo = URL?page=number -> if it's there it means overview page otherwise single post or invalid
Expand All @@ -54,50 +63,38 @@ func parseURL(URL string) ([]string, error) {
re := regexp.MustCompile(`/posts/[0-9]+`)
linkToPost := re.FindString(URL)
if linkToPost == "" {
return nil, static.ErrURLParseFailed
return nil, errors.WithStack(static.ErrURLParseFailed)
}

return []string{linkToPost}, nil
}

wd, err := webdriver.New()
if err != nil {
return nil, err
}
defer wd.Close()

htmlString, err := wd.Get(URL)
htmlString, err := request.GetAsBytesWithClient(e.client, URL, URL)
if err != nil {
return nil, err
return nil, errors.WithStack(err)
}

re = regexp.MustCompile(`data-id="([^"]+)`)
matchedIDs := re.FindAllStringSubmatch(htmlString, -1)
matchedIDs := re.FindAllSubmatch(htmlString, -1)

out := []string{}
for _, submatchID := range matchedIDs {
out = append(out, "/posts/"+submatchID[1])
out = append(out, "/posts/"+string(submatchID[1]))
}

return out, nil
}

func extractData(postURL string) (*static.Data, error) {
wd, err := webdriver.New()
func (e *extractor) extractData(postURL string) (*static.Data, error) {
htmlString, err := request.GetAsBytesWithClient(e.client, postURL, postURL)
if err != nil {
return nil, err
}
defer wd.Close()

htmlString, err := wd.Get(postURL)
if err != nil {
return nil, err
return nil, errors.WithStack(err)
}

matchedImgData := reIMGData.FindStringSubmatch(htmlString)
matchedImgData := reIMGData.FindStringSubmatch(string(htmlString))
if len(matchedImgData) != 5 {
log.Println(htmlString)
return nil, static.ErrDataSourceParseFailed
return nil, errors.WithStack(static.ErrDataSourceParseFailed)
}
// [1] = img original width [2] image original height [3] image name [4] src URL

Expand Down
Loading

0 comments on commit 12ad6db

Please sign in to comment.