diff --git a/Tests/Parser/Client/fixtures/library.yml b/Tests/Parser/Client/fixtures/library.yml index 87ae3a0edb..1665f00c39 100644 --- a/Tests/Parser/Client/fixtures/library.yml +++ b/Tests/Parser/Client/fixtures/library.yml @@ -731,3 +731,15 @@ type: library name: Azure Blob Storage version: 12.23.0 +- + user_agent: trafilatura/1.5.0 (+https://github.com/adbar/trafilatura) + client: + type: library + name: trafilatura + version: 1.5.0 +- + user_agent: 'sqlmap/1.8.10.1#dev (https://sqlmap.org)' + client: + type: library + name: sqlmap + version: 1.8.10.1 diff --git a/Tests/fixtures/bots.yml b/Tests/fixtures/bots.yml index c94be9560b..a0133d2d70 100644 --- a/Tests/fixtures/bots.yml +++ b/Tests/fixtures/bots.yml @@ -4000,15 +4000,6 @@ producer: name: 'IBM Germany Research & Development GmbH' url: https://exchange.xforce.ibmcloud.com/ -- - user_agent: 'sqlmap/1.1.8.2#dev (http://sqlmap.org)' - bot: - name: sqlmap - category: Security Checker - url: http://sqlmap.org/ - producer: - name: sqlmap - url: http://sqlmap.org/ - user_agent: Mozilla/5.0 (compatible; theoldreader.com; 1 subscribers; feed-id=aaa) bot: @@ -8433,3 +8424,27 @@ name: SuggestBot category: Crawler url: https://github.com/nettrom/suggestbot +- + user_agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36 (https://securitee.org/cms-experiment-fall2024/) + bot: + name: CMS Experiment + category: Security Checker + url: https://securitee.org/cms-experiment-fall2024/ +- + user_agent: SiteCheckerBotCrawler/1.0 (+http://sitechecker.pro) + bot: + name: SiteCheckerBotCrawler + category: Crawler + url: https://sitechecker.pro/ + producer: + name: Cyber Circus Limited + url: https://sitechecker.pro/ +- + user_agent: SBIder/0.8-dev (SBIder; http://www.sitesell.com/sbider.html; http://support.sitesell.com/contact-support.html) + bot: + name: SBIder + category: Crawler + url: https://www.sitesell.com/sbider.html + producer: + name: SiteSell Inc. + url: https://www.sitesell.com/ diff --git a/regexes/bots.yml b/regexes/bots.yml index fdada6986e..f6e139f5e3 100644 --- a/regexes/bots.yml +++ b/regexes/bots.yml @@ -1624,14 +1624,6 @@ name: 'Sprinklr, Inc.' url: 'https://www.sprinklr.com/' -- regex: 'sqlmap/' - name: 'sqlmap' - category: 'Security Checker' - url: 'http://sqlmap.org/' - producer: - name: 'sqlmap' - url: 'http://sqlmap.org/' - - regex: 'SSL Labs' name: 'SSL Labs' category: 'Validator' @@ -4912,6 +4904,27 @@ category: 'Crawler' url: 'https://github.com/nettrom/suggestbot' +- regex: 'cms-experiment' + name: 'CMS Experiment' + category: 'Security Checker' + url: 'https://securitee.org/cms-experiment-fall2024/' + +- regex: 'SiteCheckerBotCrawler' + name: 'SiteCheckerBotCrawler' + category: 'Crawler' + url: 'https://sitechecker.pro/' + producer: + name: 'Cyber Circus Limited' + url: 'https://sitechecker.pro/' + +- regex: 'SBIder' + name: 'SBIder' + category: 'Crawler' + url: 'https://www.sitesell.com/sbider.html' + producer: + name: 'SiteSell Inc.' + url: 'https://www.sitesell.com/' + # Generic bots - regex: 'nuhk|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr\.com|teoma|oegp|http%20client|htdig|mogimogi|larbin|scrubby|searchsight|semanticdiscovery|snappy|vortex(?!(?: Build|Plus| CM62| HD65))|zeal(?!ot)|dataparksearch|findlinks|BrowserMob|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|7Siters|centuryb\.o\.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|cortex|Re-re Studio|adreview|AHC/|NameOfAgent|Request-Promise|ALittle Client|Hello,? world|wp_is_mobile|0xAbyssalDoesntExist|Anarchy99|^revolt|nvd0rz|xfa1|Hakai|gbrmss|fuck-your-hp|IDBTE4M CODE87|Antoine|Insomania|Hells-Net|b3astmode|Linux Gnu \(cow\)|Test Certificate Info|iplabel|Magellan|TheSafex?Internetx?Search|Searcherx?web|kirkland-signature|LinkChain|survey-security-dot-txt|infrawatch|Time/|r00ts3c-owned-you|nvdorz|Root Slut|NiggaBalls|BotPoke|GlobalWebSearch|xx032_bo9vs83_2a|sslshed|geckotrail|Wordup|Keydrop|^xenu|^(?:chrome|firefox|Abcd|Dark|KvshClient|Node.js|Report Runner|url|Zeus|ZmEu)$' name: 'Generic Bot' diff --git a/regexes/client/libraries.yml b/regexes/client/libraries.yml index 9195432909..c4543e7bbf 100644 --- a/regexes/client/libraries.yml +++ b/regexes/client/libraries.yml @@ -649,3 +649,13 @@ name: 'Azure Blob Storage' version: '$1' url: 'https://learn.microsoft.com/en-us/azure/storage/blobs/storage-quickstart-blobs-python' + +- regex: 'trafilatura(?:/(\d+[.\d]+))?' + name: 'trafilatura' + version: '$1' + url: 'https://github.com/adbar/trafilatura' + +- regex: 'sqlmap(?:/(\d+[.\d]+))?' + name: 'sqlmap' + version: '$1' + url: 'https://sqlmap.org/'