From d32490902eb92c5199084c49bd1a40d5709b5325 Mon Sep 17 00:00:00 2001
From: OSINT-TECHNOLOGIES <77023667+OSINT-TECHNOLOGIES@users.noreply.github.com>
Date: Fri, 14 Jun 2024 19:55:03 +0300
Subject: [PATCH] Update networking_processor.py [some fixes and .xlsx report support]

---
 datagather_modules/networking_processor.py | 63 +++++++++++++---------
 1 file changed, 39 insertions(+), 24 deletions(-)

diff --git a/datagather_modules/networking_processor.py b/datagather_modules/networking_processor.py
index 59de7c5..6823ba2 100644
--- a/datagather_modules/networking_processor.py
+++ b/datagather_modules/networking_processor.py
@@ -41,7 +41,7 @@ def get_ssl_certificate(short_domain, port=443):
         print(Fore.RED + "Error while gathering info about SSL certificate. Reason: {}".format(e))
         return "No information about SSL certificate was gathered"

-def query_internetdb(ip):
+def query_internetdb(ip, report_file_extension):
     url = f"https://internetdb.shodan.io/{ip}"
     response = requests.get(url)
     if response.status_code == 200:
@@ -51,49 +51,64 @@ def query_internetdb(ip):
         cpes = data.get("cpes", [])
         tags = data.get("tags", [])
         vulns = data.get("vulns", [])
-        return ports, ' // '.join(hostnames), ' // '.join(cpes), ' // '.join(tags), ' // '.join(vulns)
+        if report_file_extension == 'pdf':
+            return ports, ' // '.join(hostnames), ' // '.join(cpes), ' // '.join(tags), ' // '.join(vulns)
+        elif report_file_extension == 'xlsx':
+            return ports, ' // '.join(hostnames), ' // '.join(cpes), ' // '.join(tags), vulns
     else:
         print(Fore.RED + "No information was found on InternetDB" + Style.RESET_ALL)
         ports = hostnames = cpes = tags = vulns = "No info about this web resource on InternetDB"
         return ports, hostnames, cpes, tags, vulns

-def get_robots_txt(url, report_folder):
-    filepath = report_folder + '//01-robots.txt'
+def get_robots_txt(url, robots_path):
     if not url.startswith('http'):
         url = 'http://' + url
     robots_url = url + '/robots.txt'
     response = requests.get(robots_url)
     if response.status_code == 200:
-        with open(filepath, 'w') as f:
+        with open(robots_path, 'w') as f:
             f.write(response.text)
         return 'File "robots.txt" was extracted to text file in report folder'
     else:
         return 'File "robots.txt" was not found'

-def get_sitemap_xml(url, report_folder):
-    filepath = report_folder + '//02-sitemap.txt'
-    if not url.startswith('http'):
-        url = 'http://' + url
-    sitemap_url = url + '/sitemap.xml'
-    response = requests.get(sitemap_url)
-    if response.status_code == 200:
-        with open(filepath, 'w') as f:
-            f.write(response.text)
-        return 'File "sitemap.xml" was extracted to text file in report folder'
-    else:
-        return 'File "sitemap.xml" was not found'
+def get_sitemap_xml(url, sitemap_path):
+    try:
+        if not url.startswith('http'):
+            url = 'http://' + url
+        sitemap_url = url + '/sitemap.xml'
+        response = requests.get(sitemap_url)
+        if len(response.text) > 0:
+            if response.status_code == 200:
+                with open(sitemap_path, 'w') as f:
+                    f.write(response.text)
+                return 'File "sitemap.xml" was extracted to text file in report folder'
+            else:
+                return 'File "sitemap.xml" was not found'
+        else:
+            with open(sitemap_path, 'w') as f:
+                f.write('0')
+            print(Fore.RED + "Error while gathering sitemap.xml. Probably it's unreachable")
+            return 'File "sitemap.xml" was not found'
+    except Exception as e:
+        print(Fore.RED + "Error while gathering sitemap.xml. Reason: {}".format(e))
+        return 'Error occurred during sitemap.xml gathering'

-def extract_links_from_sitemap(report_folder):
-    file_name = report_folder + '//02-sitemap.txt'
-    links_file = report_folder + '//03-sitemap_links.txt'
+def extract_links_from_sitemap(sitemap_links_path, sitemap_path, report_file_type):
     try:
-        tree = ET.parse(file_name)
+        tree = ET.parse(sitemap_path)
         root = tree.getroot()
         links = [elem.text for elem in root.iter('{http://www.sitemaps.org/schemas/sitemap/0.9}loc')]
-        with open(links_file, 'w') as f:
+        if report_file_type == 'pdf':
+            with open(sitemap_links_path, 'w') as f:
+                for link in links:
+                    f.write(f"=> {link}\n")
+            return 'Links from "sitemap.txt" were successfully parsed'
+        elif report_file_type == 'xlsx':
+            parsed_links = []
             for link in links:
-                f.write(f"=> {link}\n")
-        return 'Links from "sitemap.txt" were successfully parsed'
+                parsed_links.append(link)
+            return 'Links from "sitemap.txt" were successfully parsed', parsed_links
     except ET.ParseError as e:
         print(Fore.RED + "Links from sitemap.txt were not parsed. Reason: {}".format(e))
        return 'Links from "sitemap.txt" were not parsed'