diff --git a/README.md b/README.md index 1997f5a..a585a81 100644 --- a/README.md +++ b/README.md @@ -9,8 +9,8 @@ A Python 3 script to __automate the download of SQL backups via a This is useful when your web hosting provider does not grant you access to a console (for `mysqldump`) but you want to automate the backup of your database (without having to manually use the browser). -It has been tested with Python 3.4+ on Linux and Windows and the following versions of phpMyAdmin: -`4.3.x - 4.8.x, 5.0.0` +It has been tested with Python 3.8 on Linux and the following versions of phpMyAdmin: +`5.2` _Note_: The web interface of phpMyAdmin may change in the future and break this script. Please file a bug report (including your version of phpMyAdmin) if you encounter this issue. @@ -83,9 +83,8 @@ UTC date / time to the directory `/tmp`, e.g. `/tmp/2016-03-11--15-19-04-UTC_exa ## Requirements - - A [Python 3.4+](https://www.python.org/) installation on your system - - [Grab - python web-scraping framework](http://grablib.org/): Install via `pip install -U Grab` or see - the [installation instructions](http://docs.grablib.org/en/latest/usage/installation.html) if you run into problems. 
+ - A [Python 3.8+](https://www.python.org/) installation on your system + - Requirements - `pip install -r requirements.txt` __Note for Windows users__: while it is possible to install the requirements natively, it is often easier to use the [Windows Subsystem for Linux](https://docs.microsoft.com/en-us/windows/wsl/install-win10) if you are using Windows 10 diff --git a/phpmyadmin_sql_backup.py b/phpmyadmin_sql_backup.py index 6d3c395..f92c136 100644 --- a/phpmyadmin_sql_backup.py +++ b/phpmyadmin_sql_backup.py @@ -28,64 +28,103 @@ import os import re import sys +from itertools import product +from urllib.parse import urljoin -import grab +import requests +from lxml import html -__version__ = '2019-05-07.1' +__version__ = '2024-12-01' CONTENT_DISPOSITION_FILENAME_RE = re.compile(r'^.*filename="(?P<filename>[^"]+)".*$') DEFAULT_PREFIX_FORMAT = r'%Y-%m-%d--%H-%M-%S-UTC_' -def is_login_successful(g): - return g.doc.text_search("frame_content") or g.doc.text_search("server_export.php") +def is_login_successful(tree): + hrefs = tree.xpath("//a/@href") + target_substrings = ["frame_content", "server_export.php", "index.php?route=/server/export"] + combinations = product(target_substrings, hrefs) - -def open_frame_if_phpmyadmin_3(g): - frame_url_selector = g.doc.select("id('frame_content')/@src") - if frame_url_selector.exists(): - g.go(frame_url_selector.text()) + return any(substring in href for substring, href in combinations) def download_sql_backup(url, user, password, dry_run=False, overwrite_existing=False, prepend_date=True, basename=None, output_directory=os.getcwd(), exclude_dbs=None, compression='none', prefix_format=None, timeout=60, http_auth=None, server_name=None, **kwargs): prefix_format = prefix_format or DEFAULT_PREFIX_FORMAT - exclude_dbs = exclude_dbs.split(',') or [] - encoding = '' if compression == 'gzip' else 'gzip' - - g = grab.Grab(encoding=encoding, timeout=timeout) - if http_auth: - g.setup(userpwd=http_auth) - else: - 
g.doc.set_input_by_id('input_username', user) - g.doc.set_input_by_id('input_password', password) - g.submit() - g.go(url) - if server_name: - g.doc.set_input_by_id('input_servername', server_name) - - if not is_login_successful(g): - raise ValueError('Could not login - did you provide the correct username / password?') - - open_frame_if_phpmyadmin_3(g) - - export_url = g.doc.select("id('topmenu')//a[contains(@href,'server_export.php')]/@href").text() - g.go(export_url) - - dbs_available = [option.attrib['value'] for option in g.doc.form.inputs['db_select[]']] + exclude_dbs = exclude_dbs.split(',') if exclude_dbs else [] + session = requests.Session() + + # Login + response = session.get(url, timeout=timeout) + if response.status_code != 200: + raise ValueError("Failed to load the login page.") + + tree = html.fromstring(response.content) + form_action = tree.xpath("//form[@id='login_form']/@action") + form_action = form_action[0] if form_action else url + + form_data = { + "pma_username": user, + "pma_password": password, + } + + hidden_inputs = tree.xpath("//form[@id='login_form']//input[@type='hidden']") + for hidden_input in hidden_inputs: + name = hidden_input.get("name") + value = hidden_input.get("value", "") + if name: + form_data[name] = value + + login_response = session.post(urljoin(url,form_action), data=form_data, timeout=timeout) + + if login_response.status_code != 200: + raise ValueError("Could not log in. Please check your credentials.") + + tree = html.fromstring(login_response.content) + if not is_login_successful(tree): + raise ValueError("Could not log in. 
Please check your credentials.") + + # Extract export URL + export_url = tree.xpath("id('topmenu')//a[contains(@href,'server_export.php')]/@href") + if not export_url: + export_url = tree.xpath("id('topmenu')//a[contains(@href,'index.php?route=/server/export')]/@href") + if not export_url: + raise ValueError("Could not find export URL.") + export_url = export_url[0] + + # Access export page + export_response = session.get(urljoin(url,export_url), timeout=timeout) + export_tree = html.fromstring(export_response.content) + + + # Determine databases to dump + dbs_available = export_tree.xpath("//select[@name='db_select[]']/option/@value") dbs_to_dump = [db_name for db_name in dbs_available if db_name not in exclude_dbs] if not dbs_to_dump: - print('Warning: no databases to dump (databases available: "{}")'.format('", "'.join(dbs_available)), - file=sys.stderr) - - file_response = g.submit( - extra_post=[('db_select[]', db_name) for db_name in dbs_to_dump] + [('compression', compression)]) - - re_match = CONTENT_DISPOSITION_FILENAME_RE.match(g.doc.headers['Content-Disposition']) + print(f'Warning: no databases to dump (databases available: "{", ".join(dbs_available)}")', + file=sys.stderr) + + # Prepare form data + dump_form_action = export_tree.xpath("//form[@name='dump']/@action")[0] + form_data = {'db_select[]': dbs_to_dump} + form_data['compression'] = compression + form_data['what'] = 'sql' + form_data['filename_template'] = '@SERVER@' + form_data['sql_structure_or_data'] = 'structure_and_data' + dump_hidden_inputs = export_tree.xpath("//form[@name='dump']//input[@type='hidden']") + for hidden_input in dump_hidden_inputs: + name = hidden_input.get("name") + value = hidden_input.get("value", "") + if name: + form_data[name] = value + + # Submit form and download file + file_response = session.post(urljoin(url, dump_form_action), data=form_data, timeout=timeout, stream=True) + content_disposition = file_response.headers.get('Content-Disposition', '') + re_match = 
CONTENT_DISPOSITION_FILENAME_RE.match(content_disposition) if not re_match: - raise ValueError( - 'Could not determine SQL backup filename from {}'.format(g.doc.headers['Content-Disposition'])) + raise ValueError(f"Could not determine SQL backup filename from {content_disposition}") content_filename = re_match.group('filename') filename = content_filename if basename is None else basename + os.path.splitext(content_filename)[1] @@ -97,16 +136,19 @@ def download_sql_backup(url, user, password, dry_run=False, overwrite_existing=F if os.path.isfile(out_filename) and not overwrite_existing: basename, ext = os.path.splitext(out_filename) n = 1 - print('File {} already exists, to overwrite it use --overwrite-existing'.format(out_filename), file=sys.stderr) + print(f'File {out_filename} already exists, to overwrite it use --overwrite-existing', file=sys.stderr) while True: - alternate_out_filename = '{}_({}){}'.format(basename, n, ext) + alternate_out_filename = f'{basename}_({n}){ext}' if not os.path.isfile(alternate_out_filename): out_filename = alternate_out_filename break n += 1 + # Save file if not dry run if not dry_run: - file_response.save(out_filename) + with open(out_filename, 'wb') as f: + for chunk in file_response.iter_content(chunk_size=8192): + f.write(chunk) return out_filename diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..894c864 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,6 @@ +certifi==2024.8.30 +charset-normalizer==3.4.0 +idna==3.10 +lxml==5.3.0 +requests==2.32.3 +urllib3==2.2.3