From bc26fbedeac3e7c8c262b7c9a796ad4487d3748a Mon Sep 17 00:00:00 2001
From: BubaVV <vv@WIN-TR4JBTJC6N9>
Date: Sun, 1 Dec 2024 23:27:06 +0200
Subject: [PATCH] Update for phpMyAdmin 5.2

Grab appears to be abandoned and no longer works. Migrated the script to
requests and lxml, and tested it briefly against a recent phpMyAdmin
installation.
---
 README.md                |   9 ++-
 phpmyadmin_sql_backup.py | 134 +++++++++++++++++++++++++--------------
 requirements.txt         |   6 ++
 3 files changed, 96 insertions(+), 53 deletions(-)
 create mode 100644 requirements.txt
diff --git a/README.md b/README.md
index e60df34..a585a81 100644
--- a/README.md
+++ b/README.md
@@ -9,8 +9,8 @@ A Python 3 script to __automate the download of SQL backups via a
 This is useful when your web hosting provider does not grant you access to a console (for `mysqldump`) but
 you want to automate the backup of your database (without having to manually use the browser).
 
-It has been tested with Python 3.4+ on Linux and Windows and the following versions of phpMyAdmin:
-`4.3.x - 4.8.x, 5.0.0` 
+It has been tested with Python 3.8 on Linux and the following versions of phpMyAdmin:
+`5.2`
 
 _Note_: The web interface of phpMyAdmin may change in the future and break this script. Please file a bug report
 (including your version of phpMyAdmin) if you encounter this issue.
@@ -83,9 +83,8 @@ UTC date / time to the directory `/tmp`, e.g. `/tmp/2016-03-11--15-19-04-UTC_exa
 
 ## Requirements
 
- - A [Python 3.4+](https://www.python.org/) installation on your system
- - [Grab - python web-scraping framework](https://github.com/lorien/grab): Install via `pip install -U Grab` or see 
-   the [installation instructions](https://grab.readthedocs.io/en/latest/usage/installation.html) if you run into problems.
+ - A [Python 3.8+](https://www.python.org/) installation on your system
+ - The required Python packages: install them via `pip install -r requirements.txt`
 
 __Note for Windows users__: while it is possible to install the requirements natively, it is often easier to use the
 [Windows Subsystem for Linux](https://docs.microsoft.com/en-us/windows/wsl/install-win10) if you are using Windows 10
diff --git a/phpmyadmin_sql_backup.py b/phpmyadmin_sql_backup.py
index db0ff17..f92c136 100644
--- a/phpmyadmin_sql_backup.py
+++ b/phpmyadmin_sql_backup.py
@@ -28,68 +28,103 @@
 import os
 import re
 import sys
+from itertools import product
+from urllib.parse import urljoin
 
-import grab
+import requests
+from lxml import html
 
-__version__ = '2019-05-07.1'
+__version__ = '2024-12-01'
 
 CONTENT_DISPOSITION_FILENAME_RE = re.compile(r'^.*filename="(?P<filename>[^"]+)".*$')
 DEFAULT_PREFIX_FORMAT = r'%Y-%m-%d--%H-%M-%S-UTC_'
 
 
-def is_login_successful(g):
-    return any(g.doc.text_search(s) for s in ["frame_content", "server_export.php", "index.php?route=/server/export"])
+def is_login_successful(tree):
+    hrefs = tree.xpath("//a/@href")
+    target_substrings = ["frame_content", "server_export.php", "index.php?route=/server/export"]
+    combinations = product(target_substrings, hrefs)
 
-
-def open_frame_if_phpmyadmin_3(g):
-    frame_url_selector = g.doc.select("id('frame_content')/@src")
-    if frame_url_selector.exists():
-        g.go(frame_url_selector.text())
+    return any(substring in href for substring, href in combinations)
 
 
 def download_sql_backup(url, user, password, dry_run=False, overwrite_existing=False, prepend_date=True, basename=None,
                         output_directory=os.getcwd(), exclude_dbs=None, compression='none', prefix_format=None,
                         timeout=60, http_auth=None, server_name=None, **kwargs):
     prefix_format = prefix_format or DEFAULT_PREFIX_FORMAT
-    exclude_dbs = exclude_dbs.split(',') or []
-    encoding = '' if compression == 'gzip' else 'gzip'
-
-    g = grab.Grab(encoding=encoding, timeout=timeout)
-    if http_auth:
-        g.setup(userpwd=http_auth)
-    else:
-        g.doc.set_input_by_id('input_username', user)
-        g.doc.set_input_by_id('input_password', password)
-        g.submit()
-    g.go(url)
-    if server_name:
-        g.doc.set_input_by_id('input_servername', server_name)
-
-    if not is_login_successful(g):
-        raise ValueError('Could not login - did you provide the correct username / password?')
-
-    open_frame_if_phpmyadmin_3(g)
-
-    old_export_url_selector = g.doc.select("id('topmenu')//a[contains(@href,'server_export.php')]/@href")
-    if old_export_url_selector.exists():
-        export_url = old_export_url_selector.text()
-    else:
-        export_url = g.doc.select("id('topmenu')//a[contains(@href,'index.php?route=/server/export')]/@href").text()
-    g.go(export_url)
-
-    dbs_available = [option.attrib['value'] for option in g.doc.form.inputs['db_select[]']]
+    exclude_dbs = exclude_dbs.split(',') if exclude_dbs else []
+    session = requests.Session()
+
+    # Login
+    response = session.get(url, timeout=timeout)
+    if response.status_code != 200:
+        raise ValueError("Failed to load the login page.")
+
+    tree = html.fromstring(response.content)
+    form_action = tree.xpath("//form[@id='login_form']/@action")
+    form_action = form_action[0] if form_action else url
+
+    form_data = {
+        "pma_username": user,
+        "pma_password": password,
+    }
+
+    hidden_inputs = tree.xpath("//form[@id='login_form']//input[@type='hidden']")
+    for hidden_input in hidden_inputs:
+        name = hidden_input.get("name")
+        value = hidden_input.get("value", "")
+        if name:
+            form_data[name] = value
+
+    login_response = session.post(urljoin(url,form_action), data=form_data, timeout=timeout)
+
+    if login_response.status_code != 200:
+        raise ValueError("Could not log in. Please check your credentials.")
+
+    tree = html.fromstring(login_response.content)
+    if not is_login_successful(tree):
+        raise ValueError("Could not log in. Please check your credentials.")
+
+    # Extract export URL
+    export_url = tree.xpath("id('topmenu')//a[contains(@href,'server_export.php')]/@href")
+    if not export_url:
+        export_url = tree.xpath("id('topmenu')//a[contains(@href,'index.php?route=/server/export')]/@href")
+    if not export_url:
+        raise ValueError("Could not find export URL.")
+    export_url = export_url[0]
+
+    # Access export page
+    export_response = session.get(urljoin(url,export_url), timeout=timeout)
+    export_tree = html.fromstring(export_response.content)
+
+
+    # Determine databases to dump
+    dbs_available = export_tree.xpath("//select[@name='db_select[]']/option/@value")
     dbs_to_dump = [db_name for db_name in dbs_available if db_name not in exclude_dbs]
     if not dbs_to_dump:
-        print('Warning: no databases to dump (databases available: "{}")'.format('", "'.join(dbs_available)),
-            file=sys.stderr)
-
-    file_response = g.submit(
-        extra_post=[('db_select[]', db_name) for db_name in dbs_to_dump] + [('compression', compression)])
-
-    re_match = CONTENT_DISPOSITION_FILENAME_RE.match(g.doc.headers['Content-Disposition'])
+        print(f'Warning: no databases to dump (databases available: "{", ".join(dbs_available)}")',
+              file=sys.stderr)
+
+    # Prepare form data
+    dump_form_action = export_tree.xpath("//form[@name='dump']/@action")[0]
+    form_data = {'db_select[]': dbs_to_dump}
+    form_data['compression'] = compression
+    form_data['what'] = 'sql'
+    form_data['filename_template'] = '@SERVER@'
+    form_data['sql_structure_or_data'] = 'structure_and_data'
+    dump_hidden_inputs = export_tree.xpath("//form[@name='dump']//input[@type='hidden']")
+    for hidden_input in dump_hidden_inputs:
+        name = hidden_input.get("name")
+        value = hidden_input.get("value", "")
+        if name:
+            form_data[name] = value
+
+    # Submit form and download file
+    file_response = session.post(urljoin(url, dump_form_action), data=form_data, timeout=timeout, stream=True)
+    content_disposition = file_response.headers.get('Content-Disposition', '')
+    re_match = CONTENT_DISPOSITION_FILENAME_RE.match(content_disposition)
     if not re_match:
-        raise ValueError(
-            'Could not determine SQL backup filename from {}'.format(g.doc.headers['Content-Disposition']))
+        raise ValueError(f"Could not determine SQL backup filename from {content_disposition}")
 
     content_filename = re_match.group('filename')
     filename = content_filename if basename is None else basename + os.path.splitext(content_filename)[1]
@@ -101,16 +136,19 @@ def download_sql_backup(url, user, password, dry_run=False, overwrite_existing=F
     if os.path.isfile(out_filename) and not overwrite_existing:
         basename, ext = os.path.splitext(out_filename)
         n = 1
-        print('File {} already exists, to overwrite it use --overwrite-existing'.format(out_filename), file=sys.stderr)
+        print(f'File {out_filename} already exists, to overwrite it use --overwrite-existing', file=sys.stderr)
         while True:
-            alternate_out_filename = '{}_({}){}'.format(basename, n, ext)
+            alternate_out_filename = f'{basename}_({n}){ext}'
             if not os.path.isfile(alternate_out_filename):
                 out_filename = alternate_out_filename
                 break
             n += 1
 
+    # Save file if not dry run
     if not dry_run:
-        file_response.save(out_filename)
+        with open(out_filename, 'wb') as f:
+            for chunk in file_response.iter_content(chunk_size=8192):
+                f.write(chunk)
 
     return out_filename
 
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..894c864
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,6 @@
+certifi==2024.8.30
+charset-normalizer==3.4.0
+idna==3.10
+lxml==5.3.0
+requests==2.32.3
+urllib3==2.2.3