diff --git a/groups_import.py b/groups_import.py
index 34d71d8b1..c1c876d68 100644
--- a/groups_import.py
+++ b/groups_import.py
@@ -142,7 +142,7 @@ def parse_csv_file(ctx):
     # Start processing the actual group data rows
     for line in lines:
         row_number += 1
-        rowdata, error = process_csv_line(line)
+        rowdata, error = process_csv_line(ctx, line)
 
         if error is None:
             extracted_data.append(rowdata)
diff --git a/tools/arb-update-resources.py b/tools/arb-update-resources.py
index 20874af8f..48c93ecf7 100644
--- a/tools/arb-update-resources.py
+++ b/tools/arb-update-resources.py
@@ -53,7 +53,7 @@ def parse_args():
 
 def parse_cs_values(input):
     """Parses a comma-separated list of key:value pairs as a dict."""
-    result = dict()
+    result = {}
     for kv_pair in input.split(","):
         if kv_pair == "":
             continue
@@ -96,7 +96,7 @@ def setup_session(irods_environment_config, ca_file="/etc/pki/tls/certs/chain.cr
                     'encryption_num_hash_rounds': 16,
                     'encryption_salt_size': 8,
                     'ssl_context': ssl_context}
-    settings = dict()
+    settings = {}
     settings.update(irods_environment_config)
     settings.update(ssl_settings)
     settings["password"] = password
@@ -160,8 +160,8 @@ def call_rule(session, rulename, params, number_outputs, rule_engine='irods_rule
         output=output_params,
         **re_config)
 
-    outArray = myrule.execute()
-    buf = outArray.MsParam_PI[0].inOutStruct.stdoutBuf.buf.decode(
+    out_array = myrule.execute()
+    buf = out_array.MsParam_PI[0].inOutStruct.stdoutBuf.buf.decode(
         'utf-8').splitlines()
     return buf[:number_outputs]
 
@@ -208,7 +208,7 @@ def main():
             print("Updating misc resources ...")
             call_rule_update_misc(session)
     except NetworkException:
-        print("Could not connect to iRODS sever ...")
+        print("Could not connect to iRODS server ...")
 
 
 if __name__ == '__main__':
diff --git a/tools/troubleshoot-published-data.py b/tools/troubleshoot-published-data.py
new file mode 100644
index 000000000..e20e35970
--- /dev/null
+++ b/tools/troubleshoot-published-data.py
@@ -0,0 +1,42 @@
+#!/usr/bin/env python3
+"""This script collects all published packages and checks that they have all the required info.
+
+Example:
+To check all published packages:
+python3 troubleshoot-published-data.py
+
+To check one specific package by name:
+python3 troubleshoot-published-data.py -p research-initial[1725262507]
+
+To put results into a json lines log file:
+python3 troubleshoot-published-data.py -l /etc/irods/yoda-ruleset/troubleshoot-pub-log.jsonl
+"""
+import argparse
+import subprocess
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(
+        prog="troubleshoot-published-data.py",
+        description=__doc__,
+        formatter_class=argparse.RawTextHelpFormatter)
+    parser.add_argument("-l", "--log-file", type=str, required=False,
+                        help="Location of the JSON Lines log file to write results to (optional)")
+    parser.add_argument("-p", "--package", type=str, required=False,
+                        help="Troubleshoot a specific data package by name (default: troubleshoot all packages)")
+    return parser.parse_args()
+
+
+def main():
+    args = parse_args()
+    rule_name = "/etc/irods/yoda-ruleset/tools/troubleshoot_data.r"
+    # rule_name = "rule_batch_troubleshoot_published_data_packages"
+    # TODO handle packages with spaces in the name
+    data_package = f"*data_package={args.package}"
+    log_loc = f"*log_loc={args.log_file if args.log_file else ''}"
+    subprocess.call(['irule', '-r', 'irods_rule_engine_plugin-python-instance', '-F',
+                     rule_name, data_package, log_loc])
+
+
+if __name__ == '__main__':
+    main()
diff --git a/tools/troubleshoot_data.r b/tools/troubleshoot_data.r
index 3cb1f0d8f..505943408 100644
--- a/tools/troubleshoot_data.r
+++ b/tools/troubleshoot_data.r
@@ -2,7 +2,8 @@
 def main(rule_args, callback, rei):
     data_package = global_vars["*data_package"].strip('"')
-    callback.rule_batch_troubleshoot_published_data_packages(data_package)
+    log_loc = global_vars["*log_loc"].strip('"')
+    callback.rule_batch_troubleshoot_published_data_packages(data_package, log_loc)
 
-INPUT *data_package=all_published_packages
+INPUT *data_package="", *log_loc=""
 OUTPUT ruleExecOut
diff --git a/troubleshoot_data.py b/troubleshoot_data.py
index 29f0f07f2..be815eebc 100644
--- a/troubleshoot_data.py
+++ b/troubleshoot_data.py
@@ -7,12 +7,14 @@
 __all__ = ['rule_batch_troubleshoot_published_data_packages']
 
 import hashlib
+import json
 import subprocess
 
 import genquery
 
 import datacite
 
 from publication import get_publication_config
+import session_vars
 from schema_transformation import verify_package_schema
 from util import *
@@ -30,10 +32,29 @@ def find_full_package_path(ctx, data_packages, short_package_name):
     for path in data_packages:
         if short_package_name in path:
             return path
-    log.write(ctx, "Error: The data package '{}' does not exist in the provided list.".format(short_package_name))
+    log.write_stdout(ctx, "Error: The data package '{}' does not exist in the provided list.".format(short_package_name))
     return None
 
 
+def published_data_package_exists(ctx, path):
+    """Confirm whether the path points to a published data package."""
+    # TODO could this be a utility?
+
+    # Define the query condition and attributes to fetch data
+    query_condition = (
+        "COLL_NAME = '{}' AND "
+        "META_COLL_ATTR_NAME = 'org_vault_status' AND "
+        "META_COLL_ATTR_VALUE = 'PUBLISHED'".format(path)
+    )
+    query_attributes = "COLL_NAME, META_COLL_ATTR_NAME, META_COLL_ATTR_VALUE, META_COLL_ATTR_UNITS"
+    iter = genquery.row_iterator(query_attributes, query_condition, genquery.AS_LIST, ctx)
+
+    for _ in iter:
+        return True
+
+    return False
+
+
 def find_published_data_packages(ctx):
     """
     Find all published data packages by matching its AVU that org_vault_status = "PUBLISHED".
@@ -55,11 +76,10 @@ def find_published_data_packages(ctx):
         iter = genquery.row_iterator(query_attributes, query_condition, genquery.AS_LIST, ctx)
 
         # Collecting only the collection names
-        data_packages = [row[0] for row in iter]
+        return [row[0] for row in iter]
 
-        return data_packages
     except Exception as e:
-        log.write(ctx, "An error {} occurred while executing the query:".format(e))
+        log.write_stdout(ctx, "An error {} occurred while executing the query:".format(e))
         return []
 
 
@@ -118,22 +138,22 @@ def check_datacite_doi_registration(ctx, data_package):
     """
     try:
-        versionDOI = get_attribute_value(ctx, data_package, "versionDOI")
-        status_code = datacite.metadata_get(ctx, versionDOI)
-        versionDOI_check = (status_code == 200)
+        version_doi = get_attribute_value(ctx, data_package, "versionDOI")
+        status_code = datacite.metadata_get(ctx, version_doi)
+        version_doi_check = status_code == 200
     except ValueError as e:
-        log.write(ctx, "Error: {} while trying to get versionDOI".format(e))
-        versionDOI_check = False
+        log.write_stdout(ctx, "Error: {} while trying to get versionDOI".format(e))
+        version_doi_check = False
 
     try:
-        baseDOI = get_attribute_value(ctx, data_package, "baseDOI")
-        status_code = datacite.metadata_get(ctx, baseDOI)
-        baseDOI_check = (status_code == 200)
+        base_doi = get_attribute_value(ctx, data_package, "baseDOI")
+        status_code = datacite.metadata_get(ctx, base_doi)
+        base_doi_check = status_code == 200
     except ValueError as e:
-        log.write(ctx, "Error: {} while trying to get baseDOI".format(e))
-        baseDOI_check = False
+        log.write_stdout(ctx, "Error: {} while trying to get baseDOI".format(e))
+        base_doi_check = False
 
-    return (versionDOI_check, baseDOI_check)
+    return (version_doi_check, base_doi_check)
 
 
 def calculate_md5(content):
@@ -174,16 +194,18 @@ def get_md5_remote_ssh(ctx, host, username, file_path):
         )
 
         # Run the command using Popen (for python2 version)
-        # TODO: in copy_to_vault package, it may have subprocess methods
+        # TODO: in copy_to_vault package, it may have subprocess method that we can copy here
+        # TODO: convert to .run() in Python3
        process = subprocess.Popen(ssh_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
+        # returncode = subprocess.call(["irsync", "-rK", "i:{}/".format(coll), "i:{}/original".format(target)])
         stdout, stderr = process.communicate()
 
         # Return only the MD5 hash part
         if process.returncode == 0:
             return stdout.strip().split()[0]
-        else:
-            log.write(ctx, "Error: {}".format(stderr))
-            return None
+
+        log.write(ctx, "Error: {}".format(stderr))
+        return None
     except Exception as e:
         log.write(ctx, "An error occurred: {}".format(str(e)))
         return None
@@ -209,60 +231,96 @@ def get_attribute_value(ctx, data_package, attribute_suffix):
     raise ValueError("Attribute {} not found in AVU".format(attr))
 
 
-def verify_file_integrity(ctx, data_package, attribute_suffix, remote_directory):
+def verify_file_integrity(ctx, data_package, attribute_suffix, remote_host, remote_directory):
     """
     Compares MD5 checksums between a local file and its remote version to verify their integrity.
 
     :param ctx: Combined type of a callback and rei struct
     :param data_package: String representing the data package collection path.
     :param attribute_suffix: Suffix identifying the metadata attribute for the file path.
+    :param remote_host: Hostname for the remote host to look up in publication config
     :param remote_directory: Base directory on the remote server for the file.
 
     :returns: True if the MD5 checksums match, False otherwise.
+
+    :raises KeyError: If host cannot be found in publication config
     """
     # Calculate md5 for the local file
     file_path = get_attribute_value(ctx, data_package, attribute_suffix)
+    log.write_stdout(ctx, "file path: {}".format(file_path))
+
     # We are comparing small files so it should be ok to get the whole file
     local_data = data_object.read(ctx, file_path)
     local_md5 = calculate_md5(local_data)
 
     # Calculate md5 for the remote file
     publication_config = get_publication_config(ctx)
+    if remote_host not in publication_config:
+        raise KeyError("Host {} does not exist in publication config".format(remote_host))
+
     file_shortname = file_path.split("/")[-1].replace('-combi', '')
+    log.write_stdout(ctx, "short: {}".format(file_shortname))
     remote_file_path = "/var/www/{}/{}/{}/{}".format(
         remote_directory,
         publication_config['yodaInstance'],
         publication_config['yodaPrefix'],
         file_shortname)
-    remote_md5 = get_md5_remote_ssh(ctx, "combined.yoda.test", "inbox", remote_file_path)  # TODO: host_name and user_name can be variables
+    log.write_stdout(ctx, "remote file path: {}".format(remote_file_path))
+
+    remote_md5 = get_md5_remote_ssh(ctx, publication_config[remote_host], config.inbox_user, remote_file_path)
 
     if local_md5 == remote_md5:
         return True
-    else:
-        log.write(ctx, " MD5 of local and remote file don't match.")
-        log.write(ctx, "Local MD5 ({}): {}".format(attribute_suffix, local_md5))
-        log.write(ctx, "Remote MD5 ({}): {}".format(attribute_suffix, remote_md5))
-        return False
+
+    log.write_stdout(ctx, "MD5 of local and remote file don't match.")
+    log.write_stdout(ctx, "Local MD5 ({}): {}".format(attribute_suffix, local_md5))
+    log.write_stdout(ctx, "Remote MD5 ({}): {}".format(attribute_suffix, remote_md5))
+    return False
 
 
 def check_integrity_of_publication_files(ctx, data_package):
     """
-    Checks the integrity of landingPage and CombiJson files by verifying their MD5 checksums in local aginst thoese in public server.
+    Checks the integrity of landingPage and CombiJson files by comparing their local MD5 checksums against those on the public server.
 
     :param ctx: Combined type of a callback and rei struct
     :param data_package: String representing the data package collection path.
 
     :returns: A tuple containing boolean results of checking
     """
-    landing_page_verified = verify_file_integrity(ctx, data_package, "landingPagePath", "landingpages")
-    combi_json_verified = verify_file_integrity(ctx, data_package, "combiJsonPath", "moai/metadata")
+    # publicVHost for landingpage, moaiHost for moai
+    landing_page_verified = verify_file_integrity(ctx, data_package, "landingPagePath", "publicVHost", "landingpages")
+    combi_json_verified = verify_file_integrity(ctx, data_package, "combiJsonPath", "moaiHost", "moai/metadata")
 
     return (landing_page_verified, combi_json_verified)
 
 
-@rule.make(inputs=[0], outputs=[1])
-def rule_batch_troubleshoot_published_data_packages(ctx, requested_package):
+def print_troubleshoot_result(ctx, result):
+    """Print the result of troubleshooting one package in a human-friendly format."""
+    # Only the boolean check results count towards the verdict, not the package path.
+    pass_all_tests = all(value for key, value in result.items()
+                         if key != 'data_package_path')
+
+    log.write_stdout(ctx, "Results for: {}".format(result['data_package_path']))
+    # TODO check these prompt messages
+    if pass_all_tests:
+        log.write_stdout(ctx, "Package passes all tests.")
+    else:
+        log.write_stdout(ctx, "Package FAILED one or more tests:")
+        log.write_stdout(ctx, "Schema matches: {}".format(result['schema_check']))
+        log.write_stdout(ctx, "All expected AVUs exist: {}".format(result['no_missing_avus_check']))
+        log.write_stdout(ctx, "No unexpected AVUs: {}".format(result['no_unexpected_avus_check']))
+        log.write_stdout(ctx, "Version DOI matches: {}".format(result['versionDOI_check']))
+        log.write_stdout(ctx, "Base DOI matches: {}".format(result['baseDOI_check']))
+        log.write_stdout(ctx, "Landing page matches: {}".format(result['landingPage_check']))
+        log.write_stdout(ctx, "combiJson matches: {}".format(result['combiJson_check']))
+
+    log.write_stdout(ctx, "")
+
+
+@rule.make(inputs=[0, 1], outputs=[2])
+def rule_batch_troubleshoot_published_data_packages(ctx, requested_package, log_loc):
     """
     Troubleshoots published data packages.
 
-    :param ctx: Context that combines a callback and rei struct.
+    :param ctx:               Context that combines a callback and rei struct.
     :param requested_package: A string representing a specific data package path or "all_published_data" for all packages.
+    :param log_loc:           A string representing the location to write a JSON log to
 
     :returns: None.
 
@@ -274,46 +332,56 @@ def rule_batch_troubleshoot_published_data_packages(ctx, requested_package):
     Operates on either a single specified package or all published packages, depending on the input.
""" + data_packages = [] - # Retrieve all published data packages - all_published_packages = find_published_data_packages(ctx) - if not all_published_packages: - log.write(ctx, "No published packages found.") - return + # Full path given + if requested_package.startswith("/"): + if not collection.exists(ctx, requested_package) or not published_data_package_exists(ctx, requested_package): + log.write_stdout(ctx, "Error: Requested package '{}' not found among published packages.".format(requested_package)) + return - # Determine which packages to process based on the input - if requested_package == 'all_published_packages': - data_packages = all_published_packages + data_packages.append(requested_package) else: - data_package_path = find_full_package_path(ctx, all_published_packages, requested_package) - if data_package_path: - data_packages = [data_package_path] - else: - log.write(ctx, "Error: Requested package '{}' not found among published packages.".format(requested_package)) + # Retrieve all published data packages + all_published_packages = find_published_data_packages(ctx) + if not all_published_packages: + log.write_stdout(ctx, "No published packages found.") return - results_dict = {} - - # Toubleshooting + # Determine which packages to process based on the input + if requested_package == 'all_published_packages': + data_packages = all_published_packages + else: + data_package_path = find_full_package_path(ctx, all_published_packages, requested_package) + if data_package_path: + data_packages.append(data_package_path) + else: + log.write_stdout(ctx, "Error: Requested package '{}' not found among published packages.".format(requested_package)) + return + + # Troubleshooting for data_package in data_packages: - log.write(ctx, "Troubleshooting: {}".format(data_package)) + log.write_stdout(ctx, "Troubleshooting: {}".format(data_package)) schema_check = verify_package_schema(ctx, data_package, {})['match_schema'] no_missing_avus_check, no_unexpected_avus_check = check_data_package_system_avus(ctx, data_package) - versionDOI_check, baseDOI_check = check_datacite_doi_registration(ctx, data_package) - landingPage_check, combiJson_check = check_integrity_of_publication_files(ctx, data_package) + version_doi_check, base_doi_check = check_datacite_doi_registration(ctx, data_package) + landing_page_check, combi_json_check = check_integrity_of_publication_files(ctx, data_package) # Collect results for current data package - results_dict[data_package] = { + result = { + 'data_package_path': data_package, 'schema_check': schema_check, 'no_missing_avus_check': no_missing_avus_check, 'no_unexpected_avus_check': no_unexpected_avus_check, - 'versionDOI_check': versionDOI_check, - 'baseDOI_check': baseDOI_check, - 'landingPage_check': landingPage_check, - 'combiJson_check': combiJson_check + 'versionDOI_check': version_doi_check, + 'baseDOI_check': base_doi_check, + 'landingPage_check': landing_page_check, + 'combiJson_check': combi_json_check } - log.write(ctx, "Troubleshooting results: {}".format(results_dict)) - - # TODO: Return the result of output to the terminal (stdout) - # return json.dumps(results_dict) + print_troubleshoot_result(ctx, result) + # TODO proper check? 
+        if log_loc:
+            with open(log_loc, "a") as writer:
+                json.dump(result, writer)
+                writer.write('\n')
diff --git a/util/config.py b/util/config.py
index 731d131d9..e66f5d573 100644
--- a/util/config.py
+++ b/util/config.py
@@ -147,6 +147,7 @@ def __repr__(self):
                  vault_copy_multithread_enabled=True,
                  user_max_connections_enabled=False,
                  user_max_connections_number=4,
+                 inbox_user='inbox',
                  python3_interpreter='/usr/local/bin/python3')
 
 # }}}
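
For reference, a minimal sketch (not part of the patch above) of how the JSON Lines log written via the new -l option could be read back afterwards. The key names follow the result dict built in troubleshoot_data.py; the summarize_log helper and the default file name are illustrative assumptions, not part of the ruleset.

#!/usr/bin/env python3
"""Illustrative reader for the troubleshoot JSON Lines log (one JSON object per package)."""
import json
import sys


def summarize_log(log_path):
    """Print a pass/fail summary per data package recorded in the log."""
    with open(log_path) as log_file:
        for line in log_file:
            line = line.strip()
            if not line:
                continue
            result = json.loads(line)
            # Everything except the package path is a boolean check result.
            checks = {key: value for key, value in result.items() if key != 'data_package_path'}
            verdict = "OK" if all(checks.values()) else "FAILED"
            print("{}: {}".format(result['data_package_path'], verdict))
            for name, passed in checks.items():
                if not passed:
                    print("  failed: {}".format(name))


if __name__ == '__main__':
    summarize_log(sys.argv[1] if len(sys.argv) > 1 else "troubleshoot-pub-log.jsonl")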