add: print max rss during performance test runs

- previously just took the RSS value at the end of the last "convert", but this can be affected by garbage collection mid-run, so now the RSS is polled after each run and the largest value is printed with the average timings.
- expression.py: it seems that `match` is a bit faster than `fullmatch`
XLSForm · Dec 18, 2024 · fd17a60 · fd17a60
1 parent 5979b9d
commit fd17a60
Show file tree

Hide file tree

Showing 3 changed files with 33 additions and 17 deletions.
diff --git a/pyxform/parsing/expression.py b/pyxform/parsing/expression.py
@@ -109,14 +109,14 @@ def is_pyxform_reference(value: str) -> bool:
     Does the input string contain only a valid Pyxform reference? e.g. ${my_question}
     """
     # Needs 3 characters for "${}", plus a name inside.
-    return value and len(value) > 3 and bool(RE_ONLY_PYXFORM_REF.fullmatch(value))
+    return value and len(value) > 3 and bool(RE_ONLY_PYXFORM_REF.match(value))
 
 
 def is_xml_tag(value: str) -> bool:
     """
     Does the input string contain only a valid XML tag / element name?
     """
-    return value and bool(RE_ONLY_NCNAME.fullmatch(value))
+    return value and bool(RE_ONLY_NCNAME.match(value))
 
 
 def has_last_saved(value: str) -> bool:

diff --git a/tests/test_dynamic_default.py b/tests/test_dynamic_default.py
@@ -2,13 +2,13 @@
 Test handling dynamic default in forms
 """
 
-import os
 from dataclasses import dataclass
+from os import getpid
 from time import perf_counter
 from unittest import skip
 from unittest.mock import patch
 
-import psutil
+from psutil import Process
 from pyxform import utils
 from pyxform.xls2xform import convert
 
@@ -778,11 +778,11 @@ def test_dynamic_default_performance__time(self):
         Results with Python 3.10.14 on VM with 2vCPU (i7-7700HQ) 1GB RAM, x questions
         each, average of 10 runs (seconds), with and without the check, per question:
         | num   | with   | without | peak RSS MB |
-        |   500 | 0.1903 |  0.1977 |          58 |
-        |  1000 | 0.4010 |  0.3913 |          63 |
-        |  2000 | 0.6860 |  0.6813 |          67 |
-        |  5000 | 1.7119 |  1.7421 |          90 |
-        | 10000 | 3.5399 |  3.4963 |         136 |
+        |   500 | 0.1626 |  0.1886 |          60 |
+        |  1000 | 0.3330 |  0.3916 |          63 |
+        |  2000 | 0.8675 |  0.7823 |          70 |
+        |  5000 | 1.7051 |  1.5653 |          91 |
+        | 10000 | 3.1097 |  3.8525 |         137 |
         """
         survey_header = """
         | survey |            |          |          |               |
@@ -791,19 +791,26 @@ def test_dynamic_default_performance__time(self):
         question = """
         |        | text       | q{i}     | Q{i}     | if(../t2 = 'test', 1, 2) + 15 - int(1.2) |
         """
+        process = Process(getpid())
         for count in (500, 1000, 2000):
             questions = "\n".join(question.format(i=i) for i in range(count))
             md = "".join((survey_header, questions))
 
             def run(name, case):
                 runs = 0
                 results = []
+                peak_memory_usage = process.memory_info().rss
                 while runs < 10:
                     start = perf_counter()
                     convert(xlsform=case)
                     results.append(perf_counter() - start)
+                    peak_memory_usage = max(process.memory_info().rss, peak_memory_usage)
                     runs += 1
-                print(name, round(sum(results) / len(results), 4))
+                print(
+                    name,
+                    round(sum(results) / len(results), 4),
+                    f"| Peak RSS: {peak_memory_usage}",
+                )
 
             run(name=f"questions={count}, with check (seconds):", case=md)
 
@@ -828,7 +835,7 @@ def test_dynamic_default_performance__memory(self):
         """
         questions = "\n".join(question.format(i=i) for i in range(1, 2000))
         md = "".join((survey_header, questions))
-        process = psutil.Process(os.getpid())
+        process = Process(getpid())
         pre_mem = process.memory_info().rss
         self.assertPyxformXform(md=md)
         post_mem = process.memory_info().rss

diff --git a/tests/test_translations.py b/tests/test_translations.py
@@ -3,10 +3,12 @@
 """
 
 from dataclasses import dataclass
+from os import getpid
 from time import perf_counter
 from unittest import skip
 from unittest.mock import patch
 
+from psutil import Process
 from pyxform.constants import CHOICES, SURVEY
 from pyxform.constants import DEFAULT_LANGUAGE_VALUE as DEFAULT_LANG
 from pyxform.validators.pyxform.translations_checks import (
@@ -401,11 +403,11 @@ def test_missing_translations_check_performance(self):
         with 2 choices each, average of 10 runs (seconds), with and without the check,
         per question:
         | num   | with   | without | peak RSS MB |
-        |   500 | 0.8251 |  0.8473 |          76 |
-        |  1000 | 1.8430 |  1.8612 |          97 |
-        |  2000 | 5.0824 |  5.1167 |         140 |
-        |  5000 | 19.921 |  21.390 |         249 |
-        | 10000 | 78.382 |  74.223 |         435 |
+        |   500 | 0.7427 |  0.8133 |          77 |
+        |  1000 | 1.7908 |  1.7777 |          94 |
+        |  2000 | 5.6719 |  4.8387 |         141 |
+        |  5000 | 20.452 |  19.502 |         239 |
+        | 10000 | 70.871 |  62.106 |         416 |
         """
         survey_header = """
         | survey |                 |        |                    |                   |
@@ -422,6 +424,7 @@ def test_missing_translations_check_performance(self):
         |         | c{i}        | na   | la-d  | la-e       |
         |         | c{i}        | nb   | lb-d  | lb-e       |
         """
+        process = Process(getpid())
         for count in (500, 1000, 2000):
             questions = "\n".join(question.format(i=i) for i in range(count))
             choice_lists = "\n".join(choice_list.format(i=i) for i in range(count))
@@ -430,12 +433,18 @@ def test_missing_translations_check_performance(self):
             def run(name, case):
                 runs = 0
                 results = []
+                peak_memory_usage = process.memory_info().rss
                 while runs < 10:
                     start = perf_counter()
                     convert(xlsform=case)
                     results.append(perf_counter() - start)
+                    peak_memory_usage = max(process.memory_info().rss, peak_memory_usage)
                     runs += 1
-                print(name, round(sum(results) / len(results), 4))
+                print(
+                    name,
+                    round(sum(results) / len(results), 4),
+                    f"| Peak RSS: {peak_memory_usage}",
+                )
 
             run(name=f"questions={count}, with check (seconds):", case=md)