Skip to content

Commit

Permalink
add: print max rss during performance test runs
Browse files Browse the repository at this point in the history
- previously the RSS value was only taken at the end of the last "convert", but
  this can be affected by garbage collection mid-run, so now the RSS is polled
  after each run and the largest value is printed with the average timings.
- expression.py: it seems that `match` is a bit faster than `fullmatch`
  • Loading branch information
lindsay-stevens committed Dec 18, 2024
1 parent 5979b9d commit fd17a60
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 17 deletions.
4 changes: 2 additions & 2 deletions pyxform/parsing/expression.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,14 +109,14 @@ def is_pyxform_reference(value: str) -> bool:
Does the input string contain only a valid Pyxform reference? e.g. ${my_question}
"""
# Needs 3 characters for "${}", plus a name inside.
return value and len(value) > 3 and bool(RE_ONLY_PYXFORM_REF.fullmatch(value))
return value and len(value) > 3 and bool(RE_ONLY_PYXFORM_REF.match(value))


def is_xml_tag(value: str) -> bool:
"""
Does the input string contain only a valid XML tag / element name?
"""
return value and bool(RE_ONLY_NCNAME.fullmatch(value))
return value and bool(RE_ONLY_NCNAME.match(value))


def has_last_saved(value: str) -> bool:
Expand Down
25 changes: 16 additions & 9 deletions tests/test_dynamic_default.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@
Test handling dynamic default in forms
"""

import os
from dataclasses import dataclass
from os import getpid
from time import perf_counter
from unittest import skip
from unittest.mock import patch

import psutil
from psutil import Process
from pyxform import utils
from pyxform.xls2xform import convert

Expand Down Expand Up @@ -778,11 +778,11 @@ def test_dynamic_default_performance__time(self):
Results with Python 3.10.14 on VM with 2vCPU (i7-7700HQ) 1GB RAM, x questions
each, average of 10 runs (seconds), with and without the check, per question:
| num | with | without | peak RSS MB |
| 500 | 0.1903 | 0.1977 | 58 |
| 1000 | 0.4010 | 0.3913 | 63 |
| 2000 | 0.6860 | 0.6813 | 67 |
| 5000 | 1.7119 | 1.7421 | 90 |
| 10000 | 3.5399 | 3.4963 | 136 |
| 500 | 0.1626 | 0.1886 | 60 |
| 1000 | 0.3330 | 0.3916 | 63 |
| 2000 | 0.8675 | 0.7823 | 70 |
| 5000 | 1.7051 | 1.5653 | 91 |
| 10000 | 3.1097 | 3.8525 | 137 |
"""
survey_header = """
| survey | | | | |
Expand All @@ -791,19 +791,26 @@ def test_dynamic_default_performance__time(self):
question = """
| | text | q{i} | Q{i} | if(../t2 = 'test', 1, 2) + 15 - int(1.2) |
"""
process = Process(getpid())
for count in (500, 1000, 2000):
questions = "\n".join(question.format(i=i) for i in range(count))
md = "".join((survey_header, questions))

def run(name, case):
runs = 0
results = []
peak_memory_usage = process.memory_info().rss
while runs < 10:
start = perf_counter()
convert(xlsform=case)
results.append(perf_counter() - start)
peak_memory_usage = max(process.memory_info().rss, peak_memory_usage)
runs += 1
print(name, round(sum(results) / len(results), 4))
print(
name,
round(sum(results) / len(results), 4),
f"| Peak RSS: {peak_memory_usage}",
)

run(name=f"questions={count}, with check (seconds):", case=md)

Expand All @@ -828,7 +835,7 @@ def test_dynamic_default_performance__memory(self):
"""
questions = "\n".join(question.format(i=i) for i in range(1, 2000))
md = "".join((survey_header, questions))
process = psutil.Process(os.getpid())
process = Process(getpid())
pre_mem = process.memory_info().rss
self.assertPyxformXform(md=md)
post_mem = process.memory_info().rss
Expand Down
21 changes: 15 additions & 6 deletions tests/test_translations.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@
"""

from dataclasses import dataclass
from os import getpid
from time import perf_counter
from unittest import skip
from unittest.mock import patch

from psutil import Process
from pyxform.constants import CHOICES, SURVEY
from pyxform.constants import DEFAULT_LANGUAGE_VALUE as DEFAULT_LANG
from pyxform.validators.pyxform.translations_checks import (
Expand Down Expand Up @@ -401,11 +403,11 @@ def test_missing_translations_check_performance(self):
with 2 choices each, average of 10 runs (seconds), with and without the check,
per question:
| num | with | without | peak RSS MB |
| 500 | 0.8251 | 0.8473 | 76 |
| 1000 | 1.8430 | 1.8612 | 97 |
| 2000 | 5.0824 | 5.1167 | 140 |
| 5000 | 19.921 | 21.390 | 249 |
| 10000 | 78.382 | 74.223 | 435 |
| 500 | 0.7427 | 0.8133 | 77 |
| 1000 | 1.7908 | 1.7777 | 94 |
| 2000 | 5.6719 | 4.8387 | 141 |
| 5000 | 20.452 | 19.502 | 239 |
| 10000 | 70.871 | 62.106 | 416 |
"""
survey_header = """
| survey | | | | |
Expand All @@ -422,6 +424,7 @@ def test_missing_translations_check_performance(self):
| | c{i} | na | la-d | la-e |
| | c{i} | nb | lb-d | lb-e |
"""
process = Process(getpid())
for count in (500, 1000, 2000):
questions = "\n".join(question.format(i=i) for i in range(count))
choice_lists = "\n".join(choice_list.format(i=i) for i in range(count))
Expand All @@ -430,12 +433,18 @@ def test_missing_translations_check_performance(self):
def run(name, case):
runs = 0
results = []
peak_memory_usage = process.memory_info().rss
while runs < 10:
start = perf_counter()
convert(xlsform=case)
results.append(perf_counter() - start)
peak_memory_usage = max(process.memory_info().rss, peak_memory_usage)
runs += 1
print(name, round(sum(results) / len(results), 4))
print(
name,
round(sum(results) / len(results), 4),
f"| Peak RSS: {peak_memory_usage}",
)

run(name=f"questions={count}, with check (seconds):", case=md)

Expand Down

0 comments on commit fd17a60

Please sign in to comment.