From 199d5f50289328a35191f80c85ed7fed2e6269bf Mon Sep 17 00:00:00 2001 From: Cam Saul <1455846+camsaul@users.noreply.github.com> Date: Wed, 11 Sep 2024 02:01:08 -0700 Subject: [PATCH] Improved test partitioning: keep namespaces grouped together and don't sort tests (#31) * Improved test partitioning * Update dox * Code cleanup * Modernize GH Actions * Appease Kondo --- .github/actions/setup/action.yml | 34 +++++++ .github/workflows/deploy.yml | 47 ++++------ .github/workflows/tests.yml | 67 ++++---------- README.md | 11 +-- deps.edn | 15 ++++ src/mb/hawk/core.clj | 45 +--------- src/mb/hawk/partition.clj | 150 +++++++++++++++++++++++++++++++ test/mb/hawk/core_test.clj | 45 ---------- test/mb/hawk/partition_test.clj | 78 ++++++++++++++++ 9 files changed, 319 insertions(+), 173 deletions(-) create mode 100644 .github/actions/setup/action.yml create mode 100644 src/mb/hawk/partition.clj create mode 100644 test/mb/hawk/partition_test.clj diff --git a/.github/actions/setup/action.yml b/.github/actions/setup/action.yml new file mode 100644 index 0000000..d4bb63f --- /dev/null +++ b/.github/actions/setup/action.yml @@ -0,0 +1,34 @@ +name: Setup Clojure +inputs: + clojure-version: + required: true + default: "1.11.1.1413" + java-version: + required: true + default: "17" + cache-key: + required: true + +runs: + using: composite + steps: + - name: Prepare JDK + uses: actions/setup-java@v3 + with: + java-version: ${{ inputs.java-version }} + distribution: 'temurin' + - name: Setup Clojure + uses: DeLaGuardo/setup-clojure@12.5 + with: + cli: ${{ inputs.clojure-version }} + - name: Restore cache + uses: actions/cache@v3 + with: + path: | + ~/.m2/repository + ~/.gitlibs + ~/.deps.clj + key: v1-${{ hashFiles('./deps.edn') }}-${{ inputs.cache-key }} + restore-keys: | + v1-${{ hashFiles('./deps.edn') }}- + v1- diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 2f78a97..dd3cf2c 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -10,35 +10,18 @@ jobs: runs-on: ubuntu-20.04 environment: Deployment steps: - - uses: actions/checkout@v4.1.0 - with: - fetch-depth: 0 - - name: Prepare JDK 17 - uses: actions/setup-java@v3 - with: - java-version: 17 - distribution: 'temurin' - - name: Setup Clojure - uses: DeLaGuardo/setup-clojure@12.1 - with: - cli: 1.11.1.1413 - - name: Restore cache - uses: actions/cache@v3 - with: - path: | - ~/.m2/repository - ~/.gitlibs - ~/.deps.clj - key: v1-${{ hashFiles('./deps.edn') }}-deploy - restore-keys: | - v1-${{ hashFiles('./deps.edn') }}- - v1- - - name: Build Hawk - run: clojure -T:build jar - env: - GITHUB_SHA: ${{ env.GITHUB_SHA }} - - name: Deploy Hawk - run: clojure -T:build deploy - env: - CLOJARS_USERNAME: ${{ secrets.CLOJARS_USERNAME }} - CLOJARS_PASSWORD: ${{ secrets.CLOJARS_PASSWORD }} + - uses: actions/checkout@v4.1.0 + - uses: ./.github/actions/setup + with: + cache-key: deploy + - name: Build Hawk + run: >- + clojure -T:build jar + env: + GITHUB_SHA: ${{ env.GITHUB_SHA }} + - name: Deploy Hawk + run: >- + clojure -T:build deploy + env: + CLOJARS_USERNAME: ${{ secrets.CLOJARS_USERNAME }} + CLOJARS_PASSWORD: ${{ secrets.CLOJARS_PASSWORD }} diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index daad8dd..2f860c7 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -2,62 +2,33 @@ name: Tests on: push: - workflow_dispatch: - inputs: - debug_enabled: - type: boolean - description: 'Run the build with tmate debugging enabled (https://github.com/marketplace/actions/debugging-with-tmate)' - required: false - default: false + branches: + - main + pull_request: jobs: kondo: runs-on: ubuntu-20.04 timeout-minutes: 10 steps: - - uses: actions/checkout@v3 - - uses: DeLaGuardo/clojure-lint-action@master - with: - check-name: Run clj-kondo - clj-kondo-args: >- - --lint - src - test - github_token: ${{ secrets.GITHUB_TOKEN }} + - uses: actions/checkout@v4.1.0 + - uses: ./.github/actions/setup + with: + cache-key: kondo + - name: Run Kondo + run: >- + clojure -M:kondo --lint src test tests: runs-on: ubuntu-20.04 timeout-minutes: 10 steps: - - uses: actions/checkout@v3 - - name: Prepare JDK 17 - uses: actions/setup-java@v3 - with: - java-version: 17 - distribution: 'temurin' - - name: Setup Clojure - uses: DeLaGuardo/setup-clojure@10.1 - with: - cli: 1.11.1.1208 - - name: Restore cache - uses: actions/cache@v3 - with: - path: | - ~/.m2/repository - ~/.gitlibs - ~/.deps.clj - key: v1-${{ hashFiles('./deps.edn') }}-postgres - restore-keys: | - v1-${{ hashFiles('./deps.edn') }}- - v1- - # to continue the workflow create an empty file `touch continue` - # or `sudo touch /continue` while still in an SSH session - - name: Setup tmate session - if: ${{ github.event_name == 'workflow_dispatch' && inputs.debug_enabled }} - uses: mxschmitt/action-tmate@v3 - with: - limit-access-to-actor: true - - run: clojure -X:dev:test - name: Run tests - env: - CI: TRUE + - uses: actions/checkout@v4.1.0 + - uses: ./.github/actions/setup + with: + cache-key: tests + - name: Run tests + run: >- + clojure -X:dev:test + env: + CI: TRUE diff --git a/README.md b/README.md index 7e56758..5e71f36 100644 --- a/README.md +++ b/README.md @@ -218,12 +218,13 @@ Running 575 tests ... ``` -`:partition/index` is zero-based, e.g. if you have ten partitions (`:partiton/total 10`) then the first partition is `0` and -the last is `9`. +`:partition/index` is zero-based, e.g. if you have ten partitions (`:partiton/total 10`) then the first partition is `0` +and the last is `9`. -Tests are partitioned at the `deftest` level after all tests are found the usual way -- all namespaces that would be -loaded if you were running the entire test suite are still loaded. Partitions are split as evenly as possible, but -tests are guaranteed to be split deterministically into exactly the number of partitions you asked for. +Tests are partitioned at the var (`deftest`) level after all tests are found the usual way, but all tests in any given +namespace will always be split into the same partition. All namespaces that would be loaded if you were running the +entire test suite are still loaded. Partitions are split as evenly as possible, but tests are guaranteed to be split +deterministically into exactly the number of partitions you asked for. ## Additional options diff --git a/deps.edn b/deps.edn index 8c954a2..b0edc3c 100644 --- a/deps.edn +++ b/deps.edn @@ -28,5 +28,20 @@ slipset/deps-deploy {:mvn/version "0.2.1"}} :ns-default build} + ;; clojure -M:kondo --lint src test + ;; + ;; clojure -M:kondo --version + ;; + ;; clojure -M:kondo --copy-configs --dependencies --lint "$(clojure -A:dev -Spath)" --skip-lint --parallel + ;; + ;; Run Kondo from the JVM using the pinned version. Preferable to running the installed command since we can pin the + ;; version here which may be different from the version installed on your computer. + :kondo + {:replace-deps + {clj-kondo/clj-kondo {:mvn/version "2024.08.29"}} + + :main-opts + ["-m" "clj-kondo.main"]} + :ci {:jvm-opts ["-Dhawk.mode" "cli/ci"]}}} diff --git a/src/mb/hawk/core.clj b/src/mb/hawk/core.clj index c84a9a6..a61ab04 100644 --- a/src/mb/hawk/core.clj +++ b/src/mb/hawk/core.clj @@ -2,7 +2,6 @@ (:require [clojure.java.classpath :as classpath] [clojure.java.io :as io] - [clojure.math :as math] [clojure.pprint :as pprint] [clojure.set :as set] [clojure.string :as str] @@ -17,6 +16,7 @@ [mb.hawk.init :as hawk.init] [mb.hawk.junit :as hawk.junit] [mb.hawk.parallel :as hawk.parallel] + [mb.hawk.partition :as hawk.partition] [mb.hawk.speak :as hawk.speak] [mb.hawk.util :as u])) @@ -121,47 +121,6 @@ [_nil options] (find-tests (classpath/system-classpath) options)) -(defn- partition-all-into-n-partitions - "Split sequence `xs` into `num-partitions` as equally as possible. Guaranteed to return `num-partitions`. This custom - function is used instead of [[partition-all]] or whatever because we want to make sure every partition gets tests, - even with weird combinations like 4 tests with 3 partitions or 29 tests with 10 partitions." - [num-partitions xs] - {:post [(= (count %) num-partitions)]} - ;; make sure the partitioning is deterministic -- `xs` should always come back in the same order but we should sort - ;; just to be safe. - (let [xs (sort-by str xs) - partition-size (/ (count xs) num-partitions)] - (into [] - (comp (map-indexed (fn [i x] - [(long (math/floor (/ i partition-size))) x])) - (partition-by first) - (map (fn [partition] - (map second partition)))) - xs))) - -(defn- partition-tests [tests {num-partitions :partition/total, partition-index :partition/index, :as _options}] - (if (or num-partitions partition-index) - (do - (assert (and num-partitions partition-index) - ":partition/total and :partition/index must be set together") - (assert (pos-int? num-partitions) - "Invalid :partition/total - must be a positive integer") - (assert (<= num-partitions (count tests)) - "Invalid :partition/total - cannot have more partitions than number of tests") - (assert (int? partition-index) - "Invalid :partition/index - must be an integer") - (assert (<= 0 partition-index (dec num-partitions)) - (format "Invalid :partition/index - must be between 0 and %d" (dec num-partitions))) - (let [partitions (partition-all-into-n-partitions num-partitions tests) - partition (nth partitions partition-index)] - (printf "Running tests in partition %d of %d (%d tests of %d)...\n" - (inc partition-index) - num-partitions - (count partition) - (count tests)) - partition)) - tests)) - (defn find-tests-with-options "Find tests using the options map as passed to `clojure -X`." [{:keys [only], :as options}] @@ -170,7 +129,7 @@ (println "Running tests in" (pr-str only))) (let [start-time-ms (System/currentTimeMillis) tests (-> (find-tests only options) - (partition-tests options))] + (hawk.partition/partition-tests options))] (printf "Finding tests took %s.\n" (u/format-milliseconds (- (System/currentTimeMillis) start-time-ms))) (println "Running" (count tests) "tests") tests)) diff --git a/src/mb/hawk/partition.clj b/src/mb/hawk/partition.clj new file mode 100644 index 0000000..9c10723 --- /dev/null +++ b/src/mb/hawk/partition.clj @@ -0,0 +1,150 @@ +(ns mb.hawk.partition + (:require + [clojure.math :as math])) + +(defn- namespace* + "Like [[clojure.core/namespace]] but handles vars." + [x] + (cond + (instance? clojure.lang.Named x) (namespace x) + (var? x) (namespace (symbol x)) + :else nil)) + +(defn- sort-tests-by-namespace + "The test runner normally sorts the namespaces before running tests, so we should do the same before we partition things + if we want them to make sense. Preserve the order of the vars inside each namespace." + [test-vars] + (let [test-var->sort-position (into {} + (map-indexed + (fn [i varr] + [varr i])) + test-vars)] + (sort-by (juxt namespace* test-var->sort-position) + test-vars))) + +(defn- namespace->num-tests + "Return a map of + + namespace string => number of tests in that namespace" + [test-vars] + (reduce + (fn [m test-var] + (update m (namespace* test-var) (fnil inc 0))) + {} + test-vars)) + +(defn- test-var->ideal-partition + "Return a map of + + test-var => ideal partition number + + 'Ideal partition number' is the partition it would live in ideally if we weren't worried about making sure namespaces + are grouped together." + [num-partitions test-vars] + (let [target-partition-size (/ (count test-vars) num-partitions)] + (into {} + (map-indexed (fn [i test-var] + (let [ideal-partition (long (math/floor (/ i target-partition-size)))] + (assert (<= 0 ideal-partition (dec num-partitions))) + [test-var ideal-partition])) + test-vars)))) + +(defn- namespace->possible-partitions + "Return a map of + + namespace string => set of possible partition numbers for its tests + + For most namespaces there should only be one possible partition but for some the ideal split happens in the middle of + the namespace which means we have two possible candidate partitions to put it into." + [num-partitions test-vars] + (let [test-var->ideal-partition (test-var->ideal-partition num-partitions test-vars)] + (reduce + (fn [m test-var] + (update m (namespace* test-var) #(conj (set %) (test-var->ideal-partition test-var)))) + {} + test-vars))) + +(defn- namespace->partition + "Return a map of + + namespace string => canonical partition number for its tests + + If there are multiple possible candidate partitions for a namespace, choose the one that has the least tests in it." + [num-partitions test-vars] + (let [namespace->num-tests (namespace->num-tests test-vars) + namespace->possible-partitions (namespace->possible-partitions num-partitions test-vars) + ;; process all the namespaces that have no question about what partition they should go into first so we have as + ;; accurate a picture of the size of each partition as possible before dealing with the ambiguous ones + namespaces (distinct (map namespace* test-vars)) + multiple-possible-partitions? (fn [nmspace] + (> (count (namespace->possible-partitions nmspace)) + 1)) + namespaces (concat (remove multiple-possible-partitions? namespaces) + (filter multiple-possible-partitions? namespaces))] + ;; Keep track of how many tests are in each partition so far + (:namespace->partition + (reduce + (fn [m nmspace] + (let [partition (first (sort-by (fn [partition] + (get-in m [:partition->size partition])) + (namespace->possible-partitions nmspace)))] + (-> m + (update-in [:partition->size partition] (fnil + 0) (namespace->num-tests nmspace)) + (assoc-in [:namespace->partition nmspace] partition)))) + {} + namespaces)))) + +(defn- make-test-var->partition + "Return a function with the signature + + (f test-var) => partititon-number" + [num-partitions test-vars] + (let [namespace->partition (namespace->partition num-partitions test-vars)] + (fn test-var->partition [test-var] + (get namespace->partition (namespace* test-var))))) + +(defn- partition-tests-into-n-partitions + "Split a sequence of `test-vars` into `num-partitions`, returning a map of + + partition number => sequence of tests + + Attempts to divide tests up into partitions that are as equal as possible, but keeps tests in the same namespace + grouped together." + [num-partitions test-vars] + {:post [(= (count %) num-partitions)]} + (let [test-vars (sort-tests-by-namespace test-vars) + test-var->partition (make-test-var->partition num-partitions test-vars)] + (reduce + (fn [m test-var] + (update m (test-var->partition test-var) #(conj (vec %) test-var))) + (sorted-map) + test-vars))) + +(defn- validate-partition-options [tests {num-partitions :partition/total, partition-index :partition/index, :as _options}] + (assert (and num-partitions partition-index) + ":partition/total and :partition/index must be set together") + (assert (pos-int? num-partitions) + "Invalid :partition/total - must be a positive integer") + (assert (<= num-partitions (count tests)) + "Invalid :partition/total - cannot have more partitions than number of tests") + (assert (int? partition-index) + "Invalid :partition/index - must be an integer") + (assert (<= 0 partition-index (dec num-partitions)) + (format "Invalid :partition/index - must be between 0 and %d" (dec num-partitions)))) + +(defn partition-tests + "Return only `tests` to run for the current partition (if `:partition/total` and `:partition/index` are specified). If + they are not specified this returns all `tests`." + [tests {num-partitions :partition/total, partition-index :partition/index, :as options}] + (if (or num-partitions partition-index) + (do + (validate-partition-options tests options) + (let [partition-index->tests (partition-tests-into-n-partitions num-partitions tests) + partition (get partition-index->tests partition-index)] + (printf "Running tests in partition %d of %d (%d tests of %d)...\n" + (inc partition-index) + num-partitions + (count partition) + (count tests)) + partition)) + tests)) diff --git a/test/mb/hawk/core_test.clj b/test/mb/hawk/core_test.clj index 8cb75d2..da96693 100644 --- a/test/mb/hawk/core_test.clj +++ b/test/mb/hawk/core_test.clj @@ -58,48 +58,3 @@ {:exclude-tags [:exclude-this-test]} {:exclude-tags #{:exclude-this-test}} {:exclude-tags [:exclude-this-test :another/tag]})) - -(deftest ^:parallel partition-tests-test - (are [i expected] (= expected - (#'hawk/partition-tests - (range 4) - {:partition/index i, :partition/total 3})) - 0 [0 1] - 1 [2] - 2 [3]) - (are [i expected] (= expected - (#'hawk/partition-tests - (range 5) - {:partition/index i, :partition/total 3})) - 0 [0 1] - 1 [2 3] - 2 [4])) - -(deftest ^:parallel partition-tests-determinism-test - (testing "partitioning should be deterministic even if tests come back in a non-deterministic order for some reason" - (are [i expected] (= expected - (#'hawk/partition-tests - (shuffle (map #(format "%02d" %) (range 26))) - {:partition/index i, :partition/total 10})) - 0 ["00" "01" "02"] - 1 ["03" "04" "05"] - 2 ["06" "07"] - 3 ["08" "09" "10"] - 4 ["11" "12"] - 5 ["13" "14" "15"] - 6 ["16" "17" "18"] - 7 ["19" "20"] - 8 ["21" "22" "23"] - 9 ["24" "25"]))) - -(deftest ^:parallel partition-test - (are [index expected] (= expected - (hawk/find-tests-with-options {:only `[find-tests-test - exclude-tags-test - partition-tests-test - partition-test] - :partition/index index - :partition/total 3})) - 0 [#'exclude-tags-test #'find-tests-test] - 1 [#'partition-test] - 2 [#'partition-tests-test])) diff --git a/test/mb/hawk/partition_test.clj b/test/mb/hawk/partition_test.clj new file mode 100644 index 0000000..6fa2bef --- /dev/null +++ b/test/mb/hawk/partition_test.clj @@ -0,0 +1,78 @@ +(ns mb.hawk.partition-test + (:require + [clojure.test :refer :all] + [mb.hawk.core :as hawk] + [mb.hawk.core-test] + [mb.hawk.parallel-test] + [mb.hawk.partition :as hawk.partition] + [mb.hawk.speak-test])) + +(defn- partition-tests* [num-partitions tests] + (into (sorted-map) + (map (fn [i] + [i (hawk.partition/partition-tests + tests + {:partition/index i, :partition/total num-partitions})])) + (range num-partitions))) + +(deftest ^:parallel partition-tests-test + (is (= '{0 [a/test b/test] + 1 [c/test] + 2 [d/test]} + (partition-tests* 3 '[a/test b/test c/test d/test]))) + (is (= '{0 [a/test b/test] + 1 [c/test d/test] + 2 [e/test]} + (partition-tests* 3 '[a/test b/test c/test d/test e/test])))) + +(deftest ^:parallel partition-tests-evenly-test + (testing "make sure we divide things roughly evenly" + (is (= '{0 [n00/test n01/test n02/test] + 1 [n03/test n04/test n05/test] + 2 [n06/test n07/test] + 3 [n08/test n09/test n10/test] + 4 [n11/test n12/test] + 5 [n13/test n14/test n15/test] + 6 [n16/test n17/test n18/test] + 7 [n19/test n20/test] + 8 [n21/test n22/test n23/test] + 9 [n24/test n25/test]} + (partition-tests* 10 (map #(symbol (format "n%02d/test" %)) (range 26))))) + ;; ideally the split happens in the middle of b here, but b should get put into 0 because 0 only has one other test + ;; while 1 has two. + (is (= '{0 [a/test-1 b/test-1 b/test-2 b/test-3] + 1 [c/test-1 c/test-2]} + (partition-tests* 2 '[a/test-1 b/test-1 b/test-2 b/test-3 c/test-1 c/test-2]))))) + +(deftest ^:parallel partition-should-not-split-in-the-middle-of-a-namespace-test + (testing "Partitioning should not split in the middle of a namespace" + (is (= '{0 [a/test-1 a/test-2 a/test-3] + 1 [b/test-1]} + (partition-tests* 2 '[a/test-1 a/test-2 a/test-3 b/test-1]))))) + +(deftest ^:parallel partition-preserve-order-test + (testing "Partitioning should sort namespaces but preserve order of vars" + (is (= '{0 [a/test-1 a/test-3 a/test-2] + 1 [b/test-1 b/test-2 b/test-3]} + (partition-tests* 2 '[b/test-1 b/test-2 b/test-3 a/test-1 a/test-3 a/test-2]))))) + +(deftest ^:parallel partition-test + (is (= {0 [#'mb.hawk.core-test/find-tests-test + #'mb.hawk.core-test/exclude-tags-test] + 1 [#'mb.hawk.parallel-test/ns-parallel-test + #'mb.hawk.parallel-test/var-not-parallel-test] + 2 [#'mb.hawk.speak-test/speak-results-test]} + (into (sorted-map) + (map (fn [i] + [i (hawk/find-tests-with-options + {:only ['mb.hawk.core-test/find-tests-test + 'mb.hawk.speak-test/speak-results-test + ;; this var intentionally comes after a different var in a different + ;; namespace to make sure we partition things in a way that groups + ;; namespaces together + 'mb.hawk.core-test/exclude-tags-test + 'mb.hawk.parallel-test/ns-parallel-test + 'mb.hawk.parallel-test/var-not-parallel-test] + :partition/index i + :partition/total 3})])) + (range 3)))))