Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Fix Windows CI #165

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 0 additions & 12 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,18 +51,6 @@ jobs:
julia --color=yes --threads=auto --check-bounds=yes --depwarn=yes --code-coverage=user --project=. -e 'import Pkg; Pkg.test(coverage=true)'
julia --color=yes --project=. coverage.jl
shell: bash
if: ${{ matrix.os != 'windows-latest' }}
- name: "Run tests, skipping errors."
run: |
julia --color=yes --project=. -e 'import Pkg; Pkg.add("Coverage")'
{
julia --color=yes --threads=auto --check-bounds=yes --depwarn=yes --code-coverage=user --project=. -e 'import Pkg; Pkg.test(coverage=true)'
} || {
echo "Tests failed, but continuing anyway."
}
julia --color=yes --project=. coverage.jl
shell: bash
if: ${{ matrix.os == 'windows-latest' }}
- name: "Coveralls"
uses: coverallsapp/github-action@master
with:
Expand Down
8 changes: 5 additions & 3 deletions src/Configure.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ function assert_operators_defined_over_reals(T, options::Options)
end
end
catch error
# TODO: Only raise error if domain error.
throw(
AssertionError(
"Your configuration is invalid - one of your operators ($cur_op) is not well-defined over the real line. You can get around this by returning `NaN` for invalid inputs.",
Expand Down Expand Up @@ -242,18 +243,19 @@ function test_entire_pipeline(procs, dataset::Dataset{T}, options::Options) wher
(options.verbosity > 0 || options.progress), "Testing entire pipeline on workers..."
)
for proc in procs
cdataset = copy_dataset(dataset)
push!(
futures,
@spawnat proc begin
tmp_pop = Population(
dataset;
cdataset;
npop=20,
nlength=3,
options=options,
nfeatures=dataset.nfeatures,
)
tmp_pop = s_r_cycle(
dataset,
cdataset,
tmp_pop,
5,
5,
Expand All @@ -263,7 +265,7 @@ function test_entire_pipeline(procs, dataset::Dataset{T}, options::Options) wher
record=RecordType(),
)[1]
tmp_pop = optimize_and_simplify_population(
dataset, tmp_pop, options, options.maxsize, RecordType()
cdataset, tmp_pop, options, options.maxsize, RecordType()
)
end
)
Expand Down
2 changes: 1 addition & 1 deletion src/Core.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ include("Operators.jl")
include("Options.jl")

import .ProgramConstantsModule: MAX_DEGREE, BATCH_DIM, FEATURE_DIM, RecordType
import .DatasetModule: Dataset
import .DatasetModule: Dataset, copy_dataset
import .OptionsStructModule: Options, MutationWeights, sample_mutation
import .OptionsModule: Options
import .OperatorsModule:
Expand Down
14 changes: 14 additions & 0 deletions src/Dataset.jl
Original file line number Diff line number Diff line change
Expand Up @@ -63,4 +63,18 @@ function Dataset(
return Dataset{T}(X, y, n, nfeatures, weighted, weights, avg_y, baseline, varMap)
end

"""
    copy_dataset(dataset::Dataset{T}) where {T<:Real}

Return an independent copy of `dataset`: the array-valued fields
(`X`, `y`, `weights`, `varMap`) are duplicated with `copy`, while the
scalar fields (`n`, `nfeatures`, `weighted`, `avg_y`, `baseline_loss`)
are carried over as-is. `weights` is only copied when the dataset is
weighted; otherwise `nothing` is passed through.
"""
function copy_dataset(dataset::Dataset{T}) where {T<:Real}
    # Duplicate the weight vector only for weighted datasets.
    copied_weights = if dataset.weighted
        copy(dataset.weights)
    else
        nothing
    end
    return Dataset{T}(
        copy(dataset.X),
        copy(dataset.y),
        dataset.n,
        dataset.nfeatures,
        dataset.weighted,
        copied_weights,
        dataset.avg_y,
        dataset.baseline_loss,
        copy(dataset.varMap),
    )
end

end
3 changes: 1 addition & 2 deletions src/Population.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import Random: randperm
import DynamicExpressions: string_tree
import ..CoreModule: Options, Dataset, RecordType
import ..ComplexityModule: compute_complexity
import ..LossFunctionsModule: score_func, update_baseline_loss!
import ..LossFunctionsModule: score_func
import ..AdaptiveParsimonyModule: RunningSearchStatistics
import ..MutationFunctionsModule: gen_random_tree
import ..PopMemberModule: PopMember, copy_pop_member
Expand Down Expand Up @@ -60,7 +60,6 @@ function Population(
nfeatures::Int,
) where {T<:Real}
dataset = Dataset(X, y)
update_baseline_loss!(dataset, options)
return Population(dataset; npop=npop, options=options, nfeatures=nfeatures)
end

Expand Down
36 changes: 22 additions & 14 deletions src/SymbolicRegression.jl
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ import .CoreModule:
FEATURE_DIM,
RecordType,
Dataset,
copy_dataset,
Options,
MutationWeights,
plus,
Expand Down Expand Up @@ -546,16 +547,18 @@ function _EquationSearch(

if saved_pop !== nothing && length(saved_pop.members) == options.npop
saved_pop::Population{T}
copy_saved_pop = copy_population(saved_pop)
new_pop = @sr_spawner parallelism worker_idx (
saved_pop, HallOfFame(options, T), RecordType(), 0.0
copy_saved_pop, HallOfFame(options, T), RecordType(), 0.0
)
else
if saved_pop !== nothing
@warn "Recreating population (output=$(j), population=$(i)), as the saved one doesn't have the correct number of members."
end
cdataset = copy_dataset(datasets[j])
new_pop = @sr_spawner parallelism worker_idx (
Population(
datasets[j];
cdataset;
npop=options.npop,
nlength=3,
options=options,
Expand Down Expand Up @@ -584,6 +587,8 @@ function _EquationSearch(

# TODO - why is this needed??
# Multi-threaded doesn't like to fetch within a new task:
copy_search_stats = deepcopy(running_search_statistics)
cdataset = copy_dataset(dataset)
updated_pop = @sr_spawner parallelism worker_idx let
in_pop = if parallelism in (:multiprocessing, :multithreading)
fetch(init_pops[j][i])[1]
Expand All @@ -596,26 +601,26 @@ function _EquationSearch(
"iteration0" => record_population(in_pop, options)
)
tmp_num_evals = 0.0
normalize_frequencies!(running_search_statistics)
normalize_frequencies!(copy_search_stats)
tmp_pop, tmp_best_seen, evals_from_cycle = s_r_cycle(
dataset,
cdataset,
in_pop,
options.ncycles_per_iteration,
curmaxsize,
running_search_statistics;
copy_search_stats;
verbosity=options.verbosity,
options=options,
record=cur_record,
)
tmp_num_evals += evals_from_cycle
tmp_pop, evals_from_optimize = optimize_and_simplify_population(
dataset, tmp_pop, options, curmaxsize, cur_record
cdataset, tmp_pop, options, curmaxsize, cur_record
)
tmp_num_evals += evals_from_optimize
if options.batching
for i_member in 1:(options.maxsize + MAX_DEGREE)
score, result_loss = score_func(
dataset, tmp_best_seen.members[i_member].tree, options
cdataset, tmp_best_seen.members[i_member].tree, options
)
tmp_best_seen.members[i_member].score = score
tmp_best_seen.members[i_member].loss = result_loss
Expand Down Expand Up @@ -787,26 +792,29 @@ function _EquationSearch(
iteration = find_iteration_from_record(key, record) + 1
end

copy_search_stats = deepcopy(all_running_search_statistics[j])
copy_cur_pop = copy_population(cur_pop)
cdataset = copy_dataset(dataset)
allPops[j][i] = @sr_spawner parallelism worker_idx let
cur_record = RecordType()
@recorder cur_record[key] = RecordType(
"iteration$(iteration)" => record_population(cur_pop, options)
"iteration$(iteration)" => record_population(copy_cur_pop, options)
)
tmp_num_evals = 0.0
normalize_frequencies!(all_running_search_statistics[j])
normalize_frequencies!(copy_search_stats)
tmp_pop, tmp_best_seen, evals_from_cycle = s_r_cycle(
dataset,
cur_pop,
cdataset,
copy_cur_pop,
options.ncycles_per_iteration,
curmaxsize,
all_running_search_statistics[j];
copy_search_stats;
verbosity=options.verbosity,
options=options,
record=cur_record,
)
tmp_num_evals += evals_from_cycle
tmp_pop, evals_from_optimize = optimize_and_simplify_population(
dataset, tmp_pop, options, curmaxsize, cur_record
cdataset, tmp_pop, options, curmaxsize, cur_record
)
tmp_num_evals += evals_from_optimize

Expand All @@ -815,7 +823,7 @@ function _EquationSearch(
for i_member in 1:(options.maxsize + MAX_DEGREE)
if tmp_best_seen.exists[i_member]
score, result_loss = score_func(
dataset, tmp_best_seen.members[i_member].tree, options
cdataset, tmp_best_seen.members[i_member].tree, options
)
tmp_best_seen.members[i_member].score = score
tmp_best_seen.members[i_member].loss = result_loss
Expand Down