xKDR · EngPeterAtef · Mar 19, 2024 · Mar 19, 2024 · Mar 19, 2024 · Mar 20, 2024
diff --git a/src/SurveyDesign.jl b/src/SurveyDesign.jl
@@ -27,7 +27,7 @@
 - `weights::Union{Nothing, Symbol}=nothing`: the sampling weights.
 - `popsize::Union{Nothing, Symbol}=nothing`: the (expected) survey population size.

 ```jldoctest
 julia> apiclus1 = load_data("apiclus1");

 julia> dclus1 = SurveyDesign(apiclus1; clusters=:dnum, weights=:pw)
@@ -52,12 +52,12 @@
     allprobs::Symbol # Right now only singlestage approx supported
     pps::Bool # TODO functionality
     # Single stage clusters sample, like apiclus1
-    function SurveyDesign(
+    function SurveyDesign(
         data::AbstractDataFrame;
-        clusters::Union{Nothing,Symbol,Vector{Symbol}} = nothing,
-        strata::Union{Nothing,Symbol} = nothing,
-        popsize::Union{Nothing,Symbol} = nothing,
-        weights::Union{Nothing,Symbol} = nothing,
+        clusters::Union{Nothing,Symbol,Vector{Symbol}}=nothing,
+        strata::Union{Nothing,Symbol}=nothing,
+        popsize::Union{Nothing,Symbol}=nothing,
+        weights::Union{Nothing,Symbol}=nothing,
     )
         # sampsize here is number of clusters completely sampled, popsize is total clusters in population
         if typeof(strata) <: Nothing
@@ -99,8 +99,8 @@
                 data[!, popsize] = data[!, sampsize_labels] .* data[!, weights_labels]
             end
         elseif isa(popsize, Symbol)
-                weights_labels = :_weights
-                data[!, weights_labels] = data[!, popsize] ./ data[!, sampsize_labels]
+            weights_labels = :_weights
+            data[!, weights_labels] = data[!, popsize] ./ data[!, sampsize_labels]
         else
             # neither popsize nor weights given
             weights_labels = :_weights
@@ -305,31 +305,31 @@
         type::String,
         replicates::UInt,
         replicate_weights::Vector{Symbol},
-    ) where {ReplicateType <: InferenceMethod}
+    ) where {ReplicateType<:InferenceMethod}
         new{ReplicateType}(data, cluster, popsize, sampsize, strata, weights, allprobs,
-           pps, type, replicates, replicate_weights, ReplicateType(replicates))
+            pps, type, replicates, replicate_weights, ReplicateType(replicates))
     end
 
     # constructor with given replicate_weights
     function ReplicateDesign{ReplicateType}(
         data::AbstractDataFrame,
         replicate_weights::Vector{Symbol};
-        clusters::Union{Nothing,Symbol,Vector{Symbol}} = nothing,
-        strata::Union{Nothing,Symbol} = nothing,
-        popsize::Union{Nothing,Symbol} = nothing,
-        weights::Union{Nothing,Symbol} = nothing
-    ) where {ReplicateType <: InferenceMethod}
+        clusters::Union{Nothing,Symbol,Vector{Symbol}}=nothing,
+        strata::Union{Nothing,Symbol}=nothing,
+        popsize::Union{Nothing,Symbol}=nothing,
+        weights::Union{Nothing,Symbol}=nothing
+    ) where {ReplicateType<:InferenceMethod}
         # rename the replicate weights if needed
-        rename!(data, [replicate_weights[index] => "replicate_"*string(index) for index in 1:length(replicate_weights)])
+        rename!(data, [replicate_weights[index] => "replicate_" * string(index) for index in 1:length(replicate_weights)])
 
         # call the SurveyDesign constructor
         base_design = SurveyDesign(
-                        data;
-                        clusters=clusters,
-                        strata=strata,
-                        popsize=popsize,
-                        weights=weights
-                      )
+            data;
+            clusters=clusters,
+            strata=strata,
+            popsize=popsize,
+            weights=weights
+        )
         new{ReplicateType}(
             base_design.data,
             base_design.cluster,
@@ -350,11 +350,11 @@
     ReplicateDesign{ReplicateType}(
         data::AbstractDataFrame,
         replicate_weights::UnitRange{Int};
-        clusters::Union{Nothing,Symbol,Vector{Symbol}} = nothing,
-        strata::Union{Nothing,Symbol} = nothing,
-        popsize::Union{Nothing,Symbol} = nothing,
-        weights::Union{Nothing,Symbol} = nothing
-    ) where {ReplicateType <: InferenceMethod} =
+        clusters::Union{Nothing,Symbol,Vector{Symbol}}=nothing,
+        strata::Union{Nothing,Symbol}=nothing,
+        popsize::Union{Nothing,Symbol}=nothing,
+        weights::Union{Nothing,Symbol}=nothing
+    ) where {ReplicateType<:InferenceMethod} =
         ReplicateDesign{ReplicateType}(
             data,
             Symbol.(names(data)[replicate_weights]);
@@ -368,11 +368,11 @@
     ReplicateDesign{ReplicateType}(
         data::AbstractDataFrame,
         replicate_weights::Regex;
-        clusters::Union{Nothing,Symbol,Vector{Symbol}} = nothing,
-        strata::Union{Nothing,Symbol} = nothing,
-        popsize::Union{Nothing,Symbol} = nothing,
-        weights::Union{Nothing,Symbol} = nothing
-    ) where {ReplicateType <: InferenceMethod} =
+        clusters::Union{Nothing,Symbol,Vector{Symbol}}=nothing,
+        strata::Union{Nothing,Symbol}=nothing,
+        popsize::Union{Nothing,Symbol}=nothing,
+        weights::Union{Nothing,Symbol}=nothing
+    ) where {ReplicateType<:InferenceMethod} =
         ReplicateDesign{ReplicateType}(
             data,
             Symbol.(names(data)[findall(name -> occursin(replicate_weights, name), names(data))]);

diff --git a/src/by.jl b/src/by.jl
@@ -1,12 +1,12 @@
 function subset(group, design::SurveyDesign)
-    return SurveyDesign(DataFrame(group);clusters = design.cluster, strata = design.strata, popsize = design.popsize, weights = design.weights)   
-end 
+    return SurveyDesign(DataFrame(group); clusters=design.cluster, strata=design.strata, popsize=design.popsize, weights=design.weights)
+end
 
 function subset(group, design::ReplicateDesign)
-    return ReplicateDesign{typeof(design.inference_method)}(DataFrame(group), design.replicate_weights;clusters = design.cluster, strata = design.strata, popsize = design.popsize, weights = design.weights)   
+    return ReplicateDesign{typeof(design.inference_method)}(DataFrame(group), design.replicate_weights; clusters=design.cluster, strata=design.strata, popsize=design.popsize, weights=design.weights)
 end
 
-function bydomain(x::Union{Symbol, Vector{Symbol}}, domain,design::Union{SurveyDesign, ReplicateDesign}, func::Function, args...; kwargs...)
+function bydomain(x::Union{Symbol,Vector{Symbol}}, domain, design::Union{SurveyDesign,ReplicateDesign}, func::Function, args...; kwargs...)
     domain_names = unique(design.data[!, domain])
     gdf = groupby(design.data, domain)
     domain_names = [join(collect(keys(gdf)[i]), "-") for i in 1:length(gdf)]

diff --git a/src/load_data.jl b/src/load_data.jl
@@ -37,5 +37,5 @@ function load_data(name)
     name = name * ".csv"
     @assert name ∈ readdir(asset_path())
 
-    CSV.read(asset_path(name), DataFrame, missingstring = "NA")
+    CSV.read(asset_path(name), DataFrame, missingstring="NA")
 end
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -7,23 +7,23 @@ using DataFrames
 const STAT_TOL = 1e-5
 const SE_TOL = 1e-1
 TOTAL_REPLICATES = 4000
-REPLICATES_VECTOR = [Symbol("replicate_"*string(i)) for i in 1:TOTAL_REPLICATES]
+REPLICATES_VECTOR = [Symbol("replicate_" * string(i)) for i in 1:TOTAL_REPLICATES]
 REPLICATES_REGEX = r"r*_\d"
 
 # Simple random sample
 apisrs = load_data("apisrs") # Load API dataset
-srs = SurveyDesign(apisrs, weights = :pw)
-unitrange = UnitRange((length(names(apisrs)) + 1):(TOTAL_REPLICATES + length(names(apisrs))))
+srs = SurveyDesign(apisrs, weights=:pw)
+unitrange = UnitRange((length(names(apisrs))+1):(TOTAL_REPLICATES+length(names(apisrs))))
 bsrs = srs |> bootweights # Create bootstrap replicate design
 jsrs = srs |> jackknifeweights # Create jackknife replicate design
-bsrs_direct = ReplicateDesign{BootstrapReplicates}(bsrs.data, REPLICATES_VECTOR, weights = :pw)  # using ReplicateDesign constructor
-bsrs_unitrange = ReplicateDesign{BootstrapReplicates}(bsrs.data, unitrange, weights = :pw)  # using ReplicateDesign constructor
-bsrs_regex = ReplicateDesign{BootstrapReplicates}(bsrs.data, REPLICATES_REGEX, weights = :pw)  # using ReplicateDesign constructor
+bsrs_direct = ReplicateDesign{BootstrapReplicates}(bsrs.data, REPLICATES_VECTOR, weights=:pw)  # using ReplicateDesign constructor
+bsrs_unitrange = ReplicateDesign{BootstrapReplicates}(bsrs.data, unitrange, weights=:pw)  # using ReplicateDesign constructor
+bsrs_regex = ReplicateDesign{BootstrapReplicates}(bsrs.data, REPLICATES_REGEX, weights=:pw)  # using ReplicateDesign constructor
 
 # Stratified sample
 apistrat = load_data("apistrat") # Load API dataset
-dstrat = SurveyDesign(apistrat, strata = :stype, weights = :pw) # Create SurveyDesign
-unitrange = UnitRange((length(names(apistrat)) + 1):(TOTAL_REPLICATES + length(names(apistrat))))
+dstrat = SurveyDesign(apistrat, strata=:stype, weights=:pw) # Create SurveyDesign
+unitrange = UnitRange((length(names(apistrat))+1):(TOTAL_REPLICATES+length(names(apistrat))))
 bstrat = dstrat |> bootweights # Create replicate design
 bstrat_direct = ReplicateDesign{BootstrapReplicates}(bstrat.data, REPLICATES_VECTOR, strata=:stype, weights=:pw)  # using ReplicateDesign constructor
 bstrat_unitrange = ReplicateDesign{BootstrapReplicates}(bstrat.data, unitrange, strata=:stype, weights=:pw)  # using ReplicateDesign constructor
@@ -32,23 +32,23 @@ bstrat_regex = ReplicateDesign{BootstrapReplicates}(bstrat.data, REPLICATES_REGE
 # One-stage cluster sample
 apiclus1 = load_data("apiclus1") # Load API dataset
 apiclus1[!, :pw] = fill(757 / 15, (size(apiclus1, 1),)) # Correct api mistake for pw column
-dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw) # Create SurveyDesign
-unitrange = UnitRange((length(names(apiclus1)) + 1):(TOTAL_REPLICATES + length(names(apiclus1))))
+dclus1 = SurveyDesign(apiclus1; clusters=:dnum, weights=:pw) # Create SurveyDesign
+unitrange = UnitRange((length(names(apiclus1))+1):(TOTAL_REPLICATES+length(names(apiclus1))))
 dclus1_boot = dclus1 |> bootweights # Create replicate design
 dclus1_boot_direct = ReplicateDesign{BootstrapReplicates}(dclus1_boot.data, REPLICATES_VECTOR, clusters=:dnum, weights=:pw)  # using ReplicateDesign constructor
 dclus1_boot_unitrange = ReplicateDesign{BootstrapReplicates}(dclus1_boot.data, unitrange, clusters=:dnum, weights=:pw)  # using ReplicateDesign constructor
 dclus1_boot_regex = ReplicateDesign{BootstrapReplicates}(dclus1_boot.data, REPLICATES_REGEX, clusters=:dnum, weights=:pw)  # using ReplicateDesign constructor
 
 # Two-stage cluster sample
 apiclus2 = load_data("apiclus2") # Load API dataset
-dclus2 = SurveyDesign(apiclus2; clusters = :dnum, weights = :pw) # Create SurveyDesign
+dclus2 = SurveyDesign(apiclus2; clusters=:dnum, weights=:pw) # Create SurveyDesign
 dclus2_boot = dclus2 |> bootweights # Create replicate design
 
 # NHANES
 nhanes = load_data("nhanes")
 nhanes.seq1 = collect(1.0:5.0:42955.0)
 nhanes.seq2 = collect(1.0:9.0:77319.0) # [9k for k in 0:8590.0]
-dnhanes = SurveyDesign(nhanes; clusters = :SDMVPSU, strata = :SDMVSTRA, weights = :WTMEC2YR)
+dnhanes = SurveyDesign(nhanes; clusters=:SDMVPSU, strata=:SDMVSTRA, weights=:WTMEC2YR)
 dnhanes_boot = dnhanes |> bootweights
 
 @testset "Survey.jl" begin