Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Append ! suffix to functions which mutate their arguments #319

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 31 additions & 31 deletions src/SurveyDesign.jl
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
- `weights::Union{Nothing, Symbol}=nothing`: the sampling weights.
- `popsize::Union{Nothing, Symbol}=nothing`: the (expected) survey population size.

```jldoctest

Check failure on line 30 in src/SurveyDesign.jl

View workflow job for this annotation

GitHub Actions / build

doctest failure in ~/work/Survey.jl/Survey.jl/src/SurveyDesign.jl:30-43

```jldoctest
julia> apiclus1 = load_data("apiclus1");

julia> dclus1 = SurveyDesign(apiclus1; clusters=:dnum, weights=:pw)
SurveyDesign:
data: 183×44 DataFrame
strata: none
cluster: dnum
    [637, 637, 637 … 448]
popsize: [507.7049, 507.7049, 507.7049 … 507.7049]
sampsize: [15, 15, 15 … 15]
weights: [33.847, 33.847, 33.847 … 33.847]
allprobs: [0.0295, 0.0295, 0.0295 … 0.0295]
```

Subexpression:

dclus1 = SurveyDesign(apiclus1; clusters=:dnum, weights=:pw)

Evaluated output:

ERROR: MethodError: no method matching SurveyDesign(::DataFrames.DataFrame; clusters::Symbol, weights::Symbol)
Stacktrace:
 [1] top-level scope
   @ none:1

Expected output:

SurveyDesign:
data: 183×44 DataFrame
strata: none
cluster: dnum
    [637, 637, 637 … 448]
popsize: [507.7049, 507.7049, 507.7049 … 507.7049]
sampsize: [15, 15, 15 … 15]
weights: [33.847, 33.847, 33.847 … 33.847]
allprobs: [0.0295, 0.0295, 0.0295 … 0.0295]

diff =
Warning: Diff output requires color.
SurveyDesign:
data: 183×44 DataFrame
strata: none
cluster: dnum
    [637, 637, 637 … 448]
popsize: [507.7049, 507.7049, 507.7049 … 507.7049]
sampsize: [15, 15, 15 … 15]
weights: [33.847, 33.847, 33.847 … 33.847]
allprobs: [0.0295, 0.0295, 0.0295 … 0.0295]ERROR: MethodError: no method matching SurveyDesign(::DataFrames.DataFrame; clusters::Symbol, weights::Symbol)
Stacktrace:
 [1] top-level scope
   @ none:1
julia> apiclus1 = load_data("apiclus1");

julia> dclus1 = SurveyDesign(apiclus1; clusters=:dnum, weights=:pw)
Expand All @@ -52,12 +52,12 @@
allprobs::Symbol # Right now only singlestage approx supported
pps::Bool # TODO functionality
# Single stage clusters sample, like apiclus1
function SurveyDesign(
function SurveyDesign!(
data::AbstractDataFrame;
clusters::Union{Nothing,Symbol,Vector{Symbol}} = nothing,
strata::Union{Nothing,Symbol} = nothing,
popsize::Union{Nothing,Symbol} = nothing,
weights::Union{Nothing,Symbol} = nothing,
clusters::Union{Nothing,Symbol,Vector{Symbol}}=nothing,
strata::Union{Nothing,Symbol}=nothing,
popsize::Union{Nothing,Symbol}=nothing,
weights::Union{Nothing,Symbol}=nothing,
)
# sampsize here is number of clusters completely sampled, popsize is total clusters in population
if typeof(strata) <: Nothing
Expand Down Expand Up @@ -99,8 +99,8 @@
data[!, popsize] = data[!, sampsize_labels] .* data[!, weights_labels]
end
elseif isa(popsize, Symbol)
weights_labels = :_weights
data[!, weights_labels] = data[!, popsize] ./ data[!, sampsize_labels]
weights_labels = :_weights
data[!, weights_labels] = data[!, popsize] ./ data[!, sampsize_labels]
else
# neither popsize nor weights given
weights_labels = :_weights
Expand Down Expand Up @@ -305,31 +305,31 @@
type::String,
replicates::UInt,
replicate_weights::Vector{Symbol},
) where {ReplicateType <: InferenceMethod}
) where {ReplicateType<:InferenceMethod}
new{ReplicateType}(data, cluster, popsize, sampsize, strata, weights, allprobs,
pps, type, replicates, replicate_weights, ReplicateType(replicates))
pps, type, replicates, replicate_weights, ReplicateType(replicates))
end

# constructor with given replicate_weights
function ReplicateDesign{ReplicateType}(
data::AbstractDataFrame,
replicate_weights::Vector{Symbol};
clusters::Union{Nothing,Symbol,Vector{Symbol}} = nothing,
strata::Union{Nothing,Symbol} = nothing,
popsize::Union{Nothing,Symbol} = nothing,
weights::Union{Nothing,Symbol} = nothing
) where {ReplicateType <: InferenceMethod}
clusters::Union{Nothing,Symbol,Vector{Symbol}}=nothing,
strata::Union{Nothing,Symbol}=nothing,
popsize::Union{Nothing,Symbol}=nothing,
weights::Union{Nothing,Symbol}=nothing
) where {ReplicateType<:InferenceMethod}
# rename the replicate weights if needed
rename!(data, [replicate_weights[index] => "replicate_"*string(index) for index in 1:length(replicate_weights)])
rename!(data, [replicate_weights[index] => "replicate_" * string(index) for index in 1:length(replicate_weights)])

# call the SurveyDesign constructor
base_design = SurveyDesign(
data;
clusters=clusters,
strata=strata,
popsize=popsize,
weights=weights
)
data;
clusters=clusters,
strata=strata,
popsize=popsize,
weights=weights
)
new{ReplicateType}(
base_design.data,
base_design.cluster,
Expand All @@ -350,11 +350,11 @@
ReplicateDesign{ReplicateType}(
data::AbstractDataFrame,
replicate_weights::UnitRange{Int};
clusters::Union{Nothing,Symbol,Vector{Symbol}} = nothing,
strata::Union{Nothing,Symbol} = nothing,
popsize::Union{Nothing,Symbol} = nothing,
weights::Union{Nothing,Symbol} = nothing
) where {ReplicateType <: InferenceMethod} =
clusters::Union{Nothing,Symbol,Vector{Symbol}}=nothing,
strata::Union{Nothing,Symbol}=nothing,
popsize::Union{Nothing,Symbol}=nothing,
weights::Union{Nothing,Symbol}=nothing
) where {ReplicateType<:InferenceMethod} =
ReplicateDesign{ReplicateType}(
data,
Symbol.(names(data)[replicate_weights]);
Expand All @@ -368,11 +368,11 @@
ReplicateDesign{ReplicateType}(
data::AbstractDataFrame,
replicate_weights::Regex;
clusters::Union{Nothing,Symbol,Vector{Symbol}} = nothing,
strata::Union{Nothing,Symbol} = nothing,
popsize::Union{Nothing,Symbol} = nothing,
weights::Union{Nothing,Symbol} = nothing
) where {ReplicateType <: InferenceMethod} =
clusters::Union{Nothing,Symbol,Vector{Symbol}}=nothing,
strata::Union{Nothing,Symbol}=nothing,
popsize::Union{Nothing,Symbol}=nothing,
weights::Union{Nothing,Symbol}=nothing
) where {ReplicateType<:InferenceMethod} =
ReplicateDesign{ReplicateType}(
data,
Symbol.(names(data)[findall(name -> occursin(replicate_weights, name), names(data))]);
Expand Down
8 changes: 4 additions & 4 deletions src/by.jl
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
function subset(group, design::SurveyDesign)
return SurveyDesign(DataFrame(group);clusters = design.cluster, strata = design.strata, popsize = design.popsize, weights = design.weights)
end
return SurveyDesign(DataFrame(group); clusters=design.cluster, strata=design.strata, popsize=design.popsize, weights=design.weights)
end

function subset(group, design::ReplicateDesign)
return ReplicateDesign{typeof(design.inference_method)}(DataFrame(group), design.replicate_weights;clusters = design.cluster, strata = design.strata, popsize = design.popsize, weights = design.weights)
return ReplicateDesign{typeof(design.inference_method)}(DataFrame(group), design.replicate_weights; clusters=design.cluster, strata=design.strata, popsize=design.popsize, weights=design.weights)
end

function bydomain(x::Union{Symbol, Vector{Symbol}}, domain,design::Union{SurveyDesign, ReplicateDesign}, func::Function, args...; kwargs...)
function bydomain(x::Union{Symbol,Vector{Symbol}}, domain, design::Union{SurveyDesign,ReplicateDesign}, func::Function, args...; kwargs...)
domain_names = unique(design.data[!, domain])
gdf = groupby(design.data, domain)
domain_names = [join(collect(keys(gdf)[i]), "-") for i in 1:length(gdf)]
Expand Down
2 changes: 1 addition & 1 deletion src/load_data.jl
Original file line number Diff line number Diff line change
Expand Up @@ -37,5 +37,5 @@ function load_data(name)
name = name * ".csv"
@assert name ∈ readdir(asset_path())

CSV.read(asset_path(name), DataFrame, missingstring = "NA")
CSV.read(asset_path(name), DataFrame, missingstring="NA")
end
24 changes: 12 additions & 12 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,23 +7,23 @@ using DataFrames
const STAT_TOL = 1e-5
const SE_TOL = 1e-1
TOTAL_REPLICATES = 4000
REPLICATES_VECTOR = [Symbol("replicate_"*string(i)) for i in 1:TOTAL_REPLICATES]
REPLICATES_VECTOR = [Symbol("replicate_" * string(i)) for i in 1:TOTAL_REPLICATES]
REPLICATES_REGEX = r"r*_\d"

# Simple random sample
apisrs = load_data("apisrs") # Load API dataset
srs = SurveyDesign(apisrs, weights = :pw)
unitrange = UnitRange((length(names(apisrs)) + 1):(TOTAL_REPLICATES + length(names(apisrs))))
srs = SurveyDesign(apisrs, weights=:pw)
unitrange = UnitRange((length(names(apisrs))+1):(TOTAL_REPLICATES+length(names(apisrs))))
bsrs = srs |> bootweights # Create bootstrap replicate design
jsrs = srs |> jackknifeweights # Create jackknife replicate design
bsrs_direct = ReplicateDesign{BootstrapReplicates}(bsrs.data, REPLICATES_VECTOR, weights = :pw) # using ReplicateDesign constructor
bsrs_unitrange = ReplicateDesign{BootstrapReplicates}(bsrs.data, unitrange, weights = :pw) # using ReplicateDesign constructor
bsrs_regex = ReplicateDesign{BootstrapReplicates}(bsrs.data, REPLICATES_REGEX, weights = :pw) # using ReplicateDesign constructor
bsrs_direct = ReplicateDesign{BootstrapReplicates}(bsrs.data, REPLICATES_VECTOR, weights=:pw) # using ReplicateDesign constructor
bsrs_unitrange = ReplicateDesign{BootstrapReplicates}(bsrs.data, unitrange, weights=:pw) # using ReplicateDesign constructor
bsrs_regex = ReplicateDesign{BootstrapReplicates}(bsrs.data, REPLICATES_REGEX, weights=:pw) # using ReplicateDesign constructor

# Stratified sample
apistrat = load_data("apistrat") # Load API dataset
dstrat = SurveyDesign(apistrat, strata = :stype, weights = :pw) # Create SurveyDesign
unitrange = UnitRange((length(names(apistrat)) + 1):(TOTAL_REPLICATES + length(names(apistrat))))
dstrat = SurveyDesign(apistrat, strata=:stype, weights=:pw) # Create SurveyDesign
unitrange = UnitRange((length(names(apistrat))+1):(TOTAL_REPLICATES+length(names(apistrat))))
bstrat = dstrat |> bootweights # Create replicate design
bstrat_direct = ReplicateDesign{BootstrapReplicates}(bstrat.data, REPLICATES_VECTOR, strata=:stype, weights=:pw) # using ReplicateDesign constructor
bstrat_unitrange = ReplicateDesign{BootstrapReplicates}(bstrat.data, unitrange, strata=:stype, weights=:pw) # using ReplicateDesign constructor
Expand All @@ -32,23 +32,23 @@ bstrat_regex = ReplicateDesign{BootstrapReplicates}(bstrat.data, REPLICATES_REGE
# One-stage cluster sample
apiclus1 = load_data("apiclus1") # Load API dataset
apiclus1[!, :pw] = fill(757 / 15, (size(apiclus1, 1),)) # Correct api mistake for pw column
dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw) # Create SurveyDesign
unitrange = UnitRange((length(names(apiclus1)) + 1):(TOTAL_REPLICATES + length(names(apiclus1))))
dclus1 = SurveyDesign(apiclus1; clusters=:dnum, weights=:pw) # Create SurveyDesign
unitrange = UnitRange((length(names(apiclus1))+1):(TOTAL_REPLICATES+length(names(apiclus1))))
dclus1_boot = dclus1 |> bootweights # Create replicate design
dclus1_boot_direct = ReplicateDesign{BootstrapReplicates}(dclus1_boot.data, REPLICATES_VECTOR, clusters=:dnum, weights=:pw) # using ReplicateDesign constructor
dclus1_boot_unitrange = ReplicateDesign{BootstrapReplicates}(dclus1_boot.data, unitrange, clusters=:dnum, weights=:pw) # using ReplicateDesign constructor
dclus1_boot_regex = ReplicateDesign{BootstrapReplicates}(dclus1_boot.data, REPLICATES_REGEX, clusters=:dnum, weights=:pw) # using ReplicateDesign constructor

# Two-stage cluster sample
apiclus2 = load_data("apiclus2") # Load API dataset
dclus2 = SurveyDesign(apiclus2; clusters = :dnum, weights = :pw) # Create SurveyDesign
dclus2 = SurveyDesign(apiclus2; clusters=:dnum, weights=:pw) # Create SurveyDesign
dclus2_boot = dclus2 |> bootweights # Create replicate design

# NHANES
nhanes = load_data("nhanes")
nhanes.seq1 = collect(1.0:5.0:42955.0)
nhanes.seq2 = collect(1.0:9.0:77319.0) # [9k for k in 0:8590.0]
dnhanes = SurveyDesign(nhanes; clusters = :SDMVPSU, strata = :SDMVSTRA, weights = :WTMEC2YR)
dnhanes = SurveyDesign(nhanes; clusters=:SDMVPSU, strata=:SDMVSTRA, weights=:WTMEC2YR)
dnhanes_boot = dnhanes |> bootweights

@testset "Survey.jl" begin
Expand Down
Loading