diff --git a/src/macros.jl b/src/macros.jl index b3d9573..fad579a 100644 --- a/src/macros.jl +++ b/src/macros.jl @@ -74,6 +74,8 @@ end @regress y x1 x2 ... [@if condition], [robust] [cluster(var1, var2, ...)] Estimate a regression model in `df` with dependent variable `y` and independent variables `x1`, `x2`, etc. If `condition` is provided, the operation is executed only on rows for which the condition is true. If `robust` is provided, robust standard errors are calculated. If `cluster` is provided, clustered standard errors are calculated. + +The regression is limited to rows for which all variables have valid values. Missing values, infinity, and NaN are automatically excluded. """ macro regress(exprs...) :regress |> parse(exprs) |> rewrite diff --git a/src/side_effects.jl b/src/side_effects.jl index 5fa2d5a..624a2f1 100644 --- a/src/side_effects.jl +++ b/src/side_effects.jl @@ -33,8 +33,12 @@ function rewrite(::Val{:regress}, command::Command) # validate everything except fixed effects to_validate = [x for x in arguments if get_top_symbol(x) != :fe] additional_condition = build_bitmask(target_df, :(Kezdi.isvalue($(to_validate...)))) + nobs_to_drop = quote $setup + if sum(.!$additional_condition) > 0 + display("Dropping $(sum(.!$additional_condition)) row(s) due to missing values.") + end if length($(arguments[2:end])) == 1 reg(view($target_df, $additional_condition, :), @formula($(arguments[1]) ~ $(arguments[2])), $vcov) |> $teardown else