diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index cbf83a9..b4ef816 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -15,7 +15,7 @@ jobs: fail-fast: false matrix: version: - - '1.6' # Replace this with the minimum Julia version that your package supports. E.g. if your package requires Julia 1.5 or higher, change this to '1.5'. + - '1.9' # Replace this with the minimum Julia version that your package supports. E.g. if your package requires Julia 1.5 or higher, change this to '1.5'. - '1' # Leave this line unchanged. '1' will automatically expand to the latest stable 1.x release of Julia. - 'nightly' os: diff --git a/Project.toml b/Project.toml index 4d974ad..c9d7d1b 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "GLFixedEffectModels" uuid = "bafb0ae5-e5f5-5100-81b6-6a55d777c812" authors = ["Johannes Boehm "] -version = "0.5.2" +version = "0.5.3" [deps] DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" @@ -24,13 +24,13 @@ Vcov = "ec2bfdc2-55df-4fc9-b9ae-4958c2cf2486" DataFrames = "1" Distributions = "0.25" FillArrays = "1" -FixedEffectModels = "1.7" -FixedEffects = "2.1" +FixedEffectModels = "1.7,1.8,1.9,1.10,1.11" +FixedEffects = "2.1,2.2,2.3" GLM = "^1.3.6" LoopVectorization = "0.12" Reexport = "1" StatsAPI = "1" StatsBase = "0.33, 0.34" StatsModels = "0.6, 0.7" -Vcov = "0.7" -julia = "1" +Vcov = "0.7, 0.8" +julia = ">= 1.9" diff --git a/src/fit.jl b/src/fit.jl index d673cbf..d362138 100644 --- a/src/fit.jl +++ b/src/fit.jl @@ -94,8 +94,8 @@ function nlreg(@nospecialize(df), formula = StatsModels.FormulaTerm(formula.lhs, StatsModels.InterceptTerm{true}() + formula.rhs) end formula, formula_endo, formula_iv = FixedEffectModels.parse_iv(formula) - has_iv = formula_iv != nothing - has_weights = weights != nothing + has_iv = formula_iv != StatsModels.FormulaTerm(ConstantTerm(0), ConstantTerm(0)) + has_weights = weights !== nothing if has_iv error("Instrumental variables are not allowed.") end @@ -133,8 +133,17 @@ function nlreg(@nospecialize(df), end esample .&= Vcov.completecases(df, vcov) - fes, ids, fekeys, formula = FixedEffectModels.parse_fixedeffect(df, formula) - has_fes = !isempty(fes) + formula, formula_fes = FixedEffectModels.parse_fe(formula) + has_fes = formula_fes != FormulaTerm(ConstantTerm(0), ConstantTerm(0)) + fes, ids, fekeys = FixedEffectModels.parse_fixedeffect(df, formula_fes) + + has_fe_intercept = any(fe.interaction isa UnitWeights for fe in fes) + + # remove intercept if absorbed by fixed effects + if has_fe_intercept + formula = FormulaTerm(formula.lhs, tuple(InterceptTerm{false}(), (term for term in FixedEffectModels.eachterm(formula.rhs) if !isa(term, Union{ConstantTerm,InterceptTerm}))...)) + end + has_intercept = hasintercept(formula) if has_fes if drop_singletons @@ -150,14 +159,6 @@ function nlreg(@nospecialize(df), end end - has_intercept = hasintercept(formula) - has_fe_intercept = false - if has_fes - if any(fe.interaction isa Ones for fe in fes) - has_fe_intercept = true - end - end - save_fe = (:fe ∈ save) & has_fes ############################################################################## @@ -535,7 +536,7 @@ function nlreg(@nospecialize(df), residuals end - vcov_data = VcovDataGLM(Xhat, crossx, resid_vcov, dof_residual_)#, hessian) + vcov_data = VcovDataGLM(Xhat, crossx, inv(crossx), resid_vcov, dof_residual_)#, hessian) # hessian is unnecessary since in all cases vcov takes the inv(cholesky(hessian)) which is the same as inv(crossx) """ This option works if purely using Vcov.jl: diff --git a/src/utils/basecol.jl b/src/utils/basecol.jl index 754f5be..72532cb 100644 --- a/src/utils/basecol.jl +++ b/src/utils/basecol.jl @@ -25,7 +25,11 @@ end # rank(A) == rank(A'A) function basecol(X::AbstractMatrix...; factorization = :Cholesky) - cholm = cholesky!(Symmetric(crossprod(X...)), Val(true); tol = -1, check = false) + @static if VERSION >= v"1.7" + cholm = cholesky!(Symmetric(crossprod(X...)), RowMaximum(); tol = -1, check = false) + else + cholm = cholesky!(Symmetric(crossprod(X...)), Val(true); tol = -1, check = false) + end r = 0 if size(cholm, 1) > 0 r = sum(diag(cholm.factors) .> size(X[1],1)^2 * eps()) diff --git a/src/utils/vcov.jl b/src/utils/vcov.jl index 8820963..8404fce 100644 --- a/src/utils/vcov.jl +++ b/src/utils/vcov.jl @@ -1,23 +1,30 @@ -struct VcovDataGLM{T, N} <: RegressionModel +struct VcovDataGLM{T, Tu, N} <: RegressionModel modelmatrix::Matrix{Float64} # X crossmodelmatrix::T # X'X in the simplest case. Can be Matrix but preferably Factorization + invcrossmodelmatrix::Tu residuals::Array{Float64, N} # vector or matrix of residuals (matrix in the case of IV, residuals of Xendo on (Z, Xexo)) dof_residual::Int end StatsAPI.modelmatrix(x::VcovDataGLM) = x.modelmatrix StatsAPI.crossmodelmatrix(x::VcovDataGLM) = x.crossmodelmatrix +invcrossmodelmatrix(x::VcovDataGLM) = x.invcrossmodelmatrix StatsAPI.residuals(x::VcovDataGLM) = x.residuals StatsAPI.dof_residual(x::VcovDataGLM) = x.dof_residual # with clusters, the standard StatsBase.vcov works function StatsAPI.vcov(x::VcovDataGLM, ::Vcov.RobustCovariance) - A = inv(crossmodelmatrix(x)) + A = invcrossmodelmatrix(x) C = modelmatrix(x) .* residuals(x) B = C' * C return Symmetric(A * B * A) end function StatsAPI.vcov(x::VcovDataGLM, ::Vcov.SimpleCovariance) - return Symmetric(inv(crossmodelmatrix(x))) + return Symmetric(invcrossmodelmatrix(x)) end + +function StatsAPI.vcov(x::VcovDataGLM, v::Vcov.ClusterCovariance) + xtx = invcrossmodelmatrix(x) + Vcov.pinvertible(Symmetric(xtx * Vcov.S_hat(x, v) * xtx)) +end \ No newline at end of file diff --git a/test/nlreg.jl b/test/nlreg.jl index a752a4a..3b4f9a1 100644 --- a/test/nlreg.jl +++ b/test/nlreg.jl @@ -170,14 +170,14 @@ df_sep = DataFrame(y = [[0.0, 0.0, 0.0, 1.0, 1.0, 1.0];rand(rng,[0.0,1.0],500)], m = @formula y ~ x + fe(x1) try # this should fail - x = nlreg(df_sep, m, Binomial(), LogitLink() , start = [0.1], separation = Symbol[], separation_mu_lbound=1e-10, separation_mu_ubound=1.0-1e-10, verbose=true, rho_tol=1e-12 ) + local x = nlreg(df_sep, m, Binomial(), LogitLink() , start = [0.1], separation = Symbol[], separation_mu_lbound=1e-10, separation_mu_ubound=1.0-1e-10, verbose=true, rho_tol=1e-12 ) catch ex @test !isnothing(ex) end # with cutoff on mu, it converges try # this should pass - x = nlreg(df_sep, m, Binomial(), LogitLink() , start = [0.1], separation = [:mu], separation_mu_lbound=1e-10, separation_mu_ubound=1.0-1e-10, verbose=true, rho_tol=1e-12 ) + local x = nlreg(df_sep, m, Binomial(), LogitLink() , start = [0.1], separation = [:mu], separation_mu_lbound=1e-10, separation_mu_ubound=1.0-1e-10, verbose=true, rho_tol=1e-12 ) @test x.coef ≈ [-0.0005504145168443688] atol = 1e-4 catch ex @test isnothing(ex) diff --git a/test/separation.jl b/test/separation.jl index 9538eea..224ca60 100644 --- a/test/separation.jl +++ b/test/separation.jl @@ -12,12 +12,12 @@ df = DataFrame(y = rand(6), x1 = [1;0.5;0.8;1;0;0], x2 = [0;0.5;0.2;0;0;0], id = # y ~ x1 + x2 + fe(id), will drop x2 res1 = nlreg(df, @formula(y ~ x1 + x2 + fe(id)), Poisson(), LogLink()) @test 0 ∈ res1.coef -@show res1 +#@show res1 # y ~ x1 + fe(id) res2 = nlreg(df, @formula(y ~ x2 + fe(id)), Poisson(), LogLink()) @test 0 ∉ res2.coef -@show res2 +#@show res2 # ---------------------------------------------------------------------------------------------------------------- #