Merge pull request #53 from nilshg/master

Compat with Vcov 0.8 and newer FixedEffectModels versions
jmboehm · Dec 28, 2023 · fa9df73 · fa9df73
2 parents 8c99175 + 31dac02
commit fa9df73
Show file tree

Hide file tree

Showing 7 changed files with 39 additions and 27 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -15,7 +15,7 @@ jobs:
       fail-fast: false
       matrix:
         version:
-          - '1.6' # Replace this with the minimum Julia version that your package supports. E.g. if your package requires Julia 1.5 or higher, change this to '1.5'.
+          - '1.9' # Replace this with the minimum Julia version that your package supports. E.g. if your package requires Julia 1.5 or higher, change this to '1.5'.
           - '1' # Leave this line unchanged. '1' will automatically expand to the latest stable 1.x release of Julia.
           - 'nightly'
         os:

diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
 name = "GLFixedEffectModels"
 uuid = "bafb0ae5-e5f5-5100-81b6-6a55d777c812"
 authors = ["Johannes Boehm <[email protected]>"]
-version = "0.5.2"
+version = "0.5.3"
 
 [deps]
 DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
@@ -24,13 +24,13 @@ Vcov = "ec2bfdc2-55df-4fc9-b9ae-4958c2cf2486"
 DataFrames = "1"
 Distributions = "0.25"
 FillArrays = "1"
-FixedEffectModels = "1.7"
-FixedEffects = "2.1"
+FixedEffectModels = "1.7,1.8,1.9,1.10,1.11"
+FixedEffects = "2.1,2.2,2.3"
 GLM = "^1.3.6"
 LoopVectorization = "0.12"
 Reexport = "1"
 StatsAPI = "1"
 StatsBase = "0.33, 0.34"
 StatsModels = "0.6, 0.7"
-Vcov = "0.7"
-julia = "1"
+Vcov = "0.7, 0.8"
+julia = ">= 1.9"
diff --git a/src/fit.jl b/src/fit.jl
@@ -94,8 +94,8 @@ function nlreg(@nospecialize(df),
         formula = StatsModels.FormulaTerm(formula.lhs, StatsModels.InterceptTerm{true}() + formula.rhs)
     end
     formula, formula_endo, formula_iv = FixedEffectModels.parse_iv(formula)
-    has_iv = formula_iv != nothing
-    has_weights = weights != nothing
+    has_iv = formula_iv != StatsModels.FormulaTerm(ConstantTerm(0), ConstantTerm(0))
+    has_weights = weights !== nothing
     if has_iv
         error("Instrumental variables are not allowed.")
     end
@@ -133,8 +133,17 @@ function nlreg(@nospecialize(df),
     end
     esample .&= Vcov.completecases(df, vcov)
 
-    fes, ids, fekeys, formula = FixedEffectModels.parse_fixedeffect(df, formula)
-    has_fes = !isempty(fes)
+    formula, formula_fes = FixedEffectModels.parse_fe(formula)
+    has_fes = formula_fes != FormulaTerm(ConstantTerm(0), ConstantTerm(0))
+    fes, ids, fekeys = FixedEffectModels.parse_fixedeffect(df, formula_fes)
+
+    has_fe_intercept = any(fe.interaction isa UnitWeights for fe in fes)
+
+    # remove intercept if absorbed by fixed effects
+    if has_fe_intercept
+        formula = FormulaTerm(formula.lhs, tuple(InterceptTerm{false}(), (term for term in FixedEffectModels.eachterm(formula.rhs) if !isa(term, Union{ConstantTerm,InterceptTerm}))...))
+    end
+    has_intercept = hasintercept(formula)
 
     if has_fes
         if drop_singletons
@@ -150,14 +159,6 @@ function nlreg(@nospecialize(df),
         end
     end
 
-    has_intercept = hasintercept(formula)
-    has_fe_intercept = false
-    if has_fes
-        if any(fe.interaction isa Ones for fe in fes)
-            has_fe_intercept = true
-        end
-    end
-
     save_fe = (:fe ∈ save) & has_fes 
 
     ##############################################################################
@@ -535,7 +536,7 @@ function nlreg(@nospecialize(df),
         residuals
     end
 
-    vcov_data = VcovDataGLM(Xhat, crossx, resid_vcov, dof_residual_)#, hessian)
+    vcov_data = VcovDataGLM(Xhat, crossx, inv(crossx), resid_vcov, dof_residual_)#, hessian)
     # hessian is unnecessary since in all cases vcov takes the inv(cholesky(hessian)) which is the same as inv(crossx)
     """
     This option works if purely using Vcov.jl:

diff --git a/src/utils/basecol.jl b/src/utils/basecol.jl
@@ -25,7 +25,11 @@ end
 
 # rank(A) == rank(A'A)
 function basecol(X::AbstractMatrix...; factorization = :Cholesky)
-    cholm = cholesky!(Symmetric(crossprod(X...)), Val(true); tol = -1, check = false)
+    @static if VERSION >= v"1.7"
+        cholm = cholesky!(Symmetric(crossprod(X...)), RowMaximum(); tol = -1, check = false)
+    else
+        cholm = cholesky!(Symmetric(crossprod(X...)), Val(true); tol = -1, check = false)
+    end
     r = 0
     if size(cholm, 1) > 0
         r = sum(diag(cholm.factors) .> size(X[1],1)^2 * eps())

diff --git a/src/utils/vcov.jl b/src/utils/vcov.jl
@@ -1,23 +1,30 @@
-struct VcovDataGLM{T, N} <: RegressionModel
+struct VcovDataGLM{T, Tu, N} <: RegressionModel
     modelmatrix::Matrix{Float64}       # X
     crossmodelmatrix::T                    # X'X in the simplest case. Can be Matrix but preferably Factorization
+    invcrossmodelmatrix::Tu
     residuals::Array{Float64, N}      # vector or matrix of residuals (matrix in the case of IV, residuals of Xendo on (Z, Xexo))
     dof_residual::Int
 end
 StatsAPI.modelmatrix(x::VcovDataGLM) = x.modelmatrix
 StatsAPI.crossmodelmatrix(x::VcovDataGLM) = x.crossmodelmatrix
+invcrossmodelmatrix(x::VcovDataGLM) = x.invcrossmodelmatrix
 StatsAPI.residuals(x::VcovDataGLM) = x.residuals
 StatsAPI.dof_residual(x::VcovDataGLM) = x.dof_residual
 
 # with clusters, the standard StatsBase.vcov works
 
 function StatsAPI.vcov(x::VcovDataGLM, ::Vcov.RobustCovariance)
-    A = inv(crossmodelmatrix(x))
+    A = invcrossmodelmatrix(x)
     C = modelmatrix(x) .* residuals(x)
     B = C' * C
     return Symmetric(A * B * A)
 end
 
 function StatsAPI.vcov(x::VcovDataGLM, ::Vcov.SimpleCovariance)
-	return Symmetric(inv(crossmodelmatrix(x)))
+	return Symmetric(invcrossmodelmatrix(x))
 end
+
+function StatsAPI.vcov(x::VcovDataGLM, v::Vcov.ClusterCovariance)
+    xtx = invcrossmodelmatrix(x)
+    Vcov.pinvertible(Symmetric(xtx * Vcov.S_hat(x, v) * xtx))
+end
diff --git a/test/nlreg.jl b/test/nlreg.jl
@@ -170,14 +170,14 @@ df_sep = DataFrame(y = [[0.0, 0.0, 0.0, 1.0, 1.0, 1.0];rand(rng,[0.0,1.0],500)],
 m = @formula y ~ x + fe(x1)
 try
     # this should fail
-    x = nlreg(df_sep, m, Binomial(), LogitLink() , start = [0.1], separation = Symbol[], separation_mu_lbound=1e-10, separation_mu_ubound=1.0-1e-10, verbose=true, rho_tol=1e-12 )
+    local x = nlreg(df_sep, m, Binomial(), LogitLink() , start = [0.1], separation = Symbol[], separation_mu_lbound=1e-10, separation_mu_ubound=1.0-1e-10, verbose=true, rho_tol=1e-12 )
 catch ex
     @test !isnothing(ex)
 end
 # with cutoff on mu, it converges
 try
     # this should pass
-    x = nlreg(df_sep, m, Binomial(), LogitLink() , start = [0.1], separation = [:mu], separation_mu_lbound=1e-10, separation_mu_ubound=1.0-1e-10, verbose=true, rho_tol=1e-12 )
+    local x = nlreg(df_sep, m, Binomial(), LogitLink() , start = [0.1], separation = [:mu], separation_mu_lbound=1e-10, separation_mu_ubound=1.0-1e-10, verbose=true, rho_tol=1e-12 )
     @test x.coef ≈ [-0.0005504145168443688] atol = 1e-4
 catch ex
     @test isnothing(ex)

diff --git a/test/separation.jl b/test/separation.jl
@@ -12,12 +12,12 @@ df = DataFrame(y = rand(6), x1 = [1;0.5;0.8;1;0;0], x2 = [0;0.5;0.2;0;0;0], id =
 # y ~ x1 + x2 + fe(id), will drop x2
 res1 = nlreg(df, @formula(y ~ x1 + x2 + fe(id)), Poisson(), LogLink())
 @test 0 ∈ res1.coef
-@show res1
+#@show res1
 # y ~ x1 + fe(id)
 
 res2 = nlreg(df, @formula(y ~ x2 + fe(id)), Poisson(), LogLink())
 @test 0 ∉ res2.coef
-@show res2
+#@show res2
 
 # ---------------------------------------------------------------------------------------------------------------- #