Merge pull request #60 from jmboehm/augmentdfbug
fix augmentdf bug, error without fe
jmboehm authored Jun 6, 2024
2 parents 0cd6599 + 488c3b3 commit 55c2a7c
Showing 5 changed files with 35 additions and 13 deletions.
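For orientation, a minimal usage sketch of the behaviour this commit targets (the data, column names, and formula below are illustrative, not taken from the repository): with a fixed effect and save = [:fe], the estimated effects are now looked up by name via fe(), and a formula without any fe() term now fails with an informative error.

using DataFrames, Distributions, GLM, GLFixedEffectModels

# Hypothetical example data (not from the package's test suite).
df = DataFrame(y  = rand(1:5, 100),
               x  = randn(100),
               id = repeat(string.(1:10), inner = 10))

# Fixed effect present: estimate, save the effects, and retrieve them by name.
res = nlreg(df, @formula(y ~ x + fe(id)), Poisson(), GLM.LogLink(), save = [:fe])
fe(res)    # DataFrame containing the fe_id column

# No fe() term: nlreg now throws early and points the user to GLM.jl instead.
# nlreg(df, @formula(y ~ x), Poisson(), GLM.LogLink())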
2 changes: 1 addition & 1 deletion Project.toml
@@ -1,7 +1,7 @@
name = "GLFixedEffectModels"
uuid = "bafb0ae5-e5f5-5100-81b6-6a55d777c812"
authors = ["Johannes Boehm <[email protected]>"]
version = "0.5.3"
version = "0.5.4"

[deps]
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
3 changes: 2 additions & 1 deletion src/GLFixedEffectModel.jl
@@ -16,6 +16,7 @@ struct GLFixedEffectModel <: RegressionModel

esample::BitVector # Is the row of the original dataframe part of the estimation sample?
augmentdf::DataFrame
fekeys::Vector{Symbol}
loglikelihood::Float64
nullloglikelihood::Float64

@@ -126,7 +127,7 @@ end

function FixedEffectModels.fe(x::GLFixedEffectModel)
!has_fe(x) && throw("fe() is not defined for fixed effect models without fixed effects")
x.augmentdf[!, 2:size(x.augmentdf, 2)]
x.augmentdf[!, Symbol.( "fe_" .* String.(x.fekeys))]
end


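The rewritten fe() accessor above selects the stored effects by name rather than by column position. A sketch of the lookup it performs, continuing the hypothetical res from the example above (direct field access shown only for illustration):

# augmentdf may also hold :residuals, :mu, :eta and the original fixed-effect
# key columns (see fit.jl below), so the old positional slice over columns
# 2:end could pick up the wrong data. The estimated effects are stored under
# "fe_" * key:
cols = Symbol.("fe_" .* String.(res.fekeys))   # e.g. [:fe_id]
res.augmentdf[!, cols]                         # what fe(res) returns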
38 changes: 28 additions & 10 deletions src/fit.jl
@@ -51,6 +51,7 @@ function nlreg(@nospecialize(df),
dof_add::Integer = 0,
save::Vector{Symbol} = Symbol[],
method::Symbol = :cpu,
nthreads::Integer = method == :cpu ? Threads.nthreads() : 256,
drop_singletons = true,
double_precision::Bool = true,
dev_tol::Real = 1.0e-8, # tolerance level for the first stopping condition of the maximization routine.
@@ -83,6 +84,11 @@ function nlreg(@nospecialize(df),
subset = eval(evaluate_subset(df, subsetformula))
end

if method == :cpu && nthreads > Threads.nthreads()
@warn "Keyword argument nthreads = $(nthreads) is ignored (Julia was started with only $(Threads.nthreads()) threads)."
nthreads = Threads.nthreads()
end

##############################################################################
##
## Parse formula
@@ -157,6 +163,8 @@ function nlreg(@nospecialize(df),
@info "$(dropped_n) observations detected as singletons. Dropping them ..."
end
end
else
error("No fixed effect specified. Use GLM.jl for the estimation of generalized linear models without fixed effects.")
end

save_fe = (:fe ∈ save) & has_fes
@@ -263,7 +271,7 @@ function nlreg(@nospecialize(df),
Xexo, basecoef = detect_linear_dependency_among_X!(Xexo, basecoef; coefnames=coef_names)

weights = Weights(Ones{Float64}(sum(esample)))
feM = AbstractFixedEffectSolver{double_precision ? Float64 : Float32}(fes, weights, Val{method})
feM = AbstractFixedEffectSolver{double_precision ? Float64 : Float32}(fes, weights, Val{method}, nthreads)

# make one copy after deleting NAs + dropping singletons + detecting separations (fe + relu)
nobs = sum(esample)
Expand All @@ -283,24 +291,25 @@ function nlreg(@nospecialize(df),
(length(start) == coeflength) || error("Invalid length of `start` argument.")
beta = start
else
beta = 0.1 .* ones(Float64, coeflength)
# beta = zeros(double_precision ? Float64 : Float32, coeflength)
beta = 0.1 .* ones(double_precision ? Float64 : Float32, coeflength)
end

#Xexo = oldX[esample,:]
Xexo = GLFixedEffectModels.getcols(Xexo, basecoef) # get Xexo from oldX and basecoef and esample

eta = Xexo * beta
mu = GLM.linkinv.(Ref(link),eta)
wt = ones(Float64, nobs, 1)
wt = ones(double_precision ? Float64 : Float32, nobs, 1)
dev = sum(devresid.(Ref(distribution), y, mu))
nulldev = sum(devresid.(Ref(distribution), mean(y), mu))

Xhat = Xexo
crossx = Matrix{Float64}(undef, nobs, 0)
crossx = Matrix{double_precision ? Float64 : Float32}(undef, nobs, 0)
residuals = y[:] # just for initialization

# Stuff that we need in outside scope
emp = Array{Float64,2}(undef,2,2)
emp = Array{double_precision ? Float64 : Float32,2}(undef,2,2)
score = hessian = emp

outer_iterations = 0
@@ -439,6 +448,11 @@ function nlreg(@nospecialize(df),
display(Xdemean .* nudemean)
display(Xhat .* nu)
end

# if dev > nulldev
# @warn "Final deviance exceeds null deviance. Possibly running into a local maximum. Try restarting with a different starting guess."
# end

break
else
verbose && println("Iter $i : not converged. Δdev = $((devold - dev)/dev), ||Δβ|| = $(norm(beta_update))")
@@ -463,7 +477,7 @@ function nlreg(@nospecialize(df),
augmentdf = DataFrame()
if save_residuals
if nobs < length(esample)
augmentdf.residuals = Vector{Union{Float64, Missing}}(missing, length(esample))
augmentdf.residuals = Vector{Union{double_precision ? Float64 : Float32, Missing}}(missing, length(esample))
augmentdf[esample, :residuals] = residuals
else
augmentdf[!, :residuals] = residuals
@@ … @@
oldX = oldX[esample,:]
oldX = getcols(oldX, basecoef)
# update FixedEffectSolver
weights = Weights(Ones{Float64}(sum(esample)))
weights = Weights(Ones{double_precision ? Float64 : Float32}(sum(esample)))
feM = AbstractFixedEffectSolver{double_precision ? Float64 : Float32}(fes, weights, Val{method})
newfes, b, c = solve_coefficients!(eta - oldX * coef, feM; tol = center_tol, maxiter = maxiter_center)
for fekey in fekeys
augmentdf[!, fekey] = df[:, fekey]
end
for j in 1:length(fes)
if nobs < length(esample)
augmentdf[!, ids[j]] = Vector{Union{Float64, Missing}}(missing, length(esample))
augmentdf[!, ids[j]] = Vector{Union{double_precision ? Float64 : Float32, Missing}}(missing, length(esample))
augmentdf[esample, ids[j]] = newfes[j]
else
augmentdf[!, ids[j]] = newfes[j]
@@ … @@
end
if :mu ∈ save
if nobs < length(esample)
augmentdf.mu = Vector{Union{Float64, Missing}}(missing, length(esample))
augmentdf.mu = Vector{Union{double_precision ? Float64 : Float32, Missing}}(missing, length(esample))
augmentdf[esample, :mu] = mu
else
augmentdf[!, :mu] = mu
end
end
if :eta ∈ save
if nobs < length(esample)
augmentdf.eta = Vector{Union{Float64, Missing}}(missing, length(esample))
augmentdf.eta = Vector{Union{double_precision ? Float64 : Float32, Missing}}(missing, length(esample))
augmentdf[esample, :eta] = eta
else
augmentdf[!, :eta] = eta
@@ -589,6 +606,7 @@ function nlreg(@nospecialize(df),
outer_converged,
esample,
augmentdf,
fekeys,
ll,
null_ll,
distribution,
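A sketch combining the new fit.jl options above, reusing the same hypothetical data: nthreads applies to the :cpu method and is capped at Threads.nthreads() with a warning, double_precision = false now carries Float32 through the working arrays and the saved columns more consistently, and :mu / :eta can be saved alongside the fixed effects.

res32 = nlreg(df, @formula(y ~ x + fe(id)), Poisson(), GLM.LogLink();
              method = :cpu, nthreads = 2, double_precision = false,
              save = [:fe, :mu, :eta])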
3 changes: 3 additions & 0 deletions src/utils/biascorr.jl
@@ -166,6 +166,7 @@ function biascorr_probit(model::GLFixedEffectModel,df::DataFrame,fes::Dict,L::In
model.converged,
model.esample,
model.augmentdf,
model.fekeys,
model.loglikelihood,
model.nullloglikelihood,
model.distribution,
@@ -213,6 +214,7 @@ function biascorr_logit(model::GLFixedEffectModel,df::DataFrame,fes::Dict,L::Int
model.converged,
model.esample,
model.augmentdf,
model.fekeys,
model.loglikelihood,
model.nullloglikelihood,
model.distribution,
@@ -327,6 +329,7 @@ function biascorr_poisson(model::GLFixedEffectModel,df::DataFrame,fes::Dict,L::I
model.converged,
model.esample,
model.augmentdf,
model.fekeys,
model.loglikelihood,
model.nullloglikelihood,
model.distribution,
2 changes: 1 addition & 1 deletion test/nlreg.jl
@@ -36,7 +36,7 @@ df.RandomCategorical = df.Random
# PROBIT ------------------------------------------------------------------
# One FE, Probit
m = @formula binary ~ SepalWidth + fe(SpeciesDummy)
x = nlreg(df, m, Binomial(), GLM.ProbitLink(), start = [0.2])
x = nlreg(df, m, Binomial(), GLM.ProbitLink(), start = [0.2], save = [:fe])
@test x.coef ≈ [4.7793003788996895] atol = 1e-4

# Two FE, Probit
