# Patch overview (derived from the hunks below):
#   * Project.toml             - version 0.3.1 -> 0.3.2; PrecompileTools added to [deps];
#                                DataFrames added to [weakdeps] with a MiceDataFramesExt
#                                entry in [extensions]; [compat] gains DataFrames and
#                                PrecompileTools and RCall becomes "0.13, 0.14".
#   * ext/MiceDataFramesExt.jl - new file: a module containing only imports and one
#                                @compile_workload block that operates on a DataFrame.
#   * src/Mice.jl              - imports CategoricalPool and @compile_workload, and
#                                includes precompile.jl just before the module's `end`.
#   * src/precompile.jl        - new file: one @compile_workload block that operates on
#                                a NamedTuple of column vectors.
# NOTE(review): this copy was rebuilt from a whitespace-collapsed version of the patch.
# Line breaks and blank lines agree with every hunk's old/new line counts, but leading
# indentation was lost; 4 spaces per level is assumed. Confirm the context lines of
# src/Mice.jl against the target file (or apply with --ignore-whitespace).
#
# ---- Project.toml -------------------------------------------------------------------
# Hunk 1 changes the version line and inserts PrecompileTools between LinearAlgebra and
# Printf in [deps]. Hunk 2 edits [weakdeps], [extensions] and [compat].
diff --git a/Project.toml b/Project.toml
index d354361..5fa687d 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,13 +1,14 @@
 name = "Mice"
 uuid = "d4678d24-b338-4f96-a2c8-a66549d61c16"
 authors = ["Tom Metherell and contributors"]
-version = "0.3.1"
+version = "0.3.2"
 
 [deps]
 AxisArrays = "39de3d68-74b9-583c-8d2d-e117c070f3a9"
 CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
 Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01"
@@ -18,16 +19,20 @@ StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d"
 Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
 
 [weakdeps]
+DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 RCall = "6f49c342-dc21-5d91-9882-a32aef131414"
 
 [extensions]
+MiceDataFramesExt = "DataFrames"
 MiceRCallExt = "RCall"
 
 [compat]
 AxisArrays = "0.4"
 CategoricalArrays = "0.10"
+DataFrames = "1.6"
 Distributions = "0.25"
-RCall = "0.13"
+PrecompileTools = "1.2"
+RCall = "0.13, 0.14"
 RecipesBase = "1.3.1"
 Statistics = "1"
 StatsAPI = "1.7"
# ---- ext/MiceDataFramesExt.jl (new file, 43 lines, no trailing newline) --------------
# The module imports names from CategoricalArrays, DataFrames, Mice, PrecompileTools and
# Random, then runs a single @compile_workload block which:
#   1. builds a 20-row DataFrame with seven columns (a-g) whose element types are
#      Union{Missing, Int}, Union{Missing, Float64}, Union{Missing, String},
#      Union{Missing, Bool}, two CategoricalArrays (Int and String) and a
#      Vector{Union{Missing, CategoricalValue}} drawn from a three-level CategoricalPool;
#   2. for every column, overwrites one randomly drawn row (rand(1:20, 1)) with `missing`;
#   3. calls mice(...) four times with m = 1, iter = 1, progressReports = false: once
#      with default methods, then with column "b" set to "mean", then "norm", then with
#      every entry of the methods vector set to "sample";
#   4. calls bindImputations, complete(..., 1) and listComplete on the first result.
# Why-note: `normMethods = meanMethods` and `sampleMethods = normMethods` are plain
# assignments, so all three names are bound to the same object and each later write
# replaces the previous setting in place; the calls run sequentially, so each mice call
# sees the setting written immediately before it.
# NOTE(review): the data is drawn from the unseeded default RNG, so the workload input
# differs between runs - presumably acceptable for a precompile workload; confirm.
diff --git a/ext/MiceDataFramesExt.jl b/ext/MiceDataFramesExt.jl
new file mode 100644
index 0000000..e125190
--- /dev/null
+++ b/ext/MiceDataFramesExt.jl
@@ -0,0 +1,43 @@
+module MiceDataFramesExt
+    using CategoricalArrays: CategoricalArray, CategoricalPool, CategoricalValue
+    using DataFrames: DataFrame
+    using Mice: bindImputations, complete, listComplete, makeMethods, mice
+    using PrecompileTools: @compile_workload
+    using Random: rand, randperm
+
+    @compile_workload begin
+        catPool = CategoricalPool(["a", "b", "c"])
+        df = DataFrame(
+            a = Vector{Union{Missing, Int}}(randperm(20)),
+            b = Vector{Union{Missing, Float64}}(randperm(20)),
+            c = Vector{Union{Missing, String}}(rand(["a", "b", "c"], 20)),
+            d = Vector{Union{Missing, Bool}}(rand(Bool, 20)),
+            e = CategoricalArray{Union{Missing, Int}}(rand([1, 2, 3], 20)),
+            f = CategoricalArray{Union{Missing, String}}(rand(["a", "b", "c"], 20)),
+            g = Vector{Union{Missing, CategoricalValue}}(rand([CategoricalValue(catPool, 1), CategoricalValue(catPool, 2), CategoricalValue(catPool, 3)], 20))
+        )
+
+        for col in axes(df, 2)
+            df[rand(1:20, 1), col] .= missing
+        end
+
+        imputedDataPmm = mice(df, m = 1, iter = 1, progressReports = false)
+
+        meanMethods = makeMethods(df)
+        meanMethods["b"] = "mean"
+        imputedDataMean = mice(df, m = 1, iter = 1, methods = meanMethods, progressReports = false)
+
+        normMethods = meanMethods
+        normMethods["b"] = "norm"
+        imputedDataNorm = mice(df, m = 1, iter = 1, methods = normMethods, progressReports = false)
+
+        sampleMethods = normMethods
+        sampleMethods[:] .= "sample"
+        imputedDataSample = mice(df, m = 1, iter = 1, methods = sampleMethods, progressReports = false)
+
+        bindImputations(imputedDataPmm, imputedDataPmm)
+
+        complete(imputedDataPmm, 1)
+        listComplete(imputedDataPmm)
+    end
+end
\ No newline at end of file
# ---- src/Mice.jl ---------------------------------------------------------------------
# Hunk 1 adds CategoricalPool to the CategoricalArrays import list and adds
# `using PrecompileTools: @compile_workload`; both names are used by src/precompile.jl.
# Hunk 2 appends `include("precompile.jl")` after the export line, as the last statement
# before the module's closing `end`.
# NOTE(review): the second hunk's section heading is `module Mice`, which suggests the
# module body is indented; the exact indent width of the context lines is assumed.
diff --git a/src/Mice.jl b/src/Mice.jl
index 5243c19..044fa22 100644
--- a/src/Mice.jl
+++ b/src/Mice.jl
@@ -1,9 +1,10 @@
 module Mice
     # Dependencies
     using AxisArrays: axes, AxisArray, AxisMatrix, AxisVector
-    using CategoricalArrays: CategoricalArray, CategoricalValue, levels
+    using CategoricalArrays: CategoricalArray, CategoricalPool, CategoricalValue, levels
     using Distributions: ccdf, Chisq, FDist, Normal, TDist
     using LinearAlgebra: cholesky, Diagonal, diagm, eigen, inv, qr, rank, svd
+    using PrecompileTools: @compile_workload
     using Printf: @printf
     using Random: rand, randn, randperm
     import RecipesBase: plot
@@ -459,4 +460,6 @@ module Mice
     end
 
     export bindImputations, complete, findMissings, listComplete, makeMethods, makePredictorMatrix, mice, Mids, Mipo, Mira, pool, plot, with
+
+    include("precompile.jl")
 end
\ No newline at end of file
# ---- src/precompile.jl (new file, 35 lines) ------------------------------------------
# Same sequence of calls as the extension workload, but the input `ct` is a NamedTuple of
# seven 20-element columns rather than a DataFrame. One randomly drawn element of each
# column is set to `missing` by iterating over the NamedTuple's values. The file has no
# imports of its own: it is included from inside `module Mice`, and relies on that
# module's imports (CategoricalPool, CategoricalArray, CategoricalValue, rand, randperm,
# @compile_workload) and on the functions named in the module's export list.
# The same aliasing applies here: meanMethods, normMethods and sampleMethods all name
# one object that is updated in place between mice calls.
diff --git a/src/precompile.jl b/src/precompile.jl
new file mode 100644
index 0000000..070b3cb
--- /dev/null
+++ b/src/precompile.jl
@@ -0,0 +1,35 @@
+@compile_workload begin
+    catPool = CategoricalPool(["a", "b", "c"])
+    ct = (
+        a = Vector{Union{Missing, Int}}(randperm(20)),
+        b = Vector{Union{Missing, Float64}}(randperm(20)),
+        c = Vector{Union{Missing, String}}(rand(["a", "b", "c"], 20)),
+        d = Vector{Union{Missing, Bool}}(rand(Bool, 20)),
+        e = CategoricalArray{Union{Missing, Int}}(rand([1, 2, 3], 20)),
+        f = CategoricalArray{Union{Missing, String}}(rand(["a", "b", "c"], 20)),
+        g = Vector{Union{Missing, CategoricalValue}}(rand([CategoricalValue(catPool, 1), CategoricalValue(catPool, 2), CategoricalValue(catPool, 3)], 20))
+    )
+
+    for col in ct
+        col[rand(1:20, 1)] .= missing
+    end
+
+    imputedDataPmm = mice(ct, m = 1, iter = 1, progressReports = false)
+
+    meanMethods = makeMethods(ct)
+    meanMethods["b"] = "mean"
+    imputedDataMean = mice(ct, m = 1, iter = 1, methods = meanMethods, progressReports = false)
+
+    normMethods = meanMethods
+    normMethods["b"] = "norm"
+    imputedDataNorm = mice(ct, m = 1, iter = 1, methods = normMethods, progressReports = false)
+
+    sampleMethods = normMethods
+    sampleMethods[:] .= "sample"
+    imputedDataSample = mice(ct, m = 1, iter = 1, methods = sampleMethods, progressReports = false)
+
+    bindImputations(imputedDataPmm, imputedDataPmm)
+
+    complete(imputedDataPmm, 1)
+    listComplete(imputedDataPmm)
+end