-
Notifications
You must be signed in to change notification settings - Fork 43
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Materialize DimArray or DimStack From a Table #739
base: main
Are you sure you want to change the base?
Changes from all commits
60256a0
3526b96
eab2fa0
d4892df
ea6751a
13c80da
6a9d26e
9164c22
2ebec1c
8e791bf
4cd5f9d
0c1991a
8758ba9
4534de5
119fa30
ed395ca
00336af
532f887
c98dcb0
06a2c91
3bacf33
4ced6f7
c846dfd
fe2c871
61f8220
3d28b43
dbe7b99
f410988
a17f069
9bdded9
5451087
faf4d76
02f60a3
fafd357
d7f15f5
34a0a69
d0b9eb7
32b0c00
0ea72a0
bc62932
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -278,6 +278,7 @@ end | |
""" | ||
DimStack <: AbstractDimStack | ||
|
||
DimStack(table, [dims]; kw...) | ||
DimStack(data::AbstractDimArray...; kw...) | ||
DimStack(data::Tuple{Vararg{AbstractDimArray}}; kw...) | ||
DimStack(data::NamedTuple{Keys,Vararg{AbstractDimArray}}; kw...) | ||
|
@@ -420,5 +421,12 @@ function DimStack(data::NamedTuple, dims::Tuple; | |
all(map(d -> axes(d) == axes(first(data)), data)) || _stack_size_mismatch() | ||
DimStack(data, format(dims, first(data)), refdims, layerdims, metadata, layermetadata) | ||
end | ||
# Materialize a DimStack from a table: every column that is not a coordinate column of
# `dims` becomes one layer. `selector` is forwarded to `coords_to_indices`; all other
# keywords are forwarded to the `DimStack(::NamedTuple, ::Tuple)` constructor.
function DimStack(table, dims::Tuple; selector=DimensionalData.Contains(), kw...)
    # NOTE(review): a reviewer flagged this column extraction (their remark is truncated
    # in this view) - presumably the table should pass through `Tables.columns` first;
    # confirm against `_data_cols`.
    data_cols = _data_cols(table, dims)
    indices = coords_to_indices(table, dims; selector=selector)
    # `restore_array` takes `missingval` as its fourth positional argument; passing it
    # as a keyword would raise a MethodError.
    layers = map(col -> restore_array(col, indices, dims, missing), data_cols)
    return DimStack(layers, dims; kw...)
end
|
||
layerdims(s::DimStack{<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,Nothing}, name::Symbol) = dims(s) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,299 @@ | ||
"""
    restore_array(data::AbstractVector, indices::AbstractVector{<:Integer}, dims::Tuple, missingval)

Restore a dimensional array from its tabular representation.

# Arguments
- `data`: An `AbstractVector` containing the flat data to be written to a `DimArray`.
- `indices`: An `AbstractVector` containing the flat (linear) index in the destination
  array of each element in `data`.
- `dims`: The dimensions of the destination `DimArray`.
- `missingval`: The value to write to cells that no element of `data` maps to. It is
  passed through `_cast_missing` before being written.

# Returns
An `Array` containing the values of `data` placed at `indices`, with one axis per entry
of `dims` (each of length `length(dim)`).
"""
function restore_array(data::AbstractVector, indices::AbstractVector{<:Integer}, dims::Tuple, missingval)
    dst_shape = map(length, dims)
    n = prod(dst_shape)

    # Mark the cells that receive a value from `data`
    filled = falses(n)
    filled[indices] .= true

    if all(filled)
        # Every cell is overwritten below, so no fill value is needed
        dst = Vector{eltype(data)}(undef, n)
    else
        # Pre-fill with the missing value so that no uninitialised slot is ever read;
        # reading `undef` slots throws for element types such as `String`.
        fillval = _cast_missing(data, missingval)
        dst = Vector{Base.promote_typejoin(eltype(data), typeof(fillval))}(undef, n)
        fill!(dst, fillval)
    end
    dst[indices] .= data

    return reshape(dst, dst_shape)
end
|
||
"""
    coords_to_indices(table, dims; selector=Near())

Compute, for every row of `table`, the flat index of that row in an array whose
dimensions are `dims`.

The coordinate columns are looked up by the name of each dimension in `dims`, and the
source/destination array is assumed to have the same dimension order as `dims`.

# Arguments
- `table`: A table representation of a dimensional array.
- `dims`: A `Tuple` of `Dimension` corresponding to the source/destination array.
- `selector`: The selector type to use for non-numerical/irregular coordinates.

# Example
```julia
julia> d = DimArray(rand(256, 256), (X, Y));

julia> t = DimTable(d);

julia> coords_to_indices(t, dims(d))
65536-element Vector{Int64}:
     1
     2
     ⋮
 65535
 65536
```
"""
coords_to_indices(table, dims::Tuple; selector=DimensionalData.Near()) =
    _coords_to_indices(table, dims, selector)
|
||
# Map each table row to its flat index in the array described by `dims`.
# A table is first reduced to a NamedTuple of its coordinate columns.
function _coords_to_indices(table, dims::Tuple, sel::DimensionalData.Selector)
    return _coords_to_indices(_dim_cols(table, dims), dims, sel)
end
# Coordinates -> per-dimension ordinal positions -> flat indices
_coords_to_indices(coords::NamedTuple, dims::Tuple, sel::DimensionalData.Selector) =
    _ords_to_indices(_coords_to_ords(coords, dims, sel), dims)
|
||
"""
    guess_dims(table; kw...)
    guess_dims(table, dims; precision=6)

Guesses the dimensions of an array based on the provided tabular representation.

# Arguments
- `table`: The input data table, which could be a `DataFrame`, `DimTable`, or any other Tables.jl compatible data structure.
The dimensions will be inferred from the corresponding coordinate columns in the table.
- `dims`: A `Tuple` of one or more dimensions to be inferred. If no dimensions are specified, then `guess_dims` will default
to any available dimensions in the set `(:X, :Y, :Z, :Ti, :Band)`. Each dimension can be given as either a singular
value or as a `Pair` with both the dimension and corresponding order. The order will be inferred from the data
when none is given. This should work for sorted coordinates, but will not be sufficient when the table's rows are
out of order.

# Keyword Arguments
- `precision`: Specifies the number of digits to use for guessing dimensions (default = `6`).

# Returns
A tuple containing the inferred dimensions from the table.

# Example
```julia
julia> xdims = X(LinRange{Float64}(610000.0, 661180.0, 2560));

julia> ydims = Y(LinRange{Float64}(6.84142e6, 6.79024e6, 2560));

julia> bdims = Dim{:Band}([:B02, :B03, :B04]);

julia> d = DimArray(rand(UInt16, 2560, 2560, 3), (xdims, ydims, bdims));

julia> t = DataFrame(d);

julia> t_rand = Random.shuffle(t);

julia> dims(d)
↓ X    Sampled{Float64} LinRange{Float64}(610000.0, 661180.0, 2560) ForwardOrdered Regular Points,
→ Y    Sampled{Float64} LinRange{Float64}(6.84142e6, 6.79024e6, 2560) ReverseOrdered Regular Points,
↗ Band Categorical{Symbol} [:B02, :B03, :B04] ForwardOrdered

julia> DD.guess_dims(t)
↓ X    Sampled{Float64} 610000.0:20.0:661180.0 ForwardOrdered Regular Points,
→ Y    Sampled{Float64} 6.84142e6:-20.0:6.79024e6 ReverseOrdered Regular Points,
↗ Band Categorical{Symbol} [:B02, :B03, :B04] ForwardOrdered

julia> DD.guess_dims(t, (X, Y, :Band))
↓ X    Sampled{Float64} 610000.0:20.0:661180.0 ForwardOrdered Regular Points,
→ Y    Sampled{Float64} 6.84142e6:-20.0:6.79024e6 ReverseOrdered Regular Points,
↗ Band Categorical{Symbol} [:B02, :B03, :B04] ForwardOrdered

julia> DD.guess_dims(t_rand, (X => DD.ForwardOrdered, Y => DD.ReverseOrdered, :Band => DD.ForwardOrdered))
↓ X    Sampled{Float64} 610000.0:20.0:661180.0 ForwardOrdered Regular Points,
→ Y    Sampled{Float64} 6.84142e6:-20.0:6.79024e6 ReverseOrdered Regular Points,
↗ Band Categorical{Symbol} [:B02, :B03, :B04] ForwardOrdered
```
"""
guess_dims(table; kw...) = guess_dims(table, _dim_col_names(table); kw...)
function guess_dims(table, dims::Tuple; precision=6)
    # Each entry of `dims` is resolved independently from its own coordinate column
    return map(dim -> _guess_dims(get_column(table, dim), dim, precision), dims)
end
|
||
"""
    get_column(table, dim::Type{<:DD.Dimension})
    get_column(table, dim::DD.Dimension)
    get_column(table, dim::Symbol)
    get_column(table, dim::Pair)

Retrieve the coordinate data stored in the column specified by `dim`.

# Arguments
- `table`: The input data table, which could be a `DataFrame`, `DimTable`, or any other Tables.jl compatible data structure.
- `dim`: A single dimension to be retrieved, which may be a `Symbol`, a `Dimension`, or a `Dimension => Order` pair.
"""
get_column(table, x::Type{<:DD.Dimension}) = get_column(table, DD.name(x))
get_column(table, x::DD.Dimension) = get_column(table, DD.name(x))
# `Tables.getcolumn` is specified for column-access objects, so normalise the input with
# `Tables.columns` first; this also lets row-oriented tables work.
get_column(table, x::Symbol) = Tables.getcolumn(Tables.columns(table), x)
# For a `dim => order` pair only the dimension part identifies the column
get_column(table, x::Pair) = get_column(table, first(x))
|
||
"""
    data_col_names(table, dims::Tuple)

Return the names of all columns that don't match the dimensions given by `dims`.

# Arguments
- `table`: The input data table, which could be a `DataFrame`, `DimTable`, or any other Tables.jl compatible data structure.
- `dims`: A `Tuple` of one or more `Dimensions`.
"""
function data_col_names(table, dims::Tuple)
    dim_names = DD.name(dims)
    # Query names on the column-access object, as the Tables.jl interface specifies
    col_names = Tables.columnnames(Tables.columns(table))
    return filter(!in(dim_names), col_names)
end
|
||
# Normalise the different ways a dimension can be specified.
# An already constructed `Dimension` is returned unchanged.
_guess_dims(coords::AbstractVector, dim::DD.Dimension, args...) = dim
# A `Dimension` type is replaced by its name
function _guess_dims(coords::AbstractVector, dim::Type{<:DD.Dimension}, args...)
    return _guess_dims(coords, DD.name(dim), args...)
end
# A pair is split into its two halves, which become separate arguments
function _guess_dims(coords::AbstractVector, dim::Pair, args...)
    return _guess_dims(coords, dim.first, dim.second, args...)
end
# An order passed as a type is instantiated before dispatching further
_guess_dims(coords::AbstractVector, dim::Symbol, ::Type{T}, precision::Int) where {T <: DD.Order} =
    _guess_dims(coords, dim, T(), precision)
function _guess_dims(coords::AbstractVector, dim::Symbol, precision::Int) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should this be I'm wondering what happens to strings, symbols and other objects that need to go in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It should work with almost any type. If the coordinates are non-numerical, then we will internally dispatch on the following methods: # Extract all unique coordinates from the given vector
_unique_vals(coords::AbstractVector, precision::Int) = _round_dim_val.(coords, precision) |> unique
# Round dimension value within the specified precision
_round_dim_val(x, ::Int) = x
# Determine if the given coordinates are forward ordered, reverse ordered, or unordered
function _guess_dim_order(coords::AbstractVector)
if issorted(coords)
return DD.ForwardOrdered()
elseif issorted(coords, rev=true)
return DD.ReverseOrdered()
else
return DD.Unordered()
end
end
# Estimate the span between consecutive coordinates
_guess_dim_span(::AbstractVector, ::DD.Order, ::Int) = DD.Irregular()
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We used to need a try catch for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We can also find regular spans for Dates? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This should let us handle data that can't be sorted: function _guess_dim_order(coords::AbstractVector)
try
if issorted(coords)
return DD.ForwardOrdered()
elseif issorted(coords, rev=true)
return DD.ReverseOrdered()
else
return DD.Unordered()
end
catch
return DD.Unordered()
end
end And this should retrieve the span from function _guess_dim_span(coords::AbstractVector{<:Dates.AbstractTime}, ::DD.Ordered, precision::Int)
steps = (@view coords[2:end]) .- (@view coords[1:end-1])
span = argmin(abs, steps)
return all(isinteger, round.(steps ./ span, digits=precision)) ? DD.Regular(span) : DD.Irregular()
end However, there seems to be a problem with constructing a julia> vals = [Date("2022-11-16") + Day(i * 7) for i in 0:4];
julia> LinRange(first(vals), last(vals), 5)
5-element LinRange{Day, Int64}:
Error showing value of type LinRange{Day, Int64}:
ERROR: InexactError: Int64(553856.25) Thus, I'm not sure how we should construct a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. A StepRange works for Dates. Probably we should use StepRangeLen instead of LinRange where possible anyway There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That works. Do you want to use Here's the result: julia> xdims = X(LinRange{Float64}(610000.0, 661180.0, 2560));
julia> ydims = Y(LinRange{Float64}(6.84142e6, 6.79024e6, 2560));
julia> bdims = Dim{:Band}([:B02, :B03, :B04]);
julia> tdims = Dim{:Ti}([d1 + Day(i * 7) for i in 0:4]);
julia> d = DimArray(rand(UInt16, 2560, 2560, 3, 5), (xdims, ydims, bdims, tdims));
julia> t = DataFrame(d);
julia> DD.guess_dims(t)
↓ X Sampled{Float64} 610000.0:20.0:661180.0 ForwardOrdered Regular Points,
→ Y Sampled{Float64} 6.84142e6:-20.0:6.79024e6 ReverseOrdered Regular Points,
↗ Ti Sampled{Date} Date("2024-11-18"):Day(7):Date("2024-12-16") ForwardOrdered Regular Points,
⬔ Band Categorical{Symbol} [:B02, :B03, :B04] ForwardOrdered There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's probably better for everything, except a bit slower. It didn't exist when I first wrote this package and uses of LinRange are just legacy from that. |
||
dim_vals = _dim_vals(coords, precision) | ||
order = _guess_dim_order(dim_vals) | ||
span = _guess_dim_span(dim_vals, order, precision) | ||
return _build_dim(dim_vals, dim, order, span) | ||
end | ||
# Build the dimension named `dim` when the caller has supplied its order explicitly
function _guess_dims(coords::AbstractVector, dim::Symbol, order::DD.Order, precision::Int)
    vals = _dim_vals(coords, order, precision)
    return _build_dim(vals, dim, order, _guess_dim_span(vals, order, precision))
end
|
||
# Extract the coordinate columns of `table` as a NamedTuple keyed by dimension name
function _dim_cols(table, dims::Tuple)
    # `Tables.getcolumn` is specified for column-access objects, so normalise first
    cols = Tables.columns(table)
    dim_names = DD.name(dims)
    return NamedTuple{dim_names}(map(name -> Tables.getcolumn(cols, name), dim_names))
end
|
||
# Extract dimension column names from the given table: the subset of
# (:X, :Y, :Z, :Ti, :Band) that is present among the table's columns.
function _dim_col_names(table)
    # Look the names up once, on the column-access object
    present = Tables.columnnames(Tables.columns(table))
    return filter(in(present), (:X, :Y, :Z, :Ti, :Band))
end
# NOTE(review): despite its name, this method returns the coordinate columns themselves
# (one per dimension in `dims`), not their names - confirm this is intended.
function _dim_col_names(table, dims::Tuple)
    cols = Tables.columns(table)
    return map(col -> Tables.getcolumn(cols, col), DD.name(dims))
end
|
||
# Extract the data (non-coordinate) columns of `table` as a NamedTuple keyed by column name
function _data_cols(table, dims::Tuple)
    # `Tables.getcolumn` is specified for column-access objects, so normalise first
    cols = Tables.columns(table)
    col_names = Tuple(data_col_names(cols, dims))
    return NamedTuple{col_names}(map(name -> Tables.getcolumn(cols, name), col_names))
end
|
||
# Determine the ordinality of a set of coordinates.
# Single dimension: add the dimension's locus and span so later methods can dispatch on them.
function _coords_to_ords(coords::AbstractVector, dim::Dimension, sel::DD.Selector)
    return _coords_to_ords(coords, dim, sel, DD.locus(dim), DD.span(dim))
end
# Tuple of coordinate vectors: pair each vector with its dimension
function _coords_to_ords(coords::Tuple, dims::Tuple, sel::DD.Selector)
    matched = zip(coords, dims)
    return Tuple(_coords_to_ords(first(m), last(m), sel) for m in matched)
end
# NamedTuple of coordinate vectors: reorder the columns to follow `dims`
function _coords_to_ords(coords::NamedTuple, dims::Tuple, sel::DD.Selector)
    ordered = map(DD.name(dims)) do name
        coords[name]
    end
    return _coords_to_ords(ordered, dims, sel)
end
|
||
# Ordinal positions for regularly spaced numerical coordinates under the `Near` selector:
# computed arithmetically from the first value and the step, rounded according to the
# locus, then clamped into `1:length(dim)`.
function _coords_to_ords(
    coords::AbstractVector{<:Real},
    dim::Dimension,
    ::DimensionalData.Near,
    position::DimensionalData.Position,
    span::DimensionalData.Regular)
    origin = first(dim)
    spacing = DD.step(span)
    fractional = @. (coords - origin) / spacing + 1
    return clamp!(_round_ords(fractional, position), 1, length(dim))
end
|
||
JoshuaBillson marked this conversation as resolved.
Show resolved
Hide resolved
|
||
# Fallback for every other position/span combination: resolve each coordinate separately
# by rebuilding the selector around it and asking the dimension for the matching index.
function _coords_to_ords(
    coords::AbstractVector,
    dim::Dimension,
    sel::DimensionalData.Selector,
    ::DimensionalData.Position,
    ::DimensionalData.Span)
    return map(coords) do coord
        DimensionalData.selectindices(dim, rebuild(sel, coord))
    end
end
|
||
# Convert fractional ordinal positions to integers; the rounding direction follows the
# locus: `Start` rounds down, `Center` rounds to nearest, `End` rounds up.
function _round_ords(ords::AbstractVector{<:Real}, ::DimensionalData.Start)
    return @. floor(Int, ords)
end
function _round_ords(ords::AbstractVector{<:Real}, ::DimensionalData.Center)
    return @. round(Int, ords)
end
function _round_ords(ords::AbstractVector{<:Real}, ::DimensionalData.End)
    return @. ceil(Int, ords)
end
|
||
# Reduce a coordinate column to the values of its dimension: the unique coordinates,
# sorted when the requested order is forward or reverse.
_dim_vals(coords::AbstractVector, precision::Int) = _unique_vals(coords, precision)
function _dim_vals(coords::AbstractVector, ::DD.Order, precision::Int)
    return _unique_vals(coords, precision)
end
function _dim_vals(coords::AbstractVector, ::DD.ForwardOrdered, precision::Int)
    vals = _unique_vals(coords, precision)
    return sort!(vals)
end
function _dim_vals(coords::AbstractVector, ::DD.ReverseOrdered, precision::Int)
    vals = _unique_vals(coords, precision)
    return sort!(vals, rev=true)
end
|
||
# Collect the distinct coordinates in order of first appearance.
# Real-valued coordinates are rounded to `precision` digits first, so values that differ
# only beyond that precision collapse into one.
function _unique_vals(coords::AbstractVector, ::Int)
    return unique(coords)
end
function _unique_vals(coords::AbstractVector{<:Real}, precision::Int)
    rounded = round.(coords, digits=precision)
    return unique(rounded)
end
|
||
# Classify coordinates as forward ordered, reverse ordered, or unordered.
# Any error raised while testing sortedness (e.g. for values that cannot be compared)
# results in `Unordered`.
function _guess_dim_order(coords::AbstractVector)
    try
        issorted(coords) && return DD.ForwardOrdered()
        issorted(coords, rev=true) && return DD.ReverseOrdered()
    catch
        # fall through to the unordered result below
    end
    return DD.Unordered()
end
|
||
# Estimate the span between consecutive coordinates.
# Anything that is not an ordered vector of reals or time values is treated as irregular.
_guess_dim_span(::AbstractVector, ::DD.Order, ::Int) = DD.Irregular()
function _guess_dim_span(coords::AbstractVector{<:Real}, ::DD.Ordered, precision::Int)
    # With fewer than two coordinates there is no step to measure
    length(coords) < 2 && return DD.Irregular()
    steps = round.(diff(coords), digits=precision)
    return _span_from_steps(steps, precision)
end
function _guess_dim_span(coords::AbstractVector{<:Dates.AbstractTime}, ::DD.Ordered, precision::Int)
    # With fewer than two coordinates there is no step to measure
    length(coords) < 2 && return DD.Irregular()
    return _span_from_steps(diff(coords), precision)
end

# Take the step of smallest magnitude as the candidate span and call the axis regular
# when every step is an integer multiple of it (to `precision` digits).
function _span_from_steps(steps::AbstractVector, precision::Int)
    span = argmin(abs, steps)
    isregular = all(isinteger, round.(steps ./ span, digits=precision))
    return isregular ? DD.Regular(span) : DD.Irregular()
end
|
||
# Assemble the final dimension from its values, name, order and span.
# Generic values get a `Categorical` lookup.
function _build_dim(vals::AbstractVector, dim::Symbol, order::DD.Order, ::DD.Span)
    lookup = DD.Categorical(vals, order=order)
    return rebuild(name2dim(dim), lookup)
end
# Numbers and time values with an irregular span keep their values in a `Sampled` lookup
function _build_dim(vals::AbstractVector{<:Union{Number,Dates.AbstractTime}}, dim::Symbol, order::DD.Order, span::DD.Irregular)
    lookup = DD.Sampled(vals, order=order, span=span, sampling=DD.Points())
    return rebuild(name2dim(dim), lookup)
end
# Numbers and time values with a regular span are replaced by a range that starts at the
# first value and advances by the span's step
function _build_dim(vals::AbstractVector{<:Union{Number,Dates.AbstractTime}}, dim::Symbol, order::DD.Order, span::DD.Regular)
    start = first(vals)
    len = round(Int, abs((last(vals) - start) / span.step) + 1)
    lookup = DD.Sampled(StepRangeLen(start, span.step, len), order=order, span=span, sampling=DD.Points())
    return rebuild(name2dim(dim), lookup)
end
|
||
# Combine per-dimension ordinal positions into flat indices. Each dimension's stride is
# the product of the lengths of the dimensions before it.
function _ords_to_indices(ords, dims)
    offsets = zeros(Int, length(ords[1]))
    stride = 1
    for (ord, dim) in zip(ords, dims)
        @. offsets += (ord - 1) * stride
        stride *= length(dim)
    end
    # shift the zero-based offsets to one-based indices
    return offsets .+ 1
end
|
||
# Choose the value to store in cells that have no data.
# `missing` is kept as is; any other value is converted to the array's element type when
# `convert` succeeds, and returned unchanged when it throws.
_cast_missing(::AbstractArray, ::Missing) = missing
function _cast_missing(::AbstractArray{T}, missingval) where {T}
    converted = try
        convert(T, missingval)
    catch
        missingval
    end
    return converted
end
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
People will also hit this method if they do something weird like pass a non-AbstractArray to
DimArray
.