Skip to content

Commit

Permalink
feat(transactions): make type agnostic - can be any type as long as c…
Browse files Browse the repository at this point in the history
…onsistent
  • Loading branch information
mcmcgrath13 committed Feb 28, 2019
1 parent 3721b3a commit ac1dee6
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 15 deletions.
8 changes: 4 additions & 4 deletions src/frequent_itemset_tree.jl
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ end

# This function is used internally by the frequent() function to create the
# initial bitarrays used to represent the first "children" in the itemset tree.
function occurrence(transactions::Array{Array{String, 1}, 1}, uniq_items::Array{String, 1})
function occurrence(transactions::Array{Array{S, 1}, 1}, uniq_items::Array{S, 1}) where S
n = length(transactions)
p = length(uniq_items)

Expand All @@ -110,7 +110,7 @@ The tree is built recursively using calls to the growtree!() function. The
`minsupp` and `maxdepth` parameters control the minimum support needed for an
itemset to be called "frequent", and the max depth of the tree, respectively
"""
function frequent_item_tree(transactions::Array{Array{String, 1}, 1}, uniq_items::Array{String, 1}, minsupp::Int, maxdepth::Int)
function frequent_item_tree(transactions::Array{Array{S, 1}, 1}, uniq_items::Array{S, 1}, minsupp::Int, maxdepth::Int) where S
occ = occurrence(transactions, uniq_items)

# Have to initialize `itms` array like this because type inference
Expand Down Expand Up @@ -166,10 +166,10 @@ item sets and their support count (integer) when given an array of transactions
basically just wraps frequent_item_tree() but gives back the plain text of the items,
rather than their Int16 representation.
"""
function frequent(transactions::Array{Array{String, 1}, 1}, minsupp::T, maxdepth) where T <: Real
function frequent(transactions::Array{Array{S, 1}, 1}, minsupp::T, maxdepth) where {T <: Real, S}
n = length(transactions)
uniq_items = unique_items(transactions)
item_lkup = Dict{Int16, String}()
item_lkup = Dict{Int16, S}()
for (i, itm) in enumerate(uniq_items)
item_lkup[i] = itm
end
Expand Down
12 changes: 6 additions & 6 deletions src/rule_generation.jl
Original file line number Diff line number Diff line change
Expand Up @@ -93,19 +93,19 @@ function gen_rules(root::Node, supp_dict::Dict{Array{Int16, 1}, Int}, num_transa
end


function rules_to_dataframe(rules::Array{Rule, 1}, item_lkup::Dict{T, String}; join_str = " | ") where T <: Integer
function rules_to_dataframe(rules::Array{Rule, 1}, item_lkup::Dict{T, S}; join_str = " | ") where {T <: Integer, S}
n_rules = length(rules)
dt = DataFrame(lhs = fill("", n_rules),
rhs = fill("", n_rules),
supp = zeros(n_rules),
conf = zeros(n_rules),
lift = zeros(n_rules))
for i = 1:n_rules
lhs_items = map(x -> item_lkup[x], rules[i].p)
lhs_items = map(x -> string.(item_lkup[x]), rules[i].p)

lhs_string = "{" * join(lhs_items, join_str) * "}"
dt[i, :lhs] = lhs_string
dt[i, :rhs] = item_lkup[rules[i].q]
dt[i, :rhs] = string.(item_lkup[rules[i].q])
dt[i, :supp] = rules[i].supp
dt[i, :conf] = rules[i].conf
dt[i, :lift] = rules[i].lift
Expand All @@ -123,10 +123,10 @@ required for an itemset to be considered frequent. The `conf` argument allows us
association rules without at least `conf` level of confidence. The `maxlen` argument stipulates
the maximum length of an association rule (i.e., total items on left- and right-hand sides)
"""
function apriori(transactions::Array{Array{String, 1}, 1}; supp::Float64 = 0.01, conf = 0.8, maxlen::Int = 5)
function apriori(transactions::Array{Array{S, 1}, 1}; supp::Float64 = 0.01, conf = 0.8, maxlen::Int = 5) where S
n = length(transactions)
uniq_items = unique_items(transactions)
item_lkup = Dict{Int16, String}()
item_lkup = Dict{Int16, S}()
for (i, itm) in enumerate(uniq_items)
item_lkup[i] = itm
end
Expand All @@ -145,7 +145,7 @@ end
"""
apriori(occurrences, item_lkup; supp, conf, maxlen)
Given a boolean occurrence matrix of transactions (rows are transactions, columns are items) and
Given a boolean occurrence matrix of transactions (rows are transactions, columns are items) and
a lookup dictionary of column-index to items-string, this function runs the a-priori
algorithm for generating frequent item sets. These frequent items are then used to generate
association rules. The `supp` argument allows us to stipulate the minimum support
Expand Down
7 changes: 2 additions & 5 deletions src/utils.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@

function shownodes(node::Node, k::Int = 0)
if has_children(node)
for nd in node.children
for nd in node.children
print("k = $(k + 1): ")
println(nd.item_ids)
end
Expand All @@ -11,11 +11,8 @@ function shownodes(node::Node, k::Int = 0)
end
end




function randstr(n::Int, len::Int = 16)
vals = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
vals = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
"n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z"]
upper = map(uppercase, vals)
append!(vals, upper)
Expand Down

0 comments on commit ac1dee6

Please sign in to comment.