diff --git a/Project.toml b/Project.toml index 09381711..a67eaaf9 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "BetaML" uuid = "024491cd-cc6b-443e-8034-08ea7eb7db2b" authors = ["Antonello Lobianco "] -version = "0.11.3" +version = "0.11.4" [deps] AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" diff --git a/src/Clustering/Clustering_hard.jl b/src/Clustering/Clustering_hard.jl index 1ba6336b..7d97b58d 100644 --- a/src/Clustering/Clustering_hard.jl +++ b/src/Clustering/Clustering_hard.jl @@ -250,7 +250,7 @@ $(TYPEDFIELDS) Base.@kwdef mutable struct KMeansC_hp <: BetaMLHyperParametersSet "Number of classes to discriminate the data [def: 3]" n_classes::Int64 = 3 - "Function to employ as distance. Default to the Euclidean distance. Can be one of the predefined distances (`l1_distance`, `l2_distance`, `l2squared_distance`), `cosine_distance`), any user defined function accepting two vectors and returning a scalar or an anonymous function with the same characteristics. Attention that the `KMeansClusterer` algorithm is not guaranteed to converge with other distances than the Euclidean one." + "Function to employ as distance. Default to the Euclidean distance. Can be one of the predefined distances (`l1_distance`, `l2_distance`, `l2squared_distance`, `cosine_distance`), any user defined function accepting two vectors and returning a scalar or an anonymous function with the same characteristics. Attention that the `KMeansClusterer` algorithm is not guaranteed to converge with other distances than the Euclidean one." dist::Function = (x,y) -> norm(x-y) """ The computation method of the vector of the initial representatives. @@ -276,7 +276,7 @@ $(TYPEDFIELDS) Base.@kwdef mutable struct KMedoidsC_hp <: BetaMLHyperParametersSet "Number of classes to discriminate the data [def: 3]" n_classes::Int64 = 3 - "Function to employ as distance. Default to the Euclidean distance. 
Can be one of the predefined distances (`l1_distance`, `l2_distance`, `l2squared_distance`), `cosine_distance`), any user defined function accepting two vectors and returning a scalar or an anonymous function with the same characteristics. Attention that the `KMeansClusterer` algorithm is not guaranteed to converge with other distances than the Euclidean one." + "Function to employ as distance. Default to the Euclidean distance. Can be one of the predefined distances (`l1_distance`, `l2_distance`, `l2squared_distance`, `cosine_distance`), any user defined function accepting two vectors and returning a scalar or an anonymous function with the same characteristics. Attention that the `KMeansClusterer` algorithm is not guaranteed to converge with other distances than the Euclidean one." dist::Function = (x,y) -> norm(x-y) """ The computation method of the vector of the initial representatives. diff --git a/src/Utils/Measures.jl b/src/Utils/Measures.jl index 42990c82..2071e905 100644 --- a/src/Utils/Measures.jl +++ b/src/Utils/Measures.jl @@ -6,6 +6,7 @@ # ------------------------------------------------------------------------------ # Some common distance measures +# Reference: https://weaviate.io/blog/distance-metrics-in-vector-search """L1 norm distance (aka _Manhattan Distance_)""" l1_distance(x,y) = sum(abs.(x-y)) """Euclidean (L2) distance""" @@ -13,7 +14,7 @@ l2_distance(x,y) = norm(x-y) """Squared Euclidean (L2) distance""" l2squared_distance(x,y) = norm(x-y)^2 """Cosine distance""" -cosine_distance(x,y) = dot(x,y)/(norm(x)*norm(y)) +cosine_distance(x,y) = 1-dot(x,y)/(norm(x)*norm(y)) """ $(TYPEDSIGNATURES) diff --git a/test/Utils_tests.jl b/test/Utils_tests.jl index 091f94a6..f4af8a42 100644 --- a/test/Utils_tests.jl +++ b/test/Utils_tests.jl @@ -723,6 +723,12 @@ size(w2) == (4,) eltype(w2) == Float64 +# ================================== +# NEW TEST +println("** Testing cosine distance....") +x = [0,1]; y = [1,0] +@test cosine_distance(x,y) == 1 + # MLJ Tests # 
================================== # NEW TEST