Skip to content

Commit

Permalink
Merge pull request #4 from Cyrik/wip/co-effects
Browse files Browse the repository at this point in the history
added co-effects testing
  • Loading branch information
jcpsantiago authored Jan 31, 2022
2 parents 2d7369e + 5831523 commit e447b51
Show file tree
Hide file tree
Showing 5 changed files with 209 additions and 12 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,7 @@
.hgignore
.hg/
.DS_Store
.lsp
.lh
.calva
.clj-kondo
75 changes: 68 additions & 7 deletions src/jcpsantiago/bulgogi.clj
Original file line number Diff line number Diff line change
@@ -1,19 +1,67 @@
(ns jcpsantiago.bulgogi
" À-la-carte transformations of data, useful in ML systems.")

(defn- update-keys
  "Returns a map holding the values of `m`, each stored under `(f key)`
  instead of its original key. When two keys map to the same new key the
  entry visited last wins."
  [m f]
  (persistent!
   (reduce-kv (fn [acc k v]
                (assoc! acc (f k) v))
              (transient {})
              m)))

(defn- all-special-functions
  "Collects the public vars of every loaded namespace whose metadata holds a
  truthy value under `fn-type`.

  Returns a map of symbol -> var; an empty map when no namespace is marked.
  When `namespaced?` is true (the default for the 1-arity) the keys are
  namespace-qualified symbols, otherwise they are the bare var names.

  Throws Exception (message starting with \"Conflict between: \") when two
  marked namespaces produce the same key."
  ([fn-type]
   (all-special-functions fn-type true))
  ([fn-type namespaced?]
   (let [per-ns (->> (all-ns)
                     (filter #(fn-type (meta %)))
                     (map (fn [ns]
                            (update-keys (ns-publics ns)
                                         #(if namespaced?
                                            (symbol (str ns) (str %))
                                            %)))))]
     ;; merge-with only calls its function for keys present in more than one
     ;; map, i.e. exactly when two namespaces clash on a name.
     ;; With no marked namespace merge-with returns nil; normalise to {} so
     ;; callers always receive a map.
     (or (apply merge-with
                #(throw (Exception. (str "Conflict between: " %1 " and " %2)))
                per-ns)
         {}))))


(defn all-features
  "Returns a map of unqualified feature symbol -> var, gathered from every
  loaded namespace tagged with `::features` metadata."
  []
  (let [namespaced? false]
    (all-special-functions ::features namespaced?)))


(defn all-coeffects
  "Returns a map of namespace-qualified coeffect symbol -> var, gathered from
  every loaded namespace tagged with `::coeffects` metadata."
  []
  (let [namespaced? true]
    (all-special-functions ::coeffects namespaced?)))


(defn- resolved-features
[features -ns]
(->> features
(map #(let [sym (symbol %)]
(ns-resolve (find-ns -ns) sym)))))
[features]
(let [all (all-features)]
(->> features
(map #(get all (symbol %))))))


(defn- transformed
  "Calls every function in `fns` with `input-data` through `pmap` and returns
  the results as a lazy seq in the same order as `fns`."
  [input-data fns]
  (let [apply-to-input (fn [f] (f input-data))]
    (pmap apply-to-input fns)))


(defn- enriched
  "Returns `input-data` extended with the maps produced by the coeffect
  functions in `fns`.

  Each function is called with `input-data` (in parallel via `pmap`) and must
  return a map. The results are merged on top of `input-data`, so the
  original keys stay available to features that do not use coeffects; on a
  key clash the coeffect value wins, and later coeffects win over earlier
  ones. With no coeffects `input-data` is returned untouched."
  [input-data fns]
  (if (empty? fns)
    input-data
    (->> fns
         (pmap #(% input-data))
         ;; Start the merge from input-data: merging only the coeffect
         ;; results would drop every original key.
         (apply merge input-data))))


;; Memoized wrapper around `resolved-features`: a given argument list is
;; resolved once and the cached result is returned on later calls.
(def ^{:private true} memoized-features
  (memoize resolved-features))


;; Memoized lookup of the coeffect functions required by a collection of
;; feature vars.
;;
;; Each var may name a coeffect under the :bulgogi/coeffect metadata key; the
;; name is turned into a symbol and looked up in `(all-coeffects)`.
;; Returns a vector of coeffect vars without duplicates, in first-seen order.
;; Throws ex-info when a feature names a coeffect that is not registered, so
;; the problem is reported here instead of as a NullPointerException later.
;; A call that throws is not cached by `memoize`.
(def ^:private memoized-coeffects
  (memoize
   (fn [fn-vars]
     (let [all (all-coeffects)]
       (->> fn-vars
            (map #(:bulgogi/coeffect (meta %)))
            (remove nil?)
            ;; several features may share a coeffect; run it only once
            distinct
            (mapv (fn [coeffect-name]
                    (or (get all (symbol coeffect-name))
                        (throw (ex-info (str "Unknown coeffect: " coeffect-name)
                                        {:coeffect coeffect-name}))))))))))


(defn preprocessed
"
Takes a request map with keys :input-data and :features.
Expand All @@ -29,9 +77,22 @@
Looks for the features in the namespace and applies them to the input-data
in parallel. Returns a map of feature-keys and feature-values.
"
[req -ns]
[req]
(let [{:keys [input-data features]} req
fns (resolved-features features -ns)
fns (memoized-features features)
coeffects (memoized-coeffects fns)
fn-ks (map keyword features)]
(->> (transformed input-data fns)
(->> (transformed (enriched input-data coeffects) fns)
(zipmap fn-ks))))


(defn- feature-conflicts?
  "Returns true when two or more loaded namespaces tagged with `::features`
  metadata expose a public var with the same name, false otherwise.

  The names are read straight from the namespaces, so this can be called
  when a clash exists without triggering the conflict exception."
  []
  (let [feature-names (->> (all-ns)
                           (filter #(::features (meta %)))
                           (mapcat #(keys (ns-publics %))))]
    ;; Names are unique within one namespace, so any duplicate in the
    ;; combined list is a clash between namespaces.
    (not= (count feature-names)
          (count (distinct feature-names)))))

;; NOTE(review): this top-level form is evaluated every time the namespace is
;; loaded and its result is discarded. Because `all-features` throws on a name
;; conflict, a conflict would also make loading this namespace fail.
;; Looks like a leftover REPL experiment — confirm before removing.
(all-features)

;; REPL scratchpad: forms inside `comment` are not evaluated on load.
(comment
(all-features)
(feature-conflicts?))
126 changes: 121 additions & 5 deletions test/jcpsantiago/bulgogi_test.clj
Original file line number Diff line number Diff line change
@@ -1,9 +1,125 @@
(ns jcpsantiago.bulgogi-test
{:jcpsantiago.bulgogi/features true
:jcpsantiago.bulgogi/coeffects true}
(:require
[clojure.test :refer :all]
[jcpsantiago.bulgogi :refer :all]))
[clojure.string :as s]
[clojure.test :refer :all]
[clojure.set]
[jcpsantiago.features]
[jcpsantiago.bulgogi :as SUT]))


(deftest a-test
(testing "will be gone soon :D"
(is (= 1 1))))
(defn boolean->int
  "Casts a boolean to a 1/0 integer indicator.

  Returns 1 only for `true`; every other value, including `false` and `nil`,
  yields 0."
  [b]
  ;; The previous `(when boolean? b ...)` tested the function object
  ;; `boolean?`, which is always truthy, so it never guarded anything.
  ;; A real `(boolean? b)` guard is not used on purpose: callers pass the
  ;; result of `some`, which is nil when nothing matches, and rely on 0.
  (if (true? b) 1 0))


(defn email-name
  "Returns the lower-cased part of `email` that precedes the first @."
  [email]
  (let [lowered (s/lower-case email)]
    (s/replace-first lowered #"@.*" "")))


(defn n-digits-in-email-name
  "Counts the digit characters in the name part of the :email entry."
  [{:keys [email]}]
  (let [digits (re-seq #"\d" (email-name email))]
    (count digits)))


(defn n-chars-in-email-name
  "Returns the length of the name part of the :email entry."
  [{:keys [email]}]
  (count (email-name email)))


(defn diff-eur-previous-order
  "Returns :current-amount minus :previous-amount, i.e. how much the current
  order differs from the previous one."
  [{:keys [current-amount previous-amount]}]
  (- current-amount previous-amount))


(defn risky-item?
  "Returns 1 when the lower-cased :brand of the item contains \"baz corp\",
  0 otherwise."
  [{:keys [brand]}]
  (let [match (re-find #"baz corp" (s/lower-case brand))]
    (boolean->int (some? match))))


(defn contains-risky-item
  "Returns 1 when at least one entry of :items is flagged by `risky-item?`,
  0 otherwise."
  [{:keys [items]}]
  (let [flags (map risky-item? items)
        any-risky (some (fn [flag] (= 1 flag)) flags)]
    (boolean->int any-risky)))


(defn needs-coeffect
  "Feature declaring the `::added` coeffect in its var metadata; returns the
  value stored under `::added` in its input map."
  {:bulgogi/coeffect ::added}
  [{coeffect-value ::added}]
  coeffect-value)


(defn added
  "Coeffect that ignores its argument and returns a one-entry map holding
  \"some data\" under `::added`."
  [_input-data]
  (assoc {} ::added "some data"))


(defn email-name-as-coeffect
  "Coeffect returning a one-entry map: under `::email-name-as-coeffect` it
  stores the lower-cased part of :email that precedes the first @."
  [{:keys [email]}]
  (let [local-part (s/replace-first (s/lower-case email) #"@.*" "")]
    {::email-name-as-coeffect local-part}))


(defn n-chars-in-email-name-w-coeffect
  "Feature declaring the `::email-name-as-coeffect` coeffect in its var
  metadata; returns the length of the value stored under that key."
  {:bulgogi/coeffect ::email-name-as-coeffect}
  [{precomputed-name ::email-name-as-coeffect}]
  (count precomputed-name))


;; Shared input map for the `preprocessed` tests.
(def test-input
  {:current-amount 700
   :previous-amount 400
   ;; NOTE(review): the scraped page showed "[email protected]" here, an
   ;; e-mail obfuscation placeholder rather than a real address. The
   ;; assertions in this file need a name part of 10 characters containing
   ;; 2 digits; this value is a stand-in that satisfies them — restore the
   ;; original address from the repository if it differs.
   :email "squadron42@example.com"
   :items [{:brand "Foo Industries" :value 1234}
           {:brand "Baz Corp" :value 35345}]})


(deftest preprocessed
  ;; Every case runs SUT/preprocessed against the shared test-input and only
  ;; varies the requested feature names.
  (let [result-for (fn [feature-names]
                     (SUT/preprocessed {:input-data test-input
                                        :features feature-names}))]
    (testing "basics"
      (is (= {:contains-risky-item 1
              :diff-eur-previous-order 300
              :n-digits-in-email-name 2}
             (result-for ["n-digits-in-email-name"
                          "contains-risky-item"
                          "diff-eur-previous-order"]))))
    (testing "coeffect"
      (is (= {:needs-coeffect "some data"}
             (result-for ["needs-coeffect"]))))
    (testing "coeffect complex feature"
      (is (= {:n-chars-in-email-name-w-coeffect 10}
             (result-for ["n-chars-in-email-name-w-coeffect"]))))))

(deftest all-features
  (testing "finds features in marked ns"
    (is (clojure.set/subset? #{'distinct-feature1 'feature1}
                             (set (keys (SUT/all-features))))))
  (testing "throws on conflict"
    ;; features2 defines `feature1` as well, so loading it creates a clash.
    ;; :reload forces the namespace to be re-created even when an earlier run
    ;; of this test already loaded it and removed it again.
    (require '[jcpsantiago.features2] :reload)
    (try
      (is (thrown-with-msg? Exception #"^Conflict" (SUT/all-features)))
      (finally
        ;; Without this clean-up the conflicting namespace stays loaded and
        ;; every later call to all-features in the same JVM throws, which
        ;; makes the other tests depend on execution order.
        (remove-ns 'jcpsantiago.features2)))))

8 changes: 8 additions & 0 deletions test/jcpsantiago/features.clj
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
(ns jcpsantiago.features
{:jcpsantiago.bulgogi/features true})

(defn feature1
  "Fixture feature: ignores its input and always returns 2."
  [_input-data]
  (+ 1 1))

(defn distinct-feature1
  "Fixture feature whose name exists only in this namespace; always returns
  \"some\".

  The 1-arity makes it callable the way features are invoked, with the
  input data as single argument; the 0-arity is kept for existing callers."
  ([] "some")
  ([_] (distinct-feature1)))
8 changes: 8 additions & 0 deletions test/jcpsantiago/features2.clj
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
(ns jcpsantiago.features2
{:jcpsantiago.bulgogi/features true})

(defn feature1
  "Fixture feature that shares its name with a feature in another fixture
  namespace; ignores its input and always returns 2."
  [_input-data]
  (+ 1 1))

(defn distinct-feature2
  "Fixture feature whose name exists only in this namespace; always returns
  \"some\".

  The 1-arity makes it callable the way features are invoked, with the
  input data as single argument; the 0-arity is kept for existing callers."
  ([] "some")
  ([_] (distinct-feature2)))

0 comments on commit e447b51

Please sign in to comment.