diff --git a/.gitignore b/.gitignore
index ddfe040..0b65f21 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,3 +10,7 @@
 .hgignore
 .hg/
 .DS_Store
+.lsp
+.lh
+.calva
+.clj-kondo
diff --git a/src/jcpsantiago/bulgogi.clj b/src/jcpsantiago/bulgogi.clj
index 3f9b56e..7664798 100644
--- a/src/jcpsantiago/bulgogi.clj
+++ b/src/jcpsantiago/bulgogi.clj
@@ -1,12 +1,68 @@
 (ns jcpsantiago.bulgogi
   "
   À-la-carte transformations of data, useful in ML systems.")
 
+(defn- map-keys
+  "Returns a copy of map m with every key k replaced by (f k).
+  Named map-keys rather than update-keys so that it does not shadow
+  clojure.core/update-keys on Clojure 1.11+."
+  [m f]
+  (reduce-kv (fn [acc k v]
+               (assoc acc (f k) v))
+             {}
+             m))
+
+(defn- all-special-functions
+  "Returns a map of fn-name -> fn-var holding the public vars of every
+  loaded namespace whose metadata has a truthy value under fn-type
+  (e.g. ::features or ::coeffects); nil when there is no such namespace.
+  Keys are namespace-qualified symbols when namespaced? is true (the
+  default) and bare symbols otherwise.
+  Throws an ex-info whose message starts with `Conflict between:` when
+  two namespaces yield the same key."
+  ([fn-type]
+   (all-special-functions fn-type true))
+  ([fn-type namespaced?]
+   (let [fn-name (fn [a-ns sym]
+                   (if namespaced?
+                     (symbol (str a-ns) (str sym))
+                     sym))
+         conflict! (fn [a b]
+                     (throw (ex-info (str "Conflict between: " a " and: " b)
+                                     {:conflict [a b]})))]
+     (->> (all-ns)
+          (filter #(fn-type (meta %)))
+          (map (fn [a-ns]
+                 (map-keys (ns-publics a-ns) #(fn-name a-ns %))))
+          (apply merge-with conflict!)))))
+
+
+(defn all-features
+  "Returns a map of feature-name -> fn-var, keyed by unqualified symbol,
+  for the public vars of every loaded namespace marked with ::features
+  metadata. Throws when two such namespaces define the same name."
+  []
+  (all-special-functions ::features false))
+
+
+(defn all-coeffects
+  "Returns a map of coeffect-name -> fn-var, keyed by namespace-qualified
+  symbol, for the public vars of every loaded namespace marked with
+  ::coeffects metadata."
+  []
+  (all-special-functions ::coeffects))
+
 (defn- resolved-features
-  [features -ns]
-  (->> features
-       (map #(let [sym (symbol %)]
-               (ns-resolve (find-ns -ns) sym)))))
+  "Returns a vector with the fn-var of every name in features, in order.
+  Built eagerly so that an unknown feature fails here with a descriptive
+  ex-info instead of a NullPointerException when it is applied."
+  [features]
+  (let [all (all-features)]
+    (mapv (fn [feature]
+            (or (get all (symbol feature))
+                (throw (ex-info (str "Unknown feature: " feature)
+                                {:feature feature}))))
+          features)))
 
 
 (defn- transformed
@@ -14,6 +70,37 @@
   (pmap #(% input-data) fns))
 
 
+(defn- enriched
+  "Returns input-data merged with the maps returned by the coeffect fns
+  (each applied to input-data, in parallel), so features see the original
+  keys as well as the coeffect keys. Without fns, returns input-data."
+  [input-data fns]
+  (->> fns
+       (pmap #(% input-data))
+       (apply merge input-data)))
+
+
+;; Cache of feature-names -> fn-vars; avoids scanning all namespaces on
+;; every request.
+(def ^:private memoized-features
+  (memoize resolved-features))
+
+
+;; Cache of feature fn-vars -> the coeffect fn-vars they declare through
+;; :bulgogi/coeffect metadata (each coeffect once).
+(def ^:private memoized-coeffects
+  (memoize
+   (fn [fn-vars]
+     (let [all (all-coeffects)]
+       (->> fn-vars
+            (keep #(:bulgogi/coeffect (meta %)))
+            distinct
+            (mapv (fn [coeffect]
+                    (or (get all (symbol coeffect))
+                        (throw (ex-info (str "Unknown coeffect: " coeffect)
+                                        {:coeffect coeffect}))))))))))
+
+
 (defn preprocessed
   "
   Takes a request map with keys :input-data and :features.
@@ -29,9 +116,26 @@
   Looks for the features in the namespace and applies them to the input-data in parallel.
   Returns a map of feature-keys and feature-values.
   "
-  [req -ns]
+  [req]
   (let [{:keys [input-data features]} req
-        fns (resolved-features features -ns)
+        fns (memoized-features features)
+        ;; coeffect fns declared by the features via :bulgogi/coeffect metadata
+        coeffects (memoized-coeffects fns)
         fn-ks (map keyword features)]
-    (->> (transformed input-data fns)
+    (->> (transformed (enriched input-data coeffects) fns)
          (zipmap fn-ks))))
+
+
+(defn- feature-conflicts?
+  "Returns true when two or more loaded namespaces marked with ::features
+  metadata publish a var with the same name, i.e. when all-features would
+  throw."
+  []
+  (let [names (->> (all-ns)
+                   (filter #(::features (meta %)))
+                   (mapcat #(keys (ns-publics %))))]
+    (not= (count names) (count (distinct names)))))
+
+(comment
+  (all-features)
+  (feature-conflicts?))
diff --git a/test/jcpsantiago/bulgogi_test.clj b/test/jcpsantiago/bulgogi_test.clj
index 766a72c..500b224 100644
--- a/test/jcpsantiago/bulgogi_test.clj
+++ b/test/jcpsantiago/bulgogi_test.clj
@@ -1,9 +1,147 @@
 (ns jcpsantiago.bulgogi-test
+  {:jcpsantiago.bulgogi/features true
+   :jcpsantiago.bulgogi/coeffects true}
   (:require
-    [clojure.test :refer :all]
-    [jcpsantiago.bulgogi :refer :all]))
+   [clojure.string :as s]
+   [clojure.test :refer :all]
+   [clojure.set]
+   [jcpsantiago.features]
+   [jcpsantiago.bulgogi :as SUT]))
 
 
-(deftest a-test
-  (testing "will be gone soon :D"
-    (is (= 1 1))))
+(defn boolean->int
+  "Cast a boolean to 1/0 integer indicator; nil for anything not a boolean"
+  [b]
+  (when (boolean? b)
+    (if b 1 0)))
+
+
+(defn email-name
+  "Lower-cased name of an email address (the bit before @)"
+  [email]
+  (-> email
+      s/lower-case
+      (s/replace-first #"@.*" "")))
+
+
+(defn n-digits-in-email-name
+  "Number of digits in the email name"
+  [{email :email}]
+  (->> (email-name email)
+       (re-seq #"\d")
+       count))
+
+
+(defn n-chars-in-email-name
+  "Number of characters in the email name i.e. length of the email name"
+  [{email :email}]
+  (-> (email-name email)
+      count))
+
+
+(defn diff-eur-previous-order
+  "Difference in euros between the current order and the previous one."
+  [{current-amount :current-amount previous-amount :previous-amount}]
+  (- current-amount previous-amount))
+
+
+(defn risky-item?
+  "Indicator 1/0 depending on whether an item is risky or not"
+  [{brand :brand}]
+  (->> brand
+       s/lower-case
+       (re-seq #"baz corp")
+       some?
+       boolean->int))
+
+
+(defn contains-risky-item
+  "Indicator 1/0 depending on whether a risky item is present in the cart"
+  [{items :items}]
+  (->> items
+       (map risky-item?)
+       (some #(= 1 %))
+       boolean
+       boolean->int))
+
+
+(defn ^{:bulgogi/coeffect ::added} needs-coeffect
+  "Feature returning the value supplied by the `added` coeffect"
+  [{data ::added}]
+  data)
+
+
+(defn added
+  "Coeffect contributing a constant ::added entry"
+  [_]
+  {::added "some data"})
+
+
+(defn email-name-as-coeffect
+  "Coeffect contributing the email name under ::email-name-as-coeffect"
+  [{email :email}]
+  {::email-name-as-coeffect (email-name email)})
+
+
+(defn ^{:bulgogi/coeffect ::email-name-as-coeffect} n-chars-in-email-name-w-coeffect
+  "Length of the email name, read from the email-name-as-coeffect coeffect"
+  [{email-name-as-coeffect ::email-name-as-coeffect}]
+  (count email-name-as-coeffect))
+
+
+(def test-input
+  {:current-amount 700
+   :previous-amount 400
+   :email "squadron42@starfleet.ufp"
+   :items [{:brand "Foo Industries" :value 1234}
+           {:brand "Baz Corp" :value 35345}]})
+
+
+(deftest boolean->int-test
+  (testing "booleans become 1/0, anything else nil"
+    (is (= 1 (boolean->int true)))
+    (is (= 0 (boolean->int false)))
+    (is (nil? (boolean->int "yes")))))
+
+
+(deftest preprocessed
+  (testing "basics"
+    (is (= {:contains-risky-item 1
+            :diff-eur-previous-order 300
+            :n-digits-in-email-name 2}
+           (SUT/preprocessed {:input-data test-input
+                              :features ["n-digits-in-email-name"
+                                         "contains-risky-item"
+                                         "diff-eur-previous-order"]}))))
+  (testing "coeffect"
+    (is (= {:needs-coeffect "some data"}
+           (SUT/preprocessed {:input-data test-input
+                              :features ["needs-coeffect"]}))))
+  (testing "coeffect complex feature"
+    (is (= {:n-chars-in-email-name-w-coeffect 10}
+           (SUT/preprocessed {:input-data test-input
+                              :features ["n-chars-in-email-name-w-coeffect"]}))))
+  (testing "plain features still see the input-data next to coeffects"
+    (is (= {:n-digits-in-email-name 2
+            :needs-coeffect "some data"}
+           (SUT/preprocessed {:input-data test-input
+                              :features ["n-digits-in-email-name"
+                                         "needs-coeffect"]}))))
+  (testing "unknown feature"
+    (is (thrown-with-msg? Exception #"^Unknown feature"
+                          (SUT/preprocessed {:input-data test-input
+                                             :features ["no-such-feature"]})))))
+
+
+(deftest all-features
+  (testing "finds features in marked ns"
+    (is (clojure.set/subset? #{'distinct-feature1 'feature1}
+                             (set (keys (SUT/all-features))))))
+  (testing "throws on conflict"
+    ;; :reload re-creates the ns even when an earlier run already loaded it
+    (require '[jcpsantiago.features2] :reload)
+    (try
+      (is (thrown-with-msg? Exception #"^Conflict" (SUT/all-features)))
+      (finally
+        ;; remove the conflicting ns again so it cannot break other tests
+        (remove-ns 'jcpsantiago.features2)))))
diff --git a/test/jcpsantiago/features.clj b/test/jcpsantiago/features.clj
new file mode 100644
index 0000000..8cf20c2
--- /dev/null
+++ b/test/jcpsantiago/features.clj
@@ -0,0 +1,8 @@
+(ns jcpsantiago.features
+  {:jcpsantiago.bulgogi/features true})
+
+(defn feature1 [_]
+  (inc 1))
+
+(defn distinct-feature1 [_]
+  "some")
diff --git a/test/jcpsantiago/features2.clj b/test/jcpsantiago/features2.clj
new file mode 100644
index 0000000..e53f43d
--- /dev/null
+++ b/test/jcpsantiago/features2.clj
@@ -0,0 +1,8 @@
+(ns jcpsantiago.features2
+  {:jcpsantiago.bulgogi/features true})
+
+(defn feature1 [_]
+  (inc 1))
+
+(defn distinct-feature2 [_]
+  "some")