se_lib/util.rel

// *****************************************************************
// Sales Engineering Utilities
//
// Tools for exploring / manipulating source data.
//
// INSTALL this source: util.rel
// *****************************************************************

@no_diagnostics(:RECURSIVE_INLINE) 
module se_util

    // // generate a random int number from the range
    // // key is a random seed
    // // lower and upper define inclusive range
    // def random_int[key, lower in Int, upper in Int] =
    //     lower + trunc_to_int[(upper-lower+1) * random_threefry_float64[key, 1]]

    // // generate n random int numbers from the range
    // // key is a random seed
    // // lower and upper define inclusive range
    // // n is a number of random numbers to generate
    // def random_int_n[key, lower in Int, upper in Int, n in Int] =
    //     random_int[key + seed, lower, upper] for seed in range[1, n, 1]

    // Elapsed time from ts1 to ts2, expressed in minutes.
    //   ts1, ts2 - DateTime values; the result is (ts2 - ts1), so it is
    //              negative when ts2 precedes ts1
    //   prec     - number of decimal places kept in the result
    @inline
    def interval_minutes[ts1 in DateTime, ts2 in DateTime, prec in Int](interval) {
        start_ns = datetime_to_nanoseconds[ts1] and
        end_ns = datetime_to_nanoseconds[ts2] and
        factor = power[10, prec] and
        // 60 * 10^9 nanoseconds per minute
        raw_minutes = (end_ns - start_ns) / (60 * 1000000000) and
        // scale up, round to the nearest whole number, scale back down
        interval = round[:ROUND_NEAREST, raw_minutes * factor] / factor
        from start_ns, end_ns, factor, raw_minutes
    }

    // Elapsed time from ts1 to ts2, expressed in seconds.
    //   ts1, ts2 - DateTime values; the result is (ts2 - ts1), so it is
    //              negative when ts2 precedes ts1
    //   prec     - number of decimal places kept in the result
    // The DateTime constraints on ts1/ts2 mirror interval_minutes so both
    // interval helpers expose the same signature.
    @inline
    def interval_seconds[ts1 in DateTime, ts2 in DateTime, prec in Int](interval) =
        datetime_to_nanoseconds(ts1, nanos) and
        datetime_to_nanoseconds(ts2, nanos_next) and
        scale = power[10, prec] and
        seconds = (nanos_next - nanos) / 1000000000 and     // 10^9 nanoseconds per second
        // scale up, round to the nearest whole number, scale back down
        interval = round[:ROUND_NEAREST, seconds * scale] / scale
        from nanos, nanos_next, scale, seconds
      

    // Numerical Functions
    // Round x to p decimal places using rounding mode `mode`.
    // The whole part of x (x rounded with :ROUND_DOWN) is set aside; only the
    // remainder is scaled by 10^p, rounded with `mode`, and scaled back.
    @inline
    def round_with_precision[mode, x, p] =
        whole + round[mode, frac * 10 ^ p] / 10 ^ p
        from whole, frac
        where whole = round[:ROUND_DOWN, x] and frac = x - whole


    // String Functions
    // Split `text` on `delimiter` and return the piece at position `index`,
    // using the positions produced by string_split.
    @inline
    def string_split_by_index[delimiter, text, index](part) =
        string_split(delimiter, text, index, part)


    // Date and Time Functions
    //
    // Extract the Date of DateTime `dt`; `tz` is handed to the ^Date constructor
    // as its second argument (the time zone in which the date is taken).
    @inline
    def cast_to_date[dt, tz] = ^Date[dt, tz]

    // True when the closed intervals [a_from, a_to] and [b_from, b_to] share at
    // least one point (touching endpoints count as overlapping).
    @inline
    def overlap[a_from, a_to, b_from, b_to] =
        a_from <= b_to and b_from <= a_to

    // True when `a` lies within the closed interval [b_from, b_to]
    // (both endpoints included).
    @inline
    def inside[a, b_from, b_to] =
        b_from <= a and a <= b_to

    // Later of two dates: d1 when it is strictly after d2, otherwise d2.
    @inline
    def max_date[d1, d2] =
        if d2 < d1 then d1 else d2 end

    // Earlier of two dates: d1 when it is strictly before d2, otherwise d2.
    @inline
    def min_date[d1, d2] =
        if d2 > d1 then d1 else d2 end
        

    // Keep only the tuples of R that have exactly n elements.
    @outline
    def arity_filter[R, n](row...) {
        R(row...) and arity[{row...}] = n
    }


    // Test whether module G contains a relation named `relname`, i.e. whether
    // some tuple of G has `relname` as its first element.
    @outline
    def exists_relname[G, relname] = G(relname, _...)


    // For each arity `a` occurring in R, the number of tuples of R that have
    // exactly `a` elements.
    @inline
    def count_by_arity[R][a] =
        count[row...: arity({row...}, a) and R(row...)]


    // Transform an n-ary relation into GNF format.
    // Given an n-ary relation R and its schema SCHEMA (a tuple of attribute names,
    // preferably RelNames), to_gnf produces the same data as
    // GNF(attribute, index, value) triples. Instead of looking for a natural unique
    // key in the relation (one may not exist), it manufactures a key with
    // `enumerate[R]` and then uses `pivot` to spread every row over its attributes.
    // R and SCHEMA must have the same arity; otherwise the result is empty.
    // Example:
    // def data = {
    // "A", 1, 2;
    // "A", 3, 1;
    // "B", 4, 2
    // }
    // def schema = {:name, :id, :value}
    // def output = table[to_gnf[data, schema]]
    // also see Slack: https://relationalai.slack.com/archives/CG4NZLU20/p1680121257404589?thread_ts=1680110655.756849&cid=CG4NZLU20
    @inline
    def to_gnf[R, SCHEMA](attr, idx, val) =
        arity[SCHEMA] = arity[R] and
        // pos is the column position shared by the attribute name and the value
        pivot[SCHEMA](pos, attr) and
        enumerate[R](idx, row...) and
        pivot[(row...)](pos, val)
        from pos, row...

    // Helper module behind synthesize_discrete_values, parameterized by R, PROBS
    // and an Int seed.
    // NOTE(review): PROBS is used below as (value, probability) pairs and R as the
    // relation whose elements receive a value -- confirm against callers.
    @outline
    module discrete_values_helper[R, PROBS, seed in Int]
        // For each value ct in PROBS: s is the sum of the probabilities of all PROBS
        // values t with t <= ct, and lower_val = s - (probability of ct). Together
        // they delimit the bucket [lower_val, s) assigned to ct.
        @ondemand def cumsumagg[ct](lower_val, s) = lower_val=s-cprob, s=sum[t, prob: PROBS(t, prob) and PROBS(ct, cprob) and t <= ct] from cprob
        // Pair each e with the value t whose bucket contains rn, the number that
        // random_mersenne_twister[seed, R] associates with e (lower bound inclusive,
        // upper bound exclusive).
        // NOTE(review): presumably rn is uniform in [0, 1), which is why PROBS must
        // sum to 1 -- verify against the random_mersenne_twister documentation.
        @ondemand def data(e, t) = random_mersenne_twister[seed, R](e, rn) and 
                        cumsumagg(t, dfrom, dto) and rn >= dfrom and rn < dto from rn, dfrom, dto
    end

    // Synthesize discrete values for the elements of R according to the
    // probabilities in PROBS (which must sum to 1.); `seed` seeds the random
    // generator. The result pairs each element with its synthesized value.
    // Example:
    // def engineType = se_util:synthesize_discrete_values[last[vehicle_from_id], 
    //                                     {("Gas", .8);
    //                                      ("Hybrid", .1);
    //                                      ("EV Plugin", .1); 
    //                                     }, 2023]
    @inline
    def synthesize_discrete_values[R, PROBS, seed](entity, value) =
        discrete_values_helper[R, PROBS, seed][:data](entity, value)

    // Sample n tuples from relation R.
    // Every tuple of R is paired with a random key from
    // random_mersenne_twister[seed, R]; the first n tuples in key order are
    // returned, each prefixed with its position in that ordering.
    // Example
    // def my_sample = sample_relation[range[1, 100, 1], 12, 2023]
    @inline
    def sample_relation[R, n, seed](rank, row...) =
        top[n, {key, cols...: random_mersenne_twister[seed, R](cols..., key)}](rank, _, row...)

    // Older variant of sample_relation: same random selection of n tuples from R,
    // but the result holds only the sampled tuples (no position column).
    @inline
    def sample_relation_deprecated[R, n, seed](row...) =
        top[n, {key, cols...: random_mersenne_twister[seed, R](cols..., key)}](_, _, row...)

    // Apply is a functional feature that allows generic application of an arbitrary
    // function relation R to an arbitrary dataset: for arguments xs... it yields
    // the value v such that R(xs..., v) holds.
    // Example:
    // @inline def prepare_string[s in String] = uppercase[string_trim[s]]
    // def output = apply[prepare_string][{"aBc "; " edF"; "qwerty"}]
    @inline
    def apply[R](xs..., result) = R(xs..., result)

    // Matrix transformation
    // from Math Matrix (one tuple per row) to GNF Matrix (row, column, value)
    //
    // Rows are numbered with `enumerate[X]`; each row is then pivoted into
    // (column, value) pairs.
    //
    // Example:
    // def x = {(1., 2., 3., 4.);
    //           (3., 4., 5., 6.);
    //           (5., 6., 7., 8.);}
    // def xgnf = {(1, 1, 1.);
    //            (1, 2, 2.);
    //            (1, 3, 3.);
    //            (1, 4, 4.);
    //            (2, 1, 3.);
    //            (2, 2, 4.);
    //            (2, 3, 5.);
    //            (2, 4, 6.);
    //            (3, 1, 5.);
    //            (3, 2, 6.);
    //            (3, 3, 7.);
    //            (3, 4, 8.);}
    // // makes true
    // def output = equal[xgnf, se_util:make_gnf_matrix[x]]
    @inline
    def make_gnf_matrix[X](row, col, value) =
        pivot[enumerate[X][row]](col, value)

    // Linear predictions using a model with weights W.
    // W maps a feature index j to its weight; X is a GNF matrix (row, column, value).
    // For each row i the prediction is the sum over j of X[i, j] * W[j].
    // Example:
    // def beta = (1, 1.; 2, 2.; 3, 0.5; 4, 3.5;)
    // def x = {(1., 2., 3., 4.);
    //           (3., 4., 5., 6.);
    //           (5., 6., 7., 8.);}
    // def predictions = linear_predict[beta, se_util:make_gnf_matrix[x]]
    @inline
    def linear_predict[W, X](i, prediction) =
        prediction = sum[j: W[j] * X[i, j]]


    // Debugging
    // Debugging shortcut: compare L and R and report the outcome.
    // Yields (label, true_message) when equal(L, R) holds and
    // (label, false_message) otherwise; typically sent to output while debugging.
    @inline
    def print_equal[label, L, R, true_message, false_message] =
        label, if equal(L, R) then true_message else false_message end
    
end