diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
index a3ac618..a1e9e7c 100644
--- a/.github/workflows/R-CMD-check.yaml
+++ b/.github/workflows/R-CMD-check.yaml
@@ -2,9 +2,9 @@
 # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 on:
   push:
-    branches: [main, master]
+    branches: [main, master, package-development]
   pull_request:
-    branches: [main, master]
+    branches: [main, master, package-development]
 
 name: R-CMD-check

diff --git a/R/control.R b/R/control.R
index a35e173..bf5dba7 100644
--- a/R/control.R
+++ b/R/control.R
@@ -20,7 +20,7 @@ controlSel <- function(method = "glm.fit", #perhaps another control function for
                        optim_method = "NR",
                        overlap = FALSE,
                        dependence = FALSE,
-                       h_x = c(1, 2)
+                       h_x = c("1", "2")
                        ) {
 
   list(epsilon = epsilon,
@@ -29,7 +29,7 @@ controlSel <- function(method = "glm.fit", #perhaps another control function for
        optim_method = optim_method,
        overlap = overlap,
        dependence = dependence,
-       h_x = if (missing(h_x)) "1" else h_x
        )
 }
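Side note on the `h_x` change: the estimating-equation variant is consumed downstream by `switch(h, "1" = ..., "2" = ...)` in `u_theta()`, `u_theta_der()` and `loss_theta()`, and `switch()` matches on character names, hence the move from numeric `1`/`2` to `"1"`/`"2"`. A minimal standalone sketch of the dispatch pattern (the function name and variant bodies below are illustrative placeholders mirroring the integrands in `u_theta()`, not package code):

    h_variant <- function(h = c("1", "2")) {
      h <- match.arg(h)
      switch(h,
             "1" = function(x, ps) x / ps,  # h(x) = x / pi(x)
             "2" = function(x, ps) x)       # h(x) = x
    }
    h_variant("2")(c(1, 2), ps = 0.5)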
diff --git a/R/nonprobCV.R b/R/nonprobCV.R
index a17c355..a8be3fa 100644
--- a/R/nonprobCV.R
+++ b/R/nonprobCV.R
@@ -1,118 +1,143 @@
-cv_nonprobsvy <- function(X_rand, X_nons, R, weights_X, method_selection, K = 10) {
+cv_nonprobsvy <- function(X, R, weights_X, method_selection, h, nfolds = 10) {
 
   loc_nons <- which(R == 1)
   loc_rand <- which(R == 0)
-
-  R_nons <- R[loc_nons]
-  R_rand <- R[loc_rand]
-
-  weights_nons <- weights_X[loc_nons]
-  weights_rand <- weights_X[loc_rand]
-
+  # carry the weights and the membership indicator along as the last two columns
+  X_nons <- cbind(X[loc_nons, ], weights_X[loc_nons], R[loc_nons])
+  X_rand <- cbind(X[loc_rand, ], weights_X[loc_rand], R[loc_rand])
   k <- 1
   loss_theta_av <- vector(mode = "numeric", length = 50)
-  lambdas <- seq(from = 0.02, to = 4, by = 0.08)
+  lambdas <- setup_lambda(X = X,
+                          y = R,
+                          weights = weights_X,
+                          method_selection = method_selection,
+                          lambda_min = 0,
+                          nlambda = 50)
+
   X_nons <- X_nons[sample(nrow(X_nons)), ]
   X_rand <- X_rand[sample(nrow(X_rand)), ]
 
-  folds_nons <- cut(seq(1,nrow(X_nons)), breaks=K, labels=FALSE) #split nonprobabability sample into K parts
-  folds_rand <- cut(seq(1,nrow(X_rand)), breaks=K, labels=FALSE) #split probabability sample into K parts
+  folds_nons <- cut(seq(1, nrow(X_nons)), breaks = nfolds, labels = FALSE) # split nonprobability sample into nfolds parts
+  folds_rand <- cut(seq(1, nrow(X_rand)), breaks = nfolds, labels = FALSE) # split probability sample into nfolds parts
 
-  # pair K subsets randomly
-  sample_nons <- sample(1:K, K, replace = FALSE)
-  sample_rand <- sample(1:K, K, replace = FALSE)
+  # pair the nfolds subsets randomly
+  sample_nons <- sample(1:nfolds, nfolds, replace = FALSE)
+  sample_rand <- sample(1:nfolds, nfolds, replace = FALSE)
 
   for (lambda in lambdas) {
 
-    loss_theta_vec <- vector(mode = "numeric", length = K)
+    loss_theta_vec <- vector(mode = "numeric", length = nfolds)
 
-    for(i in 1:K){
+    for (i in 1:nfolds) {
       # train data for X_nons
       idx_nons <- which(folds_nons==sample_nons[i], arr.ind=TRUE)
       X_nons_train <- X_nons[-idx_nons, ]
-      R_nons_train <- R_nons[-idx_nons]
-      weights_nons_train <- weights_nons[-idx_nons]
+
       # test data for X_nons
       X_nons_test <- X_nons[idx_nons, ]
-      R_nons_test <- R_nons[idx_nons]
-      weights_nons_test <- weights_nons[idx_nons]
+
      # train data for X_rand
       idx_rand <- which(folds_rand==sample_rand[i], arr.ind=TRUE)
       X_rand_train <- X_rand[-idx_rand, ]
-      R_rand_train <- R_rand[-idx_rand]
-      weights_rand_train <- weights_rand[-idx_rand]
       # test data for X_rand
       X_rand_test <- X_rand[idx_rand, ]
-      R_rand_test <- R_rand[idx_rand]
-      weights_rand_test <- weights_rand[idx_rand]
 
-      X_train <- rbind(X_rand_train[, -1], X_nons_train[, -1])
-      X_test <- rbind(X_rand_test[, -1], X_nons_test[, -1])
+      X_train <- rbind(X_rand_train, X_nons_train)
+      X_test <- rbind(X_rand_test, X_nons_test)
 
-      R_train <- c(R_rand_train, R_nons_train)
-      R_test <- c(R_rand_test, R_nons_test)
-      weights_X_train <- c(weights_rand_train, weights_nons_train)
-      weights_X_test <- c(weights_rand_test, weights_nons_test)
-
-      p <- ncol(X_test)
-
-      # initial values for set of parameters
-      init_theta <- rep(0, p+1)
-      # variables selection using score equation for theta
-
-      par0 <- c(init_theta)
-      LAMBDA <- Matrix::Matrix(matrix(0, p+1, p+1), sparse = TRUE)
-      it <- 0
-
-      for(jj in 1:100) { # to fix
-        it <- it + 1
-
-        u_theta0 <- u_theta(par = par0, R = R_train, X = X_train,
-                            weights = weights_X_train,
-                            method_selection = method_selection)
-
-        u_theta0_der <- u_theta_der(par = par0, R = R_train, X = X_train,
-                                    weights = weights_X_train,
-                                    method_selection = method_selection)
-
-        diag(LAMBDA) <- abs(q_lambda(par0, lambda))/(1e-6 + abs(par0))
-        par <- par0 + solve(u_theta0_der + LAMBDA, sparse = TRUE) %*% (u_theta0 - LAMBDA %*% par0) # perhaps 'solve' function instead of 'ginv'
-        # equation (13) in the article
-        if (sum(abs(par - par0)) < 1e-6) break;
-        if (sum(abs(par - par0)) > 1000) break;
-
-        par0 <- par
-      }
-
-      par <- as.vector(par)
-      theta_est <- par
-      loss_theta_vec[i] <- loss_theta(par = theta_est, R = R_test, X = X_test,
-                                      weights = weights_X_test, method_selection = method_selection)
+      # the last two columns hold the weights and the R indicator
+      p <- ncol(X_train) - 2
+      theta_est <- fit(X = X_train[, 1:p], R = X_train[, p + 2],
+                       weights = X_train[, p + 1], method_selection = method_selection,
+                       h = h, lambda = lambda)
+
+      loss_theta_vec[i] <- loss_theta(par = theta_est, R = X_test[, p + 2], X = X_test[, 1:p],
+                                      weights = X_test[, p + 1], h = h,
+                                      method_selection = method_selection)
 
     }
 
     loss_theta_av[k] <- mean(loss_theta_vec)
 
     k <- k + 1
 
   }
 
   i <- which.min(loss_theta_av)
 
   print(lambdas[i])
 
   lambdas[i]
 
 }
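The fold pairing above draws two independent permutations of `1:nfolds`, so the i-th CV step tests on fold `sample_nons[i]` of the nonprobability sample joined with fold `sample_rand[i]` of the probability sample. A toy illustration of the pairing scheme (sizes made up, not package code):

    set.seed(42)
    nfolds <- 5
    folds_nons <- cut(seq_len(23), breaks = nfolds, labels = FALSE)
    folds_rand <- cut(seq_len(31), breaks = nfolds, labels = FALSE)

    # random pairing: step i tests on fold sample_nons[i] of A and fold sample_rand[i] of B
    sample_nons <- sample(1:nfolds, nfolds, replace = FALSE)
    sample_rand <- sample(1:nfolds, nfolds, replace = FALSE)
    cbind(step = 1:nfolds, fold_nons = sample_nons, fold_rand = sample_rand)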
+setup_lambda <- function(X, y, weights, method_selection, alpha = 1,
+                         lambda_min, log_lambda = FALSE, nlambda, ...) { # consider a penalty factor here
+
+  fit <- glm.fit(x = X, y = y, weights = weights, family = binomial(link = method_selection))
+  #fit <- glm(y~1, weights = weights, family = binomial(link = method_selection))
+
+  n <- length(y)
+  p <- ncol(X)
+  w <- fit$weights
+  r <- residuals(fit, "working") * w
+  zmax <- max(abs(crossprod(X, r)))/n # max_j |<x_j, r>|/n, a pure-R version of ncvreg's internal 'maxprod'
+  lambda_max <- zmax/alpha
+
+  if (log_lambda) { # lambda sequence on log-scale
+    if (lambda_min == 0) {
+      lambda <- c(exp(seq(log(lambda_max), log(.001*lambda_max), length = nlambda-1)), 0)
+    } else {
+      lambda <- exp(seq(log(lambda_max), log(lambda_min*lambda_max), length = nlambda))
+    }
+  } else { # lambda sequence on linear-scale
+    if (lambda_min == 0) {
+      lambda <- c(seq(lambda_max, 0.001*lambda_max, length = nlambda-1), 0)
+    } else {
+      lambda <- seq(lambda_max, lambda_min*lambda_max, length = nlambda)
+    }
+  }
+  lambda
+}
+
+fit <- function(X, R, weights, method_selection, h, lambda, maxit = 100, eps = 1e-6) {
+
+  p <- ncol(X)
+  init_theta <- rep(0, p)
+  # variable selection using the score equation for theta
+  par0 <- init_theta
+  LAMBDA <- Matrix::Matrix(matrix(0, p, p), sparse = TRUE)
+  it <- 0
+  for (jj in 1:maxit) {
+    it <- it + 1
+    if (it == maxit) break
+
+    u_theta0 <- u_theta(par = par0, R = R, X = X,
+                        weights = weights, h = h,
+                        method_selection = method_selection)
+
+    u_theta0_der <- u_theta_der(par = par0, R = R, X = X,
+                                weights = weights, h = h,
+                                method_selection = method_selection)
+
+    diag(LAMBDA) <- abs(q_lambda(par0, lambda))/(eps + abs(par0))
+    par <- par0 + MASS::ginv(as.matrix(u_theta0_der + LAMBDA)) %*% (u_theta0 - LAMBDA %*% par0) # perhaps 'solve' instead of 'ginv'
+    # equation (13) in the article
+    if (sum(abs(par - par0)) < eps) break
+    if (sum(abs(par - par0)) > 1000) break
+
+    par0 <- par
+
+  }
+
+  par <- as.vector(par)
+  theta_est <- par
+  theta_est
+}
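`setup_lambda()` follows the usual glmnet/ncvreg recipe: `lambda_max` approximates the smallest penalty that zeroes every coefficient, computed as max_j |x_j'r|/n from the weighted null-model working residuals, and the grid decays from `lambda_max` down to `lambda_min * lambda_max` (with an exact 0 appended when `lambda_min = 0`). A quick illustration of the two grid shapes, with made-up values:

    lambda_max <- 0.8
    nlambda <- 10

    # linear scale, lambda_min = 0: last value is exactly 0
    c(seq(lambda_max, 0.001 * lambda_max, length.out = nlambda - 1), 0)

    # log scale, lambda_min = 0.05: geometric decay toward lambda_min * lambda_max
    exp(seq(log(lambda_max), log(0.05 * lambda_max), length.out = nlambda))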
data and svydesign do not match") - } y_nons <- XY_nons[,1] @@ -99,7 +96,7 @@ nonprobSel <- function(selection, n_nons <- nrow(X_nons) n_rand <- nrow(X_rand) - X <- rbind(X_rand[, -1], X_nons[, -1]) # joint matrix + X <- rbind(X_rand, X_nons) # joint matrix #X_stand <- scale(X) weights_X <- c(weights_rand, weights) @@ -119,12 +116,13 @@ nonprobSel <- function(selection, p <- ncol(X) N_rand <- sum(weights_rand) - FITT <- ncvreg::cv.ncvreg(X = X, y = R, + FITT <- ncvreg::cv.ncvreg(X = X[, -1], y = R, weights = weights_X, penalty = penalty, family = "binomial") # for 50 variables this function returns first four variables as the most "important", while my function returns almost all, perhaps because of form of the loss function #print(FITT$fit$beta[,FITT$min]) loss_theta <- vector(mode = "numeric", length = length(FITT$lambda)) theta <- list(vector(mode = "numeric", length = length(FITT$lambda))) + indices <- list(vector(mode = "numeric", length = length(FITT$lambda))) k <- 1 @@ -132,26 +130,28 @@ nonprobSel <- function(selection, # initial values for set of parameters #init_theta <- vector(mode = "numeric", length = p+1) - init_theta <- FITT$fit$beta[, k] - + idxx <- which(FITT$fit$beta[, k] != 0) + init_theta <- rep(0, length(FITT$fit$beta[, k][idxx])) # variables selection using score equation for theta par0 <- init_theta - LAMBDA <- Matrix::Matrix(matrix(0, p+1, p+1), sparse = TRUE) + p <- length(par0) + LAMBDA <- Matrix::Matrix(matrix(0, p, p), sparse = TRUE) it <- 0 for(jj in 1:maxit) { it <- it + 1 + if (it == maxit) break - u_theta0 <- u_theta(par = par0, R = R, X = X, - weights = weights_X, + u_theta0 <- u_theta(par = par0, R = R, X = X[, idxx], + weights = weights_X, h = h, method_selection = method_selection) - u_theta0_der <- u_theta_der(par = par0, R = R, X = X, - weights = weights_X, + u_theta0_der <- u_theta_der(par = par0, R = R, X = X[, idxx], + weights = weights_X, h = h, method_selection = method_selection) diag(LAMBDA) <- abs(q_lambda(par0, lambda))/(eps + abs(par0)) par <- par0 + MASS::ginv(as.matrix(u_theta0_der + LAMBDA)) %*% (u_theta0 - LAMBDA %*% par0) # perhaps 'solve' function instead of 'ginv' - # equation (13) in article + # equation (13) in article if (sum(abs(par - par0)) < eps) break; if (sum(abs(par - par0)) > 1000) break; @@ -160,48 +160,57 @@ nonprobSel <- function(selection, } par <- as.vector(par) - par[which(abs(par) < 1e-3)] <- 0 - iddx <- which(abs(par) != 0) - theta_estt <- par[iddx] - theta[[k]] <- par - Xloss <- cbind(1, X) - - loss_theta[k] <- loss_theta(par = theta_estt, - R = R, - X = as.matrix(Xloss[, iddx]), - weights = weights_X, - method_selection = method_selection) + if (any(par==0)) { + i <- which(par==0) + idxx <- idxx[-i] + } - k <- k + 1 + indices[[k]] <- idxx + theta_estt <- theta[[k]] <- par[par!=0] + if (length(theta[[k]]) == 1) { + loss_theta[k] <- NA + } else { + loss_theta[k] <- loss_theta(par = theta_estt, + R = R, + X = as.matrix(X[, idxx]), + weights = weights_X, + h = h, + method_selection = method_selection) # compute this term without intercept } + k <- k + 1 + } #A <- t(matrix(unlist(theta), nrow = length(theta[[1]]))) #L <- as.matrix(FITT$lambda, ncol = 1) #print(as.data.frame(cbind(L, A))) + min_idx <- which.min(loss_theta) + theta_est <- theta[[min_idx]] + theta_selected <- indices[[min_idx]] - 1 + names(theta_est) <- c("(Intercept)", nons_names[theta_selected[-1]]) - theta_est <- theta[[which.min(loss_theta)]] - theta_selected <- which(abs(theta_est) != 0) - 1 # variables selection for beta using ncvreg package, perhaps 
@@ -160,48 +160,57 @@ nonprobSel <- function(selection,
     }
 
     par <- as.vector(par)
-    par[which(abs(par) < 1e-3)] <- 0
-    iddx <- which(abs(par) != 0)
-    theta_estt <- par[iddx]
-    theta[[k]] <- par
-    Xloss <- cbind(1, X)
-
-    loss_theta[k] <- loss_theta(par = theta_estt,
-                                R = R,
-                                X = as.matrix(Xloss[, iddx]),
-                                weights = weights_X,
-                                method_selection = method_selection)
+    if (any(par == 0)) {
+      i <- which(par == 0)
+      idxx <- idxx[-i]
+    }
 
-    k <- k + 1
+    indices[[k]] <- idxx
+    theta_estt <- theta[[k]] <- par[par != 0]
+    if (length(theta[[k]]) == 1) {
+      loss_theta[k] <- NA
+    } else {
+      loss_theta[k] <- loss_theta(par = theta_estt,
+                                  R = R,
+                                  X = as.matrix(X[, idxx]),
+                                  weights = weights_X,
+                                  h = h,
+                                  method_selection = method_selection) # compute this term without a separate intercept column
     }
+    k <- k + 1
+
   }
 
   #A <- t(matrix(unlist(theta), nrow = length(theta[[1]])))
   #L <- as.matrix(FITT$lambda, ncol = 1)
   #print(as.data.frame(cbind(L, A)))
+  min_idx <- which.min(loss_theta)
+  theta_est <- theta[[min_idx]]
+  theta_selected <- indices[[min_idx]] - 1
+  names(theta_est) <- c("(Intercept)", nons_names[theta_selected[-1]])
 
-  theta_est <- theta[[which.min(loss_theta)]]
-  theta_selected <- which(abs(theta_est) != 0) - 1
 
   # variables selection for beta using ncvreg package, perhaps cv.glmnet for theta like
   #theta <- glmnet::cv.glmnet(X, R, family = "binomial")
   #print(coef(theta))
-  beta <- ncvreg::cv.ncvreg(X = X[loc_nons, ], y = y_nons,
+  beta <- ncvreg::cv.ncvreg(X = X[loc_nons, -1], y = y_nons,
                             penalty = penalty, family = family_outcome)
+
   beta_est <- beta$fit$beta[,beta$min]
   beta_selected <- which(abs(beta_est) != 0) - 1
+  beta_est <- beta_est[beta$fit$beta[,beta$min] != 0]
 
   # Estimating theta, beta parameters using selected variables
   idx <- unique(c(beta_selected[-1], theta_selected[-1])) #excluding intercepts
   psel <- length(idx)
-  Xsel <- as.matrix(X[, idx])
+  Xsel <- as.matrix(X[, idx + 1])
 
-  par0 <- rep(0, 2*(psel+1))
+  par0 <- rep(0, 2*(psel + 1))
 
   # root for joint score equation
   par_sel <- rootSolve::multiroot(u_theta_beta,
@@ -216,6 +225,7 @@ nonprobSel <- function(selection,
 
   theta_sel <- par_sel[1:(psel+1)]
   beta_sel <- par_sel[(psel+2):(2*psel+2)]
+  names(theta_sel) <- names(beta_sel) <- c("(Intercept)", nons_names[idx])
 
   ps_nons_est <- inv_link(as.vector(as.matrix(cbind(1, Xsel)) %*% as.matrix(theta_sel)))
   weights_nons <- 1/ps_nons_est[loc_nons]
 
@@ -246,7 +256,7 @@ nonprobSel <- function(selection,
   svydesign_mean <- survey::svymean(~y_rand, svydesign)
 
   var_prob <- as.vector(attr(svydesign_mean, "var")) # based on survey package, probability component
-  var_nonprob <- (sum((infl1) - 2*infl2) + sum(weights_rand * sigmasqhat))/(N_nons^2) # nonprobability component
+  var_nonprob <- (sum(infl1 - 2*infl2) + sum(weights_rand * sigmasqhat))/N_nons^2 # nonprobability component
 
   se_nonprob <- sqrt(var_nonprob)
   se_prob <- sqrt(var_prob)
 
@@ -351,9 +361,7 @@ nonprobSel <- function(selection,
                  theta_sel = theta_est,
                  beta_sel = beta_est,
                  theta = theta_sel,
-                 beta = beta_sel,
-                 theta_selected_vars = nons_names[theta_selected[-1]],
-                 beta_selected_vars = nons_names[beta_selected[-1]]
+                 beta = beta_sel
                  ),
             class = "Doubly-robust")
 
@@ -369,6 +377,7 @@ u_theta <- function(par,
                     X,
                     weights,
                     method_selection,
+                    h,
                     N = NULL) {
 
@@ -382,17 +391,18 @@ u_theta <- function(par,
 
   inv_link <- method$make_link_inv
 
-  theta <- par
+  theta <- as.matrix(par)
   n <- length(R)
-  X0 <- cbind(1, X)
-  lpiB <- X0 %*% theta
-  ps <- inv_link(lpiB)
+  X0 <- as.matrix(X)
+  eta_pi <- X0 %*% theta
+  ps <- inv_link(eta_pi)
   R_rand <- 1 - R
   ps <- as.vector(ps)
   N_nons <- sum(1/ps)
 
-  eq <- c(apply(X0 * R/ps - X0 * R_rand * weights, 2, sum))/N_nons
-
+  eq <- switch(h,
+               "1" = c(apply(X0 * R/ps - X0 * R_rand * weights, 2, sum))/N_nons,
+               "2" = c(apply(X0 * R - X0 * R_rand * weights * ps, 2, sum))/N_nons)
   eq
 }
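Spelled out, the two `h` variants of the score equation implemented above are as follows (notation read off the code: R_i is the nonprobability-sample indicator, d_i the design weights, pi_i = pi(x_i' theta), and N-hat = sum_i 1/pi_i over the stacked sample; this is a transcription of the `switch()`, not a quotation from the referenced article):

    U_1(\boldsymbol\theta) = \frac{1}{\hat{N}} \sum_i
      \left( \frac{R_i}{\pi_i} - (1 - R_i)\, d_i \right) \mathbf{x}_i
    \qquad
    U_2(\boldsymbol\theta) = \frac{1}{\hat{N}} \sum_i
      \left( R_i - (1 - R_i)\, d_i\, \pi_i \right) \mathbf{x}_i

Variant "2" is variant "1" multiplied through by pi_i, which avoids dividing by small estimated propensities; `loss_theta()` uses the squared norm of the same expressions.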
@@ -405,6 +415,7 @@ u_theta_der <- function(par,
                         X,
                         weights,
                         method_selection,
+                        h,
                         N = NULL) {
 
   method <- method_selection
 
@@ -417,23 +428,45 @@ u_theta_der <- function(par,
 
   inv_link <- method$make_link_inv
 
-  theta <- par
-  p <- ncol(X)
+  theta <- as.matrix(par)
+  X0 <- as.matrix(X)
+  p <- ncol(X0)
   n <- length(R)
-  X0 <- cbind(1, X)
-  lpiB <- X0 %*% theta
-  ps <- inv_link(lpiB)
+  eta_pi <- X0 %*% theta
+  ps <- inv_link(eta_pi)
   ps <- as.vector(ps)
+  R_rand <- 1 - R
+
+  if (method_selection == "probit") {
+    dinv_link <- method$make_link_inv_der
+    psd <- dinv_link(eta_pi)
+    psd <- as.vector(psd)
+  }
+
   N_nons <- sum(1/ps)
 
-  mxDer <- matrix(0,(p+1),(p+1))
+  mxDer <- matrix(0, p, p)
 
-  for (ii in 1:(p+1)) {
-    for (jj in ii:(p+1)) {
-      mxDer[ii,jj] <- mxDer[jj,ii] <- sum(R * (1-ps)/ps * X0[,ii] * X0[,jj])
+  if (h == "1") {
+    for (ii in 1:p) {
+      for (jj in ii:p) {
+        mxDer[ii,jj] <- mxDer[jj,ii] <- switch(method_selection,
+                                               "logit" = sum(R * (1-ps)/ps * X0[,ii] * X0[,jj]),
+                                               "cloglog" = sum(R * (1-ps)/ps^2 * exp(eta_pi) * X0[,ii] * X0[,jj]),
+                                               "probit" = sum(R * psd/ps^2 * X0[,ii] * X0[,jj]))
+      }
+    }
+  } else if (h == "2") {
+    for (ii in 1:p) {
+      for (jj in ii:p) {
+        mxDer[ii,jj] <- mxDer[jj,ii] <- switch(method_selection,
+                                               "logit" = sum(R_rand * weights * ps/(exp(eta_pi)+1) * X0[,ii] * X0[,jj]),
+                                               "cloglog" = sum(R_rand * weights * (1-ps) * exp(eta_pi) * X0[,ii] * X0[,jj]),
+                                               "probit" = sum(R_rand * weights * psd * X0[,ii] * X0[,jj]))
+      }
     }
-  }
+  }
 
   mxDer/N_nons
 }
 
@@ -475,8 +508,8 @@ u_theta_beta <- function(par,
   theta <- par[1:(p+1)]
   beta <- par[(p+2):(2*p+2)]
   X0 <- cbind(1, X)
-  lpiB <- X0 %*% theta
-  ps <- inv_link(lpiB)
+  eta_pi <- X0 %*% theta
+  ps <- inv_link(eta_pi)
   R_rand <- 1 - R
   y[which(is.na(y))] <- 0
   ps <- as.vector(ps)
 
@@ -521,6 +554,7 @@ loss_theta <- function(par,
                        R,
                        X,
                        weights,
+                       h,
                        method_selection) {
 
   method <- method_selection
 
@@ -537,8 +571,8 @@ loss_theta <- function(par,
   theta <- par
   nAB <- length(R)
   X0 <- X
-  lpiB <- X0 %*% theta
-  ps <- inv_link(lpiB)
+  eta_pi <- X0 %*% theta
+  ps <- inv_link(eta_pi)
 
   loc_nons <- which(R == 1)
   loc_rand <- which(R == 0)
 
@@ -548,8 +582,11 @@ loss_theta <- function(par,
   N_est_rand <- sum(weights[loc_rand])
   N_est_nons <- sum(1/ps)
 
-  loss <- sum(apply((X0*R/ps - X0*R_rand*weights), 2, sum)^2)
+  loss <- switch(h,
+                 "1" = sum(apply((X0*R/ps - X0*R_rand*weights), 2, sum)^2),
+                 "2" = sum(apply((X0*R - X0*R_rand*weights*ps), 2, sum)^2))
 
   loss
 }
+
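A cheap smoke test for the analytic derivative is to compare it with a numerical Jacobian of `u_theta()`. A hypothetical check, not part of the package, assuming `u_theta()`, `u_theta_der()` and the logit link helpers are loaded; note that `u_theta_der()` treats the normalising constant `N_nons` as fixed, so the match with the full numerical Jacobian is only approximate:

    library(numDeriv)

    set.seed(1)
    n <- 100
    X0 <- cbind(1, rnorm(n))   # toy design with intercept column
    R <- rbinom(n, 1, 0.5)     # membership indicator
    w <- runif(n, 1, 3)        # toy design weights
    theta0 <- c(0, 0)

    num_jac <- numDeriv::jacobian(function(th)
      u_theta(par = th, R = R, X = X0, weights = w,
              h = "1", method_selection = "logit"),
      theta0)
    ana_der <- u_theta_der(par = theta0, R = R, X = X0, weights = w,
                           h = "1", method_selection = "logit")
    max(abs(-num_jac - ana_der))  # small, up to the dropped derivative of N_nons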