Family¶
family = c("reg_ld", "reg_hd", "reg_ml", "cls")
| Family | Description | Statistical Inference |
|---|---|---|
| reg_ld | Linear prediction model (low-dimensional) | ✅ |
| reg_hd | High-dimensional linear model | ✅ |
| reg_ml | Machine learning prediction model | ❌ |
| cls | Linear model for classification task | ✅ |
Arguments¶
family = reg_ld¶
cgdro_()¶
intercept
Whether to include intercept in outcome models. Default is FALSE.
delta
Regularization parameter in weight optimization. Default is 0.
infer_cgdro_()¶
tau
Variance inflation parameter. Default is 0.2.
alpha_thres
Threshold for small eigenvalues. Default is 0.01.
threshold
Threshold for eigenvalue truncation. Default is 0.
summary_cgdro_()¶
index
Index of the coefficients of interest.
family = reg_hd¶
cgdro_()¶
intercept
Whether to include intercept in outcome models. Default is FALSE.
intercept_loading
Whether to include intercept in loading matrix defined by index. Default is FALSE.
delta
Regularization parameter in weight optimization. Default is 0.
lambda
Regularization parameter for the high-dimensional outcome models; if set to "CV.min" or "CV.1se", it is selected by cross-validation. Default is NULL.
infer_cgdro_()¶
tau
Variance inflation parameter. Default is 0.2.
alpha_thres
Threshold for small eigenvalues. Default is 0.01.
threshold
Threshold for eigenvalue truncation. Default is 0.
family = reg_ml¶
cgdro_()¶
bias_correct
Whether to use bias-corrected estimator for Γ. Default is TRUE.
priors
Optional list with two elements: prior weight vector (length L) and radius (nonnegative scalar). Default is NULL (no prior).
ridge
Ridge regularization parameter (nonnegative scalar) for numerical stability. Default is 1e-8.
solver
Solver for the quadratic program. Options: "ECOS", "SCS". Default is "ECOS".
seed
Random seed for reproducibility in data splitting. Default is 123.
family = cls¶
cgdro_()¶
split
Whether to use sample-splitting in outcome/density estimation. Default is TRUE.
max_iter
Maximum number of iterations. Default is 1000.
tol
Tolerance for convergence. Default is 1e-6.
Check_dual
Whether to compute duality gap every 50 iterations. Default is FALSE.
seed
Random seed for reproducibility in data splitting. Default is 123.
infer_cgdro_()¶
parallel
Whether to use parallel computing. Default is FALSE.
n_workers
Number of workers for parallel computing. Default is 4.
diag
Whether to use diagonal approximation for covariance estimation. Default is TRUE.
summary_cgdro_()¶
index
Index of the coefficients of interest.
class_index
Index of the class of coefficients of interest.
Example¶
Example of Low-dimensional Linear Regression¶
# Simulated low-dimensional setting:
#   - 3 source groups with 1000 samples each
#   - sigma = 0.5 for source groups 1 and 3; sigma = 2 for source group 2
#   - target sample size = 10000
#   - dimension p = 5
data <- simu_linear_reg_lowd(
  n_list = list(1000, 1000, 1000),
  N = 10000,
  p = 5,
  seed = 123
)
Xlist <- data$X_list
Ylist <- data$Y_list
X0 <- data$X0

## fit cgdro with loss_type = "reward"
## Note: infer_cgdro_() can be called to get confidence intervals only when
## loss_type = "reward"; for other loss types, only point estimation and
## prediction can be done.
fit <- cgdro_(
  Xlist, Ylist, X0,
  loss_type = "reward",
  family = "reg_ld",
  intercept = TRUE,
  delta = 0,
  verbose = FALSE
)
inf <- infer_cgdro_(fit, M = 200, alpha = 0.05)

## summary
summary_cgdro_(fit, infer = inf)

## predict
pred <- predict_cgdro_(fit)  # N x 1 vector of predicted values
head(pred)

## fit cgdro with loss_type = "squaredloss" (point estimation only)
fit <- cgdro_(
  Xlist, Ylist, X0,
  loss_type = "squaredloss",
  family = "reg_ld",
  intercept = TRUE,
  delta = 0,
  verbose = FALSE
)

## summary
summary(fit)

## predict
pred <- predict_cgdro_(fit)  # N x 1 vector of predicted values
head(pred)

## fit cgdro with loss_type = "regret" (point estimation only)
fit <- cgdro_(
  Xlist, Ylist, X0,
  loss_type = "regret",
  family = "reg_ld",
  intercept = TRUE,
  delta = 0,
  verbose = FALSE
)

## summary
summary(fit)

## predict
pred <- predict_cgdro_(fit)  # N x 1 vector of predicted values
head(pred)
Example of High-dimensional Linear Regression¶
# Simulated high-dimensional setting:
#   - 2 source groups with 100 samples each
#   - 100 target samples
#   - dimension p = 100
data <- simu_linear_reg_highd(
  n_list = c(100, 100),
  N = 100,
  p = 100,
  seed = 123
)
Xlist <- data$X_list
Ylist <- data$Y_list
X0 <- data$X0

## fit cgdro
## `index` picks the coefficients of interest; lambda = "CV.min" selects the
## regularization parameter by cross-validation.
fit <- cgdro_(
  Xlist, Ylist,
  X0 = X0,
  family = "reg_hd",
  index = c(1, 10, 45, 99),
  intercept = FALSE,
  delta = 0,
  lambda = "CV.min",
  verbose = FALSE
)
inf <- infer_cgdro_(fit, M = 200, alpha = 0.05)

## summary
summary_cgdro_(fit, infer = inf)

## predict
pred <- predict_cgdro_(fit)  # N x 1 vector of predicted values
head(pred)
Example of Machine Learning Regression¶
# Simulated machine-learning regression setting:
#   - 3 source groups with 1000 samples each
#   - 10000 target samples
#   - dimension p = 5
data <- simu_reg_ml(
  n_vec = c(1000, 1000, 1000),
  n0 = 10000,
  N_label = 20,
  p = 5,
  seed = 123
)
Xlist <- data$X_list
Ylist <- data$Y_list
X0 <- data$X0

## fit cgdro with loss_type = "reward"
fit <- cgdro_(
  Xlist, Ylist, X0,
  loss_type = "reward",
  family = "reg_ml",
  f_learner = "xgb",
  w_learner = "logistic",
  bias_correct = TRUE,
  priors = NULL,
  ridge = 1e-8,
  seed = 123,
  verbose = FALSE
)
fit$weight_

## predict
pred <- predict_cgdro_(fit)  # N x 1 vector of predicted values
head(pred)

## fit cgdro with loss_type = "squaredloss"
fit <- cgdro_(
  Xlist, Ylist, X0,
  loss_type = "squaredloss",
  family = "reg_ml",
  f_learner = "xgb",
  w_learner = "logistic",
  bias_correct = TRUE,
  priors = NULL,
  ridge = 1e-8,
  seed = 123
)
fit$weight_

## predict
pred <- predict_cgdro_(fit)  # N x 1 vector of predicted values
head(pred)

## fit cgdro with loss_type = "regret"
fit <- cgdro_(
  Xlist, Ylist, X0,
  loss_type = "regret",
  family = "reg_ml",
  f_learner = "xgb",
  w_learner = "logistic",
  bias_correct = TRUE,
  priors = NULL,
  ridge = 1e-8,
  seed = 123
)
fit$weight_

## predict
pred <- predict_cgdro_(fit)  # N x 1 vector of predicted values
head(pred)
Example of Classification¶
# Simulated classification setting:
#   - 2 source groups with 100 samples each
#   - 1000 target samples
n <- 100
p <- 5
L <- 2
N <- 1000
K <- 2
data <- simu_cls(n, N, p, L, K, seed = 123)
Xlist <- data$X_list
Ylist <- data$Y_list
X0 <- data$X0

## fit cgdro
fit <- cgdro_(
  Xlist, Ylist, X0,
  family = "cls",
  f_learner = "linear",
  w_learner = "logistic"
)
inf <- infer_cgdro_(
  fit,
  M = 200,
  alpha = 0.05,
  parallel = FALSE,
  diag = TRUE
)

## summary: all coefficients, then a subset selected by coefficient index
## and class index
summary_cgdro_(fit, infer = inf)
summary_cgdro_(fit, infer = inf, index = c(1, 3), class_index = c(2))

## prediction
pred <- predict_cgdro_(fit)
head(pred$pred_proba)  # N x C matrix of predicted probabilities
head(pred$pred)