반응형
library(MASS)
library(foreach)
library(doParallel)
library(Iso) # Ensure Iso package is loaded for PAVA
# Semiparametric SURE shrinkage estimator function
SURE_shrinkage_estimator <- function(X, A, mu) {
p <- length(X)
b <- numeric(p)
# Perform initial optimization for each b_i
for (i in 1:p) {
calculate_SURE <- function(b_i) {
b_vec <- b
b_vec[i] <- b_i
return(SURE_function(b_vec, X, A, mu))
}
# Adjust b values using PAVA from the Iso package after all b_i have been optimized
b <- pava(b, decreasing = FALSE) # Ensure b is non-decreasing
# Optimize each b_i over [0, 1]
lower_bound <- 0
upper_bound <- 1
optimize_result <- optimize(calculate_SURE, lower = lower_bound, upper = upper_bound, tol = 1e-8)
b[i] <- optimize_result$minimum
}
# Final estimator calculation
theta_hat <- (1 - b) * X + b * mu
return(list(b = b, theta_hat = theta_hat))
}
# SURE (Stein's Unbiased Risk Estimate) function
SURE_function <- function(b, X, A, mu) {
p <- length(X)
sum_sure <- sum(b^2 * (X - mu)^2 + (1 - 2 * b) * A)
return(sum_sure / p)
}
# Function to compare different estimators
compare_estimators <- function(p, num_simulations) {
risks_SURE <- numeric(num_simulations)
risks_JS <- numeric(num_simulations)
risks_EBMLE <- numeric(num_simulations)
risks_EBMOM <- numeric(num_simulations)
risks_OL <- numeric(num_simulations)
for (sim in 1:num_simulations) {
set.seed(1234 * sim)
A <- runif(p, 0.1, 1) # Variance
theta_true <- rnorm(p) # True theta values
X <- rnorm(p, mean = theta_true, sd = sqrt(A)) # Observed data
mu <- mean(X) # Set mu as the mean of the observed data
# SURE Estimator
result_SURE <- SURE_shrinkage_estimator(X, A, mu)
theta_hat_SURE <- result_SURE$theta_hat
risks_SURE[sim] <- mean((theta_hat_SURE - theta_true)^2)
# James-Stein Estimator
JS_estimator <- function(X, A) {
p <- length(X)
s2 <- sum(X^2)
shrinkage_factor <- max(0, 1 - (p - 2) * mean(A) / s2)
theta_hat <- shrinkage_factor * X
return(theta_hat)
}
theta_hat_JS <- JS_estimator(X, A)
risks_JS[sim] <- mean((theta_hat_JS - theta_true)^2)
# EBMLE Estimator
EBMLE_estimator <- function(X, A) {
p <- length(X)
lambda_hat <- max(0, mean(X^2 - A))
theta_hat <- (lambda_hat / (lambda_hat + A)) * X
return(list(lambda_hat = lambda_hat, theta_hat = theta_hat))
}
result_EBMLE <- EBMLE_estimator(X, A)
theta_hat_EBMLE <- result_EBMLE$theta_hat
risks_EBMLE[sim] <- mean((theta_hat_EBMLE - theta_true)^2)
# EBMOM Estimator
EBMOM_estimator <- function(X, A) {
p <- length(X)
lambda_hat <- max(0, mean(X^2 - A))
theta_hat <- (lambda_hat / (lambda_hat + A)) * X
return(list(lambda_hat = lambda_hat, theta_hat = theta_hat))
}
result_EBMOM <- EBMOM_estimator(X, A)
theta_hat_EBMOM <- result_EBMOM$theta_hat
risks_EBMOM[sim] <- mean((theta_hat_EBMOM - theta_true)^2)
# Oracle Estimator
OL_estimator <- function(X, A, theta_true) {
p <- length(X)
calculate_OL <- function(lambda) {
sum_ol <- sum(((lambda / (lambda + A)) * X - theta_true)^2)
return(sum_ol / p)
}
optimize_result <- optimize(calculate_OL, lower = 0, upper = 100, tol = 1e-8)
lambda_hat_OL <- optimize_result$minimum
theta_hat_OL <- (lambda_hat_OL / (lambda_hat_OL + A)) * X
return(list(lambda_hat_OL = lambda_hat_OL, theta_hat_OL = theta_hat_OL))
}
result_OL <- OL_estimator(X, A, theta_true)
theta_hat_OL <- result_OL$theta_hat_OL
risks_OL[sim] <- mean((theta_hat_OL - theta_true)^2)
}
return(c(
SURE = mean(risks_SURE),
JS = mean(risks_JS),
EBMLE = mean(risks_EBMLE),
EBMOM = mean(risks_EBMOM),
OL = mean(risks_OL)
))
}
# Setting up parallel processing cluster
num_cores <- detectCores() - 1
cl <- makeCluster(num_cores)
registerDoParallel(cl)
# Run simulations and visualize results
p_values <- seq(20, 500, by = 20)
num_simulations <- 500 # Adjusted for large number of simulations
results <- foreach(p = p_values, .combine = rbind, .packages = c("MASS", "Iso")) %dopar% {
compare_estimators(p, num_simulations)
}
# Stop the parallel processing cluster
stopCluster(cl)
# Convert results to a dataframe and set column names
results_df <- as.data.frame(results)
names(results_df) <- c("SURE", "JS", "EBMLE", "EBMOM", "OL")
# Plot the results
plot(p_values, results_df$SURE, type = "b", col = "blue", xlab = "Sample Size (p)", ylab = "Average Risk", ylim = range(results_df), main = "Estimator Performance Comparison")
lines(p_values, results_df$JS, type = "b", col = "red")
lines(p_values, results_df$EBMLE, type = "b", col = "green")
lines(p_values, results_df$EBMOM, type = "b", col = "purple")
lines(p_values, results_df$OL, type = "b", col = "orange")
legend("topright", legend = c("SURE", "James-Stein", "EBMLE", "EBMOM", "OL"), col = c("blue", "red", "green", "purple", "orange"), lty = 1, pch = 1)
728x90
LIST
'학부 연구생 > Code' 카테고리의 다른 글
LaTeX 기본 명령어(함수) 및 라이브러리 (0) | 2025.04.26 |
---|---|
Rmosek warning messages (1) | 2025.03.16 |