학부 연구생/Code

SURE Estimates for a Heteroscedastic Hierarchical

learning-log22 2025. 3. 23. 11:38
반응형
library(MASS)
library(foreach)
library(doParallel)
library(Iso)  # Ensure Iso package is loaded for PAVA

# Semiparametric SURE shrinkage estimator function
SURE_shrinkage_estimator <- function(X, A, mu) {
  p <- length(X)
  b <- numeric(p)
  
  # Perform initial optimization for each b_i
  for (i in 1:p) {
    calculate_SURE <- function(b_i) {
      b_vec <- b
      b_vec[i] <- b_i
      return(SURE_function(b_vec, X, A, mu))
    }
  
    # Adjust b values using PAVA from the Iso package after all b_i have been optimized
    b <- pava(b, decreasing = FALSE)  # Ensure b is non-decreasing
    
    # Optimize each b_i over [0, 1]
    lower_bound <- 0
    upper_bound <- 1
    optimize_result <- optimize(calculate_SURE, lower = lower_bound, upper = upper_bound, tol = 1e-8)
    b[i] <- optimize_result$minimum
  }
  
  # Final estimator calculation
  theta_hat <- (1 - b) * X + b * mu
  
  return(list(b = b, theta_hat = theta_hat))
}

# SURE (Stein's Unbiased Risk Estimate) function
SURE_function <- function(b, X, A, mu) {
  p <- length(X)
  sum_sure <- sum(b^2 * (X - mu)^2 + (1 - 2 * b) * A)
  return(sum_sure / p)
}

# Function to compare different estimators
compare_estimators <- function(p, num_simulations) {
  risks_SURE <- numeric(num_simulations)
  risks_JS <- numeric(num_simulations)
  risks_EBMLE <- numeric(num_simulations)
  risks_EBMOM <- numeric(num_simulations)
  risks_OL <- numeric(num_simulations)
  
  for (sim in 1:num_simulations) {
    set.seed(1234 * sim)
    A <- runif(p, 0.1, 1)  # Variance
    theta_true <- rnorm(p)  # True theta values
    X <- rnorm(p, mean = theta_true, sd = sqrt(A))  # Observed data
    mu <- mean(X)  # Set mu as the mean of the observed data
    
    # SURE Estimator
    result_SURE <- SURE_shrinkage_estimator(X, A, mu)
    theta_hat_SURE <- result_SURE$theta_hat
    risks_SURE[sim] <- mean((theta_hat_SURE - theta_true)^2)
    
    # James-Stein Estimator
    JS_estimator <- function(X, A) {
      p <- length(X)
      s2 <- sum(X^2)
      shrinkage_factor <- max(0, 1 - (p - 2) * mean(A) / s2)
      theta_hat <- shrinkage_factor * X
      return(theta_hat)
    }
    theta_hat_JS <- JS_estimator(X, A)
    risks_JS[sim] <- mean((theta_hat_JS - theta_true)^2)
    
    # EBMLE Estimator
    EBMLE_estimator <- function(X, A) {
      p <- length(X)
      lambda_hat <- max(0, mean(X^2 - A))
      theta_hat <- (lambda_hat / (lambda_hat + A)) * X
      return(list(lambda_hat = lambda_hat, theta_hat = theta_hat))
    }
    result_EBMLE <- EBMLE_estimator(X, A)
    theta_hat_EBMLE <- result_EBMLE$theta_hat
    risks_EBMLE[sim] <- mean((theta_hat_EBMLE - theta_true)^2)
    
    # EBMOM Estimator
    EBMOM_estimator <- function(X, A) {
      p <- length(X)
      lambda_hat <- max(0, mean(X^2 - A))
      theta_hat <- (lambda_hat / (lambda_hat + A)) * X
      return(list(lambda_hat = lambda_hat, theta_hat = theta_hat))
    }
    result_EBMOM <- EBMOM_estimator(X, A)
    theta_hat_EBMOM <- result_EBMOM$theta_hat
    risks_EBMOM[sim] <- mean((theta_hat_EBMOM - theta_true)^2)
    
    # Oracle Estimator
    OL_estimator <- function(X, A, theta_true) {
      p <- length(X)
      calculate_OL <- function(lambda) {
        sum_ol <- sum(((lambda / (lambda + A)) * X - theta_true)^2)
        return(sum_ol / p)
      }
      optimize_result <- optimize(calculate_OL, lower = 0, upper = 100, tol = 1e-8)
      lambda_hat_OL <- optimize_result$minimum
      theta_hat_OL <- (lambda_hat_OL / (lambda_hat_OL + A)) * X
      return(list(lambda_hat_OL = lambda_hat_OL, theta_hat_OL = theta_hat_OL))
    }
    result_OL <- OL_estimator(X, A, theta_true)
    theta_hat_OL <- result_OL$theta_hat_OL
    risks_OL[sim] <- mean((theta_hat_OL - theta_true)^2)
  }
  
  return(c(
    SURE = mean(risks_SURE),
    JS = mean(risks_JS),
    EBMLE = mean(risks_EBMLE),
    EBMOM = mean(risks_EBMOM),
    OL = mean(risks_OL)
  ))
}

# Setting up parallel processing cluster
num_cores <- detectCores() - 1
cl <- makeCluster(num_cores)
registerDoParallel(cl)

# Run simulations and visualize results
p_values <- seq(20, 500, by = 20)
num_simulations <- 500  # Adjusted for large number of simulations
results <- foreach(p = p_values, .combine = rbind, .packages = c("MASS", "Iso")) %dopar% {
  compare_estimators(p, num_simulations)
}

# Stop the parallel processing cluster
stopCluster(cl)

# Convert results to a dataframe and set column names
results_df <- as.data.frame(results)
names(results_df) <- c("SURE", "JS", "EBMLE", "EBMOM", "OL")

# Plot the results
plot(p_values, results_df$SURE, type = "b", col = "blue", xlab = "Sample Size (p)", ylab = "Average Risk", ylim = range(results_df), main = "Estimator Performance Comparison")
lines(p_values, results_df$JS, type = "b", col = "red")
lines(p_values, results_df$EBMLE, type = "b", col = "green")
lines(p_values, results_df$EBMOM, type = "b", col = "purple")
lines(p_values, results_df$OL, type = "b", col = "orange")
legend("topright", legend = c("SURE", "James-Stein", "EBMLE", "EBMOM", "OL"), col = c("blue", "red", "green", "purple", "orange"), lty = 1, pch = 1)
728x90
LIST

'학부 연구생 > Code' 카테고리의 다른 글

LaTeX 기본 명령어(함수) 및 라이브러리  (0) 2025.04.26
Rmosek warning messages  (1) 2025.03.16