
####### small initialization

## initial
library(ggplot2)
library(ggalt)
## common function
# Hard-threshold the leading coefficients of an estimate.
#
# Arguments:
#   betahat  numeric vector of coefficient estimates.
#   main.p   number of leading entries of `betahat` to inspect.
#   thres    cut-off; an inspected entry whose absolute value is strictly
#            below `thres` is replaced by 0.
#
# Returns `betahat` with the small inspected entries set to 0. Entries beyond
# position `main.p` are left untouched, NA entries are left as they are, and
# `main.p = 0` returns the input unchanged.
threshold <- function(betahat, main.p, thres) {
  # seq_len() (rather than 1:main.p) keeps main.p = 0 from visiting index 1.
  head_idx <- seq_len(main.p)
  # which() drops NA comparisons so they never reach the assignment below.
  small <- which(abs(betahat[head_idx]) < thres)
  betahat[head_idx[small]] <- 0
  betahat
}

# Over-parameterised gradient descent for a sparse linear classifier,
# recording the coefficient path.
#
# The coefficient vector is written as beta = w * w - v * v, with every entry
# of w and v started at `main.alpha`. Each of the `main.T` iterations rescales
# w and v multiplicatively using the per-observation weights mu, recomputes
# beta, stores it, evaluates the average hinge loss on the validation set and
# then refreshes mu as (1 - y * x'beta) / (n * gamma) clipped to [0, 1].
#
# Arguments:
#   main.xtr, main.ytr  training design matrix and labels (the callers in this
#                       file pass labels coded -1 / +1).
#   main.xva, main.yva  validation design matrix and labels.
#   main.xts, main.yts  test design matrix and labels.
#   main.T              number of iterations.
#   Beta.true           reference coefficient vector used for error reporting.
#   main.alpha          initial value of every entry of w and v.
#   main.eta            step size.
#   main.gamma          scale of the mu update; mu starts at 1 / (n * gamma).
#   thres               kept for interface compatibility; not used, because
#                       the returned estimate is NOT thresholded.
#   tol                 kept for interface compatibility; never used.
#
# Returns an unnamed list of 17 elements:
#   [[1]]  estimate at the iteration with the smallest validation hinge loss
#   [[2]]  elapsed time of the iteration loop
#   [[3]]  ||estimate - Beta.true|| / ||Beta.true||
#   [[4]]  average hinge loss of the estimate on the test set
#   [[5]]  number of non-zero entries of the estimate
#   [[6]]  ||beta_t - Beta.true|| / ||Beta.true|| for every iteration t
#   [[7]]..[[17]]  per-iteration values of coefficients 1, 2, 3, 4, 394, 395,
#          396, 397, 398, 399 and 400 (NA when the design has fewer columns).
over <- function(main.xtr, main.ytr, main.xva, main.yva, main.xts, main.yts,
                 main.T, Beta.true, main.alpha, main.eta, main.gamma, thres,
                 tol) {
  main.n <- nrow(main.xtr)
  main.p <- ncol(main.xtr)
  main.nva <- nrow(main.xva)
  main.nts <- nrow(main.xts)

  main.w <- rep(main.alpha, main.p)
  main.v <- rep(main.alpha, main.p)
  main.mu <- rep(1 / (main.n * main.gamma), main.n)

  # Loop invariants, computed once.
  ones.n <- rep(1, main.n)
  ones.p <- t(rep(1, main.p))
  step <- 2 * main.eta / main.n
  beta.true.norm <- norm(Beta.true, type = "2")
  tracked <- c(1:4, 394:400)

  main.betaRecord <- matrix(0, main.T, main.p)
  main.error <- rep(10, main.T)
  mse.record <- numeric(main.T)
  tracked.record <- matrix(0, main.T, length(tracked))

  # Entry j of the result is sum_i ymu_i * coef_j * x_ij. The Kronecker form
  # of the original code is kept so the floating-point results are unchanged.
  scaled_grad <- function(coef, ymu) {
    t((ymu %x% ones.p) * (ones.n %x% t(coef)) * main.xtr) %*% ones.n
  }

  start <- proc.time()
  for (main.t in seq_len(main.T)) {
    ymu <- main.ytr * main.mu
    main.w <- main.w + step * scaled_grad(main.w, ymu)
    main.v <- main.v - step * scaled_grad(main.v, ymu)
    main.betanew <- main.w * main.w - main.v * main.v

    tracked.record[main.t, ] <- main.betanew[tracked]
    main.betaRecord[main.t, ] <- main.betanew

    # Average hinge loss on the validation set.
    main.error[main.t] <-
      sum(pmax(1 - main.yva * (main.xva %*% main.betanew), 0)) / main.nva

    # Clip the scaled margins to [0, 1]; this equals the row-wise median of
    # (0, value, 1) used previously, without calling median() once per row.
    scaled.margin <-
      (1 - main.ytr * (main.xtr %*% main.betanew)) / (main.n * main.gamma)
    main.mu <- as.vector(pmin(pmax(scaled.margin, 0), 1))

    mse.record[main.t] <-
      norm(main.betanew - Beta.true, type = "2") / beta.true.norm
  }
  end <- proc.time()
  time_over <- (end - start)[3][[1]]

  # First iteration attaining the smallest validation loss.
  main.index <- which.min(main.error)
  # The estimate is deliberately left unthresholded: the earlier code computed
  # a thresholded version and immediately overwrote it with this raw row.
  betahat_over <- main.betaRecord[main.index, ]
  rrmse_over <- norm(betahat_over - Beta.true, type = "2") / beta.true.norm
  error_over <-
    sum(pmax(1 - main.yts * (main.xts %*% betahat_over), 0)) / main.nts
  supp_over <- sum(betahat_over != 0)

  c(
    list(betahat_over, time_over, rrmse_over, error_over, supp_over,
         mse.record),
    lapply(seq_along(tracked), function(j) tracked.record[, j])
  )
}

# ---- Small-initialisation experiment ----
# One data set is simulated from a logistic model with four active
# coefficients (all equal to 10). over() is then run on it with four different
# initial values main.alpha, and the paths of the first four coefficients are
# collected side by side.
set.seed(1995)
n <- 600
p <- 400
Beta.true <- rep(0, p)
Beta.true[1:4] <- 10
X <- matrix(rnorm(n * p, 0, 1), n, p)
px.gen <- 1 / (1 + exp(-X %*% Beta.true))
y.gen <- 2 * rbinom(n, 1, px.gen) - 1   # labels coded -1 / +1

# Rows 1-200 train, 201-400 validate, 401-600 test.
main.xtr <- X[1:200, ]
main.ytr <- y.gen[1:200]
main.xva <- X[201:400, ]
main.yva <- y.gen[201:400]
main.xts <- X[401:600, ]
main.yts <- y.gen[401:600]

thres_over <- 1e-3
main.T <- 700
main.eta <- 0.5
main.gamma <- 1e-4

main.alpha <- 1e-4
overre_4 <- over(main.xtr, main.ytr, main.xva, main.yva, main.xts, main.yts,
                 main.T, Beta.true, main.alpha, main.eta, main.gamma,
                 thres_over)
main.alpha <- 1e-6
overre_6 <- over(main.xtr, main.ytr, main.xva, main.yva, main.xts, main.yts,
                 main.T, Beta.true, main.alpha, main.eta, main.gamma,
                 thres_over)
main.alpha <- 1e-8
overre_8 <- over(main.xtr, main.ytr, main.xva, main.yva, main.xts, main.yts,
                 main.T, Beta.true, main.alpha, main.eta, main.gamma,
                 thres_over)
main.alpha <- 1e-10
overre_10 <- over(main.xtr, main.ytr, main.xva, main.yva, main.xts, main.yts,
                  main.T, Beta.true, main.alpha, main.eta, main.gamma,
                  thres_over)

######### result summary
# Column 1 is the iteration index; the remaining columns are elements 7-10 of
# each result (the paths of coefficients 1-4), grouped by main.alpha in the
# order 1e-4, 1e-6, 1e-8, 1e-10. The index runs to main.T rather than a
# literal 700 so it always matches the length of the recorded paths.
datas <- cbind(
  seq(1, main.T, 1),
  overre_4[[7]], overre_4[[8]], overre_4[[9]], overre_4[[10]],
  overre_6[[7]], overre_6[[8]], overre_6[[9]], overre_6[[10]],
  overre_8[[7]], overre_8[[8]], overre_8[[9]], overre_8[[10]],
  overre_10[[7]], overre_10[[8]], overre_10[[9]], overre_10[[10]]
)




######### sensitivity analysis of gamma

## initial
# Over-parameterised gradient descent for a sparse linear classifier,
# recording the coefficient path (definition used by the gamma sensitivity
# analysis below).
#
# The coefficient vector is written as beta = w * w - v * v, with every entry
# of w and v started at `main.alpha`. Each of the `main.T` iterations rescales
# w and v multiplicatively using the per-observation weights mu, recomputes
# beta, stores it, evaluates the average hinge loss on the validation set and
# then refreshes mu as (1 - y * x'beta) / (n * gamma) clipped to [0, 1].
#
# Arguments:
#   main.xtr, main.ytr  training design matrix and labels (the callers in this
#                       file pass labels coded -1 / +1).
#   main.xva, main.yva  validation design matrix and labels.
#   main.xts, main.yts  test design matrix and labels.
#   main.T              number of iterations.
#   Beta.true           reference coefficient vector used for error reporting.
#   main.alpha          initial value of every entry of w and v.
#   main.eta            step size.
#   main.gamma          scale of the mu update; mu starts at 1 / (n * gamma).
#   thres               kept for interface compatibility; not used, because
#                       the returned estimate is NOT thresholded.
#   tol                 kept for interface compatibility; never used.
#
# Returns an unnamed list of 17 elements:
#   [[1]]  estimate at the iteration with the smallest validation hinge loss
#   [[2]]  elapsed time of the iteration loop
#   [[3]]  ||estimate - Beta.true|| / ||Beta.true||
#   [[4]]  average hinge loss of the estimate on the test set
#   [[5]]  number of non-zero entries of the estimate
#   [[6]]  ||beta_t - Beta.true|| / ||Beta.true|| for every iteration t
#   [[7]]..[[17]]  per-iteration values of coefficients 1, 2, 3, 4, 394, 395,
#          396, 397, 398, 399 and 400 (NA when the design has fewer columns).
over <- function(main.xtr, main.ytr, main.xva, main.yva, main.xts, main.yts,
                 main.T, Beta.true, main.alpha, main.eta, main.gamma, thres,
                 tol) {
  main.n <- nrow(main.xtr)
  main.p <- ncol(main.xtr)
  main.nva <- nrow(main.xva)
  main.nts <- nrow(main.xts)

  main.w <- rep(main.alpha, main.p)
  main.v <- rep(main.alpha, main.p)
  main.mu <- rep(1 / (main.n * main.gamma), main.n)

  # Loop invariants, computed once.
  ones.n <- rep(1, main.n)
  ones.p <- t(rep(1, main.p))
  step <- 2 * main.eta / main.n
  beta.true.norm <- norm(Beta.true, type = "2")
  tracked <- c(1:4, 394:400)

  main.betaRecord <- matrix(0, main.T, main.p)
  main.error <- rep(10, main.T)
  mse.record <- numeric(main.T)
  tracked.record <- matrix(0, main.T, length(tracked))

  # Entry j of the result is sum_i ymu_i * coef_j * x_ij. The Kronecker form
  # of the original code is kept so the floating-point results are unchanged.
  scaled_grad <- function(coef, ymu) {
    t((ymu %x% ones.p) * (ones.n %x% t(coef)) * main.xtr) %*% ones.n
  }

  start <- proc.time()
  for (main.t in seq_len(main.T)) {
    ymu <- main.ytr * main.mu
    main.w <- main.w + step * scaled_grad(main.w, ymu)
    main.v <- main.v - step * scaled_grad(main.v, ymu)
    main.betanew <- main.w * main.w - main.v * main.v

    tracked.record[main.t, ] <- main.betanew[tracked]
    main.betaRecord[main.t, ] <- main.betanew

    # Average hinge loss on the validation set.
    main.error[main.t] <-
      sum(pmax(1 - main.yva * (main.xva %*% main.betanew), 0)) / main.nva

    # Clip the scaled margins to [0, 1]; this equals the row-wise median of
    # (0, value, 1) used previously, without calling median() once per row.
    scaled.margin <-
      (1 - main.ytr * (main.xtr %*% main.betanew)) / (main.n * main.gamma)
    main.mu <- as.vector(pmin(pmax(scaled.margin, 0), 1))

    mse.record[main.t] <-
      norm(main.betanew - Beta.true, type = "2") / beta.true.norm
  }
  end <- proc.time()
  time_over <- (end - start)[3][[1]]

  # First iteration attaining the smallest validation loss.
  main.index <- which.min(main.error)
  # The estimate is deliberately left unthresholded: the earlier code computed
  # a thresholded version and immediately overwrote it with this raw row.
  betahat_over <- main.betaRecord[main.index, ]
  rrmse_over <- norm(betahat_over - Beta.true, type = "2") / beta.true.norm
  error_over <-
    sum(pmax(1 - main.yts * (main.xts %*% betahat_over), 0)) / main.nts
  supp_over <- sum(betahat_over != 0)

  c(
    list(betahat_over, time_over, rrmse_over, error_over, supp_over,
         mse.record),
    lapply(seq_along(tracked), function(j) tracked.record[, j])
  )
}



# ---- Gamma sensitivity simulation ----
# For every value in `gammas`, over() is run on 30 independently simulated
# data sets (seeds 1980..K). For each run the final-iteration relative error
# and the final values of the first four coefficients are stored.
thres_over <- 1e-3
main.alpha <- 1e-8
main.T <- 700
main.eta <- 0.5
K <- 29 + 1980

# NOTE(review): the seventh value (7.5e-3) breaks the otherwise increasing
# grid and is dropped again before the signal plots; it may be a typo for
# 7.5e-4. Left unchanged here, confirm before rerunning.
gammas <- c(2.5e-5, 5e-5, 7.5e-5, 1e-4, 2.5e-4, 5e-4, 7.5e-3, 1e-3)

# One replicate per seed; result vectors hold exactly one slot per replicate.
rep_seeds <- 1980:K

# Settings that do not depend on the seed or on gamma.
n <- 600
p <- 400
Beta.true <- rep(0, p)
Beta.true[1:4] <- 10

mse_list <- vector("list", length(gammas))
sig1_list <- vector("list", length(gammas))
sig2_list <- vector("list", length(gammas))
sig3_list <- vector("list", length(gammas))
sig4_list <- vector("list", length(gammas))

for (i in seq_along(gammas)) {
  mse_i <- numeric(length(rep_seeds))
  sig1_i <- numeric(length(rep_seeds))
  sig2_i <- numeric(length(rep_seeds))
  sig3_i <- numeric(length(rep_seeds))
  sig4_i <- numeric(length(rep_seeds))
  print(i)
  main.gamma <- gammas[i]

  for (r in seq_along(rep_seeds)) {
    k <- rep_seeds[r]
    set.seed(k)
    seed <- k

    # Logistic model; labels coded -1 / +1.
    X <- matrix(rnorm(n * p, 0, 1), n, p)
    px.gen <- 1 / (1 + exp(-X %*% Beta.true))
    y.gen <- 2 * rbinom(n, 1, px.gen) - 1

    # Rows 1-200 train, 201-400 validate, 401-600 test.
    main.xtr <- X[1:200, ]
    main.ytr <- y.gen[1:200]
    main.xva <- X[201:400, ]
    main.yva <- y.gen[201:400]
    main.xts <- X[401:600, ]
    main.yts <- y.gen[401:600]

    overre_1 <- over(main.xtr, main.ytr, main.xva, main.yva, main.xts,
                     main.yts, main.T, Beta.true, main.alpha, main.eta,
                     main.gamma, thres_over)

    # Value at the last iteration (main.T, not a literal 700).
    mse_i[r] <- overre_1[[6]][main.T]
    sig1_i[r] <- overre_1[[7]][main.T]
    sig2_i[r] <- overre_1[[8]][main.T]
    sig3_i[r] <- overre_1[[9]][main.T]
    sig4_i[r] <- overre_1[[10]][main.T]
  }

  mse_list[[i]] <- mse_i
  sig1_list[[i]] <- sig1_i
  sig2_list[[i]] <- sig2_i
  sig3_list[[i]] <- sig3_i
  sig4_list[[i]] <- sig4_i
}


library(ggplot2)
library(tidyr)


# Boxplot of the final estimation error for every gamma value.
# Only the last 30 entries of each per-gamma vector are plotted.
filtered_mse_list <- lapply(mse_list, tail, 30)

# Stack the per-gamma vectors into one long data frame (gamma, mse).
long_data <- do.call(
  rbind,
  lapply(seq_along(gammas), function(idx) {
    data.frame(gamma = gammas[idx], mse = filtered_mse_list[[idx]])
  })
)

# Axis label for one gamma: scientific notation below 1e-3, plain otherwise.
label_gamma <- function(gamma_val) {
  if (gamma_val < 1e-3) sprintf("%.1e", gamma_val) else as.character(gamma_val)
}

ggplot(
  long_data,
  aes(
    x = factor(gamma, levels = gammas,
               labels = vapply(gammas, label_gamma, character(1))),
    y = mse
  )
) +
  geom_boxplot(fill = "#619CFF") +
  scale_x_discrete(name = "Gamma Values") +
  scale_y_continuous(name = "Estimation Error")
#+ggtitle("Boxplots of MSE for Different Gamma Values")



###################################
# Boxplot of the final value of coefficient 1 for every gamma except the
# seventh. The same index is removed from the gamma grid and from the results
# so each vector stays paired with its own gamma. Dropping it from the results
# alone left 7 result vectors for 8 gammas, which fails at element 8.
# sig1_list itself is not modified, so this section can be rerun safely.
gammas_sig1 <- gammas[-7]
filtered_sig1_list <- lapply(sig1_list[-7], function(x) tail(x, 30))

long_data <- do.call(rbind, lapply(seq_along(gammas_sig1), function(i) {
  data.frame(
    gamma = gammas_sig1[i],
    mse = filtered_sig1_list[[i]]
  )
}))

# Axis label for one gamma: scientific notation below 1e-3, plain otherwise.
label_gamma <- function(gamma_val) {
  if (gamma_val < 1e-3) {
    return(sprintf("%.1e", gamma_val))
  } else {
    return(as.character(gamma_val))
  }
}

ggplot(long_data,
       aes(x = factor(gamma, levels = gammas_sig1,
                      labels = vapply(gammas_sig1, label_gamma, character(1))),
           y = mse)) +
  geom_boxplot(fill = "#F8766D") +
  scale_x_discrete(name = "Gamma Values") +
  scale_y_continuous(name = "Signal 1")
#+ggtitle("Boxplots of MSE for Different Gamma Values")

################sig2
# Boxplot of the final value of coefficient 2 for every gamma except the
# seventh. The same index is removed from the gamma grid and from the results
# so each vector stays paired with its own gamma. Dropping it from the results
# alone left 7 result vectors for 8 gammas, which fails at element 8.
# sig2_list itself is not modified, so this section can be rerun safely.
gammas_sig2 <- gammas[-7]
filtered_sig2_list <- lapply(sig2_list[-7], function(x) tail(x, 30))

long_data <- do.call(rbind, lapply(seq_along(gammas_sig2), function(i) {
  data.frame(
    gamma = gammas_sig2[i],
    mse = filtered_sig2_list[[i]]
  )
}))

# Axis label for one gamma: scientific notation below 1e-3, plain otherwise.
label_gamma <- function(gamma_val) {
  if (gamma_val < 1e-3) {
    return(sprintf("%.1e", gamma_val))
  } else {
    return(as.character(gamma_val))
  }
}

ggplot(long_data,
       aes(x = factor(gamma, levels = gammas_sig2,
                      labels = vapply(gammas_sig2, label_gamma, character(1))),
           y = mse)) +
  geom_boxplot(fill = "#00BA38") +
  scale_x_discrete(name = "Gamma Values") +
  scale_y_continuous(name = "Signal 2")
#+ggtitle("Boxplots of MSE for Different Gamma Values")





################sig4
# Boxplot of the final value of coefficient 4 for every gamma except the
# seventh. The same index is removed from the gamma grid and from the results
# so each vector stays paired with its own gamma. Dropping it from the results
# alone left 7 result vectors for 8 gammas, which fails at element 8.
# sig4_list itself is not modified, so this section can be rerun safely.
gammas_sig4 <- gammas[-7]
filtered_sig4_list <- lapply(sig4_list[-7], function(x) tail(x, 30))

long_data <- do.call(rbind, lapply(seq_along(gammas_sig4), function(i) {
  data.frame(
    gamma = gammas_sig4[i],
    mse = filtered_sig4_list[[i]]
  )
}))

# Axis label for one gamma: scientific notation below 1e-3, plain otherwise.
label_gamma <- function(gamma_val) {
  if (gamma_val < 1e-3) {
    return(sprintf("%.1e", gamma_val))
  } else {
    return(as.character(gamma_val))
  }
}

ggplot(long_data,
       aes(x = factor(gamma, levels = gammas_sig4,
                      labels = vapply(gammas_sig4, label_gamma, character(1))),
           y = mse)) +
  geom_boxplot(fill = "#00BA38") +
  scale_x_discrete(name = "Gamma Values") +
  scale_y_continuous(name = "Signal 4")
#+ggtitle("Boxplots of MSE for Different Gamma Values")



###################################
# Boxplot of the final value of coefficient 3 for every gamma except the
# seventh. The same index is removed from the gamma grid and from the results
# so each vector stays paired with its own gamma. Dropping it from the results
# alone left 7 result vectors for 8 gammas, which fails at element 8.
# sig3_list itself is not modified, so this section can be rerun safely.
gammas_sig3 <- gammas[-7]
filtered_sig3_list <- lapply(sig3_list[-7], function(x) tail(x, 30))

long_data <- do.call(rbind, lapply(seq_along(gammas_sig3), function(i) {
  data.frame(
    gamma = gammas_sig3[i],
    mse = filtered_sig3_list[[i]]
  )
}))

# Axis label for one gamma: scientific notation below 1e-3, plain otherwise.
label_gamma <- function(gamma_val) {
  if (gamma_val < 1e-3) {
    return(sprintf("%.1e", gamma_val))
  } else {
    return(as.character(gamma_val))
  }
}

ggplot(long_data,
       aes(x = factor(gamma, levels = gammas_sig3,
                      labels = vapply(gammas_sig3, label_gamma, character(1))),
           y = mse)) +
  geom_boxplot(fill = "#F8766D") +
  scale_x_discrete(name = "Gamma Values") +
  scale_y_continuous(name = "Signal 3")
#+ggtitle("Boxplots of MSE for Different Gamma Values")


############### Model1 sim
####model 1 sim


# Classification summary for labels coded -1 / +1.
#
# Arguments:
#   main.yts  vector of true labels.
#   y.hat     vector of predicted labels, at least as long as `main.yts`;
#             only its first length(main.yts) entries are compared.
#
# Returns an unnamed list of three numbers:
#   [[1]]  share of positions where prediction and truth agree
#   [[2]]  positions where both are +1, divided by the number of true labels
#          that are not -1
#   [[3]]  positions where both are -1, divided by the number of true -1
# A ratio whose denominator is 0 is NaN (for example with empty input).
pre <- function(main.yts, y.hat) {
  n <- length(main.yts)
  stopifnot(length(y.hat) >= n)
  # Callers may pass a longer prediction vector; only the head is relevant.
  y.hat <- y.hat[seq_len(n)]

  num0 <- sum(main.yts == -1)
  num1 <- n - num0

  label_sum <- main.yts + y.hat
  ac <- sum(main.yts == y.hat)
  se <- sum(label_sum == 2)    # truth and prediction both +1
  sp <- sum(label_sum == -2)   # truth and prediction both -1

  list(ac / n, se / num1, sp / num0)
}


# Over-parameterised gradient descent for a sparse linear classifier,
# returning estimation and prediction summaries (used by the model
# simulations below).
#
# The coefficient vector is written as beta = w * w - v * v, with every entry
# of w and v started at `main.alpha`. Each iteration rescales w and v
# multiplicatively using the per-observation weights mu, recomputes beta,
# stores it, evaluates the average hinge loss on the validation set and then
# refreshes mu as (1 - y * x'beta) / (n * gamma) clipped to [0, 1]. The loop
# stops early once every entry of mu is 0. The stored iterate with the
# smallest validation loss is hard-thresholded with threshold() and evaluated.
#
# Arguments:
#   main.xtr, main.ytr  training design matrix and labels (-1 / +1).
#   main.xva, main.yva  validation design matrix and labels.
#   main.xts, main.yts  test design matrix and labels.
#   main.T              maximum number of iterations.
#   Beta.true           true coefficient vector; its non-zero entries define
#                       the true support.
#   main.alpha          initial value of every entry of w and v.
#   main.eta            step size.
#   main.gamma          scale of the mu update; mu starts at 1 / (n * gamma).
#   thres               threshold passed to threshold().
#   tol                 kept for interface compatibility; never used.
#
# Returns an unnamed list of 10 elements:
#   [[1]]  thresholded estimate
#   [[2]]  elapsed time of the iteration loop
#   [[3]]  distance between estimate and Beta.true after scaling both to
#          unit Euclidean norm
#   [[4]]  ||estimate - Beta.true|| / ||Beta.true||
#   [[5]], [[6]], [[7]]  the three values returned by pre() for test labels
#          drawn at random with success probability pnorm(x'estimate)
#   [[8]]  number of non-zero entries of the estimate
#   [[9]]  number of non-zero estimates where Beta.true is 0
#   [[10]] number of zero estimates where Beta.true is non-zero
over <- function(main.xtr, main.ytr, main.xva, main.yva, main.xts, main.yts,
                 main.T, Beta.true, main.alpha, main.eta, main.gamma, thres,
                 tol) {
  main.n <- nrow(main.xtr)
  main.p <- ncol(main.xtr)
  main.nva <- nrow(main.xva)
  main.nts <- nrow(main.xts)

  main.w <- rep(main.alpha, main.p)
  main.v <- rep(main.alpha, main.p)
  main.mu <- rep(1 / (main.n * main.gamma), main.n)

  # Loop invariants, computed once.
  ones.n <- rep(1, main.n)
  ones.p <- t(rep(1, main.p))
  step <- 2 * main.eta / main.n

  main.betaRecord <- matrix(0, main.T, main.p)
  # Iterations skipped by the early stop keep the placeholder loss 10.
  main.error <- rep(10, main.T)

  # Entry j of the result is sum_i ymu_i * coef_j * x_ij. The Kronecker form
  # of the original code is kept so the floating-point results are unchanged.
  scaled_grad <- function(coef, ymu) {
    t((ymu %x% ones.p) * (ones.n %x% t(coef)) * main.xtr) %*% ones.n
  }

  start <- proc.time()
  for (main.t in seq_len(main.T)) {
    ymu <- main.ytr * main.mu
    main.w <- main.w + step * scaled_grad(main.w, ymu)
    main.v <- main.v - step * scaled_grad(main.v, ymu)
    main.betanew <- main.w * main.w - main.v * main.v
    main.betaRecord[main.t, ] <- main.betanew

    # Average hinge loss on the validation set.
    main.error[main.t] <-
      sum(pmax(1 - main.yva * (main.xva %*% main.betanew), 0)) / main.nva

    # Clip the scaled margins to [0, 1]; this equals the row-wise median of
    # (0, value, 1) used previously, without calling median() once per row.
    scaled.margin <-
      (1 - main.ytr * (main.xtr %*% main.betanew)) / (main.n * main.gamma)
    main.mu <- as.vector(pmin(pmax(scaled.margin, 0), 1))

    # With all weights at 0 the next update would leave w and v unchanged.
    if (norm(main.mu, type = "2") == 0) {
      break
    }
  }
  end <- proc.time()
  time_over <- (end - start)[3][[1]]

  # First iteration attaining the smallest validation loss.
  main.index <- which.min(main.error)
  betahat_over <- threshold(main.betaRecord[main.index, ], main.p, thres)

  beta.true.norm <- norm(Beta.true, type = "2")
  rrmse_over <- norm(
    betahat_over / norm(betahat_over, type = "2") - Beta.true / beta.true.norm,
    type = "2"
  )
  rrmse_over_non <- norm(betahat_over - Beta.true, type = "2") / beta.true.norm

  # One random label per test row. The draw count is the test-set size; the
  # earlier code read a global `n` here and so depended on the calling script.
  px.over <- pnorm(main.xts %*% betahat_over)
  y.hat <- 2 * rbinom(main.nts, 1, px.over) - 1
  perf <- pre(main.yts, y.hat)
  ac_over <- perf[[1]]
  se_over <- perf[[2]]
  sp_over <- perf[[3]]

  # Support-recovery counts follow the support of Beta.true instead of the
  # fixed index ranges 1:4 and 5:400, which are only right when exactly the
  # first four of 400 coefficients are active.
  is.signal <- Beta.true != 0
  supp_over <- sum(betahat_over != 0)
  falsepos_over <- sum(betahat_over[!is.signal] != 0)
  trueneg_over <- sum(betahat_over[is.signal] == 0)

  list(betahat_over, time_over, rrmse_over, rrmse_over_non, ac_over, se_over,
       sp_over, supp_over, falsepos_over, trueneg_over)
}
# Oracle benchmark: a linear SVM fitted on the first five predictors only.
#
# The model is fitted with caret::train(method = "svmLinear") using 5-fold
# cross-validation. caret is called through its namespace because this file
# never attaches it.
#
# Arguments:
#   main.xtr, main.ytr  training design matrix and labels (-1 / +1).
#   main.xts, main.yts  test design matrix and labels.
#   Beta.true           true coefficient vector; only entries 1:5 are used.
#
# Returns an unnamed list of 7 elements:
#   [[1]]  1 x 5 coefficient matrix taken from the fitted model
#   [[2]]  elapsed time of the fit
#   [[3]]  distance between estimate and Beta.true[1:5] after scaling both to
#          unit Euclidean norm
#   [[4]]  ||estimate - Beta.true[1:5]|| / ||Beta.true[1:5]||
#   [[5]], [[6]], [[7]]  the three values returned by pre() for test labels
#          drawn at random with success probability pnorm(x'estimate)
oracle <- function(main.xtr, main.ytr, main.xts, main.yts, Beta.true) {
  main.nts <- nrow(main.xts)
  oracle.cols <- 1:5

  start <- proc.time()
  train_control <- caret::trainControl(method = "cv", number = 5)
  # The response column is named after the symbol `main.ytr`, which is the
  # name the model formula below refers to.
  datac <- as.data.frame(cbind(main.xtr[, oracle.cols], main.ytr))
  datac$main.ytr <- as.factor(datac$main.ytr)
  fit <- caret::train(main.ytr ~ ., data = datac, method = "svmLinear",
                      trControl = train_control)
  end <- proc.time()
  time_oracle <- (end - start)[3][[1]]

  # NOTE(review): presumably `coef` holds the support-vector weights and
  # `xmatrix` the support vectors of the underlying fit, so their product is
  # the linear weight vector - confirm, including whether the fit rescales
  # the predictors internally.
  coefs <- fit$finalModel@coef[[1]]
  mat <- fit$finalModel@xmatrix[[1]]
  betahat_oracle <- coefs %*% mat

  beta.sub <- Beta.true[oracle.cols]
  beta.sub.norm <- norm(beta.sub, type = "2")
  rrmse_oracle <- norm(
    betahat_oracle / norm(betahat_oracle, type = "2") - beta.sub / beta.sub.norm,
    type = "2"
  )
  rrmse_oracle_non <-
    norm(betahat_oracle - beta.sub, type = "2") / beta.sub.norm

  # One random label per test row. The draw count is the test-set size; the
  # earlier code read a global `n` here and so depended on the calling script.
  px.oracle <- pnorm(main.xts[, oracle.cols] %*% as.vector(betahat_oracle))
  y.hat <- 2 * rbinom(main.nts, 1, px.oracle) - 1
  perf <- pre(main.yts, y.hat)
  ac_oracle <- perf[[1]]
  se_oracle <- perf[[2]]
  sp_oracle <- perf[[3]]

  list(betahat_oracle, time_oracle, rrmse_oracle, rrmse_oracle_non, ac_oracle,
       se_oracle, sp_oracle)
}




# ---- Model 1 simulation ----
# Probit model with correlated Gaussian predictors, Cov(x_i, x_j) =
# rho^|i - j|, and four active coefficients equal to 1.1. For each of the 30
# seeds (1980..K) one data set is drawn, over() and oracle() are run on it and
# their summaries are stored row by row.
S <- 1

K <- 29 + 1980
rep_seeds <- 1980:K
n_rep <- length(rep_seeds)

diff_mse_over <- matrix(0, n_rep, S)
diff_mse_over_non <- matrix(0, n_rep, S)
diff_ac_over <- matrix(0, n_rep, S)
diff_se_over <- matrix(0, n_rep, S)
diff_sp_over <- matrix(0, n_rep, S)
diff_supp_over <- matrix(0, n_rep, S)
diff_false_over <- matrix(0, n_rep, S)
diff_true_over <- matrix(0, n_rep, S)

diff_mse_oracle <- matrix(0, n_rep, S)
diff_mse_oracle_non <- matrix(0, n_rep, S)
diff_ac_oracle <- matrix(0, n_rep, S)
diff_se_oracle <- matrix(0, n_rep, S)
diff_sp_oracle <- matrix(0, n_rep, S)

thres_lasso <- 5 * 1e-3
thres_over <- 5 * 1e-3
main.T <- 700
main.eta <- 0.5
main.gamma <- 1e-4
main.alpha <- 1e-8

# Design settings that do not depend on the seed; built once instead of being
# refilled entry by entry for every replicate.
n <- 600
p <- 400
rho <- 0.4
mu <- rep(0, p)
Sigma <- rho^abs(outer(seq_len(p), seq_len(p), "-"))
Beta.true <- rep(0, p)
Beta.true[1:4] <- c(1.1, 1.1, 1.1, 1.1)

for (r in seq_len(n_rep)) {
  k <- rep_seeds[r]
  set.seed(k)
  seed <- k
  for (s in seq_len(S)) {
    # MASS is called through its namespace: this file never attaches it.
    X <- MASS::mvrnorm(n, mu, Sigma)

    px.gen <- pnorm(X %*% Beta.true, 0, 1)
    y.gen <- 2 * rbinom(n, 1, px.gen) - 1
    # y.la is not used below, but the draw is kept so that the random-number
    # stream seen by over() and oracle() stays the same.
    y.la <- rbinom(n, 1, px.gen)

    # Rows 1-200 train, 201-400 validate, 401-600 test.
    main.xtr <- X[1:200, ]
    main.ytr <- y.gen[1:200]
    main.xva <- X[201:400, ]
    main.yva <- y.gen[201:400]
    main.xts <- X[401:600, ]
    main.yts <- y.gen[401:600]

    overre <- over(main.xtr, main.ytr, main.xva, main.yva, main.xts, main.yts,
                   main.T, Beta.true, main.alpha, main.eta, main.gamma,
                   thres_over)
    oraclere <- oracle(main.xtr, main.ytr, main.xts, main.yts, Beta.true)

    diff_mse_over[r, s] <- overre[[3]]
    diff_mse_over_non[r, s] <- overre[[4]]
    diff_ac_over[r, s] <- overre[[5]]
    diff_se_over[r, s] <- overre[[6]]
    diff_sp_over[r, s] <- overre[[7]]

    diff_mse_oracle[r, s] <- oraclere[[3]]
    diff_mse_oracle_non[r, s] <- oraclere[[4]]
    diff_ac_oracle[r, s] <- oraclere[[5]]
    diff_se_oracle[r, s] <- oraclere[[6]]
    diff_sp_oracle[r, s] <- oraclere[[7]]
  }
  # Progress output: the seed of the next replicate, as before.
  print(paste(k + 1))
}

# Model 1, figure 1: normalised estimation error, GD against the oracle.
df_over <- data.frame(value = as.vector(diff_mse_over), method = "GD")
df_oracle <- data.frame(value = as.vector(diff_mse_oracle), method = "Oracle")
df <- rbind(df_over, df_oracle)

ggplot(data = df, mapping = aes(x = method, y = value, fill = method)) +
  geom_boxplot() +
  scale_fill_manual(values = c("GD" = "#619CFF", "Oracle" = "#00BA38")) +
  theme_minimal() +
  labs(title = "Boxplot of diff_mse_over and diff_mse_oracle", y = "Value") +
  theme(legend.position = "top")

# Model 1, figure 2: prediction accuracy, GD against the oracle.
df_over <- data.frame(value = as.vector(diff_ac_over), method = "GD")
df_oracle <- data.frame(value = as.vector(diff_ac_oracle), method = "Oracle")
df <- rbind(df_over, df_oracle)

ggplot(data = df, mapping = aes(x = method, y = value, fill = method)) +
  geom_boxplot() +
  scale_fill_manual(values = c("GD" = "#619CFF", "Oracle" = "#00BA38")) +
  theme_gray() +
  scale_y_continuous(name = "Prediction Accuracy") +
  theme(legend.position = "top")


############### Model2 sim
##model 2 sim


# ---- Model 2 simulation ----
# Two-class Gaussian model: labels are +1 / -1 with probability 0.5 each, and
# a row with label y is drawn from N(y * mu, Sigma). mu is non-zero in its
# first five entries and Sigma has off-diagonal entries -0.2 among those five
# predictors. For each of the 30 seeds (1980..K) one data set is drawn, over()
# and oracle() are run on it and their summaries are stored row by row.
S <- 1

K <- 29 + 1980
rep_seeds <- 1980:K
n_rep <- length(rep_seeds)

diff_mse_over <- matrix(0, n_rep, S)
diff_mse_over_non <- matrix(0, n_rep, S)
diff_ac_over <- matrix(0, n_rep, S)
diff_se_over <- matrix(0, n_rep, S)
diff_sp_over <- matrix(0, n_rep, S)
diff_supp_over <- matrix(0, n_rep, S)
diff_false_over <- matrix(0, n_rep, S)
diff_true_over <- matrix(0, n_rep, S)

diff_mse_oracle <- matrix(0, n_rep, S)
diff_mse_oracle_non <- matrix(0, n_rep, S)
diff_ac_oracle <- matrix(0, n_rep, S)
diff_se_oracle <- matrix(0, n_rep, S)
diff_sp_oracle <- matrix(0, n_rep, S)

thres_lasso <- 5 * 1e-3
thres_over <- 5 * 1e-3
main.T <- 700
main.eta <- 0.5
main.gamma <- 1e-4
main.alpha <- 1e-8

# Design settings that do not depend on the seed; built once.
n <- 600
p <- 400
signal_idx <- 1:5
mu <- rep(0, p)
mu[signal_idx] <- c(0.1, 0.2, 0.3, 0.4, 0.5)
Sigma <- diag(p)
Sigma[signal_idx, signal_idx] <- -0.2
diag(Sigma) <- 1
Beta.true <- rep(0, p)
Beta.true[signal_idx] <- c(1.39, 1.47, 1.56, 1.65, 1.74)

for (r in seq_len(n_rep)) {
  k <- rep_seeds[r]
  set.seed(k)
  seed <- k
  for (s in seq_len(S)) {
    y.gen <- 2 * rbinom(n, 1, 0.5) - 1
    X <- matrix(0, n, p)
    # Rows are drawn one at a time, in order, so the random-number stream is
    # the same as before. MASS is called through its namespace because this
    # file never attaches it.
    for (i in seq_len(n)) {
      class_mean <- if (y.gen[i] == 1) mu else -mu
      X[i, ] <- MASS::mvrnorm(1, class_mean, Sigma)
    }

    # Rows 1-200 train, 201-400 validate, 401-600 test.
    main.xtr <- X[1:200, ]
    main.ytr <- y.gen[1:200]
    main.xva <- X[201:400, ]
    main.yva <- y.gen[201:400]
    main.xts <- X[401:600, ]
    main.yts <- y.gen[401:600]

    overre <- over(main.xtr, main.ytr, main.xva, main.yva, main.xts, main.yts,
                   main.T, Beta.true, main.alpha, main.eta, main.gamma,
                   thres_over)
    oraclere <- oracle(main.xtr, main.ytr, main.xts, main.yts, Beta.true)

    diff_mse_over[r, s] <- overre[[3]]
    diff_mse_over_non[r, s] <- overre[[4]]
    diff_ac_over[r, s] <- overre[[5]]
    diff_se_over[r, s] <- overre[[6]]
    diff_sp_over[r, s] <- overre[[7]]

    diff_mse_oracle[r, s] <- oraclere[[3]]
    diff_mse_oracle_non[r, s] <- oraclere[[4]]
    diff_ac_oracle[r, s] <- oraclere[[5]]
    diff_se_oracle[r, s] <- oraclere[[6]]
    diff_sp_oracle[r, s] <- oraclere[[7]]
  }
  # Progress output: the seed of the next replicate, as before.
  print(k + 1)
}


# Model 2 figure: non-normalised estimation error, GD against the oracle.
df_over <- data.frame(value = as.vector(diff_mse_over_non), method = "GD")
df_oracle <- data.frame(value = as.vector(diff_mse_oracle_non),
                        method = "Oracle")
df <- rbind(df_over, df_oracle)

ggplot(data = df, mapping = aes(x = method, y = value, fill = method)) +
  geom_boxplot() +
  scale_fill_manual(values = c("GD" = "#619CFF", "Oracle" = "#00BA38")) +
  theme_gray() +
  scale_y_continuous(name = "Estimation Error (Non-normalized)") +
  theme(legend.position = "top")





