
library(tidyverse)
library(readxl)
library(BSDA)
#install.packages("tidyverse")
#install.packages("BSDA")

#setwd("/Users/adefazio/Dropbox (Personal)/icml2020/banding_removal")
#source("results_scripts/process.R")
# Run in top directory of banding removal project
reader_names <- c("reader_3", "reader_4", "reader_5", "reader_6", "reader_2", "reader_1")

# Read each reader's cleaned questionnaire workbook, tag every row with the
# reader it came from, and keep the per-reader tables in a list named by reader.
raw_tbls <- lapply(reader_names, function(name) {
    sheet_path <- paste0("results_questionnaires/", name, "_cleaned.xlsx")
    sheet <- add_column(read_excel(sheet_path), reader = name)
    cat("Processed reader", name, '\n')
    sheet
})
names(raw_tbls) <- reader_names
#parse_factor(c("apple", "banana", "bananana"), levels = fruit)

# Stack all readers' answers into a single table.
tbl <- bind_rows(raw_tbls)
print("aggregated tables")
print(tbl)

###
# Load the blinding key. Its adversary/baseline/blurred columns hold the
# anonymized letter each method was shown under (matched against the readers'
# answers and the A/B/C columns later in this script).
key_raw <- read_csv("results_questionnaires/orientation_blind_assignments.csv")
# Reshape to long form: one row per (remaining key columns, method) pair with
# the method name in `method` and its letter in `letter`. pivot_longer() is the
# maintained replacement for the superseded gather(); only the row order
# differs, and `key` is only ever filtered, never read positionally.
key <- key_raw %>% pivot_longer(
    cols = c(adversary, baseline, blurred),
    names_to = "method",
    values_to = "letter"
)
print("deanonymization key loaded")

## Replace all references to A/B/C to the actual method name
##
# Each vector lists its questionnaire columns from worst to best, so a
# column's position in the vector is the rank it confers (higher is better).
banding_rank_columns <- c("Most banding", "2nd most banding", "Least banding")
detail_rank_columns <- c("Least detailed", "2nd least detailed", "Most detailed")
all_rank_columns <- c(banding_rank_columns, detail_rank_columns)

method_names <- c("adversary", "baseline", "blurred")

# Add a "<method> banding" and a "<method> detail" rank column per method.
# The 2.5 default is the tie rank: per the original author's note, ties in
# this data only occur between the two best options (ranks 2 and 3), so any
# method that is never assigned an explicit rank keeps 2.5.
for (method in method_names) {
    for (criterion in c("banding", "detail")) {
        tbl <- add_column(tbl, !!paste(method, criterion) := 2.5)
    }
}

# Translate every anonymized ranking answer into a numeric rank stored in the
# matching "<method> banding" / "<method> detail" column.
# For each questionnaire row and each rank column, the letter in the cell is
# looked up in `key` (by patient and letter) to find the method it stands for;
# that method then receives the rank implied by the column's position in
# banding_rank_columns / detail_rank_columns.
for (row in seq_len(nrow(tbl))) {
    patient <- tbl[[row, "Patient ID"]]
    for (column in all_rank_columns) {
        anon_val <- tbl[[row, column]]
        # "N/A" (or a blank cell) means no letter was given for this position.
        # Skip it for both criteria so the affected methods keep their default
        # tie rank instead of writing to a column built from an empty lookup.
        if (is.na(anon_val) || anon_val == "N/A") {
            next
        }

        patient_row <- key %>% filter(volume_id == patient & letter == anon_val)
        # A missing or duplicated key entry would silently corrupt the ranks,
        # so fail loudly, like the yes/no validation further down the script.
        if (nrow(patient_row) != 1) {
            stop(paste0(
                "expected exactly one key entry for patient ", patient,
                " and letter ", anon_val, ", found ", nrow(patient_row)
            ))
        }
        method_name <- patient_row[["method"]]

        if (column %in% banding_rank_columns) {
            rank_value <- match(column, banding_rank_columns)
            tbl[[row, paste(method_name, "banding")]] <- rank_value
        } else {
            rank_value <- match(column, detail_rank_columns)
            tbl[[row, paste(method_name, "detail")]] <- rank_value
        }
    }
}

print("Processed anon letters into ranks")

# Split the de-anonymized ranks into one table per criterion, each keeping
# the patient and reader identifiers alongside the three method columns.
id_columns <- c("Patient ID", "reader")
banding_ranks <- select(tbl, all_of(c(id_columns, paste(method_names, "banding"))))
detail_ranks <- select(tbl, all_of(c(id_columns, paste(method_names, "detail"))))

print(banding_ranks)
print(detail_ranks)

# Build a matrix of Bonferroni-corrected pairwise sign-test p-values.
#
# rank_table: table with one numeric column per entry of the global
#   `method_names`; the columns are passed pairwise to SIGN.test().
# Returns: a square numeric matrix whose rows and columns are named by
#   `method_names`. Entry [a, b] is the corrected p-value of
#   SIGN.test(rank_table[[a]], rank_table[[b]]); the diagonal stays NA.
build_pval_table <- function(rank_table) {
    n_methods <- length(method_names)
    # One hypothesis per unordered pair of methods (3 for three methods).
    n_comparisons <- choose(n_methods, 2)

    pval_tbl <- matrix(
        data = NA_real_, nrow = n_methods, ncol = n_methods,
        dimnames = list(method_names, method_names)
    )

    # Build table of results
    for (mn1 in method_names) {
        for (mn2 in setdiff(method_names, mn1)) {
            raw_p <- SIGN.test(rank_table[[mn1]], rank_table[[mn2]])$p.value
            # Bonferroni correction. The product is capped at 1 because a
            # p-value is a probability; this matches p.adjust(method = "bonferroni").
            pval_tbl[mn1, mn2] <- min(1, raw_p * n_comparisons)
        }
    }
    pval_tbl
}

# Per-patient banding rank of each method, averaged over every reader row
# recorded for that patient.
banding_by_patient <- group_by(banding_ranks, `Patient ID`)
banding_avg_ranks <- summarise(
    banding_by_patient,
    adversary = mean(`adversary banding`),
    baseline = mean(`baseline banding`),
    blurred = mean(`blurred banding`)
)

# Overall banding rank of each method across all rows (patients and readers).
banding_avg_ranks_total <- summarise(
    banding_ranks,
    adversary = mean(`adversary banding`),
    baseline = mean(`baseline banding`),
    blurred = mean(`blurred banding`)
)

print("Average banding ranks for each method")
print(banding_avg_ranks_total)
print("Pairwise comparison table")
# The pairwise tests run on the per-patient averages, not on the raw rows.
banding_pvals <- build_pval_table(banding_avg_ranks)
print(banding_pvals)

#####
# Per-patient detail rank of each method, averaged over every reader row
# recorded for that patient.
detail_by_patient <- group_by(detail_ranks, `Patient ID`)
detail_avg_ranks <- summarise(
    detail_by_patient,
    adversary = mean(`adversary detail`),
    baseline = mean(`baseline detail`),
    blurred = mean(`blurred detail`)
)

# Overall detail rank of each method across all rows (patients and readers).
detail_avg_ranks_total <- summarise(
    detail_ranks,
    adversary = mean(`adversary detail`),
    baseline = mean(`baseline detail`),
    blurred = mean(`blurred detail`)
)

print("Average detail ranks for each method")
print(detail_avg_ranks_total)
print("Pairwise comparison table")
# The pairwise tests run on the per-patient averages, not on the raw rows.
detail_pvals <- build_pval_table(detail_avg_ranks)
print(detail_pvals)

####################
## Evaluation of presence of banding

# New columns: one logical column per method, filled in below with whether
# the reader answered "yes" for the image produced by that method.
for (method in method_names) {
    tbl <- add_column(tbl, !! method := NA)
}

# Questionnaire columns holding the yes/no answer for each anonymized letter.
presence_cols <- c("A", "B", "C")
yes_answers <- c("yes", "y")
no_answers <- c("no", "n")

for (row in seq_len(nrow(tbl))) {
    patient <- tbl[[row, "Patient ID"]]
    for (column in presence_cols) {
        value_str <- tolower(tbl[[row, column]])
        # %in% never yields NA, so a blank cell is reported through the same
        # error as any other unexpected answer instead of breaking the `if`.
        if (!(value_str %in% c(yes_answers, no_answers))) {
            stop(paste("non yes/no found:", value_str))
        }
        value <- value_str %in% yes_answers

        # Here the column name itself is the anonymized letter to look up.
        patient_row <- key %>% filter(volume_id == patient & letter == column)
        if (nrow(patient_row) != 1) {
            stop(paste0(
                "expected exactly one key entry for patient ", patient,
                " and letter ", column, ", found ", nrow(patient_row)
            ))
        }
        method_name <- patient_row[['method']]
        tbl[row, method_name] <- value
    }
}

# Keep only the identifiers and the per-method presence flags. all_of() makes
# explicit that method_names is an external character vector of column names
# (passing it bare is deprecated in tidyselect).
presence <- tbl %>% select("Patient ID", "reader", all_of(method_names))
# Fraction of rows in which each method was marked "yes".
presence_avg_ranks_total <- presence %>% summarize(
    adversary = mean(`adversary`),
    baseline = mean(`baseline`),
    blurred = mean(`blurred`),
    )
print(presence_avg_ranks_total)

# Bonferroni-corrected confidence level for the three tests below. In
# binom.test() conf.level only sets the width of the reported confidence
# interval; the p-value is unaffected.
clvl <- 1 - 0.05 / 3
print("Adversary presence test")
print(binom.test(sum(presence$adversary), length(presence$adversary), conf.level = clvl))
# The next two labels name the method actually being tested.
print("Baseline presence test")
print(binom.test(sum(presence$baseline), length(presence$baseline), conf.level = clvl))
print("Blurred presence test")
print(binom.test(sum(presence$blurred), length(presence$blurred), conf.level = clvl))

print("Adversary ranked first test")
# A rank of 3 is "Least banding" and 2.5 is the tie default, so >= 2.499
# counts rows where the adversary was best or tied for best. The result is
# printed explicitly because top-level values are not auto-printed when the
# script is run through source().
print(binom.test(
    x = sum(banding_ranks$`adversary banding` >= 2.499),
    n = length(banding_ranks$`adversary banding`)
))

#### Per reader averages
# Mean banding rank each reader gave each method.
banding_by_reader <- group_by(banding_ranks, `reader`)
banding_reader_avgs <- summarise(
    banding_by_reader,
    adversary = mean(`adversary banding`),
    baseline = mean(`baseline banding`),
    blurred = mean(`blurred banding`)
)

# Mean detail rank each reader gave each method.
detail_by_reader <- group_by(detail_ranks, `reader`)
detail_reader_avgs <- summarise(
    detail_by_reader,
    adversary = mean(`adversary detail`),
    baseline = mean(`baseline detail`),
    blurred = mean(`blurred detail`)
)

print(banding_reader_avgs)
print(detail_reader_avgs)


# Fraction of "yes" presence answers per reader and method.
presence_by_reader <- group_by(presence, `reader`)
presence_reader_avgs <- summarise(
    presence_by_reader,
    adversary = mean(`adversary`),
    baseline = mean(`baseline`),
    blurred = mean(`blurred`)
)
print(presence_reader_avgs)
