classification_module.R

# UI function for the classification plot module

ClassiPlotUI <- function(id, label = "Risk Classification") {
  ns <- NS(id)
  
  ############################ ----- UI ----- ################################################################################
  
  
  tagList(
    fluidPage(
      theme = shinythemes::shinytheme(theme = "paper"), # this is the theme Amanda chose, do not change
      mainPanel(width = 12, height = 1225, # creates the sizing for the main panel
                fluidRow(
                  # column 1 row 1 is for selecting a mutually exclusive structural alteration (like a fusion) from a drop-down menu
                  
                  box(
                    width = 4,
                    height = 525,
                    title = tags$h5("The Risk Classification Module", style = "color: #3c8dbc; margin-top: -15px; font-weight: bold;"),
                    p("This module is designed to display the risk classification for a subset of patients based on a combination
                      of structural alterations and mutations."), 
                    p("The upper-middle box contains a dropdown menu for structural alterations
                      which are mostly mutually exclusive. The upper-right box contains checkboxes for various mutations. Multiple mutations
                      can be chosen."),
                    p("The boxes in the middle row are the results of the chosen combinations. The lower-left box outputs the 'canonical'
                      risk classification based on a combination of Children's Oncology Group and Meshinchi Lab knowledge. The lower-middle 
                      and lower-right boxes show survival curves for overall survival and event-free survival respectively, which display 
                      current risk classifications in red, orange, and green as well as a reactive curve in dark blue that represents the 
                      chosen combination of alteration and mutation(s) and a light blue curve that represents the selected structural alteration 
                      but opposite mutations (ie. if 'None' fusion and NPM1, the light blue will be 'None' fusion and no NPM1).")),
                  
                  box(
                    width = 4, # this is half of the main panel width
                    height = 525, # this looks not so good but it needs to be the same length as the mutations box
                    title = tags$h5("Structural Alterations", style = "color: #3c8dbc; margin-top: -15px;"),
                    selectInput(ns("fusion"), NULL,
                                choices = unique(classification$Fusions), selected = "None")  # the default selection is None
                  ),
                  
                  # column 2 row 1 is for selecting a combination of mutations from a group checkbox
                  box(
                    width = 4, # this is half of the main panel width
                    height = 525, # this looks bad but I haven't figured out how to make two columns of mutations and successfully reference them down the line, so I don't know
                    title = tags$h5("Mutations", style = "color: #3c8dbc; margin-top: -15px;"), 
                    checkboxGroupInput(ns("mutations"), label = NULL, choices = colnames(classification)[2:16]) # this subsets the classification dataframe so that the fusions and risk columns are not included
                  )
                ),
                
                # this is the start of row 2
                fluidRow(
                  # column 1 row 2 is for outputting the risk classification text which is based parsing a dataframe of fusions (column 1), mutations (column 2:17), and the risk column (18). 
                  # the rownames are known permutatations in the classification table. If there is a 1 in the mutations, it means it is positive for that mutation. It is important to remember that
                  # this box is only generated by the classification table, which is not statistical TARGET data but simply the current classification schema. Thus, this box could disagree with the 
                  # survival curves and it may be smarter (theoretically) to believe the survival curves as they are data driven and not every permutation is accounted for in the traditional
                  # classification for COG. It's also just important to know for debugging that the survival curves are built separately from this function.
                  box(
                    width = 4,
                    height = 350,
                    title = tags$h5("Risk Classification", style = "color: #3c8dbc; margin-top: -15px;"),
                    uiOutput(ns("styledResultText"))
                  ),
                  
                  # column 2 row 2 is for the overall survival kaplan-meier curve which uses the data from km_cde.csv to generate the default survival curves and the reactive one that will be generated
                  # based on the user inputs 
                  box(
                    width = 4,
                    height = 350,
                    title = tags$h5("Survival Curves: Overall Survival", style = "color: #3c8dbc; margin-top: -15px;"),
                    div(
                      checkboxInput(ns("osCI"), "Confidence Intervals", value = FALSE),
                      style = "margin-top: -25px;"
                    ),
                    #checkboxInput(ns("osRT"), "Show Risk Table", value = FALSE),
                    plotOutput(ns("oscurves"))
                  ),
                  
                  
                  # column 3 row 2 is for the event-free survival kaplan-meier curve. See above.
                  box(
                    width = 4,
                    height = 350,
                    title = tags$h5("Survival Curves: Event-Free Survival", style = "color: #3c8dbc; margin-top: -15px;"),
                    div(
                      checkboxInput(ns("efsCI"), "Confidence Intervals", value = FALSE),
                      style = "margin-top: -25px;"
                    ),
                    #checkboxInput(ns("efsRT"), "Show Risk Table", value = FALSE),
                    plotOutput(ns("efscurves"))
                )),
                
                fluidRow(
                  # column 1 row 2 is for outputting the risk classification text which is based parsing a dataframe of fusions (column 1), mutations (column 2:17), and the risk column (18). 
                  # the rownames are known permutatations in the classification table. If there is a 1 in the mutations, it means it is positive for that mutation. It is important to remember that
                  # this box is only generated by the classification table, which is not statistical TARGET data but simply the current classification schema. Thus, this box could disagree with the 
                  # survival curves and it may be smarter (theoretically) to believe the survival curves as they are data driven and not every permutation is accounted for in the traditional
                  # classification for COG. It's also just important to know for debugging that the survival curves are built separately from this function.
                  box(
                    width = 4,
                    height = 350,
                    title = tags$h5("Age Distribution", style = "color: #3c8dbc; margin-top: -15px;"),
                    plotlyOutput(ns("age_hist"))
                  ),
                  
                  # column 2 row 2 is for the overall survival kaplan-meier curve which uses the data from km_cde.csv to generate the default survival curves and the reactive one that will be generated
                  # based on the user inputs 
                  box(
                    width = 4,
                    height = 350,
                    title = tags$h5("Event Type", style = "color: #3c8dbc; margin-top: -15px;"),
                    plotlyOutput(ns("event_pie"))
                  ),
                  
                  # column 3 row 2 is for the event-free survival kaplan-meier curve. See above.
                  box(
                    width = 4,
                    height = 350,
                    title = tags$h5("Selected USIs", style = "color: #3c8dbc; margin-top: -15px;"),
                    div(style = "overflow-y: scroll; height: 250px;",
                    DT::dataTableOutput(ns("usi_table")))
                  )
                )
      )
    )
  )
}

# this is the server function for this module
ClassiPlot <- function(input, output, session) {
  
  # this is the reactive function for generating the risk classification box (column 1, row 2). The idea is that there are many permutations that are rows in the dataframe and that you subset that 
  # dataframe based on the selections the user makes in picking structural alterations and mutations. Eventually you will be left with either 1 row or 0 rows, the latter of which implies the permutation
  # does not exist in the current classification schema. 
  
  filtered_data <- reactive({ 
    
    # filter the 'classification' data based on user input for 'fusion' and 'mutations'
    filter_data <- classification %>%
      filter(
        if (input$fusion == "None") {
          # if 'fusion' is null or set to 'None', filter based on 'mutations'
          Fusions == "None" & rowSums(select(., input$mutations) == 1) == length(input$mutations)
        } else {
          # otherwise, filter based on the selected 'fusion'
          Fusions == input$fusion
        }
      ) %>%
      filter(
        if (length(input$mutations) == 1) {
          # if only one mutation is selected, filter based on that mutation
          selected_mutation <- input$mutations
          select(., selected_mutation) == 1 & rowSums(select(., setdiff(colnames(classification)[2:17], selected_mutation)) == 1) == 0
        } else if (length(input$mutations) == 2) {
          # if two mutations are selected, filter based on both mutations
          selected_mutations <- input$mutations
          rowSums(select(., selected_mutations) == 1) == 2 & rowSums(select(., setdiff(colnames(classification)[2:17], selected_mutations)) == 1) == 0
        } else if (length(input$mutations) == 3) {
          # if three mutations are selected, filter based on all three mutations
          selected_mutations <- input$mutations
          rowSums(select(., selected_mutations) == 1) == 3 & rowSums(select(., setdiff(colnames(classification)[2:17], selected_mutations)) == 1) == 0
        } else {
          # if no mutations are selected, filter out all rows with mutations
          rowSums(select(., colnames(classification)[2:17]) == 1) == 0
        })
    
    # additional handling for cases where the initial filtering did not yield any results
    if (nrow(filter_data) == 0 && !is.null(input$fusion) && input$fusion != "None") {
      # filter data for high risk if fusion is selected and no mutations are present
      fusion_high_risk <- classification %>%
        filter(Fusions == input$fusion, rowSums(select(., colnames(classification)[2:17]) == 1) == 0, Risk == "High Risk")
      
      if (nrow(fusion_high_risk) == 1) {
        return(fusion_high_risk)  # return the filtered high-risk fusion data
      }
    }
    
    return(filter_data)  # return the filtered data based on user input
})
  
  
  
  # render UI for displaying the risk classification result text
  output$styledResultText <- renderUI({
    
    # get the filtered data based on user input
    filtered_data_result <- filtered_data()
    
    # check if there is exactly one row in the filtered data
    if (nrow(filtered_data_result) != 1) {
      # display a message when there is no or more than one classification
      return(tags$div(
        style = "color: #3c8dbc; font-size: 25px; text-align: center; margin-top:60px;",
        "This combination does not \n have a classification"
      ))
    } else {
      # extract the risk level from the filtered data
      risk_level <- as.character(filtered_data_result$Risk)
      
      # set the style based on the risk level
      if (risk_level == "Low Risk") {
        style <- "color: green; font-size:40px; text-align:center; margin-top:60px;"
      } else if (risk_level == "Standard Risk") {
        style <- "color: orange; font-size:40px; text-align:center; margin-top:60px;"
      } else if (risk_level == "High Risk") {
        style <- "color: red; font-size:40px; text-align:center; margin-top:60px;"
      } else {
        style <- "color: #3c8dbc; font-size:40px; text-align:center; margin-top:60px;"
      }
      
      # display the risk level with the specified style
      return(tags$div(style = style, risk_level))
    }
  })
  
  bind_data <- reactive({
    fusion_selection <- input$fusion
    mutations_selection <- input$mutations
    inverse_subset_data <- data.frame()  # Initialize an empty data frame for inverse data
    
    # Create a copy of km_cde and add a 'type' column
    km_cde_copy <- km_cde
    km_cde_copy$type <- "original"
    
    if (fusion_selection == "None" && length(mutations_selection) == 0) {
      subset_data <- km_cde_copy %>%
        filter(Fusion == "None")
    } else if (fusion_selection == "None") {
      subset_data <- km_cde_copy %>%
        filter(rowSums(select(., mutations_selection) == 1) == length(mutations_selection))
    } else {
      subset_data <- km_cde_copy %>%
        filter(Fusion == fusion_selection, rowSums(select(., mutations_selection) == 1) == length(mutations_selection))
    }
    
    if (length(mutations_selection) > 0) {
      inverse_subset_data <- km_cde_copy %>%
        filter(Fusion == fusion_selection) %>%
        filter(rowSums(select(., mutations_selection) == 1) == 0)
      inverse_subset_data$Risk <- "Inverse"
    }
    
    if (nrow(subset_data) > 0) {
      subset_data$Risk <- "Selected"
      subset_data$type <- "selected"
      combined_data <- rbind(km_cde_copy, subset_data)
      
      if (nrow(inverse_subset_data) > 0) {
        combined_data <- rbind(combined_data, inverse_subset_data)
      }
    } else {
      combined_data <- km_cde_copy
    }
    
    return(combined_data)
  })
  
  # render plot for Overall Survival (OS)
  output$oscurves <- renderPlot({
    
    # get the combined data for OS plots
    combined_data <- bind_data()
    
    combined_data$OS.Years <- as.numeric(combined_data$OS.Years)
    combined_data$OS.codeID <- as.numeric(combined_data$OS.codeID)
    
    if (length(table(combined_data$Risk)) > 4){
      combined_data$Risk <- factor(combined_data$Risk, levels = c("Low", "Standard", "High", "Selected", "Inverse"))
    }
    else {
      combined_data$Risk <- factor(combined_data$Risk, levels = c("Low", "Standard", "High", "Selected"))
    }
    
    sample_size_selected <- sum(combined_data$Risk == "Selected")
    
    conf.int <- input$osCI
    #risk.table <- input$osRT
    
    # Check if sample_size_selected is 0 and display a warning message if true
    if (sample_size_selected == 0) {
      warning_message <- "There are no samples \n with this combination"
      plot(NULL, xlim = c(0, 1), ylim = c(0, 1), type = "n", xlab = "", ylab = "", axes = FALSE)
      text(0.5, 0.7, warning_message, cex = 1.5, col = "#3c8dbc")
    } else {
      sfit1 <- survfit(Surv(OS.Years, OS.codeID) ~ Risk, data = combined_data)
      
      os_plot <- ggsurvplot(
        sfit1,
        data = combined_data,
        pval = FALSE,
        pval.coord = c(1,0.05),
        pval.size = 4.5,
        conf.int = conf.int,
        risk.table = FALSE,
        xlim = c(0,10),
        xlab = "Years",
        ylab = "Overall survival probability",
        break.time.by = 1,
        palette = c("#008001", "#ffa500", "#ff0000", "#1b4dab", "#aec6f2"),
        linetype = c("dotted", "dotted", "dotted", "solid", "longdash"),
        censor = FALSE,
        ggtheme = theme_minimal(),
        legend = "right",
        legend.title = paste("'Selected' n:",sample_size_selected)
      )
      
      os_plot <- ggpar(os_plot, 
                       font.x = 12,
                       font.y = 12,
                       font.caption = 12, 
                       font.legend = 12, 
                       font.tickslab = 12)

      return(os_plot)
    }
  }, height = 250)
  
  # render plot for Event-Free Survival (EFS)
  output$efscurves <- renderPlot({
    
    # get the combined data for EFS plots
    combined_data <- bind_data()
    
    combined_data$EFS.Years <- as.numeric(combined_data$EFS.Years)
    combined_data$EFS.codeID <- as.numeric(combined_data$EFS.codeID)
    
    if (length(table(combined_data$Risk)) > 4){
      combined_data$Risk <- factor(combined_data$Risk, levels = c("Low", "Standard", "High", "Selected", "Inverse"))
    }
    else {
      combined_data$Risk <- factor(combined_data$Risk, levels = c("Low", "Standard", "High", "Selected"))
    }
    
    sample_size_selected <- sum(combined_data$Risk == "Selected")
    
    conf.int <- input$efsCI
    #risk.table <- input$efsRT
    
    # Check if sample_size_selected is 0 and display a warning message if true
    if (sample_size_selected == 0) {
      warning_message <- "There are no samples \n with this combination"
      plot(NULL, xlim = c(0, 1), ylim = c(0, 1), type = "n", xlab = "", ylab = "", axes = FALSE)
      text(0.5, 0.7, warning_message, cex = 1.5, col = "#3c8dbc")
    } else {
      sfit2 <- survfit(Surv(EFS.Years, EFS.codeID) ~ Risk, data = combined_data)
      
      efs_plot <- ggsurvplot(
        sfit2,
        data = combined_data,
        pval = FALSE,
        pval.coord = c(1,0.05),
        pval.size = 4.5,
        conf.int = conf.int,
        risk.table = FALSE,
        xlab = "Years",
        ylab = "Event-Free survival probability",
        break.time.by = 1,
        xlim = c(0,10),
        palette = c("#008001", "#ffa500", "#ff0000", "#1b4dab", "#aec6f2"),
        linetype = c("dotted", "dotted", "dotted", "solid", "longdash"),
        censor = FALSE,
        ggtheme = theme_minimal(),
        legend = "right",
        legend.title = paste("'Selected' n:",sample_size_selected)
      )
      
      efs_plot <- ggpar(efs_plot, 
                        font.x = 12,
                        font.y = 12,
                        font.caption = 12, 
                        font.legend = 12, 
                        font.tickslab = 12)
      
      return(efs_plot)
    }
  }, height = 250)
  
  output$age_hist <- renderPlotly({
    combined_data <- bind_data()
    sample_size_selected <- sum(combined_data$Risk == "Selected")
    
    if (sample_size_selected == 0) {
      return(NULL)
    } 
    else {
      combined_data <- filter(combined_data, combined_data$Risk == "Selected")
      
      # Create a bar plot using plot_ly
      plot_ly(data = combined_data, x = ~Age.in.years, type = "histogram", marker = list(color = viridis(30), line = list(color = "white", width = 0.5)), nbinsx = 30, height = 250) %>%
        layout(title = list(text = NULL), xaxis = list(title = "Age in years"), yaxis = list(title = "Frequency"))
    }
  })
  
  output$event_pie <- renderPlotly({
    combined_data <- bind_data()
    sample_size_selected <- sum(combined_data$Risk == "Selected")
    
    if (sample_size_selected == 0) {
      return(NULL)
    } else {
      combined_data <- filter(combined_data, combined_data$Risk == "Selected")
      combined_data$EFS.eventID <- ifelse(combined_data$EFS.eventID == "Death without remission", "Death w/o remis", combined_data$EFS.eventID)
      
      # Factorize and sort the EventID
      combined_data$EFS.eventID <- factor(combined_data$EFS.eventID, levels = sort(unique(combined_data$EFS.eventID)))
      
      event_type_counts <- table(combined_data$EFS.eventID)
      
      m = list(
        l = 40,
        r = 40,
        b = 50,
        t = 50,
        pad = 0
      )
      
      # Create a pie chart using plot_ly
      plot_ly(data = data.frame(EventID = names(event_type_counts), Count = as.numeric(event_type_counts)),
              labels = ~EventID, values = ~Count, type = "pie", marker = list(colors = viridis(length(levels(combined_data$EFS.eventID)))), sort = FALSE, height = 250) %>%
        layout(title = list(text = NULL), showlegend = TRUE, margin = m)
    }
  })
  
  
  
  
  output$usi_table <- DT::renderDataTable({
    combined_data <- bind_data() # Access the combined data
    sample_size_selected <- sum(combined_data$Risk == "Selected")
    
    if (sample_size_selected == 0) {
      return(NULL)
    }
    
    combined_data <- filter(combined_data, combined_data$Risk == "Selected")
    t <- data.frame(USI = combined_data$USI,
                    Age.in.Years = round(combined_data$Age.in.years, 1),
                    Event.Type = combined_data$EFS.eventID)  # Create a data frame with USI, Age.in.days, and EFS.eventID columns
    
    DT::datatable(t, 
                  class = "compact nowrap hover row-border order-column", # Defines the CSS formatting of the final table, can string multiple options together
                  options = list(
                                 dom = 'frtip',  # 'f' for filter/search, 'r' for processing info, 't' for table
                                 scrollY = TRUE,
                                 searchHighlight = TRUE,
                                 pageLength = 50), 
                  escape = FALSE) %>%
      DT::formatStyle(columns = c(1, 2, 3), fontSize = "100%")  # Apply formatting to all three columns
  })
  
  
  
  
  #################################################################
  #-------------------- FINAL MODULE OUTPUTS ---------------------#
  #################################################################
  
  output$plotos <- renderPlot({
    plotKM(os_plot)
  })
  
  output$plotefs <- renderPlot({
    plotKM(efs_plot)
  })
  
}