diff --git a/R/count.R b/R/count.R index afb2f8c..1e17b11 100644 --- a/R/count.R +++ b/R/count.R @@ -112,25 +112,24 @@ record_counts <- function(unique_citations, citations, db_colname) { #' #' @return A dataframe with counts of distinct records, imported records, and unique records for each source, including total counts and several calculated ratios and percentages. #' @examples -# unique_citations <- data.frame( -# db_source = c("Database1", "Database1", "Database2", "Database3", "Database3", "Database3"), -# other_data = 1:6 -# ) -# -# citations <- data.frame( -# db_source = c("Database1", "Database1", "Database1", "Database2", "Database2", "Database3"), -# other_data = 7:12 -# ) -# -# n_unique <- data.frame( -# cite_source = c("Database1", "Database2", "Database2", "Database3", "Database3", "Database3"), -# cite_label = c("search", "final", "search", "search", "search", "final"), -# unique = c(1, 0, 1, 1, 1, 0) -# ) -# -# result <- calculate_record_counts(unique_citations, citations, n_unique, "db_source") -# result - +#' unique_citations <- data.frame( +#' db_source = c("Database1", "Database1", "Database2", "Database3", "Database3", "Database3"), +#' other_data = 1:6 +#' ) +#' +#' citations <- data.frame( +#' db_source = c("Database1", "Database1", "Database1", "Database2", "Database2", "Database3"), +#' other_data = 7:12 +#' ) +#' +#' n_unique <- data.frame( +#' cite_source = c("Database1", "Database2", "Database2", "Database3", "Database3", "Database3"), +#' cite_label = c("search", "final", "search", "search", "search", "final"), +#' unique = c(1, 0, 1, 1, 1, 0) +#' ) +#' +#' result <- calculate_record_counts(unique_citations, citations, n_unique, "db_source") +#' print(result) calculate_record_counts <- function(unique_citations, citations, n_unique, db_colname) { @@ -222,21 +221,21 @@ calculate_record_counts <- function(unique_citations, citations, n_unique, db_co #' and recall for each source, as well as totals. 
#' #' @examples -# unique_citations <- data.frame( -# db_source = c("Database1", "Database1", "Database2", "Database3", "Database3", "Database3"), -# cite_label = c("screened", "final", "screened", "final", "screened", "final"), -# duplicate_id = c(102, 102, 103, 103, 104, 104), -# other_data = 1:6 -# ) -# -# citations <- data.frame( -# db_source = c("Database1", "Database1", "Database1", "Database2", "Database2", "Database3"), -# cite_label = c("screened", "final", "screened", "final", "screened", "final"), -# other_data = 7:12 -# ) -# -# result <- calculate_phase_count(unique_citations, citations, "db_source") -# result +#' unique_citations <- data.frame( +#' db_source = c("Database1", "Database1", "Database2", "Database3", "Database3", "Database3"), +#' cite_label = c("screened", "final", "screened", "final", "screened", "final"), +#' duplicate_id = c(102, 102, 103, 103, 104, 104), +#' other_data = 1:6 +#' ) +#' +#' citations <- data.frame( +#' db_source = c("Database1", "Database1", "Database1", "Database2", "Database2", "Database3"), +#' cite_label = c("screened", "final", "screened", "final", "screened", "final"), +#' other_data = 7:12 +#' ) +#' +#' result <- calculate_phase_count(unique_citations, citations, "db_source") +#' result calculate_phase_count <- function(unique_citations, citations, db_colname) { diff --git a/R/dedup.R b/R/dedup.R index c8fb35e..c4af832 100644 --- a/R/dedup.R +++ b/R/dedup.R @@ -6,7 +6,7 @@ #' @export #' @param raw_citations Citation dataframe with relevant columns #' @param manual logical. If TRUE, manually specify pairs of duplicates to merge. Default is FALSE. -#' @param shiny_progress logical. If TRUE, show a progress bar in the Shiny app. Default is FALSE. +#' @param show_unknown_tags logical. If TRUE, a missing label, source, or other merged field is shown as "unknown". Default is FALSE.
#' @return unique citations formatted for CiteSource #' @examples #' # Load example data from the package @@ -17,7 +17,7 @@ #' dedup_results <- dedup_citations(examplecitations) #' -dedup_citations <- function(raw_citations, manual=FALSE, shiny_progress=FALSE, show_unknown_tags=FALSE){ +dedup_citations <- function(raw_citations, manual=FALSE, show_unknown_tags=FALSE){ # rename or coalesce columns targets <- c("journal", "number", "pages", "isbn", "record_id") @@ -37,7 +37,7 @@ dedup_citations <- function(raw_citations, manual=FALSE, shiny_progress=FALSE, s raw_citations$source <- raw_citations$cite_source raw_citations$label <- raw_citations$cite_label - dedup_results <- ASySD::dedup_citations(raw_citations, merge_citations = TRUE, extra_merge_fields = "cite_string", shiny_progress=shiny_progress, show_unknown_tags = show_unknown_tags) + dedup_results <- ASySD::dedup_citations(raw_citations, merge_citations = TRUE, extra_merge_fields = "cite_string", show_unknown_tags = show_unknown_tags) if(manual == FALSE){ diff --git a/inst/shiny-app/CiteSource/app.R b/inst/shiny-app/CiteSource/app.R index 1680863..06138ad 100644 --- a/inst/shiny-app/CiteSource/app.R +++ b/inst/shiny-app/CiteSource/app.R @@ -1,4 +1,6 @@ library(DT) +library(CiteSource) +library(dplyr) # Set background colour shiny::tags$head(shiny::tags$style( @@ -342,7 +344,7 @@ server <- function(input, output, session) { suggested_source <- stringr::str_replace_all(input$file$name, ".ris", "") suggested_source <- stringr::str_replace_all(suggested_source, ".bib", "") suggested_source <- stringr::str_replace_all(suggested_source, ".txt", "") - upload_df <- read_citations( + upload_df <- CiteSource::read_citations( files = input$file$datapath, cite_sources = suggested_source, cite_labels = rep("", length(input$file$datapath)), @@ -479,10 +481,22 @@ server <- function(input, output, session) { rv$upload_df <- rv$upload_df %>% dplyr::mutate(record_id = as.character(1000 + dplyr::row_number())) # results of auto 
dedup - dedup_results <- dedup_citations(rv$upload_df, manual = TRUE, shiny_progress = TRUE, show_unknown_tags=TRUE) + dedup_results <- CiteSource::dedup_citations(rv$upload_df, manual = TRUE, show_unknown_tags=TRUE) - # unique and pairs to check sent to reactive values - rv$pairs_to_check <- dedup_results$manual_dedup + # Possible way of PRIORITISING manual dedup + # priority_df <- rv$upload_df %>% + # filter(cite_label %in% c("final", "screened")) %>% + # select(record_id) + + # manual_to_review <- dedup_results$manual_dedup %>% + # mutate(match_score_ls = RecordLinkage::levenshteinSim(title1, title2)) %>% + # arrange(desc(match_score_ls)) %>% + # mutate(priority = ifelse(record_id1 %in% priority_df$record_id, "Yes", "No")) %>% + # mutate(priority = ifelse(record_id2 %in% priority_df$record_id, "Yes", priority)) %>% + # arrange(desc(priority)) %>% + # filter(priority == "Yes") + + rv$pairs_to_check <- dedup_results$manual_dedup rv$latest_unique <- dedup_results$unique # generate shiny alert with dedup results @@ -597,9 +611,8 @@ server <- function(input, output, session) { datatable(data, options = list( - dom = "t", - pageLength = 10, - lengthMenu = c(10, 20, 30, 40), + pageLength = 100, info = FALSE, + lengthMenu = list(c(100, -1), c("100", "All")), columnDefs = list( list(visible = FALSE, diff --git a/man/calculate_phase_count.Rd b/man/calculate_phase_count.Rd index 098d8ab..c262240 100644 --- a/man/calculate_phase_count.Rd +++ b/man/calculate_phase_count.Rd @@ -29,3 +29,20 @@ will give a warning if these labels are not present in the input dataframes. The function will give a warning if 'screened' and 'final' labels are not present in the 'cite_label' column of the input dataframes. 
} +\examples{ +unique_citations <- data.frame( +db_source = c("Database1", "Database1", "Database2", "Database3", "Database3", "Database3"), +cite_label = c("screened", "final", "screened", "final", "screened", "final"), +duplicate_id = c(102, 102, 103, 103, 104, 104), +other_data = 1:6 +) + +citations <- data.frame( +db_source = c("Database1", "Database1", "Database1", "Database2", "Database2", "Database3"), +cite_label = c("screened", "final", "screened", "final", "screened", "final"), +other_data = 7:12 +) + +result <- calculate_phase_count(unique_citations, citations, "db_source") +result +} diff --git a/man/calculate_record_counts.Rd b/man/calculate_record_counts.Rd index 518df56..fbd38b2 100644 --- a/man/calculate_record_counts.Rd +++ b/man/calculate_record_counts.Rd @@ -24,3 +24,23 @@ This function calculates the counts of distinct records, records imported, and u It combines these counts into one dataframe and calculates several ratios and percentages related to the unique and distinct counts. It also calculates the total for each count type. 
} +\examples{ +unique_citations <- data.frame( + db_source = c("Database1", "Database1", "Database2", "Database3", "Database3", "Database3"), + other_data = 1:6 +) + +citations <- data.frame( + db_source = c("Database1", "Database1", "Database1", "Database2", "Database2", "Database3"), + other_data = 7:12 +) + +n_unique <- data.frame( + cite_source = c("Database1", "Database2", "Database2", "Database3", "Database3", "Database3"), + cite_label = c("search", "final", "search", "search", "search", "final"), + unique = c(1, 0, 1, 1, 1, 0) +) + +result <- calculate_record_counts(unique_citations, citations, n_unique, "db_source") +print(result) +} diff --git a/man/dedup_citations.Rd b/man/dedup_citations.Rd index 75f2608..2a2f459 100644 --- a/man/dedup_citations.Rd +++ b/man/dedup_citations.Rd @@ -4,19 +4,14 @@ \alias{dedup_citations} \title{Deduplicate citations - ASySD wrapper} \usage{ -dedup_citations( - raw_citations, - manual = FALSE, - shiny_progress = FALSE, - show_unknown_tags = FALSE -) +dedup_citations(raw_citations, manual = FALSE, show_unknown_tags = FALSE) } \arguments{ \item{raw_citations}{Citation dataframe with relevant columns} \item{manual}{logical. If TRUE, manually specify pairs of duplicates to merge. Default is FALSE.} -\item{shiny_progress}{logical. If TRUE, show a progress bar in the Shiny app. Default is FALSE.} +\item{show_unknown_tags}{logical. If TRUE, a missing label, source, or other merged field is shown as "unknown". Default is FALSE.} } \value{ unique citations formatted for CiteSource