Skip to content

Commit

Permalink
Merge pull request #171 from ESHackathon/asysd_fixes
Browse files Browse the repository at this point in the history
As discussed
  • Loading branch information
LukasWallrich authored May 3, 2024
2 parents a0f24d7 + 869b5a0 commit e4d5676
Show file tree
Hide file tree
Showing 6 changed files with 95 additions and 51 deletions.
67 changes: 33 additions & 34 deletions R/count.R
Original file line number Diff line number Diff line change
Expand Up @@ -112,25 +112,24 @@ record_counts <- function(unique_citations, citations, db_colname) {
#'
#' @return A dataframe with counts of distinct records, imported records, and unique records for each source, including total counts and several calculated ratios and percentages.
#' @examples
# unique_citations <- data.frame(
# db_source = c("Database1", "Database1", "Database2", "Database3", "Database3", "Database3"),
# other_data = 1:6
# )
#
# citations <- data.frame(
# db_source = c("Database1", "Database1", "Database1", "Database2", "Database2", "Database3"),
# other_data = 7:12
# )
#
# n_unique <- data.frame(
# cite_source = c("Database1", "Database2", "Database2", "Database3", "Database3", "Database3"),
# cite_label = c("search", "final", "search", "search", "search", "final"),
# unique = c(1, 0, 1, 1, 1, 0)
# )
#
# result <- calculate_record_counts(unique_citations, citations, n_unique, "db_source")
# result

#' unique_citations <- data.frame(
#' db_source = c("Database1", "Database1", "Database2", "Database3", "Database3", "Database3"),
#' other_data = 1:6
#' )
#'
#' citations <- data.frame(
#' db_source = c("Database1", "Database1", "Database1", "Database2", "Database2", "Database3"),
#' other_data = 7:12
#' )
#'
#' n_unique <- data.frame(
#' cite_source = c("Database1", "Database2", "Database2", "Database3", "Database3", "Database3"),
#' cite_label = c("search", "final", "search", "search", "search", "final"),
#' unique = c(1, 0, 1, 1, 1, 0)
#' )
#'
#' result <- calculate_record_counts(unique_citations, citations, n_unique, "db_source")
#' print(result)

calculate_record_counts <- function(unique_citations, citations, n_unique, db_colname) {

Expand Down Expand Up @@ -222,21 +221,21 @@ calculate_record_counts <- function(unique_citations, citations, n_unique, db_co
#' and recall for each source, as well as totals.
#'
#' @examples
# unique_citations <- data.frame(
# db_source = c("Database1", "Database1", "Database2", "Database3", "Database3", "Database3"),
# cite_label = c("screened", "final", "screened", "final", "screened", "final"),
# duplicate_id = c(102, 102, 103, 103, 104, 104),
# other_data = 1:6
# )
#
# citations <- data.frame(
# db_source = c("Database1", "Database1", "Database1", "Database2", "Database2", "Database3"),
# cite_label = c("screened", "final", "screened", "final", "screened", "final"),
# other_data = 7:12
# )
#
# result <- calculate_phase_count(unique_citations, citations, "db_source")
# result
#' unique_citations <- data.frame(
#' db_source = c("Database1", "Database1", "Database2", "Database3", "Database3", "Database3"),
#' cite_label = c("screened", "final", "screened", "final", "screened", "final"),
#' duplicate_id = c(102, 102, 103, 103, 104, 104),
#' other_data = 1:6
#' )
#'
#' citations <- data.frame(
#' db_source = c("Database1", "Database1", "Database1", "Database2", "Database2", "Database3"),
#' cite_label = c("screened", "final", "screened", "final", "screened", "final"),
#' other_data = 7:12
#' )
#'
#' result <- calculate_phase_count(unique_citations, citations, "db_source")
#' result


calculate_phase_count <- function(unique_citations, citations, db_colname) {
Expand Down
6 changes: 3 additions & 3 deletions R/dedup.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#' @export
#' @param raw_citations Citation dataframe with relevant columns
#' @param manual logical. If TRUE, manually specify pairs of duplicates to merge. Default is FALSE.
#' @param shiny_progress logical. If TRUE, show a progress bar in the Shiny app. Default is FALSE.
#' @param show_unknown_tags logical. If TRUE, a missing label, source, or other merged field is shown as "unknown". Default is FALSE.
#' @return unique citations formatted for CiteSource
#' @examples
#' # Load example data from the package
Expand All @@ -17,7 +17,7 @@
#' dedup_results <- dedup_citations(examplecitations)
#'

dedup_citations <- function(raw_citations, manual=FALSE, shiny_progress=FALSE, show_unknown_tags=FALSE){
dedup_citations <- function(raw_citations, manual=FALSE, show_unknown_tags=FALSE){

# rename or coalesce columns
targets <- c("journal", "number", "pages", "isbn", "record_id")
Expand All @@ -37,7 +37,7 @@ dedup_citations <- function(raw_citations, manual=FALSE, shiny_progress=FALSE, s
raw_citations$source <- raw_citations$cite_source
raw_citations$label <- raw_citations$cite_label

dedup_results <- ASySD::dedup_citations(raw_citations, merge_citations = TRUE, extra_merge_fields = "cite_string", shiny_progress=shiny_progress, show_unknown_tags = show_unknown_tags)
dedup_results <- ASySD::dedup_citations(raw_citations, merge_citations = TRUE, extra_merge_fields = "cite_string", show_unknown_tags = show_unknown_tags)

if(manual == FALSE){

Expand Down
27 changes: 20 additions & 7 deletions inst/shiny-app/CiteSource/app.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
library(DT)
library(CiteSource)
library(dplyr)

# Set background colour
shiny::tags$head(shiny::tags$style(
Expand Down Expand Up @@ -342,7 +344,7 @@ server <- function(input, output, session) {
suggested_source <- stringr::str_replace_all(input$file$name, ".ris", "")
suggested_source <- stringr::str_replace_all(suggested_source, ".bib", "")
suggested_source <- stringr::str_replace_all(suggested_source, ".txt", "")
upload_df <- read_citations(
upload_df <- CiteSource::read_citations(
files = input$file$datapath,
cite_sources = suggested_source,
cite_labels = rep("", length(input$file$datapath)),
Expand Down Expand Up @@ -479,10 +481,22 @@ server <- function(input, output, session) {
rv$upload_df <- rv$upload_df %>% dplyr::mutate(record_id = as.character(1000 + dplyr::row_number()))

# results of auto dedup
dedup_results <- dedup_citations(rv$upload_df, manual = TRUE, shiny_progress = TRUE, show_unknown_tags=TRUE)
dedup_results <- CiteSource::dedup_citations(rv$upload_df, manual = TRUE, show_unknown_tags=TRUE)

# unique and pairs to check sent to reactive values
rv$pairs_to_check <- dedup_results$manual_dedup
# Possible way of PRIORITISING manual dedup
# priority_df <- rv$upload_df %>%
# filter(cite_label %in% c("final", "screened")) %>%
# select(record_id)

# manual_to_review <- dedup_results$manual_dedup %>%
# mutate(match_score_ls = RecordLinkage::levenshteinSim(title1, title2)) %>%
# arrange(desc(match_score_ls)) %>%
# mutate(priority = ifelse(record_id1 %in% priority_df$record_id, "Yes", "No")) %>%
# mutate(priority = ifelse(record_id2 %in% priority_df$record_id, "Yes", priority)) %>%
# arrange(desc(priority)) %>%
# filter(priority == "Yes")

rv$pairs_to_check <- dedup_results$manual_dedup
rv$latest_unique <- dedup_results$unique

# generate shiny alert with dedup results
Expand Down Expand Up @@ -597,9 +611,8 @@ server <- function(input, output, session) {

datatable(data,
options = list(
dom = "t",
pageLength = 10,
lengthMenu = c(10, 20, 30, 40),
pageLength = 100, info = FALSE,
lengthMenu = list(c(100, -1), c("100", "All")),
columnDefs =
list(
list(visible = FALSE,
Expand Down
17 changes: 17 additions & 0 deletions man/calculate_phase_count.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 20 additions & 0 deletions man/calculate_record_counts.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 2 additions & 7 deletions man/dedup_citations.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit e4d5676

Please sign in to comment.